/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/sdt.h>

#include "lint.h"
#include "thr_uberdata.h"

/*
 * This mutex is initialized to be held by lwp#1.
 * It is used to block a thread that has returned from a mutex_lock()
 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
 */
mutex_t	stall_mutex = DEFAULTMUTEX;

static int shared_mutex_held(mutex_t *);
static int mutex_unlock_internal(mutex_t *, int);
static int mutex_queuelock_adaptive(mutex_t *);
static void mutex_wakeup_all(mutex_t *);

/*
 * Lock statistics support functions.
 */
void
record_begin_hold(tdb_mutex_stats_t *msp)
{
	tdb_incr(msp->mutex_lock);
	msp->mutex_begin_hold = gethrtime();
}

hrtime_t
record_hold_time(tdb_mutex_stats_t *msp)
{
	hrtime_t now = gethrtime();

	if (msp->mutex_begin_hold)
		msp->mutex_hold_time += now - msp->mutex_begin_hold;
	msp->mutex_begin_hold = 0;
	return (now);
}

/*
 * Called once at library initialization.
 */
void
mutex_setup(void)
{
	if (set_lock_byte(&stall_mutex.mutex_lockw))
		thr_panic("mutex_setup() cannot acquire stall_mutex");
	stall_mutex.mutex_owner = (uintptr_t)curthread;
}

/*
 * The default spin count of 1000 is experimentally determined.
 * On sun4u machines with any number of processors it could be raised
 * to 10,000 but that (experimentally) makes almost no difference.
 * The environment variable:
 *	_THREAD_ADAPTIVE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_adaptive_spin = 1000;
uint_t	thread_max_spinners = 100;
int	thread_queue_verify = 0;
static	int	ncpus;

/*
 * Distinguish spinning for queue locks from spinning for regular locks.
 * We try harder to acquire queue locks by spinning.
 * The environment variable:
 *	_THREAD_QUEUE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_queue_spin = 10000;

#define	ALL_ATTRIBUTES				\
	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
	LOCK_ROBUST)

/*
 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
 * augmented by zero or more of the flags:
 *	LOCK_RECURSIVE
 *	LOCK_ERRORCHECK
 *	LOCK_PRIO_INHERIT
 *	LOCK_PRIO_PROTECT
 *	LOCK_ROBUST
 */
#pragma weak _private_mutex_init = __mutex_init
#pragma weak mutex_init = __mutex_init
#pragma weak _mutex_init = __mutex_init
/* ARGSUSED2 */
int
__mutex_init(mutex_t *mp, int type, void *arg)
{
	int basetype = (type & ~ALL_ATTRIBUTES);
	int error = 0;

	if (basetype == USYNC_PROCESS_ROBUST) {
		/*
		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
		 * retain the USYNC_PROCESS_ROBUST flag so we can return
		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
		 * mutexes will ever draw ELOCKUNMAPPED).
		 */
		type |= (USYNC_PROCESS | LOCK_ROBUST);
		basetype = USYNC_PROCESS;
	}

	if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) ||
	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) {
		error = EINVAL;
	} else if (type & LOCK_ROBUST) {
		/*
		 * Callers of mutex_init() with the LOCK_ROBUST attribute
		 * are required to pass an initially all-zero mutex.
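		 *
		 * For illustration only (an editor's sketch, not original
		 * text), a typical caller would do:
		 *
		 *	mutex_t m;	(static or calloc()d, hence all-zero)
		 *	int err = mutex_init(&m,
		 *	    USYNC_PROCESS | LOCK_ROBUST, NULL);
		 *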
		 * Multiple calls to mutex_init() are allowed; all but
		 * the first return EBUSY.  A call to mutex_init() is
		 * allowed to make an inconsistent robust lock consistent
		 * (for historical usage, even though the proper interface
		 * for this is mutex_consistent()).  Note that we use
		 * atomic_or_16() to set the LOCK_INITED flag so as
		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
		 */
		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
		if (!(mp->mutex_flag & LOCK_INITED)) {
			mp->mutex_type = (uint8_t)type;
			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
			mp->mutex_magic = MUTEX_MAGIC;
		} else if (type != mp->mutex_type ||
		    ((type & LOCK_PRIO_PROTECT) &&
		    mp->mutex_ceiling != (*(int *)arg))) {
			error = EINVAL;
		} else if (__mutex_consistent(mp) != 0) {
			error = EBUSY;
		}
		/* register a process robust mutex with the kernel */
		if (basetype == USYNC_PROCESS)
			register_lock(mp);
	} else {
		(void) _memset(mp, 0, sizeof (*mp));
		mp->mutex_type = (uint8_t)type;
		mp->mutex_flag = LOCK_INITED;
		mp->mutex_magic = MUTEX_MAGIC;
	}

	if (error == 0 && (type & LOCK_PRIO_PROTECT))
		mp->mutex_ceiling = (uint8_t)(*(int *)arg);

	return (error);
}

/*
 * Delete mp from list of ceil mutexes owned by curthread.
 * Return 1 if the head of the chain was updated.
 */
int
_ceil_mylist_del(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t **mcpp;
	mxchain_t *mcp;

	mcpp = &self->ul_mxchain;
	while ((*mcpp)->mxchain_mx != mp)
		mcpp = &(*mcpp)->mxchain_next;
	mcp = *mcpp;
	*mcpp = mcp->mxchain_next;
	lfree(mcp, sizeof (*mcp));
	return (mcpp == &self->ul_mxchain);
}

/*
 * Add mp to head of list of ceil mutexes owned by curthread.
 * Return ENOMEM if no memory could be allocated.
 */
int
_ceil_mylist_add(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t *mcp;

	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
		return (ENOMEM);
	mcp->mxchain_mx = mp;
	mcp->mxchain_next = self->ul_mxchain;
	self->ul_mxchain = mcp;
	return (0);
}

/*
 * Inherit priority from ceiling.  The inheritance impacts the effective
 * priority, not the assigned priority.  See _thread_setschedparam_main().
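 *
 * For illustration only (an editor's sketch, not original text): with a
 * priority-ceiling mutex initialized as
 *
 *	int ceil = 30;
 *	(void) mutex_init(&m, USYNC_THREAD | LOCK_PRIO_PROTECT, &ceil);
 *
 * acquiring the mutex leads to _ceil_prio_inherit(30), which raises the
 * owner's effective priority to 30 for as long as the mutex is held.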
 */
void
_ceil_prio_inherit(int ceil)
{
	ulwp_t *self = curthread;
	struct sched_param param;

	(void) _memset(&param, 0, sizeof (param));
	param.sched_priority = ceil;
	if (_thread_setschedparam_main(self->ul_lwpid,
	    self->ul_policy, &param, PRIO_INHERIT)) {
		/*
		 * Panic since unclear what error code to return.
		 * If we do return the error codes returned by above
		 * called routine, update the man page...
		 */
		thr_panic("_thread_setschedparam_main() fails");
	}
}

/*
 * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
 * if holding at least one ceiling lock.  If no ceiling locks are held at this
 * point, disinherit completely, reverting back to assigned priority.
 */
void
_ceil_prio_waive(void)
{
	ulwp_t *self = curthread;
	struct sched_param param;

	(void) _memset(&param, 0, sizeof (param));
	if (self->ul_mxchain == NULL) {
		/*
		 * No ceil locks held.  Zero the epri, revert back to ul_pri.
		 * Since thread's hash lock is not held, one cannot just
		 * read ul_pri here...do it in the called routine...
		 */
		param.sched_priority = self->ul_pri;	/* ignored */
		if (_thread_setschedparam_main(self->ul_lwpid,
		    self->ul_policy, &param, PRIO_DISINHERIT))
			thr_panic("_thread_setschedparam_main() fails");
	} else {
		/*
		 * Set priority to that of the mutex at the head
		 * of the ceilmutex chain.
		 */
		param.sched_priority =
		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
		if (_thread_setschedparam_main(self->ul_lwpid,
		    self->ul_policy, &param, PRIO_INHERIT))
			thr_panic("_thread_setschedparam_main() fails");
	}
}

/*
 * Clear the lock byte.  Retain the waiters byte and the spinners byte.
 * Return the old value of the lock word.
 */
static uint32_t
clear_lockbyte(volatile uint32_t *lockword)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *lockword;
		new = old & ~LOCKMASK;
	} while (atomic_cas_32(lockword, old, new) != old);

	return (old);
}

/*
 * Increment the spinners count in the mutex lock word.
 * Return 0 on success.  Return -1 if the count would overflow.
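 * (Editor's note: the callers below, mutex_trylock_adaptive() and
 * mutex_trylock_process(), treat a -1 return as "too many spinners
 * already; skip the spin loop".)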
 */
static int
spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *lockword;
		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
			return (-1);
		new = old + (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (0);
}

/*
 * Decrement the spinners count in the mutex lock word.
 * Return the new value of the lock word.
 */
static uint32_t
spinners_decr(volatile uint32_t *lockword)
{
	uint32_t old;
	uint32_t new;

	do {
		new = old = *lockword;
		if (new & SPINNERMASK)
			new -= (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (new);
}

/*
 * Non-preemptive spin locks.  Used by queue_lock().
 * No lock statistics are gathered for these locks.
 * No DTrace probes are provided for these locks.
 */
void
spin_lock_set(mutex_t *mp)
{
	ulwp_t *self = curthread;

	no_preempt(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Spin for a while, attempting to acquire the lock.
	 */
	if (self->ul_spin_lock_spin != UINT_MAX)
		self->ul_spin_lock_spin++;
	if (mutex_queuelock_adaptive(mp) == 0 ||
	    set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Try harder if we were previously at a no-preemption level.
	 */
	if (self->ul_preempt > 1) {
		if (self->ul_spin_lock_spin2 != UINT_MAX)
			self->ul_spin_lock_spin2++;
		if (mutex_queuelock_adaptive(mp) == 0 ||
		    set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			return;
		}
	}
	/*
	 * Give up and block in the kernel for the mutex.
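	 * (Editor's note: a NULL timespec passed to
	 * ___lwp_mutex_timedlock() means no timeout; we block
	 * until the lock is granted.)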
	 */
	if (self->ul_spin_lock_sleep != UINT_MAX)
		self->ul_spin_lock_sleep++;
	(void) ___lwp_mutex_timedlock(mp, NULL);
	mp->mutex_owner = (uintptr_t)self;
}

void
spin_lock_clear(mutex_t *mp)
{
	ulwp_t *self = curthread;

	mp->mutex_owner = 0;
	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
		(void) ___lwp_mutex_wakeup(mp, 0);
		if (self->ul_spin_lock_wakeup != UINT_MAX)
			self->ul_spin_lock_wakeup++;
	}
	preempt(self);
}

/*
 * Allocate the sleep queue hash table.
 */
void
queue_alloc(void)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	mutex_t *mp;
	void *data;
	int i;

	/*
	 * No locks are needed; we call here only when single-threaded.
	 */
	ASSERT(self == udp->ulwp_one);
	ASSERT(!udp->uberflags.uf_mt);
	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
	    == MAP_FAILED)
		thr_panic("cannot allocate thread queue_head table");
	udp->queue_head = (queue_head_t *)data;
	for (i = 0; i < 2 * QHASHSIZE; i++) {
		mp = &udp->queue_head[i].qh_lock;
		mp->mutex_flag = LOCK_INITED;
		mp->mutex_magic = MUTEX_MAGIC;
	}
}

#if defined(THREAD_DEBUG)

/*
 * Debugging: verify correctness of a sleep queue.
 */
void
QVERIFY(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	ulwp_t *prev;
	uint_t index;
	uint32_t cnt = 0;
	char qtype;
	void *wchan;

	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
	ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) ||
	    (qp->qh_head == NULL && qp->qh_tail == NULL));
	if (!thread_queue_verify)
		return;
	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
	for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL;
	    prev = ulwp, ulwp = ulwp->ul_link, cnt++) {
		ASSERT(ulwp->ul_qtype == qtype);
		ASSERT(ulwp->ul_wchan != NULL);
		ASSERT(ulwp->ul_sleepq == qp);
		wchan = ulwp->ul_wchan;
		index = QUEUE_HASH(wchan, qtype);
		ASSERT(&udp->queue_head[index] == qp);
	}
	ASSERT(qp->qh_tail == prev);
	ASSERT(qp->qh_qlen == cnt);
}

#else	/* THREAD_DEBUG */

#define	QVERIFY(qp)

#endif	/* THREAD_DEBUG */

/*
 * Acquire a queue head.
 */
queue_head_t *
queue_lock(void *wchan, int qtype)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;

	ASSERT(qtype == MX || qtype == CV);

	/*
	 * It is possible that we could be called while still single-threaded.
	 * If so, we call queue_alloc() to allocate the queue_head[] array.
	 */
	if ((qp = udp->queue_head) == NULL) {
		queue_alloc();
		qp = udp->queue_head;
	}
	qp += QUEUE_HASH(wchan, qtype);
	spin_lock_set(&qp->qh_lock);
	/*
	 * Even at once per nanosecond, qh_lockcount will not wrap
	 * for 512 years.  Were we to care about this, we could peg
	 * the value at UINT64_MAX.
	 */
	qp->qh_lockcount++;
	QVERIFY(qp);
	return (qp);
}

/*
 * Release a queue head.
 */
void
queue_unlock(queue_head_t *qp)
{
	QVERIFY(qp);
	spin_lock_clear(&qp->qh_lock);
}

/*
 * For rwlock queueing, we must queue writers ahead of readers of the
 * same priority.  We do this by making writers appear to have a half
 * point higher priority for purposes of priority comparisons below.
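 * (For example, two threads with real_priority() == 10 compare as
 * 10*2+1 == 21 for a writer and 10*2+0 == 20 for a reader, so the
 * writer sorts ahead of the reader but behind anything at priority 11.)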
 */
#define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)

void
enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
{
	ulwp_t **ulwpp;
	ulwp_t *next;
	int pri = CMP_PRIO(ulwp);
	int force_fifo = (qtype & FIFOQ);
	int do_fifo;

	qtype &= ~FIFOQ;
	ASSERT(qtype == MX || qtype == CV);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
	ASSERT(ulwp->ul_sleepq != qp);

	/*
	 * LIFO queue ordering is unfair and can lead to starvation,
	 * but it gives better performance for heavily contended locks.
	 * We use thread_queue_fifo (range is 0..8) to determine
	 * the frequency of FIFO vs LIFO queuing:
	 *	0 : every 256th time	(almost always LIFO)
	 *	1 : every 128th time
	 *	2 : every 64th time
	 *	3 : every 32nd time
	 *	4 : every 16th time	(the default value, mostly LIFO)
	 *	5 : every 8th time
	 *	6 : every 4th time
	 *	7 : every 2nd time
	 *	8 : every time		(never LIFO, always FIFO)
	 * Note that there is always some degree of FIFO ordering.
	 * This breaks livelock conditions that occur in applications
	 * that are written assuming (incorrectly) that threads acquire
	 * locks fairly, that is, in roughly round-robin order.
	 * In any event, the queue is maintained in priority order.
	 *
	 * If we are given the FIFOQ flag in qtype, fifo queueing is forced.
	 * SUSV3 requires this for semaphores.
	 */
	do_fifo = (force_fifo ||
	    ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0);

	if (qp->qh_head == NULL) {
		/*
		 * The queue is empty.  LIFO/FIFO doesn't matter.
		 */
		ASSERT(qp->qh_tail == NULL);
		ulwpp = &qp->qh_head;
	} else if (do_fifo) {
		/*
		 * Enqueue after the last thread whose priority is greater
		 * than or equal to the priority of the thread being queued.
		 * Attempt first to go directly onto the tail of the queue.
		 */
		if (pri <= CMP_PRIO(qp->qh_tail))
			ulwpp = &qp->qh_tail->ul_link;
		else {
			for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
			    ulwpp = &next->ul_link)
				if (pri > CMP_PRIO(next))
					break;
		}
	} else {
		/*
		 * Enqueue before the first thread whose priority is less
		 * than or equal to the priority of the thread being queued.
		 * Hopefully we can go directly onto the head of the queue.
		 */
		for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
		    ulwpp = &next->ul_link)
			if (pri >= CMP_PRIO(next))
				break;
	}
	if ((ulwp->ul_link = *ulwpp) == NULL)
		qp->qh_tail = ulwp;
	*ulwpp = ulwp;

	ulwp->ul_sleepq = qp;
	ulwp->ul_wchan = wchan;
	ulwp->ul_qtype = qtype;
	if (qp->qh_qmax < ++qp->qh_qlen)
		qp->qh_qmax = qp->qh_qlen;
}

/*
 * Return a pointer to the queue slot of the
 * highest priority thread on the queue.
 * On return, prevp, if not NULL, will contain a pointer
 * to the thread's predecessor on the queue.
 */
static ulwp_t **
queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp)
{
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	ulwp_t **suspp = NULL;
	ulwp_t *susprev;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));

	/*
	 * Find a waiter on the sleep queue.
	 */
	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan) {
			if (!ulwp->ul_stop)
				break;
			/*
			 * Try not to return a suspended thread.
			 * This mimics the old libthread's behavior.
			 */
			if (suspp == NULL) {
				suspp = ulwpp;
				susprev = prev;
			}
		}
	}

	if (ulwp == NULL && suspp != NULL) {
		ulwp = *(ulwpp = suspp);
		prev = susprev;
		suspp = NULL;
	}
	if (ulwp == NULL) {
		if (more != NULL)
			*more = 0;
		return (NULL);
	}

	if (prevp != NULL)
		*prevp = prev;
	if (more == NULL)
		return (ulwpp);

	/*
	 * Scan the remainder of the queue for another waiter.
	 */
	if (suspp != NULL) {
		*more = 1;
		return (ulwpp);
	}
	for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan) {
			*more = 1;
			return (ulwpp);
		}
	}

	*more = 0;
	return (ulwpp);
}

ulwp_t *
queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
{
	ulwp_t *ulwp;

	ulwp = *ulwpp;
	*ulwpp = ulwp->ul_link;
	ulwp->ul_link = NULL;
	if (qp->qh_tail == ulwp)
		qp->qh_tail = prev;
	qp->qh_qlen--;
	ulwp->ul_sleepq = NULL;
	ulwp->ul_wchan = NULL;

	return (ulwp);
}

ulwp_t *
dequeue(queue_head_t *qp, void *wchan, int *more)
{
	ulwp_t **ulwpp;
	ulwp_t *prev;

	if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL)
		return (NULL);
	return (queue_unlink(qp, ulwpp, prev));
}

/*
 * Return a pointer to the highest priority thread sleeping on wchan.
 */
ulwp_t *
queue_waiter(queue_head_t *qp, void *wchan)
{
	ulwp_t **ulwpp;

	if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL)
		return (NULL);
	return (*ulwpp);
}

uint8_t
dequeue_self(queue_head_t *qp, void *wchan)
{
	ulwp_t *self = curthread;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	int found = 0;
	int more = 0;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));

	/* find self on the sleep queue */
	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp == self) {
			/* dequeue ourself */
			ASSERT(self->ul_wchan == wchan);
			(void) queue_unlink(qp, ulwpp, prev);
			self->ul_cvmutex = NULL;
			self->ul_cv_wake = 0;
			found = 1;
			break;
		}
		if (ulwp->ul_wchan == wchan)
			more = 1;
	}

	if (!found)
		thr_panic("dequeue_self(): curthread not found on queue");

	if (more)
		return (1);

	/* scan the remainder of the queue for another waiter */
	for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan)
			return (1);
	}

	return (0);
}

/*
 * Called from call_user_handler() and _thrp_suspend() to take
 * ourself off of our sleep queue so we can grab locks.
 */
void
unsleep_self(void)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;

	/*
	 * Calling enter_critical()/exit_critical() here would lead
	 * to recursion.  Just manipulate self->ul_critical directly.
	 */
	self->ul_critical++;
	while (self->ul_sleepq != NULL) {
		qp = queue_lock(self->ul_wchan, self->ul_qtype);
		/*
		 * We may have been moved from a CV queue to a
		 * mutex queue while we were attempting queue_lock().
		 * If so, just loop around and try again.
		 * dequeue_self() clears self->ul_sleepq.
		 */
		if (qp == self->ul_sleepq) {
			(void) dequeue_self(qp, self->ul_wchan);
			self->ul_writer = 0;
		}
		queue_unlock(qp);
	}
	self->ul_critical--;
}

/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
static int
mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	hrtime_t begin_sleep;
	int acquired;
	int error;

	self->ul_sp = stkptr();
	self->ul_wchan = mp;
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
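		 * (Editor's note, for illustration only: an application
		 * that gets EOWNERDEAD from mutex_lock() on a robust
		 * mutex would typically repair the protected state,
		 * call mutex_consistent(), and then unlock the mutex.)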
		 */
		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				acquired = 1;
				break;
			}
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			acquired = 1;
			break;
		}
	}
	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;
	self->ul_wchan = NULL;
	self->ul_sp = 0;

	if (acquired) {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	}

	return (error);
}

/*
 * Common code for calling the ___lwp_mutex_trylock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
int
mutex_trylock_kernel(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	int error;
	int acquired;

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
		 */
		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
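			 * (Editor's note: mutex_ownerpid identifies the
			 * process to which the kernel actually granted
			 * this process-shared mutex; if it is not our
			 * pid, we loop and try again rather than claim
			 * ownership.)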
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				acquired = 1;
				break;
			}
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			acquired = 1;
			break;
		}
	}

	if (acquired) {
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else if (error != EBUSY) {
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	}

	return (error);
}

volatile sc_shared_t *
setup_schedctl(void)
{
	ulwp_t *self = curthread;
	volatile sc_shared_t *scp;
	sc_shared_t *tmp;

	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
	    !self->ul_vfork &&			/* not a child of vfork() */
	    !self->ul_schedctl_called) {	/* haven't been called before */
		enter_critical(self);
		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
			self->ul_schedctl = scp = tmp;
		exit_critical(self);
	}
	/*
	 * Unless the call to setup_schedctl() is surrounded
	 * by enter_critical()/exit_critical(), the address
	 * we are returning could be invalid due to a forkall()
	 * having occurred in another thread.
	 */
	return (scp);
}

/*
 * Interfaces from libsched, incorporated into libc.
 * libsched.so.1 is now a filter library onto libc.
 */
#pragma weak schedctl_lookup = _schedctl_init
#pragma weak _schedctl_lookup = _schedctl_init
#pragma weak schedctl_init = _schedctl_init
schedctl_t *
_schedctl_init(void)
{
	volatile sc_shared_t *scp = setup_schedctl();
	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
}

#pragma weak schedctl_exit = _schedctl_exit
void
_schedctl_exit(void)
{
}

/*
 * Contract private interface for java.
 * Set up the schedctl data if it doesn't exist yet.
 * Return a pointer to the pointer to the schedctl data.
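 *
 * (Editor's sketch, for illustration only, of how the public interface
 * defined above is typically used by applications:
 *
 *	schedctl_t *sc = schedctl_init();
 *	schedctl_start(sc);
 *	... short critical section ...
 *	schedctl_stop(sc);
 *
 * schedctl_start() and schedctl_stop() are declared in <schedctl.h>.)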
 */
volatile sc_shared_t *volatile *
_thr_schedctl(void)
{
	ulwp_t *self = curthread;
	volatile sc_shared_t *volatile *ptr;

	if (self->ul_vfork)
		return (NULL);
	if (*(ptr = &self->ul_schedctl) == NULL)
		(void) setup_schedctl();
	return (ptr);
}

/*
 * Block signals and attempt to block preemption.
 * no_preempt()/preempt() must be used in pairs but can be nested.
 */
void
no_preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	if (self->ul_preempt++ == 0) {
		enter_critical(self);
		if ((scp = self->ul_schedctl) != NULL ||
		    (scp = setup_schedctl()) != NULL) {
			/*
			 * Save the pre-existing preempt value.
			 */
			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
			scp->sc_preemptctl.sc_nopreempt = 1;
		}
	}
}

/*
 * Undo the effects of no_preempt().
 */
void
preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	ASSERT(self->ul_preempt > 0);
	if (--self->ul_preempt == 0) {
		if ((scp = self->ul_schedctl) != NULL) {
			/*
			 * Restore the pre-existing preempt value.
			 */
			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
			if (scp->sc_preemptctl.sc_yield &&
			    scp->sc_preemptctl.sc_nopreempt == 0) {
				lwp_yield();
				if (scp->sc_preemptctl.sc_yield) {
					/*
					 * Shouldn't happen.  This is either
					 * a race condition or the thread
					 * just entered the real-time class.
					 */
					lwp_yield();
					scp->sc_preemptctl.sc_yield = 0;
				}
			}
		}
		exit_critical(self);
	}
}

/*
 * If a call to preempt() would cause the current thread to yield or to
 * take deferred actions in exit_critical(), then unpark the specified
 * lwp so it can run while we delay.  Return the original lwpid if the
 * unpark was not performed, else return zero.  The tests are a repeat
 * of some of the tests in preempt(), above.  This is a statistical
 * optimization solely for cond_sleep_queue(), below.
 */
static lwpid_t
preempt_unpark(ulwp_t *self, lwpid_t lwpid)
{
	volatile sc_shared_t *scp = self->ul_schedctl;

	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
	    (self->ul_curplease && self->ul_critical == 1)) {
		(void) __lwp_unpark(lwpid);
		lwpid = 0;
	}
	return (lwpid);
}

/*
 * Spin for a while (if 'tryhard' is true), trying to grab the lock.
 * If this fails, return EBUSY and let the caller deal with it.
 * If this succeeds, return 0 with mutex_owner set to curthread.
 */
static int
mutex_trylock_adaptive(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	int error = EBUSY;
	ulwp_t *ulwp;
	volatile sc_shared_t *scp;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
	uint32_t new_lockword;
	int count = 0;
	int max_count;
	uint8_t max_spinners;

	ASSERT(!(mp->mutex_type & USYNC_PROCESS));

	if (MUTEX_OWNER(mp) == self)
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	if (set_lock_byte(lockp) == 0) {
		*ownerp = (uintptr_t)self;
		error = 0;
		goto done;
	}
	if (!tryhard)
		goto done;
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This spin loop is unfair to lwps that have already dropped into
	 * the kernel to sleep.  They will starve on a highly-contended mutex.
	 * This is just too bad.  The adaptive spin algorithm is intended
	 * to allow programs with highly-contended locks (that is, broken
	 * programs) to execute with reasonable speed despite their contention.
	 * Being fair would reduce the speed of such programs and well-written
	 * programs will not suffer in any case.
	 */
	enter_critical(self);
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
		/*
		 * Stop spinning if the mutex owner is not running on
		 * a processor; it will not drop the lock any time soon
		 * and we would just be wasting time to keep spinning.
		 *
		 * Note that we are looking at another thread (ulwp_t)
		 * without ensuring that the other thread does not exit.
		 * The scheme relies on ulwp_t structures never being
		 * deallocated by the library (the library employs a free
		 * list of ulwp_t structs that are reused when new threads
		 * are created) and on schedctl shared memory never being
		 * deallocated once created via __schedctl().
		 *
		 * Thus, the worst that can happen when the spinning thread
		 * looks at the owner's schedctl data is that it is looking
		 * at some other thread's schedctl data.  This almost never
		 * happens and is benign when it does.
		 */
		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
		    ((scp = ulwp->ul_schedctl) == NULL ||
		    scp->sc_state != SC_ONPROC))
			break;
	}
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_queue()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_queue(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
		 */
		if (set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
		}
		count++;
	}
	exit_critical(self);

done:
	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex; clear the lock.
11754574Sraf */ 11764574Sraf mp->mutex_owner = 0; 11775629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 11784574Sraf mutex_wakeup_all(mp); 11794574Sraf error = ENOTRECOVERABLE; 11804574Sraf } 11814574Sraf 11824574Sraf if (error) { 11835629Sraf if (count) { 11845629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 11855629Sraf } 11864574Sraf if (error != EBUSY) { 11874574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 11884574Sraf } 11894574Sraf } else { 11905629Sraf if (count) { 11915629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 11925629Sraf } 11934574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 11944574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 11954574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11964574Sraf error = EOWNERDEAD; 11974574Sraf } 11984574Sraf } 11994574Sraf 12004574Sraf return (error); 12010Sstevel@tonic-gate } 12020Sstevel@tonic-gate 12030Sstevel@tonic-gate /* 12040Sstevel@tonic-gate * Same as mutex_trylock_adaptive(), except specifically for queue locks. 12050Sstevel@tonic-gate * The owner field is not set here; the caller (spin_lock_set()) sets it. 12060Sstevel@tonic-gate */ 12074574Sraf static int 12080Sstevel@tonic-gate mutex_queuelock_adaptive(mutex_t *mp) 12090Sstevel@tonic-gate { 12100Sstevel@tonic-gate ulwp_t *ulwp; 12110Sstevel@tonic-gate volatile sc_shared_t *scp; 12120Sstevel@tonic-gate volatile uint8_t *lockp; 12130Sstevel@tonic-gate volatile uint64_t *ownerp; 12140Sstevel@tonic-gate int count = curthread->ul_queue_spin; 12150Sstevel@tonic-gate 12160Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 12170Sstevel@tonic-gate 12180Sstevel@tonic-gate if (count == 0) 12190Sstevel@tonic-gate return (EBUSY); 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 12220Sstevel@tonic-gate ownerp = (volatile uint64_t *)&mp->mutex_owner; 12230Sstevel@tonic-gate while (--count >= 0) { 12240Sstevel@tonic-gate if (*lockp == 0 && set_lock_byte(lockp) == 0) 12250Sstevel@tonic-gate return (0); 12260Sstevel@tonic-gate SMT_PAUSE(); 12270Sstevel@tonic-gate if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 12280Sstevel@tonic-gate ((scp = ulwp->ul_schedctl) == NULL || 12290Sstevel@tonic-gate scp->sc_state != SC_ONPROC)) 12300Sstevel@tonic-gate break; 12310Sstevel@tonic-gate } 12320Sstevel@tonic-gate 12330Sstevel@tonic-gate return (EBUSY); 12340Sstevel@tonic-gate } 12350Sstevel@tonic-gate 12360Sstevel@tonic-gate /* 12370Sstevel@tonic-gate * Like mutex_trylock_adaptive(), but for process-shared mutexes. 12384613Sraf * Spin for a while (if 'tryhard' is true), trying to grab the lock. 12390Sstevel@tonic-gate * If this fails, return EBUSY and let the caller deal with it. 12400Sstevel@tonic-gate * If this succeeds, return 0 with mutex_owner set to curthread 12410Sstevel@tonic-gate * and mutex_ownerpid set to the current pid. 
 */
static int
mutex_trylock_process(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int error = EBUSY;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	uint32_t new_lockword;
	int count = 0;
	int max_count;
	uint8_t max_spinners;

	ASSERT(mp->mutex_type & USYNC_PROCESS);

	if (shared_mutex_held(mp))
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	enter_critical(self);
	if (set_lock_byte(lockp) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		mp->mutex_ownerpid = udp->pid;
		exit_critical(self);
		error = 0;
		goto done;
	}
	exit_critical(self);
	if (!tryhard)
		goto done;
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This is a process-shared mutex.
	 * We cannot know if the owner is running on a processor.
	 * We just spin and hope that it is on a processor.
	 */
	enter_critical(self);
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			mp->mutex_ownerpid = udp->pid;
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
	}
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_kernel()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_kernel(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
13285629Sraf */ 13295629Sraf if (set_lock_byte(lockp) == 0) { 13305629Sraf mp->mutex_owner = (uintptr_t)self; 13315629Sraf mp->mutex_ownerpid = udp->pid; 13325629Sraf error = 0; 13335629Sraf } 13345629Sraf count++; 13355629Sraf } 13364574Sraf exit_critical(self); 13374574Sraf 13385629Sraf done: 13394574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 13404574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 13414574Sraf /* 13424574Sraf * We shouldn't own the mutex; clear the lock. 13434574Sraf */ 13444574Sraf mp->mutex_owner = 0; 13454574Sraf mp->mutex_ownerpid = 0; 13465629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) { 13474574Sraf no_preempt(self); 13484574Sraf (void) ___lwp_mutex_wakeup(mp, 1); 13494574Sraf preempt(self); 13500Sstevel@tonic-gate } 13514574Sraf error = ENOTRECOVERABLE; 13520Sstevel@tonic-gate } 13530Sstevel@tonic-gate 13544574Sraf if (error) { 13555629Sraf if (count) { 13565629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 13575629Sraf } 13584574Sraf if (error != EBUSY) { 13594574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 13604574Sraf } 13614574Sraf } else { 13625629Sraf if (count) { 13635629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 13645629Sraf } 13654574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 13664574Sraf if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 13674574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 13684574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) 13694574Sraf error = EOWNERDEAD; 13704574Sraf else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 13714574Sraf error = ELOCKUNMAPPED; 13724574Sraf else 13734574Sraf error = EOWNERDEAD; 13744574Sraf } 13754574Sraf } 13764574Sraf 13774574Sraf return (error); 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate 13800Sstevel@tonic-gate /* 13810Sstevel@tonic-gate * Mutex wakeup code for releasing a USYNC_THREAD mutex. 13820Sstevel@tonic-gate * Returns the lwpid of the thread that was dequeued, if any. 13830Sstevel@tonic-gate * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 13840Sstevel@tonic-gate * to wake up the specified lwp. 13850Sstevel@tonic-gate */ 13864574Sraf static lwpid_t 13870Sstevel@tonic-gate mutex_wakeup(mutex_t *mp) 13880Sstevel@tonic-gate { 13890Sstevel@tonic-gate lwpid_t lwpid = 0; 13900Sstevel@tonic-gate queue_head_t *qp; 13910Sstevel@tonic-gate ulwp_t *ulwp; 13920Sstevel@tonic-gate int more; 13930Sstevel@tonic-gate 13940Sstevel@tonic-gate /* 13950Sstevel@tonic-gate * Dequeue a waiter from the sleep queue. Don't touch the mutex 13960Sstevel@tonic-gate * waiters bit if no one was found on the queue because the mutex 13970Sstevel@tonic-gate * might have been deallocated or reallocated for another purpose. 13980Sstevel@tonic-gate */ 13990Sstevel@tonic-gate qp = queue_lock(mp, MX); 14000Sstevel@tonic-gate if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 14010Sstevel@tonic-gate lwpid = ulwp->ul_lwpid; 14020Sstevel@tonic-gate mp->mutex_waiters = (more? 1 : 0); 14030Sstevel@tonic-gate } 14040Sstevel@tonic-gate queue_unlock(qp); 14050Sstevel@tonic-gate return (lwpid); 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate 14080Sstevel@tonic-gate /* 14094574Sraf * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 
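 *
 * Its callers use it when a LOCK_ROBUST mutex has been marked
 * LOCK_NOTRECOVERABLE, so that every sleeping waiter wakes up and
 * sees ENOTRECOVERABLE.  For illustration (names hypothetical),
 * about all an application can do with that error is rebuild:
 *	if (pthread_mutex_lock(&m) == ENOTRECOVERABLE) {
 *		(void) pthread_mutex_destroy(&m);
 *		(void) pthread_mutex_init(&m, &attr);
 *		reinitialize_protected_data();
 *	}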
14104574Sraf */ 14114574Sraf static void 14124574Sraf mutex_wakeup_all(mutex_t *mp) 14134574Sraf { 14144574Sraf queue_head_t *qp; 14154574Sraf int nlwpid = 0; 14164574Sraf int maxlwps = MAXLWPS; 14174574Sraf ulwp_t **ulwpp; 14184574Sraf ulwp_t *ulwp; 14194574Sraf ulwp_t *prev = NULL; 14204574Sraf lwpid_t buffer[MAXLWPS]; 14214574Sraf lwpid_t *lwpid = buffer; 14224574Sraf 14234574Sraf /* 14244574Sraf * Walk the list of waiters and prepare to wake up all of them. 14254574Sraf * The waiters flag has already been cleared from the mutex. 14264574Sraf * 14274574Sraf * We keep track of lwpids that are to be unparked in lwpid[]. 14284574Sraf * __lwp_unpark_all() is called to unpark all of them after 14294574Sraf * they have been removed from the sleep queue and the sleep 14304574Sraf * queue lock has been dropped. If we run out of space in our 14314574Sraf * on-stack buffer, we need to allocate more but we can't call 14324574Sraf * lmalloc() because we are holding a queue lock when the overflow 14334574Sraf * occurs and lmalloc() acquires a lock. We can't use alloca() 14344574Sraf * either because the application may have allocated a small 14354574Sraf * stack and we don't want to overrun the stack. So we call 14364574Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 14374574Sraf * system call directly since that path acquires no locks. 14384574Sraf */ 14394574Sraf qp = queue_lock(mp, MX); 14404574Sraf ulwpp = &qp->qh_head; 14414574Sraf while ((ulwp = *ulwpp) != NULL) { 14424574Sraf if (ulwp->ul_wchan != mp) { 14434574Sraf prev = ulwp; 14444574Sraf ulwpp = &ulwp->ul_link; 14454574Sraf } else { 14464574Sraf if (nlwpid == maxlwps) 14474574Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 14484574Sraf (void) queue_unlink(qp, ulwpp, prev); 14494574Sraf lwpid[nlwpid++] = ulwp->ul_lwpid; 14504574Sraf } 14514574Sraf } 14524574Sraf 14534574Sraf if (nlwpid == 0) { 14544574Sraf queue_unlock(qp); 14554574Sraf } else { 14565629Sraf mp->mutex_waiters = 0; 14574574Sraf no_preempt(curthread); 14584574Sraf queue_unlock(qp); 14594574Sraf if (nlwpid == 1) 14604574Sraf (void) __lwp_unpark(lwpid[0]); 14614574Sraf else 14624574Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 14634574Sraf preempt(curthread); 14644574Sraf } 14654574Sraf 14664574Sraf if (lwpid != buffer) 14674574Sraf (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 14684574Sraf } 14694574Sraf 14704574Sraf /* 14715629Sraf * Release a process-private mutex. 14725629Sraf * As an optimization, if there are waiters but there are also spinners 14735629Sraf * attempting to acquire the mutex, then don't bother waking up a waiter; 14745629Sraf * one of the spinners will acquire the mutex soon and it would be a waste 14755629Sraf * of resources to wake up some thread just to have it spin for a while 14765629Sraf * and then possibly go back to sleep. See mutex_trylock_adaptive(). 
14770Sstevel@tonic-gate */ 14784574Sraf static lwpid_t 14794574Sraf mutex_unlock_queue(mutex_t *mp, int release_all) 14800Sstevel@tonic-gate { 14815629Sraf lwpid_t lwpid = 0; 14825629Sraf uint32_t old_lockword; 14835629Sraf 14845629Sraf mp->mutex_owner = 0; 14855629Sraf DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 14865629Sraf old_lockword = clear_lockbyte(&mp->mutex_lockword); 14875629Sraf if ((old_lockword & WAITERMASK) && 14885629Sraf (release_all || (old_lockword & SPINNERMASK) == 0)) { 14895629Sraf ulwp_t *self = curthread; 14900Sstevel@tonic-gate no_preempt(self); /* ensure a prompt wakeup */ 14915629Sraf if (release_all) 14925629Sraf mutex_wakeup_all(mp); 14935629Sraf else 14945629Sraf lwpid = mutex_wakeup(mp); 14955629Sraf if (lwpid == 0) 14965629Sraf preempt(self); 14974574Sraf } 14980Sstevel@tonic-gate return (lwpid); 14990Sstevel@tonic-gate } 15000Sstevel@tonic-gate 15010Sstevel@tonic-gate /* 15020Sstevel@tonic-gate * Like mutex_unlock_queue(), but for process-shared mutexes. 15030Sstevel@tonic-gate */ 15044574Sraf static void 15054574Sraf mutex_unlock_process(mutex_t *mp, int release_all) 15060Sstevel@tonic-gate { 15075629Sraf uint32_t old_lockword; 15085629Sraf 15090Sstevel@tonic-gate mp->mutex_owner = 0; 15100Sstevel@tonic-gate mp->mutex_ownerpid = 0; 15110Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 15125629Sraf old_lockword = clear_lockbyte(&mp->mutex_lockword); 15135629Sraf if ((old_lockword & WAITERMASK) && 15145629Sraf (release_all || (old_lockword & SPINNERMASK) == 0)) { 15155629Sraf ulwp_t *self = curthread; 15165629Sraf no_preempt(self); /* ensure a prompt wakeup */ 15175629Sraf (void) ___lwp_mutex_wakeup(mp, release_all); 15185629Sraf preempt(self); 15190Sstevel@tonic-gate } 15200Sstevel@tonic-gate } 15210Sstevel@tonic-gate 15220Sstevel@tonic-gate /* 15230Sstevel@tonic-gate * Return the real priority of a thread. 15240Sstevel@tonic-gate */ 15250Sstevel@tonic-gate int 15260Sstevel@tonic-gate real_priority(ulwp_t *ulwp) 15270Sstevel@tonic-gate { 15280Sstevel@tonic-gate if (ulwp->ul_epri == 0) 15290Sstevel@tonic-gate return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 15300Sstevel@tonic-gate return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); 15310Sstevel@tonic-gate } 15320Sstevel@tonic-gate 15330Sstevel@tonic-gate void 15340Sstevel@tonic-gate stall(void) 15350Sstevel@tonic-gate { 15360Sstevel@tonic-gate for (;;) 15370Sstevel@tonic-gate (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 15380Sstevel@tonic-gate } 15390Sstevel@tonic-gate 15400Sstevel@tonic-gate /* 15410Sstevel@tonic-gate * Acquire a USYNC_THREAD mutex via user-level sleep queues. 15420Sstevel@tonic-gate * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 15434574Sraf * If successful, returns with mutex_owner set correctly. 
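 *
 * The sleep-queue protocol below is, in outline:
 *	queue_lock(); enqueue ourself; mutex_waiters = 1;
 *	loop {
 *		try set_lock_byte(); on success dequeue ourself and stop;
 *		queue_unlock(); __lwp_park(tsp); queue_lock();
 *		if we were removed from the queue (wakeup, signal or
 *		timeout), retry the lock or re-enqueue ourself;
 *		any error other than EINTR ends the loop.
 *	}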
15440Sstevel@tonic-gate */ 15450Sstevel@tonic-gate int 15460Sstevel@tonic-gate mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 15470Sstevel@tonic-gate timespec_t *tsp) 15480Sstevel@tonic-gate { 15490Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 15500Sstevel@tonic-gate queue_head_t *qp; 15510Sstevel@tonic-gate hrtime_t begin_sleep; 15520Sstevel@tonic-gate int error = 0; 15530Sstevel@tonic-gate 15540Sstevel@tonic-gate self->ul_sp = stkptr(); 15550Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 15560Sstevel@tonic-gate self->ul_wchan = mp; 15570Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 15580Sstevel@tonic-gate self->ul_td_evbuf.eventdata = mp; 15590Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 15600Sstevel@tonic-gate } 15610Sstevel@tonic-gate if (msp) { 15620Sstevel@tonic-gate tdb_incr(msp->mutex_sleep); 15630Sstevel@tonic-gate begin_sleep = gethrtime(); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate DTRACE_PROBE1(plockstat, mutex__block, mp); 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate /* 15690Sstevel@tonic-gate * Put ourself on the sleep queue, and while we are 15700Sstevel@tonic-gate * unable to grab the lock, go park in the kernel. 15710Sstevel@tonic-gate * Take ourself off the sleep queue after we acquire the lock. 15720Sstevel@tonic-gate * The waiter bit can be set/cleared only while holding the queue lock. 15730Sstevel@tonic-gate */ 15740Sstevel@tonic-gate qp = queue_lock(mp, MX); 15750Sstevel@tonic-gate enqueue(qp, self, mp, MX); 15760Sstevel@tonic-gate mp->mutex_waiters = 1; 15770Sstevel@tonic-gate for (;;) { 15780Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 15790Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 15800Sstevel@tonic-gate mp->mutex_waiters = dequeue_self(qp, mp); 15810Sstevel@tonic-gate break; 15820Sstevel@tonic-gate } 15830Sstevel@tonic-gate set_parking_flag(self, 1); 15840Sstevel@tonic-gate queue_unlock(qp); 15850Sstevel@tonic-gate /* 15860Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 15870Sstevel@tonic-gate * if we are unparked before the timeout expires. 15880Sstevel@tonic-gate */ 15895629Sraf error = __lwp_park(tsp, 0); 15900Sstevel@tonic-gate set_parking_flag(self, 0); 15910Sstevel@tonic-gate /* 15920Sstevel@tonic-gate * We could have taken a signal or suspended ourself. 15930Sstevel@tonic-gate * If we did, then we removed ourself from the queue. 15940Sstevel@tonic-gate * Someone else may have removed us from the queue 15950Sstevel@tonic-gate * as a consequence of mutex_unlock(). We may have 15960Sstevel@tonic-gate * gotten a timeout from __lwp_park(). Or we may still 15970Sstevel@tonic-gate * be on the queue and this is just a spurious wakeup. 15980Sstevel@tonic-gate */ 15990Sstevel@tonic-gate qp = queue_lock(mp, MX); 16000Sstevel@tonic-gate if (self->ul_sleepq == NULL) { 16015629Sraf if (error) { 16025629Sraf mp->mutex_waiters = queue_waiter(qp, mp)? 
1 : 0; 16035629Sraf if (error != EINTR) 16045629Sraf break; 16055629Sraf error = 0; 16065629Sraf } 16070Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 16080Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 16090Sstevel@tonic-gate break; 16100Sstevel@tonic-gate } 16110Sstevel@tonic-gate enqueue(qp, self, mp, MX); 16120Sstevel@tonic-gate mp->mutex_waiters = 1; 16130Sstevel@tonic-gate } 16140Sstevel@tonic-gate ASSERT(self->ul_sleepq == qp && 16150Sstevel@tonic-gate self->ul_qtype == MX && 16160Sstevel@tonic-gate self->ul_wchan == mp); 16170Sstevel@tonic-gate if (error) { 16185629Sraf if (error != EINTR) { 16195629Sraf mp->mutex_waiters = dequeue_self(qp, mp); 16205629Sraf break; 16215629Sraf } 16225629Sraf error = 0; 16230Sstevel@tonic-gate } 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 16260Sstevel@tonic-gate self->ul_wchan == NULL); 16270Sstevel@tonic-gate self->ul_sp = 0; 16280Sstevel@tonic-gate queue_unlock(qp); 16294574Sraf 16300Sstevel@tonic-gate if (msp) 16310Sstevel@tonic-gate msp->mutex_sleep_time += gethrtime() - begin_sleep; 16320Sstevel@tonic-gate 16330Sstevel@tonic-gate ASSERT(error == 0 || error == EINVAL || error == ETIME); 16344574Sraf 16354574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 16364574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 16374574Sraf /* 16384574Sraf * We shouldn't own the mutex; clear the lock. 16394574Sraf */ 16404574Sraf mp->mutex_owner = 0; 16415629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 16424574Sraf mutex_wakeup_all(mp); 16434574Sraf error = ENOTRECOVERABLE; 16444574Sraf } 16454574Sraf 16464574Sraf if (error) { 16474574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 16484574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 16494574Sraf } else { 16504574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 16514574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 16524574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 16534574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 16544574Sraf error = EOWNERDEAD; 16554574Sraf } 16564574Sraf } 16574574Sraf 16580Sstevel@tonic-gate return (error); 16590Sstevel@tonic-gate } 16600Sstevel@tonic-gate 16614574Sraf static int 16624574Sraf mutex_recursion(mutex_t *mp, int mtype, int try) 16634574Sraf { 16644574Sraf ASSERT(mutex_is_held(mp)); 16654574Sraf ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 16664574Sraf ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 16674574Sraf 16684574Sraf if (mtype & LOCK_RECURSIVE) { 16694574Sraf if (mp->mutex_rcount == RECURSION_MAX) { 16704574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 16714574Sraf return (EAGAIN); 16724574Sraf } 16734574Sraf mp->mutex_rcount++; 16744574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 16754574Sraf return (0); 16764574Sraf } 16774574Sraf if (try == MUTEX_LOCK) { 16784574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 16794574Sraf return (EDEADLK); 16804574Sraf } 16814574Sraf return (EBUSY); 16824574Sraf } 16834574Sraf 16844574Sraf /* 16854574Sraf * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 16864574Sraf * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 16874574Sraf * We use tdb_hash_lock here and in the synch object tracking code in 16884574Sraf * the tdb_agent.c file. There is no conflict between these two usages. 
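 *
 * For illustration, the kind of application mutex that gets here is
 * created roughly like this (names hypothetical; the mutex itself must
 * live in memory shared by the cooperating processes):
 *	pthread_mutexattr_t attr;
 *	(void) pthread_mutexattr_init(&attr);
 *	(void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 *	(void) pthread_mutexattr_setrobust_np(&attr, PTHREAD_MUTEX_ROBUST_NP);
 *	(void) pthread_mutex_init(&robust_mutex, &attr);
 * The first lock attempt in each process then registers the mutex here.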
16894574Sraf */ 16904574Sraf void 16914574Sraf register_lock(mutex_t *mp) 16924574Sraf { 16934574Sraf uberdata_t *udp = curthread->ul_uberdata; 16944574Sraf uint_t hash = LOCK_HASH(mp); 16954574Sraf robust_t *rlp; 16964574Sraf robust_t **rlpp; 16974574Sraf robust_t **table; 16984574Sraf 16994574Sraf if ((table = udp->robustlocks) == NULL) { 17004574Sraf lmutex_lock(&udp->tdb_hash_lock); 17014574Sraf if ((table = udp->robustlocks) == NULL) { 17024574Sraf table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 17034574Sraf _membar_producer(); 17044574Sraf udp->robustlocks = table; 17054574Sraf } 17064574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17074574Sraf } 17084574Sraf _membar_consumer(); 17094574Sraf 17104574Sraf /* 17114574Sraf * First search the registered table with no locks held. 17124574Sraf * This is safe because the table never shrinks 17134574Sraf * and we can only get a false negative. 17144574Sraf */ 17154574Sraf for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 17164574Sraf if (rlp->robust_lock == mp) /* already registered */ 17174574Sraf return; 17184574Sraf } 17194574Sraf 17204574Sraf /* 17214574Sraf * The lock was not found. 17224574Sraf * Repeat the operation with tdb_hash_lock held. 17234574Sraf */ 17244574Sraf lmutex_lock(&udp->tdb_hash_lock); 17254574Sraf 17264574Sraf for (rlpp = &table[hash]; 17274574Sraf (rlp = *rlpp) != NULL; 17284574Sraf rlpp = &rlp->robust_next) { 17294574Sraf if (rlp->robust_lock == mp) { /* already registered */ 17304574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17314574Sraf return; 17324574Sraf } 17334574Sraf } 17344574Sraf 17354574Sraf /* 17364574Sraf * The lock has never been registered. 17374574Sraf * Register it now and add it to the table. 17384574Sraf */ 17394574Sraf (void) ___lwp_mutex_register(mp); 17404574Sraf rlp = lmalloc(sizeof (*rlp)); 17414574Sraf rlp->robust_lock = mp; 17424574Sraf _membar_producer(); 17434574Sraf *rlpp = rlp; 17444574Sraf 17454574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17464574Sraf } 17474574Sraf 17484574Sraf /* 17494574Sraf * This is called in the child of fork()/forkall() to start over 17504574Sraf * with a clean slate. (Each process must register its own locks.) 17514574Sraf * No locks are needed because all other threads are suspended or gone. 17524574Sraf */ 17534574Sraf void 17544574Sraf unregister_locks(void) 17554574Sraf { 17564574Sraf uberdata_t *udp = curthread->ul_uberdata; 17574574Sraf uint_t hash; 17584574Sraf robust_t **table; 17594574Sraf robust_t *rlp; 17604574Sraf robust_t *next; 17614574Sraf 17624574Sraf if ((table = udp->robustlocks) != NULL) { 17634574Sraf for (hash = 0; hash < LOCKHASHSZ; hash++) { 17644574Sraf rlp = table[hash]; 17654574Sraf while (rlp != NULL) { 17664574Sraf next = rlp->robust_next; 17674574Sraf lfree(rlp, sizeof (*rlp)); 17684574Sraf rlp = next; 17694574Sraf } 17704574Sraf } 17714574Sraf lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 17724574Sraf udp->robustlocks = NULL; 17734574Sraf } 17744574Sraf } 17754574Sraf 17760Sstevel@tonic-gate /* 17770Sstevel@tonic-gate * Returns with mutex_owner set correctly. 
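 *
 * This is the slow ("long way") path shared by __mutex_lock(),
 * __mutex_trylock() and the timed lock functions.  In outline:
 *	LOCK_PRIO_PROTECT:	ceiling checks and _ceil_prio_inherit()
 *				first, then one of the cases below;
 *	LOCK_PRIO_INHERIT:	straight to the kernel via
 *				mutex_trylock_kernel()/mutex_lock_kernel();
 *	USYNC_PROCESS:		mutex_trylock_process(), then
 *				mutex_lock_kernel() on EBUSY for MUTEX_LOCK;
 *	USYNC_THREAD:		mutex_trylock_adaptive(), then
 *				mutex_lock_queue() on EBUSY for MUTEX_LOCK.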
17780Sstevel@tonic-gate */ 17794574Sraf static int 17800Sstevel@tonic-gate mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 17810Sstevel@tonic-gate { 17820Sstevel@tonic-gate ulwp_t *self = curthread; 17830Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 17840Sstevel@tonic-gate int mtype = mp->mutex_type; 17850Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 17860Sstevel@tonic-gate int error = 0; 17874574Sraf uint8_t ceil; 17884574Sraf int myprio; 17890Sstevel@tonic-gate 17900Sstevel@tonic-gate ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 17910Sstevel@tonic-gate 17920Sstevel@tonic-gate if (!self->ul_schedctl_called) 17930Sstevel@tonic-gate (void) setup_schedctl(); 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate if (msp && try == MUTEX_TRY) 17960Sstevel@tonic-gate tdb_incr(msp->mutex_try); 17970Sstevel@tonic-gate 17984574Sraf if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 17994574Sraf return (mutex_recursion(mp, mtype, try)); 18000Sstevel@tonic-gate 18010Sstevel@tonic-gate if (self->ul_error_detection && try == MUTEX_LOCK && 18020Sstevel@tonic-gate tsp == NULL && mutex_is_held(mp)) 18030Sstevel@tonic-gate lock_error(mp, "mutex_lock", NULL, NULL); 18040Sstevel@tonic-gate 18054574Sraf if (mtype & LOCK_PRIO_PROTECT) { 18064574Sraf ceil = mp->mutex_ceiling; 18074574Sraf ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); 18084574Sraf myprio = real_priority(self); 18094574Sraf if (myprio > ceil) { 18104574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 18114574Sraf return (EINVAL); 18124574Sraf } 18134574Sraf if ((error = _ceil_mylist_add(mp)) != 0) { 18144574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 18154574Sraf return (error); 18160Sstevel@tonic-gate } 18174574Sraf if (myprio < ceil) 18184574Sraf _ceil_prio_inherit(ceil); 18194574Sraf } 18204574Sraf 18214574Sraf if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 18224574Sraf == (USYNC_PROCESS | LOCK_ROBUST)) 18234574Sraf register_lock(mp); 18244574Sraf 18254574Sraf if (mtype & LOCK_PRIO_INHERIT) { 18264574Sraf /* go straight to the kernel */ 18274574Sraf if (try == MUTEX_TRY) 18284574Sraf error = mutex_trylock_kernel(mp); 18294574Sraf else /* MUTEX_LOCK */ 18304574Sraf error = mutex_lock_kernel(mp, tsp, msp); 18314574Sraf /* 18324574Sraf * The kernel never sets or clears the lock byte 18334574Sraf * for LOCK_PRIO_INHERIT mutexes. 18344574Sraf * Set it here for consistency. 
18354574Sraf */ 18364574Sraf switch (error) { 18374574Sraf case 0: 18384574Sraf mp->mutex_lockw = LOCKSET; 18394574Sraf break; 18404574Sraf case EOWNERDEAD: 18414574Sraf case ELOCKUNMAPPED: 18424574Sraf mp->mutex_lockw = LOCKSET; 18434574Sraf /* FALLTHROUGH */ 18444574Sraf case ENOTRECOVERABLE: 18454574Sraf ASSERT(mtype & LOCK_ROBUST); 18464574Sraf break; 18474574Sraf case EDEADLK: 18484574Sraf if (try == MUTEX_LOCK) 18494574Sraf stall(); 18504574Sraf error = EBUSY; 18514574Sraf break; 18520Sstevel@tonic-gate } 18530Sstevel@tonic-gate } else if (mtype & USYNC_PROCESS) { 18544613Sraf error = mutex_trylock_process(mp, try == MUTEX_LOCK); 18554574Sraf if (error == EBUSY && try == MUTEX_LOCK) 18560Sstevel@tonic-gate error = mutex_lock_kernel(mp, tsp, msp); 18575629Sraf } else { /* USYNC_THREAD */ 18584613Sraf error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 18594574Sraf if (error == EBUSY && try == MUTEX_LOCK) 18604574Sraf error = mutex_lock_queue(self, msp, mp, tsp); 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate 18630Sstevel@tonic-gate switch (error) { 18644574Sraf case 0: 18650Sstevel@tonic-gate case EOWNERDEAD: 18660Sstevel@tonic-gate case ELOCKUNMAPPED: 18674574Sraf if (mtype & LOCK_ROBUST) 18684574Sraf remember_lock(mp); 18690Sstevel@tonic-gate if (msp) 18700Sstevel@tonic-gate record_begin_hold(msp); 18710Sstevel@tonic-gate break; 18720Sstevel@tonic-gate default: 18734574Sraf if (mtype & LOCK_PRIO_PROTECT) { 18744574Sraf (void) _ceil_mylist_del(mp); 18754574Sraf if (myprio < ceil) 18764574Sraf _ceil_prio_waive(); 18774574Sraf } 18780Sstevel@tonic-gate if (try == MUTEX_TRY) { 18790Sstevel@tonic-gate if (msp) 18800Sstevel@tonic-gate tdb_incr(msp->mutex_try_fail); 18810Sstevel@tonic-gate if (__td_event_report(self, TD_LOCK_TRY, udp)) { 18820Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 18830Sstevel@tonic-gate tdb_event(TD_LOCK_TRY, udp); 18840Sstevel@tonic-gate } 18850Sstevel@tonic-gate } 18860Sstevel@tonic-gate break; 18870Sstevel@tonic-gate } 18880Sstevel@tonic-gate 18890Sstevel@tonic-gate return (error); 18900Sstevel@tonic-gate } 18910Sstevel@tonic-gate 18920Sstevel@tonic-gate int 18930Sstevel@tonic-gate fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 18940Sstevel@tonic-gate { 18950Sstevel@tonic-gate ulwp_t *self = curthread; 18960Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 18970Sstevel@tonic-gate 18980Sstevel@tonic-gate /* 18990Sstevel@tonic-gate * We know that USYNC_PROCESS is set in mtype and that 19000Sstevel@tonic-gate * zero, one, or both of the flags LOCK_RECURSIVE and 19010Sstevel@tonic-gate * LOCK_ERRORCHECK are set, and that no other flags are set. 
19020Sstevel@tonic-gate */ 19034574Sraf ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 19040Sstevel@tonic-gate enter_critical(self); 19050Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 19060Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19070Sstevel@tonic-gate mp->mutex_ownerpid = udp->pid; 19080Sstevel@tonic-gate exit_critical(self); 19090Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19100Sstevel@tonic-gate return (0); 19110Sstevel@tonic-gate } 19120Sstevel@tonic-gate exit_critical(self); 19130Sstevel@tonic-gate 19144574Sraf if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 19154574Sraf return (mutex_recursion(mp, mtype, try)); 19164574Sraf 19174613Sraf if (try == MUTEX_LOCK) { 19184613Sraf if (mutex_trylock_process(mp, 1) == 0) 19194613Sraf return (0); 19200Sstevel@tonic-gate return (mutex_lock_kernel(mp, tsp, NULL)); 19214613Sraf } 19220Sstevel@tonic-gate 19230Sstevel@tonic-gate if (__td_event_report(self, TD_LOCK_TRY, udp)) { 19240Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 19250Sstevel@tonic-gate tdb_event(TD_LOCK_TRY, udp); 19260Sstevel@tonic-gate } 19270Sstevel@tonic-gate return (EBUSY); 19280Sstevel@tonic-gate } 19290Sstevel@tonic-gate 19300Sstevel@tonic-gate static int 19310Sstevel@tonic-gate mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 19320Sstevel@tonic-gate { 19330Sstevel@tonic-gate ulwp_t *self = curthread; 19340Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 19350Sstevel@tonic-gate uberflags_t *gflags; 19360Sstevel@tonic-gate int mtype; 19370Sstevel@tonic-gate 19380Sstevel@tonic-gate /* 19390Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 19400Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 19410Sstevel@tonic-gate * no error detection, no lock statistics, 19420Sstevel@tonic-gate * and the process has only a single thread. 19430Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 19440Sstevel@tonic-gate */ 19450Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 19460Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 19470Sstevel@tonic-gate /* 19480Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 19490Sstevel@tonic-gate */ 19500Sstevel@tonic-gate if (mp->mutex_lockw == 0) { 19510Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 19520Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19530Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19540Sstevel@tonic-gate return (0); 19550Sstevel@tonic-gate } 19564574Sraf if (mtype && MUTEX_OWNER(mp) == self) 19574574Sraf return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 19580Sstevel@tonic-gate /* 19590Sstevel@tonic-gate * We have reached a deadlock, probably because the 19600Sstevel@tonic-gate * process is executing non-async-signal-safe code in 19610Sstevel@tonic-gate * a signal handler and is attempting to acquire a lock 19620Sstevel@tonic-gate * that it already owns. This is not surprising, given 19630Sstevel@tonic-gate * bad programming practices over the years that has 19640Sstevel@tonic-gate * resulted in applications calling printf() and such 19650Sstevel@tonic-gate * in their signal handlers. Unless the user has told 19660Sstevel@tonic-gate * us that the signal handlers are safe by setting: 19670Sstevel@tonic-gate * export _THREAD_ASYNC_SAFE=1 19680Sstevel@tonic-gate * we return EDEADLK rather than actually deadlocking. 
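 *
 * The classic way to trip this is a handler along the lines of
 *	void
 *	handler(int sig)
 *	{
 *		(void) printf("caught signal %d\n", sig);
 *	}
 * delivered while the interrupted code is itself inside printf();
 * returning EDEADLK lets such a program limp along rather than hang.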
19690Sstevel@tonic-gate */ 19700Sstevel@tonic-gate if (tsp == NULL && 19710Sstevel@tonic-gate MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 19720Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 19730Sstevel@tonic-gate return (EDEADLK); 19740Sstevel@tonic-gate } 19750Sstevel@tonic-gate } 19760Sstevel@tonic-gate 19770Sstevel@tonic-gate /* 19780Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 19790Sstevel@tonic-gate * no error detection, and no lock statistics. 19800Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 19810Sstevel@tonic-gate */ 19820Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 19830Sstevel@tonic-gate (gflags->uf_trs_ted | 19840Sstevel@tonic-gate (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 19850Sstevel@tonic-gate if (mtype & USYNC_PROCESS) 19860Sstevel@tonic-gate return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 19870Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 19880Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19890Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19900Sstevel@tonic-gate return (0); 19910Sstevel@tonic-gate } 19924574Sraf if (mtype && MUTEX_OWNER(mp) == self) 19934574Sraf return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 19944613Sraf if (mutex_trylock_adaptive(mp, 1) != 0) 19954574Sraf return (mutex_lock_queue(self, NULL, mp, tsp)); 19964574Sraf return (0); 19970Sstevel@tonic-gate } 19980Sstevel@tonic-gate 19990Sstevel@tonic-gate /* else do it the long way */ 20000Sstevel@tonic-gate return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 20010Sstevel@tonic-gate } 20020Sstevel@tonic-gate 2003*5891Sraf /* 2004*5891Sraf * Of the following function names (all the same function, of course), 2005*5891Sraf * only _private_mutex_lock() is not exported from libc. This means 2006*5891Sraf * that calling _private_mutex_lock() within libc will not invoke the 2007*5891Sraf * dynamic linker. This is critical for any code called in the child 2008*5891Sraf * of vfork() (via posix_spawn()) because invoking the dynamic linker 2009*5891Sraf * in such a case would corrupt the parent's address space. There are 2010*5891Sraf * other places in libc where avoiding the dynamic linker is necessary. 2011*5891Sraf * Of course, _private_mutex_lock() can be called in cases not requiring 2012*5891Sraf * the avoidance of the dynamic linker too, and often is. 
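 *
 * For illustration (lock name hypothetical), libc code that may run
 * in the child of vfork() therefore uses
 *	(void) _private_mutex_lock(&some_libc_lock);
 * which is resolved entirely within libc, while applications reach
 * the same code through the exported aliases declared just below,
 * e.g. pthread_mutex_lock(&m).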
2013*5891Sraf */ 20140Sstevel@tonic-gate #pragma weak _private_mutex_lock = __mutex_lock 20150Sstevel@tonic-gate #pragma weak mutex_lock = __mutex_lock 20160Sstevel@tonic-gate #pragma weak _mutex_lock = __mutex_lock 20170Sstevel@tonic-gate #pragma weak pthread_mutex_lock = __mutex_lock 20180Sstevel@tonic-gate #pragma weak _pthread_mutex_lock = __mutex_lock 20190Sstevel@tonic-gate int 20200Sstevel@tonic-gate __mutex_lock(mutex_t *mp) 20210Sstevel@tonic-gate { 20220Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20230Sstevel@tonic-gate return (mutex_lock_impl(mp, NULL)); 20240Sstevel@tonic-gate } 20250Sstevel@tonic-gate 20260Sstevel@tonic-gate #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 20270Sstevel@tonic-gate int 20280Sstevel@tonic-gate _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 20290Sstevel@tonic-gate { 20300Sstevel@tonic-gate timespec_t tslocal; 20310Sstevel@tonic-gate int error; 20320Sstevel@tonic-gate 20330Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20340Sstevel@tonic-gate abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 20350Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 20360Sstevel@tonic-gate if (error == ETIME) 20370Sstevel@tonic-gate error = ETIMEDOUT; 20380Sstevel@tonic-gate return (error); 20390Sstevel@tonic-gate } 20400Sstevel@tonic-gate 20410Sstevel@tonic-gate #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 20420Sstevel@tonic-gate int 20430Sstevel@tonic-gate _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 20440Sstevel@tonic-gate { 20450Sstevel@tonic-gate timespec_t tslocal; 20460Sstevel@tonic-gate int error; 20470Sstevel@tonic-gate 20480Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20490Sstevel@tonic-gate tslocal = *reltime; 20500Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 20510Sstevel@tonic-gate if (error == ETIME) 20520Sstevel@tonic-gate error = ETIMEDOUT; 20530Sstevel@tonic-gate return (error); 20540Sstevel@tonic-gate } 20550Sstevel@tonic-gate 20560Sstevel@tonic-gate #pragma weak _private_mutex_trylock = __mutex_trylock 20570Sstevel@tonic-gate #pragma weak mutex_trylock = __mutex_trylock 20580Sstevel@tonic-gate #pragma weak _mutex_trylock = __mutex_trylock 20590Sstevel@tonic-gate #pragma weak pthread_mutex_trylock = __mutex_trylock 20600Sstevel@tonic-gate #pragma weak _pthread_mutex_trylock = __mutex_trylock 20610Sstevel@tonic-gate int 20620Sstevel@tonic-gate __mutex_trylock(mutex_t *mp) 20630Sstevel@tonic-gate { 20640Sstevel@tonic-gate ulwp_t *self = curthread; 20650Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 20660Sstevel@tonic-gate uberflags_t *gflags; 20670Sstevel@tonic-gate int mtype; 20680Sstevel@tonic-gate 20690Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20700Sstevel@tonic-gate /* 20710Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 20720Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 20730Sstevel@tonic-gate * no error detection, no lock statistics, 20740Sstevel@tonic-gate * and the process has only a single thread. 20750Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 
20760Sstevel@tonic-gate */ 20770Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 20780Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 20790Sstevel@tonic-gate /* 20800Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 20810Sstevel@tonic-gate */ 20820Sstevel@tonic-gate if (mp->mutex_lockw == 0) { 20830Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 20840Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 20850Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 20860Sstevel@tonic-gate return (0); 20870Sstevel@tonic-gate } 20884574Sraf if (mtype && MUTEX_OWNER(mp) == self) 20894574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 20900Sstevel@tonic-gate return (EBUSY); 20910Sstevel@tonic-gate } 20920Sstevel@tonic-gate 20930Sstevel@tonic-gate /* 20940Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 20950Sstevel@tonic-gate * no error detection, and no lock statistics. 20960Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 20970Sstevel@tonic-gate */ 20980Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 20990Sstevel@tonic-gate (gflags->uf_trs_ted | 21000Sstevel@tonic-gate (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 21010Sstevel@tonic-gate if (mtype & USYNC_PROCESS) 21020Sstevel@tonic-gate return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 21030Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 21040Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 21050Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 21060Sstevel@tonic-gate return (0); 21070Sstevel@tonic-gate } 21084574Sraf if (mtype && MUTEX_OWNER(mp) == self) 21094574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 21104613Sraf if (__td_event_report(self, TD_LOCK_TRY, udp)) { 21114613Sraf self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 21124613Sraf tdb_event(TD_LOCK_TRY, udp); 21130Sstevel@tonic-gate } 21144613Sraf return (EBUSY); 21150Sstevel@tonic-gate } 21160Sstevel@tonic-gate 21170Sstevel@tonic-gate /* else do it the long way */ 21180Sstevel@tonic-gate return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 21190Sstevel@tonic-gate } 21200Sstevel@tonic-gate 21210Sstevel@tonic-gate int 21224574Sraf mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 21230Sstevel@tonic-gate { 21240Sstevel@tonic-gate ulwp_t *self = curthread; 21250Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 21260Sstevel@tonic-gate int mtype = mp->mutex_type; 21270Sstevel@tonic-gate tdb_mutex_stats_t *msp; 21284574Sraf int error = 0; 21294574Sraf int release_all; 21300Sstevel@tonic-gate lwpid_t lwpid; 21310Sstevel@tonic-gate 21320Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 21330Sstevel@tonic-gate return (EPERM); 21340Sstevel@tonic-gate 21350Sstevel@tonic-gate if (self->ul_error_detection && !mutex_is_held(mp)) 21360Sstevel@tonic-gate lock_error(mp, "mutex_unlock", NULL, NULL); 21370Sstevel@tonic-gate 21380Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 21390Sstevel@tonic-gate mp->mutex_rcount--; 21400Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 21410Sstevel@tonic-gate return (0); 21420Sstevel@tonic-gate } 21430Sstevel@tonic-gate 21440Sstevel@tonic-gate if ((msp = MUTEX_STATS(mp, udp)) != NULL) 21450Sstevel@tonic-gate (void) record_hold_time(msp); 21460Sstevel@tonic-gate 21474574Sraf if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 21484574Sraf 
(mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 21494574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 21504574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 21514574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 21524574Sraf } 21534574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 21544574Sraf 21554574Sraf if (mtype & LOCK_PRIO_INHERIT) { 21560Sstevel@tonic-gate no_preempt(self); 21570Sstevel@tonic-gate mp->mutex_owner = 0; 21580Sstevel@tonic-gate mp->mutex_ownerpid = 0; 21590Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 21604574Sraf mp->mutex_lockw = LOCKCLEAR; 21614574Sraf error = ___lwp_mutex_unlock(mp); 21620Sstevel@tonic-gate preempt(self); 21630Sstevel@tonic-gate } else if (mtype & USYNC_PROCESS) { 21645629Sraf mutex_unlock_process(mp, release_all); 21650Sstevel@tonic-gate } else { /* USYNC_THREAD */ 21664574Sraf if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 21670Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 21680Sstevel@tonic-gate preempt(self); 21690Sstevel@tonic-gate } 21700Sstevel@tonic-gate } 21710Sstevel@tonic-gate 21724574Sraf if (mtype & LOCK_ROBUST) 21734574Sraf forget_lock(mp); 21744574Sraf 21754574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 21764574Sraf _ceil_prio_waive(); 21774574Sraf 21780Sstevel@tonic-gate return (error); 21790Sstevel@tonic-gate } 21800Sstevel@tonic-gate 21810Sstevel@tonic-gate #pragma weak _private_mutex_unlock = __mutex_unlock 21820Sstevel@tonic-gate #pragma weak mutex_unlock = __mutex_unlock 21830Sstevel@tonic-gate #pragma weak _mutex_unlock = __mutex_unlock 21840Sstevel@tonic-gate #pragma weak pthread_mutex_unlock = __mutex_unlock 21850Sstevel@tonic-gate #pragma weak _pthread_mutex_unlock = __mutex_unlock 21860Sstevel@tonic-gate int 21870Sstevel@tonic-gate __mutex_unlock(mutex_t *mp) 21880Sstevel@tonic-gate { 21890Sstevel@tonic-gate ulwp_t *self = curthread; 21900Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 21910Sstevel@tonic-gate uberflags_t *gflags; 21920Sstevel@tonic-gate lwpid_t lwpid; 21930Sstevel@tonic-gate int mtype; 21940Sstevel@tonic-gate short el; 21950Sstevel@tonic-gate 21960Sstevel@tonic-gate /* 21970Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 21980Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 21990Sstevel@tonic-gate * no error detection, no lock statistics, 22000Sstevel@tonic-gate * and the process has only a single thread. 22010Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 22020Sstevel@tonic-gate */ 22030Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 22040Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 22050Sstevel@tonic-gate if (mtype) { 22060Sstevel@tonic-gate /* 22070Sstevel@tonic-gate * At this point we know that one or both of the 22080Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 22090Sstevel@tonic-gate */ 22100Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 22110Sstevel@tonic-gate return (EPERM); 22120Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22130Sstevel@tonic-gate mp->mutex_rcount--; 22140Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22150Sstevel@tonic-gate return (0); 22160Sstevel@tonic-gate } 22170Sstevel@tonic-gate } 22180Sstevel@tonic-gate /* 22190Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 22200Sstevel@tonic-gate * Also, there can be no waiters. 
22210Sstevel@tonic-gate */ 22220Sstevel@tonic-gate mp->mutex_owner = 0; 22230Sstevel@tonic-gate mp->mutex_lockword = 0; 22240Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 22250Sstevel@tonic-gate return (0); 22260Sstevel@tonic-gate } 22270Sstevel@tonic-gate 22280Sstevel@tonic-gate /* 22290Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 22300Sstevel@tonic-gate * no error detection, and no lock statistics. 22310Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 22320Sstevel@tonic-gate */ 22330Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL) { 22340Sstevel@tonic-gate if (((el = gflags->uf_trs_ted) | mtype) == 0) { 22350Sstevel@tonic-gate fast_unlock: 22365629Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 22370Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 22380Sstevel@tonic-gate preempt(self); 22390Sstevel@tonic-gate } 22400Sstevel@tonic-gate return (0); 22410Sstevel@tonic-gate } 22420Sstevel@tonic-gate if (el) /* error detection or lock statistics */ 22430Sstevel@tonic-gate goto slow_unlock; 22440Sstevel@tonic-gate if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 22450Sstevel@tonic-gate /* 22460Sstevel@tonic-gate * At this point we know that one or both of the 22470Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 22480Sstevel@tonic-gate */ 22490Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 22500Sstevel@tonic-gate return (EPERM); 22510Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22520Sstevel@tonic-gate mp->mutex_rcount--; 22530Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22540Sstevel@tonic-gate return (0); 22550Sstevel@tonic-gate } 22560Sstevel@tonic-gate goto fast_unlock; 22570Sstevel@tonic-gate } 22580Sstevel@tonic-gate if ((mtype & 22590Sstevel@tonic-gate ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 22600Sstevel@tonic-gate /* 22610Sstevel@tonic-gate * At this point we know that zero, one, or both of the 22620Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 22630Sstevel@tonic-gate * that the USYNC_PROCESS flag is set. 22640Sstevel@tonic-gate */ 22650Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 22660Sstevel@tonic-gate return (EPERM); 22670Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22680Sstevel@tonic-gate mp->mutex_rcount--; 22690Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22700Sstevel@tonic-gate return (0); 22710Sstevel@tonic-gate } 22725629Sraf mutex_unlock_process(mp, 0); 22730Sstevel@tonic-gate return (0); 22740Sstevel@tonic-gate } 22750Sstevel@tonic-gate } 22760Sstevel@tonic-gate 22770Sstevel@tonic-gate /* else do it the long way */ 22780Sstevel@tonic-gate slow_unlock: 22794574Sraf return (mutex_unlock_internal(mp, 0)); 22800Sstevel@tonic-gate } 22810Sstevel@tonic-gate 22820Sstevel@tonic-gate /* 22830Sstevel@tonic-gate * Internally to the library, almost all mutex lock/unlock actions 22840Sstevel@tonic-gate * go through these lmutex_ functions, to protect critical regions. 22850Sstevel@tonic-gate * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 22860Sstevel@tonic-gate * to make these functions faster since we know that the mutex type 22870Sstevel@tonic-gate * of all internal locks is USYNC_THREAD. We also know that internal 22880Sstevel@tonic-gate * locking can never fail, so we panic if it does. 
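 *
 * A typical internal use is simply (lock name hypothetical):
 *	lmutex_lock(&some_libc_lock);
 *	... manipulate libc-private data ...
 *	lmutex_unlock(&some_libc_lock);
 * lmutex_lock() additionally calls enter_critical() and
 * lmutex_unlock() calls exit_critical(), so these locks are meant
 * to be held only briefly.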
22890Sstevel@tonic-gate */ 22900Sstevel@tonic-gate void 22910Sstevel@tonic-gate lmutex_lock(mutex_t *mp) 22920Sstevel@tonic-gate { 22930Sstevel@tonic-gate ulwp_t *self = curthread; 22940Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 22950Sstevel@tonic-gate 22960Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 22970Sstevel@tonic-gate 22980Sstevel@tonic-gate enter_critical(self); 22990Sstevel@tonic-gate /* 23000Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 23010Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 23020Sstevel@tonic-gate */ 23030Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 23040Sstevel@tonic-gate /* 23050Sstevel@tonic-gate * Only one thread exists; the mutex must be free. 23060Sstevel@tonic-gate */ 23070Sstevel@tonic-gate ASSERT(mp->mutex_lockw == 0); 23080Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 23090Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 23100Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 23110Sstevel@tonic-gate } else { 23120Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 23130Sstevel@tonic-gate 23140Sstevel@tonic-gate if (!self->ul_schedctl_called) 23150Sstevel@tonic-gate (void) setup_schedctl(); 23160Sstevel@tonic-gate 23170Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 23180Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 23190Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 23204613Sraf } else if (mutex_trylock_adaptive(mp, 1) != 0) { 23210Sstevel@tonic-gate (void) mutex_lock_queue(self, msp, mp, NULL); 23220Sstevel@tonic-gate } 23230Sstevel@tonic-gate 23240Sstevel@tonic-gate if (msp) 23250Sstevel@tonic-gate record_begin_hold(msp); 23260Sstevel@tonic-gate } 23270Sstevel@tonic-gate } 23280Sstevel@tonic-gate 23290Sstevel@tonic-gate void 23300Sstevel@tonic-gate lmutex_unlock(mutex_t *mp) 23310Sstevel@tonic-gate { 23320Sstevel@tonic-gate ulwp_t *self = curthread; 23330Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 23340Sstevel@tonic-gate 23350Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 23360Sstevel@tonic-gate 23370Sstevel@tonic-gate /* 23380Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 23390Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 23400Sstevel@tonic-gate */ 23410Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 23420Sstevel@tonic-gate /* 23430Sstevel@tonic-gate * Only one thread exists so there can be no waiters. 23440Sstevel@tonic-gate */ 23450Sstevel@tonic-gate mp->mutex_owner = 0; 23460Sstevel@tonic-gate mp->mutex_lockword = 0; 23470Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 23480Sstevel@tonic-gate } else { 23490Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 23500Sstevel@tonic-gate lwpid_t lwpid; 23510Sstevel@tonic-gate 23520Sstevel@tonic-gate if (msp) 23530Sstevel@tonic-gate (void) record_hold_time(msp); 23544574Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 23550Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 23560Sstevel@tonic-gate preempt(self); 23570Sstevel@tonic-gate } 23580Sstevel@tonic-gate } 23590Sstevel@tonic-gate exit_critical(self); 23600Sstevel@tonic-gate } 23610Sstevel@tonic-gate 23622248Sraf /* 23632248Sraf * For specialized code in libc, like the asynchronous i/o code, 23642248Sraf * the following sig_*() locking primitives are used in order 23652248Sraf * to make the code asynchronous signal safe. 
Signals are 23662248Sraf * deferred while locks acquired by these functions are held. 23672248Sraf */ 23682248Sraf void 23692248Sraf sig_mutex_lock(mutex_t *mp) 23702248Sraf { 23712248Sraf sigoff(curthread); 23722248Sraf (void) _private_mutex_lock(mp); 23732248Sraf } 23742248Sraf 23752248Sraf void 23762248Sraf sig_mutex_unlock(mutex_t *mp) 23772248Sraf { 23782248Sraf (void) _private_mutex_unlock(mp); 23792248Sraf sigon(curthread); 23802248Sraf } 23812248Sraf 23822248Sraf int 23832248Sraf sig_mutex_trylock(mutex_t *mp) 23842248Sraf { 23852248Sraf int error; 23862248Sraf 23872248Sraf sigoff(curthread); 23882248Sraf if ((error = _private_mutex_trylock(mp)) != 0) 23892248Sraf sigon(curthread); 23902248Sraf return (error); 23912248Sraf } 23922248Sraf 23932248Sraf /* 23942248Sraf * sig_cond_wait() is a cancellation point. 23952248Sraf */ 23962248Sraf int 23972248Sraf sig_cond_wait(cond_t *cv, mutex_t *mp) 23982248Sraf { 23992248Sraf int error; 24002248Sraf 24012248Sraf ASSERT(curthread->ul_sigdefer != 0); 24022248Sraf _private_testcancel(); 2403*5891Sraf error = __cond_wait(cv, mp); 24042248Sraf if (error == EINTR && curthread->ul_cursig) { 24052248Sraf sig_mutex_unlock(mp); 24062248Sraf /* take the deferred signal here */ 24072248Sraf sig_mutex_lock(mp); 24082248Sraf } 24092248Sraf _private_testcancel(); 24102248Sraf return (error); 24112248Sraf } 24122248Sraf 24132248Sraf /* 24142248Sraf * sig_cond_reltimedwait() is a cancellation point. 24152248Sraf */ 24162248Sraf int 24172248Sraf sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 24182248Sraf { 24192248Sraf int error; 24202248Sraf 24212248Sraf ASSERT(curthread->ul_sigdefer != 0); 24222248Sraf _private_testcancel(); 2423*5891Sraf error = __cond_reltimedwait(cv, mp, ts); 24242248Sraf if (error == EINTR && curthread->ul_cursig) { 24252248Sraf sig_mutex_unlock(mp); 24262248Sraf /* take the deferred signal here */ 24272248Sraf sig_mutex_lock(mp); 24282248Sraf } 24292248Sraf _private_testcancel(); 24302248Sraf return (error); 24312248Sraf } 24322248Sraf 2433*5891Sraf /* 2434*5891Sraf * For specialized code in libc, like the stdio code. 2435*5891Sraf * the following cancel_safe_*() locking primitives are used in 2436*5891Sraf * order to make the code cancellation-safe. Cancellation is 2437*5891Sraf * deferred while locks acquired by these functions are held. 2438*5891Sraf */ 2439*5891Sraf void 2440*5891Sraf cancel_safe_mutex_lock(mutex_t *mp) 2441*5891Sraf { 2442*5891Sraf (void) _private_mutex_lock(mp); 2443*5891Sraf curthread->ul_libc_locks++; 2444*5891Sraf } 2445*5891Sraf 2446*5891Sraf int 2447*5891Sraf cancel_safe_mutex_trylock(mutex_t *mp) 2448*5891Sraf { 2449*5891Sraf int error; 2450*5891Sraf 2451*5891Sraf if ((error = _private_mutex_trylock(mp)) == 0) 2452*5891Sraf curthread->ul_libc_locks++; 2453*5891Sraf return (error); 2454*5891Sraf } 2455*5891Sraf 2456*5891Sraf void 2457*5891Sraf cancel_safe_mutex_unlock(mutex_t *mp) 2458*5891Sraf { 2459*5891Sraf ulwp_t *self = curthread; 2460*5891Sraf 2461*5891Sraf ASSERT(self->ul_libc_locks != 0); 2462*5891Sraf 2463*5891Sraf (void) _private_mutex_unlock(mp); 2464*5891Sraf 2465*5891Sraf /* 2466*5891Sraf * Decrement the count of locks held by cancel_safe_mutex_lock(). 2467*5891Sraf * If we are then in a position to terminate cleanly and 2468*5891Sraf * if there is a pending cancellation and cancellation 2469*5891Sraf * is not disabled and we received EINTR from a recent 2470*5891Sraf * system call then perform the cancellation action now. 
2471*5891Sraf */ 2472*5891Sraf if (--self->ul_libc_locks == 0 && 2473*5891Sraf !(self->ul_vfork | self->ul_nocancel | 2474*5891Sraf self->ul_critical | self->ul_sigdefer) && 2475*5891Sraf cancel_active()) 2476*5891Sraf _pthread_exit(PTHREAD_CANCELED); 2477*5891Sraf } 2478*5891Sraf 24790Sstevel@tonic-gate static int 24800Sstevel@tonic-gate shared_mutex_held(mutex_t *mparg) 24810Sstevel@tonic-gate { 24820Sstevel@tonic-gate /* 24834574Sraf * The 'volatile' is necessary to make sure the compiler doesn't 24844574Sraf * reorder the tests of the various components of the mutex. 24854574Sraf * They must be tested in this order: 24864574Sraf * mutex_lockw 24874574Sraf * mutex_owner 24884574Sraf * mutex_ownerpid 24894574Sraf * This relies on the fact that everywhere mutex_lockw is cleared, 24904574Sraf * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 24914574Sraf * is cleared, and that everywhere mutex_lockw is set, mutex_owner 24924574Sraf * and mutex_ownerpid are set after mutex_lockw is set, and that 24934574Sraf * mutex_lockw is set or cleared with a memory barrier. 24940Sstevel@tonic-gate */ 24950Sstevel@tonic-gate volatile mutex_t *mp = (volatile mutex_t *)mparg; 24960Sstevel@tonic-gate ulwp_t *self = curthread; 24970Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 24980Sstevel@tonic-gate 24994574Sraf return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 25000Sstevel@tonic-gate } 25010Sstevel@tonic-gate 25020Sstevel@tonic-gate /* 25030Sstevel@tonic-gate * Some crufty old programs define their own version of _mutex_held() 25040Sstevel@tonic-gate * to be simply return(1). This breaks internal libc logic, so we 25050Sstevel@tonic-gate * define a private version for exclusive use by libc, mutex_is_held(), 25060Sstevel@tonic-gate * and also a new public function, __mutex_held(), to be used in new 25070Sstevel@tonic-gate * code to circumvent these crufty old programs. 
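 *
 * New code that wants an ownership assertion can therefore write,
 * for example:
 *	assert(__mutex_held(&m));
 * while libc itself always calls mutex_is_held(), so an application's
 * own _mutex_held() stub cannot defeat the library's internal checks.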
25080Sstevel@tonic-gate */ 25090Sstevel@tonic-gate #pragma weak mutex_held = mutex_is_held 25100Sstevel@tonic-gate #pragma weak _mutex_held = mutex_is_held 25110Sstevel@tonic-gate #pragma weak __mutex_held = mutex_is_held 25120Sstevel@tonic-gate int 25134574Sraf mutex_is_held(mutex_t *mparg) 25140Sstevel@tonic-gate { 25154574Sraf volatile mutex_t *mp = (volatile mutex_t *)mparg; 25164574Sraf 25174574Sraf if (mparg->mutex_type & USYNC_PROCESS) 25184574Sraf return (shared_mutex_held(mparg)); 25190Sstevel@tonic-gate return (MUTEX_OWNED(mp, curthread)); 25200Sstevel@tonic-gate } 25210Sstevel@tonic-gate 25220Sstevel@tonic-gate #pragma weak _private_mutex_destroy = __mutex_destroy 25230Sstevel@tonic-gate #pragma weak mutex_destroy = __mutex_destroy 25240Sstevel@tonic-gate #pragma weak _mutex_destroy = __mutex_destroy 25250Sstevel@tonic-gate #pragma weak pthread_mutex_destroy = __mutex_destroy 25260Sstevel@tonic-gate #pragma weak _pthread_mutex_destroy = __mutex_destroy 25270Sstevel@tonic-gate int 25280Sstevel@tonic-gate __mutex_destroy(mutex_t *mp) 25290Sstevel@tonic-gate { 25304574Sraf if (mp->mutex_type & USYNC_PROCESS) 25314574Sraf forget_lock(mp); 25324574Sraf (void) _memset(mp, 0, sizeof (*mp)); 25330Sstevel@tonic-gate tdb_sync_obj_deregister(mp); 25340Sstevel@tonic-gate return (0); 25350Sstevel@tonic-gate } 25360Sstevel@tonic-gate 25374574Sraf #pragma weak mutex_consistent = __mutex_consistent 25384574Sraf #pragma weak _mutex_consistent = __mutex_consistent 25394574Sraf #pragma weak pthread_mutex_consistent_np = __mutex_consistent 25404574Sraf #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 25414574Sraf int 25424574Sraf __mutex_consistent(mutex_t *mp) 25434574Sraf { 25444574Sraf /* 25454574Sraf * Do this only for an inconsistent, initialized robust lock 25464574Sraf * that we hold. For all other cases, return EINVAL. 25474574Sraf */ 25484574Sraf if (mutex_is_held(mp) && 25494574Sraf (mp->mutex_type & LOCK_ROBUST) && 25504574Sraf (mp->mutex_flag & LOCK_INITED) && 25514574Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 25524574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 25534574Sraf mp->mutex_rcount = 0; 25544574Sraf return (0); 25554574Sraf } 25564574Sraf return (EINVAL); 25574574Sraf } 25584574Sraf 25590Sstevel@tonic-gate /* 25600Sstevel@tonic-gate * Spin locks are separate from ordinary mutexes, 25610Sstevel@tonic-gate * but we use the same data structure for them. 
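 *
 * For illustration, the expected usage pattern is:
 *	pthread_spinlock_t lk;
 *	(void) pthread_spin_init(&lk, PTHREAD_PROCESS_PRIVATE);
 *	(void) pthread_spin_lock(&lk);
 *	... a very short critical section ...
 *	(void) pthread_spin_unlock(&lk);
 *	(void) pthread_spin_destroy(&lk);
 * pthread_spin_trylock() returns EBUSY rather than spinning when the
 * lock is already held.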
25620Sstevel@tonic-gate */ 25630Sstevel@tonic-gate 25640Sstevel@tonic-gate #pragma weak pthread_spin_init = _pthread_spin_init 25650Sstevel@tonic-gate int 25660Sstevel@tonic-gate _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 25670Sstevel@tonic-gate { 25680Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25690Sstevel@tonic-gate 25700Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 25710Sstevel@tonic-gate if (pshared == PTHREAD_PROCESS_SHARED) 25720Sstevel@tonic-gate mp->mutex_type = USYNC_PROCESS; 25730Sstevel@tonic-gate else 25740Sstevel@tonic-gate mp->mutex_type = USYNC_THREAD; 25750Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 25760Sstevel@tonic-gate mp->mutex_magic = MUTEX_MAGIC; 25770Sstevel@tonic-gate return (0); 25780Sstevel@tonic-gate } 25790Sstevel@tonic-gate 25800Sstevel@tonic-gate #pragma weak pthread_spin_destroy = _pthread_spin_destroy 25810Sstevel@tonic-gate int 25820Sstevel@tonic-gate _pthread_spin_destroy(pthread_spinlock_t *lock) 25830Sstevel@tonic-gate { 25840Sstevel@tonic-gate (void) _memset(lock, 0, sizeof (*lock)); 25850Sstevel@tonic-gate return (0); 25860Sstevel@tonic-gate } 25870Sstevel@tonic-gate 25880Sstevel@tonic-gate #pragma weak pthread_spin_trylock = _pthread_spin_trylock 25890Sstevel@tonic-gate int 25900Sstevel@tonic-gate _pthread_spin_trylock(pthread_spinlock_t *lock) 25910Sstevel@tonic-gate { 25920Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25930Sstevel@tonic-gate ulwp_t *self = curthread; 25940Sstevel@tonic-gate int error = 0; 25950Sstevel@tonic-gate 25960Sstevel@tonic-gate no_preempt(self); 25970Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) != 0) 25980Sstevel@tonic-gate error = EBUSY; 25990Sstevel@tonic-gate else { 26000Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 26010Sstevel@tonic-gate if (mp->mutex_type == USYNC_PROCESS) 26020Sstevel@tonic-gate mp->mutex_ownerpid = self->ul_uberdata->pid; 26030Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 26040Sstevel@tonic-gate } 26050Sstevel@tonic-gate preempt(self); 26060Sstevel@tonic-gate return (error); 26070Sstevel@tonic-gate } 26080Sstevel@tonic-gate 26090Sstevel@tonic-gate #pragma weak pthread_spin_lock = _pthread_spin_lock 26100Sstevel@tonic-gate int 26110Sstevel@tonic-gate _pthread_spin_lock(pthread_spinlock_t *lock) 26120Sstevel@tonic-gate { 26134574Sraf mutex_t *mp = (mutex_t *)lock; 26144574Sraf ulwp_t *self = curthread; 26154574Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 26164574Sraf int count = 0; 26174574Sraf 26184574Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 26194574Sraf 26204574Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 26214574Sraf 26220Sstevel@tonic-gate /* 26230Sstevel@tonic-gate * We don't care whether the owner is running on a processor. 26240Sstevel@tonic-gate * We just spin because that's what this interface requires. 
26250Sstevel@tonic-gate */ 26260Sstevel@tonic-gate for (;;) { 26270Sstevel@tonic-gate if (*lockp == 0) { /* lock byte appears to be clear */ 26284574Sraf no_preempt(self); 26294574Sraf if (set_lock_byte(lockp) == 0) 26304574Sraf break; 26314574Sraf preempt(self); 26320Sstevel@tonic-gate } 26335629Sraf if (count < INT_MAX) 26345629Sraf count++; 26350Sstevel@tonic-gate SMT_PAUSE(); 26360Sstevel@tonic-gate } 26374574Sraf mp->mutex_owner = (uintptr_t)self; 26384574Sraf if (mp->mutex_type == USYNC_PROCESS) 26394574Sraf mp->mutex_ownerpid = self->ul_uberdata->pid; 26404574Sraf preempt(self); 26415629Sraf if (count) { 26425629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 26435629Sraf } 26444574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 26454574Sraf return (0); 26460Sstevel@tonic-gate } 26470Sstevel@tonic-gate 26480Sstevel@tonic-gate #pragma weak pthread_spin_unlock = _pthread_spin_unlock 26490Sstevel@tonic-gate int 26500Sstevel@tonic-gate _pthread_spin_unlock(pthread_spinlock_t *lock) 26510Sstevel@tonic-gate { 26520Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 26530Sstevel@tonic-gate ulwp_t *self = curthread; 26540Sstevel@tonic-gate 26550Sstevel@tonic-gate no_preempt(self); 26560Sstevel@tonic-gate mp->mutex_owner = 0; 26570Sstevel@tonic-gate mp->mutex_ownerpid = 0; 26580Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 26594570Sraf (void) atomic_swap_32(&mp->mutex_lockword, 0); 26600Sstevel@tonic-gate preempt(self); 26610Sstevel@tonic-gate return (0); 26620Sstevel@tonic-gate } 26630Sstevel@tonic-gate 26645629Sraf #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 26654574Sraf 26664574Sraf /* 26674574Sraf * Find/allocate an entry for 'lock' in our array of held locks. 26684574Sraf */ 26694574Sraf static mutex_t ** 26704574Sraf find_lock_entry(mutex_t *lock) 26714574Sraf { 26724574Sraf ulwp_t *self = curthread; 26734574Sraf mutex_t **remembered = NULL; 26744574Sraf mutex_t **lockptr; 26754574Sraf uint_t nlocks; 26764574Sraf 26774574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 26784574Sraf lockptr = self->ul_heldlocks.array; 26794574Sraf else { 26804574Sraf nlocks = 1; 26814574Sraf lockptr = &self->ul_heldlocks.single; 26824574Sraf } 26834574Sraf 26844574Sraf for (; nlocks; nlocks--, lockptr++) { 26854574Sraf if (*lockptr == lock) 26864574Sraf return (lockptr); 26874574Sraf if (*lockptr == NULL && remembered == NULL) 26884574Sraf remembered = lockptr; 26894574Sraf } 26904574Sraf if (remembered != NULL) { 26914574Sraf *remembered = lock; 26924574Sraf return (remembered); 26934574Sraf } 26944574Sraf 26954574Sraf /* 26964574Sraf * No entry available. Allocate more space, converting 26974574Sraf * the single entry into an array of entries if necessary. 26984574Sraf */ 26994574Sraf if ((nlocks = self->ul_heldlockcnt) == 0) { 27004574Sraf /* 27014574Sraf * Initial allocation of the array. 27024574Sraf * Convert the single entry into an array. 27034574Sraf */ 27044574Sraf self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 27054574Sraf lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 27064574Sraf /* 27074574Sraf * The single entry becomes the first entry in the array. 27084574Sraf */ 27094574Sraf *lockptr = self->ul_heldlocks.single; 27104574Sraf self->ul_heldlocks.array = lockptr; 27114574Sraf /* 27124574Sraf * Return the next available entry in the array. 27134574Sraf */ 27144574Sraf *++lockptr = lock; 27154574Sraf return (lockptr); 27164574Sraf } 27174574Sraf /* 27184574Sraf * Reallocate the array, double the size each time. 
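 * With INITIAL_LOCKS == 8 the array grows 8 -> 16 -> 32 -> ... entries.
 * The old contents are copied into the new array before the old array
 * is freed, and the caller's lock goes into the first newly-added slot.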
27194574Sraf */ 27204574Sraf lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 27214574Sraf (void) _memcpy(lockptr, self->ul_heldlocks.array, 27224574Sraf nlocks * sizeof (mutex_t *)); 27234574Sraf lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 27244574Sraf self->ul_heldlocks.array = lockptr; 27254574Sraf self->ul_heldlockcnt *= 2; 27264574Sraf /* 27274574Sraf * Return the next available entry in the newly allocated array. 27284574Sraf */ 27294574Sraf *(lockptr += nlocks) = lock; 27304574Sraf return (lockptr); 27314574Sraf } 27324574Sraf 27334574Sraf /* 27344574Sraf * Insert 'lock' into our list of held locks. 27354574Sraf * Currently only used for LOCK_ROBUST mutexes. 27364574Sraf */ 27374574Sraf void 27384574Sraf remember_lock(mutex_t *lock) 27394574Sraf { 27404574Sraf (void) find_lock_entry(lock); 27414574Sraf } 27424574Sraf 27434574Sraf /* 27444574Sraf * Remove 'lock' from our list of held locks. 27454574Sraf * Currently only used for LOCK_ROBUST mutexes. 27464574Sraf */ 27474574Sraf void 27484574Sraf forget_lock(mutex_t *lock) 27494574Sraf { 27504574Sraf *find_lock_entry(lock) = NULL; 27514574Sraf } 27524574Sraf 27534574Sraf /* 27544574Sraf * Free the array of held locks. 27554574Sraf */ 27564574Sraf void 27574574Sraf heldlock_free(ulwp_t *ulwp) 27584574Sraf { 27594574Sraf uint_t nlocks; 27604574Sraf 27614574Sraf if ((nlocks = ulwp->ul_heldlockcnt) != 0) 27624574Sraf lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 27634574Sraf ulwp->ul_heldlockcnt = 0; 27644574Sraf ulwp->ul_heldlocks.array = NULL; 27654574Sraf } 27664574Sraf 27674574Sraf /* 27684574Sraf * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 27694574Sraf * Called from _thrp_exit() to deal with abandoned locks. 27704574Sraf */ 27714574Sraf void 27724574Sraf heldlock_exit(void) 27734574Sraf { 27744574Sraf ulwp_t *self = curthread; 27754574Sraf mutex_t **lockptr; 27764574Sraf uint_t nlocks; 27774574Sraf mutex_t *mp; 27784574Sraf 27794574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 27804574Sraf lockptr = self->ul_heldlocks.array; 27814574Sraf else { 27824574Sraf nlocks = 1; 27834574Sraf lockptr = &self->ul_heldlocks.single; 27844574Sraf } 27854574Sraf 27864574Sraf for (; nlocks; nlocks--, lockptr++) { 27874574Sraf /* 27884574Sraf * The kernel takes care of transitioning held 27894574Sraf * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 27904574Sraf * We avoid that case here. 
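 * The next thread to acquire a lock abandoned here sees EOWNERDEAD
 * and, after repairing the protected state, can make the lock usable
 * again via pthread_mutex_consistent_np() (__mutex_consistent() above).
 * A caller-side sketch, where repair_state() stands in for application
 * recovery code:
 *
 *	switch (pthread_mutex_lock(&m)) {
 *	case 0:
 *		break;
 *	case EOWNERDEAD:
 *		repair_state();
 *		(void) pthread_mutex_consistent_np(&m);
 *		break;
 *	default:
 *		abort();
 *	}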
27914574Sraf */ 27924574Sraf if ((mp = *lockptr) != NULL && 27934574Sraf mutex_is_held(mp) && 27944574Sraf (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 27954574Sraf LOCK_ROBUST) { 27964574Sraf mp->mutex_rcount = 0; 27974574Sraf if (!(mp->mutex_flag & LOCK_UNMAPPED)) 27984574Sraf mp->mutex_flag |= LOCK_OWNERDEAD; 27994574Sraf (void) mutex_unlock_internal(mp, 1); 28004574Sraf } 28014574Sraf } 28024574Sraf 28034574Sraf heldlock_free(self); 28044574Sraf } 28054574Sraf 28060Sstevel@tonic-gate #pragma weak cond_init = _cond_init 28070Sstevel@tonic-gate /* ARGSUSED2 */ 28080Sstevel@tonic-gate int 28090Sstevel@tonic-gate _cond_init(cond_t *cvp, int type, void *arg) 28100Sstevel@tonic-gate { 28110Sstevel@tonic-gate if (type != USYNC_THREAD && type != USYNC_PROCESS) 28120Sstevel@tonic-gate return (EINVAL); 28130Sstevel@tonic-gate (void) _memset(cvp, 0, sizeof (*cvp)); 28140Sstevel@tonic-gate cvp->cond_type = (uint16_t)type; 28150Sstevel@tonic-gate cvp->cond_magic = COND_MAGIC; 28160Sstevel@tonic-gate return (0); 28170Sstevel@tonic-gate } 28180Sstevel@tonic-gate 28190Sstevel@tonic-gate /* 28200Sstevel@tonic-gate * cond_sleep_queue(): utility function for cond_wait_queue(). 28210Sstevel@tonic-gate * 28220Sstevel@tonic-gate * Go to sleep on a condvar sleep queue, expect to be waked up 28230Sstevel@tonic-gate * by someone calling cond_signal() or cond_broadcast() or due 28240Sstevel@tonic-gate * to receiving a UNIX signal or being cancelled, or just simply 28250Sstevel@tonic-gate * due to a spurious wakeup (like someone calling forkall()). 28260Sstevel@tonic-gate * 28270Sstevel@tonic-gate * The associated mutex is *not* reacquired before returning. 28280Sstevel@tonic-gate * That must be done by the caller of cond_sleep_queue(). 28290Sstevel@tonic-gate */ 28304574Sraf static int 28310Sstevel@tonic-gate cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 28320Sstevel@tonic-gate { 28330Sstevel@tonic-gate ulwp_t *self = curthread; 28340Sstevel@tonic-gate queue_head_t *qp; 28350Sstevel@tonic-gate queue_head_t *mqp; 28360Sstevel@tonic-gate lwpid_t lwpid; 28370Sstevel@tonic-gate int signalled; 28380Sstevel@tonic-gate int error; 28394574Sraf int release_all; 28400Sstevel@tonic-gate 28410Sstevel@tonic-gate /* 28420Sstevel@tonic-gate * Put ourself on the CV sleep queue, unlock the mutex, then 28430Sstevel@tonic-gate * park ourself and unpark a candidate lwp to grab the mutex. 28440Sstevel@tonic-gate * We must go onto the CV sleep queue before dropping the 28450Sstevel@tonic-gate * mutex in order to guarantee atomicity of the operation.
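 * If we dropped the mutex before enqueueing, a cond_signal() issued in
 * that window would find no waiter and the wakeup would be lost;
 * enqueueing while the mutex is still held closes the window.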
28460Sstevel@tonic-gate */ 28470Sstevel@tonic-gate self->ul_sp = stkptr(); 28480Sstevel@tonic-gate qp = queue_lock(cvp, CV); 28490Sstevel@tonic-gate enqueue(qp, self, cvp, CV); 28500Sstevel@tonic-gate cvp->cond_waiters_user = 1; 28510Sstevel@tonic-gate self->ul_cvmutex = mp; 28520Sstevel@tonic-gate self->ul_cv_wake = (tsp != NULL); 28530Sstevel@tonic-gate self->ul_signalled = 0; 28544574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 28554574Sraf mp->mutex_flag &= ~LOCK_OWNERDEAD; 28564574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 28574574Sraf } 28584574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 28594574Sraf lwpid = mutex_unlock_queue(mp, release_all); 28600Sstevel@tonic-gate for (;;) { 28610Sstevel@tonic-gate set_parking_flag(self, 1); 28620Sstevel@tonic-gate queue_unlock(qp); 28630Sstevel@tonic-gate if (lwpid != 0) { 28640Sstevel@tonic-gate lwpid = preempt_unpark(self, lwpid); 28650Sstevel@tonic-gate preempt(self); 28660Sstevel@tonic-gate } 28670Sstevel@tonic-gate /* 28680Sstevel@tonic-gate * We may have a deferred signal present, 28690Sstevel@tonic-gate * in which case we should return EINTR. 28700Sstevel@tonic-gate * Also, we may have received a SIGCANCEL; if so 28710Sstevel@tonic-gate * and we are cancelable we should return EINTR. 28720Sstevel@tonic-gate * We force an immediate EINTR return from 28730Sstevel@tonic-gate * __lwp_park() by turning our parking flag off. 28740Sstevel@tonic-gate */ 28750Sstevel@tonic-gate if (self->ul_cursig != 0 || 28760Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 28770Sstevel@tonic-gate set_parking_flag(self, 0); 28780Sstevel@tonic-gate /* 28790Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 28800Sstevel@tonic-gate * if we are unparked before the timeout expires. 28810Sstevel@tonic-gate */ 28820Sstevel@tonic-gate error = __lwp_park(tsp, lwpid); 28830Sstevel@tonic-gate set_parking_flag(self, 0); 28840Sstevel@tonic-gate lwpid = 0; /* unpark the other lwp only once */ 28850Sstevel@tonic-gate /* 28860Sstevel@tonic-gate * We were waked up by cond_signal(), cond_broadcast(), 28870Sstevel@tonic-gate * by an interrupt or timeout (EINTR or ETIME), 28880Sstevel@tonic-gate * or we may just have gotten a spurious wakeup. 28890Sstevel@tonic-gate */ 28900Sstevel@tonic-gate qp = queue_lock(cvp, CV); 28910Sstevel@tonic-gate mqp = queue_lock(mp, MX); 28920Sstevel@tonic-gate if (self->ul_sleepq == NULL) 28930Sstevel@tonic-gate break; 28940Sstevel@tonic-gate /* 28950Sstevel@tonic-gate * We are on either the condvar sleep queue or the 28961893Sraf * mutex sleep queue. Break out of the sleep if we 28971893Sraf * were interrupted or we timed out (EINTR or ETIME). 28980Sstevel@tonic-gate * Else this is a spurious wakeup; continue the loop. 28990Sstevel@tonic-gate */ 29001893Sraf if (self->ul_sleepq == mqp) { /* mutex queue */ 29011893Sraf if (error) { 29021893Sraf mp->mutex_waiters = dequeue_self(mqp, mp); 29031893Sraf break; 29041893Sraf } 29051893Sraf tsp = NULL; /* no more timeout */ 29061893Sraf } else if (self->ul_sleepq == qp) { /* condvar queue */ 29070Sstevel@tonic-gate if (error) { 29080Sstevel@tonic-gate cvp->cond_waiters_user = dequeue_self(qp, cvp); 29090Sstevel@tonic-gate break; 29100Sstevel@tonic-gate } 29110Sstevel@tonic-gate /* 29120Sstevel@tonic-gate * Else a spurious wakeup on the condvar queue. 29130Sstevel@tonic-gate * __lwp_park() has already adjusted the timeout. 
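 * We simply loop around and park again with the residual time.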
29140Sstevel@tonic-gate */ 29150Sstevel@tonic-gate } else { 29160Sstevel@tonic-gate thr_panic("cond_sleep_queue(): thread not on queue"); 29170Sstevel@tonic-gate } 29180Sstevel@tonic-gate queue_unlock(mqp); 29190Sstevel@tonic-gate } 29200Sstevel@tonic-gate 29210Sstevel@tonic-gate self->ul_sp = 0; 29220Sstevel@tonic-gate ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); 29230Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 29240Sstevel@tonic-gate self->ul_wchan == NULL); 29250Sstevel@tonic-gate 29260Sstevel@tonic-gate signalled = self->ul_signalled; 29270Sstevel@tonic-gate self->ul_signalled = 0; 29280Sstevel@tonic-gate queue_unlock(qp); 29290Sstevel@tonic-gate queue_unlock(mqp); 29300Sstevel@tonic-gate 29310Sstevel@tonic-gate /* 29320Sstevel@tonic-gate * If we were concurrently cond_signal()d and any of: 29330Sstevel@tonic-gate * received a UNIX signal, were cancelled, or got a timeout, 29340Sstevel@tonic-gate * then perform another cond_signal() to avoid consuming it. 29350Sstevel@tonic-gate */ 29360Sstevel@tonic-gate if (error && signalled) 29370Sstevel@tonic-gate (void) cond_signal_internal(cvp); 29380Sstevel@tonic-gate 29390Sstevel@tonic-gate return (error); 29400Sstevel@tonic-gate } 29410Sstevel@tonic-gate 29420Sstevel@tonic-gate int 29435629Sraf cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29440Sstevel@tonic-gate { 29450Sstevel@tonic-gate ulwp_t *self = curthread; 29460Sstevel@tonic-gate int error; 29474574Sraf int merror; 29480Sstevel@tonic-gate 29490Sstevel@tonic-gate /* 29500Sstevel@tonic-gate * The old thread library was programmed to defer signals 29510Sstevel@tonic-gate * while in cond_wait() so that the associated mutex would 29520Sstevel@tonic-gate * be guaranteed to be held when the application signal 29530Sstevel@tonic-gate * handler was invoked. 29540Sstevel@tonic-gate * 29550Sstevel@tonic-gate * We do not behave this way by default; the state of the 29560Sstevel@tonic-gate * associated mutex in the signal handler is undefined. 29570Sstevel@tonic-gate * 29580Sstevel@tonic-gate * To accommodate applications that depend on the old 29590Sstevel@tonic-gate * behavior, the _THREAD_COND_WAIT_DEFER environment 29600Sstevel@tonic-gate * variable can be set to 1 and we will behave in the 29610Sstevel@tonic-gate * old way with respect to cond_wait(). 29620Sstevel@tonic-gate */ 29630Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29640Sstevel@tonic-gate sigoff(self); 29650Sstevel@tonic-gate 29660Sstevel@tonic-gate error = cond_sleep_queue(cvp, mp, tsp); 29670Sstevel@tonic-gate 29680Sstevel@tonic-gate /* 29690Sstevel@tonic-gate * Reacquire the mutex. 29700Sstevel@tonic-gate */ 29715629Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 29724574Sraf error = merror; 29730Sstevel@tonic-gate 29740Sstevel@tonic-gate /* 29750Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 29760Sstevel@tonic-gate */ 29770Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29780Sstevel@tonic-gate sigon(self); 29790Sstevel@tonic-gate 29800Sstevel@tonic-gate return (error); 29810Sstevel@tonic-gate } 29820Sstevel@tonic-gate 29830Sstevel@tonic-gate /* 29840Sstevel@tonic-gate * cond_sleep_kernel(): utility function for cond_wait_kernel(). 29850Sstevel@tonic-gate * See the comment ahead of cond_sleep_queue(), above. 
29860Sstevel@tonic-gate */ 29874574Sraf static int 29880Sstevel@tonic-gate cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29890Sstevel@tonic-gate { 29900Sstevel@tonic-gate int mtype = mp->mutex_type; 29910Sstevel@tonic-gate ulwp_t *self = curthread; 29920Sstevel@tonic-gate int error; 29930Sstevel@tonic-gate 29944574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 29954574Sraf _ceil_prio_waive(); 29960Sstevel@tonic-gate 29970Sstevel@tonic-gate self->ul_sp = stkptr(); 29980Sstevel@tonic-gate self->ul_wchan = cvp; 29990Sstevel@tonic-gate mp->mutex_owner = 0; 30000Sstevel@tonic-gate mp->mutex_ownerpid = 0; 30014574Sraf if (mtype & LOCK_PRIO_INHERIT) 30020Sstevel@tonic-gate mp->mutex_lockw = LOCKCLEAR; 30030Sstevel@tonic-gate /* 30040Sstevel@tonic-gate * ___lwp_cond_wait() returns immediately with EINTR if 30050Sstevel@tonic-gate * set_parking_flag(self,0) is called on this lwp before it 30060Sstevel@tonic-gate * goes to sleep in the kernel. sigacthandler() calls this 30070Sstevel@tonic-gate * when a deferred signal is noted. This assures that we don't 30080Sstevel@tonic-gate * get stuck in ___lwp_cond_wait() with all signals blocked 30090Sstevel@tonic-gate * due to taking a deferred signal before going to sleep. 30100Sstevel@tonic-gate */ 30110Sstevel@tonic-gate set_parking_flag(self, 1); 30120Sstevel@tonic-gate if (self->ul_cursig != 0 || 30130Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 30140Sstevel@tonic-gate set_parking_flag(self, 0); 30150Sstevel@tonic-gate error = ___lwp_cond_wait(cvp, mp, tsp, 1); 30160Sstevel@tonic-gate set_parking_flag(self, 0); 30170Sstevel@tonic-gate self->ul_sp = 0; 30180Sstevel@tonic-gate self->ul_wchan = NULL; 30190Sstevel@tonic-gate return (error); 30200Sstevel@tonic-gate } 30210Sstevel@tonic-gate 30220Sstevel@tonic-gate int 30230Sstevel@tonic-gate cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 30240Sstevel@tonic-gate { 30250Sstevel@tonic-gate ulwp_t *self = curthread; 30260Sstevel@tonic-gate int error; 30270Sstevel@tonic-gate int merror; 30280Sstevel@tonic-gate 30290Sstevel@tonic-gate /* 30300Sstevel@tonic-gate * See the large comment in cond_wait_queue(), above. 30310Sstevel@tonic-gate */ 30320Sstevel@tonic-gate if (self->ul_cond_wait_defer) 30330Sstevel@tonic-gate sigoff(self); 30340Sstevel@tonic-gate 30350Sstevel@tonic-gate error = cond_sleep_kernel(cvp, mp, tsp); 30360Sstevel@tonic-gate 30370Sstevel@tonic-gate /* 30380Sstevel@tonic-gate * Override the return code from ___lwp_cond_wait() 30390Sstevel@tonic-gate * with any non-zero return code from mutex_lock(). 30400Sstevel@tonic-gate * This addresses robust lock failures in particular; 30410Sstevel@tonic-gate * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 30420Sstevel@tonic-gate * errors in order to take corrective action. 30430Sstevel@tonic-gate */ 30445629Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 30450Sstevel@tonic-gate error = merror; 30460Sstevel@tonic-gate 30470Sstevel@tonic-gate /* 30480Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 
30490Sstevel@tonic-gate */ 30500Sstevel@tonic-gate if (self->ul_cond_wait_defer) 30510Sstevel@tonic-gate sigon(self); 30520Sstevel@tonic-gate 30530Sstevel@tonic-gate return (error); 30540Sstevel@tonic-gate } 30550Sstevel@tonic-gate 30560Sstevel@tonic-gate /* 30570Sstevel@tonic-gate * Common code for _cond_wait() and _cond_timedwait() 30580Sstevel@tonic-gate */ 30590Sstevel@tonic-gate int 30600Sstevel@tonic-gate cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 30610Sstevel@tonic-gate { 30620Sstevel@tonic-gate int mtype = mp->mutex_type; 30630Sstevel@tonic-gate hrtime_t begin_sleep = 0; 30640Sstevel@tonic-gate ulwp_t *self = curthread; 30650Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 30660Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 30670Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 30680Sstevel@tonic-gate uint8_t rcount; 30690Sstevel@tonic-gate int error = 0; 30700Sstevel@tonic-gate 30710Sstevel@tonic-gate /* 30720Sstevel@tonic-gate * The SUSV3 Posix spec for pthread_cond_timedwait() states: 30730Sstevel@tonic-gate * Except in the case of [ETIMEDOUT], all these error checks 30740Sstevel@tonic-gate * shall act as if they were performed immediately at the 30750Sstevel@tonic-gate * beginning of processing for the function and shall cause 30760Sstevel@tonic-gate * an error return, in effect, prior to modifying the state 30770Sstevel@tonic-gate * of the mutex specified by mutex or the condition variable 30780Sstevel@tonic-gate * specified by cond. 30790Sstevel@tonic-gate * Therefore, we must return EINVAL now if the timeout is invalid. 30800Sstevel@tonic-gate */ 30810Sstevel@tonic-gate if (tsp != NULL && 30820Sstevel@tonic-gate (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 30830Sstevel@tonic-gate return (EINVAL); 30840Sstevel@tonic-gate 30850Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 30860Sstevel@tonic-gate self->ul_sp = stkptr(); 30870Sstevel@tonic-gate self->ul_wchan = cvp; 30880Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 30890Sstevel@tonic-gate self->ul_td_evbuf.eventdata = cvp; 30900Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 30910Sstevel@tonic-gate self->ul_sp = 0; 30920Sstevel@tonic-gate } 30930Sstevel@tonic-gate if (csp) { 30940Sstevel@tonic-gate if (tsp) 30950Sstevel@tonic-gate tdb_incr(csp->cond_timedwait); 30960Sstevel@tonic-gate else 30970Sstevel@tonic-gate tdb_incr(csp->cond_wait); 30980Sstevel@tonic-gate } 30990Sstevel@tonic-gate if (msp) 31000Sstevel@tonic-gate begin_sleep = record_hold_time(msp); 31010Sstevel@tonic-gate else if (csp) 31020Sstevel@tonic-gate begin_sleep = gethrtime(); 31030Sstevel@tonic-gate 31040Sstevel@tonic-gate if (self->ul_error_detection) { 31050Sstevel@tonic-gate if (!mutex_is_held(mp)) 31060Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, NULL); 31070Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 31080Sstevel@tonic-gate lock_error(mp, "recursive mutex in cond_wait", 31095629Sraf cvp, NULL); 31100Sstevel@tonic-gate if (cvp->cond_type & USYNC_PROCESS) { 31114574Sraf if (!(mtype & USYNC_PROCESS)) 31120Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 31135629Sraf "condvar process-shared, " 31145629Sraf "mutex process-private"); 31150Sstevel@tonic-gate } else { 31164574Sraf if (mtype & USYNC_PROCESS) 31170Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 31185629Sraf "condvar process-private, " 31195629Sraf "mutex process-shared"); 31200Sstevel@tonic-gate } 31210Sstevel@tonic-gate } 31220Sstevel@tonic-gate
31230Sstevel@tonic-gate /* 31240Sstevel@tonic-gate * We deal with recursive mutexes by completely 31250Sstevel@tonic-gate * dropping the lock and restoring the recursion 31260Sstevel@tonic-gate * count after waking up. This is arguably wrong, 31270Sstevel@tonic-gate * but it obeys the principle of least astonishment. 31280Sstevel@tonic-gate */ 31290Sstevel@tonic-gate rcount = mp->mutex_rcount; 31300Sstevel@tonic-gate mp->mutex_rcount = 0; 31314574Sraf if ((mtype & 31324574Sraf (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 31330Sstevel@tonic-gate (cvp->cond_type & USYNC_PROCESS)) 31340Sstevel@tonic-gate error = cond_wait_kernel(cvp, mp, tsp); 31350Sstevel@tonic-gate else 31365629Sraf error = cond_wait_queue(cvp, mp, tsp); 31370Sstevel@tonic-gate mp->mutex_rcount = rcount; 31380Sstevel@tonic-gate 31390Sstevel@tonic-gate if (csp) { 31400Sstevel@tonic-gate hrtime_t lapse = gethrtime() - begin_sleep; 31410Sstevel@tonic-gate if (tsp == NULL) 31420Sstevel@tonic-gate csp->cond_wait_sleep_time += lapse; 31430Sstevel@tonic-gate else { 31440Sstevel@tonic-gate csp->cond_timedwait_sleep_time += lapse; 31450Sstevel@tonic-gate if (error == ETIME) 31460Sstevel@tonic-gate tdb_incr(csp->cond_timedwait_timeout); 31470Sstevel@tonic-gate } 31480Sstevel@tonic-gate } 31490Sstevel@tonic-gate return (error); 31500Sstevel@tonic-gate } 31510Sstevel@tonic-gate 31520Sstevel@tonic-gate /* 3153*5891Sraf * cond_wait() and _cond_wait() are cancellation points but __cond_wait() 3154*5891Sraf * is not. Internally, libc calls the non-cancellation version. 3155*5891Sraf * Other libraries need to use pthread_setcancelstate(), as appropriate, 3156*5891Sraf * since __cond_wait() is not exported from libc. 31570Sstevel@tonic-gate */ 31580Sstevel@tonic-gate int 3159*5891Sraf __cond_wait(cond_t *cvp, mutex_t *mp) 31600Sstevel@tonic-gate { 31610Sstevel@tonic-gate ulwp_t *self = curthread; 31620Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 31630Sstevel@tonic-gate uberflags_t *gflags; 31640Sstevel@tonic-gate 31650Sstevel@tonic-gate /* 31660Sstevel@tonic-gate * Optimize the common case of USYNC_THREAD plus 31670Sstevel@tonic-gate * no error detection, no lock statistics, and no event tracing. 31680Sstevel@tonic-gate */ 31690Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 31700Sstevel@tonic-gate (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 31710Sstevel@tonic-gate self->ul_td_events_enable | 31720Sstevel@tonic-gate udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 31735629Sraf return (cond_wait_queue(cvp, mp, NULL)); 31740Sstevel@tonic-gate 31750Sstevel@tonic-gate /* 31760Sstevel@tonic-gate * Else do it the long way. 31770Sstevel@tonic-gate */ 31780Sstevel@tonic-gate return (cond_wait_common(cvp, mp, NULL)); 31790Sstevel@tonic-gate } 31800Sstevel@tonic-gate 3181*5891Sraf #pragma weak cond_wait = _cond_wait 31820Sstevel@tonic-gate int 3183*5891Sraf _cond_wait(cond_t *cvp, mutex_t *mp) 31840Sstevel@tonic-gate { 31850Sstevel@tonic-gate int error; 31860Sstevel@tonic-gate 31870Sstevel@tonic-gate _cancelon(); 3188*5891Sraf error = __cond_wait(cvp, mp); 31890Sstevel@tonic-gate if (error == EINTR) 31900Sstevel@tonic-gate _canceloff(); 31910Sstevel@tonic-gate else 31920Sstevel@tonic-gate _canceloff_nocancel(); 31930Sstevel@tonic-gate return (error); 31940Sstevel@tonic-gate } 31950Sstevel@tonic-gate 3196*5891Sraf /* 3197*5891Sraf * pthread_cond_wait() is a cancellation point. 
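 *
 * Since a waiter can return early (a spurious wakeup, a signal, or a
 * forkall(), as described above), callers re-check their predicate in
 * a loop.  A minimal caller-side sketch, where q_lock, q_cv, ready and
 * consume() are stand-ins for application state:
 *
 *	(void) pthread_mutex_lock(&q_lock);
 *	while (!ready)
 *		(void) pthread_cond_wait(&q_cv, &q_lock);
 *	consume();
 *	(void) pthread_mutex_unlock(&q_lock);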
3198*5891Sraf */ 31990Sstevel@tonic-gate #pragma weak pthread_cond_wait = _pthread_cond_wait 32000Sstevel@tonic-gate int 32010Sstevel@tonic-gate _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 32020Sstevel@tonic-gate { 32030Sstevel@tonic-gate int error; 32040Sstevel@tonic-gate 3205*5891Sraf error = _cond_wait(cvp, mp); 32060Sstevel@tonic-gate return ((error == EINTR)? 0 : error); 32070Sstevel@tonic-gate } 32080Sstevel@tonic-gate 32090Sstevel@tonic-gate /* 3210*5891Sraf * cond_timedwait() and _cond_timedwait() are cancellation points 3211*5891Sraf * but __cond_timedwait() is not. 32120Sstevel@tonic-gate */ 32130Sstevel@tonic-gate int 3214*5891Sraf __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 32150Sstevel@tonic-gate { 32160Sstevel@tonic-gate clockid_t clock_id = cvp->cond_clockid; 32170Sstevel@tonic-gate timespec_t reltime; 32180Sstevel@tonic-gate int error; 32190Sstevel@tonic-gate 32200Sstevel@tonic-gate if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 32210Sstevel@tonic-gate clock_id = CLOCK_REALTIME; 32220Sstevel@tonic-gate abstime_to_reltime(clock_id, abstime, &reltime); 32230Sstevel@tonic-gate error = cond_wait_common(cvp, mp, &reltime); 32240Sstevel@tonic-gate if (error == ETIME && clock_id == CLOCK_HIGHRES) { 32250Sstevel@tonic-gate /* 32260Sstevel@tonic-gate * Don't return ETIME if we didn't really get a timeout. 32270Sstevel@tonic-gate * This can happen if we return because someone resets 32280Sstevel@tonic-gate * the system clock. Just return zero in this case, 32290Sstevel@tonic-gate * giving a spurious wakeup but not a timeout. 32300Sstevel@tonic-gate */ 32310Sstevel@tonic-gate if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 32320Sstevel@tonic-gate abstime->tv_nsec > gethrtime()) 32330Sstevel@tonic-gate error = 0; 32340Sstevel@tonic-gate } 32350Sstevel@tonic-gate return (error); 32360Sstevel@tonic-gate } 32370Sstevel@tonic-gate 3238*5891Sraf #pragma weak cond_timedwait = _cond_timedwait 32390Sstevel@tonic-gate int 3240*5891Sraf _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 32410Sstevel@tonic-gate { 32420Sstevel@tonic-gate int error; 32430Sstevel@tonic-gate 32440Sstevel@tonic-gate _cancelon(); 3245*5891Sraf error = __cond_timedwait(cvp, mp, abstime); 32460Sstevel@tonic-gate if (error == EINTR) 32470Sstevel@tonic-gate _canceloff(); 32480Sstevel@tonic-gate else 32490Sstevel@tonic-gate _canceloff_nocancel(); 32500Sstevel@tonic-gate return (error); 32510Sstevel@tonic-gate } 32520Sstevel@tonic-gate 3253*5891Sraf /* 3254*5891Sraf * pthread_cond_timedwait() is a cancellation point. 3255*5891Sraf */ 32560Sstevel@tonic-gate #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 32570Sstevel@tonic-gate int 32580Sstevel@tonic-gate _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 32590Sstevel@tonic-gate { 32600Sstevel@tonic-gate int error; 32610Sstevel@tonic-gate 3262*5891Sraf error = _cond_timedwait(cvp, mp, abstime); 32630Sstevel@tonic-gate if (error == ETIME) 32640Sstevel@tonic-gate error = ETIMEDOUT; 32650Sstevel@tonic-gate else if (error == EINTR) 32660Sstevel@tonic-gate error = 0; 32670Sstevel@tonic-gate return (error); 32680Sstevel@tonic-gate } 32690Sstevel@tonic-gate 32700Sstevel@tonic-gate /* 3271*5891Sraf * cond_reltimedwait() and _cond_reltimedwait() are cancellation points 3272*5891Sraf * but __cond_reltimedwait() is not. 
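 *
 * These take a timeout relative to the time of the call, unlike
 * pthread_cond_timedwait(), whose absolute timeout is converted by
 * abstime_to_reltime() in __cond_timedwait() above.  A caller-side
 * sketch of the absolute form (cv, lock and done stand in for
 * application state):
 *
 *	struct timespec ts;
 *
 *	(void) clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;
 *	(void) pthread_mutex_lock(&lock);
 *	while (!done &&
 *	    pthread_cond_timedwait(&cv, &lock, &ts) != ETIMEDOUT)
 *		continue;
 *	(void) pthread_mutex_unlock(&lock);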
32730Sstevel@tonic-gate */ 32740Sstevel@tonic-gate int 3275*5891Sraf __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 32760Sstevel@tonic-gate { 32770Sstevel@tonic-gate timespec_t tslocal = *reltime; 32780Sstevel@tonic-gate 32790Sstevel@tonic-gate return (cond_wait_common(cvp, mp, &tslocal)); 32800Sstevel@tonic-gate } 32810Sstevel@tonic-gate 3282*5891Sraf #pragma weak cond_reltimedwait = _cond_reltimedwait 32830Sstevel@tonic-gate int 3284*5891Sraf _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 32850Sstevel@tonic-gate { 32860Sstevel@tonic-gate int error; 32870Sstevel@tonic-gate 32880Sstevel@tonic-gate _cancelon(); 3289*5891Sraf error = __cond_reltimedwait(cvp, mp, reltime); 32900Sstevel@tonic-gate if (error == EINTR) 32910Sstevel@tonic-gate _canceloff(); 32920Sstevel@tonic-gate else 32930Sstevel@tonic-gate _canceloff_nocancel(); 32940Sstevel@tonic-gate return (error); 32950Sstevel@tonic-gate } 32960Sstevel@tonic-gate 32970Sstevel@tonic-gate #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 32980Sstevel@tonic-gate int 32990Sstevel@tonic-gate _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 33000Sstevel@tonic-gate const timespec_t *reltime) 33010Sstevel@tonic-gate { 33020Sstevel@tonic-gate int error; 33030Sstevel@tonic-gate 3304*5891Sraf error = _cond_reltimedwait(cvp, mp, reltime); 33050Sstevel@tonic-gate if (error == ETIME) 33060Sstevel@tonic-gate error = ETIMEDOUT; 33070Sstevel@tonic-gate else if (error == EINTR) 33080Sstevel@tonic-gate error = 0; 33090Sstevel@tonic-gate return (error); 33100Sstevel@tonic-gate } 33110Sstevel@tonic-gate 33120Sstevel@tonic-gate #pragma weak pthread_cond_signal = cond_signal_internal 33130Sstevel@tonic-gate #pragma weak _pthread_cond_signal = cond_signal_internal 33140Sstevel@tonic-gate #pragma weak cond_signal = cond_signal_internal 33150Sstevel@tonic-gate #pragma weak _cond_signal = cond_signal_internal 33160Sstevel@tonic-gate int 33170Sstevel@tonic-gate cond_signal_internal(cond_t *cvp) 33180Sstevel@tonic-gate { 33190Sstevel@tonic-gate ulwp_t *self = curthread; 33200Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 33210Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 33220Sstevel@tonic-gate int error = 0; 33230Sstevel@tonic-gate queue_head_t *qp; 33240Sstevel@tonic-gate mutex_t *mp; 33250Sstevel@tonic-gate queue_head_t *mqp; 33260Sstevel@tonic-gate ulwp_t **ulwpp; 33270Sstevel@tonic-gate ulwp_t *ulwp; 33280Sstevel@tonic-gate ulwp_t *prev = NULL; 33290Sstevel@tonic-gate ulwp_t *next; 33300Sstevel@tonic-gate ulwp_t **suspp = NULL; 33310Sstevel@tonic-gate ulwp_t *susprev; 33320Sstevel@tonic-gate 33330Sstevel@tonic-gate if (csp) 33340Sstevel@tonic-gate tdb_incr(csp->cond_signal); 33350Sstevel@tonic-gate 33360Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 33370Sstevel@tonic-gate error = __lwp_cond_signal(cvp); 33380Sstevel@tonic-gate 33390Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 33400Sstevel@tonic-gate return (error); 33410Sstevel@tonic-gate 33420Sstevel@tonic-gate /* 33430Sstevel@tonic-gate * Move someone from the condvar sleep queue to the mutex sleep 33440Sstevel@tonic-gate * queue for the mutex that he will acquire on being waked up. 33450Sstevel@tonic-gate * We can do this only if we own the mutex he will acquire. 33460Sstevel@tonic-gate * If we do not own the mutex, or if his ul_cv_wake flag 33470Sstevel@tonic-gate * is set, just dequeue and unpark him. 
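 * Moving the waiter straight onto the mutex sleep queue (rather than
 * waking him) avoids a wakeup that would only block again immediately
 * on a mutex we still hold.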
33480Sstevel@tonic-gate */ 33490Sstevel@tonic-gate qp = queue_lock(cvp, CV); 33500Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 33510Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 33520Sstevel@tonic-gate if (ulwp->ul_wchan == cvp) { 33530Sstevel@tonic-gate if (!ulwp->ul_stop) 33540Sstevel@tonic-gate break; 33550Sstevel@tonic-gate /* 33560Sstevel@tonic-gate * Try not to dequeue a suspended thread. 33570Sstevel@tonic-gate * This mimics the old libthread's behavior. 33580Sstevel@tonic-gate */ 33590Sstevel@tonic-gate if (suspp == NULL) { 33600Sstevel@tonic-gate suspp = ulwpp; 33610Sstevel@tonic-gate susprev = prev; 33620Sstevel@tonic-gate } 33630Sstevel@tonic-gate } 33640Sstevel@tonic-gate } 33650Sstevel@tonic-gate if (ulwp == NULL && suspp != NULL) { 33660Sstevel@tonic-gate ulwp = *(ulwpp = suspp); 33670Sstevel@tonic-gate prev = susprev; 33680Sstevel@tonic-gate suspp = NULL; 33690Sstevel@tonic-gate } 33700Sstevel@tonic-gate if (ulwp == NULL) { /* no one on the sleep queue */ 33710Sstevel@tonic-gate cvp->cond_waiters_user = 0; 33720Sstevel@tonic-gate queue_unlock(qp); 33730Sstevel@tonic-gate return (error); 33740Sstevel@tonic-gate } 33750Sstevel@tonic-gate /* 33760Sstevel@tonic-gate * Scan the remainder of the CV queue for another waiter. 33770Sstevel@tonic-gate */ 33780Sstevel@tonic-gate if (suspp != NULL) { 33790Sstevel@tonic-gate next = *suspp; 33800Sstevel@tonic-gate } else { 33810Sstevel@tonic-gate for (next = ulwp->ul_link; next != NULL; next = next->ul_link) 33820Sstevel@tonic-gate if (next->ul_wchan == cvp) 33830Sstevel@tonic-gate break; 33840Sstevel@tonic-gate } 33850Sstevel@tonic-gate if (next == NULL) 33860Sstevel@tonic-gate cvp->cond_waiters_user = 0; 33870Sstevel@tonic-gate 33880Sstevel@tonic-gate /* 33890Sstevel@tonic-gate * Inform the thread that he was the recipient of a cond_signal(). 33900Sstevel@tonic-gate * This lets him deal with cond_signal() and, concurrently, 33910Sstevel@tonic-gate * one or more of a cancellation, a UNIX signal, or a timeout. 33920Sstevel@tonic-gate * These latter conditions must not consume a cond_signal(). 33930Sstevel@tonic-gate */ 33940Sstevel@tonic-gate ulwp->ul_signalled = 1; 33950Sstevel@tonic-gate 33960Sstevel@tonic-gate /* 33970Sstevel@tonic-gate * Dequeue the waiter but leave his ul_sleepq non-NULL 33980Sstevel@tonic-gate * while we move him to the mutex queue so that he can 33990Sstevel@tonic-gate * deal properly with spurious wakeups. 
34000Sstevel@tonic-gate */ 34010Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 34024574Sraf ulwp->ul_link = NULL; 34030Sstevel@tonic-gate if (qp->qh_tail == ulwp) 34040Sstevel@tonic-gate qp->qh_tail = prev; 34050Sstevel@tonic-gate qp->qh_qlen--; 34060Sstevel@tonic-gate 34070Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 34080Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 34090Sstevel@tonic-gate ASSERT(mp != NULL); 34100Sstevel@tonic-gate 34110Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 34120Sstevel@tonic-gate lwpid_t lwpid = ulwp->ul_lwpid; 34130Sstevel@tonic-gate 34140Sstevel@tonic-gate no_preempt(self); 34150Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 34160Sstevel@tonic-gate ulwp->ul_wchan = NULL; 34170Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 34180Sstevel@tonic-gate queue_unlock(qp); 34190Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 34200Sstevel@tonic-gate preempt(self); 34210Sstevel@tonic-gate } else { 34220Sstevel@tonic-gate mqp = queue_lock(mp, MX); 34230Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 34240Sstevel@tonic-gate mp->mutex_waiters = 1; 34250Sstevel@tonic-gate queue_unlock(mqp); 34260Sstevel@tonic-gate queue_unlock(qp); 34270Sstevel@tonic-gate } 34280Sstevel@tonic-gate 34290Sstevel@tonic-gate return (error); 34300Sstevel@tonic-gate } 34310Sstevel@tonic-gate 34324570Sraf /* 34334574Sraf * Utility function called by mutex_wakeup_all(), cond_broadcast(), 34344574Sraf * and rw_queue_release() to (re)allocate a big buffer to hold the 34354574Sraf * lwpids of all the threads to be set running after they are removed 34364574Sraf * from their sleep queues. Since we are holding a queue lock, we 34374574Sraf * cannot call any function that might acquire a lock. mmap(), munmap(), 34384574Sraf * lwp_unpark_all() are simple system calls and are safe in this regard. 34394570Sraf */ 34404570Sraf lwpid_t * 34414570Sraf alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 34424570Sraf { 34434570Sraf /* 34444570Sraf * Allocate NEWLWPS ids on the first overflow. 34454570Sraf * Double the allocation each time after that. 34464570Sraf */ 34474570Sraf int nlwpid = *nlwpid_ptr; 34484570Sraf int maxlwps = *maxlwps_ptr; 34494570Sraf int first_allocation; 34504570Sraf int newlwps; 34514570Sraf void *vaddr; 34524570Sraf 34534570Sraf ASSERT(nlwpid == maxlwps); 34544570Sraf 34554570Sraf first_allocation = (maxlwps == MAXLWPS); 34564570Sraf newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 34574570Sraf vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 34584570Sraf PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 34594570Sraf 34604570Sraf if (vaddr == MAP_FAILED) { 34614570Sraf /* 34624570Sraf * Let's hope this never happens. 34634570Sraf * If it does, then we have a terrible 34644570Sraf * thundering herd on our hands. 
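 * In that unlikely event we unpark everyone collected so far and reset
 * the count, so the caller just keeps reusing the buffer it already has.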
34654570Sraf */ 34664570Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 34674570Sraf *nlwpid_ptr = 0; 34684570Sraf } else { 34694570Sraf (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 34704570Sraf if (!first_allocation) 34714570Sraf (void) _private_munmap(lwpid, 34724570Sraf maxlwps * sizeof (lwpid_t)); 34734570Sraf lwpid = vaddr; 34744570Sraf *maxlwps_ptr = newlwps; 34754570Sraf } 34764570Sraf 34774570Sraf return (lwpid); 34784570Sraf } 34790Sstevel@tonic-gate 34800Sstevel@tonic-gate #pragma weak pthread_cond_broadcast = cond_broadcast_internal 34810Sstevel@tonic-gate #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 34820Sstevel@tonic-gate #pragma weak cond_broadcast = cond_broadcast_internal 34830Sstevel@tonic-gate #pragma weak _cond_broadcast = cond_broadcast_internal 34840Sstevel@tonic-gate int 34850Sstevel@tonic-gate cond_broadcast_internal(cond_t *cvp) 34860Sstevel@tonic-gate { 34870Sstevel@tonic-gate ulwp_t *self = curthread; 34880Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 34890Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 34900Sstevel@tonic-gate int error = 0; 34910Sstevel@tonic-gate queue_head_t *qp; 34920Sstevel@tonic-gate mutex_t *mp; 34930Sstevel@tonic-gate mutex_t *mp_cache = NULL; 34944570Sraf queue_head_t *mqp = NULL; 34950Sstevel@tonic-gate ulwp_t **ulwpp; 34960Sstevel@tonic-gate ulwp_t *ulwp; 34970Sstevel@tonic-gate ulwp_t *prev = NULL; 34984570Sraf int nlwpid = 0; 34994570Sraf int maxlwps = MAXLWPS; 35000Sstevel@tonic-gate lwpid_t buffer[MAXLWPS]; 35010Sstevel@tonic-gate lwpid_t *lwpid = buffer; 35020Sstevel@tonic-gate 35030Sstevel@tonic-gate if (csp) 35040Sstevel@tonic-gate tdb_incr(csp->cond_broadcast); 35050Sstevel@tonic-gate 35060Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 35070Sstevel@tonic-gate error = __lwp_cond_broadcast(cvp); 35080Sstevel@tonic-gate 35090Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 35100Sstevel@tonic-gate return (error); 35110Sstevel@tonic-gate 35120Sstevel@tonic-gate /* 35130Sstevel@tonic-gate * Move everyone from the condvar sleep queue to the mutex sleep 35140Sstevel@tonic-gate * queue for the mutex that they will acquire on being waked up. 35150Sstevel@tonic-gate * We can do this only if we own the mutex they will acquire. 35160Sstevel@tonic-gate * If we do not own the mutex, or if their ul_cv_wake flag 35170Sstevel@tonic-gate * is set, just dequeue and unpark them. 35180Sstevel@tonic-gate * 35190Sstevel@tonic-gate * We keep track of lwpids that are to be unparked in lwpid[]. 35200Sstevel@tonic-gate * __lwp_unpark_all() is called to unpark all of them after 35210Sstevel@tonic-gate * they have been removed from the sleep queue and the sleep 35220Sstevel@tonic-gate * queue lock has been dropped. If we run out of space in our 35230Sstevel@tonic-gate * on-stack buffer, we need to allocate more but we can't call 35240Sstevel@tonic-gate * lmalloc() because we are holding a queue lock when the overflow 35250Sstevel@tonic-gate * occurs and lmalloc() acquires a lock. We can't use alloca() 35264570Sraf * either because the application may have allocated a small 35274570Sraf * stack and we don't want to overrun the stack. So we call 35284570Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 35290Sstevel@tonic-gate * system call directly since that path acquires no locks. 
35300Sstevel@tonic-gate */ 35310Sstevel@tonic-gate qp = queue_lock(cvp, CV); 35320Sstevel@tonic-gate cvp->cond_waiters_user = 0; 35330Sstevel@tonic-gate ulwpp = &qp->qh_head; 35340Sstevel@tonic-gate while ((ulwp = *ulwpp) != NULL) { 35350Sstevel@tonic-gate if (ulwp->ul_wchan != cvp) { 35360Sstevel@tonic-gate prev = ulwp; 35370Sstevel@tonic-gate ulwpp = &ulwp->ul_link; 35380Sstevel@tonic-gate continue; 35390Sstevel@tonic-gate } 35400Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 35414574Sraf ulwp->ul_link = NULL; 35420Sstevel@tonic-gate if (qp->qh_tail == ulwp) 35430Sstevel@tonic-gate qp->qh_tail = prev; 35440Sstevel@tonic-gate qp->qh_qlen--; 35450Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* his mutex */ 35460Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 35470Sstevel@tonic-gate ASSERT(mp != NULL); 35480Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 35490Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 35500Sstevel@tonic-gate ulwp->ul_wchan = NULL; 35510Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 35524570Sraf if (nlwpid == maxlwps) 35534570Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 35540Sstevel@tonic-gate lwpid[nlwpid++] = ulwp->ul_lwpid; 35550Sstevel@tonic-gate } else { 35560Sstevel@tonic-gate if (mp != mp_cache) { 35570Sstevel@tonic-gate mp_cache = mp; 35584570Sraf if (mqp != NULL) 35594570Sraf queue_unlock(mqp); 35604570Sraf mqp = queue_lock(mp, MX); 35610Sstevel@tonic-gate } 35620Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 35630Sstevel@tonic-gate mp->mutex_waiters = 1; 35640Sstevel@tonic-gate } 35650Sstevel@tonic-gate } 35664570Sraf if (mqp != NULL) 35674570Sraf queue_unlock(mqp); 35684570Sraf if (nlwpid == 0) { 35694570Sraf queue_unlock(qp); 35704570Sraf } else { 35714570Sraf no_preempt(self); 35724570Sraf queue_unlock(qp); 35730Sstevel@tonic-gate if (nlwpid == 1) 35740Sstevel@tonic-gate (void) __lwp_unpark(lwpid[0]); 35750Sstevel@tonic-gate else 35760Sstevel@tonic-gate (void) __lwp_unpark_all(lwpid, nlwpid); 35774570Sraf preempt(self); 35780Sstevel@tonic-gate } 35790Sstevel@tonic-gate if (lwpid != buffer) 35800Sstevel@tonic-gate (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 35810Sstevel@tonic-gate return (error); 35820Sstevel@tonic-gate } 35830Sstevel@tonic-gate 35840Sstevel@tonic-gate #pragma weak pthread_cond_destroy = _cond_destroy 35850Sstevel@tonic-gate #pragma weak _pthread_cond_destroy = _cond_destroy 35860Sstevel@tonic-gate #pragma weak cond_destroy = _cond_destroy 35870Sstevel@tonic-gate int 35880Sstevel@tonic-gate _cond_destroy(cond_t *cvp) 35890Sstevel@tonic-gate { 35900Sstevel@tonic-gate cvp->cond_magic = 0; 35910Sstevel@tonic-gate tdb_sync_obj_deregister(cvp); 35920Sstevel@tonic-gate return (0); 35930Sstevel@tonic-gate } 35940Sstevel@tonic-gate 35950Sstevel@tonic-gate #if defined(THREAD_DEBUG) 35960Sstevel@tonic-gate void 35970Sstevel@tonic-gate assert_no_libc_locks_held(void) 35980Sstevel@tonic-gate { 35990Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 36000Sstevel@tonic-gate } 36010Sstevel@tonic-gate #endif 36020Sstevel@tonic-gate 36030Sstevel@tonic-gate /* protected by link_lock */ 36040Sstevel@tonic-gate uint64_t spin_lock_spin; 36050Sstevel@tonic-gate uint64_t spin_lock_spin2; 36060Sstevel@tonic-gate uint64_t spin_lock_sleep; 36070Sstevel@tonic-gate uint64_t spin_lock_wakeup; 36080Sstevel@tonic-gate 36090Sstevel@tonic-gate /* 36100Sstevel@tonic-gate * Record spin lock statistics. 36110Sstevel@tonic-gate * Called by a thread exiting itself in thrp_exit(). 
36120Sstevel@tonic-gate * Also called via atexit() from the thread calling 36130Sstevel@tonic-gate * exit() to do all the other threads as well. 36140Sstevel@tonic-gate */ 36150Sstevel@tonic-gate void 36160Sstevel@tonic-gate record_spin_locks(ulwp_t *ulwp) 36170Sstevel@tonic-gate { 36180Sstevel@tonic-gate spin_lock_spin += ulwp->ul_spin_lock_spin; 36190Sstevel@tonic-gate spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 36200Sstevel@tonic-gate spin_lock_sleep += ulwp->ul_spin_lock_sleep; 36210Sstevel@tonic-gate spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 36220Sstevel@tonic-gate ulwp->ul_spin_lock_spin = 0; 36230Sstevel@tonic-gate ulwp->ul_spin_lock_spin2 = 0; 36240Sstevel@tonic-gate ulwp->ul_spin_lock_sleep = 0; 36250Sstevel@tonic-gate ulwp->ul_spin_lock_wakeup = 0; 36260Sstevel@tonic-gate } 36270Sstevel@tonic-gate 36280Sstevel@tonic-gate /* 36290Sstevel@tonic-gate * atexit function: dump the queue statistics to stderr. 36300Sstevel@tonic-gate */ 36311219Sraf #if !defined(__lint) 36321219Sraf #define fprintf _fprintf 36331219Sraf #endif 36340Sstevel@tonic-gate #include <stdio.h> 36350Sstevel@tonic-gate void 36360Sstevel@tonic-gate dump_queue_statistics(void) 36370Sstevel@tonic-gate { 36380Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 36390Sstevel@tonic-gate queue_head_t *qp; 36400Sstevel@tonic-gate int qn; 36410Sstevel@tonic-gate uint64_t spin_lock_total = 0; 36420Sstevel@tonic-gate 36430Sstevel@tonic-gate if (udp->queue_head == NULL || thread_queue_dump == 0) 36440Sstevel@tonic-gate return; 36450Sstevel@tonic-gate 36460Sstevel@tonic-gate if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 36470Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 36480Sstevel@tonic-gate return; 36490Sstevel@tonic-gate for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 36500Sstevel@tonic-gate if (qp->qh_lockcount == 0) 36510Sstevel@tonic-gate continue; 36520Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 36530Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 36545629Sraf (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 36555629Sraf return; 36560Sstevel@tonic-gate } 36570Sstevel@tonic-gate 36580Sstevel@tonic-gate if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 36590Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 36600Sstevel@tonic-gate return; 36610Sstevel@tonic-gate for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 36620Sstevel@tonic-gate if (qp->qh_lockcount == 0) 36630Sstevel@tonic-gate continue; 36640Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 36650Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 36665629Sraf (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 36675629Sraf return; 36680Sstevel@tonic-gate } 36690Sstevel@tonic-gate 36700Sstevel@tonic-gate (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 36715629Sraf (u_longlong_t)spin_lock_total); 36720Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 36735629Sraf (u_longlong_t)spin_lock_spin); 36740Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 36755629Sraf (u_longlong_t)spin_lock_spin2); 36760Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 36775629Sraf (u_longlong_t)spin_lock_sleep); 36780Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 36795629Sraf (u_longlong_t)spin_lock_wakeup); 36800Sstevel@tonic-gate } 3681