10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51893Sraf * Common Development and Distribution License (the "License"). 61893Sraf * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211219Sraf 220Sstevel@tonic-gate /* 234570Sraf * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/sdt.h> 300Sstevel@tonic-gate 310Sstevel@tonic-gate #include "lint.h" 320Sstevel@tonic-gate #include "thr_uberdata.h" 330Sstevel@tonic-gate 340Sstevel@tonic-gate /* 350Sstevel@tonic-gate * This mutex is initialized to be held by lwp#1. 360Sstevel@tonic-gate * It is used to block a thread that has returned from a mutex_lock() 374574Sraf * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 380Sstevel@tonic-gate */ 390Sstevel@tonic-gate mutex_t stall_mutex = DEFAULTMUTEX; 400Sstevel@tonic-gate 410Sstevel@tonic-gate static int shared_mutex_held(mutex_t *); 424574Sraf static int mutex_unlock_internal(mutex_t *, int); 434574Sraf static int mutex_queuelock_adaptive(mutex_t *); 444574Sraf static void mutex_wakeup_all(mutex_t *); 450Sstevel@tonic-gate 460Sstevel@tonic-gate /* 470Sstevel@tonic-gate * Lock statistics support functions. 480Sstevel@tonic-gate */ 490Sstevel@tonic-gate void 500Sstevel@tonic-gate record_begin_hold(tdb_mutex_stats_t *msp) 510Sstevel@tonic-gate { 520Sstevel@tonic-gate tdb_incr(msp->mutex_lock); 530Sstevel@tonic-gate msp->mutex_begin_hold = gethrtime(); 540Sstevel@tonic-gate } 550Sstevel@tonic-gate 560Sstevel@tonic-gate hrtime_t 570Sstevel@tonic-gate record_hold_time(tdb_mutex_stats_t *msp) 580Sstevel@tonic-gate { 590Sstevel@tonic-gate hrtime_t now = gethrtime(); 600Sstevel@tonic-gate 610Sstevel@tonic-gate if (msp->mutex_begin_hold) 620Sstevel@tonic-gate msp->mutex_hold_time += now - msp->mutex_begin_hold; 630Sstevel@tonic-gate msp->mutex_begin_hold = 0; 640Sstevel@tonic-gate return (now); 650Sstevel@tonic-gate } 660Sstevel@tonic-gate 670Sstevel@tonic-gate /* 680Sstevel@tonic-gate * Called once at library initialization. 690Sstevel@tonic-gate */ 700Sstevel@tonic-gate void 710Sstevel@tonic-gate mutex_setup(void) 720Sstevel@tonic-gate { 730Sstevel@tonic-gate if (set_lock_byte(&stall_mutex.mutex_lockw)) 740Sstevel@tonic-gate thr_panic("mutex_setup() cannot acquire stall_mutex"); 750Sstevel@tonic-gate stall_mutex.mutex_owner = (uintptr_t)curthread; 760Sstevel@tonic-gate } 770Sstevel@tonic-gate 780Sstevel@tonic-gate /* 79*5629Sraf * The default spin count of 1000 is experimentally determined. 80*5629Sraf * On sun4u machines with any number of processors it could be raised 810Sstevel@tonic-gate * to 10,000 but that (experimentally) makes almost no difference. 82*5629Sraf * The environment variable: 830Sstevel@tonic-gate * _THREAD_ADAPTIVE_SPIN=count 84*5629Sraf * can be used to override and set the count in the range [0 .. 1,000,000]. 850Sstevel@tonic-gate */ 860Sstevel@tonic-gate int thread_adaptive_spin = 1000; 870Sstevel@tonic-gate uint_t thread_max_spinners = 100; 880Sstevel@tonic-gate int thread_queue_verify = 0; 890Sstevel@tonic-gate static int ncpus; 900Sstevel@tonic-gate 910Sstevel@tonic-gate /* 920Sstevel@tonic-gate * Distinguish spinning for queue locks from spinning for regular locks. 93*5629Sraf * We try harder to acquire queue locks by spinning. 940Sstevel@tonic-gate * The environment variable: 950Sstevel@tonic-gate * _THREAD_QUEUE_SPIN=count 960Sstevel@tonic-gate * can be used to override and set the count in the range [0 .. 1,000,000]. 970Sstevel@tonic-gate */ 98*5629Sraf int thread_queue_spin = 10000; 990Sstevel@tonic-gate 1004574Sraf #define ALL_ATTRIBUTES \ 1014574Sraf (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 1024574Sraf LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 1034574Sraf LOCK_ROBUST) 1040Sstevel@tonic-gate 1050Sstevel@tonic-gate /* 1064574Sraf * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 1074574Sraf * augmented by zero or more the flags: 1084574Sraf * LOCK_RECURSIVE 1094574Sraf * LOCK_ERRORCHECK 1104574Sraf * LOCK_PRIO_INHERIT 1114574Sraf * LOCK_PRIO_PROTECT 1124574Sraf * LOCK_ROBUST 1130Sstevel@tonic-gate */ 1140Sstevel@tonic-gate #pragma weak _private_mutex_init = __mutex_init 1150Sstevel@tonic-gate #pragma weak mutex_init = __mutex_init 1160Sstevel@tonic-gate #pragma weak _mutex_init = __mutex_init 1170Sstevel@tonic-gate /* ARGSUSED2 */ 1180Sstevel@tonic-gate int 1190Sstevel@tonic-gate __mutex_init(mutex_t *mp, int type, void *arg) 1200Sstevel@tonic-gate { 1214574Sraf int basetype = (type & ~ALL_ATTRIBUTES); 1224574Sraf int error = 0; 1234574Sraf 1244574Sraf if (basetype == USYNC_PROCESS_ROBUST) { 1254574Sraf /* 1264574Sraf * USYNC_PROCESS_ROBUST is a deprecated historical type. 1274574Sraf * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 1284574Sraf * retain the USYNC_PROCESS_ROBUST flag so we can return 1294574Sraf * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 1304574Sraf * mutexes will ever draw ELOCKUNMAPPED). 1314574Sraf */ 1324574Sraf type |= (USYNC_PROCESS | LOCK_ROBUST); 1334574Sraf basetype = USYNC_PROCESS; 1344574Sraf } 1354574Sraf 1364574Sraf if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || 1374574Sraf (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 1384574Sraf == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { 1394574Sraf error = EINVAL; 1404574Sraf } else if (type & LOCK_ROBUST) { 1414574Sraf /* 1424574Sraf * Callers of mutex_init() with the LOCK_ROBUST attribute 1434574Sraf * are required to pass an initially all-zero mutex. 1444574Sraf * Multiple calls to mutex_init() are allowed; all but 1454574Sraf * the first return EBUSY. A call to mutex_init() is 1464574Sraf * allowed to make an inconsistent robust lock consistent 1474574Sraf * (for historical usage, even though the proper interface 1484574Sraf * for this is mutex_consistent()). Note that we use 1494574Sraf * atomic_or_16() to set the LOCK_INITED flag so as 1504574Sraf * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 1514574Sraf */ 1524574Sraf extern void _atomic_or_16(volatile uint16_t *, uint16_t); 1534574Sraf if (!(mp->mutex_flag & LOCK_INITED)) { 1544574Sraf mp->mutex_type = (uint8_t)type; 1554574Sraf _atomic_or_16(&mp->mutex_flag, LOCK_INITED); 1564574Sraf mp->mutex_magic = MUTEX_MAGIC; 1574574Sraf } else if (type != mp->mutex_type || 1584574Sraf ((type & LOCK_PRIO_PROTECT) && 1594574Sraf mp->mutex_ceiling != (*(int *)arg))) { 1604574Sraf error = EINVAL; 1614574Sraf } else if (__mutex_consistent(mp) != 0) { 1624574Sraf error = EBUSY; 1634574Sraf } 1644574Sraf /* register a process robust mutex with the kernel */ 1654574Sraf if (basetype == USYNC_PROCESS) 1664574Sraf register_lock(mp); 1674574Sraf } else { 1680Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 1690Sstevel@tonic-gate mp->mutex_type = (uint8_t)type; 1700Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 1714574Sraf mp->mutex_magic = MUTEX_MAGIC; 1720Sstevel@tonic-gate } 1734574Sraf 1744574Sraf if (error == 0 && (type & LOCK_PRIO_PROTECT)) 1754574Sraf mp->mutex_ceiling = (uint8_t)(*(int *)arg); 1764574Sraf 1770Sstevel@tonic-gate return (error); 1780Sstevel@tonic-gate } 1790Sstevel@tonic-gate 1800Sstevel@tonic-gate /* 1810Sstevel@tonic-gate * Delete mp from list of ceil mutexes owned by curthread. 1820Sstevel@tonic-gate * Return 1 if the head of the chain was updated. 1830Sstevel@tonic-gate */ 1840Sstevel@tonic-gate int 1850Sstevel@tonic-gate _ceil_mylist_del(mutex_t *mp) 1860Sstevel@tonic-gate { 1870Sstevel@tonic-gate ulwp_t *self = curthread; 1880Sstevel@tonic-gate mxchain_t **mcpp; 1890Sstevel@tonic-gate mxchain_t *mcp; 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate mcpp = &self->ul_mxchain; 1920Sstevel@tonic-gate while ((*mcpp)->mxchain_mx != mp) 1930Sstevel@tonic-gate mcpp = &(*mcpp)->mxchain_next; 1940Sstevel@tonic-gate mcp = *mcpp; 1950Sstevel@tonic-gate *mcpp = mcp->mxchain_next; 1960Sstevel@tonic-gate lfree(mcp, sizeof (*mcp)); 1970Sstevel@tonic-gate return (mcpp == &self->ul_mxchain); 1980Sstevel@tonic-gate } 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate /* 2010Sstevel@tonic-gate * Add mp to head of list of ceil mutexes owned by curthread. 2020Sstevel@tonic-gate * Return ENOMEM if no memory could be allocated. 2030Sstevel@tonic-gate */ 2040Sstevel@tonic-gate int 2050Sstevel@tonic-gate _ceil_mylist_add(mutex_t *mp) 2060Sstevel@tonic-gate { 2070Sstevel@tonic-gate ulwp_t *self = curthread; 2080Sstevel@tonic-gate mxchain_t *mcp; 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 2110Sstevel@tonic-gate return (ENOMEM); 2120Sstevel@tonic-gate mcp->mxchain_mx = mp; 2130Sstevel@tonic-gate mcp->mxchain_next = self->ul_mxchain; 2140Sstevel@tonic-gate self->ul_mxchain = mcp; 2150Sstevel@tonic-gate return (0); 2160Sstevel@tonic-gate } 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate /* 2190Sstevel@tonic-gate * Inherit priority from ceiling. The inheritance impacts the effective 2200Sstevel@tonic-gate * priority, not the assigned priority. See _thread_setschedparam_main(). 2210Sstevel@tonic-gate */ 2220Sstevel@tonic-gate void 2230Sstevel@tonic-gate _ceil_prio_inherit(int ceil) 2240Sstevel@tonic-gate { 2250Sstevel@tonic-gate ulwp_t *self = curthread; 2260Sstevel@tonic-gate struct sched_param param; 2270Sstevel@tonic-gate 2280Sstevel@tonic-gate (void) _memset(¶m, 0, sizeof (param)); 2290Sstevel@tonic-gate param.sched_priority = ceil; 2300Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2310Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_INHERIT)) { 2320Sstevel@tonic-gate /* 2330Sstevel@tonic-gate * Panic since unclear what error code to return. 2340Sstevel@tonic-gate * If we do return the error codes returned by above 2350Sstevel@tonic-gate * called routine, update the man page... 2360Sstevel@tonic-gate */ 2370Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2380Sstevel@tonic-gate } 2390Sstevel@tonic-gate } 2400Sstevel@tonic-gate 2410Sstevel@tonic-gate /* 2420Sstevel@tonic-gate * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 2430Sstevel@tonic-gate * if holding at least one ceiling lock. If no ceiling locks are held at this 2440Sstevel@tonic-gate * point, disinherit completely, reverting back to assigned priority. 2450Sstevel@tonic-gate */ 2460Sstevel@tonic-gate void 2470Sstevel@tonic-gate _ceil_prio_waive(void) 2480Sstevel@tonic-gate { 2490Sstevel@tonic-gate ulwp_t *self = curthread; 2500Sstevel@tonic-gate struct sched_param param; 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate (void) _memset(¶m, 0, sizeof (param)); 2530Sstevel@tonic-gate if (self->ul_mxchain == NULL) { 2540Sstevel@tonic-gate /* 2550Sstevel@tonic-gate * No ceil locks held. Zero the epri, revert back to ul_pri. 2560Sstevel@tonic-gate * Since thread's hash lock is not held, one cannot just 2570Sstevel@tonic-gate * read ul_pri here...do it in the called routine... 2580Sstevel@tonic-gate */ 2590Sstevel@tonic-gate param.sched_priority = self->ul_pri; /* ignored */ 2600Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2610Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_DISINHERIT)) 2620Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2630Sstevel@tonic-gate } else { 2640Sstevel@tonic-gate /* 2650Sstevel@tonic-gate * Set priority to that of the mutex at the head 2660Sstevel@tonic-gate * of the ceilmutex chain. 2670Sstevel@tonic-gate */ 2680Sstevel@tonic-gate param.sched_priority = 2690Sstevel@tonic-gate self->ul_mxchain->mxchain_mx->mutex_ceiling; 2700Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2710Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_INHERIT)) 2720Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2730Sstevel@tonic-gate } 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate 2760Sstevel@tonic-gate /* 277*5629Sraf * Clear the lock byte. Retain the waiters byte and the spinners byte. 278*5629Sraf * Return the old value of the lock word. 279*5629Sraf */ 280*5629Sraf static uint32_t 281*5629Sraf clear_lockbyte(volatile uint32_t *lockword) 282*5629Sraf { 283*5629Sraf uint32_t old; 284*5629Sraf uint32_t new; 285*5629Sraf 286*5629Sraf do { 287*5629Sraf old = *lockword; 288*5629Sraf new = old & ~LOCKMASK; 289*5629Sraf } while (atomic_cas_32(lockword, old, new) != old); 290*5629Sraf 291*5629Sraf return (old); 292*5629Sraf } 293*5629Sraf 294*5629Sraf /* 295*5629Sraf * Increment the spinners count in the mutex lock word. 296*5629Sraf * Return 0 on success. Return -1 if the count would overflow. 297*5629Sraf */ 298*5629Sraf static int 299*5629Sraf spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 300*5629Sraf { 301*5629Sraf uint32_t old; 302*5629Sraf uint32_t new; 303*5629Sraf 304*5629Sraf do { 305*5629Sraf old = *lockword; 306*5629Sraf if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 307*5629Sraf return (-1); 308*5629Sraf new = old + (1 << SPINNERSHIFT); 309*5629Sraf } while (atomic_cas_32(lockword, old, new) != old); 310*5629Sraf 311*5629Sraf return (0); 312*5629Sraf } 313*5629Sraf 314*5629Sraf /* 315*5629Sraf * Decrement the spinners count in the mutex lock word. 316*5629Sraf * Return the new value of the lock word. 317*5629Sraf */ 318*5629Sraf static uint32_t 319*5629Sraf spinners_decr(volatile uint32_t *lockword) 320*5629Sraf { 321*5629Sraf uint32_t old; 322*5629Sraf uint32_t new; 323*5629Sraf 324*5629Sraf do { 325*5629Sraf new = old = *lockword; 326*5629Sraf if (new & SPINNERMASK) 327*5629Sraf new -= (1 << SPINNERSHIFT); 328*5629Sraf } while (atomic_cas_32(lockword, old, new) != old); 329*5629Sraf 330*5629Sraf return (new); 331*5629Sraf } 332*5629Sraf 333*5629Sraf /* 3340Sstevel@tonic-gate * Non-preemptive spin locks. Used by queue_lock(). 3350Sstevel@tonic-gate * No lock statistics are gathered for these locks. 336*5629Sraf * No DTrace probes are provided for these locks. 3370Sstevel@tonic-gate */ 3380Sstevel@tonic-gate void 3390Sstevel@tonic-gate spin_lock_set(mutex_t *mp) 3400Sstevel@tonic-gate { 3410Sstevel@tonic-gate ulwp_t *self = curthread; 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate no_preempt(self); 3440Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 3450Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3460Sstevel@tonic-gate return; 3470Sstevel@tonic-gate } 3480Sstevel@tonic-gate /* 3490Sstevel@tonic-gate * Spin for a while, attempting to acquire the lock. 3500Sstevel@tonic-gate */ 3510Sstevel@tonic-gate if (self->ul_spin_lock_spin != UINT_MAX) 3520Sstevel@tonic-gate self->ul_spin_lock_spin++; 3530Sstevel@tonic-gate if (mutex_queuelock_adaptive(mp) == 0 || 3540Sstevel@tonic-gate set_lock_byte(&mp->mutex_lockw) == 0) { 3550Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3560Sstevel@tonic-gate return; 3570Sstevel@tonic-gate } 3580Sstevel@tonic-gate /* 3590Sstevel@tonic-gate * Try harder if we were previously at a no premption level. 3600Sstevel@tonic-gate */ 3610Sstevel@tonic-gate if (self->ul_preempt > 1) { 3620Sstevel@tonic-gate if (self->ul_spin_lock_spin2 != UINT_MAX) 3630Sstevel@tonic-gate self->ul_spin_lock_spin2++; 3640Sstevel@tonic-gate if (mutex_queuelock_adaptive(mp) == 0 || 3650Sstevel@tonic-gate set_lock_byte(&mp->mutex_lockw) == 0) { 3660Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3670Sstevel@tonic-gate return; 3680Sstevel@tonic-gate } 3690Sstevel@tonic-gate } 3700Sstevel@tonic-gate /* 3710Sstevel@tonic-gate * Give up and block in the kernel for the mutex. 3720Sstevel@tonic-gate */ 3730Sstevel@tonic-gate if (self->ul_spin_lock_sleep != UINT_MAX) 3740Sstevel@tonic-gate self->ul_spin_lock_sleep++; 3750Sstevel@tonic-gate (void) ___lwp_mutex_timedlock(mp, NULL); 3760Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3770Sstevel@tonic-gate } 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate void 3800Sstevel@tonic-gate spin_lock_clear(mutex_t *mp) 3810Sstevel@tonic-gate { 3820Sstevel@tonic-gate ulwp_t *self = curthread; 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate mp->mutex_owner = 0; 3854570Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 3864574Sraf (void) ___lwp_mutex_wakeup(mp, 0); 3870Sstevel@tonic-gate if (self->ul_spin_lock_wakeup != UINT_MAX) 3880Sstevel@tonic-gate self->ul_spin_lock_wakeup++; 3890Sstevel@tonic-gate } 3900Sstevel@tonic-gate preempt(self); 3910Sstevel@tonic-gate } 3920Sstevel@tonic-gate 3930Sstevel@tonic-gate /* 3940Sstevel@tonic-gate * Allocate the sleep queue hash table. 3950Sstevel@tonic-gate */ 3960Sstevel@tonic-gate void 3970Sstevel@tonic-gate queue_alloc(void) 3980Sstevel@tonic-gate { 3990Sstevel@tonic-gate ulwp_t *self = curthread; 4000Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 4014574Sraf mutex_t *mp; 4020Sstevel@tonic-gate void *data; 4030Sstevel@tonic-gate int i; 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate /* 4060Sstevel@tonic-gate * No locks are needed; we call here only when single-threaded. 4070Sstevel@tonic-gate */ 4080Sstevel@tonic-gate ASSERT(self == udp->ulwp_one); 4090Sstevel@tonic-gate ASSERT(!udp->uberflags.uf_mt); 4100Sstevel@tonic-gate if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 4110Sstevel@tonic-gate PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 4120Sstevel@tonic-gate == MAP_FAILED) 4130Sstevel@tonic-gate thr_panic("cannot allocate thread queue_head table"); 4140Sstevel@tonic-gate udp->queue_head = (queue_head_t *)data; 4154574Sraf for (i = 0; i < 2 * QHASHSIZE; i++) { 4164574Sraf mp = &udp->queue_head[i].qh_lock; 4174574Sraf mp->mutex_flag = LOCK_INITED; 4184574Sraf mp->mutex_magic = MUTEX_MAGIC; 4194574Sraf } 4200Sstevel@tonic-gate } 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate #if defined(THREAD_DEBUG) 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate /* 4250Sstevel@tonic-gate * Debugging: verify correctness of a sleep queue. 4260Sstevel@tonic-gate */ 4270Sstevel@tonic-gate void 4280Sstevel@tonic-gate QVERIFY(queue_head_t *qp) 4290Sstevel@tonic-gate { 4300Sstevel@tonic-gate ulwp_t *self = curthread; 4310Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 4320Sstevel@tonic-gate ulwp_t *ulwp; 4330Sstevel@tonic-gate ulwp_t *prev; 4340Sstevel@tonic-gate uint_t index; 4350Sstevel@tonic-gate uint32_t cnt = 0; 4360Sstevel@tonic-gate char qtype; 4370Sstevel@tonic-gate void *wchan; 4380Sstevel@tonic-gate 4390Sstevel@tonic-gate ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 4400Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 4410Sstevel@tonic-gate ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || 442*5629Sraf (qp->qh_head == NULL && qp->qh_tail == NULL)); 4430Sstevel@tonic-gate if (!thread_queue_verify) 4440Sstevel@tonic-gate return; 4450Sstevel@tonic-gate /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 4460Sstevel@tonic-gate qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 4470Sstevel@tonic-gate for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; 4480Sstevel@tonic-gate prev = ulwp, ulwp = ulwp->ul_link, cnt++) { 4490Sstevel@tonic-gate ASSERT(ulwp->ul_qtype == qtype); 4500Sstevel@tonic-gate ASSERT(ulwp->ul_wchan != NULL); 4510Sstevel@tonic-gate ASSERT(ulwp->ul_sleepq == qp); 4520Sstevel@tonic-gate wchan = ulwp->ul_wchan; 4530Sstevel@tonic-gate index = QUEUE_HASH(wchan, qtype); 4540Sstevel@tonic-gate ASSERT(&udp->queue_head[index] == qp); 4550Sstevel@tonic-gate } 4560Sstevel@tonic-gate ASSERT(qp->qh_tail == prev); 4570Sstevel@tonic-gate ASSERT(qp->qh_qlen == cnt); 4580Sstevel@tonic-gate } 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate #else /* THREAD_DEBUG */ 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate #define QVERIFY(qp) 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate #endif /* THREAD_DEBUG */ 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate /* 4670Sstevel@tonic-gate * Acquire a queue head. 4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate queue_head_t * 4700Sstevel@tonic-gate queue_lock(void *wchan, int qtype) 4710Sstevel@tonic-gate { 4720Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 4730Sstevel@tonic-gate queue_head_t *qp; 4740Sstevel@tonic-gate 4750Sstevel@tonic-gate ASSERT(qtype == MX || qtype == CV); 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate /* 4780Sstevel@tonic-gate * It is possible that we could be called while still single-threaded. 4790Sstevel@tonic-gate * If so, we call queue_alloc() to allocate the queue_head[] array. 4800Sstevel@tonic-gate */ 4810Sstevel@tonic-gate if ((qp = udp->queue_head) == NULL) { 4820Sstevel@tonic-gate queue_alloc(); 4830Sstevel@tonic-gate qp = udp->queue_head; 4840Sstevel@tonic-gate } 4850Sstevel@tonic-gate qp += QUEUE_HASH(wchan, qtype); 4860Sstevel@tonic-gate spin_lock_set(&qp->qh_lock); 4870Sstevel@tonic-gate /* 4880Sstevel@tonic-gate * At once per nanosecond, qh_lockcount will wrap after 512 years. 4890Sstevel@tonic-gate * Were we to care about this, we could peg the value at UINT64_MAX. 4900Sstevel@tonic-gate */ 4910Sstevel@tonic-gate qp->qh_lockcount++; 4920Sstevel@tonic-gate QVERIFY(qp); 4930Sstevel@tonic-gate return (qp); 4940Sstevel@tonic-gate } 4950Sstevel@tonic-gate 4960Sstevel@tonic-gate /* 4970Sstevel@tonic-gate * Release a queue head. 4980Sstevel@tonic-gate */ 4990Sstevel@tonic-gate void 5000Sstevel@tonic-gate queue_unlock(queue_head_t *qp) 5010Sstevel@tonic-gate { 5020Sstevel@tonic-gate QVERIFY(qp); 5030Sstevel@tonic-gate spin_lock_clear(&qp->qh_lock); 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate /* 5070Sstevel@tonic-gate * For rwlock queueing, we must queue writers ahead of readers of the 5080Sstevel@tonic-gate * same priority. We do this by making writers appear to have a half 5090Sstevel@tonic-gate * point higher priority for purposes of priority comparisons below. 5100Sstevel@tonic-gate */ 5110Sstevel@tonic-gate #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate void 5140Sstevel@tonic-gate enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) 5150Sstevel@tonic-gate { 5160Sstevel@tonic-gate ulwp_t **ulwpp; 5170Sstevel@tonic-gate ulwp_t *next; 5180Sstevel@tonic-gate int pri = CMP_PRIO(ulwp); 5190Sstevel@tonic-gate int force_fifo = (qtype & FIFOQ); 5200Sstevel@tonic-gate int do_fifo; 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate qtype &= ~FIFOQ; 5230Sstevel@tonic-gate ASSERT(qtype == MX || qtype == CV); 5240Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 5250Sstevel@tonic-gate ASSERT(ulwp->ul_sleepq != qp); 5260Sstevel@tonic-gate 5270Sstevel@tonic-gate /* 5280Sstevel@tonic-gate * LIFO queue ordering is unfair and can lead to starvation, 5290Sstevel@tonic-gate * but it gives better performance for heavily contended locks. 5300Sstevel@tonic-gate * We use thread_queue_fifo (range is 0..8) to determine 5310Sstevel@tonic-gate * the frequency of FIFO vs LIFO queuing: 5320Sstevel@tonic-gate * 0 : every 256th time (almost always LIFO) 5330Sstevel@tonic-gate * 1 : every 128th time 5340Sstevel@tonic-gate * 2 : every 64th time 5350Sstevel@tonic-gate * 3 : every 32nd time 5360Sstevel@tonic-gate * 4 : every 16th time (the default value, mostly LIFO) 5370Sstevel@tonic-gate * 5 : every 8th time 5380Sstevel@tonic-gate * 6 : every 4th time 5390Sstevel@tonic-gate * 7 : every 2nd time 5400Sstevel@tonic-gate * 8 : every time (never LIFO, always FIFO) 5410Sstevel@tonic-gate * Note that there is always some degree of FIFO ordering. 5420Sstevel@tonic-gate * This breaks live lock conditions that occur in applications 5430Sstevel@tonic-gate * that are written assuming (incorrectly) that threads acquire 5440Sstevel@tonic-gate * locks fairly, that is, in roughly round-robin order. 5450Sstevel@tonic-gate * In any event, the queue is maintained in priority order. 5460Sstevel@tonic-gate * 5470Sstevel@tonic-gate * If we are given the FIFOQ flag in qtype, fifo queueing is forced. 5480Sstevel@tonic-gate * SUSV3 requires this for semaphores. 5490Sstevel@tonic-gate */ 5500Sstevel@tonic-gate do_fifo = (force_fifo || 551*5629Sraf ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate if (qp->qh_head == NULL) { 5540Sstevel@tonic-gate /* 5550Sstevel@tonic-gate * The queue is empty. LIFO/FIFO doesn't matter. 5560Sstevel@tonic-gate */ 5570Sstevel@tonic-gate ASSERT(qp->qh_tail == NULL); 5580Sstevel@tonic-gate ulwpp = &qp->qh_head; 5590Sstevel@tonic-gate } else if (do_fifo) { 5600Sstevel@tonic-gate /* 5610Sstevel@tonic-gate * Enqueue after the last thread whose priority is greater 5620Sstevel@tonic-gate * than or equal to the priority of the thread being queued. 5630Sstevel@tonic-gate * Attempt first to go directly onto the tail of the queue. 5640Sstevel@tonic-gate */ 5650Sstevel@tonic-gate if (pri <= CMP_PRIO(qp->qh_tail)) 5660Sstevel@tonic-gate ulwpp = &qp->qh_tail->ul_link; 5670Sstevel@tonic-gate else { 5680Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 5690Sstevel@tonic-gate ulwpp = &next->ul_link) 5700Sstevel@tonic-gate if (pri > CMP_PRIO(next)) 5710Sstevel@tonic-gate break; 5720Sstevel@tonic-gate } 5730Sstevel@tonic-gate } else { 5740Sstevel@tonic-gate /* 5750Sstevel@tonic-gate * Enqueue before the first thread whose priority is less 5760Sstevel@tonic-gate * than or equal to the priority of the thread being queued. 5770Sstevel@tonic-gate * Hopefully we can go directly onto the head of the queue. 5780Sstevel@tonic-gate */ 5790Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 5800Sstevel@tonic-gate ulwpp = &next->ul_link) 5810Sstevel@tonic-gate if (pri >= CMP_PRIO(next)) 5820Sstevel@tonic-gate break; 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate if ((ulwp->ul_link = *ulwpp) == NULL) 5850Sstevel@tonic-gate qp->qh_tail = ulwp; 5860Sstevel@tonic-gate *ulwpp = ulwp; 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate ulwp->ul_sleepq = qp; 5890Sstevel@tonic-gate ulwp->ul_wchan = wchan; 5900Sstevel@tonic-gate ulwp->ul_qtype = qtype; 5910Sstevel@tonic-gate if (qp->qh_qmax < ++qp->qh_qlen) 5920Sstevel@tonic-gate qp->qh_qmax = qp->qh_qlen; 5930Sstevel@tonic-gate } 5940Sstevel@tonic-gate 5950Sstevel@tonic-gate /* 5960Sstevel@tonic-gate * Return a pointer to the queue slot of the 5970Sstevel@tonic-gate * highest priority thread on the queue. 5980Sstevel@tonic-gate * On return, prevp, if not NULL, will contain a pointer 5990Sstevel@tonic-gate * to the thread's predecessor on the queue 6000Sstevel@tonic-gate */ 6010Sstevel@tonic-gate static ulwp_t ** 6020Sstevel@tonic-gate queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) 6030Sstevel@tonic-gate { 6040Sstevel@tonic-gate ulwp_t **ulwpp; 6050Sstevel@tonic-gate ulwp_t *ulwp; 6060Sstevel@tonic-gate ulwp_t *prev = NULL; 6070Sstevel@tonic-gate ulwp_t **suspp = NULL; 6080Sstevel@tonic-gate ulwp_t *susprev; 6090Sstevel@tonic-gate 6100Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 6110Sstevel@tonic-gate 6120Sstevel@tonic-gate /* 6130Sstevel@tonic-gate * Find a waiter on the sleep queue. 6140Sstevel@tonic-gate */ 6150Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 6160Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 6170Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) { 6180Sstevel@tonic-gate if (!ulwp->ul_stop) 6190Sstevel@tonic-gate break; 6200Sstevel@tonic-gate /* 6210Sstevel@tonic-gate * Try not to return a suspended thread. 6220Sstevel@tonic-gate * This mimics the old libthread's behavior. 6230Sstevel@tonic-gate */ 6240Sstevel@tonic-gate if (suspp == NULL) { 6250Sstevel@tonic-gate suspp = ulwpp; 6260Sstevel@tonic-gate susprev = prev; 6270Sstevel@tonic-gate } 6280Sstevel@tonic-gate } 6290Sstevel@tonic-gate } 6300Sstevel@tonic-gate 6310Sstevel@tonic-gate if (ulwp == NULL && suspp != NULL) { 6320Sstevel@tonic-gate ulwp = *(ulwpp = suspp); 6330Sstevel@tonic-gate prev = susprev; 6340Sstevel@tonic-gate suspp = NULL; 6350Sstevel@tonic-gate } 6360Sstevel@tonic-gate if (ulwp == NULL) { 6370Sstevel@tonic-gate if (more != NULL) 6380Sstevel@tonic-gate *more = 0; 6390Sstevel@tonic-gate return (NULL); 6400Sstevel@tonic-gate } 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate if (prevp != NULL) 6430Sstevel@tonic-gate *prevp = prev; 6440Sstevel@tonic-gate if (more == NULL) 6450Sstevel@tonic-gate return (ulwpp); 6460Sstevel@tonic-gate 6470Sstevel@tonic-gate /* 6480Sstevel@tonic-gate * Scan the remainder of the queue for another waiter. 6490Sstevel@tonic-gate */ 6500Sstevel@tonic-gate if (suspp != NULL) { 6510Sstevel@tonic-gate *more = 1; 6520Sstevel@tonic-gate return (ulwpp); 6530Sstevel@tonic-gate } 6540Sstevel@tonic-gate for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { 6550Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) { 6560Sstevel@tonic-gate *more = 1; 6570Sstevel@tonic-gate return (ulwpp); 6580Sstevel@tonic-gate } 6590Sstevel@tonic-gate } 6600Sstevel@tonic-gate 6610Sstevel@tonic-gate *more = 0; 6620Sstevel@tonic-gate return (ulwpp); 6630Sstevel@tonic-gate } 6640Sstevel@tonic-gate 6650Sstevel@tonic-gate ulwp_t * 6664570Sraf queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 6670Sstevel@tonic-gate { 6680Sstevel@tonic-gate ulwp_t *ulwp; 6690Sstevel@tonic-gate 6700Sstevel@tonic-gate ulwp = *ulwpp; 6710Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 6720Sstevel@tonic-gate ulwp->ul_link = NULL; 6730Sstevel@tonic-gate if (qp->qh_tail == ulwp) 6740Sstevel@tonic-gate qp->qh_tail = prev; 6750Sstevel@tonic-gate qp->qh_qlen--; 6760Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 6770Sstevel@tonic-gate ulwp->ul_wchan = NULL; 6780Sstevel@tonic-gate 6790Sstevel@tonic-gate return (ulwp); 6800Sstevel@tonic-gate } 6810Sstevel@tonic-gate 6824570Sraf ulwp_t * 6834570Sraf dequeue(queue_head_t *qp, void *wchan, int *more) 6844570Sraf { 6854570Sraf ulwp_t **ulwpp; 6864570Sraf ulwp_t *prev; 6874570Sraf 6884570Sraf if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) 6894570Sraf return (NULL); 6904570Sraf return (queue_unlink(qp, ulwpp, prev)); 6914570Sraf } 6924570Sraf 6930Sstevel@tonic-gate /* 6940Sstevel@tonic-gate * Return a pointer to the highest priority thread sleeping on wchan. 6950Sstevel@tonic-gate */ 6960Sstevel@tonic-gate ulwp_t * 6970Sstevel@tonic-gate queue_waiter(queue_head_t *qp, void *wchan) 6980Sstevel@tonic-gate { 6990Sstevel@tonic-gate ulwp_t **ulwpp; 7000Sstevel@tonic-gate 7010Sstevel@tonic-gate if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) 7020Sstevel@tonic-gate return (NULL); 7030Sstevel@tonic-gate return (*ulwpp); 7040Sstevel@tonic-gate } 7050Sstevel@tonic-gate 7060Sstevel@tonic-gate uint8_t 7070Sstevel@tonic-gate dequeue_self(queue_head_t *qp, void *wchan) 7080Sstevel@tonic-gate { 7090Sstevel@tonic-gate ulwp_t *self = curthread; 7100Sstevel@tonic-gate ulwp_t **ulwpp; 7110Sstevel@tonic-gate ulwp_t *ulwp; 7120Sstevel@tonic-gate ulwp_t *prev = NULL; 7130Sstevel@tonic-gate int found = 0; 7140Sstevel@tonic-gate int more = 0; 7150Sstevel@tonic-gate 7160Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 7170Sstevel@tonic-gate 7180Sstevel@tonic-gate /* find self on the sleep queue */ 7190Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 7200Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 7210Sstevel@tonic-gate if (ulwp == self) { 7220Sstevel@tonic-gate /* dequeue ourself */ 7230Sstevel@tonic-gate ASSERT(self->ul_wchan == wchan); 7244570Sraf (void) queue_unlink(qp, ulwpp, prev); 7250Sstevel@tonic-gate self->ul_cvmutex = NULL; 7260Sstevel@tonic-gate self->ul_cv_wake = 0; 7270Sstevel@tonic-gate found = 1; 7280Sstevel@tonic-gate break; 7290Sstevel@tonic-gate } 7300Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) 7310Sstevel@tonic-gate more = 1; 7320Sstevel@tonic-gate } 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate if (!found) 7350Sstevel@tonic-gate thr_panic("dequeue_self(): curthread not found on queue"); 7360Sstevel@tonic-gate 7370Sstevel@tonic-gate if (more) 7380Sstevel@tonic-gate return (1); 7390Sstevel@tonic-gate 7400Sstevel@tonic-gate /* scan the remainder of the queue for another waiter */ 7410Sstevel@tonic-gate for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { 7420Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) 7430Sstevel@tonic-gate return (1); 7440Sstevel@tonic-gate } 7450Sstevel@tonic-gate 7460Sstevel@tonic-gate return (0); 7470Sstevel@tonic-gate } 7480Sstevel@tonic-gate 7490Sstevel@tonic-gate /* 7500Sstevel@tonic-gate * Called from call_user_handler() and _thrp_suspend() to take 7510Sstevel@tonic-gate * ourself off of our sleep queue so we can grab locks. 7520Sstevel@tonic-gate */ 7530Sstevel@tonic-gate void 7540Sstevel@tonic-gate unsleep_self(void) 7550Sstevel@tonic-gate { 7560Sstevel@tonic-gate ulwp_t *self = curthread; 7570Sstevel@tonic-gate queue_head_t *qp; 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate /* 7600Sstevel@tonic-gate * Calling enter_critical()/exit_critical() here would lead 7610Sstevel@tonic-gate * to recursion. Just manipulate self->ul_critical directly. 7620Sstevel@tonic-gate */ 7630Sstevel@tonic-gate self->ul_critical++; 7640Sstevel@tonic-gate while (self->ul_sleepq != NULL) { 7650Sstevel@tonic-gate qp = queue_lock(self->ul_wchan, self->ul_qtype); 7660Sstevel@tonic-gate /* 7670Sstevel@tonic-gate * We may have been moved from a CV queue to a 7680Sstevel@tonic-gate * mutex queue while we were attempting queue_lock(). 7690Sstevel@tonic-gate * If so, just loop around and try again. 7700Sstevel@tonic-gate * dequeue_self() clears self->ul_sleepq. 7710Sstevel@tonic-gate */ 7724570Sraf if (qp == self->ul_sleepq) { 7730Sstevel@tonic-gate (void) dequeue_self(qp, self->ul_wchan); 7744570Sraf self->ul_writer = 0; 7754570Sraf } 7760Sstevel@tonic-gate queue_unlock(qp); 7770Sstevel@tonic-gate } 7780Sstevel@tonic-gate self->ul_critical--; 7790Sstevel@tonic-gate } 7800Sstevel@tonic-gate 7810Sstevel@tonic-gate /* 7820Sstevel@tonic-gate * Common code for calling the the ___lwp_mutex_timedlock() system call. 7830Sstevel@tonic-gate * Returns with mutex_owner and mutex_ownerpid set correctly. 7840Sstevel@tonic-gate */ 7854574Sraf static int 7860Sstevel@tonic-gate mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 7870Sstevel@tonic-gate { 7880Sstevel@tonic-gate ulwp_t *self = curthread; 7890Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 7904574Sraf int mtype = mp->mutex_type; 7910Sstevel@tonic-gate hrtime_t begin_sleep; 7924574Sraf int acquired; 7930Sstevel@tonic-gate int error; 7940Sstevel@tonic-gate 7950Sstevel@tonic-gate self->ul_sp = stkptr(); 7960Sstevel@tonic-gate self->ul_wchan = mp; 7970Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 7980Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 7990Sstevel@tonic-gate self->ul_td_evbuf.eventdata = mp; 8000Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 8010Sstevel@tonic-gate } 8020Sstevel@tonic-gate if (msp) { 8030Sstevel@tonic-gate tdb_incr(msp->mutex_sleep); 8040Sstevel@tonic-gate begin_sleep = gethrtime(); 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate DTRACE_PROBE1(plockstat, mutex__block, mp); 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate for (;;) { 8104574Sraf /* 8114574Sraf * A return value of EOWNERDEAD or ELOCKUNMAPPED 8124574Sraf * means we successfully acquired the lock. 8134574Sraf */ 8144574Sraf if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 8154574Sraf error != EOWNERDEAD && error != ELOCKUNMAPPED) { 8164574Sraf acquired = 0; 8170Sstevel@tonic-gate break; 8180Sstevel@tonic-gate } 8190Sstevel@tonic-gate 8204574Sraf if (mtype & USYNC_PROCESS) { 8210Sstevel@tonic-gate /* 8220Sstevel@tonic-gate * Defend against forkall(). We may be the child, 8230Sstevel@tonic-gate * in which case we don't actually own the mutex. 8240Sstevel@tonic-gate */ 8250Sstevel@tonic-gate enter_critical(self); 8260Sstevel@tonic-gate if (mp->mutex_ownerpid == udp->pid) { 8270Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8280Sstevel@tonic-gate exit_critical(self); 8294574Sraf acquired = 1; 8300Sstevel@tonic-gate break; 8310Sstevel@tonic-gate } 8320Sstevel@tonic-gate exit_critical(self); 8330Sstevel@tonic-gate } else { 8340Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8354574Sraf acquired = 1; 8360Sstevel@tonic-gate break; 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate } 8390Sstevel@tonic-gate if (msp) 8400Sstevel@tonic-gate msp->mutex_sleep_time += gethrtime() - begin_sleep; 8410Sstevel@tonic-gate self->ul_wchan = NULL; 8420Sstevel@tonic-gate self->ul_sp = 0; 8430Sstevel@tonic-gate 8444574Sraf if (acquired) { 8454574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 8464574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 8474574Sraf } else { 8484574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 8494574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 8504574Sraf } 8514574Sraf 8520Sstevel@tonic-gate return (error); 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate 8550Sstevel@tonic-gate /* 8560Sstevel@tonic-gate * Common code for calling the ___lwp_mutex_trylock() system call. 8570Sstevel@tonic-gate * Returns with mutex_owner and mutex_ownerpid set correctly. 8580Sstevel@tonic-gate */ 8590Sstevel@tonic-gate int 8600Sstevel@tonic-gate mutex_trylock_kernel(mutex_t *mp) 8610Sstevel@tonic-gate { 8620Sstevel@tonic-gate ulwp_t *self = curthread; 8630Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 8644574Sraf int mtype = mp->mutex_type; 8650Sstevel@tonic-gate int error; 8664574Sraf int acquired; 8670Sstevel@tonic-gate 8680Sstevel@tonic-gate for (;;) { 8694574Sraf /* 8704574Sraf * A return value of EOWNERDEAD or ELOCKUNMAPPED 8714574Sraf * means we successfully acquired the lock. 8724574Sraf */ 8734574Sraf if ((error = ___lwp_mutex_trylock(mp)) != 0 && 8744574Sraf error != EOWNERDEAD && error != ELOCKUNMAPPED) { 8754574Sraf acquired = 0; 8760Sstevel@tonic-gate break; 8770Sstevel@tonic-gate } 8780Sstevel@tonic-gate 8794574Sraf if (mtype & USYNC_PROCESS) { 8800Sstevel@tonic-gate /* 8810Sstevel@tonic-gate * Defend against forkall(). We may be the child, 8820Sstevel@tonic-gate * in which case we don't actually own the mutex. 8830Sstevel@tonic-gate */ 8840Sstevel@tonic-gate enter_critical(self); 8850Sstevel@tonic-gate if (mp->mutex_ownerpid == udp->pid) { 8860Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8870Sstevel@tonic-gate exit_critical(self); 8884574Sraf acquired = 1; 8890Sstevel@tonic-gate break; 8900Sstevel@tonic-gate } 8910Sstevel@tonic-gate exit_critical(self); 8920Sstevel@tonic-gate } else { 8930Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8944574Sraf acquired = 1; 8950Sstevel@tonic-gate break; 8960Sstevel@tonic-gate } 8970Sstevel@tonic-gate } 8980Sstevel@tonic-gate 8994574Sraf if (acquired) { 9004574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 9014574Sraf } else if (error != EBUSY) { 9024574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 9034574Sraf } 9044574Sraf 9050Sstevel@tonic-gate return (error); 9060Sstevel@tonic-gate } 9070Sstevel@tonic-gate 9080Sstevel@tonic-gate volatile sc_shared_t * 9090Sstevel@tonic-gate setup_schedctl(void) 9100Sstevel@tonic-gate { 9110Sstevel@tonic-gate ulwp_t *self = curthread; 9120Sstevel@tonic-gate volatile sc_shared_t *scp; 9130Sstevel@tonic-gate sc_shared_t *tmp; 9140Sstevel@tonic-gate 9150Sstevel@tonic-gate if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 9160Sstevel@tonic-gate !self->ul_vfork && /* not a child of vfork() */ 9170Sstevel@tonic-gate !self->ul_schedctl_called) { /* haven't been called before */ 9180Sstevel@tonic-gate enter_critical(self); 9190Sstevel@tonic-gate self->ul_schedctl_called = &self->ul_uberdata->uberflags; 9200Sstevel@tonic-gate if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 9210Sstevel@tonic-gate self->ul_schedctl = scp = tmp; 9220Sstevel@tonic-gate exit_critical(self); 9230Sstevel@tonic-gate } 9240Sstevel@tonic-gate /* 9250Sstevel@tonic-gate * Unless the call to setup_schedctl() is surrounded 9260Sstevel@tonic-gate * by enter_critical()/exit_critical(), the address 9270Sstevel@tonic-gate * we are returning could be invalid due to a forkall() 9280Sstevel@tonic-gate * having occurred in another thread. 9290Sstevel@tonic-gate */ 9300Sstevel@tonic-gate return (scp); 9310Sstevel@tonic-gate } 9320Sstevel@tonic-gate 9330Sstevel@tonic-gate /* 9340Sstevel@tonic-gate * Interfaces from libsched, incorporated into libc. 9350Sstevel@tonic-gate * libsched.so.1 is now a filter library onto libc. 9360Sstevel@tonic-gate */ 9370Sstevel@tonic-gate #pragma weak schedctl_lookup = _schedctl_init 9380Sstevel@tonic-gate #pragma weak _schedctl_lookup = _schedctl_init 9390Sstevel@tonic-gate #pragma weak schedctl_init = _schedctl_init 9400Sstevel@tonic-gate schedctl_t * 9410Sstevel@tonic-gate _schedctl_init(void) 9420Sstevel@tonic-gate { 9430Sstevel@tonic-gate volatile sc_shared_t *scp = setup_schedctl(); 9440Sstevel@tonic-gate return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 9450Sstevel@tonic-gate } 9460Sstevel@tonic-gate 9470Sstevel@tonic-gate #pragma weak schedctl_exit = _schedctl_exit 9480Sstevel@tonic-gate void 9490Sstevel@tonic-gate _schedctl_exit(void) 9500Sstevel@tonic-gate { 9510Sstevel@tonic-gate } 9520Sstevel@tonic-gate 9530Sstevel@tonic-gate /* 9540Sstevel@tonic-gate * Contract private interface for java. 9550Sstevel@tonic-gate * Set up the schedctl data if it doesn't exist yet. 9560Sstevel@tonic-gate * Return a pointer to the pointer to the schedctl data. 9570Sstevel@tonic-gate */ 9580Sstevel@tonic-gate volatile sc_shared_t *volatile * 9590Sstevel@tonic-gate _thr_schedctl(void) 9600Sstevel@tonic-gate { 9610Sstevel@tonic-gate ulwp_t *self = curthread; 9620Sstevel@tonic-gate volatile sc_shared_t *volatile *ptr; 9630Sstevel@tonic-gate 9640Sstevel@tonic-gate if (self->ul_vfork) 9650Sstevel@tonic-gate return (NULL); 9660Sstevel@tonic-gate if (*(ptr = &self->ul_schedctl) == NULL) 9670Sstevel@tonic-gate (void) setup_schedctl(); 9680Sstevel@tonic-gate return (ptr); 9690Sstevel@tonic-gate } 9700Sstevel@tonic-gate 9710Sstevel@tonic-gate /* 9720Sstevel@tonic-gate * Block signals and attempt to block preemption. 9730Sstevel@tonic-gate * no_preempt()/preempt() must be used in pairs but can be nested. 9740Sstevel@tonic-gate */ 9750Sstevel@tonic-gate void 9760Sstevel@tonic-gate no_preempt(ulwp_t *self) 9770Sstevel@tonic-gate { 9780Sstevel@tonic-gate volatile sc_shared_t *scp; 9790Sstevel@tonic-gate 9800Sstevel@tonic-gate if (self->ul_preempt++ == 0) { 9810Sstevel@tonic-gate enter_critical(self); 9820Sstevel@tonic-gate if ((scp = self->ul_schedctl) != NULL || 9830Sstevel@tonic-gate (scp = setup_schedctl()) != NULL) { 9840Sstevel@tonic-gate /* 9850Sstevel@tonic-gate * Save the pre-existing preempt value. 9860Sstevel@tonic-gate */ 9870Sstevel@tonic-gate self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 9880Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt = 1; 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate } 9910Sstevel@tonic-gate } 9920Sstevel@tonic-gate 9930Sstevel@tonic-gate /* 9940Sstevel@tonic-gate * Undo the effects of no_preempt(). 9950Sstevel@tonic-gate */ 9960Sstevel@tonic-gate void 9970Sstevel@tonic-gate preempt(ulwp_t *self) 9980Sstevel@tonic-gate { 9990Sstevel@tonic-gate volatile sc_shared_t *scp; 10000Sstevel@tonic-gate 10010Sstevel@tonic-gate ASSERT(self->ul_preempt > 0); 10020Sstevel@tonic-gate if (--self->ul_preempt == 0) { 10030Sstevel@tonic-gate if ((scp = self->ul_schedctl) != NULL) { 10040Sstevel@tonic-gate /* 10050Sstevel@tonic-gate * Restore the pre-existing preempt value. 10060Sstevel@tonic-gate */ 10070Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 10080Sstevel@tonic-gate if (scp->sc_preemptctl.sc_yield && 10090Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt == 0) { 10100Sstevel@tonic-gate lwp_yield(); 10110Sstevel@tonic-gate if (scp->sc_preemptctl.sc_yield) { 10120Sstevel@tonic-gate /* 10130Sstevel@tonic-gate * Shouldn't happen. This is either 10140Sstevel@tonic-gate * a race condition or the thread 10150Sstevel@tonic-gate * just entered the real-time class. 10160Sstevel@tonic-gate */ 10170Sstevel@tonic-gate lwp_yield(); 10180Sstevel@tonic-gate scp->sc_preemptctl.sc_yield = 0; 10190Sstevel@tonic-gate } 10200Sstevel@tonic-gate } 10210Sstevel@tonic-gate } 10220Sstevel@tonic-gate exit_critical(self); 10230Sstevel@tonic-gate } 10240Sstevel@tonic-gate } 10250Sstevel@tonic-gate 10260Sstevel@tonic-gate /* 10270Sstevel@tonic-gate * If a call to preempt() would cause the current thread to yield or to 10280Sstevel@tonic-gate * take deferred actions in exit_critical(), then unpark the specified 10290Sstevel@tonic-gate * lwp so it can run while we delay. Return the original lwpid if the 10300Sstevel@tonic-gate * unpark was not performed, else return zero. The tests are a repeat 10310Sstevel@tonic-gate * of some of the tests in preempt(), above. This is a statistical 10320Sstevel@tonic-gate * optimization solely for cond_sleep_queue(), below. 10330Sstevel@tonic-gate */ 10340Sstevel@tonic-gate static lwpid_t 10350Sstevel@tonic-gate preempt_unpark(ulwp_t *self, lwpid_t lwpid) 10360Sstevel@tonic-gate { 10370Sstevel@tonic-gate volatile sc_shared_t *scp = self->ul_schedctl; 10380Sstevel@tonic-gate 10390Sstevel@tonic-gate ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 10400Sstevel@tonic-gate if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 10410Sstevel@tonic-gate (self->ul_curplease && self->ul_critical == 1)) { 10420Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 10430Sstevel@tonic-gate lwpid = 0; 10440Sstevel@tonic-gate } 10450Sstevel@tonic-gate return (lwpid); 10460Sstevel@tonic-gate } 10470Sstevel@tonic-gate 10480Sstevel@tonic-gate /* 10494613Sraf * Spin for a while (if 'tryhard' is true), trying to grab the lock. 10500Sstevel@tonic-gate * If this fails, return EBUSY and let the caller deal with it. 10510Sstevel@tonic-gate * If this succeeds, return 0 with mutex_owner set to curthread. 10520Sstevel@tonic-gate */ 10534574Sraf static int 10544613Sraf mutex_trylock_adaptive(mutex_t *mp, int tryhard) 10550Sstevel@tonic-gate { 10560Sstevel@tonic-gate ulwp_t *self = curthread; 10574574Sraf int error = EBUSY; 10580Sstevel@tonic-gate ulwp_t *ulwp; 10590Sstevel@tonic-gate volatile sc_shared_t *scp; 1060*5629Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1061*5629Sraf volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1062*5629Sraf uint32_t new_lockword; 1063*5629Sraf int count = 0; 1064*5629Sraf int max_count; 1065*5629Sraf uint8_t max_spinners; 10664574Sraf 10674574Sraf ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 10684574Sraf 10694574Sraf if (MUTEX_OWNER(mp) == self) 10700Sstevel@tonic-gate return (EBUSY); 10710Sstevel@tonic-gate 10724574Sraf /* short-cut, not definitive (see below) */ 10734574Sraf if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 10744574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 1075*5629Sraf error = ENOTRECOVERABLE; 1076*5629Sraf goto done; 10774574Sraf } 10784574Sraf 1079*5629Sraf /* 1080*5629Sraf * Make one attempt to acquire the lock before 1081*5629Sraf * incurring the overhead of the spin loop. 1082*5629Sraf */ 1083*5629Sraf if (set_lock_byte(lockp) == 0) { 1084*5629Sraf *ownerp = (uintptr_t)self; 1085*5629Sraf error = 0; 1086*5629Sraf goto done; 1087*5629Sraf } 1088*5629Sraf if (!tryhard) 1089*5629Sraf goto done; 1090*5629Sraf if (ncpus == 0) 1091*5629Sraf ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1092*5629Sraf if ((max_spinners = self->ul_max_spinners) >= ncpus) 1093*5629Sraf max_spinners = ncpus - 1; 1094*5629Sraf max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1095*5629Sraf if (max_count == 0) 1096*5629Sraf goto done; 1097*5629Sraf 10980Sstevel@tonic-gate /* 10990Sstevel@tonic-gate * This spin loop is unfair to lwps that have already dropped into 11000Sstevel@tonic-gate * the kernel to sleep. They will starve on a highly-contended mutex. 11010Sstevel@tonic-gate * This is just too bad. The adaptive spin algorithm is intended 11020Sstevel@tonic-gate * to allow programs with highly-contended locks (that is, broken 11030Sstevel@tonic-gate * programs) to execute with reasonable speed despite their contention. 11040Sstevel@tonic-gate * Being fair would reduce the speed of such programs and well-written 11050Sstevel@tonic-gate * programs will not suffer in any case. 11060Sstevel@tonic-gate */ 1107*5629Sraf enter_critical(self); 1108*5629Sraf if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1109*5629Sraf exit_critical(self); 1110*5629Sraf goto done; 1111*5629Sraf } 1112*5629Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 1113*5629Sraf for (count = 1; ; count++) { 11140Sstevel@tonic-gate if (*lockp == 0 && set_lock_byte(lockp) == 0) { 11150Sstevel@tonic-gate *ownerp = (uintptr_t)self; 11164574Sraf error = 0; 11174574Sraf break; 11180Sstevel@tonic-gate } 1119*5629Sraf if (count == max_count) 1120*5629Sraf break; 11210Sstevel@tonic-gate SMT_PAUSE(); 11220Sstevel@tonic-gate /* 11230Sstevel@tonic-gate * Stop spinning if the mutex owner is not running on 11240Sstevel@tonic-gate * a processor; it will not drop the lock any time soon 11250Sstevel@tonic-gate * and we would just be wasting time to keep spinning. 11260Sstevel@tonic-gate * 11270Sstevel@tonic-gate * Note that we are looking at another thread (ulwp_t) 11280Sstevel@tonic-gate * without ensuring that the other thread does not exit. 11290Sstevel@tonic-gate * The scheme relies on ulwp_t structures never being 11300Sstevel@tonic-gate * deallocated by the library (the library employs a free 11310Sstevel@tonic-gate * list of ulwp_t structs that are reused when new threads 11320Sstevel@tonic-gate * are created) and on schedctl shared memory never being 11330Sstevel@tonic-gate * deallocated once created via __schedctl(). 11340Sstevel@tonic-gate * 11350Sstevel@tonic-gate * Thus, the worst that can happen when the spinning thread 11360Sstevel@tonic-gate * looks at the owner's schedctl data is that it is looking 11370Sstevel@tonic-gate * at some other thread's schedctl data. This almost never 11380Sstevel@tonic-gate * happens and is benign when it does. 11390Sstevel@tonic-gate */ 11400Sstevel@tonic-gate if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 11410Sstevel@tonic-gate ((scp = ulwp->ul_schedctl) == NULL || 11420Sstevel@tonic-gate scp->sc_state != SC_ONPROC)) 11430Sstevel@tonic-gate break; 11440Sstevel@tonic-gate } 1145*5629Sraf new_lockword = spinners_decr(&mp->mutex_lockword); 1146*5629Sraf if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1147*5629Sraf /* 1148*5629Sraf * We haven't yet acquired the lock, the lock 1149*5629Sraf * is free, and there are no other spinners. 1150*5629Sraf * Make one final attempt to acquire the lock. 1151*5629Sraf * 1152*5629Sraf * This isn't strictly necessary since mutex_lock_queue() 1153*5629Sraf * (the next action this thread will take if it doesn't 1154*5629Sraf * acquire the lock here) makes one attempt to acquire 1155*5629Sraf * the lock before putting the thread to sleep. 1156*5629Sraf * 1157*5629Sraf * If the next action for this thread (on failure here) 1158*5629Sraf * were not to call mutex_lock_queue(), this would be 1159*5629Sraf * necessary for correctness, to avoid ending up with an 1160*5629Sraf * unheld mutex with waiters but no one to wake them up. 1161*5629Sraf */ 1162*5629Sraf if (set_lock_byte(lockp) == 0) { 1163*5629Sraf *ownerp = (uintptr_t)self; 1164*5629Sraf error = 0; 1165*5629Sraf } 1166*5629Sraf count++; 1167*5629Sraf } 11680Sstevel@tonic-gate exit_critical(self); 11690Sstevel@tonic-gate 1170*5629Sraf done: 11714574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 11724574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11734574Sraf /* 11744574Sraf * We shouldn't own the mutex; clear the lock. 11754574Sraf */ 11764574Sraf mp->mutex_owner = 0; 1177*5629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 11784574Sraf mutex_wakeup_all(mp); 11794574Sraf error = ENOTRECOVERABLE; 11804574Sraf } 11814574Sraf 11824574Sraf if (error) { 1183*5629Sraf if (count) { 1184*5629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1185*5629Sraf } 11864574Sraf if (error != EBUSY) { 11874574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 11884574Sraf } 11894574Sraf } else { 1190*5629Sraf if (count) { 1191*5629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1192*5629Sraf } 11934574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 11944574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 11954574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11964574Sraf error = EOWNERDEAD; 11974574Sraf } 11984574Sraf } 11994574Sraf 12004574Sraf return (error); 12010Sstevel@tonic-gate } 12020Sstevel@tonic-gate 12030Sstevel@tonic-gate /* 12040Sstevel@tonic-gate * Same as mutex_trylock_adaptive(), except specifically for queue locks. 12050Sstevel@tonic-gate * The owner field is not set here; the caller (spin_lock_set()) sets it. 12060Sstevel@tonic-gate */ 12074574Sraf static int 12080Sstevel@tonic-gate mutex_queuelock_adaptive(mutex_t *mp) 12090Sstevel@tonic-gate { 12100Sstevel@tonic-gate ulwp_t *ulwp; 12110Sstevel@tonic-gate volatile sc_shared_t *scp; 12120Sstevel@tonic-gate volatile uint8_t *lockp; 12130Sstevel@tonic-gate volatile uint64_t *ownerp; 12140Sstevel@tonic-gate int count = curthread->ul_queue_spin; 12150Sstevel@tonic-gate 12160Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 12170Sstevel@tonic-gate 12180Sstevel@tonic-gate if (count == 0) 12190Sstevel@tonic-gate return (EBUSY); 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 12220Sstevel@tonic-gate ownerp = (volatile uint64_t *)&mp->mutex_owner; 12230Sstevel@tonic-gate while (--count >= 0) { 12240Sstevel@tonic-gate if (*lockp == 0 && set_lock_byte(lockp) == 0) 12250Sstevel@tonic-gate return (0); 12260Sstevel@tonic-gate SMT_PAUSE(); 12270Sstevel@tonic-gate if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 12280Sstevel@tonic-gate ((scp = ulwp->ul_schedctl) == NULL || 12290Sstevel@tonic-gate scp->sc_state != SC_ONPROC)) 12300Sstevel@tonic-gate break; 12310Sstevel@tonic-gate } 12320Sstevel@tonic-gate 12330Sstevel@tonic-gate return (EBUSY); 12340Sstevel@tonic-gate } 12350Sstevel@tonic-gate 12360Sstevel@tonic-gate /* 12370Sstevel@tonic-gate * Like mutex_trylock_adaptive(), but for process-shared mutexes. 12384613Sraf * Spin for a while (if 'tryhard' is true), trying to grab the lock. 12390Sstevel@tonic-gate * If this fails, return EBUSY and let the caller deal with it. 12400Sstevel@tonic-gate * If this succeeds, return 0 with mutex_owner set to curthread 12410Sstevel@tonic-gate * and mutex_ownerpid set to the current pid. 12420Sstevel@tonic-gate */ 12434574Sraf static int 12444613Sraf mutex_trylock_process(mutex_t *mp, int tryhard) 12450Sstevel@tonic-gate { 12460Sstevel@tonic-gate ulwp_t *self = curthread; 1247*5629Sraf uberdata_t *udp = self->ul_uberdata; 12484574Sraf int error = EBUSY; 1249*5629Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1250*5629Sraf uint32_t new_lockword; 1251*5629Sraf int count = 0; 1252*5629Sraf int max_count; 1253*5629Sraf uint8_t max_spinners; 12544574Sraf 12554574Sraf ASSERT(mp->mutex_type & USYNC_PROCESS); 12564574Sraf 12574574Sraf if (shared_mutex_held(mp)) 12580Sstevel@tonic-gate return (EBUSY); 12590Sstevel@tonic-gate 12604574Sraf /* short-cut, not definitive (see below) */ 12614574Sraf if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 12624574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 1263*5629Sraf error = ENOTRECOVERABLE; 1264*5629Sraf goto done; 12654574Sraf } 12664574Sraf 1267*5629Sraf /* 1268*5629Sraf * Make one attempt to acquire the lock before 1269*5629Sraf * incurring the overhead of the spin loop. 1270*5629Sraf */ 1271*5629Sraf enter_critical(self); 1272*5629Sraf if (set_lock_byte(lockp) == 0) { 1273*5629Sraf mp->mutex_owner = (uintptr_t)self; 1274*5629Sraf mp->mutex_ownerpid = udp->pid; 1275*5629Sraf exit_critical(self); 1276*5629Sraf error = 0; 1277*5629Sraf goto done; 1278*5629Sraf } 1279*5629Sraf exit_critical(self); 1280*5629Sraf if (!tryhard) 1281*5629Sraf goto done; 12824574Sraf if (ncpus == 0) 12834574Sraf ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1284*5629Sraf if ((max_spinners = self->ul_max_spinners) >= ncpus) 1285*5629Sraf max_spinners = ncpus - 1; 1286*5629Sraf max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1287*5629Sraf if (max_count == 0) 1288*5629Sraf goto done; 1289*5629Sraf 12900Sstevel@tonic-gate /* 12910Sstevel@tonic-gate * This is a process-shared mutex. 12920Sstevel@tonic-gate * We cannot know if the owner is running on a processor. 12930Sstevel@tonic-gate * We just spin and hope that it is on a processor. 12940Sstevel@tonic-gate */ 12954574Sraf enter_critical(self); 1296*5629Sraf if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1297*5629Sraf exit_critical(self); 1298*5629Sraf goto done; 1299*5629Sraf } 1300*5629Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 1301*5629Sraf for (count = 1; ; count++) { 13024574Sraf if (*lockp == 0 && set_lock_byte(lockp) == 0) { 13034574Sraf mp->mutex_owner = (uintptr_t)self; 1304*5629Sraf mp->mutex_ownerpid = udp->pid; 13054574Sraf error = 0; 13064574Sraf break; 13074574Sraf } 1308*5629Sraf if (count == max_count) 1309*5629Sraf break; 13104574Sraf SMT_PAUSE(); 13114574Sraf } 1312*5629Sraf new_lockword = spinners_decr(&mp->mutex_lockword); 1313*5629Sraf if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1314*5629Sraf /* 1315*5629Sraf * We haven't yet acquired the lock, the lock 1316*5629Sraf * is free, and there are no other spinners. 1317*5629Sraf * Make one final attempt to acquire the lock. 1318*5629Sraf * 1319*5629Sraf * This isn't strictly necessary since mutex_lock_kernel() 1320*5629Sraf * (the next action this thread will take if it doesn't 1321*5629Sraf * acquire the lock here) makes one attempt to acquire 1322*5629Sraf * the lock before putting the thread to sleep. 1323*5629Sraf * 1324*5629Sraf * If the next action for this thread (on failure here) 1325*5629Sraf * were not to call mutex_lock_kernel(), this would be 1326*5629Sraf * necessary for correctness, to avoid ending up with an 1327*5629Sraf * unheld mutex with waiters but no one to wake them up. 1328*5629Sraf */ 1329*5629Sraf if (set_lock_byte(lockp) == 0) { 1330*5629Sraf mp->mutex_owner = (uintptr_t)self; 1331*5629Sraf mp->mutex_ownerpid = udp->pid; 1332*5629Sraf error = 0; 1333*5629Sraf } 1334*5629Sraf count++; 1335*5629Sraf } 13364574Sraf exit_critical(self); 13374574Sraf 1338*5629Sraf done: 13394574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 13404574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 13414574Sraf /* 13424574Sraf * We shouldn't own the mutex; clear the lock. 13434574Sraf */ 13444574Sraf mp->mutex_owner = 0; 13454574Sraf mp->mutex_ownerpid = 0; 1346*5629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) { 13474574Sraf no_preempt(self); 13484574Sraf (void) ___lwp_mutex_wakeup(mp, 1); 13494574Sraf preempt(self); 13500Sstevel@tonic-gate } 13514574Sraf error = ENOTRECOVERABLE; 13520Sstevel@tonic-gate } 13530Sstevel@tonic-gate 13544574Sraf if (error) { 1355*5629Sraf if (count) { 1356*5629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1357*5629Sraf } 13584574Sraf if (error != EBUSY) { 13594574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 13604574Sraf } 13614574Sraf } else { 1362*5629Sraf if (count) { 1363*5629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1364*5629Sraf } 13654574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 13664574Sraf if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 13674574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 13684574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) 13694574Sraf error = EOWNERDEAD; 13704574Sraf else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 13714574Sraf error = ELOCKUNMAPPED; 13724574Sraf else 13734574Sraf error = EOWNERDEAD; 13744574Sraf } 13754574Sraf } 13764574Sraf 13774574Sraf return (error); 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate 13800Sstevel@tonic-gate /* 13810Sstevel@tonic-gate * Mutex wakeup code for releasing a USYNC_THREAD mutex. 13820Sstevel@tonic-gate * Returns the lwpid of the thread that was dequeued, if any. 13830Sstevel@tonic-gate * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 13840Sstevel@tonic-gate * to wake up the specified lwp. 13850Sstevel@tonic-gate */ 13864574Sraf static lwpid_t 13870Sstevel@tonic-gate mutex_wakeup(mutex_t *mp) 13880Sstevel@tonic-gate { 13890Sstevel@tonic-gate lwpid_t lwpid = 0; 13900Sstevel@tonic-gate queue_head_t *qp; 13910Sstevel@tonic-gate ulwp_t *ulwp; 13920Sstevel@tonic-gate int more; 13930Sstevel@tonic-gate 13940Sstevel@tonic-gate /* 13950Sstevel@tonic-gate * Dequeue a waiter from the sleep queue. Don't touch the mutex 13960Sstevel@tonic-gate * waiters bit if no one was found on the queue because the mutex 13970Sstevel@tonic-gate * might have been deallocated or reallocated for another purpose. 13980Sstevel@tonic-gate */ 13990Sstevel@tonic-gate qp = queue_lock(mp, MX); 14000Sstevel@tonic-gate if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 14010Sstevel@tonic-gate lwpid = ulwp->ul_lwpid; 14020Sstevel@tonic-gate mp->mutex_waiters = (more? 1 : 0); 14030Sstevel@tonic-gate } 14040Sstevel@tonic-gate queue_unlock(qp); 14050Sstevel@tonic-gate return (lwpid); 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate 14080Sstevel@tonic-gate /* 14094574Sraf * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 14104574Sraf */ 14114574Sraf static void 14124574Sraf mutex_wakeup_all(mutex_t *mp) 14134574Sraf { 14144574Sraf queue_head_t *qp; 14154574Sraf int nlwpid = 0; 14164574Sraf int maxlwps = MAXLWPS; 14174574Sraf ulwp_t **ulwpp; 14184574Sraf ulwp_t *ulwp; 14194574Sraf ulwp_t *prev = NULL; 14204574Sraf lwpid_t buffer[MAXLWPS]; 14214574Sraf lwpid_t *lwpid = buffer; 14224574Sraf 14234574Sraf /* 14244574Sraf * Walk the list of waiters and prepare to wake up all of them. 14254574Sraf * The waiters flag has already been cleared from the mutex. 14264574Sraf * 14274574Sraf * We keep track of lwpids that are to be unparked in lwpid[]. 14284574Sraf * __lwp_unpark_all() is called to unpark all of them after 14294574Sraf * they have been removed from the sleep queue and the sleep 14304574Sraf * queue lock has been dropped. If we run out of space in our 14314574Sraf * on-stack buffer, we need to allocate more but we can't call 14324574Sraf * lmalloc() because we are holding a queue lock when the overflow 14334574Sraf * occurs and lmalloc() acquires a lock. We can't use alloca() 14344574Sraf * either because the application may have allocated a small 14354574Sraf * stack and we don't want to overrun the stack. So we call 14364574Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 14374574Sraf * system call directly since that path acquires no locks. 14384574Sraf */ 14394574Sraf qp = queue_lock(mp, MX); 14404574Sraf ulwpp = &qp->qh_head; 14414574Sraf while ((ulwp = *ulwpp) != NULL) { 14424574Sraf if (ulwp->ul_wchan != mp) { 14434574Sraf prev = ulwp; 14444574Sraf ulwpp = &ulwp->ul_link; 14454574Sraf } else { 14464574Sraf if (nlwpid == maxlwps) 14474574Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 14484574Sraf (void) queue_unlink(qp, ulwpp, prev); 14494574Sraf lwpid[nlwpid++] = ulwp->ul_lwpid; 14504574Sraf } 14514574Sraf } 14524574Sraf 14534574Sraf if (nlwpid == 0) { 14544574Sraf queue_unlock(qp); 14554574Sraf } else { 1456*5629Sraf mp->mutex_waiters = 0; 14574574Sraf no_preempt(curthread); 14584574Sraf queue_unlock(qp); 14594574Sraf if (nlwpid == 1) 14604574Sraf (void) __lwp_unpark(lwpid[0]); 14614574Sraf else 14624574Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 14634574Sraf preempt(curthread); 14644574Sraf } 14654574Sraf 14664574Sraf if (lwpid != buffer) 14674574Sraf (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 14684574Sraf } 14694574Sraf 14704574Sraf /* 1471*5629Sraf * Release a process-private mutex. 1472*5629Sraf * As an optimization, if there are waiters but there are also spinners 1473*5629Sraf * attempting to acquire the mutex, then don't bother waking up a waiter; 1474*5629Sraf * one of the spinners will acquire the mutex soon and it would be a waste 1475*5629Sraf * of resources to wake up some thread just to have it spin for a while 1476*5629Sraf * and then possibly go back to sleep. See mutex_trylock_adaptive(). 14770Sstevel@tonic-gate */ 14784574Sraf static lwpid_t 14794574Sraf mutex_unlock_queue(mutex_t *mp, int release_all) 14800Sstevel@tonic-gate { 1481*5629Sraf lwpid_t lwpid = 0; 1482*5629Sraf uint32_t old_lockword; 1483*5629Sraf 1484*5629Sraf mp->mutex_owner = 0; 1485*5629Sraf DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1486*5629Sraf old_lockword = clear_lockbyte(&mp->mutex_lockword); 1487*5629Sraf if ((old_lockword & WAITERMASK) && 1488*5629Sraf (release_all || (old_lockword & SPINNERMASK) == 0)) { 1489*5629Sraf ulwp_t *self = curthread; 14900Sstevel@tonic-gate no_preempt(self); /* ensure a prompt wakeup */ 1491*5629Sraf if (release_all) 1492*5629Sraf mutex_wakeup_all(mp); 1493*5629Sraf else 1494*5629Sraf lwpid = mutex_wakeup(mp); 1495*5629Sraf if (lwpid == 0) 1496*5629Sraf preempt(self); 14974574Sraf } 14980Sstevel@tonic-gate return (lwpid); 14990Sstevel@tonic-gate } 15000Sstevel@tonic-gate 15010Sstevel@tonic-gate /* 15020Sstevel@tonic-gate * Like mutex_unlock_queue(), but for process-shared mutexes. 15030Sstevel@tonic-gate */ 15044574Sraf static void 15054574Sraf mutex_unlock_process(mutex_t *mp, int release_all) 15060Sstevel@tonic-gate { 1507*5629Sraf uint32_t old_lockword; 1508*5629Sraf 15090Sstevel@tonic-gate mp->mutex_owner = 0; 15100Sstevel@tonic-gate mp->mutex_ownerpid = 0; 15110Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1512*5629Sraf old_lockword = clear_lockbyte(&mp->mutex_lockword); 1513*5629Sraf if ((old_lockword & WAITERMASK) && 1514*5629Sraf (release_all || (old_lockword & SPINNERMASK) == 0)) { 1515*5629Sraf ulwp_t *self = curthread; 1516*5629Sraf no_preempt(self); /* ensure a prompt wakeup */ 1517*5629Sraf (void) ___lwp_mutex_wakeup(mp, release_all); 1518*5629Sraf preempt(self); 15190Sstevel@tonic-gate } 15200Sstevel@tonic-gate } 15210Sstevel@tonic-gate 15220Sstevel@tonic-gate /* 15230Sstevel@tonic-gate * Return the real priority of a thread. 15240Sstevel@tonic-gate */ 15250Sstevel@tonic-gate int 15260Sstevel@tonic-gate real_priority(ulwp_t *ulwp) 15270Sstevel@tonic-gate { 15280Sstevel@tonic-gate if (ulwp->ul_epri == 0) 15290Sstevel@tonic-gate return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 15300Sstevel@tonic-gate return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); 15310Sstevel@tonic-gate } 15320Sstevel@tonic-gate 15330Sstevel@tonic-gate void 15340Sstevel@tonic-gate stall(void) 15350Sstevel@tonic-gate { 15360Sstevel@tonic-gate for (;;) 15370Sstevel@tonic-gate (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 15380Sstevel@tonic-gate } 15390Sstevel@tonic-gate 15400Sstevel@tonic-gate /* 15410Sstevel@tonic-gate * Acquire a USYNC_THREAD mutex via user-level sleep queues. 15420Sstevel@tonic-gate * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 15434574Sraf * If successful, returns with mutex_owner set correctly. 15440Sstevel@tonic-gate */ 15450Sstevel@tonic-gate int 15460Sstevel@tonic-gate mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 15470Sstevel@tonic-gate timespec_t *tsp) 15480Sstevel@tonic-gate { 15490Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 15500Sstevel@tonic-gate queue_head_t *qp; 15510Sstevel@tonic-gate hrtime_t begin_sleep; 15520Sstevel@tonic-gate int error = 0; 15530Sstevel@tonic-gate 15540Sstevel@tonic-gate self->ul_sp = stkptr(); 15550Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 15560Sstevel@tonic-gate self->ul_wchan = mp; 15570Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 15580Sstevel@tonic-gate self->ul_td_evbuf.eventdata = mp; 15590Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 15600Sstevel@tonic-gate } 15610Sstevel@tonic-gate if (msp) { 15620Sstevel@tonic-gate tdb_incr(msp->mutex_sleep); 15630Sstevel@tonic-gate begin_sleep = gethrtime(); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate DTRACE_PROBE1(plockstat, mutex__block, mp); 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate /* 15690Sstevel@tonic-gate * Put ourself on the sleep queue, and while we are 15700Sstevel@tonic-gate * unable to grab the lock, go park in the kernel. 15710Sstevel@tonic-gate * Take ourself off the sleep queue after we acquire the lock. 15720Sstevel@tonic-gate * The waiter bit can be set/cleared only while holding the queue lock. 15730Sstevel@tonic-gate */ 15740Sstevel@tonic-gate qp = queue_lock(mp, MX); 15750Sstevel@tonic-gate enqueue(qp, self, mp, MX); 15760Sstevel@tonic-gate mp->mutex_waiters = 1; 15770Sstevel@tonic-gate for (;;) { 15780Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 15790Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 15800Sstevel@tonic-gate mp->mutex_waiters = dequeue_self(qp, mp); 15810Sstevel@tonic-gate break; 15820Sstevel@tonic-gate } 15830Sstevel@tonic-gate set_parking_flag(self, 1); 15840Sstevel@tonic-gate queue_unlock(qp); 15850Sstevel@tonic-gate /* 15860Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 15870Sstevel@tonic-gate * if we are unparked before the timeout expires. 15880Sstevel@tonic-gate */ 1589*5629Sraf error = __lwp_park(tsp, 0); 15900Sstevel@tonic-gate set_parking_flag(self, 0); 15910Sstevel@tonic-gate /* 15920Sstevel@tonic-gate * We could have taken a signal or suspended ourself. 15930Sstevel@tonic-gate * If we did, then we removed ourself from the queue. 15940Sstevel@tonic-gate * Someone else may have removed us from the queue 15950Sstevel@tonic-gate * as a consequence of mutex_unlock(). We may have 15960Sstevel@tonic-gate * gotten a timeout from __lwp_park(). Or we may still 15970Sstevel@tonic-gate * be on the queue and this is just a spurious wakeup. 15980Sstevel@tonic-gate */ 15990Sstevel@tonic-gate qp = queue_lock(mp, MX); 16000Sstevel@tonic-gate if (self->ul_sleepq == NULL) { 1601*5629Sraf if (error) { 1602*5629Sraf mp->mutex_waiters = queue_waiter(qp, mp)? 1 : 0; 1603*5629Sraf if (error != EINTR) 1604*5629Sraf break; 1605*5629Sraf error = 0; 1606*5629Sraf } 16070Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 16080Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 16090Sstevel@tonic-gate break; 16100Sstevel@tonic-gate } 16110Sstevel@tonic-gate enqueue(qp, self, mp, MX); 16120Sstevel@tonic-gate mp->mutex_waiters = 1; 16130Sstevel@tonic-gate } 16140Sstevel@tonic-gate ASSERT(self->ul_sleepq == qp && 16150Sstevel@tonic-gate self->ul_qtype == MX && 16160Sstevel@tonic-gate self->ul_wchan == mp); 16170Sstevel@tonic-gate if (error) { 1618*5629Sraf if (error != EINTR) { 1619*5629Sraf mp->mutex_waiters = dequeue_self(qp, mp); 1620*5629Sraf break; 1621*5629Sraf } 1622*5629Sraf error = 0; 16230Sstevel@tonic-gate } 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 16260Sstevel@tonic-gate self->ul_wchan == NULL); 16270Sstevel@tonic-gate self->ul_sp = 0; 16280Sstevel@tonic-gate queue_unlock(qp); 16294574Sraf 16300Sstevel@tonic-gate if (msp) 16310Sstevel@tonic-gate msp->mutex_sleep_time += gethrtime() - begin_sleep; 16320Sstevel@tonic-gate 16330Sstevel@tonic-gate ASSERT(error == 0 || error == EINVAL || error == ETIME); 16344574Sraf 16354574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 16364574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 16374574Sraf /* 16384574Sraf * We shouldn't own the mutex; clear the lock. 16394574Sraf */ 16404574Sraf mp->mutex_owner = 0; 1641*5629Sraf if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 16424574Sraf mutex_wakeup_all(mp); 16434574Sraf error = ENOTRECOVERABLE; 16444574Sraf } 16454574Sraf 16464574Sraf if (error) { 16474574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 16484574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 16494574Sraf } else { 16504574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 16514574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 16524574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 16534574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 16544574Sraf error = EOWNERDEAD; 16554574Sraf } 16564574Sraf } 16574574Sraf 16580Sstevel@tonic-gate return (error); 16590Sstevel@tonic-gate } 16600Sstevel@tonic-gate 16614574Sraf static int 16624574Sraf mutex_recursion(mutex_t *mp, int mtype, int try) 16634574Sraf { 16644574Sraf ASSERT(mutex_is_held(mp)); 16654574Sraf ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 16664574Sraf ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 16674574Sraf 16684574Sraf if (mtype & LOCK_RECURSIVE) { 16694574Sraf if (mp->mutex_rcount == RECURSION_MAX) { 16704574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 16714574Sraf return (EAGAIN); 16724574Sraf } 16734574Sraf mp->mutex_rcount++; 16744574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 16754574Sraf return (0); 16764574Sraf } 16774574Sraf if (try == MUTEX_LOCK) { 16784574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 16794574Sraf return (EDEADLK); 16804574Sraf } 16814574Sraf return (EBUSY); 16824574Sraf } 16834574Sraf 16844574Sraf /* 16854574Sraf * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 16864574Sraf * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 16874574Sraf * We use tdb_hash_lock here and in the synch object tracking code in 16884574Sraf * the tdb_agent.c file. There is no conflict between these two usages. 16894574Sraf */ 16904574Sraf void 16914574Sraf register_lock(mutex_t *mp) 16924574Sraf { 16934574Sraf uberdata_t *udp = curthread->ul_uberdata; 16944574Sraf uint_t hash = LOCK_HASH(mp); 16954574Sraf robust_t *rlp; 16964574Sraf robust_t **rlpp; 16974574Sraf robust_t **table; 16984574Sraf 16994574Sraf if ((table = udp->robustlocks) == NULL) { 17004574Sraf lmutex_lock(&udp->tdb_hash_lock); 17014574Sraf if ((table = udp->robustlocks) == NULL) { 17024574Sraf table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 17034574Sraf _membar_producer(); 17044574Sraf udp->robustlocks = table; 17054574Sraf } 17064574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17074574Sraf } 17084574Sraf _membar_consumer(); 17094574Sraf 17104574Sraf /* 17114574Sraf * First search the registered table with no locks held. 17124574Sraf * This is safe because the table never shrinks 17134574Sraf * and we can only get a false negative. 17144574Sraf */ 17154574Sraf for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 17164574Sraf if (rlp->robust_lock == mp) /* already registered */ 17174574Sraf return; 17184574Sraf } 17194574Sraf 17204574Sraf /* 17214574Sraf * The lock was not found. 17224574Sraf * Repeat the operation with tdb_hash_lock held. 17234574Sraf */ 17244574Sraf lmutex_lock(&udp->tdb_hash_lock); 17254574Sraf 17264574Sraf for (rlpp = &table[hash]; 17274574Sraf (rlp = *rlpp) != NULL; 17284574Sraf rlpp = &rlp->robust_next) { 17294574Sraf if (rlp->robust_lock == mp) { /* already registered */ 17304574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17314574Sraf return; 17324574Sraf } 17334574Sraf } 17344574Sraf 17354574Sraf /* 17364574Sraf * The lock has never been registered. 17374574Sraf * Register it now and add it to the table. 17384574Sraf */ 17394574Sraf (void) ___lwp_mutex_register(mp); 17404574Sraf rlp = lmalloc(sizeof (*rlp)); 17414574Sraf rlp->robust_lock = mp; 17424574Sraf _membar_producer(); 17434574Sraf *rlpp = rlp; 17444574Sraf 17454574Sraf lmutex_unlock(&udp->tdb_hash_lock); 17464574Sraf } 17474574Sraf 17484574Sraf /* 17494574Sraf * This is called in the child of fork()/forkall() to start over 17504574Sraf * with a clean slate. (Each process must register its own locks.) 17514574Sraf * No locks are needed because all other threads are suspended or gone. 17524574Sraf */ 17534574Sraf void 17544574Sraf unregister_locks(void) 17554574Sraf { 17564574Sraf uberdata_t *udp = curthread->ul_uberdata; 17574574Sraf uint_t hash; 17584574Sraf robust_t **table; 17594574Sraf robust_t *rlp; 17604574Sraf robust_t *next; 17614574Sraf 17624574Sraf if ((table = udp->robustlocks) != NULL) { 17634574Sraf for (hash = 0; hash < LOCKHASHSZ; hash++) { 17644574Sraf rlp = table[hash]; 17654574Sraf while (rlp != NULL) { 17664574Sraf next = rlp->robust_next; 17674574Sraf lfree(rlp, sizeof (*rlp)); 17684574Sraf rlp = next; 17694574Sraf } 17704574Sraf } 17714574Sraf lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 17724574Sraf udp->robustlocks = NULL; 17734574Sraf } 17744574Sraf } 17754574Sraf 17760Sstevel@tonic-gate /* 17770Sstevel@tonic-gate * Returns with mutex_owner set correctly. 17780Sstevel@tonic-gate */ 17794574Sraf static int 17800Sstevel@tonic-gate mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 17810Sstevel@tonic-gate { 17820Sstevel@tonic-gate ulwp_t *self = curthread; 17830Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 17840Sstevel@tonic-gate int mtype = mp->mutex_type; 17850Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 17860Sstevel@tonic-gate int error = 0; 17874574Sraf uint8_t ceil; 17884574Sraf int myprio; 17890Sstevel@tonic-gate 17900Sstevel@tonic-gate ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 17910Sstevel@tonic-gate 17920Sstevel@tonic-gate if (!self->ul_schedctl_called) 17930Sstevel@tonic-gate (void) setup_schedctl(); 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate if (msp && try == MUTEX_TRY) 17960Sstevel@tonic-gate tdb_incr(msp->mutex_try); 17970Sstevel@tonic-gate 17984574Sraf if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 17994574Sraf return (mutex_recursion(mp, mtype, try)); 18000Sstevel@tonic-gate 18010Sstevel@tonic-gate if (self->ul_error_detection && try == MUTEX_LOCK && 18020Sstevel@tonic-gate tsp == NULL && mutex_is_held(mp)) 18030Sstevel@tonic-gate lock_error(mp, "mutex_lock", NULL, NULL); 18040Sstevel@tonic-gate 18054574Sraf if (mtype & LOCK_PRIO_PROTECT) { 18064574Sraf ceil = mp->mutex_ceiling; 18074574Sraf ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); 18084574Sraf myprio = real_priority(self); 18094574Sraf if (myprio > ceil) { 18104574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 18114574Sraf return (EINVAL); 18124574Sraf } 18134574Sraf if ((error = _ceil_mylist_add(mp)) != 0) { 18144574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 18154574Sraf return (error); 18160Sstevel@tonic-gate } 18174574Sraf if (myprio < ceil) 18184574Sraf _ceil_prio_inherit(ceil); 18194574Sraf } 18204574Sraf 18214574Sraf if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 18224574Sraf == (USYNC_PROCESS | LOCK_ROBUST)) 18234574Sraf register_lock(mp); 18244574Sraf 18254574Sraf if (mtype & LOCK_PRIO_INHERIT) { 18264574Sraf /* go straight to the kernel */ 18274574Sraf if (try == MUTEX_TRY) 18284574Sraf error = mutex_trylock_kernel(mp); 18294574Sraf else /* MUTEX_LOCK */ 18304574Sraf error = mutex_lock_kernel(mp, tsp, msp); 18314574Sraf /* 18324574Sraf * The kernel never sets or clears the lock byte 18334574Sraf * for LOCK_PRIO_INHERIT mutexes. 18344574Sraf * Set it here for consistency. 18354574Sraf */ 18364574Sraf switch (error) { 18374574Sraf case 0: 18384574Sraf mp->mutex_lockw = LOCKSET; 18394574Sraf break; 18404574Sraf case EOWNERDEAD: 18414574Sraf case ELOCKUNMAPPED: 18424574Sraf mp->mutex_lockw = LOCKSET; 18434574Sraf /* FALLTHROUGH */ 18444574Sraf case ENOTRECOVERABLE: 18454574Sraf ASSERT(mtype & LOCK_ROBUST); 18464574Sraf break; 18474574Sraf case EDEADLK: 18484574Sraf if (try == MUTEX_LOCK) 18494574Sraf stall(); 18504574Sraf error = EBUSY; 18514574Sraf break; 18520Sstevel@tonic-gate } 18530Sstevel@tonic-gate } else if (mtype & USYNC_PROCESS) { 18544613Sraf error = mutex_trylock_process(mp, try == MUTEX_LOCK); 18554574Sraf if (error == EBUSY && try == MUTEX_LOCK) 18560Sstevel@tonic-gate error = mutex_lock_kernel(mp, tsp, msp); 1857*5629Sraf } else { /* USYNC_THREAD */ 18584613Sraf error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 18594574Sraf if (error == EBUSY && try == MUTEX_LOCK) 18604574Sraf error = mutex_lock_queue(self, msp, mp, tsp); 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate 18630Sstevel@tonic-gate switch (error) { 18644574Sraf case 0: 18650Sstevel@tonic-gate case EOWNERDEAD: 18660Sstevel@tonic-gate case ELOCKUNMAPPED: 18674574Sraf if (mtype & LOCK_ROBUST) 18684574Sraf remember_lock(mp); 18690Sstevel@tonic-gate if (msp) 18700Sstevel@tonic-gate record_begin_hold(msp); 18710Sstevel@tonic-gate break; 18720Sstevel@tonic-gate default: 18734574Sraf if (mtype & LOCK_PRIO_PROTECT) { 18744574Sraf (void) _ceil_mylist_del(mp); 18754574Sraf if (myprio < ceil) 18764574Sraf _ceil_prio_waive(); 18774574Sraf } 18780Sstevel@tonic-gate if (try == MUTEX_TRY) { 18790Sstevel@tonic-gate if (msp) 18800Sstevel@tonic-gate tdb_incr(msp->mutex_try_fail); 18810Sstevel@tonic-gate if (__td_event_report(self, TD_LOCK_TRY, udp)) { 18820Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 18830Sstevel@tonic-gate tdb_event(TD_LOCK_TRY, udp); 18840Sstevel@tonic-gate } 18850Sstevel@tonic-gate } 18860Sstevel@tonic-gate break; 18870Sstevel@tonic-gate } 18880Sstevel@tonic-gate 18890Sstevel@tonic-gate return (error); 18900Sstevel@tonic-gate } 18910Sstevel@tonic-gate 18920Sstevel@tonic-gate int 18930Sstevel@tonic-gate fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 18940Sstevel@tonic-gate { 18950Sstevel@tonic-gate ulwp_t *self = curthread; 18960Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 18970Sstevel@tonic-gate 18980Sstevel@tonic-gate /* 18990Sstevel@tonic-gate * We know that USYNC_PROCESS is set in mtype and that 19000Sstevel@tonic-gate * zero, one, or both of the flags LOCK_RECURSIVE and 19010Sstevel@tonic-gate * LOCK_ERRORCHECK are set, and that no other flags are set. 19020Sstevel@tonic-gate */ 19034574Sraf ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 19040Sstevel@tonic-gate enter_critical(self); 19050Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 19060Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19070Sstevel@tonic-gate mp->mutex_ownerpid = udp->pid; 19080Sstevel@tonic-gate exit_critical(self); 19090Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19100Sstevel@tonic-gate return (0); 19110Sstevel@tonic-gate } 19120Sstevel@tonic-gate exit_critical(self); 19130Sstevel@tonic-gate 19144574Sraf if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 19154574Sraf return (mutex_recursion(mp, mtype, try)); 19164574Sraf 19174613Sraf if (try == MUTEX_LOCK) { 19184613Sraf if (mutex_trylock_process(mp, 1) == 0) 19194613Sraf return (0); 19200Sstevel@tonic-gate return (mutex_lock_kernel(mp, tsp, NULL)); 19214613Sraf } 19220Sstevel@tonic-gate 19230Sstevel@tonic-gate if (__td_event_report(self, TD_LOCK_TRY, udp)) { 19240Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 19250Sstevel@tonic-gate tdb_event(TD_LOCK_TRY, udp); 19260Sstevel@tonic-gate } 19270Sstevel@tonic-gate return (EBUSY); 19280Sstevel@tonic-gate } 19290Sstevel@tonic-gate 19300Sstevel@tonic-gate static int 19310Sstevel@tonic-gate mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 19320Sstevel@tonic-gate { 19330Sstevel@tonic-gate ulwp_t *self = curthread; 19340Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 19350Sstevel@tonic-gate uberflags_t *gflags; 19360Sstevel@tonic-gate int mtype; 19370Sstevel@tonic-gate 19380Sstevel@tonic-gate /* 19390Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 19400Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 19410Sstevel@tonic-gate * no error detection, no lock statistics, 19420Sstevel@tonic-gate * and the process has only a single thread. 19430Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 19440Sstevel@tonic-gate */ 19450Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 19460Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 19470Sstevel@tonic-gate /* 19480Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 19490Sstevel@tonic-gate */ 19500Sstevel@tonic-gate if (mp->mutex_lockw == 0) { 19510Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 19520Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19530Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19540Sstevel@tonic-gate return (0); 19550Sstevel@tonic-gate } 19564574Sraf if (mtype && MUTEX_OWNER(mp) == self) 19574574Sraf return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 19580Sstevel@tonic-gate /* 19590Sstevel@tonic-gate * We have reached a deadlock, probably because the 19600Sstevel@tonic-gate * process is executing non-async-signal-safe code in 19610Sstevel@tonic-gate * a signal handler and is attempting to acquire a lock 19620Sstevel@tonic-gate * that it already owns. This is not surprising, given 19630Sstevel@tonic-gate * bad programming practices over the years that has 19640Sstevel@tonic-gate * resulted in applications calling printf() and such 19650Sstevel@tonic-gate * in their signal handlers. Unless the user has told 19660Sstevel@tonic-gate * us that the signal handlers are safe by setting: 19670Sstevel@tonic-gate * export _THREAD_ASYNC_SAFE=1 19680Sstevel@tonic-gate * we return EDEADLK rather than actually deadlocking. 19690Sstevel@tonic-gate */ 19700Sstevel@tonic-gate if (tsp == NULL && 19710Sstevel@tonic-gate MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 19720Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 19730Sstevel@tonic-gate return (EDEADLK); 19740Sstevel@tonic-gate } 19750Sstevel@tonic-gate } 19760Sstevel@tonic-gate 19770Sstevel@tonic-gate /* 19780Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 19790Sstevel@tonic-gate * no error detection, and no lock statistics. 19800Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 19810Sstevel@tonic-gate */ 19820Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 19830Sstevel@tonic-gate (gflags->uf_trs_ted | 19840Sstevel@tonic-gate (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 19850Sstevel@tonic-gate if (mtype & USYNC_PROCESS) 19860Sstevel@tonic-gate return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 19870Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 19880Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 19890Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 19900Sstevel@tonic-gate return (0); 19910Sstevel@tonic-gate } 19924574Sraf if (mtype && MUTEX_OWNER(mp) == self) 19934574Sraf return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 19944613Sraf if (mutex_trylock_adaptive(mp, 1) != 0) 19954574Sraf return (mutex_lock_queue(self, NULL, mp, tsp)); 19964574Sraf return (0); 19970Sstevel@tonic-gate } 19980Sstevel@tonic-gate 19990Sstevel@tonic-gate /* else do it the long way */ 20000Sstevel@tonic-gate return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 20010Sstevel@tonic-gate } 20020Sstevel@tonic-gate 20030Sstevel@tonic-gate #pragma weak _private_mutex_lock = __mutex_lock 20040Sstevel@tonic-gate #pragma weak mutex_lock = __mutex_lock 20050Sstevel@tonic-gate #pragma weak _mutex_lock = __mutex_lock 20060Sstevel@tonic-gate #pragma weak pthread_mutex_lock = __mutex_lock 20070Sstevel@tonic-gate #pragma weak _pthread_mutex_lock = __mutex_lock 20080Sstevel@tonic-gate int 20090Sstevel@tonic-gate __mutex_lock(mutex_t *mp) 20100Sstevel@tonic-gate { 20110Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20120Sstevel@tonic-gate return (mutex_lock_impl(mp, NULL)); 20130Sstevel@tonic-gate } 20140Sstevel@tonic-gate 20150Sstevel@tonic-gate #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 20160Sstevel@tonic-gate int 20170Sstevel@tonic-gate _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 20180Sstevel@tonic-gate { 20190Sstevel@tonic-gate timespec_t tslocal; 20200Sstevel@tonic-gate int error; 20210Sstevel@tonic-gate 20220Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20230Sstevel@tonic-gate abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 20240Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 20250Sstevel@tonic-gate if (error == ETIME) 20260Sstevel@tonic-gate error = ETIMEDOUT; 20270Sstevel@tonic-gate return (error); 20280Sstevel@tonic-gate } 20290Sstevel@tonic-gate 20300Sstevel@tonic-gate #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 20310Sstevel@tonic-gate int 20320Sstevel@tonic-gate _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 20330Sstevel@tonic-gate { 20340Sstevel@tonic-gate timespec_t tslocal; 20350Sstevel@tonic-gate int error; 20360Sstevel@tonic-gate 20370Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20380Sstevel@tonic-gate tslocal = *reltime; 20390Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 20400Sstevel@tonic-gate if (error == ETIME) 20410Sstevel@tonic-gate error = ETIMEDOUT; 20420Sstevel@tonic-gate return (error); 20430Sstevel@tonic-gate } 20440Sstevel@tonic-gate 20450Sstevel@tonic-gate #pragma weak _private_mutex_trylock = __mutex_trylock 20460Sstevel@tonic-gate #pragma weak mutex_trylock = __mutex_trylock 20470Sstevel@tonic-gate #pragma weak _mutex_trylock = __mutex_trylock 20480Sstevel@tonic-gate #pragma weak pthread_mutex_trylock = __mutex_trylock 20490Sstevel@tonic-gate #pragma weak _pthread_mutex_trylock = __mutex_trylock 20500Sstevel@tonic-gate int 20510Sstevel@tonic-gate __mutex_trylock(mutex_t *mp) 20520Sstevel@tonic-gate { 20530Sstevel@tonic-gate ulwp_t *self = curthread; 20540Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 20550Sstevel@tonic-gate uberflags_t *gflags; 20560Sstevel@tonic-gate int mtype; 20570Sstevel@tonic-gate 20580Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 20590Sstevel@tonic-gate /* 20600Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 20610Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 20620Sstevel@tonic-gate * no error detection, no lock statistics, 20630Sstevel@tonic-gate * and the process has only a single thread. 20640Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 20650Sstevel@tonic-gate */ 20660Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 20670Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 20680Sstevel@tonic-gate /* 20690Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 20700Sstevel@tonic-gate */ 20710Sstevel@tonic-gate if (mp->mutex_lockw == 0) { 20720Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 20730Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 20740Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 20750Sstevel@tonic-gate return (0); 20760Sstevel@tonic-gate } 20774574Sraf if (mtype && MUTEX_OWNER(mp) == self) 20784574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 20790Sstevel@tonic-gate return (EBUSY); 20800Sstevel@tonic-gate } 20810Sstevel@tonic-gate 20820Sstevel@tonic-gate /* 20830Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 20840Sstevel@tonic-gate * no error detection, and no lock statistics. 20850Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 20860Sstevel@tonic-gate */ 20870Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 20880Sstevel@tonic-gate (gflags->uf_trs_ted | 20890Sstevel@tonic-gate (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 20900Sstevel@tonic-gate if (mtype & USYNC_PROCESS) 20910Sstevel@tonic-gate return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 20920Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 20930Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 20940Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 20950Sstevel@tonic-gate return (0); 20960Sstevel@tonic-gate } 20974574Sraf if (mtype && MUTEX_OWNER(mp) == self) 20984574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 20994613Sraf if (__td_event_report(self, TD_LOCK_TRY, udp)) { 21004613Sraf self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 21014613Sraf tdb_event(TD_LOCK_TRY, udp); 21020Sstevel@tonic-gate } 21034613Sraf return (EBUSY); 21040Sstevel@tonic-gate } 21050Sstevel@tonic-gate 21060Sstevel@tonic-gate /* else do it the long way */ 21070Sstevel@tonic-gate return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 21080Sstevel@tonic-gate } 21090Sstevel@tonic-gate 21100Sstevel@tonic-gate int 21114574Sraf mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 21120Sstevel@tonic-gate { 21130Sstevel@tonic-gate ulwp_t *self = curthread; 21140Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 21150Sstevel@tonic-gate int mtype = mp->mutex_type; 21160Sstevel@tonic-gate tdb_mutex_stats_t *msp; 21174574Sraf int error = 0; 21184574Sraf int release_all; 21190Sstevel@tonic-gate lwpid_t lwpid; 21200Sstevel@tonic-gate 21210Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 21220Sstevel@tonic-gate return (EPERM); 21230Sstevel@tonic-gate 21240Sstevel@tonic-gate if (self->ul_error_detection && !mutex_is_held(mp)) 21250Sstevel@tonic-gate lock_error(mp, "mutex_unlock", NULL, NULL); 21260Sstevel@tonic-gate 21270Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 21280Sstevel@tonic-gate mp->mutex_rcount--; 21290Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 21300Sstevel@tonic-gate return (0); 21310Sstevel@tonic-gate } 21320Sstevel@tonic-gate 21330Sstevel@tonic-gate if ((msp = MUTEX_STATS(mp, udp)) != NULL) 21340Sstevel@tonic-gate (void) record_hold_time(msp); 21350Sstevel@tonic-gate 21364574Sraf if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 21374574Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 21384574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 21394574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 21404574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 21414574Sraf } 21424574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 21434574Sraf 21444574Sraf if (mtype & LOCK_PRIO_INHERIT) { 21450Sstevel@tonic-gate no_preempt(self); 21460Sstevel@tonic-gate mp->mutex_owner = 0; 21470Sstevel@tonic-gate mp->mutex_ownerpid = 0; 21480Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 21494574Sraf mp->mutex_lockw = LOCKCLEAR; 21504574Sraf error = ___lwp_mutex_unlock(mp); 21510Sstevel@tonic-gate preempt(self); 21520Sstevel@tonic-gate } else if (mtype & USYNC_PROCESS) { 2153*5629Sraf mutex_unlock_process(mp, release_all); 21540Sstevel@tonic-gate } else { /* USYNC_THREAD */ 21554574Sraf if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 21560Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 21570Sstevel@tonic-gate preempt(self); 21580Sstevel@tonic-gate } 21590Sstevel@tonic-gate } 21600Sstevel@tonic-gate 21614574Sraf if (mtype & LOCK_ROBUST) 21624574Sraf forget_lock(mp); 21634574Sraf 21644574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 21654574Sraf _ceil_prio_waive(); 21664574Sraf 21670Sstevel@tonic-gate return (error); 21680Sstevel@tonic-gate } 21690Sstevel@tonic-gate 21700Sstevel@tonic-gate #pragma weak _private_mutex_unlock = __mutex_unlock 21710Sstevel@tonic-gate #pragma weak mutex_unlock = __mutex_unlock 21720Sstevel@tonic-gate #pragma weak _mutex_unlock = __mutex_unlock 21730Sstevel@tonic-gate #pragma weak pthread_mutex_unlock = __mutex_unlock 21740Sstevel@tonic-gate #pragma weak _pthread_mutex_unlock = __mutex_unlock 21750Sstevel@tonic-gate int 21760Sstevel@tonic-gate __mutex_unlock(mutex_t *mp) 21770Sstevel@tonic-gate { 21780Sstevel@tonic-gate ulwp_t *self = curthread; 21790Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 21800Sstevel@tonic-gate uberflags_t *gflags; 21810Sstevel@tonic-gate lwpid_t lwpid; 21820Sstevel@tonic-gate int mtype; 21830Sstevel@tonic-gate short el; 21840Sstevel@tonic-gate 21850Sstevel@tonic-gate /* 21860Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 21870Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 21880Sstevel@tonic-gate * no error detection, no lock statistics, 21890Sstevel@tonic-gate * and the process has only a single thread. 21900Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 21910Sstevel@tonic-gate */ 21920Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 21930Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 21940Sstevel@tonic-gate if (mtype) { 21950Sstevel@tonic-gate /* 21960Sstevel@tonic-gate * At this point we know that one or both of the 21970Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 21980Sstevel@tonic-gate */ 21990Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 22000Sstevel@tonic-gate return (EPERM); 22010Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22020Sstevel@tonic-gate mp->mutex_rcount--; 22030Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22040Sstevel@tonic-gate return (0); 22050Sstevel@tonic-gate } 22060Sstevel@tonic-gate } 22070Sstevel@tonic-gate /* 22080Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 22090Sstevel@tonic-gate * Also, there can be no waiters. 22100Sstevel@tonic-gate */ 22110Sstevel@tonic-gate mp->mutex_owner = 0; 22120Sstevel@tonic-gate mp->mutex_lockword = 0; 22130Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 22140Sstevel@tonic-gate return (0); 22150Sstevel@tonic-gate } 22160Sstevel@tonic-gate 22170Sstevel@tonic-gate /* 22180Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 22190Sstevel@tonic-gate * no error detection, and no lock statistics. 22200Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 22210Sstevel@tonic-gate */ 22220Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL) { 22230Sstevel@tonic-gate if (((el = gflags->uf_trs_ted) | mtype) == 0) { 22240Sstevel@tonic-gate fast_unlock: 2225*5629Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 22260Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 22270Sstevel@tonic-gate preempt(self); 22280Sstevel@tonic-gate } 22290Sstevel@tonic-gate return (0); 22300Sstevel@tonic-gate } 22310Sstevel@tonic-gate if (el) /* error detection or lock statistics */ 22320Sstevel@tonic-gate goto slow_unlock; 22330Sstevel@tonic-gate if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 22340Sstevel@tonic-gate /* 22350Sstevel@tonic-gate * At this point we know that one or both of the 22360Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 22370Sstevel@tonic-gate */ 22380Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 22390Sstevel@tonic-gate return (EPERM); 22400Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22410Sstevel@tonic-gate mp->mutex_rcount--; 22420Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22430Sstevel@tonic-gate return (0); 22440Sstevel@tonic-gate } 22450Sstevel@tonic-gate goto fast_unlock; 22460Sstevel@tonic-gate } 22470Sstevel@tonic-gate if ((mtype & 22480Sstevel@tonic-gate ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 22490Sstevel@tonic-gate /* 22500Sstevel@tonic-gate * At this point we know that zero, one, or both of the 22510Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 22520Sstevel@tonic-gate * that the USYNC_PROCESS flag is set. 22530Sstevel@tonic-gate */ 22540Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 22550Sstevel@tonic-gate return (EPERM); 22560Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22570Sstevel@tonic-gate mp->mutex_rcount--; 22580Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22590Sstevel@tonic-gate return (0); 22600Sstevel@tonic-gate } 2261*5629Sraf mutex_unlock_process(mp, 0); 22620Sstevel@tonic-gate return (0); 22630Sstevel@tonic-gate } 22640Sstevel@tonic-gate } 22650Sstevel@tonic-gate 22660Sstevel@tonic-gate /* else do it the long way */ 22670Sstevel@tonic-gate slow_unlock: 22684574Sraf return (mutex_unlock_internal(mp, 0)); 22690Sstevel@tonic-gate } 22700Sstevel@tonic-gate 22710Sstevel@tonic-gate /* 22720Sstevel@tonic-gate * Internally to the library, almost all mutex lock/unlock actions 22730Sstevel@tonic-gate * go through these lmutex_ functions, to protect critical regions. 22740Sstevel@tonic-gate * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 22750Sstevel@tonic-gate * to make these functions faster since we know that the mutex type 22760Sstevel@tonic-gate * of all internal locks is USYNC_THREAD. We also know that internal 22770Sstevel@tonic-gate * locking can never fail, so we panic if it does. 22780Sstevel@tonic-gate */ 22790Sstevel@tonic-gate void 22800Sstevel@tonic-gate lmutex_lock(mutex_t *mp) 22810Sstevel@tonic-gate { 22820Sstevel@tonic-gate ulwp_t *self = curthread; 22830Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 22840Sstevel@tonic-gate 22850Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 22860Sstevel@tonic-gate 22870Sstevel@tonic-gate enter_critical(self); 22880Sstevel@tonic-gate /* 22890Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 22900Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 22910Sstevel@tonic-gate */ 22920Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 22930Sstevel@tonic-gate /* 22940Sstevel@tonic-gate * Only one thread exists; the mutex must be free. 22950Sstevel@tonic-gate */ 22960Sstevel@tonic-gate ASSERT(mp->mutex_lockw == 0); 22970Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 22980Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 22990Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 23000Sstevel@tonic-gate } else { 23010Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 23020Sstevel@tonic-gate 23030Sstevel@tonic-gate if (!self->ul_schedctl_called) 23040Sstevel@tonic-gate (void) setup_schedctl(); 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 23070Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 23080Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 23094613Sraf } else if (mutex_trylock_adaptive(mp, 1) != 0) { 23100Sstevel@tonic-gate (void) mutex_lock_queue(self, msp, mp, NULL); 23110Sstevel@tonic-gate } 23120Sstevel@tonic-gate 23130Sstevel@tonic-gate if (msp) 23140Sstevel@tonic-gate record_begin_hold(msp); 23150Sstevel@tonic-gate } 23160Sstevel@tonic-gate } 23170Sstevel@tonic-gate 23180Sstevel@tonic-gate void 23190Sstevel@tonic-gate lmutex_unlock(mutex_t *mp) 23200Sstevel@tonic-gate { 23210Sstevel@tonic-gate ulwp_t *self = curthread; 23220Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 23230Sstevel@tonic-gate 23240Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 23250Sstevel@tonic-gate 23260Sstevel@tonic-gate /* 23270Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 23280Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 23290Sstevel@tonic-gate */ 23300Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 23310Sstevel@tonic-gate /* 23320Sstevel@tonic-gate * Only one thread exists so there can be no waiters. 23330Sstevel@tonic-gate */ 23340Sstevel@tonic-gate mp->mutex_owner = 0; 23350Sstevel@tonic-gate mp->mutex_lockword = 0; 23360Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 23370Sstevel@tonic-gate } else { 23380Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 23390Sstevel@tonic-gate lwpid_t lwpid; 23400Sstevel@tonic-gate 23410Sstevel@tonic-gate if (msp) 23420Sstevel@tonic-gate (void) record_hold_time(msp); 23434574Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 23440Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 23450Sstevel@tonic-gate preempt(self); 23460Sstevel@tonic-gate } 23470Sstevel@tonic-gate } 23480Sstevel@tonic-gate exit_critical(self); 23490Sstevel@tonic-gate } 23500Sstevel@tonic-gate 23512248Sraf /* 23522248Sraf * For specialized code in libc, like the asynchronous i/o code, 23532248Sraf * the following sig_*() locking primitives are used in order 23542248Sraf * to make the code asynchronous signal safe. Signals are 23552248Sraf * deferred while locks acquired by these functions are held. 23562248Sraf */ 23572248Sraf void 23582248Sraf sig_mutex_lock(mutex_t *mp) 23592248Sraf { 23602248Sraf sigoff(curthread); 23612248Sraf (void) _private_mutex_lock(mp); 23622248Sraf } 23632248Sraf 23642248Sraf void 23652248Sraf sig_mutex_unlock(mutex_t *mp) 23662248Sraf { 23672248Sraf (void) _private_mutex_unlock(mp); 23682248Sraf sigon(curthread); 23692248Sraf } 23702248Sraf 23712248Sraf int 23722248Sraf sig_mutex_trylock(mutex_t *mp) 23732248Sraf { 23742248Sraf int error; 23752248Sraf 23762248Sraf sigoff(curthread); 23772248Sraf if ((error = _private_mutex_trylock(mp)) != 0) 23782248Sraf sigon(curthread); 23792248Sraf return (error); 23802248Sraf } 23812248Sraf 23822248Sraf /* 23832248Sraf * sig_cond_wait() is a cancellation point. 23842248Sraf */ 23852248Sraf int 23862248Sraf sig_cond_wait(cond_t *cv, mutex_t *mp) 23872248Sraf { 23882248Sraf int error; 23892248Sraf 23902248Sraf ASSERT(curthread->ul_sigdefer != 0); 23912248Sraf _private_testcancel(); 23922248Sraf error = _cond_wait(cv, mp); 23932248Sraf if (error == EINTR && curthread->ul_cursig) { 23942248Sraf sig_mutex_unlock(mp); 23952248Sraf /* take the deferred signal here */ 23962248Sraf sig_mutex_lock(mp); 23972248Sraf } 23982248Sraf _private_testcancel(); 23992248Sraf return (error); 24002248Sraf } 24012248Sraf 24022248Sraf /* 24032248Sraf * sig_cond_reltimedwait() is a cancellation point. 24042248Sraf */ 24052248Sraf int 24062248Sraf sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 24072248Sraf { 24082248Sraf int error; 24092248Sraf 24102248Sraf ASSERT(curthread->ul_sigdefer != 0); 24112248Sraf _private_testcancel(); 24122248Sraf error = _cond_reltimedwait(cv, mp, ts); 24132248Sraf if (error == EINTR && curthread->ul_cursig) { 24142248Sraf sig_mutex_unlock(mp); 24152248Sraf /* take the deferred signal here */ 24162248Sraf sig_mutex_lock(mp); 24172248Sraf } 24182248Sraf _private_testcancel(); 24192248Sraf return (error); 24202248Sraf } 24212248Sraf 24220Sstevel@tonic-gate static int 24230Sstevel@tonic-gate shared_mutex_held(mutex_t *mparg) 24240Sstevel@tonic-gate { 24250Sstevel@tonic-gate /* 24264574Sraf * The 'volatile' is necessary to make sure the compiler doesn't 24274574Sraf * reorder the tests of the various components of the mutex. 24284574Sraf * They must be tested in this order: 24294574Sraf * mutex_lockw 24304574Sraf * mutex_owner 24314574Sraf * mutex_ownerpid 24324574Sraf * This relies on the fact that everywhere mutex_lockw is cleared, 24334574Sraf * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 24344574Sraf * is cleared, and that everywhere mutex_lockw is set, mutex_owner 24354574Sraf * and mutex_ownerpid are set after mutex_lockw is set, and that 24364574Sraf * mutex_lockw is set or cleared with a memory barrier. 24370Sstevel@tonic-gate */ 24380Sstevel@tonic-gate volatile mutex_t *mp = (volatile mutex_t *)mparg; 24390Sstevel@tonic-gate ulwp_t *self = curthread; 24400Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 24410Sstevel@tonic-gate 24424574Sraf return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 24430Sstevel@tonic-gate } 24440Sstevel@tonic-gate 24450Sstevel@tonic-gate /* 24460Sstevel@tonic-gate * Some crufty old programs define their own version of _mutex_held() 24470Sstevel@tonic-gate * to be simply return(1). This breaks internal libc logic, so we 24480Sstevel@tonic-gate * define a private version for exclusive use by libc, mutex_is_held(), 24490Sstevel@tonic-gate * and also a new public function, __mutex_held(), to be used in new 24500Sstevel@tonic-gate * code to circumvent these crufty old programs. 24510Sstevel@tonic-gate */ 24520Sstevel@tonic-gate #pragma weak mutex_held = mutex_is_held 24530Sstevel@tonic-gate #pragma weak _mutex_held = mutex_is_held 24540Sstevel@tonic-gate #pragma weak __mutex_held = mutex_is_held 24550Sstevel@tonic-gate int 24564574Sraf mutex_is_held(mutex_t *mparg) 24570Sstevel@tonic-gate { 24584574Sraf volatile mutex_t *mp = (volatile mutex_t *)mparg; 24594574Sraf 24604574Sraf if (mparg->mutex_type & USYNC_PROCESS) 24614574Sraf return (shared_mutex_held(mparg)); 24620Sstevel@tonic-gate return (MUTEX_OWNED(mp, curthread)); 24630Sstevel@tonic-gate } 24640Sstevel@tonic-gate 24650Sstevel@tonic-gate #pragma weak _private_mutex_destroy = __mutex_destroy 24660Sstevel@tonic-gate #pragma weak mutex_destroy = __mutex_destroy 24670Sstevel@tonic-gate #pragma weak _mutex_destroy = __mutex_destroy 24680Sstevel@tonic-gate #pragma weak pthread_mutex_destroy = __mutex_destroy 24690Sstevel@tonic-gate #pragma weak _pthread_mutex_destroy = __mutex_destroy 24700Sstevel@tonic-gate int 24710Sstevel@tonic-gate __mutex_destroy(mutex_t *mp) 24720Sstevel@tonic-gate { 24734574Sraf if (mp->mutex_type & USYNC_PROCESS) 24744574Sraf forget_lock(mp); 24754574Sraf (void) _memset(mp, 0, sizeof (*mp)); 24760Sstevel@tonic-gate tdb_sync_obj_deregister(mp); 24770Sstevel@tonic-gate return (0); 24780Sstevel@tonic-gate } 24790Sstevel@tonic-gate 24804574Sraf #pragma weak mutex_consistent = __mutex_consistent 24814574Sraf #pragma weak _mutex_consistent = __mutex_consistent 24824574Sraf #pragma weak pthread_mutex_consistent_np = __mutex_consistent 24834574Sraf #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 24844574Sraf int 24854574Sraf __mutex_consistent(mutex_t *mp) 24864574Sraf { 24874574Sraf /* 24884574Sraf * Do this only for an inconsistent, initialized robust lock 24894574Sraf * that we hold. For all other cases, return EINVAL. 24904574Sraf */ 24914574Sraf if (mutex_is_held(mp) && 24924574Sraf (mp->mutex_type & LOCK_ROBUST) && 24934574Sraf (mp->mutex_flag & LOCK_INITED) && 24944574Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 24954574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 24964574Sraf mp->mutex_rcount = 0; 24974574Sraf return (0); 24984574Sraf } 24994574Sraf return (EINVAL); 25004574Sraf } 25014574Sraf 25020Sstevel@tonic-gate /* 25030Sstevel@tonic-gate * Spin locks are separate from ordinary mutexes, 25040Sstevel@tonic-gate * but we use the same data structure for them. 25050Sstevel@tonic-gate */ 25060Sstevel@tonic-gate 25070Sstevel@tonic-gate #pragma weak pthread_spin_init = _pthread_spin_init 25080Sstevel@tonic-gate int 25090Sstevel@tonic-gate _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 25100Sstevel@tonic-gate { 25110Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25120Sstevel@tonic-gate 25130Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 25140Sstevel@tonic-gate if (pshared == PTHREAD_PROCESS_SHARED) 25150Sstevel@tonic-gate mp->mutex_type = USYNC_PROCESS; 25160Sstevel@tonic-gate else 25170Sstevel@tonic-gate mp->mutex_type = USYNC_THREAD; 25180Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 25190Sstevel@tonic-gate mp->mutex_magic = MUTEX_MAGIC; 25200Sstevel@tonic-gate return (0); 25210Sstevel@tonic-gate } 25220Sstevel@tonic-gate 25230Sstevel@tonic-gate #pragma weak pthread_spin_destroy = _pthread_spin_destroy 25240Sstevel@tonic-gate int 25250Sstevel@tonic-gate _pthread_spin_destroy(pthread_spinlock_t *lock) 25260Sstevel@tonic-gate { 25270Sstevel@tonic-gate (void) _memset(lock, 0, sizeof (*lock)); 25280Sstevel@tonic-gate return (0); 25290Sstevel@tonic-gate } 25300Sstevel@tonic-gate 25310Sstevel@tonic-gate #pragma weak pthread_spin_trylock = _pthread_spin_trylock 25320Sstevel@tonic-gate int 25330Sstevel@tonic-gate _pthread_spin_trylock(pthread_spinlock_t *lock) 25340Sstevel@tonic-gate { 25350Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25360Sstevel@tonic-gate ulwp_t *self = curthread; 25370Sstevel@tonic-gate int error = 0; 25380Sstevel@tonic-gate 25390Sstevel@tonic-gate no_preempt(self); 25400Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) != 0) 25410Sstevel@tonic-gate error = EBUSY; 25420Sstevel@tonic-gate else { 25430Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 25440Sstevel@tonic-gate if (mp->mutex_type == USYNC_PROCESS) 25450Sstevel@tonic-gate mp->mutex_ownerpid = self->ul_uberdata->pid; 25460Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 25470Sstevel@tonic-gate } 25480Sstevel@tonic-gate preempt(self); 25490Sstevel@tonic-gate return (error); 25500Sstevel@tonic-gate } 25510Sstevel@tonic-gate 25520Sstevel@tonic-gate #pragma weak pthread_spin_lock = _pthread_spin_lock 25530Sstevel@tonic-gate int 25540Sstevel@tonic-gate _pthread_spin_lock(pthread_spinlock_t *lock) 25550Sstevel@tonic-gate { 25564574Sraf mutex_t *mp = (mutex_t *)lock; 25574574Sraf ulwp_t *self = curthread; 25584574Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 25594574Sraf int count = 0; 25604574Sraf 25614574Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 25624574Sraf 25634574Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 25644574Sraf 25650Sstevel@tonic-gate /* 25660Sstevel@tonic-gate * We don't care whether the owner is running on a processor. 25670Sstevel@tonic-gate * We just spin because that's what this interface requires. 25680Sstevel@tonic-gate */ 25690Sstevel@tonic-gate for (;;) { 25700Sstevel@tonic-gate if (*lockp == 0) { /* lock byte appears to be clear */ 25714574Sraf no_preempt(self); 25724574Sraf if (set_lock_byte(lockp) == 0) 25734574Sraf break; 25744574Sraf preempt(self); 25750Sstevel@tonic-gate } 2576*5629Sraf if (count < INT_MAX) 2577*5629Sraf count++; 25780Sstevel@tonic-gate SMT_PAUSE(); 25790Sstevel@tonic-gate } 25804574Sraf mp->mutex_owner = (uintptr_t)self; 25814574Sraf if (mp->mutex_type == USYNC_PROCESS) 25824574Sraf mp->mutex_ownerpid = self->ul_uberdata->pid; 25834574Sraf preempt(self); 2584*5629Sraf if (count) { 2585*5629Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2586*5629Sraf } 25874574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 25884574Sraf return (0); 25890Sstevel@tonic-gate } 25900Sstevel@tonic-gate 25910Sstevel@tonic-gate #pragma weak pthread_spin_unlock = _pthread_spin_unlock 25920Sstevel@tonic-gate int 25930Sstevel@tonic-gate _pthread_spin_unlock(pthread_spinlock_t *lock) 25940Sstevel@tonic-gate { 25950Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25960Sstevel@tonic-gate ulwp_t *self = curthread; 25970Sstevel@tonic-gate 25980Sstevel@tonic-gate no_preempt(self); 25990Sstevel@tonic-gate mp->mutex_owner = 0; 26000Sstevel@tonic-gate mp->mutex_ownerpid = 0; 26010Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 26024570Sraf (void) atomic_swap_32(&mp->mutex_lockword, 0); 26030Sstevel@tonic-gate preempt(self); 26040Sstevel@tonic-gate return (0); 26050Sstevel@tonic-gate } 26060Sstevel@tonic-gate 2607*5629Sraf #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 26084574Sraf 26094574Sraf /* 26104574Sraf * Find/allocate an entry for 'lock' in our array of held locks. 26114574Sraf */ 26124574Sraf static mutex_t ** 26134574Sraf find_lock_entry(mutex_t *lock) 26144574Sraf { 26154574Sraf ulwp_t *self = curthread; 26164574Sraf mutex_t **remembered = NULL; 26174574Sraf mutex_t **lockptr; 26184574Sraf uint_t nlocks; 26194574Sraf 26204574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 26214574Sraf lockptr = self->ul_heldlocks.array; 26224574Sraf else { 26234574Sraf nlocks = 1; 26244574Sraf lockptr = &self->ul_heldlocks.single; 26254574Sraf } 26264574Sraf 26274574Sraf for (; nlocks; nlocks--, lockptr++) { 26284574Sraf if (*lockptr == lock) 26294574Sraf return (lockptr); 26304574Sraf if (*lockptr == NULL && remembered == NULL) 26314574Sraf remembered = lockptr; 26324574Sraf } 26334574Sraf if (remembered != NULL) { 26344574Sraf *remembered = lock; 26354574Sraf return (remembered); 26364574Sraf } 26374574Sraf 26384574Sraf /* 26394574Sraf * No entry available. Allocate more space, converting 26404574Sraf * the single entry into an array of entries if necessary. 26414574Sraf */ 26424574Sraf if ((nlocks = self->ul_heldlockcnt) == 0) { 26434574Sraf /* 26444574Sraf * Initial allocation of the array. 26454574Sraf * Convert the single entry into an array. 26464574Sraf */ 26474574Sraf self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 26484574Sraf lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 26494574Sraf /* 26504574Sraf * The single entry becomes the first entry in the array. 26514574Sraf */ 26524574Sraf *lockptr = self->ul_heldlocks.single; 26534574Sraf self->ul_heldlocks.array = lockptr; 26544574Sraf /* 26554574Sraf * Return the next available entry in the array. 26564574Sraf */ 26574574Sraf *++lockptr = lock; 26584574Sraf return (lockptr); 26594574Sraf } 26604574Sraf /* 26614574Sraf * Reallocate the array, double the size each time. 26624574Sraf */ 26634574Sraf lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 26644574Sraf (void) _memcpy(lockptr, self->ul_heldlocks.array, 26654574Sraf nlocks * sizeof (mutex_t *)); 26664574Sraf lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 26674574Sraf self->ul_heldlocks.array = lockptr; 26684574Sraf self->ul_heldlockcnt *= 2; 26694574Sraf /* 26704574Sraf * Return the next available entry in the newly allocated array. 26714574Sraf */ 26724574Sraf *(lockptr += nlocks) = lock; 26734574Sraf return (lockptr); 26744574Sraf } 26754574Sraf 26764574Sraf /* 26774574Sraf * Insert 'lock' into our list of held locks. 26784574Sraf * Currently only used for LOCK_ROBUST mutexes. 26794574Sraf */ 26804574Sraf void 26814574Sraf remember_lock(mutex_t *lock) 26824574Sraf { 26834574Sraf (void) find_lock_entry(lock); 26844574Sraf } 26854574Sraf 26864574Sraf /* 26874574Sraf * Remove 'lock' from our list of held locks. 26884574Sraf * Currently only used for LOCK_ROBUST mutexes. 26894574Sraf */ 26904574Sraf void 26914574Sraf forget_lock(mutex_t *lock) 26924574Sraf { 26934574Sraf *find_lock_entry(lock) = NULL; 26944574Sraf } 26954574Sraf 26964574Sraf /* 26974574Sraf * Free the array of held locks. 26984574Sraf */ 26994574Sraf void 27004574Sraf heldlock_free(ulwp_t *ulwp) 27014574Sraf { 27024574Sraf uint_t nlocks; 27034574Sraf 27044574Sraf if ((nlocks = ulwp->ul_heldlockcnt) != 0) 27054574Sraf lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 27064574Sraf ulwp->ul_heldlockcnt = 0; 27074574Sraf ulwp->ul_heldlocks.array = NULL; 27084574Sraf } 27094574Sraf 27104574Sraf /* 27114574Sraf * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 27124574Sraf * Called from _thrp_exit() to deal with abandoned locks. 27134574Sraf */ 27144574Sraf void 27154574Sraf heldlock_exit(void) 27164574Sraf { 27174574Sraf ulwp_t *self = curthread; 27184574Sraf mutex_t **lockptr; 27194574Sraf uint_t nlocks; 27204574Sraf mutex_t *mp; 27214574Sraf 27224574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 27234574Sraf lockptr = self->ul_heldlocks.array; 27244574Sraf else { 27254574Sraf nlocks = 1; 27264574Sraf lockptr = &self->ul_heldlocks.single; 27274574Sraf } 27284574Sraf 27294574Sraf for (; nlocks; nlocks--, lockptr++) { 27304574Sraf /* 27314574Sraf * The kernel takes care of transitioning held 27324574Sraf * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 27334574Sraf * We avoid that case here. 27344574Sraf */ 27354574Sraf if ((mp = *lockptr) != NULL && 27364574Sraf mutex_is_held(mp) && 27374574Sraf (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 27384574Sraf LOCK_ROBUST) { 27394574Sraf mp->mutex_rcount = 0; 27404574Sraf if (!(mp->mutex_flag & LOCK_UNMAPPED)) 27414574Sraf mp->mutex_flag |= LOCK_OWNERDEAD; 27424574Sraf (void) mutex_unlock_internal(mp, 1); 27434574Sraf } 27444574Sraf } 27454574Sraf 27464574Sraf heldlock_free(self); 27474574Sraf } 27484574Sraf 27490Sstevel@tonic-gate #pragma weak cond_init = _cond_init 27500Sstevel@tonic-gate /* ARGSUSED2 */ 27510Sstevel@tonic-gate int 27520Sstevel@tonic-gate _cond_init(cond_t *cvp, int type, void *arg) 27530Sstevel@tonic-gate { 27540Sstevel@tonic-gate if (type != USYNC_THREAD && type != USYNC_PROCESS) 27550Sstevel@tonic-gate return (EINVAL); 27560Sstevel@tonic-gate (void) _memset(cvp, 0, sizeof (*cvp)); 27570Sstevel@tonic-gate cvp->cond_type = (uint16_t)type; 27580Sstevel@tonic-gate cvp->cond_magic = COND_MAGIC; 27590Sstevel@tonic-gate return (0); 27600Sstevel@tonic-gate } 27610Sstevel@tonic-gate 27620Sstevel@tonic-gate /* 27630Sstevel@tonic-gate * cond_sleep_queue(): utility function for cond_wait_queue(). 27640Sstevel@tonic-gate * 27650Sstevel@tonic-gate * Go to sleep on a condvar sleep queue, expect to be waked up 27660Sstevel@tonic-gate * by someone calling cond_signal() or cond_broadcast() or due 27670Sstevel@tonic-gate * to receiving a UNIX signal or being cancelled, or just simply 27680Sstevel@tonic-gate * due to a spurious wakeup (like someome calling forkall()). 27690Sstevel@tonic-gate * 27700Sstevel@tonic-gate * The associated mutex is *not* reacquired before returning. 27710Sstevel@tonic-gate * That must be done by the caller of cond_sleep_queue(). 27720Sstevel@tonic-gate */ 27734574Sraf static int 27740Sstevel@tonic-gate cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 27750Sstevel@tonic-gate { 27760Sstevel@tonic-gate ulwp_t *self = curthread; 27770Sstevel@tonic-gate queue_head_t *qp; 27780Sstevel@tonic-gate queue_head_t *mqp; 27790Sstevel@tonic-gate lwpid_t lwpid; 27800Sstevel@tonic-gate int signalled; 27810Sstevel@tonic-gate int error; 27824574Sraf int release_all; 27830Sstevel@tonic-gate 27840Sstevel@tonic-gate /* 27850Sstevel@tonic-gate * Put ourself on the CV sleep queue, unlock the mutex, then 27860Sstevel@tonic-gate * park ourself and unpark a candidate lwp to grab the mutex. 27870Sstevel@tonic-gate * We must go onto the CV sleep queue before dropping the 27880Sstevel@tonic-gate * mutex in order to guarantee atomicity of the operation. 27890Sstevel@tonic-gate */ 27900Sstevel@tonic-gate self->ul_sp = stkptr(); 27910Sstevel@tonic-gate qp = queue_lock(cvp, CV); 27920Sstevel@tonic-gate enqueue(qp, self, cvp, CV); 27930Sstevel@tonic-gate cvp->cond_waiters_user = 1; 27940Sstevel@tonic-gate self->ul_cvmutex = mp; 27950Sstevel@tonic-gate self->ul_cv_wake = (tsp != NULL); 27960Sstevel@tonic-gate self->ul_signalled = 0; 27974574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 27984574Sraf mp->mutex_flag &= ~LOCK_OWNERDEAD; 27994574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 28004574Sraf } 28014574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 28024574Sraf lwpid = mutex_unlock_queue(mp, release_all); 28030Sstevel@tonic-gate for (;;) { 28040Sstevel@tonic-gate set_parking_flag(self, 1); 28050Sstevel@tonic-gate queue_unlock(qp); 28060Sstevel@tonic-gate if (lwpid != 0) { 28070Sstevel@tonic-gate lwpid = preempt_unpark(self, lwpid); 28080Sstevel@tonic-gate preempt(self); 28090Sstevel@tonic-gate } 28100Sstevel@tonic-gate /* 28110Sstevel@tonic-gate * We may have a deferred signal present, 28120Sstevel@tonic-gate * in which case we should return EINTR. 28130Sstevel@tonic-gate * Also, we may have received a SIGCANCEL; if so 28140Sstevel@tonic-gate * and we are cancelable we should return EINTR. 28150Sstevel@tonic-gate * We force an immediate EINTR return from 28160Sstevel@tonic-gate * __lwp_park() by turning our parking flag off. 28170Sstevel@tonic-gate */ 28180Sstevel@tonic-gate if (self->ul_cursig != 0 || 28190Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 28200Sstevel@tonic-gate set_parking_flag(self, 0); 28210Sstevel@tonic-gate /* 28220Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 28230Sstevel@tonic-gate * if we are unparked before the timeout expires. 28240Sstevel@tonic-gate */ 28250Sstevel@tonic-gate error = __lwp_park(tsp, lwpid); 28260Sstevel@tonic-gate set_parking_flag(self, 0); 28270Sstevel@tonic-gate lwpid = 0; /* unpark the other lwp only once */ 28280Sstevel@tonic-gate /* 28290Sstevel@tonic-gate * We were waked up by cond_signal(), cond_broadcast(), 28300Sstevel@tonic-gate * by an interrupt or timeout (EINTR or ETIME), 28310Sstevel@tonic-gate * or we may just have gotten a spurious wakeup. 28320Sstevel@tonic-gate */ 28330Sstevel@tonic-gate qp = queue_lock(cvp, CV); 28340Sstevel@tonic-gate mqp = queue_lock(mp, MX); 28350Sstevel@tonic-gate if (self->ul_sleepq == NULL) 28360Sstevel@tonic-gate break; 28370Sstevel@tonic-gate /* 28380Sstevel@tonic-gate * We are on either the condvar sleep queue or the 28391893Sraf * mutex sleep queue. Break out of the sleep if we 28401893Sraf * were interrupted or we timed out (EINTR or ETIME). 28410Sstevel@tonic-gate * Else this is a spurious wakeup; continue the loop. 28420Sstevel@tonic-gate */ 28431893Sraf if (self->ul_sleepq == mqp) { /* mutex queue */ 28441893Sraf if (error) { 28451893Sraf mp->mutex_waiters = dequeue_self(mqp, mp); 28461893Sraf break; 28471893Sraf } 28481893Sraf tsp = NULL; /* no more timeout */ 28491893Sraf } else if (self->ul_sleepq == qp) { /* condvar queue */ 28500Sstevel@tonic-gate if (error) { 28510Sstevel@tonic-gate cvp->cond_waiters_user = dequeue_self(qp, cvp); 28520Sstevel@tonic-gate break; 28530Sstevel@tonic-gate } 28540Sstevel@tonic-gate /* 28550Sstevel@tonic-gate * Else a spurious wakeup on the condvar queue. 28560Sstevel@tonic-gate * __lwp_park() has already adjusted the timeout. 28570Sstevel@tonic-gate */ 28580Sstevel@tonic-gate } else { 28590Sstevel@tonic-gate thr_panic("cond_sleep_queue(): thread not on queue"); 28600Sstevel@tonic-gate } 28610Sstevel@tonic-gate queue_unlock(mqp); 28620Sstevel@tonic-gate } 28630Sstevel@tonic-gate 28640Sstevel@tonic-gate self->ul_sp = 0; 28650Sstevel@tonic-gate ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); 28660Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 28670Sstevel@tonic-gate self->ul_wchan == NULL); 28680Sstevel@tonic-gate 28690Sstevel@tonic-gate signalled = self->ul_signalled; 28700Sstevel@tonic-gate self->ul_signalled = 0; 28710Sstevel@tonic-gate queue_unlock(qp); 28720Sstevel@tonic-gate queue_unlock(mqp); 28730Sstevel@tonic-gate 28740Sstevel@tonic-gate /* 28750Sstevel@tonic-gate * If we were concurrently cond_signal()d and any of: 28760Sstevel@tonic-gate * received a UNIX signal, were cancelled, or got a timeout, 28770Sstevel@tonic-gate * then perform another cond_signal() to avoid consuming it. 28780Sstevel@tonic-gate */ 28790Sstevel@tonic-gate if (error && signalled) 28800Sstevel@tonic-gate (void) cond_signal_internal(cvp); 28810Sstevel@tonic-gate 28820Sstevel@tonic-gate return (error); 28830Sstevel@tonic-gate } 28840Sstevel@tonic-gate 28850Sstevel@tonic-gate int 2886*5629Sraf cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 28870Sstevel@tonic-gate { 28880Sstevel@tonic-gate ulwp_t *self = curthread; 28890Sstevel@tonic-gate int error; 28904574Sraf int merror; 28910Sstevel@tonic-gate 28920Sstevel@tonic-gate /* 28930Sstevel@tonic-gate * The old thread library was programmed to defer signals 28940Sstevel@tonic-gate * while in cond_wait() so that the associated mutex would 28950Sstevel@tonic-gate * be guaranteed to be held when the application signal 28960Sstevel@tonic-gate * handler was invoked. 28970Sstevel@tonic-gate * 28980Sstevel@tonic-gate * We do not behave this way by default; the state of the 28990Sstevel@tonic-gate * associated mutex in the signal handler is undefined. 29000Sstevel@tonic-gate * 29010Sstevel@tonic-gate * To accommodate applications that depend on the old 29020Sstevel@tonic-gate * behavior, the _THREAD_COND_WAIT_DEFER environment 29030Sstevel@tonic-gate * variable can be set to 1 and we will behave in the 29040Sstevel@tonic-gate * old way with respect to cond_wait(). 29050Sstevel@tonic-gate */ 29060Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29070Sstevel@tonic-gate sigoff(self); 29080Sstevel@tonic-gate 29090Sstevel@tonic-gate error = cond_sleep_queue(cvp, mp, tsp); 29100Sstevel@tonic-gate 29110Sstevel@tonic-gate /* 29120Sstevel@tonic-gate * Reacquire the mutex. 29130Sstevel@tonic-gate */ 2914*5629Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 29154574Sraf error = merror; 29160Sstevel@tonic-gate 29170Sstevel@tonic-gate /* 29180Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 29190Sstevel@tonic-gate */ 29200Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29210Sstevel@tonic-gate sigon(self); 29220Sstevel@tonic-gate 29230Sstevel@tonic-gate return (error); 29240Sstevel@tonic-gate } 29250Sstevel@tonic-gate 29260Sstevel@tonic-gate /* 29270Sstevel@tonic-gate * cond_sleep_kernel(): utility function for cond_wait_kernel(). 29280Sstevel@tonic-gate * See the comment ahead of cond_sleep_queue(), above. 29290Sstevel@tonic-gate */ 29304574Sraf static int 29310Sstevel@tonic-gate cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29320Sstevel@tonic-gate { 29330Sstevel@tonic-gate int mtype = mp->mutex_type; 29340Sstevel@tonic-gate ulwp_t *self = curthread; 29350Sstevel@tonic-gate int error; 29360Sstevel@tonic-gate 29374574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 29384574Sraf _ceil_prio_waive(); 29390Sstevel@tonic-gate 29400Sstevel@tonic-gate self->ul_sp = stkptr(); 29410Sstevel@tonic-gate self->ul_wchan = cvp; 29420Sstevel@tonic-gate mp->mutex_owner = 0; 29430Sstevel@tonic-gate mp->mutex_ownerpid = 0; 29444574Sraf if (mtype & LOCK_PRIO_INHERIT) 29450Sstevel@tonic-gate mp->mutex_lockw = LOCKCLEAR; 29460Sstevel@tonic-gate /* 29470Sstevel@tonic-gate * ___lwp_cond_wait() returns immediately with EINTR if 29480Sstevel@tonic-gate * set_parking_flag(self,0) is called on this lwp before it 29490Sstevel@tonic-gate * goes to sleep in the kernel. sigacthandler() calls this 29500Sstevel@tonic-gate * when a deferred signal is noted. This assures that we don't 29510Sstevel@tonic-gate * get stuck in ___lwp_cond_wait() with all signals blocked 29520Sstevel@tonic-gate * due to taking a deferred signal before going to sleep. 29530Sstevel@tonic-gate */ 29540Sstevel@tonic-gate set_parking_flag(self, 1); 29550Sstevel@tonic-gate if (self->ul_cursig != 0 || 29560Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 29570Sstevel@tonic-gate set_parking_flag(self, 0); 29580Sstevel@tonic-gate error = ___lwp_cond_wait(cvp, mp, tsp, 1); 29590Sstevel@tonic-gate set_parking_flag(self, 0); 29600Sstevel@tonic-gate self->ul_sp = 0; 29610Sstevel@tonic-gate self->ul_wchan = NULL; 29620Sstevel@tonic-gate return (error); 29630Sstevel@tonic-gate } 29640Sstevel@tonic-gate 29650Sstevel@tonic-gate int 29660Sstevel@tonic-gate cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29670Sstevel@tonic-gate { 29680Sstevel@tonic-gate ulwp_t *self = curthread; 29690Sstevel@tonic-gate int error; 29700Sstevel@tonic-gate int merror; 29710Sstevel@tonic-gate 29720Sstevel@tonic-gate /* 29730Sstevel@tonic-gate * See the large comment in cond_wait_queue(), above. 29740Sstevel@tonic-gate */ 29750Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29760Sstevel@tonic-gate sigoff(self); 29770Sstevel@tonic-gate 29780Sstevel@tonic-gate error = cond_sleep_kernel(cvp, mp, tsp); 29790Sstevel@tonic-gate 29800Sstevel@tonic-gate /* 29810Sstevel@tonic-gate * Override the return code from ___lwp_cond_wait() 29820Sstevel@tonic-gate * with any non-zero return code from mutex_lock(). 29830Sstevel@tonic-gate * This addresses robust lock failures in particular; 29840Sstevel@tonic-gate * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 29850Sstevel@tonic-gate * errors in order to take corrective action. 29860Sstevel@tonic-gate */ 2987*5629Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 29880Sstevel@tonic-gate error = merror; 29890Sstevel@tonic-gate 29900Sstevel@tonic-gate /* 29910Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 29920Sstevel@tonic-gate */ 29930Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29940Sstevel@tonic-gate sigon(self); 29950Sstevel@tonic-gate 29960Sstevel@tonic-gate return (error); 29970Sstevel@tonic-gate } 29980Sstevel@tonic-gate 29990Sstevel@tonic-gate /* 30000Sstevel@tonic-gate * Common code for _cond_wait() and _cond_timedwait() 30010Sstevel@tonic-gate */ 30020Sstevel@tonic-gate int 30030Sstevel@tonic-gate cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 30040Sstevel@tonic-gate { 30050Sstevel@tonic-gate int mtype = mp->mutex_type; 30060Sstevel@tonic-gate hrtime_t begin_sleep = 0; 30070Sstevel@tonic-gate ulwp_t *self = curthread; 30080Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 30090Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 30100Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 30110Sstevel@tonic-gate uint8_t rcount; 30120Sstevel@tonic-gate int error = 0; 30130Sstevel@tonic-gate 30140Sstevel@tonic-gate /* 30150Sstevel@tonic-gate * The SUSV3 Posix spec for pthread_cond_timedwait() states: 30160Sstevel@tonic-gate * Except in the case of [ETIMEDOUT], all these error checks 30170Sstevel@tonic-gate * shall act as if they were performed immediately at the 30180Sstevel@tonic-gate * beginning of processing for the function and shall cause 30190Sstevel@tonic-gate * an error return, in effect, prior to modifying the state 30200Sstevel@tonic-gate * of the mutex specified by mutex or the condition variable 30210Sstevel@tonic-gate * specified by cond. 30220Sstevel@tonic-gate * Therefore, we must return EINVAL now if the timout is invalid. 30230Sstevel@tonic-gate */ 30240Sstevel@tonic-gate if (tsp != NULL && 30250Sstevel@tonic-gate (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 30260Sstevel@tonic-gate return (EINVAL); 30270Sstevel@tonic-gate 30280Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 30290Sstevel@tonic-gate self->ul_sp = stkptr(); 30300Sstevel@tonic-gate self->ul_wchan = cvp; 30310Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 30320Sstevel@tonic-gate self->ul_td_evbuf.eventdata = cvp; 30330Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 30340Sstevel@tonic-gate self->ul_sp = 0; 30350Sstevel@tonic-gate } 30360Sstevel@tonic-gate if (csp) { 30370Sstevel@tonic-gate if (tsp) 30380Sstevel@tonic-gate tdb_incr(csp->cond_timedwait); 30390Sstevel@tonic-gate else 30400Sstevel@tonic-gate tdb_incr(csp->cond_wait); 30410Sstevel@tonic-gate } 30420Sstevel@tonic-gate if (msp) 30430Sstevel@tonic-gate begin_sleep = record_hold_time(msp); 30440Sstevel@tonic-gate else if (csp) 30450Sstevel@tonic-gate begin_sleep = gethrtime(); 30460Sstevel@tonic-gate 30470Sstevel@tonic-gate if (self->ul_error_detection) { 30480Sstevel@tonic-gate if (!mutex_is_held(mp)) 30490Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, NULL); 30500Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 30510Sstevel@tonic-gate lock_error(mp, "recursive mutex in cond_wait", 3052*5629Sraf cvp, NULL); 30530Sstevel@tonic-gate if (cvp->cond_type & USYNC_PROCESS) { 30544574Sraf if (!(mtype & USYNC_PROCESS)) 30550Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 3056*5629Sraf "condvar process-shared, " 3057*5629Sraf "mutex process-private"); 30580Sstevel@tonic-gate } else { 30594574Sraf if (mtype & USYNC_PROCESS) 30600Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 3061*5629Sraf "condvar process-private, " 3062*5629Sraf "mutex process-shared"); 30630Sstevel@tonic-gate } 30640Sstevel@tonic-gate } 30650Sstevel@tonic-gate 30660Sstevel@tonic-gate /* 30670Sstevel@tonic-gate * We deal with recursive mutexes by completely 30680Sstevel@tonic-gate * dropping the lock and restoring the recursion 30690Sstevel@tonic-gate * count after waking up. This is arguably wrong, 30700Sstevel@tonic-gate * but it obeys the principle of least astonishment. 30710Sstevel@tonic-gate */ 30720Sstevel@tonic-gate rcount = mp->mutex_rcount; 30730Sstevel@tonic-gate mp->mutex_rcount = 0; 30744574Sraf if ((mtype & 30754574Sraf (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 30760Sstevel@tonic-gate (cvp->cond_type & USYNC_PROCESS)) 30770Sstevel@tonic-gate error = cond_wait_kernel(cvp, mp, tsp); 30780Sstevel@tonic-gate else 3079*5629Sraf error = cond_wait_queue(cvp, mp, tsp); 30800Sstevel@tonic-gate mp->mutex_rcount = rcount; 30810Sstevel@tonic-gate 30820Sstevel@tonic-gate if (csp) { 30830Sstevel@tonic-gate hrtime_t lapse = gethrtime() - begin_sleep; 30840Sstevel@tonic-gate if (tsp == NULL) 30850Sstevel@tonic-gate csp->cond_wait_sleep_time += lapse; 30860Sstevel@tonic-gate else { 30870Sstevel@tonic-gate csp->cond_timedwait_sleep_time += lapse; 30880Sstevel@tonic-gate if (error == ETIME) 30890Sstevel@tonic-gate tdb_incr(csp->cond_timedwait_timeout); 30900Sstevel@tonic-gate } 30910Sstevel@tonic-gate } 30920Sstevel@tonic-gate return (error); 30930Sstevel@tonic-gate } 30940Sstevel@tonic-gate 30950Sstevel@tonic-gate /* 30960Sstevel@tonic-gate * cond_wait() is a cancellation point but _cond_wait() is not. 30970Sstevel@tonic-gate * System libraries call the non-cancellation version. 30980Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 30990Sstevel@tonic-gate */ 31000Sstevel@tonic-gate int 31010Sstevel@tonic-gate _cond_wait(cond_t *cvp, mutex_t *mp) 31020Sstevel@tonic-gate { 31030Sstevel@tonic-gate ulwp_t *self = curthread; 31040Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 31050Sstevel@tonic-gate uberflags_t *gflags; 31060Sstevel@tonic-gate 31070Sstevel@tonic-gate /* 31080Sstevel@tonic-gate * Optimize the common case of USYNC_THREAD plus 31090Sstevel@tonic-gate * no error detection, no lock statistics, and no event tracing. 31100Sstevel@tonic-gate */ 31110Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 31120Sstevel@tonic-gate (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 31130Sstevel@tonic-gate self->ul_td_events_enable | 31140Sstevel@tonic-gate udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3115*5629Sraf return (cond_wait_queue(cvp, mp, NULL)); 31160Sstevel@tonic-gate 31170Sstevel@tonic-gate /* 31180Sstevel@tonic-gate * Else do it the long way. 31190Sstevel@tonic-gate */ 31200Sstevel@tonic-gate return (cond_wait_common(cvp, mp, NULL)); 31210Sstevel@tonic-gate } 31220Sstevel@tonic-gate 31230Sstevel@tonic-gate int 31240Sstevel@tonic-gate cond_wait(cond_t *cvp, mutex_t *mp) 31250Sstevel@tonic-gate { 31260Sstevel@tonic-gate int error; 31270Sstevel@tonic-gate 31280Sstevel@tonic-gate _cancelon(); 31290Sstevel@tonic-gate error = _cond_wait(cvp, mp); 31300Sstevel@tonic-gate if (error == EINTR) 31310Sstevel@tonic-gate _canceloff(); 31320Sstevel@tonic-gate else 31330Sstevel@tonic-gate _canceloff_nocancel(); 31340Sstevel@tonic-gate return (error); 31350Sstevel@tonic-gate } 31360Sstevel@tonic-gate 31370Sstevel@tonic-gate #pragma weak pthread_cond_wait = _pthread_cond_wait 31380Sstevel@tonic-gate int 31390Sstevel@tonic-gate _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 31400Sstevel@tonic-gate { 31410Sstevel@tonic-gate int error; 31420Sstevel@tonic-gate 31430Sstevel@tonic-gate error = cond_wait(cvp, mp); 31440Sstevel@tonic-gate return ((error == EINTR)? 0 : error); 31450Sstevel@tonic-gate } 31460Sstevel@tonic-gate 31470Sstevel@tonic-gate /* 31480Sstevel@tonic-gate * cond_timedwait() is a cancellation point but _cond_timedwait() is not. 31490Sstevel@tonic-gate * System libraries call the non-cancellation version. 31500Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 31510Sstevel@tonic-gate */ 31520Sstevel@tonic-gate int 31530Sstevel@tonic-gate _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31540Sstevel@tonic-gate { 31550Sstevel@tonic-gate clockid_t clock_id = cvp->cond_clockid; 31560Sstevel@tonic-gate timespec_t reltime; 31570Sstevel@tonic-gate int error; 31580Sstevel@tonic-gate 31590Sstevel@tonic-gate if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 31600Sstevel@tonic-gate clock_id = CLOCK_REALTIME; 31610Sstevel@tonic-gate abstime_to_reltime(clock_id, abstime, &reltime); 31620Sstevel@tonic-gate error = cond_wait_common(cvp, mp, &reltime); 31630Sstevel@tonic-gate if (error == ETIME && clock_id == CLOCK_HIGHRES) { 31640Sstevel@tonic-gate /* 31650Sstevel@tonic-gate * Don't return ETIME if we didn't really get a timeout. 31660Sstevel@tonic-gate * This can happen if we return because someone resets 31670Sstevel@tonic-gate * the system clock. Just return zero in this case, 31680Sstevel@tonic-gate * giving a spurious wakeup but not a timeout. 31690Sstevel@tonic-gate */ 31700Sstevel@tonic-gate if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 31710Sstevel@tonic-gate abstime->tv_nsec > gethrtime()) 31720Sstevel@tonic-gate error = 0; 31730Sstevel@tonic-gate } 31740Sstevel@tonic-gate return (error); 31750Sstevel@tonic-gate } 31760Sstevel@tonic-gate 31770Sstevel@tonic-gate int 31780Sstevel@tonic-gate cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31790Sstevel@tonic-gate { 31800Sstevel@tonic-gate int error; 31810Sstevel@tonic-gate 31820Sstevel@tonic-gate _cancelon(); 31830Sstevel@tonic-gate error = _cond_timedwait(cvp, mp, abstime); 31840Sstevel@tonic-gate if (error == EINTR) 31850Sstevel@tonic-gate _canceloff(); 31860Sstevel@tonic-gate else 31870Sstevel@tonic-gate _canceloff_nocancel(); 31880Sstevel@tonic-gate return (error); 31890Sstevel@tonic-gate } 31900Sstevel@tonic-gate 31910Sstevel@tonic-gate #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 31920Sstevel@tonic-gate int 31930Sstevel@tonic-gate _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31940Sstevel@tonic-gate { 31950Sstevel@tonic-gate int error; 31960Sstevel@tonic-gate 31970Sstevel@tonic-gate error = cond_timedwait(cvp, mp, abstime); 31980Sstevel@tonic-gate if (error == ETIME) 31990Sstevel@tonic-gate error = ETIMEDOUT; 32000Sstevel@tonic-gate else if (error == EINTR) 32010Sstevel@tonic-gate error = 0; 32020Sstevel@tonic-gate return (error); 32030Sstevel@tonic-gate } 32040Sstevel@tonic-gate 32050Sstevel@tonic-gate /* 32060Sstevel@tonic-gate * cond_reltimedwait() is a cancellation point but _cond_reltimedwait() 32070Sstevel@tonic-gate * is not. System libraries call the non-cancellation version. 32080Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 32090Sstevel@tonic-gate */ 32100Sstevel@tonic-gate int 32110Sstevel@tonic-gate _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 32120Sstevel@tonic-gate { 32130Sstevel@tonic-gate timespec_t tslocal = *reltime; 32140Sstevel@tonic-gate 32150Sstevel@tonic-gate return (cond_wait_common(cvp, mp, &tslocal)); 32160Sstevel@tonic-gate } 32170Sstevel@tonic-gate 32180Sstevel@tonic-gate #pragma weak cond_reltimedwait = _cond_reltimedwait_cancel 32190Sstevel@tonic-gate int 32200Sstevel@tonic-gate _cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 32210Sstevel@tonic-gate { 32220Sstevel@tonic-gate int error; 32230Sstevel@tonic-gate 32240Sstevel@tonic-gate _cancelon(); 32250Sstevel@tonic-gate error = _cond_reltimedwait(cvp, mp, reltime); 32260Sstevel@tonic-gate if (error == EINTR) 32270Sstevel@tonic-gate _canceloff(); 32280Sstevel@tonic-gate else 32290Sstevel@tonic-gate _canceloff_nocancel(); 32300Sstevel@tonic-gate return (error); 32310Sstevel@tonic-gate } 32320Sstevel@tonic-gate 32330Sstevel@tonic-gate #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 32340Sstevel@tonic-gate int 32350Sstevel@tonic-gate _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 32360Sstevel@tonic-gate const timespec_t *reltime) 32370Sstevel@tonic-gate { 32380Sstevel@tonic-gate int error; 32390Sstevel@tonic-gate 32400Sstevel@tonic-gate error = _cond_reltimedwait_cancel(cvp, mp, reltime); 32410Sstevel@tonic-gate if (error == ETIME) 32420Sstevel@tonic-gate error = ETIMEDOUT; 32430Sstevel@tonic-gate else if (error == EINTR) 32440Sstevel@tonic-gate error = 0; 32450Sstevel@tonic-gate return (error); 32460Sstevel@tonic-gate } 32470Sstevel@tonic-gate 32480Sstevel@tonic-gate #pragma weak pthread_cond_signal = cond_signal_internal 32490Sstevel@tonic-gate #pragma weak _pthread_cond_signal = cond_signal_internal 32500Sstevel@tonic-gate #pragma weak cond_signal = cond_signal_internal 32510Sstevel@tonic-gate #pragma weak _cond_signal = cond_signal_internal 32520Sstevel@tonic-gate int 32530Sstevel@tonic-gate cond_signal_internal(cond_t *cvp) 32540Sstevel@tonic-gate { 32550Sstevel@tonic-gate ulwp_t *self = curthread; 32560Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 32570Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 32580Sstevel@tonic-gate int error = 0; 32590Sstevel@tonic-gate queue_head_t *qp; 32600Sstevel@tonic-gate mutex_t *mp; 32610Sstevel@tonic-gate queue_head_t *mqp; 32620Sstevel@tonic-gate ulwp_t **ulwpp; 32630Sstevel@tonic-gate ulwp_t *ulwp; 32640Sstevel@tonic-gate ulwp_t *prev = NULL; 32650Sstevel@tonic-gate ulwp_t *next; 32660Sstevel@tonic-gate ulwp_t **suspp = NULL; 32670Sstevel@tonic-gate ulwp_t *susprev; 32680Sstevel@tonic-gate 32690Sstevel@tonic-gate if (csp) 32700Sstevel@tonic-gate tdb_incr(csp->cond_signal); 32710Sstevel@tonic-gate 32720Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 32730Sstevel@tonic-gate error = __lwp_cond_signal(cvp); 32740Sstevel@tonic-gate 32750Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 32760Sstevel@tonic-gate return (error); 32770Sstevel@tonic-gate 32780Sstevel@tonic-gate /* 32790Sstevel@tonic-gate * Move someone from the condvar sleep queue to the mutex sleep 32800Sstevel@tonic-gate * queue for the mutex that he will acquire on being waked up. 32810Sstevel@tonic-gate * We can do this only if we own the mutex he will acquire. 32820Sstevel@tonic-gate * If we do not own the mutex, or if his ul_cv_wake flag 32830Sstevel@tonic-gate * is set, just dequeue and unpark him. 32840Sstevel@tonic-gate */ 32850Sstevel@tonic-gate qp = queue_lock(cvp, CV); 32860Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 32870Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 32880Sstevel@tonic-gate if (ulwp->ul_wchan == cvp) { 32890Sstevel@tonic-gate if (!ulwp->ul_stop) 32900Sstevel@tonic-gate break; 32910Sstevel@tonic-gate /* 32920Sstevel@tonic-gate * Try not to dequeue a suspended thread. 32930Sstevel@tonic-gate * This mimics the old libthread's behavior. 32940Sstevel@tonic-gate */ 32950Sstevel@tonic-gate if (suspp == NULL) { 32960Sstevel@tonic-gate suspp = ulwpp; 32970Sstevel@tonic-gate susprev = prev; 32980Sstevel@tonic-gate } 32990Sstevel@tonic-gate } 33000Sstevel@tonic-gate } 33010Sstevel@tonic-gate if (ulwp == NULL && suspp != NULL) { 33020Sstevel@tonic-gate ulwp = *(ulwpp = suspp); 33030Sstevel@tonic-gate prev = susprev; 33040Sstevel@tonic-gate suspp = NULL; 33050Sstevel@tonic-gate } 33060Sstevel@tonic-gate if (ulwp == NULL) { /* no one on the sleep queue */ 33070Sstevel@tonic-gate cvp->cond_waiters_user = 0; 33080Sstevel@tonic-gate queue_unlock(qp); 33090Sstevel@tonic-gate return (error); 33100Sstevel@tonic-gate } 33110Sstevel@tonic-gate /* 33120Sstevel@tonic-gate * Scan the remainder of the CV queue for another waiter. 33130Sstevel@tonic-gate */ 33140Sstevel@tonic-gate if (suspp != NULL) { 33150Sstevel@tonic-gate next = *suspp; 33160Sstevel@tonic-gate } else { 33170Sstevel@tonic-gate for (next = ulwp->ul_link; next != NULL; next = next->ul_link) 33180Sstevel@tonic-gate if (next->ul_wchan == cvp) 33190Sstevel@tonic-gate break; 33200Sstevel@tonic-gate } 33210Sstevel@tonic-gate if (next == NULL) 33220Sstevel@tonic-gate cvp->cond_waiters_user = 0; 33230Sstevel@tonic-gate 33240Sstevel@tonic-gate /* 33250Sstevel@tonic-gate * Inform the thread that he was the recipient of a cond_signal(). 33260Sstevel@tonic-gate * This lets him deal with cond_signal() and, concurrently, 33270Sstevel@tonic-gate * one or more of a cancellation, a UNIX signal, or a timeout. 33280Sstevel@tonic-gate * These latter conditions must not consume a cond_signal(). 33290Sstevel@tonic-gate */ 33300Sstevel@tonic-gate ulwp->ul_signalled = 1; 33310Sstevel@tonic-gate 33320Sstevel@tonic-gate /* 33330Sstevel@tonic-gate * Dequeue the waiter but leave his ul_sleepq non-NULL 33340Sstevel@tonic-gate * while we move him to the mutex queue so that he can 33350Sstevel@tonic-gate * deal properly with spurious wakeups. 33360Sstevel@tonic-gate */ 33370Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 33384574Sraf ulwp->ul_link = NULL; 33390Sstevel@tonic-gate if (qp->qh_tail == ulwp) 33400Sstevel@tonic-gate qp->qh_tail = prev; 33410Sstevel@tonic-gate qp->qh_qlen--; 33420Sstevel@tonic-gate 33430Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 33440Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 33450Sstevel@tonic-gate ASSERT(mp != NULL); 33460Sstevel@tonic-gate 33470Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 33480Sstevel@tonic-gate lwpid_t lwpid = ulwp->ul_lwpid; 33490Sstevel@tonic-gate 33500Sstevel@tonic-gate no_preempt(self); 33510Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 33520Sstevel@tonic-gate ulwp->ul_wchan = NULL; 33530Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 33540Sstevel@tonic-gate queue_unlock(qp); 33550Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 33560Sstevel@tonic-gate preempt(self); 33570Sstevel@tonic-gate } else { 33580Sstevel@tonic-gate mqp = queue_lock(mp, MX); 33590Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 33600Sstevel@tonic-gate mp->mutex_waiters = 1; 33610Sstevel@tonic-gate queue_unlock(mqp); 33620Sstevel@tonic-gate queue_unlock(qp); 33630Sstevel@tonic-gate } 33640Sstevel@tonic-gate 33650Sstevel@tonic-gate return (error); 33660Sstevel@tonic-gate } 33670Sstevel@tonic-gate 33684570Sraf /* 33694574Sraf * Utility function called by mutex_wakeup_all(), cond_broadcast(), 33704574Sraf * and rw_queue_release() to (re)allocate a big buffer to hold the 33714574Sraf * lwpids of all the threads to be set running after they are removed 33724574Sraf * from their sleep queues. Since we are holding a queue lock, we 33734574Sraf * cannot call any function that might acquire a lock. mmap(), munmap(), 33744574Sraf * lwp_unpark_all() are simple system calls and are safe in this regard. 33754570Sraf */ 33764570Sraf lwpid_t * 33774570Sraf alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 33784570Sraf { 33794570Sraf /* 33804570Sraf * Allocate NEWLWPS ids on the first overflow. 33814570Sraf * Double the allocation each time after that. 33824570Sraf */ 33834570Sraf int nlwpid = *nlwpid_ptr; 33844570Sraf int maxlwps = *maxlwps_ptr; 33854570Sraf int first_allocation; 33864570Sraf int newlwps; 33874570Sraf void *vaddr; 33884570Sraf 33894570Sraf ASSERT(nlwpid == maxlwps); 33904570Sraf 33914570Sraf first_allocation = (maxlwps == MAXLWPS); 33924570Sraf newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 33934570Sraf vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 33944570Sraf PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 33954570Sraf 33964570Sraf if (vaddr == MAP_FAILED) { 33974570Sraf /* 33984570Sraf * Let's hope this never happens. 33994570Sraf * If it does, then we have a terrible 34004570Sraf * thundering herd on our hands. 34014570Sraf */ 34024570Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 34034570Sraf *nlwpid_ptr = 0; 34044570Sraf } else { 34054570Sraf (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 34064570Sraf if (!first_allocation) 34074570Sraf (void) _private_munmap(lwpid, 34084570Sraf maxlwps * sizeof (lwpid_t)); 34094570Sraf lwpid = vaddr; 34104570Sraf *maxlwps_ptr = newlwps; 34114570Sraf } 34124570Sraf 34134570Sraf return (lwpid); 34144570Sraf } 34150Sstevel@tonic-gate 34160Sstevel@tonic-gate #pragma weak pthread_cond_broadcast = cond_broadcast_internal 34170Sstevel@tonic-gate #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 34180Sstevel@tonic-gate #pragma weak cond_broadcast = cond_broadcast_internal 34190Sstevel@tonic-gate #pragma weak _cond_broadcast = cond_broadcast_internal 34200Sstevel@tonic-gate int 34210Sstevel@tonic-gate cond_broadcast_internal(cond_t *cvp) 34220Sstevel@tonic-gate { 34230Sstevel@tonic-gate ulwp_t *self = curthread; 34240Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 34250Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 34260Sstevel@tonic-gate int error = 0; 34270Sstevel@tonic-gate queue_head_t *qp; 34280Sstevel@tonic-gate mutex_t *mp; 34290Sstevel@tonic-gate mutex_t *mp_cache = NULL; 34304570Sraf queue_head_t *mqp = NULL; 34310Sstevel@tonic-gate ulwp_t **ulwpp; 34320Sstevel@tonic-gate ulwp_t *ulwp; 34330Sstevel@tonic-gate ulwp_t *prev = NULL; 34344570Sraf int nlwpid = 0; 34354570Sraf int maxlwps = MAXLWPS; 34360Sstevel@tonic-gate lwpid_t buffer[MAXLWPS]; 34370Sstevel@tonic-gate lwpid_t *lwpid = buffer; 34380Sstevel@tonic-gate 34390Sstevel@tonic-gate if (csp) 34400Sstevel@tonic-gate tdb_incr(csp->cond_broadcast); 34410Sstevel@tonic-gate 34420Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 34430Sstevel@tonic-gate error = __lwp_cond_broadcast(cvp); 34440Sstevel@tonic-gate 34450Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 34460Sstevel@tonic-gate return (error); 34470Sstevel@tonic-gate 34480Sstevel@tonic-gate /* 34490Sstevel@tonic-gate * Move everyone from the condvar sleep queue to the mutex sleep 34500Sstevel@tonic-gate * queue for the mutex that they will acquire on being waked up. 34510Sstevel@tonic-gate * We can do this only if we own the mutex they will acquire. 34520Sstevel@tonic-gate * If we do not own the mutex, or if their ul_cv_wake flag 34530Sstevel@tonic-gate * is set, just dequeue and unpark them. 34540Sstevel@tonic-gate * 34550Sstevel@tonic-gate * We keep track of lwpids that are to be unparked in lwpid[]. 34560Sstevel@tonic-gate * __lwp_unpark_all() is called to unpark all of them after 34570Sstevel@tonic-gate * they have been removed from the sleep queue and the sleep 34580Sstevel@tonic-gate * queue lock has been dropped. If we run out of space in our 34590Sstevel@tonic-gate * on-stack buffer, we need to allocate more but we can't call 34600Sstevel@tonic-gate * lmalloc() because we are holding a queue lock when the overflow 34610Sstevel@tonic-gate * occurs and lmalloc() acquires a lock. We can't use alloca() 34624570Sraf * either because the application may have allocated a small 34634570Sraf * stack and we don't want to overrun the stack. So we call 34644570Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 34650Sstevel@tonic-gate * system call directly since that path acquires no locks. 34660Sstevel@tonic-gate */ 34670Sstevel@tonic-gate qp = queue_lock(cvp, CV); 34680Sstevel@tonic-gate cvp->cond_waiters_user = 0; 34690Sstevel@tonic-gate ulwpp = &qp->qh_head; 34700Sstevel@tonic-gate while ((ulwp = *ulwpp) != NULL) { 34710Sstevel@tonic-gate if (ulwp->ul_wchan != cvp) { 34720Sstevel@tonic-gate prev = ulwp; 34730Sstevel@tonic-gate ulwpp = &ulwp->ul_link; 34740Sstevel@tonic-gate continue; 34750Sstevel@tonic-gate } 34760Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 34774574Sraf ulwp->ul_link = NULL; 34780Sstevel@tonic-gate if (qp->qh_tail == ulwp) 34790Sstevel@tonic-gate qp->qh_tail = prev; 34800Sstevel@tonic-gate qp->qh_qlen--; 34810Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* his mutex */ 34820Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 34830Sstevel@tonic-gate ASSERT(mp != NULL); 34840Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 34850Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 34860Sstevel@tonic-gate ulwp->ul_wchan = NULL; 34870Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 34884570Sraf if (nlwpid == maxlwps) 34894570Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 34900Sstevel@tonic-gate lwpid[nlwpid++] = ulwp->ul_lwpid; 34910Sstevel@tonic-gate } else { 34920Sstevel@tonic-gate if (mp != mp_cache) { 34930Sstevel@tonic-gate mp_cache = mp; 34944570Sraf if (mqp != NULL) 34954570Sraf queue_unlock(mqp); 34964570Sraf mqp = queue_lock(mp, MX); 34970Sstevel@tonic-gate } 34980Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 34990Sstevel@tonic-gate mp->mutex_waiters = 1; 35000Sstevel@tonic-gate } 35010Sstevel@tonic-gate } 35024570Sraf if (mqp != NULL) 35034570Sraf queue_unlock(mqp); 35044570Sraf if (nlwpid == 0) { 35054570Sraf queue_unlock(qp); 35064570Sraf } else { 35074570Sraf no_preempt(self); 35084570Sraf queue_unlock(qp); 35090Sstevel@tonic-gate if (nlwpid == 1) 35100Sstevel@tonic-gate (void) __lwp_unpark(lwpid[0]); 35110Sstevel@tonic-gate else 35120Sstevel@tonic-gate (void) __lwp_unpark_all(lwpid, nlwpid); 35134570Sraf preempt(self); 35140Sstevel@tonic-gate } 35150Sstevel@tonic-gate if (lwpid != buffer) 35160Sstevel@tonic-gate (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 35170Sstevel@tonic-gate return (error); 35180Sstevel@tonic-gate } 35190Sstevel@tonic-gate 35200Sstevel@tonic-gate #pragma weak pthread_cond_destroy = _cond_destroy 35210Sstevel@tonic-gate #pragma weak _pthread_cond_destroy = _cond_destroy 35220Sstevel@tonic-gate #pragma weak cond_destroy = _cond_destroy 35230Sstevel@tonic-gate int 35240Sstevel@tonic-gate _cond_destroy(cond_t *cvp) 35250Sstevel@tonic-gate { 35260Sstevel@tonic-gate cvp->cond_magic = 0; 35270Sstevel@tonic-gate tdb_sync_obj_deregister(cvp); 35280Sstevel@tonic-gate return (0); 35290Sstevel@tonic-gate } 35300Sstevel@tonic-gate 35310Sstevel@tonic-gate #if defined(THREAD_DEBUG) 35320Sstevel@tonic-gate void 35330Sstevel@tonic-gate assert_no_libc_locks_held(void) 35340Sstevel@tonic-gate { 35350Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 35360Sstevel@tonic-gate } 35370Sstevel@tonic-gate #endif 35380Sstevel@tonic-gate 35390Sstevel@tonic-gate /* protected by link_lock */ 35400Sstevel@tonic-gate uint64_t spin_lock_spin; 35410Sstevel@tonic-gate uint64_t spin_lock_spin2; 35420Sstevel@tonic-gate uint64_t spin_lock_sleep; 35430Sstevel@tonic-gate uint64_t spin_lock_wakeup; 35440Sstevel@tonic-gate 35450Sstevel@tonic-gate /* 35460Sstevel@tonic-gate * Record spin lock statistics. 35470Sstevel@tonic-gate * Called by a thread exiting itself in thrp_exit(). 35480Sstevel@tonic-gate * Also called via atexit() from the thread calling 35490Sstevel@tonic-gate * exit() to do all the other threads as well. 35500Sstevel@tonic-gate */ 35510Sstevel@tonic-gate void 35520Sstevel@tonic-gate record_spin_locks(ulwp_t *ulwp) 35530Sstevel@tonic-gate { 35540Sstevel@tonic-gate spin_lock_spin += ulwp->ul_spin_lock_spin; 35550Sstevel@tonic-gate spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 35560Sstevel@tonic-gate spin_lock_sleep += ulwp->ul_spin_lock_sleep; 35570Sstevel@tonic-gate spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 35580Sstevel@tonic-gate ulwp->ul_spin_lock_spin = 0; 35590Sstevel@tonic-gate ulwp->ul_spin_lock_spin2 = 0; 35600Sstevel@tonic-gate ulwp->ul_spin_lock_sleep = 0; 35610Sstevel@tonic-gate ulwp->ul_spin_lock_wakeup = 0; 35620Sstevel@tonic-gate } 35630Sstevel@tonic-gate 35640Sstevel@tonic-gate /* 35650Sstevel@tonic-gate * atexit function: dump the queue statistics to stderr. 35660Sstevel@tonic-gate */ 35671219Sraf #if !defined(__lint) 35681219Sraf #define fprintf _fprintf 35691219Sraf #endif 35700Sstevel@tonic-gate #include <stdio.h> 35710Sstevel@tonic-gate void 35720Sstevel@tonic-gate dump_queue_statistics(void) 35730Sstevel@tonic-gate { 35740Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 35750Sstevel@tonic-gate queue_head_t *qp; 35760Sstevel@tonic-gate int qn; 35770Sstevel@tonic-gate uint64_t spin_lock_total = 0; 35780Sstevel@tonic-gate 35790Sstevel@tonic-gate if (udp->queue_head == NULL || thread_queue_dump == 0) 35800Sstevel@tonic-gate return; 35810Sstevel@tonic-gate 35820Sstevel@tonic-gate if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 35830Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 35840Sstevel@tonic-gate return; 35850Sstevel@tonic-gate for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 35860Sstevel@tonic-gate if (qp->qh_lockcount == 0) 35870Sstevel@tonic-gate continue; 35880Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 35890Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3590*5629Sraf (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3591*5629Sraf return; 35920Sstevel@tonic-gate } 35930Sstevel@tonic-gate 35940Sstevel@tonic-gate if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 35950Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 35960Sstevel@tonic-gate return; 35970Sstevel@tonic-gate for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 35980Sstevel@tonic-gate if (qp->qh_lockcount == 0) 35990Sstevel@tonic-gate continue; 36000Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 36010Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3602*5629Sraf (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3603*5629Sraf return; 36040Sstevel@tonic-gate } 36050Sstevel@tonic-gate 36060Sstevel@tonic-gate (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3607*5629Sraf (u_longlong_t)spin_lock_total); 36080Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3609*5629Sraf (u_longlong_t)spin_lock_spin); 36100Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3611*5629Sraf (u_longlong_t)spin_lock_spin2); 36120Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3613*5629Sraf (u_longlong_t)spin_lock_sleep); 36140Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3615*5629Sraf (u_longlong_t)spin_lock_wakeup); 36160Sstevel@tonic-gate } 3617