10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51893Sraf * Common Development and Distribution License (the "License"). 61893Sraf * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211219Sraf 220Sstevel@tonic-gate /* 234570Sraf * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/sdt.h> 300Sstevel@tonic-gate 310Sstevel@tonic-gate #include "lint.h" 320Sstevel@tonic-gate #include "thr_uberdata.h" 330Sstevel@tonic-gate 340Sstevel@tonic-gate /* 350Sstevel@tonic-gate * This mutex is initialized to be held by lwp#1. 360Sstevel@tonic-gate * It is used to block a thread that has returned from a mutex_lock() 374574Sraf * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 
380Sstevel@tonic-gate */ 390Sstevel@tonic-gate mutex_t stall_mutex = DEFAULTMUTEX; 400Sstevel@tonic-gate 410Sstevel@tonic-gate static int shared_mutex_held(mutex_t *); 424574Sraf static int mutex_unlock_internal(mutex_t *, int); 434574Sraf static int mutex_queuelock_adaptive(mutex_t *); 444574Sraf static void mutex_wakeup_all(mutex_t *); 450Sstevel@tonic-gate 460Sstevel@tonic-gate /* 470Sstevel@tonic-gate * Lock statistics support functions. 480Sstevel@tonic-gate */ 490Sstevel@tonic-gate void 500Sstevel@tonic-gate record_begin_hold(tdb_mutex_stats_t *msp) 510Sstevel@tonic-gate { 520Sstevel@tonic-gate tdb_incr(msp->mutex_lock); 530Sstevel@tonic-gate msp->mutex_begin_hold = gethrtime(); 540Sstevel@tonic-gate } 550Sstevel@tonic-gate 560Sstevel@tonic-gate hrtime_t 570Sstevel@tonic-gate record_hold_time(tdb_mutex_stats_t *msp) 580Sstevel@tonic-gate { 590Sstevel@tonic-gate hrtime_t now = gethrtime(); 600Sstevel@tonic-gate 610Sstevel@tonic-gate if (msp->mutex_begin_hold) 620Sstevel@tonic-gate msp->mutex_hold_time += now - msp->mutex_begin_hold; 630Sstevel@tonic-gate msp->mutex_begin_hold = 0; 640Sstevel@tonic-gate return (now); 650Sstevel@tonic-gate } 660Sstevel@tonic-gate 670Sstevel@tonic-gate /* 680Sstevel@tonic-gate * Called once at library initialization. 690Sstevel@tonic-gate */ 700Sstevel@tonic-gate void 710Sstevel@tonic-gate mutex_setup(void) 720Sstevel@tonic-gate { 730Sstevel@tonic-gate if (set_lock_byte(&stall_mutex.mutex_lockw)) 740Sstevel@tonic-gate thr_panic("mutex_setup() cannot acquire stall_mutex"); 750Sstevel@tonic-gate stall_mutex.mutex_owner = (uintptr_t)curthread; 760Sstevel@tonic-gate } 770Sstevel@tonic-gate 780Sstevel@tonic-gate /* 790Sstevel@tonic-gate * The default spin counts of 1000 and 500 are experimentally determined. 800Sstevel@tonic-gate * On sun4u machines with any number of processors they could be raised 810Sstevel@tonic-gate * to 10,000 but that (experimentally) makes almost no difference. 
*/
/*
 * The environment variables:
 *	_THREAD_ADAPTIVE_SPIN=count
 *	_THREAD_RELEASE_SPIN=count
 * can be used to override and set the counts in the range [0 .. 1,000,000].
 */
/* Default adaptive spin count; see _THREAD_ADAPTIVE_SPIN above. */
int	thread_adaptive_spin = 1000;
/*
 * NOTE(review): no consumer is visible in this part of the file;
 * presumably an upper bound on concurrently spinning threads -- confirm.
 */
uint_t	thread_max_spinners = 100;
/* Default release spin count; see _THREAD_RELEASE_SPIN above. */
int	thread_release_spin = 500;
/*
 * When zero, the THREAD_DEBUG build of QVERIFY() skips its expensive
 * full walk of the sleep queue.
 */
int	thread_queue_verify = 0;
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * cached processor count -- confirm.
 */
static int	ncpus;

/*
 * Distinguish spinning for queue locks from spinning for regular locks.
 * The environment variable:
 *	_THREAD_QUEUE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 * There is no release spin concept for queue locks.
 */
int	thread_queue_spin = 1000;

/*
 * Use the otherwise-unused 'mutex_ownerpid' field of a USYNC_THREAD
 * mutex to be a count of adaptive spins in progress.
1050Sstevel@tonic-gate */ 1060Sstevel@tonic-gate #define mutex_spinners mutex_ownerpid 1070Sstevel@tonic-gate 1084574Sraf #define ALL_ATTRIBUTES \ 1094574Sraf (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 1104574Sraf LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 1114574Sraf LOCK_ROBUST) 1120Sstevel@tonic-gate 1130Sstevel@tonic-gate /* 1144574Sraf * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 1154574Sraf * augmented by zero or more the flags: 1164574Sraf * LOCK_RECURSIVE 1174574Sraf * LOCK_ERRORCHECK 1184574Sraf * LOCK_PRIO_INHERIT 1194574Sraf * LOCK_PRIO_PROTECT 1204574Sraf * LOCK_ROBUST 1210Sstevel@tonic-gate */ 1220Sstevel@tonic-gate #pragma weak _private_mutex_init = __mutex_init 1230Sstevel@tonic-gate #pragma weak mutex_init = __mutex_init 1240Sstevel@tonic-gate #pragma weak _mutex_init = __mutex_init 1250Sstevel@tonic-gate /* ARGSUSED2 */ 1260Sstevel@tonic-gate int 1270Sstevel@tonic-gate __mutex_init(mutex_t *mp, int type, void *arg) 1280Sstevel@tonic-gate { 1294574Sraf int basetype = (type & ~ALL_ATTRIBUTES); 1304574Sraf int error = 0; 1314574Sraf 1324574Sraf if (basetype == USYNC_PROCESS_ROBUST) { 1334574Sraf /* 1344574Sraf * USYNC_PROCESS_ROBUST is a deprecated historical type. 1354574Sraf * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 1364574Sraf * retain the USYNC_PROCESS_ROBUST flag so we can return 1374574Sraf * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 1384574Sraf * mutexes will ever draw ELOCKUNMAPPED). 
1394574Sraf */ 1404574Sraf type |= (USYNC_PROCESS | LOCK_ROBUST); 1414574Sraf basetype = USYNC_PROCESS; 1424574Sraf } 1434574Sraf 1444574Sraf if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || 1454574Sraf (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 1464574Sraf == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { 1474574Sraf error = EINVAL; 1484574Sraf } else if (type & LOCK_ROBUST) { 1494574Sraf /* 1504574Sraf * Callers of mutex_init() with the LOCK_ROBUST attribute 1514574Sraf * are required to pass an initially all-zero mutex. 1524574Sraf * Multiple calls to mutex_init() are allowed; all but 1534574Sraf * the first return EBUSY. A call to mutex_init() is 1544574Sraf * allowed to make an inconsistent robust lock consistent 1554574Sraf * (for historical usage, even though the proper interface 1564574Sraf * for this is mutex_consistent()). Note that we use 1574574Sraf * atomic_or_16() to set the LOCK_INITED flag so as 1584574Sraf * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 
1594574Sraf */ 1604574Sraf extern void _atomic_or_16(volatile uint16_t *, uint16_t); 1614574Sraf if (!(mp->mutex_flag & LOCK_INITED)) { 1624574Sraf mp->mutex_type = (uint8_t)type; 1634574Sraf _atomic_or_16(&mp->mutex_flag, LOCK_INITED); 1644574Sraf mp->mutex_magic = MUTEX_MAGIC; 1654574Sraf } else if (type != mp->mutex_type || 1664574Sraf ((type & LOCK_PRIO_PROTECT) && 1674574Sraf mp->mutex_ceiling != (*(int *)arg))) { 1684574Sraf error = EINVAL; 1694574Sraf } else if (__mutex_consistent(mp) != 0) { 1704574Sraf error = EBUSY; 1714574Sraf } 1724574Sraf /* register a process robust mutex with the kernel */ 1734574Sraf if (basetype == USYNC_PROCESS) 1744574Sraf register_lock(mp); 1754574Sraf } else { 1760Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 1770Sstevel@tonic-gate mp->mutex_type = (uint8_t)type; 1780Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 1794574Sraf mp->mutex_magic = MUTEX_MAGIC; 1800Sstevel@tonic-gate } 1814574Sraf 1824574Sraf if (error == 0 && (type & LOCK_PRIO_PROTECT)) 1834574Sraf mp->mutex_ceiling = (uint8_t)(*(int *)arg); 1844574Sraf 1850Sstevel@tonic-gate return (error); 1860Sstevel@tonic-gate } 1870Sstevel@tonic-gate 1880Sstevel@tonic-gate /* 1890Sstevel@tonic-gate * Delete mp from list of ceil mutexes owned by curthread. 1900Sstevel@tonic-gate * Return 1 if the head of the chain was updated. 
1910Sstevel@tonic-gate */ 1920Sstevel@tonic-gate int 1930Sstevel@tonic-gate _ceil_mylist_del(mutex_t *mp) 1940Sstevel@tonic-gate { 1950Sstevel@tonic-gate ulwp_t *self = curthread; 1960Sstevel@tonic-gate mxchain_t **mcpp; 1970Sstevel@tonic-gate mxchain_t *mcp; 1980Sstevel@tonic-gate 1990Sstevel@tonic-gate mcpp = &self->ul_mxchain; 2000Sstevel@tonic-gate while ((*mcpp)->mxchain_mx != mp) 2010Sstevel@tonic-gate mcpp = &(*mcpp)->mxchain_next; 2020Sstevel@tonic-gate mcp = *mcpp; 2030Sstevel@tonic-gate *mcpp = mcp->mxchain_next; 2040Sstevel@tonic-gate lfree(mcp, sizeof (*mcp)); 2050Sstevel@tonic-gate return (mcpp == &self->ul_mxchain); 2060Sstevel@tonic-gate } 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate /* 2090Sstevel@tonic-gate * Add mp to head of list of ceil mutexes owned by curthread. 2100Sstevel@tonic-gate * Return ENOMEM if no memory could be allocated. 2110Sstevel@tonic-gate */ 2120Sstevel@tonic-gate int 2130Sstevel@tonic-gate _ceil_mylist_add(mutex_t *mp) 2140Sstevel@tonic-gate { 2150Sstevel@tonic-gate ulwp_t *self = curthread; 2160Sstevel@tonic-gate mxchain_t *mcp; 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 2190Sstevel@tonic-gate return (ENOMEM); 2200Sstevel@tonic-gate mcp->mxchain_mx = mp; 2210Sstevel@tonic-gate mcp->mxchain_next = self->ul_mxchain; 2220Sstevel@tonic-gate self->ul_mxchain = mcp; 2230Sstevel@tonic-gate return (0); 2240Sstevel@tonic-gate } 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate /* 2270Sstevel@tonic-gate * Inherit priority from ceiling. The inheritance impacts the effective 2280Sstevel@tonic-gate * priority, not the assigned priority. See _thread_setschedparam_main(). 
2290Sstevel@tonic-gate */ 2300Sstevel@tonic-gate void 2310Sstevel@tonic-gate _ceil_prio_inherit(int ceil) 2320Sstevel@tonic-gate { 2330Sstevel@tonic-gate ulwp_t *self = curthread; 2340Sstevel@tonic-gate struct sched_param param; 2350Sstevel@tonic-gate 2360Sstevel@tonic-gate (void) _memset(¶m, 0, sizeof (param)); 2370Sstevel@tonic-gate param.sched_priority = ceil; 2380Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2390Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_INHERIT)) { 2400Sstevel@tonic-gate /* 2410Sstevel@tonic-gate * Panic since unclear what error code to return. 2420Sstevel@tonic-gate * If we do return the error codes returned by above 2430Sstevel@tonic-gate * called routine, update the man page... 2440Sstevel@tonic-gate */ 2450Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2460Sstevel@tonic-gate } 2470Sstevel@tonic-gate } 2480Sstevel@tonic-gate 2490Sstevel@tonic-gate /* 2500Sstevel@tonic-gate * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 2510Sstevel@tonic-gate * if holding at least one ceiling lock. If no ceiling locks are held at this 2520Sstevel@tonic-gate * point, disinherit completely, reverting back to assigned priority. 2530Sstevel@tonic-gate */ 2540Sstevel@tonic-gate void 2550Sstevel@tonic-gate _ceil_prio_waive(void) 2560Sstevel@tonic-gate { 2570Sstevel@tonic-gate ulwp_t *self = curthread; 2580Sstevel@tonic-gate struct sched_param param; 2590Sstevel@tonic-gate 2600Sstevel@tonic-gate (void) _memset(¶m, 0, sizeof (param)); 2610Sstevel@tonic-gate if (self->ul_mxchain == NULL) { 2620Sstevel@tonic-gate /* 2630Sstevel@tonic-gate * No ceil locks held. Zero the epri, revert back to ul_pri. 2640Sstevel@tonic-gate * Since thread's hash lock is not held, one cannot just 2650Sstevel@tonic-gate * read ul_pri here...do it in the called routine... 
2660Sstevel@tonic-gate */ 2670Sstevel@tonic-gate param.sched_priority = self->ul_pri; /* ignored */ 2680Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2690Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_DISINHERIT)) 2700Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2710Sstevel@tonic-gate } else { 2720Sstevel@tonic-gate /* 2730Sstevel@tonic-gate * Set priority to that of the mutex at the head 2740Sstevel@tonic-gate * of the ceilmutex chain. 2750Sstevel@tonic-gate */ 2760Sstevel@tonic-gate param.sched_priority = 2770Sstevel@tonic-gate self->ul_mxchain->mxchain_mx->mutex_ceiling; 2780Sstevel@tonic-gate if (_thread_setschedparam_main(self->ul_lwpid, 2790Sstevel@tonic-gate self->ul_policy, ¶m, PRIO_INHERIT)) 2800Sstevel@tonic-gate thr_panic("_thread_setschedparam_main() fails"); 2810Sstevel@tonic-gate } 2820Sstevel@tonic-gate } 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate /* 2850Sstevel@tonic-gate * Non-preemptive spin locks. Used by queue_lock(). 2860Sstevel@tonic-gate * No lock statistics are gathered for these locks. 2870Sstevel@tonic-gate */ 2880Sstevel@tonic-gate void 2890Sstevel@tonic-gate spin_lock_set(mutex_t *mp) 2900Sstevel@tonic-gate { 2910Sstevel@tonic-gate ulwp_t *self = curthread; 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate no_preempt(self); 2940Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 2950Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 2960Sstevel@tonic-gate return; 2970Sstevel@tonic-gate } 2980Sstevel@tonic-gate /* 2990Sstevel@tonic-gate * Spin for a while, attempting to acquire the lock. 
3000Sstevel@tonic-gate */ 3010Sstevel@tonic-gate if (self->ul_spin_lock_spin != UINT_MAX) 3020Sstevel@tonic-gate self->ul_spin_lock_spin++; 3030Sstevel@tonic-gate if (mutex_queuelock_adaptive(mp) == 0 || 3040Sstevel@tonic-gate set_lock_byte(&mp->mutex_lockw) == 0) { 3050Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3060Sstevel@tonic-gate return; 3070Sstevel@tonic-gate } 3080Sstevel@tonic-gate /* 3090Sstevel@tonic-gate * Try harder if we were previously at a no premption level. 3100Sstevel@tonic-gate */ 3110Sstevel@tonic-gate if (self->ul_preempt > 1) { 3120Sstevel@tonic-gate if (self->ul_spin_lock_spin2 != UINT_MAX) 3130Sstevel@tonic-gate self->ul_spin_lock_spin2++; 3140Sstevel@tonic-gate if (mutex_queuelock_adaptive(mp) == 0 || 3150Sstevel@tonic-gate set_lock_byte(&mp->mutex_lockw) == 0) { 3160Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3170Sstevel@tonic-gate return; 3180Sstevel@tonic-gate } 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate /* 3210Sstevel@tonic-gate * Give up and block in the kernel for the mutex. 
3220Sstevel@tonic-gate */ 3230Sstevel@tonic-gate if (self->ul_spin_lock_sleep != UINT_MAX) 3240Sstevel@tonic-gate self->ul_spin_lock_sleep++; 3250Sstevel@tonic-gate (void) ___lwp_mutex_timedlock(mp, NULL); 3260Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 3270Sstevel@tonic-gate } 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate void 3300Sstevel@tonic-gate spin_lock_clear(mutex_t *mp) 3310Sstevel@tonic-gate { 3320Sstevel@tonic-gate ulwp_t *self = curthread; 3330Sstevel@tonic-gate 3340Sstevel@tonic-gate mp->mutex_owner = 0; 3354570Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 3364574Sraf (void) ___lwp_mutex_wakeup(mp, 0); 3370Sstevel@tonic-gate if (self->ul_spin_lock_wakeup != UINT_MAX) 3380Sstevel@tonic-gate self->ul_spin_lock_wakeup++; 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate preempt(self); 3410Sstevel@tonic-gate } 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate /* 3440Sstevel@tonic-gate * Allocate the sleep queue hash table. 3450Sstevel@tonic-gate */ 3460Sstevel@tonic-gate void 3470Sstevel@tonic-gate queue_alloc(void) 3480Sstevel@tonic-gate { 3490Sstevel@tonic-gate ulwp_t *self = curthread; 3500Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 3514574Sraf mutex_t *mp; 3520Sstevel@tonic-gate void *data; 3530Sstevel@tonic-gate int i; 3540Sstevel@tonic-gate 3550Sstevel@tonic-gate /* 3560Sstevel@tonic-gate * No locks are needed; we call here only when single-threaded. 
3570Sstevel@tonic-gate */ 3580Sstevel@tonic-gate ASSERT(self == udp->ulwp_one); 3590Sstevel@tonic-gate ASSERT(!udp->uberflags.uf_mt); 3600Sstevel@tonic-gate if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 3610Sstevel@tonic-gate PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 3620Sstevel@tonic-gate == MAP_FAILED) 3630Sstevel@tonic-gate thr_panic("cannot allocate thread queue_head table"); 3640Sstevel@tonic-gate udp->queue_head = (queue_head_t *)data; 3654574Sraf for (i = 0; i < 2 * QHASHSIZE; i++) { 3664574Sraf mp = &udp->queue_head[i].qh_lock; 3674574Sraf mp->mutex_flag = LOCK_INITED; 3684574Sraf mp->mutex_magic = MUTEX_MAGIC; 3694574Sraf } 3700Sstevel@tonic-gate } 3710Sstevel@tonic-gate 3720Sstevel@tonic-gate #if defined(THREAD_DEBUG) 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate /* 3750Sstevel@tonic-gate * Debugging: verify correctness of a sleep queue. 3760Sstevel@tonic-gate */ 3770Sstevel@tonic-gate void 3780Sstevel@tonic-gate QVERIFY(queue_head_t *qp) 3790Sstevel@tonic-gate { 3800Sstevel@tonic-gate ulwp_t *self = curthread; 3810Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 3820Sstevel@tonic-gate ulwp_t *ulwp; 3830Sstevel@tonic-gate ulwp_t *prev; 3840Sstevel@tonic-gate uint_t index; 3850Sstevel@tonic-gate uint32_t cnt = 0; 3860Sstevel@tonic-gate char qtype; 3870Sstevel@tonic-gate void *wchan; 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 3900Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 3910Sstevel@tonic-gate ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || 3920Sstevel@tonic-gate (qp->qh_head == NULL && qp->qh_tail == NULL)); 3930Sstevel@tonic-gate if (!thread_queue_verify) 3940Sstevel@tonic-gate return; 3950Sstevel@tonic-gate /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 3960Sstevel@tonic-gate qtype = ((qp - udp->queue_head) < QHASHSIZE)? 
MX : CV; 3970Sstevel@tonic-gate for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; 3980Sstevel@tonic-gate prev = ulwp, ulwp = ulwp->ul_link, cnt++) { 3990Sstevel@tonic-gate ASSERT(ulwp->ul_qtype == qtype); 4000Sstevel@tonic-gate ASSERT(ulwp->ul_wchan != NULL); 4010Sstevel@tonic-gate ASSERT(ulwp->ul_sleepq == qp); 4020Sstevel@tonic-gate wchan = ulwp->ul_wchan; 4030Sstevel@tonic-gate index = QUEUE_HASH(wchan, qtype); 4040Sstevel@tonic-gate ASSERT(&udp->queue_head[index] == qp); 4050Sstevel@tonic-gate } 4060Sstevel@tonic-gate ASSERT(qp->qh_tail == prev); 4070Sstevel@tonic-gate ASSERT(qp->qh_qlen == cnt); 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate #else /* THREAD_DEBUG */ 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate #define QVERIFY(qp) 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate #endif /* THREAD_DEBUG */ 4150Sstevel@tonic-gate 4160Sstevel@tonic-gate /* 4170Sstevel@tonic-gate * Acquire a queue head. 4180Sstevel@tonic-gate */ 4190Sstevel@tonic-gate queue_head_t * 4200Sstevel@tonic-gate queue_lock(void *wchan, int qtype) 4210Sstevel@tonic-gate { 4220Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 4230Sstevel@tonic-gate queue_head_t *qp; 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate ASSERT(qtype == MX || qtype == CV); 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate /* 4280Sstevel@tonic-gate * It is possible that we could be called while still single-threaded. 4290Sstevel@tonic-gate * If so, we call queue_alloc() to allocate the queue_head[] array. 4300Sstevel@tonic-gate */ 4310Sstevel@tonic-gate if ((qp = udp->queue_head) == NULL) { 4320Sstevel@tonic-gate queue_alloc(); 4330Sstevel@tonic-gate qp = udp->queue_head; 4340Sstevel@tonic-gate } 4350Sstevel@tonic-gate qp += QUEUE_HASH(wchan, qtype); 4360Sstevel@tonic-gate spin_lock_set(&qp->qh_lock); 4370Sstevel@tonic-gate /* 4380Sstevel@tonic-gate * At once per nanosecond, qh_lockcount will wrap after 512 years. 
4390Sstevel@tonic-gate * Were we to care about this, we could peg the value at UINT64_MAX. 4400Sstevel@tonic-gate */ 4410Sstevel@tonic-gate qp->qh_lockcount++; 4420Sstevel@tonic-gate QVERIFY(qp); 4430Sstevel@tonic-gate return (qp); 4440Sstevel@tonic-gate } 4450Sstevel@tonic-gate 4460Sstevel@tonic-gate /* 4470Sstevel@tonic-gate * Release a queue head. 4480Sstevel@tonic-gate */ 4490Sstevel@tonic-gate void 4500Sstevel@tonic-gate queue_unlock(queue_head_t *qp) 4510Sstevel@tonic-gate { 4520Sstevel@tonic-gate QVERIFY(qp); 4530Sstevel@tonic-gate spin_lock_clear(&qp->qh_lock); 4540Sstevel@tonic-gate } 4550Sstevel@tonic-gate 4560Sstevel@tonic-gate /* 4570Sstevel@tonic-gate * For rwlock queueing, we must queue writers ahead of readers of the 4580Sstevel@tonic-gate * same priority. We do this by making writers appear to have a half 4590Sstevel@tonic-gate * point higher priority for purposes of priority comparisons below. 4600Sstevel@tonic-gate */ 4610Sstevel@tonic-gate #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 4620Sstevel@tonic-gate 4630Sstevel@tonic-gate void 4640Sstevel@tonic-gate enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) 4650Sstevel@tonic-gate { 4660Sstevel@tonic-gate ulwp_t **ulwpp; 4670Sstevel@tonic-gate ulwp_t *next; 4680Sstevel@tonic-gate int pri = CMP_PRIO(ulwp); 4690Sstevel@tonic-gate int force_fifo = (qtype & FIFOQ); 4700Sstevel@tonic-gate int do_fifo; 4710Sstevel@tonic-gate 4720Sstevel@tonic-gate qtype &= ~FIFOQ; 4730Sstevel@tonic-gate ASSERT(qtype == MX || qtype == CV); 4740Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 4750Sstevel@tonic-gate ASSERT(ulwp->ul_sleepq != qp); 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate /* 4780Sstevel@tonic-gate * LIFO queue ordering is unfair and can lead to starvation, 4790Sstevel@tonic-gate * but it gives better performance for heavily contended locks. 
4800Sstevel@tonic-gate * We use thread_queue_fifo (range is 0..8) to determine 4810Sstevel@tonic-gate * the frequency of FIFO vs LIFO queuing: 4820Sstevel@tonic-gate * 0 : every 256th time (almost always LIFO) 4830Sstevel@tonic-gate * 1 : every 128th time 4840Sstevel@tonic-gate * 2 : every 64th time 4850Sstevel@tonic-gate * 3 : every 32nd time 4860Sstevel@tonic-gate * 4 : every 16th time (the default value, mostly LIFO) 4870Sstevel@tonic-gate * 5 : every 8th time 4880Sstevel@tonic-gate * 6 : every 4th time 4890Sstevel@tonic-gate * 7 : every 2nd time 4900Sstevel@tonic-gate * 8 : every time (never LIFO, always FIFO) 4910Sstevel@tonic-gate * Note that there is always some degree of FIFO ordering. 4920Sstevel@tonic-gate * This breaks live lock conditions that occur in applications 4930Sstevel@tonic-gate * that are written assuming (incorrectly) that threads acquire 4940Sstevel@tonic-gate * locks fairly, that is, in roughly round-robin order. 4950Sstevel@tonic-gate * In any event, the queue is maintained in priority order. 4960Sstevel@tonic-gate * 4970Sstevel@tonic-gate * If we are given the FIFOQ flag in qtype, fifo queueing is forced. 4980Sstevel@tonic-gate * SUSV3 requires this for semaphores. 4990Sstevel@tonic-gate */ 5000Sstevel@tonic-gate do_fifo = (force_fifo || 5010Sstevel@tonic-gate ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); 5020Sstevel@tonic-gate 5030Sstevel@tonic-gate if (qp->qh_head == NULL) { 5040Sstevel@tonic-gate /* 5050Sstevel@tonic-gate * The queue is empty. LIFO/FIFO doesn't matter. 5060Sstevel@tonic-gate */ 5070Sstevel@tonic-gate ASSERT(qp->qh_tail == NULL); 5080Sstevel@tonic-gate ulwpp = &qp->qh_head; 5090Sstevel@tonic-gate } else if (do_fifo) { 5100Sstevel@tonic-gate /* 5110Sstevel@tonic-gate * Enqueue after the last thread whose priority is greater 5120Sstevel@tonic-gate * than or equal to the priority of the thread being queued. 5130Sstevel@tonic-gate * Attempt first to go directly onto the tail of the queue. 
5140Sstevel@tonic-gate */ 5150Sstevel@tonic-gate if (pri <= CMP_PRIO(qp->qh_tail)) 5160Sstevel@tonic-gate ulwpp = &qp->qh_tail->ul_link; 5170Sstevel@tonic-gate else { 5180Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 5190Sstevel@tonic-gate ulwpp = &next->ul_link) 5200Sstevel@tonic-gate if (pri > CMP_PRIO(next)) 5210Sstevel@tonic-gate break; 5220Sstevel@tonic-gate } 5230Sstevel@tonic-gate } else { 5240Sstevel@tonic-gate /* 5250Sstevel@tonic-gate * Enqueue before the first thread whose priority is less 5260Sstevel@tonic-gate * than or equal to the priority of the thread being queued. 5270Sstevel@tonic-gate * Hopefully we can go directly onto the head of the queue. 5280Sstevel@tonic-gate */ 5290Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 5300Sstevel@tonic-gate ulwpp = &next->ul_link) 5310Sstevel@tonic-gate if (pri >= CMP_PRIO(next)) 5320Sstevel@tonic-gate break; 5330Sstevel@tonic-gate } 5340Sstevel@tonic-gate if ((ulwp->ul_link = *ulwpp) == NULL) 5350Sstevel@tonic-gate qp->qh_tail = ulwp; 5360Sstevel@tonic-gate *ulwpp = ulwp; 5370Sstevel@tonic-gate 5380Sstevel@tonic-gate ulwp->ul_sleepq = qp; 5390Sstevel@tonic-gate ulwp->ul_wchan = wchan; 5400Sstevel@tonic-gate ulwp->ul_qtype = qtype; 5410Sstevel@tonic-gate if (qp->qh_qmax < ++qp->qh_qlen) 5420Sstevel@tonic-gate qp->qh_qmax = qp->qh_qlen; 5430Sstevel@tonic-gate } 5440Sstevel@tonic-gate 5450Sstevel@tonic-gate /* 5460Sstevel@tonic-gate * Return a pointer to the queue slot of the 5470Sstevel@tonic-gate * highest priority thread on the queue. 
5480Sstevel@tonic-gate * On return, prevp, if not NULL, will contain a pointer 5490Sstevel@tonic-gate * to the thread's predecessor on the queue 5500Sstevel@tonic-gate */ 5510Sstevel@tonic-gate static ulwp_t ** 5520Sstevel@tonic-gate queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) 5530Sstevel@tonic-gate { 5540Sstevel@tonic-gate ulwp_t **ulwpp; 5550Sstevel@tonic-gate ulwp_t *ulwp; 5560Sstevel@tonic-gate ulwp_t *prev = NULL; 5570Sstevel@tonic-gate ulwp_t **suspp = NULL; 5580Sstevel@tonic-gate ulwp_t *susprev; 5590Sstevel@tonic-gate 5600Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate /* 5630Sstevel@tonic-gate * Find a waiter on the sleep queue. 5640Sstevel@tonic-gate */ 5650Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 5660Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 5670Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) { 5680Sstevel@tonic-gate if (!ulwp->ul_stop) 5690Sstevel@tonic-gate break; 5700Sstevel@tonic-gate /* 5710Sstevel@tonic-gate * Try not to return a suspended thread. 5720Sstevel@tonic-gate * This mimics the old libthread's behavior. 
5730Sstevel@tonic-gate */ 5740Sstevel@tonic-gate if (suspp == NULL) { 5750Sstevel@tonic-gate suspp = ulwpp; 5760Sstevel@tonic-gate susprev = prev; 5770Sstevel@tonic-gate } 5780Sstevel@tonic-gate } 5790Sstevel@tonic-gate } 5800Sstevel@tonic-gate 5810Sstevel@tonic-gate if (ulwp == NULL && suspp != NULL) { 5820Sstevel@tonic-gate ulwp = *(ulwpp = suspp); 5830Sstevel@tonic-gate prev = susprev; 5840Sstevel@tonic-gate suspp = NULL; 5850Sstevel@tonic-gate } 5860Sstevel@tonic-gate if (ulwp == NULL) { 5870Sstevel@tonic-gate if (more != NULL) 5880Sstevel@tonic-gate *more = 0; 5890Sstevel@tonic-gate return (NULL); 5900Sstevel@tonic-gate } 5910Sstevel@tonic-gate 5920Sstevel@tonic-gate if (prevp != NULL) 5930Sstevel@tonic-gate *prevp = prev; 5940Sstevel@tonic-gate if (more == NULL) 5950Sstevel@tonic-gate return (ulwpp); 5960Sstevel@tonic-gate 5970Sstevel@tonic-gate /* 5980Sstevel@tonic-gate * Scan the remainder of the queue for another waiter. 5990Sstevel@tonic-gate */ 6000Sstevel@tonic-gate if (suspp != NULL) { 6010Sstevel@tonic-gate *more = 1; 6020Sstevel@tonic-gate return (ulwpp); 6030Sstevel@tonic-gate } 6040Sstevel@tonic-gate for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { 6050Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) { 6060Sstevel@tonic-gate *more = 1; 6070Sstevel@tonic-gate return (ulwpp); 6080Sstevel@tonic-gate } 6090Sstevel@tonic-gate } 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate *more = 0; 6120Sstevel@tonic-gate return (ulwpp); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate ulwp_t * 6164570Sraf queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 6170Sstevel@tonic-gate { 6180Sstevel@tonic-gate ulwp_t *ulwp; 6190Sstevel@tonic-gate 6200Sstevel@tonic-gate ulwp = *ulwpp; 6210Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 6220Sstevel@tonic-gate ulwp->ul_link = NULL; 6230Sstevel@tonic-gate if (qp->qh_tail == ulwp) 6240Sstevel@tonic-gate qp->qh_tail = prev; 6250Sstevel@tonic-gate qp->qh_qlen--; 6260Sstevel@tonic-gate 
ulwp->ul_sleepq = NULL; 6270Sstevel@tonic-gate ulwp->ul_wchan = NULL; 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate return (ulwp); 6300Sstevel@tonic-gate } 6310Sstevel@tonic-gate 6324570Sraf ulwp_t * 6334570Sraf dequeue(queue_head_t *qp, void *wchan, int *more) 6344570Sraf { 6354570Sraf ulwp_t **ulwpp; 6364570Sraf ulwp_t *prev; 6374570Sraf 6384570Sraf if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) 6394570Sraf return (NULL); 6404570Sraf return (queue_unlink(qp, ulwpp, prev)); 6414570Sraf } 6424570Sraf 6430Sstevel@tonic-gate /* 6440Sstevel@tonic-gate * Return a pointer to the highest priority thread sleeping on wchan. 6450Sstevel@tonic-gate */ 6460Sstevel@tonic-gate ulwp_t * 6470Sstevel@tonic-gate queue_waiter(queue_head_t *qp, void *wchan) 6480Sstevel@tonic-gate { 6490Sstevel@tonic-gate ulwp_t **ulwpp; 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) 6520Sstevel@tonic-gate return (NULL); 6530Sstevel@tonic-gate return (*ulwpp); 6540Sstevel@tonic-gate } 6550Sstevel@tonic-gate 6560Sstevel@tonic-gate uint8_t 6570Sstevel@tonic-gate dequeue_self(queue_head_t *qp, void *wchan) 6580Sstevel@tonic-gate { 6590Sstevel@tonic-gate ulwp_t *self = curthread; 6600Sstevel@tonic-gate ulwp_t **ulwpp; 6610Sstevel@tonic-gate ulwp_t *ulwp; 6620Sstevel@tonic-gate ulwp_t *prev = NULL; 6630Sstevel@tonic-gate int found = 0; 6640Sstevel@tonic-gate int more = 0; 6650Sstevel@tonic-gate 6660Sstevel@tonic-gate ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 6670Sstevel@tonic-gate 6680Sstevel@tonic-gate /* find self on the sleep queue */ 6690Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 6700Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 6710Sstevel@tonic-gate if (ulwp == self) { 6720Sstevel@tonic-gate /* dequeue ourself */ 6730Sstevel@tonic-gate ASSERT(self->ul_wchan == wchan); 6744570Sraf (void) queue_unlink(qp, ulwpp, prev); 6750Sstevel@tonic-gate self->ul_cvmutex = NULL; 6760Sstevel@tonic-gate 
self->ul_cv_wake = 0; 6770Sstevel@tonic-gate found = 1; 6780Sstevel@tonic-gate break; 6790Sstevel@tonic-gate } 6800Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) 6810Sstevel@tonic-gate more = 1; 6820Sstevel@tonic-gate } 6830Sstevel@tonic-gate 6840Sstevel@tonic-gate if (!found) 6850Sstevel@tonic-gate thr_panic("dequeue_self(): curthread not found on queue"); 6860Sstevel@tonic-gate 6870Sstevel@tonic-gate if (more) 6880Sstevel@tonic-gate return (1); 6890Sstevel@tonic-gate 6900Sstevel@tonic-gate /* scan the remainder of the queue for another waiter */ 6910Sstevel@tonic-gate for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { 6920Sstevel@tonic-gate if (ulwp->ul_wchan == wchan) 6930Sstevel@tonic-gate return (1); 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate return (0); 6970Sstevel@tonic-gate } 6980Sstevel@tonic-gate 6990Sstevel@tonic-gate /* 7000Sstevel@tonic-gate * Called from call_user_handler() and _thrp_suspend() to take 7010Sstevel@tonic-gate * ourself off of our sleep queue so we can grab locks. 7020Sstevel@tonic-gate */ 7030Sstevel@tonic-gate void 7040Sstevel@tonic-gate unsleep_self(void) 7050Sstevel@tonic-gate { 7060Sstevel@tonic-gate ulwp_t *self = curthread; 7070Sstevel@tonic-gate queue_head_t *qp; 7080Sstevel@tonic-gate 7090Sstevel@tonic-gate /* 7100Sstevel@tonic-gate * Calling enter_critical()/exit_critical() here would lead 7110Sstevel@tonic-gate * to recursion. Just manipulate self->ul_critical directly. 7120Sstevel@tonic-gate */ 7130Sstevel@tonic-gate self->ul_critical++; 7140Sstevel@tonic-gate while (self->ul_sleepq != NULL) { 7150Sstevel@tonic-gate qp = queue_lock(self->ul_wchan, self->ul_qtype); 7160Sstevel@tonic-gate /* 7170Sstevel@tonic-gate * We may have been moved from a CV queue to a 7180Sstevel@tonic-gate * mutex queue while we were attempting queue_lock(). 7190Sstevel@tonic-gate * If so, just loop around and try again. 7200Sstevel@tonic-gate * dequeue_self() clears self->ul_sleepq. 
7210Sstevel@tonic-gate */ 7224570Sraf if (qp == self->ul_sleepq) { 7230Sstevel@tonic-gate (void) dequeue_self(qp, self->ul_wchan); 7244570Sraf self->ul_writer = 0; 7254570Sraf } 7260Sstevel@tonic-gate queue_unlock(qp); 7270Sstevel@tonic-gate } 7280Sstevel@tonic-gate self->ul_critical--; 7290Sstevel@tonic-gate } 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate /* 7320Sstevel@tonic-gate * Common code for calling the the ___lwp_mutex_timedlock() system call. 7330Sstevel@tonic-gate * Returns with mutex_owner and mutex_ownerpid set correctly. 7340Sstevel@tonic-gate */ 7354574Sraf static int 7360Sstevel@tonic-gate mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 7370Sstevel@tonic-gate { 7380Sstevel@tonic-gate ulwp_t *self = curthread; 7390Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 7404574Sraf int mtype = mp->mutex_type; 7410Sstevel@tonic-gate hrtime_t begin_sleep; 7424574Sraf int acquired; 7430Sstevel@tonic-gate int error; 7440Sstevel@tonic-gate 7450Sstevel@tonic-gate self->ul_sp = stkptr(); 7460Sstevel@tonic-gate self->ul_wchan = mp; 7470Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 7480Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 7490Sstevel@tonic-gate self->ul_td_evbuf.eventdata = mp; 7500Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate if (msp) { 7530Sstevel@tonic-gate tdb_incr(msp->mutex_sleep); 7540Sstevel@tonic-gate begin_sleep = gethrtime(); 7550Sstevel@tonic-gate } 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate DTRACE_PROBE1(plockstat, mutex__block, mp); 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate for (;;) { 7604574Sraf /* 7614574Sraf * A return value of EOWNERDEAD or ELOCKUNMAPPED 7624574Sraf * means we successfully acquired the lock. 
7634574Sraf */ 7644574Sraf if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 7654574Sraf error != EOWNERDEAD && error != ELOCKUNMAPPED) { 7664574Sraf acquired = 0; 7670Sstevel@tonic-gate break; 7680Sstevel@tonic-gate } 7690Sstevel@tonic-gate 7704574Sraf if (mtype & USYNC_PROCESS) { 7710Sstevel@tonic-gate /* 7720Sstevel@tonic-gate * Defend against forkall(). We may be the child, 7730Sstevel@tonic-gate * in which case we don't actually own the mutex. 7740Sstevel@tonic-gate */ 7750Sstevel@tonic-gate enter_critical(self); 7760Sstevel@tonic-gate if (mp->mutex_ownerpid == udp->pid) { 7770Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 7780Sstevel@tonic-gate exit_critical(self); 7794574Sraf acquired = 1; 7800Sstevel@tonic-gate break; 7810Sstevel@tonic-gate } 7820Sstevel@tonic-gate exit_critical(self); 7830Sstevel@tonic-gate } else { 7840Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 7854574Sraf acquired = 1; 7860Sstevel@tonic-gate break; 7870Sstevel@tonic-gate } 7880Sstevel@tonic-gate } 7890Sstevel@tonic-gate if (msp) 7900Sstevel@tonic-gate msp->mutex_sleep_time += gethrtime() - begin_sleep; 7910Sstevel@tonic-gate self->ul_wchan = NULL; 7920Sstevel@tonic-gate self->ul_sp = 0; 7930Sstevel@tonic-gate 7944574Sraf if (acquired) { 7954574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 7964574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 7974574Sraf } else { 7984574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 7994574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 8004574Sraf } 8014574Sraf 8020Sstevel@tonic-gate return (error); 8030Sstevel@tonic-gate } 8040Sstevel@tonic-gate 8050Sstevel@tonic-gate /* 8060Sstevel@tonic-gate * Common code for calling the ___lwp_mutex_trylock() system call. 8070Sstevel@tonic-gate * Returns with mutex_owner and mutex_ownerpid set correctly. 
8080Sstevel@tonic-gate */ 8090Sstevel@tonic-gate int 8100Sstevel@tonic-gate mutex_trylock_kernel(mutex_t *mp) 8110Sstevel@tonic-gate { 8120Sstevel@tonic-gate ulwp_t *self = curthread; 8130Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 8144574Sraf int mtype = mp->mutex_type; 8150Sstevel@tonic-gate int error; 8164574Sraf int acquired; 8170Sstevel@tonic-gate 8180Sstevel@tonic-gate for (;;) { 8194574Sraf /* 8204574Sraf * A return value of EOWNERDEAD or ELOCKUNMAPPED 8214574Sraf * means we successfully acquired the lock. 8224574Sraf */ 8234574Sraf if ((error = ___lwp_mutex_trylock(mp)) != 0 && 8244574Sraf error != EOWNERDEAD && error != ELOCKUNMAPPED) { 8254574Sraf acquired = 0; 8260Sstevel@tonic-gate break; 8270Sstevel@tonic-gate } 8280Sstevel@tonic-gate 8294574Sraf if (mtype & USYNC_PROCESS) { 8300Sstevel@tonic-gate /* 8310Sstevel@tonic-gate * Defend against forkall(). We may be the child, 8320Sstevel@tonic-gate * in which case we don't actually own the mutex. 8330Sstevel@tonic-gate */ 8340Sstevel@tonic-gate enter_critical(self); 8350Sstevel@tonic-gate if (mp->mutex_ownerpid == udp->pid) { 8360Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8370Sstevel@tonic-gate exit_critical(self); 8384574Sraf acquired = 1; 8390Sstevel@tonic-gate break; 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate exit_critical(self); 8420Sstevel@tonic-gate } else { 8430Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 8444574Sraf acquired = 1; 8450Sstevel@tonic-gate break; 8460Sstevel@tonic-gate } 8470Sstevel@tonic-gate } 8480Sstevel@tonic-gate 8494574Sraf if (acquired) { 8504574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 8514574Sraf } else if (error != EBUSY) { 8524574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 8534574Sraf } 8544574Sraf 8550Sstevel@tonic-gate return (error); 8560Sstevel@tonic-gate } 8570Sstevel@tonic-gate 8580Sstevel@tonic-gate volatile sc_shared_t * 8590Sstevel@tonic-gate setup_schedctl(void) 8600Sstevel@tonic-gate { 
8610Sstevel@tonic-gate ulwp_t *self = curthread; 8620Sstevel@tonic-gate volatile sc_shared_t *scp; 8630Sstevel@tonic-gate sc_shared_t *tmp; 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 8660Sstevel@tonic-gate !self->ul_vfork && /* not a child of vfork() */ 8670Sstevel@tonic-gate !self->ul_schedctl_called) { /* haven't been called before */ 8680Sstevel@tonic-gate enter_critical(self); 8690Sstevel@tonic-gate self->ul_schedctl_called = &self->ul_uberdata->uberflags; 8700Sstevel@tonic-gate if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 8710Sstevel@tonic-gate self->ul_schedctl = scp = tmp; 8720Sstevel@tonic-gate exit_critical(self); 8730Sstevel@tonic-gate } 8740Sstevel@tonic-gate /* 8750Sstevel@tonic-gate * Unless the call to setup_schedctl() is surrounded 8760Sstevel@tonic-gate * by enter_critical()/exit_critical(), the address 8770Sstevel@tonic-gate * we are returning could be invalid due to a forkall() 8780Sstevel@tonic-gate * having occurred in another thread. 8790Sstevel@tonic-gate */ 8800Sstevel@tonic-gate return (scp); 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate /* 8840Sstevel@tonic-gate * Interfaces from libsched, incorporated into libc. 8850Sstevel@tonic-gate * libsched.so.1 is now a filter library onto libc. 8860Sstevel@tonic-gate */ 8870Sstevel@tonic-gate #pragma weak schedctl_lookup = _schedctl_init 8880Sstevel@tonic-gate #pragma weak _schedctl_lookup = _schedctl_init 8890Sstevel@tonic-gate #pragma weak schedctl_init = _schedctl_init 8900Sstevel@tonic-gate schedctl_t * 8910Sstevel@tonic-gate _schedctl_init(void) 8920Sstevel@tonic-gate { 8930Sstevel@tonic-gate volatile sc_shared_t *scp = setup_schedctl(); 8940Sstevel@tonic-gate return ((scp == NULL)? 
NULL : (schedctl_t *)&scp->sc_preemptctl); 8950Sstevel@tonic-gate } 8960Sstevel@tonic-gate 8970Sstevel@tonic-gate #pragma weak schedctl_exit = _schedctl_exit 8980Sstevel@tonic-gate void 8990Sstevel@tonic-gate _schedctl_exit(void) 9000Sstevel@tonic-gate { 9010Sstevel@tonic-gate } 9020Sstevel@tonic-gate 9030Sstevel@tonic-gate /* 9040Sstevel@tonic-gate * Contract private interface for java. 9050Sstevel@tonic-gate * Set up the schedctl data if it doesn't exist yet. 9060Sstevel@tonic-gate * Return a pointer to the pointer to the schedctl data. 9070Sstevel@tonic-gate */ 9080Sstevel@tonic-gate volatile sc_shared_t *volatile * 9090Sstevel@tonic-gate _thr_schedctl(void) 9100Sstevel@tonic-gate { 9110Sstevel@tonic-gate ulwp_t *self = curthread; 9120Sstevel@tonic-gate volatile sc_shared_t *volatile *ptr; 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate if (self->ul_vfork) 9150Sstevel@tonic-gate return (NULL); 9160Sstevel@tonic-gate if (*(ptr = &self->ul_schedctl) == NULL) 9170Sstevel@tonic-gate (void) setup_schedctl(); 9180Sstevel@tonic-gate return (ptr); 9190Sstevel@tonic-gate } 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate /* 9220Sstevel@tonic-gate * Block signals and attempt to block preemption. 9230Sstevel@tonic-gate * no_preempt()/preempt() must be used in pairs but can be nested. 9240Sstevel@tonic-gate */ 9250Sstevel@tonic-gate void 9260Sstevel@tonic-gate no_preempt(ulwp_t *self) 9270Sstevel@tonic-gate { 9280Sstevel@tonic-gate volatile sc_shared_t *scp; 9290Sstevel@tonic-gate 9300Sstevel@tonic-gate if (self->ul_preempt++ == 0) { 9310Sstevel@tonic-gate enter_critical(self); 9320Sstevel@tonic-gate if ((scp = self->ul_schedctl) != NULL || 9330Sstevel@tonic-gate (scp = setup_schedctl()) != NULL) { 9340Sstevel@tonic-gate /* 9350Sstevel@tonic-gate * Save the pre-existing preempt value. 
9360Sstevel@tonic-gate */ 9370Sstevel@tonic-gate self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 9380Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt = 1; 9390Sstevel@tonic-gate } 9400Sstevel@tonic-gate } 9410Sstevel@tonic-gate } 9420Sstevel@tonic-gate 9430Sstevel@tonic-gate /* 9440Sstevel@tonic-gate * Undo the effects of no_preempt(). 9450Sstevel@tonic-gate */ 9460Sstevel@tonic-gate void 9470Sstevel@tonic-gate preempt(ulwp_t *self) 9480Sstevel@tonic-gate { 9490Sstevel@tonic-gate volatile sc_shared_t *scp; 9500Sstevel@tonic-gate 9510Sstevel@tonic-gate ASSERT(self->ul_preempt > 0); 9520Sstevel@tonic-gate if (--self->ul_preempt == 0) { 9530Sstevel@tonic-gate if ((scp = self->ul_schedctl) != NULL) { 9540Sstevel@tonic-gate /* 9550Sstevel@tonic-gate * Restore the pre-existing preempt value. 9560Sstevel@tonic-gate */ 9570Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 9580Sstevel@tonic-gate if (scp->sc_preemptctl.sc_yield && 9590Sstevel@tonic-gate scp->sc_preemptctl.sc_nopreempt == 0) { 9600Sstevel@tonic-gate lwp_yield(); 9610Sstevel@tonic-gate if (scp->sc_preemptctl.sc_yield) { 9620Sstevel@tonic-gate /* 9630Sstevel@tonic-gate * Shouldn't happen. This is either 9640Sstevel@tonic-gate * a race condition or the thread 9650Sstevel@tonic-gate * just entered the real-time class. 9660Sstevel@tonic-gate */ 9670Sstevel@tonic-gate lwp_yield(); 9680Sstevel@tonic-gate scp->sc_preemptctl.sc_yield = 0; 9690Sstevel@tonic-gate } 9700Sstevel@tonic-gate } 9710Sstevel@tonic-gate } 9720Sstevel@tonic-gate exit_critical(self); 9730Sstevel@tonic-gate } 9740Sstevel@tonic-gate } 9750Sstevel@tonic-gate 9760Sstevel@tonic-gate /* 9770Sstevel@tonic-gate * If a call to preempt() would cause the current thread to yield or to 9780Sstevel@tonic-gate * take deferred actions in exit_critical(), then unpark the specified 9790Sstevel@tonic-gate * lwp so it can run while we delay. 
Return the original lwpid if the 9800Sstevel@tonic-gate * unpark was not performed, else return zero. The tests are a repeat 9810Sstevel@tonic-gate * of some of the tests in preempt(), above. This is a statistical 9820Sstevel@tonic-gate * optimization solely for cond_sleep_queue(), below. 9830Sstevel@tonic-gate */ 9840Sstevel@tonic-gate static lwpid_t 9850Sstevel@tonic-gate preempt_unpark(ulwp_t *self, lwpid_t lwpid) 9860Sstevel@tonic-gate { 9870Sstevel@tonic-gate volatile sc_shared_t *scp = self->ul_schedctl; 9880Sstevel@tonic-gate 9890Sstevel@tonic-gate ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 9900Sstevel@tonic-gate if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 9910Sstevel@tonic-gate (self->ul_curplease && self->ul_critical == 1)) { 9920Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 9930Sstevel@tonic-gate lwpid = 0; 9940Sstevel@tonic-gate } 9950Sstevel@tonic-gate return (lwpid); 9960Sstevel@tonic-gate } 9970Sstevel@tonic-gate 9980Sstevel@tonic-gate /* 999*4613Sraf * Spin for a while (if 'tryhard' is true), trying to grab the lock. 10000Sstevel@tonic-gate * If this fails, return EBUSY and let the caller deal with it. 10010Sstevel@tonic-gate * If this succeeds, return 0 with mutex_owner set to curthread. 
10020Sstevel@tonic-gate */ 10034574Sraf static int 1004*4613Sraf mutex_trylock_adaptive(mutex_t *mp, int tryhard) 10050Sstevel@tonic-gate { 10060Sstevel@tonic-gate ulwp_t *self = curthread; 10074574Sraf int error = EBUSY; 10080Sstevel@tonic-gate ulwp_t *ulwp; 10090Sstevel@tonic-gate volatile sc_shared_t *scp; 10100Sstevel@tonic-gate volatile uint8_t *lockp; 10110Sstevel@tonic-gate volatile uint64_t *ownerp; 10124574Sraf int count; 10134574Sraf int max; 10144574Sraf 10154574Sraf ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 10164574Sraf 10174574Sraf if (MUTEX_OWNER(mp) == self) 10180Sstevel@tonic-gate return (EBUSY); 10190Sstevel@tonic-gate 10204574Sraf /* short-cut, not definitive (see below) */ 10214574Sraf if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 10224574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 10234574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 10244574Sraf return (ENOTRECOVERABLE); 10254574Sraf } 10264574Sraf 1027*4613Sraf if (!tryhard || 1028*4613Sraf (max = self->ul_adaptive_spin) == 0 || 10294574Sraf mp->mutex_spinners >= self->ul_max_spinners) 10304574Sraf max = 1; /* try at least once */ 10314574Sraf 10324574Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 10334574Sraf 10340Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 10350Sstevel@tonic-gate ownerp = (volatile uint64_t *)&mp->mutex_owner; 10360Sstevel@tonic-gate /* 10370Sstevel@tonic-gate * This spin loop is unfair to lwps that have already dropped into 10380Sstevel@tonic-gate * the kernel to sleep. They will starve on a highly-contended mutex. 10390Sstevel@tonic-gate * This is just too bad. The adaptive spin algorithm is intended 10400Sstevel@tonic-gate * to allow programs with highly-contended locks (that is, broken 10410Sstevel@tonic-gate * programs) to execute with reasonable speed despite their contention. 
10420Sstevel@tonic-gate * Being fair would reduce the speed of such programs and well-written 10430Sstevel@tonic-gate * programs will not suffer in any case. 10440Sstevel@tonic-gate */ 10450Sstevel@tonic-gate enter_critical(self); /* protects ul_schedctl */ 10464570Sraf atomic_inc_32(&mp->mutex_spinners); 10474574Sraf for (count = 1; count <= max; count++) { 10480Sstevel@tonic-gate if (*lockp == 0 && set_lock_byte(lockp) == 0) { 10490Sstevel@tonic-gate *ownerp = (uintptr_t)self; 10504574Sraf error = 0; 10514574Sraf break; 10520Sstevel@tonic-gate } 10530Sstevel@tonic-gate SMT_PAUSE(); 10540Sstevel@tonic-gate /* 10550Sstevel@tonic-gate * Stop spinning if the mutex owner is not running on 10560Sstevel@tonic-gate * a processor; it will not drop the lock any time soon 10570Sstevel@tonic-gate * and we would just be wasting time to keep spinning. 10580Sstevel@tonic-gate * 10590Sstevel@tonic-gate * Note that we are looking at another thread (ulwp_t) 10600Sstevel@tonic-gate * without ensuring that the other thread does not exit. 10610Sstevel@tonic-gate * The scheme relies on ulwp_t structures never being 10620Sstevel@tonic-gate * deallocated by the library (the library employs a free 10630Sstevel@tonic-gate * list of ulwp_t structs that are reused when new threads 10640Sstevel@tonic-gate * are created) and on schedctl shared memory never being 10650Sstevel@tonic-gate * deallocated once created via __schedctl(). 10660Sstevel@tonic-gate * 10670Sstevel@tonic-gate * Thus, the worst that can happen when the spinning thread 10680Sstevel@tonic-gate * looks at the owner's schedctl data is that it is looking 10690Sstevel@tonic-gate * at some other thread's schedctl data. This almost never 10700Sstevel@tonic-gate * happens and is benign when it does. 
10710Sstevel@tonic-gate */ 10720Sstevel@tonic-gate if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 10730Sstevel@tonic-gate ((scp = ulwp->ul_schedctl) == NULL || 10740Sstevel@tonic-gate scp->sc_state != SC_ONPROC)) 10750Sstevel@tonic-gate break; 10760Sstevel@tonic-gate } 10774570Sraf atomic_dec_32(&mp->mutex_spinners); 10780Sstevel@tonic-gate exit_critical(self); 10790Sstevel@tonic-gate 10804574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 10814574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 10824574Sraf /* 10834574Sraf * We shouldn't own the mutex; clear the lock. 10844574Sraf */ 10854574Sraf mp->mutex_owner = 0; 10864574Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 10874574Sraf mutex_wakeup_all(mp); 10884574Sraf error = ENOTRECOVERABLE; 10894574Sraf } 10904574Sraf 10914574Sraf if (error) { 10924574Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 10934574Sraf if (error != EBUSY) { 10944574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 10954574Sraf } 10964574Sraf } else { 10974574Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 10984574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 10994574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 11004574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11014574Sraf error = EOWNERDEAD; 11024574Sraf } 11034574Sraf } 11044574Sraf 11054574Sraf return (error); 11060Sstevel@tonic-gate } 11070Sstevel@tonic-gate 11080Sstevel@tonic-gate /* 11090Sstevel@tonic-gate * Same as mutex_trylock_adaptive(), except specifically for queue locks. 11100Sstevel@tonic-gate * The owner field is not set here; the caller (spin_lock_set()) sets it. 
11110Sstevel@tonic-gate */ 11124574Sraf static int 11130Sstevel@tonic-gate mutex_queuelock_adaptive(mutex_t *mp) 11140Sstevel@tonic-gate { 11150Sstevel@tonic-gate ulwp_t *ulwp; 11160Sstevel@tonic-gate volatile sc_shared_t *scp; 11170Sstevel@tonic-gate volatile uint8_t *lockp; 11180Sstevel@tonic-gate volatile uint64_t *ownerp; 11190Sstevel@tonic-gate int count = curthread->ul_queue_spin; 11200Sstevel@tonic-gate 11210Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 11220Sstevel@tonic-gate 11230Sstevel@tonic-gate if (count == 0) 11240Sstevel@tonic-gate return (EBUSY); 11250Sstevel@tonic-gate 11260Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 11270Sstevel@tonic-gate ownerp = (volatile uint64_t *)&mp->mutex_owner; 11280Sstevel@tonic-gate while (--count >= 0) { 11290Sstevel@tonic-gate if (*lockp == 0 && set_lock_byte(lockp) == 0) 11300Sstevel@tonic-gate return (0); 11310Sstevel@tonic-gate SMT_PAUSE(); 11320Sstevel@tonic-gate if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 11330Sstevel@tonic-gate ((scp = ulwp->ul_schedctl) == NULL || 11340Sstevel@tonic-gate scp->sc_state != SC_ONPROC)) 11350Sstevel@tonic-gate break; 11360Sstevel@tonic-gate } 11370Sstevel@tonic-gate 11380Sstevel@tonic-gate return (EBUSY); 11390Sstevel@tonic-gate } 11400Sstevel@tonic-gate 11410Sstevel@tonic-gate /* 11420Sstevel@tonic-gate * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1143*4613Sraf * Spin for a while (if 'tryhard' is true), trying to grab the lock. 11440Sstevel@tonic-gate * If this fails, return EBUSY and let the caller deal with it. 11450Sstevel@tonic-gate * If this succeeds, return 0 with mutex_owner set to curthread 11460Sstevel@tonic-gate * and mutex_ownerpid set to the current pid. 
11470Sstevel@tonic-gate */ 11484574Sraf static int 1149*4613Sraf mutex_trylock_process(mutex_t *mp, int tryhard) 11500Sstevel@tonic-gate { 11510Sstevel@tonic-gate ulwp_t *self = curthread; 11524574Sraf int error = EBUSY; 11530Sstevel@tonic-gate volatile uint8_t *lockp; 11544574Sraf int count; 11554574Sraf int max; 11564574Sraf 11574574Sraf ASSERT(mp->mutex_type & USYNC_PROCESS); 11584574Sraf 11594574Sraf if (shared_mutex_held(mp)) 11600Sstevel@tonic-gate return (EBUSY); 11610Sstevel@tonic-gate 11624574Sraf /* short-cut, not definitive (see below) */ 11634574Sraf if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 11644574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11654574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 11664574Sraf return (ENOTRECOVERABLE); 11674574Sraf } 11684574Sraf 11694574Sraf if (ncpus == 0) 11704574Sraf ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1171*4613Sraf max = (tryhard && ncpus > 1)? self->ul_adaptive_spin : 1; 11724574Sraf if (max == 0) 11734574Sraf max = 1; /* try at least once */ 11744574Sraf 11754574Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 11764574Sraf 11770Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 11780Sstevel@tonic-gate /* 11790Sstevel@tonic-gate * This is a process-shared mutex. 11800Sstevel@tonic-gate * We cannot know if the owner is running on a processor. 11810Sstevel@tonic-gate * We just spin and hope that it is on a processor. 
11820Sstevel@tonic-gate */ 11834574Sraf enter_critical(self); 11844574Sraf for (count = 1; count <= max; count++) { 11854574Sraf if (*lockp == 0 && set_lock_byte(lockp) == 0) { 11864574Sraf mp->mutex_owner = (uintptr_t)self; 11874574Sraf mp->mutex_ownerpid = self->ul_uberdata->pid; 11884574Sraf error = 0; 11894574Sraf break; 11904574Sraf } 11914574Sraf SMT_PAUSE(); 11924574Sraf } 11934574Sraf exit_critical(self); 11944574Sraf 11954574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 11964574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 11974574Sraf /* 11984574Sraf * We shouldn't own the mutex; clear the lock. 11994574Sraf */ 12004574Sraf mp->mutex_owner = 0; 12014574Sraf mp->mutex_ownerpid = 0; 12024574Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 12034574Sraf no_preempt(self); 12044574Sraf (void) ___lwp_mutex_wakeup(mp, 1); 12054574Sraf preempt(self); 12060Sstevel@tonic-gate } 12074574Sraf error = ENOTRECOVERABLE; 12080Sstevel@tonic-gate } 12090Sstevel@tonic-gate 12104574Sraf if (error) { 12114574Sraf DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 12124574Sraf if (error != EBUSY) { 12134574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 12144574Sraf } 12154574Sraf } else { 12164574Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 12174574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 12184574Sraf if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 12194574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 12204574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) 12214574Sraf error = EOWNERDEAD; 12224574Sraf else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 12234574Sraf error = ELOCKUNMAPPED; 12244574Sraf else 12254574Sraf error = EOWNERDEAD; 12264574Sraf } 12274574Sraf } 12284574Sraf 12294574Sraf return (error); 12300Sstevel@tonic-gate } 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate /* 12330Sstevel@tonic-gate * Mutex wakeup code for releasing a USYNC_THREAD mutex. 
12340Sstevel@tonic-gate * Returns the lwpid of the thread that was dequeued, if any. 12350Sstevel@tonic-gate * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 12360Sstevel@tonic-gate * to wake up the specified lwp. 12370Sstevel@tonic-gate */ 12384574Sraf static lwpid_t 12390Sstevel@tonic-gate mutex_wakeup(mutex_t *mp) 12400Sstevel@tonic-gate { 12410Sstevel@tonic-gate lwpid_t lwpid = 0; 12420Sstevel@tonic-gate queue_head_t *qp; 12430Sstevel@tonic-gate ulwp_t *ulwp; 12440Sstevel@tonic-gate int more; 12450Sstevel@tonic-gate 12460Sstevel@tonic-gate /* 12470Sstevel@tonic-gate * Dequeue a waiter from the sleep queue. Don't touch the mutex 12480Sstevel@tonic-gate * waiters bit if no one was found on the queue because the mutex 12490Sstevel@tonic-gate * might have been deallocated or reallocated for another purpose. 12500Sstevel@tonic-gate */ 12510Sstevel@tonic-gate qp = queue_lock(mp, MX); 12520Sstevel@tonic-gate if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 12530Sstevel@tonic-gate lwpid = ulwp->ul_lwpid; 12540Sstevel@tonic-gate mp->mutex_waiters = (more? 1 : 0); 12550Sstevel@tonic-gate } 12560Sstevel@tonic-gate queue_unlock(qp); 12570Sstevel@tonic-gate return (lwpid); 12580Sstevel@tonic-gate } 12590Sstevel@tonic-gate 12600Sstevel@tonic-gate /* 12614574Sraf * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 12624574Sraf */ 12634574Sraf static void 12644574Sraf mutex_wakeup_all(mutex_t *mp) 12654574Sraf { 12664574Sraf queue_head_t *qp; 12674574Sraf int nlwpid = 0; 12684574Sraf int maxlwps = MAXLWPS; 12694574Sraf ulwp_t **ulwpp; 12704574Sraf ulwp_t *ulwp; 12714574Sraf ulwp_t *prev = NULL; 12724574Sraf lwpid_t buffer[MAXLWPS]; 12734574Sraf lwpid_t *lwpid = buffer; 12744574Sraf 12754574Sraf /* 12764574Sraf * Walk the list of waiters and prepare to wake up all of them. 12774574Sraf * The waiters flag has already been cleared from the mutex. 12784574Sraf * 12794574Sraf * We keep track of lwpids that are to be unparked in lwpid[]. 
12804574Sraf * __lwp_unpark_all() is called to unpark all of them after 12814574Sraf * they have been removed from the sleep queue and the sleep 12824574Sraf * queue lock has been dropped. If we run out of space in our 12834574Sraf * on-stack buffer, we need to allocate more but we can't call 12844574Sraf * lmalloc() because we are holding a queue lock when the overflow 12854574Sraf * occurs and lmalloc() acquires a lock. We can't use alloca() 12864574Sraf * either because the application may have allocated a small 12874574Sraf * stack and we don't want to overrun the stack. So we call 12884574Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 12894574Sraf * system call directly since that path acquires no locks. 12904574Sraf */ 12914574Sraf qp = queue_lock(mp, MX); 12924574Sraf ulwpp = &qp->qh_head; 12934574Sraf while ((ulwp = *ulwpp) != NULL) { 12944574Sraf if (ulwp->ul_wchan != mp) { 12954574Sraf prev = ulwp; 12964574Sraf ulwpp = &ulwp->ul_link; 12974574Sraf } else { 12984574Sraf if (nlwpid == maxlwps) 12994574Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 13004574Sraf (void) queue_unlink(qp, ulwpp, prev); 13014574Sraf lwpid[nlwpid++] = ulwp->ul_lwpid; 13024574Sraf } 13034574Sraf } 13044574Sraf mp->mutex_waiters = 0; 13054574Sraf 13064574Sraf if (nlwpid == 0) { 13074574Sraf queue_unlock(qp); 13084574Sraf } else { 13094574Sraf no_preempt(curthread); 13104574Sraf queue_unlock(qp); 13114574Sraf if (nlwpid == 1) 13124574Sraf (void) __lwp_unpark(lwpid[0]); 13134574Sraf else 13144574Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 13154574Sraf preempt(curthread); 13164574Sraf } 13174574Sraf 13184574Sraf if (lwpid != buffer) 13194574Sraf (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 13204574Sraf } 13214574Sraf 13224574Sraf /* 13230Sstevel@tonic-gate * Spin for a while, testing to see if the lock has been grabbed. 13240Sstevel@tonic-gate * If this fails, call mutex_wakeup() to release a waiter. 
13250Sstevel@tonic-gate */ 13264574Sraf static lwpid_t 13274574Sraf mutex_unlock_queue(mutex_t *mp, int release_all) 13280Sstevel@tonic-gate { 13290Sstevel@tonic-gate ulwp_t *self = curthread; 13300Sstevel@tonic-gate uint32_t *lockw = &mp->mutex_lockword; 13310Sstevel@tonic-gate lwpid_t lwpid; 13320Sstevel@tonic-gate volatile uint8_t *lockp; 13330Sstevel@tonic-gate volatile uint32_t *spinp; 13340Sstevel@tonic-gate int count; 13350Sstevel@tonic-gate 13360Sstevel@tonic-gate /* 13370Sstevel@tonic-gate * We use the swap primitive to clear the lock, but we must 13380Sstevel@tonic-gate * atomically retain the waiters bit for the remainder of this 13390Sstevel@tonic-gate * code to work. We first check to see if the waiters bit is 13400Sstevel@tonic-gate * set and if so clear the lock by swapping in a word containing 13410Sstevel@tonic-gate * only the waiters bit. This could produce a false positive test 13420Sstevel@tonic-gate * for whether there are waiters that need to be waked up, but 13430Sstevel@tonic-gate * this just causes an extra call to mutex_wakeup() to do nothing. 13440Sstevel@tonic-gate * The opposite case is more delicate: If there are no waiters, 13450Sstevel@tonic-gate * we swap in a zero lock byte and a zero waiters bit. The result 13460Sstevel@tonic-gate * of the swap could indicate that there really was a waiter so in 13470Sstevel@tonic-gate * this case we go directly to mutex_wakeup() without performing 13480Sstevel@tonic-gate * any of the adaptive code because the waiter bit has been cleared 13490Sstevel@tonic-gate * and the adaptive code is unreliable in this case. 
13500Sstevel@tonic-gate */ 13514574Sraf if (release_all || !(*lockw & WAITERMASK)) { 13520Sstevel@tonic-gate mp->mutex_owner = 0; 13530Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 13544570Sraf if (!(atomic_swap_32(lockw, 0) & WAITERMASK)) 13554574Sraf return (0); /* no waiters */ 13560Sstevel@tonic-gate no_preempt(self); /* ensure a prompt wakeup */ 13570Sstevel@tonic-gate } else { 13580Sstevel@tonic-gate no_preempt(self); /* ensure a prompt wakeup */ 13590Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 13600Sstevel@tonic-gate spinp = (volatile uint32_t *)&mp->mutex_spinners; 13610Sstevel@tonic-gate mp->mutex_owner = 0; 13620Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 13634570Sraf /* clear lock, retain waiter */ 13644570Sraf (void) atomic_swap_32(lockw, WAITER); 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate /* 13670Sstevel@tonic-gate * We spin here fewer times than mutex_trylock_adaptive(). 13680Sstevel@tonic-gate * We are trying to balance two conflicting goals: 13690Sstevel@tonic-gate * 1. Avoid waking up anyone if a spinning thread 13700Sstevel@tonic-gate * grabs the lock. 13710Sstevel@tonic-gate * 2. Wake up a sleeping thread promptly to get on 13720Sstevel@tonic-gate * with useful work. 13730Sstevel@tonic-gate * We don't spin at all if there is no acquiring spinner; 13740Sstevel@tonic-gate * (mp->mutex_spinners is non-zero if there are spinners). 13750Sstevel@tonic-gate */ 13760Sstevel@tonic-gate for (count = self->ul_release_spin; 13770Sstevel@tonic-gate *spinp && count > 0; count--) { 13780Sstevel@tonic-gate /* 13790Sstevel@tonic-gate * There is a waiter that we will have to wake 13800Sstevel@tonic-gate * up unless someone else grabs the lock while 13810Sstevel@tonic-gate * we are busy spinning. 
Like the spin loop in 13820Sstevel@tonic-gate * mutex_trylock_adaptive(), this spin loop is 13830Sstevel@tonic-gate * unfair to lwps that have already dropped into 13840Sstevel@tonic-gate * the kernel to sleep. They will starve on a 13850Sstevel@tonic-gate * highly-contended mutex. Too bad. 13860Sstevel@tonic-gate */ 13870Sstevel@tonic-gate if (*lockp != 0) { /* somebody grabbed the lock */ 13880Sstevel@tonic-gate preempt(self); 13890Sstevel@tonic-gate return (0); 13900Sstevel@tonic-gate } 13910Sstevel@tonic-gate SMT_PAUSE(); 13920Sstevel@tonic-gate } 13930Sstevel@tonic-gate 13940Sstevel@tonic-gate /* 13950Sstevel@tonic-gate * No one grabbed the lock. 13960Sstevel@tonic-gate * Wake up some lwp that is waiting for it. 13970Sstevel@tonic-gate */ 13980Sstevel@tonic-gate mp->mutex_waiters = 0; 13994574Sraf } 14004574Sraf 14014574Sraf if (release_all) { 14024574Sraf mutex_wakeup_all(mp); 14034574Sraf lwpid = 0; 14044574Sraf } else { 14050Sstevel@tonic-gate lwpid = mutex_wakeup(mp); 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate if (lwpid == 0) 14080Sstevel@tonic-gate preempt(self); 14090Sstevel@tonic-gate return (lwpid); 14100Sstevel@tonic-gate } 14110Sstevel@tonic-gate 14120Sstevel@tonic-gate /* 14130Sstevel@tonic-gate * Like mutex_unlock_queue(), but for process-shared mutexes. 14140Sstevel@tonic-gate * We tested the waiters field before calling here and it was non-zero. 14150Sstevel@tonic-gate */ 14164574Sraf static void 14174574Sraf mutex_unlock_process(mutex_t *mp, int release_all) 14180Sstevel@tonic-gate { 14190Sstevel@tonic-gate ulwp_t *self = curthread; 14200Sstevel@tonic-gate int count; 14210Sstevel@tonic-gate volatile uint8_t *lockp; 14220Sstevel@tonic-gate 14230Sstevel@tonic-gate /* 14240Sstevel@tonic-gate * See the comments in mutex_unlock_queue(), above. 14250Sstevel@tonic-gate */ 14264574Sraf if (ncpus == 0) 14274574Sraf ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 14284574Sraf count = (ncpus > 1)? 
self->ul_release_spin : 0; 14290Sstevel@tonic-gate no_preempt(self); 14300Sstevel@tonic-gate mp->mutex_owner = 0; 14310Sstevel@tonic-gate mp->mutex_ownerpid = 0; 14320Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 14334574Sraf if (release_all || count == 0) { 14340Sstevel@tonic-gate /* clear lock, test waiter */ 14354570Sraf if (!(atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK)) { 14360Sstevel@tonic-gate /* no waiters now */ 14370Sstevel@tonic-gate preempt(self); 14380Sstevel@tonic-gate return; 14390Sstevel@tonic-gate } 14400Sstevel@tonic-gate } else { 14410Sstevel@tonic-gate /* clear lock, retain waiter */ 14424570Sraf (void) atomic_swap_32(&mp->mutex_lockword, WAITER); 14430Sstevel@tonic-gate lockp = (volatile uint8_t *)&mp->mutex_lockw; 14440Sstevel@tonic-gate while (--count >= 0) { 14450Sstevel@tonic-gate if (*lockp != 0) { 14460Sstevel@tonic-gate /* somebody grabbed the lock */ 14470Sstevel@tonic-gate preempt(self); 14480Sstevel@tonic-gate return; 14490Sstevel@tonic-gate } 14500Sstevel@tonic-gate SMT_PAUSE(); 14510Sstevel@tonic-gate } 14520Sstevel@tonic-gate /* 14530Sstevel@tonic-gate * We must clear the waiters field before going 14540Sstevel@tonic-gate * to the kernel, else it could remain set forever. 14550Sstevel@tonic-gate */ 14560Sstevel@tonic-gate mp->mutex_waiters = 0; 14570Sstevel@tonic-gate } 14584574Sraf (void) ___lwp_mutex_wakeup(mp, release_all); 14590Sstevel@tonic-gate preempt(self); 14600Sstevel@tonic-gate } 14610Sstevel@tonic-gate 14620Sstevel@tonic-gate /* 14630Sstevel@tonic-gate * Return the real priority of a thread. 14640Sstevel@tonic-gate */ 14650Sstevel@tonic-gate int 14660Sstevel@tonic-gate real_priority(ulwp_t *ulwp) 14670Sstevel@tonic-gate { 14680Sstevel@tonic-gate if (ulwp->ul_epri == 0) 14690Sstevel@tonic-gate return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 14700Sstevel@tonic-gate return (ulwp->ul_emappedpri? 
ulwp->ul_emappedpri : ulwp->ul_epri); 14710Sstevel@tonic-gate } 14720Sstevel@tonic-gate 14730Sstevel@tonic-gate void 14740Sstevel@tonic-gate stall(void) 14750Sstevel@tonic-gate { 14760Sstevel@tonic-gate for (;;) 14770Sstevel@tonic-gate (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 14780Sstevel@tonic-gate } 14790Sstevel@tonic-gate 14800Sstevel@tonic-gate /* 14810Sstevel@tonic-gate * Acquire a USYNC_THREAD mutex via user-level sleep queues. 14820Sstevel@tonic-gate * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 14834574Sraf * If successful, returns with mutex_owner set correctly. 14840Sstevel@tonic-gate */ 14850Sstevel@tonic-gate int 14860Sstevel@tonic-gate mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 14870Sstevel@tonic-gate timespec_t *tsp) 14880Sstevel@tonic-gate { 14890Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 14900Sstevel@tonic-gate queue_head_t *qp; 14910Sstevel@tonic-gate hrtime_t begin_sleep; 14920Sstevel@tonic-gate int error = 0; 14930Sstevel@tonic-gate 14940Sstevel@tonic-gate self->ul_sp = stkptr(); 14950Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 14960Sstevel@tonic-gate self->ul_wchan = mp; 14970Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 14980Sstevel@tonic-gate self->ul_td_evbuf.eventdata = mp; 14990Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 15000Sstevel@tonic-gate } 15010Sstevel@tonic-gate if (msp) { 15020Sstevel@tonic-gate tdb_incr(msp->mutex_sleep); 15030Sstevel@tonic-gate begin_sleep = gethrtime(); 15040Sstevel@tonic-gate } 15050Sstevel@tonic-gate 15060Sstevel@tonic-gate DTRACE_PROBE1(plockstat, mutex__block, mp); 15070Sstevel@tonic-gate 15080Sstevel@tonic-gate /* 15090Sstevel@tonic-gate * Put ourself on the sleep queue, and while we are 15100Sstevel@tonic-gate * unable to grab the lock, go park in the kernel. 15110Sstevel@tonic-gate * Take ourself off the sleep queue after we acquire the lock. 
15120Sstevel@tonic-gate * The waiter bit can be set/cleared only while holding the queue lock. 15130Sstevel@tonic-gate */ 15140Sstevel@tonic-gate qp = queue_lock(mp, MX); 15150Sstevel@tonic-gate enqueue(qp, self, mp, MX); 15160Sstevel@tonic-gate mp->mutex_waiters = 1; 15170Sstevel@tonic-gate for (;;) { 15180Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 15190Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 15200Sstevel@tonic-gate mp->mutex_waiters = dequeue_self(qp, mp); 15210Sstevel@tonic-gate break; 15220Sstevel@tonic-gate } 15230Sstevel@tonic-gate set_parking_flag(self, 1); 15240Sstevel@tonic-gate queue_unlock(qp); 15250Sstevel@tonic-gate /* 15260Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 15270Sstevel@tonic-gate * if we are unparked before the timeout expires. 15280Sstevel@tonic-gate */ 15290Sstevel@tonic-gate if ((error = __lwp_park(tsp, 0)) == EINTR) 15300Sstevel@tonic-gate error = 0; 15310Sstevel@tonic-gate set_parking_flag(self, 0); 15320Sstevel@tonic-gate /* 15330Sstevel@tonic-gate * We could have taken a signal or suspended ourself. 15340Sstevel@tonic-gate * If we did, then we removed ourself from the queue. 15350Sstevel@tonic-gate * Someone else may have removed us from the queue 15360Sstevel@tonic-gate * as a consequence of mutex_unlock(). We may have 15370Sstevel@tonic-gate * gotten a timeout from __lwp_park(). Or we may still 15380Sstevel@tonic-gate * be on the queue and this is just a spurious wakeup. 
15390Sstevel@tonic-gate */ 15400Sstevel@tonic-gate qp = queue_lock(mp, MX); 15410Sstevel@tonic-gate if (self->ul_sleepq == NULL) { 15424574Sraf if (error) 15430Sstevel@tonic-gate break; 15440Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 15450Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 15460Sstevel@tonic-gate break; 15470Sstevel@tonic-gate } 15480Sstevel@tonic-gate enqueue(qp, self, mp, MX); 15490Sstevel@tonic-gate mp->mutex_waiters = 1; 15500Sstevel@tonic-gate } 15510Sstevel@tonic-gate ASSERT(self->ul_sleepq == qp && 15520Sstevel@tonic-gate self->ul_qtype == MX && 15530Sstevel@tonic-gate self->ul_wchan == mp); 15540Sstevel@tonic-gate if (error) { 15550Sstevel@tonic-gate mp->mutex_waiters = dequeue_self(qp, mp); 15560Sstevel@tonic-gate break; 15570Sstevel@tonic-gate } 15580Sstevel@tonic-gate } 15590Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 15600Sstevel@tonic-gate self->ul_wchan == NULL); 15610Sstevel@tonic-gate self->ul_sp = 0; 15620Sstevel@tonic-gate queue_unlock(qp); 15634574Sraf 15640Sstevel@tonic-gate if (msp) 15650Sstevel@tonic-gate msp->mutex_sleep_time += gethrtime() - begin_sleep; 15660Sstevel@tonic-gate 15670Sstevel@tonic-gate ASSERT(error == 0 || error == EINVAL || error == ETIME); 15684574Sraf 15694574Sraf if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 15704574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 15714574Sraf /* 15724574Sraf * We shouldn't own the mutex; clear the lock. 
15734574Sraf */ 15744574Sraf mp->mutex_owner = 0; 15754574Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 15764574Sraf mutex_wakeup_all(mp); 15774574Sraf error = ENOTRECOVERABLE; 15784574Sraf } 15794574Sraf 15804574Sraf if (error) { 15814574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 15824574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, error); 15834574Sraf } else { 15844574Sraf DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 15854574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 15864574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 15874574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 15884574Sraf error = EOWNERDEAD; 15894574Sraf } 15904574Sraf } 15914574Sraf 15920Sstevel@tonic-gate return (error); 15930Sstevel@tonic-gate } 15940Sstevel@tonic-gate 15954574Sraf static int 15964574Sraf mutex_recursion(mutex_t *mp, int mtype, int try) 15974574Sraf { 15984574Sraf ASSERT(mutex_is_held(mp)); 15994574Sraf ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 16004574Sraf ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 16014574Sraf 16024574Sraf if (mtype & LOCK_RECURSIVE) { 16034574Sraf if (mp->mutex_rcount == RECURSION_MAX) { 16044574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 16054574Sraf return (EAGAIN); 16064574Sraf } 16074574Sraf mp->mutex_rcount++; 16084574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 16094574Sraf return (0); 16104574Sraf } 16114574Sraf if (try == MUTEX_LOCK) { 16124574Sraf DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 16134574Sraf return (EDEADLK); 16144574Sraf } 16154574Sraf return (EBUSY); 16164574Sraf } 16174574Sraf 16184574Sraf /* 16194574Sraf * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 16204574Sraf * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 16214574Sraf * We use tdb_hash_lock here and in the synch object tracking code in 16224574Sraf * the tdb_agent.c file. There is no conflict between these two usages. 
16234574Sraf */ 16244574Sraf void 16254574Sraf register_lock(mutex_t *mp) 16264574Sraf { 16274574Sraf uberdata_t *udp = curthread->ul_uberdata; 16284574Sraf uint_t hash = LOCK_HASH(mp); 16294574Sraf robust_t *rlp; 16304574Sraf robust_t **rlpp; 16314574Sraf robust_t **table; 16324574Sraf 16334574Sraf if ((table = udp->robustlocks) == NULL) { 16344574Sraf lmutex_lock(&udp->tdb_hash_lock); 16354574Sraf if ((table = udp->robustlocks) == NULL) { 16364574Sraf table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 16374574Sraf _membar_producer(); 16384574Sraf udp->robustlocks = table; 16394574Sraf } 16404574Sraf lmutex_unlock(&udp->tdb_hash_lock); 16414574Sraf } 16424574Sraf _membar_consumer(); 16434574Sraf 16444574Sraf /* 16454574Sraf * First search the registered table with no locks held. 16464574Sraf * This is safe because the table never shrinks 16474574Sraf * and we can only get a false negative. 16484574Sraf */ 16494574Sraf for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 16504574Sraf if (rlp->robust_lock == mp) /* already registered */ 16514574Sraf return; 16524574Sraf } 16534574Sraf 16544574Sraf /* 16554574Sraf * The lock was not found. 16564574Sraf * Repeat the operation with tdb_hash_lock held. 16574574Sraf */ 16584574Sraf lmutex_lock(&udp->tdb_hash_lock); 16594574Sraf 16604574Sraf for (rlpp = &table[hash]; 16614574Sraf (rlp = *rlpp) != NULL; 16624574Sraf rlpp = &rlp->robust_next) { 16634574Sraf if (rlp->robust_lock == mp) { /* already registered */ 16644574Sraf lmutex_unlock(&udp->tdb_hash_lock); 16654574Sraf return; 16664574Sraf } 16674574Sraf } 16684574Sraf 16694574Sraf /* 16704574Sraf * The lock has never been registered. 16714574Sraf * Register it now and add it to the table. 
16724574Sraf */ 16734574Sraf (void) ___lwp_mutex_register(mp); 16744574Sraf rlp = lmalloc(sizeof (*rlp)); 16754574Sraf rlp->robust_lock = mp; 16764574Sraf _membar_producer(); 16774574Sraf *rlpp = rlp; 16784574Sraf 16794574Sraf lmutex_unlock(&udp->tdb_hash_lock); 16804574Sraf } 16814574Sraf 16824574Sraf /* 16834574Sraf * This is called in the child of fork()/forkall() to start over 16844574Sraf * with a clean slate. (Each process must register its own locks.) 16854574Sraf * No locks are needed because all other threads are suspended or gone. 16864574Sraf */ 16874574Sraf void 16884574Sraf unregister_locks(void) 16894574Sraf { 16904574Sraf uberdata_t *udp = curthread->ul_uberdata; 16914574Sraf uint_t hash; 16924574Sraf robust_t **table; 16934574Sraf robust_t *rlp; 16944574Sraf robust_t *next; 16954574Sraf 16964574Sraf if ((table = udp->robustlocks) != NULL) { 16974574Sraf for (hash = 0; hash < LOCKHASHSZ; hash++) { 16984574Sraf rlp = table[hash]; 16994574Sraf while (rlp != NULL) { 17004574Sraf next = rlp->robust_next; 17014574Sraf lfree(rlp, sizeof (*rlp)); 17024574Sraf rlp = next; 17034574Sraf } 17044574Sraf } 17054574Sraf lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 17064574Sraf udp->robustlocks = NULL; 17074574Sraf } 17084574Sraf } 17094574Sraf 17100Sstevel@tonic-gate /* 17110Sstevel@tonic-gate * Returns with mutex_owner set correctly. 
 */
/*
 * General acquisition path used when the fast paths in mutex_lock_impl()
 * and __mutex_trylock() do not apply.  It deals with all of the mutex
 * type bits tested below (recursive, error-checking, priority ceiling,
 * priority inheritance, robust, process-shared).
 *
 *	mp	the mutex to acquire
 *	tsp	timeout handed on to mutex_lock_kernel() or
 *		mutex_lock_queue(); NULL when the caller has no timeout
 *	try	MUTEX_TRY or MUTEX_LOCK; with MUTEX_TRY there is no
 *		fallback to mutex_lock_kernel() or mutex_lock_queue()
 *
 * Returns 0 or an errno value.  EOWNERDEAD and ELOCKUNMAPPED are
 * handled like success at the end: the hold is recorded for them too.
 */
static int
mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
	int error = 0;
	uint8_t ceil;	/* set and used only for LOCK_PRIO_PROTECT */
	int myprio;	/* set and used only for LOCK_PRIO_PROTECT */

	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);

	if (!self->ul_schedctl_called)
		(void) setup_schedctl();

	/* count the attempt when lock statistics are being kept */
	if (msp && try == MUTEX_TRY)
		tdb_incr(msp->mutex_try);

	/* relocking a held recursive/error-checking mutex is decided here */
	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp))
		return (mutex_recursion(mp, mtype, try));

	/* error detection: an untimed blocking lock of a mutex we hold */
	if (self->ul_error_detection && try == MUTEX_LOCK &&
	    tsp == NULL && mutex_is_held(mp))
		lock_error(mp, "mutex_lock", NULL, NULL);

	/*
	 * Priority ceiling: a caller whose priority is above the ceiling
	 * gets EINVAL.  Otherwise put the mutex on our ceiling list and,
	 * if we are below the ceiling, call _ceil_prio_inherit().
	 */
	if (mtype & LOCK_PRIO_PROTECT) {
		ceil = mp->mutex_ceiling;
		ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0);
		myprio = real_priority(self);
		if (myprio > ceil) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
			return (EINVAL);
		}
		if ((error = _ceil_mylist_add(mp)) != 0) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
			return (error);
		}
		if (myprio < ceil)
			_ceil_prio_inherit(ceil);
	}

	/* process-shared robust mutexes are registered before locking */
	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
	    == (USYNC_PROCESS | LOCK_ROBUST))
		register_lock(mp);

	if (mtype & LOCK_PRIO_INHERIT) {
		/* go straight to the kernel */
		if (try == MUTEX_TRY)
			error = mutex_trylock_kernel(mp);
		else	/* MUTEX_LOCK */
			error = mutex_lock_kernel(mp, tsp, msp);
		/*
		 * The kernel never sets or clears the lock byte
		 * for LOCK_PRIO_INHERIT mutexes.
		 * Set it here for consistency.
		 */
		switch (error) {
		case 0:
			mp->mutex_lockw = LOCKSET;
			break;
		case EOWNERDEAD:
		case ELOCKUNMAPPED:
			mp->mutex_lockw = LOCKSET;
			/* FALLTHROUGH */
		case ENOTRECOVERABLE:
			ASSERT(mtype & LOCK_ROBUST);
			break;
		case EDEADLK:
			/*
			 * stall() loops forever, so only the MUTEX_TRY
			 * case goes on to report EBUSY.
			 */
			if (try == MUTEX_LOCK)
				stall();
			error = EBUSY;
			break;
		}
	} else if (mtype & USYNC_PROCESS) {
		/* try first; for MUTEX_LOCK fall back to the kernel on EBUSY */
		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_kernel(mp, tsp, msp);
	} else {	/* USYNC_THREAD */
		/* try first; for MUTEX_LOCK fall back to the sleep queue */
		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_queue(self, msp, mp, tsp);
	}

	switch (error) {
	case 0:
	case EOWNERDEAD:
	case ELOCKUNMAPPED:
		/* these three results are the ones recorded as a hold */
		if (mtype & LOCK_ROBUST)
			remember_lock(mp);
		if (msp)
			record_begin_hold(msp);
		break;
	default:
		/* failure: undo the priority ceiling bookkeeping done above */
		if (mtype & LOCK_PRIO_PROTECT) {
			(void) _ceil_mylist_del(mp);
			if (myprio < ceil)
				_ceil_prio_waive();
		}
		/* a failed trylock is counted and reported as TD_LOCK_TRY */
		if (try == MUTEX_TRY) {
			if (msp)
				tdb_incr(msp->mutex_try_fail);
			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
				tdb_event(TD_LOCK_TRY, udp);
			}
		}
		break;
	}

	return (error);
}

/*
 * Fast path for USYNC_PROCESS mutexes that have no flags set other than
 * LOCK_RECURSIVE and/or LOCK_ERRORCHECK.  Called from mutex_lock_impl()
 * with try == MUTEX_LOCK and from __mutex_trylock() with try == MUTEX_TRY.
 *
 *	mp	the mutex to acquire
 *	tsp	timeout handed on to mutex_lock_kernel(); NULL if none
 *	mtype	the mutex type bits, already read by the caller
 *	try	MUTEX_TRY or MUTEX_LOCK
 *
 * Returns 0 with mutex_owner and mutex_ownerpid set when the lock byte
 * is taken here; otherwise the result of mutex_recursion() or
 * mutex_lock_kernel(), or EBUSY for a failed MUTEX_TRY.
 */
int
fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	/*
	 * We know that USYNC_PROCESS is set in mtype and that
	 * zero, one, or both of the flags LOCK_RECURSIVE and
	 * LOCK_ERRORCHECK are set, and that no other flags are set.
	 */
	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
	/*
	 * The lock byte and both owner fields are updated
	 * between enter_critical() and exit_critical().
	 */
	enter_critical(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		mp->mutex_ownerpid = udp->pid;
		exit_critical(self);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		return (0);
	}
	exit_critical(self);

	/* the lock is taken; if we are the holder, this is a relock */
	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
		return (mutex_recursion(mp, mtype, try));

	if (try == MUTEX_LOCK) {
		/* one more user-level attempt before going to the kernel */
		if (mutex_trylock_process(mp, 1) == 0)
			return (0);
		return (mutex_lock_kernel(mp, tsp, NULL));
	}

	/* MUTEX_TRY failed: report the event if requested, then EBUSY */
	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
		tdb_event(TD_LOCK_TRY, udp);
	}
	return (EBUSY);
}

/*
 * Common implementation behind the blocking lock entry points.
 * It tries two fast paths in turn and otherwise hands the request
 * to mutex_lock_internal() with MUTEX_LOCK.
 *
 *	mp	the mutex to acquire
 *	tsp	timeout handed on to the slower paths; NULL if none
 *
 * Returns 0 or an errno value.
 */
static int
mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	uberflags_t *gflags;
	int mtype;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		/* mtype != 0 here means LOCK_RECURSIVE and/or LOCK_ERRORCHECK */
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/*
		 * We have reached a deadlock, probably because the
		 * process is executing non-async-signal-safe code in
		 * a signal handler and is attempting to acquire a lock
		 * that it already owns.  This is not surprising, given
		 * bad programming practices over the years that have
		 * resulted in applications calling printf() and such
		 * in their signal handlers.  Unless the user has told
		 * us that the signal handlers are safe by setting:
		 *	export _THREAD_ASYNC_SAFE=1
		 * we return EDEADLK rather than actually deadlocking.
		 */
		if (tsp == NULL &&
		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
			return (EDEADLK);
		}
		/* otherwise fall through to the paths below */
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (gflags->uf_trs_ted |
	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
		if (mtype & USYNC_PROCESS)
			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/* if the adaptive attempt fails, go to the sleep queue */
		if (mutex_trylock_adaptive(mp, 1) != 0)
			return (mutex_lock_queue(self, NULL, mp, tsp));
		return (0);
	}

	/* else do it the long way */
	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
}

#pragma weak _private_mutex_lock = __mutex_lock
#pragma weak mutex_lock = __mutex_lock
#pragma weak _mutex_lock = __mutex_lock
#pragma weak pthread_mutex_lock = __mutex_lock
19410Sstevel@tonic-gate #pragma weak _pthread_mutex_lock = __mutex_lock 19420Sstevel@tonic-gate int 19430Sstevel@tonic-gate __mutex_lock(mutex_t *mp) 19440Sstevel@tonic-gate { 19450Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 19460Sstevel@tonic-gate return (mutex_lock_impl(mp, NULL)); 19470Sstevel@tonic-gate } 19480Sstevel@tonic-gate 19490Sstevel@tonic-gate #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 19500Sstevel@tonic-gate int 19510Sstevel@tonic-gate _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 19520Sstevel@tonic-gate { 19530Sstevel@tonic-gate timespec_t tslocal; 19540Sstevel@tonic-gate int error; 19550Sstevel@tonic-gate 19560Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 19570Sstevel@tonic-gate abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 19580Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 19590Sstevel@tonic-gate if (error == ETIME) 19600Sstevel@tonic-gate error = ETIMEDOUT; 19610Sstevel@tonic-gate return (error); 19620Sstevel@tonic-gate } 19630Sstevel@tonic-gate 19640Sstevel@tonic-gate #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 19650Sstevel@tonic-gate int 19660Sstevel@tonic-gate _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 19670Sstevel@tonic-gate { 19680Sstevel@tonic-gate timespec_t tslocal; 19690Sstevel@tonic-gate int error; 19700Sstevel@tonic-gate 19710Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 19720Sstevel@tonic-gate tslocal = *reltime; 19730Sstevel@tonic-gate error = mutex_lock_impl(mp, &tslocal); 19740Sstevel@tonic-gate if (error == ETIME) 19750Sstevel@tonic-gate error = ETIMEDOUT; 19760Sstevel@tonic-gate return (error); 19770Sstevel@tonic-gate } 19780Sstevel@tonic-gate 19790Sstevel@tonic-gate #pragma weak _private_mutex_trylock = __mutex_trylock 19800Sstevel@tonic-gate #pragma weak mutex_trylock = __mutex_trylock 19810Sstevel@tonic-gate 
#pragma weak _mutex_trylock = __mutex_trylock 19820Sstevel@tonic-gate #pragma weak pthread_mutex_trylock = __mutex_trylock 19830Sstevel@tonic-gate #pragma weak _pthread_mutex_trylock = __mutex_trylock 19840Sstevel@tonic-gate int 19850Sstevel@tonic-gate __mutex_trylock(mutex_t *mp) 19860Sstevel@tonic-gate { 19870Sstevel@tonic-gate ulwp_t *self = curthread; 19880Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 19890Sstevel@tonic-gate uberflags_t *gflags; 19900Sstevel@tonic-gate int mtype; 19910Sstevel@tonic-gate 19920Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 19930Sstevel@tonic-gate /* 19940Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 19950Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 19960Sstevel@tonic-gate * no error detection, no lock statistics, 19970Sstevel@tonic-gate * and the process has only a single thread. 19980Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 19990Sstevel@tonic-gate */ 20000Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 20010Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 20020Sstevel@tonic-gate /* 20030Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 20040Sstevel@tonic-gate */ 20050Sstevel@tonic-gate if (mp->mutex_lockw == 0) { 20060Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 20070Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 20080Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 20090Sstevel@tonic-gate return (0); 20100Sstevel@tonic-gate } 20114574Sraf if (mtype && MUTEX_OWNER(mp) == self) 20124574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 20130Sstevel@tonic-gate return (EBUSY); 20140Sstevel@tonic-gate } 20150Sstevel@tonic-gate 20160Sstevel@tonic-gate /* 20170Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 20180Sstevel@tonic-gate * no error detection, and no lock statistics. 
20190Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 20200Sstevel@tonic-gate */ 20210Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 20220Sstevel@tonic-gate (gflags->uf_trs_ted | 20230Sstevel@tonic-gate (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 20240Sstevel@tonic-gate if (mtype & USYNC_PROCESS) 20250Sstevel@tonic-gate return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 20260Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 20270Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 20280Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 20290Sstevel@tonic-gate return (0); 20300Sstevel@tonic-gate } 20314574Sraf if (mtype && MUTEX_OWNER(mp) == self) 20324574Sraf return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2033*4613Sraf if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2034*4613Sraf self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2035*4613Sraf tdb_event(TD_LOCK_TRY, udp); 20360Sstevel@tonic-gate } 2037*4613Sraf return (EBUSY); 20380Sstevel@tonic-gate } 20390Sstevel@tonic-gate 20400Sstevel@tonic-gate /* else do it the long way */ 20410Sstevel@tonic-gate return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 20420Sstevel@tonic-gate } 20430Sstevel@tonic-gate 20440Sstevel@tonic-gate int 20454574Sraf mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 20460Sstevel@tonic-gate { 20470Sstevel@tonic-gate ulwp_t *self = curthread; 20480Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 20490Sstevel@tonic-gate int mtype = mp->mutex_type; 20500Sstevel@tonic-gate tdb_mutex_stats_t *msp; 20514574Sraf int error = 0; 20524574Sraf int release_all; 20530Sstevel@tonic-gate lwpid_t lwpid; 20540Sstevel@tonic-gate 20550Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 20560Sstevel@tonic-gate return (EPERM); 20570Sstevel@tonic-gate 20580Sstevel@tonic-gate if (self->ul_error_detection && !mutex_is_held(mp)) 20590Sstevel@tonic-gate lock_error(mp, 
"mutex_unlock", NULL, NULL); 20600Sstevel@tonic-gate 20610Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 20620Sstevel@tonic-gate mp->mutex_rcount--; 20630Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 20640Sstevel@tonic-gate return (0); 20650Sstevel@tonic-gate } 20660Sstevel@tonic-gate 20670Sstevel@tonic-gate if ((msp = MUTEX_STATS(mp, udp)) != NULL) 20680Sstevel@tonic-gate (void) record_hold_time(msp); 20690Sstevel@tonic-gate 20704574Sraf if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 20714574Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 20724574Sraf ASSERT(mp->mutex_type & LOCK_ROBUST); 20734574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 20744574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 20754574Sraf } 20764574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 20774574Sraf 20784574Sraf if (mtype & LOCK_PRIO_INHERIT) { 20790Sstevel@tonic-gate no_preempt(self); 20800Sstevel@tonic-gate mp->mutex_owner = 0; 20810Sstevel@tonic-gate mp->mutex_ownerpid = 0; 20820Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 20834574Sraf mp->mutex_lockw = LOCKCLEAR; 20844574Sraf error = ___lwp_mutex_unlock(mp); 20850Sstevel@tonic-gate preempt(self); 20860Sstevel@tonic-gate } else if (mtype & USYNC_PROCESS) { 20874574Sraf if (mp->mutex_lockword & WAITERMASK) { 20884574Sraf mutex_unlock_process(mp, release_all); 20894574Sraf } else { 20900Sstevel@tonic-gate mp->mutex_owner = 0; 20910Sstevel@tonic-gate mp->mutex_ownerpid = 0; 20920Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 20934570Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & 20944574Sraf WAITERMASK) { /* a waiter suddenly appeared */ 20950Sstevel@tonic-gate no_preempt(self); 20964574Sraf (void) ___lwp_mutex_wakeup(mp, release_all); 20970Sstevel@tonic-gate preempt(self); 20980Sstevel@tonic-gate } 20990Sstevel@tonic-gate } 21000Sstevel@tonic-gate } else { /* USYNC_THREAD */ 21014574Sraf 
if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 21020Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 21030Sstevel@tonic-gate preempt(self); 21040Sstevel@tonic-gate } 21050Sstevel@tonic-gate } 21060Sstevel@tonic-gate 21074574Sraf if (mtype & LOCK_ROBUST) 21084574Sraf forget_lock(mp); 21094574Sraf 21104574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 21114574Sraf _ceil_prio_waive(); 21124574Sraf 21130Sstevel@tonic-gate return (error); 21140Sstevel@tonic-gate } 21150Sstevel@tonic-gate 21160Sstevel@tonic-gate #pragma weak _private_mutex_unlock = __mutex_unlock 21170Sstevel@tonic-gate #pragma weak mutex_unlock = __mutex_unlock 21180Sstevel@tonic-gate #pragma weak _mutex_unlock = __mutex_unlock 21190Sstevel@tonic-gate #pragma weak pthread_mutex_unlock = __mutex_unlock 21200Sstevel@tonic-gate #pragma weak _pthread_mutex_unlock = __mutex_unlock 21210Sstevel@tonic-gate int 21220Sstevel@tonic-gate __mutex_unlock(mutex_t *mp) 21230Sstevel@tonic-gate { 21240Sstevel@tonic-gate ulwp_t *self = curthread; 21250Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 21260Sstevel@tonic-gate uberflags_t *gflags; 21270Sstevel@tonic-gate lwpid_t lwpid; 21280Sstevel@tonic-gate int mtype; 21290Sstevel@tonic-gate short el; 21300Sstevel@tonic-gate 21310Sstevel@tonic-gate /* 21320Sstevel@tonic-gate * Optimize the case of USYNC_THREAD, including 21330Sstevel@tonic-gate * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 21340Sstevel@tonic-gate * no error detection, no lock statistics, 21350Sstevel@tonic-gate * and the process has only a single thread. 21360Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 
21370Sstevel@tonic-gate */ 21380Sstevel@tonic-gate if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 21390Sstevel@tonic-gate udp->uberflags.uf_all) == 0) { 21400Sstevel@tonic-gate if (mtype) { 21410Sstevel@tonic-gate /* 21420Sstevel@tonic-gate * At this point we know that one or both of the 21430Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 21440Sstevel@tonic-gate */ 21450Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 21460Sstevel@tonic-gate return (EPERM); 21470Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 21480Sstevel@tonic-gate mp->mutex_rcount--; 21490Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 21500Sstevel@tonic-gate return (0); 21510Sstevel@tonic-gate } 21520Sstevel@tonic-gate } 21530Sstevel@tonic-gate /* 21540Sstevel@tonic-gate * Only one thread exists so we don't need an atomic operation. 21550Sstevel@tonic-gate * Also, there can be no waiters. 21560Sstevel@tonic-gate */ 21570Sstevel@tonic-gate mp->mutex_owner = 0; 21580Sstevel@tonic-gate mp->mutex_lockword = 0; 21590Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 21600Sstevel@tonic-gate return (0); 21610Sstevel@tonic-gate } 21620Sstevel@tonic-gate 21630Sstevel@tonic-gate /* 21640Sstevel@tonic-gate * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 21650Sstevel@tonic-gate * no error detection, and no lock statistics. 21660Sstevel@tonic-gate * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
21670Sstevel@tonic-gate */ 21680Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL) { 21690Sstevel@tonic-gate if (((el = gflags->uf_trs_ted) | mtype) == 0) { 21700Sstevel@tonic-gate fast_unlock: 21710Sstevel@tonic-gate if (!(mp->mutex_lockword & WAITERMASK)) { 21720Sstevel@tonic-gate /* no waiter exists right now */ 21730Sstevel@tonic-gate mp->mutex_owner = 0; 21740Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 21754570Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & 21760Sstevel@tonic-gate WAITERMASK) { 21770Sstevel@tonic-gate /* a waiter suddenly appeared */ 21780Sstevel@tonic-gate no_preempt(self); 21790Sstevel@tonic-gate if ((lwpid = mutex_wakeup(mp)) != 0) 21800Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 21810Sstevel@tonic-gate preempt(self); 21820Sstevel@tonic-gate } 21834574Sraf } else if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 21840Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 21850Sstevel@tonic-gate preempt(self); 21860Sstevel@tonic-gate } 21870Sstevel@tonic-gate return (0); 21880Sstevel@tonic-gate } 21890Sstevel@tonic-gate if (el) /* error detection or lock statistics */ 21900Sstevel@tonic-gate goto slow_unlock; 21910Sstevel@tonic-gate if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 21920Sstevel@tonic-gate /* 21930Sstevel@tonic-gate * At this point we know that one or both of the 21940Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 
21950Sstevel@tonic-gate */ 21960Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 21970Sstevel@tonic-gate return (EPERM); 21980Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 21990Sstevel@tonic-gate mp->mutex_rcount--; 22000Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22010Sstevel@tonic-gate return (0); 22020Sstevel@tonic-gate } 22030Sstevel@tonic-gate goto fast_unlock; 22040Sstevel@tonic-gate } 22050Sstevel@tonic-gate if ((mtype & 22060Sstevel@tonic-gate ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 22070Sstevel@tonic-gate /* 22080Sstevel@tonic-gate * At this point we know that zero, one, or both of the 22090Sstevel@tonic-gate * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 22100Sstevel@tonic-gate * that the USYNC_PROCESS flag is set. 22110Sstevel@tonic-gate */ 22120Sstevel@tonic-gate if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 22130Sstevel@tonic-gate return (EPERM); 22140Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 22150Sstevel@tonic-gate mp->mutex_rcount--; 22160Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 22170Sstevel@tonic-gate return (0); 22180Sstevel@tonic-gate } 22194574Sraf if (mp->mutex_lockword & WAITERMASK) { 22204574Sraf mutex_unlock_process(mp, 0); 22214574Sraf } else { 22220Sstevel@tonic-gate mp->mutex_owner = 0; 22230Sstevel@tonic-gate mp->mutex_ownerpid = 0; 22240Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 22254570Sraf if (atomic_swap_32(&mp->mutex_lockword, 0) & 22260Sstevel@tonic-gate WAITERMASK) { 22270Sstevel@tonic-gate no_preempt(self); 22284574Sraf (void) ___lwp_mutex_wakeup(mp, 0); 22290Sstevel@tonic-gate preempt(self); 22300Sstevel@tonic-gate } 22310Sstevel@tonic-gate } 22320Sstevel@tonic-gate return (0); 22330Sstevel@tonic-gate } 22340Sstevel@tonic-gate } 22350Sstevel@tonic-gate 22360Sstevel@tonic-gate /* else do it the long way */ 22370Sstevel@tonic-gate 
slow_unlock: 22384574Sraf return (mutex_unlock_internal(mp, 0)); 22390Sstevel@tonic-gate } 22400Sstevel@tonic-gate 22410Sstevel@tonic-gate /* 22420Sstevel@tonic-gate * Internally to the library, almost all mutex lock/unlock actions 22430Sstevel@tonic-gate * go through these lmutex_ functions, to protect critical regions. 22440Sstevel@tonic-gate * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 22450Sstevel@tonic-gate * to make these functions faster since we know that the mutex type 22460Sstevel@tonic-gate * of all internal locks is USYNC_THREAD. We also know that internal 22470Sstevel@tonic-gate * locking can never fail, so we panic if it does. 22480Sstevel@tonic-gate */ 22490Sstevel@tonic-gate void 22500Sstevel@tonic-gate lmutex_lock(mutex_t *mp) 22510Sstevel@tonic-gate { 22520Sstevel@tonic-gate ulwp_t *self = curthread; 22530Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 22540Sstevel@tonic-gate 22550Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 22560Sstevel@tonic-gate 22570Sstevel@tonic-gate enter_critical(self); 22580Sstevel@tonic-gate /* 22590Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 22600Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 22610Sstevel@tonic-gate */ 22620Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 22630Sstevel@tonic-gate /* 22640Sstevel@tonic-gate * Only one thread exists; the mutex must be free. 
22650Sstevel@tonic-gate */ 22660Sstevel@tonic-gate ASSERT(mp->mutex_lockw == 0); 22670Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 22680Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 22690Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 22700Sstevel@tonic-gate } else { 22710Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 22720Sstevel@tonic-gate 22730Sstevel@tonic-gate if (!self->ul_schedctl_called) 22740Sstevel@tonic-gate (void) setup_schedctl(); 22750Sstevel@tonic-gate 22760Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 22770Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 22780Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2279*4613Sraf } else if (mutex_trylock_adaptive(mp, 1) != 0) { 22800Sstevel@tonic-gate (void) mutex_lock_queue(self, msp, mp, NULL); 22810Sstevel@tonic-gate } 22820Sstevel@tonic-gate 22830Sstevel@tonic-gate if (msp) 22840Sstevel@tonic-gate record_begin_hold(msp); 22850Sstevel@tonic-gate } 22860Sstevel@tonic-gate } 22870Sstevel@tonic-gate 22880Sstevel@tonic-gate void 22890Sstevel@tonic-gate lmutex_unlock(mutex_t *mp) 22900Sstevel@tonic-gate { 22910Sstevel@tonic-gate ulwp_t *self = curthread; 22920Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 22930Sstevel@tonic-gate 22940Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 22950Sstevel@tonic-gate 22960Sstevel@tonic-gate /* 22970Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 22980Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 22990Sstevel@tonic-gate */ 23000Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 23010Sstevel@tonic-gate /* 23020Sstevel@tonic-gate * Only one thread exists so there can be no waiters. 
23030Sstevel@tonic-gate */ 23040Sstevel@tonic-gate mp->mutex_owner = 0; 23050Sstevel@tonic-gate mp->mutex_lockword = 0; 23060Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 23070Sstevel@tonic-gate } else { 23080Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 23090Sstevel@tonic-gate lwpid_t lwpid; 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate if (msp) 23120Sstevel@tonic-gate (void) record_hold_time(msp); 23134574Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 23140Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 23150Sstevel@tonic-gate preempt(self); 23160Sstevel@tonic-gate } 23170Sstevel@tonic-gate } 23180Sstevel@tonic-gate exit_critical(self); 23190Sstevel@tonic-gate } 23200Sstevel@tonic-gate 23212248Sraf /* 23222248Sraf * For specialized code in libc, like the asynchronous i/o code, 23232248Sraf * the following sig_*() locking primitives are used in order 23242248Sraf * to make the code asynchronous signal safe. Signals are 23252248Sraf * deferred while locks acquired by these functions are held. 23262248Sraf */ 23272248Sraf void 23282248Sraf sig_mutex_lock(mutex_t *mp) 23292248Sraf { 23302248Sraf sigoff(curthread); 23312248Sraf (void) _private_mutex_lock(mp); 23322248Sraf } 23332248Sraf 23342248Sraf void 23352248Sraf sig_mutex_unlock(mutex_t *mp) 23362248Sraf { 23372248Sraf (void) _private_mutex_unlock(mp); 23382248Sraf sigon(curthread); 23392248Sraf } 23402248Sraf 23412248Sraf int 23422248Sraf sig_mutex_trylock(mutex_t *mp) 23432248Sraf { 23442248Sraf int error; 23452248Sraf 23462248Sraf sigoff(curthread); 23472248Sraf if ((error = _private_mutex_trylock(mp)) != 0) 23482248Sraf sigon(curthread); 23492248Sraf return (error); 23502248Sraf } 23512248Sraf 23522248Sraf /* 23532248Sraf * sig_cond_wait() is a cancellation point. 
23542248Sraf */ 23552248Sraf int 23562248Sraf sig_cond_wait(cond_t *cv, mutex_t *mp) 23572248Sraf { 23582248Sraf int error; 23592248Sraf 23602248Sraf ASSERT(curthread->ul_sigdefer != 0); 23612248Sraf _private_testcancel(); 23622248Sraf error = _cond_wait(cv, mp); 23632248Sraf if (error == EINTR && curthread->ul_cursig) { 23642248Sraf sig_mutex_unlock(mp); 23652248Sraf /* take the deferred signal here */ 23662248Sraf sig_mutex_lock(mp); 23672248Sraf } 23682248Sraf _private_testcancel(); 23692248Sraf return (error); 23702248Sraf } 23712248Sraf 23722248Sraf /* 23732248Sraf * sig_cond_reltimedwait() is a cancellation point. 23742248Sraf */ 23752248Sraf int 23762248Sraf sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 23772248Sraf { 23782248Sraf int error; 23792248Sraf 23802248Sraf ASSERT(curthread->ul_sigdefer != 0); 23812248Sraf _private_testcancel(); 23822248Sraf error = _cond_reltimedwait(cv, mp, ts); 23832248Sraf if (error == EINTR && curthread->ul_cursig) { 23842248Sraf sig_mutex_unlock(mp); 23852248Sraf /* take the deferred signal here */ 23862248Sraf sig_mutex_lock(mp); 23872248Sraf } 23882248Sraf _private_testcancel(); 23892248Sraf return (error); 23902248Sraf } 23912248Sraf 23920Sstevel@tonic-gate static int 23930Sstevel@tonic-gate shared_mutex_held(mutex_t *mparg) 23940Sstevel@tonic-gate { 23950Sstevel@tonic-gate /* 23964574Sraf * The 'volatile' is necessary to make sure the compiler doesn't 23974574Sraf * reorder the tests of the various components of the mutex. 
23984574Sraf * They must be tested in this order: 23994574Sraf * mutex_lockw 24004574Sraf * mutex_owner 24014574Sraf * mutex_ownerpid 24024574Sraf * This relies on the fact that everywhere mutex_lockw is cleared, 24034574Sraf * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 24044574Sraf * is cleared, and that everywhere mutex_lockw is set, mutex_owner 24054574Sraf * and mutex_ownerpid are set after mutex_lockw is set, and that 24064574Sraf * mutex_lockw is set or cleared with a memory barrier. 24070Sstevel@tonic-gate */ 24080Sstevel@tonic-gate volatile mutex_t *mp = (volatile mutex_t *)mparg; 24090Sstevel@tonic-gate ulwp_t *self = curthread; 24100Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 24110Sstevel@tonic-gate 24124574Sraf return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 24130Sstevel@tonic-gate } 24140Sstevel@tonic-gate 24150Sstevel@tonic-gate /* 24160Sstevel@tonic-gate * Some crufty old programs define their own version of _mutex_held() 24170Sstevel@tonic-gate * to be simply return(1). This breaks internal libc logic, so we 24180Sstevel@tonic-gate * define a private version for exclusive use by libc, mutex_is_held(), 24190Sstevel@tonic-gate * and also a new public function, __mutex_held(), to be used in new 24200Sstevel@tonic-gate * code to circumvent these crufty old programs. 
24210Sstevel@tonic-gate */ 24220Sstevel@tonic-gate #pragma weak mutex_held = mutex_is_held 24230Sstevel@tonic-gate #pragma weak _mutex_held = mutex_is_held 24240Sstevel@tonic-gate #pragma weak __mutex_held = mutex_is_held 24250Sstevel@tonic-gate int 24264574Sraf mutex_is_held(mutex_t *mparg) 24270Sstevel@tonic-gate { 24284574Sraf volatile mutex_t *mp = (volatile mutex_t *)mparg; 24294574Sraf 24304574Sraf if (mparg->mutex_type & USYNC_PROCESS) 24314574Sraf return (shared_mutex_held(mparg)); 24320Sstevel@tonic-gate return (MUTEX_OWNED(mp, curthread)); 24330Sstevel@tonic-gate } 24340Sstevel@tonic-gate 24350Sstevel@tonic-gate #pragma weak _private_mutex_destroy = __mutex_destroy 24360Sstevel@tonic-gate #pragma weak mutex_destroy = __mutex_destroy 24370Sstevel@tonic-gate #pragma weak _mutex_destroy = __mutex_destroy 24380Sstevel@tonic-gate #pragma weak pthread_mutex_destroy = __mutex_destroy 24390Sstevel@tonic-gate #pragma weak _pthread_mutex_destroy = __mutex_destroy 24400Sstevel@tonic-gate int 24410Sstevel@tonic-gate __mutex_destroy(mutex_t *mp) 24420Sstevel@tonic-gate { 24434574Sraf if (mp->mutex_type & USYNC_PROCESS) 24444574Sraf forget_lock(mp); 24454574Sraf (void) _memset(mp, 0, sizeof (*mp)); 24460Sstevel@tonic-gate tdb_sync_obj_deregister(mp); 24470Sstevel@tonic-gate return (0); 24480Sstevel@tonic-gate } 24490Sstevel@tonic-gate 24504574Sraf #pragma weak mutex_consistent = __mutex_consistent 24514574Sraf #pragma weak _mutex_consistent = __mutex_consistent 24524574Sraf #pragma weak pthread_mutex_consistent_np = __mutex_consistent 24534574Sraf #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 24544574Sraf int 24554574Sraf __mutex_consistent(mutex_t *mp) 24564574Sraf { 24574574Sraf /* 24584574Sraf * Do this only for an inconsistent, initialized robust lock 24594574Sraf * that we hold. For all other cases, return EINVAL. 
24604574Sraf */ 24614574Sraf if (mutex_is_held(mp) && 24624574Sraf (mp->mutex_type & LOCK_ROBUST) && 24634574Sraf (mp->mutex_flag & LOCK_INITED) && 24644574Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 24654574Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 24664574Sraf mp->mutex_rcount = 0; 24674574Sraf return (0); 24684574Sraf } 24694574Sraf return (EINVAL); 24704574Sraf } 24714574Sraf 24720Sstevel@tonic-gate /* 24730Sstevel@tonic-gate * Spin locks are separate from ordinary mutexes, 24740Sstevel@tonic-gate * but we use the same data structure for them. 24750Sstevel@tonic-gate */ 24760Sstevel@tonic-gate 24770Sstevel@tonic-gate #pragma weak pthread_spin_init = _pthread_spin_init 24780Sstevel@tonic-gate int 24790Sstevel@tonic-gate _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 24800Sstevel@tonic-gate { 24810Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 24820Sstevel@tonic-gate 24830Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 24840Sstevel@tonic-gate if (pshared == PTHREAD_PROCESS_SHARED) 24850Sstevel@tonic-gate mp->mutex_type = USYNC_PROCESS; 24860Sstevel@tonic-gate else 24870Sstevel@tonic-gate mp->mutex_type = USYNC_THREAD; 24880Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 24890Sstevel@tonic-gate mp->mutex_magic = MUTEX_MAGIC; 24900Sstevel@tonic-gate return (0); 24910Sstevel@tonic-gate } 24920Sstevel@tonic-gate 24930Sstevel@tonic-gate #pragma weak pthread_spin_destroy = _pthread_spin_destroy 24940Sstevel@tonic-gate int 24950Sstevel@tonic-gate _pthread_spin_destroy(pthread_spinlock_t *lock) 24960Sstevel@tonic-gate { 24970Sstevel@tonic-gate (void) _memset(lock, 0, sizeof (*lock)); 24980Sstevel@tonic-gate return (0); 24990Sstevel@tonic-gate } 25000Sstevel@tonic-gate 25010Sstevel@tonic-gate #pragma weak pthread_spin_trylock = _pthread_spin_trylock 25020Sstevel@tonic-gate int 25030Sstevel@tonic-gate _pthread_spin_trylock(pthread_spinlock_t *lock) 25040Sstevel@tonic-gate { 25050Sstevel@tonic-gate mutex_t *mp = 
(mutex_t *)lock; 25060Sstevel@tonic-gate ulwp_t *self = curthread; 25070Sstevel@tonic-gate int error = 0; 25080Sstevel@tonic-gate 25090Sstevel@tonic-gate no_preempt(self); 25100Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) != 0) 25110Sstevel@tonic-gate error = EBUSY; 25120Sstevel@tonic-gate else { 25130Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 25140Sstevel@tonic-gate if (mp->mutex_type == USYNC_PROCESS) 25150Sstevel@tonic-gate mp->mutex_ownerpid = self->ul_uberdata->pid; 25160Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 25170Sstevel@tonic-gate } 25180Sstevel@tonic-gate preempt(self); 25190Sstevel@tonic-gate return (error); 25200Sstevel@tonic-gate } 25210Sstevel@tonic-gate 25220Sstevel@tonic-gate #pragma weak pthread_spin_lock = _pthread_spin_lock 25230Sstevel@tonic-gate int 25240Sstevel@tonic-gate _pthread_spin_lock(pthread_spinlock_t *lock) 25250Sstevel@tonic-gate { 25264574Sraf mutex_t *mp = (mutex_t *)lock; 25274574Sraf ulwp_t *self = curthread; 25284574Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 25294574Sraf int count = 0; 25304574Sraf 25314574Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 25324574Sraf 25334574Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 25344574Sraf 25350Sstevel@tonic-gate /* 25360Sstevel@tonic-gate * We don't care whether the owner is running on a processor. 25370Sstevel@tonic-gate * We just spin because that's what this interface requires. 
25380Sstevel@tonic-gate */ 25390Sstevel@tonic-gate for (;;) { 25404574Sraf if (count < INT_MAX) 25414574Sraf count++; 25420Sstevel@tonic-gate if (*lockp == 0) { /* lock byte appears to be clear */ 25434574Sraf no_preempt(self); 25444574Sraf if (set_lock_byte(lockp) == 0) 25454574Sraf break; 25464574Sraf preempt(self); 25470Sstevel@tonic-gate } 25480Sstevel@tonic-gate SMT_PAUSE(); 25490Sstevel@tonic-gate } 25504574Sraf mp->mutex_owner = (uintptr_t)self; 25514574Sraf if (mp->mutex_type == USYNC_PROCESS) 25524574Sraf mp->mutex_ownerpid = self->ul_uberdata->pid; 25534574Sraf preempt(self); 25544574Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 25554574Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 25564574Sraf return (0); 25570Sstevel@tonic-gate } 25580Sstevel@tonic-gate 25590Sstevel@tonic-gate #pragma weak pthread_spin_unlock = _pthread_spin_unlock 25600Sstevel@tonic-gate int 25610Sstevel@tonic-gate _pthread_spin_unlock(pthread_spinlock_t *lock) 25620Sstevel@tonic-gate { 25630Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 25640Sstevel@tonic-gate ulwp_t *self = curthread; 25650Sstevel@tonic-gate 25660Sstevel@tonic-gate no_preempt(self); 25670Sstevel@tonic-gate mp->mutex_owner = 0; 25680Sstevel@tonic-gate mp->mutex_ownerpid = 0; 25690Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 25704570Sraf (void) atomic_swap_32(&mp->mutex_lockword, 0); 25710Sstevel@tonic-gate preempt(self); 25720Sstevel@tonic-gate return (0); 25730Sstevel@tonic-gate } 25740Sstevel@tonic-gate 25754574Sraf #define INITIAL_LOCKS 8 /* initialial size of ul_heldlocks.array */ 25764574Sraf 25774574Sraf /* 25784574Sraf * Find/allocate an entry for 'lock' in our array of held locks. 
25794574Sraf */ 25804574Sraf static mutex_t ** 25814574Sraf find_lock_entry(mutex_t *lock) 25824574Sraf { 25834574Sraf ulwp_t *self = curthread; 25844574Sraf mutex_t **remembered = NULL; 25854574Sraf mutex_t **lockptr; 25864574Sraf uint_t nlocks; 25874574Sraf 25884574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 25894574Sraf lockptr = self->ul_heldlocks.array; 25904574Sraf else { 25914574Sraf nlocks = 1; 25924574Sraf lockptr = &self->ul_heldlocks.single; 25934574Sraf } 25944574Sraf 25954574Sraf for (; nlocks; nlocks--, lockptr++) { 25964574Sraf if (*lockptr == lock) 25974574Sraf return (lockptr); 25984574Sraf if (*lockptr == NULL && remembered == NULL) 25994574Sraf remembered = lockptr; 26004574Sraf } 26014574Sraf if (remembered != NULL) { 26024574Sraf *remembered = lock; 26034574Sraf return (remembered); 26044574Sraf } 26054574Sraf 26064574Sraf /* 26074574Sraf * No entry available. Allocate more space, converting 26084574Sraf * the single entry into an array of entries if necessary. 26094574Sraf */ 26104574Sraf if ((nlocks = self->ul_heldlockcnt) == 0) { 26114574Sraf /* 26124574Sraf * Initial allocation of the array. 26134574Sraf * Convert the single entry into an array. 26144574Sraf */ 26154574Sraf self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 26164574Sraf lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 26174574Sraf /* 26184574Sraf * The single entry becomes the first entry in the array. 26194574Sraf */ 26204574Sraf *lockptr = self->ul_heldlocks.single; 26214574Sraf self->ul_heldlocks.array = lockptr; 26224574Sraf /* 26234574Sraf * Return the next available entry in the array. 26244574Sraf */ 26254574Sraf *++lockptr = lock; 26264574Sraf return (lockptr); 26274574Sraf } 26284574Sraf /* 26294574Sraf * Reallocate the array, double the size each time. 
26304574Sraf */ 26314574Sraf lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 26324574Sraf (void) _memcpy(lockptr, self->ul_heldlocks.array, 26334574Sraf nlocks * sizeof (mutex_t *)); 26344574Sraf lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 26354574Sraf self->ul_heldlocks.array = lockptr; 26364574Sraf self->ul_heldlockcnt *= 2; 26374574Sraf /* 26384574Sraf * Return the next available entry in the newly allocated array. 26394574Sraf */ 26404574Sraf *(lockptr += nlocks) = lock; 26414574Sraf return (lockptr); 26424574Sraf } 26434574Sraf 26444574Sraf /* 26454574Sraf * Insert 'lock' into our list of held locks. 26464574Sraf * Currently only used for LOCK_ROBUST mutexes. 26474574Sraf */ 26484574Sraf void 26494574Sraf remember_lock(mutex_t *lock) 26504574Sraf { 26514574Sraf (void) find_lock_entry(lock); 26524574Sraf } 26534574Sraf 26544574Sraf /* 26554574Sraf * Remove 'lock' from our list of held locks. 26564574Sraf * Currently only used for LOCK_ROBUST mutexes. 26574574Sraf */ 26584574Sraf void 26594574Sraf forget_lock(mutex_t *lock) 26604574Sraf { 26614574Sraf *find_lock_entry(lock) = NULL; 26624574Sraf } 26634574Sraf 26644574Sraf /* 26654574Sraf * Free the array of held locks. 26664574Sraf */ 26674574Sraf void 26684574Sraf heldlock_free(ulwp_t *ulwp) 26694574Sraf { 26704574Sraf uint_t nlocks; 26714574Sraf 26724574Sraf if ((nlocks = ulwp->ul_heldlockcnt) != 0) 26734574Sraf lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 26744574Sraf ulwp->ul_heldlockcnt = 0; 26754574Sraf ulwp->ul_heldlocks.array = NULL; 26764574Sraf } 26774574Sraf 26784574Sraf /* 26794574Sraf * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 26804574Sraf * Called from _thrp_exit() to deal with abandoned locks. 
26814574Sraf */ 26824574Sraf void 26834574Sraf heldlock_exit(void) 26844574Sraf { 26854574Sraf ulwp_t *self = curthread; 26864574Sraf mutex_t **lockptr; 26874574Sraf uint_t nlocks; 26884574Sraf mutex_t *mp; 26894574Sraf 26904574Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 26914574Sraf lockptr = self->ul_heldlocks.array; 26924574Sraf else { 26934574Sraf nlocks = 1; 26944574Sraf lockptr = &self->ul_heldlocks.single; 26954574Sraf } 26964574Sraf 26974574Sraf for (; nlocks; nlocks--, lockptr++) { 26984574Sraf /* 26994574Sraf * The kernel takes care of transitioning held 27004574Sraf * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 27014574Sraf * We avoid that case here. 27024574Sraf */ 27034574Sraf if ((mp = *lockptr) != NULL && 27044574Sraf mutex_is_held(mp) && 27054574Sraf (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 27064574Sraf LOCK_ROBUST) { 27074574Sraf mp->mutex_rcount = 0; 27084574Sraf if (!(mp->mutex_flag & LOCK_UNMAPPED)) 27094574Sraf mp->mutex_flag |= LOCK_OWNERDEAD; 27104574Sraf (void) mutex_unlock_internal(mp, 1); 27114574Sraf } 27124574Sraf } 27134574Sraf 27144574Sraf heldlock_free(self); 27154574Sraf } 27164574Sraf 27170Sstevel@tonic-gate #pragma weak cond_init = _cond_init 27180Sstevel@tonic-gate /* ARGSUSED2 */ 27190Sstevel@tonic-gate int 27200Sstevel@tonic-gate _cond_init(cond_t *cvp, int type, void *arg) 27210Sstevel@tonic-gate { 27220Sstevel@tonic-gate if (type != USYNC_THREAD && type != USYNC_PROCESS) 27230Sstevel@tonic-gate return (EINVAL); 27240Sstevel@tonic-gate (void) _memset(cvp, 0, sizeof (*cvp)); 27250Sstevel@tonic-gate cvp->cond_type = (uint16_t)type; 27260Sstevel@tonic-gate cvp->cond_magic = COND_MAGIC; 27270Sstevel@tonic-gate return (0); 27280Sstevel@tonic-gate } 27290Sstevel@tonic-gate 27300Sstevel@tonic-gate /* 27310Sstevel@tonic-gate * cond_sleep_queue(): utility function for cond_wait_queue(). 
27320Sstevel@tonic-gate * 27330Sstevel@tonic-gate * Go to sleep on a condvar sleep queue, expect to be waked up 27340Sstevel@tonic-gate * by someone calling cond_signal() or cond_broadcast() or due 27350Sstevel@tonic-gate * to receiving a UNIX signal or being cancelled, or just simply 27360Sstevel@tonic-gate * due to a spurious wakeup (like someome calling forkall()). 27370Sstevel@tonic-gate * 27380Sstevel@tonic-gate * The associated mutex is *not* reacquired before returning. 27390Sstevel@tonic-gate * That must be done by the caller of cond_sleep_queue(). 27400Sstevel@tonic-gate */ 27414574Sraf static int 27420Sstevel@tonic-gate cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 27430Sstevel@tonic-gate { 27440Sstevel@tonic-gate ulwp_t *self = curthread; 27450Sstevel@tonic-gate queue_head_t *qp; 27460Sstevel@tonic-gate queue_head_t *mqp; 27470Sstevel@tonic-gate lwpid_t lwpid; 27480Sstevel@tonic-gate int signalled; 27490Sstevel@tonic-gate int error; 27504574Sraf int release_all; 27510Sstevel@tonic-gate 27520Sstevel@tonic-gate /* 27530Sstevel@tonic-gate * Put ourself on the CV sleep queue, unlock the mutex, then 27540Sstevel@tonic-gate * park ourself and unpark a candidate lwp to grab the mutex. 27550Sstevel@tonic-gate * We must go onto the CV sleep queue before dropping the 27560Sstevel@tonic-gate * mutex in order to guarantee atomicity of the operation. 
27570Sstevel@tonic-gate */ 27580Sstevel@tonic-gate self->ul_sp = stkptr(); 27590Sstevel@tonic-gate qp = queue_lock(cvp, CV); 27600Sstevel@tonic-gate enqueue(qp, self, cvp, CV); 27610Sstevel@tonic-gate cvp->cond_waiters_user = 1; 27620Sstevel@tonic-gate self->ul_cvmutex = mp; 27630Sstevel@tonic-gate self->ul_cv_wake = (tsp != NULL); 27640Sstevel@tonic-gate self->ul_signalled = 0; 27654574Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 27664574Sraf mp->mutex_flag &= ~LOCK_OWNERDEAD; 27674574Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 27684574Sraf } 27694574Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 27704574Sraf lwpid = mutex_unlock_queue(mp, release_all); 27710Sstevel@tonic-gate for (;;) { 27720Sstevel@tonic-gate set_parking_flag(self, 1); 27730Sstevel@tonic-gate queue_unlock(qp); 27740Sstevel@tonic-gate if (lwpid != 0) { 27750Sstevel@tonic-gate lwpid = preempt_unpark(self, lwpid); 27760Sstevel@tonic-gate preempt(self); 27770Sstevel@tonic-gate } 27780Sstevel@tonic-gate /* 27790Sstevel@tonic-gate * We may have a deferred signal present, 27800Sstevel@tonic-gate * in which case we should return EINTR. 27810Sstevel@tonic-gate * Also, we may have received a SIGCANCEL; if so 27820Sstevel@tonic-gate * and we are cancelable we should return EINTR. 27830Sstevel@tonic-gate * We force an immediate EINTR return from 27840Sstevel@tonic-gate * __lwp_park() by turning our parking flag off. 27850Sstevel@tonic-gate */ 27860Sstevel@tonic-gate if (self->ul_cursig != 0 || 27870Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 27880Sstevel@tonic-gate set_parking_flag(self, 0); 27890Sstevel@tonic-gate /* 27900Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 27910Sstevel@tonic-gate * if we are unparked before the timeout expires. 
27920Sstevel@tonic-gate */ 27930Sstevel@tonic-gate error = __lwp_park(tsp, lwpid); 27940Sstevel@tonic-gate set_parking_flag(self, 0); 27950Sstevel@tonic-gate lwpid = 0; /* unpark the other lwp only once */ 27960Sstevel@tonic-gate /* 27970Sstevel@tonic-gate * We were waked up by cond_signal(), cond_broadcast(), 27980Sstevel@tonic-gate * by an interrupt or timeout (EINTR or ETIME), 27990Sstevel@tonic-gate * or we may just have gotten a spurious wakeup. 28000Sstevel@tonic-gate */ 28010Sstevel@tonic-gate qp = queue_lock(cvp, CV); 28020Sstevel@tonic-gate mqp = queue_lock(mp, MX); 28030Sstevel@tonic-gate if (self->ul_sleepq == NULL) 28040Sstevel@tonic-gate break; 28050Sstevel@tonic-gate /* 28060Sstevel@tonic-gate * We are on either the condvar sleep queue or the 28071893Sraf * mutex sleep queue. Break out of the sleep if we 28081893Sraf * were interrupted or we timed out (EINTR or ETIME). 28090Sstevel@tonic-gate * Else this is a spurious wakeup; continue the loop. 28100Sstevel@tonic-gate */ 28111893Sraf if (self->ul_sleepq == mqp) { /* mutex queue */ 28121893Sraf if (error) { 28131893Sraf mp->mutex_waiters = dequeue_self(mqp, mp); 28141893Sraf break; 28151893Sraf } 28161893Sraf tsp = NULL; /* no more timeout */ 28171893Sraf } else if (self->ul_sleepq == qp) { /* condvar queue */ 28180Sstevel@tonic-gate if (error) { 28190Sstevel@tonic-gate cvp->cond_waiters_user = dequeue_self(qp, cvp); 28200Sstevel@tonic-gate break; 28210Sstevel@tonic-gate } 28220Sstevel@tonic-gate /* 28230Sstevel@tonic-gate * Else a spurious wakeup on the condvar queue. 28240Sstevel@tonic-gate * __lwp_park() has already adjusted the timeout. 
28250Sstevel@tonic-gate */ 28260Sstevel@tonic-gate } else { 28270Sstevel@tonic-gate thr_panic("cond_sleep_queue(): thread not on queue"); 28280Sstevel@tonic-gate } 28290Sstevel@tonic-gate queue_unlock(mqp); 28300Sstevel@tonic-gate } 28310Sstevel@tonic-gate 28320Sstevel@tonic-gate self->ul_sp = 0; 28330Sstevel@tonic-gate ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); 28340Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 28350Sstevel@tonic-gate self->ul_wchan == NULL); 28360Sstevel@tonic-gate 28370Sstevel@tonic-gate signalled = self->ul_signalled; 28380Sstevel@tonic-gate self->ul_signalled = 0; 28390Sstevel@tonic-gate queue_unlock(qp); 28400Sstevel@tonic-gate queue_unlock(mqp); 28410Sstevel@tonic-gate 28420Sstevel@tonic-gate /* 28430Sstevel@tonic-gate * If we were concurrently cond_signal()d and any of: 28440Sstevel@tonic-gate * received a UNIX signal, were cancelled, or got a timeout, 28450Sstevel@tonic-gate * then perform another cond_signal() to avoid consuming it. 28460Sstevel@tonic-gate */ 28470Sstevel@tonic-gate if (error && signalled) 28480Sstevel@tonic-gate (void) cond_signal_internal(cvp); 28490Sstevel@tonic-gate 28500Sstevel@tonic-gate return (error); 28510Sstevel@tonic-gate } 28520Sstevel@tonic-gate 28530Sstevel@tonic-gate int 28540Sstevel@tonic-gate cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, 28550Sstevel@tonic-gate tdb_mutex_stats_t *msp) 28560Sstevel@tonic-gate { 28570Sstevel@tonic-gate ulwp_t *self = curthread; 28580Sstevel@tonic-gate int error; 28594574Sraf int merror; 28600Sstevel@tonic-gate 28610Sstevel@tonic-gate /* 28620Sstevel@tonic-gate * The old thread library was programmed to defer signals 28630Sstevel@tonic-gate * while in cond_wait() so that the associated mutex would 28640Sstevel@tonic-gate * be guaranteed to be held when the application signal 28650Sstevel@tonic-gate * handler was invoked. 
28660Sstevel@tonic-gate * 28670Sstevel@tonic-gate * We do not behave this way by default; the state of the 28680Sstevel@tonic-gate * associated mutex in the signal handler is undefined. 28690Sstevel@tonic-gate * 28700Sstevel@tonic-gate * To accommodate applications that depend on the old 28710Sstevel@tonic-gate * behavior, the _THREAD_COND_WAIT_DEFER environment 28720Sstevel@tonic-gate * variable can be set to 1 and we will behave in the 28730Sstevel@tonic-gate * old way with respect to cond_wait(). 28740Sstevel@tonic-gate */ 28750Sstevel@tonic-gate if (self->ul_cond_wait_defer) 28760Sstevel@tonic-gate sigoff(self); 28770Sstevel@tonic-gate 28780Sstevel@tonic-gate error = cond_sleep_queue(cvp, mp, tsp); 28790Sstevel@tonic-gate 28800Sstevel@tonic-gate /* 28810Sstevel@tonic-gate * Reacquire the mutex. 28820Sstevel@tonic-gate */ 2883*4613Sraf if ((merror = mutex_trylock_adaptive(mp, 1)) == EBUSY) 28844574Sraf merror = mutex_lock_queue(self, msp, mp, NULL); 28854574Sraf if (merror) 28864574Sraf error = merror; 28874574Sraf if (msp && (merror == 0 || merror == EOWNERDEAD)) 28880Sstevel@tonic-gate record_begin_hold(msp); 28890Sstevel@tonic-gate 28900Sstevel@tonic-gate /* 28910Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 28920Sstevel@tonic-gate */ 28930Sstevel@tonic-gate if (self->ul_cond_wait_defer) 28940Sstevel@tonic-gate sigon(self); 28950Sstevel@tonic-gate 28960Sstevel@tonic-gate return (error); 28970Sstevel@tonic-gate } 28980Sstevel@tonic-gate 28990Sstevel@tonic-gate /* 29000Sstevel@tonic-gate * cond_sleep_kernel(): utility function for cond_wait_kernel(). 29010Sstevel@tonic-gate * See the comment ahead of cond_sleep_queue(), above. 
29020Sstevel@tonic-gate */ 29034574Sraf static int 29040Sstevel@tonic-gate cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29050Sstevel@tonic-gate { 29060Sstevel@tonic-gate int mtype = mp->mutex_type; 29070Sstevel@tonic-gate ulwp_t *self = curthread; 29080Sstevel@tonic-gate int error; 29090Sstevel@tonic-gate 29104574Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 29114574Sraf _ceil_prio_waive(); 29120Sstevel@tonic-gate 29130Sstevel@tonic-gate self->ul_sp = stkptr(); 29140Sstevel@tonic-gate self->ul_wchan = cvp; 29150Sstevel@tonic-gate mp->mutex_owner = 0; 29160Sstevel@tonic-gate mp->mutex_ownerpid = 0; 29174574Sraf if (mtype & LOCK_PRIO_INHERIT) 29180Sstevel@tonic-gate mp->mutex_lockw = LOCKCLEAR; 29190Sstevel@tonic-gate /* 29200Sstevel@tonic-gate * ___lwp_cond_wait() returns immediately with EINTR if 29210Sstevel@tonic-gate * set_parking_flag(self,0) is called on this lwp before it 29220Sstevel@tonic-gate * goes to sleep in the kernel. sigacthandler() calls this 29230Sstevel@tonic-gate * when a deferred signal is noted. This assures that we don't 29240Sstevel@tonic-gate * get stuck in ___lwp_cond_wait() with all signals blocked 29250Sstevel@tonic-gate * due to taking a deferred signal before going to sleep. 
29260Sstevel@tonic-gate */ 29270Sstevel@tonic-gate set_parking_flag(self, 1); 29280Sstevel@tonic-gate if (self->ul_cursig != 0 || 29290Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 29300Sstevel@tonic-gate set_parking_flag(self, 0); 29310Sstevel@tonic-gate error = ___lwp_cond_wait(cvp, mp, tsp, 1); 29320Sstevel@tonic-gate set_parking_flag(self, 0); 29330Sstevel@tonic-gate self->ul_sp = 0; 29340Sstevel@tonic-gate self->ul_wchan = NULL; 29350Sstevel@tonic-gate return (error); 29360Sstevel@tonic-gate } 29370Sstevel@tonic-gate 29380Sstevel@tonic-gate int 29390Sstevel@tonic-gate cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29400Sstevel@tonic-gate { 29410Sstevel@tonic-gate ulwp_t *self = curthread; 29420Sstevel@tonic-gate int error; 29430Sstevel@tonic-gate int merror; 29440Sstevel@tonic-gate 29450Sstevel@tonic-gate /* 29460Sstevel@tonic-gate * See the large comment in cond_wait_queue(), above. 29470Sstevel@tonic-gate */ 29480Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29490Sstevel@tonic-gate sigoff(self); 29500Sstevel@tonic-gate 29510Sstevel@tonic-gate error = cond_sleep_kernel(cvp, mp, tsp); 29520Sstevel@tonic-gate 29530Sstevel@tonic-gate /* 29540Sstevel@tonic-gate * Override the return code from ___lwp_cond_wait() 29550Sstevel@tonic-gate * with any non-zero return code from mutex_lock(). 29560Sstevel@tonic-gate * This addresses robust lock failures in particular; 29570Sstevel@tonic-gate * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 29580Sstevel@tonic-gate * errors in order to take corrective action. 29590Sstevel@tonic-gate */ 29600Sstevel@tonic-gate if ((merror = _private_mutex_lock(mp)) != 0) 29610Sstevel@tonic-gate error = merror; 29620Sstevel@tonic-gate 29630Sstevel@tonic-gate /* 29640Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 
29650Sstevel@tonic-gate */ 29660Sstevel@tonic-gate if (self->ul_cond_wait_defer) 29670Sstevel@tonic-gate sigon(self); 29680Sstevel@tonic-gate 29690Sstevel@tonic-gate return (error); 29700Sstevel@tonic-gate } 29710Sstevel@tonic-gate 29720Sstevel@tonic-gate /* 29730Sstevel@tonic-gate * Common code for _cond_wait() and _cond_timedwait() 29740Sstevel@tonic-gate */ 29750Sstevel@tonic-gate int 29760Sstevel@tonic-gate cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 29770Sstevel@tonic-gate { 29780Sstevel@tonic-gate int mtype = mp->mutex_type; 29790Sstevel@tonic-gate hrtime_t begin_sleep = 0; 29800Sstevel@tonic-gate ulwp_t *self = curthread; 29810Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 29820Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 29830Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 29840Sstevel@tonic-gate uint8_t rcount; 29850Sstevel@tonic-gate int error = 0; 29860Sstevel@tonic-gate 29870Sstevel@tonic-gate /* 29880Sstevel@tonic-gate * The SUSV3 Posix spec for pthread_cond_timedwait() states: 29890Sstevel@tonic-gate * Except in the case of [ETIMEDOUT], all these error checks 29900Sstevel@tonic-gate * shall act as if they were performed immediately at the 29910Sstevel@tonic-gate * beginning of processing for the function and shall cause 29920Sstevel@tonic-gate * an error return, in effect, prior to modifying the state 29930Sstevel@tonic-gate * of the mutex specified by mutex or the condition variable 29940Sstevel@tonic-gate * specified by cond. 29950Sstevel@tonic-gate * Therefore, we must return EINVAL now if the timout is invalid. 
29960Sstevel@tonic-gate */ 29970Sstevel@tonic-gate if (tsp != NULL && 29980Sstevel@tonic-gate (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 29990Sstevel@tonic-gate return (EINVAL); 30000Sstevel@tonic-gate 30010Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 30020Sstevel@tonic-gate self->ul_sp = stkptr(); 30030Sstevel@tonic-gate self->ul_wchan = cvp; 30040Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 30050Sstevel@tonic-gate self->ul_td_evbuf.eventdata = cvp; 30060Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 30070Sstevel@tonic-gate self->ul_sp = 0; 30080Sstevel@tonic-gate } 30090Sstevel@tonic-gate if (csp) { 30100Sstevel@tonic-gate if (tsp) 30110Sstevel@tonic-gate tdb_incr(csp->cond_timedwait); 30120Sstevel@tonic-gate else 30130Sstevel@tonic-gate tdb_incr(csp->cond_wait); 30140Sstevel@tonic-gate } 30150Sstevel@tonic-gate if (msp) 30160Sstevel@tonic-gate begin_sleep = record_hold_time(msp); 30170Sstevel@tonic-gate else if (csp) 30180Sstevel@tonic-gate begin_sleep = gethrtime(); 30190Sstevel@tonic-gate 30200Sstevel@tonic-gate if (self->ul_error_detection) { 30210Sstevel@tonic-gate if (!mutex_is_held(mp)) 30220Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, NULL); 30230Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 30240Sstevel@tonic-gate lock_error(mp, "recursive mutex in cond_wait", 30250Sstevel@tonic-gate cvp, NULL); 30260Sstevel@tonic-gate if (cvp->cond_type & USYNC_PROCESS) { 30274574Sraf if (!(mtype & USYNC_PROCESS)) 30280Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 30290Sstevel@tonic-gate "condvar process-shared, " 30300Sstevel@tonic-gate "mutex process-private"); 30310Sstevel@tonic-gate } else { 30324574Sraf if (mtype & USYNC_PROCESS) 30330Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 30340Sstevel@tonic-gate "condvar process-private, " 30350Sstevel@tonic-gate "mutex process-shared"); 30360Sstevel@tonic-gate } 30370Sstevel@tonic-gate } 30380Sstevel@tonic-gate 30390Sstevel@tonic-gate 
/* 30400Sstevel@tonic-gate * We deal with recursive mutexes by completely 30410Sstevel@tonic-gate * dropping the lock and restoring the recursion 30420Sstevel@tonic-gate * count after waking up. This is arguably wrong, 30430Sstevel@tonic-gate * but it obeys the principle of least astonishment. 30440Sstevel@tonic-gate */ 30450Sstevel@tonic-gate rcount = mp->mutex_rcount; 30460Sstevel@tonic-gate mp->mutex_rcount = 0; 30474574Sraf if ((mtype & 30484574Sraf (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 30490Sstevel@tonic-gate (cvp->cond_type & USYNC_PROCESS)) 30500Sstevel@tonic-gate error = cond_wait_kernel(cvp, mp, tsp); 30510Sstevel@tonic-gate else 30520Sstevel@tonic-gate error = cond_wait_queue(cvp, mp, tsp, msp); 30530Sstevel@tonic-gate mp->mutex_rcount = rcount; 30540Sstevel@tonic-gate 30550Sstevel@tonic-gate if (csp) { 30560Sstevel@tonic-gate hrtime_t lapse = gethrtime() - begin_sleep; 30570Sstevel@tonic-gate if (tsp == NULL) 30580Sstevel@tonic-gate csp->cond_wait_sleep_time += lapse; 30590Sstevel@tonic-gate else { 30600Sstevel@tonic-gate csp->cond_timedwait_sleep_time += lapse; 30610Sstevel@tonic-gate if (error == ETIME) 30620Sstevel@tonic-gate tdb_incr(csp->cond_timedwait_timeout); 30630Sstevel@tonic-gate } 30640Sstevel@tonic-gate } 30650Sstevel@tonic-gate return (error); 30660Sstevel@tonic-gate } 30670Sstevel@tonic-gate 30680Sstevel@tonic-gate /* 30690Sstevel@tonic-gate * cond_wait() is a cancellation point but _cond_wait() is not. 30700Sstevel@tonic-gate * System libraries call the non-cancellation version. 30710Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 
30720Sstevel@tonic-gate */ 30730Sstevel@tonic-gate int 30740Sstevel@tonic-gate _cond_wait(cond_t *cvp, mutex_t *mp) 30750Sstevel@tonic-gate { 30760Sstevel@tonic-gate ulwp_t *self = curthread; 30770Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 30780Sstevel@tonic-gate uberflags_t *gflags; 30790Sstevel@tonic-gate 30800Sstevel@tonic-gate /* 30810Sstevel@tonic-gate * Optimize the common case of USYNC_THREAD plus 30820Sstevel@tonic-gate * no error detection, no lock statistics, and no event tracing. 30830Sstevel@tonic-gate */ 30840Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 30850Sstevel@tonic-gate (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 30860Sstevel@tonic-gate self->ul_td_events_enable | 30870Sstevel@tonic-gate udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 30880Sstevel@tonic-gate return (cond_wait_queue(cvp, mp, NULL, NULL)); 30890Sstevel@tonic-gate 30900Sstevel@tonic-gate /* 30910Sstevel@tonic-gate * Else do it the long way. 30920Sstevel@tonic-gate */ 30930Sstevel@tonic-gate return (cond_wait_common(cvp, mp, NULL)); 30940Sstevel@tonic-gate } 30950Sstevel@tonic-gate 30960Sstevel@tonic-gate int 30970Sstevel@tonic-gate cond_wait(cond_t *cvp, mutex_t *mp) 30980Sstevel@tonic-gate { 30990Sstevel@tonic-gate int error; 31000Sstevel@tonic-gate 31010Sstevel@tonic-gate _cancelon(); 31020Sstevel@tonic-gate error = _cond_wait(cvp, mp); 31030Sstevel@tonic-gate if (error == EINTR) 31040Sstevel@tonic-gate _canceloff(); 31050Sstevel@tonic-gate else 31060Sstevel@tonic-gate _canceloff_nocancel(); 31070Sstevel@tonic-gate return (error); 31080Sstevel@tonic-gate } 31090Sstevel@tonic-gate 31100Sstevel@tonic-gate #pragma weak pthread_cond_wait = _pthread_cond_wait 31110Sstevel@tonic-gate int 31120Sstevel@tonic-gate _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 31130Sstevel@tonic-gate { 31140Sstevel@tonic-gate int error; 31150Sstevel@tonic-gate 31160Sstevel@tonic-gate error = cond_wait(cvp, mp); 31170Sstevel@tonic-gate return ((error == 
EINTR)? 0 : error); 31180Sstevel@tonic-gate } 31190Sstevel@tonic-gate 31200Sstevel@tonic-gate /* 31210Sstevel@tonic-gate * cond_timedwait() is a cancellation point but _cond_timedwait() is not. 31220Sstevel@tonic-gate * System libraries call the non-cancellation version. 31230Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 31240Sstevel@tonic-gate */ 31250Sstevel@tonic-gate int 31260Sstevel@tonic-gate _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31270Sstevel@tonic-gate { 31280Sstevel@tonic-gate clockid_t clock_id = cvp->cond_clockid; 31290Sstevel@tonic-gate timespec_t reltime; 31300Sstevel@tonic-gate int error; 31310Sstevel@tonic-gate 31320Sstevel@tonic-gate if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 31330Sstevel@tonic-gate clock_id = CLOCK_REALTIME; 31340Sstevel@tonic-gate abstime_to_reltime(clock_id, abstime, &reltime); 31350Sstevel@tonic-gate error = cond_wait_common(cvp, mp, &reltime); 31360Sstevel@tonic-gate if (error == ETIME && clock_id == CLOCK_HIGHRES) { 31370Sstevel@tonic-gate /* 31380Sstevel@tonic-gate * Don't return ETIME if we didn't really get a timeout. 31390Sstevel@tonic-gate * This can happen if we return because someone resets 31400Sstevel@tonic-gate * the system clock. Just return zero in this case, 31410Sstevel@tonic-gate * giving a spurious wakeup but not a timeout. 
31420Sstevel@tonic-gate */ 31430Sstevel@tonic-gate if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 31440Sstevel@tonic-gate abstime->tv_nsec > gethrtime()) 31450Sstevel@tonic-gate error = 0; 31460Sstevel@tonic-gate } 31470Sstevel@tonic-gate return (error); 31480Sstevel@tonic-gate } 31490Sstevel@tonic-gate 31500Sstevel@tonic-gate int 31510Sstevel@tonic-gate cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31520Sstevel@tonic-gate { 31530Sstevel@tonic-gate int error; 31540Sstevel@tonic-gate 31550Sstevel@tonic-gate _cancelon(); 31560Sstevel@tonic-gate error = _cond_timedwait(cvp, mp, abstime); 31570Sstevel@tonic-gate if (error == EINTR) 31580Sstevel@tonic-gate _canceloff(); 31590Sstevel@tonic-gate else 31600Sstevel@tonic-gate _canceloff_nocancel(); 31610Sstevel@tonic-gate return (error); 31620Sstevel@tonic-gate } 31630Sstevel@tonic-gate 31640Sstevel@tonic-gate #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 31650Sstevel@tonic-gate int 31660Sstevel@tonic-gate _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 31670Sstevel@tonic-gate { 31680Sstevel@tonic-gate int error; 31690Sstevel@tonic-gate 31700Sstevel@tonic-gate error = cond_timedwait(cvp, mp, abstime); 31710Sstevel@tonic-gate if (error == ETIME) 31720Sstevel@tonic-gate error = ETIMEDOUT; 31730Sstevel@tonic-gate else if (error == EINTR) 31740Sstevel@tonic-gate error = 0; 31750Sstevel@tonic-gate return (error); 31760Sstevel@tonic-gate } 31770Sstevel@tonic-gate 31780Sstevel@tonic-gate /* 31790Sstevel@tonic-gate * cond_reltimedwait() is a cancellation point but _cond_reltimedwait() 31800Sstevel@tonic-gate * is not. System libraries call the non-cancellation version. 31810Sstevel@tonic-gate * It is expected that only applications call the cancellation version. 
31820Sstevel@tonic-gate */ 31830Sstevel@tonic-gate int 31840Sstevel@tonic-gate _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 31850Sstevel@tonic-gate { 31860Sstevel@tonic-gate timespec_t tslocal = *reltime; 31870Sstevel@tonic-gate 31880Sstevel@tonic-gate return (cond_wait_common(cvp, mp, &tslocal)); 31890Sstevel@tonic-gate } 31900Sstevel@tonic-gate 31910Sstevel@tonic-gate #pragma weak cond_reltimedwait = _cond_reltimedwait_cancel 31920Sstevel@tonic-gate int 31930Sstevel@tonic-gate _cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 31940Sstevel@tonic-gate { 31950Sstevel@tonic-gate int error; 31960Sstevel@tonic-gate 31970Sstevel@tonic-gate _cancelon(); 31980Sstevel@tonic-gate error = _cond_reltimedwait(cvp, mp, reltime); 31990Sstevel@tonic-gate if (error == EINTR) 32000Sstevel@tonic-gate _canceloff(); 32010Sstevel@tonic-gate else 32020Sstevel@tonic-gate _canceloff_nocancel(); 32030Sstevel@tonic-gate return (error); 32040Sstevel@tonic-gate } 32050Sstevel@tonic-gate 32060Sstevel@tonic-gate #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 32070Sstevel@tonic-gate int 32080Sstevel@tonic-gate _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 32090Sstevel@tonic-gate const timespec_t *reltime) 32100Sstevel@tonic-gate { 32110Sstevel@tonic-gate int error; 32120Sstevel@tonic-gate 32130Sstevel@tonic-gate error = _cond_reltimedwait_cancel(cvp, mp, reltime); 32140Sstevel@tonic-gate if (error == ETIME) 32150Sstevel@tonic-gate error = ETIMEDOUT; 32160Sstevel@tonic-gate else if (error == EINTR) 32170Sstevel@tonic-gate error = 0; 32180Sstevel@tonic-gate return (error); 32190Sstevel@tonic-gate } 32200Sstevel@tonic-gate 32210Sstevel@tonic-gate #pragma weak pthread_cond_signal = cond_signal_internal 32220Sstevel@tonic-gate #pragma weak _pthread_cond_signal = cond_signal_internal 32230Sstevel@tonic-gate #pragma weak cond_signal = cond_signal_internal 32240Sstevel@tonic-gate #pragma weak _cond_signal = 
cond_signal_internal 32250Sstevel@tonic-gate int 32260Sstevel@tonic-gate cond_signal_internal(cond_t *cvp) 32270Sstevel@tonic-gate { 32280Sstevel@tonic-gate ulwp_t *self = curthread; 32290Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 32300Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 32310Sstevel@tonic-gate int error = 0; 32320Sstevel@tonic-gate queue_head_t *qp; 32330Sstevel@tonic-gate mutex_t *mp; 32340Sstevel@tonic-gate queue_head_t *mqp; 32350Sstevel@tonic-gate ulwp_t **ulwpp; 32360Sstevel@tonic-gate ulwp_t *ulwp; 32370Sstevel@tonic-gate ulwp_t *prev = NULL; 32380Sstevel@tonic-gate ulwp_t *next; 32390Sstevel@tonic-gate ulwp_t **suspp = NULL; 32400Sstevel@tonic-gate ulwp_t *susprev; 32410Sstevel@tonic-gate 32420Sstevel@tonic-gate if (csp) 32430Sstevel@tonic-gate tdb_incr(csp->cond_signal); 32440Sstevel@tonic-gate 32450Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 32460Sstevel@tonic-gate error = __lwp_cond_signal(cvp); 32470Sstevel@tonic-gate 32480Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 32490Sstevel@tonic-gate return (error); 32500Sstevel@tonic-gate 32510Sstevel@tonic-gate /* 32520Sstevel@tonic-gate * Move someone from the condvar sleep queue to the mutex sleep 32530Sstevel@tonic-gate * queue for the mutex that he will acquire on being waked up. 32540Sstevel@tonic-gate * We can do this only if we own the mutex he will acquire. 32550Sstevel@tonic-gate * If we do not own the mutex, or if his ul_cv_wake flag 32560Sstevel@tonic-gate * is set, just dequeue and unpark him. 
32570Sstevel@tonic-gate */ 32580Sstevel@tonic-gate qp = queue_lock(cvp, CV); 32590Sstevel@tonic-gate for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 32600Sstevel@tonic-gate prev = ulwp, ulwpp = &ulwp->ul_link) { 32610Sstevel@tonic-gate if (ulwp->ul_wchan == cvp) { 32620Sstevel@tonic-gate if (!ulwp->ul_stop) 32630Sstevel@tonic-gate break; 32640Sstevel@tonic-gate /* 32650Sstevel@tonic-gate * Try not to dequeue a suspended thread. 32660Sstevel@tonic-gate * This mimics the old libthread's behavior. 32670Sstevel@tonic-gate */ 32680Sstevel@tonic-gate if (suspp == NULL) { 32690Sstevel@tonic-gate suspp = ulwpp; 32700Sstevel@tonic-gate susprev = prev; 32710Sstevel@tonic-gate } 32720Sstevel@tonic-gate } 32730Sstevel@tonic-gate } 32740Sstevel@tonic-gate if (ulwp == NULL && suspp != NULL) { 32750Sstevel@tonic-gate ulwp = *(ulwpp = suspp); 32760Sstevel@tonic-gate prev = susprev; 32770Sstevel@tonic-gate suspp = NULL; 32780Sstevel@tonic-gate } 32790Sstevel@tonic-gate if (ulwp == NULL) { /* no one on the sleep queue */ 32800Sstevel@tonic-gate cvp->cond_waiters_user = 0; 32810Sstevel@tonic-gate queue_unlock(qp); 32820Sstevel@tonic-gate return (error); 32830Sstevel@tonic-gate } 32840Sstevel@tonic-gate /* 32850Sstevel@tonic-gate * Scan the remainder of the CV queue for another waiter. 32860Sstevel@tonic-gate */ 32870Sstevel@tonic-gate if (suspp != NULL) { 32880Sstevel@tonic-gate next = *suspp; 32890Sstevel@tonic-gate } else { 32900Sstevel@tonic-gate for (next = ulwp->ul_link; next != NULL; next = next->ul_link) 32910Sstevel@tonic-gate if (next->ul_wchan == cvp) 32920Sstevel@tonic-gate break; 32930Sstevel@tonic-gate } 32940Sstevel@tonic-gate if (next == NULL) 32950Sstevel@tonic-gate cvp->cond_waiters_user = 0; 32960Sstevel@tonic-gate 32970Sstevel@tonic-gate /* 32980Sstevel@tonic-gate * Inform the thread that he was the recipient of a cond_signal(). 
32990Sstevel@tonic-gate * This lets him deal with cond_signal() and, concurrently, 33000Sstevel@tonic-gate * one or more of a cancellation, a UNIX signal, or a timeout. 33010Sstevel@tonic-gate * These latter conditions must not consume a cond_signal(). 33020Sstevel@tonic-gate */ 33030Sstevel@tonic-gate ulwp->ul_signalled = 1; 33040Sstevel@tonic-gate 33050Sstevel@tonic-gate /* 33060Sstevel@tonic-gate * Dequeue the waiter but leave his ul_sleepq non-NULL 33070Sstevel@tonic-gate * while we move him to the mutex queue so that he can 33080Sstevel@tonic-gate * deal properly with spurious wakeups. 33090Sstevel@tonic-gate */ 33100Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 33114574Sraf ulwp->ul_link = NULL; 33120Sstevel@tonic-gate if (qp->qh_tail == ulwp) 33130Sstevel@tonic-gate qp->qh_tail = prev; 33140Sstevel@tonic-gate qp->qh_qlen--; 33150Sstevel@tonic-gate 33160Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 33170Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 33180Sstevel@tonic-gate ASSERT(mp != NULL); 33190Sstevel@tonic-gate 33200Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 33210Sstevel@tonic-gate lwpid_t lwpid = ulwp->ul_lwpid; 33220Sstevel@tonic-gate 33230Sstevel@tonic-gate no_preempt(self); 33240Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 33250Sstevel@tonic-gate ulwp->ul_wchan = NULL; 33260Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 33270Sstevel@tonic-gate queue_unlock(qp); 33280Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 33290Sstevel@tonic-gate preempt(self); 33300Sstevel@tonic-gate } else { 33310Sstevel@tonic-gate mqp = queue_lock(mp, MX); 33320Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 33330Sstevel@tonic-gate mp->mutex_waiters = 1; 33340Sstevel@tonic-gate queue_unlock(mqp); 33350Sstevel@tonic-gate queue_unlock(qp); 33360Sstevel@tonic-gate } 33370Sstevel@tonic-gate 33380Sstevel@tonic-gate return (error); 33390Sstevel@tonic-gate } 33400Sstevel@tonic-gate 33414570Sraf /* 33424574Sraf * Utility function called by 
mutex_wakeup_all(), cond_broadcast(), 33434574Sraf * and rw_queue_release() to (re)allocate a big buffer to hold the 33444574Sraf * lwpids of all the threads to be set running after they are removed 33454574Sraf * from their sleep queues. Since we are holding a queue lock, we 33464574Sraf * cannot call any function that might acquire a lock. mmap(), munmap(), 33474574Sraf * lwp_unpark_all() are simple system calls and are safe in this regard. 33484570Sraf */ 33494570Sraf lwpid_t * 33504570Sraf alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 33514570Sraf { 33524570Sraf /* 33534570Sraf * Allocate NEWLWPS ids on the first overflow. 33544570Sraf * Double the allocation each time after that. 33554570Sraf */ 33564570Sraf int nlwpid = *nlwpid_ptr; 33574570Sraf int maxlwps = *maxlwps_ptr; 33584570Sraf int first_allocation; 33594570Sraf int newlwps; 33604570Sraf void *vaddr; 33614570Sraf 33624570Sraf ASSERT(nlwpid == maxlwps); 33634570Sraf 33644570Sraf first_allocation = (maxlwps == MAXLWPS); 33654570Sraf newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 33664570Sraf vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 33674570Sraf PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 33684570Sraf 33694570Sraf if (vaddr == MAP_FAILED) { 33704570Sraf /* 33714570Sraf * Let's hope this never happens. 33724570Sraf * If it does, then we have a terrible 33734570Sraf * thundering herd on our hands. 
33744570Sraf */ 33754570Sraf (void) __lwp_unpark_all(lwpid, nlwpid); 33764570Sraf *nlwpid_ptr = 0; 33774570Sraf } else { 33784570Sraf (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 33794570Sraf if (!first_allocation) 33804570Sraf (void) _private_munmap(lwpid, 33814570Sraf maxlwps * sizeof (lwpid_t)); 33824570Sraf lwpid = vaddr; 33834570Sraf *maxlwps_ptr = newlwps; 33844570Sraf } 33854570Sraf 33864570Sraf return (lwpid); 33874570Sraf } 33880Sstevel@tonic-gate 33890Sstevel@tonic-gate #pragma weak pthread_cond_broadcast = cond_broadcast_internal 33900Sstevel@tonic-gate #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 33910Sstevel@tonic-gate #pragma weak cond_broadcast = cond_broadcast_internal 33920Sstevel@tonic-gate #pragma weak _cond_broadcast = cond_broadcast_internal 33930Sstevel@tonic-gate int 33940Sstevel@tonic-gate cond_broadcast_internal(cond_t *cvp) 33950Sstevel@tonic-gate { 33960Sstevel@tonic-gate ulwp_t *self = curthread; 33970Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 33980Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 33990Sstevel@tonic-gate int error = 0; 34000Sstevel@tonic-gate queue_head_t *qp; 34010Sstevel@tonic-gate mutex_t *mp; 34020Sstevel@tonic-gate mutex_t *mp_cache = NULL; 34034570Sraf queue_head_t *mqp = NULL; 34040Sstevel@tonic-gate ulwp_t **ulwpp; 34050Sstevel@tonic-gate ulwp_t *ulwp; 34060Sstevel@tonic-gate ulwp_t *prev = NULL; 34074570Sraf int nlwpid = 0; 34084570Sraf int maxlwps = MAXLWPS; 34090Sstevel@tonic-gate lwpid_t buffer[MAXLWPS]; 34100Sstevel@tonic-gate lwpid_t *lwpid = buffer; 34110Sstevel@tonic-gate 34120Sstevel@tonic-gate if (csp) 34130Sstevel@tonic-gate tdb_incr(csp->cond_broadcast); 34140Sstevel@tonic-gate 34150Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? 
*/ 34160Sstevel@tonic-gate error = __lwp_cond_broadcast(cvp); 34170Sstevel@tonic-gate 34180Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 34190Sstevel@tonic-gate return (error); 34200Sstevel@tonic-gate 34210Sstevel@tonic-gate /* 34220Sstevel@tonic-gate * Move everyone from the condvar sleep queue to the mutex sleep 34230Sstevel@tonic-gate * queue for the mutex that they will acquire on being waked up. 34240Sstevel@tonic-gate * We can do this only if we own the mutex they will acquire. 34250Sstevel@tonic-gate * If we do not own the mutex, or if their ul_cv_wake flag 34260Sstevel@tonic-gate * is set, just dequeue and unpark them. 34270Sstevel@tonic-gate * 34280Sstevel@tonic-gate * We keep track of lwpids that are to be unparked in lwpid[]. 34290Sstevel@tonic-gate * __lwp_unpark_all() is called to unpark all of them after 34300Sstevel@tonic-gate * they have been removed from the sleep queue and the sleep 34310Sstevel@tonic-gate * queue lock has been dropped. If we run out of space in our 34320Sstevel@tonic-gate * on-stack buffer, we need to allocate more but we can't call 34330Sstevel@tonic-gate * lmalloc() because we are holding a queue lock when the overflow 34340Sstevel@tonic-gate * occurs and lmalloc() acquires a lock. We can't use alloca() 34354570Sraf * either because the application may have allocated a small 34364570Sraf * stack and we don't want to overrun the stack. So we call 34374570Sraf * alloc_lwpids() to allocate a bigger buffer using the mmap() 34380Sstevel@tonic-gate * system call directly since that path acquires no locks. 
34390Sstevel@tonic-gate */ 34400Sstevel@tonic-gate qp = queue_lock(cvp, CV); 34410Sstevel@tonic-gate cvp->cond_waiters_user = 0; 34420Sstevel@tonic-gate ulwpp = &qp->qh_head; 34430Sstevel@tonic-gate while ((ulwp = *ulwpp) != NULL) { 34440Sstevel@tonic-gate if (ulwp->ul_wchan != cvp) { 34450Sstevel@tonic-gate prev = ulwp; 34460Sstevel@tonic-gate ulwpp = &ulwp->ul_link; 34470Sstevel@tonic-gate continue; 34480Sstevel@tonic-gate } 34490Sstevel@tonic-gate *ulwpp = ulwp->ul_link; 34504574Sraf ulwp->ul_link = NULL; 34510Sstevel@tonic-gate if (qp->qh_tail == ulwp) 34520Sstevel@tonic-gate qp->qh_tail = prev; 34530Sstevel@tonic-gate qp->qh_qlen--; 34540Sstevel@tonic-gate mp = ulwp->ul_cvmutex; /* his mutex */ 34550Sstevel@tonic-gate ulwp->ul_cvmutex = NULL; 34560Sstevel@tonic-gate ASSERT(mp != NULL); 34570Sstevel@tonic-gate if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 34580Sstevel@tonic-gate ulwp->ul_sleepq = NULL; 34590Sstevel@tonic-gate ulwp->ul_wchan = NULL; 34600Sstevel@tonic-gate ulwp->ul_cv_wake = 0; 34614570Sraf if (nlwpid == maxlwps) 34624570Sraf lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 34630Sstevel@tonic-gate lwpid[nlwpid++] = ulwp->ul_lwpid; 34640Sstevel@tonic-gate } else { 34650Sstevel@tonic-gate if (mp != mp_cache) { 34660Sstevel@tonic-gate mp_cache = mp; 34674570Sraf if (mqp != NULL) 34684570Sraf queue_unlock(mqp); 34694570Sraf mqp = queue_lock(mp, MX); 34700Sstevel@tonic-gate } 34710Sstevel@tonic-gate enqueue(mqp, ulwp, mp, MX); 34720Sstevel@tonic-gate mp->mutex_waiters = 1; 34730Sstevel@tonic-gate } 34740Sstevel@tonic-gate } 34754570Sraf if (mqp != NULL) 34764570Sraf queue_unlock(mqp); 34774570Sraf if (nlwpid == 0) { 34784570Sraf queue_unlock(qp); 34794570Sraf } else { 34804570Sraf no_preempt(self); 34814570Sraf queue_unlock(qp); 34820Sstevel@tonic-gate if (nlwpid == 1) 34830Sstevel@tonic-gate (void) __lwp_unpark(lwpid[0]); 34840Sstevel@tonic-gate else 34850Sstevel@tonic-gate (void) __lwp_unpark_all(lwpid, nlwpid); 34864570Sraf preempt(self); 
34870Sstevel@tonic-gate } 34880Sstevel@tonic-gate if (lwpid != buffer) 34890Sstevel@tonic-gate (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 34900Sstevel@tonic-gate return (error); 34910Sstevel@tonic-gate } 34920Sstevel@tonic-gate 34930Sstevel@tonic-gate #pragma weak pthread_cond_destroy = _cond_destroy 34940Sstevel@tonic-gate #pragma weak _pthread_cond_destroy = _cond_destroy 34950Sstevel@tonic-gate #pragma weak cond_destroy = _cond_destroy 34960Sstevel@tonic-gate int 34970Sstevel@tonic-gate _cond_destroy(cond_t *cvp) 34980Sstevel@tonic-gate { 34990Sstevel@tonic-gate cvp->cond_magic = 0; 35000Sstevel@tonic-gate tdb_sync_obj_deregister(cvp); 35010Sstevel@tonic-gate return (0); 35020Sstevel@tonic-gate } 35030Sstevel@tonic-gate 35040Sstevel@tonic-gate #if defined(THREAD_DEBUG) 35050Sstevel@tonic-gate void 35060Sstevel@tonic-gate assert_no_libc_locks_held(void) 35070Sstevel@tonic-gate { 35080Sstevel@tonic-gate ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 35090Sstevel@tonic-gate } 35100Sstevel@tonic-gate #endif 35110Sstevel@tonic-gate 35120Sstevel@tonic-gate /* protected by link_lock */ 35130Sstevel@tonic-gate uint64_t spin_lock_spin; 35140Sstevel@tonic-gate uint64_t spin_lock_spin2; 35150Sstevel@tonic-gate uint64_t spin_lock_sleep; 35160Sstevel@tonic-gate uint64_t spin_lock_wakeup; 35170Sstevel@tonic-gate 35180Sstevel@tonic-gate /* 35190Sstevel@tonic-gate * Record spin lock statistics. 35200Sstevel@tonic-gate * Called by a thread exiting itself in thrp_exit(). 35210Sstevel@tonic-gate * Also called via atexit() from the thread calling 35220Sstevel@tonic-gate * exit() to do all the other threads as well. 
35230Sstevel@tonic-gate */ 35240Sstevel@tonic-gate void 35250Sstevel@tonic-gate record_spin_locks(ulwp_t *ulwp) 35260Sstevel@tonic-gate { 35270Sstevel@tonic-gate spin_lock_spin += ulwp->ul_spin_lock_spin; 35280Sstevel@tonic-gate spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 35290Sstevel@tonic-gate spin_lock_sleep += ulwp->ul_spin_lock_sleep; 35300Sstevel@tonic-gate spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 35310Sstevel@tonic-gate ulwp->ul_spin_lock_spin = 0; 35320Sstevel@tonic-gate ulwp->ul_spin_lock_spin2 = 0; 35330Sstevel@tonic-gate ulwp->ul_spin_lock_sleep = 0; 35340Sstevel@tonic-gate ulwp->ul_spin_lock_wakeup = 0; 35350Sstevel@tonic-gate } 35360Sstevel@tonic-gate 35370Sstevel@tonic-gate /* 35380Sstevel@tonic-gate * atexit function: dump the queue statistics to stderr. 35390Sstevel@tonic-gate */ 35401219Sraf #if !defined(__lint) 35411219Sraf #define fprintf _fprintf 35421219Sraf #endif 35430Sstevel@tonic-gate #include <stdio.h> 35440Sstevel@tonic-gate void 35450Sstevel@tonic-gate dump_queue_statistics(void) 35460Sstevel@tonic-gate { 35470Sstevel@tonic-gate uberdata_t *udp = curthread->ul_uberdata; 35480Sstevel@tonic-gate queue_head_t *qp; 35490Sstevel@tonic-gate int qn; 35500Sstevel@tonic-gate uint64_t spin_lock_total = 0; 35510Sstevel@tonic-gate 35520Sstevel@tonic-gate if (udp->queue_head == NULL || thread_queue_dump == 0) 35530Sstevel@tonic-gate return; 35540Sstevel@tonic-gate 35550Sstevel@tonic-gate if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 35560Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 35570Sstevel@tonic-gate return; 35580Sstevel@tonic-gate for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 35590Sstevel@tonic-gate if (qp->qh_lockcount == 0) 35600Sstevel@tonic-gate continue; 35610Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 35620Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 35630Sstevel@tonic-gate (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 
35640Sstevel@tonic-gate return; 35650Sstevel@tonic-gate } 35660Sstevel@tonic-gate 35670Sstevel@tonic-gate if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 35680Sstevel@tonic-gate fprintf(stderr, "queue# lockcount max qlen\n") < 0) 35690Sstevel@tonic-gate return; 35700Sstevel@tonic-gate for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 35710Sstevel@tonic-gate if (qp->qh_lockcount == 0) 35720Sstevel@tonic-gate continue; 35730Sstevel@tonic-gate spin_lock_total += qp->qh_lockcount; 35740Sstevel@tonic-gate if (fprintf(stderr, "%5d %12llu%12u\n", qn, 35750Sstevel@tonic-gate (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 35760Sstevel@tonic-gate return; 35770Sstevel@tonic-gate } 35780Sstevel@tonic-gate 35790Sstevel@tonic-gate (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 35800Sstevel@tonic-gate (u_longlong_t)spin_lock_total); 35810Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 35820Sstevel@tonic-gate (u_longlong_t)spin_lock_spin); 35830Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 35840Sstevel@tonic-gate (u_longlong_t)spin_lock_spin2); 35850Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 35860Sstevel@tonic-gate (u_longlong_t)spin_lock_sleep); 35870Sstevel@tonic-gate (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 35880Sstevel@tonic-gate (u_longlong_t)spin_lock_wakeup); 35890Sstevel@tonic-gate } 3590