xref: /onnv-gate/usr/src/uts/common/syscall/lwp_sobj.c (revision 11798:1e7f1f154004)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
54570Sraf  * Common Development and Distribution License (the "License").
64570Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
214570Sraf 
224570Sraf /*
23*11798SRoger.Faulkner@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
244570Sraf  * Use is subject to license terms.
254570Sraf  */
264570Sraf 
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #include <sys/param.h>
310Sstevel@tonic-gate #include <sys/types.h>
320Sstevel@tonic-gate #include <sys/sysmacros.h>
330Sstevel@tonic-gate #include <sys/systm.h>
340Sstevel@tonic-gate #include <sys/cred.h>
350Sstevel@tonic-gate #include <sys/user.h>
360Sstevel@tonic-gate #include <sys/errno.h>
370Sstevel@tonic-gate #include <sys/file.h>
380Sstevel@tonic-gate #include <sys/proc.h>
390Sstevel@tonic-gate #include <sys/prsystm.h>
400Sstevel@tonic-gate #include <sys/kmem.h>
410Sstevel@tonic-gate #include <sys/sobject.h>
420Sstevel@tonic-gate #include <sys/fault.h>
430Sstevel@tonic-gate #include <sys/procfs.h>
440Sstevel@tonic-gate #include <sys/watchpoint.h>
450Sstevel@tonic-gate #include <sys/time.h>
460Sstevel@tonic-gate #include <sys/cmn_err.h>
470Sstevel@tonic-gate #include <sys/machlock.h>
480Sstevel@tonic-gate #include <sys/debug.h>
490Sstevel@tonic-gate #include <sys/synch.h>
500Sstevel@tonic-gate #include <sys/synch32.h>
510Sstevel@tonic-gate #include <sys/mman.h>
520Sstevel@tonic-gate #include <sys/class.h>
530Sstevel@tonic-gate #include <sys/schedctl.h>
540Sstevel@tonic-gate #include <sys/sleepq.h>
550Sstevel@tonic-gate #include <sys/policy.h>
560Sstevel@tonic-gate #include <sys/tnf_probe.h>
570Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
580Sstevel@tonic-gate #include <sys/turnstile.h>
590Sstevel@tonic-gate #include <sys/atomic.h>
600Sstevel@tonic-gate #include <sys/lwp_timer_impl.h>
610Sstevel@tonic-gate #include <sys/lwp_upimutex_impl.h>
620Sstevel@tonic-gate #include <vm/as.h>
630Sstevel@tonic-gate #include <sys/sdt.h>
640Sstevel@tonic-gate 
650Sstevel@tonic-gate static kthread_t *lwpsobj_owner(caddr_t);
660Sstevel@tonic-gate static void lwp_unsleep(kthread_t *t);
670Sstevel@tonic-gate static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
680Sstevel@tonic-gate static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
699264SRoger.Faulkner@Sun.COM static void lwp_mutex_unregister(void *uaddr);
7010887SRoger.Faulkner@Sun.COM static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
7110887SRoger.Faulkner@Sun.COM static int iswanted(kthread_t *, lwpchan_t *);
720Sstevel@tonic-gate 
730Sstevel@tonic-gate extern int lwp_cond_signal(lwp_cond_t *cv);
740Sstevel@tonic-gate 
/*
 * Upper bound on the number of user-level priority-inheritance (upi)
 * locks that one thread may hold at the same time.  Bounding this
 * bounds the kmem consumed on behalf of each thread.  The limit is
 * applied per thread but is a single system-wide value that can be
 * tuned through /etc/system.
 *
 * If a per-process limit on the number of lwps (say, maxlwps) is ever
 * introduced, this per-thread limit implies a per-process limit on
 * the number of held upi locks:
 *      maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
860Sstevel@tonic-gate 
870Sstevel@tonic-gate /*
880Sstevel@tonic-gate  * The sobj_ops vector exports a set of functions needed when a thread
890Sstevel@tonic-gate  * is asleep on a synchronization object of this type.
900Sstevel@tonic-gate  */
910Sstevel@tonic-gate static sobj_ops_t lwp_sobj_ops = {
920Sstevel@tonic-gate 	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
930Sstevel@tonic-gate };
940Sstevel@tonic-gate 
950Sstevel@tonic-gate static kthread_t *lwpsobj_pi_owner(upimutex_t *up);
960Sstevel@tonic-gate 
970Sstevel@tonic-gate static sobj_ops_t lwp_sobj_pi_ops = {
980Sstevel@tonic-gate 	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
990Sstevel@tonic-gate 	turnstile_change_pri
1000Sstevel@tonic-gate };
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate static sleepq_head_t	lwpsleepq[NSLEEPQ];
1030Sstevel@tonic-gate upib_t			upimutextab[UPIMUTEX_TABSIZE];
1040Sstevel@tonic-gate 
/* Each of the two lock pools holds 2^LWPCHAN_LOCK_SHIFT (1024) locks. */
#define	LWPCHAN_LOCK_SHIFT	10
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * Hash X into an index into the lwpchan lock array.
 *
 * X is derived from lc_wchan and lc_wchan0, which are addresses that are
 * most likely 8-byte aligned, so the low-order 3 bits are shifted off.
 * The next two LWPCHAN_LOCK_SHIFT-bit groups are folded together with
 * XOR to give an index within one pool.  A non-zero 'pool' (it is
 * either 0 or 1) selects the second pool by adding LWPCHAN_LOCK_SIZE.
 */
#define	LWPCHAN_LOCK_HASH(X, pool)					\
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) &		\
	    (LWPCHAN_LOCK_SIZE - 1)) +					\
	    ((pool) ? LWPCHAN_LOCK_SIZE : 0))
1160Sstevel@tonic-gate 
1170Sstevel@tonic-gate static kmutex_t		lwpchanlock[2 * LWPCHAN_LOCK_SIZE];
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * Is this a POSIX threads user-level lock requiring priority inheritance?
1210Sstevel@tonic-gate  */
1220Sstevel@tonic-gate #define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate static sleepq_head_t *
lwpsqhash(lwpchan_t * lwpchan)1250Sstevel@tonic-gate lwpsqhash(lwpchan_t *lwpchan)
1260Sstevel@tonic-gate {
1270Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1280Sstevel@tonic-gate 	return (&lwpsleepq[SQHASHINDEX(x)]);
1290Sstevel@tonic-gate }
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate /*
1320Sstevel@tonic-gate  * Lock an lwpchan.
1330Sstevel@tonic-gate  * Keep this in sync with lwpchan_unlock(), below.
1340Sstevel@tonic-gate  */
1350Sstevel@tonic-gate static void
lwpchan_lock(lwpchan_t * lwpchan,int pool)1360Sstevel@tonic-gate lwpchan_lock(lwpchan_t *lwpchan, int pool)
1370Sstevel@tonic-gate {
1380Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1390Sstevel@tonic-gate 	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
1400Sstevel@tonic-gate }
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate /*
1430Sstevel@tonic-gate  * Unlock an lwpchan.
1440Sstevel@tonic-gate  * Keep this in sync with lwpchan_lock(), above.
1450Sstevel@tonic-gate  */
1460Sstevel@tonic-gate static void
lwpchan_unlock(lwpchan_t * lwpchan,int pool)1470Sstevel@tonic-gate lwpchan_unlock(lwpchan_t *lwpchan, int pool)
1480Sstevel@tonic-gate {
1490Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1500Sstevel@tonic-gate 	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
1510Sstevel@tonic-gate }
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate /*
1540Sstevel@tonic-gate  * Delete mappings from the lwpchan cache for pages that are being
1550Sstevel@tonic-gate  * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
1560Sstevel@tonic-gate  * all mappings within the range are deleted from the lwpchan cache.
1570Sstevel@tonic-gate  */
1580Sstevel@tonic-gate void
lwpchan_delete_mapping(proc_t * p,caddr_t start,caddr_t end)1590Sstevel@tonic-gate lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
1600Sstevel@tonic-gate {
1610Sstevel@tonic-gate 	lwpchan_data_t *lcp;
1620Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket;
1630Sstevel@tonic-gate 	lwpchan_hashbucket_t *endbucket;
1640Sstevel@tonic-gate 	lwpchan_entry_t *ent;
1650Sstevel@tonic-gate 	lwpchan_entry_t **prev;
1660Sstevel@tonic-gate 	caddr_t addr;
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 	mutex_enter(&p->p_lcp_lock);
1690Sstevel@tonic-gate 	lcp = p->p_lcp;
1700Sstevel@tonic-gate 	hashbucket = lcp->lwpchan_cache;
1710Sstevel@tonic-gate 	endbucket = hashbucket + lcp->lwpchan_size;
1720Sstevel@tonic-gate 	for (; hashbucket < endbucket; hashbucket++) {
1730Sstevel@tonic-gate 		if (hashbucket->lwpchan_chain == NULL)
1740Sstevel@tonic-gate 			continue;
1750Sstevel@tonic-gate 		mutex_enter(&hashbucket->lwpchan_lock);
1760Sstevel@tonic-gate 		prev = &hashbucket->lwpchan_chain;
1770Sstevel@tonic-gate 		/* check entire chain */
1780Sstevel@tonic-gate 		while ((ent = *prev) != NULL) {
1790Sstevel@tonic-gate 			addr = ent->lwpchan_addr;
1800Sstevel@tonic-gate 			if (start <= addr && addr < end) {
1810Sstevel@tonic-gate 				*prev = ent->lwpchan_next;
1826057Sraf 				/*
1836057Sraf 				 * We do this only for the obsolete type
1846057Sraf 				 * USYNC_PROCESS_ROBUST.  Otherwise robust
1856057Sraf 				 * locks do not draw ELOCKUNMAPPED or
1866057Sraf 				 * EOWNERDEAD due to being unmapped.
1876057Sraf 				 */
1880Sstevel@tonic-gate 				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
1896057Sraf 				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
1900Sstevel@tonic-gate 					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
1919264SRoger.Faulkner@Sun.COM 				/*
1929264SRoger.Faulkner@Sun.COM 				 * If there is a user-level robust lock
1939264SRoger.Faulkner@Sun.COM 				 * registration, mark it as invalid.
1949264SRoger.Faulkner@Sun.COM 				 */
1959264SRoger.Faulkner@Sun.COM 				if ((addr = ent->lwpchan_uaddr) != NULL)
1969264SRoger.Faulkner@Sun.COM 					lwp_mutex_unregister(addr);
1970Sstevel@tonic-gate 				kmem_free(ent, sizeof (*ent));
1980Sstevel@tonic-gate 				atomic_add_32(&lcp->lwpchan_entries, -1);
1990Sstevel@tonic-gate 			} else {
2000Sstevel@tonic-gate 				prev = &ent->lwpchan_next;
2010Sstevel@tonic-gate 			}
2020Sstevel@tonic-gate 		}
2030Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
2040Sstevel@tonic-gate 	}
2050Sstevel@tonic-gate 	mutex_exit(&p->p_lcp_lock);
2060Sstevel@tonic-gate }
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate /*
2090Sstevel@tonic-gate  * Given an lwpchan cache pointer and a process virtual address,
2100Sstevel@tonic-gate  * return a pointer to the corresponding lwpchan hash bucket.
2110Sstevel@tonic-gate  */
2120Sstevel@tonic-gate static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t * lcp,uintptr_t addr)2130Sstevel@tonic-gate lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
2140Sstevel@tonic-gate {
2150Sstevel@tonic-gate 	uint_t i;
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 	/*
2180Sstevel@tonic-gate 	 * All user-level sync object addresses are 8-byte aligned.
2190Sstevel@tonic-gate 	 * Ignore the lowest 3 bits of the address and use the
2200Sstevel@tonic-gate 	 * higher-order 2*lwpchan_bits bits for the hash index.
2210Sstevel@tonic-gate 	 */
2220Sstevel@tonic-gate 	addr >>= 3;
2230Sstevel@tonic-gate 	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
2240Sstevel@tonic-gate 	return (lcp->lwpchan_cache + i);
2250Sstevel@tonic-gate }
2260Sstevel@tonic-gate 
2270Sstevel@tonic-gate /*
2280Sstevel@tonic-gate  * (Re)allocate the per-process lwpchan cache.
2290Sstevel@tonic-gate  */
2300Sstevel@tonic-gate static void
lwpchan_alloc_cache(proc_t * p,uint_t bits)2310Sstevel@tonic-gate lwpchan_alloc_cache(proc_t *p, uint_t bits)
2320Sstevel@tonic-gate {
2330Sstevel@tonic-gate 	lwpchan_data_t *lcp;
2340Sstevel@tonic-gate 	lwpchan_data_t *old_lcp;
2350Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket;
2360Sstevel@tonic-gate 	lwpchan_hashbucket_t *endbucket;
2370Sstevel@tonic-gate 	lwpchan_hashbucket_t *newbucket;
2380Sstevel@tonic-gate 	lwpchan_entry_t *ent;
2390Sstevel@tonic-gate 	lwpchan_entry_t *next;
2400Sstevel@tonic-gate 	uint_t count;
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
2450Sstevel@tonic-gate 	lcp->lwpchan_bits = bits;
2460Sstevel@tonic-gate 	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
2470Sstevel@tonic-gate 	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
2480Sstevel@tonic-gate 	lcp->lwpchan_entries = 0;
2490Sstevel@tonic-gate 	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
2506057Sraf 	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
2510Sstevel@tonic-gate 	lcp->lwpchan_next_data = NULL;
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	mutex_enter(&p->p_lcp_lock);
2540Sstevel@tonic-gate 	if ((old_lcp = p->p_lcp) != NULL) {
2550Sstevel@tonic-gate 		if (old_lcp->lwpchan_bits >= bits) {
2560Sstevel@tonic-gate 			/* someone beat us to it */
2570Sstevel@tonic-gate 			mutex_exit(&p->p_lcp_lock);
2580Sstevel@tonic-gate 			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
2596057Sraf 			    sizeof (lwpchan_hashbucket_t));
2600Sstevel@tonic-gate 			kmem_free(lcp, sizeof (lwpchan_data_t));
2610Sstevel@tonic-gate 			return;
2620Sstevel@tonic-gate 		}
2630Sstevel@tonic-gate 		/*
2640Sstevel@tonic-gate 		 * Acquire all of the old hash table locks.
2650Sstevel@tonic-gate 		 */
2660Sstevel@tonic-gate 		hashbucket = old_lcp->lwpchan_cache;
2670Sstevel@tonic-gate 		endbucket = hashbucket + old_lcp->lwpchan_size;
2680Sstevel@tonic-gate 		for (; hashbucket < endbucket; hashbucket++)
2690Sstevel@tonic-gate 			mutex_enter(&hashbucket->lwpchan_lock);
2700Sstevel@tonic-gate 		/*
2710Sstevel@tonic-gate 		 * Move all of the old hash table entries to the
2720Sstevel@tonic-gate 		 * new hash table.  The new hash table has not yet
2730Sstevel@tonic-gate 		 * been installed so we don't need any of its locks.
2740Sstevel@tonic-gate 		 */
2750Sstevel@tonic-gate 		count = 0;
2760Sstevel@tonic-gate 		hashbucket = old_lcp->lwpchan_cache;
2770Sstevel@tonic-gate 		for (; hashbucket < endbucket; hashbucket++) {
2780Sstevel@tonic-gate 			ent = hashbucket->lwpchan_chain;
2790Sstevel@tonic-gate 			while (ent != NULL) {
2800Sstevel@tonic-gate 				next = ent->lwpchan_next;
2810Sstevel@tonic-gate 				newbucket = lwpchan_bucket(lcp,
2826057Sraf 				    (uintptr_t)ent->lwpchan_addr);
2830Sstevel@tonic-gate 				ent->lwpchan_next = newbucket->lwpchan_chain;
2840Sstevel@tonic-gate 				newbucket->lwpchan_chain = ent;
2850Sstevel@tonic-gate 				ent = next;
2860Sstevel@tonic-gate 				count++;
2870Sstevel@tonic-gate 			}
2880Sstevel@tonic-gate 			hashbucket->lwpchan_chain = NULL;
2890Sstevel@tonic-gate 		}
2900Sstevel@tonic-gate 		lcp->lwpchan_entries = count;
2910Sstevel@tonic-gate 	}
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	/*
2940Sstevel@tonic-gate 	 * Retire the old hash table.  We can't actually kmem_free() it
2950Sstevel@tonic-gate 	 * now because someone may still have a pointer to it.  Instead,
2960Sstevel@tonic-gate 	 * we link it onto the new hash table's list of retired hash tables.
2970Sstevel@tonic-gate 	 * The new hash table is double the size of the previous one, so
2980Sstevel@tonic-gate 	 * the total size of all retired hash tables is less than the size
2990Sstevel@tonic-gate 	 * of the new one.  exit() and exec() free the retired hash tables
3000Sstevel@tonic-gate 	 * (see lwpchan_destroy_cache(), below).
3010Sstevel@tonic-gate 	 */
3020Sstevel@tonic-gate 	lcp->lwpchan_next_data = old_lcp;
3030Sstevel@tonic-gate 
3040Sstevel@tonic-gate 	/*
3050Sstevel@tonic-gate 	 * As soon as we store the new lcp, future locking operations will
3060Sstevel@tonic-gate 	 * use it.  Therefore, we must ensure that all the state we've just
3070Sstevel@tonic-gate 	 * established reaches global visibility before the new lcp does.
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	membar_producer();
3100Sstevel@tonic-gate 	p->p_lcp = lcp;
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	if (old_lcp != NULL) {
3130Sstevel@tonic-gate 		/*
3140Sstevel@tonic-gate 		 * Release all of the old hash table locks.
3150Sstevel@tonic-gate 		 */
3160Sstevel@tonic-gate 		hashbucket = old_lcp->lwpchan_cache;
3170Sstevel@tonic-gate 		for (; hashbucket < endbucket; hashbucket++)
3180Sstevel@tonic-gate 			mutex_exit(&hashbucket->lwpchan_lock);
3190Sstevel@tonic-gate 	}
3200Sstevel@tonic-gate 	mutex_exit(&p->p_lcp_lock);
3210Sstevel@tonic-gate }
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate /*
3240Sstevel@tonic-gate  * Deallocate the lwpchan cache, and any dynamically allocated mappings.
3250Sstevel@tonic-gate  * Called when the process exits or execs.  All lwps except one have
3260Sstevel@tonic-gate  * exited so we need no locks here.
3270Sstevel@tonic-gate  */
3280Sstevel@tonic-gate void
lwpchan_destroy_cache(int exec)3290Sstevel@tonic-gate lwpchan_destroy_cache(int exec)
3300Sstevel@tonic-gate {
3310Sstevel@tonic-gate 	proc_t *p = curproc;
3320Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket;
3330Sstevel@tonic-gate 	lwpchan_hashbucket_t *endbucket;
3340Sstevel@tonic-gate 	lwpchan_data_t *lcp;
3350Sstevel@tonic-gate 	lwpchan_entry_t *ent;
3360Sstevel@tonic-gate 	lwpchan_entry_t *next;
3370Sstevel@tonic-gate 	uint16_t lockflg;
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	lcp = p->p_lcp;
3400Sstevel@tonic-gate 	p->p_lcp = NULL;
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
3430Sstevel@tonic-gate 	hashbucket = lcp->lwpchan_cache;
3440Sstevel@tonic-gate 	endbucket = hashbucket + lcp->lwpchan_size;
3450Sstevel@tonic-gate 	for (; hashbucket < endbucket; hashbucket++) {
3460Sstevel@tonic-gate 		ent = hashbucket->lwpchan_chain;
3470Sstevel@tonic-gate 		hashbucket->lwpchan_chain = NULL;
3480Sstevel@tonic-gate 		while (ent != NULL) {
3490Sstevel@tonic-gate 			next = ent->lwpchan_next;
3500Sstevel@tonic-gate 			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
3517751SRoger.Faulkner@Sun.COM 			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
3527751SRoger.Faulkner@Sun.COM 			    == (USYNC_PROCESS | LOCK_ROBUST))
3530Sstevel@tonic-gate 				lwp_mutex_cleanup(ent, lockflg);
3540Sstevel@tonic-gate 			kmem_free(ent, sizeof (*ent));
3550Sstevel@tonic-gate 			ent = next;
3560Sstevel@tonic-gate 		}
3570Sstevel@tonic-gate 	}
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate 	while (lcp != NULL) {
3600Sstevel@tonic-gate 		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
3610Sstevel@tonic-gate 		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
3626057Sraf 		    sizeof (lwpchan_hashbucket_t));
3630Sstevel@tonic-gate 		kmem_free(lcp, sizeof (lwpchan_data_t));
3640Sstevel@tonic-gate 		lcp = next_lcp;
3650Sstevel@tonic-gate 	}
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate  * Return zero when there is an entry in the lwpchan cache for the
3700Sstevel@tonic-gate  * given process virtual address and non-zero when there is not.
3710Sstevel@tonic-gate  * The returned non-zero value is the current length of the
3720Sstevel@tonic-gate  * hash chain plus one.  The caller holds the hash bucket lock.
3730Sstevel@tonic-gate  */
3740Sstevel@tonic-gate static uint_t
lwpchan_cache_mapping(caddr_t addr,int type,int pool,lwpchan_t * lwpchan,lwpchan_hashbucket_t * hashbucket)3750Sstevel@tonic-gate lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
3760Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket)
3770Sstevel@tonic-gate {
3780Sstevel@tonic-gate 	lwpchan_entry_t *ent;
3790Sstevel@tonic-gate 	uint_t count = 1;
3800Sstevel@tonic-gate 
3810Sstevel@tonic-gate 	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
3820Sstevel@tonic-gate 		if (ent->lwpchan_addr == addr) {
3830Sstevel@tonic-gate 			if (ent->lwpchan_type != type ||
3840Sstevel@tonic-gate 			    ent->lwpchan_pool != pool) {
3850Sstevel@tonic-gate 				/*
3860Sstevel@tonic-gate 				 * This shouldn't happen, but might if the
3870Sstevel@tonic-gate 				 * process reuses its memory for different
3880Sstevel@tonic-gate 				 * types of sync objects.  We test first
3890Sstevel@tonic-gate 				 * to avoid grabbing the memory cache line.
3900Sstevel@tonic-gate 				 */
3910Sstevel@tonic-gate 				ent->lwpchan_type = (uint16_t)type;
3920Sstevel@tonic-gate 				ent->lwpchan_pool = (uint16_t)pool;
3930Sstevel@tonic-gate 			}
3940Sstevel@tonic-gate 			*lwpchan = ent->lwpchan_lwpchan;
3950Sstevel@tonic-gate 			return (0);
3960Sstevel@tonic-gate 		}
3970Sstevel@tonic-gate 		count++;
3980Sstevel@tonic-gate 	}
3990Sstevel@tonic-gate 	return (count);
4000Sstevel@tonic-gate }
4010Sstevel@tonic-gate 
4020Sstevel@tonic-gate /*
4030Sstevel@tonic-gate  * Return the cached lwpchan mapping if cached, otherwise insert
4040Sstevel@tonic-gate  * a virtual address to lwpchan mapping into the cache.
4050Sstevel@tonic-gate  */
4060Sstevel@tonic-gate static int
lwpchan_get_mapping(struct as * as,caddr_t addr,caddr_t uaddr,int type,lwpchan_t * lwpchan,int pool)4079264SRoger.Faulkner@Sun.COM lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
4080Sstevel@tonic-gate 	int type, lwpchan_t *lwpchan, int pool)
4090Sstevel@tonic-gate {
4100Sstevel@tonic-gate 	proc_t *p = curproc;
4110Sstevel@tonic-gate 	lwpchan_data_t *lcp;
4120Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket;
4130Sstevel@tonic-gate 	lwpchan_entry_t *ent;
4140Sstevel@tonic-gate 	memid_t	memid;
4150Sstevel@tonic-gate 	uint_t count;
4160Sstevel@tonic-gate 	uint_t bits;
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate top:
4190Sstevel@tonic-gate 	/* initialize the lwpchan cache, if necesary */
4200Sstevel@tonic-gate 	if ((lcp = p->p_lcp) == NULL) {
4210Sstevel@tonic-gate 		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
4220Sstevel@tonic-gate 		goto top;
4230Sstevel@tonic-gate 	}
4240Sstevel@tonic-gate 	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
4250Sstevel@tonic-gate 	mutex_enter(&hashbucket->lwpchan_lock);
4260Sstevel@tonic-gate 	if (lcp != p->p_lcp) {
4270Sstevel@tonic-gate 		/* someone resized the lwpchan cache; start over */
4280Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
4290Sstevel@tonic-gate 		goto top;
4300Sstevel@tonic-gate 	}
4310Sstevel@tonic-gate 	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
4320Sstevel@tonic-gate 		/* it's in the cache */
4330Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
4340Sstevel@tonic-gate 		return (1);
4350Sstevel@tonic-gate 	}
4360Sstevel@tonic-gate 	mutex_exit(&hashbucket->lwpchan_lock);
4370Sstevel@tonic-gate 	if (as_getmemid(as, addr, &memid) != 0)
4380Sstevel@tonic-gate 		return (0);
4390Sstevel@tonic-gate 	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
4400Sstevel@tonic-gate 	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
4410Sstevel@tonic-gate 	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
4420Sstevel@tonic-gate 	mutex_enter(&hashbucket->lwpchan_lock);
4430Sstevel@tonic-gate 	if (lcp != p->p_lcp) {
4440Sstevel@tonic-gate 		/* someone resized the lwpchan cache; start over */
4450Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
4460Sstevel@tonic-gate 		kmem_free(ent, sizeof (*ent));
4470Sstevel@tonic-gate 		goto top;
4480Sstevel@tonic-gate 	}
4490Sstevel@tonic-gate 	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
4500Sstevel@tonic-gate 	if (count == 0) {
4510Sstevel@tonic-gate 		/* someone else added this entry to the cache */
4520Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
4530Sstevel@tonic-gate 		kmem_free(ent, sizeof (*ent));
4540Sstevel@tonic-gate 		return (1);
4550Sstevel@tonic-gate 	}
4560Sstevel@tonic-gate 	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
4570Sstevel@tonic-gate 	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
4580Sstevel@tonic-gate 		/* hash chain too long; reallocate the hash table */
4590Sstevel@tonic-gate 		mutex_exit(&hashbucket->lwpchan_lock);
4600Sstevel@tonic-gate 		kmem_free(ent, sizeof (*ent));
4610Sstevel@tonic-gate 		lwpchan_alloc_cache(p, bits + 1);
4620Sstevel@tonic-gate 		goto top;
4630Sstevel@tonic-gate 	}
4640Sstevel@tonic-gate 	ent->lwpchan_addr = addr;
4659264SRoger.Faulkner@Sun.COM 	ent->lwpchan_uaddr = uaddr;
4660Sstevel@tonic-gate 	ent->lwpchan_type = (uint16_t)type;
4670Sstevel@tonic-gate 	ent->lwpchan_pool = (uint16_t)pool;
4680Sstevel@tonic-gate 	ent->lwpchan_lwpchan = *lwpchan;
4690Sstevel@tonic-gate 	ent->lwpchan_next = hashbucket->lwpchan_chain;
4700Sstevel@tonic-gate 	hashbucket->lwpchan_chain = ent;
4710Sstevel@tonic-gate 	atomic_add_32(&lcp->lwpchan_entries, 1);
4720Sstevel@tonic-gate 	mutex_exit(&hashbucket->lwpchan_lock);
4730Sstevel@tonic-gate 	return (1);
4740Sstevel@tonic-gate }
4750Sstevel@tonic-gate 
4760Sstevel@tonic-gate /*
4770Sstevel@tonic-gate  * Return a unique pair of identifiers that corresponds to a
4780Sstevel@tonic-gate  * synchronization object's virtual address.  Process-shared
4790Sstevel@tonic-gate  * sync objects usually get vnode/offset from as_getmemid().
4800Sstevel@tonic-gate  */
4810Sstevel@tonic-gate static int
get_lwpchan(struct as * as,caddr_t addr,int type,lwpchan_t * lwpchan,int pool)4820Sstevel@tonic-gate get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
4830Sstevel@tonic-gate {
4840Sstevel@tonic-gate 	/*
4850Sstevel@tonic-gate 	 * If the lwp synch object is defined to be process-private,
4860Sstevel@tonic-gate 	 * we just make the first field of the lwpchan be 'as' and
4870Sstevel@tonic-gate 	 * the second field be the synch object's virtual address.
4880Sstevel@tonic-gate 	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
4890Sstevel@tonic-gate 	 * The lwpchan cache is used only for process-shared objects.
4900Sstevel@tonic-gate 	 */
4914574Sraf 	if (!(type & USYNC_PROCESS)) {
4920Sstevel@tonic-gate 		lwpchan->lc_wchan0 = (caddr_t)as;
4930Sstevel@tonic-gate 		lwpchan->lc_wchan = addr;
4940Sstevel@tonic-gate 		return (1);
4950Sstevel@tonic-gate 	}
4964574Sraf 
4979264SRoger.Faulkner@Sun.COM 	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
4980Sstevel@tonic-gate }
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate static void
lwp_block(lwpchan_t * lwpchan)5010Sstevel@tonic-gate lwp_block(lwpchan_t *lwpchan)
5020Sstevel@tonic-gate {
5030Sstevel@tonic-gate 	kthread_t *t = curthread;
5040Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
5050Sstevel@tonic-gate 	sleepq_head_t *sqh;
5060Sstevel@tonic-gate 
5070Sstevel@tonic-gate 	thread_lock(t);
5080Sstevel@tonic-gate 	t->t_flag |= T_WAKEABLE;
5090Sstevel@tonic-gate 	t->t_lwpchan = *lwpchan;
5100Sstevel@tonic-gate 	t->t_sobj_ops = &lwp_sobj_ops;
5110Sstevel@tonic-gate 	t->t_release = 0;
5120Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
5130Sstevel@tonic-gate 	disp_lock_enter_high(&sqh->sq_lock);
5140Sstevel@tonic-gate 	CL_SLEEP(t);
5150Sstevel@tonic-gate 	DTRACE_SCHED(sleep);
5160Sstevel@tonic-gate 	THREAD_SLEEP(t, &sqh->sq_lock);
5170Sstevel@tonic-gate 	sleepq_insert(&sqh->sq_queue, t);
5180Sstevel@tonic-gate 	thread_unlock(t);
5190Sstevel@tonic-gate 	lwp->lwp_asleep = 1;
5200Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
5210Sstevel@tonic-gate 	lwp->lwp_ru.nvcsw++;
5220Sstevel@tonic-gate 	(void) new_mstate(curthread, LMS_SLEEP);
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate static kthread_t *
lwpsobj_pi_owner(upimutex_t * up)5260Sstevel@tonic-gate lwpsobj_pi_owner(upimutex_t *up)
5270Sstevel@tonic-gate {
5280Sstevel@tonic-gate 	return (up->upi_owner);
5290Sstevel@tonic-gate }
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate static struct upimutex *
upi_get(upib_t * upibp,lwpchan_t * lcp)5320Sstevel@tonic-gate upi_get(upib_t *upibp, lwpchan_t *lcp)
5330Sstevel@tonic-gate {
5340Sstevel@tonic-gate 	struct upimutex *upip;
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 	for (upip = upibp->upib_first; upip != NULL;
5370Sstevel@tonic-gate 	    upip = upip->upi_nextchain) {
5380Sstevel@tonic-gate 		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
5390Sstevel@tonic-gate 		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
5400Sstevel@tonic-gate 			break;
5410Sstevel@tonic-gate 	}
5420Sstevel@tonic-gate 	return (upip);
5430Sstevel@tonic-gate }
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate static void
upi_chain_add(upib_t * upibp,struct upimutex * upimutex)5460Sstevel@tonic-gate upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
5470Sstevel@tonic-gate {
5480Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&upibp->upib_lock));
5490Sstevel@tonic-gate 
5500Sstevel@tonic-gate 	/*
5510Sstevel@tonic-gate 	 * Insert upimutex at front of list. Maybe a bit unfair
5520Sstevel@tonic-gate 	 * but assume that not many lwpchans hash to the same
5530Sstevel@tonic-gate 	 * upimutextab bucket, i.e. the list of upimutexes from
5540Sstevel@tonic-gate 	 * upib_first is not too long.
5550Sstevel@tonic-gate 	 */
5560Sstevel@tonic-gate 	upimutex->upi_nextchain = upibp->upib_first;
5570Sstevel@tonic-gate 	upibp->upib_first = upimutex;
5580Sstevel@tonic-gate }
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate static void
upi_chain_del(upib_t * upibp,struct upimutex * upimutex)5610Sstevel@tonic-gate upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
5620Sstevel@tonic-gate {
5630Sstevel@tonic-gate 	struct upimutex **prev;
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&upibp->upib_lock));
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate 	prev = &upibp->upib_first;
5680Sstevel@tonic-gate 	while (*prev != upimutex) {
5690Sstevel@tonic-gate 		prev = &(*prev)->upi_nextchain;
5700Sstevel@tonic-gate 	}
5710Sstevel@tonic-gate 	*prev = upimutex->upi_nextchain;
5720Sstevel@tonic-gate 	upimutex->upi_nextchain = NULL;
5730Sstevel@tonic-gate }
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate  * Add upimutex to chain of upimutexes held by curthread.
5770Sstevel@tonic-gate  * Returns number of upimutexes held by curthread.
5780Sstevel@tonic-gate  */
5790Sstevel@tonic-gate static uint32_t
upi_mylist_add(struct upimutex * upimutex)5800Sstevel@tonic-gate upi_mylist_add(struct upimutex *upimutex)
5810Sstevel@tonic-gate {
5820Sstevel@tonic-gate 	kthread_t *t = curthread;
5830Sstevel@tonic-gate 
5840Sstevel@tonic-gate 	/*
5850Sstevel@tonic-gate 	 * Insert upimutex at front of list of upimutexes owned by t. This
5860Sstevel@tonic-gate 	 * would match typical LIFO order in which nested locks are acquired
5870Sstevel@tonic-gate 	 * and released.
5880Sstevel@tonic-gate 	 */
5890Sstevel@tonic-gate 	upimutex->upi_nextowned = t->t_upimutex;
5900Sstevel@tonic-gate 	t->t_upimutex = upimutex;
5910Sstevel@tonic-gate 	t->t_nupinest++;
5920Sstevel@tonic-gate 	ASSERT(t->t_nupinest > 0);
5930Sstevel@tonic-gate 	return (t->t_nupinest);
5940Sstevel@tonic-gate }
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate /*
5970Sstevel@tonic-gate  * Delete upimutex from list of upimutexes owned by curthread.
5980Sstevel@tonic-gate  */
5990Sstevel@tonic-gate static void
upi_mylist_del(struct upimutex * upimutex)6000Sstevel@tonic-gate upi_mylist_del(struct upimutex *upimutex)
6010Sstevel@tonic-gate {
6020Sstevel@tonic-gate 	kthread_t *t = curthread;
6030Sstevel@tonic-gate 	struct upimutex **prev;
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate 	/*
6060Sstevel@tonic-gate 	 * Since the order in which nested locks are acquired and released,
6070Sstevel@tonic-gate 	 * is typically LIFO, and typical nesting levels are not too deep, the
6080Sstevel@tonic-gate 	 * following should not be expensive in the general case.
6090Sstevel@tonic-gate 	 */
6100Sstevel@tonic-gate 	prev = &t->t_upimutex;
6110Sstevel@tonic-gate 	while (*prev != upimutex) {
6120Sstevel@tonic-gate 		prev = &(*prev)->upi_nextowned;
6130Sstevel@tonic-gate 	}
6140Sstevel@tonic-gate 	*prev = upimutex->upi_nextowned;
6150Sstevel@tonic-gate 	upimutex->upi_nextowned = NULL;
6160Sstevel@tonic-gate 	ASSERT(t->t_nupinest > 0);
6170Sstevel@tonic-gate 	t->t_nupinest--;
6180Sstevel@tonic-gate }
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate /*
6210Sstevel@tonic-gate  * Returns true if upimutex is owned. Should be called only when upim points
6220Sstevel@tonic-gate  * to kmem which cannot disappear from underneath.
6230Sstevel@tonic-gate  */
6240Sstevel@tonic-gate static int
upi_owned(upimutex_t * upim)6250Sstevel@tonic-gate upi_owned(upimutex_t *upim)
6260Sstevel@tonic-gate {
6270Sstevel@tonic-gate 	return (upim->upi_owner == curthread);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate /*
6310Sstevel@tonic-gate  * Returns pointer to kernel object (upimutex_t *) if lp is owned.
6320Sstevel@tonic-gate  */
6330Sstevel@tonic-gate static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t * lp,uint8_t type)6340Sstevel@tonic-gate lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate 	lwpchan_t lwpchan;
6370Sstevel@tonic-gate 	upib_t *upibp;
6380Sstevel@tonic-gate 	struct upimutex *upimutex;
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
6410Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL))
6420Sstevel@tonic-gate 		return (NULL);
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	upibp = &UPI_CHAIN(lwpchan);
6450Sstevel@tonic-gate 	mutex_enter(&upibp->upib_lock);
6460Sstevel@tonic-gate 	upimutex = upi_get(upibp, &lwpchan);
6470Sstevel@tonic-gate 	if (upimutex == NULL || upimutex->upi_owner != curthread) {
6480Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
6490Sstevel@tonic-gate 		return (NULL);
6500Sstevel@tonic-gate 	}
6510Sstevel@tonic-gate 	mutex_exit(&upibp->upib_lock);
6520Sstevel@tonic-gate 	return (upimutex);
6530Sstevel@tonic-gate }
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate /*
6560Sstevel@tonic-gate  * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
6570Sstevel@tonic-gate  * no lock hand-off occurrs.
6580Sstevel@tonic-gate  */
6590Sstevel@tonic-gate static void
upimutex_unlock(struct upimutex * upimutex,uint16_t flag)6600Sstevel@tonic-gate upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
6610Sstevel@tonic-gate {
6620Sstevel@tonic-gate 	turnstile_t *ts;
6630Sstevel@tonic-gate 	upib_t *upibp;
6640Sstevel@tonic-gate 	kthread_t *newowner;
6650Sstevel@tonic-gate 
6660Sstevel@tonic-gate 	upi_mylist_del(upimutex);
6670Sstevel@tonic-gate 	upibp = upimutex->upi_upibp;
6680Sstevel@tonic-gate 	mutex_enter(&upibp->upib_lock);
6690Sstevel@tonic-gate 	if (upimutex->upi_waiter != 0) { /* if waiters */
6700Sstevel@tonic-gate 		ts = turnstile_lookup(upimutex);
6710Sstevel@tonic-gate 		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
6720Sstevel@tonic-gate 			/* hand-off lock to highest prio waiter */
6730Sstevel@tonic-gate 			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
6740Sstevel@tonic-gate 			upimutex->upi_owner = newowner;
6750Sstevel@tonic-gate 			if (ts->ts_waiters == 1)
6760Sstevel@tonic-gate 				upimutex->upi_waiter = 0;
6770Sstevel@tonic-gate 			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
6780Sstevel@tonic-gate 			mutex_exit(&upibp->upib_lock);
6790Sstevel@tonic-gate 			return;
6800Sstevel@tonic-gate 		} else if (ts != NULL) {
6810Sstevel@tonic-gate 			/* LOCK_NOTRECOVERABLE: wakeup all */
6820Sstevel@tonic-gate 			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
6830Sstevel@tonic-gate 		} else {
6840Sstevel@tonic-gate 			/*
6850Sstevel@tonic-gate 			 * Misleading w bit. Waiters might have been
6860Sstevel@tonic-gate 			 * interrupted. No need to clear the w bit (upimutex
6870Sstevel@tonic-gate 			 * will soon be freed). Re-calculate PI from existing
6880Sstevel@tonic-gate 			 * waiters.
6890Sstevel@tonic-gate 			 */
6900Sstevel@tonic-gate 			turnstile_exit(upimutex);
6910Sstevel@tonic-gate 			turnstile_pi_recalc();
6920Sstevel@tonic-gate 		}
6930Sstevel@tonic-gate 	}
6940Sstevel@tonic-gate 	/*
6950Sstevel@tonic-gate 	 * no waiters, or LOCK_NOTRECOVERABLE.
6960Sstevel@tonic-gate 	 * remove from the bucket chain of upi mutexes.
6970Sstevel@tonic-gate 	 * de-allocate kernel memory (upimutex).
6980Sstevel@tonic-gate 	 */
6990Sstevel@tonic-gate 	upi_chain_del(upimutex->upi_upibp, upimutex);
7000Sstevel@tonic-gate 	mutex_exit(&upibp->upib_lock);
7010Sstevel@tonic-gate 	kmem_free(upimutex, sizeof (upimutex_t));
7020Sstevel@tonic-gate }
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate static int
lwp_upimutex_lock(lwp_mutex_t * lp,uint8_t type,int try,lwp_timer_t * lwptp)7050Sstevel@tonic-gate lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
7060Sstevel@tonic-gate {
7070Sstevel@tonic-gate 	label_t ljb;
7080Sstevel@tonic-gate 	int error = 0;
7090Sstevel@tonic-gate 	lwpchan_t lwpchan;
7100Sstevel@tonic-gate 	uint16_t flag;
7110Sstevel@tonic-gate 	upib_t *upibp;
7120Sstevel@tonic-gate 	volatile struct upimutex *upimutex = NULL;
7130Sstevel@tonic-gate 	turnstile_t *ts;
7140Sstevel@tonic-gate 	uint32_t nupinest;
7150Sstevel@tonic-gate 	volatile int upilocked = 0;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	if (on_fault(&ljb)) {
7180Sstevel@tonic-gate 		if (upilocked)
7190Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, 0);
7200Sstevel@tonic-gate 		error = EFAULT;
7210Sstevel@tonic-gate 		goto out;
7220Sstevel@tonic-gate 	}
7230Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
7240Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
7250Sstevel@tonic-gate 		error = EFAULT;
7260Sstevel@tonic-gate 		goto out;
7270Sstevel@tonic-gate 	}
7280Sstevel@tonic-gate 	upibp = &UPI_CHAIN(lwpchan);
7290Sstevel@tonic-gate retry:
7300Sstevel@tonic-gate 	mutex_enter(&upibp->upib_lock);
7310Sstevel@tonic-gate 	upimutex = upi_get(upibp, &lwpchan);
7320Sstevel@tonic-gate 	if (upimutex == NULL)  {
7330Sstevel@tonic-gate 		/* lock available since lwpchan has no upimutex */
7340Sstevel@tonic-gate 		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
7350Sstevel@tonic-gate 		upi_chain_add(upibp, (upimutex_t *)upimutex);
7360Sstevel@tonic-gate 		upimutex->upi_owner = curthread; /* grab lock */
7370Sstevel@tonic-gate 		upimutex->upi_upibp = upibp;
7380Sstevel@tonic-gate 		upimutex->upi_vaddr = lp;
7390Sstevel@tonic-gate 		upimutex->upi_lwpchan = lwpchan;
7400Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
7410Sstevel@tonic-gate 		nupinest = upi_mylist_add((upimutex_t *)upimutex);
7420Sstevel@tonic-gate 		upilocked = 1;
7430Sstevel@tonic-gate 		fuword16_noerr(&lp->mutex_flag, &flag);
7440Sstevel@tonic-gate 		if (nupinest > maxnestupimx &&
7450Sstevel@tonic-gate 		    secpolicy_resource(CRED()) != 0) {
7460Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, flag);
7470Sstevel@tonic-gate 			error = ENOMEM;
7480Sstevel@tonic-gate 			goto out;
7490Sstevel@tonic-gate 		}
7504574Sraf 		if (flag & LOCK_NOTRECOVERABLE) {
7510Sstevel@tonic-gate 			/*
7520Sstevel@tonic-gate 			 * Since the setting of LOCK_NOTRECOVERABLE
7530Sstevel@tonic-gate 			 * was done under the high-level upi mutex,
7540Sstevel@tonic-gate 			 * in lwp_upimutex_unlock(), this flag needs to
7550Sstevel@tonic-gate 			 * be checked while holding the upi mutex.
7564574Sraf 			 * If set, this thread should return without
7574574Sraf 			 * the lock held, and with the right error code.
7580Sstevel@tonic-gate 			 */
7590Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, flag);
7600Sstevel@tonic-gate 			upilocked = 0;
7610Sstevel@tonic-gate 			error = ENOTRECOVERABLE;
7624574Sraf 		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
7634574Sraf 			if (flag & LOCK_OWNERDEAD)
7644574Sraf 				error = EOWNERDEAD;
7654574Sraf 			else if (type & USYNC_PROCESS_ROBUST)
7664574Sraf 				error = ELOCKUNMAPPED;
7674574Sraf 			else
7684574Sraf 				error = EOWNERDEAD;
7690Sstevel@tonic-gate 		}
7700Sstevel@tonic-gate 		goto out;
7710Sstevel@tonic-gate 	}
7720Sstevel@tonic-gate 	/*
7730Sstevel@tonic-gate 	 * If a upimutex object exists, it must have an owner.
7740Sstevel@tonic-gate 	 * This is due to lock hand-off, and release of upimutex when no
7750Sstevel@tonic-gate 	 * waiters are present at unlock time,
7760Sstevel@tonic-gate 	 */
7770Sstevel@tonic-gate 	ASSERT(upimutex->upi_owner != NULL);
7780Sstevel@tonic-gate 	if (upimutex->upi_owner == curthread) {
7790Sstevel@tonic-gate 		/*
7800Sstevel@tonic-gate 		 * The user wrapper can check if the mutex type is
7810Sstevel@tonic-gate 		 * ERRORCHECK: if not, it should stall at user-level.
7820Sstevel@tonic-gate 		 * If so, it should return the error code.
7830Sstevel@tonic-gate 		 */
7840Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
7850Sstevel@tonic-gate 		error = EDEADLK;
7860Sstevel@tonic-gate 		goto out;
7870Sstevel@tonic-gate 	}
7880Sstevel@tonic-gate 	if (try == UPIMUTEX_TRY) {
7890Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
7900Sstevel@tonic-gate 		error = EBUSY;
7910Sstevel@tonic-gate 		goto out;
7920Sstevel@tonic-gate 	}
7930Sstevel@tonic-gate 	/*
7940Sstevel@tonic-gate 	 * Block for the lock.
7950Sstevel@tonic-gate 	 */
7960Sstevel@tonic-gate 	if ((error = lwptp->lwpt_time_error) != 0) {
7970Sstevel@tonic-gate 		/*
7980Sstevel@tonic-gate 		 * The SUSV3 Posix spec is very clear that we
7990Sstevel@tonic-gate 		 * should get no error from validating the
8000Sstevel@tonic-gate 		 * timer until we would actually sleep.
8010Sstevel@tonic-gate 		 */
8020Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
8030Sstevel@tonic-gate 		goto out;
8040Sstevel@tonic-gate 	}
8050Sstevel@tonic-gate 	if (lwptp->lwpt_tsp != NULL) {
8060Sstevel@tonic-gate 		/*
8076622Sraf 		 * Unlike the protocol for other lwp timedwait operations,
8086622Sraf 		 * we must drop t_delay_lock before going to sleep in
8096622Sraf 		 * turnstile_block() for a upi mutex.
8106622Sraf 		 * See the comments below and in turnstile.c
8110Sstevel@tonic-gate 		 */
8120Sstevel@tonic-gate 		mutex_enter(&curthread->t_delay_lock);
8136622Sraf 		(void) lwp_timer_enqueue(lwptp);
8146622Sraf 		mutex_exit(&curthread->t_delay_lock);
8150Sstevel@tonic-gate 	}
8160Sstevel@tonic-gate 	/*
8170Sstevel@tonic-gate 	 * Now, set the waiter bit and block for the lock in turnstile_block().
8180Sstevel@tonic-gate 	 * No need to preserve the previous wbit since a lock try is not
8190Sstevel@tonic-gate 	 * attempted after setting the wait bit. Wait bit is set under
8200Sstevel@tonic-gate 	 * the upib_lock, which is not released until the turnstile lock
8210Sstevel@tonic-gate 	 * is acquired. Say, the upimutex is L:
8220Sstevel@tonic-gate 	 *
8230Sstevel@tonic-gate 	 * 1. upib_lock is held so the waiter does not have to retry L after
8240Sstevel@tonic-gate 	 *    setting the wait bit: since the owner has to grab the upib_lock
8250Sstevel@tonic-gate 	 *    to unlock L, it will certainly see the wait bit set.
8260Sstevel@tonic-gate 	 * 2. upib_lock is not released until the turnstile lock is acquired.
8270Sstevel@tonic-gate 	 *    This is the key to preventing a missed wake-up. Otherwise, the
8280Sstevel@tonic-gate 	 *    owner could acquire the upib_lock, and the tc_lock, to call
8290Sstevel@tonic-gate 	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
8300Sstevel@tonic-gate 	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
8310Sstevel@tonic-gate 	 *    find this waiter, resulting in the missed wakeup.
8320Sstevel@tonic-gate 	 * 3. The upib_lock, being a kernel mutex, cannot be released while
8330Sstevel@tonic-gate 	 *    holding the tc_lock (since mutex_exit() could need to acquire
8340Sstevel@tonic-gate 	 *    the same tc_lock)...and so is held when calling turnstile_block().
8350Sstevel@tonic-gate 	 *    The address of upib_lock is passed to turnstile_block() which
8360Sstevel@tonic-gate 	 *    releases it after releasing all turnstile locks, and before going
8370Sstevel@tonic-gate 	 *    to sleep in swtch().
8380Sstevel@tonic-gate 	 * 4. The waiter value cannot be a count of waiters, because a waiter
8390Sstevel@tonic-gate 	 *    can be interrupted. The interrupt occurs under the tc_lock, at
8400Sstevel@tonic-gate 	 *    which point, the upib_lock cannot be locked, to decrement waiter
8410Sstevel@tonic-gate 	 *    count. So, just treat the waiter state as a bit, not a count.
8420Sstevel@tonic-gate 	 */
8430Sstevel@tonic-gate 	ts = turnstile_lookup((upimutex_t *)upimutex);
8440Sstevel@tonic-gate 	upimutex->upi_waiter = 1;
8450Sstevel@tonic-gate 	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
8460Sstevel@tonic-gate 	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
8470Sstevel@tonic-gate 	/*
8480Sstevel@tonic-gate 	 * Hand-off implies that we wakeup holding the lock, except when:
8490Sstevel@tonic-gate 	 *	- deadlock is detected
8500Sstevel@tonic-gate 	 *	- lock is not recoverable
8510Sstevel@tonic-gate 	 *	- we got an interrupt or timeout
8520Sstevel@tonic-gate 	 * If we wake up due to an interrupt or timeout, we may
8530Sstevel@tonic-gate 	 * or may not be holding the lock due to mutex hand-off.
8540Sstevel@tonic-gate 	 * Use lwp_upimutex_owned() to check if we do hold the lock.
8550Sstevel@tonic-gate 	 */
8560Sstevel@tonic-gate 	if (error != 0) {
8570Sstevel@tonic-gate 		if ((error == EINTR || error == ETIME) &&
8580Sstevel@tonic-gate 		    (upimutex = lwp_upimutex_owned(lp, type))) {
8590Sstevel@tonic-gate 			/*
8600Sstevel@tonic-gate 			 * Unlock and return - the re-startable syscall will
8610Sstevel@tonic-gate 			 * try the lock again if we got EINTR.
8620Sstevel@tonic-gate 			 */
8630Sstevel@tonic-gate 			(void) upi_mylist_add((upimutex_t *)upimutex);
8640Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, 0);
8650Sstevel@tonic-gate 		}
8660Sstevel@tonic-gate 		/*
8670Sstevel@tonic-gate 		 * The only other possible error is EDEADLK.  If so, upimutex
8680Sstevel@tonic-gate 		 * is valid, since its owner is deadlocked with curthread.
8690Sstevel@tonic-gate 		 */
8700Sstevel@tonic-gate 		ASSERT(error == EINTR || error == ETIME ||
8710Sstevel@tonic-gate 		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
8720Sstevel@tonic-gate 		ASSERT(!lwp_upimutex_owned(lp, type));
8730Sstevel@tonic-gate 		goto out;
8740Sstevel@tonic-gate 	}
8750Sstevel@tonic-gate 	if (lwp_upimutex_owned(lp, type)) {
8760Sstevel@tonic-gate 		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
8770Sstevel@tonic-gate 		nupinest = upi_mylist_add((upimutex_t *)upimutex);
8780Sstevel@tonic-gate 		upilocked = 1;
8790Sstevel@tonic-gate 	}
8800Sstevel@tonic-gate 	/*
8810Sstevel@tonic-gate 	 * Now, need to read the user-level lp->mutex_flag to do the following:
8820Sstevel@tonic-gate 	 *
8834574Sraf 	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
8844574Sraf 	 *   should be returned.
8854574Sraf 	 * - if lock isn't held, check if ENOTRECOVERABLE should
8864574Sraf 	 *   be returned.
8870Sstevel@tonic-gate 	 *
8880Sstevel@tonic-gate 	 * Now, either lp->mutex_flag is readable or it's not. If not
8894574Sraf 	 * readable, the on_fault path will cause a return with EFAULT
8904574Sraf 	 * as it should.  If it is readable, the state of the flag
8914574Sraf 	 * encodes the robustness state of the lock:
8920Sstevel@tonic-gate 	 *
8934574Sraf 	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
8944574Sraf 	 * or LOCK_UNMAPPED setting will influence the return code
8954574Sraf 	 * appropriately.  If the upimutex is not locked here, this
8964574Sraf 	 * could be due to a spurious wake-up or a NOTRECOVERABLE
8974574Sraf 	 * event.  The flag's setting can be used to distinguish
8980Sstevel@tonic-gate 	 * between these two events.
8990Sstevel@tonic-gate 	 */
9000Sstevel@tonic-gate 	fuword16_noerr(&lp->mutex_flag, &flag);
9010Sstevel@tonic-gate 	if (upilocked) {
9020Sstevel@tonic-gate 		/*
9030Sstevel@tonic-gate 		 * If the thread wakes up from turnstile_block with the lock
9040Sstevel@tonic-gate 		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
9050Sstevel@tonic-gate 		 * since it would not have been handed-off the lock.
9060Sstevel@tonic-gate 		 * So, no need to check for this case.
9070Sstevel@tonic-gate 		 */
9080Sstevel@tonic-gate 		if (nupinest > maxnestupimx &&
9090Sstevel@tonic-gate 		    secpolicy_resource(CRED()) != 0) {
9100Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, flag);
9110Sstevel@tonic-gate 			upilocked = 0;
9120Sstevel@tonic-gate 			error = ENOMEM;
9134574Sraf 		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
9144574Sraf 			if (flag & LOCK_OWNERDEAD)
9154574Sraf 				error = EOWNERDEAD;
9164574Sraf 			else if (type & USYNC_PROCESS_ROBUST)
9174574Sraf 				error = ELOCKUNMAPPED;
9184574Sraf 			else
9194574Sraf 				error = EOWNERDEAD;
9200Sstevel@tonic-gate 		}
9210Sstevel@tonic-gate 	} else {
9220Sstevel@tonic-gate 		/*
9230Sstevel@tonic-gate 		 * Wake-up without the upimutex held. Either this is a
9240Sstevel@tonic-gate 		 * spurious wake-up (due to signals, forkall(), whatever), or
9250Sstevel@tonic-gate 		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
9260Sstevel@tonic-gate 		 * of the mutex flag can be used to distinguish between the
9270Sstevel@tonic-gate 		 * two events.
9280Sstevel@tonic-gate 		 */
9290Sstevel@tonic-gate 		if (flag & LOCK_NOTRECOVERABLE) {
9300Sstevel@tonic-gate 			error = ENOTRECOVERABLE;
9310Sstevel@tonic-gate 		} else {
9320Sstevel@tonic-gate 			/*
9330Sstevel@tonic-gate 			 * Here, the flag could be set to LOCK_OWNERDEAD or
9340Sstevel@tonic-gate 			 * not. In both cases, this is a spurious wakeup,
9350Sstevel@tonic-gate 			 * since the upi lock is not held, but the thread
9360Sstevel@tonic-gate 			 * has returned from turnstile_block().
9370Sstevel@tonic-gate 			 *
9380Sstevel@tonic-gate 			 * The user flag could be LOCK_OWNERDEAD if, at the
9390Sstevel@tonic-gate 			 * same time as curthread having been woken up
9400Sstevel@tonic-gate 			 * spuriously, the owner (say Tdead) has died, marked
9410Sstevel@tonic-gate 			 * the mutex flag accordingly, and handed off the lock
9420Sstevel@tonic-gate 			 * to some other waiter (say Tnew). curthread just
9430Sstevel@tonic-gate 			 * happened to read the flag while Tnew has yet to deal
9440Sstevel@tonic-gate 			 * with the owner-dead event.
9450Sstevel@tonic-gate 			 *
9460Sstevel@tonic-gate 			 * In this event, curthread should retry the lock.
9470Sstevel@tonic-gate 			 * If Tnew is able to cleanup the lock, curthread
9480Sstevel@tonic-gate 			 * will eventually get the lock with a zero error code,
9490Sstevel@tonic-gate 			 * If Tnew is unable to cleanup, its eventual call to
9500Sstevel@tonic-gate 			 * unlock the lock will result in the mutex flag being
9510Sstevel@tonic-gate 			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
9520Sstevel@tonic-gate 			 * all waiters, including curthread, which will then
9530Sstevel@tonic-gate 			 * eventually return ENOTRECOVERABLE due to the above
9540Sstevel@tonic-gate 			 * check.
9550Sstevel@tonic-gate 			 *
9560Sstevel@tonic-gate 			 * Of course, if the user-flag is not set with
9570Sstevel@tonic-gate 			 * LOCK_OWNERDEAD, retrying is the thing to do, since
9580Sstevel@tonic-gate 			 * this is definitely a spurious wakeup.
9590Sstevel@tonic-gate 			 */
9600Sstevel@tonic-gate 			goto retry;
9610Sstevel@tonic-gate 		}
9620Sstevel@tonic-gate 	}
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate out:
9650Sstevel@tonic-gate 	no_fault();
9660Sstevel@tonic-gate 	return (error);
9670Sstevel@tonic-gate }
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 
9700Sstevel@tonic-gate static int
lwp_upimutex_unlock(lwp_mutex_t * lp,uint8_t type)9710Sstevel@tonic-gate lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
9720Sstevel@tonic-gate {
9730Sstevel@tonic-gate 	label_t ljb;
9740Sstevel@tonic-gate 	int error = 0;
9750Sstevel@tonic-gate 	lwpchan_t lwpchan;
9760Sstevel@tonic-gate 	uint16_t flag;
9770Sstevel@tonic-gate 	upib_t *upibp;
9780Sstevel@tonic-gate 	volatile struct upimutex *upimutex = NULL;
9790Sstevel@tonic-gate 	volatile int upilocked = 0;
9800Sstevel@tonic-gate 
9810Sstevel@tonic-gate 	if (on_fault(&ljb)) {
9820Sstevel@tonic-gate 		if (upilocked)
9830Sstevel@tonic-gate 			upimutex_unlock((upimutex_t *)upimutex, 0);
9840Sstevel@tonic-gate 		error = EFAULT;
9850Sstevel@tonic-gate 		goto out;
9860Sstevel@tonic-gate 	}
9870Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
9880Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
9890Sstevel@tonic-gate 		error = EFAULT;
9900Sstevel@tonic-gate 		goto out;
9910Sstevel@tonic-gate 	}
9920Sstevel@tonic-gate 	upibp = &UPI_CHAIN(lwpchan);
9930Sstevel@tonic-gate 	mutex_enter(&upibp->upib_lock);
9940Sstevel@tonic-gate 	upimutex = upi_get(upibp, &lwpchan);
9950Sstevel@tonic-gate 	/*
9960Sstevel@tonic-gate 	 * If the lock is not held, or the owner is not curthread, return
9970Sstevel@tonic-gate 	 * error. The user-level wrapper can return this error or stall,
9980Sstevel@tonic-gate 	 * depending on whether mutex is of ERRORCHECK type or not.
9990Sstevel@tonic-gate 	 */
10000Sstevel@tonic-gate 	if (upimutex == NULL || upimutex->upi_owner != curthread) {
10010Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
10020Sstevel@tonic-gate 		error = EPERM;
10030Sstevel@tonic-gate 		goto out;
10040Sstevel@tonic-gate 	}
10050Sstevel@tonic-gate 	mutex_exit(&upibp->upib_lock); /* release for user memory access */
10060Sstevel@tonic-gate 	upilocked = 1;
10070Sstevel@tonic-gate 	fuword16_noerr(&lp->mutex_flag, &flag);
10084574Sraf 	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
10090Sstevel@tonic-gate 		/*
10100Sstevel@tonic-gate 		 * transition mutex to the LOCK_NOTRECOVERABLE state.
10110Sstevel@tonic-gate 		 */
10124574Sraf 		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
10130Sstevel@tonic-gate 		flag |= LOCK_NOTRECOVERABLE;
10140Sstevel@tonic-gate 		suword16_noerr(&lp->mutex_flag, flag);
10150Sstevel@tonic-gate 	}
101610887SRoger.Faulkner@Sun.COM 	set_owner_pid(lp, 0, 0);
10170Sstevel@tonic-gate 	upimutex_unlock((upimutex_t *)upimutex, flag);
10180Sstevel@tonic-gate 	upilocked = 0;
10190Sstevel@tonic-gate out:
10200Sstevel@tonic-gate 	no_fault();
10210Sstevel@tonic-gate 	return (error);
10220Sstevel@tonic-gate }
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate /*
102510887SRoger.Faulkner@Sun.COM  * Set the owner and ownerpid fields of a user-level mutex.
102610887SRoger.Faulkner@Sun.COM  */
102710887SRoger.Faulkner@Sun.COM static void
set_owner_pid(lwp_mutex_t * lp,uintptr_t owner,pid_t pid)102810887SRoger.Faulkner@Sun.COM set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
102910887SRoger.Faulkner@Sun.COM {
103010887SRoger.Faulkner@Sun.COM 	union {
103110887SRoger.Faulkner@Sun.COM 		uint64_t word64;
103210887SRoger.Faulkner@Sun.COM 		uint32_t word32[2];
103310887SRoger.Faulkner@Sun.COM 	} un;
103410887SRoger.Faulkner@Sun.COM 
103510887SRoger.Faulkner@Sun.COM 	un.word64 = (uint64_t)owner;
103610887SRoger.Faulkner@Sun.COM 
103710887SRoger.Faulkner@Sun.COM 	suword32_noerr(&lp->mutex_ownerpid, pid);
103810887SRoger.Faulkner@Sun.COM #if defined(_LP64)
103910887SRoger.Faulkner@Sun.COM 	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
104010887SRoger.Faulkner@Sun.COM 		suword64_noerr(&lp->mutex_owner, un.word64);
104110887SRoger.Faulkner@Sun.COM 		return;
104210887SRoger.Faulkner@Sun.COM 	}
104310887SRoger.Faulkner@Sun.COM #endif
104410887SRoger.Faulkner@Sun.COM 	/* mutex is unaligned or we are running on a 32-bit kernel */
104510887SRoger.Faulkner@Sun.COM 	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
104610887SRoger.Faulkner@Sun.COM 	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
104710887SRoger.Faulkner@Sun.COM }
104810887SRoger.Faulkner@Sun.COM 
104910887SRoger.Faulkner@Sun.COM /*
10504574Sraf  * Clear the contents of a user-level mutex; return the flags.
10514574Sraf  * Used only by upi_dead() and lwp_mutex_cleanup(), below.
10524574Sraf  */
10534574Sraf static uint16_t
lwp_clear_mutex(lwp_mutex_t * lp,uint16_t lockflg)10544574Sraf lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
10554574Sraf {
10564574Sraf 	uint16_t flag;
10574574Sraf 
10584574Sraf 	fuword16_noerr(&lp->mutex_flag, &flag);
10596057Sraf 	if ((flag &
10606057Sraf 	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
10614574Sraf 		flag |= lockflg;
10624574Sraf 		suword16_noerr(&lp->mutex_flag, flag);
10634574Sraf 	}
106410887SRoger.Faulkner@Sun.COM 	set_owner_pid(lp, 0, 0);
10654574Sraf 	suword8_noerr(&lp->mutex_rcount, 0);
10664574Sraf 
10674574Sraf 	return (flag);
10684574Sraf }
10694574Sraf 
10704574Sraf /*
10714574Sraf  * Mark user mutex state, corresponding to kernel upimutex,
10724574Sraf  * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
10730Sstevel@tonic-gate  */
10740Sstevel@tonic-gate static int
upi_dead(upimutex_t * upip,uint16_t lockflg)10754574Sraf upi_dead(upimutex_t *upip, uint16_t lockflg)
10760Sstevel@tonic-gate {
10770Sstevel@tonic-gate 	label_t ljb;
10780Sstevel@tonic-gate 	int error = 0;
10790Sstevel@tonic-gate 	lwp_mutex_t *lp;
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 	if (on_fault(&ljb)) {
10820Sstevel@tonic-gate 		error = EFAULT;
10830Sstevel@tonic-gate 		goto out;
10840Sstevel@tonic-gate 	}
10850Sstevel@tonic-gate 
10860Sstevel@tonic-gate 	lp = upip->upi_vaddr;
10874574Sraf 	(void) lwp_clear_mutex(lp, lockflg);
10884574Sraf 	suword8_noerr(&lp->mutex_lockw, 0);
10890Sstevel@tonic-gate out:
10900Sstevel@tonic-gate 	no_fault();
10910Sstevel@tonic-gate 	return (error);
10920Sstevel@tonic-gate }
10930Sstevel@tonic-gate 
10940Sstevel@tonic-gate /*
10950Sstevel@tonic-gate  * Unlock all upimutexes held by curthread, since curthread is dying.
10960Sstevel@tonic-gate  * For each upimutex, attempt to mark its corresponding user mutex object as
10970Sstevel@tonic-gate  * dead.
10980Sstevel@tonic-gate  */
10990Sstevel@tonic-gate void
upimutex_cleanup()11000Sstevel@tonic-gate upimutex_cleanup()
11010Sstevel@tonic-gate {
11020Sstevel@tonic-gate 	kthread_t *t = curthread;
11034574Sraf 	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
11044574Sraf 	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
11050Sstevel@tonic-gate 	struct upimutex *upip;
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 	while ((upip = t->t_upimutex) != NULL) {
11084574Sraf 		if (upi_dead(upip, lockflg) != 0) {
11090Sstevel@tonic-gate 			/*
11100Sstevel@tonic-gate 			 * If the user object associated with this upimutex is
11110Sstevel@tonic-gate 			 * unmapped, unlock upimutex with the
11120Sstevel@tonic-gate 			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
11130Sstevel@tonic-gate 			 * woken up. Since user object is unmapped, it could
11140Sstevel@tonic-gate 			 * not be marked as dead or notrecoverable.
11150Sstevel@tonic-gate 			 * The waiters will now all wake up and return
11160Sstevel@tonic-gate 			 * ENOTRECOVERABLE, since they would find that the lock
11170Sstevel@tonic-gate 			 * has not been handed-off to them.
11180Sstevel@tonic-gate 			 * See lwp_upimutex_lock().
11190Sstevel@tonic-gate 			 */
11200Sstevel@tonic-gate 			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
11210Sstevel@tonic-gate 		} else {
11220Sstevel@tonic-gate 			/*
11230Sstevel@tonic-gate 			 * The user object has been updated as dead.
11240Sstevel@tonic-gate 			 * Unlock the upimutex: if no waiters, upip kmem will
11250Sstevel@tonic-gate 			 * be freed. If there is a waiter, the lock will be
11260Sstevel@tonic-gate 			 * handed off. If exit() is in progress, each existing
11270Sstevel@tonic-gate 			 * waiter will successively get the lock, as owners
11280Sstevel@tonic-gate 			 * die, and each new owner will call this routine as
11290Sstevel@tonic-gate 			 * it dies. The last owner will free kmem, since
11300Sstevel@tonic-gate 			 * it will find the upimutex has no waiters. So,
11310Sstevel@tonic-gate 			 * eventually, the kmem is guaranteed to be freed.
11320Sstevel@tonic-gate 			 */
11330Sstevel@tonic-gate 			upimutex_unlock(upip, 0);
11340Sstevel@tonic-gate 		}
11350Sstevel@tonic-gate 		/*
11360Sstevel@tonic-gate 		 * Note that the call to upimutex_unlock() above will delete
11370Sstevel@tonic-gate 		 * upimutex from the t_upimutexes chain. And so the
11380Sstevel@tonic-gate 		 * while loop will eventually terminate.
11390Sstevel@tonic-gate 		 */
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate }
11420Sstevel@tonic-gate 
11430Sstevel@tonic-gate int
lwp_mutex_timedlock(lwp_mutex_t * lp,timespec_t * tsp,uintptr_t owner)114410887SRoger.Faulkner@Sun.COM lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
11450Sstevel@tonic-gate {
11460Sstevel@tonic-gate 	kthread_t *t = curthread;
11470Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
11480Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
11490Sstevel@tonic-gate 	lwp_timer_t lwpt;
11500Sstevel@tonic-gate 	caddr_t timedwait;
11510Sstevel@tonic-gate 	int error = 0;
11520Sstevel@tonic-gate 	int time_error;
11530Sstevel@tonic-gate 	clock_t tim = -1;
11540Sstevel@tonic-gate 	uchar_t waiters;
11550Sstevel@tonic-gate 	volatile int locked = 0;
11560Sstevel@tonic-gate 	volatile int watched = 0;
11570Sstevel@tonic-gate 	label_t ljb;
11580Sstevel@tonic-gate 	volatile uint8_t type = 0;
11590Sstevel@tonic-gate 	lwpchan_t lwpchan;
11600Sstevel@tonic-gate 	sleepq_head_t *sqh;
11610Sstevel@tonic-gate 	uint16_t flag;
11620Sstevel@tonic-gate 	int imm_timeout = 0;
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
11650Sstevel@tonic-gate 		return (set_errno(EFAULT));
11660Sstevel@tonic-gate 
116710230SRoger.Faulkner@Sun.COM 	/*
116810230SRoger.Faulkner@Sun.COM 	 * Put the lwp in an orderly state for debugging,
116910230SRoger.Faulkner@Sun.COM 	 * in case we are stopped while sleeping, below.
117010230SRoger.Faulkner@Sun.COM 	 */
117110230SRoger.Faulkner@Sun.COM 	prstop(PR_REQUESTED, 0);
117210230SRoger.Faulkner@Sun.COM 
11730Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
11740Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
11750Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
11760Sstevel@tonic-gate 		imm_timeout = 1;
11770Sstevel@tonic-gate 		timedwait = NULL;
11780Sstevel@tonic-gate 	}
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate 	/*
11810Sstevel@tonic-gate 	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
11820Sstevel@tonic-gate 	 * this micro state is really a run state. If the thread indeed blocks,
11830Sstevel@tonic-gate 	 * this state becomes valid. If not, the state is converted back to
11840Sstevel@tonic-gate 	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
11850Sstevel@tonic-gate 	 * when blocking.
11860Sstevel@tonic-gate 	 */
11870Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
11880Sstevel@tonic-gate 	if (on_fault(&ljb)) {
11890Sstevel@tonic-gate 		if (locked)
11900Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
11910Sstevel@tonic-gate 		error = EFAULT;
11920Sstevel@tonic-gate 		goto out;
11930Sstevel@tonic-gate 	}
11946577Sraf 	/*
11956577Sraf 	 * Force Copy-on-write if necessary and ensure that the
11966577Sraf 	 * synchronization object resides in read/write memory.
11976577Sraf 	 * Cause an EFAULT return now if this is not so.
11986577Sraf 	 */
11990Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
12006577Sraf 	suword8_noerr(&lp->mutex_type, type);
12010Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
12020Sstevel@tonic-gate 		no_fault();
12030Sstevel@tonic-gate 		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
120410887SRoger.Faulkner@Sun.COM 		if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED)
120510887SRoger.Faulkner@Sun.COM 			set_owner_pid(lp, owner,
120610887SRoger.Faulkner@Sun.COM 			    (type & USYNC_PROCESS)? p->p_pid : 0);
12070Sstevel@tonic-gate 		if (tsp && !time_error)	/* copyout the residual time left */
12080Sstevel@tonic-gate 			error = lwp_timer_copyout(&lwpt, error);
12090Sstevel@tonic-gate 		if (error)
12100Sstevel@tonic-gate 			return (set_errno(error));
12110Sstevel@tonic-gate 		return (0);
12120Sstevel@tonic-gate 	}
12130Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
12140Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
12150Sstevel@tonic-gate 		error = EFAULT;
12160Sstevel@tonic-gate 		goto out;
12170Sstevel@tonic-gate 	}
12180Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
12190Sstevel@tonic-gate 	locked = 1;
12204574Sraf 	if (type & LOCK_ROBUST) {
12210Sstevel@tonic-gate 		fuword16_noerr(&lp->mutex_flag, &flag);
12220Sstevel@tonic-gate 		if (flag & LOCK_NOTRECOVERABLE) {
12230Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
12240Sstevel@tonic-gate 			error = ENOTRECOVERABLE;
12250Sstevel@tonic-gate 			goto out;
12260Sstevel@tonic-gate 		}
12270Sstevel@tonic-gate 	}
12284574Sraf 	fuword8_noerr(&lp->mutex_waiters, &waiters);
12294574Sraf 	suword8_noerr(&lp->mutex_waiters, 1);
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	/*
12320Sstevel@tonic-gate 	 * If watchpoints are set, they need to be restored, since
12330Sstevel@tonic-gate 	 * atomic accesses of memory such as the call to ulock_try()
12340Sstevel@tonic-gate 	 * below cannot be watched.
12350Sstevel@tonic-gate 	 */
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
12380Sstevel@tonic-gate 
12390Sstevel@tonic-gate 	while (!ulock_try(&lp->mutex_lockw)) {
12400Sstevel@tonic-gate 		if (time_error) {
12410Sstevel@tonic-gate 			/*
12420Sstevel@tonic-gate 			 * The SUSV3 Posix spec is very clear that we
12430Sstevel@tonic-gate 			 * should get no error from validating the
12440Sstevel@tonic-gate 			 * timer until we would actually sleep.
12450Sstevel@tonic-gate 			 */
12460Sstevel@tonic-gate 			error = time_error;
12470Sstevel@tonic-gate 			break;
12480Sstevel@tonic-gate 		}
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate 		if (watched) {
12510Sstevel@tonic-gate 			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
12520Sstevel@tonic-gate 			watched = 0;
12530Sstevel@tonic-gate 		}
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate 		if (timedwait) {
12560Sstevel@tonic-gate 			/*
12570Sstevel@tonic-gate 			 * If we successfully queue the timeout,
12580Sstevel@tonic-gate 			 * then don't drop t_delay_lock until
12590Sstevel@tonic-gate 			 * we are on the sleep queue (below).
12600Sstevel@tonic-gate 			 */
12610Sstevel@tonic-gate 			mutex_enter(&t->t_delay_lock);
12620Sstevel@tonic-gate 			if (lwp_timer_enqueue(&lwpt) != 0) {
12630Sstevel@tonic-gate 				mutex_exit(&t->t_delay_lock);
12640Sstevel@tonic-gate 				imm_timeout = 1;
12650Sstevel@tonic-gate 				timedwait = NULL;
12660Sstevel@tonic-gate 			}
12670Sstevel@tonic-gate 		}
12680Sstevel@tonic-gate 		lwp_block(&lwpchan);
12690Sstevel@tonic-gate 		/*
12700Sstevel@tonic-gate 		 * Nothing should happen to cause the lwp to go to
12710Sstevel@tonic-gate 		 * sleep again until after it returns from swtch().
12720Sstevel@tonic-gate 		 */
12730Sstevel@tonic-gate 		if (timedwait)
12740Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
12750Sstevel@tonic-gate 		locked = 0;
12760Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
12770Sstevel@tonic-gate 		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
12780Sstevel@tonic-gate 			setrun(t);
12790Sstevel@tonic-gate 		swtch();
12800Sstevel@tonic-gate 		t->t_flag &= ~T_WAKEABLE;
12810Sstevel@tonic-gate 		if (timedwait)
12820Sstevel@tonic-gate 			tim = lwp_timer_dequeue(&lwpt);
12830Sstevel@tonic-gate 		setallwatch();
12840Sstevel@tonic-gate 		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
12850Sstevel@tonic-gate 			error = EINTR;
12860Sstevel@tonic-gate 		else if (imm_timeout || (timedwait && tim == -1))
12870Sstevel@tonic-gate 			error = ETIME;
12880Sstevel@tonic-gate 		if (error) {
12890Sstevel@tonic-gate 			lwp->lwp_asleep = 0;
12900Sstevel@tonic-gate 			lwp->lwp_sysabort = 0;
12910Sstevel@tonic-gate 			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
12920Sstevel@tonic-gate 			    S_WRITE);
12930Sstevel@tonic-gate 
12940Sstevel@tonic-gate 			/*
12950Sstevel@tonic-gate 			 * Need to re-compute waiters bit. The waiters field in
12960Sstevel@tonic-gate 			 * the lock is not reliable. Either of two things could
12970Sstevel@tonic-gate 			 * have occurred: no lwp may have called lwp_release()
12980Sstevel@tonic-gate 			 * for me but I have woken up due to a signal or
12990Sstevel@tonic-gate 			 * timeout.  In this case, the waiter bit is incorrect
13000Sstevel@tonic-gate 			 * since it is still set to 1, set above.
13010Sstevel@tonic-gate 			 * OR an lwp_release() did occur for some other lwp on
13020Sstevel@tonic-gate 			 * the same lwpchan. In this case, the waiter bit is
13030Sstevel@tonic-gate 			 * correct.  But which event occurred, one can't tell.
13040Sstevel@tonic-gate 			 * So, recompute.
13050Sstevel@tonic-gate 			 */
13060Sstevel@tonic-gate 			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
13070Sstevel@tonic-gate 			locked = 1;
13080Sstevel@tonic-gate 			sqh = lwpsqhash(&lwpchan);
13090Sstevel@tonic-gate 			disp_lock_enter(&sqh->sq_lock);
13100Sstevel@tonic-gate 			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
13110Sstevel@tonic-gate 			disp_lock_exit(&sqh->sq_lock);
13120Sstevel@tonic-gate 			break;
13130Sstevel@tonic-gate 		}
13140Sstevel@tonic-gate 		lwp->lwp_asleep = 0;
13150Sstevel@tonic-gate 		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
13160Sstevel@tonic-gate 		    S_WRITE);
13170Sstevel@tonic-gate 		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
13180Sstevel@tonic-gate 		locked = 1;
13190Sstevel@tonic-gate 		fuword8_noerr(&lp->mutex_waiters, &waiters);
13200Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, 1);
13214574Sraf 		if (type & LOCK_ROBUST) {
13220Sstevel@tonic-gate 			fuword16_noerr(&lp->mutex_flag, &flag);
13230Sstevel@tonic-gate 			if (flag & LOCK_NOTRECOVERABLE) {
13240Sstevel@tonic-gate 				error = ENOTRECOVERABLE;
13250Sstevel@tonic-gate 				break;
13260Sstevel@tonic-gate 			}
13270Sstevel@tonic-gate 		}
13280Sstevel@tonic-gate 	}
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
13310Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
13320Sstevel@tonic-gate 
13334574Sraf 	if (error == 0) {
133410887SRoger.Faulkner@Sun.COM 		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
13354574Sraf 		if (type & LOCK_ROBUST) {
13360Sstevel@tonic-gate 			fuword16_noerr(&lp->mutex_flag, &flag);
13374574Sraf 			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
13384574Sraf 				if (flag & LOCK_OWNERDEAD)
13394574Sraf 					error = EOWNERDEAD;
13404574Sraf 				else if (type & USYNC_PROCESS_ROBUST)
13414574Sraf 					error = ELOCKUNMAPPED;
13424574Sraf 				else
13434574Sraf 					error = EOWNERDEAD;
13444574Sraf 			}
13450Sstevel@tonic-gate 		}
13460Sstevel@tonic-gate 	}
13470Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_waiters, waiters);
13480Sstevel@tonic-gate 	locked = 0;
13490Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
13500Sstevel@tonic-gate out:
13510Sstevel@tonic-gate 	no_fault();
13520Sstevel@tonic-gate 	if (watched)
13530Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
13540Sstevel@tonic-gate 	if (tsp && !time_error)		/* copyout the residual time left */
13550Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
13560Sstevel@tonic-gate 	if (error)
13570Sstevel@tonic-gate 		return (set_errno(error));
13580Sstevel@tonic-gate 	return (0);
13590Sstevel@tonic-gate }
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate static int
iswanted(kthread_t * t,lwpchan_t * lwpchan)13620Sstevel@tonic-gate iswanted(kthread_t *t, lwpchan_t *lwpchan)
13630Sstevel@tonic-gate {
13640Sstevel@tonic-gate 	/*
13650Sstevel@tonic-gate 	 * The caller holds the dispatcher lock on the sleep queue.
13660Sstevel@tonic-gate 	 */
13670Sstevel@tonic-gate 	while (t != NULL) {
13680Sstevel@tonic-gate 		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
13690Sstevel@tonic-gate 		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
13700Sstevel@tonic-gate 			return (1);
13710Sstevel@tonic-gate 		t = t->t_link;
13720Sstevel@tonic-gate 	}
13730Sstevel@tonic-gate 	return (0);
13740Sstevel@tonic-gate }
13750Sstevel@tonic-gate 
13760Sstevel@tonic-gate /*
13770Sstevel@tonic-gate  * Return the highest priority thread sleeping on this lwpchan.
13780Sstevel@tonic-gate  */
13790Sstevel@tonic-gate static kthread_t *
lwp_queue_waiter(lwpchan_t * lwpchan)13800Sstevel@tonic-gate lwp_queue_waiter(lwpchan_t *lwpchan)
13810Sstevel@tonic-gate {
13820Sstevel@tonic-gate 	sleepq_head_t *sqh;
13830Sstevel@tonic-gate 	kthread_t *tp;
13840Sstevel@tonic-gate 
13850Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
13860Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
13870Sstevel@tonic-gate 	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
13880Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
13890Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
13900Sstevel@tonic-gate 			break;
13910Sstevel@tonic-gate 	}
13920Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
13930Sstevel@tonic-gate 	return (tp);
13940Sstevel@tonic-gate }
13950Sstevel@tonic-gate 
13960Sstevel@tonic-gate static int
lwp_release(lwpchan_t * lwpchan,uchar_t * waiters,int sync_type)13970Sstevel@tonic-gate lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
13980Sstevel@tonic-gate {
13990Sstevel@tonic-gate 	sleepq_head_t *sqh;
14000Sstevel@tonic-gate 	kthread_t *tp;
14010Sstevel@tonic-gate 	kthread_t **tpp;
14020Sstevel@tonic-gate 
14030Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
14040Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
14050Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
14060Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
14070Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
14080Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
14090Sstevel@tonic-gate 			/*
14100Sstevel@tonic-gate 			 * The following is typically false. It could be true
14110Sstevel@tonic-gate 			 * only if lwp_release() is called from
14120Sstevel@tonic-gate 			 * lwp_mutex_wakeup() after reading the waiters field
14130Sstevel@tonic-gate 			 * from memory in which the lwp lock used to be, but has
14140Sstevel@tonic-gate 			 * since been re-used to hold a lwp cv or lwp semaphore.
14150Sstevel@tonic-gate 			 * The thread "tp" found to match the lwp lock's wchan
14160Sstevel@tonic-gate 			 * is actually sleeping for the cv or semaphore which
14170Sstevel@tonic-gate 			 * now has the same wchan. In this case, lwp_release()
14180Sstevel@tonic-gate 			 * should return failure.
14190Sstevel@tonic-gate 			 */
14200Sstevel@tonic-gate 			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
14210Sstevel@tonic-gate 				ASSERT(sync_type == 0);
14220Sstevel@tonic-gate 				/*
14230Sstevel@tonic-gate 				 * assert that this can happen only for mutexes
14240Sstevel@tonic-gate 				 * i.e. sync_type == 0, for correctly written
14250Sstevel@tonic-gate 				 * user programs.
14260Sstevel@tonic-gate 				 */
14270Sstevel@tonic-gate 				disp_lock_exit(&sqh->sq_lock);
14280Sstevel@tonic-gate 				return (0);
14290Sstevel@tonic-gate 			}
14300Sstevel@tonic-gate 			*waiters = iswanted(tp->t_link, lwpchan);
14310Sstevel@tonic-gate 			sleepq_unlink(tpp, tp);
14320Sstevel@tonic-gate 			DTRACE_SCHED1(wakeup, kthread_t *, tp);
14330Sstevel@tonic-gate 			tp->t_wchan0 = NULL;
14340Sstevel@tonic-gate 			tp->t_wchan = NULL;
14350Sstevel@tonic-gate 			tp->t_sobj_ops = NULL;
14360Sstevel@tonic-gate 			tp->t_release = 1;
14370Sstevel@tonic-gate 			THREAD_TRANSITION(tp);	/* drops sleepq lock */
14380Sstevel@tonic-gate 			CL_WAKEUP(tp);
14390Sstevel@tonic-gate 			thread_unlock(tp);	/* drop run queue lock */
14400Sstevel@tonic-gate 			return (1);
14410Sstevel@tonic-gate 		}
14420Sstevel@tonic-gate 		tpp = &tp->t_link;
14430Sstevel@tonic-gate 	}
14440Sstevel@tonic-gate 	*waiters = 0;
14450Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
14460Sstevel@tonic-gate 	return (0);
14470Sstevel@tonic-gate }
14480Sstevel@tonic-gate 
14490Sstevel@tonic-gate static void
lwp_release_all(lwpchan_t * lwpchan)14500Sstevel@tonic-gate lwp_release_all(lwpchan_t *lwpchan)
14510Sstevel@tonic-gate {
14520Sstevel@tonic-gate 	sleepq_head_t	*sqh;
14530Sstevel@tonic-gate 	kthread_t *tp;
14540Sstevel@tonic-gate 	kthread_t **tpp;
14550Sstevel@tonic-gate 
14560Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
14570Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
14580Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
14590Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
14600Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
14610Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
14620Sstevel@tonic-gate 			sleepq_unlink(tpp, tp);
14630Sstevel@tonic-gate 			DTRACE_SCHED1(wakeup, kthread_t *, tp);
14640Sstevel@tonic-gate 			tp->t_wchan0 = NULL;
14650Sstevel@tonic-gate 			tp->t_wchan = NULL;
14660Sstevel@tonic-gate 			tp->t_sobj_ops = NULL;
14670Sstevel@tonic-gate 			CL_WAKEUP(tp);
14680Sstevel@tonic-gate 			thread_unlock_high(tp);	/* release run queue lock */
14690Sstevel@tonic-gate 		} else {
14700Sstevel@tonic-gate 			tpp = &tp->t_link;
14710Sstevel@tonic-gate 		}
14720Sstevel@tonic-gate 	}
14730Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);		/* drop sleep q lock */
14740Sstevel@tonic-gate }
14750Sstevel@tonic-gate 
14760Sstevel@tonic-gate /*
14770Sstevel@tonic-gate  * unblock a lwp that is trying to acquire this mutex. the blocked
14780Sstevel@tonic-gate  * lwp resumes and retries to acquire the lock.
14790Sstevel@tonic-gate  */
14800Sstevel@tonic-gate int
lwp_mutex_wakeup(lwp_mutex_t * lp,int release_all)14814574Sraf lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
14820Sstevel@tonic-gate {
14830Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
14840Sstevel@tonic-gate 	lwpchan_t lwpchan;
14850Sstevel@tonic-gate 	uchar_t waiters;
14860Sstevel@tonic-gate 	volatile int locked = 0;
14870Sstevel@tonic-gate 	volatile int watched = 0;
14880Sstevel@tonic-gate 	volatile uint8_t type = 0;
14890Sstevel@tonic-gate 	label_t ljb;
14900Sstevel@tonic-gate 	int error = 0;
14910Sstevel@tonic-gate 
14920Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
14930Sstevel@tonic-gate 		return (set_errno(EFAULT));
14940Sstevel@tonic-gate 
14950Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
14960Sstevel@tonic-gate 
14970Sstevel@tonic-gate 	if (on_fault(&ljb)) {
14980Sstevel@tonic-gate 		if (locked)
14990Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
15000Sstevel@tonic-gate 		error = EFAULT;
15010Sstevel@tonic-gate 		goto out;
15020Sstevel@tonic-gate 	}
15030Sstevel@tonic-gate 	/*
15046577Sraf 	 * Force Copy-on-write if necessary and ensure that the
15056577Sraf 	 * synchronization object resides in read/write memory.
15066577Sraf 	 * Cause an EFAULT return now if this is not so.
15070Sstevel@tonic-gate 	 */
15080Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
15090Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
15100Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
15110Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
15120Sstevel@tonic-gate 		error = EFAULT;
15130Sstevel@tonic-gate 		goto out;
15140Sstevel@tonic-gate 	}
15150Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
15160Sstevel@tonic-gate 	locked = 1;
15170Sstevel@tonic-gate 	/*
15180Sstevel@tonic-gate 	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
15190Sstevel@tonic-gate 	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
15200Sstevel@tonic-gate 	 * may fail.  If it fails, do not write into the waiter bit.
15210Sstevel@tonic-gate 	 * The call to lwp_release() might fail due to one of three reasons:
15220Sstevel@tonic-gate 	 *
15230Sstevel@tonic-gate 	 * 	1. due to the thread which set the waiter bit not actually
15240Sstevel@tonic-gate 	 *	   sleeping since it got the lock on the re-try. The waiter
15250Sstevel@tonic-gate 	 *	   bit will then be correctly updated by that thread. This
15260Sstevel@tonic-gate 	 *	   window may be closed by reading the wait bit again here
15270Sstevel@tonic-gate 	 *	   and not calling lwp_release() at all if it is zero.
15280Sstevel@tonic-gate 	 *	2. the thread which set the waiter bit and went to sleep
15290Sstevel@tonic-gate 	 *	   was woken up by a signal. This time, the waiter recomputes
15300Sstevel@tonic-gate 	 *	   the wait bit in the return with EINTR code.
15310Sstevel@tonic-gate 	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
15320Sstevel@tonic-gate 	 *	   memory that has been re-used after the lock was dropped.
15330Sstevel@tonic-gate 	 *	   In this case, writing into the waiter bit would cause data
15340Sstevel@tonic-gate 	 *	   corruption.
15350Sstevel@tonic-gate 	 */
15364574Sraf 	if (release_all)
15374574Sraf 		lwp_release_all(&lwpchan);
15386057Sraf 	else if (lwp_release(&lwpchan, &waiters, 0))
15390Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, waiters);
15400Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
15410Sstevel@tonic-gate out:
15420Sstevel@tonic-gate 	no_fault();
15430Sstevel@tonic-gate 	if (watched)
15440Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
15450Sstevel@tonic-gate 	if (error)
15460Sstevel@tonic-gate 		return (set_errno(error));
15470Sstevel@tonic-gate 	return (0);
15480Sstevel@tonic-gate }
15490Sstevel@tonic-gate 
15500Sstevel@tonic-gate /*
15510Sstevel@tonic-gate  * lwp_cond_wait() has four arguments, a pointer to a condition variable,
15520Sstevel@tonic-gate  * a pointer to a mutex, a pointer to a timespec for a timed wait and
15530Sstevel@tonic-gate  * a flag telling the kernel whether or not to honor the kernel/user
15540Sstevel@tonic-gate  * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
15550Sstevel@tonic-gate  * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
15560Sstevel@tonic-gate  * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
15570Sstevel@tonic-gate  * it is used an an in/out parameter.  On entry, it contains the relative
15580Sstevel@tonic-gate  * time until timeout.  On exit, we copyout the residual time left to it.
15590Sstevel@tonic-gate  */
15600Sstevel@tonic-gate int
lwp_cond_wait(lwp_cond_t * cv,lwp_mutex_t * mp,timespec_t * tsp,int check_park)15610Sstevel@tonic-gate lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
15620Sstevel@tonic-gate {
15630Sstevel@tonic-gate 	kthread_t *t = curthread;
15640Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
15650Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
15660Sstevel@tonic-gate 	lwp_timer_t lwpt;
15670Sstevel@tonic-gate 	lwpchan_t cv_lwpchan;
15680Sstevel@tonic-gate 	lwpchan_t m_lwpchan;
15690Sstevel@tonic-gate 	caddr_t timedwait;
15700Sstevel@tonic-gate 	volatile uint16_t type = 0;
15710Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
15720Sstevel@tonic-gate 	uchar_t waiters;
15730Sstevel@tonic-gate 	volatile int error;
15740Sstevel@tonic-gate 	clock_t tim = -1;
15750Sstevel@tonic-gate 	volatile int locked = 0;
15760Sstevel@tonic-gate 	volatile int m_locked = 0;
15770Sstevel@tonic-gate 	volatile int cvwatched = 0;
15780Sstevel@tonic-gate 	volatile int mpwatched = 0;
15790Sstevel@tonic-gate 	label_t ljb;
15800Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
15810Sstevel@tonic-gate 	int imm_timeout = 0;
15820Sstevel@tonic-gate 	int imm_unpark = 0;
15830Sstevel@tonic-gate 
15840Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit ||
15850Sstevel@tonic-gate 	    (caddr_t)mp >= p->p_as->a_userlimit)
15860Sstevel@tonic-gate 		return (set_errno(EFAULT));
15870Sstevel@tonic-gate 
158810230SRoger.Faulkner@Sun.COM 	/*
158910230SRoger.Faulkner@Sun.COM 	 * Put the lwp in an orderly state for debugging,
159010230SRoger.Faulkner@Sun.COM 	 * in case we are stopped while sleeping, below.
159110230SRoger.Faulkner@Sun.COM 	 */
159210230SRoger.Faulkner@Sun.COM 	prstop(PR_REQUESTED, 0);
159310230SRoger.Faulkner@Sun.COM 
15940Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
15950Sstevel@tonic-gate 	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
15960Sstevel@tonic-gate 		return (set_errno(error));
15970Sstevel@tonic-gate 	if (lwpt.lwpt_imm_timeout) {
15980Sstevel@tonic-gate 		imm_timeout = 1;
15990Sstevel@tonic-gate 		timedwait = NULL;
16000Sstevel@tonic-gate 	}
16010Sstevel@tonic-gate 
16020Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
16030Sstevel@tonic-gate 
16040Sstevel@tonic-gate 	if (on_fault(&ljb)) {
16050Sstevel@tonic-gate 		if (no_lwpchan) {
16060Sstevel@tonic-gate 			error = EFAULT;
16070Sstevel@tonic-gate 			goto out;
16080Sstevel@tonic-gate 		}
16090Sstevel@tonic-gate 		if (m_locked) {
16100Sstevel@tonic-gate 			m_locked = 0;
16110Sstevel@tonic-gate 			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
16120Sstevel@tonic-gate 		}
16130Sstevel@tonic-gate 		if (locked) {
16140Sstevel@tonic-gate 			locked = 0;
16150Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
16160Sstevel@tonic-gate 		}
16170Sstevel@tonic-gate 		/*
16180Sstevel@tonic-gate 		 * set up another on_fault() for a possible fault
16190Sstevel@tonic-gate 		 * on the user lock accessed at "efault"
16200Sstevel@tonic-gate 		 */
16210Sstevel@tonic-gate 		if (on_fault(&ljb)) {
16220Sstevel@tonic-gate 			if (m_locked) {
16230Sstevel@tonic-gate 				m_locked = 0;
16240Sstevel@tonic-gate 				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
16250Sstevel@tonic-gate 			}
16260Sstevel@tonic-gate 			goto out;
16270Sstevel@tonic-gate 		}
16280Sstevel@tonic-gate 		error = EFAULT;
16290Sstevel@tonic-gate 		goto efault;
16300Sstevel@tonic-gate 	}
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 	/*
16336577Sraf 	 * Force Copy-on-write if necessary and ensure that the
16346577Sraf 	 * synchronization object resides in read/write memory.
16356577Sraf 	 * Cause an EFAULT return now if this is not so.
16360Sstevel@tonic-gate 	 */
16370Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
16386577Sraf 	suword8_noerr(&mp->mutex_type, mtype);
16390Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
16400Sstevel@tonic-gate 		/* convert user level mutex, "mp", to a unique lwpchan */
16410Sstevel@tonic-gate 		/* check if mtype is ok to use below, instead of type from cv */
16420Sstevel@tonic-gate 		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
16430Sstevel@tonic-gate 		    &m_lwpchan, LWPCHAN_MPPOOL)) {
16440Sstevel@tonic-gate 			error = EFAULT;
16450Sstevel@tonic-gate 			goto out;
16460Sstevel@tonic-gate 		}
16470Sstevel@tonic-gate 	}
16480Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
16490Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
16500Sstevel@tonic-gate 	/* convert user level condition variable, "cv", to a unique lwpchan */
16510Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
16520Sstevel@tonic-gate 	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
16530Sstevel@tonic-gate 		error = EFAULT;
16540Sstevel@tonic-gate 		goto out;
16550Sstevel@tonic-gate 	}
16560Sstevel@tonic-gate 	no_lwpchan = 0;
16570Sstevel@tonic-gate 	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
16580Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0)
16590Sstevel@tonic-gate 		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
16600Sstevel@tonic-gate 		    S_WRITE);
16610Sstevel@tonic-gate 
16620Sstevel@tonic-gate 	/*
16630Sstevel@tonic-gate 	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
16640Sstevel@tonic-gate 	 * with respect to a possible wakeup which is a result of either
16650Sstevel@tonic-gate 	 * an lwp_cond_signal() or an lwp_cond_broadcast().
16660Sstevel@tonic-gate 	 *
16670Sstevel@tonic-gate 	 * What's misleading, is that the lwp is put to sleep after the
16680Sstevel@tonic-gate 	 * condition variable's mutex is released.  This is OK as long as
16690Sstevel@tonic-gate 	 * the release operation is also done while holding lwpchan_lock.
16700Sstevel@tonic-gate 	 * The lwp is then put to sleep when the possibility of pagefaulting
16710Sstevel@tonic-gate 	 * or sleeping is completely eliminated.
16720Sstevel@tonic-gate 	 */
16730Sstevel@tonic-gate 	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
16740Sstevel@tonic-gate 	locked = 1;
16750Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
16760Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
16770Sstevel@tonic-gate 		m_locked = 1;
16780Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
16790Sstevel@tonic-gate 		/*
16800Sstevel@tonic-gate 		 * unlock the condition variable's mutex. (pagefaults are
16810Sstevel@tonic-gate 		 * possible here.)
16820Sstevel@tonic-gate 		 */
168310887SRoger.Faulkner@Sun.COM 		set_owner_pid(mp, 0, 0);
16840Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
16850Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
16860Sstevel@tonic-gate 		if (waiters != 0) {
16870Sstevel@tonic-gate 			/*
16880Sstevel@tonic-gate 			 * Given the locking of lwpchan_lock around the release
16890Sstevel@tonic-gate 			 * of the mutex and checking for waiters, the following
16900Sstevel@tonic-gate 			 * call to lwp_release() can fail ONLY if the lock
16910Sstevel@tonic-gate 			 * acquirer is interrupted after setting the waiter bit,
16920Sstevel@tonic-gate 			 * calling lwp_block() and releasing lwpchan_lock.
16930Sstevel@tonic-gate 			 * In this case, it could get pulled off the lwp sleep
16940Sstevel@tonic-gate 			 * q (via setrun()) before the following call to
16950Sstevel@tonic-gate 			 * lwp_release() occurs. In this case, the lock
16960Sstevel@tonic-gate 			 * requestor will update the waiter bit correctly by
16970Sstevel@tonic-gate 			 * re-evaluating it.
16980Sstevel@tonic-gate 			 */
16996057Sraf 			if (lwp_release(&m_lwpchan, &waiters, 0))
17000Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
17010Sstevel@tonic-gate 		}
17020Sstevel@tonic-gate 		m_locked = 0;
17030Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
17040Sstevel@tonic-gate 	} else {
17050Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
17060Sstevel@tonic-gate 		error = lwp_upimutex_unlock(mp, mtype);
17070Sstevel@tonic-gate 		if (error) {	/* if the upimutex unlock failed */
17080Sstevel@tonic-gate 			locked = 0;
17090Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
17100Sstevel@tonic-gate 			goto out;
17110Sstevel@tonic-gate 		}
17120Sstevel@tonic-gate 	}
17130Sstevel@tonic-gate 	no_fault();
17140Sstevel@tonic-gate 
17150Sstevel@tonic-gate 	if (mpwatched) {
17160Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
17170Sstevel@tonic-gate 		mpwatched = 0;
17180Sstevel@tonic-gate 	}
17190Sstevel@tonic-gate 	if (cvwatched) {
17200Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
17210Sstevel@tonic-gate 		cvwatched = 0;
17220Sstevel@tonic-gate 	}
17230Sstevel@tonic-gate 
17240Sstevel@tonic-gate 	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
17250Sstevel@tonic-gate 		/*
17260Sstevel@tonic-gate 		 * We received a signal at user-level before calling here
17270Sstevel@tonic-gate 		 * or another thread wants us to return immediately
17280Sstevel@tonic-gate 		 * with EINTR.  See lwp_unpark().
17290Sstevel@tonic-gate 		 */
17300Sstevel@tonic-gate 		imm_unpark = 1;
17310Sstevel@tonic-gate 		t->t_unpark = 0;
17320Sstevel@tonic-gate 		timedwait = NULL;
17330Sstevel@tonic-gate 	} else if (timedwait) {
17340Sstevel@tonic-gate 		/*
17350Sstevel@tonic-gate 		 * If we successfully queue the timeout,
17360Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
17370Sstevel@tonic-gate 		 * we are on the sleep queue (below).
17380Sstevel@tonic-gate 		 */
17390Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
17400Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
17410Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
17420Sstevel@tonic-gate 			imm_timeout = 1;
17430Sstevel@tonic-gate 			timedwait = NULL;
17440Sstevel@tonic-gate 		}
17450Sstevel@tonic-gate 	}
17460Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
17470Sstevel@tonic-gate 	lwp_block(&cv_lwpchan);
17480Sstevel@tonic-gate 	/*
17490Sstevel@tonic-gate 	 * Nothing should happen to cause the lwp to go to sleep
17500Sstevel@tonic-gate 	 * until after it returns from swtch().
17510Sstevel@tonic-gate 	 */
17520Sstevel@tonic-gate 	if (timedwait)
17530Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
17540Sstevel@tonic-gate 	locked = 0;
17550Sstevel@tonic-gate 	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
17560Sstevel@tonic-gate 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
17570Sstevel@tonic-gate 	    (imm_timeout | imm_unpark))
17580Sstevel@tonic-gate 		setrun(t);
17590Sstevel@tonic-gate 	swtch();
17600Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
17610Sstevel@tonic-gate 	if (timedwait)
17620Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
17630Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
17640Sstevel@tonic-gate 	    MUSTRETURN(p, t) || imm_unpark)
17650Sstevel@tonic-gate 		error = EINTR;
17660Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
17670Sstevel@tonic-gate 		error = ETIME;
17680Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
17690Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
17700Sstevel@tonic-gate 	setallwatch();
17710Sstevel@tonic-gate 
17720Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
17730Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
17740Sstevel@tonic-gate 
17750Sstevel@tonic-gate 	if (tsp && check_park)		/* copyout the residual time left */
17760Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
17770Sstevel@tonic-gate 
17780Sstevel@tonic-gate 	/* the mutex is reacquired by the caller on return to user level */
17790Sstevel@tonic-gate 	if (error) {
17800Sstevel@tonic-gate 		/*
17810Sstevel@tonic-gate 		 * If we were concurrently lwp_cond_signal()d and we
17820Sstevel@tonic-gate 		 * received a UNIX signal or got a timeout, then perform
17830Sstevel@tonic-gate 		 * another lwp_cond_signal() to avoid consuming the wakeup.
17840Sstevel@tonic-gate 		 */
17850Sstevel@tonic-gate 		if (t->t_release)
17860Sstevel@tonic-gate 			(void) lwp_cond_signal(cv);
17870Sstevel@tonic-gate 		return (set_errno(error));
17880Sstevel@tonic-gate 	}
17890Sstevel@tonic-gate 	return (0);
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate efault:
17920Sstevel@tonic-gate 	/*
17930Sstevel@tonic-gate 	 * make sure that the user level lock is dropped before
17940Sstevel@tonic-gate 	 * returning to caller, since the caller always re-acquires it.
17950Sstevel@tonic-gate 	 */
17960Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
17970Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
17980Sstevel@tonic-gate 		m_locked = 1;
179910887SRoger.Faulkner@Sun.COM 		set_owner_pid(mp, 0, 0);
18000Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
18010Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
18020Sstevel@tonic-gate 		if (waiters != 0) {
18030Sstevel@tonic-gate 			/*
18040Sstevel@tonic-gate 			 * See comment above on lock clearing and lwp_release()
18050Sstevel@tonic-gate 			 * success/failure.
18060Sstevel@tonic-gate 			 */
18076057Sraf 			if (lwp_release(&m_lwpchan, &waiters, 0))
18080Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
18090Sstevel@tonic-gate 		}
18100Sstevel@tonic-gate 		m_locked = 0;
18110Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
18120Sstevel@tonic-gate 	} else {
18130Sstevel@tonic-gate 		(void) lwp_upimutex_unlock(mp, mtype);
18140Sstevel@tonic-gate 	}
18150Sstevel@tonic-gate out:
18160Sstevel@tonic-gate 	no_fault();
18170Sstevel@tonic-gate 	if (mpwatched)
18180Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
18190Sstevel@tonic-gate 	if (cvwatched)
18200Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
18210Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
18220Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
18230Sstevel@tonic-gate 	return (set_errno(error));
18240Sstevel@tonic-gate }
18250Sstevel@tonic-gate 
18260Sstevel@tonic-gate /*
18270Sstevel@tonic-gate  * wakeup one lwp that's blocked on this condition variable.
18280Sstevel@tonic-gate  */
18290Sstevel@tonic-gate int
lwp_cond_signal(lwp_cond_t * cv)18300Sstevel@tonic-gate lwp_cond_signal(lwp_cond_t *cv)
18310Sstevel@tonic-gate {
18320Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
18330Sstevel@tonic-gate 	lwpchan_t lwpchan;
18340Sstevel@tonic-gate 	uchar_t waiters;
18350Sstevel@tonic-gate 	volatile uint16_t type = 0;
18360Sstevel@tonic-gate 	volatile int locked = 0;
18370Sstevel@tonic-gate 	volatile int watched = 0;
18380Sstevel@tonic-gate 	label_t ljb;
18390Sstevel@tonic-gate 	int error = 0;
18400Sstevel@tonic-gate 
18410Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
18420Sstevel@tonic-gate 		return (set_errno(EFAULT));
18430Sstevel@tonic-gate 
18440Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
18450Sstevel@tonic-gate 
18460Sstevel@tonic-gate 	if (on_fault(&ljb)) {
18470Sstevel@tonic-gate 		if (locked)
18480Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
18490Sstevel@tonic-gate 		error = EFAULT;
18500Sstevel@tonic-gate 		goto out;
18510Sstevel@tonic-gate 	}
18520Sstevel@tonic-gate 	/*
18536577Sraf 	 * Force Copy-on-write if necessary and ensure that the
18546577Sraf 	 * synchronization object resides in read/write memory.
18556577Sraf 	 * Cause an EFAULT return now if this is not so.
18560Sstevel@tonic-gate 	 */
18570Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
18580Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
18590Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
18600Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
18610Sstevel@tonic-gate 		error = EFAULT;
18620Sstevel@tonic-gate 		goto out;
18630Sstevel@tonic-gate 	}
18640Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
18650Sstevel@tonic-gate 	locked = 1;
18660Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
18670Sstevel@tonic-gate 	if (waiters != 0) {
18680Sstevel@tonic-gate 		/*
18690Sstevel@tonic-gate 		 * The following call to lwp_release() might fail but it is
18700Sstevel@tonic-gate 		 * OK to write into the waiters bit below, since the memory
18710Sstevel@tonic-gate 		 * could not have been re-used or unmapped (for correctly
18720Sstevel@tonic-gate 		 * written user programs) as in the case of lwp_mutex_wakeup().
18730Sstevel@tonic-gate 		 * For an incorrect program, we should not care about data
18740Sstevel@tonic-gate 		 * corruption since this is just one instance of other places
18750Sstevel@tonic-gate 		 * where corruption can occur for such a program. Of course
18760Sstevel@tonic-gate 		 * if the memory is unmapped, normal fault recovery occurs.
18770Sstevel@tonic-gate 		 */
18780Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
18790Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, waiters);
18800Sstevel@tonic-gate 	}
18810Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
18820Sstevel@tonic-gate out:
18830Sstevel@tonic-gate 	no_fault();
18840Sstevel@tonic-gate 	if (watched)
18850Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
18860Sstevel@tonic-gate 	if (error)
18870Sstevel@tonic-gate 		return (set_errno(error));
18880Sstevel@tonic-gate 	return (0);
18890Sstevel@tonic-gate }
18900Sstevel@tonic-gate 
18910Sstevel@tonic-gate /*
18920Sstevel@tonic-gate  * wakeup every lwp that's blocked on this condition variable.
18930Sstevel@tonic-gate  */
18940Sstevel@tonic-gate int
lwp_cond_broadcast(lwp_cond_t * cv)18950Sstevel@tonic-gate lwp_cond_broadcast(lwp_cond_t *cv)
18960Sstevel@tonic-gate {
18970Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
18980Sstevel@tonic-gate 	lwpchan_t lwpchan;
18990Sstevel@tonic-gate 	volatile uint16_t type = 0;
19000Sstevel@tonic-gate 	volatile int locked = 0;
19010Sstevel@tonic-gate 	volatile int watched = 0;
19020Sstevel@tonic-gate 	label_t ljb;
19030Sstevel@tonic-gate 	uchar_t waiters;
19040Sstevel@tonic-gate 	int error = 0;
19050Sstevel@tonic-gate 
19060Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
19070Sstevel@tonic-gate 		return (set_errno(EFAULT));
19080Sstevel@tonic-gate 
19090Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
19100Sstevel@tonic-gate 
19110Sstevel@tonic-gate 	if (on_fault(&ljb)) {
19120Sstevel@tonic-gate 		if (locked)
19130Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19140Sstevel@tonic-gate 		error = EFAULT;
19150Sstevel@tonic-gate 		goto out;
19160Sstevel@tonic-gate 	}
19170Sstevel@tonic-gate 	/*
19186577Sraf 	 * Force Copy-on-write if necessary and ensure that the
19196577Sraf 	 * synchronization object resides in read/write memory.
19206577Sraf 	 * Cause an EFAULT return now if this is not so.
19210Sstevel@tonic-gate 	 */
19220Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
19230Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
19240Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
19250Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
19260Sstevel@tonic-gate 		error = EFAULT;
19270Sstevel@tonic-gate 		goto out;
19280Sstevel@tonic-gate 	}
19290Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
19300Sstevel@tonic-gate 	locked = 1;
19310Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
19320Sstevel@tonic-gate 	if (waiters != 0) {
19330Sstevel@tonic-gate 		lwp_release_all(&lwpchan);
19340Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 0);
19350Sstevel@tonic-gate 	}
19360Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19370Sstevel@tonic-gate out:
19380Sstevel@tonic-gate 	no_fault();
19390Sstevel@tonic-gate 	if (watched)
19400Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
19410Sstevel@tonic-gate 	if (error)
19420Sstevel@tonic-gate 		return (set_errno(error));
19430Sstevel@tonic-gate 	return (0);
19440Sstevel@tonic-gate }
19450Sstevel@tonic-gate 
19460Sstevel@tonic-gate int
lwp_sema_trywait(lwp_sema_t * sp)19470Sstevel@tonic-gate lwp_sema_trywait(lwp_sema_t *sp)
19480Sstevel@tonic-gate {
19490Sstevel@tonic-gate 	kthread_t *t = curthread;
19500Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
19510Sstevel@tonic-gate 	label_t ljb;
19520Sstevel@tonic-gate 	volatile int locked = 0;
19530Sstevel@tonic-gate 	volatile int watched = 0;
19540Sstevel@tonic-gate 	volatile uint16_t type = 0;
19550Sstevel@tonic-gate 	int count;
19560Sstevel@tonic-gate 	lwpchan_t lwpchan;
19570Sstevel@tonic-gate 	uchar_t waiters;
19580Sstevel@tonic-gate 	int error = 0;
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
19610Sstevel@tonic-gate 		return (set_errno(EFAULT));
19620Sstevel@tonic-gate 
19630Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
19640Sstevel@tonic-gate 
19650Sstevel@tonic-gate 	if (on_fault(&ljb)) {
19660Sstevel@tonic-gate 		if (locked)
19670Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19680Sstevel@tonic-gate 		error = EFAULT;
19690Sstevel@tonic-gate 		goto out;
19700Sstevel@tonic-gate 	}
19710Sstevel@tonic-gate 	/*
19726577Sraf 	 * Force Copy-on-write if necessary and ensure that the
19736577Sraf 	 * synchronization object resides in read/write memory.
19746577Sraf 	 * Cause an EFAULT return now if this is not so.
19750Sstevel@tonic-gate 	 */
19760Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
19770Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
19780Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
19790Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
19800Sstevel@tonic-gate 		error = EFAULT;
19810Sstevel@tonic-gate 		goto out;
19820Sstevel@tonic-gate 	}
19830Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
19840Sstevel@tonic-gate 	locked = 1;
19850Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
19860Sstevel@tonic-gate 	if (count == 0)
19870Sstevel@tonic-gate 		error = EBUSY;
19880Sstevel@tonic-gate 	else
19890Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
19900Sstevel@tonic-gate 	if (count != 0) {
19910Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
19920Sstevel@tonic-gate 		if (waiters != 0) {
19930Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
19940Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
19950Sstevel@tonic-gate 		}
19960Sstevel@tonic-gate 	}
19970Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19980Sstevel@tonic-gate out:
19990Sstevel@tonic-gate 	no_fault();
20000Sstevel@tonic-gate 	if (watched)
20010Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20020Sstevel@tonic-gate 	if (error)
20030Sstevel@tonic-gate 		return (set_errno(error));
20040Sstevel@tonic-gate 	return (0);
20050Sstevel@tonic-gate }
20060Sstevel@tonic-gate 
20070Sstevel@tonic-gate /*
20080Sstevel@tonic-gate  * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
20090Sstevel@tonic-gate  */
20100Sstevel@tonic-gate int
lwp_sema_timedwait(lwp_sema_t * sp,timespec_t * tsp,int check_park)20110Sstevel@tonic-gate lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
20120Sstevel@tonic-gate {
20130Sstevel@tonic-gate 	kthread_t *t = curthread;
20140Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
20150Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
20160Sstevel@tonic-gate 	lwp_timer_t lwpt;
20170Sstevel@tonic-gate 	caddr_t timedwait;
20180Sstevel@tonic-gate 	clock_t tim = -1;
20190Sstevel@tonic-gate 	label_t ljb;
20200Sstevel@tonic-gate 	volatile int locked = 0;
20210Sstevel@tonic-gate 	volatile int watched = 0;
20220Sstevel@tonic-gate 	volatile uint16_t type = 0;
20230Sstevel@tonic-gate 	int count;
20240Sstevel@tonic-gate 	lwpchan_t lwpchan;
20250Sstevel@tonic-gate 	uchar_t waiters;
20260Sstevel@tonic-gate 	int error = 0;
20270Sstevel@tonic-gate 	int time_error;
20280Sstevel@tonic-gate 	int imm_timeout = 0;
20290Sstevel@tonic-gate 	int imm_unpark = 0;
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
20320Sstevel@tonic-gate 		return (set_errno(EFAULT));
20330Sstevel@tonic-gate 
203410230SRoger.Faulkner@Sun.COM 	/*
203510230SRoger.Faulkner@Sun.COM 	 * Put the lwp in an orderly state for debugging,
203610230SRoger.Faulkner@Sun.COM 	 * in case we are stopped while sleeping, below.
203710230SRoger.Faulkner@Sun.COM 	 */
203810230SRoger.Faulkner@Sun.COM 	prstop(PR_REQUESTED, 0);
203910230SRoger.Faulkner@Sun.COM 
20400Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
20410Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
20420Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
20430Sstevel@tonic-gate 		imm_timeout = 1;
20440Sstevel@tonic-gate 		timedwait = NULL;
20450Sstevel@tonic-gate 	}
20460Sstevel@tonic-gate 
20470Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20480Sstevel@tonic-gate 
20490Sstevel@tonic-gate 	if (on_fault(&ljb)) {
20500Sstevel@tonic-gate 		if (locked)
20510Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
20520Sstevel@tonic-gate 		error = EFAULT;
20530Sstevel@tonic-gate 		goto out;
20540Sstevel@tonic-gate 	}
20550Sstevel@tonic-gate 	/*
20566577Sraf 	 * Force Copy-on-write if necessary and ensure that the
20576577Sraf 	 * synchronization object resides in read/write memory.
20586577Sraf 	 * Cause an EFAULT return now if this is not so.
20590Sstevel@tonic-gate 	 */
20600Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
20610Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
20620Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
20630Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
20640Sstevel@tonic-gate 		error = EFAULT;
20650Sstevel@tonic-gate 		goto out;
20660Sstevel@tonic-gate 	}
20670Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
20680Sstevel@tonic-gate 	locked = 1;
20690Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
20700Sstevel@tonic-gate 	while (error == 0 && count == 0) {
20710Sstevel@tonic-gate 		if (time_error) {
20720Sstevel@tonic-gate 			/*
20730Sstevel@tonic-gate 			 * The SUSV3 Posix spec is very clear that we
20740Sstevel@tonic-gate 			 * should get no error from validating the
20750Sstevel@tonic-gate 			 * timer until we would actually sleep.
20760Sstevel@tonic-gate 			 */
20770Sstevel@tonic-gate 			error = time_error;
20780Sstevel@tonic-gate 			break;
20790Sstevel@tonic-gate 		}
20800Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, 1);
20810Sstevel@tonic-gate 		if (watched)
20820Sstevel@tonic-gate 			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20830Sstevel@tonic-gate 		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
20840Sstevel@tonic-gate 			/*
20850Sstevel@tonic-gate 			 * We received a signal at user-level before calling
20860Sstevel@tonic-gate 			 * here or another thread wants us to return
20870Sstevel@tonic-gate 			 * immediately with EINTR.  See lwp_unpark().
20880Sstevel@tonic-gate 			 */
20890Sstevel@tonic-gate 			imm_unpark = 1;
20900Sstevel@tonic-gate 			t->t_unpark = 0;
20910Sstevel@tonic-gate 			timedwait = NULL;
20920Sstevel@tonic-gate 		} else if (timedwait) {
20930Sstevel@tonic-gate 			/*
20940Sstevel@tonic-gate 			 * If we successfully queue the timeout,
20950Sstevel@tonic-gate 			 * then don't drop t_delay_lock until
20960Sstevel@tonic-gate 			 * we are on the sleep queue (below).
20970Sstevel@tonic-gate 			 */
20980Sstevel@tonic-gate 			mutex_enter(&t->t_delay_lock);
20990Sstevel@tonic-gate 			if (lwp_timer_enqueue(&lwpt) != 0) {
21000Sstevel@tonic-gate 				mutex_exit(&t->t_delay_lock);
21010Sstevel@tonic-gate 				imm_timeout = 1;
21020Sstevel@tonic-gate 				timedwait = NULL;
21030Sstevel@tonic-gate 			}
21040Sstevel@tonic-gate 		}
21050Sstevel@tonic-gate 		t->t_flag |= T_WAITCVSEM;
21060Sstevel@tonic-gate 		lwp_block(&lwpchan);
21070Sstevel@tonic-gate 		/*
21080Sstevel@tonic-gate 		 * Nothing should happen to cause the lwp to sleep
21090Sstevel@tonic-gate 		 * again until after it returns from swtch().
21100Sstevel@tonic-gate 		 */
21110Sstevel@tonic-gate 		if (timedwait)
21120Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
21130Sstevel@tonic-gate 		locked = 0;
21140Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
21150Sstevel@tonic-gate 		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
21160Sstevel@tonic-gate 		    (imm_timeout | imm_unpark))
21170Sstevel@tonic-gate 			setrun(t);
21180Sstevel@tonic-gate 		swtch();
21190Sstevel@tonic-gate 		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
21200Sstevel@tonic-gate 		if (timedwait)
21210Sstevel@tonic-gate 			tim = lwp_timer_dequeue(&lwpt);
21220Sstevel@tonic-gate 		setallwatch();
21230Sstevel@tonic-gate 		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
21240Sstevel@tonic-gate 		    MUSTRETURN(p, t) || imm_unpark)
21250Sstevel@tonic-gate 			error = EINTR;
21260Sstevel@tonic-gate 		else if (imm_timeout || (timedwait && tim == -1))
21270Sstevel@tonic-gate 			error = ETIME;
21280Sstevel@tonic-gate 		lwp->lwp_asleep = 0;
21290Sstevel@tonic-gate 		lwp->lwp_sysabort = 0;
21300Sstevel@tonic-gate 		watched = watch_disable_addr((caddr_t)sp,
21310Sstevel@tonic-gate 		    sizeof (*sp), S_WRITE);
21320Sstevel@tonic-gate 		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
21330Sstevel@tonic-gate 		locked = 1;
21340Sstevel@tonic-gate 		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
21350Sstevel@tonic-gate 	}
21360Sstevel@tonic-gate 	if (error == 0)
21370Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
21380Sstevel@tonic-gate 	if (count != 0) {
21390Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
21400Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, waiters);
21410Sstevel@tonic-gate 	}
21420Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
21430Sstevel@tonic-gate out:
21440Sstevel@tonic-gate 	no_fault();
21450Sstevel@tonic-gate 	if (watched)
21460Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
21470Sstevel@tonic-gate 	if (tsp && check_park && !time_error)
21480Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
21490Sstevel@tonic-gate 	if (error)
21500Sstevel@tonic-gate 		return (set_errno(error));
21510Sstevel@tonic-gate 	return (0);
21520Sstevel@tonic-gate }
21530Sstevel@tonic-gate 
21540Sstevel@tonic-gate int
lwp_sema_post(lwp_sema_t * sp)21550Sstevel@tonic-gate lwp_sema_post(lwp_sema_t *sp)
21560Sstevel@tonic-gate {
21570Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
21580Sstevel@tonic-gate 	label_t ljb;
21590Sstevel@tonic-gate 	volatile int locked = 0;
21600Sstevel@tonic-gate 	volatile int watched = 0;
21610Sstevel@tonic-gate 	volatile uint16_t type = 0;
21620Sstevel@tonic-gate 	int count;
21630Sstevel@tonic-gate 	lwpchan_t lwpchan;
21640Sstevel@tonic-gate 	uchar_t waiters;
21650Sstevel@tonic-gate 	int error = 0;
21660Sstevel@tonic-gate 
21670Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
21680Sstevel@tonic-gate 		return (set_errno(EFAULT));
21690Sstevel@tonic-gate 
21700Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
21710Sstevel@tonic-gate 
21720Sstevel@tonic-gate 	if (on_fault(&ljb)) {
21730Sstevel@tonic-gate 		if (locked)
21740Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
21750Sstevel@tonic-gate 		error = EFAULT;
21760Sstevel@tonic-gate 		goto out;
21770Sstevel@tonic-gate 	}
21780Sstevel@tonic-gate 	/*
21796577Sraf 	 * Force Copy-on-write if necessary and ensure that the
21806577Sraf 	 * synchronization object resides in read/write memory.
21816577Sraf 	 * Cause an EFAULT return now if this is not so.
21820Sstevel@tonic-gate 	 */
21830Sstevel@tonic-gate 	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
21840Sstevel@tonic-gate 	suword16_noerr(&sp->sema_type, type);
21850Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
21860Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
21870Sstevel@tonic-gate 		error = EFAULT;
21880Sstevel@tonic-gate 		goto out;
21890Sstevel@tonic-gate 	}
21900Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
21910Sstevel@tonic-gate 	locked = 1;
21920Sstevel@tonic-gate 	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
21930Sstevel@tonic-gate 	if (count == _SEM_VALUE_MAX)
21940Sstevel@tonic-gate 		error = EOVERFLOW;
21950Sstevel@tonic-gate 	else
21960Sstevel@tonic-gate 		suword32_noerr(&sp->sema_count, ++count);
21970Sstevel@tonic-gate 	if (count == 1) {
21980Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
21990Sstevel@tonic-gate 		if (waiters) {
22000Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
22010Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
22020Sstevel@tonic-gate 		}
22030Sstevel@tonic-gate 	}
22040Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
22050Sstevel@tonic-gate out:
22060Sstevel@tonic-gate 	no_fault();
22070Sstevel@tonic-gate 	if (watched)
22080Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
22090Sstevel@tonic-gate 	if (error)
22100Sstevel@tonic-gate 		return (set_errno(error));
22110Sstevel@tonic-gate 	return (0);
22120Sstevel@tonic-gate }
22130Sstevel@tonic-gate 
/*
 * Bits kept in a sleeping thread's t_writer field by the rwlock code:
 * TRW_WANT_WRITE marks a queued writer, TRW_LOCK_GRANTED is set on a
 * thread by lwp_rwlock_release() when it is handed the lock.
 */
#define	TRW_WANT_WRITE		0x1
#define	TRW_LOCK_GRANTED	0x2

/*
 * Values of the rd_wr argument to lwp_rwlock_lock(): the lock kind,
 * optionally OR'ed with TRY_FLAG.
 */
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
22220Sstevel@tonic-gate 
22230Sstevel@tonic-gate /*
22240Sstevel@tonic-gate  * Release one writer or one or more readers. Compute the rwstate word to
22250Sstevel@tonic-gate  * reflect the new state of the queue. For a safe hand-off we copy the new
22260Sstevel@tonic-gate  * rwstate value back to userland before we wake any of the new lock holders.
22270Sstevel@tonic-gate  *
22280Sstevel@tonic-gate  * Note that sleepq_insert() implements a prioritized FIFO (with writers
22290Sstevel@tonic-gate  * being given precedence over readers of the same priority).
22300Sstevel@tonic-gate  *
22310Sstevel@tonic-gate  * If the first thread is a reader we scan the queue releasing all readers
22320Sstevel@tonic-gate  * until we hit a writer or the end of the queue. If the first thread is a
22334570Sraf  * writer we still need to check for another writer.
22340Sstevel@tonic-gate  */
22350Sstevel@tonic-gate void
lwp_rwlock_release(lwpchan_t * lwpchan,lwp_rwlock_t * rw)22360Sstevel@tonic-gate lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
22370Sstevel@tonic-gate {
22380Sstevel@tonic-gate 	sleepq_head_t *sqh;
22390Sstevel@tonic-gate 	kthread_t *tp;
22400Sstevel@tonic-gate 	kthread_t **tpp;
22410Sstevel@tonic-gate 	kthread_t *tpnext;
22420Sstevel@tonic-gate 	kthread_t *wakelist = NULL;
22430Sstevel@tonic-gate 	uint32_t rwstate = 0;
22440Sstevel@tonic-gate 	int wcount = 0;
22450Sstevel@tonic-gate 	int rcount = 0;
22460Sstevel@tonic-gate 
22470Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
22480Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);
22490Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
22500Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
22510Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
22520Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
22530Sstevel@tonic-gate 			if (tp->t_writer & TRW_WANT_WRITE) {
22540Sstevel@tonic-gate 				if ((wcount++ == 0) && (rcount == 0)) {
22550Sstevel@tonic-gate 					rwstate |= URW_WRITE_LOCKED;
22560Sstevel@tonic-gate 
22570Sstevel@tonic-gate 					/* Just one writer to wake. */
22580Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
22590Sstevel@tonic-gate 					wakelist = tp;
22600Sstevel@tonic-gate 
22610Sstevel@tonic-gate 					/* tpp already set for next thread. */
22620Sstevel@tonic-gate 					continue;
22630Sstevel@tonic-gate 				} else {
22644570Sraf 					rwstate |= URW_HAS_WAITERS;
22650Sstevel@tonic-gate 					/* We need look no further. */
22660Sstevel@tonic-gate 					break;
22670Sstevel@tonic-gate 				}
22680Sstevel@tonic-gate 			} else {
22690Sstevel@tonic-gate 				rcount++;
22700Sstevel@tonic-gate 				if (wcount == 0) {
22710Sstevel@tonic-gate 					rwstate++;
22720Sstevel@tonic-gate 
22730Sstevel@tonic-gate 					/* Add reader to wake list. */
22740Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
22750Sstevel@tonic-gate 					tp->t_link = wakelist;
22760Sstevel@tonic-gate 					wakelist = tp;
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 					/* tpp already set for next thread. */
22790Sstevel@tonic-gate 					continue;
22804570Sraf 				} else {
22810Sstevel@tonic-gate 					rwstate |= URW_HAS_WAITERS;
22824570Sraf 					/* We need look no further. */
22834570Sraf 					break;
22844570Sraf 				}
22850Sstevel@tonic-gate 			}
22860Sstevel@tonic-gate 		}
22870Sstevel@tonic-gate 		tpp = &tp->t_link;
22880Sstevel@tonic-gate 	}
22890Sstevel@tonic-gate 
22900Sstevel@tonic-gate 	/* Copy the new rwstate back to userland. */
22910Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
22920Sstevel@tonic-gate 
22930Sstevel@tonic-gate 	/* Wake the new lock holder(s) up. */
22940Sstevel@tonic-gate 	tp = wakelist;
22950Sstevel@tonic-gate 	while (tp != NULL) {
22960Sstevel@tonic-gate 		DTRACE_SCHED1(wakeup, kthread_t *, tp);
22970Sstevel@tonic-gate 		tp->t_wchan0 = NULL;
22980Sstevel@tonic-gate 		tp->t_wchan = NULL;
22990Sstevel@tonic-gate 		tp->t_sobj_ops = NULL;
23000Sstevel@tonic-gate 		tp->t_writer |= TRW_LOCK_GRANTED;
23010Sstevel@tonic-gate 		tpnext = tp->t_link;
23020Sstevel@tonic-gate 		tp->t_link = NULL;
23030Sstevel@tonic-gate 		CL_WAKEUP(tp);
23040Sstevel@tonic-gate 		thread_unlock_high(tp);
23050Sstevel@tonic-gate 		tp = tpnext;
23060Sstevel@tonic-gate 	}
23070Sstevel@tonic-gate 
23080Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
23090Sstevel@tonic-gate }
23100Sstevel@tonic-gate 
23110Sstevel@tonic-gate /*
23120Sstevel@tonic-gate  * We enter here holding the user-level mutex, which we must release before
23130Sstevel@tonic-gate  * returning or blocking. Based on lwp_cond_wait().
23140Sstevel@tonic-gate  */
23150Sstevel@tonic-gate static int
lwp_rwlock_lock(lwp_rwlock_t * rw,timespec_t * tsp,int rd_wr)23160Sstevel@tonic-gate lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
23170Sstevel@tonic-gate {
23180Sstevel@tonic-gate 	lwp_mutex_t *mp = NULL;
23190Sstevel@tonic-gate 	kthread_t *t = curthread;
23200Sstevel@tonic-gate 	kthread_t *tp;
23210Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
23220Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
23230Sstevel@tonic-gate 	lwp_timer_t lwpt;
23240Sstevel@tonic-gate 	lwpchan_t lwpchan;
23250Sstevel@tonic-gate 	lwpchan_t mlwpchan;
23260Sstevel@tonic-gate 	caddr_t timedwait;
23270Sstevel@tonic-gate 	volatile uint16_t type = 0;
23280Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
23290Sstevel@tonic-gate 	uchar_t mwaiters;
23300Sstevel@tonic-gate 	volatile int error = 0;
23310Sstevel@tonic-gate 	int time_error;
23320Sstevel@tonic-gate 	clock_t tim = -1;
23330Sstevel@tonic-gate 	volatile int locked = 0;
23340Sstevel@tonic-gate 	volatile int mlocked = 0;
23350Sstevel@tonic-gate 	volatile int watched = 0;
23360Sstevel@tonic-gate 	volatile int mwatched = 0;
23370Sstevel@tonic-gate 	label_t ljb;
23380Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
23390Sstevel@tonic-gate 	int imm_timeout = 0;
23400Sstevel@tonic-gate 	int try_flag;
23410Sstevel@tonic-gate 	uint32_t rwstate;
23420Sstevel@tonic-gate 	int acquired = 0;
23430Sstevel@tonic-gate 
23440Sstevel@tonic-gate 	/* We only check rw because the mutex is included in it. */
23450Sstevel@tonic-gate 	if ((caddr_t)rw >= p->p_as->a_userlimit)
23460Sstevel@tonic-gate 		return (set_errno(EFAULT));
23470Sstevel@tonic-gate 
234810230SRoger.Faulkner@Sun.COM 	/*
234910230SRoger.Faulkner@Sun.COM 	 * Put the lwp in an orderly state for debugging,
235010230SRoger.Faulkner@Sun.COM 	 * in case we are stopped while sleeping, below.
235110230SRoger.Faulkner@Sun.COM 	 */
235210230SRoger.Faulkner@Sun.COM 	prstop(PR_REQUESTED, 0);
235310230SRoger.Faulkner@Sun.COM 
23540Sstevel@tonic-gate 	/* We must only report this error if we are about to sleep (later). */
23550Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
23560Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
23570Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
23580Sstevel@tonic-gate 		imm_timeout = 1;
23590Sstevel@tonic-gate 		timedwait = NULL;
23600Sstevel@tonic-gate 	}
23610Sstevel@tonic-gate 
23620Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
23630Sstevel@tonic-gate 
23640Sstevel@tonic-gate 	if (on_fault(&ljb)) {
23650Sstevel@tonic-gate 		if (no_lwpchan) {
23660Sstevel@tonic-gate 			error = EFAULT;
23670Sstevel@tonic-gate 			goto out_nodrop;
23680Sstevel@tonic-gate 		}
23690Sstevel@tonic-gate 		if (mlocked) {
23700Sstevel@tonic-gate 			mlocked = 0;
23710Sstevel@tonic-gate 			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
23720Sstevel@tonic-gate 		}
23730Sstevel@tonic-gate 		if (locked) {
23740Sstevel@tonic-gate 			locked = 0;
23750Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
23760Sstevel@tonic-gate 		}
23770Sstevel@tonic-gate 		/*
23780Sstevel@tonic-gate 		 * Set up another on_fault() for a possible fault
23790Sstevel@tonic-gate 		 * on the user lock accessed at "out_drop".
23800Sstevel@tonic-gate 		 */
23810Sstevel@tonic-gate 		if (on_fault(&ljb)) {
23820Sstevel@tonic-gate 			if (mlocked) {
23830Sstevel@tonic-gate 				mlocked = 0;
23840Sstevel@tonic-gate 				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
23850Sstevel@tonic-gate 			}
23860Sstevel@tonic-gate 			error = EFAULT;
23870Sstevel@tonic-gate 			goto out_nodrop;
23880Sstevel@tonic-gate 		}
23890Sstevel@tonic-gate 		error = EFAULT;
23900Sstevel@tonic-gate 		goto out_nodrop;
23910Sstevel@tonic-gate 	}
23920Sstevel@tonic-gate 
23930Sstevel@tonic-gate 	/* Process rd_wr (including sanity check). */
23940Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
23950Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
23960Sstevel@tonic-gate 	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
23970Sstevel@tonic-gate 		error = EINVAL;
23980Sstevel@tonic-gate 		goto out_nodrop;
23990Sstevel@tonic-gate 	}
24000Sstevel@tonic-gate 
24016577Sraf 	/*
24026577Sraf 	 * Force Copy-on-write if necessary and ensure that the
24036577Sraf 	 * synchronization object resides in read/write memory.
24046577Sraf 	 * Cause an EFAULT return now if this is not so.
24056577Sraf 	 */
24060Sstevel@tonic-gate 	mp = &rw->mutex;
24070Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
24080Sstevel@tonic-gate 	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
24096577Sraf 	suword8_noerr(&mp->mutex_type, mtype);
24106577Sraf 	suword16_noerr(&rw->rwlock_type, type);
24116577Sraf 
24126577Sraf 	/* We can only continue for simple USYNC_PROCESS locks. */
24130Sstevel@tonic-gate 	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
24140Sstevel@tonic-gate 		error = EINVAL;
24150Sstevel@tonic-gate 		goto out_nodrop;
24160Sstevel@tonic-gate 	}
24170Sstevel@tonic-gate 
24180Sstevel@tonic-gate 	/* Convert user level mutex, "mp", to a unique lwpchan. */
24190Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
24200Sstevel@tonic-gate 	    &mlwpchan, LWPCHAN_MPPOOL)) {
24210Sstevel@tonic-gate 		error = EFAULT;
24220Sstevel@tonic-gate 		goto out_nodrop;
24230Sstevel@tonic-gate 	}
24240Sstevel@tonic-gate 
24250Sstevel@tonic-gate 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
24260Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
24270Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
24280Sstevel@tonic-gate 		error = EFAULT;
24290Sstevel@tonic-gate 		goto out_nodrop;
24300Sstevel@tonic-gate 	}
24310Sstevel@tonic-gate 
24320Sstevel@tonic-gate 	no_lwpchan = 0;
24330Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
24340Sstevel@tonic-gate 	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
24350Sstevel@tonic-gate 
24360Sstevel@tonic-gate 	/*
24370Sstevel@tonic-gate 	 * lwpchan_lock() ensures that the calling LWP is put to sleep
24380Sstevel@tonic-gate 	 * atomically with respect to a possible wakeup which is a result
24390Sstevel@tonic-gate 	 * of lwp_rwlock_unlock().
24400Sstevel@tonic-gate 	 *
24410Sstevel@tonic-gate 	 * What's misleading is that the LWP is put to sleep after the
24420Sstevel@tonic-gate 	 * rwlock's mutex is released. This is OK as long as the release
24430Sstevel@tonic-gate 	 * operation is also done while holding mlwpchan. The LWP is then
24440Sstevel@tonic-gate 	 * put to sleep when the possibility of pagefaulting or sleeping
24450Sstevel@tonic-gate 	 * has been completely eliminated.
24460Sstevel@tonic-gate 	 */
24470Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
24480Sstevel@tonic-gate 	locked = 1;
24490Sstevel@tonic-gate 	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
24500Sstevel@tonic-gate 	mlocked = 1;
24510Sstevel@tonic-gate 
24520Sstevel@tonic-gate 	/*
24530Sstevel@tonic-gate 	 * Fetch the current rwlock state.
24540Sstevel@tonic-gate 	 *
24554570Sraf 	 * The possibility of spurious wake-ups or killed waiters means
24564570Sraf 	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
24574570Sraf 	 * We only fix these if they are important to us.
24580Sstevel@tonic-gate 	 *
24590Sstevel@tonic-gate 	 * Although various error states can be observed here (e.g. the lock
24600Sstevel@tonic-gate 	 * is not held, but there are waiters) we assume these are application
24610Sstevel@tonic-gate 	 * errors and so we take no corrective action.
24620Sstevel@tonic-gate 	 */
24630Sstevel@tonic-gate 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
24644570Sraf 	/*
24654570Sraf 	 * We cannot legitimately get here from user-level
24664570Sraf 	 * without URW_HAS_WAITERS being set.
24674570Sraf 	 * Set it now to guard against user-level error.
24684570Sraf 	 */
24694570Sraf 	rwstate |= URW_HAS_WAITERS;
24700Sstevel@tonic-gate 
24710Sstevel@tonic-gate 	/*
24724570Sraf 	 * We can try only if the lock isn't held by a writer.
24730Sstevel@tonic-gate 	 */
24744570Sraf 	if (!(rwstate & URW_WRITE_LOCKED)) {
24750Sstevel@tonic-gate 		tp = lwp_queue_waiter(&lwpchan);
24760Sstevel@tonic-gate 		if (tp == NULL) {
24770Sstevel@tonic-gate 			/*
24780Sstevel@tonic-gate 			 * Hmmm, rwstate indicates waiters but there are
24790Sstevel@tonic-gate 			 * none queued. This could just be the result of a
24804570Sraf 			 * spurious wakeup, so let's ignore it.
24814570Sraf 			 *
24824570Sraf 			 * We now have a chance to acquire the lock
24834570Sraf 			 * uncontended, but this is the last chance for
24844570Sraf 			 * a writer to acquire the lock without blocking.
24850Sstevel@tonic-gate 			 */
24860Sstevel@tonic-gate 			if (rd_wr == READ_LOCK) {
24870Sstevel@tonic-gate 				rwstate++;
24880Sstevel@tonic-gate 				acquired = 1;
24894570Sraf 			} else if ((rwstate & URW_READERS_MASK) == 0) {
24904570Sraf 				rwstate |= URW_WRITE_LOCKED;
24910Sstevel@tonic-gate 				acquired = 1;
24920Sstevel@tonic-gate 			}
24930Sstevel@tonic-gate 		} else if (rd_wr == READ_LOCK) {
24940Sstevel@tonic-gate 			/*
24950Sstevel@tonic-gate 			 * This is the last chance for a reader to acquire
24960Sstevel@tonic-gate 			 * the lock now, but it can only do so if there is
24970Sstevel@tonic-gate 			 * no writer of equal or greater priority at the
24980Sstevel@tonic-gate 			 * head of the queue .
24990Sstevel@tonic-gate 			 *
25000Sstevel@tonic-gate 			 * It is also just possible that there is a reader
25010Sstevel@tonic-gate 			 * at the head of the queue. This may be the result
25020Sstevel@tonic-gate 			 * of a spurious wakeup or an application failure.
25030Sstevel@tonic-gate 			 * In this case we only acquire the lock if we have
25040Sstevel@tonic-gate 			 * equal or greater priority. It is not our job to
25050Sstevel@tonic-gate 			 * release spurious waiters.
25060Sstevel@tonic-gate 			 */
25070Sstevel@tonic-gate 			pri_t our_pri = DISP_PRIO(t);
25080Sstevel@tonic-gate 			pri_t his_pri = DISP_PRIO(tp);
25090Sstevel@tonic-gate 
25100Sstevel@tonic-gate 			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
25110Sstevel@tonic-gate 			    !(tp->t_writer & TRW_WANT_WRITE))) {
25120Sstevel@tonic-gate 				rwstate++;
25130Sstevel@tonic-gate 				acquired = 1;
25140Sstevel@tonic-gate 			}
25150Sstevel@tonic-gate 		}
25160Sstevel@tonic-gate 	}
25170Sstevel@tonic-gate 
25180Sstevel@tonic-gate 	if (acquired || try_flag || time_error) {
25190Sstevel@tonic-gate 		/*
25204570Sraf 		 * We're not going to block this time.
25210Sstevel@tonic-gate 		 */
25220Sstevel@tonic-gate 		suword32_noerr(&rw->rwlock_readers, rwstate);
25230Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
25240Sstevel@tonic-gate 		locked = 0;
25250Sstevel@tonic-gate 
25260Sstevel@tonic-gate 		if (acquired) {
25270Sstevel@tonic-gate 			/*
25280Sstevel@tonic-gate 			 * Got the lock!
25290Sstevel@tonic-gate 			 */
25300Sstevel@tonic-gate 			error = 0;
25310Sstevel@tonic-gate 
25320Sstevel@tonic-gate 		} else if (try_flag) {
25330Sstevel@tonic-gate 			/*
25340Sstevel@tonic-gate 			 * We didn't get the lock and we're about to block.
25350Sstevel@tonic-gate 			 * If we're doing a trylock, return EBUSY instead.
25360Sstevel@tonic-gate 			 */
25370Sstevel@tonic-gate 			error = EBUSY;
25380Sstevel@tonic-gate 
25390Sstevel@tonic-gate 		} else if (time_error) {
25400Sstevel@tonic-gate 			/*
25410Sstevel@tonic-gate 			 * The SUSV3 POSIX spec is very clear that we should
25420Sstevel@tonic-gate 			 * get no error from validating the timer (above)
25430Sstevel@tonic-gate 			 * until we would actually sleep.
25440Sstevel@tonic-gate 			 */
25450Sstevel@tonic-gate 			error = time_error;
25460Sstevel@tonic-gate 		}
25470Sstevel@tonic-gate 
25480Sstevel@tonic-gate 		goto out_drop;
25490Sstevel@tonic-gate 	}
25500Sstevel@tonic-gate 
25510Sstevel@tonic-gate 	/*
25520Sstevel@tonic-gate 	 * We're about to block, so indicate what kind of waiter we are.
25530Sstevel@tonic-gate 	 */
25540Sstevel@tonic-gate 	t->t_writer = 0;
25554570Sraf 	if (rd_wr == WRITE_LOCK)
25560Sstevel@tonic-gate 		t->t_writer = TRW_WANT_WRITE;
25570Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate 	/*
25600Sstevel@tonic-gate 	 * Unlock the rwlock's mutex (pagefaults are possible here).
25610Sstevel@tonic-gate 	 */
256210887SRoger.Faulkner@Sun.COM 	set_owner_pid(mp, 0, 0);
25630Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
25640Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
25650Sstevel@tonic-gate 	if (mwaiters != 0) {
25660Sstevel@tonic-gate 		/*
25670Sstevel@tonic-gate 		 * Given the locking of mlwpchan around the release of
25680Sstevel@tonic-gate 		 * the mutex and checking for waiters, the following
25690Sstevel@tonic-gate 		 * call to lwp_release() can fail ONLY if the lock
25700Sstevel@tonic-gate 		 * acquirer is interrupted after setting the waiter bit,
25710Sstevel@tonic-gate 		 * calling lwp_block() and releasing mlwpchan.
25720Sstevel@tonic-gate 		 * In this case, it could get pulled off the LWP sleep
25730Sstevel@tonic-gate 		 * queue (via setrun()) before the following call to
25740Sstevel@tonic-gate 		 * lwp_release() occurs, and the lock requestor will
25750Sstevel@tonic-gate 		 * update the waiter bit correctly by re-evaluating it.
25760Sstevel@tonic-gate 		 */
25776057Sraf 		if (lwp_release(&mlwpchan, &mwaiters, 0))
25780Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
25790Sstevel@tonic-gate 	}
25800Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
25810Sstevel@tonic-gate 	mlocked = 0;
25820Sstevel@tonic-gate 	no_fault();
25830Sstevel@tonic-gate 
25840Sstevel@tonic-gate 	if (mwatched) {
25850Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
25860Sstevel@tonic-gate 		mwatched = 0;
25870Sstevel@tonic-gate 	}
25880Sstevel@tonic-gate 	if (watched) {
25890Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
25900Sstevel@tonic-gate 		watched = 0;
25910Sstevel@tonic-gate 	}
25920Sstevel@tonic-gate 
25930Sstevel@tonic-gate 	if (timedwait) {
25940Sstevel@tonic-gate 		/*
25950Sstevel@tonic-gate 		 * If we successfully queue the timeout,
25960Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
25970Sstevel@tonic-gate 		 * we are on the sleep queue (below).
25980Sstevel@tonic-gate 		 */
25990Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
26000Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
26010Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
26020Sstevel@tonic-gate 			imm_timeout = 1;
26030Sstevel@tonic-gate 			timedwait = NULL;
26040Sstevel@tonic-gate 		}
26050Sstevel@tonic-gate 	}
26060Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
26070Sstevel@tonic-gate 	lwp_block(&lwpchan);
26080Sstevel@tonic-gate 
26090Sstevel@tonic-gate 	/*
26100Sstevel@tonic-gate 	 * Nothing should happen to cause the LWP to go to sleep until after
26110Sstevel@tonic-gate 	 * it returns from swtch().
26120Sstevel@tonic-gate 	 */
26130Sstevel@tonic-gate 	if (timedwait)
26140Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
26150Sstevel@tonic-gate 	locked = 0;
26160Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
26176622Sraf 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
26180Sstevel@tonic-gate 		setrun(t);
26190Sstevel@tonic-gate 	swtch();
26200Sstevel@tonic-gate 
26210Sstevel@tonic-gate 	/*
26220Sstevel@tonic-gate 	 * We're back, but we need to work out why. Were we interrupted? Did
26230Sstevel@tonic-gate 	 * we timeout? Were we granted the lock?
26240Sstevel@tonic-gate 	 */
26250Sstevel@tonic-gate 	error = EAGAIN;
26260Sstevel@tonic-gate 	acquired = (t->t_writer & TRW_LOCK_GRANTED);
26270Sstevel@tonic-gate 	t->t_writer = 0;
26280Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
26290Sstevel@tonic-gate 	if (timedwait)
26300Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
26310Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
26320Sstevel@tonic-gate 		error = EINTR;
26330Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
26340Sstevel@tonic-gate 		error = ETIME;
26350Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
26360Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
26370Sstevel@tonic-gate 	setallwatch();
26380Sstevel@tonic-gate 
26390Sstevel@tonic-gate 	/*
26400Sstevel@tonic-gate 	 * If we were granted the lock we don't care about EINTR or ETIME.
26410Sstevel@tonic-gate 	 */
26420Sstevel@tonic-gate 	if (acquired)
26430Sstevel@tonic-gate 		error = 0;
26440Sstevel@tonic-gate 
26450Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
26460Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
26470Sstevel@tonic-gate 
26480Sstevel@tonic-gate 	if (error)
26490Sstevel@tonic-gate 		return (set_errno(error));
26500Sstevel@tonic-gate 	return (0);
26510Sstevel@tonic-gate 
26520Sstevel@tonic-gate out_drop:
26530Sstevel@tonic-gate 	/*
26540Sstevel@tonic-gate 	 * Make sure that the user level lock is dropped before returning
26550Sstevel@tonic-gate 	 * to the caller.
26560Sstevel@tonic-gate 	 */
26570Sstevel@tonic-gate 	if (!mlocked) {
26580Sstevel@tonic-gate 		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
26590Sstevel@tonic-gate 		mlocked = 1;
26600Sstevel@tonic-gate 	}
266110887SRoger.Faulkner@Sun.COM 	set_owner_pid(mp, 0, 0);
26620Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
26630Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
26640Sstevel@tonic-gate 	if (mwaiters != 0) {
26650Sstevel@tonic-gate 		/*
26660Sstevel@tonic-gate 		 * See comment above on lock clearing and lwp_release()
26670Sstevel@tonic-gate 		 * success/failure.
26680Sstevel@tonic-gate 		 */
26696057Sraf 		if (lwp_release(&mlwpchan, &mwaiters, 0))
26700Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
26710Sstevel@tonic-gate 	}
26720Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
26730Sstevel@tonic-gate 	mlocked = 0;
26740Sstevel@tonic-gate 
26750Sstevel@tonic-gate out_nodrop:
26760Sstevel@tonic-gate 	no_fault();
26770Sstevel@tonic-gate 	if (mwatched)
26780Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
26790Sstevel@tonic-gate 	if (watched)
26800Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
26810Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
26820Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
26830Sstevel@tonic-gate 	if (error)
26840Sstevel@tonic-gate 		return (set_errno(error));
26850Sstevel@tonic-gate 	return (0);
26860Sstevel@tonic-gate }
26870Sstevel@tonic-gate 
26880Sstevel@tonic-gate /*
26890Sstevel@tonic-gate  * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
26900Sstevel@tonic-gate  * we never drop the lock.
26910Sstevel@tonic-gate  */
26920Sstevel@tonic-gate static int
lwp_rwlock_unlock(lwp_rwlock_t * rw)26930Sstevel@tonic-gate lwp_rwlock_unlock(lwp_rwlock_t *rw)
26940Sstevel@tonic-gate {
26950Sstevel@tonic-gate 	kthread_t *t = curthread;
26960Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
26970Sstevel@tonic-gate 	lwpchan_t lwpchan;
26980Sstevel@tonic-gate 	volatile uint16_t type = 0;
26990Sstevel@tonic-gate 	volatile int error = 0;
27000Sstevel@tonic-gate 	volatile int locked = 0;
27010Sstevel@tonic-gate 	volatile int watched = 0;
27020Sstevel@tonic-gate 	label_t ljb;
27030Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
27040Sstevel@tonic-gate 	uint32_t rwstate;
27050Sstevel@tonic-gate 
27060Sstevel@tonic-gate 	/* We only check rw because the mutex is included in it. */
27070Sstevel@tonic-gate 	if ((caddr_t)rw >= p->p_as->a_userlimit)
27080Sstevel@tonic-gate 		return (set_errno(EFAULT));
27090Sstevel@tonic-gate 
27100Sstevel@tonic-gate 	if (on_fault(&ljb)) {
27110Sstevel@tonic-gate 		if (no_lwpchan) {
27120Sstevel@tonic-gate 			error = EFAULT;
27130Sstevel@tonic-gate 			goto out_nodrop;
27140Sstevel@tonic-gate 		}
27150Sstevel@tonic-gate 		if (locked) {
27160Sstevel@tonic-gate 			locked = 0;
27170Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
27180Sstevel@tonic-gate 		}
27190Sstevel@tonic-gate 		error = EFAULT;
27200Sstevel@tonic-gate 		goto out_nodrop;
27210Sstevel@tonic-gate 	}
27220Sstevel@tonic-gate 
27236577Sraf 	/*
27246577Sraf 	 * Force Copy-on-write if necessary and ensure that the
27256577Sraf 	 * synchronization object resides in read/write memory.
27266577Sraf 	 * Cause an EFAULT return now if this is not so.
27276577Sraf 	 */
27286577Sraf 	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
27296577Sraf 	suword16_noerr(&rw->rwlock_type, type);
27306577Sraf 
27310Sstevel@tonic-gate 	/* We can only continue for simple USYNC_PROCESS locks. */
27320Sstevel@tonic-gate 	if (type != USYNC_PROCESS) {
27330Sstevel@tonic-gate 		error = EINVAL;
27340Sstevel@tonic-gate 		goto out_nodrop;
27350Sstevel@tonic-gate 	}
27360Sstevel@tonic-gate 
27370Sstevel@tonic-gate 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
27380Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
27390Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
27400Sstevel@tonic-gate 		error = EFAULT;
27410Sstevel@tonic-gate 		goto out_nodrop;
27420Sstevel@tonic-gate 	}
27430Sstevel@tonic-gate 
27440Sstevel@tonic-gate 	no_lwpchan = 0;
27450Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
27460Sstevel@tonic-gate 
27470Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
27480Sstevel@tonic-gate 	locked = 1;
27490Sstevel@tonic-gate 
27500Sstevel@tonic-gate 	/*
27510Sstevel@tonic-gate 	 * We can resolve multiple readers (except the last reader) here.
27520Sstevel@tonic-gate 	 * For the last reader or a writer we need lwp_rwlock_release(),
27530Sstevel@tonic-gate 	 * to which we also delegate the task of copying the new rwstate
27540Sstevel@tonic-gate 	 * back to userland (see the comment there).
27550Sstevel@tonic-gate 	 */
27560Sstevel@tonic-gate 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
27570Sstevel@tonic-gate 	if (rwstate & URW_WRITE_LOCKED)
27580Sstevel@tonic-gate 		lwp_rwlock_release(&lwpchan, rw);
27590Sstevel@tonic-gate 	else if ((rwstate & URW_READERS_MASK) > 0) {
27600Sstevel@tonic-gate 		rwstate--;
27610Sstevel@tonic-gate 		if ((rwstate & URW_READERS_MASK) == 0)
27620Sstevel@tonic-gate 			lwp_rwlock_release(&lwpchan, rw);
27630Sstevel@tonic-gate 		else
27640Sstevel@tonic-gate 			suword32_noerr(&rw->rwlock_readers, rwstate);
27650Sstevel@tonic-gate 	}
27660Sstevel@tonic-gate 
27670Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
27680Sstevel@tonic-gate 	locked = 0;
27690Sstevel@tonic-gate 	error = 0;
27700Sstevel@tonic-gate 
27710Sstevel@tonic-gate out_nodrop:
27720Sstevel@tonic-gate 	no_fault();
27730Sstevel@tonic-gate 	if (watched)
27740Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
27750Sstevel@tonic-gate 	if (error)
27760Sstevel@tonic-gate 		return (set_errno(error));
27770Sstevel@tonic-gate 	return (0);
27780Sstevel@tonic-gate }
27790Sstevel@tonic-gate 
27800Sstevel@tonic-gate int
lwp_rwlock_sys(int subcode,lwp_rwlock_t * rwlp,timespec_t * tsp)27810Sstevel@tonic-gate lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
27820Sstevel@tonic-gate {
27830Sstevel@tonic-gate 	switch (subcode) {
27840Sstevel@tonic-gate 	case 0:
27850Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
27860Sstevel@tonic-gate 	case 1:
27870Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
27880Sstevel@tonic-gate 	case 2:
27890Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
27900Sstevel@tonic-gate 	case 3:
27910Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
27920Sstevel@tonic-gate 	case 4:
27930Sstevel@tonic-gate 		return (lwp_rwlock_unlock(rwlp));
27940Sstevel@tonic-gate 	}
27950Sstevel@tonic-gate 	return (set_errno(EINVAL));
27960Sstevel@tonic-gate }
27970Sstevel@tonic-gate 
27980Sstevel@tonic-gate /*
27990Sstevel@tonic-gate  * Return the owner of the user-level s-object.
28000Sstevel@tonic-gate  * Since we can't really do this, return NULL.
28010Sstevel@tonic-gate  */
28020Sstevel@tonic-gate /* ARGSUSED */
28030Sstevel@tonic-gate static kthread_t *
lwpsobj_owner(caddr_t sobj)28040Sstevel@tonic-gate lwpsobj_owner(caddr_t sobj)
28050Sstevel@tonic-gate {
28060Sstevel@tonic-gate 	return ((kthread_t *)NULL);
28070Sstevel@tonic-gate }
28080Sstevel@tonic-gate 
28090Sstevel@tonic-gate /*
28100Sstevel@tonic-gate  * Wake up a thread asleep on a user-level synchronization
28110Sstevel@tonic-gate  * object.
28120Sstevel@tonic-gate  */
28130Sstevel@tonic-gate static void
lwp_unsleep(kthread_t * t)28140Sstevel@tonic-gate lwp_unsleep(kthread_t *t)
28150Sstevel@tonic-gate {
28160Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
28170Sstevel@tonic-gate 	if (t->t_wchan0 != NULL) {
28180Sstevel@tonic-gate 		sleepq_head_t *sqh;
28190Sstevel@tonic-gate 		sleepq_t *sqp = t->t_sleepq;
28200Sstevel@tonic-gate 
28210Sstevel@tonic-gate 		if (sqp != NULL) {
28220Sstevel@tonic-gate 			sqh = lwpsqhash(&t->t_lwpchan);
28230Sstevel@tonic-gate 			ASSERT(&sqh->sq_queue == sqp);
28240Sstevel@tonic-gate 			sleepq_unsleep(t);
28250Sstevel@tonic-gate 			disp_lock_exit_high(&sqh->sq_lock);
28260Sstevel@tonic-gate 			CL_SETRUN(t);
28270Sstevel@tonic-gate 			return;
28280Sstevel@tonic-gate 		}
28290Sstevel@tonic-gate 	}
28300Sstevel@tonic-gate 	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
28310Sstevel@tonic-gate }
28320Sstevel@tonic-gate 
28330Sstevel@tonic-gate /*
28340Sstevel@tonic-gate  * Change the priority of a thread asleep on a user-level
28350Sstevel@tonic-gate  * synchronization object. To maintain proper priority order,
28360Sstevel@tonic-gate  * we:
28370Sstevel@tonic-gate  *	o dequeue the thread.
28380Sstevel@tonic-gate  *	o change its priority.
28390Sstevel@tonic-gate  *	o re-enqueue the thread.
28400Sstevel@tonic-gate  * Assumption: the thread is locked on entry.
28410Sstevel@tonic-gate  */
28420Sstevel@tonic-gate static void
lwp_change_pri(kthread_t * t,pri_t pri,pri_t * t_prip)28430Sstevel@tonic-gate lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
28440Sstevel@tonic-gate {
28450Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
28460Sstevel@tonic-gate 	if (t->t_wchan0 != NULL) {
28470Sstevel@tonic-gate 		sleepq_t   *sqp = t->t_sleepq;
28480Sstevel@tonic-gate 
28490Sstevel@tonic-gate 		sleepq_dequeue(t);
28500Sstevel@tonic-gate 		*t_prip = pri;
28510Sstevel@tonic-gate 		sleepq_insert(sqp, t);
28520Sstevel@tonic-gate 	} else
28530Sstevel@tonic-gate 		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
28540Sstevel@tonic-gate }
28550Sstevel@tonic-gate 
28560Sstevel@tonic-gate /*
28577751SRoger.Faulkner@Sun.COM  * Clean up a left-over process-shared robust mutex
28580Sstevel@tonic-gate  */
28590Sstevel@tonic-gate static void
lwp_mutex_cleanup(lwpchan_entry_t * ent,uint16_t lockflg)28600Sstevel@tonic-gate lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
28610Sstevel@tonic-gate {
28620Sstevel@tonic-gate 	uint16_t flag;
28630Sstevel@tonic-gate 	uchar_t waiters;
28640Sstevel@tonic-gate 	label_t ljb;
28650Sstevel@tonic-gate 	pid_t owner_pid;
28660Sstevel@tonic-gate 	lwp_mutex_t *lp;
28670Sstevel@tonic-gate 	volatile int locked = 0;
28680Sstevel@tonic-gate 	volatile int watched = 0;
28694574Sraf 	volatile struct upimutex *upimutex = NULL;
28704574Sraf 	volatile int upilocked = 0;
28710Sstevel@tonic-gate 
28727751SRoger.Faulkner@Sun.COM 	if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
28737751SRoger.Faulkner@Sun.COM 	    != (USYNC_PROCESS | LOCK_ROBUST))
28747751SRoger.Faulkner@Sun.COM 		return;
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate 	lp = (lwp_mutex_t *)ent->lwpchan_addr;
28770Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
28780Sstevel@tonic-gate 	if (on_fault(&ljb)) {
28790Sstevel@tonic-gate 		if (locked)
28800Sstevel@tonic-gate 			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
28814574Sraf 		if (upilocked)
28824574Sraf 			upimutex_unlock((upimutex_t *)upimutex, 0);
28830Sstevel@tonic-gate 		goto out;
28840Sstevel@tonic-gate 	}
28857751SRoger.Faulkner@Sun.COM 
28867751SRoger.Faulkner@Sun.COM 	fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
28877751SRoger.Faulkner@Sun.COM 
28884574Sraf 	if (UPIMUTEX(ent->lwpchan_type)) {
28894574Sraf 		lwpchan_t lwpchan = ent->lwpchan_lwpchan;
28904574Sraf 		upib_t *upibp = &UPI_CHAIN(lwpchan);
28914574Sraf 
28927751SRoger.Faulkner@Sun.COM 		if (owner_pid != curproc->p_pid)
28937751SRoger.Faulkner@Sun.COM 			goto out;
28944574Sraf 		mutex_enter(&upibp->upib_lock);
28954574Sraf 		upimutex = upi_get(upibp, &lwpchan);
28964574Sraf 		if (upimutex == NULL || upimutex->upi_owner != curthread) {
28974574Sraf 			mutex_exit(&upibp->upib_lock);
28984574Sraf 			goto out;
28994574Sraf 		}
29004574Sraf 		mutex_exit(&upibp->upib_lock);
29014574Sraf 		upilocked = 1;
29024574Sraf 		flag = lwp_clear_mutex(lp, lockflg);
29034574Sraf 		suword8_noerr(&lp->mutex_lockw, 0);
29044574Sraf 		upimutex_unlock((upimutex_t *)upimutex, flag);
29054574Sraf 	} else {
29064574Sraf 		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
29074574Sraf 		locked = 1;
29087751SRoger.Faulkner@Sun.COM 		/*
29097751SRoger.Faulkner@Sun.COM 		 * Clear the spinners count because one of our
29107751SRoger.Faulkner@Sun.COM 		 * threads could have been spinning for this lock
29117751SRoger.Faulkner@Sun.COM 		 * at user level when the process was suddenly killed.
29127751SRoger.Faulkner@Sun.COM 		 * There is no harm in this since user-level libc code
29137751SRoger.Faulkner@Sun.COM 		 * will adapt to the sudden change in the spinner count.
29147751SRoger.Faulkner@Sun.COM 		 */
29157751SRoger.Faulkner@Sun.COM 		suword8_noerr(&lp->mutex_spinners, 0);
29167751SRoger.Faulkner@Sun.COM 		if (owner_pid != curproc->p_pid) {
29176057Sraf 			/*
29187751SRoger.Faulkner@Sun.COM 			 * We are not the owner.  There may or may not be one.
29197751SRoger.Faulkner@Sun.COM 			 * If there are waiters, we wake up one or all of them.
29207751SRoger.Faulkner@Sun.COM 			 * It doesn't hurt to wake them up in error since
29217751SRoger.Faulkner@Sun.COM 			 * they will just retry the lock and go to sleep
29227751SRoger.Faulkner@Sun.COM 			 * again if necessary.
29236057Sraf 			 */
29246057Sraf 			fuword8_noerr(&lp->mutex_waiters, &waiters);
29256057Sraf 			if (waiters != 0) {	/* there are waiters */
29266057Sraf 				fuword16_noerr(&lp->mutex_flag, &flag);
29276057Sraf 				if (flag & LOCK_NOTRECOVERABLE) {
29286057Sraf 					lwp_release_all(&ent->lwpchan_lwpchan);
29296057Sraf 					suword8_noerr(&lp->mutex_waiters, 0);
29306057Sraf 				} else if (lwp_release(&ent->lwpchan_lwpchan,
29316057Sraf 				    &waiters, 0)) {
29326057Sraf 					suword8_noerr(&lp->mutex_waiters,
29336057Sraf 					    waiters);
29346057Sraf 				}
29356057Sraf 			}
29366057Sraf 		} else {
29377751SRoger.Faulkner@Sun.COM 			/*
29387751SRoger.Faulkner@Sun.COM 			 * We are the owner.  Release it.
29397751SRoger.Faulkner@Sun.COM 			 */
29406057Sraf 			(void) lwp_clear_mutex(lp, lockflg);
29416057Sraf 			ulock_clear(&lp->mutex_lockw);
29426057Sraf 			fuword8_noerr(&lp->mutex_waiters, &waiters);
29436057Sraf 			if (waiters &&
29446057Sraf 			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
29456057Sraf 				suword8_noerr(&lp->mutex_waiters, waiters);
29466057Sraf 		}
29474574Sraf 		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
29484574Sraf 	}
29490Sstevel@tonic-gate out:
29500Sstevel@tonic-gate 	no_fault();
29510Sstevel@tonic-gate 	if (watched)
29520Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
29530Sstevel@tonic-gate }
29540Sstevel@tonic-gate 
29550Sstevel@tonic-gate /*
29564574Sraf  * Register a process-shared robust mutex in the lwpchan cache.
29570Sstevel@tonic-gate  */
29580Sstevel@tonic-gate int
lwp_mutex_register(lwp_mutex_t * lp,caddr_t uaddr)29599264SRoger.Faulkner@Sun.COM lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr)
29600Sstevel@tonic-gate {
29610Sstevel@tonic-gate 	int error = 0;
29624574Sraf 	volatile int watched;
29630Sstevel@tonic-gate 	label_t ljb;
29644574Sraf 	uint8_t type;
29650Sstevel@tonic-gate 	lwpchan_t lwpchan;
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate 	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
29680Sstevel@tonic-gate 		return (set_errno(EFAULT));
29690Sstevel@tonic-gate 
29700Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
29710Sstevel@tonic-gate 
29720Sstevel@tonic-gate 	if (on_fault(&ljb)) {
29730Sstevel@tonic-gate 		error = EFAULT;
29744574Sraf 	} else {
29756577Sraf 		/*
29766577Sraf 		 * Force Copy-on-write if necessary and ensure that the
29776577Sraf 		 * synchronization object resides in read/write memory.
29786577Sraf 		 * Cause an EFAULT return now if this is not so.
29796577Sraf 		 */
29804574Sraf 		fuword8_noerr(&lp->mutex_type, &type);
29816577Sraf 		suword8_noerr(&lp->mutex_type, type);
29824574Sraf 		if ((type & (USYNC_PROCESS|LOCK_ROBUST))
29834574Sraf 		    != (USYNC_PROCESS|LOCK_ROBUST)) {
29844574Sraf 			error = EINVAL;
29859264SRoger.Faulkner@Sun.COM 		} else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp,
29869264SRoger.Faulkner@Sun.COM 		    uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) {
29876577Sraf 			error = EFAULT;
29884574Sraf 		}
29890Sstevel@tonic-gate 	}
29900Sstevel@tonic-gate 	no_fault();
29910Sstevel@tonic-gate 	if (watched)
29920Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
29930Sstevel@tonic-gate 	if (error)
29940Sstevel@tonic-gate 		return (set_errno(error));
29950Sstevel@tonic-gate 	return (0);
29960Sstevel@tonic-gate }
29970Sstevel@tonic-gate 
29989264SRoger.Faulkner@Sun.COM /*
29999264SRoger.Faulkner@Sun.COM  * There is a user-level robust lock registration in libc.
30009264SRoger.Faulkner@Sun.COM  * Mark it as invalid by storing -1 into the location of the pointer.
30019264SRoger.Faulkner@Sun.COM  */
30029264SRoger.Faulkner@Sun.COM static void
lwp_mutex_unregister(void * uaddr)30039264SRoger.Faulkner@Sun.COM lwp_mutex_unregister(void *uaddr)
30049264SRoger.Faulkner@Sun.COM {
30059264SRoger.Faulkner@Sun.COM 	if (get_udatamodel() == DATAMODEL_NATIVE) {
30069264SRoger.Faulkner@Sun.COM 		(void) sulword(uaddr, (ulong_t)-1);
30079264SRoger.Faulkner@Sun.COM #ifdef _SYSCALL32_IMPL
30089264SRoger.Faulkner@Sun.COM 	} else {
30099264SRoger.Faulkner@Sun.COM 		(void) suword32(uaddr, (uint32_t)-1);
30109264SRoger.Faulkner@Sun.COM #endif
30119264SRoger.Faulkner@Sun.COM 	}
30129264SRoger.Faulkner@Sun.COM }
30139264SRoger.Faulkner@Sun.COM 
30140Sstevel@tonic-gate int
lwp_mutex_trylock(lwp_mutex_t * lp,uintptr_t owner)301510887SRoger.Faulkner@Sun.COM lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner)
30160Sstevel@tonic-gate {
30170Sstevel@tonic-gate 	kthread_t *t = curthread;
30180Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
30190Sstevel@tonic-gate 	int error = 0;
30200Sstevel@tonic-gate 	volatile int locked = 0;
30210Sstevel@tonic-gate 	volatile int watched = 0;
30220Sstevel@tonic-gate 	label_t ljb;
30230Sstevel@tonic-gate 	volatile uint8_t type = 0;
30240Sstevel@tonic-gate 	uint16_t flag;
30250Sstevel@tonic-gate 	lwpchan_t lwpchan;
30260Sstevel@tonic-gate 
30270Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
30280Sstevel@tonic-gate 		return (set_errno(EFAULT));
30290Sstevel@tonic-gate 
30300Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
30310Sstevel@tonic-gate 
30320Sstevel@tonic-gate 	if (on_fault(&ljb)) {
30330Sstevel@tonic-gate 		if (locked)
30340Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
30350Sstevel@tonic-gate 		error = EFAULT;
30360Sstevel@tonic-gate 		goto out;
30370Sstevel@tonic-gate 	}
30386577Sraf 	/*
30396577Sraf 	 * Force Copy-on-write if necessary and ensure that the
30406577Sraf 	 * synchronization object resides in read/write memory.
30416577Sraf 	 * Cause an EFAULT return now if this is not so.
30426577Sraf 	 */
30430Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
30446577Sraf 	suword8_noerr(&lp->mutex_type, type);
30450Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
30460Sstevel@tonic-gate 		no_fault();
30470Sstevel@tonic-gate 		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
304810887SRoger.Faulkner@Sun.COM 		if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED)
304910887SRoger.Faulkner@Sun.COM 			set_owner_pid(lp, owner,
305010887SRoger.Faulkner@Sun.COM 			    (type & USYNC_PROCESS)? p->p_pid : 0);
30510Sstevel@tonic-gate 		if (error)
30520Sstevel@tonic-gate 			return (set_errno(error));
30530Sstevel@tonic-gate 		return (0);
30540Sstevel@tonic-gate 	}
30550Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
30560Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
30570Sstevel@tonic-gate 		error = EFAULT;
30580Sstevel@tonic-gate 		goto out;
30590Sstevel@tonic-gate 	}
30600Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
30610Sstevel@tonic-gate 	locked = 1;
30624574Sraf 	if (type & LOCK_ROBUST) {
30634574Sraf 		fuword16_noerr(&lp->mutex_flag, &flag);
30640Sstevel@tonic-gate 		if (flag & LOCK_NOTRECOVERABLE) {
30650Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
30660Sstevel@tonic-gate 			error =  ENOTRECOVERABLE;
30670Sstevel@tonic-gate 			goto out;
30680Sstevel@tonic-gate 		}
30690Sstevel@tonic-gate 	}
30700Sstevel@tonic-gate 
30710Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
30720Sstevel@tonic-gate 
30730Sstevel@tonic-gate 	if (!ulock_try(&lp->mutex_lockw))
30740Sstevel@tonic-gate 		error = EBUSY;
30754574Sraf 	else {
307610887SRoger.Faulkner@Sun.COM 		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
30774574Sraf 		if (type & LOCK_ROBUST) {
30784574Sraf 			fuword16_noerr(&lp->mutex_flag, &flag);
30794574Sraf 			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
30804574Sraf 				if (flag & LOCK_OWNERDEAD)
30814574Sraf 					error = EOWNERDEAD;
30824574Sraf 				else if (type & USYNC_PROCESS_ROBUST)
30834574Sraf 					error = ELOCKUNMAPPED;
30844574Sraf 				else
30854574Sraf 					error = EOWNERDEAD;
30864574Sraf 			}
30870Sstevel@tonic-gate 		}
30880Sstevel@tonic-gate 	}
30890Sstevel@tonic-gate 	locked = 0;
30900Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
30910Sstevel@tonic-gate out:
30920Sstevel@tonic-gate 
30930Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
30940Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
30950Sstevel@tonic-gate 
30960Sstevel@tonic-gate 	no_fault();
30970Sstevel@tonic-gate 	if (watched)
30980Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
30990Sstevel@tonic-gate 	if (error)
31000Sstevel@tonic-gate 		return (set_errno(error));
31010Sstevel@tonic-gate 	return (0);
31020Sstevel@tonic-gate }
31030Sstevel@tonic-gate 
31040Sstevel@tonic-gate /*
31050Sstevel@tonic-gate  * unlock the mutex and unblock lwps that is trying to acquire this mutex.
31060Sstevel@tonic-gate  * the blocked lwp resumes and retries to acquire the lock.
31070Sstevel@tonic-gate  */
31080Sstevel@tonic-gate int
lwp_mutex_unlock(lwp_mutex_t * lp)31090Sstevel@tonic-gate lwp_mutex_unlock(lwp_mutex_t *lp)
31100Sstevel@tonic-gate {
31110Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
31120Sstevel@tonic-gate 	lwpchan_t lwpchan;
31130Sstevel@tonic-gate 	uchar_t waiters;
31140Sstevel@tonic-gate 	volatile int locked = 0;
31150Sstevel@tonic-gate 	volatile int watched = 0;
31160Sstevel@tonic-gate 	volatile uint8_t type = 0;
31170Sstevel@tonic-gate 	label_t ljb;
31180Sstevel@tonic-gate 	uint16_t flag;
31190Sstevel@tonic-gate 	int error = 0;
31200Sstevel@tonic-gate 
31210Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
31220Sstevel@tonic-gate 		return (set_errno(EFAULT));
31230Sstevel@tonic-gate 
31240Sstevel@tonic-gate 	if (on_fault(&ljb)) {
31250Sstevel@tonic-gate 		if (locked)
31260Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
31270Sstevel@tonic-gate 		error = EFAULT;
31280Sstevel@tonic-gate 		goto out;
31290Sstevel@tonic-gate 	}
31306577Sraf 
31316577Sraf 	/*
31326577Sraf 	 * Force Copy-on-write if necessary and ensure that the
31336577Sraf 	 * synchronization object resides in read/write memory.
31346577Sraf 	 * Cause an EFAULT return now if this is not so.
31356577Sraf 	 */
31360Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
31376577Sraf 	suword8_noerr(&lp->mutex_type, type);
31386577Sraf 
31390Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
31400Sstevel@tonic-gate 		no_fault();
31410Sstevel@tonic-gate 		error = lwp_upimutex_unlock(lp, type);
31420Sstevel@tonic-gate 		if (error)
31430Sstevel@tonic-gate 			return (set_errno(error));
31440Sstevel@tonic-gate 		return (0);
31450Sstevel@tonic-gate 	}
31460Sstevel@tonic-gate 
31470Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
31480Sstevel@tonic-gate 
31490Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
31500Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
31510Sstevel@tonic-gate 		error = EFAULT;
31520Sstevel@tonic-gate 		goto out;
31530Sstevel@tonic-gate 	}
31540Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
31550Sstevel@tonic-gate 	locked = 1;
31564574Sraf 	if (type & LOCK_ROBUST) {
31574574Sraf 		fuword16_noerr(&lp->mutex_flag, &flag);
31584574Sraf 		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
31594574Sraf 			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
31604574Sraf 			flag |= LOCK_NOTRECOVERABLE;
31614574Sraf 			suword16_noerr(&lp->mutex_flag, flag);
31620Sstevel@tonic-gate 		}
31634574Sraf 	}
316410887SRoger.Faulkner@Sun.COM 	set_owner_pid(lp, 0, 0);
31650Sstevel@tonic-gate 	ulock_clear(&lp->mutex_lockw);
31660Sstevel@tonic-gate 	/*
31670Sstevel@tonic-gate 	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
31680Sstevel@tonic-gate 	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
31690Sstevel@tonic-gate 	 * may fail.  If it fails, do not write into the waiter bit.
31700Sstevel@tonic-gate 	 * The call to lwp_release() might fail due to one of three reasons:
31710Sstevel@tonic-gate 	 *
31720Sstevel@tonic-gate 	 * 	1. due to the thread which set the waiter bit not actually
31730Sstevel@tonic-gate 	 *	   sleeping since it got the lock on the re-try. The waiter
31740Sstevel@tonic-gate 	 *	   bit will then be correctly updated by that thread. This
31750Sstevel@tonic-gate 	 *	   window may be closed by reading the wait bit again here
31760Sstevel@tonic-gate 	 *	   and not calling lwp_release() at all if it is zero.
31770Sstevel@tonic-gate 	 *	2. the thread which set the waiter bit and went to sleep
31780Sstevel@tonic-gate 	 *	   was woken up by a signal. This time, the waiter recomputes
31790Sstevel@tonic-gate 	 *	   the wait bit in the return with EINTR code.
31800Sstevel@tonic-gate 	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
31810Sstevel@tonic-gate 	 *	   memory that has been re-used after the lock was dropped.
31820Sstevel@tonic-gate 	 *	   In this case, writing into the waiter bit would cause data
31830Sstevel@tonic-gate 	 *	   corruption.
31840Sstevel@tonic-gate 	 */
31850Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_waiters, &waiters);
31860Sstevel@tonic-gate 	if (waiters) {
31874574Sraf 		if ((type & LOCK_ROBUST) &&
31880Sstevel@tonic-gate 		    (flag & LOCK_NOTRECOVERABLE)) {
31890Sstevel@tonic-gate 			lwp_release_all(&lwpchan);
31900Sstevel@tonic-gate 			suword8_noerr(&lp->mutex_waiters, 0);
31916057Sraf 		} else if (lwp_release(&lwpchan, &waiters, 0)) {
31920Sstevel@tonic-gate 			suword8_noerr(&lp->mutex_waiters, waiters);
31930Sstevel@tonic-gate 		}
31940Sstevel@tonic-gate 	}
31950Sstevel@tonic-gate 
31960Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
31970Sstevel@tonic-gate out:
31980Sstevel@tonic-gate 	no_fault();
31990Sstevel@tonic-gate 	if (watched)
32000Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
32010Sstevel@tonic-gate 	if (error)
32020Sstevel@tonic-gate 		return (set_errno(error));
32030Sstevel@tonic-gate 	return (0);
32040Sstevel@tonic-gate }
3205