xref: /onnv-gate/usr/src/uts/common/os/pid.c (revision 749:d7f9da43aeb7)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
22390Sraf 
230Sstevel@tonic-gate /*
24390Sraf  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
250Sstevel@tonic-gate  * Use is subject to license terms.
260Sstevel@tonic-gate  */
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
290Sstevel@tonic-gate /*	  All Rights Reserved  	*/
300Sstevel@tonic-gate 
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/param.h>
360Sstevel@tonic-gate #include <sys/sysmacros.h>
370Sstevel@tonic-gate #include <sys/proc.h>
380Sstevel@tonic-gate #include <sys/kmem.h>
390Sstevel@tonic-gate #include <sys/tuneable.h>
400Sstevel@tonic-gate #include <sys/var.h>
410Sstevel@tonic-gate #include <sys/cred.h>
420Sstevel@tonic-gate #include <sys/systm.h>
430Sstevel@tonic-gate #include <sys/prsystm.h>
440Sstevel@tonic-gate #include <sys/vnode.h>
450Sstevel@tonic-gate #include <sys/session.h>
460Sstevel@tonic-gate #include <sys/cpuvar.h>
470Sstevel@tonic-gate #include <sys/cmn_err.h>
480Sstevel@tonic-gate #include <sys/bitmap.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <c2/audit.h>
510Sstevel@tonic-gate #include <sys/zone.h>
520Sstevel@tonic-gate 
530Sstevel@tonic-gate /* directory entries for /proc */
540Sstevel@tonic-gate union procent {
550Sstevel@tonic-gate 	proc_t *pe_proc;
560Sstevel@tonic-gate 	union procent *pe_next;
570Sstevel@tonic-gate };
580Sstevel@tonic-gate 
590Sstevel@tonic-gate struct pid pid0 = {
600Sstevel@tonic-gate 	0,		/* pid_prinactive */
610Sstevel@tonic-gate 	1,		/* pid_pgorphaned */
620Sstevel@tonic-gate 	0,		/* pid_padding	*/
630Sstevel@tonic-gate 	0,		/* pid_prslot	*/
640Sstevel@tonic-gate 	0,		/* pid_id	*/
650Sstevel@tonic-gate 	NULL,		/* pid_pglink	*/
66*749Ssusans 	NULL,		/* pid_pgtail	*/
670Sstevel@tonic-gate 	NULL,		/* pid_link	*/
680Sstevel@tonic-gate 	3		/* pid_ref	*/
690Sstevel@tonic-gate };
700Sstevel@tonic-gate 
710Sstevel@tonic-gate static int pid_hashlen = 4;	/* desired average hash chain length */
720Sstevel@tonic-gate static int pid_hashsz;		/* number of buckets in the hash table */
730Sstevel@tonic-gate 
740Sstevel@tonic-gate #define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])
750Sstevel@tonic-gate 
760Sstevel@tonic-gate extern uint_t nproc;
770Sstevel@tonic-gate extern struct kmem_cache *process_cache;
780Sstevel@tonic-gate static void	upcount_init(void);
790Sstevel@tonic-gate 
800Sstevel@tonic-gate kmutex_t	pidlock;	/* global process lock */
810Sstevel@tonic-gate kmutex_t	pr_pidlock;	/* /proc global process lock */
820Sstevel@tonic-gate kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
830Sstevel@tonic-gate struct plock	*proc_lock;	/* persistent array of p_lock's */
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * See the comment above pid_getlockslot() for a detailed explanation of this
870Sstevel@tonic-gate  * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
880Sstevel@tonic-gate  * granularity; if the coherence granularity is ever changed, this constant
890Sstevel@tonic-gate  * should be modified to reflect the change to minimize proc_lock false
900Sstevel@tonic-gate  * sharing (correctness, however, is guaranteed regardless of the coherence
910Sstevel@tonic-gate  * granularity).
920Sstevel@tonic-gate  */
930Sstevel@tonic-gate #define	PLOCK_SHIFT	3
940Sstevel@tonic-gate 
950Sstevel@tonic-gate static kmutex_t	pidlinklock;
960Sstevel@tonic-gate static struct pid **pidhash;
970Sstevel@tonic-gate static pid_t minpid;
980Sstevel@tonic-gate static pid_t mpid;
990Sstevel@tonic-gate static union procent *procdir;
1000Sstevel@tonic-gate static union procent *procentfree;
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate static struct pid *
1030Sstevel@tonic-gate pid_lookup(pid_t pid)
1040Sstevel@tonic-gate {
1050Sstevel@tonic-gate 	struct pid *pidp;
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlinklock));
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate 	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
1100Sstevel@tonic-gate 		if (pidp->pid_id == pid) {
1110Sstevel@tonic-gate 			ASSERT(pidp->pid_ref > 0);
1120Sstevel@tonic-gate 			break;
1130Sstevel@tonic-gate 		}
1140Sstevel@tonic-gate 	}
1150Sstevel@tonic-gate 	return (pidp);
1160Sstevel@tonic-gate }
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate void
1190Sstevel@tonic-gate pid_setmin(void)
1200Sstevel@tonic-gate {
1210Sstevel@tonic-gate 	if (jump_pid && jump_pid > mpid)
1220Sstevel@tonic-gate 		minpid = mpid = jump_pid;
1230Sstevel@tonic-gate 	else
1240Sstevel@tonic-gate 		minpid = mpid + 1;
1250Sstevel@tonic-gate }
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate /*
1280Sstevel@tonic-gate  * When prslots are simply used as an index to determine a process' p_lock,
1290Sstevel@tonic-gate  * adjacent prslots share adjacent p_locks.  On machines where the size
1300Sstevel@tonic-gate  * of a mutex is smaller than that of a cache line (which, as of this writing,
1310Sstevel@tonic-gate  * is true for all machines on which Solaris runs), this can potentially
1320Sstevel@tonic-gate  * induce false sharing.  The standard solution for false sharing is to pad
1330Sstevel@tonic-gate  * out one's data structures (in this case, struct plock).  However,
1340Sstevel@tonic-gate  * given the size and (generally) sparse use of the proc_lock array, this
1350Sstevel@tonic-gate  * is suboptimal.  We therefore stride through the proc_lock array with
1360Sstevel@tonic-gate  * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
1370Sstevel@tonic-gate  *
1380Sstevel@tonic-gate  *   log_2 (coherence_granularity / sizeof (kmutex_t))
1390Sstevel@tonic-gate  *
1400Sstevel@tonic-gate  * Under this scheme, false sharing is still possible -- but only when
1410Sstevel@tonic-gate  * the number of active processes is very large.  Note that the one-to-one
1420Sstevel@tonic-gate  * mapping between prslots and lockslots is maintained.
1430Sstevel@tonic-gate  */
1440Sstevel@tonic-gate static int
1450Sstevel@tonic-gate pid_getlockslot(int prslot)
1460Sstevel@tonic-gate {
1470Sstevel@tonic-gate 	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
1480Sstevel@tonic-gate 	int perlap = even >> PLOCK_SHIFT;
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate 	if (prslot >= even)
1510Sstevel@tonic-gate 		return (prslot);
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
1540Sstevel@tonic-gate }
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate  * This function assigns a pid for use in a fork request.  It allocates
1580Sstevel@tonic-gate  * a pid structure, tries to find an empty slot in the proc table,
1590Sstevel@tonic-gate  * and selects the process id.
1600Sstevel@tonic-gate  *
1610Sstevel@tonic-gate  * pid_assign() returns the new pid on success, -1 on failure.
1620Sstevel@tonic-gate  */
1630Sstevel@tonic-gate pid_t
1640Sstevel@tonic-gate pid_assign(proc_t *prp)
1650Sstevel@tonic-gate {
1660Sstevel@tonic-gate 	struct pid *pidp;
1670Sstevel@tonic-gate 	union procent *pep;
1680Sstevel@tonic-gate 	pid_t newpid, startpid;
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
1730Sstevel@tonic-gate 	if ((pep = procentfree) == NULL) {
1740Sstevel@tonic-gate 		/*
1750Sstevel@tonic-gate 		 * ran out of /proc directory entries
1760Sstevel@tonic-gate 		 */
1770Sstevel@tonic-gate 		goto failed;
1780Sstevel@tonic-gate 	}
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	/*
1810Sstevel@tonic-gate 	 * Allocate a pid
1820Sstevel@tonic-gate 	 */
1830Sstevel@tonic-gate 	startpid = mpid;
1840Sstevel@tonic-gate 	do  {
1850Sstevel@tonic-gate 		newpid = (++mpid == maxpid ? mpid = minpid : mpid);
1860Sstevel@tonic-gate 	} while (pid_lookup(newpid) && newpid != startpid);
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 	if (newpid == startpid && pid_lookup(newpid)) {
1890Sstevel@tonic-gate 		/* couldn't find a free pid */
1900Sstevel@tonic-gate 		goto failed;
1910Sstevel@tonic-gate 	}
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	procentfree = pep->pe_next;
1940Sstevel@tonic-gate 	pep->pe_proc = prp;
1950Sstevel@tonic-gate 	prp->p_pidp = pidp;
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate 	/*
1980Sstevel@tonic-gate 	 * Put pid into the pid hash table.
1990Sstevel@tonic-gate 	 */
2000Sstevel@tonic-gate 	pidp->pid_link = HASHPID(newpid);
2010Sstevel@tonic-gate 	HASHPID(newpid) = pidp;
2020Sstevel@tonic-gate 	pidp->pid_ref = 1;
2030Sstevel@tonic-gate 	pidp->pid_id = newpid;
2040Sstevel@tonic-gate 	pidp->pid_prslot = pep - procdir;
2050Sstevel@tonic-gate 	prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
2060Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 	return (newpid);
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate failed:
2110Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2120Sstevel@tonic-gate 	kmem_free(pidp, sizeof (struct pid));
2130Sstevel@tonic-gate 	return (-1);
2140Sstevel@tonic-gate }
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate /*
2170Sstevel@tonic-gate  * decrement the reference count for pid
2180Sstevel@tonic-gate  */
2190Sstevel@tonic-gate int
2200Sstevel@tonic-gate pid_rele(struct pid *pidp)
2210Sstevel@tonic-gate {
2220Sstevel@tonic-gate 	struct pid **pidpp;
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2250Sstevel@tonic-gate 	ASSERT(pidp != &pid0);
2260Sstevel@tonic-gate 
2270Sstevel@tonic-gate 	pidpp = &HASHPID(pidp->pid_id);
2280Sstevel@tonic-gate 	for (;;) {
2290Sstevel@tonic-gate 		ASSERT(*pidpp != NULL);
2300Sstevel@tonic-gate 		if (*pidpp == pidp)
2310Sstevel@tonic-gate 			break;
2320Sstevel@tonic-gate 		pidpp = &(*pidpp)->pid_link;
2330Sstevel@tonic-gate 	}
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	*pidpp = pidp->pid_link;
2360Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 	kmem_free(pidp, sizeof (*pidp));
2390Sstevel@tonic-gate 	return (0);
2400Sstevel@tonic-gate }
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate void
2430Sstevel@tonic-gate proc_entry_free(struct pid *pidp)
2440Sstevel@tonic-gate {
2450Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2460Sstevel@tonic-gate 	pidp->pid_prinactive = 1;
2470Sstevel@tonic-gate 	procdir[pidp->pid_prslot].pe_next = procentfree;
2480Sstevel@tonic-gate 	procentfree = &procdir[pidp->pid_prslot];
2490Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2500Sstevel@tonic-gate }
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate void
2530Sstevel@tonic-gate pid_exit(proc_t *prp)
2540Sstevel@tonic-gate {
2550Sstevel@tonic-gate 	struct pid *pidp;
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate 	/*
2600Sstevel@tonic-gate 	 * Exit process group.  If it is NULL, it's because fork failed
2610Sstevel@tonic-gate 	 * before calling pgjoin().
2620Sstevel@tonic-gate 	 */
2630Sstevel@tonic-gate 	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
2640Sstevel@tonic-gate 	if (prp->p_pgidp != NULL)
2650Sstevel@tonic-gate 		pgexit(prp);
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	SESS_RELE(prp->p_sessp);
2680Sstevel@tonic-gate 
2690Sstevel@tonic-gate 	pidp = prp->p_pidp;
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate 	proc_entry_free(pidp);
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate #ifdef C2_AUDIT
2740Sstevel@tonic-gate 	if (audit_active)
2750Sstevel@tonic-gate 		audit_pfree(prp);
2760Sstevel@tonic-gate #endif
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	if (practive == prp) {
2790Sstevel@tonic-gate 		practive = prp->p_next;
2800Sstevel@tonic-gate 	}
2810Sstevel@tonic-gate 
2820Sstevel@tonic-gate 	if (prp->p_next) {
2830Sstevel@tonic-gate 		prp->p_next->p_prev = prp->p_prev;
2840Sstevel@tonic-gate 	}
2850Sstevel@tonic-gate 	if (prp->p_prev) {
2860Sstevel@tonic-gate 		prp->p_prev->p_next = prp->p_next;
2870Sstevel@tonic-gate 	}
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 	PID_RELE(pidp);
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 	mutex_destroy(&prp->p_crlock);
2920Sstevel@tonic-gate 	kmem_cache_free(process_cache, prp);
2930Sstevel@tonic-gate 	nproc--;
2940Sstevel@tonic-gate }
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate /*
2970Sstevel@tonic-gate  * Find a process visible from the specified zone given its process ID.
2980Sstevel@tonic-gate  */
2990Sstevel@tonic-gate proc_t *
3000Sstevel@tonic-gate prfind_zone(pid_t pid, zoneid_t zoneid)
3010Sstevel@tonic-gate {
3020Sstevel@tonic-gate 	struct pid *pidp;
3030Sstevel@tonic-gate 	proc_t *p;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3080Sstevel@tonic-gate 	pidp = pid_lookup(pid);
3090Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3100Sstevel@tonic-gate 	if (pidp != NULL && pidp->pid_prinactive == 0) {
3110Sstevel@tonic-gate 		p = procdir[pidp->pid_prslot].pe_proc;
3120Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
3130Sstevel@tonic-gate 			return (p);
3140Sstevel@tonic-gate 	}
3150Sstevel@tonic-gate 	return (NULL);
3160Sstevel@tonic-gate }
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate /*
3190Sstevel@tonic-gate  * Find a process given its process ID.  This obeys zone restrictions,
3200Sstevel@tonic-gate  * so if the caller is in a non-global zone it won't find processes
3210Sstevel@tonic-gate  * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
3220Sstevel@tonic-gate  * bypass this restriction.
3230Sstevel@tonic-gate  */
3240Sstevel@tonic-gate proc_t *
3250Sstevel@tonic-gate prfind(pid_t pid)
3260Sstevel@tonic-gate {
3270Sstevel@tonic-gate 	zoneid_t zoneid;
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3300Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3310Sstevel@tonic-gate 	else
3320Sstevel@tonic-gate 		zoneid = getzoneid();
3330Sstevel@tonic-gate 	return (prfind_zone(pid, zoneid));
3340Sstevel@tonic-gate }
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate proc_t *
3370Sstevel@tonic-gate pgfind_zone(pid_t pgid, zoneid_t zoneid)
3380Sstevel@tonic-gate {
3390Sstevel@tonic-gate 	struct pid *pidp;
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3440Sstevel@tonic-gate 	pidp = pid_lookup(pgid);
3450Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3460Sstevel@tonic-gate 	if (pidp != NULL) {
3470Sstevel@tonic-gate 		proc_t *p = pidp->pid_pglink;
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
3500Sstevel@tonic-gate 		    p->p_zone->zone_id == zoneid)
3510Sstevel@tonic-gate 			return (p);
3520Sstevel@tonic-gate 	}
3530Sstevel@tonic-gate 	return (NULL);
3540Sstevel@tonic-gate }
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate /*
3570Sstevel@tonic-gate  * return the head of the list of processes whose process group ID is 'pgid',
3580Sstevel@tonic-gate  * or NULL, if no such process group
3590Sstevel@tonic-gate  */
3600Sstevel@tonic-gate proc_t *
3610Sstevel@tonic-gate pgfind(pid_t pgid)
3620Sstevel@tonic-gate {
3630Sstevel@tonic-gate 	zoneid_t zoneid;
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3660Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3670Sstevel@tonic-gate 	else
3680Sstevel@tonic-gate 		zoneid = getzoneid();
3690Sstevel@tonic-gate 	return (pgfind_zone(pgid, zoneid));
3700Sstevel@tonic-gate }
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate /*
3730Sstevel@tonic-gate  * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
3740Sstevel@tonic-gate  * Returns the proc pointer on success, NULL on failure.  sprlock() is
3750Sstevel@tonic-gate  * really just a stripped-down version of pr_p_lock() to allow practive
3760Sstevel@tonic-gate  * walkers like dofusers() and dumpsys() to synchronize with /proc.
3770Sstevel@tonic-gate  */
3780Sstevel@tonic-gate proc_t *
3790Sstevel@tonic-gate sprlock_zone(pid_t pid, zoneid_t zoneid)
3800Sstevel@tonic-gate {
3810Sstevel@tonic-gate 	proc_t *p;
3820Sstevel@tonic-gate 	kmutex_t *mp;
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	for (;;) {
3850Sstevel@tonic-gate 		mutex_enter(&pidlock);
3860Sstevel@tonic-gate 		if ((p = prfind_zone(pid, zoneid)) == NULL) {
3870Sstevel@tonic-gate 			mutex_exit(&pidlock);
3880Sstevel@tonic-gate 			return (NULL);
3890Sstevel@tonic-gate 		}
3900Sstevel@tonic-gate 		/*
3910Sstevel@tonic-gate 		 * p_lock is persistent, but p itself is not -- it could
3920Sstevel@tonic-gate 		 * vanish during cv_wait().  Load p->p_lock now so we can
3930Sstevel@tonic-gate 		 * drop it after cv_wait() without referencing p.
3940Sstevel@tonic-gate 		 */
3950Sstevel@tonic-gate 		mp = &p->p_lock;
3960Sstevel@tonic-gate 		mutex_enter(mp);
3970Sstevel@tonic-gate 		mutex_exit(&pidlock);
3980Sstevel@tonic-gate 		/*
3990Sstevel@tonic-gate 		 * If the process is in some half-baked state, fail.
4000Sstevel@tonic-gate 		 */
4010Sstevel@tonic-gate 		if (p->p_stat == SZOMB || p->p_stat == SIDL ||
402390Sraf 		    (p->p_flag & (SEXITING | SEXITLWPS))) {
4030Sstevel@tonic-gate 			mutex_exit(mp);
4040Sstevel@tonic-gate 			return (NULL);
4050Sstevel@tonic-gate 		}
4060Sstevel@tonic-gate 		if (panicstr)
4070Sstevel@tonic-gate 			return (p);
4080Sstevel@tonic-gate 		if (!(p->p_proc_flag & P_PR_LOCK))
4090Sstevel@tonic-gate 			break;
4100Sstevel@tonic-gate 		cv_wait(&pr_pid_cv[p->p_slot], mp);
4110Sstevel@tonic-gate 		mutex_exit(mp);
4120Sstevel@tonic-gate 	}
4130Sstevel@tonic-gate 	p->p_proc_flag |= P_PR_LOCK;
4140Sstevel@tonic-gate 	THREAD_KPRI_REQUEST();
4150Sstevel@tonic-gate 	return (p);
4160Sstevel@tonic-gate }
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate proc_t *
4190Sstevel@tonic-gate sprlock(pid_t pid)
4200Sstevel@tonic-gate {
4210Sstevel@tonic-gate 	zoneid_t zoneid;
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
4240Sstevel@tonic-gate 		zoneid = ALL_ZONES;
4250Sstevel@tonic-gate 	else
4260Sstevel@tonic-gate 		zoneid = getzoneid();
4270Sstevel@tonic-gate 	return (sprlock_zone(pid, zoneid));
4280Sstevel@tonic-gate }
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate void
4310Sstevel@tonic-gate sprlock_proc(proc_t *p)
4320Sstevel@tonic-gate {
4330Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
4340Sstevel@tonic-gate 
4350Sstevel@tonic-gate 	while (p->p_proc_flag & P_PR_LOCK) {
4360Sstevel@tonic-gate 		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	p->p_proc_flag |= P_PR_LOCK;
4400Sstevel@tonic-gate 	THREAD_KPRI_REQUEST();
4410Sstevel@tonic-gate }
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate void
4440Sstevel@tonic-gate sprunlock(proc_t *p)
4450Sstevel@tonic-gate {
4460Sstevel@tonic-gate 	if (panicstr) {
4470Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
4480Sstevel@tonic-gate 		return;
4490Sstevel@tonic-gate 	}
4500Sstevel@tonic-gate 
4510Sstevel@tonic-gate 	ASSERT(p->p_proc_flag & P_PR_LOCK);
4520Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate 	cv_signal(&pr_pid_cv[p->p_slot]);
4550Sstevel@tonic-gate 	p->p_proc_flag &= ~P_PR_LOCK;
4560Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
4570Sstevel@tonic-gate 	THREAD_KPRI_RELEASE();
4580Sstevel@tonic-gate }
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate void
4610Sstevel@tonic-gate pid_init(void)
4620Sstevel@tonic-gate {
4630Sstevel@tonic-gate 	int i;
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
4680Sstevel@tonic-gate 	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
4690Sstevel@tonic-gate 	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
4700Sstevel@tonic-gate 	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate 	nproc = 1;
4730Sstevel@tonic-gate 	practive = proc_sched;
4740Sstevel@tonic-gate 	proc_sched->p_next = NULL;
4750Sstevel@tonic-gate 	procdir[0].pe_proc = proc_sched;
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate 	procentfree = &procdir[1];
4780Sstevel@tonic-gate 	for (i = 1; i < v.v_proc - 1; i++)
4790Sstevel@tonic-gate 		procdir[i].pe_next = &procdir[i+1];
4800Sstevel@tonic-gate 	procdir[i].pe_next = NULL;
4810Sstevel@tonic-gate 
4820Sstevel@tonic-gate 	HASHPID(0) = &pid0;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	upcount_init();
4850Sstevel@tonic-gate }
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate proc_t *
4880Sstevel@tonic-gate pid_entry(int slot)
4890Sstevel@tonic-gate {
4900Sstevel@tonic-gate 	union procent *pep;
4910Sstevel@tonic-gate 	proc_t *prp;
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
4940Sstevel@tonic-gate 	ASSERT(slot >= 0 && slot < v.v_proc);
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 	pep = procdir[slot].pe_next;
4970Sstevel@tonic-gate 	if (pep >= procdir && pep < &procdir[v.v_proc])
4980Sstevel@tonic-gate 		return (NULL);
4990Sstevel@tonic-gate 	prp = procdir[slot].pe_proc;
5000Sstevel@tonic-gate 	if (prp != 0 && prp->p_stat == SIDL)
5010Sstevel@tonic-gate 		return (NULL);
5020Sstevel@tonic-gate 	return (prp);
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate /*
5060Sstevel@tonic-gate  * Send the specified signal to all processes whose process group ID is
5070Sstevel@tonic-gate  * equal to 'pgid'
5080Sstevel@tonic-gate  */
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate void
5110Sstevel@tonic-gate signal(pid_t pgid, int sig)
5120Sstevel@tonic-gate {
5130Sstevel@tonic-gate 	struct pid *pidp;
5140Sstevel@tonic-gate 	proc_t *prp;
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate 	mutex_enter(&pidlock);
5170Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
5180Sstevel@tonic-gate 	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
5190Sstevel@tonic-gate 		mutex_exit(&pidlinklock);
5200Sstevel@tonic-gate 		mutex_exit(&pidlock);
5210Sstevel@tonic-gate 		return;
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
5240Sstevel@tonic-gate 	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
5250Sstevel@tonic-gate 		mutex_enter(&prp->p_lock);
5260Sstevel@tonic-gate 		sigtoproc(prp, NULL, sig);
5270Sstevel@tonic-gate 		mutex_exit(&prp->p_lock);
5280Sstevel@tonic-gate 	}
5290Sstevel@tonic-gate 	mutex_exit(&pidlock);
5300Sstevel@tonic-gate }
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate /*
5330Sstevel@tonic-gate  * Send the specified signal to the specified process
5340Sstevel@tonic-gate  */
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate void
5370Sstevel@tonic-gate prsignal(struct pid *pidp, int sig)
5380Sstevel@tonic-gate {
5390Sstevel@tonic-gate 	if (!(pidp->pid_prinactive))
5400Sstevel@tonic-gate 		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
5410Sstevel@tonic-gate }
5420Sstevel@tonic-gate 
5430Sstevel@tonic-gate #include <sys/sunddi.h>
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate /*
5460Sstevel@tonic-gate  * DDI/DKI interfaces for drivers to send signals to processes
5470Sstevel@tonic-gate  */
5480Sstevel@tonic-gate 
5490Sstevel@tonic-gate /*
5500Sstevel@tonic-gate  * obtain an opaque reference to a process for signaling
5510Sstevel@tonic-gate  */
5520Sstevel@tonic-gate void *
5530Sstevel@tonic-gate proc_ref(void)
5540Sstevel@tonic-gate {
5550Sstevel@tonic-gate 	struct pid *pidp;
5560Sstevel@tonic-gate 
5570Sstevel@tonic-gate 	mutex_enter(&pidlock);
5580Sstevel@tonic-gate 	pidp = curproc->p_pidp;
5590Sstevel@tonic-gate 	PID_HOLD(pidp);
5600Sstevel@tonic-gate 	mutex_exit(&pidlock);
5610Sstevel@tonic-gate 
5620Sstevel@tonic-gate 	return (pidp);
5630Sstevel@tonic-gate }
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate /*
5660Sstevel@tonic-gate  * release a reference to a process
5670Sstevel@tonic-gate  * - a process can exit even if a driver has a reference to it
5680Sstevel@tonic-gate  * - one proc_unref for every proc_ref
5690Sstevel@tonic-gate  */
5700Sstevel@tonic-gate void
5710Sstevel@tonic-gate proc_unref(void *pref)
5720Sstevel@tonic-gate {
5730Sstevel@tonic-gate 	mutex_enter(&pidlock);
5740Sstevel@tonic-gate 	PID_RELE((struct pid *)pref);
5750Sstevel@tonic-gate 	mutex_exit(&pidlock);
5760Sstevel@tonic-gate }
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate /*
5790Sstevel@tonic-gate  * send a signal to a process
5800Sstevel@tonic-gate  *
5810Sstevel@tonic-gate  * - send the process the signal
5820Sstevel@tonic-gate  * - if the process went away, return a -1
5830Sstevel@tonic-gate  * - if the process is still there return 0
5840Sstevel@tonic-gate  */
5850Sstevel@tonic-gate int
5860Sstevel@tonic-gate proc_signal(void *pref, int sig)
5870Sstevel@tonic-gate {
5880Sstevel@tonic-gate 	struct pid *pidp = pref;
5890Sstevel@tonic-gate 
5900Sstevel@tonic-gate 	prsignal(pidp, sig);
5910Sstevel@tonic-gate 	return (pidp->pid_prinactive ? -1 : 0);
5920Sstevel@tonic-gate }
5930Sstevel@tonic-gate 
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate static struct upcount	**upc_hash;	/* a boot time allocated array */
5960Sstevel@tonic-gate static ulong_t		upc_hashmask;
5970Sstevel@tonic-gate #define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate /*
6000Sstevel@tonic-gate  * Get us off the ground.  Called once at boot.
6010Sstevel@tonic-gate  */
6020Sstevel@tonic-gate void
6030Sstevel@tonic-gate upcount_init(void)
6040Sstevel@tonic-gate {
6050Sstevel@tonic-gate 	ulong_t	upc_hashsize;
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate 	/*
6080Sstevel@tonic-gate 	 * An entry per MB of memory is our current guess
6090Sstevel@tonic-gate 	 */
6100Sstevel@tonic-gate 	/*
6110Sstevel@tonic-gate 	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
6120Sstevel@tonic-gate 	 * converts pages to megs (without overflowing a u_int
6130Sstevel@tonic-gate 	 * if you have more than 4G of memory, like ptob(physmem)/1M
6140Sstevel@tonic-gate 	 * would).
6150Sstevel@tonic-gate 	 */
6160Sstevel@tonic-gate 	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
6170Sstevel@tonic-gate 	upc_hashmask = upc_hashsize - 1;
6180Sstevel@tonic-gate 	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
6190Sstevel@tonic-gate 	    KM_SLEEP);
6200Sstevel@tonic-gate }
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate /*
6230Sstevel@tonic-gate  * Increment the number of processes associated with a given uid and zoneid.
6240Sstevel@tonic-gate  */
6250Sstevel@tonic-gate void
6260Sstevel@tonic-gate upcount_inc(uid_t uid, zoneid_t zoneid)
6270Sstevel@tonic-gate {
6280Sstevel@tonic-gate 	struct upcount	**upc, **hupc;
6290Sstevel@tonic-gate 	struct upcount	*new;
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
6320Sstevel@tonic-gate 	new = NULL;
6330Sstevel@tonic-gate 	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
6340Sstevel@tonic-gate top:
6350Sstevel@tonic-gate 	upc = hupc;
6360Sstevel@tonic-gate 	while ((*upc) != NULL) {
6370Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
6380Sstevel@tonic-gate 			(*upc)->up_count++;
6390Sstevel@tonic-gate 			if (new) {
6400Sstevel@tonic-gate 				/*
6410Sstevel@tonic-gate 				 * did not need `new' afterall.
6420Sstevel@tonic-gate 				 */
6430Sstevel@tonic-gate 				kmem_free(new, sizeof (*new));
6440Sstevel@tonic-gate 			}
6450Sstevel@tonic-gate 			return;
6460Sstevel@tonic-gate 		}
6470Sstevel@tonic-gate 		upc = &(*upc)->up_next;
6480Sstevel@tonic-gate 	}
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 	/*
6510Sstevel@tonic-gate 	 * There is no entry for this <uid,zoneid> pair.
6520Sstevel@tonic-gate 	 * Allocate one.  If we have to drop pidlock, check
6530Sstevel@tonic-gate 	 * again.
6540Sstevel@tonic-gate 	 */
6550Sstevel@tonic-gate 	if (new == NULL) {
6560Sstevel@tonic-gate 		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
6570Sstevel@tonic-gate 		if (new == NULL) {
6580Sstevel@tonic-gate 			mutex_exit(&pidlock);
6590Sstevel@tonic-gate 			new = (struct upcount *)kmem_alloc(sizeof (*new),
6600Sstevel@tonic-gate 			    KM_SLEEP);
6610Sstevel@tonic-gate 			mutex_enter(&pidlock);
6620Sstevel@tonic-gate 			goto top;
6630Sstevel@tonic-gate 		}
6640Sstevel@tonic-gate 	}
6650Sstevel@tonic-gate 
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate 	/*
6680Sstevel@tonic-gate 	 * On the assumption that a new user is going to do some
6690Sstevel@tonic-gate 	 * more forks, put the new upcount structure on the front.
6700Sstevel@tonic-gate 	 */
6710Sstevel@tonic-gate 	upc = hupc;
6720Sstevel@tonic-gate 
6730Sstevel@tonic-gate 	new->up_uid = uid;
6740Sstevel@tonic-gate 	new->up_zoneid = zoneid;
6750Sstevel@tonic-gate 	new->up_count = 1;
6760Sstevel@tonic-gate 	new->up_next = *upc;
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 	*upc = new;
6790Sstevel@tonic-gate }
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate /*
6820Sstevel@tonic-gate  * Decrement the number of processes a given uid and zoneid has.
6830Sstevel@tonic-gate  */
6840Sstevel@tonic-gate void
6850Sstevel@tonic-gate upcount_dec(uid_t uid, zoneid_t zoneid)
6860Sstevel@tonic-gate {
6870Sstevel@tonic-gate 	struct	upcount **upc;
6880Sstevel@tonic-gate 	struct	upcount *done;
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate 	upc = &upc_hash[UPC_HASH(uid, zoneid)];
6930Sstevel@tonic-gate 	while ((*upc) != NULL) {
6940Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
6950Sstevel@tonic-gate 			(*upc)->up_count--;
6960Sstevel@tonic-gate 			if ((*upc)->up_count == 0) {
6970Sstevel@tonic-gate 				done = *upc;
6980Sstevel@tonic-gate 				*upc = (*upc)->up_next;
6990Sstevel@tonic-gate 				kmem_free(done, sizeof (*done));
7000Sstevel@tonic-gate 			}
7010Sstevel@tonic-gate 			return;
7020Sstevel@tonic-gate 		}
7030Sstevel@tonic-gate 		upc = &(*upc)->up_next;
7040Sstevel@tonic-gate 	}
7050Sstevel@tonic-gate 	cmn_err(CE_PANIC, "decr_upcount-off the end");
7060Sstevel@tonic-gate }
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate /*
7090Sstevel@tonic-gate  * Returns the number of processes a uid has.
7100Sstevel@tonic-gate  * Non-existent uid's are assumed to have no processes.
7110Sstevel@tonic-gate  */
7120Sstevel@tonic-gate int
7130Sstevel@tonic-gate upcount_get(uid_t uid, zoneid_t zoneid)
7140Sstevel@tonic-gate {
7150Sstevel@tonic-gate 	struct	upcount *upc;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7180Sstevel@tonic-gate 
7190Sstevel@tonic-gate 	upc = upc_hash[UPC_HASH(uid, zoneid)];
7200Sstevel@tonic-gate 	while (upc != NULL) {
7210Sstevel@tonic-gate 		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
7220Sstevel@tonic-gate 			return (upc->up_count);
7230Sstevel@tonic-gate 		}
7240Sstevel@tonic-gate 		upc = upc->up_next;
7250Sstevel@tonic-gate 	}
7260Sstevel@tonic-gate 	return (0);
7270Sstevel@tonic-gate }
728