xref: /onnv-gate/usr/src/uts/common/os/pid.c (revision 2712:f74a135872bc)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*2712Snn35248  * Common Development and Distribution License (the "License").
6*2712Snn35248  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21390Sraf 
220Sstevel@tonic-gate /*
23*2712Snn35248  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate 
310Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <sys/types.h>
340Sstevel@tonic-gate #include <sys/param.h>
350Sstevel@tonic-gate #include <sys/sysmacros.h>
360Sstevel@tonic-gate #include <sys/proc.h>
370Sstevel@tonic-gate #include <sys/kmem.h>
380Sstevel@tonic-gate #include <sys/tuneable.h>
390Sstevel@tonic-gate #include <sys/var.h>
400Sstevel@tonic-gate #include <sys/cred.h>
410Sstevel@tonic-gate #include <sys/systm.h>
420Sstevel@tonic-gate #include <sys/prsystm.h>
430Sstevel@tonic-gate #include <sys/vnode.h>
440Sstevel@tonic-gate #include <sys/session.h>
450Sstevel@tonic-gate #include <sys/cpuvar.h>
460Sstevel@tonic-gate #include <sys/cmn_err.h>
470Sstevel@tonic-gate #include <sys/bitmap.h>
480Sstevel@tonic-gate #include <sys/debug.h>
490Sstevel@tonic-gate #include <c2/audit.h>
500Sstevel@tonic-gate #include <sys/zone.h>
510Sstevel@tonic-gate 
520Sstevel@tonic-gate /* directory entries for /proc */
530Sstevel@tonic-gate union procent {
540Sstevel@tonic-gate 	proc_t *pe_proc;
550Sstevel@tonic-gate 	union procent *pe_next;
560Sstevel@tonic-gate };
570Sstevel@tonic-gate 
580Sstevel@tonic-gate struct pid pid0 = {
590Sstevel@tonic-gate 	0,		/* pid_prinactive */
600Sstevel@tonic-gate 	1,		/* pid_pgorphaned */
610Sstevel@tonic-gate 	0,		/* pid_padding	*/
620Sstevel@tonic-gate 	0,		/* pid_prslot	*/
630Sstevel@tonic-gate 	0,		/* pid_id	*/
640Sstevel@tonic-gate 	NULL,		/* pid_pglink	*/
65749Ssusans 	NULL,		/* pid_pgtail	*/
660Sstevel@tonic-gate 	NULL,		/* pid_link	*/
670Sstevel@tonic-gate 	3		/* pid_ref	*/
680Sstevel@tonic-gate };
690Sstevel@tonic-gate 
700Sstevel@tonic-gate static int pid_hashlen = 4;	/* desired average hash chain length */
710Sstevel@tonic-gate static int pid_hashsz;		/* number of buckets in the hash table */
720Sstevel@tonic-gate 
730Sstevel@tonic-gate #define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])
740Sstevel@tonic-gate 
750Sstevel@tonic-gate extern uint_t nproc;
760Sstevel@tonic-gate extern struct kmem_cache *process_cache;
770Sstevel@tonic-gate static void	upcount_init(void);
780Sstevel@tonic-gate 
790Sstevel@tonic-gate kmutex_t	pidlock;	/* global process lock */
800Sstevel@tonic-gate kmutex_t	pr_pidlock;	/* /proc global process lock */
810Sstevel@tonic-gate kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
820Sstevel@tonic-gate struct plock	*proc_lock;	/* persistent array of p_lock's */
830Sstevel@tonic-gate 
840Sstevel@tonic-gate /*
850Sstevel@tonic-gate  * See the comment above pid_getlockslot() for a detailed explanation of this
860Sstevel@tonic-gate  * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
870Sstevel@tonic-gate  * granularity; if the coherence granularity is ever changed, this constant
880Sstevel@tonic-gate  * should be modified to reflect the change to minimize proc_lock false
890Sstevel@tonic-gate  * sharing (correctness, however, is guaranteed regardless of the coherence
900Sstevel@tonic-gate  * granularity).
910Sstevel@tonic-gate  */
920Sstevel@tonic-gate #define	PLOCK_SHIFT	3
930Sstevel@tonic-gate 
940Sstevel@tonic-gate static kmutex_t	pidlinklock;
950Sstevel@tonic-gate static struct pid **pidhash;
960Sstevel@tonic-gate static pid_t minpid;
970Sstevel@tonic-gate static pid_t mpid;
980Sstevel@tonic-gate static union procent *procdir;
990Sstevel@tonic-gate static union procent *procentfree;
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate static struct pid *
1020Sstevel@tonic-gate pid_lookup(pid_t pid)
1030Sstevel@tonic-gate {
1040Sstevel@tonic-gate 	struct pid *pidp;
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlinklock));
1070Sstevel@tonic-gate 
1080Sstevel@tonic-gate 	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
1090Sstevel@tonic-gate 		if (pidp->pid_id == pid) {
1100Sstevel@tonic-gate 			ASSERT(pidp->pid_ref > 0);
1110Sstevel@tonic-gate 			break;
1120Sstevel@tonic-gate 		}
1130Sstevel@tonic-gate 	}
1140Sstevel@tonic-gate 	return (pidp);
1150Sstevel@tonic-gate }
1160Sstevel@tonic-gate 
117*2712Snn35248 struct pid *
118*2712Snn35248 pid_find(pid_t pid)
119*2712Snn35248 {
120*2712Snn35248 	struct pid *pidp;
121*2712Snn35248 
122*2712Snn35248 	mutex_enter(&pidlinklock);
123*2712Snn35248 	pidp = pid_lookup(pid);
124*2712Snn35248 	mutex_exit(&pidlinklock);
125*2712Snn35248 
126*2712Snn35248 	return (pidp);
127*2712Snn35248 }
128*2712Snn35248 
1290Sstevel@tonic-gate void
1300Sstevel@tonic-gate pid_setmin(void)
1310Sstevel@tonic-gate {
1320Sstevel@tonic-gate 	if (jump_pid && jump_pid > mpid)
1330Sstevel@tonic-gate 		minpid = mpid = jump_pid;
1340Sstevel@tonic-gate 	else
1350Sstevel@tonic-gate 		minpid = mpid + 1;
1360Sstevel@tonic-gate }
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate /*
1390Sstevel@tonic-gate  * When prslots are simply used as an index to determine a process' p_lock,
1400Sstevel@tonic-gate  * adjacent prslots share adjacent p_locks.  On machines where the size
1410Sstevel@tonic-gate  * of a mutex is smaller than that of a cache line (which, as of this writing,
1420Sstevel@tonic-gate  * is true for all machines on which Solaris runs), this can potentially
1430Sstevel@tonic-gate  * induce false sharing.  The standard solution for false sharing is to pad
1440Sstevel@tonic-gate  * out one's data structures (in this case, struct plock).  However,
1450Sstevel@tonic-gate  * given the size and (generally) sparse use of the proc_lock array, this
1460Sstevel@tonic-gate  * is suboptimal.  We therefore stride through the proc_lock array with
1470Sstevel@tonic-gate  * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
1480Sstevel@tonic-gate  *
1490Sstevel@tonic-gate  *   log_2 (coherence_granularity / sizeof (kmutex_t))
1500Sstevel@tonic-gate  *
1510Sstevel@tonic-gate  * Under this scheme, false sharing is still possible -- but only when
1520Sstevel@tonic-gate  * the number of active processes is very large.  Note that the one-to-one
1530Sstevel@tonic-gate  * mapping between prslots and lockslots is maintained.
1540Sstevel@tonic-gate  */
1550Sstevel@tonic-gate static int
1560Sstevel@tonic-gate pid_getlockslot(int prslot)
1570Sstevel@tonic-gate {
1580Sstevel@tonic-gate 	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
1590Sstevel@tonic-gate 	int perlap = even >> PLOCK_SHIFT;
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate 	if (prslot >= even)
1620Sstevel@tonic-gate 		return (prslot);
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
1650Sstevel@tonic-gate }
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate /*
168*2712Snn35248  * This function allocates a pid structure, a free pid, and optionally a
169*2712Snn35248  * slot in the proc table for it.
1700Sstevel@tonic-gate  *
171*2712Snn35248  * pid_allocate() returns the new pid on success, -1 on failure.
1720Sstevel@tonic-gate  */
1730Sstevel@tonic-gate pid_t
174*2712Snn35248 pid_allocate(proc_t *prp, int flags)
1750Sstevel@tonic-gate {
1760Sstevel@tonic-gate 	struct pid *pidp;
1770Sstevel@tonic-gate 	union procent *pep;
1780Sstevel@tonic-gate 	pid_t newpid, startpid;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
183*2712Snn35248 	if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
1840Sstevel@tonic-gate 		/*
1850Sstevel@tonic-gate 		 * ran out of /proc directory entries
1860Sstevel@tonic-gate 		 */
1870Sstevel@tonic-gate 		goto failed;
1880Sstevel@tonic-gate 	}
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	/*
1910Sstevel@tonic-gate 	 * Allocate a pid
1920Sstevel@tonic-gate 	 */
1930Sstevel@tonic-gate 	startpid = mpid;
1940Sstevel@tonic-gate 	do  {
1950Sstevel@tonic-gate 		newpid = (++mpid == maxpid ? mpid = minpid : mpid);
1960Sstevel@tonic-gate 	} while (pid_lookup(newpid) && newpid != startpid);
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	if (newpid == startpid && pid_lookup(newpid)) {
1990Sstevel@tonic-gate 		/* couldn't find a free pid */
2000Sstevel@tonic-gate 		goto failed;
2010Sstevel@tonic-gate 	}
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	/*
2040Sstevel@tonic-gate 	 * Put pid into the pid hash table.
2050Sstevel@tonic-gate 	 */
2060Sstevel@tonic-gate 	pidp->pid_link = HASHPID(newpid);
2070Sstevel@tonic-gate 	HASHPID(newpid) = pidp;
2080Sstevel@tonic-gate 	pidp->pid_ref = 1;
2090Sstevel@tonic-gate 	pidp->pid_id = newpid;
210*2712Snn35248 
211*2712Snn35248 	if (flags & PID_ALLOC_PROC) {
212*2712Snn35248 		procentfree = pep->pe_next;
213*2712Snn35248 		pidp->pid_prslot = pep - procdir;
214*2712Snn35248 		pep->pe_proc = prp;
215*2712Snn35248 		prp->p_pidp = pidp;
216*2712Snn35248 		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
217*2712Snn35248 	} else {
218*2712Snn35248 		pidp->pid_prslot = 0;
219*2712Snn35248 	}
220*2712Snn35248 
2210Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2220Sstevel@tonic-gate 
2230Sstevel@tonic-gate 	return (newpid);
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate failed:
2260Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2270Sstevel@tonic-gate 	kmem_free(pidp, sizeof (struct pid));
2280Sstevel@tonic-gate 	return (-1);
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate /*
2320Sstevel@tonic-gate  * decrement the reference count for pid
2330Sstevel@tonic-gate  */
2340Sstevel@tonic-gate int
2350Sstevel@tonic-gate pid_rele(struct pid *pidp)
2360Sstevel@tonic-gate {
2370Sstevel@tonic-gate 	struct pid **pidpp;
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2400Sstevel@tonic-gate 	ASSERT(pidp != &pid0);
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	pidpp = &HASHPID(pidp->pid_id);
2430Sstevel@tonic-gate 	for (;;) {
2440Sstevel@tonic-gate 		ASSERT(*pidpp != NULL);
2450Sstevel@tonic-gate 		if (*pidpp == pidp)
2460Sstevel@tonic-gate 			break;
2470Sstevel@tonic-gate 		pidpp = &(*pidpp)->pid_link;
2480Sstevel@tonic-gate 	}
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	*pidpp = pidp->pid_link;
2510Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	kmem_free(pidp, sizeof (*pidp));
2540Sstevel@tonic-gate 	return (0);
2550Sstevel@tonic-gate }
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate void
2580Sstevel@tonic-gate proc_entry_free(struct pid *pidp)
2590Sstevel@tonic-gate {
2600Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2610Sstevel@tonic-gate 	pidp->pid_prinactive = 1;
2620Sstevel@tonic-gate 	procdir[pidp->pid_prslot].pe_next = procentfree;
2630Sstevel@tonic-gate 	procentfree = &procdir[pidp->pid_prslot];
2640Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2650Sstevel@tonic-gate }
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate void
2680Sstevel@tonic-gate pid_exit(proc_t *prp)
2690Sstevel@tonic-gate {
2700Sstevel@tonic-gate 	struct pid *pidp;
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	/*
2750Sstevel@tonic-gate 	 * Exit process group.  If it is NULL, it's because fork failed
2760Sstevel@tonic-gate 	 * before calling pgjoin().
2770Sstevel@tonic-gate 	 */
2780Sstevel@tonic-gate 	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
2790Sstevel@tonic-gate 	if (prp->p_pgidp != NULL)
2800Sstevel@tonic-gate 		pgexit(prp);
2810Sstevel@tonic-gate 
282*2712Snn35248 	sess_rele(prp->p_sessp, B_TRUE);
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate 	pidp = prp->p_pidp;
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	proc_entry_free(pidp);
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate #ifdef C2_AUDIT
2890Sstevel@tonic-gate 	if (audit_active)
2900Sstevel@tonic-gate 		audit_pfree(prp);
2910Sstevel@tonic-gate #endif
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	if (practive == prp) {
2940Sstevel@tonic-gate 		practive = prp->p_next;
2950Sstevel@tonic-gate 	}
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	if (prp->p_next) {
2980Sstevel@tonic-gate 		prp->p_next->p_prev = prp->p_prev;
2990Sstevel@tonic-gate 	}
3000Sstevel@tonic-gate 	if (prp->p_prev) {
3010Sstevel@tonic-gate 		prp->p_prev->p_next = prp->p_next;
3020Sstevel@tonic-gate 	}
3030Sstevel@tonic-gate 
3040Sstevel@tonic-gate 	PID_RELE(pidp);
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	mutex_destroy(&prp->p_crlock);
3070Sstevel@tonic-gate 	kmem_cache_free(process_cache, prp);
3080Sstevel@tonic-gate 	nproc--;
3090Sstevel@tonic-gate }
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate /*
3120Sstevel@tonic-gate  * Find a process visible from the specified zone given its process ID.
3130Sstevel@tonic-gate  */
3140Sstevel@tonic-gate proc_t *
3150Sstevel@tonic-gate prfind_zone(pid_t pid, zoneid_t zoneid)
3160Sstevel@tonic-gate {
3170Sstevel@tonic-gate 	struct pid *pidp;
3180Sstevel@tonic-gate 	proc_t *p;
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3230Sstevel@tonic-gate 	pidp = pid_lookup(pid);
3240Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3250Sstevel@tonic-gate 	if (pidp != NULL && pidp->pid_prinactive == 0) {
3260Sstevel@tonic-gate 		p = procdir[pidp->pid_prslot].pe_proc;
3270Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
3280Sstevel@tonic-gate 			return (p);
3290Sstevel@tonic-gate 	}
3300Sstevel@tonic-gate 	return (NULL);
3310Sstevel@tonic-gate }
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate /*
3340Sstevel@tonic-gate  * Find a process given its process ID.  This obeys zone restrictions,
3350Sstevel@tonic-gate  * so if the caller is in a non-global zone it won't find processes
3360Sstevel@tonic-gate  * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
3370Sstevel@tonic-gate  * bypass this restriction.
3380Sstevel@tonic-gate  */
3390Sstevel@tonic-gate proc_t *
3400Sstevel@tonic-gate prfind(pid_t pid)
3410Sstevel@tonic-gate {
3420Sstevel@tonic-gate 	zoneid_t zoneid;
3430Sstevel@tonic-gate 
3440Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3450Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3460Sstevel@tonic-gate 	else
3470Sstevel@tonic-gate 		zoneid = getzoneid();
3480Sstevel@tonic-gate 	return (prfind_zone(pid, zoneid));
3490Sstevel@tonic-gate }
3500Sstevel@tonic-gate 
3510Sstevel@tonic-gate proc_t *
3520Sstevel@tonic-gate pgfind_zone(pid_t pgid, zoneid_t zoneid)
3530Sstevel@tonic-gate {
3540Sstevel@tonic-gate 	struct pid *pidp;
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3590Sstevel@tonic-gate 	pidp = pid_lookup(pgid);
3600Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3610Sstevel@tonic-gate 	if (pidp != NULL) {
3620Sstevel@tonic-gate 		proc_t *p = pidp->pid_pglink;
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
3650Sstevel@tonic-gate 		    p->p_zone->zone_id == zoneid)
3660Sstevel@tonic-gate 			return (p);
3670Sstevel@tonic-gate 	}
3680Sstevel@tonic-gate 	return (NULL);
3690Sstevel@tonic-gate }
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate /*
3720Sstevel@tonic-gate  * return the head of the list of processes whose process group ID is 'pgid',
3730Sstevel@tonic-gate  * or NULL, if no such process group
3740Sstevel@tonic-gate  */
3750Sstevel@tonic-gate proc_t *
3760Sstevel@tonic-gate pgfind(pid_t pgid)
3770Sstevel@tonic-gate {
3780Sstevel@tonic-gate 	zoneid_t zoneid;
3790Sstevel@tonic-gate 
3800Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3810Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3820Sstevel@tonic-gate 	else
3830Sstevel@tonic-gate 		zoneid = getzoneid();
3840Sstevel@tonic-gate 	return (pgfind_zone(pgid, zoneid));
3850Sstevel@tonic-gate }
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate /*
3880Sstevel@tonic-gate  * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
3890Sstevel@tonic-gate  * Returns the proc pointer on success, NULL on failure.  sprlock() is
3900Sstevel@tonic-gate  * really just a stripped-down version of pr_p_lock() to allow practive
3910Sstevel@tonic-gate  * walkers like dofusers() and dumpsys() to synchronize with /proc.
3920Sstevel@tonic-gate  */
3930Sstevel@tonic-gate proc_t *
3940Sstevel@tonic-gate sprlock_zone(pid_t pid, zoneid_t zoneid)
3950Sstevel@tonic-gate {
3960Sstevel@tonic-gate 	proc_t *p;
3970Sstevel@tonic-gate 	kmutex_t *mp;
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	for (;;) {
4000Sstevel@tonic-gate 		mutex_enter(&pidlock);
4010Sstevel@tonic-gate 		if ((p = prfind_zone(pid, zoneid)) == NULL) {
4020Sstevel@tonic-gate 			mutex_exit(&pidlock);
4030Sstevel@tonic-gate 			return (NULL);
4040Sstevel@tonic-gate 		}
4050Sstevel@tonic-gate 		/*
4060Sstevel@tonic-gate 		 * p_lock is persistent, but p itself is not -- it could
4070Sstevel@tonic-gate 		 * vanish during cv_wait().  Load p->p_lock now so we can
4080Sstevel@tonic-gate 		 * drop it after cv_wait() without referencing p.
4090Sstevel@tonic-gate 		 */
4100Sstevel@tonic-gate 		mp = &p->p_lock;
4110Sstevel@tonic-gate 		mutex_enter(mp);
4120Sstevel@tonic-gate 		mutex_exit(&pidlock);
4130Sstevel@tonic-gate 		/*
4140Sstevel@tonic-gate 		 * If the process is in some half-baked state, fail.
4150Sstevel@tonic-gate 		 */
4160Sstevel@tonic-gate 		if (p->p_stat == SZOMB || p->p_stat == SIDL ||
417390Sraf 		    (p->p_flag & (SEXITING | SEXITLWPS))) {
4180Sstevel@tonic-gate 			mutex_exit(mp);
4190Sstevel@tonic-gate 			return (NULL);
4200Sstevel@tonic-gate 		}
4210Sstevel@tonic-gate 		if (panicstr)
4220Sstevel@tonic-gate 			return (p);
4230Sstevel@tonic-gate 		if (!(p->p_proc_flag & P_PR_LOCK))
4240Sstevel@tonic-gate 			break;
4250Sstevel@tonic-gate 		cv_wait(&pr_pid_cv[p->p_slot], mp);
4260Sstevel@tonic-gate 		mutex_exit(mp);
4270Sstevel@tonic-gate 	}
4280Sstevel@tonic-gate 	p->p_proc_flag |= P_PR_LOCK;
4290Sstevel@tonic-gate 	THREAD_KPRI_REQUEST();
4300Sstevel@tonic-gate 	return (p);
4310Sstevel@tonic-gate }
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate proc_t *
4340Sstevel@tonic-gate sprlock(pid_t pid)
4350Sstevel@tonic-gate {
4360Sstevel@tonic-gate 	zoneid_t zoneid;
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
4390Sstevel@tonic-gate 		zoneid = ALL_ZONES;
4400Sstevel@tonic-gate 	else
4410Sstevel@tonic-gate 		zoneid = getzoneid();
4420Sstevel@tonic-gate 	return (sprlock_zone(pid, zoneid));
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate void
4460Sstevel@tonic-gate sprlock_proc(proc_t *p)
4470Sstevel@tonic-gate {
4480Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
4490Sstevel@tonic-gate 
4500Sstevel@tonic-gate 	while (p->p_proc_flag & P_PR_LOCK) {
4510Sstevel@tonic-gate 		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
4520Sstevel@tonic-gate 	}
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate 	p->p_proc_flag |= P_PR_LOCK;
4550Sstevel@tonic-gate 	THREAD_KPRI_REQUEST();
4560Sstevel@tonic-gate }
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate void
4590Sstevel@tonic-gate sprunlock(proc_t *p)
4600Sstevel@tonic-gate {
4610Sstevel@tonic-gate 	if (panicstr) {
4620Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
4630Sstevel@tonic-gate 		return;
4640Sstevel@tonic-gate 	}
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate 	ASSERT(p->p_proc_flag & P_PR_LOCK);
4670Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
4680Sstevel@tonic-gate 
4690Sstevel@tonic-gate 	cv_signal(&pr_pid_cv[p->p_slot]);
4700Sstevel@tonic-gate 	p->p_proc_flag &= ~P_PR_LOCK;
4710Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
4720Sstevel@tonic-gate 	THREAD_KPRI_RELEASE();
4730Sstevel@tonic-gate }
4740Sstevel@tonic-gate 
4750Sstevel@tonic-gate void
4760Sstevel@tonic-gate pid_init(void)
4770Sstevel@tonic-gate {
4780Sstevel@tonic-gate 	int i;
4790Sstevel@tonic-gate 
4800Sstevel@tonic-gate 	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
4810Sstevel@tonic-gate 
4820Sstevel@tonic-gate 	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
4830Sstevel@tonic-gate 	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
4840Sstevel@tonic-gate 	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
4850Sstevel@tonic-gate 	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	nproc = 1;
4880Sstevel@tonic-gate 	practive = proc_sched;
4890Sstevel@tonic-gate 	proc_sched->p_next = NULL;
4900Sstevel@tonic-gate 	procdir[0].pe_proc = proc_sched;
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate 	procentfree = &procdir[1];
4930Sstevel@tonic-gate 	for (i = 1; i < v.v_proc - 1; i++)
4940Sstevel@tonic-gate 		procdir[i].pe_next = &procdir[i+1];
4950Sstevel@tonic-gate 	procdir[i].pe_next = NULL;
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 	HASHPID(0) = &pid0;
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 	upcount_init();
5000Sstevel@tonic-gate }
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate proc_t *
5030Sstevel@tonic-gate pid_entry(int slot)
5040Sstevel@tonic-gate {
5050Sstevel@tonic-gate 	union procent *pep;
5060Sstevel@tonic-gate 	proc_t *prp;
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
5090Sstevel@tonic-gate 	ASSERT(slot >= 0 && slot < v.v_proc);
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate 	pep = procdir[slot].pe_next;
5120Sstevel@tonic-gate 	if (pep >= procdir && pep < &procdir[v.v_proc])
5130Sstevel@tonic-gate 		return (NULL);
5140Sstevel@tonic-gate 	prp = procdir[slot].pe_proc;
5150Sstevel@tonic-gate 	if (prp != 0 && prp->p_stat == SIDL)
5160Sstevel@tonic-gate 		return (NULL);
5170Sstevel@tonic-gate 	return (prp);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate /*
5210Sstevel@tonic-gate  * Send the specified signal to all processes whose process group ID is
5220Sstevel@tonic-gate  * equal to 'pgid'
5230Sstevel@tonic-gate  */
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate void
5260Sstevel@tonic-gate signal(pid_t pgid, int sig)
5270Sstevel@tonic-gate {
5280Sstevel@tonic-gate 	struct pid *pidp;
5290Sstevel@tonic-gate 	proc_t *prp;
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 	mutex_enter(&pidlock);
5320Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
5330Sstevel@tonic-gate 	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
5340Sstevel@tonic-gate 		mutex_exit(&pidlinklock);
5350Sstevel@tonic-gate 		mutex_exit(&pidlock);
5360Sstevel@tonic-gate 		return;
5370Sstevel@tonic-gate 	}
5380Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
5390Sstevel@tonic-gate 	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
5400Sstevel@tonic-gate 		mutex_enter(&prp->p_lock);
5410Sstevel@tonic-gate 		sigtoproc(prp, NULL, sig);
5420Sstevel@tonic-gate 		mutex_exit(&prp->p_lock);
5430Sstevel@tonic-gate 	}
5440Sstevel@tonic-gate 	mutex_exit(&pidlock);
5450Sstevel@tonic-gate }
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate /*
5480Sstevel@tonic-gate  * Send the specified signal to the specified process
5490Sstevel@tonic-gate  */
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate void
5520Sstevel@tonic-gate prsignal(struct pid *pidp, int sig)
5530Sstevel@tonic-gate {
5540Sstevel@tonic-gate 	if (!(pidp->pid_prinactive))
5550Sstevel@tonic-gate 		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate #include <sys/sunddi.h>
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate /*
5610Sstevel@tonic-gate  * DDI/DKI interfaces for drivers to send signals to processes
5620Sstevel@tonic-gate  */
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate /*
5650Sstevel@tonic-gate  * obtain an opaque reference to a process for signaling
5660Sstevel@tonic-gate  */
5670Sstevel@tonic-gate void *
5680Sstevel@tonic-gate proc_ref(void)
5690Sstevel@tonic-gate {
5700Sstevel@tonic-gate 	struct pid *pidp;
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate 	mutex_enter(&pidlock);
5730Sstevel@tonic-gate 	pidp = curproc->p_pidp;
5740Sstevel@tonic-gate 	PID_HOLD(pidp);
5750Sstevel@tonic-gate 	mutex_exit(&pidlock);
5760Sstevel@tonic-gate 
5770Sstevel@tonic-gate 	return (pidp);
5780Sstevel@tonic-gate }
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate /*
5810Sstevel@tonic-gate  * release a reference to a process
5820Sstevel@tonic-gate  * - a process can exit even if a driver has a reference to it
5830Sstevel@tonic-gate  * - one proc_unref for every proc_ref
5840Sstevel@tonic-gate  */
5850Sstevel@tonic-gate void
5860Sstevel@tonic-gate proc_unref(void *pref)
5870Sstevel@tonic-gate {
5880Sstevel@tonic-gate 	mutex_enter(&pidlock);
5890Sstevel@tonic-gate 	PID_RELE((struct pid *)pref);
5900Sstevel@tonic-gate 	mutex_exit(&pidlock);
5910Sstevel@tonic-gate }
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate /*
5940Sstevel@tonic-gate  * send a signal to a process
5950Sstevel@tonic-gate  *
5960Sstevel@tonic-gate  * - send the process the signal
5970Sstevel@tonic-gate  * - if the process went away, return a -1
5980Sstevel@tonic-gate  * - if the process is still there return 0
5990Sstevel@tonic-gate  */
6000Sstevel@tonic-gate int
6010Sstevel@tonic-gate proc_signal(void *pref, int sig)
6020Sstevel@tonic-gate {
6030Sstevel@tonic-gate 	struct pid *pidp = pref;
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate 	prsignal(pidp, sig);
6060Sstevel@tonic-gate 	return (pidp->pid_prinactive ? -1 : 0);
6070Sstevel@tonic-gate }
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 
6100Sstevel@tonic-gate static struct upcount	**upc_hash;	/* a boot time allocated array */
6110Sstevel@tonic-gate static ulong_t		upc_hashmask;
6120Sstevel@tonic-gate #define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate /*
6150Sstevel@tonic-gate  * Get us off the ground.  Called once at boot.
6160Sstevel@tonic-gate  */
6170Sstevel@tonic-gate void
6180Sstevel@tonic-gate upcount_init(void)
6190Sstevel@tonic-gate {
6200Sstevel@tonic-gate 	ulong_t	upc_hashsize;
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	/*
6230Sstevel@tonic-gate 	 * An entry per MB of memory is our current guess
6240Sstevel@tonic-gate 	 */
6250Sstevel@tonic-gate 	/*
6260Sstevel@tonic-gate 	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
6270Sstevel@tonic-gate 	 * converts pages to megs (without overflowing a u_int
6280Sstevel@tonic-gate 	 * if you have more than 4G of memory, like ptob(physmem)/1M
6290Sstevel@tonic-gate 	 * would).
6300Sstevel@tonic-gate 	 */
6310Sstevel@tonic-gate 	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
6320Sstevel@tonic-gate 	upc_hashmask = upc_hashsize - 1;
6330Sstevel@tonic-gate 	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
6340Sstevel@tonic-gate 	    KM_SLEEP);
6350Sstevel@tonic-gate }
6360Sstevel@tonic-gate 
6370Sstevel@tonic-gate /*
6380Sstevel@tonic-gate  * Increment the number of processes associated with a given uid and zoneid.
6390Sstevel@tonic-gate  */
6400Sstevel@tonic-gate void
6410Sstevel@tonic-gate upcount_inc(uid_t uid, zoneid_t zoneid)
6420Sstevel@tonic-gate {
6430Sstevel@tonic-gate 	struct upcount	**upc, **hupc;
6440Sstevel@tonic-gate 	struct upcount	*new;
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
6470Sstevel@tonic-gate 	new = NULL;
6480Sstevel@tonic-gate 	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
6490Sstevel@tonic-gate top:
6500Sstevel@tonic-gate 	upc = hupc;
6510Sstevel@tonic-gate 	while ((*upc) != NULL) {
6520Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
6530Sstevel@tonic-gate 			(*upc)->up_count++;
6540Sstevel@tonic-gate 			if (new) {
6550Sstevel@tonic-gate 				/*
6560Sstevel@tonic-gate 				 * did not need `new' afterall.
6570Sstevel@tonic-gate 				 */
6580Sstevel@tonic-gate 				kmem_free(new, sizeof (*new));
6590Sstevel@tonic-gate 			}
6600Sstevel@tonic-gate 			return;
6610Sstevel@tonic-gate 		}
6620Sstevel@tonic-gate 		upc = &(*upc)->up_next;
6630Sstevel@tonic-gate 	}
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	/*
6660Sstevel@tonic-gate 	 * There is no entry for this <uid,zoneid> pair.
6670Sstevel@tonic-gate 	 * Allocate one.  If we have to drop pidlock, check
6680Sstevel@tonic-gate 	 * again.
6690Sstevel@tonic-gate 	 */
6700Sstevel@tonic-gate 	if (new == NULL) {
6710Sstevel@tonic-gate 		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
6720Sstevel@tonic-gate 		if (new == NULL) {
6730Sstevel@tonic-gate 			mutex_exit(&pidlock);
6740Sstevel@tonic-gate 			new = (struct upcount *)kmem_alloc(sizeof (*new),
6750Sstevel@tonic-gate 			    KM_SLEEP);
6760Sstevel@tonic-gate 			mutex_enter(&pidlock);
6770Sstevel@tonic-gate 			goto top;
6780Sstevel@tonic-gate 		}
6790Sstevel@tonic-gate 	}
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 
6820Sstevel@tonic-gate 	/*
6830Sstevel@tonic-gate 	 * On the assumption that a new user is going to do some
6840Sstevel@tonic-gate 	 * more forks, put the new upcount structure on the front.
6850Sstevel@tonic-gate 	 */
6860Sstevel@tonic-gate 	upc = hupc;
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	new->up_uid = uid;
6890Sstevel@tonic-gate 	new->up_zoneid = zoneid;
6900Sstevel@tonic-gate 	new->up_count = 1;
6910Sstevel@tonic-gate 	new->up_next = *upc;
6920Sstevel@tonic-gate 
6930Sstevel@tonic-gate 	*upc = new;
6940Sstevel@tonic-gate }
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate /*
6970Sstevel@tonic-gate  * Decrement the number of processes a given uid and zoneid has.
6980Sstevel@tonic-gate  */
6990Sstevel@tonic-gate void
7000Sstevel@tonic-gate upcount_dec(uid_t uid, zoneid_t zoneid)
7010Sstevel@tonic-gate {
7020Sstevel@tonic-gate 	struct	upcount **upc;
7030Sstevel@tonic-gate 	struct	upcount *done;
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7060Sstevel@tonic-gate 
7070Sstevel@tonic-gate 	upc = &upc_hash[UPC_HASH(uid, zoneid)];
7080Sstevel@tonic-gate 	while ((*upc) != NULL) {
7090Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
7100Sstevel@tonic-gate 			(*upc)->up_count--;
7110Sstevel@tonic-gate 			if ((*upc)->up_count == 0) {
7120Sstevel@tonic-gate 				done = *upc;
7130Sstevel@tonic-gate 				*upc = (*upc)->up_next;
7140Sstevel@tonic-gate 				kmem_free(done, sizeof (*done));
7150Sstevel@tonic-gate 			}
7160Sstevel@tonic-gate 			return;
7170Sstevel@tonic-gate 		}
7180Sstevel@tonic-gate 		upc = &(*upc)->up_next;
7190Sstevel@tonic-gate 	}
7200Sstevel@tonic-gate 	cmn_err(CE_PANIC, "decr_upcount-off the end");
7210Sstevel@tonic-gate }
7220Sstevel@tonic-gate 
7230Sstevel@tonic-gate /*
7240Sstevel@tonic-gate  * Returns the number of processes a uid has.
7250Sstevel@tonic-gate  * Non-existent uid's are assumed to have no processes.
7260Sstevel@tonic-gate  */
7270Sstevel@tonic-gate int
7280Sstevel@tonic-gate upcount_get(uid_t uid, zoneid_t zoneid)
7290Sstevel@tonic-gate {
7300Sstevel@tonic-gate 	struct	upcount *upc;
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate 	upc = upc_hash[UPC_HASH(uid, zoneid)];
7350Sstevel@tonic-gate 	while (upc != NULL) {
7360Sstevel@tonic-gate 		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
7370Sstevel@tonic-gate 			return (upc->up_count);
7380Sstevel@tonic-gate 		}
7390Sstevel@tonic-gate 		upc = upc->up_next;
7400Sstevel@tonic-gate 	}
7410Sstevel@tonic-gate 	return (0);
7420Sstevel@tonic-gate }
743