xref: /onnv-gate/usr/src/uts/common/os/pid.c (revision 11173:87f3734e64df)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52712Snn35248  * Common Development and Distribution License (the "License").
62712Snn35248  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21390Sraf 
220Sstevel@tonic-gate /*
23*11173SJonathan.Adams@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #include <sys/types.h>
310Sstevel@tonic-gate #include <sys/param.h>
320Sstevel@tonic-gate #include <sys/sysmacros.h>
330Sstevel@tonic-gate #include <sys/proc.h>
340Sstevel@tonic-gate #include <sys/kmem.h>
350Sstevel@tonic-gate #include <sys/tuneable.h>
360Sstevel@tonic-gate #include <sys/var.h>
370Sstevel@tonic-gate #include <sys/cred.h>
380Sstevel@tonic-gate #include <sys/systm.h>
390Sstevel@tonic-gate #include <sys/prsystm.h>
400Sstevel@tonic-gate #include <sys/vnode.h>
410Sstevel@tonic-gate #include <sys/session.h>
420Sstevel@tonic-gate #include <sys/cpuvar.h>
430Sstevel@tonic-gate #include <sys/cmn_err.h>
440Sstevel@tonic-gate #include <sys/bitmap.h>
450Sstevel@tonic-gate #include <sys/debug.h>
460Sstevel@tonic-gate #include <c2/audit.h>
470Sstevel@tonic-gate #include <sys/zone.h>
480Sstevel@tonic-gate 
490Sstevel@tonic-gate /* directory entries for /proc */
500Sstevel@tonic-gate union procent {
510Sstevel@tonic-gate 	proc_t *pe_proc;
520Sstevel@tonic-gate 	union procent *pe_next;
530Sstevel@tonic-gate };
540Sstevel@tonic-gate 
550Sstevel@tonic-gate struct pid pid0 = {
560Sstevel@tonic-gate 	0,		/* pid_prinactive */
570Sstevel@tonic-gate 	1,		/* pid_pgorphaned */
580Sstevel@tonic-gate 	0,		/* pid_padding	*/
590Sstevel@tonic-gate 	0,		/* pid_prslot	*/
600Sstevel@tonic-gate 	0,		/* pid_id	*/
610Sstevel@tonic-gate 	NULL,		/* pid_pglink	*/
62749Ssusans 	NULL,		/* pid_pgtail	*/
630Sstevel@tonic-gate 	NULL,		/* pid_link	*/
640Sstevel@tonic-gate 	3		/* pid_ref	*/
650Sstevel@tonic-gate };
660Sstevel@tonic-gate 
/* pid hash table sizing; pid_hashsz is computed once in pid_init() */
static int pid_hashlen = 4;	/* target mean length of a hash chain */
static int pid_hashsz;		/* bucket count (a power of two) */

/* bucket head for 'pid'; the mask relies on pid_hashsz being 2^n */
#define	HASHPID(pid)	(pidhash[(pid) & (pid_hashsz - 1)])
710Sstevel@tonic-gate 
720Sstevel@tonic-gate extern uint_t nproc;
730Sstevel@tonic-gate extern struct kmem_cache *process_cache;
740Sstevel@tonic-gate static void	upcount_init(void);
750Sstevel@tonic-gate 
760Sstevel@tonic-gate kmutex_t	pidlock;	/* global process lock */
770Sstevel@tonic-gate kmutex_t	pr_pidlock;	/* /proc global process lock */
780Sstevel@tonic-gate kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
790Sstevel@tonic-gate struct plock	*proc_lock;	/* persistent array of p_lock's */
800Sstevel@tonic-gate 
/*
 * Shift used by pid_getlockslot() to spread neighbouring prslots across
 * the proc_lock array; the comment above that function gives the full
 * rationale.  A value of 3 corresponds to a 64-byte coherence granularity.
 * Should the coherence granularity change, adjust this constant so that
 * proc_lock false sharing stays minimal.  This is purely a performance
 * knob: correctness holds for any coherence granularity.
 */
#define	PLOCK_SHIFT	3
900Sstevel@tonic-gate 
910Sstevel@tonic-gate static kmutex_t	pidlinklock;
920Sstevel@tonic-gate static struct pid **pidhash;
930Sstevel@tonic-gate static pid_t minpid;
94*11173SJonathan.Adams@Sun.COM static pid_t mpid = FAMOUS_PIDS;	/* one more than the last famous pid */
950Sstevel@tonic-gate static union procent *procdir;
960Sstevel@tonic-gate static union procent *procentfree;
970Sstevel@tonic-gate 
980Sstevel@tonic-gate static struct pid *
990Sstevel@tonic-gate pid_lookup(pid_t pid)
1000Sstevel@tonic-gate {
1010Sstevel@tonic-gate 	struct pid *pidp;
1020Sstevel@tonic-gate 
1030Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlinklock));
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
1060Sstevel@tonic-gate 		if (pidp->pid_id == pid) {
1070Sstevel@tonic-gate 			ASSERT(pidp->pid_ref > 0);
1080Sstevel@tonic-gate 			break;
1090Sstevel@tonic-gate 		}
1100Sstevel@tonic-gate 	}
1110Sstevel@tonic-gate 	return (pidp);
1120Sstevel@tonic-gate }
1130Sstevel@tonic-gate 
1142712Snn35248 struct pid *
1152712Snn35248 pid_find(pid_t pid)
1162712Snn35248 {
1172712Snn35248 	struct pid *pidp;
1182712Snn35248 
1192712Snn35248 	mutex_enter(&pidlinklock);
1202712Snn35248 	pidp = pid_lookup(pid);
1212712Snn35248 	mutex_exit(&pidlinklock);
1222712Snn35248 
1232712Snn35248 	return (pidp);
1242712Snn35248 }
1252712Snn35248 
1260Sstevel@tonic-gate void
1270Sstevel@tonic-gate pid_setmin(void)
1280Sstevel@tonic-gate {
1290Sstevel@tonic-gate 	if (jump_pid && jump_pid > mpid)
1300Sstevel@tonic-gate 		minpid = mpid = jump_pid;
1310Sstevel@tonic-gate 	else
132*11173SJonathan.Adams@Sun.COM 		minpid = mpid;
1330Sstevel@tonic-gate }
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate /*
1360Sstevel@tonic-gate  * When prslots are simply used as an index to determine a process' p_lock,
1370Sstevel@tonic-gate  * adjacent prslots share adjacent p_locks.  On machines where the size
1380Sstevel@tonic-gate  * of a mutex is smaller than that of a cache line (which, as of this writing,
1390Sstevel@tonic-gate  * is true for all machines on which Solaris runs), this can potentially
1400Sstevel@tonic-gate  * induce false sharing.  The standard solution for false sharing is to pad
1410Sstevel@tonic-gate  * out one's data structures (in this case, struct plock).  However,
1420Sstevel@tonic-gate  * given the size and (generally) sparse use of the proc_lock array, this
1430Sstevel@tonic-gate  * is suboptimal.  We therefore stride through the proc_lock array with
1440Sstevel@tonic-gate  * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
1450Sstevel@tonic-gate  *
1460Sstevel@tonic-gate  *   log_2 (coherence_granularity / sizeof (kmutex_t))
1470Sstevel@tonic-gate  *
1480Sstevel@tonic-gate  * Under this scheme, false sharing is still possible -- but only when
1490Sstevel@tonic-gate  * the number of active processes is very large.  Note that the one-to-one
1500Sstevel@tonic-gate  * mapping between prslots and lockslots is maintained.
1510Sstevel@tonic-gate  */
1520Sstevel@tonic-gate static int
1530Sstevel@tonic-gate pid_getlockslot(int prslot)
1540Sstevel@tonic-gate {
1550Sstevel@tonic-gate 	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
1560Sstevel@tonic-gate 	int perlap = even >> PLOCK_SHIFT;
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 	if (prslot >= even)
1590Sstevel@tonic-gate 		return (prslot);
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate 	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
1620Sstevel@tonic-gate }
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate /*
1652712Snn35248  * This function allocates a pid structure, a free pid, and optionally a
1662712Snn35248  * slot in the proc table for it.
1670Sstevel@tonic-gate  *
1682712Snn35248  * pid_allocate() returns the new pid on success, -1 on failure.
1690Sstevel@tonic-gate  */
1700Sstevel@tonic-gate pid_t
171*11173SJonathan.Adams@Sun.COM pid_allocate(proc_t *prp, pid_t pid, int flags)
1720Sstevel@tonic-gate {
1730Sstevel@tonic-gate 	struct pid *pidp;
1740Sstevel@tonic-gate 	union procent *pep;
1750Sstevel@tonic-gate 	pid_t newpid, startpid;
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
1802712Snn35248 	if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
1810Sstevel@tonic-gate 		/*
1820Sstevel@tonic-gate 		 * ran out of /proc directory entries
1830Sstevel@tonic-gate 		 */
1840Sstevel@tonic-gate 		goto failed;
1850Sstevel@tonic-gate 	}
1860Sstevel@tonic-gate 
187*11173SJonathan.Adams@Sun.COM 	if (pid != 0) {
188*11173SJonathan.Adams@Sun.COM 		VERIFY(minpid == 0);
189*11173SJonathan.Adams@Sun.COM 		VERIFY3P(pid, <, mpid);
190*11173SJonathan.Adams@Sun.COM 		VERIFY3P(pid_lookup(pid), ==, NULL);
191*11173SJonathan.Adams@Sun.COM 		newpid = pid;
192*11173SJonathan.Adams@Sun.COM 	} else {
193*11173SJonathan.Adams@Sun.COM 		/*
194*11173SJonathan.Adams@Sun.COM 		 * Allocate a pid
195*11173SJonathan.Adams@Sun.COM 		 */
196*11173SJonathan.Adams@Sun.COM 		ASSERT(minpid <= mpid && mpid <= maxpid);
1970Sstevel@tonic-gate 
198*11173SJonathan.Adams@Sun.COM 		startpid = mpid;
199*11173SJonathan.Adams@Sun.COM 		for (;;) {
200*11173SJonathan.Adams@Sun.COM 			newpid = mpid;
201*11173SJonathan.Adams@Sun.COM 			if (mpid >= maxpid)
202*11173SJonathan.Adams@Sun.COM 				mpid = minpid;
203*11173SJonathan.Adams@Sun.COM 			else
204*11173SJonathan.Adams@Sun.COM 				mpid++;
205*11173SJonathan.Adams@Sun.COM 
206*11173SJonathan.Adams@Sun.COM 			if (pid_lookup(newpid) == NULL)
207*11173SJonathan.Adams@Sun.COM 				break;
208*11173SJonathan.Adams@Sun.COM 
209*11173SJonathan.Adams@Sun.COM 			if (mpid == startpid)
210*11173SJonathan.Adams@Sun.COM 				goto failed;
211*11173SJonathan.Adams@Sun.COM 		}
2120Sstevel@tonic-gate 	}
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	/*
2150Sstevel@tonic-gate 	 * Put pid into the pid hash table.
2160Sstevel@tonic-gate 	 */
2170Sstevel@tonic-gate 	pidp->pid_link = HASHPID(newpid);
2180Sstevel@tonic-gate 	HASHPID(newpid) = pidp;
2190Sstevel@tonic-gate 	pidp->pid_ref = 1;
2200Sstevel@tonic-gate 	pidp->pid_id = newpid;
2212712Snn35248 
2222712Snn35248 	if (flags & PID_ALLOC_PROC) {
2232712Snn35248 		procentfree = pep->pe_next;
2242712Snn35248 		pidp->pid_prslot = pep - procdir;
2252712Snn35248 		pep->pe_proc = prp;
2262712Snn35248 		prp->p_pidp = pidp;
2272712Snn35248 		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
2282712Snn35248 	} else {
2292712Snn35248 		pidp->pid_prslot = 0;
2302712Snn35248 	}
2312712Snn35248 
2320Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 	return (newpid);
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate failed:
2370Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2380Sstevel@tonic-gate 	kmem_free(pidp, sizeof (struct pid));
2390Sstevel@tonic-gate 	return (-1);
2400Sstevel@tonic-gate }
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate /*
2430Sstevel@tonic-gate  * decrement the reference count for pid
2440Sstevel@tonic-gate  */
2450Sstevel@tonic-gate int
2460Sstevel@tonic-gate pid_rele(struct pid *pidp)
2470Sstevel@tonic-gate {
2480Sstevel@tonic-gate 	struct pid **pidpp;
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2510Sstevel@tonic-gate 	ASSERT(pidp != &pid0);
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	pidpp = &HASHPID(pidp->pid_id);
2540Sstevel@tonic-gate 	for (;;) {
2550Sstevel@tonic-gate 		ASSERT(*pidpp != NULL);
2560Sstevel@tonic-gate 		if (*pidpp == pidp)
2570Sstevel@tonic-gate 			break;
2580Sstevel@tonic-gate 		pidpp = &(*pidpp)->pid_link;
2590Sstevel@tonic-gate 	}
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	*pidpp = pidp->pid_link;
2620Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate 	kmem_free(pidp, sizeof (*pidp));
2650Sstevel@tonic-gate 	return (0);
2660Sstevel@tonic-gate }
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate void
2690Sstevel@tonic-gate proc_entry_free(struct pid *pidp)
2700Sstevel@tonic-gate {
2710Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
2720Sstevel@tonic-gate 	pidp->pid_prinactive = 1;
2730Sstevel@tonic-gate 	procdir[pidp->pid_prslot].pe_next = procentfree;
2740Sstevel@tonic-gate 	procentfree = &procdir[pidp->pid_prslot];
2750Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
2760Sstevel@tonic-gate }
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate void
2790Sstevel@tonic-gate pid_exit(proc_t *prp)
2800Sstevel@tonic-gate {
2810Sstevel@tonic-gate 	struct pid *pidp;
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	/*
2860Sstevel@tonic-gate 	 * Exit process group.  If it is NULL, it's because fork failed
2870Sstevel@tonic-gate 	 * before calling pgjoin().
2880Sstevel@tonic-gate 	 */
2890Sstevel@tonic-gate 	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
2900Sstevel@tonic-gate 	if (prp->p_pgidp != NULL)
2910Sstevel@tonic-gate 		pgexit(prp);
2920Sstevel@tonic-gate 
2932712Snn35248 	sess_rele(prp->p_sessp, B_TRUE);
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate 	pidp = prp->p_pidp;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	proc_entry_free(pidp);
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	if (audit_active)
3000Sstevel@tonic-gate 		audit_pfree(prp);
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	if (practive == prp) {
3030Sstevel@tonic-gate 		practive = prp->p_next;
3040Sstevel@tonic-gate 	}
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	if (prp->p_next) {
3070Sstevel@tonic-gate 		prp->p_next->p_prev = prp->p_prev;
3080Sstevel@tonic-gate 	}
3090Sstevel@tonic-gate 	if (prp->p_prev) {
3100Sstevel@tonic-gate 		prp->p_prev->p_next = prp->p_next;
3110Sstevel@tonic-gate 	}
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	PID_RELE(pidp);
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	mutex_destroy(&prp->p_crlock);
3160Sstevel@tonic-gate 	kmem_cache_free(process_cache, prp);
3170Sstevel@tonic-gate 	nproc--;
3180Sstevel@tonic-gate }
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate /*
3210Sstevel@tonic-gate  * Find a process visible from the specified zone given its process ID.
3220Sstevel@tonic-gate  */
3230Sstevel@tonic-gate proc_t *
3240Sstevel@tonic-gate prfind_zone(pid_t pid, zoneid_t zoneid)
3250Sstevel@tonic-gate {
3260Sstevel@tonic-gate 	struct pid *pidp;
3270Sstevel@tonic-gate 	proc_t *p;
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3320Sstevel@tonic-gate 	pidp = pid_lookup(pid);
3330Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3340Sstevel@tonic-gate 	if (pidp != NULL && pidp->pid_prinactive == 0) {
3350Sstevel@tonic-gate 		p = procdir[pidp->pid_prslot].pe_proc;
3360Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
3370Sstevel@tonic-gate 			return (p);
3380Sstevel@tonic-gate 	}
3390Sstevel@tonic-gate 	return (NULL);
3400Sstevel@tonic-gate }
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate /*
3430Sstevel@tonic-gate  * Find a process given its process ID.  This obeys zone restrictions,
3440Sstevel@tonic-gate  * so if the caller is in a non-global zone it won't find processes
3450Sstevel@tonic-gate  * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
3460Sstevel@tonic-gate  * bypass this restriction.
3470Sstevel@tonic-gate  */
3480Sstevel@tonic-gate proc_t *
3490Sstevel@tonic-gate prfind(pid_t pid)
3500Sstevel@tonic-gate {
3510Sstevel@tonic-gate 	zoneid_t zoneid;
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3540Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3550Sstevel@tonic-gate 	else
3560Sstevel@tonic-gate 		zoneid = getzoneid();
3570Sstevel@tonic-gate 	return (prfind_zone(pid, zoneid));
3580Sstevel@tonic-gate }
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate proc_t *
3610Sstevel@tonic-gate pgfind_zone(pid_t pgid, zoneid_t zoneid)
3620Sstevel@tonic-gate {
3630Sstevel@tonic-gate 	struct pid *pidp;
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
3680Sstevel@tonic-gate 	pidp = pid_lookup(pgid);
3690Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
3700Sstevel@tonic-gate 	if (pidp != NULL) {
3710Sstevel@tonic-gate 		proc_t *p = pidp->pid_pglink;
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
3740Sstevel@tonic-gate 		    p->p_zone->zone_id == zoneid)
3750Sstevel@tonic-gate 			return (p);
3760Sstevel@tonic-gate 	}
3770Sstevel@tonic-gate 	return (NULL);
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate 
3800Sstevel@tonic-gate /*
3810Sstevel@tonic-gate  * return the head of the list of processes whose process group ID is 'pgid',
3820Sstevel@tonic-gate  * or NULL, if no such process group
3830Sstevel@tonic-gate  */
3840Sstevel@tonic-gate proc_t *
3850Sstevel@tonic-gate pgfind(pid_t pgid)
3860Sstevel@tonic-gate {
3870Sstevel@tonic-gate 	zoneid_t zoneid;
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
3900Sstevel@tonic-gate 		zoneid = ALL_ZONES;
3910Sstevel@tonic-gate 	else
3920Sstevel@tonic-gate 		zoneid = getzoneid();
3930Sstevel@tonic-gate 	return (pgfind_zone(pgid, zoneid));
3940Sstevel@tonic-gate }
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate /*
3973247Sgjelinek  * Sets P_PR_LOCK on a non-system process.  Process must be fully created
3983247Sgjelinek  * and not exiting to succeed.
3993247Sgjelinek  *
4003247Sgjelinek  * Returns 0 on success.
4013247Sgjelinek  * Returns 1 if P_PR_LOCK is set.
4023247Sgjelinek  * Returns -1 if proc is in invalid state.
4033247Sgjelinek  */
4043247Sgjelinek int
4053247Sgjelinek sprtrylock_proc(proc_t *p)
4063247Sgjelinek {
4073247Sgjelinek 	ASSERT(MUTEX_HELD(&p->p_lock));
4083247Sgjelinek 
4093247Sgjelinek 	/* skip system and incomplete processes */
4103247Sgjelinek 	if (p->p_stat == SIDL || p->p_stat == SZOMB ||
4113247Sgjelinek 	    (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
4123247Sgjelinek 		return (-1);
4133247Sgjelinek 	}
4143247Sgjelinek 
4153247Sgjelinek 	if (p->p_proc_flag & P_PR_LOCK)
4163247Sgjelinek 		return (1);
4173247Sgjelinek 
4183247Sgjelinek 	p->p_proc_flag |= P_PR_LOCK;
4193247Sgjelinek 	THREAD_KPRI_REQUEST();
4203247Sgjelinek 
4213247Sgjelinek 	return (0);
4223247Sgjelinek }
4233247Sgjelinek 
4243247Sgjelinek /*
4253247Sgjelinek  * Wait for P_PR_LOCK to become clear.  Returns with p_lock dropped,
4263247Sgjelinek  * and the proc pointer no longer valid, as the proc may have exited.
4273247Sgjelinek  */
4283247Sgjelinek void
4293247Sgjelinek sprwaitlock_proc(proc_t *p)
4303247Sgjelinek {
4313247Sgjelinek 	kmutex_t *mp;
4323247Sgjelinek 
4333247Sgjelinek 	ASSERT(MUTEX_HELD(&p->p_lock));
4343247Sgjelinek 	ASSERT(p->p_proc_flag & P_PR_LOCK);
4353247Sgjelinek 
4363247Sgjelinek 	/*
4373247Sgjelinek 	 * p_lock is persistent, but p itself is not -- it could
4383247Sgjelinek 	 * vanish during cv_wait().  Load p->p_lock now so we can
4393247Sgjelinek 	 * drop it after cv_wait() without referencing p.
4403247Sgjelinek 	 */
4413247Sgjelinek 	mp = &p->p_lock;
4423247Sgjelinek 	cv_wait(&pr_pid_cv[p->p_slot], mp);
4433247Sgjelinek 	mutex_exit(mp);
4443247Sgjelinek }
4453247Sgjelinek 
4463247Sgjelinek /*
4470Sstevel@tonic-gate  * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
4480Sstevel@tonic-gate  * Returns the proc pointer on success, NULL on failure.  sprlock() is
4490Sstevel@tonic-gate  * really just a stripped-down version of pr_p_lock() to allow practive
4500Sstevel@tonic-gate  * walkers like dofusers() and dumpsys() to synchronize with /proc.
4510Sstevel@tonic-gate  */
4520Sstevel@tonic-gate proc_t *
4530Sstevel@tonic-gate sprlock_zone(pid_t pid, zoneid_t zoneid)
4540Sstevel@tonic-gate {
4550Sstevel@tonic-gate 	proc_t *p;
4563247Sgjelinek 	int ret;
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 	for (;;) {
4590Sstevel@tonic-gate 		mutex_enter(&pidlock);
4600Sstevel@tonic-gate 		if ((p = prfind_zone(pid, zoneid)) == NULL) {
4610Sstevel@tonic-gate 			mutex_exit(&pidlock);
4620Sstevel@tonic-gate 			return (NULL);
4630Sstevel@tonic-gate 		}
4643247Sgjelinek 		mutex_enter(&p->p_lock);
4650Sstevel@tonic-gate 		mutex_exit(&pidlock);
4663247Sgjelinek 
4670Sstevel@tonic-gate 		if (panicstr)
4680Sstevel@tonic-gate 			return (p);
4693247Sgjelinek 
4703247Sgjelinek 		ret = sprtrylock_proc(p);
4713247Sgjelinek 		if (ret == -1) {
4723247Sgjelinek 			mutex_exit(&p->p_lock);
4733247Sgjelinek 			return (NULL);
4743247Sgjelinek 		} else if (ret == 0) {
4750Sstevel@tonic-gate 			break;
4763247Sgjelinek 		}
4773247Sgjelinek 		sprwaitlock_proc(p);
4780Sstevel@tonic-gate 	}
4790Sstevel@tonic-gate 	return (p);
4800Sstevel@tonic-gate }
4810Sstevel@tonic-gate 
4820Sstevel@tonic-gate proc_t *
4830Sstevel@tonic-gate sprlock(pid_t pid)
4840Sstevel@tonic-gate {
4850Sstevel@tonic-gate 	zoneid_t zoneid;
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
4880Sstevel@tonic-gate 		zoneid = ALL_ZONES;
4890Sstevel@tonic-gate 	else
4900Sstevel@tonic-gate 		zoneid = getzoneid();
4910Sstevel@tonic-gate 	return (sprlock_zone(pid, zoneid));
4920Sstevel@tonic-gate }
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate void
4950Sstevel@tonic-gate sprlock_proc(proc_t *p)
4960Sstevel@tonic-gate {
4970Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 	while (p->p_proc_flag & P_PR_LOCK) {
5000Sstevel@tonic-gate 		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
5010Sstevel@tonic-gate 	}
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 	p->p_proc_flag |= P_PR_LOCK;
5040Sstevel@tonic-gate 	THREAD_KPRI_REQUEST();
5050Sstevel@tonic-gate }
5060Sstevel@tonic-gate 
5070Sstevel@tonic-gate void
5080Sstevel@tonic-gate sprunlock(proc_t *p)
5090Sstevel@tonic-gate {
5100Sstevel@tonic-gate 	if (panicstr) {
5110Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
5120Sstevel@tonic-gate 		return;
5130Sstevel@tonic-gate 	}
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 	ASSERT(p->p_proc_flag & P_PR_LOCK);
5160Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate 	cv_signal(&pr_pid_cv[p->p_slot]);
5190Sstevel@tonic-gate 	p->p_proc_flag &= ~P_PR_LOCK;
5200Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
5210Sstevel@tonic-gate 	THREAD_KPRI_RELEASE();
5220Sstevel@tonic-gate }
5230Sstevel@tonic-gate 
5240Sstevel@tonic-gate void
5250Sstevel@tonic-gate pid_init(void)
5260Sstevel@tonic-gate {
5270Sstevel@tonic-gate 	int i;
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
5320Sstevel@tonic-gate 	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
5330Sstevel@tonic-gate 	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
5340Sstevel@tonic-gate 	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 	nproc = 1;
5370Sstevel@tonic-gate 	practive = proc_sched;
5380Sstevel@tonic-gate 	proc_sched->p_next = NULL;
5390Sstevel@tonic-gate 	procdir[0].pe_proc = proc_sched;
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	procentfree = &procdir[1];
5420Sstevel@tonic-gate 	for (i = 1; i < v.v_proc - 1; i++)
5430Sstevel@tonic-gate 		procdir[i].pe_next = &procdir[i+1];
5440Sstevel@tonic-gate 	procdir[i].pe_next = NULL;
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	HASHPID(0) = &pid0;
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate 	upcount_init();
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate proc_t *
5520Sstevel@tonic-gate pid_entry(int slot)
5530Sstevel@tonic-gate {
5540Sstevel@tonic-gate 	union procent *pep;
5550Sstevel@tonic-gate 	proc_t *prp;
5560Sstevel@tonic-gate 
5570Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
5580Sstevel@tonic-gate 	ASSERT(slot >= 0 && slot < v.v_proc);
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 	pep = procdir[slot].pe_next;
5610Sstevel@tonic-gate 	if (pep >= procdir && pep < &procdir[v.v_proc])
5620Sstevel@tonic-gate 		return (NULL);
5630Sstevel@tonic-gate 	prp = procdir[slot].pe_proc;
5640Sstevel@tonic-gate 	if (prp != 0 && prp->p_stat == SIDL)
5650Sstevel@tonic-gate 		return (NULL);
5660Sstevel@tonic-gate 	return (prp);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate /*
5700Sstevel@tonic-gate  * Send the specified signal to all processes whose process group ID is
5710Sstevel@tonic-gate  * equal to 'pgid'
5720Sstevel@tonic-gate  */
5730Sstevel@tonic-gate 
5740Sstevel@tonic-gate void
5750Sstevel@tonic-gate signal(pid_t pgid, int sig)
5760Sstevel@tonic-gate {
5770Sstevel@tonic-gate 	struct pid *pidp;
5780Sstevel@tonic-gate 	proc_t *prp;
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate 	mutex_enter(&pidlock);
5810Sstevel@tonic-gate 	mutex_enter(&pidlinklock);
5820Sstevel@tonic-gate 	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
5830Sstevel@tonic-gate 		mutex_exit(&pidlinklock);
5840Sstevel@tonic-gate 		mutex_exit(&pidlock);
5850Sstevel@tonic-gate 		return;
5860Sstevel@tonic-gate 	}
5870Sstevel@tonic-gate 	mutex_exit(&pidlinklock);
5880Sstevel@tonic-gate 	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
5890Sstevel@tonic-gate 		mutex_enter(&prp->p_lock);
5900Sstevel@tonic-gate 		sigtoproc(prp, NULL, sig);
5910Sstevel@tonic-gate 		mutex_exit(&prp->p_lock);
5920Sstevel@tonic-gate 	}
5930Sstevel@tonic-gate 	mutex_exit(&pidlock);
5940Sstevel@tonic-gate }
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate /*
5970Sstevel@tonic-gate  * Send the specified signal to the specified process
5980Sstevel@tonic-gate  */
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate void
6010Sstevel@tonic-gate prsignal(struct pid *pidp, int sig)
6020Sstevel@tonic-gate {
6030Sstevel@tonic-gate 	if (!(pidp->pid_prinactive))
6040Sstevel@tonic-gate 		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
6050Sstevel@tonic-gate }
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate #include <sys/sunddi.h>
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate /*
6100Sstevel@tonic-gate  * DDI/DKI interfaces for drivers to send signals to processes
6110Sstevel@tonic-gate  */
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate /*
6140Sstevel@tonic-gate  * obtain an opaque reference to a process for signaling
6150Sstevel@tonic-gate  */
6160Sstevel@tonic-gate void *
6170Sstevel@tonic-gate proc_ref(void)
6180Sstevel@tonic-gate {
6190Sstevel@tonic-gate 	struct pid *pidp;
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	mutex_enter(&pidlock);
6220Sstevel@tonic-gate 	pidp = curproc->p_pidp;
6230Sstevel@tonic-gate 	PID_HOLD(pidp);
6240Sstevel@tonic-gate 	mutex_exit(&pidlock);
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	return (pidp);
6270Sstevel@tonic-gate }
6280Sstevel@tonic-gate 
6290Sstevel@tonic-gate /*
6300Sstevel@tonic-gate  * release a reference to a process
6310Sstevel@tonic-gate  * - a process can exit even if a driver has a reference to it
6320Sstevel@tonic-gate  * - one proc_unref for every proc_ref
6330Sstevel@tonic-gate  */
6340Sstevel@tonic-gate void
6350Sstevel@tonic-gate proc_unref(void *pref)
6360Sstevel@tonic-gate {
6370Sstevel@tonic-gate 	mutex_enter(&pidlock);
6380Sstevel@tonic-gate 	PID_RELE((struct pid *)pref);
6390Sstevel@tonic-gate 	mutex_exit(&pidlock);
6400Sstevel@tonic-gate }
6410Sstevel@tonic-gate 
6420Sstevel@tonic-gate /*
6430Sstevel@tonic-gate  * send a signal to a process
6440Sstevel@tonic-gate  *
6450Sstevel@tonic-gate  * - send the process the signal
6460Sstevel@tonic-gate  * - if the process went away, return a -1
6470Sstevel@tonic-gate  * - if the process is still there return 0
6480Sstevel@tonic-gate  */
6490Sstevel@tonic-gate int
6500Sstevel@tonic-gate proc_signal(void *pref, int sig)
6510Sstevel@tonic-gate {
6520Sstevel@tonic-gate 	struct pid *pidp = pref;
6530Sstevel@tonic-gate 
6540Sstevel@tonic-gate 	prsignal(pidp, sig);
6550Sstevel@tonic-gate 	return (pidp->pid_prinactive ? -1 : 0);
6560Sstevel@tonic-gate }
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 
6590Sstevel@tonic-gate static struct upcount	**upc_hash;	/* a boot time allocated array */
6600Sstevel@tonic-gate static ulong_t		upc_hashmask;
6610Sstevel@tonic-gate #define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate /*
6640Sstevel@tonic-gate  * Get us off the ground.  Called once at boot.
6650Sstevel@tonic-gate  */
6660Sstevel@tonic-gate void
6670Sstevel@tonic-gate upcount_init(void)
6680Sstevel@tonic-gate {
6690Sstevel@tonic-gate 	ulong_t	upc_hashsize;
6700Sstevel@tonic-gate 
6710Sstevel@tonic-gate 	/*
6720Sstevel@tonic-gate 	 * An entry per MB of memory is our current guess
6730Sstevel@tonic-gate 	 */
6740Sstevel@tonic-gate 	/*
6750Sstevel@tonic-gate 	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
6760Sstevel@tonic-gate 	 * converts pages to megs (without overflowing a u_int
6770Sstevel@tonic-gate 	 * if you have more than 4G of memory, like ptob(physmem)/1M
6780Sstevel@tonic-gate 	 * would).
6790Sstevel@tonic-gate 	 */
6800Sstevel@tonic-gate 	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
6810Sstevel@tonic-gate 	upc_hashmask = upc_hashsize - 1;
6820Sstevel@tonic-gate 	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
6830Sstevel@tonic-gate 	    KM_SLEEP);
6840Sstevel@tonic-gate }
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate /*
6870Sstevel@tonic-gate  * Increment the number of processes associated with a given uid and zoneid.
6880Sstevel@tonic-gate  */
6890Sstevel@tonic-gate void
6900Sstevel@tonic-gate upcount_inc(uid_t uid, zoneid_t zoneid)
6910Sstevel@tonic-gate {
6920Sstevel@tonic-gate 	struct upcount	**upc, **hupc;
6930Sstevel@tonic-gate 	struct upcount	*new;
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
6960Sstevel@tonic-gate 	new = NULL;
6970Sstevel@tonic-gate 	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
6980Sstevel@tonic-gate top:
6990Sstevel@tonic-gate 	upc = hupc;
7000Sstevel@tonic-gate 	while ((*upc) != NULL) {
7010Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
7020Sstevel@tonic-gate 			(*upc)->up_count++;
7030Sstevel@tonic-gate 			if (new) {
7040Sstevel@tonic-gate 				/*
7050Sstevel@tonic-gate 				 * did not need `new' afterall.
7060Sstevel@tonic-gate 				 */
7070Sstevel@tonic-gate 				kmem_free(new, sizeof (*new));
7080Sstevel@tonic-gate 			}
7090Sstevel@tonic-gate 			return;
7100Sstevel@tonic-gate 		}
7110Sstevel@tonic-gate 		upc = &(*upc)->up_next;
7120Sstevel@tonic-gate 	}
7130Sstevel@tonic-gate 
7140Sstevel@tonic-gate 	/*
7150Sstevel@tonic-gate 	 * There is no entry for this <uid,zoneid> pair.
7160Sstevel@tonic-gate 	 * Allocate one.  If we have to drop pidlock, check
7170Sstevel@tonic-gate 	 * again.
7180Sstevel@tonic-gate 	 */
7190Sstevel@tonic-gate 	if (new == NULL) {
7200Sstevel@tonic-gate 		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
7210Sstevel@tonic-gate 		if (new == NULL) {
7220Sstevel@tonic-gate 			mutex_exit(&pidlock);
7230Sstevel@tonic-gate 			new = (struct upcount *)kmem_alloc(sizeof (*new),
7240Sstevel@tonic-gate 			    KM_SLEEP);
7250Sstevel@tonic-gate 			mutex_enter(&pidlock);
7260Sstevel@tonic-gate 			goto top;
7270Sstevel@tonic-gate 		}
7280Sstevel@tonic-gate 	}
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	/*
7320Sstevel@tonic-gate 	 * On the assumption that a new user is going to do some
7330Sstevel@tonic-gate 	 * more forks, put the new upcount structure on the front.
7340Sstevel@tonic-gate 	 */
7350Sstevel@tonic-gate 	upc = hupc;
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 	new->up_uid = uid;
7380Sstevel@tonic-gate 	new->up_zoneid = zoneid;
7390Sstevel@tonic-gate 	new->up_count = 1;
7400Sstevel@tonic-gate 	new->up_next = *upc;
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 	*upc = new;
7430Sstevel@tonic-gate }
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate /*
7460Sstevel@tonic-gate  * Decrement the number of processes a given uid and zoneid has.
7470Sstevel@tonic-gate  */
7480Sstevel@tonic-gate void
7490Sstevel@tonic-gate upcount_dec(uid_t uid, zoneid_t zoneid)
7500Sstevel@tonic-gate {
7510Sstevel@tonic-gate 	struct	upcount **upc;
7520Sstevel@tonic-gate 	struct	upcount *done;
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7550Sstevel@tonic-gate 
7560Sstevel@tonic-gate 	upc = &upc_hash[UPC_HASH(uid, zoneid)];
7570Sstevel@tonic-gate 	while ((*upc) != NULL) {
7580Sstevel@tonic-gate 		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
7590Sstevel@tonic-gate 			(*upc)->up_count--;
7600Sstevel@tonic-gate 			if ((*upc)->up_count == 0) {
7610Sstevel@tonic-gate 				done = *upc;
7620Sstevel@tonic-gate 				*upc = (*upc)->up_next;
7630Sstevel@tonic-gate 				kmem_free(done, sizeof (*done));
7640Sstevel@tonic-gate 			}
7650Sstevel@tonic-gate 			return;
7660Sstevel@tonic-gate 		}
7670Sstevel@tonic-gate 		upc = &(*upc)->up_next;
7680Sstevel@tonic-gate 	}
7690Sstevel@tonic-gate 	cmn_err(CE_PANIC, "decr_upcount-off the end");
7700Sstevel@tonic-gate }
7710Sstevel@tonic-gate 
7720Sstevel@tonic-gate /*
7730Sstevel@tonic-gate  * Returns the number of processes a uid has.
7740Sstevel@tonic-gate  * Non-existent uid's are assumed to have no processes.
7750Sstevel@tonic-gate  */
7760Sstevel@tonic-gate int
7770Sstevel@tonic-gate upcount_get(uid_t uid, zoneid_t zoneid)
7780Sstevel@tonic-gate {
7790Sstevel@tonic-gate 	struct	upcount *upc;
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7820Sstevel@tonic-gate 
7830Sstevel@tonic-gate 	upc = upc_hash[UPC_HASH(uid, zoneid)];
7840Sstevel@tonic-gate 	while (upc != NULL) {
7850Sstevel@tonic-gate 		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
7860Sstevel@tonic-gate 			return (upc->up_count);
7870Sstevel@tonic-gate 		}
7880Sstevel@tonic-gate 		upc = upc->up_next;
7890Sstevel@tonic-gate 	}
7900Sstevel@tonic-gate 	return (0);
7910Sstevel@tonic-gate }
792