/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21390Sraf
/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/
280Sstevel@tonic-gate
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/param.h>
310Sstevel@tonic-gate #include <sys/sysmacros.h>
320Sstevel@tonic-gate #include <sys/proc.h>
330Sstevel@tonic-gate #include <sys/kmem.h>
340Sstevel@tonic-gate #include <sys/tuneable.h>
350Sstevel@tonic-gate #include <sys/var.h>
360Sstevel@tonic-gate #include <sys/cred.h>
370Sstevel@tonic-gate #include <sys/systm.h>
380Sstevel@tonic-gate #include <sys/prsystm.h>
390Sstevel@tonic-gate #include <sys/vnode.h>
400Sstevel@tonic-gate #include <sys/session.h>
410Sstevel@tonic-gate #include <sys/cpuvar.h>
420Sstevel@tonic-gate #include <sys/cmn_err.h>
430Sstevel@tonic-gate #include <sys/bitmap.h>
440Sstevel@tonic-gate #include <sys/debug.h>
450Sstevel@tonic-gate #include <c2/audit.h>
46*12834SMenno.Lageman@Sun.COM #include <sys/project.h>
47*12834SMenno.Lageman@Sun.COM #include <sys/task.h>
480Sstevel@tonic-gate #include <sys/zone.h>
490Sstevel@tonic-gate
500Sstevel@tonic-gate /* directory entries for /proc */
510Sstevel@tonic-gate union procent {
520Sstevel@tonic-gate proc_t *pe_proc;
530Sstevel@tonic-gate union procent *pe_next;
540Sstevel@tonic-gate };
550Sstevel@tonic-gate
560Sstevel@tonic-gate struct pid pid0 = {
570Sstevel@tonic-gate 0, /* pid_prinactive */
580Sstevel@tonic-gate 1, /* pid_pgorphaned */
590Sstevel@tonic-gate 0, /* pid_padding */
600Sstevel@tonic-gate 0, /* pid_prslot */
610Sstevel@tonic-gate 0, /* pid_id */
620Sstevel@tonic-gate NULL, /* pid_pglink */
63749Ssusans NULL, /* pid_pgtail */
640Sstevel@tonic-gate NULL, /* pid_link */
650Sstevel@tonic-gate 3 /* pid_ref */
660Sstevel@tonic-gate };
670Sstevel@tonic-gate
680Sstevel@tonic-gate static int pid_hashlen = 4; /* desired average hash chain length */
690Sstevel@tonic-gate static int pid_hashsz; /* number of buckets in the hash table */
700Sstevel@tonic-gate
710Sstevel@tonic-gate #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))])
720Sstevel@tonic-gate
730Sstevel@tonic-gate extern uint_t nproc;
740Sstevel@tonic-gate extern struct kmem_cache *process_cache;
750Sstevel@tonic-gate static void upcount_init(void);
760Sstevel@tonic-gate
770Sstevel@tonic-gate kmutex_t pidlock; /* global process lock */
780Sstevel@tonic-gate kmutex_t pr_pidlock; /* /proc global process lock */
790Sstevel@tonic-gate kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */
800Sstevel@tonic-gate struct plock *proc_lock; /* persistent array of p_lock's */
810Sstevel@tonic-gate
820Sstevel@tonic-gate /*
830Sstevel@tonic-gate * See the comment above pid_getlockslot() for a detailed explanation of this
840Sstevel@tonic-gate * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
850Sstevel@tonic-gate * granularity; if the coherence granularity is ever changed, this constant
860Sstevel@tonic-gate * should be modified to reflect the change to minimize proc_lock false
870Sstevel@tonic-gate * sharing (correctness, however, is guaranteed regardless of the coherence
880Sstevel@tonic-gate * granularity).
890Sstevel@tonic-gate */
900Sstevel@tonic-gate #define PLOCK_SHIFT 3
910Sstevel@tonic-gate
920Sstevel@tonic-gate static kmutex_t pidlinklock;
930Sstevel@tonic-gate static struct pid **pidhash;
940Sstevel@tonic-gate static pid_t minpid;
9511173SJonathan.Adams@Sun.COM static pid_t mpid = FAMOUS_PIDS; /* one more than the last famous pid */
960Sstevel@tonic-gate static union procent *procdir;
970Sstevel@tonic-gate static union procent *procentfree;
980Sstevel@tonic-gate
990Sstevel@tonic-gate static struct pid *
pid_lookup(pid_t pid)1000Sstevel@tonic-gate pid_lookup(pid_t pid)
1010Sstevel@tonic-gate {
1020Sstevel@tonic-gate struct pid *pidp;
1030Sstevel@tonic-gate
1040Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlinklock));
1050Sstevel@tonic-gate
1060Sstevel@tonic-gate for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
1070Sstevel@tonic-gate if (pidp->pid_id == pid) {
1080Sstevel@tonic-gate ASSERT(pidp->pid_ref > 0);
1090Sstevel@tonic-gate break;
1100Sstevel@tonic-gate }
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate return (pidp);
1130Sstevel@tonic-gate }
1140Sstevel@tonic-gate
1150Sstevel@tonic-gate void
pid_setmin(void)1160Sstevel@tonic-gate pid_setmin(void)
1170Sstevel@tonic-gate {
1180Sstevel@tonic-gate if (jump_pid && jump_pid > mpid)
1190Sstevel@tonic-gate minpid = mpid = jump_pid;
1200Sstevel@tonic-gate else
12111173SJonathan.Adams@Sun.COM minpid = mpid;
1220Sstevel@tonic-gate }
1230Sstevel@tonic-gate
1240Sstevel@tonic-gate /*
1250Sstevel@tonic-gate * When prslots are simply used as an index to determine a process' p_lock,
1260Sstevel@tonic-gate * adjacent prslots share adjacent p_locks. On machines where the size
1270Sstevel@tonic-gate * of a mutex is smaller than that of a cache line (which, as of this writing,
1280Sstevel@tonic-gate * is true for all machines on which Solaris runs), this can potentially
1290Sstevel@tonic-gate * induce false sharing. The standard solution for false sharing is to pad
1300Sstevel@tonic-gate * out one's data structures (in this case, struct plock). However,
1310Sstevel@tonic-gate * given the size and (generally) sparse use of the proc_lock array, this
1320Sstevel@tonic-gate * is suboptimal. We therefore stride through the proc_lock array with
1330Sstevel@tonic-gate * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as:
1340Sstevel@tonic-gate *
1350Sstevel@tonic-gate * log_2 (coherence_granularity / sizeof (kmutex_t))
1360Sstevel@tonic-gate *
1370Sstevel@tonic-gate * Under this scheme, false sharing is still possible -- but only when
1380Sstevel@tonic-gate * the number of active processes is very large. Note that the one-to-one
1390Sstevel@tonic-gate * mapping between prslots and lockslots is maintained.
1400Sstevel@tonic-gate */
1410Sstevel@tonic-gate static int
pid_getlockslot(int prslot)1420Sstevel@tonic-gate pid_getlockslot(int prslot)
1430Sstevel@tonic-gate {
1440Sstevel@tonic-gate int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
1450Sstevel@tonic-gate int perlap = even >> PLOCK_SHIFT;
1460Sstevel@tonic-gate
1470Sstevel@tonic-gate if (prslot >= even)
1480Sstevel@tonic-gate return (prslot);
1490Sstevel@tonic-gate
1500Sstevel@tonic-gate return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
1510Sstevel@tonic-gate }
1520Sstevel@tonic-gate
1530Sstevel@tonic-gate /*
1542712Snn35248 * This function allocates a pid structure, a free pid, and optionally a
1552712Snn35248 * slot in the proc table for it.
1560Sstevel@tonic-gate *
1572712Snn35248 * pid_allocate() returns the new pid on success, -1 on failure.
1580Sstevel@tonic-gate */
1590Sstevel@tonic-gate pid_t
pid_allocate(proc_t * prp,pid_t pid,int flags)16011173SJonathan.Adams@Sun.COM pid_allocate(proc_t *prp, pid_t pid, int flags)
1610Sstevel@tonic-gate {
1620Sstevel@tonic-gate struct pid *pidp;
1630Sstevel@tonic-gate union procent *pep;
1640Sstevel@tonic-gate pid_t newpid, startpid;
1650Sstevel@tonic-gate
1660Sstevel@tonic-gate pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
1670Sstevel@tonic-gate
1680Sstevel@tonic-gate mutex_enter(&pidlinklock);
1692712Snn35248 if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
1700Sstevel@tonic-gate /*
1710Sstevel@tonic-gate * ran out of /proc directory entries
1720Sstevel@tonic-gate */
1730Sstevel@tonic-gate goto failed;
1740Sstevel@tonic-gate }
1750Sstevel@tonic-gate
17611173SJonathan.Adams@Sun.COM if (pid != 0) {
17711173SJonathan.Adams@Sun.COM VERIFY(minpid == 0);
17811173SJonathan.Adams@Sun.COM VERIFY3P(pid, <, mpid);
17911173SJonathan.Adams@Sun.COM VERIFY3P(pid_lookup(pid), ==, NULL);
18011173SJonathan.Adams@Sun.COM newpid = pid;
18111173SJonathan.Adams@Sun.COM } else {
18211173SJonathan.Adams@Sun.COM /*
18311173SJonathan.Adams@Sun.COM * Allocate a pid
18411173SJonathan.Adams@Sun.COM */
18511203SRoger.Faulkner@Sun.COM ASSERT(minpid <= mpid && mpid < maxpid);
1860Sstevel@tonic-gate
18711173SJonathan.Adams@Sun.COM startpid = mpid;
18811173SJonathan.Adams@Sun.COM for (;;) {
18911173SJonathan.Adams@Sun.COM newpid = mpid;
19011203SRoger.Faulkner@Sun.COM if (++mpid == maxpid)
19111173SJonathan.Adams@Sun.COM mpid = minpid;
19211173SJonathan.Adams@Sun.COM
19311173SJonathan.Adams@Sun.COM if (pid_lookup(newpid) == NULL)
19411173SJonathan.Adams@Sun.COM break;
19511173SJonathan.Adams@Sun.COM
19611173SJonathan.Adams@Sun.COM if (mpid == startpid)
19711173SJonathan.Adams@Sun.COM goto failed;
19811173SJonathan.Adams@Sun.COM }
1990Sstevel@tonic-gate }
2000Sstevel@tonic-gate
2010Sstevel@tonic-gate /*
2020Sstevel@tonic-gate * Put pid into the pid hash table.
2030Sstevel@tonic-gate */
2040Sstevel@tonic-gate pidp->pid_link = HASHPID(newpid);
2050Sstevel@tonic-gate HASHPID(newpid) = pidp;
2060Sstevel@tonic-gate pidp->pid_ref = 1;
2070Sstevel@tonic-gate pidp->pid_id = newpid;
2082712Snn35248
2092712Snn35248 if (flags & PID_ALLOC_PROC) {
2102712Snn35248 procentfree = pep->pe_next;
2112712Snn35248 pidp->pid_prslot = pep - procdir;
2122712Snn35248 pep->pe_proc = prp;
2132712Snn35248 prp->p_pidp = pidp;
2142712Snn35248 prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
2152712Snn35248 } else {
2162712Snn35248 pidp->pid_prslot = 0;
2172712Snn35248 }
2182712Snn35248
2190Sstevel@tonic-gate mutex_exit(&pidlinklock);
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate return (newpid);
2220Sstevel@tonic-gate
2230Sstevel@tonic-gate failed:
2240Sstevel@tonic-gate mutex_exit(&pidlinklock);
2250Sstevel@tonic-gate kmem_free(pidp, sizeof (struct pid));
2260Sstevel@tonic-gate return (-1);
2270Sstevel@tonic-gate }
2280Sstevel@tonic-gate
2290Sstevel@tonic-gate /*
2300Sstevel@tonic-gate * decrement the reference count for pid
2310Sstevel@tonic-gate */
2320Sstevel@tonic-gate int
pid_rele(struct pid * pidp)2330Sstevel@tonic-gate pid_rele(struct pid *pidp)
2340Sstevel@tonic-gate {
2350Sstevel@tonic-gate struct pid **pidpp;
2360Sstevel@tonic-gate
2370Sstevel@tonic-gate mutex_enter(&pidlinklock);
2380Sstevel@tonic-gate ASSERT(pidp != &pid0);
2390Sstevel@tonic-gate
2400Sstevel@tonic-gate pidpp = &HASHPID(pidp->pid_id);
2410Sstevel@tonic-gate for (;;) {
2420Sstevel@tonic-gate ASSERT(*pidpp != NULL);
2430Sstevel@tonic-gate if (*pidpp == pidp)
2440Sstevel@tonic-gate break;
2450Sstevel@tonic-gate pidpp = &(*pidpp)->pid_link;
2460Sstevel@tonic-gate }
2470Sstevel@tonic-gate
2480Sstevel@tonic-gate *pidpp = pidp->pid_link;
2490Sstevel@tonic-gate mutex_exit(&pidlinklock);
2500Sstevel@tonic-gate
2510Sstevel@tonic-gate kmem_free(pidp, sizeof (*pidp));
2520Sstevel@tonic-gate return (0);
2530Sstevel@tonic-gate }
2540Sstevel@tonic-gate
2550Sstevel@tonic-gate void
proc_entry_free(struct pid * pidp)2560Sstevel@tonic-gate proc_entry_free(struct pid *pidp)
2570Sstevel@tonic-gate {
2580Sstevel@tonic-gate mutex_enter(&pidlinklock);
2590Sstevel@tonic-gate pidp->pid_prinactive = 1;
2600Sstevel@tonic-gate procdir[pidp->pid_prslot].pe_next = procentfree;
2610Sstevel@tonic-gate procentfree = &procdir[pidp->pid_prslot];
2620Sstevel@tonic-gate mutex_exit(&pidlinklock);
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate
265*12834SMenno.Lageman@Sun.COM /*
266*12834SMenno.Lageman@Sun.COM * The original task needs to be passed in since the process has already been
267*12834SMenno.Lageman@Sun.COM * detached from the task at this point in time.
268*12834SMenno.Lageman@Sun.COM */
2690Sstevel@tonic-gate void
pid_exit(proc_t * prp,struct task * tk)270*12834SMenno.Lageman@Sun.COM pid_exit(proc_t *prp, struct task *tk)
2710Sstevel@tonic-gate {
2720Sstevel@tonic-gate struct pid *pidp;
273*12834SMenno.Lageman@Sun.COM zone_t *zone = prp->p_zone;
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
2760Sstevel@tonic-gate
2770Sstevel@tonic-gate /*
2780Sstevel@tonic-gate * Exit process group. If it is NULL, it's because fork failed
2790Sstevel@tonic-gate * before calling pgjoin().
2800Sstevel@tonic-gate */
2810Sstevel@tonic-gate ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
2820Sstevel@tonic-gate if (prp->p_pgidp != NULL)
2830Sstevel@tonic-gate pgexit(prp);
2840Sstevel@tonic-gate
2852712Snn35248 sess_rele(prp->p_sessp, B_TRUE);
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate pidp = prp->p_pidp;
2880Sstevel@tonic-gate
2890Sstevel@tonic-gate proc_entry_free(pidp);
2900Sstevel@tonic-gate
2910Sstevel@tonic-gate if (audit_active)
2920Sstevel@tonic-gate audit_pfree(prp);
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate if (practive == prp) {
2950Sstevel@tonic-gate practive = prp->p_next;
2960Sstevel@tonic-gate }
2970Sstevel@tonic-gate
2980Sstevel@tonic-gate if (prp->p_next) {
2990Sstevel@tonic-gate prp->p_next->p_prev = prp->p_prev;
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate if (prp->p_prev) {
3020Sstevel@tonic-gate prp->p_prev->p_next = prp->p_next;
3030Sstevel@tonic-gate }
3040Sstevel@tonic-gate
3050Sstevel@tonic-gate PID_RELE(pidp);
3060Sstevel@tonic-gate
3070Sstevel@tonic-gate mutex_destroy(&prp->p_crlock);
3080Sstevel@tonic-gate kmem_cache_free(process_cache, prp);
3090Sstevel@tonic-gate nproc--;
310*12834SMenno.Lageman@Sun.COM
311*12834SMenno.Lageman@Sun.COM /*
312*12834SMenno.Lageman@Sun.COM * Decrement the process counts of the original task, project and zone.
313*12834SMenno.Lageman@Sun.COM */
314*12834SMenno.Lageman@Sun.COM mutex_enter(&zone->zone_nlwps_lock);
315*12834SMenno.Lageman@Sun.COM tk->tk_nprocs--;
316*12834SMenno.Lageman@Sun.COM tk->tk_proj->kpj_nprocs--;
317*12834SMenno.Lageman@Sun.COM zone->zone_nprocs--;
318*12834SMenno.Lageman@Sun.COM mutex_exit(&zone->zone_nlwps_lock);
3190Sstevel@tonic-gate }
3200Sstevel@tonic-gate
3210Sstevel@tonic-gate /*
3220Sstevel@tonic-gate * Find a process visible from the specified zone given its process ID.
3230Sstevel@tonic-gate */
3240Sstevel@tonic-gate proc_t *
prfind_zone(pid_t pid,zoneid_t zoneid)3250Sstevel@tonic-gate prfind_zone(pid_t pid, zoneid_t zoneid)
3260Sstevel@tonic-gate {
3270Sstevel@tonic-gate struct pid *pidp;
3280Sstevel@tonic-gate proc_t *p;
3290Sstevel@tonic-gate
3300Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
3310Sstevel@tonic-gate
3320Sstevel@tonic-gate mutex_enter(&pidlinklock);
3330Sstevel@tonic-gate pidp = pid_lookup(pid);
3340Sstevel@tonic-gate mutex_exit(&pidlinklock);
3350Sstevel@tonic-gate if (pidp != NULL && pidp->pid_prinactive == 0) {
3360Sstevel@tonic-gate p = procdir[pidp->pid_prslot].pe_proc;
3370Sstevel@tonic-gate if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
3380Sstevel@tonic-gate return (p);
3390Sstevel@tonic-gate }
3400Sstevel@tonic-gate return (NULL);
3410Sstevel@tonic-gate }
3420Sstevel@tonic-gate
3430Sstevel@tonic-gate /*
3440Sstevel@tonic-gate * Find a process given its process ID. This obeys zone restrictions,
3450Sstevel@tonic-gate * so if the caller is in a non-global zone it won't find processes
3460Sstevel@tonic-gate * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to
3470Sstevel@tonic-gate * bypass this restriction.
3480Sstevel@tonic-gate */
3490Sstevel@tonic-gate proc_t *
prfind(pid_t pid)3500Sstevel@tonic-gate prfind(pid_t pid)
3510Sstevel@tonic-gate {
3520Sstevel@tonic-gate zoneid_t zoneid;
3530Sstevel@tonic-gate
3540Sstevel@tonic-gate if (INGLOBALZONE(curproc))
3550Sstevel@tonic-gate zoneid = ALL_ZONES;
3560Sstevel@tonic-gate else
3570Sstevel@tonic-gate zoneid = getzoneid();
3580Sstevel@tonic-gate return (prfind_zone(pid, zoneid));
3590Sstevel@tonic-gate }
3600Sstevel@tonic-gate
3610Sstevel@tonic-gate proc_t *
pgfind_zone(pid_t pgid,zoneid_t zoneid)3620Sstevel@tonic-gate pgfind_zone(pid_t pgid, zoneid_t zoneid)
3630Sstevel@tonic-gate {
3640Sstevel@tonic-gate struct pid *pidp;
3650Sstevel@tonic-gate
3660Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
3670Sstevel@tonic-gate
3680Sstevel@tonic-gate mutex_enter(&pidlinklock);
3690Sstevel@tonic-gate pidp = pid_lookup(pgid);
3700Sstevel@tonic-gate mutex_exit(&pidlinklock);
3710Sstevel@tonic-gate if (pidp != NULL) {
3720Sstevel@tonic-gate proc_t *p = pidp->pid_pglink;
3730Sstevel@tonic-gate
3740Sstevel@tonic-gate if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
3750Sstevel@tonic-gate p->p_zone->zone_id == zoneid)
3760Sstevel@tonic-gate return (p);
3770Sstevel@tonic-gate }
3780Sstevel@tonic-gate return (NULL);
3790Sstevel@tonic-gate }
3800Sstevel@tonic-gate
3810Sstevel@tonic-gate /*
3820Sstevel@tonic-gate * return the head of the list of processes whose process group ID is 'pgid',
3830Sstevel@tonic-gate * or NULL, if no such process group
3840Sstevel@tonic-gate */
3850Sstevel@tonic-gate proc_t *
pgfind(pid_t pgid)3860Sstevel@tonic-gate pgfind(pid_t pgid)
3870Sstevel@tonic-gate {
3880Sstevel@tonic-gate zoneid_t zoneid;
3890Sstevel@tonic-gate
3900Sstevel@tonic-gate if (INGLOBALZONE(curproc))
3910Sstevel@tonic-gate zoneid = ALL_ZONES;
3920Sstevel@tonic-gate else
3930Sstevel@tonic-gate zoneid = getzoneid();
3940Sstevel@tonic-gate return (pgfind_zone(pgid, zoneid));
3950Sstevel@tonic-gate }
3960Sstevel@tonic-gate
3970Sstevel@tonic-gate /*
3983247Sgjelinek * Sets P_PR_LOCK on a non-system process. Process must be fully created
3993247Sgjelinek * and not exiting to succeed.
4003247Sgjelinek *
4013247Sgjelinek * Returns 0 on success.
4023247Sgjelinek * Returns 1 if P_PR_LOCK is set.
4033247Sgjelinek * Returns -1 if proc is in invalid state.
4043247Sgjelinek */
4053247Sgjelinek int
sprtrylock_proc(proc_t * p)4063247Sgjelinek sprtrylock_proc(proc_t *p)
4073247Sgjelinek {
4083247Sgjelinek ASSERT(MUTEX_HELD(&p->p_lock));
4093247Sgjelinek
4103247Sgjelinek /* skip system and incomplete processes */
4113247Sgjelinek if (p->p_stat == SIDL || p->p_stat == SZOMB ||
4123247Sgjelinek (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
4133247Sgjelinek return (-1);
4143247Sgjelinek }
4153247Sgjelinek
4163247Sgjelinek if (p->p_proc_flag & P_PR_LOCK)
4173247Sgjelinek return (1);
4183247Sgjelinek
4193247Sgjelinek p->p_proc_flag |= P_PR_LOCK;
4203247Sgjelinek THREAD_KPRI_REQUEST();
4213247Sgjelinek
4223247Sgjelinek return (0);
4233247Sgjelinek }
4243247Sgjelinek
4253247Sgjelinek /*
4263247Sgjelinek * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped,
4273247Sgjelinek * and the proc pointer no longer valid, as the proc may have exited.
4283247Sgjelinek */
4293247Sgjelinek void
sprwaitlock_proc(proc_t * p)4303247Sgjelinek sprwaitlock_proc(proc_t *p)
4313247Sgjelinek {
4323247Sgjelinek kmutex_t *mp;
4333247Sgjelinek
4343247Sgjelinek ASSERT(MUTEX_HELD(&p->p_lock));
4353247Sgjelinek ASSERT(p->p_proc_flag & P_PR_LOCK);
4363247Sgjelinek
4373247Sgjelinek /*
4383247Sgjelinek * p_lock is persistent, but p itself is not -- it could
4393247Sgjelinek * vanish during cv_wait(). Load p->p_lock now so we can
4403247Sgjelinek * drop it after cv_wait() without referencing p.
4413247Sgjelinek */
4423247Sgjelinek mp = &p->p_lock;
4433247Sgjelinek cv_wait(&pr_pid_cv[p->p_slot], mp);
4443247Sgjelinek mutex_exit(mp);
4453247Sgjelinek }
4463247Sgjelinek
4473247Sgjelinek /*
4480Sstevel@tonic-gate * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
4490Sstevel@tonic-gate * Returns the proc pointer on success, NULL on failure. sprlock() is
4500Sstevel@tonic-gate * really just a stripped-down version of pr_p_lock() to allow practive
4510Sstevel@tonic-gate * walkers like dofusers() and dumpsys() to synchronize with /proc.
4520Sstevel@tonic-gate */
4530Sstevel@tonic-gate proc_t *
sprlock_zone(pid_t pid,zoneid_t zoneid)4540Sstevel@tonic-gate sprlock_zone(pid_t pid, zoneid_t zoneid)
4550Sstevel@tonic-gate {
4560Sstevel@tonic-gate proc_t *p;
4573247Sgjelinek int ret;
4580Sstevel@tonic-gate
4590Sstevel@tonic-gate for (;;) {
4600Sstevel@tonic-gate mutex_enter(&pidlock);
4610Sstevel@tonic-gate if ((p = prfind_zone(pid, zoneid)) == NULL) {
4620Sstevel@tonic-gate mutex_exit(&pidlock);
4630Sstevel@tonic-gate return (NULL);
4640Sstevel@tonic-gate }
4653247Sgjelinek mutex_enter(&p->p_lock);
4660Sstevel@tonic-gate mutex_exit(&pidlock);
4673247Sgjelinek
4680Sstevel@tonic-gate if (panicstr)
4690Sstevel@tonic-gate return (p);
4703247Sgjelinek
4713247Sgjelinek ret = sprtrylock_proc(p);
4723247Sgjelinek if (ret == -1) {
4733247Sgjelinek mutex_exit(&p->p_lock);
4743247Sgjelinek return (NULL);
4753247Sgjelinek } else if (ret == 0) {
4760Sstevel@tonic-gate break;
4773247Sgjelinek }
4783247Sgjelinek sprwaitlock_proc(p);
4790Sstevel@tonic-gate }
4800Sstevel@tonic-gate return (p);
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate
4830Sstevel@tonic-gate proc_t *
sprlock(pid_t pid)4840Sstevel@tonic-gate sprlock(pid_t pid)
4850Sstevel@tonic-gate {
4860Sstevel@tonic-gate zoneid_t zoneid;
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate if (INGLOBALZONE(curproc))
4890Sstevel@tonic-gate zoneid = ALL_ZONES;
4900Sstevel@tonic-gate else
4910Sstevel@tonic-gate zoneid = getzoneid();
4920Sstevel@tonic-gate return (sprlock_zone(pid, zoneid));
4930Sstevel@tonic-gate }
4940Sstevel@tonic-gate
4950Sstevel@tonic-gate void
sprlock_proc(proc_t * p)4960Sstevel@tonic-gate sprlock_proc(proc_t *p)
4970Sstevel@tonic-gate {
4980Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
4990Sstevel@tonic-gate
5000Sstevel@tonic-gate while (p->p_proc_flag & P_PR_LOCK) {
5010Sstevel@tonic-gate cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
5020Sstevel@tonic-gate }
5030Sstevel@tonic-gate
5040Sstevel@tonic-gate p->p_proc_flag |= P_PR_LOCK;
5050Sstevel@tonic-gate THREAD_KPRI_REQUEST();
5060Sstevel@tonic-gate }
5070Sstevel@tonic-gate
5080Sstevel@tonic-gate void
sprunlock(proc_t * p)5090Sstevel@tonic-gate sprunlock(proc_t *p)
5100Sstevel@tonic-gate {
5110Sstevel@tonic-gate if (panicstr) {
5120Sstevel@tonic-gate mutex_exit(&p->p_lock);
5130Sstevel@tonic-gate return;
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate
5160Sstevel@tonic-gate ASSERT(p->p_proc_flag & P_PR_LOCK);
5170Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
5180Sstevel@tonic-gate
5190Sstevel@tonic-gate cv_signal(&pr_pid_cv[p->p_slot]);
5200Sstevel@tonic-gate p->p_proc_flag &= ~P_PR_LOCK;
5210Sstevel@tonic-gate mutex_exit(&p->p_lock);
5220Sstevel@tonic-gate THREAD_KPRI_RELEASE();
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate
5250Sstevel@tonic-gate void
pid_init(void)5260Sstevel@tonic-gate pid_init(void)
5270Sstevel@tonic-gate {
5280Sstevel@tonic-gate int i;
5290Sstevel@tonic-gate
5300Sstevel@tonic-gate pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);
5310Sstevel@tonic-gate
5320Sstevel@tonic-gate pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
5330Sstevel@tonic-gate procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
5340Sstevel@tonic-gate pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
5350Sstevel@tonic-gate proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);
5360Sstevel@tonic-gate
5370Sstevel@tonic-gate nproc = 1;
5380Sstevel@tonic-gate practive = proc_sched;
5390Sstevel@tonic-gate proc_sched->p_next = NULL;
5400Sstevel@tonic-gate procdir[0].pe_proc = proc_sched;
5410Sstevel@tonic-gate
5420Sstevel@tonic-gate procentfree = &procdir[1];
5430Sstevel@tonic-gate for (i = 1; i < v.v_proc - 1; i++)
5440Sstevel@tonic-gate procdir[i].pe_next = &procdir[i+1];
5450Sstevel@tonic-gate procdir[i].pe_next = NULL;
5460Sstevel@tonic-gate
5470Sstevel@tonic-gate HASHPID(0) = &pid0;
5480Sstevel@tonic-gate
5490Sstevel@tonic-gate upcount_init();
5500Sstevel@tonic-gate }
5510Sstevel@tonic-gate
5520Sstevel@tonic-gate proc_t *
pid_entry(int slot)5530Sstevel@tonic-gate pid_entry(int slot)
5540Sstevel@tonic-gate {
5550Sstevel@tonic-gate union procent *pep;
5560Sstevel@tonic-gate proc_t *prp;
5570Sstevel@tonic-gate
5580Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
5590Sstevel@tonic-gate ASSERT(slot >= 0 && slot < v.v_proc);
5600Sstevel@tonic-gate
5610Sstevel@tonic-gate pep = procdir[slot].pe_next;
5620Sstevel@tonic-gate if (pep >= procdir && pep < &procdir[v.v_proc])
5630Sstevel@tonic-gate return (NULL);
5640Sstevel@tonic-gate prp = procdir[slot].pe_proc;
5650Sstevel@tonic-gate if (prp != 0 && prp->p_stat == SIDL)
5660Sstevel@tonic-gate return (NULL);
5670Sstevel@tonic-gate return (prp);
5680Sstevel@tonic-gate }
5690Sstevel@tonic-gate
5700Sstevel@tonic-gate /*
5710Sstevel@tonic-gate * Send the specified signal to all processes whose process group ID is
5720Sstevel@tonic-gate * equal to 'pgid'
5730Sstevel@tonic-gate */
5740Sstevel@tonic-gate
5750Sstevel@tonic-gate void
signal(pid_t pgid,int sig)5760Sstevel@tonic-gate signal(pid_t pgid, int sig)
5770Sstevel@tonic-gate {
5780Sstevel@tonic-gate struct pid *pidp;
5790Sstevel@tonic-gate proc_t *prp;
5800Sstevel@tonic-gate
5810Sstevel@tonic-gate mutex_enter(&pidlock);
5820Sstevel@tonic-gate mutex_enter(&pidlinklock);
5830Sstevel@tonic-gate if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
5840Sstevel@tonic-gate mutex_exit(&pidlinklock);
5850Sstevel@tonic-gate mutex_exit(&pidlock);
5860Sstevel@tonic-gate return;
5870Sstevel@tonic-gate }
5880Sstevel@tonic-gate mutex_exit(&pidlinklock);
5890Sstevel@tonic-gate for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
5900Sstevel@tonic-gate mutex_enter(&prp->p_lock);
5910Sstevel@tonic-gate sigtoproc(prp, NULL, sig);
5920Sstevel@tonic-gate mutex_exit(&prp->p_lock);
5930Sstevel@tonic-gate }
5940Sstevel@tonic-gate mutex_exit(&pidlock);
5950Sstevel@tonic-gate }
5960Sstevel@tonic-gate
5970Sstevel@tonic-gate /*
5980Sstevel@tonic-gate * Send the specified signal to the specified process
5990Sstevel@tonic-gate */
6000Sstevel@tonic-gate
6010Sstevel@tonic-gate void
prsignal(struct pid * pidp,int sig)6020Sstevel@tonic-gate prsignal(struct pid *pidp, int sig)
6030Sstevel@tonic-gate {
6040Sstevel@tonic-gate if (!(pidp->pid_prinactive))
6050Sstevel@tonic-gate psignal(procdir[pidp->pid_prslot].pe_proc, sig);
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate
6080Sstevel@tonic-gate #include <sys/sunddi.h>
6090Sstevel@tonic-gate
/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */
6130Sstevel@tonic-gate
6140Sstevel@tonic-gate /*
6150Sstevel@tonic-gate * obtain an opaque reference to a process for signaling
6160Sstevel@tonic-gate */
6170Sstevel@tonic-gate void *
proc_ref(void)6180Sstevel@tonic-gate proc_ref(void)
6190Sstevel@tonic-gate {
6200Sstevel@tonic-gate struct pid *pidp;
6210Sstevel@tonic-gate
6220Sstevel@tonic-gate mutex_enter(&pidlock);
6230Sstevel@tonic-gate pidp = curproc->p_pidp;
6240Sstevel@tonic-gate PID_HOLD(pidp);
6250Sstevel@tonic-gate mutex_exit(&pidlock);
6260Sstevel@tonic-gate
6270Sstevel@tonic-gate return (pidp);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate
6300Sstevel@tonic-gate /*
6310Sstevel@tonic-gate * release a reference to a process
6320Sstevel@tonic-gate * - a process can exit even if a driver has a reference to it
6330Sstevel@tonic-gate * - one proc_unref for every proc_ref
6340Sstevel@tonic-gate */
6350Sstevel@tonic-gate void
proc_unref(void * pref)6360Sstevel@tonic-gate proc_unref(void *pref)
6370Sstevel@tonic-gate {
6380Sstevel@tonic-gate mutex_enter(&pidlock);
6390Sstevel@tonic-gate PID_RELE((struct pid *)pref);
6400Sstevel@tonic-gate mutex_exit(&pidlock);
6410Sstevel@tonic-gate }
6420Sstevel@tonic-gate
6430Sstevel@tonic-gate /*
6440Sstevel@tonic-gate * send a signal to a process
6450Sstevel@tonic-gate *
6460Sstevel@tonic-gate * - send the process the signal
6470Sstevel@tonic-gate * - if the process went away, return a -1
6480Sstevel@tonic-gate * - if the process is still there return 0
6490Sstevel@tonic-gate */
6500Sstevel@tonic-gate int
proc_signal(void * pref,int sig)6510Sstevel@tonic-gate proc_signal(void *pref, int sig)
6520Sstevel@tonic-gate {
6530Sstevel@tonic-gate struct pid *pidp = pref;
6540Sstevel@tonic-gate
6550Sstevel@tonic-gate prsignal(pidp, sig);
6560Sstevel@tonic-gate return (pidp->pid_prinactive ? -1 : 0);
6570Sstevel@tonic-gate }
6580Sstevel@tonic-gate
6590Sstevel@tonic-gate
6600Sstevel@tonic-gate static struct upcount **upc_hash; /* a boot time allocated array */
6610Sstevel@tonic-gate static ulong_t upc_hashmask;
6620Sstevel@tonic-gate #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask)
6630Sstevel@tonic-gate
6640Sstevel@tonic-gate /*
6650Sstevel@tonic-gate * Get us off the ground. Called once at boot.
6660Sstevel@tonic-gate */
6670Sstevel@tonic-gate void
upcount_init(void)6680Sstevel@tonic-gate upcount_init(void)
6690Sstevel@tonic-gate {
6700Sstevel@tonic-gate ulong_t upc_hashsize;
6710Sstevel@tonic-gate
6720Sstevel@tonic-gate /*
6730Sstevel@tonic-gate * An entry per MB of memory is our current guess
6740Sstevel@tonic-gate */
6750Sstevel@tonic-gate /*
6760Sstevel@tonic-gate * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
6770Sstevel@tonic-gate * converts pages to megs (without overflowing a u_int
6780Sstevel@tonic-gate * if you have more than 4G of memory, like ptob(physmem)/1M
6790Sstevel@tonic-gate * would).
6800Sstevel@tonic-gate */
6810Sstevel@tonic-gate upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
6820Sstevel@tonic-gate upc_hashmask = upc_hashsize - 1;
6830Sstevel@tonic-gate upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
6840Sstevel@tonic-gate KM_SLEEP);
6850Sstevel@tonic-gate }
6860Sstevel@tonic-gate
6870Sstevel@tonic-gate /*
6880Sstevel@tonic-gate * Increment the number of processes associated with a given uid and zoneid.
6890Sstevel@tonic-gate */
6900Sstevel@tonic-gate void
upcount_inc(uid_t uid,zoneid_t zoneid)6910Sstevel@tonic-gate upcount_inc(uid_t uid, zoneid_t zoneid)
6920Sstevel@tonic-gate {
6930Sstevel@tonic-gate struct upcount **upc, **hupc;
6940Sstevel@tonic-gate struct upcount *new;
6950Sstevel@tonic-gate
6960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
6970Sstevel@tonic-gate new = NULL;
6980Sstevel@tonic-gate hupc = &upc_hash[UPC_HASH(uid, zoneid)];
6990Sstevel@tonic-gate top:
7000Sstevel@tonic-gate upc = hupc;
7010Sstevel@tonic-gate while ((*upc) != NULL) {
7020Sstevel@tonic-gate if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
7030Sstevel@tonic-gate (*upc)->up_count++;
7040Sstevel@tonic-gate if (new) {
7050Sstevel@tonic-gate /*
7060Sstevel@tonic-gate * did not need `new' afterall.
7070Sstevel@tonic-gate */
7080Sstevel@tonic-gate kmem_free(new, sizeof (*new));
7090Sstevel@tonic-gate }
7100Sstevel@tonic-gate return;
7110Sstevel@tonic-gate }
7120Sstevel@tonic-gate upc = &(*upc)->up_next;
7130Sstevel@tonic-gate }
7140Sstevel@tonic-gate
7150Sstevel@tonic-gate /*
7160Sstevel@tonic-gate * There is no entry for this <uid,zoneid> pair.
7170Sstevel@tonic-gate * Allocate one. If we have to drop pidlock, check
7180Sstevel@tonic-gate * again.
7190Sstevel@tonic-gate */
7200Sstevel@tonic-gate if (new == NULL) {
7210Sstevel@tonic-gate new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
7220Sstevel@tonic-gate if (new == NULL) {
7230Sstevel@tonic-gate mutex_exit(&pidlock);
7240Sstevel@tonic-gate new = (struct upcount *)kmem_alloc(sizeof (*new),
7250Sstevel@tonic-gate KM_SLEEP);
7260Sstevel@tonic-gate mutex_enter(&pidlock);
7270Sstevel@tonic-gate goto top;
7280Sstevel@tonic-gate }
7290Sstevel@tonic-gate }
7300Sstevel@tonic-gate
7310Sstevel@tonic-gate
7320Sstevel@tonic-gate /*
7330Sstevel@tonic-gate * On the assumption that a new user is going to do some
7340Sstevel@tonic-gate * more forks, put the new upcount structure on the front.
7350Sstevel@tonic-gate */
7360Sstevel@tonic-gate upc = hupc;
7370Sstevel@tonic-gate
7380Sstevel@tonic-gate new->up_uid = uid;
7390Sstevel@tonic-gate new->up_zoneid = zoneid;
7400Sstevel@tonic-gate new->up_count = 1;
7410Sstevel@tonic-gate new->up_next = *upc;
7420Sstevel@tonic-gate
7430Sstevel@tonic-gate *upc = new;
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate
7460Sstevel@tonic-gate /*
7470Sstevel@tonic-gate * Decrement the number of processes a given uid and zoneid has.
7480Sstevel@tonic-gate */
7490Sstevel@tonic-gate void
upcount_dec(uid_t uid,zoneid_t zoneid)7500Sstevel@tonic-gate upcount_dec(uid_t uid, zoneid_t zoneid)
7510Sstevel@tonic-gate {
7520Sstevel@tonic-gate struct upcount **upc;
7530Sstevel@tonic-gate struct upcount *done;
7540Sstevel@tonic-gate
7550Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
7560Sstevel@tonic-gate
7570Sstevel@tonic-gate upc = &upc_hash[UPC_HASH(uid, zoneid)];
7580Sstevel@tonic-gate while ((*upc) != NULL) {
7590Sstevel@tonic-gate if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
7600Sstevel@tonic-gate (*upc)->up_count--;
7610Sstevel@tonic-gate if ((*upc)->up_count == 0) {
7620Sstevel@tonic-gate done = *upc;
7630Sstevel@tonic-gate *upc = (*upc)->up_next;
7640Sstevel@tonic-gate kmem_free(done, sizeof (*done));
7650Sstevel@tonic-gate }
7660Sstevel@tonic-gate return;
7670Sstevel@tonic-gate }
7680Sstevel@tonic-gate upc = &(*upc)->up_next;
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate cmn_err(CE_PANIC, "decr_upcount-off the end");
7710Sstevel@tonic-gate }
7720Sstevel@tonic-gate
7730Sstevel@tonic-gate /*
7740Sstevel@tonic-gate * Returns the number of processes a uid has.
7750Sstevel@tonic-gate * Non-existent uid's are assumed to have no processes.
7760Sstevel@tonic-gate */
7770Sstevel@tonic-gate int
upcount_get(uid_t uid,zoneid_t zoneid)7780Sstevel@tonic-gate upcount_get(uid_t uid, zoneid_t zoneid)
7790Sstevel@tonic-gate {
7800Sstevel@tonic-gate struct upcount *upc;
7810Sstevel@tonic-gate
7820Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pidlock));
7830Sstevel@tonic-gate
7840Sstevel@tonic-gate upc = upc_hash[UPC_HASH(uid, zoneid)];
7850Sstevel@tonic-gate while (upc != NULL) {
7860Sstevel@tonic-gate if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
7870Sstevel@tonic-gate return (upc->up_count);
7880Sstevel@tonic-gate }
7890Sstevel@tonic-gate upc = upc->up_next;
7900Sstevel@tonic-gate }
7910Sstevel@tonic-gate return (0);
7920Sstevel@tonic-gate }
793