/*	$NetBSD: kern_proc.c,v 1.71 2004/02/06 06:59:33 pk Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.71 2004/02/06 06:59:33 pk Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>

static void pg_delete(pid_t);

/*
 * Structure associated with user caching.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;
	uid_t	ui_uid;
	long	ui_proccnt;
};
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */
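
/*
 * Illustrative example (a sketch, not part of the original source):
 * with a 64-bucket table, uihash is 63, so uid 1000 hashes to bucket
 * 1000 & 63 == 40; a lookup walks that bucket comparing ui_uid, as
 * chgproccnt() below does.
 */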

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;
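
/*
 * Illustrative use (a sketch, not taken from any particular caller):
 *
 *	struct proc *p;
 *
 *	proclist_lock_read();
 *	LIST_FOREACH(p, &allproc, p_list) {
 *		(examine p)
 *	}
 *	proclist_unlock_read();
 *
 * Writers instead bracket updates with s = proclist_lock_write() and
 * proclist_unlock_write(s), which raise the IPL via splclock() as
 * described above.
 */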

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
#define P_VALID(p) (!(p2u(p) & 1))
#define P_NEXT(p) (p2u(p) >> 1)
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
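
/*
 * Worked example of the encoding (illustration only): a free slot
 * whose next free entry is 5, with a saved 'use count' of 0x40,
 * stores P_FREE(0x45) == (struct proc *)0x8b.  P_VALID() is false
 * because the low bit is set, and P_NEXT() recovers 0x45.
 */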

#define INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;		/* largest value we allocate */

struct pool proc_pool;
struct pool lwp_pool;
struct pool lwp_uc_pool;
struct pool pcred_pool;
struct pool plimit_pool;
struct pool pstats_pool;
struct pool pgrp_pool;
struct pool rusage_pool;
struct pool ras_pool;
struct pool sadata_pool;
struct pool saupcall_pool;
struct pool sastack_pool;
struct pool ptimer_pool;

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc	},
	{ &zombproc	},
	{ NULL		},
};

static void orphanpg __P((struct pgrp *));
#ifdef DEBUG
void pgrpdump __P((void));
#endif

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
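	/*
	 * Worked example (assuming PID_MAX is its historic value, 30000):
	 * with the initial 32-entry table, LINK_EMPTY is
	 * (30000 + 32) & ~31 == 30016, the first multiple of the table
	 * size above PID_MAX, so the first pid computed from any slot
	 * already exceeds pid_max and is wrapped to a small value by
	 * proc_alloc() below.
	 */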

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	spinlockinit(&proclist_lock, "proclk", 0);

	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
			    M_PROC, M_WAITOK);
	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	LIST_INIT(&alllwp);

	uihashtbl =
	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);

	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
	    &pool_allocator_nointr);
	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
	    &pool_allocator_nointr);
	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
	    &pool_allocator_nointr);
	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
	    &pool_allocator_nointr);
	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
	    &pool_allocator_nointr);
	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
	    &pool_allocator_nointr);
	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
	    &pool_allocator_nointr);
	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
	    &pool_allocator_nointr);
	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
	    &pool_allocator_nointr);
	pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
	    &pool_allocator_nointr);
	pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
	    "saupcpl", &pool_allocator_nointr);
	pool_init(&sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
	    &pool_allocator_nointr);
	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
	    &pool_allocator_nointr);
}

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
	int error;

	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
	int s, error;

	s = splclock();
	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock: failed to acquire lock");
#endif
	return (s);
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}

/*
 * Change the count of processes associated with a given user.
 */
int
chgproccnt(uid_t uid, int diff)
{
	struct uidinfo *uip;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);

	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	if (uip) {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt > 0)
			return (uip->ui_proccnt);
		if (uip->ui_proccnt < 0)
			panic("chgproccnt: procs < 0");
		LIST_REMOVE(uip, ui_hash);
		FREE(uip, M_PROC);
		return (0);
	}
	if (diff <= 0) {
		if (diff == 0)
			return (0);
		panic("chgproccnt: lost user");
	}
	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
	LIST_INSERT_HEAD(uipp, uip, ui_hash);
	uip->ui_uid = uid;
	uip->ui_proccnt = diff;
	return (diff);
}
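
/*
 * Convention (a sketch; the real callers live elsewhere, e.g. the
 * fork and exit paths): process-creation code calls chgproccnt(uid, 1)
 * and checks the result against the per-user resource limit, while
 * teardown code calls chgproccnt(uid, -1).
 */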

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Negative ids are treated as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;

	if (pg_id < 0) {
		struct proc *p1 = pfind(-pg_id);
		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgfind(pg_id);
		if (pgrp == NULL)
			return EINVAL;
	}
	if (pgrp->pg_session != p->p_pgrp->pg_session)
		return EPERM;
	return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return (0);
	return (1);
}

/*
 * Locate a process by number
 */
struct proc *
p_find(pid_t pid, uint flags)
{
	struct proc *p;
	char stat;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	p = pid_table[pid & pid_tbl_mask].pt_proc;
	/* Only allow live processes to be found by pid. */
	if (P_VALID(p) && p->p_pid == pid &&
	    ((stat = p->p_stat) == SACTIVE || stat == SSTOP
		    || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
		if (flags & PFIND_UNLOCK_OK)
			 proclist_unlock_read();
		return p;
	}
	if (flags & PFIND_UNLOCK_FAIL)
		 proclist_unlock_read();
	return NULL;
}
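
/*
 * Flag semantics, by example: p_find(pid, PFIND_UNLOCK_OK |
 * PFIND_UNLOCK_FAIL) takes the proclist read lock and releases it on
 * both the hit and the miss path, while p_find(pid, PFIND_LOCKED)
 * is for callers that already hold the lock and want to keep it.
 */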


/*
 * Locate a process group by number
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
	struct pgrp *pg;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		if (flags & PFIND_UNLOCK_FAIL)
			 proclist_unlock_read();
		return NULL;
	}

	if (flags & PFIND_UNLOCK_OK)
		proclist_unlock_read();
	return pg;
}

/*
 * Set entry for process 0
 */
void
proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
	struct session *sess)
{
	int s;

	simple_lock_init(&p->p_lock);
	LIST_INIT(&p->p_lwps);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	p->p_nlwps = 1;
	simple_lock_init(&p->p_sigctx.ps_silock);
	CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);

	s = proclist_lock_write();

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	p->p_pgrp = pgrp;
	pid_table[0].pt_pgrp = pgrp;
	LIST_INIT(&pgrp->pg_members);
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	pgrp->pg_session = sess;
	sess->s_count = 1;
	sess->s_sid = 0;
	sess->s_leader = p;

	proclist_unlock_write(s);
}

static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pids to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fix up the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
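	/*
	 * Worked example (illustration only): growing from 4 to 8 slots,
	 * a free old slot 2 holding P_FREE(6) (use count 4, link 2) gets
	 * its use count raised to 8 and stays at new slot 2, while its
	 * counterpart, new slot 6, is pushed onto the free list; a live
	 * pid 5 in old slot 1 moves to new slot 5 (5 & 4 is non-zero)
	 * and new slot 1 is freed.
	 */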
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
				    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int s;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_get(&proc_pool, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */

	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		s = proclist_lock_write();
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		proclist_unlock_write(s);
	}

	/* pid is 'saved use count' + 'size' + entry */
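	/*
	 * Worked example: with a 64-slot table (pid_tbl_mask == 63) and
	 * free slot 3 carrying a saved use count of 1088, this computes
	 * pid = 1088 + 63 + 1 + 3 == 1155, one full table generation
	 * after the pid that last occupied the slot.
	 */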
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	proclist_unlock_write(s);

	return p;
}

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
	int s;
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	s = proclist_lock_write();

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
			pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
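	/*
	 * Example (illustration only): with pid_tbl_mask == 63, freeing
	 * pid 1091 (slot 3) stores P_FREE(1088); proc_alloc() will later
	 * hand out 1088 + 64 + 3 == 1155 from this slot, so the dead pid
	 * is not reused until the pid space wraps.
	 */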

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	nprocs--;
	proclist_unlock_write(s);

	pool_put(&proc_pool, p);
}

/*
 * Move p to a new or existing process group (and session).
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process's pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		MALLOC(sess, struct session *, sizeof(struct session),
			    M_SESSION, M_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

    done:
	proclist_unlock_write(s);
	if (sess != NULL)
		free(sess, M_SESSION);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
			pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * remove process from process group
 */
int
leavepgrp(struct proc *p)
{
	int s;
	struct pgrp *pgrp;
	pid_t pg_id;

	s = proclist_lock_write();
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = 0;
	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
	proclist_unlock_write(s);

	if (pg_id != NO_PGID)
		pg_delete(pg_id);
	return 0;
}

static void
pg_free(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;
	int s;

	s = proclist_lock_write();
	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
	    || !LIST_EMPTY(&pgrp->pg_members)))
		panic("pg_free: process group absent or has members");
#endif
	pt->pt_pgrp = 0;

	if (!P_VALID(pt->pt_proc)) {
		/* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
			panic("pg_free: process slot on free list");
#endif

		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	proclist_unlock_write(s);

	pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct tty *ttyp;
	struct session *ss;
	int s, is_pgrp_leader;

	s = proclist_lock_write();
	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pgrp == NULL || pgrp->pg_id != pg_id ||
	    !LIST_EMPTY(&pgrp->pg_members)) {
		proclist_unlock_write(s);
		return;
	}

	ss = pgrp->pg_session;

	/* Remove reference (if any) from tty to this process group */
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
		ttyp->t_pgrp = NULL;
#ifdef DIAGNOSTIC
		if (ttyp->t_session != ss)
			panic("pg_delete: wrong session on terminal");
#endif
	}

	/*
	 * The leading process group in a session is freed
	 * by sessdelete() when the last reference is released.
	 */
	is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
	proclist_unlock_write(s);
	SESSRELE(ss);

	if (is_pgrp_leader)
		return;

	pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a new process from being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */

	pg_free(ss->s_sid);

	FREE(ss, M_SESSION);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proclist_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}
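
/*
 * Scenario (illustrative): a shell in pgrp A spawns a pipeline in
 * pgrp B of the same session; each pipeline member has its parent in
 * a different pgrp of the session, so B's pg_jobc is positive.  When
 * the shell exits, the child loop above decrements B's pg_jobc; if it
 * reaches zero and any member is stopped, orphanpg() below signals
 * the whole group.
 */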

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proclist_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
				psignal(p, SIGHUP);
				psignal(p, SIGCONT);
			}
			return;
		}
	}
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
	struct plimit *newlim;

	p->p_flag |= P_SUGID;
	/* reset what needs to be reset in plimit */
	if (p->p_limit->pl_corename != defcorename) {
		if (p->p_limit->p_refcnt > 1 &&
		    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
			newlim = limcopy(p->p_limit);
			limfree(p->p_limit);
			p->p_limit = newlim;
		}
		free(p->p_limit->pl_corename, M_TEMP);
		p->p_limit->pl_corename = defcorename;
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
		pid_table, pid_tbl_mask+1,
		next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("proc %p id %d (0x%x) %s\n",
				p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
				P_NEXT(p) & pid_tbl_mask,
				P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    pgrp->pg_members.lh_first);
			for (p = pgrp->pg_members.lh_first; p != 0;
			    p = p->p_pglist.le_next) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is
					  less than this */

void
kstack_setup_magic(const struct lwp *l)
{
	u_int32_t *ip;
	u_int32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * Fill the whole stack with the magic number
	 * so that later modification of it can be detected.
	 */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	u_int32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (e.g. hppa) */
	ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (e.g. i386) */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */