xref: /netbsd-src/sys/kern/kern_proc.c (revision 33cd1faa348fe1cb7947ea59b0556b9bad76cee9)
1 /*	$NetBSD: kern_proc.c,v 1.75 2004/03/14 01:08:47 cl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1982, 1986, 1989, 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  * 3. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  *
68  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
69  */
70 
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.75 2004/03/14 01:08:47 cl Exp $");
73 
74 #include "opt_kstack.h"
75 
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/kernel.h>
79 #include <sys/proc.h>
80 #include <sys/resourcevar.h>
81 #include <sys/buf.h>
82 #include <sys/acct.h>
83 #include <sys/wait.h>
84 #include <sys/file.h>
85 #include <ufs/ufs/quota.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/pool.h>
89 #include <sys/mbuf.h>
90 #include <sys/ioctl.h>
91 #include <sys/tty.h>
92 #include <sys/signalvar.h>
93 #include <sys/ras.h>
94 #include <sys/sa.h>
95 #include <sys/savar.h>
96 
/*
 * Structure associated with user caching.
 *
 * chgproccnt() keeps one record per uid: it allocates the record on
 * the first positive adjustment for a uid and frees it again when the
 * count drops back to zero.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;	/* linkage on the hash chain */
	uid_t	ui_uid;			/* uid this record belongs to */
	long	ui_proccnt;		/* running count kept by chgproccnt() */
};
/* Select the hash chain for a uid by masking the uid with uihash. */
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
/* Hash chain heads; allocated with hashinit() in procinit(). */
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */
108 
/*
 * Other process lists
 *
 * Both lists are listed in proclists[] and are therefore initialized
 * by procinit(); both are covered by proclist_lock.
 */

struct proclist allproc;	/* proc0_insert() links process 0 here */
struct proclist zombproc;	/* resources have been freed */
115 
116 
/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 *
 * The lock is set up by procinit() and is taken and released through
 * proclist_lock_read()/proclist_unlock_read() and
 * proclist_lock_write()/proclist_unlock_write().  The write variants
 * raise the interrupt priority with splclock() and restore it with
 * splx().
 */
struct lock proclist_lock;
133 
/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;	/* process, or tagged free-list value */
	struct pgrp	*pt_pgrp;	/* process group using this slot, or 0 */
};
/* p2u() turns a proc pointer into a uint so its bits can be inspected. */
#if 1	/* strongly typed cast - should be a noop */
static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
/* A slot value is a real proc pointer when its low bit is clear. */
#define P_VALID(p) (!(p2u(p) & 1))
/* Drop the tag bit of a free-slot value, giving back what P_FREE() stored. */
#define P_NEXT(p) (p2u(p) >> 1)
/* Build a free-slot value: the argument shifted left by one, low bit set. */
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
158 
/* Number of slots in the table that procinit() allocates. */
#define INITIAL_PID_TABLE_SIZE	(1 << 5)
/* The table itself; replaced by a larger one in expand_pid_table(). */
static struct pid_table *pid_table;
/* Table size - 1; a pid is masked with this to find its slot. */
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
/* proc_alloc() takes slots at next_free_pt; frees are appended at last_free_pt. */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;		/* largest value we allocate */
168 
/*
 * Memory pools.  Each one is initialized in procinit() with the
 * item type noted beside it.
 */
struct pool proc_pool;		/* struct proc */
struct pool lwp_pool;		/* struct lwp */
struct pool lwp_uc_pool;	/* ucontext_t */
struct pool pcred_pool;		/* struct pcred */
struct pool plimit_pool;	/* struct plimit */
struct pool pstats_pool;	/* struct pstats */
struct pool pgrp_pool;		/* struct pgrp */
struct pool rusage_pool;	/* struct rusage */
struct pool ras_pool;		/* struct ras */
struct pool sadata_pool;	/* struct sadata */
struct pool saupcall_pool;	/* struct sadata_upcall */
struct pool sastack_pool;	/* struct sastack */
struct pool savp_pool;		/* struct sadata_vp */
struct pool ptimer_pool;	/* struct ptimer */
183 
/*
 * Malloc types defined by this file.  Within this file M_PROC is used
 * for the pid table, the uid hash table and uidinfo records, and
 * M_SESSION for session structures.
 */
MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
188 
/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 *
 * The array is terminated by an entry whose list pointer is NULL;
 * procinit() walks it to initialize every list.
 */
const struct proclist_desc proclists[] = {
	{ &allproc	},
	{ &zombproc	},
	{ NULL		},
};
199 
/* File-local helpers that are used before their definitions. */
static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);
202 
203 /*
204  * Initialize global process hashing structures.
205  */
206 void
207 procinit(void)
208 {
209 	const struct proclist_desc *pd;
210 	int i;
211 #define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
212 
213 	for (pd = proclists; pd->pd_list != NULL; pd++)
214 		LIST_INIT(pd->pd_list);
215 
216 	spinlockinit(&proclist_lock, "proclk", 0);
217 
218 	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
219 			    M_PROC, M_WAITOK);
220 	/* Set free list running through table...
221 	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
222 	for (i = 0; i <= pid_tbl_mask; i++) {
223 		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
224 		pid_table[i].pt_pgrp = 0;
225 	}
226 	/* slot 0 is just grabbed */
227 	next_free_pt = 1;
228 	/* Need to fix last entry. */
229 	last_free_pt = pid_tbl_mask;
230 	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
231 	/* point at which we grow table - to avoid reusing pids too often */
232 	pid_alloc_lim = pid_tbl_mask - 1;
233 #undef LINK_EMPTY
234 
235 	LIST_INIT(&alllwp);
236 
237 	uihashtbl =
238 	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
239 
240 	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
241 	    &pool_allocator_nointr);
242 	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
243 	    &pool_allocator_nointr);
244 	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
245 	    &pool_allocator_nointr);
246 	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
247 	    &pool_allocator_nointr);
248 	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
249 	    &pool_allocator_nointr);
250 	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
251 	    &pool_allocator_nointr);
252 	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
253 	    &pool_allocator_nointr);
254 	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
255 	    &pool_allocator_nointr);
256 	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
257 	    &pool_allocator_nointr);
258 	pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
259 	    &pool_allocator_nointr);
260 	pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
261 	    "saupcpl", &pool_allocator_nointr);
262 	pool_init(&sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
263 	    &pool_allocator_nointr);
264 	pool_init(&savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
265 	    &pool_allocator_nointr);
266 	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
267 	    &pool_allocator_nointr);
268 }
269 
270 /*
271  * Acquire a read lock on the proclist.
272  */
273 void
274 proclist_lock_read(void)
275 {
276 	int error;
277 
278 	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
279 #ifdef DIAGNOSTIC
280 	if (__predict_false(error != 0))
281 		panic("proclist_lock_read: failed to acquire lock");
282 #endif
283 }
284 
/*
 * Release a read lock on the proclist.
 *
 * Counterpart of proclist_lock_read().  The result of the release
 * request is deliberately discarded.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}
294 
295 /*
296  * Acquire a write lock on the proclist.
297  */
298 int
299 proclist_lock_write(void)
300 {
301 	int s, error;
302 
303 	s = splclock();
304 	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
305 #ifdef DIAGNOSTIC
306 	if (__predict_false(error != 0))
307 		panic("proclist_lock: failed to acquire lock");
308 #endif
309 	return (s);
310 }
311 
/*
 * Release a write lock on the proclist.
 *
 * 's' is the value returned by the matching proclist_lock_write();
 * it is handed to splx() after the lock has been released.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}
322 
323 /*
324  * Change the count associated with number of processes
325  * a given user is using.
326  */
327 int
328 chgproccnt(uid_t uid, int diff)
329 {
330 	struct uidinfo *uip;
331 	struct uihashhead *uipp;
332 
333 	uipp = UIHASH(uid);
334 
335 	LIST_FOREACH(uip, uipp, ui_hash)
336 		if (uip->ui_uid == uid)
337 			break;
338 
339 	if (uip) {
340 		uip->ui_proccnt += diff;
341 		if (uip->ui_proccnt > 0)
342 			return (uip->ui_proccnt);
343 		if (uip->ui_proccnt < 0)
344 			panic("chgproccnt: procs < 0");
345 		LIST_REMOVE(uip, ui_hash);
346 		FREE(uip, M_PROC);
347 		return (0);
348 	}
349 	if (diff <= 0) {
350 		if (diff == 0)
351 			return(0);
352 		panic("chgproccnt: lost user");
353 	}
354 	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
355 	LIST_INSERT_HEAD(uipp, uip, ui_hash);
356 	uip->ui_uid = uid;
357 	uip->ui_proccnt = diff;
358 	return (diff);
359 }
360 
361 /*
362  * Check that the specified process group is in the session of the
363  * specified process.
364  * Treats -ve ids as process ids.
365  * Used to validate TIOCSPGRP requests.
366  */
367 int
368 pgid_in_session(struct proc *p, pid_t pg_id)
369 {
370 	struct pgrp *pgrp;
371 
372 	if (pg_id < 0) {
373 		struct proc *p1 = pfind(-pg_id);
374 		if (p1 == NULL)
375 			return EINVAL;
376 		pgrp = p1->p_pgrp;
377 	} else {
378 		pgrp = pgfind(pg_id);
379 		if (pgrp == NULL)
380 			return EINVAL;
381 	}
382 	if (pgrp->pg_session != p->p_pgrp->pg_session)
383 		return EPERM;
384 	return 0;
385 }
386 
387 /*
388  * Is p an inferior of q?
389  */
390 int
391 inferior(struct proc *p, struct proc *q)
392 {
393 
394 	for (; p != q; p = p->p_pptr)
395 		if (p->p_pid == 0)
396 			return (0);
397 	return (1);
398 }
399 
400 /*
401  * Locate a process by number
402  */
403 struct proc *
404 p_find(pid_t pid, uint flags)
405 {
406 	struct proc *p;
407 	char stat;
408 
409 	if (!(flags & PFIND_LOCKED))
410 		proclist_lock_read();
411 	p = pid_table[pid & pid_tbl_mask].pt_proc;
412 	/* Only allow live processes to be found by pid. */
413 	if (P_VALID(p) && p->p_pid == pid &&
414 	    ((stat = p->p_stat) == SACTIVE || stat == SSTOP
415 		    || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
416 		if (flags & PFIND_UNLOCK_OK)
417 			 proclist_unlock_read();
418 		return p;
419 	}
420 	if (flags & PFIND_UNLOCK_FAIL)
421 		 proclist_unlock_read();
422 	return NULL;
423 }
424 
425 
426 /*
427  * Locate a process group by number
428  */
429 struct pgrp *
430 pg_find(pid_t pgid, uint flags)
431 {
432 	struct pgrp *pg;
433 
434 	if (!(flags & PFIND_LOCKED))
435 		proclist_lock_read();
436 	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
437 	/*
438 	 * Can't look up a pgrp that only exists because the session
439 	 * hasn't died yet (traditional)
440 	 */
441 	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
442 		if (flags & PFIND_UNLOCK_FAIL)
443 			 proclist_unlock_read();
444 		return NULL;
445 	}
446 
447 	if (flags & PFIND_UNLOCK_OK)
448 		proclist_unlock_read();
449 	return pg;
450 }
451 
452 /*
453  * Set entry for process 0
454  */
455 void
456 proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
457 	struct session *sess)
458 {
459 	int s;
460 
461 	simple_lock_init(&p->p_lock);
462 	LIST_INIT(&p->p_lwps);
463 	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
464 	p->p_nlwps = 1;
465 	simple_lock_init(&p->p_sigctx.ps_silock);
466 	CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);
467 
468 	s = proclist_lock_write();
469 
470 	pid_table[0].pt_proc = p;
471 	LIST_INSERT_HEAD(&allproc, p, p_list);
472 	LIST_INSERT_HEAD(&alllwp, l, l_list);
473 
474 	p->p_pgrp = pgrp;
475 	pid_table[0].pt_pgrp = pgrp;
476 	LIST_INIT(&pgrp->pg_members);
477 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
478 
479 	pgrp->pg_session = sess;
480 	sess->s_count = 1;
481 	sess->s_sid = 0;
482 	sess->s_leader = p;
483 
484 	proclist_unlock_write(s);
485 }
486 
/*
 * Double the size of the pid table.
 *
 * The new table is allocated before the proclist write lock is taken.
 * If the table size has changed by the time the lock is held, the new
 * allocation is discarded and nothing else is done.  Otherwise each
 * old slot i is distributed over new slots i and i + pt_size, the
 * tables are switched, the allocation limits are updated and the old
 * table is freed after the lock has been dropped.
 */
static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;	/* size of the current table */
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	/* n_pt[0] is new slot i, n_pt[pt_size] is new slot i + pt_size */
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			/* a slot kept only by a pgrp is placed by the pgrp id */
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		/* flip the half-selecting bit and clear the old slot-index bits */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
				    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		/* i is a signed int; stop explicitly after slot 0 is done */
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;	/* remember the old table so it can be freed */
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}
565 
566 struct proc *
567 proc_alloc(void)
568 {
569 	struct proc *p;
570 	int s;
571 	int nxt;
572 	pid_t pid;
573 	struct pid_table *pt;
574 
575 	p = pool_get(&proc_pool, PR_WAITOK);
576 	p->p_stat = SIDL;			/* protect against others */
577 
578 	/* allocate next free pid */
579 
580 	for (;;expand_pid_table()) {
581 		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
582 			/* ensure pids cycle through 2000+ values */
583 			continue;
584 		s = proclist_lock_write();
585 		pt = &pid_table[next_free_pt];
586 #ifdef DIAGNOSTIC
587 		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
588 			panic("proc_alloc: slot busy");
589 #endif
590 		nxt = P_NEXT(pt->pt_proc);
591 		if (nxt & pid_tbl_mask)
592 			break;
593 		/* Table full - expand (NB last entry not used....) */
594 		proclist_unlock_write(s);
595 	}
596 
597 	/* pid is 'saved use count' + 'size' + entry */
598 	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
599 	if ((uint)pid > (uint)pid_max)
600 		pid &= pid_tbl_mask;
601 	p->p_pid = pid;
602 	next_free_pt = nxt & pid_tbl_mask;
603 
604 	/* Grab table slot */
605 	pt->pt_proc = p;
606 	pid_alloc_cnt++;
607 
608 	proclist_unlock_write(s);
609 
610 	return p;
611 }
612 
613 /*
614  * Free last resources of a process - called from proc_free (in kern_exit.c)
615  */
616 void
617 proc_free_mem(struct proc *p)
618 {
619 	int s;
620 	pid_t pid = p->p_pid;
621 	struct pid_table *pt;
622 
623 	s = proclist_lock_write();
624 
625 	pt = &pid_table[pid & pid_tbl_mask];
626 #ifdef DIAGNOSTIC
627 	if (__predict_false(pt->pt_proc != p))
628 		panic("proc_free: pid_table mismatch, pid %x, proc %p",
629 			pid, p);
630 #endif
631 	/* save pid use count in slot */
632 	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
633 
634 	if (pt->pt_pgrp == NULL) {
635 		/* link last freed entry onto ours */
636 		pid &= pid_tbl_mask;
637 		pt = &pid_table[last_free_pt];
638 		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
639 		last_free_pt = pid;
640 		pid_alloc_cnt--;
641 	}
642 
643 	nprocs--;
644 	proclist_unlock_write(s);
645 
646 	pool_put(&proc_pool, p);
647 }
648 
/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 *
 * Returns 0 on success (including the cases where nothing needs to
 * change), otherwise ESRCH, EACCES or EPERM as set by the checks below.
 * Memory that was allocated up front but not used is released on the
 * way out.
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;	/* id of a pgrp to delete after unlocking */

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		MALLOC(sess, struct session *, sizeof(struct session),
			    M_SESSION, M_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;	/* consumed: must not be freed at 'done' */
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			/* new pgrp joins the process' current session */
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;	/* in use now: must not be freed at 'done' */

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

    done:
	proclist_unlock_write(s);
	/* release whatever was allocated up front but not used */
	if (sess != NULL)
		free(sess, M_SESSION);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	/* the process' old group lost its last member: delete it now */
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
			pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
812 
813 /*
814  * remove process from process group
815  */
816 int
817 leavepgrp(struct proc *p)
818 {
819 	int s;
820 	struct pgrp *pgrp;
821 	pid_t pg_id;
822 
823 	s = proclist_lock_write();
824 	pgrp = p->p_pgrp;
825 	LIST_REMOVE(p, p_pglist);
826 	p->p_pgrp = 0;
827 	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
828 	proclist_unlock_write(s);
829 
830 	if (pg_id != NO_PGID)
831 		pg_delete(pg_id);
832 	return 0;
833 }
834 
835 static void
836 pg_free(pid_t pg_id)
837 {
838 	struct pgrp *pgrp;
839 	struct pid_table *pt;
840 	int s;
841 
842 	s = proclist_lock_write();
843 	pt = &pid_table[pg_id & pid_tbl_mask];
844 	pgrp = pt->pt_pgrp;
845 #ifdef DIAGNOSTIC
846 	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
847 	    || !LIST_EMPTY(&pgrp->pg_members)))
848 		panic("pg_free: process group absent or has members");
849 #endif
850 	pt->pt_pgrp = 0;
851 
852 	if (!P_VALID(pt->pt_proc)) {
853 		/* orphaned pgrp, put slot onto free list */
854 #ifdef DIAGNOSTIC
855 		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
856 			panic("pg_free: process slot on free list");
857 #endif
858 
859 		pg_id &= pid_tbl_mask;
860 		pt = &pid_table[last_free_pt];
861 		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
862 		last_free_pt = pg_id;
863 		pid_alloc_cnt--;
864 	}
865 	proclist_unlock_write(s);
866 
867 	pool_put(&pgrp_pool, pgrp);
868 }
869 
870 /*
871  * delete a process group
872  */
873 static void
874 pg_delete(pid_t pg_id)
875 {
876 	struct pgrp *pgrp;
877 	struct tty *ttyp;
878 	struct session *ss;
879 	int s, is_pgrp_leader;
880 
881 	s = proclist_lock_write();
882 	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
883 	if (pgrp == NULL || pgrp->pg_id != pg_id ||
884 	    !LIST_EMPTY(&pgrp->pg_members)) {
885 		proclist_unlock_write(s);
886 		return;
887 	}
888 
889 	ss = pgrp->pg_session;
890 
891 	/* Remove reference (if any) from tty to this process group */
892 	ttyp = ss->s_ttyp;
893 	if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
894 		ttyp->t_pgrp = NULL;
895 #ifdef DIAGNOSTIC
896 		if (ttyp->t_session != ss)
897 			panic("pg_delete: wrong session on terminal");
898 #endif
899 	}
900 
901 	/*
902 	 * The leading process group in a session is freed
903 	 * by sessdelete() if last reference.
904 	 */
905 	is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
906 	proclist_unlock_write(s);
907 	SESSRELE(ss);
908 
909 	if (is_pgrp_leader)
910 		return;
911 
912 	pg_free(pg_id);
913 }
914 
/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 *
 * Frees the process group whose id equals the session id, then the
 * session structure itself.
 */
void
sessdelete(struct session *ss)
{
	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a process being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */

	pg_free(ss->s_sid);

	FREE(ss, M_SESSION);
}
932 
933 /*
934  * Adjust pgrp jobc counters when specified process changes process group.
935  * We count the number of processes in each process group that "qualify"
936  * the group for terminal job control (those with a parent in a different
937  * process group of the same session).  If that count reaches zero, the
938  * process group becomes orphaned.  Check both the specified process'
939  * process group and that of its children.
940  * entering == 0 => p is leaving specified group.
941  * entering == 1 => p is entering specified group.
942  *
943  * Call with proclist_lock held.
944  */
945 void
946 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
947 {
948 	struct pgrp *hispgrp;
949 	struct session *mysession = pgrp->pg_session;
950 	struct proc *child;
951 
952 	/*
953 	 * Check p's parent to see whether p qualifies its own process
954 	 * group; if so, adjust count for p's process group.
955 	 */
956 	hispgrp = p->p_pptr->p_pgrp;
957 	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
958 		if (entering)
959 			pgrp->pg_jobc++;
960 		else if (--pgrp->pg_jobc == 0)
961 			orphanpg(pgrp);
962 	}
963 
964 	/*
965 	 * Check this process' children to see whether they qualify
966 	 * their process groups; if so, adjust counts for children's
967 	 * process groups.
968 	 */
969 	LIST_FOREACH(child, &p->p_children, p_sibling) {
970 		hispgrp = child->p_pgrp;
971 		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
972 		    !P_ZOMBIE(child)) {
973 			if (entering)
974 				hispgrp->pg_jobc++;
975 			else if (--hispgrp->pg_jobc == 0)
976 				orphanpg(hispgrp);
977 		}
978 	}
979 }
980 
981 /*
982  * A process group has become orphaned;
983  * if there are any stopped processes in the group,
984  * hang-up all process in that group.
985  *
986  * Call with proclist_lock held.
987  */
988 static void
989 orphanpg(struct pgrp *pg)
990 {
991 	struct proc *p;
992 
993 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
994 		if (p->p_stat == SSTOP) {
995 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
996 				psignal(p, SIGHUP);
997 				psignal(p, SIGCONT);
998 			}
999 			return;
1000 		}
1001 	}
1002 }
1003 
1004 /* mark process as suid/sgid, reset some values to defaults */
1005 void
1006 p_sugid(struct proc *p)
1007 {
1008 	struct plimit *newlim;
1009 
1010 	p->p_flag |= P_SUGID;
1011 	/* reset what needs to be reset in plimit */
1012 	if (p->p_limit->pl_corename != defcorename) {
1013 		if (p->p_limit->p_refcnt > 1 &&
1014 		    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
1015 			newlim = limcopy(p->p_limit);
1016 			limfree(p->p_limit);
1017 			p->p_limit = newlim;
1018 		}
1019 		free(p->p_limit->pl_corename, M_TEMP);
1020 		p->p_limit->pl_corename = defcorename;
1021 	}
1022 }
1023 
1024 #ifdef DDB
1025 #include <ddb/db_output.h>
1026 void pidtbl_dump(void);
1027 void
1028 pidtbl_dump(void)
1029 {
1030 	struct pid_table *pt;
1031 	struct proc *p;
1032 	struct pgrp *pgrp;
1033 	int id;
1034 
1035 	db_printf("pid table %p size %x, next %x, last %x\n",
1036 		pid_table, pid_tbl_mask+1,
1037 		next_free_pt, last_free_pt);
1038 	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
1039 		p = pt->pt_proc;
1040 		if (!P_VALID(p) && !pt->pt_pgrp)
1041 			continue;
1042 		db_printf("  id %x: ", id);
1043 		if (P_VALID(p))
1044 			db_printf("proc %p id %d (0x%x) %s\n",
1045 				p, p->p_pid, p->p_pid, p->p_comm);
1046 		else
1047 			db_printf("next %x use %x\n",
1048 				P_NEXT(p) & pid_tbl_mask,
1049 				P_NEXT(p) & ~pid_tbl_mask);
1050 		if ((pgrp = pt->pt_pgrp)) {
1051 			db_printf("\tsession %p, sid %d, count %d, login %s\n",
1052 			    pgrp->pg_session, pgrp->pg_session->s_sid,
1053 			    pgrp->pg_session->s_count,
1054 			    pgrp->pg_session->s_login);
1055 			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1056 			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
1057 			    pgrp->pg_members.lh_first);
1058 			for (p = pgrp->pg_members.lh_first; p != 0;
1059 			    p = p->p_pglist.le_next) {
1060 				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1061 				    p->p_pid, p, p->p_pgrp, p->p_comm);
1062 			}
1063 		}
1064 	}
1065 }
1066 #endif /* DDB */
1067 
1068 #ifdef KSTACK_CHECK_MAGIC
1069 #include <sys/user.h>
1070 
/* Pattern that kstack_setup_magic() writes into every word of a kernel stack. */
#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
/* Smallest amount of unused stack seen so far by kstack_check_magic(). */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is
					  less than this */
1077 
1078 void
1079 kstack_setup_magic(const struct lwp *l)
1080 {
1081 	u_int32_t *ip;
1082 	u_int32_t const *end;
1083 
1084 	KASSERT(l != NULL);
1085 	KASSERT(l != &lwp0);
1086 
1087 	/*
1088 	 * fill all the stack with magic number
1089 	 * so that later modification on it can be detected.
1090 	 */
1091 	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
1092 	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1093 	for (; ip < end; ip++) {
1094 		*ip = KSTACK_MAGIC;
1095 	}
1096 }
1097 
1098 void
1099 kstack_check_magic(const struct lwp *l)
1100 {
1101 	u_int32_t const *ip, *end;
1102 	int stackleft;
1103 
1104 	KASSERT(l != NULL);
1105 
1106 	/* don't check proc0 */ /*XXX*/
1107 	if (l == &lwp0)
1108 		return;
1109 
1110 #ifdef __MACHINE_STACK_GROWS_UP
1111 	/* stack grows upwards (eg. hppa) */
1112 	ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1113 	end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
1114 	for (ip--; ip >= end; ip--)
1115 		if (*ip != KSTACK_MAGIC)
1116 			break;
1117 
1118 	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
1119 #else /* __MACHINE_STACK_GROWS_UP */
1120 	/* stack grows downwards (eg. i386) */
1121 	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
1122 	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1123 	for (; ip < end; ip++)
1124 		if (*ip != KSTACK_MAGIC)
1125 			break;
1126 
1127 	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
1128 #endif /* __MACHINE_STACK_GROWS_UP */
1129 
1130 	if (kstackleftmin > stackleft) {
1131 		kstackleftmin = stackleft;
1132 		if (stackleft < kstackleftthres)
1133 			printf("warning: kernel stack left %d bytes"
1134 			    "(pid %u:lid %u)\n", stackleft,
1135 			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1136 	}
1137 
1138 	if (stackleft <= 0) {
1139 		panic("magic on the top of kernel stack changed for "
1140 		    "pid %u, lid %u: maybe kernel stack overflow",
1141 		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1142 	}
1143 }
1144 #endif /* KSTACK_CHECK_MAGIC */
1145