/*	$NetBSD: kern_proc.c,v 1.64 2003/03/19 20:35:04 dsl Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.64 2003/03/19 20:35:04 dsl Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>

static void pg_delete(pid_t);

/*
 * Structure associated with user caching.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;
	uid_t	ui_uid;
	long	ui_proccnt;
};
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;

/*
 * List of processes that have called exit but still need to be reaped.
 * Locking of this proclist is special; it's accessed in a
 * critical section of process exit, and thus locking it can't
 * modify interrupt state.
 * We use a simple spin lock for this proclist.
 * Processes on this proclist are also on zombproc.
 */
struct simplelock deadproc_slock;
struct deadprocs deadprocs = SLIST_HEAD_INITIALIZER(deadprocs);

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p)	((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
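/*
 * Illustrative note (added commentary, not from the original source):
 * a free slot stores (next_free_index << 1) | 1 in pt_proc, so e.g. a
 * slot whose free-list link is 42 holds the integer 85.  P_VALID() sees
 * the low bit set and reports false, and P_NEXT() shifts 85 back down
 * to recover 42.  Real proc pointers are always at least word-aligned,
 * so their low bit is clear and P_VALID() reports true for them.
 */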
#define	INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

struct pool proc_pool;
struct pool lwp_pool;
struct pool lwp_uc_pool;
struct pool pcred_pool;
struct pool plimit_pool;
struct pool pstats_pool;
struct pool pgrp_pool;
struct pool rusage_pool;
struct pool ras_pool;
struct pool sadata_pool;
struct pool saupcall_pool;
struct pool ptimer_pool;

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static void orphanpg __P((struct pgrp *));
#ifdef DEBUG
void pgrpdump __P((void));
#endif

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	spinlockinit(&proclist_lock, "proclk", 0);

	simple_lock_init(&deadproc_slock);

	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
			    M_PROC, M_WAITOK);
	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
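	/*
	 * Worked example (added commentary): with the initial 32-entry
	 * table, LINK_EMPTY is (30000 + 32) & ~31 = 30016, so slot 1 is
	 * preset to P_FREE(30018).  The first proc_alloc() then computes
	 * pid = (30018 & ~31) + 31 + 1 + 1 = 30049; that exceeds
	 * PID_MAX (30000) and is masked down to 30049 & 31 = 1, which is
	 * exactly the "allocate pid 1 next" promised above.
	 */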
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	LIST_INIT(&alllwp);
	LIST_INIT(&deadlwp);
	LIST_INIT(&zomblwp);

	uihashtbl =
	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);

	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
	    &pool_allocator_nointr);
	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
	    &pool_allocator_nointr);
	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
	    &pool_allocator_nointr);
	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
	    &pool_allocator_nointr);
	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
	    &pool_allocator_nointr);
	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
	    &pool_allocator_nointr);
	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
	    &pool_allocator_nointr);
	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
	    &pool_allocator_nointr);
	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
	    &pool_allocator_nointr);
	pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
	    &pool_allocator_nointr);
	pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
	    "saupcpl", &pool_allocator_nointr);
	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
	    &pool_allocator_nointr);
}

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
	int error;

	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
	int s, error;

	s = splclock();
	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock: failed to acquire lock");
#endif
	return (s);
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}
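/*
 * Illustrative usage (added commentary): writers must save and restore
 * the spl value returned by proclist_lock_write(), since the write side
 * also blocks clock interrupts:
 *
 *	s = proclist_lock_write();
 *	...modify allproc, zombproc or pid_table...
 *	proclist_unlock_write(s);
 *
 * Readers simply bracket lookups with proclist_lock_read() and
 * proclist_unlock_read(), and may run in interrupt context.
 */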
/*
 * Change the count associated with number of processes
 * a given user is using.
 */
int
chgproccnt(uid_t uid, int diff)
{
	struct uidinfo *uip;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);

	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	if (uip) {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt > 0)
			return (uip->ui_proccnt);
		if (uip->ui_proccnt < 0)
			panic("chgproccnt: procs < 0");
		LIST_REMOVE(uip, ui_hash);
		FREE(uip, M_PROC);
		return (0);
	}
	if (diff <= 0) {
		if (diff == 0)
			return(0);
		panic("chgproccnt: lost user");
	}
	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
	LIST_INSERT_HEAD(uipp, uip, ui_hash);
	uip->ui_uid = uid;
	uip->ui_proccnt = diff;
	return (diff);
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Negative ids are treated as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;

	if (pg_id < 0) {
		struct proc *p1 = pfind(-pg_id);
		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgfind(pg_id);
		if (pgrp == NULL)
			return EINVAL;
	}
	if (pgrp->pg_session != p->p_pgrp->pg_session)
		return EPERM;
	return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return (0);
	return (1);
}

/*
 * Locate a process by number
 */
struct proc *
pfind(pid_t pid)
{
	struct proc *p;

	proclist_lock_read();
	p = pid_table[pid & pid_tbl_mask].pt_proc;
	/* Only allow live processes to be found by pid. */
	if (!P_VALID(p) || p->p_pid != pid ||
	    !((1 << SACTIVE | 1 << SSTOP) & 1 << p->p_stat))
		p = 0;

	/* XXX MP - need to have a reference count... */
	proclist_unlock_read();
	return p;
}
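/*
 * Illustrative note (added commentary): the p_pid comparison above is
 * what makes the O(1) lookup safe across slot reuse.  With a 64-entry
 * table, pids 5, 69 and 133 all map to slot 5, but only the slot's
 * current occupant has a matching p_pid.  The bit test is a compact
 * way of checking that p_stat is one of SACTIVE or SSTOP.
 */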
/*
 * Locate a process group by number
 */
struct pgrp *
pgfind(pid_t pgid)
{
	struct pgrp *pgrp;

	proclist_lock_read();
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pgrp == NULL || pgrp->pg_id != pgid
	    || LIST_EMPTY(&pgrp->pg_members))
		pgrp = 0;

	/* XXX MP - need to have a reference count... */
	proclist_unlock_read();
	return pgrp;
}

/*
 * Set entry for process 0
 */
void
proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
	struct session *sess)
{
	int s;

	LIST_INIT(&p->p_lwps);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	p->p_nlwps = 1;

	s = proclist_lock_write();

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	p->p_pgrp = pgrp;
	pid_table[0].pt_pgrp = pgrp;
	LIST_INIT(&pgrp->pg_members);
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	pgrp->pg_session = sess;
	sess->s_count = 1;
	sess->s_sid = 0;
	sess->s_leader = p;

	proclist_unlock_write(s);
}

static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
			P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}
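	/*
	 * Worked example (added commentary): growing from 32 to 64
	 * entries, a live process with pid 37 sits in old slot
	 * 37 & 31 = 5.  Since 37 & 32 is non-zero it is copied to new
	 * slot 37 in the upper half, while a process with pid 5 would
	 * stay in new slot 5; whichever twin slot is left empty is
	 * pushed on the front of the free list.
	 */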
	/* Switch tables */
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int s;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_get(&proc_pool, PR_WAITOK);
	p->p_stat = SIDL;		/* protect against others */

	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		s = proclist_lock_write();
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		proclist_unlock_write(s);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	proclist_unlock_write(s);

	return p;
}

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
	int s;
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	s = proclist_lock_write();

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
			pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	nprocs--;
	proclist_unlock_write(s);

	pool_put(&proc_pool, p);
}
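/*
 * Worked example (added commentary): with a 64-entry table, a process
 * with pid 69 occupies slot 69 & 63 = 5.  proc_free_mem() leaves the
 * "use count" 69 & ~63 = 64 behind in the slot, so the next occupant
 * of slot 5 is assigned pid 64 + 64 + 5 = 133 by proc_alloc().  A
 * stale pfind(69) therefore cannot match the new occupant until the
 * count eventually wraps past pid_max.
 */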
 * Also mksess should only be set if we are creating a process group
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		MALLOC(sess, struct session *, sizeof(struct session),
			M_SESSION, M_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;
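	/*
	 * Illustrative summary (added commentary): the checks above map
	 * onto the POSIX setpgid()/setsid() error cases -- ESRCH when
	 * the target is neither the caller nor a child, EACCES when a
	 * child has already exec'd, and EPERM for cross-session moves,
	 * session leaders, or a pgid naming no existing group other
	 * than the target's own pid.
	 */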
	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

 done:
	proclist_unlock_write(s);
	if (sess != NULL)
		free(sess, M_SESSION);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
			pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
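/*
 * Illustrative sketch (added commentary; 'targetp' and 'error' are
 * hypothetical): the callers named in the header comment above reduce
 * to the following calls.
 */
#if 0
	/* setsid(): create a new session and group named after the caller */
	error = enterpgrp(curproc, curproc->p_pid, 1);

	/* setpgid(pid, pgid): join or create a group, no new session */
	error = enterpgrp(targetp, pgid, 0);
#endif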
/*
 * remove process from process group
 */
int
leavepgrp(struct proc *p)
{
	int s = proclist_lock_write();
	struct pgrp *pgrp;
	pid_t pg_id;

	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = 0;
	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
	proclist_unlock_write(s);

	if (pg_id != NO_PGID)
		pg_delete(pg_id);
	return 0;
}

static void
pg_free(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;
	int s;

	s = proclist_lock_write();
	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
	    || !LIST_EMPTY(&pgrp->pg_members)))
		panic("pg_free: process group absent or has members");
#endif
	pt->pt_pgrp = 0;

	if (!P_VALID(pt->pt_proc)) {
		/* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
			panic("pg_free: process slot on free list");
#endif

		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	proclist_unlock_write(s);

	pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct tty *ttyp;
	struct session *ss;
	int s;

	s = proclist_lock_write();
	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pgrp == NULL || pgrp->pg_id != pg_id ||
	    !LIST_EMPTY(&pgrp->pg_members)) {
		proclist_unlock_write(s);
		return;
	}

	/* Remove reference (if any) from tty to this process group */
	ttyp = pgrp->pg_session->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pgrp)
		ttyp->t_pgrp = NULL;

	ss = pgrp->pg_session;

	if (ss->s_sid == pgrp->pg_id) {
		proclist_unlock_write(s);
		SESSRELE(ss);
		/* pgrp freed by sessdelete() if last reference */
		return;
	}

	proclist_unlock_write(s);
	SESSRELE(ss);
	pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a process being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */

	pg_free(ss->s_sid);

	FREE(ss, M_SESSION);
}
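/*
 * Worked scenario (added commentary): a shell in group A spawns a
 * pipeline as group B in the same session.  Each pipeline member's
 * parent (the shell) is in a different group of the same session, so
 * each member "qualifies" B and pg_jobc(B) equals the member count.
 * If the shell exits, the members are reparented outside the session,
 * pg_jobc(B) drops to zero and B becomes orphaned; orphanpg() then
 * delivers SIGHUP/SIGCONT if any member is stopped.
 */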
/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
	    hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(p, &p->p_children, p_sibling) {
		if ((hispgrp = p->p_pgrp) != pgrp &&
		    hispgrp->pg_session == mysession &&
		    P_ZOMBIE(p) == 0) {
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
				psignal(p, SIGHUP);
				psignal(p, SIGCONT);
			}
			return;
		}
	}
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
	struct plimit *newlim;

	p->p_flag |= P_SUGID;
	/* reset what needs to be reset in plimit */
	if (p->p_limit->pl_corename != defcorename) {
		if (p->p_limit->p_refcnt > 1 &&
		    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
			newlim = limcopy(p->p_limit);
			limfree(p->p_limit);
			p->p_limit = newlim;
		}
		free(p->p_limit->pl_corename, M_TEMP);
		p->p_limit->pl_corename = defcorename;
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
		pid_table, pid_tbl_mask+1,
		next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("proc %p id %d (0x%x) %s\n",
				p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
				P_NEXT(p) & pid_tbl_mask,
				P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
				pgrp->pg_session, pgrp->pg_session->s_sid,
				pgrp->pg_session->s_count,
				pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
				pgrp, pgrp->pg_id, pgrp->pg_jobc,
				pgrp->pg_members.lh_first);
			for (p = pgrp->pg_members.lh_first; p != 0;
			    p = p->p_pglist.le_next) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
					p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is
					  less than this */
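/*
 * Illustrative note (added commentary): this is a classic stack canary
 * fill.  kstack_setup_magic() below paints the whole kernel stack with
 * KSTACK_MAGIC when an LWP is set up; kstack_check_magic() later scans
 * from the far end toward the in-use region for the first clobbered
 * word, yielding a low-water mark of how close the stack came to
 * overflowing.
 */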
void
kstack_setup_magic(const struct lwp *l)
{
	u_int32_t *ip;
	u_int32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill the stack with the magic number so that later
	 * modification of it can be detected.
	 */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	u_int32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */