1 /* $NetBSD: kern_proc.c,v 1.52 2002/09/04 01:32:33 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1989, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. All advertising materials mentioning features or use of this software 53 * must display the following acknowledgement: 54 * This product includes software developed by the University of 55 * California, Berkeley and its contributors. 56 * 4. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 * 72 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 73 */ 74 75 #include <sys/cdefs.h> 76 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.52 2002/09/04 01:32:33 matt Exp $"); 77 78 #include "opt_kstack.h" 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/map.h> 83 #include <sys/kernel.h> 84 #include <sys/proc.h> 85 #include <sys/resourcevar.h> 86 #include <sys/buf.h> 87 #include <sys/acct.h> 88 #include <sys/wait.h> 89 #include <sys/file.h> 90 #include <ufs/ufs/quota.h> 91 #include <sys/uio.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/mbuf.h> 95 #include <sys/ioctl.h> 96 #include <sys/tty.h> 97 #include <sys/signalvar.h> 98 #include <sys/ras.h> 99 100 /* 101 * Structure associated with user cacheing. 102 */ 103 struct uidinfo { 104 LIST_ENTRY(uidinfo) ui_hash; 105 uid_t ui_uid; 106 long ui_proccnt; 107 }; 108 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 109 LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 110 u_long uihash; /* size of hash table - 1 */ 111 112 /* 113 * Other process lists 114 */ 115 struct pidhashhead *pidhashtbl; 116 u_long pidhash; 117 struct pgrphashhead *pgrphashtbl; 118 u_long pgrphash; 119 120 struct proclist allproc; 121 struct proclist zombproc; /* resources have been freed */ 122 123 /* 124 * Process list locking: 125 * 126 * We have two types of locks on the proclists: read locks and write 127 * locks. Read locks can be used in interrupt context, so while we 128 * hold the write lock, we must also block clock interrupts to 129 * lock out any scheduling changes that may happen in interrupt 130 * context. 131 * 132 * The proclist lock locks the following structures: 133 * 134 * allproc 135 * zombproc 136 * pidhashtbl 137 */ 138 struct lock proclist_lock; 139 140 /* 141 * Locking of this proclist is special; it's accessed in a 142 * critical section of process exit, and thus locking it can't 143 * modify interrupt state. We use a simple spin lock for this 144 * proclist. Processes on this proclist are also on zombproc; 145 * we use the p_hash member to linkup to deadproc. 146 */ 147 struct simplelock deadproc_slock; 148 struct proclist deadproc; /* dead, but not yet undead */ 149 150 struct pool proc_pool; 151 struct pool pcred_pool; 152 struct pool plimit_pool; 153 struct pool pgrp_pool; 154 struct pool rusage_pool; 155 struct pool ras_pool; 156 157 /* 158 * The process list descriptors, used during pid allocation and 159 * by sysctl. No locking on this data structure is needed since 160 * it is completely static. 161 */ 162 const struct proclist_desc proclists[] = { 163 { &allproc }, 164 { &zombproc }, 165 { NULL }, 166 }; 167 168 static void orphanpg __P((struct pgrp *)); 169 #ifdef DEBUG 170 void pgrpdump __P((void)); 171 #endif 172 173 /* 174 * Initialize global process hashing structures. 175 */ 176 void 177 procinit() 178 { 179 const struct proclist_desc *pd; 180 181 for (pd = proclists; pd->pd_list != NULL; pd++) 182 LIST_INIT(pd->pd_list); 183 184 spinlockinit(&proclist_lock, "proclk", 0); 185 186 LIST_INIT(&deadproc); 187 simple_lock_init(&deadproc_slock); 188 189 pidhashtbl = 190 hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pidhash); 191 pgrphashtbl = 192 hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pgrphash); 193 uihashtbl = 194 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 195 196 pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl", 197 &pool_allocator_nointr); 198 pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl", 199 &pool_allocator_nointr); 200 pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl", 201 &pool_allocator_nointr); 202 pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl", 203 &pool_allocator_nointr); 204 pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl", 205 &pool_allocator_nointr); 206 pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl", 207 &pool_allocator_nointr); 208 } 209 210 /* 211 * Acquire a read lock on the proclist. 212 */ 213 void 214 proclist_lock_read() 215 { 216 int error; 217 218 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 219 #ifdef DIAGNOSTIC 220 if (__predict_false(error != 0)) 221 panic("proclist_lock_read: failed to acquire lock"); 222 #endif 223 } 224 225 /* 226 * Release a read lock on the proclist. 227 */ 228 void 229 proclist_unlock_read() 230 { 231 232 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 233 } 234 235 /* 236 * Acquire a write lock on the proclist. 237 */ 238 int 239 proclist_lock_write() 240 { 241 int s, error; 242 243 s = splclock(); 244 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 245 #ifdef DIAGNOSTIC 246 if (__predict_false(error != 0)) 247 panic("proclist_lock: failed to acquire lock"); 248 #endif 249 return (s); 250 } 251 252 /* 253 * Release a write lock on the proclist. 254 */ 255 void 256 proclist_unlock_write(s) 257 int s; 258 { 259 260 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 261 splx(s); 262 } 263 264 /* 265 * Change the count associated with number of processes 266 * a given user is using. 267 */ 268 int 269 chgproccnt(uid, diff) 270 uid_t uid; 271 int diff; 272 { 273 struct uidinfo *uip; 274 struct uihashhead *uipp; 275 276 uipp = UIHASH(uid); 277 278 LIST_FOREACH(uip, uipp, ui_hash) 279 if (uip->ui_uid == uid) 280 break; 281 282 if (uip) { 283 uip->ui_proccnt += diff; 284 if (uip->ui_proccnt > 0) 285 return (uip->ui_proccnt); 286 if (uip->ui_proccnt < 0) 287 panic("chgproccnt: procs < 0"); 288 LIST_REMOVE(uip, ui_hash); 289 FREE(uip, M_PROC); 290 return (0); 291 } 292 if (diff <= 0) { 293 if (diff == 0) 294 return(0); 295 panic("chgproccnt: lost user"); 296 } 297 MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); 298 LIST_INSERT_HEAD(uipp, uip, ui_hash); 299 uip->ui_uid = uid; 300 uip->ui_proccnt = diff; 301 return (diff); 302 } 303 304 /* 305 * Is p an inferior of q? 306 */ 307 int 308 inferior(p, q) 309 struct proc *p; 310 struct proc *q; 311 { 312 313 for (; p != q; p = p->p_pptr) 314 if (p->p_pid == 0) 315 return (0); 316 return (1); 317 } 318 319 /* 320 * Locate a process by number 321 */ 322 struct proc * 323 pfind(pid) 324 pid_t pid; 325 { 326 struct proc *p; 327 328 proclist_lock_read(); 329 LIST_FOREACH(p, PIDHASH(pid), p_hash) 330 if (p->p_pid == pid) 331 goto out; 332 out: 333 proclist_unlock_read(); 334 return (p); 335 } 336 337 /* 338 * Locate a process group by number 339 */ 340 struct pgrp * 341 pgfind(pgid) 342 pid_t pgid; 343 { 344 struct pgrp *pgrp; 345 346 LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) 347 if (pgrp->pg_id == pgid) 348 return (pgrp); 349 return (NULL); 350 } 351 352 /* 353 * Move p to a new or existing process group (and session) 354 */ 355 int 356 enterpgrp(p, pgid, mksess) 357 struct proc *p; 358 pid_t pgid; 359 int mksess; 360 { 361 struct pgrp *pgrp = pgfind(pgid); 362 363 #ifdef DIAGNOSTIC 364 if (__predict_false(pgrp != NULL && mksess)) /* firewalls */ 365 panic("enterpgrp: setsid into non-empty pgrp"); 366 if (__predict_false(SESS_LEADER(p))) 367 panic("enterpgrp: session leader attempted setpgrp"); 368 #endif 369 if (pgrp == NULL) { 370 pid_t savepid = p->p_pid; 371 struct proc *np; 372 /* 373 * new process group 374 */ 375 #ifdef DIAGNOSTIC 376 if (__predict_false(p->p_pid != pgid)) 377 panic("enterpgrp: new pgrp and pid != pgid"); 378 #endif 379 pgrp = pool_get(&pgrp_pool, PR_WAITOK); 380 if ((np = pfind(savepid)) == NULL || np != p) { 381 pool_put(&pgrp_pool, pgrp); 382 return (ESRCH); 383 } 384 if (mksess) { 385 struct session *sess; 386 387 /* 388 * new session 389 */ 390 MALLOC(sess, struct session *, sizeof(struct session), 391 M_SESSION, M_WAITOK); 392 if ((np = pfind(savepid)) == NULL || np != p) { 393 FREE(sess, M_SESSION); 394 pool_put(&pgrp_pool, pgrp); 395 return (ESRCH); 396 } 397 sess->s_sid = p->p_pid; 398 sess->s_leader = p; 399 sess->s_count = 1; 400 sess->s_ttyvp = NULL; 401 sess->s_ttyp = NULL; 402 memcpy(sess->s_login, p->p_session->s_login, 403 sizeof(sess->s_login)); 404 p->p_flag &= ~P_CONTROLT; 405 pgrp->pg_session = sess; 406 #ifdef DIAGNOSTIC 407 if (__predict_false(p != curproc)) 408 panic("enterpgrp: mksession and p != curproc"); 409 #endif 410 } else { 411 SESSHOLD(p->p_session); 412 pgrp->pg_session = p->p_session; 413 } 414 pgrp->pg_id = pgid; 415 LIST_INIT(&pgrp->pg_members); 416 LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); 417 pgrp->pg_jobc = 0; 418 } else if (pgrp == p->p_pgrp) 419 return (0); 420 421 /* 422 * Adjust eligibility of affected pgrps to participate in job control. 423 * Increment eligibility counts before decrementing, otherwise we 424 * could reach 0 spuriously during the first call. 425 */ 426 fixjobc(p, pgrp, 1); 427 fixjobc(p, p->p_pgrp, 0); 428 429 LIST_REMOVE(p, p_pglist); 430 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 431 pgdelete(p->p_pgrp); 432 p->p_pgrp = pgrp; 433 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 434 return (0); 435 } 436 437 /* 438 * remove process from process group 439 */ 440 int 441 leavepgrp(p) 442 struct proc *p; 443 { 444 445 LIST_REMOVE(p, p_pglist); 446 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 447 pgdelete(p->p_pgrp); 448 p->p_pgrp = 0; 449 return (0); 450 } 451 452 /* 453 * delete a process group 454 */ 455 void 456 pgdelete(pgrp) 457 struct pgrp *pgrp; 458 { 459 460 /* Remove reference (if any) from tty to this process group */ 461 if (pgrp->pg_session->s_ttyp != NULL && 462 pgrp->pg_session->s_ttyp->t_pgrp == pgrp) 463 pgrp->pg_session->s_ttyp->t_pgrp = NULL; 464 LIST_REMOVE(pgrp, pg_hash); 465 SESSRELE(pgrp->pg_session); 466 pool_put(&pgrp_pool, pgrp); 467 } 468 469 /* 470 * Adjust pgrp jobc counters when specified process changes process group. 471 * We count the number of processes in each process group that "qualify" 472 * the group for terminal job control (those with a parent in a different 473 * process group of the same session). If that count reaches zero, the 474 * process group becomes orphaned. Check both the specified process' 475 * process group and that of its children. 476 * entering == 0 => p is leaving specified group. 477 * entering == 1 => p is entering specified group. 478 */ 479 void 480 fixjobc(p, pgrp, entering) 481 struct proc *p; 482 struct pgrp *pgrp; 483 int entering; 484 { 485 struct pgrp *hispgrp; 486 struct session *mysession = pgrp->pg_session; 487 488 /* 489 * Check p's parent to see whether p qualifies its own process 490 * group; if so, adjust count for p's process group. 491 */ 492 if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && 493 hispgrp->pg_session == mysession) { 494 if (entering) 495 pgrp->pg_jobc++; 496 else if (--pgrp->pg_jobc == 0) 497 orphanpg(pgrp); 498 } 499 500 /* 501 * Check this process' children to see whether they qualify 502 * their process groups; if so, adjust counts for children's 503 * process groups. 504 */ 505 LIST_FOREACH(p, &p->p_children, p_sibling) { 506 if ((hispgrp = p->p_pgrp) != pgrp && 507 hispgrp->pg_session == mysession && 508 P_ZOMBIE(p) == 0) { 509 if (entering) 510 hispgrp->pg_jobc++; 511 else if (--hispgrp->pg_jobc == 0) 512 orphanpg(hispgrp); 513 } 514 } 515 } 516 517 /* 518 * A process group has become orphaned; 519 * if there are any stopped processes in the group, 520 * hang-up all process in that group. 521 */ 522 static void 523 orphanpg(pg) 524 struct pgrp *pg; 525 { 526 struct proc *p; 527 528 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 529 if (p->p_stat == SSTOP) { 530 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 531 psignal(p, SIGHUP); 532 psignal(p, SIGCONT); 533 } 534 return; 535 } 536 } 537 } 538 539 /* mark process as suid/sgid, reset some values do defaults */ 540 void 541 p_sugid(p) 542 struct proc *p; 543 { 544 struct plimit *newlim; 545 546 p->p_flag |= P_SUGID; 547 /* reset what needs to be reset in plimit */ 548 if (p->p_limit->pl_corename != defcorename) { 549 if (p->p_limit->p_refcnt > 1 && 550 (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { 551 newlim = limcopy(p->p_limit); 552 limfree(p->p_limit); 553 p->p_limit = newlim; 554 } 555 free(p->p_limit->pl_corename, M_TEMP); 556 p->p_limit->pl_corename = defcorename; 557 } 558 } 559 560 #ifdef DEBUG 561 void 562 pgrpdump() 563 { 564 struct pgrp *pgrp; 565 struct proc *p; 566 int i; 567 568 for (i = 0; i <= pgrphash; i++) { 569 if ((pgrp = LIST_FIRST(&pgrphashtbl[i])) != NULL) { 570 printf("\tindx %d\n", i); 571 for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) { 572 printf("\tpgrp %p, pgid %d, sess %p, " 573 "sesscnt %d, mem %p\n", 574 pgrp, pgrp->pg_id, pgrp->pg_session, 575 pgrp->pg_session->s_count, 576 LIST_FIRST(&pgrp->pg_members)); 577 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { 578 printf("\t\tpid %d addr %p pgrp %p\n", 579 p->p_pid, p, p->p_pgrp); 580 } 581 } 582 } 583 } 584 } 585 #endif /* DEBUG */ 586 587 #ifdef KSTACK_CHECK_MAGIC 588 #include <sys/user.h> 589 590 #define KSTACK_MAGIC 0xdeadbeaf 591 592 /* XXX should be per process basis? */ 593 int kstackleftmin = KSTACK_SIZE; 594 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 595 less than this */ 596 597 void 598 kstack_setup_magic(const struct proc *p) 599 { 600 u_int32_t *ip; 601 u_int32_t const *end; 602 603 KASSERT(p != 0); 604 KASSERT(p != &proc0); 605 606 /* 607 * fill all the stack with magic number 608 * so that later modification on it can be detected. 609 */ 610 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 611 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 612 for (; ip < end; ip++) { 613 *ip = KSTACK_MAGIC; 614 } 615 } 616 617 void 618 kstack_check_magic(const struct proc *p) 619 { 620 u_int32_t const *ip, *end; 621 int stackleft; 622 623 KASSERT(p != 0); 624 625 /* don't check proc0 */ /*XXX*/ 626 if (p == &proc0) 627 return; 628 629 #ifdef __MACHINE_STACK_GROWS_UP 630 /* stack grows upwards (eg. hppa) */ 631 ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 632 end = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 633 for (ip--; ip >= end; ip--) 634 if (*ip != KSTACK_MAGIC) 635 break; 636 637 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE - (caddr_t)ip; 638 #else /* __MACHINE_STACK_GROWS_UP */ 639 /* stack grows downwards (eg. i386) */ 640 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(p); 641 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(p) + KSTACK_SIZE); 642 for (; ip < end; ip++) 643 if (*ip != KSTACK_MAGIC) 644 break; 645 646 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(p); 647 #endif /* __MACHINE_STACK_GROWS_UP */ 648 649 if (kstackleftmin > stackleft) { 650 kstackleftmin = stackleft; 651 if (stackleft < kstackleftthres) 652 printf("warning: kernel stack left %d bytes(pid %u)\n", 653 stackleft, p->p_pid); 654 } 655 656 if (stackleft <= 0) { 657 panic("magic on the top of kernel stack changed for pid %u: " 658 "maybe kernel stack overflow\n", p->p_pid); 659 } 660 } 661 #endif /* KSTACK_CHECK_MAGIC */ 662