/*	$NetBSD: kern_proc.c,v 1.65 2003/08/07 16:31:47 agc Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.65 2003/08/07 16:31:47 agc Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>

static void pg_delete(pid_t);

/*
 * Structure associated with user caching.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;
	uid_t	ui_uid;
	long	ui_proccnt;
};
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;

/*
 * List of processes that have called exit, but need to be reaped.
 * Locking of this proclist is special; it's accessed in a
 * critical section of process exit, and thus locking it can't
 * modify interrupt state.
 * We use a simple spin lock for this proclist.
 * Processes on this proclist are also on zombproc.
 */
struct simplelock deadproc_slock;
struct deadprocs deadprocs = SLIST_HEAD_INITIALIZER(deadprocs);

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (An orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static __inline uint
p2u(struct proc *p)
{

	return (uint)(uintptr_t)p;
}
#else
#define	p2u(p)	((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
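
/*
 * Illustrative sketch of the free-list encoding above (ours, not
 * compiled into the kernel; the function name and the sample index 42
 * are invented for illustration).  A free slot stores a link to the
 * next free slot, shifted left one bit with the low bit set, so it can
 * never be mistaken for a (word-aligned) proc pointer.
 */
#if 0
static void
pid_encoding_example(void)
{
	struct proc *link;

	link = P_FREE(42);		/* encode: (42 << 1) | 1 - an odd "pointer" */
	KASSERT(!P_VALID(link));	/* odd value => slot is free, not a live proc */
	KASSERT(P_NEXT(link) == 42);	/* decode: shift the tag bit back out */
}
#endif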

#define	INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

struct pool proc_pool;
struct pool lwp_pool;
struct pool lwp_uc_pool;
struct pool pcred_pool;
struct pool plimit_pool;
struct pool pstats_pool;
struct pool pgrp_pool;
struct pool rusage_pool;
struct pool ras_pool;
struct pool sadata_pool;
struct pool saupcall_pool;
struct pool ptimer_pool;

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static void orphanpg __P((struct pgrp *));
#ifdef DEBUG
void pgrpdump __P((void));
#endif

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	spinlockinit(&proclist_lock, "proclk", 0);

	simple_lock_init(&deadproc_slock);

	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
	    M_PROC, M_WAITOK);
	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix the last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	LIST_INIT(&alllwp);
	LIST_INIT(&deadlwp);
	LIST_INIT(&zomblwp);

	uihashtbl =
	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);

	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
	    &pool_allocator_nointr);
	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
	    &pool_allocator_nointr);
	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
	    &pool_allocator_nointr);
	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
	    &pool_allocator_nointr);
	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
	    &pool_allocator_nointr);
	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
	    &pool_allocator_nointr);
	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
	    &pool_allocator_nointr);
	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
	    &pool_allocator_nointr);
	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
	    &pool_allocator_nointr);
	pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
	    &pool_allocator_nointr);
	pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
	    "saupcpl", &pool_allocator_nointr);
	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
	    &pool_allocator_nointr);
}

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
	int error;

	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
	int s, error;

	s = splclock();
	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock: failed to acquire lock");
#endif
	return (s);
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}
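
/*
 * Minimal usage sketch for the lock pair above (ours, not compiled;
 * the function name is invented).  Writers must save and restore the
 * spl value returned by proclist_lock_write(), since the write lock
 * also blocks clock interrupts; readers need no spl dance.
 */
#if 0
static void
proclist_lock_example(void)
{
	struct proc *p;
	int s;

	proclist_lock_read();		/* shared lock, ok for traversal */
	LIST_FOREACH(p, &allproc, p_list)
		/* examine p, but do not modify the lists */;
	proclist_unlock_read();

	s = proclist_lock_write();	/* raises to splclock() internally */
	/* safe to modify allproc/zombproc/pid_table here */
	proclist_unlock_write(s);	/* drops the lock, then splx(s) */
}
#endif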

/*
 * Change the count associated with the number of processes
 * a given user is using.
 */
int
chgproccnt(uid_t uid, int diff)
{
	struct uidinfo *uip;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);

	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	if (uip) {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt > 0)
			return (uip->ui_proccnt);
		if (uip->ui_proccnt < 0)
			panic("chgproccnt: procs < 0");
		LIST_REMOVE(uip, ui_hash);
		FREE(uip, M_PROC);
		return (0);
	}
	if (diff <= 0) {
		if (diff == 0)
			return (0);
		panic("chgproccnt: lost user");
	}
	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
	LIST_INSERT_HEAD(uipp, uip, ui_hash);
	uip->ui_uid = uid;
	uip->ui_proccnt = diff;
	return (diff);
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;

	if (pg_id < 0) {
		struct proc *p1 = pfind(-pg_id);

		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgfind(pg_id);
		if (pgrp == NULL)
			return EINVAL;
	}
	if (pgrp->pg_session != p->p_pgrp->pg_session)
		return EPERM;
	return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return (0);
	return (1);
}

/*
 * Locate a process by number
 */
struct proc *
pfind(pid_t pid)
{
	struct proc *p;

	proclist_lock_read();
	p = pid_table[pid & pid_tbl_mask].pt_proc;
	/* Only allow live processes to be found by pid. */
	if (!P_VALID(p) || p->p_pid != pid ||
	    !((1 << SACTIVE | 1 << SSTOP) & 1 << p->p_stat))
		p = 0;

	/* XXX MP - need to have a reference count... */
	proclist_unlock_read();
	return p;
}


/*
 * Locate a process group by number
 */
struct pgrp *
pgfind(pid_t pgid)
{
	struct pgrp *pgrp;

	proclist_lock_read();
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pgrp == NULL || pgrp->pg_id != pgid
	    || LIST_EMPTY(&pgrp->pg_members))
		pgrp = 0;

	/* XXX MP - need to have a reference count... */
	proclist_unlock_read();
	return pgrp;
}
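
/*
 * Worked example for the lookups above (ours, not compiled; the
 * function name and pid values are invented).  With pid_tbl_mask ==
 * 31, pids 33 and 65 both hash to slot 1; the high bits of p_pid act
 * as a generation count, so the p->p_pid != pid test rejects a stale
 * pid whose slot has since been recycled.
 */
#if 0
static struct proc *
pfind_example(void)
{
	/*
	 * Looking up pid 33 inspects pid_table[33 & 31] == pid_table[1];
	 * if that slot now holds the process with pid 65, the check
	 * p->p_pid != pid (65 != 33) makes the lookup fail, as it should.
	 */
	return pfind(33);
}
#endif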

/*
 * Set entry for process 0
 */
void
proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
	struct session *sess)
{
	int s;

	LIST_INIT(&p->p_lwps);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	p->p_nlwps = 1;

	s = proclist_lock_write();

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	p->p_pgrp = pgrp;
	pid_table[0].pt_pgrp = pgrp;
	LIST_INIT(&pgrp->pg_members);
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	pgrp->pg_session = sess;
	sess->s_count = 1;
	sess->s_sid = 0;
	sess->s_leader = p;

	proclist_unlock_write(s);
}

static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pids to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fix up the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}
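
/*
 * Worked example of the 'unzip' above (ours, not compiled; the
 * function name and pid values are invented).  Suppose the table
 * grows from 32 to 64 slots, so pt_size == 32; bit 5 of the actual
 * pid then picks the half of the new table, and the vacated twin of
 * each slot pair is pushed onto the front of the free list with its
 * 'use count' preserved.
 */
#if 0
static void
expand_example(void)
{
	/* before: mask 31, pids 33 and 65 both map to slot 1 */
	KASSERT((33 & 31) == 1 && (65 & 31) == 1);
	/* after: mask 63, they no longer collide */
	KASSERT((33 & 63) == 33);	/* bit 5 set -> upper half */
	KASSERT((65 & 63) == 1);	/* bit 5 clear -> lower half */
}
#endif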

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int s;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_get(&proc_pool, PR_WAITOK);
	p->p_stat = SIDL;		/* protect against others */

	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		s = proclist_lock_write();
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		proclist_unlock_write(s);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	proclist_unlock_write(s);

	return p;
}

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
	int s;
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	s = proclist_lock_write();

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
		    pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	nprocs--;
	proclist_unlock_write(s);

	pool_put(&proc_pool, p);
}
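
/*
 * Worked example of the pid arithmetic in proc_alloc() above (ours,
 * with invented numbers).  With pid_tbl_mask == 31, suppose
 * next_free_pt == 3 and the free slot's saved link decodes to
 * nxt == 96 + 5 (use count 96, next free slot 5).  The new pid is
 * (nxt & ~31) + 31 + 1 + 3 == 96 + 32 + 3 == 131: the saved use
 * count bumped by one table 'generation', plus the slot index, so
 * slot 3 never hands out the same pid twice in a row, and
 * 131 & 31 == 3 still maps the pid back to its slot.
 */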

/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		MALLOC(sess, struct session *, sizeof(struct session),
		    M_SESSION, M_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

 done:
	proclist_unlock_write(s);
	if (sess != NULL)
		free(sess, M_SESSION);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
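
/*
 * Usage sketch (ours, not compiled; the function name is invented):
 * the call shapes mentioned in the comment above enterpgrp().
 */
#if 0
static void
enterpgrp_example(struct proc *child)
{
	/* setsid(): new session + new pgrp, both ids == our pid */
	(void)enterpgrp(curproc, curproc->p_pid, 1);

	/* setpgrp()/setpgid(0, 0) style: new pgrp, same session */
	(void)enterpgrp(curproc, curproc->p_pid, 0);

	/* setpgid(child, pgid): move a child into a pgrp of our session */
	(void)enterpgrp(child, child->p_pid, 0);
}
#endif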

/*
 * remove process from process group
 */
int
leavepgrp(struct proc *p)
{
	int s = proclist_lock_write();
	struct pgrp *pgrp;
	pid_t pg_id;

	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = 0;
	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
	proclist_unlock_write(s);

	if (pg_id != NO_PGID)
		pg_delete(pg_id);
	return 0;
}

static void
pg_free(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;
	int s;

	s = proclist_lock_write();
	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
	    || !LIST_EMPTY(&pgrp->pg_members)))
		panic("pg_free: process group absent or has members");
#endif
	pt->pt_pgrp = 0;

	if (!P_VALID(pt->pt_proc)) {
		/* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
			panic("pg_free: process slot on free list");
#endif

		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	proclist_unlock_write(s);

	pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct tty *ttyp;
	struct session *ss;
	int s;

	s = proclist_lock_write();
	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pgrp == NULL || pgrp->pg_id != pg_id ||
	    !LIST_EMPTY(&pgrp->pg_members)) {
		proclist_unlock_write(s);
		return;
	}

	/* Remove reference (if any) from tty to this process group */
	ttyp = pgrp->pg_session->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pgrp)
		ttyp->t_pgrp = NULL;

	ss = pgrp->pg_session;

	if (ss->s_sid == pgrp->pg_id) {
		proclist_unlock_write(s);
		SESSRELE(ss);
		/* pgrp freed by sessdelete() if last reference */
		return;
	}

	proclist_unlock_write(s);
	SESSRELE(ss);
	pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{

	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a process being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */
	pg_free(ss->s_sid);

	FREE(ss, M_SESSION);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
	    hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(p, &p->p_children, p_sibling) {
		if ((hispgrp = p->p_pgrp) != pgrp &&
		    hispgrp->pg_session == mysession &&
		    P_ZOMBIE(p) == 0) {
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}
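
/*
 * Worked example for pg_jobc (ours; the shell and pipeline are
 * invented).  A shell S in pgrp A starts a pipeline P1 | P2 in pgrp
 * B.  Both P1 and P2 have their parent (S) in a different pgrp of the
 * same session, so each bumps B's pg_jobc to 2.  If P1 exits,
 * fixjobc() drops pg_jobc to 1; when P2 also leaves, pg_jobc reaches
 * 0, B becomes orphaned, and orphanpg() below delivers SIGHUP and
 * SIGCONT to any stopped members.
 */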

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
				psignal(p, SIGHUP);
				psignal(p, SIGCONT);
			}
			return;
		}
	}
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
	struct plimit *newlim;

	p->p_flag |= P_SUGID;
	/* reset what needs to be reset in plimit */
	if (p->p_limit->pl_corename != defcorename) {
		if (p->p_limit->p_refcnt > 1 &&
		    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
			newlim = limcopy(p->p_limit);
			limfree(p->p_limit);
			p->p_limit = newlim;
		}
		free(p->p_limit->pl_corename, M_TEMP);
		p->p_limit->pl_corename = defcorename;
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("proc %p id %d (0x%x) %s\n",
			    p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    pgrp->pg_members.lh_first);
			for (p = pgrp->pg_members.lh_first; p != 0;
			    p = p->p_pglist.le_next) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8;	/* warn if remaining stack is
					   less than this */

void
kstack_setup_magic(const struct lwp *l)
{
	u_int32_t *ip;
	u_int32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * Fill the whole stack with the magic number
	 * so that later modification of it can be detected.
	 */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	u_int32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes"
			    " (pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */