/*	$NetBSD: kern_proc.c,v 1.71 2004/02/06 06:59:33 pk Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 67 * 68 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 69 */ 70 71 #include <sys/cdefs.h> 72 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.71 2004/02/06 06:59:33 pk Exp $"); 73 74 #include "opt_kstack.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/proc.h> 80 #include <sys/resourcevar.h> 81 #include <sys/buf.h> 82 #include <sys/acct.h> 83 #include <sys/wait.h> 84 #include <sys/file.h> 85 #include <ufs/ufs/quota.h> 86 #include <sys/uio.h> 87 #include <sys/malloc.h> 88 #include <sys/pool.h> 89 #include <sys/mbuf.h> 90 #include <sys/ioctl.h> 91 #include <sys/tty.h> 92 #include <sys/signalvar.h> 93 #include <sys/ras.h> 94 #include <sys/sa.h> 95 #include <sys/savar.h> 96 97 static void pg_delete(pid_t); 98 99 /* 100 * Structure associated with user cacheing. 101 */ 102 struct uidinfo { 103 LIST_ENTRY(uidinfo) ui_hash; 104 uid_t ui_uid; 105 long ui_proccnt; 106 }; 107 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 108 LIST_HEAD(uihashhead, uidinfo) *uihashtbl; 109 u_long uihash; /* size of hash table - 1 */ 110 111 /* 112 * Other process lists 113 */ 114 115 struct proclist allproc; 116 struct proclist zombproc; /* resources have been freed */ 117 118 119 /* 120 * Process list locking: 121 * 122 * We have two types of locks on the proclists: read locks and write 123 * locks. 
Read locks can be used in interrupt context, so while we 124 * hold the write lock, we must also block clock interrupts to 125 * lock out any scheduling changes that may happen in interrupt 126 * context. 127 * 128 * The proclist lock locks the following structures: 129 * 130 * allproc 131 * zombproc 132 * pid_table 133 */ 134 struct lock proclist_lock; 135 136 /* 137 * pid to proc lookup is done by indexing the pid_table array. 138 * Since pid numbers are only allocated when an empty slot 139 * has been found, there is no need to search any lists ever. 140 * (an orphaned pgrp will lock the slot, a session will lock 141 * the pgrp with the same number.) 142 * If the table is too small it is reallocated with twice the 143 * previous size and the entries 'unzipped' into the two halves. 144 * A linked list of free entries is passed through the pt_proc 145 * field of 'free' items - set odd to be an invalid ptr. 146 */ 147 148 struct pid_table { 149 struct proc *pt_proc; 150 struct pgrp *pt_pgrp; 151 }; 152 #if 1 /* strongly typed cast - should be a noop */ 153 static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }; 154 #else 155 #define p2u(p) ((uint)p) 156 #endif 157 #define P_VALID(p) (!(p2u(p) & 1)) 158 #define P_NEXT(p) (p2u(p) >> 1) 159 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1)) 160 161 #define INITIAL_PID_TABLE_SIZE (1 << 5) 162 static struct pid_table *pid_table; 163 static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 164 static uint pid_alloc_lim; /* max we allocate before growing table */ 165 static uint pid_alloc_cnt; /* number of allocated pids */ 166 167 /* links through free slots - never empty! 
*/ 168 static uint next_free_pt, last_free_pt; 169 static pid_t pid_max = PID_MAX; /* largest value we allocate */ 170 171 struct pool proc_pool; 172 struct pool lwp_pool; 173 struct pool lwp_uc_pool; 174 struct pool pcred_pool; 175 struct pool plimit_pool; 176 struct pool pstats_pool; 177 struct pool pgrp_pool; 178 struct pool rusage_pool; 179 struct pool ras_pool; 180 struct pool sadata_pool; 181 struct pool saupcall_pool; 182 struct pool sastack_pool; 183 struct pool ptimer_pool; 184 185 MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data"); 186 MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); 187 MALLOC_DEFINE(M_SESSION, "session", "session header"); 188 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); 189 190 /* 191 * The process list descriptors, used during pid allocation and 192 * by sysctl. No locking on this data structure is needed since 193 * it is completely static. 194 */ 195 const struct proclist_desc proclists[] = { 196 { &allproc }, 197 { &zombproc }, 198 { NULL }, 199 }; 200 201 static void orphanpg __P((struct pgrp *)); 202 #ifdef DEBUG 203 void pgrpdump __P((void)); 204 #endif 205 206 /* 207 * Initialize global process hashing structures. 208 */ 209 void 210 procinit(void) 211 { 212 const struct proclist_desc *pd; 213 int i; 214 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 215 216 for (pd = proclists; pd->pd_list != NULL; pd++) 217 LIST_INIT(pd->pd_list); 218 219 spinlockinit(&proclist_lock, "proclk", 0); 220 221 pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table, 222 M_PROC, M_WAITOK); 223 /* Set free list running through table... 224 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 225 for (i = 0; i <= pid_tbl_mask; i++) { 226 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1); 227 pid_table[i].pt_pgrp = 0; 228 } 229 /* slot 0 is just grabbed */ 230 next_free_pt = 1; 231 /* Need to fix last entry. 
*/ 232 last_free_pt = pid_tbl_mask; 233 pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY); 234 /* point at which we grow table - to avoid reusing pids too often */ 235 pid_alloc_lim = pid_tbl_mask - 1; 236 #undef LINK_EMPTY 237 238 LIST_INIT(&alllwp); 239 240 uihashtbl = 241 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 242 243 pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl", 244 &pool_allocator_nointr); 245 pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl", 246 &pool_allocator_nointr); 247 pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl", 248 &pool_allocator_nointr); 249 pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl", 250 &pool_allocator_nointr); 251 pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl", 252 &pool_allocator_nointr); 253 pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl", 254 &pool_allocator_nointr); 255 pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl", 256 &pool_allocator_nointr); 257 pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl", 258 &pool_allocator_nointr); 259 pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl", 260 &pool_allocator_nointr); 261 pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl", 262 &pool_allocator_nointr); 263 pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, 264 "saupcpl", &pool_allocator_nointr); 265 pool_init(&sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl", 266 &pool_allocator_nointr); 267 pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl", 268 &pool_allocator_nointr); 269 } 270 271 /* 272 * Acquire a read lock on the proclist. 
273 */ 274 void 275 proclist_lock_read(void) 276 { 277 int error; 278 279 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 280 #ifdef DIAGNOSTIC 281 if (__predict_false(error != 0)) 282 panic("proclist_lock_read: failed to acquire lock"); 283 #endif 284 } 285 286 /* 287 * Release a read lock on the proclist. 288 */ 289 void 290 proclist_unlock_read(void) 291 { 292 293 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 294 } 295 296 /* 297 * Acquire a write lock on the proclist. 298 */ 299 int 300 proclist_lock_write(void) 301 { 302 int s, error; 303 304 s = splclock(); 305 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 306 #ifdef DIAGNOSTIC 307 if (__predict_false(error != 0)) 308 panic("proclist_lock: failed to acquire lock"); 309 #endif 310 return (s); 311 } 312 313 /* 314 * Release a write lock on the proclist. 315 */ 316 void 317 proclist_unlock_write(int s) 318 { 319 320 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 321 splx(s); 322 } 323 324 /* 325 * Change the count associated with number of processes 326 * a given user is using. 327 */ 328 int 329 chgproccnt(uid_t uid, int diff) 330 { 331 struct uidinfo *uip; 332 struct uihashhead *uipp; 333 334 uipp = UIHASH(uid); 335 336 LIST_FOREACH(uip, uipp, ui_hash) 337 if (uip->ui_uid == uid) 338 break; 339 340 if (uip) { 341 uip->ui_proccnt += diff; 342 if (uip->ui_proccnt > 0) 343 return (uip->ui_proccnt); 344 if (uip->ui_proccnt < 0) 345 panic("chgproccnt: procs < 0"); 346 LIST_REMOVE(uip, ui_hash); 347 FREE(uip, M_PROC); 348 return (0); 349 } 350 if (diff <= 0) { 351 if (diff == 0) 352 return(0); 353 panic("chgproccnt: lost user"); 354 } 355 MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); 356 LIST_INSERT_HEAD(uipp, uip, ui_hash); 357 uip->ui_uid = uid; 358 uip->ui_proccnt = diff; 359 return (diff); 360 } 361 362 /* 363 * Check that the specifies process group in in the session of the 364 * specified process. 365 * Treats -ve ids as process ids. 
366 * Used to validate TIOCSPGRP requests. 367 */ 368 int 369 pgid_in_session(struct proc *p, pid_t pg_id) 370 { 371 struct pgrp *pgrp; 372 373 if (pg_id < 0) { 374 struct proc *p1 = pfind(-pg_id); 375 if (p1 == NULL) 376 return EINVAL; 377 pgrp = p1->p_pgrp; 378 } else { 379 pgrp = pgfind(pg_id); 380 if (pgrp == NULL) 381 return EINVAL; 382 } 383 if (pgrp->pg_session != p->p_pgrp->pg_session) 384 return EPERM; 385 return 0; 386 } 387 388 /* 389 * Is p an inferior of q? 390 */ 391 int 392 inferior(struct proc *p, struct proc *q) 393 { 394 395 for (; p != q; p = p->p_pptr) 396 if (p->p_pid == 0) 397 return (0); 398 return (1); 399 } 400 401 /* 402 * Locate a process by number 403 */ 404 struct proc * 405 p_find(pid_t pid, uint flags) 406 { 407 struct proc *p; 408 char stat; 409 410 if (!(flags & PFIND_LOCKED)) 411 proclist_lock_read(); 412 p = pid_table[pid & pid_tbl_mask].pt_proc; 413 /* Only allow live processes to be found by pid. */ 414 if (P_VALID(p) && p->p_pid == pid && 415 ((stat = p->p_stat) == SACTIVE || stat == SSTOP 416 || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) { 417 if (flags & PFIND_UNLOCK_OK) 418 proclist_unlock_read(); 419 return p; 420 } 421 if (flags & PFIND_UNLOCK_FAIL) 422 proclist_unlock_read(); 423 return NULL; 424 } 425 426 427 /* 428 * Locate a process group by number 429 */ 430 struct pgrp * 431 pg_find(pid_t pgid, uint flags) 432 { 433 struct pgrp *pg; 434 435 if (!(flags & PFIND_LOCKED)) 436 proclist_lock_read(); 437 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 438 /* 439 * Can't look up a pgrp that only exists because the session 440 * hasn't died yet (traditional) 441 */ 442 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 443 if (flags & PFIND_UNLOCK_FAIL) 444 proclist_unlock_read(); 445 return NULL; 446 } 447 448 if (flags & PFIND_UNLOCK_OK) 449 proclist_unlock_read(); 450 return pg; 451 } 452 453 /* 454 * Set entry for process 0 455 */ 456 void 457 proc0_insert(struct proc *p, struct lwp *l, struct pgrp 
*pgrp, 458 struct session *sess) 459 { 460 int s; 461 462 simple_lock_init(&p->p_lock); 463 LIST_INIT(&p->p_lwps); 464 LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); 465 p->p_nlwps = 1; 466 simple_lock_init(&p->p_sigctx.ps_silock); 467 CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo); 468 469 s = proclist_lock_write(); 470 471 pid_table[0].pt_proc = p; 472 LIST_INSERT_HEAD(&allproc, p, p_list); 473 LIST_INSERT_HEAD(&alllwp, l, l_list); 474 475 p->p_pgrp = pgrp; 476 pid_table[0].pt_pgrp = pgrp; 477 LIST_INIT(&pgrp->pg_members); 478 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 479 480 pgrp->pg_session = sess; 481 sess->s_count = 1; 482 sess->s_sid = 0; 483 sess->s_leader = p; 484 485 proclist_unlock_write(s); 486 } 487 488 static void 489 expand_pid_table(void) 490 { 491 uint pt_size = pid_tbl_mask + 1; 492 struct pid_table *n_pt, *new_pt; 493 struct proc *proc; 494 struct pgrp *pgrp; 495 int i; 496 int s; 497 pid_t pid; 498 499 new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK); 500 501 s = proclist_lock_write(); 502 if (pt_size != pid_tbl_mask + 1) { 503 /* Another process beat us to it... */ 504 proclist_unlock_write(s); 505 FREE(new_pt, M_PROC); 506 return; 507 } 508 509 /* 510 * Copy entries from old table into new one. 511 * If 'pid' is 'odd' we need to place in the upper half, 512 * even pid's to the lower half. 513 * Free items stay in the low half so we don't have to 514 * fixup the reference to them. 515 * We stuff free items on the front of the freelist 516 * because we can't write to unmodified entries. 517 * Processing the table backwards maintians a semblance 518 * of issueing pid numbers that increase with time. 
519 */ 520 i = pt_size - 1; 521 n_pt = new_pt + i; 522 for (; ; i--, n_pt--) { 523 proc = pid_table[i].pt_proc; 524 pgrp = pid_table[i].pt_pgrp; 525 if (!P_VALID(proc)) { 526 /* Up 'use count' so that link is valid */ 527 pid = (P_NEXT(proc) + pt_size) & ~pt_size; 528 proc = P_FREE(pid); 529 if (pgrp) 530 pid = pgrp->pg_id; 531 } else 532 pid = proc->p_pid; 533 534 /* Save entry in appropriate half of table */ 535 n_pt[pid & pt_size].pt_proc = proc; 536 n_pt[pid & pt_size].pt_pgrp = pgrp; 537 538 /* Put other piece on start of free list */ 539 pid = (pid ^ pt_size) & ~pid_tbl_mask; 540 n_pt[pid & pt_size].pt_proc = 541 P_FREE((pid & ~pt_size) | next_free_pt); 542 n_pt[pid & pt_size].pt_pgrp = 0; 543 next_free_pt = i | (pid & pt_size); 544 if (i == 0) 545 break; 546 } 547 548 /* Switch tables */ 549 n_pt = pid_table; 550 pid_table = new_pt; 551 pid_tbl_mask = pt_size * 2 - 1; 552 553 /* 554 * pid_max starts as PID_MAX (= 30000), once we have 16384 555 * allocated pids we need it to be larger! 556 */ 557 if (pid_tbl_mask > PID_MAX) { 558 pid_max = pid_tbl_mask * 2 + 1; 559 pid_alloc_lim |= pid_alloc_lim << 1; 560 } else 561 pid_alloc_lim <<= 1; /* doubles number of free slots... 
*/ 562 563 proclist_unlock_write(s); 564 FREE(n_pt, M_PROC); 565 } 566 567 struct proc * 568 proc_alloc(void) 569 { 570 struct proc *p; 571 int s; 572 int nxt; 573 pid_t pid; 574 struct pid_table *pt; 575 576 p = pool_get(&proc_pool, PR_WAITOK); 577 p->p_stat = SIDL; /* protect against others */ 578 579 /* allocate next free pid */ 580 581 for (;;expand_pid_table()) { 582 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) 583 /* ensure pids cycle through 2000+ values */ 584 continue; 585 s = proclist_lock_write(); 586 pt = &pid_table[next_free_pt]; 587 #ifdef DIAGNOSTIC 588 if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp)) 589 panic("proc_alloc: slot busy"); 590 #endif 591 nxt = P_NEXT(pt->pt_proc); 592 if (nxt & pid_tbl_mask) 593 break; 594 /* Table full - expand (NB last entry not used....) */ 595 proclist_unlock_write(s); 596 } 597 598 /* pid is 'saved use count' + 'size' + entry */ 599 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 600 if ((uint)pid > (uint)pid_max) 601 pid &= pid_tbl_mask; 602 p->p_pid = pid; 603 next_free_pt = nxt & pid_tbl_mask; 604 605 /* Grab table slot */ 606 pt->pt_proc = p; 607 pid_alloc_cnt++; 608 609 proclist_unlock_write(s); 610 611 return p; 612 } 613 614 /* 615 * Free last resources of a process - called from proc_free (in kern_exit.c) 616 */ 617 void 618 proc_free_mem(struct proc *p) 619 { 620 int s; 621 pid_t pid = p->p_pid; 622 struct pid_table *pt; 623 624 s = proclist_lock_write(); 625 626 pt = &pid_table[pid & pid_tbl_mask]; 627 #ifdef DIAGNOSTIC 628 if (__predict_false(pt->pt_proc != p)) 629 panic("proc_free: pid_table mismatch, pid %x, proc %p", 630 pid, p); 631 #endif 632 /* save pid use count in slot */ 633 pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); 634 635 if (pt->pt_pgrp == NULL) { 636 /* link last freed entry onto ours */ 637 pid &= pid_tbl_mask; 638 pt = &pid_table[last_free_pt]; 639 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); 640 last_free_pt = pid; 641 pid_alloc_cnt--; 642 } 643 644 
nprocs--; 645 proclist_unlock_write(s); 646 647 pool_put(&proc_pool, p); 648 } 649 650 /* 651 * Move p to a new or existing process group (and session) 652 * 653 * If we are creating a new pgrp, the pgid should equal 654 * the calling processes pid. 655 * If is only valid to enter a process group that is in the session 656 * of the process. 657 * Also mksess should only be set if we are creating a process group 658 * 659 * Only called from sys_setsid, sys_setpgid/sys_setprp and the 660 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid) 661 */ 662 int 663 enterpgrp(struct proc *p, pid_t pgid, int mksess) 664 { 665 struct pgrp *new_pgrp, *pgrp; 666 struct session *sess; 667 struct proc *curp = curproc; 668 pid_t pid = p->p_pid; 669 int rval; 670 int s; 671 pid_t pg_id = NO_PGID; 672 673 /* Allocate data areas we might need before doing any validity checks */ 674 proclist_lock_read(); /* Because pid_table might change */ 675 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) { 676 proclist_unlock_read(); 677 new_pgrp = pool_get(&pgrp_pool, PR_WAITOK); 678 } else { 679 proclist_unlock_read(); 680 new_pgrp = NULL; 681 } 682 if (mksess) 683 MALLOC(sess, struct session *, sizeof(struct session), 684 M_SESSION, M_WAITOK); 685 else 686 sess = NULL; 687 688 s = proclist_lock_write(); 689 rval = EPERM; /* most common error (to save typing) */ 690 691 /* Check pgrp exists or can be created */ 692 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 693 if (pgrp != NULL && pgrp->pg_id != pgid) 694 goto done; 695 696 /* Can only set another process under restricted circumstances. */ 697 if (p != curp) { 698 /* must exist and be one of our children... */ 699 if (p != pid_table[pid & pid_tbl_mask].pt_proc 700 || !inferior(p, curp)) { 701 rval = ESRCH; 702 goto done; 703 } 704 /* ... in the same session... */ 705 if (sess != NULL || p->p_session != curp->p_session) 706 goto done; 707 /* ... existing pgid must be in same session ... 
*/ 708 if (pgrp != NULL && pgrp->pg_session != p->p_session) 709 goto done; 710 /* ... and not done an exec. */ 711 if (p->p_flag & P_EXEC) { 712 rval = EACCES; 713 goto done; 714 } 715 } 716 717 /* Changing the process group/session of a session 718 leader is definitely off limits. */ 719 if (SESS_LEADER(p)) { 720 if (sess == NULL && p->p_pgrp == pgrp) 721 /* unless it's a definite noop */ 722 rval = 0; 723 goto done; 724 } 725 726 /* Can only create a process group with id of process */ 727 if (pgrp == NULL && pgid != pid) 728 goto done; 729 730 /* Can only create a session if creating pgrp */ 731 if (sess != NULL && pgrp != NULL) 732 goto done; 733 734 /* Check we allocated memory for a pgrp... */ 735 if (pgrp == NULL && new_pgrp == NULL) 736 goto done; 737 738 /* Don't attach to 'zombie' pgrp */ 739 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 740 goto done; 741 742 /* Expect to succeed now */ 743 rval = 0; 744 745 if (pgrp == p->p_pgrp) 746 /* nothing to do */ 747 goto done; 748 749 /* Ok all setup, link up required structures */ 750 if (pgrp == NULL) { 751 pgrp = new_pgrp; 752 new_pgrp = 0; 753 if (sess != NULL) { 754 sess->s_sid = p->p_pid; 755 sess->s_leader = p; 756 sess->s_count = 1; 757 sess->s_ttyvp = NULL; 758 sess->s_ttyp = NULL; 759 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 760 memcpy(sess->s_login, p->p_session->s_login, 761 sizeof(sess->s_login)); 762 p->p_flag &= ~P_CONTROLT; 763 } else { 764 sess = p->p_pgrp->pg_session; 765 SESSHOLD(sess); 766 } 767 pgrp->pg_session = sess; 768 sess = 0; 769 770 pgrp->pg_id = pgid; 771 LIST_INIT(&pgrp->pg_members); 772 #ifdef DIAGNOSTIC 773 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 774 panic("enterpgrp: pgrp table slot in use"); 775 if (__predict_false(mksess && p != curp)) 776 panic("enterpgrp: mksession and p != curproc"); 777 #endif 778 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 779 pgrp->pg_jobc = 0; 780 } 781 782 /* 783 * Adjust eligibility of affected pgrps to 
participate in job control. 784 * Increment eligibility counts before decrementing, otherwise we 785 * could reach 0 spuriously during the first call. 786 */ 787 fixjobc(p, pgrp, 1); 788 fixjobc(p, p->p_pgrp, 0); 789 790 /* Move process to requested group */ 791 LIST_REMOVE(p, p_pglist); 792 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 793 /* defer delete until we've dumped the lock */ 794 pg_id = p->p_pgrp->pg_id; 795 p->p_pgrp = pgrp; 796 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 797 798 done: 799 proclist_unlock_write(s); 800 if (sess != NULL) 801 free(sess, M_SESSION); 802 if (new_pgrp != NULL) 803 pool_put(&pgrp_pool, new_pgrp); 804 if (pg_id != NO_PGID) 805 pg_delete(pg_id); 806 #ifdef DEBUG_PGRP 807 if (__predict_false(rval)) 808 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 809 pid, pgid, mksess, curp->p_pid, rval); 810 #endif 811 return rval; 812 } 813 814 /* 815 * remove process from process group 816 */ 817 int 818 leavepgrp(struct proc *p) 819 { 820 int s; 821 struct pgrp *pgrp; 822 pid_t pg_id; 823 824 s = proclist_lock_write(); 825 pgrp = p->p_pgrp; 826 LIST_REMOVE(p, p_pglist); 827 p->p_pgrp = 0; 828 pg_id = LIST_EMPTY(&pgrp->pg_members) ? 
pgrp->pg_id : NO_PGID; 829 proclist_unlock_write(s); 830 831 if (pg_id != NO_PGID) 832 pg_delete(pg_id); 833 return 0; 834 } 835 836 static void 837 pg_free(pid_t pg_id) 838 { 839 struct pgrp *pgrp; 840 struct pid_table *pt; 841 int s; 842 843 s = proclist_lock_write(); 844 pt = &pid_table[pg_id & pid_tbl_mask]; 845 pgrp = pt->pt_pgrp; 846 #ifdef DIAGNOSTIC 847 if (__predict_false(!pgrp || pgrp->pg_id != pg_id 848 || !LIST_EMPTY(&pgrp->pg_members))) 849 panic("pg_free: process group absent or has members"); 850 #endif 851 pt->pt_pgrp = 0; 852 853 if (!P_VALID(pt->pt_proc)) { 854 /* orphaned pgrp, put slot onto free list */ 855 #ifdef DIAGNOSTIC 856 if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask)) 857 panic("pg_free: process slot on free list"); 858 #endif 859 860 pg_id &= pid_tbl_mask; 861 pt = &pid_table[last_free_pt]; 862 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id); 863 last_free_pt = pg_id; 864 pid_alloc_cnt--; 865 } 866 proclist_unlock_write(s); 867 868 pool_put(&pgrp_pool, pgrp); 869 } 870 871 /* 872 * delete a process group 873 */ 874 static void 875 pg_delete(pid_t pg_id) 876 { 877 struct pgrp *pgrp; 878 struct tty *ttyp; 879 struct session *ss; 880 int s, is_pgrp_leader; 881 882 s = proclist_lock_write(); 883 pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 884 if (pgrp == NULL || pgrp->pg_id != pg_id || 885 !LIST_EMPTY(&pgrp->pg_members)) { 886 proclist_unlock_write(s); 887 return; 888 } 889 890 ss = pgrp->pg_session; 891 892 /* Remove reference (if any) from tty to this process group */ 893 ttyp = ss->s_ttyp; 894 if (ttyp != NULL && ttyp->t_pgrp == pgrp) { 895 ttyp->t_pgrp = NULL; 896 #ifdef DIAGNOSTIC 897 if (ttyp->t_session != ss) 898 panic("pg_delete: wrong session on terminal"); 899 #endif 900 } 901 902 /* 903 * The leading process group in a session is freed 904 * by sessdelete() if last reference. 
905 */ 906 is_pgrp_leader = (ss->s_sid == pgrp->pg_id); 907 proclist_unlock_write(s); 908 SESSRELE(ss); 909 910 if (is_pgrp_leader) 911 return; 912 913 pg_free(pg_id); 914 } 915 916 /* 917 * Delete session - called from SESSRELE when s_count becomes zero. 918 */ 919 void 920 sessdelete(struct session *ss) 921 { 922 /* 923 * We keep the pgrp with the same id as the session in 924 * order to stop a process being given the same pid. 925 * Since the pgrp holds a reference to the session, it 926 * must be a 'zombie' pgrp by now. 927 */ 928 929 pg_free(ss->s_sid); 930 931 FREE(ss, M_SESSION); 932 } 933 934 /* 935 * Adjust pgrp jobc counters when specified process changes process group. 936 * We count the number of processes in each process group that "qualify" 937 * the group for terminal job control (those with a parent in a different 938 * process group of the same session). If that count reaches zero, the 939 * process group becomes orphaned. Check both the specified process' 940 * process group and that of its children. 941 * entering == 0 => p is leaving specified group. 942 * entering == 1 => p is entering specified group. 943 * 944 * Call with proclist_lock held. 945 */ 946 void 947 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 948 { 949 struct pgrp *hispgrp; 950 struct session *mysession = pgrp->pg_session; 951 struct proc *child; 952 953 /* 954 * Check p's parent to see whether p qualifies its own process 955 * group; if so, adjust count for p's process group. 956 */ 957 hispgrp = p->p_pptr->p_pgrp; 958 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 959 if (entering) 960 pgrp->pg_jobc++; 961 else if (--pgrp->pg_jobc == 0) 962 orphanpg(pgrp); 963 } 964 965 /* 966 * Check this process' children to see whether they qualify 967 * their process groups; if so, adjust counts for children's 968 * process groups. 
969 */ 970 LIST_FOREACH(child, &p->p_children, p_sibling) { 971 hispgrp = child->p_pgrp; 972 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 973 !P_ZOMBIE(child)) { 974 if (entering) 975 hispgrp->pg_jobc++; 976 else if (--hispgrp->pg_jobc == 0) 977 orphanpg(hispgrp); 978 } 979 } 980 } 981 982 /* 983 * A process group has become orphaned; 984 * if there are any stopped processes in the group, 985 * hang-up all process in that group. 986 * 987 * Call with proclist_lock held. 988 */ 989 static void 990 orphanpg(struct pgrp *pg) 991 { 992 struct proc *p; 993 994 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 995 if (p->p_stat == SSTOP) { 996 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 997 psignal(p, SIGHUP); 998 psignal(p, SIGCONT); 999 } 1000 return; 1001 } 1002 } 1003 } 1004 1005 /* mark process as suid/sgid, reset some values to defaults */ 1006 void 1007 p_sugid(struct proc *p) 1008 { 1009 struct plimit *newlim; 1010 1011 p->p_flag |= P_SUGID; 1012 /* reset what needs to be reset in plimit */ 1013 if (p->p_limit->pl_corename != defcorename) { 1014 if (p->p_limit->p_refcnt > 1 && 1015 (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { 1016 newlim = limcopy(p->p_limit); 1017 limfree(p->p_limit); 1018 p->p_limit = newlim; 1019 } 1020 free(p->p_limit->pl_corename, M_TEMP); 1021 p->p_limit->pl_corename = defcorename; 1022 } 1023 } 1024 1025 #ifdef DDB 1026 #include <ddb/db_output.h> 1027 void pidtbl_dump(void); 1028 void 1029 pidtbl_dump(void) 1030 { 1031 struct pid_table *pt; 1032 struct proc *p; 1033 struct pgrp *pgrp; 1034 int id; 1035 1036 db_printf("pid table %p size %x, next %x, last %x\n", 1037 pid_table, pid_tbl_mask+1, 1038 next_free_pt, last_free_pt); 1039 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1040 p = pt->pt_proc; 1041 if (!P_VALID(p) && !pt->pt_pgrp) 1042 continue; 1043 db_printf(" id %x: ", id); 1044 if (P_VALID(p)) 1045 db_printf("proc %p id %d (0x%x) %s\n", 1046 p, p->p_pid, p->p_pid, p->p_comm); 1047 else 1048 db_printf("next 
%x use %x\n", 1049 P_NEXT(p) & pid_tbl_mask, 1050 P_NEXT(p) & ~pid_tbl_mask); 1051 if ((pgrp = pt->pt_pgrp)) { 1052 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1053 pgrp->pg_session, pgrp->pg_session->s_sid, 1054 pgrp->pg_session->s_count, 1055 pgrp->pg_session->s_login); 1056 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1057 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1058 pgrp->pg_members.lh_first); 1059 for (p = pgrp->pg_members.lh_first; p != 0; 1060 p = p->p_pglist.le_next) { 1061 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1062 p->p_pid, p, p->p_pgrp, p->p_comm); 1063 } 1064 } 1065 } 1066 } 1067 #endif /* DDB */ 1068 1069 #ifdef KSTACK_CHECK_MAGIC 1070 #include <sys/user.h> 1071 1072 #define KSTACK_MAGIC 0xdeadbeaf 1073 1074 /* XXX should be per process basis? */ 1075 int kstackleftmin = KSTACK_SIZE; 1076 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 1077 less than this */ 1078 1079 void 1080 kstack_setup_magic(const struct lwp *l) 1081 { 1082 u_int32_t *ip; 1083 u_int32_t const *end; 1084 1085 KASSERT(l != NULL); 1086 KASSERT(l != &lwp0); 1087 1088 /* 1089 * fill all the stack with magic number 1090 * so that later modification on it can be detected. 1091 */ 1092 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1093 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1094 for (; ip < end; ip++) { 1095 *ip = KSTACK_MAGIC; 1096 } 1097 } 1098 1099 void 1100 kstack_check_magic(const struct lwp *l) 1101 { 1102 u_int32_t const *ip, *end; 1103 int stackleft; 1104 1105 KASSERT(l != NULL); 1106 1107 /* don't check proc0 */ /*XXX*/ 1108 if (l == &lwp0) 1109 return; 1110 1111 #ifdef __MACHINE_STACK_GROWS_UP 1112 /* stack grows upwards (eg. 
hppa) */ 1113 ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1114 end = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1115 for (ip--; ip >= end; ip--) 1116 if (*ip != KSTACK_MAGIC) 1117 break; 1118 1119 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip; 1120 #else /* __MACHINE_STACK_GROWS_UP */ 1121 /* stack grows downwards (eg. i386) */ 1122 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1123 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1124 for (; ip < end; ip++) 1125 if (*ip != KSTACK_MAGIC) 1126 break; 1127 1128 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l); 1129 #endif /* __MACHINE_STACK_GROWS_UP */ 1130 1131 if (kstackleftmin > stackleft) { 1132 kstackleftmin = stackleft; 1133 if (stackleft < kstackleftthres) 1134 printf("warning: kernel stack left %d bytes" 1135 "(pid %u:lid %u)\n", stackleft, 1136 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1137 } 1138 1139 if (stackleft <= 0) { 1140 panic("magic on the top of kernel stack changed for " 1141 "pid %u, lid %u: maybe kernel stack overflow", 1142 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1143 } 1144 } 1145 #endif /* KSTACK_CHECK_MAGIC */ 1146