1 /* $NetBSD: kern_proc.c,v 1.75 2004/03/14 01:08:47 cl Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1989, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.75 2004/03/14 01:08:47 cl Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>

/*
 * Structure associated with user caching.
 * One entry per uid that currently owns at least one process;
 * tracks the per-user process count for resource-limit checks
 * (see chgproccnt() below).
 */
struct uidinfo {
    LIST_ENTRY(uidinfo) ui_hash;    /* chain on uihashtbl bucket */
    uid_t ui_uid;                   /* uid this entry describes */
    long ui_proccnt;                /* number of processes owned by ui_uid */
};
#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;        /* size of hash table - 1 */

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;    /* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.
* Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *    allproc
 *    zombproc
 *    pid_table
 */
struct lock proclist_lock;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
    struct proc *pt_proc;    /* live proc, or odd-tagged free-list link */
    struct pgrp *pt_pgrp;    /* pgrp whose pg_id hashes to this slot */
};
#if 1    /* strongly typed cast - should be a noop */
static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
/*
 * Free slots encode '(value << 1) | 1' in pt_proc; since real proc
 * pointers are word-aligned (low bit clear), the low bit distinguishes
 * a free-list link from a valid pointer.  P_NEXT() recovers the stored
 * value, which combines the next free index with a pid 'use count'.
 */
#define P_VALID(p) (!(p2u(p) & 1))
#define P_NEXT(p) (p2u(p) >> 1)
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))

#define INITIAL_PID_TABLE_SIZE (1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;    /* max we allocate before growing table */
static uint pid_alloc_cnt;    /* number of allocated pids */

/* links through free slots - never empty!
*/
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;        /* largest value we allocate */

struct pool proc_pool;
struct pool lwp_pool;
struct pool lwp_uc_pool;
struct pool pcred_pool;
struct pool plimit_pool;
struct pool pstats_pool;
struct pool pgrp_pool;
struct pool rusage_pool;
struct pool ras_pool;
struct pool sadata_pool;
struct pool saupcall_pool;
struct pool sastack_pool;
struct pool savp_pool;
struct pool ptimer_pool;

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
    { &allproc },
    { &zombproc },
    { NULL },
};

static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);

/*
 * Initialize global process hashing structures.
 * Called once at boot: sets up the proclists, the proclist lock,
 * the pid_table free list, the uid hash and all the process pools.
 */
void
procinit(void)
{
    const struct proclist_desc *pd;
    int i;
#define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

    for (pd = proclists; pd->pd_list != NULL; pd++)
        LIST_INIT(pd->pd_list);

    spinlockinit(&proclist_lock, "proclk", 0);

    pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
        M_PROC, M_WAITOK);
    /* Set free list running through table...
       Preset 'use count' above PID_MAX so we allocate pid 1 next. */
    for (i = 0; i <= pid_tbl_mask; i++) {
        pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
        pid_table[i].pt_pgrp = 0;
    }
    /* slot 0 is just grabbed (reserved for proc0, see proc0_insert) */
    next_free_pt = 1;
    /* Need to fix last entry: it must not link past the table end. */
    last_free_pt = pid_tbl_mask;
    pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
    /* point at which we grow table - to avoid reusing pids too often */
    pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

    LIST_INIT(&alllwp);

    uihashtbl =
        hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);

    pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
        &pool_allocator_nointr);
    pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
        &pool_allocator_nointr);
    pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
        &pool_allocator_nointr);
    pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
        &pool_allocator_nointr);
    pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
        &pool_allocator_nointr);
    pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
        &pool_allocator_nointr);
    pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
        &pool_allocator_nointr);
    pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
        &pool_allocator_nointr);
    pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
        &pool_allocator_nointr);
    pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
        &pool_allocator_nointr);
    pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
        "saupcpl", &pool_allocator_nointr);
    pool_init(&sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
        &pool_allocator_nointr);
    pool_init(&savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
        &pool_allocator_nointr);
    pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
        &pool_allocator_nointr);
}

/*
 * Acquire a read lock on the proclist.
272 */ 273 void 274 proclist_lock_read(void) 275 { 276 int error; 277 278 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 279 #ifdef DIAGNOSTIC 280 if (__predict_false(error != 0)) 281 panic("proclist_lock_read: failed to acquire lock"); 282 #endif 283 } 284 285 /* 286 * Release a read lock on the proclist. 287 */ 288 void 289 proclist_unlock_read(void) 290 { 291 292 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 293 } 294 295 /* 296 * Acquire a write lock on the proclist. 297 */ 298 int 299 proclist_lock_write(void) 300 { 301 int s, error; 302 303 s = splclock(); 304 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 305 #ifdef DIAGNOSTIC 306 if (__predict_false(error != 0)) 307 panic("proclist_lock: failed to acquire lock"); 308 #endif 309 return (s); 310 } 311 312 /* 313 * Release a write lock on the proclist. 314 */ 315 void 316 proclist_unlock_write(int s) 317 { 318 319 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 320 splx(s); 321 } 322 323 /* 324 * Change the count associated with number of processes 325 * a given user is using. 326 */ 327 int 328 chgproccnt(uid_t uid, int diff) 329 { 330 struct uidinfo *uip; 331 struct uihashhead *uipp; 332 333 uipp = UIHASH(uid); 334 335 LIST_FOREACH(uip, uipp, ui_hash) 336 if (uip->ui_uid == uid) 337 break; 338 339 if (uip) { 340 uip->ui_proccnt += diff; 341 if (uip->ui_proccnt > 0) 342 return (uip->ui_proccnt); 343 if (uip->ui_proccnt < 0) 344 panic("chgproccnt: procs < 0"); 345 LIST_REMOVE(uip, ui_hash); 346 FREE(uip, M_PROC); 347 return (0); 348 } 349 if (diff <= 0) { 350 if (diff == 0) 351 return(0); 352 panic("chgproccnt: lost user"); 353 } 354 MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); 355 LIST_INSERT_HEAD(uipp, uip, ui_hash); 356 uip->ui_uid = uid; 357 uip->ui_proccnt = diff; 358 return (diff); 359 } 360 361 /* 362 * Check that the specified process group is in the session of the 363 * specified process. 364 * Treats -ve ids as process ids. 
* Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
    struct pgrp *pgrp;

    if (pg_id < 0) {
        /* Negative id names a process; use its process group. */
        struct proc *p1 = pfind(-pg_id);
        if (p1 == NULL)
            return EINVAL;
        pgrp = p1->p_pgrp;
    } else {
        pgrp = pgfind(pg_id);
        if (pgrp == NULL)
            return EINVAL;
    }
    if (pgrp->pg_session != p->p_pgrp->pg_session)
        return EPERM;
    return 0;
}

/*
 * Is p an inferior of q?
 * Walks p's parent chain; proc0 (pid 0) terminates the walk.
 */
int
inferior(struct proc *p, struct proc *q)
{

    for (; p != q; p = p->p_pptr)
        if (p->p_pid == 0)
            return (0);
    return (1);
}

/*
 * Locate a process by number.
 * flags (PFIND_*) control whether the proclist read lock is taken
 * here and whether it is released on success/failure, and whether
 * zombies may be returned.
 */
struct proc *
p_find(pid_t pid, uint flags)
{
    struct proc *p;
    char stat;

    if (!(flags & PFIND_LOCKED))
        proclist_lock_read();
    p = pid_table[pid & pid_tbl_mask].pt_proc;
    /* Only allow live processes to be found by pid. */
    if (P_VALID(p) && p->p_pid == pid &&
        ((stat = p->p_stat) == SACTIVE || stat == SSTOP
         || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
        if (flags & PFIND_UNLOCK_OK)
            proclist_unlock_read();
        return p;
    }
    if (flags & PFIND_UNLOCK_FAIL)
        proclist_unlock_read();
    return NULL;
}


/*
 * Locate a process group by number.
 * Same PFIND_* locking protocol as p_find().
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
    struct pgrp *pg;

    if (!(flags & PFIND_LOCKED))
        proclist_lock_read();
    pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
    /*
     * Can't look up a pgrp that only exists because the session
     * hasn't died yet (traditional)
     */
    if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
        if (flags & PFIND_UNLOCK_FAIL)
            proclist_unlock_read();
        return NULL;
    }

    if (flags & PFIND_UNLOCK_OK)
        proclist_unlock_read();
    return pg;
}

/*
 * Set entry for process 0.
 * Wires proc0 and its first lwp into pid_table slot 0 and the global
 * lists, and makes it the leader of the supplied pgrp and session.
 */
void
proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
    struct session *sess)
{
    int s;

    simple_lock_init(&p->p_lock);
    LIST_INIT(&p->p_lwps);
    LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
    p->p_nlwps = 1;
    simple_lock_init(&p->p_sigctx.ps_silock);
    CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);

    s = proclist_lock_write();

    pid_table[0].pt_proc = p;
    LIST_INSERT_HEAD(&allproc, p, p_list);
    LIST_INSERT_HEAD(&alllwp, l, l_list);

    p->p_pgrp = pgrp;
    pid_table[0].pt_pgrp = pgrp;
    LIST_INIT(&pgrp->pg_members);
    LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

    pgrp->pg_session = sess;
    sess->s_count = 1;
    sess->s_sid = 0;
    sess->s_leader = p;

    proclist_unlock_write(s);
}

/*
 * Double the size of pid_table, 'unzipping' the old entries into
 * the two halves of the new table (see the comment above pid_table).
 * The allocation is done before taking the write lock; if another
 * thread grew the table first, the new allocation is discarded.
 */
static void
expand_pid_table(void)
{
    uint pt_size = pid_tbl_mask + 1;
    struct pid_table *n_pt, *new_pt;
    struct proc *proc;
    struct pgrp *pgrp;
    int i;
    int s;
    pid_t pid;

    new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

    s = proclist_lock_write();
    if (pt_size != pid_tbl_mask + 1) {
        /* Another process beat us to it... */
        proclist_unlock_write(s);
        FREE(new_pt, M_PROC);
        return;
    }

    /*
     * Copy entries from old table into new one.
     * If 'pid' is 'odd' we need to place in the upper half,
     * even pid's to the lower half.
     * Free items stay in the low half so we don't have to
     * fixup the reference to them.
     * We stuff free items on the front of the freelist
     * because we can't write to unmodified entries.
     * Processing the table backwards maintains a semblance
     * of issueing pid numbers that increase with time.
     */
    i = pt_size - 1;
    n_pt = new_pt + i;
    for (; ; i--, n_pt--) {
        proc = pid_table[i].pt_proc;
        pgrp = pid_table[i].pt_pgrp;
        if (!P_VALID(proc)) {
            /* Up 'use count' so that link is valid */
            pid = (P_NEXT(proc) + pt_size) & ~pt_size;
            proc = P_FREE(pid);
            if (pgrp)
                pid = pgrp->pg_id;
        } else
            pid = proc->p_pid;

        /* Save entry in appropriate half of table */
        n_pt[pid & pt_size].pt_proc = proc;
        n_pt[pid & pt_size].pt_pgrp = pgrp;

        /* Put other piece on start of free list */
        pid = (pid ^ pt_size) & ~pid_tbl_mask;
        n_pt[pid & pt_size].pt_proc =
            P_FREE((pid & ~pt_size) | next_free_pt);
        n_pt[pid & pt_size].pt_pgrp = 0;
        next_free_pt = i | (pid & pt_size);
        if (i == 0)
            break;
    }

    /* Switch tables */
    n_pt = pid_table;
    pid_table = new_pt;
    pid_tbl_mask = pt_size * 2 - 1;

    /*
     * pid_max starts as PID_MAX (= 30000), once we have 16384
     * allocated pids we need it to be larger!
     */
    if (pid_tbl_mask > PID_MAX) {
        pid_max = pid_tbl_mask * 2 + 1;
        pid_alloc_lim |= pid_alloc_lim << 1;
    } else
        pid_alloc_lim <<= 1;    /* doubles number of free slots... */

    proclist_unlock_write(s);
    FREE(n_pt, M_PROC);
}

/*
 * Allocate a new proc from the pool and assign it a fresh pid from
 * the pid_table free list, growing the table when it runs low so
 * pids are not reused too quickly.
 */
struct proc *
proc_alloc(void)
{
    struct proc *p;
    int s;
    int nxt;
    pid_t pid;
    struct pid_table *pt;

    p = pool_get(&proc_pool, PR_WAITOK);
    p->p_stat = SIDL;            /* protect against others */

    /* allocate next free pid */

    for (;;expand_pid_table()) {
        if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
            /* ensure pids cycle through 2000+ values */
            continue;
        s = proclist_lock_write();
        pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
        if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
            panic("proc_alloc: slot busy");
#endif
        nxt = P_NEXT(pt->pt_proc);
        if (nxt & pid_tbl_mask)
            break;
        /* Table full - expand (NB last entry not used....) */
        proclist_unlock_write(s);
    }

    /* pid is 'saved use count' + 'size' + entry */
    pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
    if ((uint)pid > (uint)pid_max)
        pid &= pid_tbl_mask;
    p->p_pid = pid;
    next_free_pt = nxt & pid_tbl_mask;

    /* Grab table slot */
    pt->pt_proc = p;
    pid_alloc_cnt++;

    proclist_unlock_write(s);

    return p;
}

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 * Returns the pid_table slot to the free list (unless an orphaned
 * pgrp still pins it) and releases the proc back to its pool.
 */
void
proc_free_mem(struct proc *p)
{
    int s;
    pid_t pid = p->p_pid;
    struct pid_table *pt;

    s = proclist_lock_write();

    pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
    if (__predict_false(pt->pt_proc != p))
        panic("proc_free: pid_table mismatch, pid %x, proc %p",
            pid, p);
#endif
    /* save pid use count in slot */
    pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

    if (pt->pt_pgrp == NULL) {
        /* link last freed entry onto ours */
        pid &= pid_tbl_mask;
        pt = &pid_table[last_free_pt];
        pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
        last_free_pt = pid;
        pid_alloc_cnt--;
    }

    nprocs--;
    proclist_unlock_write(s);

    pool_put(&proc_pool, p);
}

/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * If is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
    struct pgrp *new_pgrp, *pgrp;
    struct session *sess;
    struct proc *curp = curproc;
    pid_t pid = p->p_pid;
    int rval;
    int s;
    pid_t pg_id = NO_PGID;

    /* Allocate data areas we might need before doing any validity checks */
    proclist_lock_read();        /* Because pid_table might change */
    if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
        proclist_unlock_read();
        new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
    } else {
        proclist_unlock_read();
        new_pgrp = NULL;
    }
    if (mksess)
        MALLOC(sess, struct session *, sizeof(struct session),
            M_SESSION, M_WAITOK);
    else
        sess = NULL;

    s = proclist_lock_write();
    rval = EPERM;    /* most common error (to save typing) */

    /* Check pgrp exists or can be created */
    pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
    if (pgrp != NULL && pgrp->pg_id != pgid)
        goto done;

    /* Can only set another process under restricted circumstances. */
    if (p != curp) {
        /* must exist and be one of our children... */
        if (p != pid_table[pid & pid_tbl_mask].pt_proc
            || !inferior(p, curp)) {
            rval = ESRCH;
            goto done;
        }
        /* ... in the same session... */
        if (sess != NULL || p->p_session != curp->p_session)
            goto done;
        /* ... existing pgid must be in same session ... */
        if (pgrp != NULL && pgrp->pg_session != p->p_session)
            goto done;
        /* ... and not done an exec. */
        if (p->p_flag & P_EXEC) {
            rval = EACCES;
            goto done;
        }
    }

    /* Changing the process group/session of a session
       leader is definitely off limits. */
    if (SESS_LEADER(p)) {
        if (sess == NULL && p->p_pgrp == pgrp)
            /* unless it's a definite noop */
            rval = 0;
        goto done;
    }

    /* Can only create a process group with id of process */
    if (pgrp == NULL && pgid != pid)
        goto done;

    /* Can only create a session if creating pgrp */
    if (sess != NULL && pgrp != NULL)
        goto done;

    /* Check we allocated memory for a pgrp... */
    if (pgrp == NULL && new_pgrp == NULL)
        goto done;

    /* Don't attach to 'zombie' pgrp */
    if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
        goto done;

    /* Expect to succeed now */
    rval = 0;

    if (pgrp == p->p_pgrp)
        /* nothing to do */
        goto done;

    /* Ok all setup, link up required structures */
    if (pgrp == NULL) {
        pgrp = new_pgrp;
        new_pgrp = 0;
        if (sess != NULL) {
            sess->s_sid = p->p_pid;
            sess->s_leader = p;
            sess->s_count = 1;
            sess->s_ttyvp = NULL;
            sess->s_ttyp = NULL;
            sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
            memcpy(sess->s_login, p->p_session->s_login,
                sizeof(sess->s_login));
            p->p_flag &= ~P_CONTROLT;
        } else {
            sess = p->p_pgrp->pg_session;
            SESSHOLD(sess);
        }
        pgrp->pg_session = sess;
        sess = 0;

        pgrp->pg_id = pgid;
        LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
        if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
            panic("enterpgrp: pgrp table slot in use");
        if (__predict_false(mksess && p != curp))
            panic("enterpgrp: mksession and p != curproc");
#endif
        pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
        pgrp->pg_jobc = 0;
    }

    /*
     * Adjust eligibility of affected pgrps to participate in job control.
     * Increment eligibility counts before decrementing, otherwise we
     * could reach 0 spuriously during the first call.
     */
    fixjobc(p, pgrp, 1);
    fixjobc(p, p->p_pgrp, 0);

    /* Move process to requested group */
    LIST_REMOVE(p, p_pglist);
    if (LIST_EMPTY(&p->p_pgrp->pg_members))
        /* defer delete until we've dumped the lock */
        pg_id = p->p_pgrp->pg_id;
    p->p_pgrp = pgrp;
    LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

 done:
    proclist_unlock_write(s);
    if (sess != NULL)
        free(sess, M_SESSION);
    if (new_pgrp != NULL)
        pool_put(&pgrp_pool, new_pgrp);
    if (pg_id != NO_PGID)
        pg_delete(pg_id);
#ifdef DEBUG_PGRP
    if (__predict_false(rval))
        printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
            pid, pgid, mksess, curp->p_pid, rval);
#endif
    return rval;
}

/*
 * remove process from process group
 * If p was the last member, the (now empty) pgrp is deleted after
 * the lock is dropped.
 */
int
leavepgrp(struct proc *p)
{
    int s;
    struct pgrp *pgrp;
    pid_t pg_id;

    s = proclist_lock_write();
    pgrp = p->p_pgrp;
    LIST_REMOVE(p, p_pglist);
    p->p_pgrp = 0;
    pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
    proclist_unlock_write(s);

    if (pg_id != NO_PGID)
        pg_delete(pg_id);
    return 0;
}

/*
 * Free an empty pgrp, releasing its pid_table slot to the free list
 * when no process with the same pid still holds it.
 */
static void
pg_free(pid_t pg_id)
{
    struct pgrp *pgrp;
    struct pid_table *pt;
    int s;

    s = proclist_lock_write();
    pt = &pid_table[pg_id & pid_tbl_mask];
    pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
    if (__predict_false(!pgrp || pgrp->pg_id != pg_id
        || !LIST_EMPTY(&pgrp->pg_members)))
        panic("pg_free: process group absent or has members");
#endif
    pt->pt_pgrp = 0;

    if (!P_VALID(pt->pt_proc)) {
        /* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
        if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
            panic("pg_free: process slot on free list");
#endif

        pg_id &= pid_tbl_mask;
        pt = &pid_table[last_free_pt];
        pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
        last_free_pt = pg_id;
        pid_alloc_cnt--;
    }
    proclist_unlock_write(s);

    pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 * Detaches the pgrp from any controlling terminal first; a session
 * leader's pgrp is instead freed later by sessdelete().
 */
static void
pg_delete(pid_t pg_id)
{
    struct pgrp *pgrp;
    struct tty *ttyp;
    struct session *ss;
    int s, is_pgrp_leader;

    s = proclist_lock_write();
    pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
    if (pgrp == NULL || pgrp->pg_id != pg_id ||
        !LIST_EMPTY(&pgrp->pg_members)) {
        proclist_unlock_write(s);
        return;
    }

    ss = pgrp->pg_session;

    /* Remove reference (if any) from tty to this process group */
    ttyp = ss->s_ttyp;
    if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
        ttyp->t_pgrp = NULL;
#ifdef DIAGNOSTIC
        if (ttyp->t_session != ss)
            panic("pg_delete: wrong session on terminal");
#endif
    }

    /*
     * The leading process group in a session is freed
     * by sessdelete() if last reference.
     */
    is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
    proclist_unlock_write(s);
    SESSRELE(ss);

    if (is_pgrp_leader)
        return;

    pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
    /*
     * We keep the pgrp with the same id as the session in
     * order to stop a process being given the same pid.
     * Since the pgrp holds a reference to the session, it
     * must be a 'zombie' pgrp by now.
     */

    pg_free(ss->s_sid);

    FREE(ss, M_SESSION);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proclist_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
    struct pgrp *hispgrp;
    struct session *mysession = pgrp->pg_session;
    struct proc *child;

    /*
     * Check p's parent to see whether p qualifies its own process
     * group; if so, adjust count for p's process group.
     */
    hispgrp = p->p_pptr->p_pgrp;
    if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
        if (entering)
            pgrp->pg_jobc++;
        else if (--pgrp->pg_jobc == 0)
            orphanpg(pgrp);
    }

    /*
     * Check this process' children to see whether they qualify
     * their process groups; if so, adjust counts for children's
     * process groups.
     */
    LIST_FOREACH(child, &p->p_children, p_sibling) {
        hispgrp = child->p_pgrp;
        if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
            !P_ZOMBIE(child)) {
            if (entering)
                hispgrp->pg_jobc++;
            else if (--hispgrp->pg_jobc == 0)
                orphanpg(hispgrp);
        }
    }
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang-up all process in that group.
 *
 * Call with proclist_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
    struct proc *p;

    LIST_FOREACH(p, &pg->pg_members, p_pglist) {
        if (p->p_stat == SSTOP) {
            /* One stopped member is enough: signal them all
               and return (outer scan need not continue). */
            LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                psignal(p, SIGHUP);
                psignal(p, SIGCONT);
            }
            return;
        }
    }
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
    struct plimit *newlim;

    p->p_flag |= P_SUGID;
    /* reset what needs to be reset in plimit */
    if (p->p_limit->pl_corename != defcorename) {
        /* unshare the limit structure before modifying it */
        if (p->p_limit->p_refcnt > 1 &&
            (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
            newlim = limcopy(p->p_limit);
            limfree(p->p_limit);
            p->p_limit = newlim;
        }
        free(p->p_limit->pl_corename, M_TEMP);
        p->p_limit->pl_corename = defcorename;
    }
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
/*
 * Dump the pid_table from the in-kernel debugger: every slot that
 * holds a proc, a free-list link with a pgrp, or both.
 */
void
pidtbl_dump(void)
{
    struct pid_table *pt;
    struct proc *p;
    struct pgrp *pgrp;
    int id;

    db_printf("pid table %p size %x, next %x, last %x\n",
        pid_table, pid_tbl_mask+1,
        next_free_pt, last_free_pt);
    for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
        p = pt->pt_proc;
        if (!P_VALID(p) && !pt->pt_pgrp)
            continue;
        db_printf(" id %x: ", id);
        if (P_VALID(p))
            db_printf("proc %p id %d (0x%x) %s\n",
                p, p->p_pid, p->p_pid, p->p_comm);
        else
            db_printf("next %x use %x\n",
                P_NEXT(p) & pid_tbl_mask,
                P_NEXT(p) & ~pid_tbl_mask);
        if ((pgrp = pt->pt_pgrp)) {
            db_printf("\tsession %p, sid %d, count %d, login %s\n",
                pgrp->pg_session, pgrp->pg_session->s_sid,
                pgrp->pg_session->s_count,
                pgrp->pg_session->s_login);
            db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
                pgrp, pgrp->pg_id, pgrp->pg_jobc,
                pgrp->pg_members.lh_first);
            for (p = pgrp->pg_members.lh_first; p != 0;
                p = p->p_pglist.le_next) {
                db_printf("\t\tpid %d addr %p pgrp %p %s\n",
                    p->p_pid, p, p->p_pgrp, p->p_comm);
            }
        }
    }
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define KSTACK_MAGIC 0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is
                                          less than this */

/*
 * Fill an lwp's kernel stack with KSTACK_MAGIC so later overwrites
 * can be detected by kstack_check_magic().
 */
void
kstack_setup_magic(const struct lwp *l)
{
    u_int32_t *ip;
    u_int32_t const *end;

    KASSERT(l != NULL);
    KASSERT(l != &lwp0);

    /*
     * fill all the stack with magic number
     * so that later modification on it can be detected.
     */
    ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
    end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
    for (; ip < end; ip++) {
        *ip = KSTACK_MAGIC;
    }
}

/*
 * Scan an lwp's kernel stack from the far end for the first clobbered
 * magic word, track the minimum headroom seen, and panic on apparent
 * stack overflow.
 */
void
kstack_check_magic(const struct lwp *l)
{
    u_int32_t const *ip, *end;
    int stackleft;

    KASSERT(l != NULL);

    /* don't check proc0 */ /*XXX*/
    if (l == &lwp0)
        return;

#ifdef __MACHINE_STACK_GROWS_UP
    /* stack grows upwards (eg. hppa) */
    ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
    end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
    for (ip--; ip >= end; ip--)
        if (*ip != KSTACK_MAGIC)
            break;

    stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
    /* stack grows downwards (eg. i386) */
    ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
    end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
    for (; ip < end; ip++)
        if (*ip != KSTACK_MAGIC)
            break;

    stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

    if (kstackleftmin > stackleft) {
        kstackleftmin = stackleft;
        if (stackleft < kstackleftthres)
            printf("warning: kernel stack left %d bytes"
                "(pid %u:lid %u)\n", stackleft,
                (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
    }

    if (stackleft <= 0) {
        panic("magic on the top of kernel stack changed for "
            "pid %u, lid %u: maybe kernel stack overflow",
            (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
    }
}
#endif /* KSTACK_CHECK_MAGIC */