1 /* $NetBSD: kern_proc.c,v 1.80 2004/10/03 22:26:35 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1989, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 67 * 68 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 69 */ 70 71 #include <sys/cdefs.h> 72 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.80 2004/10/03 22:26:35 yamt Exp $"); 73 74 #include "opt_kstack.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/proc.h> 80 #include <sys/resourcevar.h> 81 #include <sys/buf.h> 82 #include <sys/acct.h> 83 #include <sys/wait.h> 84 #include <sys/file.h> 85 #include <ufs/ufs/quota.h> 86 #include <sys/uio.h> 87 #include <sys/malloc.h> 88 #include <sys/pool.h> 89 #include <sys/mbuf.h> 90 #include <sys/ioctl.h> 91 #include <sys/tty.h> 92 #include <sys/signalvar.h> 93 #include <sys/ras.h> 94 #include <sys/sa.h> 95 #include <sys/savar.h> 96 #include <uvm/uvm_extern.h> 97 98 /* 99 * Other process lists 100 */ 101 102 struct proclist allproc; 103 struct proclist zombproc; /* resources have been freed */ 104 105 106 /* 107 * Process list locking: 108 * 109 * We have two types of locks on the proclists: read locks and write 110 * locks. Read locks can be used in interrupt context, so while we 111 * hold the write lock, we must also block clock interrupts to 112 * lock out any scheduling changes that may happen in interrupt 113 * context. 114 * 115 * The proclist lock locks the following structures: 116 * 117 * allproc 118 * zombproc 119 * pid_table 120 */ 121 struct lock proclist_lock; 122 123 /* 124 * pid to proc lookup is done by indexing the pid_table array. 125 * Since pid numbers are only allocated when an empty slot 126 * has been found, there is no need to search any lists ever. 127 * (an orphaned pgrp will lock the slot, a session will lock 128 * the pgrp with the same number.) 129 * If the table is too small it is reallocated with twice the 130 * previous size and the entries 'unzipped' into the two halves. 131 * A linked list of free entries is passed through the pt_proc 132 * field of 'free' items - set odd to be an invalid ptr. 133 */ 134 135 struct pid_table { 136 struct proc *pt_proc; 137 struct pgrp *pt_pgrp; 138 }; 139 #if 1 /* strongly typed cast - should be a noop */ 140 static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; } 141 #else 142 #define p2u(p) ((uint)p) 143 #endif 144 #define P_VALID(p) (!(p2u(p) & 1)) 145 #define P_NEXT(p) (p2u(p) >> 1) 146 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1)) 147 148 #define INITIAL_PID_TABLE_SIZE (1 << 5) 149 static struct pid_table *pid_table; 150 static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 151 static uint pid_alloc_lim; /* max we allocate before growing table */ 152 static uint pid_alloc_cnt; /* number of allocated pids */ 153 154 /* links through free slots - never empty! */ 155 static uint next_free_pt, last_free_pt; 156 static pid_t pid_max = PID_MAX; /* largest value we allocate */ 157 158 POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl", 159 &pool_allocator_nointr); 160 POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl", 161 &pool_allocator_nointr); 162 POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl", 163 &pool_allocator_nointr); 164 POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl", 165 &pool_allocator_nointr); 166 POOL_INIT(pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl", 167 &pool_allocator_nointr); 168 POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl", 169 &pool_allocator_nointr); 170 POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl", 171 &pool_allocator_nointr); 172 POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl", 173 &pool_allocator_nointr); 174 POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl", 175 &pool_allocator_nointr); 176 POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl", 177 &pool_allocator_nointr); 178 POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, "saupcpl", 179 &pool_allocator_nointr); 180 POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl", 181 &pool_allocator_nointr); 182 POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl", 183 &pool_allocator_nointr); 184 POOL_INIT(ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl", 185 &pool_allocator_nointr); 186 POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl", 187 &pool_allocator_nointr); 188 189 MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data"); 190 MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); 191 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); 192 193 /* 194 * The process list descriptors, used during pid allocation and 195 * by sysctl. No locking on this data structure is needed since 196 * it is completely static. 197 */ 198 const struct proclist_desc proclists[] = { 199 { &allproc }, 200 { &zombproc }, 201 { NULL }, 202 }; 203 204 static void orphanpg(struct pgrp *); 205 static void pg_delete(pid_t); 206 207 /* 208 * Initialize global process hashing structures. 209 */ 210 void 211 procinit(void) 212 { 213 const struct proclist_desc *pd; 214 int i; 215 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 216 217 for (pd = proclists; pd->pd_list != NULL; pd++) 218 LIST_INIT(pd->pd_list); 219 220 spinlockinit(&proclist_lock, "proclk", 0); 221 222 pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table, 223 M_PROC, M_WAITOK); 224 /* Set free list running through table... 225 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 226 for (i = 0; i <= pid_tbl_mask; i++) { 227 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1); 228 pid_table[i].pt_pgrp = 0; 229 } 230 /* slot 0 is just grabbed */ 231 next_free_pt = 1; 232 /* Need to fix last entry. */ 233 last_free_pt = pid_tbl_mask; 234 pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY); 235 /* point at which we grow table - to avoid reusing pids too often */ 236 pid_alloc_lim = pid_tbl_mask - 1; 237 #undef LINK_EMPTY 238 239 LIST_INIT(&alllwp); 240 241 uihashtbl = 242 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 243 } 244 245 /* 246 * Acquire a read lock on the proclist. 247 */ 248 void 249 proclist_lock_read(void) 250 { 251 int error; 252 253 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 254 #ifdef DIAGNOSTIC 255 if (__predict_false(error != 0)) 256 panic("proclist_lock_read: failed to acquire lock"); 257 #endif 258 } 259 260 /* 261 * Release a read lock on the proclist. 262 */ 263 void 264 proclist_unlock_read(void) 265 { 266 267 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 268 } 269 270 /* 271 * Acquire a write lock on the proclist. 272 */ 273 int 274 proclist_lock_write(void) 275 { 276 int s, error; 277 278 s = splclock(); 279 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 280 #ifdef DIAGNOSTIC 281 if (__predict_false(error != 0)) 282 panic("proclist_lock: failed to acquire lock"); 283 #endif 284 return (s); 285 } 286 287 /* 288 * Release a write lock on the proclist. 289 */ 290 void 291 proclist_unlock_write(int s) 292 { 293 294 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 295 splx(s); 296 } 297 298 /* 299 * Check that the specified process group is in the session of the 300 * specified process. 301 * Treats -ve ids as process ids. 302 * Used to validate TIOCSPGRP requests. 303 */ 304 int 305 pgid_in_session(struct proc *p, pid_t pg_id) 306 { 307 struct pgrp *pgrp; 308 309 if (pg_id < 0) { 310 struct proc *p1 = pfind(-pg_id); 311 if (p1 == NULL) 312 return EINVAL; 313 pgrp = p1->p_pgrp; 314 } else { 315 pgrp = pgfind(pg_id); 316 if (pgrp == NULL) 317 return EINVAL; 318 } 319 if (pgrp->pg_session != p->p_pgrp->pg_session) 320 return EPERM; 321 return 0; 322 } 323 324 /* 325 * Is p an inferior of q? 326 */ 327 int 328 inferior(struct proc *p, struct proc *q) 329 { 330 331 for (; p != q; p = p->p_pptr) 332 if (p->p_pid == 0) 333 return (0); 334 return (1); 335 } 336 337 /* 338 * Locate a process by number 339 */ 340 struct proc * 341 p_find(pid_t pid, uint flags) 342 { 343 struct proc *p; 344 char stat; 345 346 if (!(flags & PFIND_LOCKED)) 347 proclist_lock_read(); 348 p = pid_table[pid & pid_tbl_mask].pt_proc; 349 /* Only allow live processes to be found by pid. */ 350 if (P_VALID(p) && p->p_pid == pid && 351 ((stat = p->p_stat) == SACTIVE || stat == SSTOP 352 || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) { 353 if (flags & PFIND_UNLOCK_OK) 354 proclist_unlock_read(); 355 return p; 356 } 357 if (flags & PFIND_UNLOCK_FAIL) 358 proclist_unlock_read(); 359 return NULL; 360 } 361 362 363 /* 364 * Locate a process group by number 365 */ 366 struct pgrp * 367 pg_find(pid_t pgid, uint flags) 368 { 369 struct pgrp *pg; 370 371 if (!(flags & PFIND_LOCKED)) 372 proclist_lock_read(); 373 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 374 /* 375 * Can't look up a pgrp that only exists because the session 376 * hasn't died yet (traditional) 377 */ 378 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 379 if (flags & PFIND_UNLOCK_FAIL) 380 proclist_unlock_read(); 381 return NULL; 382 } 383 384 if (flags & PFIND_UNLOCK_OK) 385 proclist_unlock_read(); 386 return pg; 387 } 388 389 /* 390 * Set entry for process 0 391 */ 392 void 393 proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp, 394 struct session *sess) 395 { 396 int s; 397 398 simple_lock_init(&p->p_lock); 399 LIST_INIT(&p->p_lwps); 400 LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); 401 p->p_nlwps = 1; 402 simple_lock_init(&p->p_sigctx.ps_silock); 403 CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo); 404 405 s = proclist_lock_write(); 406 407 pid_table[0].pt_proc = p; 408 LIST_INSERT_HEAD(&allproc, p, p_list); 409 LIST_INSERT_HEAD(&alllwp, l, l_list); 410 411 p->p_pgrp = pgrp; 412 pid_table[0].pt_pgrp = pgrp; 413 LIST_INIT(&pgrp->pg_members); 414 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 415 416 pgrp->pg_session = sess; 417 sess->s_count = 1; 418 sess->s_sid = 0; 419 sess->s_leader = p; 420 421 proclist_unlock_write(s); 422 } 423 424 static void 425 expand_pid_table(void) 426 { 427 uint pt_size = pid_tbl_mask + 1; 428 struct pid_table *n_pt, *new_pt; 429 struct proc *proc; 430 struct pgrp *pgrp; 431 int i; 432 int s; 433 pid_t pid; 434 435 new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK); 436 437 s = proclist_lock_write(); 438 if (pt_size != pid_tbl_mask + 1) { 439 /* Another process beat us to it... */ 440 proclist_unlock_write(s); 441 FREE(new_pt, M_PROC); 442 return; 443 } 444 445 /* 446 * Copy entries from old table into new one. 447 * If 'pid' is 'odd' we need to place in the upper half, 448 * even pid's to the lower half. 449 * Free items stay in the low half so we don't have to 450 * fixup the reference to them. 451 * We stuff free items on the front of the freelist 452 * because we can't write to unmodified entries. 453 * Processing the table backwards maintains a semblance 454 * of issueing pid numbers that increase with time. 455 */ 456 i = pt_size - 1; 457 n_pt = new_pt + i; 458 for (; ; i--, n_pt--) { 459 proc = pid_table[i].pt_proc; 460 pgrp = pid_table[i].pt_pgrp; 461 if (!P_VALID(proc)) { 462 /* Up 'use count' so that link is valid */ 463 pid = (P_NEXT(proc) + pt_size) & ~pt_size; 464 proc = P_FREE(pid); 465 if (pgrp) 466 pid = pgrp->pg_id; 467 } else 468 pid = proc->p_pid; 469 470 /* Save entry in appropriate half of table */ 471 n_pt[pid & pt_size].pt_proc = proc; 472 n_pt[pid & pt_size].pt_pgrp = pgrp; 473 474 /* Put other piece on start of free list */ 475 pid = (pid ^ pt_size) & ~pid_tbl_mask; 476 n_pt[pid & pt_size].pt_proc = 477 P_FREE((pid & ~pt_size) | next_free_pt); 478 n_pt[pid & pt_size].pt_pgrp = 0; 479 next_free_pt = i | (pid & pt_size); 480 if (i == 0) 481 break; 482 } 483 484 /* Switch tables */ 485 n_pt = pid_table; 486 pid_table = new_pt; 487 pid_tbl_mask = pt_size * 2 - 1; 488 489 /* 490 * pid_max starts as PID_MAX (= 30000), once we have 16384 491 * allocated pids we need it to be larger! 492 */ 493 if (pid_tbl_mask > PID_MAX) { 494 pid_max = pid_tbl_mask * 2 + 1; 495 pid_alloc_lim |= pid_alloc_lim << 1; 496 } else 497 pid_alloc_lim <<= 1; /* doubles number of free slots... */ 498 499 proclist_unlock_write(s); 500 FREE(n_pt, M_PROC); 501 } 502 503 struct proc * 504 proc_alloc(void) 505 { 506 struct proc *p; 507 int s; 508 int nxt; 509 pid_t pid; 510 struct pid_table *pt; 511 512 p = pool_get(&proc_pool, PR_WAITOK); 513 p->p_stat = SIDL; /* protect against others */ 514 515 /* allocate next free pid */ 516 517 for (;;expand_pid_table()) { 518 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) 519 /* ensure pids cycle through 2000+ values */ 520 continue; 521 s = proclist_lock_write(); 522 pt = &pid_table[next_free_pt]; 523 #ifdef DIAGNOSTIC 524 if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp)) 525 panic("proc_alloc: slot busy"); 526 #endif 527 nxt = P_NEXT(pt->pt_proc); 528 if (nxt & pid_tbl_mask) 529 break; 530 /* Table full - expand (NB last entry not used....) */ 531 proclist_unlock_write(s); 532 } 533 534 /* pid is 'saved use count' + 'size' + entry */ 535 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 536 if ((uint)pid > (uint)pid_max) 537 pid &= pid_tbl_mask; 538 p->p_pid = pid; 539 next_free_pt = nxt & pid_tbl_mask; 540 541 /* Grab table slot */ 542 pt->pt_proc = p; 543 pid_alloc_cnt++; 544 545 proclist_unlock_write(s); 546 547 return p; 548 } 549 550 /* 551 * Free last resources of a process - called from proc_free (in kern_exit.c) 552 */ 553 void 554 proc_free_mem(struct proc *p) 555 { 556 int s; 557 pid_t pid = p->p_pid; 558 struct pid_table *pt; 559 560 s = proclist_lock_write(); 561 562 pt = &pid_table[pid & pid_tbl_mask]; 563 #ifdef DIAGNOSTIC 564 if (__predict_false(pt->pt_proc != p)) 565 panic("proc_free: pid_table mismatch, pid %x, proc %p", 566 pid, p); 567 #endif 568 /* save pid use count in slot */ 569 pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); 570 571 if (pt->pt_pgrp == NULL) { 572 /* link last freed entry onto ours */ 573 pid &= pid_tbl_mask; 574 pt = &pid_table[last_free_pt]; 575 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); 576 last_free_pt = pid; 577 pid_alloc_cnt--; 578 } 579 580 nprocs--; 581 proclist_unlock_write(s); 582 583 pool_put(&proc_pool, p); 584 } 585 586 /* 587 * Move p to a new or existing process group (and session) 588 * 589 * If we are creating a new pgrp, the pgid should equal 590 * the calling process' pid. 591 * If is only valid to enter a process group that is in the session 592 * of the process. 593 * Also mksess should only be set if we are creating a process group 594 * 595 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the 596 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid) 597 */ 598 int 599 enterpgrp(struct proc *p, pid_t pgid, int mksess) 600 { 601 struct pgrp *new_pgrp, *pgrp; 602 struct session *sess; 603 struct proc *curp = curproc; 604 pid_t pid = p->p_pid; 605 int rval; 606 int s; 607 pid_t pg_id = NO_PGID; 608 609 /* Allocate data areas we might need before doing any validity checks */ 610 proclist_lock_read(); /* Because pid_table might change */ 611 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) { 612 proclist_unlock_read(); 613 new_pgrp = pool_get(&pgrp_pool, PR_WAITOK); 614 } else { 615 proclist_unlock_read(); 616 new_pgrp = NULL; 617 } 618 if (mksess) 619 sess = pool_get(&session_pool, M_WAITOK); 620 else 621 sess = NULL; 622 623 s = proclist_lock_write(); 624 rval = EPERM; /* most common error (to save typing) */ 625 626 /* Check pgrp exists or can be created */ 627 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 628 if (pgrp != NULL && pgrp->pg_id != pgid) 629 goto done; 630 631 /* Can only set another process under restricted circumstances. */ 632 if (p != curp) { 633 /* must exist and be one of our children... */ 634 if (p != pid_table[pid & pid_tbl_mask].pt_proc 635 || !inferior(p, curp)) { 636 rval = ESRCH; 637 goto done; 638 } 639 /* ... in the same session... */ 640 if (sess != NULL || p->p_session != curp->p_session) 641 goto done; 642 /* ... existing pgid must be in same session ... */ 643 if (pgrp != NULL && pgrp->pg_session != p->p_session) 644 goto done; 645 /* ... and not done an exec. */ 646 if (p->p_flag & P_EXEC) { 647 rval = EACCES; 648 goto done; 649 } 650 } 651 652 /* Changing the process group/session of a session 653 leader is definitely off limits. */ 654 if (SESS_LEADER(p)) { 655 if (sess == NULL && p->p_pgrp == pgrp) 656 /* unless it's a definite noop */ 657 rval = 0; 658 goto done; 659 } 660 661 /* Can only create a process group with id of process */ 662 if (pgrp == NULL && pgid != pid) 663 goto done; 664 665 /* Can only create a session if creating pgrp */ 666 if (sess != NULL && pgrp != NULL) 667 goto done; 668 669 /* Check we allocated memory for a pgrp... */ 670 if (pgrp == NULL && new_pgrp == NULL) 671 goto done; 672 673 /* Don't attach to 'zombie' pgrp */ 674 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 675 goto done; 676 677 /* Expect to succeed now */ 678 rval = 0; 679 680 if (pgrp == p->p_pgrp) 681 /* nothing to do */ 682 goto done; 683 684 /* Ok all setup, link up required structures */ 685 if (pgrp == NULL) { 686 pgrp = new_pgrp; 687 new_pgrp = 0; 688 if (sess != NULL) { 689 sess->s_sid = p->p_pid; 690 sess->s_leader = p; 691 sess->s_count = 1; 692 sess->s_ttyvp = NULL; 693 sess->s_ttyp = NULL; 694 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 695 memcpy(sess->s_login, p->p_session->s_login, 696 sizeof(sess->s_login)); 697 p->p_flag &= ~P_CONTROLT; 698 } else { 699 sess = p->p_pgrp->pg_session; 700 SESSHOLD(sess); 701 } 702 pgrp->pg_session = sess; 703 sess = 0; 704 705 pgrp->pg_id = pgid; 706 LIST_INIT(&pgrp->pg_members); 707 #ifdef DIAGNOSTIC 708 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 709 panic("enterpgrp: pgrp table slot in use"); 710 if (__predict_false(mksess && p != curp)) 711 panic("enterpgrp: mksession and p != curproc"); 712 #endif 713 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 714 pgrp->pg_jobc = 0; 715 } 716 717 /* 718 * Adjust eligibility of affected pgrps to participate in job control. 719 * Increment eligibility counts before decrementing, otherwise we 720 * could reach 0 spuriously during the first call. 721 */ 722 fixjobc(p, pgrp, 1); 723 fixjobc(p, p->p_pgrp, 0); 724 725 /* Move process to requested group */ 726 LIST_REMOVE(p, p_pglist); 727 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 728 /* defer delete until we've dumped the lock */ 729 pg_id = p->p_pgrp->pg_id; 730 p->p_pgrp = pgrp; 731 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 732 733 done: 734 proclist_unlock_write(s); 735 if (sess != NULL) 736 pool_put(&session_pool, sess); 737 if (new_pgrp != NULL) 738 pool_put(&pgrp_pool, new_pgrp); 739 if (pg_id != NO_PGID) 740 pg_delete(pg_id); 741 #ifdef DEBUG_PGRP 742 if (__predict_false(rval)) 743 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 744 pid, pgid, mksess, curp->p_pid, rval); 745 #endif 746 return rval; 747 } 748 749 /* 750 * remove process from process group 751 */ 752 int 753 leavepgrp(struct proc *p) 754 { 755 int s; 756 struct pgrp *pgrp; 757 pid_t pg_id; 758 759 s = proclist_lock_write(); 760 pgrp = p->p_pgrp; 761 LIST_REMOVE(p, p_pglist); 762 p->p_pgrp = 0; 763 pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID; 764 proclist_unlock_write(s); 765 766 if (pg_id != NO_PGID) 767 pg_delete(pg_id); 768 return 0; 769 } 770 771 static void 772 pg_free(pid_t pg_id) 773 { 774 struct pgrp *pgrp; 775 struct pid_table *pt; 776 int s; 777 778 s = proclist_lock_write(); 779 pt = &pid_table[pg_id & pid_tbl_mask]; 780 pgrp = pt->pt_pgrp; 781 #ifdef DIAGNOSTIC 782 if (__predict_false(!pgrp || pgrp->pg_id != pg_id 783 || !LIST_EMPTY(&pgrp->pg_members))) 784 panic("pg_free: process group absent or has members"); 785 #endif 786 pt->pt_pgrp = 0; 787 788 if (!P_VALID(pt->pt_proc)) { 789 /* orphaned pgrp, put slot onto free list */ 790 #ifdef DIAGNOSTIC 791 if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask)) 792 panic("pg_free: process slot on free list"); 793 #endif 794 795 pg_id &= pid_tbl_mask; 796 pt = &pid_table[last_free_pt]; 797 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id); 798 last_free_pt = pg_id; 799 pid_alloc_cnt--; 800 } 801 proclist_unlock_write(s); 802 803 pool_put(&pgrp_pool, pgrp); 804 } 805 806 /* 807 * delete a process group 808 */ 809 static void 810 pg_delete(pid_t pg_id) 811 { 812 struct pgrp *pgrp; 813 struct tty *ttyp; 814 struct session *ss; 815 int s, is_pgrp_leader; 816 817 s = proclist_lock_write(); 818 pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 819 if (pgrp == NULL || pgrp->pg_id != pg_id || 820 !LIST_EMPTY(&pgrp->pg_members)) { 821 proclist_unlock_write(s); 822 return; 823 } 824 825 ss = pgrp->pg_session; 826 827 /* Remove reference (if any) from tty to this process group */ 828 ttyp = ss->s_ttyp; 829 if (ttyp != NULL && ttyp->t_pgrp == pgrp) { 830 ttyp->t_pgrp = NULL; 831 #ifdef DIAGNOSTIC 832 if (ttyp->t_session != ss) 833 panic("pg_delete: wrong session on terminal"); 834 #endif 835 } 836 837 /* 838 * The leading process group in a session is freed 839 * by sessdelete() if last reference. 840 */ 841 is_pgrp_leader = (ss->s_sid == pgrp->pg_id); 842 proclist_unlock_write(s); 843 SESSRELE(ss); 844 845 if (is_pgrp_leader) 846 return; 847 848 pg_free(pg_id); 849 } 850 851 /* 852 * Delete session - called from SESSRELE when s_count becomes zero. 853 */ 854 void 855 sessdelete(struct session *ss) 856 { 857 /* 858 * We keep the pgrp with the same id as the session in 859 * order to stop a process being given the same pid. 860 * Since the pgrp holds a reference to the session, it 861 * must be a 'zombie' pgrp by now. 862 */ 863 864 pg_free(ss->s_sid); 865 866 pool_put(&session_pool, ss); 867 } 868 869 /* 870 * Adjust pgrp jobc counters when specified process changes process group. 871 * We count the number of processes in each process group that "qualify" 872 * the group for terminal job control (those with a parent in a different 873 * process group of the same session). If that count reaches zero, the 874 * process group becomes orphaned. Check both the specified process' 875 * process group and that of its children. 876 * entering == 0 => p is leaving specified group. 877 * entering == 1 => p is entering specified group. 878 * 879 * Call with proclist_lock held. 880 */ 881 void 882 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 883 { 884 struct pgrp *hispgrp; 885 struct session *mysession = pgrp->pg_session; 886 struct proc *child; 887 888 /* 889 * Check p's parent to see whether p qualifies its own process 890 * group; if so, adjust count for p's process group. 891 */ 892 hispgrp = p->p_pptr->p_pgrp; 893 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 894 if (entering) 895 pgrp->pg_jobc++; 896 else if (--pgrp->pg_jobc == 0) 897 orphanpg(pgrp); 898 } 899 900 /* 901 * Check this process' children to see whether they qualify 902 * their process groups; if so, adjust counts for children's 903 * process groups. 904 */ 905 LIST_FOREACH(child, &p->p_children, p_sibling) { 906 hispgrp = child->p_pgrp; 907 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 908 !P_ZOMBIE(child)) { 909 if (entering) 910 hispgrp->pg_jobc++; 911 else if (--hispgrp->pg_jobc == 0) 912 orphanpg(hispgrp); 913 } 914 } 915 } 916 917 /* 918 * A process group has become orphaned; 919 * if there are any stopped processes in the group, 920 * hang-up all process in that group. 921 * 922 * Call with proclist_lock held. 923 */ 924 static void 925 orphanpg(struct pgrp *pg) 926 { 927 struct proc *p; 928 929 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 930 if (p->p_stat == SSTOP) { 931 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 932 psignal(p, SIGHUP); 933 psignal(p, SIGCONT); 934 } 935 return; 936 } 937 } 938 } 939 940 /* mark process as suid/sgid, reset some values to defaults */ 941 void 942 p_sugid(struct proc *p) 943 { 944 struct plimit *lim; 945 char *cn; 946 947 p->p_flag |= P_SUGID; 948 /* reset what needs to be reset in plimit */ 949 lim = p->p_limit; 950 if (lim->pl_corename != defcorename) { 951 if (lim->p_refcnt > 1 && 952 (lim->p_lflags & PL_SHAREMOD) == 0) { 953 p->p_limit = limcopy(lim); 954 limfree(lim); 955 lim = p->p_limit; 956 } 957 simple_lock(&lim->p_slock); 958 cn = lim->pl_corename; 959 lim->pl_corename = defcorename; 960 simple_unlock(&lim->p_slock); 961 if (cn != defcorename) 962 free(cn, M_TEMP); 963 } 964 } 965 966 #ifdef DDB 967 #include <ddb/db_output.h> 968 void pidtbl_dump(void); 969 void 970 pidtbl_dump(void) 971 { 972 struct pid_table *pt; 973 struct proc *p; 974 struct pgrp *pgrp; 975 int id; 976 977 db_printf("pid table %p size %x, next %x, last %x\n", 978 pid_table, pid_tbl_mask+1, 979 next_free_pt, last_free_pt); 980 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 981 p = pt->pt_proc; 982 if (!P_VALID(p) && !pt->pt_pgrp) 983 continue; 984 db_printf(" id %x: ", id); 985 if (P_VALID(p)) 986 db_printf("proc %p id %d (0x%x) %s\n", 987 p, p->p_pid, p->p_pid, p->p_comm); 988 else 989 db_printf("next %x use %x\n", 990 P_NEXT(p) & pid_tbl_mask, 991 P_NEXT(p) & ~pid_tbl_mask); 992 if ((pgrp = pt->pt_pgrp)) { 993 db_printf("\tsession %p, sid %d, count %d, login %s\n", 994 pgrp->pg_session, pgrp->pg_session->s_sid, 995 pgrp->pg_session->s_count, 996 pgrp->pg_session->s_login); 997 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 998 pgrp, pgrp->pg_id, pgrp->pg_jobc, 999 pgrp->pg_members.lh_first); 1000 for (p = pgrp->pg_members.lh_first; p != 0; 1001 p = p->p_pglist.le_next) { 1002 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1003 p->p_pid, p, p->p_pgrp, p->p_comm); 1004 } 1005 } 1006 } 1007 } 1008 #endif /* DDB */ 1009 1010 #ifdef KSTACK_CHECK_MAGIC 1011 #include <sys/user.h> 1012 1013 #define KSTACK_MAGIC 0xdeadbeaf 1014 1015 /* XXX should be per process basis? */ 1016 int kstackleftmin = KSTACK_SIZE; 1017 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 1018 less than this */ 1019 1020 void 1021 kstack_setup_magic(const struct lwp *l) 1022 { 1023 u_int32_t *ip; 1024 u_int32_t const *end; 1025 1026 KASSERT(l != NULL); 1027 KASSERT(l != &lwp0); 1028 1029 /* 1030 * fill all the stack with magic number 1031 * so that later modification on it can be detected. 1032 */ 1033 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1034 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1035 for (; ip < end; ip++) { 1036 *ip = KSTACK_MAGIC; 1037 } 1038 } 1039 1040 void 1041 kstack_check_magic(const struct lwp *l) 1042 { 1043 u_int32_t const *ip, *end; 1044 int stackleft; 1045 1046 KASSERT(l != NULL); 1047 1048 /* don't check proc0 */ /*XXX*/ 1049 if (l == &lwp0) 1050 return; 1051 1052 #ifdef __MACHINE_STACK_GROWS_UP 1053 /* stack grows upwards (eg. hppa) */ 1054 ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1055 end = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1056 for (ip--; ip >= end; ip--) 1057 if (*ip != KSTACK_MAGIC) 1058 break; 1059 1060 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip; 1061 #else /* __MACHINE_STACK_GROWS_UP */ 1062 /* stack grows downwards (eg. i386) */ 1063 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1064 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1065 for (; ip < end; ip++) 1066 if (*ip != KSTACK_MAGIC) 1067 break; 1068 1069 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l); 1070 #endif /* __MACHINE_STACK_GROWS_UP */ 1071 1072 if (kstackleftmin > stackleft) { 1073 kstackleftmin = stackleft; 1074 if (stackleft < kstackleftthres) 1075 printf("warning: kernel stack left %d bytes" 1076 "(pid %u:lid %u)\n", stackleft, 1077 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1078 } 1079 1080 if (stackleft <= 0) { 1081 panic("magic on the top of kernel stack changed for " 1082 "pid %u, lid %u: maybe kernel stack overflow", 1083 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1084 } 1085 } 1086 #endif /* KSTACK_CHECK_MAGIC */ 1087 1088 /* XXX shouldn't be here */ 1089 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) 1090 #define PROCLIST_ASSERT_LOCKED_READ() \ 1091 KASSERT(lockstatus(&proclist_lock) == LK_SHARED) 1092 #else 1093 #define PROCLIST_ASSERT_LOCKED_READ() /* nothing */ 1094 #endif 1095 1096 int 1097 proclist_foreach_call(struct proclist *list, 1098 int (*callback)(struct proc *, void *arg), void *arg) 1099 { 1100 struct proc marker; 1101 struct proc *p; 1102 struct lwp * const l = curlwp; 1103 int ret = 0; 1104 1105 marker.p_flag = P_MARKER; 1106 PHOLD(l); 1107 proclist_lock_read(); 1108 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) { 1109 if (p->p_flag & P_MARKER) { 1110 p = LIST_NEXT(p, p_list); 1111 continue; 1112 } 1113 LIST_INSERT_AFTER(p, &marker, p_list); 1114 ret = (*callback)(p, arg); 1115 PROCLIST_ASSERT_LOCKED_READ(); 1116 p = LIST_NEXT(&marker, p_list); 1117 LIST_REMOVE(&marker, p_list); 1118 } 1119 proclist_unlock_read(); 1120 PRELE(l); 1121 1122 return ret; 1123 } 1124