1 /* $NetBSD: kern_proc.c,v 1.91 2006/06/25 08:12:54 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1989, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.91 2006/06/25 08:12:54 yamt Exp $");

#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

/* One slot of the pid table, indexed by (id & pid_tbl_mask). */
struct pid_table {
	struct proc	*pt_proc;	/* process, or encoded free-list link */
	struct pgrp	*pt_pgrp;	/* process group with this id, if any */
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
/*
 * Free-slot encoding of pt_proc:
 *   P_FREE(v)  builds the marker (v << 1 | 1); the set low bit is what
 *              distinguishes it from a real proc pointer.
 *   P_VALID(p) is true when the low bit is clear, i.e. p is a pointer.
 *   P_NEXT(p)  recovers v.  Users of v (see proc_alloc) treat the bits
 *              under pid_tbl_mask as the index of the next free slot
 *              and the remaining high bits as the saved 'use count'.
 */
#define P_VALID(p) (!(p2u(p) & 1))
#define P_NEXT(p) (p2u(p) >> 1)
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))

#define INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
/* Table size minus one; the size is kept a power of two by doubling. */
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

/* Components of the first process -- never freed. */
struct session session0;
struct pgrp pgrp0;
struct proc proc0;
struct lwp lwp0;
kauth_cred_t cred0;
struct filedesc0 filedesc0;
struct cwdinfo cwdi0;
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;

extern struct user *proc0paddr;

extern const struct emul emul_netbsd;	/* defined in kern_exec.c */

/* Defaults taken from the NOFILE, MAXUPRC and CMASK build-time constants. */
int nofile = NOFILE;
int maxuprc = MAXUPRC;
int cmask = CMASK;

/* Pools for the process-related structures named by each pool. */
POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
    &pool_allocator_nointr);
POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
    &pool_allocator_nointr);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr);
POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
    &pool_allocator_nointr);
POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
    &pool_allocator_nointr);
POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
    &pool_allocator_nointr);
POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
    &pool_allocator_nointr);
POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
    &pool_allocator_nointr);
POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc	},
	{ &zombproc	},
	{ NULL		},	/* terminator: procinit stops at pd_list == NULL */
};

static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);

/*
 * Initialize global process hashing structures.
225 */ 226 void 227 procinit(void) 228 { 229 const struct proclist_desc *pd; 230 int i; 231 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 232 233 for (pd = proclists; pd->pd_list != NULL; pd++) 234 LIST_INIT(pd->pd_list); 235 236 spinlockinit(&proclist_lock, "proclk", 0); 237 238 pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table, 239 M_PROC, M_WAITOK); 240 /* Set free list running through table... 241 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 242 for (i = 0; i <= pid_tbl_mask; i++) { 243 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1); 244 pid_table[i].pt_pgrp = 0; 245 } 246 /* slot 0 is just grabbed */ 247 next_free_pt = 1; 248 /* Need to fix last entry. */ 249 last_free_pt = pid_tbl_mask; 250 pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY); 251 /* point at which we grow table - to avoid reusing pids too often */ 252 pid_alloc_lim = pid_tbl_mask - 1; 253 #undef LINK_EMPTY 254 255 LIST_INIT(&alllwp); 256 257 uihashtbl = 258 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 259 } 260 261 /* 262 * Initialize process 0. 
263 */ 264 void 265 proc0_init(void) 266 { 267 struct proc *p; 268 struct pgrp *pg; 269 struct session *sess; 270 struct lwp *l; 271 int s; 272 u_int i; 273 rlim_t lim; 274 275 p = &proc0; 276 pg = &pgrp0; 277 sess = &session0; 278 l = &lwp0; 279 280 simple_lock_init(&p->p_lock); 281 LIST_INIT(&p->p_lwps); 282 LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); 283 p->p_nlwps = 1; 284 simple_lock_init(&p->p_sigctx.ps_silock); 285 CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo); 286 287 s = proclist_lock_write(); 288 289 pid_table[0].pt_proc = p; 290 LIST_INSERT_HEAD(&allproc, p, p_list); 291 LIST_INSERT_HEAD(&alllwp, l, l_list); 292 293 p->p_pgrp = pg; 294 pid_table[0].pt_pgrp = pg; 295 LIST_INIT(&pg->pg_members); 296 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist); 297 298 pg->pg_session = sess; 299 sess->s_count = 1; 300 sess->s_sid = 0; 301 sess->s_leader = p; 302 303 proclist_unlock_write(s); 304 305 /* 306 * Set P_NOCLDWAIT so that kernel threads are reparented to 307 * init(8) when they exit. init(8) can easily wait them out 308 * for us. 309 */ 310 p->p_flag = P_SYSTEM | P_NOCLDWAIT; 311 p->p_stat = SACTIVE; 312 p->p_nice = NZERO; 313 p->p_emul = &emul_netbsd; 314 #ifdef __HAVE_SYSCALL_INTERN 315 (*p->p_emul->e_syscall_intern)(p); 316 #endif 317 strncpy(p->p_comm, "swapper", MAXCOMLEN); 318 319 l->l_flag = L_INMEM; 320 l->l_stat = LSONPROC; 321 p->p_nrlwps = 1; 322 323 callout_init(&l->l_tsleep_ch); 324 325 /* Create credentials. */ 326 cred0 = kauth_cred_alloc(); 327 p->p_cred = cred0; 328 329 /* Create the CWD info. */ 330 p->p_cwdi = &cwdi0; 331 cwdi0.cwdi_cmask = cmask; 332 cwdi0.cwdi_refcnt = 1; 333 simple_lock_init(&cwdi0.cwdi_slock); 334 335 /* Create the limits structures. 
*/ 336 p->p_limit = &limit0; 337 simple_lock_init(&limit0.p_slock); 338 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) 339 limit0.pl_rlimit[i].rlim_cur = 340 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; 341 342 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 343 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = 344 maxfiles < nofile ? maxfiles : nofile; 345 346 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 347 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = 348 maxproc < maxuprc ? maxproc : maxuprc; 349 350 lim = ptoa(uvmexp.free); 351 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim; 352 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim; 353 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 354 limit0.pl_corename = defcorename; 355 limit0.p_refcnt = 1; 356 357 /* Configure virtual memory system, set vm rlimits. */ 358 uvm_init_limits(p); 359 360 /* Initialize file descriptor table for proc0. */ 361 p->p_fd = &filedesc0.fd_fd; 362 fdinit1(&filedesc0); 363 364 /* 365 * Initialize proc0's vmspace, which uses the kernel pmap. 366 * All kernel processes (which never have user space mappings) 367 * share proc0's vmspace, and thus, the kernel pmap. 368 */ 369 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 370 trunc_page(VM_MAX_ADDRESS)); 371 p->p_vmspace = &vmspace0; 372 373 l->l_addr = proc0paddr; /* XXX */ 374 375 p->p_stats = &pstat0; 376 377 /* Initialize signal state for proc0. */ 378 p->p_sigacts = &sigacts0; 379 siginit(p); 380 } 381 382 /* 383 * Acquire a read lock on the proclist. 384 */ 385 void 386 proclist_lock_read(void) 387 { 388 int error; 389 390 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 391 #ifdef DIAGNOSTIC 392 if (__predict_false(error != 0)) 393 panic("proclist_lock_read: failed to acquire lock"); 394 #endif 395 } 396 397 /* 398 * Release a read lock on the proclist. 
399 */ 400 void 401 proclist_unlock_read(void) 402 { 403 404 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 405 } 406 407 /* 408 * Acquire a write lock on the proclist. 409 */ 410 int 411 proclist_lock_write(void) 412 { 413 int s, error; 414 415 s = splclock(); 416 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 417 #ifdef DIAGNOSTIC 418 if (__predict_false(error != 0)) 419 panic("proclist_lock: failed to acquire lock"); 420 #endif 421 return s; 422 } 423 424 /* 425 * Release a write lock on the proclist. 426 */ 427 void 428 proclist_unlock_write(int s) 429 { 430 431 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 432 splx(s); 433 } 434 435 /* 436 * Check that the specified process group is in the session of the 437 * specified process. 438 * Treats -ve ids as process ids. 439 * Used to validate TIOCSPGRP requests. 440 */ 441 int 442 pgid_in_session(struct proc *p, pid_t pg_id) 443 { 444 struct pgrp *pgrp; 445 446 if (pg_id < 0) { 447 struct proc *p1 = pfind(-pg_id); 448 if (p1 == NULL) 449 return EINVAL; 450 pgrp = p1->p_pgrp; 451 } else { 452 pgrp = pgfind(pg_id); 453 if (pgrp == NULL) 454 return EINVAL; 455 } 456 if (pgrp->pg_session != p->p_pgrp->pg_session) 457 return EPERM; 458 return 0; 459 } 460 461 /* 462 * Is p an inferior of q? 463 */ 464 int 465 inferior(struct proc *p, struct proc *q) 466 { 467 468 for (; p != q; p = p->p_pptr) 469 if (p->p_pid == 0) 470 return 0; 471 return 1; 472 } 473 474 /* 475 * Locate a process by number 476 */ 477 struct proc * 478 p_find(pid_t pid, uint flags) 479 { 480 struct proc *p; 481 char stat; 482 483 if (!(flags & PFIND_LOCKED)) 484 proclist_lock_read(); 485 p = pid_table[pid & pid_tbl_mask].pt_proc; 486 /* Only allow live processes to be found by pid. 
*/ 487 if (P_VALID(p) && p->p_pid == pid && 488 ((stat = p->p_stat) == SACTIVE || stat == SSTOP 489 || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) { 490 if (flags & PFIND_UNLOCK_OK) 491 proclist_unlock_read(); 492 return p; 493 } 494 if (flags & PFIND_UNLOCK_FAIL) 495 proclist_unlock_read(); 496 return NULL; 497 } 498 499 500 /* 501 * Locate a process group by number 502 */ 503 struct pgrp * 504 pg_find(pid_t pgid, uint flags) 505 { 506 struct pgrp *pg; 507 508 if (!(flags & PFIND_LOCKED)) 509 proclist_lock_read(); 510 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 511 /* 512 * Can't look up a pgrp that only exists because the session 513 * hasn't died yet (traditional) 514 */ 515 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 516 if (flags & PFIND_UNLOCK_FAIL) 517 proclist_unlock_read(); 518 return NULL; 519 } 520 521 if (flags & PFIND_UNLOCK_OK) 522 proclist_unlock_read(); 523 return pg; 524 } 525 526 static void 527 expand_pid_table(void) 528 { 529 uint pt_size = pid_tbl_mask + 1; 530 struct pid_table *n_pt, *new_pt; 531 struct proc *proc; 532 struct pgrp *pgrp; 533 int i; 534 int s; 535 pid_t pid; 536 537 new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK); 538 539 s = proclist_lock_write(); 540 if (pt_size != pid_tbl_mask + 1) { 541 /* Another process beat us to it... */ 542 proclist_unlock_write(s); 543 FREE(new_pt, M_PROC); 544 return; 545 } 546 547 /* 548 * Copy entries from old table into new one. 549 * If 'pid' is 'odd' we need to place in the upper half, 550 * even pid's to the lower half. 551 * Free items stay in the low half so we don't have to 552 * fixup the reference to them. 553 * We stuff free items on the front of the freelist 554 * because we can't write to unmodified entries. 555 * Processing the table backwards maintains a semblance 556 * of issueing pid numbers that increase with time. 
557 */ 558 i = pt_size - 1; 559 n_pt = new_pt + i; 560 for (; ; i--, n_pt--) { 561 proc = pid_table[i].pt_proc; 562 pgrp = pid_table[i].pt_pgrp; 563 if (!P_VALID(proc)) { 564 /* Up 'use count' so that link is valid */ 565 pid = (P_NEXT(proc) + pt_size) & ~pt_size; 566 proc = P_FREE(pid); 567 if (pgrp) 568 pid = pgrp->pg_id; 569 } else 570 pid = proc->p_pid; 571 572 /* Save entry in appropriate half of table */ 573 n_pt[pid & pt_size].pt_proc = proc; 574 n_pt[pid & pt_size].pt_pgrp = pgrp; 575 576 /* Put other piece on start of free list */ 577 pid = (pid ^ pt_size) & ~pid_tbl_mask; 578 n_pt[pid & pt_size].pt_proc = 579 P_FREE((pid & ~pt_size) | next_free_pt); 580 n_pt[pid & pt_size].pt_pgrp = 0; 581 next_free_pt = i | (pid & pt_size); 582 if (i == 0) 583 break; 584 } 585 586 /* Switch tables */ 587 n_pt = pid_table; 588 pid_table = new_pt; 589 pid_tbl_mask = pt_size * 2 - 1; 590 591 /* 592 * pid_max starts as PID_MAX (= 30000), once we have 16384 593 * allocated pids we need it to be larger! 594 */ 595 if (pid_tbl_mask > PID_MAX) { 596 pid_max = pid_tbl_mask * 2 + 1; 597 pid_alloc_lim |= pid_alloc_lim << 1; 598 } else 599 pid_alloc_lim <<= 1; /* doubles number of free slots... 
*/ 600 601 proclist_unlock_write(s); 602 FREE(n_pt, M_PROC); 603 } 604 605 struct proc * 606 proc_alloc(void) 607 { 608 struct proc *p; 609 int s; 610 int nxt; 611 pid_t pid; 612 struct pid_table *pt; 613 614 p = pool_get(&proc_pool, PR_WAITOK); 615 p->p_stat = SIDL; /* protect against others */ 616 617 /* allocate next free pid */ 618 619 for (;;expand_pid_table()) { 620 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) 621 /* ensure pids cycle through 2000+ values */ 622 continue; 623 s = proclist_lock_write(); 624 pt = &pid_table[next_free_pt]; 625 #ifdef DIAGNOSTIC 626 if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp)) 627 panic("proc_alloc: slot busy"); 628 #endif 629 nxt = P_NEXT(pt->pt_proc); 630 if (nxt & pid_tbl_mask) 631 break; 632 /* Table full - expand (NB last entry not used....) */ 633 proclist_unlock_write(s); 634 } 635 636 /* pid is 'saved use count' + 'size' + entry */ 637 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 638 if ((uint)pid > (uint)pid_max) 639 pid &= pid_tbl_mask; 640 p->p_pid = pid; 641 next_free_pt = nxt & pid_tbl_mask; 642 643 /* Grab table slot */ 644 pt->pt_proc = p; 645 pid_alloc_cnt++; 646 647 proclist_unlock_write(s); 648 649 return p; 650 } 651 652 /* 653 * Free last resources of a process - called from proc_free (in kern_exit.c) 654 */ 655 void 656 proc_free_mem(struct proc *p) 657 { 658 int s; 659 pid_t pid = p->p_pid; 660 struct pid_table *pt; 661 662 s = proclist_lock_write(); 663 664 pt = &pid_table[pid & pid_tbl_mask]; 665 #ifdef DIAGNOSTIC 666 if (__predict_false(pt->pt_proc != p)) 667 panic("proc_free: pid_table mismatch, pid %x, proc %p", 668 pid, p); 669 #endif 670 /* save pid use count in slot */ 671 pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); 672 673 if (pt->pt_pgrp == NULL) { 674 /* link last freed entry onto ours */ 675 pid &= pid_tbl_mask; 676 pt = &pid_table[last_free_pt]; 677 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); 678 last_free_pt = pid; 679 pid_alloc_cnt--; 680 } 681 682 
nprocs--; 683 proclist_unlock_write(s); 684 685 pool_put(&proc_pool, p); 686 } 687 688 /* 689 * Move p to a new or existing process group (and session) 690 * 691 * If we are creating a new pgrp, the pgid should equal 692 * the calling process' pid. 693 * If is only valid to enter a process group that is in the session 694 * of the process. 695 * Also mksess should only be set if we are creating a process group 696 * 697 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the 698 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid) 699 */ 700 int 701 enterpgrp(struct proc *p, pid_t pgid, int mksess) 702 { 703 struct pgrp *new_pgrp, *pgrp; 704 struct session *sess; 705 struct proc *curp = curproc; 706 pid_t pid = p->p_pid; 707 int rval; 708 int s; 709 pid_t pg_id = NO_PGID; 710 711 /* Allocate data areas we might need before doing any validity checks */ 712 proclist_lock_read(); /* Because pid_table might change */ 713 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) { 714 proclist_unlock_read(); 715 new_pgrp = pool_get(&pgrp_pool, PR_WAITOK); 716 } else { 717 proclist_unlock_read(); 718 new_pgrp = NULL; 719 } 720 if (mksess) 721 sess = pool_get(&session_pool, M_WAITOK); 722 else 723 sess = NULL; 724 725 s = proclist_lock_write(); 726 rval = EPERM; /* most common error (to save typing) */ 727 728 /* Check pgrp exists or can be created */ 729 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 730 if (pgrp != NULL && pgrp->pg_id != pgid) 731 goto done; 732 733 /* Can only set another process under restricted circumstances. */ 734 if (p != curp) { 735 /* must exist and be one of our children... */ 736 if (p != pid_table[pid & pid_tbl_mask].pt_proc 737 || !inferior(p, curp)) { 738 rval = ESRCH; 739 goto done; 740 } 741 /* ... in the same session... */ 742 if (sess != NULL || p->p_session != curp->p_session) 743 goto done; 744 /* ... existing pgid must be in same session ... 
*/ 745 if (pgrp != NULL && pgrp->pg_session != p->p_session) 746 goto done; 747 /* ... and not done an exec. */ 748 if (p->p_flag & P_EXEC) { 749 rval = EACCES; 750 goto done; 751 } 752 } 753 754 /* Changing the process group/session of a session 755 leader is definitely off limits. */ 756 if (SESS_LEADER(p)) { 757 if (sess == NULL && p->p_pgrp == pgrp) 758 /* unless it's a definite noop */ 759 rval = 0; 760 goto done; 761 } 762 763 /* Can only create a process group with id of process */ 764 if (pgrp == NULL && pgid != pid) 765 goto done; 766 767 /* Can only create a session if creating pgrp */ 768 if (sess != NULL && pgrp != NULL) 769 goto done; 770 771 /* Check we allocated memory for a pgrp... */ 772 if (pgrp == NULL && new_pgrp == NULL) 773 goto done; 774 775 /* Don't attach to 'zombie' pgrp */ 776 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 777 goto done; 778 779 /* Expect to succeed now */ 780 rval = 0; 781 782 if (pgrp == p->p_pgrp) 783 /* nothing to do */ 784 goto done; 785 786 /* Ok all setup, link up required structures */ 787 if (pgrp == NULL) { 788 pgrp = new_pgrp; 789 new_pgrp = 0; 790 if (sess != NULL) { 791 sess->s_sid = p->p_pid; 792 sess->s_leader = p; 793 sess->s_count = 1; 794 sess->s_ttyvp = NULL; 795 sess->s_ttyp = NULL; 796 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 797 memcpy(sess->s_login, p->p_session->s_login, 798 sizeof(sess->s_login)); 799 p->p_flag &= ~P_CONTROLT; 800 } else { 801 sess = p->p_pgrp->pg_session; 802 SESSHOLD(sess); 803 } 804 pgrp->pg_session = sess; 805 sess = 0; 806 807 pgrp->pg_id = pgid; 808 LIST_INIT(&pgrp->pg_members); 809 #ifdef DIAGNOSTIC 810 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 811 panic("enterpgrp: pgrp table slot in use"); 812 if (__predict_false(mksess && p != curp)) 813 panic("enterpgrp: mksession and p != curproc"); 814 #endif 815 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 816 pgrp->pg_jobc = 0; 817 } 818 819 /* 820 * Adjust eligibility of affected pgrps to 
participate in job control. 821 * Increment eligibility counts before decrementing, otherwise we 822 * could reach 0 spuriously during the first call. 823 */ 824 fixjobc(p, pgrp, 1); 825 fixjobc(p, p->p_pgrp, 0); 826 827 /* Move process to requested group */ 828 LIST_REMOVE(p, p_pglist); 829 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 830 /* defer delete until we've dumped the lock */ 831 pg_id = p->p_pgrp->pg_id; 832 p->p_pgrp = pgrp; 833 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 834 835 done: 836 proclist_unlock_write(s); 837 if (sess != NULL) 838 pool_put(&session_pool, sess); 839 if (new_pgrp != NULL) 840 pool_put(&pgrp_pool, new_pgrp); 841 if (pg_id != NO_PGID) 842 pg_delete(pg_id); 843 #ifdef DEBUG_PGRP 844 if (__predict_false(rval)) 845 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 846 pid, pgid, mksess, curp->p_pid, rval); 847 #endif 848 return rval; 849 } 850 851 /* 852 * remove process from process group 853 */ 854 int 855 leavepgrp(struct proc *p) 856 { 857 int s; 858 struct pgrp *pgrp; 859 pid_t pg_id; 860 861 s = proclist_lock_write(); 862 pgrp = p->p_pgrp; 863 LIST_REMOVE(p, p_pglist); 864 p->p_pgrp = 0; 865 pg_id = LIST_EMPTY(&pgrp->pg_members) ? 
pgrp->pg_id : NO_PGID; 866 proclist_unlock_write(s); 867 868 if (pg_id != NO_PGID) 869 pg_delete(pg_id); 870 return 0; 871 } 872 873 static void 874 pg_free(pid_t pg_id) 875 { 876 struct pgrp *pgrp; 877 struct pid_table *pt; 878 int s; 879 880 s = proclist_lock_write(); 881 pt = &pid_table[pg_id & pid_tbl_mask]; 882 pgrp = pt->pt_pgrp; 883 #ifdef DIAGNOSTIC 884 if (__predict_false(!pgrp || pgrp->pg_id != pg_id 885 || !LIST_EMPTY(&pgrp->pg_members))) 886 panic("pg_free: process group absent or has members"); 887 #endif 888 pt->pt_pgrp = 0; 889 890 if (!P_VALID(pt->pt_proc)) { 891 /* orphaned pgrp, put slot onto free list */ 892 #ifdef DIAGNOSTIC 893 if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask)) 894 panic("pg_free: process slot on free list"); 895 #endif 896 897 pg_id &= pid_tbl_mask; 898 pt = &pid_table[last_free_pt]; 899 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id); 900 last_free_pt = pg_id; 901 pid_alloc_cnt--; 902 } 903 proclist_unlock_write(s); 904 905 pool_put(&pgrp_pool, pgrp); 906 } 907 908 /* 909 * delete a process group 910 */ 911 static void 912 pg_delete(pid_t pg_id) 913 { 914 struct pgrp *pgrp; 915 struct tty *ttyp; 916 struct session *ss; 917 int s, is_pgrp_leader; 918 919 s = proclist_lock_write(); 920 pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 921 if (pgrp == NULL || pgrp->pg_id != pg_id || 922 !LIST_EMPTY(&pgrp->pg_members)) { 923 proclist_unlock_write(s); 924 return; 925 } 926 927 ss = pgrp->pg_session; 928 929 /* Remove reference (if any) from tty to this process group */ 930 ttyp = ss->s_ttyp; 931 if (ttyp != NULL && ttyp->t_pgrp == pgrp) { 932 ttyp->t_pgrp = NULL; 933 #ifdef DIAGNOSTIC 934 if (ttyp->t_session != ss) 935 panic("pg_delete: wrong session on terminal"); 936 #endif 937 } 938 939 /* 940 * The leading process group in a session is freed 941 * by sessdelete() if last reference. 
942 */ 943 is_pgrp_leader = (ss->s_sid == pgrp->pg_id); 944 proclist_unlock_write(s); 945 SESSRELE(ss); 946 947 if (is_pgrp_leader) 948 return; 949 950 pg_free(pg_id); 951 } 952 953 /* 954 * Delete session - called from SESSRELE when s_count becomes zero. 955 */ 956 void 957 sessdelete(struct session *ss) 958 { 959 /* 960 * We keep the pgrp with the same id as the session in 961 * order to stop a process being given the same pid. 962 * Since the pgrp holds a reference to the session, it 963 * must be a 'zombie' pgrp by now. 964 */ 965 966 pg_free(ss->s_sid); 967 968 pool_put(&session_pool, ss); 969 } 970 971 /* 972 * Adjust pgrp jobc counters when specified process changes process group. 973 * We count the number of processes in each process group that "qualify" 974 * the group for terminal job control (those with a parent in a different 975 * process group of the same session). If that count reaches zero, the 976 * process group becomes orphaned. Check both the specified process' 977 * process group and that of its children. 978 * entering == 0 => p is leaving specified group. 979 * entering == 1 => p is entering specified group. 980 * 981 * Call with proclist_lock held. 982 */ 983 void 984 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 985 { 986 struct pgrp *hispgrp; 987 struct session *mysession = pgrp->pg_session; 988 struct proc *child; 989 990 /* 991 * Check p's parent to see whether p qualifies its own process 992 * group; if so, adjust count for p's process group. 993 */ 994 hispgrp = p->p_pptr->p_pgrp; 995 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 996 if (entering) 997 pgrp->pg_jobc++; 998 else if (--pgrp->pg_jobc == 0) 999 orphanpg(pgrp); 1000 } 1001 1002 /* 1003 * Check this process' children to see whether they qualify 1004 * their process groups; if so, adjust counts for children's 1005 * process groups. 
1006 */ 1007 LIST_FOREACH(child, &p->p_children, p_sibling) { 1008 hispgrp = child->p_pgrp; 1009 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1010 !P_ZOMBIE(child)) { 1011 if (entering) 1012 hispgrp->pg_jobc++; 1013 else if (--hispgrp->pg_jobc == 0) 1014 orphanpg(hispgrp); 1015 } 1016 } 1017 } 1018 1019 /* 1020 * A process group has become orphaned; 1021 * if there are any stopped processes in the group, 1022 * hang-up all process in that group. 1023 * 1024 * Call with proclist_lock held. 1025 */ 1026 static void 1027 orphanpg(struct pgrp *pg) 1028 { 1029 struct proc *p; 1030 1031 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1032 if (p->p_stat == SSTOP) { 1033 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1034 psignal(p, SIGHUP); 1035 psignal(p, SIGCONT); 1036 } 1037 return; 1038 } 1039 } 1040 } 1041 1042 /* mark process as suid/sgid, reset some values to defaults */ 1043 void 1044 p_sugid(struct proc *p) 1045 { 1046 struct plimit *lim; 1047 char *cn; 1048 1049 p->p_flag |= P_SUGID; 1050 /* reset what needs to be reset in plimit */ 1051 lim = p->p_limit; 1052 if (lim->pl_corename != defcorename) { 1053 if (lim->p_refcnt > 1 && 1054 (lim->p_lflags & PL_SHAREMOD) == 0) { 1055 p->p_limit = limcopy(lim); 1056 limfree(lim); 1057 lim = p->p_limit; 1058 } 1059 simple_lock(&lim->p_slock); 1060 cn = lim->pl_corename; 1061 lim->pl_corename = defcorename; 1062 simple_unlock(&lim->p_slock); 1063 if (cn != defcorename) 1064 free(cn, M_TEMP); 1065 } 1066 } 1067 1068 #ifdef DDB 1069 #include <ddb/db_output.h> 1070 void pidtbl_dump(void); 1071 void 1072 pidtbl_dump(void) 1073 { 1074 struct pid_table *pt; 1075 struct proc *p; 1076 struct pgrp *pgrp; 1077 int id; 1078 1079 db_printf("pid table %p size %x, next %x, last %x\n", 1080 pid_table, pid_tbl_mask+1, 1081 next_free_pt, last_free_pt); 1082 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1083 p = pt->pt_proc; 1084 if (!P_VALID(p) && !pt->pt_pgrp) 1085 continue; 1086 db_printf(" id %x: ", id); 1087 if 
(P_VALID(p)) 1088 db_printf("proc %p id %d (0x%x) %s\n", 1089 p, p->p_pid, p->p_pid, p->p_comm); 1090 else 1091 db_printf("next %x use %x\n", 1092 P_NEXT(p) & pid_tbl_mask, 1093 P_NEXT(p) & ~pid_tbl_mask); 1094 if ((pgrp = pt->pt_pgrp)) { 1095 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1096 pgrp->pg_session, pgrp->pg_session->s_sid, 1097 pgrp->pg_session->s_count, 1098 pgrp->pg_session->s_login); 1099 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1100 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1101 pgrp->pg_members.lh_first); 1102 for (p = pgrp->pg_members.lh_first; p != 0; 1103 p = p->p_pglist.le_next) { 1104 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1105 p->p_pid, p, p->p_pgrp, p->p_comm); 1106 } 1107 } 1108 } 1109 } 1110 #endif /* DDB */ 1111 1112 #ifdef KSTACK_CHECK_MAGIC 1113 #include <sys/user.h> 1114 1115 #define KSTACK_MAGIC 0xdeadbeaf 1116 1117 /* XXX should be per process basis? */ 1118 int kstackleftmin = KSTACK_SIZE; 1119 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 1120 less than this */ 1121 1122 void 1123 kstack_setup_magic(const struct lwp *l) 1124 { 1125 uint32_t *ip; 1126 uint32_t const *end; 1127 1128 KASSERT(l != NULL); 1129 KASSERT(l != &lwp0); 1130 1131 /* 1132 * fill all the stack with magic number 1133 * so that later modification on it can be detected. 1134 */ 1135 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1136 end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1137 for (; ip < end; ip++) { 1138 *ip = KSTACK_MAGIC; 1139 } 1140 } 1141 1142 void 1143 kstack_check_magic(const struct lwp *l) 1144 { 1145 uint32_t const *ip, *end; 1146 int stackleft; 1147 1148 KASSERT(l != NULL); 1149 1150 /* don't check proc0 */ /*XXX*/ 1151 if (l == &lwp0) 1152 return; 1153 1154 #ifdef __MACHINE_STACK_GROWS_UP 1155 /* stack grows upwards (eg. 
hppa) */ 1156 ip = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1157 end = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1158 for (ip--; ip >= end; ip--) 1159 if (*ip != KSTACK_MAGIC) 1160 break; 1161 1162 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip; 1163 #else /* __MACHINE_STACK_GROWS_UP */ 1164 /* stack grows downwards (eg. i386) */ 1165 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1166 end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1167 for (; ip < end; ip++) 1168 if (*ip != KSTACK_MAGIC) 1169 break; 1170 1171 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l); 1172 #endif /* __MACHINE_STACK_GROWS_UP */ 1173 1174 if (kstackleftmin > stackleft) { 1175 kstackleftmin = stackleft; 1176 if (stackleft < kstackleftthres) 1177 printf("warning: kernel stack left %d bytes" 1178 "(pid %u:lid %u)\n", stackleft, 1179 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1180 } 1181 1182 if (stackleft <= 0) { 1183 panic("magic on the top of kernel stack changed for " 1184 "pid %u, lid %u: maybe kernel stack overflow", 1185 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1186 } 1187 } 1188 #endif /* KSTACK_CHECK_MAGIC */ 1189 1190 /* XXX shouldn't be here */ 1191 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) 1192 #define PROCLIST_ASSERT_LOCKED_READ() \ 1193 KASSERT(lockstatus(&proclist_lock) == LK_SHARED) 1194 #else 1195 #define PROCLIST_ASSERT_LOCKED_READ() /* nothing */ 1196 #endif 1197 1198 int 1199 proclist_foreach_call(struct proclist *list, 1200 int (*callback)(struct proc *, void *arg), void *arg) 1201 { 1202 struct proc marker; 1203 struct proc *p; 1204 struct lwp * const l = curlwp; 1205 int ret = 0; 1206 1207 marker.p_flag = P_MARKER; 1208 PHOLD(l); 1209 proclist_lock_read(); 1210 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) { 1211 if (p->p_flag & P_MARKER) { 1212 p = LIST_NEXT(p, p_list); 1213 continue; 1214 } 1215 LIST_INSERT_AFTER(p, &marker, p_list); 1216 ret = (*callback)(p, arg); 1217 PROCLIST_ASSERT_LOCKED_READ(); 1218 p = 
LIST_NEXT(&marker, p_list); 1219 LIST_REMOVE(&marker, p_list); 1220 } 1221 proclist_unlock_read(); 1222 PRELE(l); 1223 1224 return ret; 1225 } 1226 1227 int 1228 proc_vmspace_getref(struct proc *p, struct vmspace **vm) 1229 { 1230 1231 /* XXXCDC: how should locking work here? */ 1232 1233 /* curproc exception is for coredump. */ 1234 1235 if ((p != curproc && (p->p_flag & P_WEXIT) != 0) || 1236 (p->p_vmspace->vm_refcnt < 1)) { /* XXX */ 1237 return EFAULT; 1238 } 1239 1240 uvmspace_addref(p->p_vmspace); 1241 *vm = p->p_vmspace; 1242 1243 return 0; 1244 } 1245