/*	$NetBSD: kern_proc.c,v 1.85 2005/12/26 18:45:27 perry Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.85 2005/12/26 18:45:27 perry Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/filedesc.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */

/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p) ((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))

#define	INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */
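
/*
 * Worked example of the free-slot encoding above (illustrative only,
 * not used by the code): with the initial 32-entry table, a free slot
 * whose next free slot is index 5 and whose saved 'use count' is
 * 30016 stores P_FREE(30016 | 5), i.e. the odd value ((30021 << 1) | 1),
 * which can never be a valid proc pointer.  P_VALID() tests the low
 * bit, and P_NEXT() shifts it back out: the low 5 bits of the result
 * give the next free index, the upper bits the use count.
 */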

/* Components of the first process -- never freed. */
struct session session0;
struct pgrp pgrp0;
struct proc proc0;
struct lwp lwp0;
struct pcred cred0;
struct filedesc0 filedesc0;
struct cwdinfo cwdi0;
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;

extern struct user *proc0paddr;

extern const struct emul emul_netbsd;	/* defined in kern_exec.c */

int nofile = NOFILE;
int maxuprc = MAXUPRC;
int cmask = CMASK;

POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
    &pool_allocator_nointr);
POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
    &pool_allocator_nointr);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr);
POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
    &pool_allocator_nointr);
POOL_INIT(pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
    &pool_allocator_nointr);
POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
    &pool_allocator_nointr);
POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
    &pool_allocator_nointr);
POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
    &pool_allocator_nointr);
POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
    &pool_allocator_nointr);
POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
    &pool_allocator_nointr);
POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, "saupcpl",
    &pool_allocator_nointr);
POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
    &pool_allocator_nointr);
POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
    &pool_allocator_nointr);
POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	spinlockinit(&proclist_lock, "proclk", 0);

	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
	    M_PROC, M_WAITOK);
	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	LIST_INIT(&alllwp);

	uihashtbl =
	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
}
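
/*
 * Hand-worked example of the initialization above (illustrative only,
 * assuming the default PID_MAX of 30000 and the initial 32-entry
 * table): LINK_EMPTY evaluates to (30000 + 32) & ~31 == 30016, so
 * slot i starts out as P_FREE(30016 + i + 1) - a 'use count' of 30016
 * with slot i + 1 as the next free link.  Presetting the use count
 * above PID_MAX forces proc_alloc() to mask the first pid issued from
 * each slot down to the slot index itself, so slot 1 yields pid 1.
 */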

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct session *sess;
	struct lwp *l;
	int s;
	u_int i;
	rlim_t lim;

	p = &proc0;
	pg = &pgrp0;
	sess = &session0;
	l = &lwp0;

	simple_lock_init(&p->p_lock);
	LIST_INIT(&p->p_lwps);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	p->p_nlwps = 1;
	simple_lock_init(&p->p_sigctx.ps_silock);
	CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);

	s = proclist_lock_write();

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	p->p_pgrp = pg;
	pid_table[0].pt_pgrp = pg;
	LIST_INIT(&pg->pg_members);
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

	pg->pg_session = sess;
	sess->s_count = 1;
	sess->s_sid = 0;
	sess->s_leader = p;

	proclist_unlock_write(s);

	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to
	 * init(8) when they exit.  init(8) can easily wait them out
	 * for us.
	 */
	p->p_flag = P_SYSTEM | P_NOCLDWAIT;
	p->p_stat = SACTIVE;
	p->p_nice = NZERO;
	p->p_emul = &emul_netbsd;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
	strncpy(p->p_comm, "swapper", MAXCOMLEN);

	l->l_flag = L_INMEM;
	l->l_stat = LSONPROC;
	p->p_nrlwps = 1;

	callout_init(&l->l_tsleep_ch);

	/* Create credentials. */
	cred0.p_refcnt = 1;
	p->p_cred = &cred0;
	p->p_ucred = crget();
	p->p_ucred->cr_ngroups = 1;	/* group 0 */

	/* Create the CWD info. */
	p->p_cwdi = &cwdi0;
	cwdi0.cwdi_cmask = cmask;
	cwdi0.cwdi_refcnt = 1;
	simple_lock_init(&cwdi0.cwdi_slock);

	/* Create the limits structures. */
	p->p_limit = &limit0;
	simple_lock_init(&limit0.p_slock);
	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur =
		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
	    maxfiles < nofile ? maxfiles : nofile;

	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
	    maxproc < maxuprc ? maxproc : maxuprc;

	lim = ptoa(uvmexp.free);
	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0.pl_corename = defcorename;
	limit0.p_refcnt = 1;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	p->p_fd = &filedesc0.fd_fd;
	fdinit1(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));
	p->p_vmspace = &vmspace0;

	l->l_addr = proc0paddr;		/* XXX */

	p->p_stats = &pstat0;

	/* Initialize signal state for proc0. */
	p->p_sigacts = &sigacts0;
	siginit(p);
}

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
	int error;

	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
	int s, error;

	s = splclock();
	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock: failed to acquire lock");
#endif
	return s;
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}
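
/*
 * Typical usage of the lock functions above (a sketch, not code taken
 * from this file): readers bracket list traversal with the read lock;
 * writers must preserve the returned spl value so clock interrupts
 * stay blocked for the duration of the update.
 *
 *	int s = proclist_lock_write();
 *	... modify allproc, zombproc or pid_table ...
 *	proclist_unlock_write(s);
 */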

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;

	if (pg_id < 0) {
		struct proc *p1 = pfind(-pg_id);
		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgfind(pg_id);
		if (pgrp == NULL)
			return EINVAL;
	}
	if (pgrp->pg_session != p->p_pgrp->pg_session)
		return EPERM;
	return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return 0;
	return 1;
}

/*
 * Locate a process by number
 */
struct proc *
p_find(pid_t pid, uint flags)
{
	struct proc *p;
	char stat;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	p = pid_table[pid & pid_tbl_mask].pt_proc;
	/* Only allow live processes to be found by pid. */
	if (P_VALID(p) && p->p_pid == pid &&
	    ((stat = p->p_stat) == SACTIVE || stat == SSTOP
	      || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
		if (flags & PFIND_UNLOCK_OK)
			proclist_unlock_read();
		return p;
	}
	if (flags & PFIND_UNLOCK_FAIL)
		proclist_unlock_read();
	return NULL;
}

/*
 * Locate a process group by number
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
	struct pgrp *pg;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		if (flags & PFIND_UNLOCK_FAIL)
			proclist_unlock_read();
		return NULL;
	}

	if (flags & PFIND_UNLOCK_OK)
		proclist_unlock_read();
	return pg;
}

static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}
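
/*
 * Unzip example for expand_pid_table() (hypothetical numbers): when
 * the table grows from 32 to 64 slots, old slot 5 splits into new
 * slots 5 and 37.  If the old slot held pid 37 (37 & 32 != 0), the
 * entry lands in upper-half slot 37 and lower-half slot 5 is freed;
 * had it held pid 5, the entry would stay in lower-half slot 5 and
 * upper-half slot 37 would be freed.  Each vacated half-slot goes on
 * the front of the free list with a saved use count chosen so the
 * pids it later issues cannot collide with any issued earlier.
 */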

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int s;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_get(&proc_pool, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */

	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		s = proclist_lock_write();
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		proclist_unlock_write(s);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	proclist_unlock_write(s);

	return p;
}

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
	int s;
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	s = proclist_lock_write();

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
		    pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	nprocs--;
	proclist_unlock_write(s);

	pool_put(&proc_pool, p);
}
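
/*
 * Pid arithmetic worked by hand (illustrative, default tuning): with
 * a fresh 32-entry table, next_free_pt == 1 and slot 1 holds
 * P_FREE(30016 + 2), so nxt == 30018 and the candidate pid is
 * (30018 & ~31) + 31 + 1 + 1 == 30049; that exceeds pid_max (30000),
 * so it is masked down to 30049 & 31 == 1.  When pid 1 later dies,
 * proc_free_mem() saves use count 1 & ~31 == 0 in the slot, and a
 * later allocation from slot 1 produces 0 + 31 + 1 + 1 == 33: the
 * same slot never hands out the same pid twice in succession.
 */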

/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		sess = pool_get(&session_pool, PR_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

done:
	proclist_unlock_write(s);
	if (sess != NULL)
		pool_put(&session_pool, sess);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
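
/*
 * Usage sketch (hypothetical callers, matching the comment above
 * enterpgrp()): sys_setsid() amounts to enterpgrp(p, p->p_pid, 1),
 * creating a new session and group led by p, while a job-control
 * shell placing a child into its own group amounts to
 * enterpgrp(child, child->p_pid, 0).
 */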

/*
 * remove process from process group
 */
int
leavepgrp(struct proc *p)
{
	int s;
	struct pgrp *pgrp;
	pid_t pg_id;

	s = proclist_lock_write();
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = 0;
	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
	proclist_unlock_write(s);

	if (pg_id != NO_PGID)
		pg_delete(pg_id);
	return 0;
}

static void
pg_free(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;
	int s;

	s = proclist_lock_write();
	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
	    || !LIST_EMPTY(&pgrp->pg_members)))
		panic("pg_free: process group absent or has members");
#endif
	pt->pt_pgrp = 0;

	if (!P_VALID(pt->pt_proc)) {
		/* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
			panic("pg_free: process slot on free list");
#endif

		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	proclist_unlock_write(s);

	pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct tty *ttyp;
	struct session *ss;
	int s, is_pgrp_leader;

	s = proclist_lock_write();
	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pgrp == NULL || pgrp->pg_id != pg_id ||
	    !LIST_EMPTY(&pgrp->pg_members)) {
		proclist_unlock_write(s);
		return;
	}

	ss = pgrp->pg_session;

	/* Remove reference (if any) from tty to this process group */
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
		ttyp->t_pgrp = NULL;
#ifdef DIAGNOSTIC
		if (ttyp->t_session != ss)
			panic("pg_delete: wrong session on terminal");
#endif
	}

	/*
	 * The leading process group in a session is freed
	 * by sessdelete() if last reference.
	 */
	is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
	proclist_unlock_write(s);
	SESSRELE(ss);

	if (is_pgrp_leader)
		return;

	pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a process being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */

	pg_free(ss->s_sid);

	pool_put(&session_pool, ss);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proclist_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}
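
/*
 * Job-control example for fixjobc() (illustrative): a shell S in pgrp
 * A forks child C into a new pgrp B of the same session.  C's parent
 * is in a different pgrp of that session, so C qualifies B, and
 * fixjobc(C, B, 1) makes B's pg_jobc 1.  When S exits, the walk over
 * its children in fixjobc(S, A, 0) decrements B's pg_jobc; on
 * reaching zero, B is orphaned and orphanpg() (below) signals any
 * stopped members with SIGHUP and SIGCONT.
 */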
1001 */ 1002 hispgrp = p->p_pptr->p_pgrp; 1003 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 1004 if (entering) 1005 pgrp->pg_jobc++; 1006 else if (--pgrp->pg_jobc == 0) 1007 orphanpg(pgrp); 1008 } 1009 1010 /* 1011 * Check this process' children to see whether they qualify 1012 * their process groups; if so, adjust counts for children's 1013 * process groups. 1014 */ 1015 LIST_FOREACH(child, &p->p_children, p_sibling) { 1016 hispgrp = child->p_pgrp; 1017 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1018 !P_ZOMBIE(child)) { 1019 if (entering) 1020 hispgrp->pg_jobc++; 1021 else if (--hispgrp->pg_jobc == 0) 1022 orphanpg(hispgrp); 1023 } 1024 } 1025 } 1026 1027 /* 1028 * A process group has become orphaned; 1029 * if there are any stopped processes in the group, 1030 * hang-up all process in that group. 1031 * 1032 * Call with proclist_lock held. 1033 */ 1034 static void 1035 orphanpg(struct pgrp *pg) 1036 { 1037 struct proc *p; 1038 1039 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1040 if (p->p_stat == SSTOP) { 1041 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1042 psignal(p, SIGHUP); 1043 psignal(p, SIGCONT); 1044 } 1045 return; 1046 } 1047 } 1048 } 1049 1050 /* mark process as suid/sgid, reset some values to defaults */ 1051 void 1052 p_sugid(struct proc *p) 1053 { 1054 struct plimit *lim; 1055 char *cn; 1056 1057 p->p_flag |= P_SUGID; 1058 /* reset what needs to be reset in plimit */ 1059 lim = p->p_limit; 1060 if (lim->pl_corename != defcorename) { 1061 if (lim->p_refcnt > 1 && 1062 (lim->p_lflags & PL_SHAREMOD) == 0) { 1063 p->p_limit = limcopy(lim); 1064 limfree(lim); 1065 lim = p->p_limit; 1066 } 1067 simple_lock(&lim->p_slock); 1068 cn = lim->pl_corename; 1069 lim->pl_corename = defcorename; 1070 simple_unlock(&lim->p_slock); 1071 if (cn != defcorename) 1072 free(cn, M_TEMP); 1073 } 1074 } 1075 1076 #ifdef DDB 1077 #include <ddb/db_output.h> 1078 void pidtbl_dump(void); 1079 void 1080 pidtbl_dump(void) 1081 { 1082 struct pid_table *pt; 1083 struct proc *p; 1084 struct pgrp *pgrp; 1085 int id; 1086 1087 db_printf("pid table %p size %x, next %x, last %x\n", 1088 pid_table, pid_tbl_mask+1, 1089 next_free_pt, last_free_pt); 1090 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1091 p = pt->pt_proc; 1092 if (!P_VALID(p) && !pt->pt_pgrp) 1093 continue; 1094 db_printf(" id %x: ", id); 1095 if (P_VALID(p)) 1096 db_printf("proc %p id %d (0x%x) %s\n", 1097 p, p->p_pid, p->p_pid, p->p_comm); 1098 else 1099 db_printf("next %x use %x\n", 1100 P_NEXT(p) & pid_tbl_mask, 1101 P_NEXT(p) & ~pid_tbl_mask); 1102 if ((pgrp = pt->pt_pgrp)) { 1103 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1104 pgrp->pg_session, pgrp->pg_session->s_sid, 1105 pgrp->pg_session->s_count, 1106 pgrp->pg_session->s_login); 1107 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1108 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1109 pgrp->pg_members.lh_first); 1110 for (p = pgrp->pg_members.lh_first; p != 0; 1111 p = p->p_pglist.le_next) { 1112 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1113 p->p_pid, p, p->p_pgrp, p->p_comm); 1114 } 1115 } 1116 } 1117 } 1118 #endif /* DDB */ 1119 1120 #ifdef KSTACK_CHECK_MAGIC 1121 #include <sys/user.h> 1122 1123 #define KSTACK_MAGIC 0xdeadbeaf 1124 1125 /* XXX should be per process basis? 

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8;	/* warn if remaining stack is
					   less than this */

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill all the stack with magic number
	 * so that later modification on it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

/* XXX shouldn't be here */
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	PROCLIST_ASSERT_LOCKED_READ()	\
	KASSERT(lockstatus(&proclist_lock) == LK_SHARED)
#else
#define	PROCLIST_ASSERT_LOCKED_READ()	/* nothing */
#endif

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	struct lwp * const l = curlwp;
	int ret = 0;

	marker.p_flag = P_MARKER;
	PHOLD(l);
	proclist_lock_read();
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & P_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		PROCLIST_ASSERT_LOCKED_READ();
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	proclist_unlock_read();
	PRELE(l);

	return ret;
}
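
/*
 * Usage sketch for proclist_foreach_call() (hypothetical callback,
 * not part of this file).  The marker entry keeps the walk's place,
 * so the callback may sleep without the iteration losing its spot;
 * a nonzero return from the callback terminates the walk early.
 *
 *	static int
 *	count_stopped(struct proc *p, void *arg)
 *	{
 *		if (p->p_stat == SSTOP)
 *			(*(int *)arg)++;
 *		return 0;
 *	}
 *
 *	int n = 0;
 *	proclist_foreach_call(&allproc, count_stopped, &n);
 */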