/*	$NetBSD: kern_proc.c,v 1.88 2006/04/10 11:16:22 onoe Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.88 2006/04/10 11:16:22 onoe Exp $");

#include "opt_kstack.h"
#include "opt_maxuprc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/filedesc.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */

/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
        struct proc     *pt_proc;
        struct pgrp     *pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p)  ((uint)p)
#endif
#define P_VALID(p)      (!(p2u(p) & 1))
#define P_NEXT(p)       (p2u(p) >> 1)
#define P_FREE(pid)     ((struct proc *)(uintptr_t)((pid) << 1 | 1))
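
/*
 * Worked example of the encoding: proc structures are at least
 * 2-byte aligned, so bit 0 of a valid pt_proc pointer is always 0.
 * A free slot stores (link << 1) | 1 instead:
 *
 *	P_FREE(42)  == (struct proc *)(42 << 1 | 1) == (struct proc *)85
 *	P_VALID(85) == 0		(bit 0 set => not a real proc)
 *	P_NEXT(85)  == 85 >> 1 == 42	(recovers the free-list link)
 *
 * The link's low bits index the next free slot; the high bits are a
 * per-slot 'use count' that makes reissued pids differ between passes
 * through the table.
 */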

#define INITIAL_PID_TABLE_SIZE  (1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

/* Components of the first process -- never freed. */
struct session session0;
struct pgrp pgrp0;
struct proc proc0;
struct lwp lwp0;
struct pcred cred0;
struct filedesc0 filedesc0;
struct cwdinfo cwdi0;
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;

extern struct user *proc0paddr;

extern const struct emul emul_netbsd;	/* defined in kern_exec.c */

int nofile = NOFILE;
int maxuprc = MAXUPRC;
int cmask = CMASK;

POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
    &pool_allocator_nointr);
POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
    &pool_allocator_nointr);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr);
POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
    &pool_allocator_nointr);
POOL_INIT(pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
    &pool_allocator_nointr);
POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
    &pool_allocator_nointr);
POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
    &pool_allocator_nointr);
POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
    &pool_allocator_nointr);
POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
    &pool_allocator_nointr);
POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
    &pool_allocator_nointr);
POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, "saupcpl",
    &pool_allocator_nointr);
POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
    &pool_allocator_nointr);
POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
    &pool_allocator_nointr);
POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
        { &allproc },
        { &zombproc },
        { NULL },
};

static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
        const struct proclist_desc *pd;
        int i;
#define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

        for (pd = proclists; pd->pd_list != NULL; pd++)
                LIST_INIT(pd->pd_list);

        spinlockinit(&proclist_lock, "proclk", 0);

        pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
            M_PROC, M_WAITOK);
        /* Set free list running through table...
           Preset 'use count' above PID_MAX so we allocate pid 1 next. */
        for (i = 0; i <= pid_tbl_mask; i++) {
                pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
                pid_table[i].pt_pgrp = 0;
        }
        /* slot 0 is just grabbed */
        next_free_pt = 1;
        /* Need to fix last entry: it must terminate the free list. */
        last_free_pt = pid_tbl_mask;
        pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
        /* point at which we grow table - to avoid reusing pids too often */
        pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

        LIST_INIT(&alllwp);

        uihashtbl =
            hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
}
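
/*
 * Concretely, with INITIAL_PID_TABLE_SIZE == 32 and PID_MAX == 30000,
 * LINK_EMPTY is 30016, so procinit() leaves:
 *
 *	pid_table[i].pt_proc  = P_FREE(30016 + i + 1)	(i = 0 .. 30)
 *	pid_table[31].pt_proc = P_FREE(30016)
 *
 * The low 5 bits of each link point at the next free slot (slot 31
 * links to 0, marking the end of the list), while the preset high
 * bits push the first computed pids past pid_max so that allocation
 * wraps and hands out pid 1 first; see proc_alloc() below.
 */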

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
        struct proc *p;
        struct pgrp *pg;
        struct session *sess;
        struct lwp *l;
        int s;
        u_int i;
        rlim_t lim;

        p = &proc0;
        pg = &pgrp0;
        sess = &session0;
        l = &lwp0;

        simple_lock_init(&p->p_lock);
        LIST_INIT(&p->p_lwps);
        LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
        p->p_nlwps = 1;
        simple_lock_init(&p->p_sigctx.ps_silock);
        CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);

        s = proclist_lock_write();

        pid_table[0].pt_proc = p;
        LIST_INSERT_HEAD(&allproc, p, p_list);
        LIST_INSERT_HEAD(&alllwp, l, l_list);

        p->p_pgrp = pg;
        pid_table[0].pt_pgrp = pg;
        LIST_INIT(&pg->pg_members);
        LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

        pg->pg_session = sess;
        sess->s_count = 1;
        sess->s_sid = 0;
        sess->s_leader = p;

        proclist_unlock_write(s);

        /*
         * Set P_NOCLDWAIT so that kernel threads are reparented to
         * init(8) when they exit.  init(8) can easily wait them out
         * for us.
         */
        p->p_flag = P_SYSTEM | P_NOCLDWAIT;
        p->p_stat = SACTIVE;
        p->p_nice = NZERO;
        p->p_emul = &emul_netbsd;
#ifdef __HAVE_SYSCALL_INTERN
        (*p->p_emul->e_syscall_intern)(p);
#endif
        strncpy(p->p_comm, "swapper", MAXCOMLEN);

        l->l_flag = L_INMEM;
        l->l_stat = LSONPROC;
        p->p_nrlwps = 1;

        callout_init(&l->l_tsleep_ch);

        /* Create credentials. */
        cred0.p_refcnt = 1;
        p->p_cred = &cred0;
        p->p_ucred = crget();
        p->p_ucred->cr_ngroups = 1;	/* group 0 */

        /* Create the CWD info. */
        p->p_cwdi = &cwdi0;
        cwdi0.cwdi_cmask = cmask;
        cwdi0.cwdi_refcnt = 1;
        simple_lock_init(&cwdi0.cwdi_slock);

        /* Create the limits structures. */
        p->p_limit = &limit0;
        simple_lock_init(&limit0.p_slock);
        for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
                limit0.pl_rlimit[i].rlim_cur =
                    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

        limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
        limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
            maxfiles < nofile ? maxfiles : nofile;

        limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
        limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
            maxproc < maxuprc ? maxproc : maxuprc;

        lim = ptoa(uvmexp.free);
        limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
        limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
        limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
        limit0.pl_corename = defcorename;
        limit0.p_refcnt = 1;

        /* Configure virtual memory system, set vm rlimits. */
        uvm_init_limits(p);

        /* Initialize file descriptor table for proc0. */
        p->p_fd = &filedesc0.fd_fd;
        fdinit1(&filedesc0);

        /*
         * Initialize proc0's vmspace, which uses the kernel pmap.
         * All kernel processes (which never have user space mappings)
         * share proc0's vmspace, and thus, the kernel pmap.
         */
        uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
            trunc_page(VM_MAX_ADDRESS));
        p->p_vmspace = &vmspace0;

        l->l_addr = proc0paddr;	/* XXX */

        p->p_stats = &pstat0;

        /* Initialize signal state for proc0. */
        p->p_sigacts = &sigacts0;
        siginit(p);
}
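
/*
 * Typical usage of the proclist lock primitives below (a sketch; it
 * restates the locking rules documented near the top of this file):
 * lookups take the read lock, while any change to allproc, zombproc
 * or pid_table takes the write lock, which also blocks clock
 * interrupts:
 *
 *	int s = proclist_lock_write();
 *	... modify allproc / zombproc / pid_table ...
 *	proclist_unlock_write(s);
 */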

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
        int error;

        error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
        if (__predict_false(error != 0))
                panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

        (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
        int s, error;

        s = splclock();
        error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
        if (__predict_false(error != 0))
                panic("proclist_lock: failed to acquire lock");
#endif
        return s;
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

        (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
        splx(s);
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
        struct pgrp *pgrp;

        if (pg_id < 0) {
                struct proc *p1 = pfind(-pg_id);
                if (p1 == NULL)
                        return EINVAL;
                pgrp = p1->p_pgrp;
        } else {
                pgrp = pgfind(pg_id);
                if (pgrp == NULL)
                        return EINVAL;
        }
        if (pgrp->pg_session != p->p_pgrp->pg_session)
                return EPERM;
        return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

        for (; p != q; p = p->p_pptr)
                if (p->p_pid == 0)
                        return 0;
        return 1;
}
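
/*
 * The PFIND_* flags accepted by p_find() and pg_find() control the
 * locking behaviour around the lookup.  For example (a sketch of the
 * semantics visible below): a caller that already holds the proclist
 * lock and wants to keep it across both outcomes would use
 *
 *	proclist_lock_read();
 *	p = p_find(pid, PFIND_LOCKED);	(lock neither taken nor dropped)
 *	...
 *	proclist_unlock_read();
 *
 * whereas PFIND_UNLOCK_OK / PFIND_UNLOCK_FAIL ask the lookup itself
 * to drop the read lock on success / failure respectively.
 */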

/*
 * Locate a process by number
 */
struct proc *
p_find(pid_t pid, uint flags)
{
        struct proc *p;
        char stat;

        if (!(flags & PFIND_LOCKED))
                proclist_lock_read();
        p = pid_table[pid & pid_tbl_mask].pt_proc;
        /* Only allow live processes to be found by pid. */
        if (P_VALID(p) && p->p_pid == pid &&
            ((stat = p->p_stat) == SACTIVE || stat == SSTOP
                 || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
                if (flags & PFIND_UNLOCK_OK)
                        proclist_unlock_read();
                return p;
        }
        if (flags & PFIND_UNLOCK_FAIL)
                proclist_unlock_read();
        return NULL;
}


/*
 * Locate a process group by number
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
        struct pgrp *pg;

        if (!(flags & PFIND_LOCKED))
                proclist_lock_read();
        pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
        /*
         * Can't look up a pgrp that only exists because the session
         * hasn't died yet (traditional)
         */
        if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
                if (flags & PFIND_UNLOCK_FAIL)
                        proclist_unlock_read();
                return NULL;
        }

        if (flags & PFIND_UNLOCK_OK)
                proclist_unlock_read();
        return pg;
}

static void
expand_pid_table(void)
{
        uint pt_size = pid_tbl_mask + 1;
        struct pid_table *n_pt, *new_pt;
        struct proc *proc;
        struct pgrp *pgrp;
        int i;
        int s;
        pid_t pid;

        new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

        s = proclist_lock_write();
        if (pt_size != pid_tbl_mask + 1) {
                /* Another process beat us to it... */
                proclist_unlock_write(s);
                FREE(new_pt, M_PROC);
                return;
        }

        /*
         * Copy entries from old table into new one.
         * If 'pid' is 'odd' we need to place in the upper half,
         * even pid's to the lower half.
         * Free items stay in the low half so we don't have to
         * fixup the reference to them.
         * We stuff free items on the front of the freelist
         * because we can't write to unmodified entries.
         * Processing the table backwards maintains a semblance
         * of issuing pid numbers that increase with time.
         */
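        /*
         * For instance, growing from pt_size 32 to 64: a pid p that
         * lived in slot p & 31 now belongs in slot p & 63, which is
         * either i or i + 32 depending on bit 5 of the pid.  The
         * entry is stored in whichever half that bit selects, and
         * the twin slot in the other half becomes a fresh free entry
         * pushed onto the front of the free list.
         */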
        i = pt_size - 1;
        n_pt = new_pt + i;
        for (; ; i--, n_pt--) {
                proc = pid_table[i].pt_proc;
                pgrp = pid_table[i].pt_pgrp;
                if (!P_VALID(proc)) {
                        /* Up 'use count' so that link is valid */
                        pid = (P_NEXT(proc) + pt_size) & ~pt_size;
                        proc = P_FREE(pid);
                        if (pgrp)
                                pid = pgrp->pg_id;
                } else
                        pid = proc->p_pid;

                /* Save entry in appropriate half of table */
                n_pt[pid & pt_size].pt_proc = proc;
                n_pt[pid & pt_size].pt_pgrp = pgrp;

                /* Put other piece on start of free list */
                pid = (pid ^ pt_size) & ~pid_tbl_mask;
                n_pt[pid & pt_size].pt_proc =
                        P_FREE((pid & ~pt_size) | next_free_pt);
                n_pt[pid & pt_size].pt_pgrp = 0;
                next_free_pt = i | (pid & pt_size);
                if (i == 0)
                        break;
        }

        /* Switch tables */
        n_pt = pid_table;
        pid_table = new_pt;
        pid_tbl_mask = pt_size * 2 - 1;

        /*
         * pid_max starts as PID_MAX (= 30000), once we have 16384
         * allocated pids we need it to be larger!
         */
        if (pid_tbl_mask > PID_MAX) {
                pid_max = pid_tbl_mask * 2 + 1;
                pid_alloc_lim |= pid_alloc_lim << 1;
        } else
                pid_alloc_lim <<= 1;	/* doubles number of free slots... */

        proclist_unlock_write(s);
        FREE(n_pt, M_PROC);
}

struct proc *
proc_alloc(void)
{
        struct proc *p;
        int s;
        int nxt;
        pid_t pid;
        struct pid_table *pt;

        p = pool_get(&proc_pool, PR_WAITOK);
        p->p_stat = SIDL;		/* protect against others */

        /* allocate next free pid */

        for (;;expand_pid_table()) {
                if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
                        /* ensure pids cycle through 2000+ values */
                        continue;
                s = proclist_lock_write();
                pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
                if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
                        panic("proc_alloc: slot busy");
#endif
                nxt = P_NEXT(pt->pt_proc);
                if (nxt & pid_tbl_mask)
                        break;
                /* Table full - expand (NB last entry not used....) */
                proclist_unlock_write(s);
        }

        /* pid is 'saved use count' + 'size' + entry */
        pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
        if ((uint)pid > (uint)pid_max)
                pid &= pid_tbl_mask;
        p->p_pid = pid;
        next_free_pt = nxt & pid_tbl_mask;

        /* Grab table slot */
        pt->pt_proc = p;
        pid_alloc_cnt++;

        proclist_unlock_write(s);

        return p;
}
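
/*
 * A worked trace of the pid computation above, for the very first
 * allocation after procinit() (table size 32, mask 31): next_free_pt
 * is 1 and slot 1 holds P_FREE(30018), so nxt == 30018.  Then
 *
 *	pid = (30018 & ~31) + 31 + 1 + 1 = 30016 + 33 = 30049
 *
 * which exceeds pid_max (30000), so pid &= 31 leaves pid 1 - the
 * preset 'use count' from procinit() is what forces this wrap.
 * Each time a slot is reused, its saved use count has grown by the
 * table size, so successive owners of a slot get distinct pids.
 */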

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
        int s;
        pid_t pid = p->p_pid;
        struct pid_table *pt;

        s = proclist_lock_write();

        pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
        if (__predict_false(pt->pt_proc != p))
                panic("proc_free: pid_table mismatch, pid %x, proc %p",
                        pid, p);
#endif
        /* save pid use count in slot */
        pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

        if (pt->pt_pgrp == NULL) {
                /* link last freed entry onto ours */
                pid &= pid_tbl_mask;
                pt = &pid_table[last_free_pt];
                pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
                last_free_pt = pid;
                pid_alloc_cnt--;
        }

        nprocs--;
        proclist_unlock_write(s);

        pool_put(&proc_pool, p);
}

/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
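/*
 * In terms of the callers named above (a sketch; the argument
 * defaulting happens in the callers, not here):
 *
 *	setsid():	enterpgrp(p, p->p_pid, 1)   new pgrp + new session
 *	setpgid(0, 0):	enterpgrp(p, p->p_pid, 0)   new pgrp, same session
 *	setpgid(0, pg):	enterpgrp(p, pg, 0)	    join an existing pgrp
 */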
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
        struct pgrp *new_pgrp, *pgrp;
        struct session *sess;
        struct proc *curp = curproc;
        pid_t pid = p->p_pid;
        int rval;
        int s;
        pid_t pg_id = NO_PGID;

        /* Allocate data areas we might need before doing any validity checks */
        proclist_lock_read();		/* Because pid_table might change */
        if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
                proclist_unlock_read();
                new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
        } else {
                proclist_unlock_read();
                new_pgrp = NULL;
        }
        if (mksess)
                sess = pool_get(&session_pool, PR_WAITOK);
        else
                sess = NULL;

        s = proclist_lock_write();
        rval = EPERM;	/* most common error (to save typing) */

        /* Check pgrp exists or can be created */
        pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
        if (pgrp != NULL && pgrp->pg_id != pgid)
                goto done;

        /* Can only change the group of another process under
           restricted circumstances. */
        if (p != curp) {
                /* must exist and be one of our children... */
                if (p != pid_table[pid & pid_tbl_mask].pt_proc
                    || !inferior(p, curp)) {
                        rval = ESRCH;
                        goto done;
                }
                /* ... in the same session... */
                if (sess != NULL || p->p_session != curp->p_session)
                        goto done;
                /* ... existing pgid must be in same session ... */
                if (pgrp != NULL && pgrp->pg_session != p->p_session)
                        goto done;
                /* ... and not done an exec. */
                if (p->p_flag & P_EXEC) {
                        rval = EACCES;
                        goto done;
                }
        }

        /* Changing the process group/session of a session
           leader is definitely off limits. */
        if (SESS_LEADER(p)) {
                if (sess == NULL && p->p_pgrp == pgrp)
                        /* unless it's a definite noop */
                        rval = 0;
                goto done;
        }

        /* Can only create a process group with id of process */
        if (pgrp == NULL && pgid != pid)
                goto done;

        /* Can only create a session if creating pgrp */
        if (sess != NULL && pgrp != NULL)
                goto done;

        /* Check we allocated memory for a pgrp... */
        if (pgrp == NULL && new_pgrp == NULL)
                goto done;

        /* Don't attach to 'zombie' pgrp */
        if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
                goto done;

        /* Expect to succeed now */
        rval = 0;

        if (pgrp == p->p_pgrp)
                /* nothing to do */
                goto done;

        /* Ok all setup, link up required structures */
        if (pgrp == NULL) {
                pgrp = new_pgrp;
                new_pgrp = 0;
                if (sess != NULL) {
                        sess->s_sid = p->p_pid;
                        sess->s_leader = p;
                        sess->s_count = 1;
                        sess->s_ttyvp = NULL;
                        sess->s_ttyp = NULL;
                        sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
                        memcpy(sess->s_login, p->p_session->s_login,
                            sizeof(sess->s_login));
                        p->p_flag &= ~P_CONTROLT;
                } else {
                        sess = p->p_pgrp->pg_session;
                        SESSHOLD(sess);
                }
                pgrp->pg_session = sess;
                sess = 0;

                pgrp->pg_id = pgid;
                LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
                if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
                        panic("enterpgrp: pgrp table slot in use");
                if (__predict_false(mksess && p != curp))
                        panic("enterpgrp: mksession and p != curproc");
#endif
                pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
                pgrp->pg_jobc = 0;
        }

        /*
         * Adjust eligibility of affected pgrps to participate in job control.
         * Increment eligibility counts before decrementing, otherwise we
         * could reach 0 spuriously during the first call.
         */
        fixjobc(p, pgrp, 1);
        fixjobc(p, p->p_pgrp, 0);

        /* Move process to requested group */
        LIST_REMOVE(p, p_pglist);
        if (LIST_EMPTY(&p->p_pgrp->pg_members))
                /* defer delete until we've dumped the lock */
                pg_id = p->p_pgrp->pg_id;
        p->p_pgrp = pgrp;
        LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

done:
        proclist_unlock_write(s);
        if (sess != NULL)
                pool_put(&session_pool, sess);
        if (new_pgrp != NULL)
                pool_put(&pgrp_pool, new_pgrp);
        if (pg_id != NO_PGID)
                pg_delete(pg_id);
#ifdef DEBUG_PGRP
        if (__predict_false(rval))
                printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
                        pid, pgid, mksess, curp->p_pid, rval);
#endif
        return rval;
}

/*
 * remove process from process group
 */
int
leavepgrp(struct proc *p)
{
        int s;
        struct pgrp *pgrp;
        pid_t pg_id;

        s = proclist_lock_write();
        pgrp = p->p_pgrp;
        LIST_REMOVE(p, p_pglist);
        p->p_pgrp = 0;
        pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
        proclist_unlock_write(s);

        if (pg_id != NO_PGID)
                pg_delete(pg_id);
        return 0;
}
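
/*
 * Note that both enterpgrp() and leavepgrp() record the id of a
 * now-empty pgrp and call pg_delete() only after dropping the
 * proclist lock: pg_delete() (via pg_free()) takes the write lock
 * itself, so calling it with the lock still held would deadlock.
 */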

static void
pg_free(pid_t pg_id)
{
        struct pgrp *pgrp;
        struct pid_table *pt;
        int s;

        s = proclist_lock_write();
        pt = &pid_table[pg_id & pid_tbl_mask];
        pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
        if (__predict_false(!pgrp || pgrp->pg_id != pg_id
            || !LIST_EMPTY(&pgrp->pg_members)))
                panic("pg_free: process group absent or has members");
#endif
        pt->pt_pgrp = 0;

        if (!P_VALID(pt->pt_proc)) {
                /* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
                if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
                        panic("pg_free: process slot on free list");
#endif

                pg_id &= pid_tbl_mask;
                pt = &pid_table[last_free_pt];
                pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
                last_free_pt = pg_id;
                pid_alloc_cnt--;
        }
        proclist_unlock_write(s);

        pool_put(&pgrp_pool, pgrp);
}

/*
 * delete a process group
 */
static void
pg_delete(pid_t pg_id)
{
        struct pgrp *pgrp;
        struct tty *ttyp;
        struct session *ss;
        int s, is_pgrp_leader;

        s = proclist_lock_write();
        pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
        if (pgrp == NULL || pgrp->pg_id != pg_id ||
            !LIST_EMPTY(&pgrp->pg_members)) {
                proclist_unlock_write(s);
                return;
        }

        ss = pgrp->pg_session;

        /* Remove reference (if any) from tty to this process group */
        ttyp = ss->s_ttyp;
        if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
                ttyp->t_pgrp = NULL;
#ifdef DIAGNOSTIC
                if (ttyp->t_session != ss)
                        panic("pg_delete: wrong session on terminal");
#endif
        }

        /*
         * The leading process group in a session is freed
         * by sessdelete() when the last reference is released.
         */
        is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
        proclist_unlock_write(s);
        SESSRELE(ss);

        if (is_pgrp_leader)
                return;

        pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
        /*
         * We keep the pgrp with the same id as the session in
         * order to stop a process being given the same pid.
         * Since the pgrp holds a reference to the session, it
         * must be a 'zombie' pgrp by now.
         */

        pg_free(ss->s_sid);

        pool_put(&session_pool, ss);
}
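
/*
 * Example of the counting that fixjobc() below maintains: a shell in
 * pgrp A forks a pipeline whose members it places in pgrp B (same
 * session).  Each member's parent lives in a different pgrp of the
 * session, so each one bumps B's pg_jobc.  When the last such member
 * exits or moves away, pg_jobc drops to 0, B becomes orphaned, and
 * any stopped members are sent SIGHUP then SIGCONT (see orphanpg()).
 */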

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proclist_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
        struct pgrp *hispgrp;
        struct session *mysession = pgrp->pg_session;
        struct proc *child;

        /*
         * Check p's parent to see whether p qualifies its own process
         * group; if so, adjust count for p's process group.
         */
        hispgrp = p->p_pptr->p_pgrp;
        if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
                if (entering)
                        pgrp->pg_jobc++;
                else if (--pgrp->pg_jobc == 0)
                        orphanpg(pgrp);
        }

        /*
         * Check this process' children to see whether they qualify
         * their process groups; if so, adjust counts for children's
         * process groups.
         */
        LIST_FOREACH(child, &p->p_children, p_sibling) {
                hispgrp = child->p_pgrp;
                if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
                    !P_ZOMBIE(child)) {
                        if (entering)
                                hispgrp->pg_jobc++;
                        else if (--hispgrp->pg_jobc == 0)
                                orphanpg(hispgrp);
                }
        }
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proclist_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
        struct proc *p;

        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                if (p->p_stat == SSTOP) {
                        LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                                psignal(p, SIGHUP);
                                psignal(p, SIGCONT);
                        }
                        return;
                }
        }
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
        struct plimit *lim;
        char *cn;

        p->p_flag |= P_SUGID;
        /* reset what needs to be reset in plimit */
        lim = p->p_limit;
        if (lim->pl_corename != defcorename) {
                if (lim->p_refcnt > 1 &&
                    (lim->p_lflags & PL_SHAREMOD) == 0) {
                        p->p_limit = limcopy(lim);
                        limfree(lim);
                        lim = p->p_limit;
                }
                simple_lock(&lim->p_slock);
                cn = lim->pl_corename;
                lim->pl_corename = defcorename;
                simple_unlock(&lim->p_slock);
                if (cn != defcorename)
                        free(cn, M_TEMP);
        }
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
        struct pid_table *pt;
        struct proc *p;
        struct pgrp *pgrp;
        int id;

        db_printf("pid table %p size %x, next %x, last %x\n",
                pid_table, pid_tbl_mask+1,
                next_free_pt, last_free_pt);
        for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
                p = pt->pt_proc;
                if (!P_VALID(p) && !pt->pt_pgrp)
                        continue;
                db_printf(" id %x: ", id);
                if (P_VALID(p))
                        db_printf("proc %p id %d (0x%x) %s\n",
                                p, p->p_pid, p->p_pid, p->p_comm);
                else
                        db_printf("next %x use %x\n",
                                P_NEXT(p) & pid_tbl_mask,
                                P_NEXT(p) & ~pid_tbl_mask);
                if ((pgrp = pt->pt_pgrp)) {
                        db_printf("\tsession %p, sid %d, count %d, login %s\n",
                                pgrp->pg_session, pgrp->pg_session->s_sid,
                                pgrp->pg_session->s_count,
                                pgrp->pg_session->s_login);
                        db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
                                pgrp, pgrp->pg_id, pgrp->pg_jobc,
                                pgrp->pg_members.lh_first);
                        for (p = pgrp->pg_members.lh_first; p != 0;
                            p = p->p_pglist.le_next) {
                                db_printf("\t\tpid %d addr %p pgrp %p %s\n",
                                        p->p_pid, p, p->p_pgrp, p->p_comm);
                        }
                }
        }
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define KSTACK_MAGIC    0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8;	/* warn if remaining stack is
					   less than this */
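
/*
 * The two routines below implement a simple high-water-mark check:
 * kstack_setup_magic() paints the whole kernel stack with
 * KSTACK_MAGIC, and kstack_check_magic() later scans from the end
 * away from the growth direction for the first overwritten word, so
 * 'stackleft' is the number of bytes the LWP has never touched.
 */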

void
kstack_setup_magic(const struct lwp *l)
{
        uint32_t *ip;
        uint32_t const *end;

        KASSERT(l != NULL);
        KASSERT(l != &lwp0);

        /*
         * fill all the stack with magic number
         * so that later modification on it can be detected.
         */
        ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
        end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
        for (; ip < end; ip++) {
                *ip = KSTACK_MAGIC;
        }
}

void
kstack_check_magic(const struct lwp *l)
{
        uint32_t const *ip, *end;
        int stackleft;

        KASSERT(l != NULL);

        /* don't check proc0 */ /*XXX*/
        if (l == &lwp0)
                return;

#ifdef __MACHINE_STACK_GROWS_UP
        /* stack grows upwards (eg. hppa) */
        ip = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
        end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
        for (ip--; ip >= end; ip--)
                if (*ip != KSTACK_MAGIC)
                        break;

        stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
        /* stack grows downwards (eg. i386) */
        ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
        end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
        for (; ip < end; ip++)
                if (*ip != KSTACK_MAGIC)
                        break;

        stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

        if (kstackleftmin > stackleft) {
                kstackleftmin = stackleft;
                if (stackleft < kstackleftthres)
                        printf("warning: kernel stack left %d bytes "
                            "(pid %u:lid %u)\n", stackleft,
                            (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
        }

        if (stackleft <= 0) {
                panic("magic on the top of kernel stack changed for "
                    "pid %u, lid %u: maybe kernel stack overflow",
                    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
        }
}
#endif /* KSTACK_CHECK_MAGIC */

/* XXX shouldn't be here */
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define PROCLIST_ASSERT_LOCKED_READ()   \
        KASSERT(lockstatus(&proclist_lock) == LK_SHARED)
#else
#define PROCLIST_ASSERT_LOCKED_READ()   /* nothing */
#endif

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
        struct proc marker;
        struct proc *p;
        struct lwp * const l = curlwp;
        int ret = 0;

        marker.p_flag = P_MARKER;
        PHOLD(l);
        proclist_lock_read();
        for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
                if (p->p_flag & P_MARKER) {
                        p = LIST_NEXT(p, p_list);
                        continue;
                }
                LIST_INSERT_AFTER(p, &marker, p_list);
                ret = (*callback)(p, arg);
                PROCLIST_ASSERT_LOCKED_READ();
                p = LIST_NEXT(&marker, p_list);
                LIST_REMOVE(&marker, p_list);
        }
        proclist_unlock_read();
        PRELE(l);

        return ret;
}

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

        /* XXXCDC: how should locking work here? */

        /* curproc exception is for coredump. */

        if ((p != curproc && (p->p_flag & P_WEXIT) != 0) ||
            (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
                return EFAULT;
        }

        uvmspace_addref(p->p_vmspace);
        *vm = p->p_vmspace;

        return 0;
}