1 /* $NetBSD: kern_proc.c,v 1.82 2005/08/05 11:05:44 junyoung Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1982, 1986, 1989, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 67 * 68 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 69 */ 70 71 #include <sys/cdefs.h> 72 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.82 2005/08/05 11:05:44 junyoung Exp $"); 73 74 #include "opt_kstack.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/proc.h> 80 #include <sys/resourcevar.h> 81 #include <sys/buf.h> 82 #include <sys/acct.h> 83 #include <sys/wait.h> 84 #include <sys/file.h> 85 #include <ufs/ufs/quota.h> 86 #include <sys/uio.h> 87 #include <sys/malloc.h> 88 #include <sys/pool.h> 89 #include <sys/mbuf.h> 90 #include <sys/ioctl.h> 91 #include <sys/tty.h> 92 #include <sys/signalvar.h> 93 #include <sys/ras.h> 94 #include <sys/sa.h> 95 #include <sys/savar.h> 96 #include <sys/filedesc.h> 97 98 #include <uvm/uvm.h> 99 #include <uvm/uvm_extern.h> 100 101 /* 102 * Other process lists 103 */ 104 105 struct proclist allproc; 106 struct proclist zombproc; /* resources have been freed */ 107 108 109 /* 110 * Process list locking: 111 * 112 * We have two types of locks on the proclists: read locks and write 113 * locks. Read locks can be used in interrupt context, so while we 114 * hold the write lock, we must also block clock interrupts to 115 * lock out any scheduling changes that may happen in interrupt 116 * context. 117 * 118 * The proclist lock locks the following structures: 119 * 120 * allproc 121 * zombproc 122 * pid_table 123 */ 124 struct lock proclist_lock; 125 126 /* 127 * pid to proc lookup is done by indexing the pid_table array. 128 * Since pid numbers are only allocated when an empty slot 129 * has been found, there is no need to search any lists ever. 130 * (an orphaned pgrp will lock the slot, a session will lock 131 * the pgrp with the same number.) 132 * If the table is too small it is reallocated with twice the 133 * previous size and the entries 'unzipped' into the two halves. 134 * A linked list of free entries is passed through the pt_proc 135 * field of 'free' items - set odd to be an invalid ptr. 136 */ 137 138 struct pid_table { 139 struct proc *pt_proc; 140 struct pgrp *pt_pgrp; 141 }; 142 #if 1 /* strongly typed cast - should be a noop */ 143 static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; } 144 #else 145 #define p2u(p) ((uint)p) 146 #endif 147 #define P_VALID(p) (!(p2u(p) & 1)) 148 #define P_NEXT(p) (p2u(p) >> 1) 149 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1)) 150 151 #define INITIAL_PID_TABLE_SIZE (1 << 5) 152 static struct pid_table *pid_table; 153 static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 154 static uint pid_alloc_lim; /* max we allocate before growing table */ 155 static uint pid_alloc_cnt; /* number of allocated pids */ 156 157 /* links through free slots - never empty! */ 158 static uint next_free_pt, last_free_pt; 159 static pid_t pid_max = PID_MAX; /* largest value we allocate */ 160 161 /* Components of the first process -- never freed. */ 162 struct session session0; 163 struct pgrp pgrp0; 164 struct proc proc0; 165 struct lwp lwp0; 166 struct pcred cred0; 167 struct filedesc0 filedesc0; 168 struct cwdinfo cwdi0; 169 struct plimit limit0; 170 struct pstats pstat0; 171 struct vmspace vmspace0; 172 struct sigacts sigacts0; 173 174 extern struct user *proc0paddr; 175 176 extern const struct emul emul_netbsd; /* defined in kern_exec.c */ 177 178 int nofile = NOFILE; 179 int maxuprc = MAXUPRC; 180 int cmask = CMASK; 181 182 POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl", 183 &pool_allocator_nointr); 184 POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl", 185 &pool_allocator_nointr); 186 POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl", 187 &pool_allocator_nointr); 188 POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl", 189 &pool_allocator_nointr); 190 POOL_INIT(pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl", 191 &pool_allocator_nointr); 192 POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl", 193 &pool_allocator_nointr); 194 POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl", 195 &pool_allocator_nointr); 196 POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl", 197 &pool_allocator_nointr); 198 POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl", 199 &pool_allocator_nointr); 200 POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl", 201 &pool_allocator_nointr); 202 POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, "saupcpl", 203 &pool_allocator_nointr); 204 POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl", 205 &pool_allocator_nointr); 206 POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl", 207 &pool_allocator_nointr); 208 POOL_INIT(ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl", 209 &pool_allocator_nointr); 210 POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl", 211 &pool_allocator_nointr); 212 213 MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data"); 214 MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); 215 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); 216 217 /* 218 * The process list descriptors, used during pid allocation and 219 * by sysctl. No locking on this data structure is needed since 220 * it is completely static. 221 */ 222 const struct proclist_desc proclists[] = { 223 { &allproc }, 224 { &zombproc }, 225 { NULL }, 226 }; 227 228 static void orphanpg(struct pgrp *); 229 static void pg_delete(pid_t); 230 231 /* 232 * Initialize global process hashing structures. 233 */ 234 void 235 procinit(void) 236 { 237 const struct proclist_desc *pd; 238 int i; 239 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 240 241 for (pd = proclists; pd->pd_list != NULL; pd++) 242 LIST_INIT(pd->pd_list); 243 244 spinlockinit(&proclist_lock, "proclk", 0); 245 246 pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table, 247 M_PROC, M_WAITOK); 248 /* Set free list running through table... 249 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 250 for (i = 0; i <= pid_tbl_mask; i++) { 251 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1); 252 pid_table[i].pt_pgrp = 0; 253 } 254 /* slot 0 is just grabbed */ 255 next_free_pt = 1; 256 /* Need to fix last entry. */ 257 last_free_pt = pid_tbl_mask; 258 pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY); 259 /* point at which we grow table - to avoid reusing pids too often */ 260 pid_alloc_lim = pid_tbl_mask - 1; 261 #undef LINK_EMPTY 262 263 LIST_INIT(&alllwp); 264 265 uihashtbl = 266 hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash); 267 } 268 269 /* 270 * Initialize process 0. 271 */ 272 void 273 proc0_init(void) 274 { 275 struct proc *p; 276 struct pgrp *pg; 277 struct session *sess; 278 struct lwp *l; 279 int s; 280 u_int i; 281 rlim_t lim; 282 283 p = &proc0; 284 pg = &pgrp0; 285 sess = &session0; 286 l = &lwp0; 287 288 simple_lock_init(&p->p_lock); 289 LIST_INIT(&p->p_lwps); 290 LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); 291 p->p_nlwps = 1; 292 simple_lock_init(&p->p_sigctx.ps_silock); 293 CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo); 294 295 s = proclist_lock_write(); 296 297 pid_table[0].pt_proc = p; 298 LIST_INSERT_HEAD(&allproc, p, p_list); 299 LIST_INSERT_HEAD(&alllwp, l, l_list); 300 301 p->p_pgrp = pg; 302 pid_table[0].pt_pgrp = pg; 303 LIST_INIT(&pg->pg_members); 304 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist); 305 306 pg->pg_session = sess; 307 sess->s_count = 1; 308 sess->s_sid = 0; 309 sess->s_leader = p; 310 311 proclist_unlock_write(s); 312 313 /* 314 * Set P_NOCLDWAIT so that kernel threads are reparented to 315 * init(8) when they exit. init(8) can easily wait them out 316 * for us. 317 */ 318 p->p_flag = P_SYSTEM | P_NOCLDWAIT; 319 p->p_stat = SACTIVE; 320 p->p_nice = NZERO; 321 p->p_emul = &emul_netbsd; 322 #ifdef __HAVE_SYSCALL_INTERN 323 (*p->p_emul->e_syscall_intern)(p); 324 #endif 325 strncpy(p->p_comm, "swapper", MAXCOMLEN); 326 327 l->l_flag = L_INMEM; 328 l->l_stat = LSONPROC; 329 p->p_nrlwps = 1; 330 331 callout_init(&l->l_tsleep_ch); 332 333 /* Create credentials. */ 334 cred0.p_refcnt = 1; 335 p->p_cred = &cred0; 336 p->p_ucred = crget(); 337 p->p_ucred->cr_ngroups = 1; /* group 0 */ 338 339 /* Create the CWD info. */ 340 p->p_cwdi = &cwdi0; 341 cwdi0.cwdi_cmask = cmask; 342 cwdi0.cwdi_refcnt = 1; 343 simple_lock_init(&cwdi0.cwdi_slock); 344 345 /* Create the limits structures. */ 346 p->p_limit = &limit0; 347 simple_lock_init(&limit0.p_slock); 348 for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) 349 limit0.pl_rlimit[i].rlim_cur = 350 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; 351 352 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 353 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = 354 maxfiles < nofile ? maxfiles : nofile; 355 356 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 357 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = 358 maxproc < maxuprc ? maxproc : maxuprc; 359 360 lim = ptoa(uvmexp.free); 361 limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim; 362 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim; 363 limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 364 limit0.pl_corename = defcorename; 365 limit0.p_refcnt = 1; 366 367 /* Configure virtual memory system, set vm rlimits. */ 368 uvm_init_limits(p); 369 370 /* Initialize file descriptor table for proc0. */ 371 p->p_fd = &filedesc0.fd_fd; 372 fdinit1(&filedesc0); 373 374 /* 375 * Initialize proc0's vmspace, which uses the kernel pmap. 376 * All kernel processes (which never have user space mappings) 377 * share proc0's vmspace, and thus, the kernel pmap. 378 */ 379 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 380 trunc_page(VM_MAX_ADDRESS)); 381 p->p_vmspace = &vmspace0; 382 383 l->l_addr = proc0paddr; /* XXX */ 384 385 p->p_stats = &pstat0; 386 387 /* Initialize signal state for proc0. */ 388 p->p_sigacts = &sigacts0; 389 siginit(p); 390 } 391 392 /* 393 * Acquire a read lock on the proclist. 394 */ 395 void 396 proclist_lock_read(void) 397 { 398 int error; 399 400 error = spinlockmgr(&proclist_lock, LK_SHARED, NULL); 401 #ifdef DIAGNOSTIC 402 if (__predict_false(error != 0)) 403 panic("proclist_lock_read: failed to acquire lock"); 404 #endif 405 } 406 407 /* 408 * Release a read lock on the proclist. 409 */ 410 void 411 proclist_unlock_read(void) 412 { 413 414 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 415 } 416 417 /* 418 * Acquire a write lock on the proclist. 419 */ 420 int 421 proclist_lock_write(void) 422 { 423 int s, error; 424 425 s = splclock(); 426 error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL); 427 #ifdef DIAGNOSTIC 428 if (__predict_false(error != 0)) 429 panic("proclist_lock: failed to acquire lock"); 430 #endif 431 return s; 432 } 433 434 /* 435 * Release a write lock on the proclist. 436 */ 437 void 438 proclist_unlock_write(int s) 439 { 440 441 (void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL); 442 splx(s); 443 } 444 445 /* 446 * Check that the specified process group is in the session of the 447 * specified process. 448 * Treats -ve ids as process ids. 449 * Used to validate TIOCSPGRP requests. 450 */ 451 int 452 pgid_in_session(struct proc *p, pid_t pg_id) 453 { 454 struct pgrp *pgrp; 455 456 if (pg_id < 0) { 457 struct proc *p1 = pfind(-pg_id); 458 if (p1 == NULL) 459 return EINVAL; 460 pgrp = p1->p_pgrp; 461 } else { 462 pgrp = pgfind(pg_id); 463 if (pgrp == NULL) 464 return EINVAL; 465 } 466 if (pgrp->pg_session != p->p_pgrp->pg_session) 467 return EPERM; 468 return 0; 469 } 470 471 /* 472 * Is p an inferior of q? 473 */ 474 int 475 inferior(struct proc *p, struct proc *q) 476 { 477 478 for (; p != q; p = p->p_pptr) 479 if (p->p_pid == 0) 480 return 0; 481 return 1; 482 } 483 484 /* 485 * Locate a process by number 486 */ 487 struct proc * 488 p_find(pid_t pid, uint flags) 489 { 490 struct proc *p; 491 char stat; 492 493 if (!(flags & PFIND_LOCKED)) 494 proclist_lock_read(); 495 p = pid_table[pid & pid_tbl_mask].pt_proc; 496 /* Only allow live processes to be found by pid. */ 497 if (P_VALID(p) && p->p_pid == pid && 498 ((stat = p->p_stat) == SACTIVE || stat == SSTOP 499 || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) { 500 if (flags & PFIND_UNLOCK_OK) 501 proclist_unlock_read(); 502 return p; 503 } 504 if (flags & PFIND_UNLOCK_FAIL) 505 proclist_unlock_read(); 506 return NULL; 507 } 508 509 510 /* 511 * Locate a process group by number 512 */ 513 struct pgrp * 514 pg_find(pid_t pgid, uint flags) 515 { 516 struct pgrp *pg; 517 518 if (!(flags & PFIND_LOCKED)) 519 proclist_lock_read(); 520 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 521 /* 522 * Can't look up a pgrp that only exists because the session 523 * hasn't died yet (traditional) 524 */ 525 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 526 if (flags & PFIND_UNLOCK_FAIL) 527 proclist_unlock_read(); 528 return NULL; 529 } 530 531 if (flags & PFIND_UNLOCK_OK) 532 proclist_unlock_read(); 533 return pg; 534 } 535 536 static void 537 expand_pid_table(void) 538 { 539 uint pt_size = pid_tbl_mask + 1; 540 struct pid_table *n_pt, *new_pt; 541 struct proc *proc; 542 struct pgrp *pgrp; 543 int i; 544 int s; 545 pid_t pid; 546 547 new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK); 548 549 s = proclist_lock_write(); 550 if (pt_size != pid_tbl_mask + 1) { 551 /* Another process beat us to it... */ 552 proclist_unlock_write(s); 553 FREE(new_pt, M_PROC); 554 return; 555 } 556 557 /* 558 * Copy entries from old table into new one. 559 * If 'pid' is 'odd' we need to place in the upper half, 560 * even pid's to the lower half. 561 * Free items stay in the low half so we don't have to 562 * fixup the reference to them. 563 * We stuff free items on the front of the freelist 564 * because we can't write to unmodified entries. 565 * Processing the table backwards maintains a semblance 566 * of issueing pid numbers that increase with time. 567 */ 568 i = pt_size - 1; 569 n_pt = new_pt + i; 570 for (; ; i--, n_pt--) { 571 proc = pid_table[i].pt_proc; 572 pgrp = pid_table[i].pt_pgrp; 573 if (!P_VALID(proc)) { 574 /* Up 'use count' so that link is valid */ 575 pid = (P_NEXT(proc) + pt_size) & ~pt_size; 576 proc = P_FREE(pid); 577 if (pgrp) 578 pid = pgrp->pg_id; 579 } else 580 pid = proc->p_pid; 581 582 /* Save entry in appropriate half of table */ 583 n_pt[pid & pt_size].pt_proc = proc; 584 n_pt[pid & pt_size].pt_pgrp = pgrp; 585 586 /* Put other piece on start of free list */ 587 pid = (pid ^ pt_size) & ~pid_tbl_mask; 588 n_pt[pid & pt_size].pt_proc = 589 P_FREE((pid & ~pt_size) | next_free_pt); 590 n_pt[pid & pt_size].pt_pgrp = 0; 591 next_free_pt = i | (pid & pt_size); 592 if (i == 0) 593 break; 594 } 595 596 /* Switch tables */ 597 n_pt = pid_table; 598 pid_table = new_pt; 599 pid_tbl_mask = pt_size * 2 - 1; 600 601 /* 602 * pid_max starts as PID_MAX (= 30000), once we have 16384 603 * allocated pids we need it to be larger! 604 */ 605 if (pid_tbl_mask > PID_MAX) { 606 pid_max = pid_tbl_mask * 2 + 1; 607 pid_alloc_lim |= pid_alloc_lim << 1; 608 } else 609 pid_alloc_lim <<= 1; /* doubles number of free slots... */ 610 611 proclist_unlock_write(s); 612 FREE(n_pt, M_PROC); 613 } 614 615 struct proc * 616 proc_alloc(void) 617 { 618 struct proc *p; 619 int s; 620 int nxt; 621 pid_t pid; 622 struct pid_table *pt; 623 624 p = pool_get(&proc_pool, PR_WAITOK); 625 p->p_stat = SIDL; /* protect against others */ 626 627 /* allocate next free pid */ 628 629 for (;;expand_pid_table()) { 630 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) 631 /* ensure pids cycle through 2000+ values */ 632 continue; 633 s = proclist_lock_write(); 634 pt = &pid_table[next_free_pt]; 635 #ifdef DIAGNOSTIC 636 if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp)) 637 panic("proc_alloc: slot busy"); 638 #endif 639 nxt = P_NEXT(pt->pt_proc); 640 if (nxt & pid_tbl_mask) 641 break; 642 /* Table full - expand (NB last entry not used....) */ 643 proclist_unlock_write(s); 644 } 645 646 /* pid is 'saved use count' + 'size' + entry */ 647 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 648 if ((uint)pid > (uint)pid_max) 649 pid &= pid_tbl_mask; 650 p->p_pid = pid; 651 next_free_pt = nxt & pid_tbl_mask; 652 653 /* Grab table slot */ 654 pt->pt_proc = p; 655 pid_alloc_cnt++; 656 657 proclist_unlock_write(s); 658 659 return p; 660 } 661 662 /* 663 * Free last resources of a process - called from proc_free (in kern_exit.c) 664 */ 665 void 666 proc_free_mem(struct proc *p) 667 { 668 int s; 669 pid_t pid = p->p_pid; 670 struct pid_table *pt; 671 672 s = proclist_lock_write(); 673 674 pt = &pid_table[pid & pid_tbl_mask]; 675 #ifdef DIAGNOSTIC 676 if (__predict_false(pt->pt_proc != p)) 677 panic("proc_free: pid_table mismatch, pid %x, proc %p", 678 pid, p); 679 #endif 680 /* save pid use count in slot */ 681 pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); 682 683 if (pt->pt_pgrp == NULL) { 684 /* link last freed entry onto ours */ 685 pid &= pid_tbl_mask; 686 pt = &pid_table[last_free_pt]; 687 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); 688 last_free_pt = pid; 689 pid_alloc_cnt--; 690 } 691 692 nprocs--; 693 proclist_unlock_write(s); 694 695 pool_put(&proc_pool, p); 696 } 697 698 /* 699 * Move p to a new or existing process group (and session) 700 * 701 * If we are creating a new pgrp, the pgid should equal 702 * the calling process' pid. 703 * If is only valid to enter a process group that is in the session 704 * of the process. 705 * Also mksess should only be set if we are creating a process group 706 * 707 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the 708 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid) 709 */ 710 int 711 enterpgrp(struct proc *p, pid_t pgid, int mksess) 712 { 713 struct pgrp *new_pgrp, *pgrp; 714 struct session *sess; 715 struct proc *curp = curproc; 716 pid_t pid = p->p_pid; 717 int rval; 718 int s; 719 pid_t pg_id = NO_PGID; 720 721 /* Allocate data areas we might need before doing any validity checks */ 722 proclist_lock_read(); /* Because pid_table might change */ 723 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) { 724 proclist_unlock_read(); 725 new_pgrp = pool_get(&pgrp_pool, PR_WAITOK); 726 } else { 727 proclist_unlock_read(); 728 new_pgrp = NULL; 729 } 730 if (mksess) 731 sess = pool_get(&session_pool, M_WAITOK); 732 else 733 sess = NULL; 734 735 s = proclist_lock_write(); 736 rval = EPERM; /* most common error (to save typing) */ 737 738 /* Check pgrp exists or can be created */ 739 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 740 if (pgrp != NULL && pgrp->pg_id != pgid) 741 goto done; 742 743 /* Can only set another process under restricted circumstances. */ 744 if (p != curp) { 745 /* must exist and be one of our children... */ 746 if (p != pid_table[pid & pid_tbl_mask].pt_proc 747 || !inferior(p, curp)) { 748 rval = ESRCH; 749 goto done; 750 } 751 /* ... in the same session... */ 752 if (sess != NULL || p->p_session != curp->p_session) 753 goto done; 754 /* ... existing pgid must be in same session ... */ 755 if (pgrp != NULL && pgrp->pg_session != p->p_session) 756 goto done; 757 /* ... and not done an exec. */ 758 if (p->p_flag & P_EXEC) { 759 rval = EACCES; 760 goto done; 761 } 762 } 763 764 /* Changing the process group/session of a session 765 leader is definitely off limits. */ 766 if (SESS_LEADER(p)) { 767 if (sess == NULL && p->p_pgrp == pgrp) 768 /* unless it's a definite noop */ 769 rval = 0; 770 goto done; 771 } 772 773 /* Can only create a process group with id of process */ 774 if (pgrp == NULL && pgid != pid) 775 goto done; 776 777 /* Can only create a session if creating pgrp */ 778 if (sess != NULL && pgrp != NULL) 779 goto done; 780 781 /* Check we allocated memory for a pgrp... */ 782 if (pgrp == NULL && new_pgrp == NULL) 783 goto done; 784 785 /* Don't attach to 'zombie' pgrp */ 786 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 787 goto done; 788 789 /* Expect to succeed now */ 790 rval = 0; 791 792 if (pgrp == p->p_pgrp) 793 /* nothing to do */ 794 goto done; 795 796 /* Ok all setup, link up required structures */ 797 if (pgrp == NULL) { 798 pgrp = new_pgrp; 799 new_pgrp = 0; 800 if (sess != NULL) { 801 sess->s_sid = p->p_pid; 802 sess->s_leader = p; 803 sess->s_count = 1; 804 sess->s_ttyvp = NULL; 805 sess->s_ttyp = NULL; 806 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 807 memcpy(sess->s_login, p->p_session->s_login, 808 sizeof(sess->s_login)); 809 p->p_flag &= ~P_CONTROLT; 810 } else { 811 sess = p->p_pgrp->pg_session; 812 SESSHOLD(sess); 813 } 814 pgrp->pg_session = sess; 815 sess = 0; 816 817 pgrp->pg_id = pgid; 818 LIST_INIT(&pgrp->pg_members); 819 #ifdef DIAGNOSTIC 820 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 821 panic("enterpgrp: pgrp table slot in use"); 822 if (__predict_false(mksess && p != curp)) 823 panic("enterpgrp: mksession and p != curproc"); 824 #endif 825 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 826 pgrp->pg_jobc = 0; 827 } 828 829 /* 830 * Adjust eligibility of affected pgrps to participate in job control. 831 * Increment eligibility counts before decrementing, otherwise we 832 * could reach 0 spuriously during the first call. 833 */ 834 fixjobc(p, pgrp, 1); 835 fixjobc(p, p->p_pgrp, 0); 836 837 /* Move process to requested group */ 838 LIST_REMOVE(p, p_pglist); 839 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 840 /* defer delete until we've dumped the lock */ 841 pg_id = p->p_pgrp->pg_id; 842 p->p_pgrp = pgrp; 843 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 844 845 done: 846 proclist_unlock_write(s); 847 if (sess != NULL) 848 pool_put(&session_pool, sess); 849 if (new_pgrp != NULL) 850 pool_put(&pgrp_pool, new_pgrp); 851 if (pg_id != NO_PGID) 852 pg_delete(pg_id); 853 #ifdef DEBUG_PGRP 854 if (__predict_false(rval)) 855 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 856 pid, pgid, mksess, curp->p_pid, rval); 857 #endif 858 return rval; 859 } 860 861 /* 862 * remove process from process group 863 */ 864 int 865 leavepgrp(struct proc *p) 866 { 867 int s; 868 struct pgrp *pgrp; 869 pid_t pg_id; 870 871 s = proclist_lock_write(); 872 pgrp = p->p_pgrp; 873 LIST_REMOVE(p, p_pglist); 874 p->p_pgrp = 0; 875 pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID; 876 proclist_unlock_write(s); 877 878 if (pg_id != NO_PGID) 879 pg_delete(pg_id); 880 return 0; 881 } 882 883 static void 884 pg_free(pid_t pg_id) 885 { 886 struct pgrp *pgrp; 887 struct pid_table *pt; 888 int s; 889 890 s = proclist_lock_write(); 891 pt = &pid_table[pg_id & pid_tbl_mask]; 892 pgrp = pt->pt_pgrp; 893 #ifdef DIAGNOSTIC 894 if (__predict_false(!pgrp || pgrp->pg_id != pg_id 895 || !LIST_EMPTY(&pgrp->pg_members))) 896 panic("pg_free: process group absent or has members"); 897 #endif 898 pt->pt_pgrp = 0; 899 900 if (!P_VALID(pt->pt_proc)) { 901 /* orphaned pgrp, put slot onto free list */ 902 #ifdef DIAGNOSTIC 903 if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask)) 904 panic("pg_free: process slot on free list"); 905 #endif 906 907 pg_id &= pid_tbl_mask; 908 pt = &pid_table[last_free_pt]; 909 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id); 910 last_free_pt = pg_id; 911 pid_alloc_cnt--; 912 } 913 proclist_unlock_write(s); 914 915 pool_put(&pgrp_pool, pgrp); 916 } 917 918 /* 919 * delete a process group 920 */ 921 static void 922 pg_delete(pid_t pg_id) 923 { 924 struct pgrp *pgrp; 925 struct tty *ttyp; 926 struct session *ss; 927 int s, is_pgrp_leader; 928 929 s = proclist_lock_write(); 930 pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 931 if (pgrp == NULL || pgrp->pg_id != pg_id || 932 !LIST_EMPTY(&pgrp->pg_members)) { 933 proclist_unlock_write(s); 934 return; 935 } 936 937 ss = pgrp->pg_session; 938 939 /* Remove reference (if any) from tty to this process group */ 940 ttyp = ss->s_ttyp; 941 if (ttyp != NULL && ttyp->t_pgrp == pgrp) { 942 ttyp->t_pgrp = NULL; 943 #ifdef DIAGNOSTIC 944 if (ttyp->t_session != ss) 945 panic("pg_delete: wrong session on terminal"); 946 #endif 947 } 948 949 /* 950 * The leading process group in a session is freed 951 * by sessdelete() if last reference. 952 */ 953 is_pgrp_leader = (ss->s_sid == pgrp->pg_id); 954 proclist_unlock_write(s); 955 SESSRELE(ss); 956 957 if (is_pgrp_leader) 958 return; 959 960 pg_free(pg_id); 961 } 962 963 /* 964 * Delete session - called from SESSRELE when s_count becomes zero. 965 */ 966 void 967 sessdelete(struct session *ss) 968 { 969 /* 970 * We keep the pgrp with the same id as the session in 971 * order to stop a process being given the same pid. 972 * Since the pgrp holds a reference to the session, it 973 * must be a 'zombie' pgrp by now. 974 */ 975 976 pg_free(ss->s_sid); 977 978 pool_put(&session_pool, ss); 979 } 980 981 /* 982 * Adjust pgrp jobc counters when specified process changes process group. 983 * We count the number of processes in each process group that "qualify" 984 * the group for terminal job control (those with a parent in a different 985 * process group of the same session). If that count reaches zero, the 986 * process group becomes orphaned. Check both the specified process' 987 * process group and that of its children. 988 * entering == 0 => p is leaving specified group. 989 * entering == 1 => p is entering specified group. 990 * 991 * Call with proclist_lock held. 992 */ 993 void 994 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 995 { 996 struct pgrp *hispgrp; 997 struct session *mysession = pgrp->pg_session; 998 struct proc *child; 999 1000 /* 1001 * Check p's parent to see whether p qualifies its own process 1002 * group; if so, adjust count for p's process group. 1003 */ 1004 hispgrp = p->p_pptr->p_pgrp; 1005 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 1006 if (entering) 1007 pgrp->pg_jobc++; 1008 else if (--pgrp->pg_jobc == 0) 1009 orphanpg(pgrp); 1010 } 1011 1012 /* 1013 * Check this process' children to see whether they qualify 1014 * their process groups; if so, adjust counts for children's 1015 * process groups. 1016 */ 1017 LIST_FOREACH(child, &p->p_children, p_sibling) { 1018 hispgrp = child->p_pgrp; 1019 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1020 !P_ZOMBIE(child)) { 1021 if (entering) 1022 hispgrp->pg_jobc++; 1023 else if (--hispgrp->pg_jobc == 0) 1024 orphanpg(hispgrp); 1025 } 1026 } 1027 } 1028 1029 /* 1030 * A process group has become orphaned; 1031 * if there are any stopped processes in the group, 1032 * hang-up all process in that group. 1033 * 1034 * Call with proclist_lock held. 1035 */ 1036 static void 1037 orphanpg(struct pgrp *pg) 1038 { 1039 struct proc *p; 1040 1041 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1042 if (p->p_stat == SSTOP) { 1043 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1044 psignal(p, SIGHUP); 1045 psignal(p, SIGCONT); 1046 } 1047 return; 1048 } 1049 } 1050 } 1051 1052 /* mark process as suid/sgid, reset some values to defaults */ 1053 void 1054 p_sugid(struct proc *p) 1055 { 1056 struct plimit *lim; 1057 char *cn; 1058 1059 p->p_flag |= P_SUGID; 1060 /* reset what needs to be reset in plimit */ 1061 lim = p->p_limit; 1062 if (lim->pl_corename != defcorename) { 1063 if (lim->p_refcnt > 1 && 1064 (lim->p_lflags & PL_SHAREMOD) == 0) { 1065 p->p_limit = limcopy(lim); 1066 limfree(lim); 1067 lim = p->p_limit; 1068 } 1069 simple_lock(&lim->p_slock); 1070 cn = lim->pl_corename; 1071 lim->pl_corename = defcorename; 1072 simple_unlock(&lim->p_slock); 1073 if (cn != defcorename) 1074 free(cn, M_TEMP); 1075 } 1076 } 1077 1078 #ifdef DDB 1079 #include <ddb/db_output.h> 1080 void pidtbl_dump(void); 1081 void 1082 pidtbl_dump(void) 1083 { 1084 struct pid_table *pt; 1085 struct proc *p; 1086 struct pgrp *pgrp; 1087 int id; 1088 1089 db_printf("pid table %p size %x, next %x, last %x\n", 1090 pid_table, pid_tbl_mask+1, 1091 next_free_pt, last_free_pt); 1092 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1093 p = pt->pt_proc; 1094 if (!P_VALID(p) && !pt->pt_pgrp) 1095 continue; 1096 db_printf(" id %x: ", id); 1097 if (P_VALID(p)) 1098 db_printf("proc %p id %d (0x%x) %s\n", 1099 p, p->p_pid, p->p_pid, p->p_comm); 1100 else 1101 db_printf("next %x use %x\n", 1102 P_NEXT(p) & pid_tbl_mask, 1103 P_NEXT(p) & ~pid_tbl_mask); 1104 if ((pgrp = pt->pt_pgrp)) { 1105 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1106 pgrp->pg_session, pgrp->pg_session->s_sid, 1107 pgrp->pg_session->s_count, 1108 pgrp->pg_session->s_login); 1109 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1110 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1111 pgrp->pg_members.lh_first); 1112 for (p = pgrp->pg_members.lh_first; p != 0; 1113 p = p->p_pglist.le_next) { 1114 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1115 p->p_pid, p, p->p_pgrp, p->p_comm); 1116 } 1117 } 1118 } 1119 } 1120 #endif /* DDB */ 1121 1122 #ifdef KSTACK_CHECK_MAGIC 1123 #include <sys/user.h> 1124 1125 #define KSTACK_MAGIC 0xdeadbeaf 1126 1127 /* XXX should be per process basis? */ 1128 int kstackleftmin = KSTACK_SIZE; 1129 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is 1130 less than this */ 1131 1132 void 1133 kstack_setup_magic(const struct lwp *l) 1134 { 1135 u_int32_t *ip; 1136 u_int32_t const *end; 1137 1138 KASSERT(l != NULL); 1139 KASSERT(l != &lwp0); 1140 1141 /* 1142 * fill all the stack with magic number 1143 * so that later modification on it can be detected. 1144 */ 1145 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1146 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1147 for (; ip < end; ip++) { 1148 *ip = KSTACK_MAGIC; 1149 } 1150 } 1151 1152 void 1153 kstack_check_magic(const struct lwp *l) 1154 { 1155 u_int32_t const *ip, *end; 1156 int stackleft; 1157 1158 KASSERT(l != NULL); 1159 1160 /* don't check proc0 */ /*XXX*/ 1161 if (l == &lwp0) 1162 return; 1163 1164 #ifdef __MACHINE_STACK_GROWS_UP 1165 /* stack grows upwards (eg. hppa) */ 1166 ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1167 end = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1168 for (ip--; ip >= end; ip--) 1169 if (*ip != KSTACK_MAGIC) 1170 break; 1171 1172 stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip; 1173 #else /* __MACHINE_STACK_GROWS_UP */ 1174 /* stack grows downwards (eg. i386) */ 1175 ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l); 1176 end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1177 for (; ip < end; ip++) 1178 if (*ip != KSTACK_MAGIC) 1179 break; 1180 1181 stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l); 1182 #endif /* __MACHINE_STACK_GROWS_UP */ 1183 1184 if (kstackleftmin > stackleft) { 1185 kstackleftmin = stackleft; 1186 if (stackleft < kstackleftthres) 1187 printf("warning: kernel stack left %d bytes" 1188 "(pid %u:lid %u)\n", stackleft, 1189 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1190 } 1191 1192 if (stackleft <= 0) { 1193 panic("magic on the top of kernel stack changed for " 1194 "pid %u, lid %u: maybe kernel stack overflow", 1195 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1196 } 1197 } 1198 #endif /* KSTACK_CHECK_MAGIC */ 1199 1200 /* XXX shouldn't be here */ 1201 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) 1202 #define PROCLIST_ASSERT_LOCKED_READ() \ 1203 KASSERT(lockstatus(&proclist_lock) == LK_SHARED) 1204 #else 1205 #define PROCLIST_ASSERT_LOCKED_READ() /* nothing */ 1206 #endif 1207 1208 int 1209 proclist_foreach_call(struct proclist *list, 1210 int (*callback)(struct proc *, void *arg), void *arg) 1211 { 1212 struct proc marker; 1213 struct proc *p; 1214 struct lwp * const l = curlwp; 1215 int ret = 0; 1216 1217 marker.p_flag = P_MARKER; 1218 PHOLD(l); 1219 proclist_lock_read(); 1220 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) { 1221 if (p->p_flag & P_MARKER) { 1222 p = LIST_NEXT(p, p_list); 1223 continue; 1224 } 1225 LIST_INSERT_AFTER(p, &marker, p_list); 1226 ret = (*callback)(p, arg); 1227 PROCLIST_ASSERT_LOCKED_READ(); 1228 p = LIST_NEXT(&marker, p_list); 1229 LIST_REMOVE(&marker, p_list); 1230 } 1231 proclist_unlock_read(); 1232 PRELE(l); 1233 1234 return ret; 1235 } 1236