/*	$NetBSD: kern_proc.c,v 1.193 2014/07/12 09:57:25 njoly Exp $	*/

/*-
 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.193 2014/07/12 09:57:25 njoly Exp $");

#ifdef _KERNEL_OPT
#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_dtrace.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/pool.h>
#include <sys/pset.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/filedesc.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
#include <sys/exec.h>
#include <sys/cpu.h>

#include <uvm/uvm_extern.h>

#ifdef COMPAT_NETBSD32
#include <compat/netbsd32/netbsd32.h>
#endif

/*
 * Process lists.
 */

struct proclist		allproc		__cacheline_aligned;
struct proclist		zombproc	__cacheline_aligned;

kmutex_t *		proc_lock	__cacheline_aligned;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
	pid_t		pt_pid;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p)	((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))

/*
 * Table of process IDs (PIDs).
 */
static struct pid_table *pid_table	__read_mostly;

#define	INITIAL_PID_TABLE_SIZE	(1 << 5)

/* Table mask, threshold for growing and number of allocated PIDs. */
static u_int		pid_tbl_mask	__read_mostly;
static u_int		pid_alloc_lim	__read_mostly;
static u_int		pid_alloc_cnt	__cacheline_aligned;

/* Next free, last free and maximum PIDs. */
static u_int		next_free_pt	__cacheline_aligned;
static u_int		last_free_pt	__cacheline_aligned;
static pid_t		pid_max		__read_mostly;
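
/*
 * Illustrative sketch (comment only, not compiled): how the macros
 * above encode the table.  A live slot holds an even (pointer-aligned)
 * proc pointer; a free slot holds P_FREE(v), an odd value, so P_VALID()
 * tells the two apart and P_NEXT() recovers v:
 *
 *	struct pid_table *pt = &pid_table[pid & pid_tbl_mask];
 *	if (P_VALID(pt->pt_proc) && pt->pt_pid == pid)
 *		return pt->pt_proc;		// live process
 *	next = P_NEXT(pt->pt_proc);		// free: link + use count
 *
 * The low bits of v index the next free slot and the high bits carry
 * the slot's "use count", which is what keeps freshly allocated pids
 * from colliding with recently freed ones.
 */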

/* Components of the first process -- never freed. */

extern struct emul emul_netbsd;	/* defined in kern_exec.c */

struct session session0 = {
	.s_count = 1,
	.s_sid = 0,
};
struct pgrp pgrp0 = {
	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
	.pg_session = &session0,
};
filedesc_t filedesc0;
struct cwdinfo cwdi0 = {
	.cwdi_cmask = CMASK,
	.cwdi_refcnt = 1,
};
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct proc proc0 = {
	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
	.p_nlwps = 1,
	.p_nrlwps = 1,
	.p_nlwpid = 1,		/* must match lwp0.l_lid */
	.p_pgrp = &pgrp0,
	.p_comm = "system",
	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
	 * when they exit. init(8) can easily wait them out for us.
	 */
	.p_flag = PK_SYSTEM | PK_NOCLDWAIT,
	.p_stat = SACTIVE,
	.p_nice = NZERO,
	.p_emul = &emul_netbsd,
	.p_cwdi = &cwdi0,
	.p_limit = &limit0,
	.p_fd = &filedesc0,
	.p_vmspace = &vmspace0,
	.p_stats = &pstat0,
	.p_sigacts = &sigacts0,
#ifdef PROC0_MD_INITIALIZERS
	PROC0_MD_INITIALIZERS
#endif
};
kauth_cred_t cred0;

static const int	nofile	= NOFILE;
static const int	maxuprc	= MAXUPRC;

static int sysctl_doeproc(SYSCTLFN_PROTO);
static int sysctl_kern_proc_args(SYSCTLFN_PROTO);

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static struct pgrp *	pg_remove(pid_t);
static void		pg_delete(pid_t);
static void		orphanpg(struct pgrp *);

static specificdata_domain_t proc_specificdata_domain;

static pool_cache_t proc_cache;

static kauth_listener_t proc_listener;

static int
proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_CANSEE: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_CANSEE_ARGS:
		case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
		case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
			result = KAUTH_RESULT_ALLOW;

			break;

		case KAUTH_REQ_PROCESS_CANSEE_ENV:
			if (kauth_cred_getuid(cred) !=
			    kauth_cred_getuid(p->p_cred) ||
			    kauth_cred_getuid(cred) !=
			    kauth_cred_getsvuid(p->p_cred))
				break;

			result = KAUTH_RESULT_ALLOW;

			break;

		default:
			break;
		}

		break;
	}

	case KAUTH_PROCESS_FORK: {
		int lnprocs = (int)(unsigned long)arg2;

		/*
		 * Don't allow a nonprivileged user to use the last few
		 * processes. The variable lnprocs is the current number of
		 * processes, maxproc is the limit.
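		 * For example, with maxproc at 1000 an unprivileged fork
		 * starts failing once lnprocs reaches 995, keeping the
		 * last five process slots in reserve for root (a worked
		 * illustration, not a tunable default).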
		 */
		if (__predict_false((lnprocs >= maxproc - 5)))
			break;

		result = KAUTH_RESULT_ALLOW;

		break;
	}

	case KAUTH_PROCESS_CORENAME:
	case KAUTH_PROCESS_STOPFLAG:
		if (proc_uidmatch(cred, p->p_cred) == 0)
			result = KAUTH_RESULT_ALLOW;

		break;

	default:
		break;
	}

	return result;
}

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	u_int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
	    * sizeof(struct pid_table), KM_SLEEP);
	pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
	pid_max = PID_MAX;

	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
		pid_table[i].pt_pid = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	proc_specificdata_domain = specificdata_domain_create();
	KASSERT(proc_specificdata_domain != NULL);

	proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
	    "procpl", NULL, IPL_NONE, NULL, NULL, NULL);

	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    proc_listener_cb, NULL);
}

void
procinit_sysctl(void)
{
	static struct sysctllog *clog;

	sysctl_createv(&clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc",
		       SYSCTL_DESCR("System-wide process information"),
		       sysctl_doeproc, 0, NULL, 0,
		       CTL_KERN, KERN_PROC, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc2",
		       SYSCTL_DESCR("Machine-independent process information"),
		       sysctl_doeproc, 0, NULL, 0,
		       CTL_KERN, KERN_PROC2, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc_args",
		       SYSCTL_DESCR("Process argument information"),
		       sysctl_kern_proc_args, 0, NULL, 0,
		       CTL_KERN, KERN_PROC_ARGS, CTL_EOL);

	/*
	  "nodes" under these:

	  KERN_PROC_ALL
	  KERN_PROC_PID pid
	  KERN_PROC_PGRP pgrp
	  KERN_PROC_SESSION sess
	  KERN_PROC_TTY tty
	  KERN_PROC_UID uid
	  KERN_PROC_RUID uid
	  KERN_PROC_GID gid
	  KERN_PROC_RGID gid

	  all in all, probably not worth the effort...
	*/
}
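
/*
 * Illustrative userland sketch (not part of the kernel): one way to
 * fetch a single kinfo_proc2 through the "proc2" node registered
 * above.  The four MIB words after CTL_KERN/KERN_PROC2 are op, arg,
 * elem_size and elem_count, exactly as sysctl_doeproc() parses them:
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	struct kinfo_proc2 kp;
 *	size_t len = sizeof(kp);
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, getpid(),
 *	    sizeof(kp), 1 };
 *
 *	if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */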

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct rlimit *rlim;
	rlim_t lim;
	int i;

	p = &proc0;
	pg = &pgrp0;

	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "wait");
	cv_init(&p->p_lwpcv, "lwpwait");

	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);

	pid_table[0].pt_pgrp = pg;
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	/* Create credentials. */
	cred0 = kauth_cred_alloc();
	p->p_cred = cred0;

	/* Create the CWD info. */
	rw_init(&cwdi0.cwdi_lock);

	/* Create the limits structures. */
	mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);

	rlim = limit0.pl_rlimit;
	for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
		rlim[i].rlim_cur = RLIM_INFINITY;
		rlim[i].rlim_max = RLIM_INFINITY;
	}

	rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
	rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;

	rlim[RLIMIT_NPROC].rlim_max = maxproc;
	rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;

	lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free));
	rlim[RLIMIT_RSS].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;

	rlim[RLIMIT_NTHR].rlim_max = maxlwp;
	rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc;

	/* Note that default core name has zero length. */
	limit0.pl_corename = defcorename;
	limit0.pl_cnlen = 0;
	limit0.pl_refcnt = 1;
	limit0.pl_writeable = false;
	limit0.pl_sv_limit = NULL;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	fd_init(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS),
#ifdef __USE_TOPDOWN_VM
	    true
#else
	    false
#endif
	    );

	/* Initialize signal state for proc0. XXX IPL_SCHED */
	mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
	siginit(p);

	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
}

/*
 * Session reference counting.
 */

void
proc_sesshold(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	ss->s_count++;
}
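
/*
 * Illustrative pairing (sketch, not kernel code): a caller that caches
 * a session pointer takes a hold while proc_lock is held and later
 * drops it with proc_sessrele(), which releases proc_lock itself and
 * frees the session and its pgrp on the last reference:
 *
 *	mutex_enter(proc_lock);
 *	proc_sesshold(ss);
 *	mutex_exit(proc_lock);
 *	...
 *	mutex_enter(proc_lock);
 *	proc_sessrele(ss);	// drops proc_lock; may free ss
 */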

void
proc_sessrele(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	/*
	 * We keep the pgrp with the same id as the session in order to
	 * stop a process being given the same pid.  Since the pgrp holds
	 * a reference to the session, it must be a 'zombie' pgrp by now.
	 */
	if (--ss->s_count == 0) {
		struct pgrp *pg;

		pg = pg_remove(ss->s_sid);
		mutex_exit(proc_lock);

		kmem_free(pg, sizeof(struct pgrp));
		kmem_free(ss, sizeof(struct session));
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;
	struct session *session;
	int error;

	mutex_enter(proc_lock);
	if (pg_id < 0) {
		struct proc *p1 = proc_find(-pg_id);
		if (p1 == NULL) {
			error = EINVAL;
			goto fail;
		}
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgrp_find(pg_id);
		if (pgrp == NULL) {
			error = EINVAL;
			goto fail;
		}
	}
	session = pgrp->pg_session;
	error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
fail:
	mutex_exit(proc_lock);
	return error;
}

/*
 * p_inferior: is p an inferior of q?
 */
static inline bool
p_inferior(struct proc *p, struct proc *q)
{

	KASSERT(mutex_owned(proc_lock));

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return false;
	return true;
}

/*
 * proc_find: locate a process by the ID.
 *
 * => Must be called with proc_lock held.
 */
proc_t *
proc_find_raw(pid_t pid)
{
	struct pid_table *pt;
	proc_t *p;

	KASSERT(mutex_owned(proc_lock));
	pt = &pid_table[pid & pid_tbl_mask];
	p = pt->pt_proc;
	if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) {
		return NULL;
	}
	return p;
}

proc_t *
proc_find(pid_t pid)
{
	proc_t *p;

	p = proc_find_raw(pid);
	if (__predict_false(p == NULL)) {
		return NULL;
	}

	/*
	 * Only allow live processes to be found by PID.
	 * XXX: p_stat might change, since unlocked.
	 */
	if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
		return p;
	}
	return NULL;
}

/*
 * pgrp_find: locate a process group by the ID.
 *
 * => Must be called with proc_lock held.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp *pg;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;

	/*
	 * Cannot look up a process group that only exists because the
	 * session has not died yet (traditional).
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		return NULL;
	}
	return pg;
}

static void
expand_pid_table(void)
{
	size_t pt_size, tsz;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	pid_t pid, rpid;
	u_int i;
	uint new_pt_mask;

	pt_size = pid_tbl_mask + 1;
	tsz = pt_size * 2 * sizeof(struct pid_table);
	new_pt = kmem_alloc(tsz, KM_SLEEP);
	new_pt_mask = pt_size * 2 - 1;

	mutex_enter(proc_lock);
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		mutex_exit(proc_lock);
		kmem_free(new_pt, tsz);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place it in the upper half,
	 * even pids go to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
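	 * (For example, when growing from 32 to 64 slots, the proc with
	 * pid 33 found in old slot 1 moves to new slot 33 because
	 * (33 & 32) != 0, while pid 64 in old slot 0 stays in slot 0;
	 * the deciding bit is (pid & pt_size).)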
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			rpid = 0;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else {
			pid = pid_table[i].pt_pid;
			rpid = pid;
		}

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;
		n_pt[pid & pt_size].pt_pid = rpid;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		n_pt[pid & pt_size].pt_pid = 0;

		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Save old table size and switch tables */
	tsz = pt_size * sizeof(struct pid_table);
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = new_pt_mask;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	mutex_exit(proc_lock);
	kmem_free(n_pt, tsz);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;

	p = pool_cache_get(proc_cache, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */
	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
	p->p_pid = -1;
	proc_alloc_pid(p);
	return p;
}

/*
 * proc_alloc_pid: allocate PID and record the given proc 'p' so that
 * proc_find_raw() can find it by the PID.
 */

pid_t
proc_alloc_pid(struct proc *p)
{
	struct pid_table *pt;
	pid_t pid;
	int nxt;

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		mutex_enter(proc_lock);
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		mutex_exit(proc_lock);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;

	KASSERT(pt->pt_pid == 0);
	pt->pt_pid = pid;
	if (p->p_pid == -1) {
		p->p_pid = pid;
	}
	pid_alloc_cnt++;
	mutex_exit(proc_lock);

	return pid;
}
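
/*
 * Worked example of the encoding above (illustrative numbers): with a
 * 32-slot table (pid_tbl_mask == 31), a free slot at index 5 whose
 * saved use count is 64 yields pid = 64 + 32 + 5 = 101.  After 101 is
 * freed and the slot reused, the next pid from that slot is
 * 96 + 32 + 5 = 133: successive pids from one slot differ by the table
 * size, so a pid value cannot recur until the count wraps past pid_max.
 */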

/*
 * Free a process id - called from proc_free (in kern_exit.c)
 *
 * Called with the proc_lock held.
 */
void
proc_free_pid(pid_t pid)
{
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pid & pid_tbl_mask];

	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
	KASSERT(pt->pt_pid == pid);
	pt->pt_pid = 0;

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		pt->pt_pid = 0;
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	atomic_dec_uint(&nprocs);
}

void
proc_free_mem(struct proc *p)
{

	kdtrace_proc_dtor(NULL, p);
	pool_cache_put(proc_cache, p);
}

/*
 * proc_enterpgrp: move p to a new or existing process group (and session).
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
 */
int
proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *p;
	int rval;
	pid_t pg_id = NO_PGID;

	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;

	/* Allocate data areas we might need before doing any validity checks */
	mutex_enter(proc_lock);		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		mutex_exit(proc_lock);
		new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
		mutex_enter(proc_lock);
	} else
		new_pgrp = NULL;
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (pid != curp->p_pid) {
		/* Must exist and be one of our children... */
		p = proc_find(pid);
		if (p == NULL || !p_inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & PK_EXEC) {
			rval = EACCES;
			goto done;
		}
	} else {
		/* ... setsid() cannot re-enter a pgrp */
		if (mksess && (curp->p_pgid == curp->p_pid ||
		    pgrp_find(curp->p_pid)))
			goto done;
		p = curp;
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */

	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = NULL;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_lflag &= ~PL_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			proc_sesshold(sess);
		}
		pgrp->pg_session = sess;
		sess = NULL;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Interlock with ttread(). */
	mutex_spin_enter(&tty_lock);

	/* Move process to requested group. */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	/* Done with the swap; we can release the tty mutex. */
	mutex_spin_exit(&tty_lock);

done:
	if (pg_id != NO_PGID) {
		/* Releases proc_lock. */
		pg_delete(pg_id);
	} else {
		mutex_exit(proc_lock);
	}
	if (sess != NULL)
		kmem_free(sess, sizeof(*sess));
	if (new_pgrp != NULL)
		kmem_free(new_pgrp, sizeof(*new_pgrp));
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * proc_leavepgrp: remove a process from its process group.
 * => must be called with the proc_lock held, which will be released;
 */
void
proc_leavepgrp(struct proc *p)
{
	struct pgrp *pgrp;

	KASSERT(mutex_owned(proc_lock));

	/* Interlock with ttread() */
	mutex_spin_enter(&tty_lock);
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = NULL;
	mutex_spin_exit(&tty_lock);

	if (LIST_EMPTY(&pgrp->pg_members)) {
		/* Releases proc_lock. */
		pg_delete(pgrp->pg_id);
	} else {
		mutex_exit(proc_lock);
	}
}
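
/*
 * Illustrative userland view (sketch, not kernel code): the
 * proc_enterpgrp() path above is what a job-control shell exercises
 * when starting a pipeline:
 *
 *	pid_t pid = fork();
 *	if (pid == 0) {
 *		(void)setpgid(0, 0);	// child: new pgrp, pgid == pid
 *		execvp(cmd, argv);
 *	}
 *	(void)setpgid(pid, pid);	// parent repeats, closing the race
 *	(void)tcsetpgrp(STDIN_FILENO, pid);
 *
 * Both setpgid(2) calls reach proc_enterpgrp() via sys_setpgid() with
 * mksess false; setsid(2) takes the mksess path instead.
 */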

/*
 * pg_remove: remove a process group from the table.
 * => must be called with the proc_lock held;
 * => returns process group to free;
 */
static struct pgrp *
pg_remove(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;

	KASSERT(pgrp != NULL);
	KASSERT(pgrp->pg_id == pg_id);
	KASSERT(LIST_EMPTY(&pgrp->pg_members));

	pt->pt_pgrp = NULL;

	if (!P_VALID(pt->pt_proc)) {
		/* Orphaned pgrp, put slot onto free list. */
		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		KASSERT(pt->pt_pid == 0);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	return pgrp;
}

/*
 * pg_delete: delete and free a process group.
 * => must be called with the proc_lock held, which will be released.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
		mutex_exit(proc_lock);
		return;
	}

	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if last reference. Note: proc_sessrele() releases proc_lock.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done by proc_sessrele(). */
		kmem_free(pg, sizeof(struct pgrp));
	}
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session). If that count reaches zero, the
 * process group becomes orphaned. Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proc_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	KASSERT(mutex_owned(proc_lock));

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering) {
			pgrp->pg_jobc++;
			p->p_lflag &= ~PL_ORPHANPG;
		} else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
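	 * (For example, if a shell in pgrp A forks a pipeline into
	 * pgrp B and then exits, each pipeline member loses its
	 * qualifying parent; once pg_jobc of B reaches zero the group
	 * is orphaned and orphanpg() signals any stopped members.)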
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering) {
				child->p_lflag &= ~PL_ORPHANPG;
				hispgrp->pg_jobc++;
			} else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang-up all process in that group.
 *
 * Call with proc_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	KASSERT(mutex_owned(proc_lock));

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			p->p_lflag |= PL_ORPHANPG;
			psignal(p, SIGHUP);
			psignal(p, SIGCONT);
		}
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
			    pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    LIST_FIRST(&pgrp->pg_members));
			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
static int	kstackleftmin = KSTACK_SIZE;
static int	kstackleftthres = KSTACK_SIZE / 8;

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill all the stack with magic number
	 * so that later modification on it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}
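
/*
 * Sketch of the check below (illustrative numbers): since the stack
 * was pre-filled with KSTACK_MAGIC, the first word that no longer
 * reads 0xdeadbeaf marks the deepest extent the stack has reached.
 * E.g. on a machine with a 16 KB kernel stack, finding the first
 * overwritten word 2 KB above the low end means stackleft == 2048;
 * any value below kstackleftthres (one eighth of the stack) triggers
 * the warning, and zero or less panics.
 */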

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	int ret = 0;

	marker.p_flag = PK_MARKER;
	mutex_enter(proc_lock);
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & PK_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		KASSERT(mutex_owned(proc_lock));
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	mutex_exit(proc_lock);

	return ret;
}

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

	/* XXXCDC: how should locking work here? */

	/* curproc exception is for coredump. */

	if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
	    (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
		return EFAULT;
	}

	uvmspace_addref(p->p_vmspace);
	*vm = p->p_vmspace;

	return 0;
}

/*
 * Acquire a write lock on the process credential.
 */
void
proc_crmod_enter(void)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	/* Reset what needs to be reset in plimit. */
	if (p->p_limit->pl_corename != defcorename) {
		lim_setcorename(p, defcorename, 0);
	}

	mutex_enter(p->p_lock);

	/* Ensure the LWP cached credentials are up to date. */
	if ((oc = l->l_cred) != p->p_cred) {
		kauth_cred_hold(p->p_cred);
		l->l_cred = p->p_cred;
		kauth_cred_free(oc);
	}
}

/*
 * Set in a new process credential, and drop the write lock.  The credential
 * must have a reference already.  Optionally, free a no-longer required
 * credential.  The scheduler also needs to inspect p_cred, so we also
 * briefly acquire the sched state mutex.
 */
void
proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
{
	struct lwp *l = curlwp, *l2;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	KASSERT(mutex_owned(p->p_lock));

	/* Is there a new credential to set in? */
	if (scred != NULL) {
		p->p_cred = scred;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2 != l)
				l2->l_prflag |= LPR_CRMOD;
		}

		/* Ensure the LWP cached credentials are up to date. */
		if ((oc = l->l_cred) != scred) {
			kauth_cred_hold(scred);
			l->l_cred = scred;
		}
	} else
		oc = NULL;	/* XXXgcc */

	if (sugid) {
		/*
		 * Mark process as having changed credentials, stops
		 * tracing etc.
		 */
		p->p_flag |= PK_SUGID;
	}

	mutex_exit(p->p_lock);

	/* If there is a credential to be released, free it now. */
	if (fcred != NULL) {
		KASSERT(scred != NULL);
		kauth_cred_free(fcred);
		if (oc != scred)
			kauth_cred_free(oc);
	}
}

/*
 * proc_specific_key_create --
 *	Create a key for subsystem proc-specific data.
 */
int
proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
}

/*
 * proc_specific_key_delete --
 *	Delete a key for subsystem proc-specific data.
 */
void
proc_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(proc_specificdata_domain, key);
}

/*
 * proc_initspecific --
 *	Initialize a proc's specificdata container.
 */
void
proc_initspecific(struct proc *p)
{
	int error __diagused;

	error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
	KASSERT(error == 0);
}

/*
 * proc_finispecific --
 *	Finalize a proc's specificdata container.
 */
void
proc_finispecific(struct proc *p)
{

	specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
}

/*
 * proc_getspecific --
 *	Return proc-specific data corresponding to the specified key.
 */
void *
proc_getspecific(struct proc *p, specificdata_key_t key)
{

	return (specificdata_getspecific(proc_specificdata_domain,
	    &p->p_specdataref, key));
}
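
/*
 * Illustrative sketch of the wrappers above, for a hypothetical
 * subsystem "foo" (the key, destructor and data names are invented
 * for the example): create a key once at initialization, then hang
 * per-process data off any proc:
 *
 *	static specificdata_key_t foo_key;
 *
 *	error = proc_specific_key_create(&foo_key, foo_dtor);
 *	proc_setspecific(p, foo_key, foo_data);
 *	struct foo *fp = proc_getspecific(p, foo_key);
 *
 * foo_dtor() is invoked on any attached datum when the process's
 * container is finalized at exit or when the key is deleted.
 */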

/*
 * proc_setspecific --
 *	Set proc-specific data corresponding to the specified key.
 */
void
proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
{

	specificdata_setspecific(proc_specificdata_domain,
	    &p->p_specdataref, key, data);
}

int
proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
{
	int r = 0;

	if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
	    kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
		/*
		 * suid proc of ours or proc not ours
		 */
		r = EPERM;
	} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
		/*
		 * sgid proc has sgid back to us temporarily
		 */
		r = EPERM;
	} else {
		/*
		 * our rgid must be in target's group list (ie,
		 * sub-processes started by a sgid process)
		 */
		int ismember = 0;

		if (kauth_cred_ismember_gid(cred,
		    kauth_cred_getgid(target), &ismember) != 0 ||
		    !ismember)
			r = EPERM;
	}

	return (r);
}

/*
 * sysctl stuff
 */

#define	KERN_PROCSLOP	(5 * sizeof(struct kinfo_proc))

static const u_int sysctl_flagmap[] = {
	PK_ADVLOCK, P_ADVLOCK,
	PK_EXEC, P_EXEC,
	PK_NOCLDWAIT, P_NOCLDWAIT,
	PK_32, P_32,
	PK_CLDSIGIGN, P_CLDSIGIGN,
	PK_SUGID, P_SUGID,
	0
};

static const u_int sysctl_sflagmap[] = {
	PS_NOCLDSTOP, P_NOCLDSTOP,
	PS_WEXIT, P_WEXIT,
	PS_STOPFORK, P_STOPFORK,
	PS_STOPEXEC, P_STOPEXEC,
	PS_STOPEXIT, P_STOPEXIT,
	0
};

static const u_int sysctl_slflagmap[] = {
	PSL_TRACED, P_TRACED,
	PSL_FSTRACE, P_FSTRACE,
	PSL_CHTRACED, P_CHTRACED,
	PSL_SYSCALL, P_SYSCALL,
	0
};

static const u_int sysctl_lflagmap[] = {
	PL_CONTROLT, P_CONTROLT,
	PL_PPWAIT, P_PPWAIT,
	0
};

static const u_int sysctl_stflagmap[] = {
	PST_PROFIL, P_PROFIL,
	0

};

/* used by kern_lwp also */
const u_int sysctl_lwpflagmap[] = {
	LW_SINTR, L_SINTR,
	LW_SYSTEM, L_SYSTEM,
	0
};

/*
 * Find the most ``active'' lwp of a process and return it for ps display
 * purposes
 */
static struct lwp *
proc_active_lwp(struct proc *p)
{
	static const int ostat[] = {
		0,
		2,	/* LSIDL */
		6,	/* LSRUN */
		5,	/* LSSLEEP */
		4,	/* LSSTOP */
		0,	/* LSZOMB */
		1,	/* LSDEAD */
		7,	/* LSONPROC */
		3	/* LSSUSPENDED */
	};

	struct lwp *l, *lp = NULL;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
		if (lp == NULL ||
		    ostat[l->l_stat] > ostat[lp->l_stat] ||
		    (ostat[l->l_stat] == ostat[lp->l_stat] &&
		    l->l_cpticks > lp->l_cpticks)) {
			lp = l;
			continue;
		}
	}
	return lp;
}

static int
sysctl_doeproc(SYSCTLFN_ARGS)
{
	union {
		struct kinfo_proc kproc;
		struct kinfo_proc2 kproc2;
	} *kbuf;
	struct proc *p, *next, *marker;
	char *where, *dp;
	int type, op, arg, error;
	u_int elem_size, kelem_size, elem_count;
	size_t buflen, needed;
	bool match, zombie, mmmbrains;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	dp = where = oldp;
	buflen = where != NULL ? *oldlenp : 0;
	error = 0;
	needed = 0;
	type = rnode->sysctl_num;

	if (type == KERN_PROC) {
		if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
			return (EINVAL);
		op = name[0];
		if (op != KERN_PROC_ALL)
			arg = name[1];
		else
			arg = 0;	/* Quell compiler warning */
		elem_count = 0;		/* Ditto */
		kelem_size = elem_size = sizeof(kbuf->kproc);
	} else {
		if (namelen != 4)
			return (EINVAL);
		op = name[0];
		arg = name[1];
		elem_size = name[2];
		elem_count = name[3];
		kelem_size = sizeof(kbuf->kproc2);
	}

	sysctl_unlock();

	kbuf = kmem_alloc(sizeof(*kbuf), KM_SLEEP);
	marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
	marker->p_flag = PK_MARKER;

	mutex_enter(proc_lock);
	mmmbrains = false;
	for (p = LIST_FIRST(&allproc);; p = next) {
		if (p == NULL) {
			if (!mmmbrains) {
				p = LIST_FIRST(&zombproc);
				mmmbrains = true;
			}
			if (p == NULL)
				break;
		}
		next = LIST_NEXT(p, p_list);
		if ((p->p_flag & PK_MARKER) != 0)
			continue;

		/*
		 * Skip embryonic processes.
		 */
		if (p->p_stat == SIDL)
			continue;

		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error != 0) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * TODO - make more efficient (see notes below).
		 * do by session.
		 */
		switch (op) {
		case KERN_PROC_PID:
			/* could do this with just a lookup */
			match = (p->p_pid == (pid_t)arg);
			break;

		case KERN_PROC_PGRP:
			/* could do this by traversing pgrp */
			match = (p->p_pgrp->pg_id == (pid_t)arg);
			break;

		case KERN_PROC_SESSION:
			match = (p->p_session->s_sid == (pid_t)arg);
			break;

		case KERN_PROC_TTY:
			match = true;
			if (arg == (int) KERN_PROC_TTY_REVOKE) {
				if ((p->p_lflag & PL_CONTROLT) == 0 ||
				    p->p_session->s_ttyp == NULL ||
				    p->p_session->s_ttyvp != NULL) {
					match = false;
				}
			} else if ((p->p_lflag & PL_CONTROLT) == 0 ||
			    p->p_session->s_ttyp == NULL) {
				if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
					match = false;
				}
			} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
				match = false;
			}
			break;

		case KERN_PROC_UID:
			match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RUID:
			match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_GID:
			match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RGID:
			match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_ALL:
			match = true;
			/* allow everything */
			break;

		default:
			error = EINVAL;
			mutex_exit(p->p_lock);
			goto cleanup;
		}
		if (!match) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * Grab a hold on the process.
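		 * A zombie has no p_reflock to hold, so the marker is
		 * linked in after it instead; iteration then resumes
		 * from the marker once proc_lock has been dropped and
		 * re-taken around the copyout.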
		 */
		if (mmmbrains) {
			zombie = true;
		} else {
			zombie = !rw_tryenter(&p->p_reflock, RW_READER);
		}
		if (zombie) {
			LIST_INSERT_AFTER(p, marker, p_list);
		}

		if (buflen >= elem_size &&
		    (type == KERN_PROC || elem_count > 0)) {
			if (type == KERN_PROC) {
				kbuf->kproc.kp_proc = *p;
				fill_eproc(p, &kbuf->kproc.kp_eproc, zombie);
			} else {
				fill_kproc2(p, &kbuf->kproc2, zombie);
				elem_count--;
			}
			mutex_exit(p->p_lock);
			mutex_exit(proc_lock);
			/*
			 * Copy out elem_size, but not larger than kelem_size
			 */
			error = sysctl_copyout(l, kbuf, dp,
			    min(kelem_size, elem_size));
			mutex_enter(proc_lock);
			if (error) {
				goto bah;
			}
			dp += elem_size;
			buflen -= elem_size;
		} else {
			mutex_exit(p->p_lock);
		}
		needed += elem_size;

		/*
		 * Release reference to process.
		 */
		if (zombie) {
			next = LIST_NEXT(marker, p_list);
			LIST_REMOVE(marker, p_list);
		} else {
			rw_exit(&p->p_reflock);
			next = LIST_NEXT(p, p_list);
		}
	}
	mutex_exit(proc_lock);

	if (where != NULL) {
		*oldlenp = dp - where;
		if (needed > *oldlenp) {
			error = ENOMEM;
			goto out;
		}
	} else {
		needed += KERN_PROCSLOP;
		*oldlenp = needed;
	}
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return 0;
bah:
	if (zombie)
		LIST_REMOVE(marker, p_list);
	else
		rw_exit(&p->p_reflock);
cleanup:
	mutex_exit(proc_lock);
out:
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return error;
}

int
copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
{

#ifdef COMPAT_NETBSD32
	if (p->p_flag & PK_32) {
		struct ps_strings32 arginfo32;

		int error = copyin_proc(p, (void *)p->p_psstrp, &arginfo32,
		    sizeof(arginfo32));
		if (error)
			return error;
		arginfo->ps_argvstr = (void *)(uintptr_t)arginfo32.ps_argvstr;
		arginfo->ps_nargvstr = arginfo32.ps_nargvstr;
		arginfo->ps_envstr = (void *)(uintptr_t)arginfo32.ps_envstr;
		arginfo->ps_nenvstr = arginfo32.ps_nenvstr;
		return 0;
	}
#endif
	return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
}

static int
copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
{
	void **cookie = cookie_;
	struct lwp *l = cookie[0];
	char *dst = cookie[1];

	return sysctl_copyout(l, src, dst + off, len);
}
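
/*
 * Illustrative userland sketch (not part of the kernel): reading a
 * process's argument vector through the handler below.  The two MIB
 * words after CTL_KERN/KERN_PROC_ARGS are pid and type:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *	char buf[ARG_MAX];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctl(mib, 4, buf, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * On success buf holds len bytes of NUL-separated argument strings.
 */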

/*
 * sysctl helper routine for kern.proc_args pseudo-subtree.
 */
static int
sysctl_kern_proc_args(SYSCTLFN_ARGS)
{
	struct ps_strings pss;
	struct proc *p;
	pid_t pid;
	int type, error;
	void *cookie[2];

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (newp != NULL || namelen != 2)
		return (EINVAL);
	pid = name[0];
	type = name[1];

	switch (type) {
	case KERN_PROC_ARGV:
	case KERN_PROC_NARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NENV:
		/* ok */
		break;
	default:
		return (EINVAL);
	}

	sysctl_unlock();

	/* check pid */
	mutex_enter(proc_lock);
	if ((p = proc_find(pid)) == NULL) {
		error = EINVAL;
		goto out_locked;
	}
	mutex_enter(p->p_lock);

	/* Check permission. */
	if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
	else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
	else
		error = EINVAL; /* XXXGCC */
	if (error) {
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	if (oldp == NULL) {
		if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
			*oldlenp = sizeof (int);
		else
			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
		error = 0;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	/*
	 * Zombies don't have a stack, so we can't read their psstrings.
	 * System processes also don't have a user stack.
	 */
	if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
		error = EINVAL;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
	mutex_exit(p->p_lock);
	if (error) {
		goto out_locked;
	}
	mutex_exit(proc_lock);

	if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
		int value;
		if ((error = copyin_psstrings(p, &pss)) == 0) {
			if (type == KERN_PROC_NARGV)
				value = pss.ps_nargvstr;
			else
				value = pss.ps_nenvstr;
			error = sysctl_copyout(l, &value, oldp, sizeof(value));
			*oldlenp = sizeof(value);
		}
	} else {
		cookie[0] = l;
		cookie[1] = oldp;
		error = copy_procargs(p, type, oldlenp,
		    copy_procargs_sysctl_cb, cookie);
	}
	rw_exit(&p->p_reflock);
	sysctl_relock();
	return error;

out_locked:
	mutex_exit(proc_lock);
	sysctl_relock();
	return error;
}

int
copy_procargs(struct proc *p, int oid, size_t *limit,
    int (*cb)(void *, const void *, size_t, size_t), void *cookie)
{
	struct ps_strings pss;
	size_t len, i, loaded, entry_len;
	struct uio auio;
	struct iovec aiov;
	int error, argvlen;
	char *arg;
	char **argv;
	vaddr_t user_argv;
	struct vmspace *vmspace;

	/*
	 * Allocate a temporary buffer to hold the argument vector and
	 * the arguments themselves.
	 */
	arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
	argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);

	/*
	 * Lock the process down in memory.
	 */
	vmspace = p->p_vmspace;
	uvmspace_addref(vmspace);

	/*
	 * Read in the ps_strings structure.
	 */
	if ((error = copyin_psstrings(p, &pss)) != 0)
		goto done;

	/*
	 * Now read the address of the argument vector.
	 */
	switch (oid) {
	case KERN_PROC_ARGV:
		user_argv = (uintptr_t)pss.ps_argvstr;
		argvlen = pss.ps_nargvstr;
		break;
	case KERN_PROC_ENV:
		user_argv = (uintptr_t)pss.ps_envstr;
		argvlen = pss.ps_nenvstr;
		break;
	default:
		error = EINVAL;
		goto done;
	}

	if (argvlen < 0) {
		error = EIO;
		goto done;
	}

#ifdef COMPAT_NETBSD32
	if (p->p_flag & PK_32)
		entry_len = sizeof(netbsd32_charp);
	else
#endif
		entry_len = sizeof(char *);

	/*
	 * Now copy each string.
	 */
	len = 0;	/* bytes written to user buffer */
	loaded = 0;	/* bytes from argv already processed */
	i = 0;		/* To make compiler happy */

	for (; argvlen; --argvlen) {
		int finished = 0;
		vaddr_t base;
		size_t xlen;
		int j;

		if (loaded == 0) {
			size_t rem = entry_len * argvlen;
			loaded = MIN(rem, PAGE_SIZE);
			error = copyin_vmspace(vmspace,
			    (const void *)user_argv, argv, loaded);
			if (error)
				break;
			user_argv += loaded;
			i = 0;
		}

#ifdef COMPAT_NETBSD32
		if (p->p_flag & PK_32) {
			netbsd32_charp *argv32;

			argv32 = (netbsd32_charp *)argv;
			base = (vaddr_t)NETBSD32PTR64(argv32[i++]);
		} else
#endif
			base = (vaddr_t)argv[i++];
		loaded -= entry_len;

		/*
		 * The program has messed around with its arguments,
		 * possibly deleting some, and replacing them with
		 * NULL's. Treat this as the last argument and not
		 * a failure.
		 */
		if (base == 0)
			break;

		while (!finished) {
			xlen = PAGE_SIZE - (base & PAGE_MASK);

			aiov.iov_base = arg;
			aiov.iov_len = PAGE_SIZE;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = base;
			auio.uio_resid = xlen;
			auio.uio_rw = UIO_READ;
			UIO_SETUP_SYSSPACE(&auio);
			error = uvm_io(&vmspace->vm_map, &auio);
			if (error)
				goto done;

			/* Look for the end of the string */
			for (j = 0; j < xlen; j++) {
				if (arg[j] == '\0') {
					xlen = j + 1;
					finished = 1;
					break;
				}
			}

			/* Check for user buffer overflow */
			if (len + xlen > *limit) {
				finished = 1;
				if (len > *limit)
					xlen = 0;
				else
					xlen = *limit - len;
			}

			/* Copyout the page */
			error = (*cb)(cookie, arg, len, xlen);
			if (error)
				goto done;

			len += xlen;
			base += xlen;
		}
	}
	*limit = len;

done:
	kmem_free(argv, PAGE_SIZE);
	kmem_free(arg, PAGE_SIZE);
	uvmspace_free(vmspace);
	return error;
}

/*
 * Fill in an eproc structure for the specified process.
 */
void
fill_eproc(struct proc *p, struct eproc *ep, bool zombie)
{
	struct tty *tp;
	struct lwp *l;

	KASSERT(mutex_owned(proc_lock));
	KASSERT(mutex_owned(p->p_lock));

	memset(ep, 0, sizeof(*ep));

	ep->e_paddr = p;
	ep->e_sess = p->p_session;
	if (p->p_cred) {
		kauth_cred_topcred(p->p_cred, &ep->e_pcred);
		kauth_cred_toucred(p->p_cred, &ep->e_ucred);
	}
	if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
		struct vmspace *vm = p->p_vmspace;

		ep->e_vm.vm_rssize = vm_resident_count(vm);
		ep->e_vm.vm_tsize = vm->vm_tsize;
		ep->e_vm.vm_dsize = vm->vm_dsize;
		ep->e_vm.vm_ssize = vm->vm_ssize;
		ep->e_vm.vm_map.size = vm->vm_map.size;

		/* Pick the primary (first) LWP */
		l = proc_active_lwp(p);
		KASSERT(l != NULL);
		lwp_lock(l);
		if (l->l_wchan)
			strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
		lwp_unlock(l);
	}
	if (p->p_pptr)
		ep->e_ppid = p->p_pptr->p_pid;
	if (p->p_pgrp && p->p_session) {
		ep->e_pgid = p->p_pgrp->pg_id;
		ep->e_jobc = p->p_pgrp->pg_jobc;
		ep->e_sid = p->p_session->s_sid;
		if ((p->p_lflag & PL_CONTROLT) &&
		    (tp = ep->e_sess->s_ttyp)) {
			ep->e_tdev = tp->t_dev;
			ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
			ep->e_tsess = tp->t_session;
		} else
			ep->e_tdev = (uint32_t)NODEV;
		ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
		if (SESS_LEADER(p))
			ep->e_flag |= EPROC_SLEADER;
		strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME);
	}
	ep->e_xsize = ep->e_xrssize = 0;
	ep->e_xccount = ep->e_xswrss = 0;
}

/*
 * Fill in a kinfo_proc2 structure for the specified process.
 */
void
fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie)
{
	struct tty *tp;
	struct lwp *l, *l2;
	struct timeval ut, st, rt;
	sigset_t ss1, ss2;
	struct rusage ru;
	struct vmspace *vm;

	KASSERT(mutex_owned(proc_lock));
	KASSERT(mutex_owned(p->p_lock));

	sigemptyset(&ss1);
	sigemptyset(&ss2);
	memset(ki, 0, sizeof(*ki));

	ki->p_paddr = PTRTOUINT64(p);
	ki->p_fd = PTRTOUINT64(p->p_fd);
	ki->p_cwdi = PTRTOUINT64(p->p_cwdi);
	ki->p_stats = PTRTOUINT64(p->p_stats);
	ki->p_limit = PTRTOUINT64(p->p_limit);
	ki->p_vmspace = PTRTOUINT64(p->p_vmspace);
	ki->p_sigacts = PTRTOUINT64(p->p_sigacts);
	ki->p_sess = PTRTOUINT64(p->p_session);
	ki->p_tsess = 0;	/* may be changed if controlling tty below */
	ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru);
	ki->p_eflag = 0;
	ki->p_exitsig = p->p_exitsig;
	ki->p_flag = L_INMEM;	/* Process never swapped out */
	ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
	ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
	ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
	ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
	ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
	ki->p_pid = p->p_pid;
	if (p->p_pptr)
		ki->p_ppid = p->p_pptr->p_pid;
	else
		ki->p_ppid = 0;
	ki->p_uid = kauth_cred_geteuid(p->p_cred);
	ki->p_ruid = kauth_cred_getuid(p->p_cred);
	ki->p_gid = kauth_cred_getegid(p->p_cred);
	ki->p_rgid = kauth_cred_getgid(p->p_cred);
	ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
	ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
	ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
	kauth_cred_getgroups(p->p_cred, ki->p_groups,
	    min(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
	    UIO_SYSSPACE);

	ki->p_uticks = p->p_uticks;
	ki->p_sticks = p->p_sticks;
	ki->p_iticks = p->p_iticks;
	ki->p_tpgid = NO_PGID;	/* may be changed if controlling tty below */
	ki->p_tracep = PTRTOUINT64(p->p_tracep);
	ki->p_traceflag = p->p_traceflag;

	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore, sizeof(ki_sigset_t));
	memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));

	ki->p_cpticks = 0;
	ki->p_pctcpu = p->p_pctcpu;
	ki->p_estcpu = 0;
	ki->p_stat = p->p_stat;	/* Will likely be overridden by LWP status */
	ki->p_realstat = p->p_stat;
	ki->p_nice = p->p_nice;
	ki->p_xstat = p->p_xstat;
	ki->p_acflag = p->p_acflag;

	strncpy(ki->p_comm, p->p_comm,
	    min(sizeof(ki->p_comm), sizeof(p->p_comm)));
	strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));

	ki->p_nlwps = p->p_nlwps;
	ki->p_realflag = ki->p_flag;

	if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
		vm = p->p_vmspace;
		ki->p_vm_rssize = vm_resident_count(vm);
		ki->p_vm_tsize = vm->vm_tsize;
		ki->p_vm_dsize = vm->vm_dsize;
		ki->p_vm_ssize = vm->vm_ssize;
		ki->p_vm_vsize = atop(vm->vm_map.size);
		/*
		 * Since the stack is initially mapped mostly with
		 * PROT_NONE and grown as needed, adjust the "mapped size"
		 * to skip the unused stack portion.
		 */
		ki->p_vm_msize =
		    atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;

		/* Pick the primary (first) LWP */
		l = proc_active_lwp(p);
		KASSERT(l != NULL);
		lwp_lock(l);
		ki->p_nrlwps = p->p_nrlwps;
		ki->p_forw = 0;
		ki->p_back = 0;
		ki->p_addr = PTRTOUINT64(l->l_addr);
		ki->p_stat = l->l_stat;
		ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
		ki->p_swtime = l->l_swtime;
		ki->p_slptime = l->l_slptime;
		if (l->l_stat == LSONPROC)
			ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
		else
			ki->p_schedflags = 0;
		ki->p_priority = lwp_eprio(l);
		ki->p_usrpri = l->l_priority;
		if (l->l_wchan)
			strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
		ki->p_wchan = PTRTOUINT64(l->l_wchan);
		ki->p_cpuid = cpu_index(l->l_cpu);
		lwp_unlock(l);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			/* This is hardly correct, but... */
			sigplusset(&l->l_sigpend.sp_set, &ss1);
			sigplusset(&l->l_sigmask, &ss2);
			ki->p_cpticks += l->l_cpticks;
			ki->p_pctcpu += l->l_pctcpu;
			ki->p_estcpu += l->l_estcpu;
		}
	}
	sigplusset(&p->p_sigpend.sp_set, &ss2);
	memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
	memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));

	if (p->p_session != NULL) {
		ki->p_sid = p->p_session->s_sid;
		ki->p__pgid = p->p_pgrp->pg_id;
		if (p->p_session->s_ttyvp)
			ki->p_eflag |= EPROC_CTTY;
		if (SESS_LEADER(p))
			ki->p_eflag |= EPROC_SLEADER;
		strncpy(ki->p_login, p->p_session->s_login,
		    min(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
		ki->p_jobc = p->p_pgrp->pg_jobc;
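		/*
		 * If the process has a controlling terminal, record its
		 * device, foreground process group and session.
		 */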
		if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
			ki->p_tdev = tp->t_dev;
			ki->p_tpgid = tp->t_pgrp ?
			    tp->t_pgrp->pg_id : NO_PGID;
			ki->p_tsess = PTRTOUINT64(tp->t_session);
		} else {
			ki->p_tdev = (int32_t)NODEV;
		}
	}

	if (!P_ZOMBIE(p) && !zombie) {
		ki->p_uvalid = 1;
		ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
		ki->p_ustart_usec = p->p_stats->p_start.tv_usec;

		calcru(p, &ut, &st, NULL, &rt);
		ki->p_rtime_sec = rt.tv_sec;
		ki->p_rtime_usec = rt.tv_usec;
		ki->p_uutime_sec = ut.tv_sec;
		ki->p_uutime_usec = ut.tv_usec;
		ki->p_ustime_sec = st.tv_sec;
		ki->p_ustime_usec = st.tv_usec;

		memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
		ki->p_uru_nvcsw = 0;
		ki->p_uru_nivcsw = 0;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
			ki->p_uru_nivcsw += l2->l_nivcsw;
			ruadd(&ru, &l2->l_ru);
		}
		ki->p_uru_maxrss = ru.ru_maxrss;
		ki->p_uru_ixrss = ru.ru_ixrss;
		ki->p_uru_idrss = ru.ru_idrss;
		ki->p_uru_isrss = ru.ru_isrss;
		ki->p_uru_minflt = ru.ru_minflt;
		ki->p_uru_majflt = ru.ru_majflt;
		ki->p_uru_nswap = ru.ru_nswap;
		ki->p_uru_inblock = ru.ru_inblock;
		ki->p_uru_oublock = ru.ru_oublock;
		ki->p_uru_msgsnd = ru.ru_msgsnd;
		ki->p_uru_msgrcv = ru.ru_msgrcv;
		ki->p_uru_nsignals = ru.ru_nsignals;

		timeradd(&p->p_stats->p_cru.ru_utime,
		    &p->p_stats->p_cru.ru_stime, &ut);
		ki->p_uctime_sec = ut.tv_sec;
		ki->p_uctime_usec = ut.tv_usec;
	}
}
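
/*
 * Usage sketch (an assumption for illustration, not part of this file):
 * a kinfo_proc2 filled in above is typically fetched from userland with
 * the KERN_PROC2 sysctl.  "pid" is again a hypothetical variable;
 * mib[4] carries the expected per-entry size and mib[5] the maximum
 * number of entries to return.
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <err.h>
 *	#include <stdlib.h>
 *
 *	struct kinfo_proc2 kp;
 *	size_t len = sizeof(kp);
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, pid,
 *	    sizeof(struct kinfo_proc2), 1 };
 *
 *	if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1)
 *		err(EXIT_FAILURE, "sysctl");
 */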