/*	$NetBSD: kern_proc.c,v 1.192 2014/02/25 18:30:11 pooka Exp $	*/

/*-
 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.192 2014/02/25 18:30:11 pooka Exp $");

#ifdef _KERNEL_OPT
#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_dtrace.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/pool.h>
#include <sys/pset.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/filedesc.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
#include <sys/exec.h>
#include <sys/cpu.h>

#include <uvm/uvm_extern.h>

#ifdef COMPAT_NETBSD32
#include <compat/netbsd32/netbsd32.h>
#endif

/*
 * Process lists.
 */

struct proclist	allproc		__cacheline_aligned;
struct proclist	zombproc	__cacheline_aligned;

kmutex_t *	proc_lock	__cacheline_aligned;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
	pid_t		pt_pid;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p)		((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
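
/*
 * Illustrative note (reading aid, not part of the original source): the
 * low pointer bit is the "slot is free" tag.  A free slot whose next-free
 * index is 5 stores P_FREE(5), i.e. the pointer value 0xb:
 *
 *	P_VALID(P_FREE(5))	-> false	(0xb has the low bit set)
 *	P_NEXT(P_FREE(5))	-> 5		(0xb >> 1)
 *
 * Real proc pointers are at least word-aligned, so their low bit is clear
 * and P_VALID() is true for them.
 */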

/*
 * Table of process IDs (PIDs).
 */
static struct pid_table *pid_table	__read_mostly;

#define	INITIAL_PID_TABLE_SIZE		(1 << 5)

/* Table mask, threshold for growing and number of allocated PIDs. */
static u_int		pid_tbl_mask	__read_mostly;
static u_int		pid_alloc_lim	__read_mostly;
static u_int		pid_alloc_cnt	__cacheline_aligned;

/* Next free, last free and maximum PIDs. */
static u_int		next_free_pt	__cacheline_aligned;
static u_int		last_free_pt	__cacheline_aligned;
static pid_t		pid_max		__read_mostly;

/* Components of the first process -- never freed. */

extern struct emul emul_netbsd;	/* defined in kern_exec.c */

struct session session0 = {
	.s_count = 1,
	.s_sid = 0,
};
struct pgrp pgrp0 = {
	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
	.pg_session = &session0,
};
filedesc_t filedesc0;
struct cwdinfo cwdi0 = {
	.cwdi_cmask = CMASK,
	.cwdi_refcnt = 1,
};
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct proc proc0 = {
	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
	.p_nlwps = 1,
	.p_nrlwps = 1,
	.p_nlwpid = 1,		/* must match lwp0.l_lid */
	.p_pgrp = &pgrp0,
	.p_comm = "system",
	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
	 * when they exit.  init(8) can easily wait them out for us.
	 */
	.p_flag = PK_SYSTEM | PK_NOCLDWAIT,
	.p_stat = SACTIVE,
	.p_nice = NZERO,
	.p_emul = &emul_netbsd,
	.p_cwdi = &cwdi0,
	.p_limit = &limit0,
	.p_fd = &filedesc0,
	.p_vmspace = &vmspace0,
	.p_stats = &pstat0,
	.p_sigacts = &sigacts0,
#ifdef PROC0_MD_INITIALIZERS
	PROC0_MD_INITIALIZERS
#endif
};
kauth_cred_t cred0;

static const int	nofile	= NOFILE;
static const int	maxuprc	= MAXUPRC;

static int sysctl_doeproc(SYSCTLFN_PROTO);
static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
static void fill_kproc2(struct proc *, struct kinfo_proc2 *, bool);

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static struct pgrp *	pg_remove(pid_t);
static void		pg_delete(pid_t);
static void		orphanpg(struct pgrp *);

static specificdata_domain_t proc_specificdata_domain;

static pool_cache_t proc_cache;

static kauth_listener_t proc_listener;

static int
proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_CANSEE: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_CANSEE_ARGS:
		case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
		case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
			result = KAUTH_RESULT_ALLOW;

			break;

		case KAUTH_REQ_PROCESS_CANSEE_ENV:
			if (kauth_cred_getuid(cred) !=
			    kauth_cred_getuid(p->p_cred) ||
			    kauth_cred_getuid(cred) !=
			    kauth_cred_getsvuid(p->p_cred))
				break;

			result = KAUTH_RESULT_ALLOW;

			break;

		default:
			break;
		}

		break;
	}

	case KAUTH_PROCESS_FORK: {
		int lnprocs = (int)(unsigned long)arg2;

		/*
		 * Don't allow a nonprivileged user to use the last few
		 * processes.  The variable lnprocs is the current number of
		 * processes, maxproc is the limit.
		 */
287 */ 288 if (__predict_false((lnprocs >= maxproc - 5))) 289 break; 290 291 result = KAUTH_RESULT_ALLOW; 292 293 break; 294 } 295 296 case KAUTH_PROCESS_CORENAME: 297 case KAUTH_PROCESS_STOPFLAG: 298 if (proc_uidmatch(cred, p->p_cred) == 0) 299 result = KAUTH_RESULT_ALLOW; 300 301 break; 302 303 default: 304 break; 305 } 306 307 return result; 308 } 309 310 /* 311 * Initialize global process hashing structures. 312 */ 313 void 314 procinit(void) 315 { 316 const struct proclist_desc *pd; 317 u_int i; 318 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 319 320 for (pd = proclists; pd->pd_list != NULL; pd++) 321 LIST_INIT(pd->pd_list); 322 323 proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 324 pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE 325 * sizeof(struct pid_table), KM_SLEEP); 326 pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 327 pid_max = PID_MAX; 328 329 /* Set free list running through table... 330 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 331 for (i = 0; i <= pid_tbl_mask; i++) { 332 pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1); 333 pid_table[i].pt_pgrp = 0; 334 pid_table[i].pt_pid = 0; 335 } 336 /* slot 0 is just grabbed */ 337 next_free_pt = 1; 338 /* Need to fix last entry. */ 339 last_free_pt = pid_tbl_mask; 340 pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY); 341 /* point at which we grow table - to avoid reusing pids too often */ 342 pid_alloc_lim = pid_tbl_mask - 1; 343 #undef LINK_EMPTY 344 345 proc_specificdata_domain = specificdata_domain_create(); 346 KASSERT(proc_specificdata_domain != NULL); 347 348 proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0, 349 "procpl", NULL, IPL_NONE, NULL, NULL, NULL); 350 351 proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 352 proc_listener_cb, NULL); 353 } 354 355 void 356 procinit_sysctl(void) 357 { 358 static struct sysctllog *clog; 359 360 sysctl_createv(&clog, 0, NULL, NULL, 361 CTLFLAG_PERMANENT, 362 CTLTYPE_NODE, "proc", 363 SYSCTL_DESCR("System-wide process information"), 364 sysctl_doeproc, 0, NULL, 0, 365 CTL_KERN, KERN_PROC, CTL_EOL); 366 sysctl_createv(&clog, 0, NULL, NULL, 367 CTLFLAG_PERMANENT, 368 CTLTYPE_NODE, "proc2", 369 SYSCTL_DESCR("Machine-independent process information"), 370 sysctl_doeproc, 0, NULL, 0, 371 CTL_KERN, KERN_PROC2, CTL_EOL); 372 sysctl_createv(&clog, 0, NULL, NULL, 373 CTLFLAG_PERMANENT, 374 CTLTYPE_NODE, "proc_args", 375 SYSCTL_DESCR("Process argument information"), 376 sysctl_kern_proc_args, 0, NULL, 0, 377 CTL_KERN, KERN_PROC_ARGS, CTL_EOL); 378 379 /* 380 "nodes" under these: 381 382 KERN_PROC_ALL 383 KERN_PROC_PID pid 384 KERN_PROC_PGRP pgrp 385 KERN_PROC_SESSION sess 386 KERN_PROC_TTY tty 387 KERN_PROC_UID uid 388 KERN_PROC_RUID uid 389 KERN_PROC_GID gid 390 KERN_PROC_RGID gid 391 392 all in all, probably not worth the effort... 393 */ 394 } 395 396 /* 397 * Initialize process 0. 
398 */ 399 void 400 proc0_init(void) 401 { 402 struct proc *p; 403 struct pgrp *pg; 404 struct rlimit *rlim; 405 rlim_t lim; 406 int i; 407 408 p = &proc0; 409 pg = &pgrp0; 410 411 mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); 412 mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); 413 p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 414 415 rw_init(&p->p_reflock); 416 cv_init(&p->p_waitcv, "wait"); 417 cv_init(&p->p_lwpcv, "lwpwait"); 418 419 LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling); 420 421 pid_table[0].pt_proc = p; 422 LIST_INSERT_HEAD(&allproc, p, p_list); 423 424 pid_table[0].pt_pgrp = pg; 425 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist); 426 427 #ifdef __HAVE_SYSCALL_INTERN 428 (*p->p_emul->e_syscall_intern)(p); 429 #endif 430 431 /* Create credentials. */ 432 cred0 = kauth_cred_alloc(); 433 p->p_cred = cred0; 434 435 /* Create the CWD info. */ 436 rw_init(&cwdi0.cwdi_lock); 437 438 /* Create the limits structures. */ 439 mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE); 440 441 rlim = limit0.pl_rlimit; 442 for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) { 443 rlim[i].rlim_cur = RLIM_INFINITY; 444 rlim[i].rlim_max = RLIM_INFINITY; 445 } 446 447 rlim[RLIMIT_NOFILE].rlim_max = maxfiles; 448 rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile; 449 450 rlim[RLIMIT_NPROC].rlim_max = maxproc; 451 rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc; 452 453 lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free)); 454 rlim[RLIMIT_RSS].rlim_max = lim; 455 rlim[RLIMIT_MEMLOCK].rlim_max = lim; 456 rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 457 458 rlim[RLIMIT_NTHR].rlim_max = maxlwp; 459 rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc; 460 461 /* Note that default core name has zero length. */ 462 limit0.pl_corename = defcorename; 463 limit0.pl_cnlen = 0; 464 limit0.pl_refcnt = 1; 465 limit0.pl_writeable = false; 466 limit0.pl_sv_limit = NULL; 467 468 /* Configure virtual memory system, set vm rlimits. */ 469 uvm_init_limits(p); 470 471 /* Initialize file descriptor table for proc0. */ 472 fd_init(&filedesc0); 473 474 /* 475 * Initialize proc0's vmspace, which uses the kernel pmap. 476 * All kernel processes (which never have user space mappings) 477 * share proc0's vmspace, and thus, the kernel pmap. 478 */ 479 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 480 trunc_page(VM_MAX_ADDRESS), 481 #ifdef __USE_TOPDOWN_VM 482 true 483 #else 484 false 485 #endif 486 ); 487 488 /* Initialize signal state for proc0. XXX IPL_SCHED */ 489 mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED); 490 siginit(p); 491 492 proc_initspecific(p); 493 kdtrace_proc_ctor(NULL, p); 494 } 495 496 /* 497 * Session reference counting. 498 */ 499 500 void 501 proc_sesshold(struct session *ss) 502 { 503 504 KASSERT(mutex_owned(proc_lock)); 505 ss->s_count++; 506 } 507 508 void 509 proc_sessrele(struct session *ss) 510 { 511 512 KASSERT(mutex_owned(proc_lock)); 513 /* 514 * We keep the pgrp with the same id as the session in order to 515 * stop a process being given the same pid. Since the pgrp holds 516 * a reference to the session, it must be a 'zombie' pgrp by now. 
517 */ 518 if (--ss->s_count == 0) { 519 struct pgrp *pg; 520 521 pg = pg_remove(ss->s_sid); 522 mutex_exit(proc_lock); 523 524 kmem_free(pg, sizeof(struct pgrp)); 525 kmem_free(ss, sizeof(struct session)); 526 } else { 527 mutex_exit(proc_lock); 528 } 529 } 530 531 /* 532 * Check that the specified process group is in the session of the 533 * specified process. 534 * Treats -ve ids as process ids. 535 * Used to validate TIOCSPGRP requests. 536 */ 537 int 538 pgid_in_session(struct proc *p, pid_t pg_id) 539 { 540 struct pgrp *pgrp; 541 struct session *session; 542 int error; 543 544 mutex_enter(proc_lock); 545 if (pg_id < 0) { 546 struct proc *p1 = proc_find(-pg_id); 547 if (p1 == NULL) { 548 error = EINVAL; 549 goto fail; 550 } 551 pgrp = p1->p_pgrp; 552 } else { 553 pgrp = pgrp_find(pg_id); 554 if (pgrp == NULL) { 555 error = EINVAL; 556 goto fail; 557 } 558 } 559 session = pgrp->pg_session; 560 error = (session != p->p_pgrp->pg_session) ? EPERM : 0; 561 fail: 562 mutex_exit(proc_lock); 563 return error; 564 } 565 566 /* 567 * p_inferior: is p an inferior of q? 568 */ 569 static inline bool 570 p_inferior(struct proc *p, struct proc *q) 571 { 572 573 KASSERT(mutex_owned(proc_lock)); 574 575 for (; p != q; p = p->p_pptr) 576 if (p->p_pid == 0) 577 return false; 578 return true; 579 } 580 581 /* 582 * proc_find: locate a process by the ID. 583 * 584 * => Must be called with proc_lock held. 585 */ 586 proc_t * 587 proc_find_raw(pid_t pid) 588 { 589 struct pid_table *pt; 590 proc_t *p; 591 592 KASSERT(mutex_owned(proc_lock)); 593 pt = &pid_table[pid & pid_tbl_mask]; 594 p = pt->pt_proc; 595 if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) { 596 return NULL; 597 } 598 return p; 599 } 600 601 proc_t * 602 proc_find(pid_t pid) 603 { 604 proc_t *p; 605 606 p = proc_find_raw(pid); 607 if (__predict_false(p == NULL)) { 608 return NULL; 609 } 610 611 /* 612 * Only allow live processes to be found by PID. 613 * XXX: p_stat might change, since unlocked. 614 */ 615 if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) { 616 return p; 617 } 618 return NULL; 619 } 620 621 /* 622 * pgrp_find: locate a process group by the ID. 623 * 624 * => Must be called with proc_lock held. 625 */ 626 struct pgrp * 627 pgrp_find(pid_t pgid) 628 { 629 struct pgrp *pg; 630 631 KASSERT(mutex_owned(proc_lock)); 632 633 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 634 635 /* 636 * Cannot look up a process group that only exists because the 637 * session has not died yet (traditional). 638 */ 639 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 640 return NULL; 641 } 642 return pg; 643 } 644 645 static void 646 expand_pid_table(void) 647 { 648 size_t pt_size, tsz; 649 struct pid_table *n_pt, *new_pt; 650 struct proc *proc; 651 struct pgrp *pgrp; 652 pid_t pid, rpid; 653 u_int i; 654 uint new_pt_mask; 655 656 pt_size = pid_tbl_mask + 1; 657 tsz = pt_size * 2 * sizeof(struct pid_table); 658 new_pt = kmem_alloc(tsz, KM_SLEEP); 659 new_pt_mask = pt_size * 2 - 1; 660 661 mutex_enter(proc_lock); 662 if (pt_size != pid_tbl_mask + 1) { 663 /* Another process beat us to it... */ 664 mutex_exit(proc_lock); 665 kmem_free(new_pt, tsz); 666 return; 667 } 668 669 /* 670 * Copy entries from old table into new one. 671 * If 'pid' is 'odd' we need to place in the upper half, 672 * even pid's to the lower half. 673 * Free items stay in the low half so we don't have to 674 * fixup the reference to them. 

static void
expand_pid_table(void)
{
	size_t pt_size, tsz;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	pid_t pid, rpid;
	u_int i;
	uint new_pt_mask;

	pt_size = pid_tbl_mask + 1;
	tsz = pt_size * 2 * sizeof(struct pid_table);
	new_pt = kmem_alloc(tsz, KM_SLEEP);
	new_pt_mask = pt_size * 2 - 1;

	mutex_enter(proc_lock);
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		mutex_exit(proc_lock);
		kmem_free(new_pt, tsz);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pids to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			rpid = 0;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else {
			pid = pid_table[i].pt_pid;
			rpid = pid;
		}

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;
		n_pt[pid & pt_size].pt_pid = rpid;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		n_pt[pid & pt_size].pt_pid = 0;

		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Save old table size and switch tables */
	tsz = pt_size * sizeof(struct pid_table);
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = new_pt_mask;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	mutex_exit(proc_lock);
	kmem_free(n_pt, tsz);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;

	p = pool_cache_get(proc_cache, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */
	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
	p->p_pid = -1;
	proc_alloc_pid(p);
	return p;
}

/*
 * proc_alloc_pid: allocate PID and record the given proc 'p' so that
 * proc_find_raw() can find it by the PID.
 */

pid_t
proc_alloc_pid(struct proc *p)
{
	struct pid_table *pt;
	pid_t pid;
	int nxt;

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		mutex_enter(proc_lock);
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		mutex_exit(proc_lock);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;

	KASSERT(pt->pt_pid == 0);
	pt->pt_pid = pid;
	if (p->p_pid == -1) {
		p->p_pid = pid;
	}
	pid_alloc_cnt++;
	mutex_exit(proc_lock);

	return pid;
}
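
/*
 * Worked example (reading aid, not original code): in a 32-entry table,
 * freeing pid 33 leaves slot 1 holding P_FREE(32), i.e. the old pid's
 * high bits as a saved use count.  The next allocation from slot 1 then
 * yields (32 & ~31) + 31 + 1 + 1 == 65, so a slot's pid advances by the
 * table size on each reuse and a just-freed pid is never handed straight
 * back out.
 */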
801 */ 802 void 803 proc_free_pid(pid_t pid) 804 { 805 struct pid_table *pt; 806 807 KASSERT(mutex_owned(proc_lock)); 808 809 pt = &pid_table[pid & pid_tbl_mask]; 810 811 /* save pid use count in slot */ 812 pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); 813 KASSERT(pt->pt_pid == pid); 814 pt->pt_pid = 0; 815 816 if (pt->pt_pgrp == NULL) { 817 /* link last freed entry onto ours */ 818 pid &= pid_tbl_mask; 819 pt = &pid_table[last_free_pt]; 820 pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); 821 pt->pt_pid = 0; 822 last_free_pt = pid; 823 pid_alloc_cnt--; 824 } 825 826 atomic_dec_uint(&nprocs); 827 } 828 829 void 830 proc_free_mem(struct proc *p) 831 { 832 833 kdtrace_proc_dtor(NULL, p); 834 pool_cache_put(proc_cache, p); 835 } 836 837 /* 838 * proc_enterpgrp: move p to a new or existing process group (and session). 839 * 840 * If we are creating a new pgrp, the pgid should equal 841 * the calling process' pid. 842 * If is only valid to enter a process group that is in the session 843 * of the process. 844 * Also mksess should only be set if we are creating a process group 845 * 846 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return. 847 */ 848 int 849 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess) 850 { 851 struct pgrp *new_pgrp, *pgrp; 852 struct session *sess; 853 struct proc *p; 854 int rval; 855 pid_t pg_id = NO_PGID; 856 857 sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL; 858 859 /* Allocate data areas we might need before doing any validity checks */ 860 mutex_enter(proc_lock); /* Because pid_table might change */ 861 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) { 862 mutex_exit(proc_lock); 863 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP); 864 mutex_enter(proc_lock); 865 } else 866 new_pgrp = NULL; 867 rval = EPERM; /* most common error (to save typing) */ 868 869 /* Check pgrp exists or can be created */ 870 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 871 if (pgrp != NULL && pgrp->pg_id != pgid) 872 goto done; 873 874 /* Can only set another process under restricted circumstances. */ 875 if (pid != curp->p_pid) { 876 /* Must exist and be one of our children... */ 877 p = proc_find(pid); 878 if (p == NULL || !p_inferior(p, curp)) { 879 rval = ESRCH; 880 goto done; 881 } 882 /* ... in the same session... */ 883 if (sess != NULL || p->p_session != curp->p_session) 884 goto done; 885 /* ... existing pgid must be in same session ... */ 886 if (pgrp != NULL && pgrp->pg_session != p->p_session) 887 goto done; 888 /* ... and not done an exec. */ 889 if (p->p_flag & PK_EXEC) { 890 rval = EACCES; 891 goto done; 892 } 893 } else { 894 /* ... setsid() cannot re-enter a pgrp */ 895 if (mksess && (curp->p_pgid == curp->p_pid || 896 pgrp_find(curp->p_pid))) 897 goto done; 898 p = curp; 899 } 900 901 /* Changing the process group/session of a session 902 leader is definitely off limits. */ 903 if (SESS_LEADER(p)) { 904 if (sess == NULL && p->p_pgrp == pgrp) 905 /* unless it's a definite noop */ 906 rval = 0; 907 goto done; 908 } 909 910 /* Can only create a process group with id of process */ 911 if (pgrp == NULL && pgid != pid) 912 goto done; 913 914 /* Can only create a session if creating pgrp */ 915 if (sess != NULL && pgrp != NULL) 916 goto done; 917 918 /* Check we allocated memory for a pgrp... 
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */

	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = NULL;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_lflag &= ~PL_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			proc_sesshold(sess);
		}
		pgrp->pg_session = sess;
		sess = NULL;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Interlock with ttread(). */
	mutex_spin_enter(&tty_lock);

	/* Move process to requested group. */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	/* Done with the swap; we can release the tty mutex. */
	mutex_spin_exit(&tty_lock);

done:
	if (pg_id != NO_PGID) {
		/* Releases proc_lock. */
		pg_delete(pg_id);
	} else {
		mutex_exit(proc_lock);
	}
	if (sess != NULL)
		kmem_free(sess, sizeof(*sess));
	if (new_pgrp != NULL)
		kmem_free(new_pgrp, sizeof(*new_pgrp));
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
			pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
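
/*
 * Illustrative sketch (assumption, not part of this file): setsid(2) is
 * the mksess == true caller, with the new session and process group both
 * taking the caller's pid as their id:
 *
 *	error = proc_enterpgrp(curp, curp->p_pid, curp->p_pid, true);
 *
 * setpgid(2) corresponds to mksess == false, with pid and pgid taken from
 * the syscall arguments.
 */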

/*
 * proc_leavepgrp: remove a process from its process group.
 * => must be called with the proc_lock held, which will be released;
 */
void
proc_leavepgrp(struct proc *p)
{
	struct pgrp *pgrp;

	KASSERT(mutex_owned(proc_lock));

	/* Interlock with ttread() */
	mutex_spin_enter(&tty_lock);
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = NULL;
	mutex_spin_exit(&tty_lock);

	if (LIST_EMPTY(&pgrp->pg_members)) {
		/* Releases proc_lock. */
		pg_delete(pgrp->pg_id);
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * pg_remove: remove a process group from the table.
 * => must be called with the proc_lock held;
 * => returns process group to free;
 */
static struct pgrp *
pg_remove(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;

	KASSERT(pgrp != NULL);
	KASSERT(pgrp->pg_id == pg_id);
	KASSERT(LIST_EMPTY(&pgrp->pg_members));

	pt->pt_pgrp = NULL;

	if (!P_VALID(pt->pt_proc)) {
		/* Orphaned pgrp, put slot onto free list. */
		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		KASSERT(pt->pt_pid == 0);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	return pgrp;
}

/*
 * pg_delete: delete and free a process group.
 * => must be called with the proc_lock held, which will be released.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
		mutex_exit(proc_lock);
		return;
	}

	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if last reference.  Note: proc_sessrele() releases proc_lock.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done by proc_sessrele(). */
		kmem_free(pg, sizeof(struct pgrp));
	}
}
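
/*
 * Example scenario (reading aid, not original code): a shell S in pgrp A
 * forks child C into a new pgrp B within the same session.  C's parent is
 * in a different pgrp of the session, so fixjobc() below counts C toward
 * B's pg_jobc.  If S later exits, B's count can drop to zero, at which
 * point orphanpg() sends SIGHUP and SIGCONT to any stopped members of B.
 */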
1150 */ 1151 LIST_FOREACH(child, &p->p_children, p_sibling) { 1152 hispgrp = child->p_pgrp; 1153 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1154 !P_ZOMBIE(child)) { 1155 if (entering) { 1156 child->p_lflag &= ~PL_ORPHANPG; 1157 hispgrp->pg_jobc++; 1158 } else if (--hispgrp->pg_jobc == 0) 1159 orphanpg(hispgrp); 1160 } 1161 } 1162 } 1163 1164 /* 1165 * A process group has become orphaned; 1166 * if there are any stopped processes in the group, 1167 * hang-up all process in that group. 1168 * 1169 * Call with proc_lock held. 1170 */ 1171 static void 1172 orphanpg(struct pgrp *pg) 1173 { 1174 struct proc *p; 1175 1176 KASSERT(mutex_owned(proc_lock)); 1177 1178 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1179 if (p->p_stat == SSTOP) { 1180 p->p_lflag |= PL_ORPHANPG; 1181 psignal(p, SIGHUP); 1182 psignal(p, SIGCONT); 1183 } 1184 } 1185 } 1186 1187 #ifdef DDB 1188 #include <ddb/db_output.h> 1189 void pidtbl_dump(void); 1190 void 1191 pidtbl_dump(void) 1192 { 1193 struct pid_table *pt; 1194 struct proc *p; 1195 struct pgrp *pgrp; 1196 int id; 1197 1198 db_printf("pid table %p size %x, next %x, last %x\n", 1199 pid_table, pid_tbl_mask+1, 1200 next_free_pt, last_free_pt); 1201 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1202 p = pt->pt_proc; 1203 if (!P_VALID(p) && !pt->pt_pgrp) 1204 continue; 1205 db_printf(" id %x: ", id); 1206 if (P_VALID(p)) 1207 db_printf("slotpid %d proc %p id %d (0x%x) %s\n", 1208 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm); 1209 else 1210 db_printf("next %x use %x\n", 1211 P_NEXT(p) & pid_tbl_mask, 1212 P_NEXT(p) & ~pid_tbl_mask); 1213 if ((pgrp = pt->pt_pgrp)) { 1214 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1215 pgrp->pg_session, pgrp->pg_session->s_sid, 1216 pgrp->pg_session->s_count, 1217 pgrp->pg_session->s_login); 1218 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1219 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1220 LIST_FIRST(&pgrp->pg_members)); 1221 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { 1222 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1223 p->p_pid, p, p->p_pgrp, p->p_comm); 1224 } 1225 } 1226 } 1227 } 1228 #endif /* DDB */ 1229 1230 #ifdef KSTACK_CHECK_MAGIC 1231 1232 #define KSTACK_MAGIC 0xdeadbeaf 1233 1234 /* XXX should be per process basis? */ 1235 static int kstackleftmin = KSTACK_SIZE; 1236 static int kstackleftthres = KSTACK_SIZE / 8; 1237 1238 void 1239 kstack_setup_magic(const struct lwp *l) 1240 { 1241 uint32_t *ip; 1242 uint32_t const *end; 1243 1244 KASSERT(l != NULL); 1245 KASSERT(l != &lwp0); 1246 1247 /* 1248 * fill all the stack with magic number 1249 * so that later modification on it can be detected. 1250 */ 1251 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1252 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1253 for (; ip < end; ip++) { 1254 *ip = KSTACK_MAGIC; 1255 } 1256 } 1257 1258 void 1259 kstack_check_magic(const struct lwp *l) 1260 { 1261 uint32_t const *ip, *end; 1262 int stackleft; 1263 1264 KASSERT(l != NULL); 1265 1266 /* don't check proc0 */ /*XXX*/ 1267 if (l == &lwp0) 1268 return; 1269 1270 #ifdef __MACHINE_STACK_GROWS_UP 1271 /* stack grows upwards (eg. hppa) */ 1272 ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1273 end = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1274 for (ip--; ip >= end; ip--) 1275 if (*ip != KSTACK_MAGIC) 1276 break; 1277 1278 stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip; 1279 #else /* __MACHINE_STACK_GROWS_UP */ 1280 /* stack grows downwards (eg. 
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	int ret = 0;

	marker.p_flag = PK_MARKER;
	mutex_enter(proc_lock);
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & PK_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		KASSERT(mutex_owned(proc_lock));
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	mutex_exit(proc_lock);

	return ret;
}
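
#if 0	/* illustrative sketch (assumption, not part of this file) */
/*
 * A minimal proclist_foreach_call() consumer: count the processes on
 * allproc.  The marker entry above is what allows a callback to drop and
 * re-take proc_lock (e.g. to sleep); the callback must hold the lock
 * again when it returns.
 */
static int
count_procs_cb(struct proc *p, void *arg)
{
	int *np = arg;

	(*np)++;
	return 0;		/* a non-zero return would stop the walk */
}

static int
count_procs(void)
{
	int n = 0;

	proclist_foreach_call(&allproc, count_procs_cb, &n);
	return n;
}
#endif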
1411 */ 1412 p->p_flag |= PK_SUGID; 1413 } 1414 1415 mutex_exit(p->p_lock); 1416 1417 /* If there is a credential to be released, free it now. */ 1418 if (fcred != NULL) { 1419 KASSERT(scred != NULL); 1420 kauth_cred_free(fcred); 1421 if (oc != scred) 1422 kauth_cred_free(oc); 1423 } 1424 } 1425 1426 /* 1427 * proc_specific_key_create -- 1428 * Create a key for subsystem proc-specific data. 1429 */ 1430 int 1431 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1432 { 1433 1434 return (specificdata_key_create(proc_specificdata_domain, keyp, dtor)); 1435 } 1436 1437 /* 1438 * proc_specific_key_delete -- 1439 * Delete a key for subsystem proc-specific data. 1440 */ 1441 void 1442 proc_specific_key_delete(specificdata_key_t key) 1443 { 1444 1445 specificdata_key_delete(proc_specificdata_domain, key); 1446 } 1447 1448 /* 1449 * proc_initspecific -- 1450 * Initialize a proc's specificdata container. 1451 */ 1452 void 1453 proc_initspecific(struct proc *p) 1454 { 1455 int error __diagused; 1456 1457 error = specificdata_init(proc_specificdata_domain, &p->p_specdataref); 1458 KASSERT(error == 0); 1459 } 1460 1461 /* 1462 * proc_finispecific -- 1463 * Finalize a proc's specificdata container. 1464 */ 1465 void 1466 proc_finispecific(struct proc *p) 1467 { 1468 1469 specificdata_fini(proc_specificdata_domain, &p->p_specdataref); 1470 } 1471 1472 /* 1473 * proc_getspecific -- 1474 * Return proc-specific data corresponding to the specified key. 1475 */ 1476 void * 1477 proc_getspecific(struct proc *p, specificdata_key_t key) 1478 { 1479 1480 return (specificdata_getspecific(proc_specificdata_domain, 1481 &p->p_specdataref, key)); 1482 } 1483 1484 /* 1485 * proc_setspecific -- 1486 * Set proc-specific data corresponding to the specified key. 
1487 */ 1488 void 1489 proc_setspecific(struct proc *p, specificdata_key_t key, void *data) 1490 { 1491 1492 specificdata_setspecific(proc_specificdata_domain, 1493 &p->p_specdataref, key, data); 1494 } 1495 1496 int 1497 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target) 1498 { 1499 int r = 0; 1500 1501 if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) || 1502 kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) { 1503 /* 1504 * suid proc of ours or proc not ours 1505 */ 1506 r = EPERM; 1507 } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) { 1508 /* 1509 * sgid proc has sgid back to us temporarily 1510 */ 1511 r = EPERM; 1512 } else { 1513 /* 1514 * our rgid must be in target's group list (ie, 1515 * sub-processes started by a sgid process) 1516 */ 1517 int ismember = 0; 1518 1519 if (kauth_cred_ismember_gid(cred, 1520 kauth_cred_getgid(target), &ismember) != 0 || 1521 !ismember) 1522 r = EPERM; 1523 } 1524 1525 return (r); 1526 } 1527 1528 /* 1529 * sysctl stuff 1530 */ 1531 1532 #define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc)) 1533 1534 static const u_int sysctl_flagmap[] = { 1535 PK_ADVLOCK, P_ADVLOCK, 1536 PK_EXEC, P_EXEC, 1537 PK_NOCLDWAIT, P_NOCLDWAIT, 1538 PK_32, P_32, 1539 PK_CLDSIGIGN, P_CLDSIGIGN, 1540 PK_SUGID, P_SUGID, 1541 0 1542 }; 1543 1544 static const u_int sysctl_sflagmap[] = { 1545 PS_NOCLDSTOP, P_NOCLDSTOP, 1546 PS_WEXIT, P_WEXIT, 1547 PS_STOPFORK, P_STOPFORK, 1548 PS_STOPEXEC, P_STOPEXEC, 1549 PS_STOPEXIT, P_STOPEXIT, 1550 0 1551 }; 1552 1553 static const u_int sysctl_slflagmap[] = { 1554 PSL_TRACED, P_TRACED, 1555 PSL_FSTRACE, P_FSTRACE, 1556 PSL_CHTRACED, P_CHTRACED, 1557 PSL_SYSCALL, P_SYSCALL, 1558 0 1559 }; 1560 1561 static const u_int sysctl_lflagmap[] = { 1562 PL_CONTROLT, P_CONTROLT, 1563 PL_PPWAIT, P_PPWAIT, 1564 0 1565 }; 1566 1567 static const u_int sysctl_stflagmap[] = { 1568 PST_PROFIL, P_PROFIL, 1569 0 1570 1571 }; 1572 1573 /* used by kern_lwp also */ 1574 const u_int sysctl_lwpflagmap[] = { 1575 LW_SINTR, L_SINTR, 1576 LW_SYSTEM, L_SYSTEM, 1577 0 1578 }; 1579 1580 /* 1581 * Find the most ``active'' lwp of a process and return it for ps display 1582 * purposes 1583 */ 1584 static struct lwp * 1585 proc_active_lwp(struct proc *p) 1586 { 1587 static const int ostat[] = { 1588 0, 1589 2, /* LSIDL */ 1590 6, /* LSRUN */ 1591 5, /* LSSLEEP */ 1592 4, /* LSSTOP */ 1593 0, /* LSZOMB */ 1594 1, /* LSDEAD */ 1595 7, /* LSONPROC */ 1596 3 /* LSSUSPENDED */ 1597 }; 1598 1599 struct lwp *l, *lp = NULL; 1600 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1601 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat)); 1602 if (lp == NULL || 1603 ostat[l->l_stat] > ostat[lp->l_stat] || 1604 (ostat[l->l_stat] == ostat[lp->l_stat] && 1605 l->l_cpticks > lp->l_cpticks)) { 1606 lp = l; 1607 continue; 1608 } 1609 } 1610 return lp; 1611 } 1612 1613 static int 1614 sysctl_doeproc(SYSCTLFN_ARGS) 1615 { 1616 union { 1617 struct kinfo_proc kproc; 1618 struct kinfo_proc2 kproc2; 1619 } *kbuf; 1620 struct proc *p, *next, *marker; 1621 char *where, *dp; 1622 int type, op, arg, error; 1623 u_int elem_size, kelem_size, elem_count; 1624 size_t buflen, needed; 1625 bool match, zombie, mmmbrains; 1626 1627 if (namelen == 1 && name[0] == CTL_QUERY) 1628 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1629 1630 dp = where = oldp; 1631 buflen = where != NULL ? 

static int
sysctl_doeproc(SYSCTLFN_ARGS)
{
	union {
		struct kinfo_proc kproc;
		struct kinfo_proc2 kproc2;
	} *kbuf;
	struct proc *p, *next, *marker;
	char *where, *dp;
	int type, op, arg, error;
	u_int elem_size, kelem_size, elem_count;
	size_t buflen, needed;
	bool match, zombie, mmmbrains;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	dp = where = oldp;
	buflen = where != NULL ? *oldlenp : 0;
	error = 0;
	needed = 0;
	type = rnode->sysctl_num;

	if (type == KERN_PROC) {
		if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
			return (EINVAL);
		op = name[0];
		if (op != KERN_PROC_ALL)
			arg = name[1];
		else
			arg = 0;	/* Quell compiler warning */
		elem_count = 0;		/* Ditto */
		kelem_size = elem_size = sizeof(kbuf->kproc);
	} else {
		if (namelen != 4)
			return (EINVAL);
		op = name[0];
		arg = name[1];
		elem_size = name[2];
		elem_count = name[3];
		kelem_size = sizeof(kbuf->kproc2);
	}

	sysctl_unlock();

	kbuf = kmem_alloc(sizeof(*kbuf), KM_SLEEP);
	marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
	marker->p_flag = PK_MARKER;

	mutex_enter(proc_lock);
	mmmbrains = false;
	for (p = LIST_FIRST(&allproc);; p = next) {
		if (p == NULL) {
			if (!mmmbrains) {
				p = LIST_FIRST(&zombproc);
				mmmbrains = true;
			}
			if (p == NULL)
				break;
		}
		next = LIST_NEXT(p, p_list);
		if ((p->p_flag & PK_MARKER) != 0)
			continue;

		/*
		 * Skip embryonic processes.
		 */
		if (p->p_stat == SIDL)
			continue;

		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error != 0) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * TODO - make more efficient (see notes below).
		 * do by session.
		 */
		switch (op) {
		case KERN_PROC_PID:
			/* could do this with just a lookup */
			match = (p->p_pid == (pid_t)arg);
			break;

		case KERN_PROC_PGRP:
			/* could do this by traversing pgrp */
			match = (p->p_pgrp->pg_id == (pid_t)arg);
			break;

		case KERN_PROC_SESSION:
			match = (p->p_session->s_sid == (pid_t)arg);
			break;

		case KERN_PROC_TTY:
			match = true;
			if (arg == (int) KERN_PROC_TTY_REVOKE) {
				if ((p->p_lflag & PL_CONTROLT) == 0 ||
				    p->p_session->s_ttyp == NULL ||
				    p->p_session->s_ttyvp != NULL) {
					match = false;
				}
			} else if ((p->p_lflag & PL_CONTROLT) == 0 ||
			    p->p_session->s_ttyp == NULL) {
				if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
					match = false;
				}
			} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
				match = false;
			}
			break;

		case KERN_PROC_UID:
			match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RUID:
			match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_GID:
			match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RGID:
			match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_ALL:
			match = true;
			/* allow everything */
			break;

		default:
			error = EINVAL;
			mutex_exit(p->p_lock);
			goto cleanup;
		}
		if (!match) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * Grab a hold on the process.
		 */
1762 */ 1763 if (mmmbrains) { 1764 zombie = true; 1765 } else { 1766 zombie = !rw_tryenter(&p->p_reflock, RW_READER); 1767 } 1768 if (zombie) { 1769 LIST_INSERT_AFTER(p, marker, p_list); 1770 } 1771 1772 if (buflen >= elem_size && 1773 (type == KERN_PROC || elem_count > 0)) { 1774 if (type == KERN_PROC) { 1775 kbuf->kproc.kp_proc = *p; 1776 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie); 1777 } else { 1778 fill_kproc2(p, &kbuf->kproc2, zombie); 1779 elem_count--; 1780 } 1781 mutex_exit(p->p_lock); 1782 mutex_exit(proc_lock); 1783 /* 1784 * Copy out elem_size, but not larger than kelem_size 1785 */ 1786 error = sysctl_copyout(l, kbuf, dp, 1787 min(kelem_size, elem_size)); 1788 mutex_enter(proc_lock); 1789 if (error) { 1790 goto bah; 1791 } 1792 dp += elem_size; 1793 buflen -= elem_size; 1794 } else { 1795 mutex_exit(p->p_lock); 1796 } 1797 needed += elem_size; 1798 1799 /* 1800 * Release reference to process. 1801 */ 1802 if (zombie) { 1803 next = LIST_NEXT(marker, p_list); 1804 LIST_REMOVE(marker, p_list); 1805 } else { 1806 rw_exit(&p->p_reflock); 1807 next = LIST_NEXT(p, p_list); 1808 } 1809 } 1810 mutex_exit(proc_lock); 1811 1812 if (where != NULL) { 1813 *oldlenp = dp - where; 1814 if (needed > *oldlenp) { 1815 error = ENOMEM; 1816 goto out; 1817 } 1818 } else { 1819 needed += KERN_PROCSLOP; 1820 *oldlenp = needed; 1821 } 1822 if (kbuf) 1823 kmem_free(kbuf, sizeof(*kbuf)); 1824 if (marker) 1825 kmem_free(marker, sizeof(*marker)); 1826 sysctl_relock(); 1827 return 0; 1828 bah: 1829 if (zombie) 1830 LIST_REMOVE(marker, p_list); 1831 else 1832 rw_exit(&p->p_reflock); 1833 cleanup: 1834 mutex_exit(proc_lock); 1835 out: 1836 if (kbuf) 1837 kmem_free(kbuf, sizeof(*kbuf)); 1838 if (marker) 1839 kmem_free(marker, sizeof(*marker)); 1840 sysctl_relock(); 1841 return error; 1842 } 1843 1844 int 1845 copyin_psstrings(struct proc *p, struct ps_strings *arginfo) 1846 { 1847 1848 #ifdef COMPAT_NETBSD32 1849 if (p->p_flag & PK_32) { 1850 struct ps_strings32 arginfo32; 1851 1852 int error = copyin_proc(p, (void *)p->p_psstrp, &arginfo32, 1853 sizeof(arginfo32)); 1854 if (error) 1855 return error; 1856 arginfo->ps_argvstr = (void *)(uintptr_t)arginfo32.ps_argvstr; 1857 arginfo->ps_nargvstr = arginfo32.ps_nargvstr; 1858 arginfo->ps_envstr = (void *)(uintptr_t)arginfo32.ps_envstr; 1859 arginfo->ps_nenvstr = arginfo32.ps_nenvstr; 1860 return 0; 1861 } 1862 #endif 1863 return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo)); 1864 } 1865 1866 static int 1867 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len) 1868 { 1869 void **cookie = cookie_; 1870 struct lwp *l = cookie[0]; 1871 char *dst = cookie[1]; 1872 1873 return sysctl_copyout(l, src, dst + off, len); 1874 } 1875 1876 /* 1877 * sysctl helper routine for kern.proc_args pseudo-subtree. 
1878 */ 1879 static int 1880 sysctl_kern_proc_args(SYSCTLFN_ARGS) 1881 { 1882 struct ps_strings pss; 1883 struct proc *p; 1884 pid_t pid; 1885 int type, error; 1886 void *cookie[2]; 1887 1888 if (namelen == 1 && name[0] == CTL_QUERY) 1889 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1890 1891 if (newp != NULL || namelen != 2) 1892 return (EINVAL); 1893 pid = name[0]; 1894 type = name[1]; 1895 1896 switch (type) { 1897 case KERN_PROC_ARGV: 1898 case KERN_PROC_NARGV: 1899 case KERN_PROC_ENV: 1900 case KERN_PROC_NENV: 1901 /* ok */ 1902 break; 1903 default: 1904 return (EINVAL); 1905 } 1906 1907 sysctl_unlock(); 1908 1909 /* check pid */ 1910 mutex_enter(proc_lock); 1911 if ((p = proc_find(pid)) == NULL) { 1912 error = EINVAL; 1913 goto out_locked; 1914 } 1915 mutex_enter(p->p_lock); 1916 1917 /* Check permission. */ 1918 if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV) 1919 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, 1920 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL); 1921 else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV) 1922 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, 1923 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL); 1924 else 1925 error = EINVAL; /* XXXGCC */ 1926 if (error) { 1927 mutex_exit(p->p_lock); 1928 goto out_locked; 1929 } 1930 1931 if (oldp == NULL) { 1932 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) 1933 *oldlenp = sizeof (int); 1934 else 1935 *oldlenp = ARG_MAX; /* XXX XXX XXX */ 1936 error = 0; 1937 mutex_exit(p->p_lock); 1938 goto out_locked; 1939 } 1940 1941 /* 1942 * Zombies don't have a stack, so we can't read their psstrings. 1943 * System processes also don't have a user stack. 1944 */ 1945 if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) { 1946 error = EINVAL; 1947 mutex_exit(p->p_lock); 1948 goto out_locked; 1949 } 1950 1951 error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY; 1952 mutex_exit(p->p_lock); 1953 if (error) { 1954 goto out_locked; 1955 } 1956 mutex_exit(proc_lock); 1957 1958 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) { 1959 int value; 1960 if ((error = copyin_psstrings(p, &pss)) == 0) { 1961 if (type == KERN_PROC_NARGV) 1962 value = pss.ps_nargvstr; 1963 else 1964 value = pss.ps_nenvstr; 1965 error = sysctl_copyout(l, &value, oldp, sizeof(value)); 1966 *oldlenp = sizeof(value); 1967 } 1968 } else { 1969 cookie[0] = l; 1970 cookie[1] = oldp; 1971 error = copy_procargs(p, type, oldlenp, 1972 copy_procargs_sysctl_cb, cookie); 1973 } 1974 rw_exit(&p->p_reflock); 1975 sysctl_relock(); 1976 return error; 1977 1978 out_locked: 1979 mutex_exit(proc_lock); 1980 sysctl_relock(); 1981 return error; 1982 } 1983 1984 int 1985 copy_procargs(struct proc *p, int oid, size_t *limit, 1986 int (*cb)(void *, const void *, size_t, size_t), void *cookie) 1987 { 1988 struct ps_strings pss; 1989 size_t len, i, loaded, entry_len; 1990 struct uio auio; 1991 struct iovec aiov; 1992 int error, argvlen; 1993 char *arg; 1994 char **argv; 1995 vaddr_t user_argv; 1996 struct vmspace *vmspace; 1997 1998 /* 1999 * Allocate a temporary buffer to hold the argument vector and 2000 * the arguments themselve. 2001 */ 2002 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2003 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2004 2005 /* 2006 * Lock the process down in memory. 2007 */ 2008 vmspace = p->p_vmspace; 2009 uvmspace_addref(vmspace); 2010 2011 /* 2012 * Read in the ps_strings structure. 
2013 */ 2014 if ((error = copyin_psstrings(p, &pss)) != 0) 2015 goto done; 2016 2017 /* 2018 * Now read the address of the argument vector. 2019 */ 2020 switch (oid) { 2021 case KERN_PROC_ARGV: 2022 user_argv = (uintptr_t)pss.ps_argvstr; 2023 argvlen = pss.ps_nargvstr; 2024 break; 2025 case KERN_PROC_ENV: 2026 user_argv = (uintptr_t)pss.ps_envstr; 2027 argvlen = pss.ps_nenvstr; 2028 break; 2029 default: 2030 error = EINVAL; 2031 goto done; 2032 } 2033 2034 if (argvlen < 0) { 2035 error = EIO; 2036 goto done; 2037 } 2038 2039 #ifdef COMPAT_NETBSD32 2040 if (p->p_flag & PK_32) 2041 entry_len = sizeof(netbsd32_charp); 2042 else 2043 #endif 2044 entry_len = sizeof(char *); 2045 2046 /* 2047 * Now copy each string. 2048 */ 2049 len = 0; /* bytes written to user buffer */ 2050 loaded = 0; /* bytes from argv already processed */ 2051 i = 0; /* To make compiler happy */ 2052 2053 for (; argvlen; --argvlen) { 2054 int finished = 0; 2055 vaddr_t base; 2056 size_t xlen; 2057 int j; 2058 2059 if (loaded == 0) { 2060 size_t rem = entry_len * argvlen; 2061 loaded = MIN(rem, PAGE_SIZE); 2062 error = copyin_vmspace(vmspace, 2063 (const void *)user_argv, argv, loaded); 2064 if (error) 2065 break; 2066 user_argv += loaded; 2067 i = 0; 2068 } 2069 2070 #ifdef COMPAT_NETBSD32 2071 if (p->p_flag & PK_32) { 2072 netbsd32_charp *argv32; 2073 2074 argv32 = (netbsd32_charp *)argv; 2075 base = (vaddr_t)NETBSD32PTR64(argv32[i++]); 2076 } else 2077 #endif 2078 base = (vaddr_t)argv[i++]; 2079 loaded -= entry_len; 2080 2081 /* 2082 * The program has messed around with its arguments, 2083 * possibly deleting some, and replacing them with 2084 * NULL's. Treat this as the last argument and not 2085 * a failure. 2086 */ 2087 if (base == 0) 2088 break; 2089 2090 while (!finished) { 2091 xlen = PAGE_SIZE - (base & PAGE_MASK); 2092 2093 aiov.iov_base = arg; 2094 aiov.iov_len = PAGE_SIZE; 2095 auio.uio_iov = &aiov; 2096 auio.uio_iovcnt = 1; 2097 auio.uio_offset = base; 2098 auio.uio_resid = xlen; 2099 auio.uio_rw = UIO_READ; 2100 UIO_SETUP_SYSSPACE(&auio); 2101 error = uvm_io(&vmspace->vm_map, &auio); 2102 if (error) 2103 goto done; 2104 2105 /* Look for the end of the string */ 2106 for (j = 0; j < xlen; j++) { 2107 if (arg[j] == '\0') { 2108 xlen = j + 1; 2109 finished = 1; 2110 break; 2111 } 2112 } 2113 2114 /* Check for user buffer overflow */ 2115 if (len + xlen > *limit) { 2116 finished = 1; 2117 if (len > *limit) 2118 xlen = 0; 2119 else 2120 xlen = *limit - len; 2121 } 2122 2123 /* Copyout the page */ 2124 error = (*cb)(cookie, arg, len, xlen); 2125 if (error) 2126 goto done; 2127 2128 len += xlen; 2129 base += xlen; 2130 } 2131 } 2132 *limit = len; 2133 2134 done: 2135 kmem_free(argv, PAGE_SIZE); 2136 kmem_free(arg, PAGE_SIZE); 2137 uvmspace_free(vmspace); 2138 return error; 2139 } 2140 2141 /* 2142 * Fill in an eproc structure for the specified process. 
2143 */ 2144 void 2145 fill_eproc(struct proc *p, struct eproc *ep, bool zombie) 2146 { 2147 struct tty *tp; 2148 struct lwp *l; 2149 2150 KASSERT(mutex_owned(proc_lock)); 2151 KASSERT(mutex_owned(p->p_lock)); 2152 2153 memset(ep, 0, sizeof(*ep)); 2154 2155 ep->e_paddr = p; 2156 ep->e_sess = p->p_session; 2157 if (p->p_cred) { 2158 kauth_cred_topcred(p->p_cred, &ep->e_pcred); 2159 kauth_cred_toucred(p->p_cred, &ep->e_ucred); 2160 } 2161 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) { 2162 struct vmspace *vm = p->p_vmspace; 2163 2164 ep->e_vm.vm_rssize = vm_resident_count(vm); 2165 ep->e_vm.vm_tsize = vm->vm_tsize; 2166 ep->e_vm.vm_dsize = vm->vm_dsize; 2167 ep->e_vm.vm_ssize = vm->vm_ssize; 2168 ep->e_vm.vm_map.size = vm->vm_map.size; 2169 2170 /* Pick the primary (first) LWP */ 2171 l = proc_active_lwp(p); 2172 KASSERT(l != NULL); 2173 lwp_lock(l); 2174 if (l->l_wchan) 2175 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN); 2176 lwp_unlock(l); 2177 } 2178 if (p->p_pptr) 2179 ep->e_ppid = p->p_pptr->p_pid; 2180 if (p->p_pgrp && p->p_session) { 2181 ep->e_pgid = p->p_pgrp->pg_id; 2182 ep->e_jobc = p->p_pgrp->pg_jobc; 2183 ep->e_sid = p->p_session->s_sid; 2184 if ((p->p_lflag & PL_CONTROLT) && 2185 (tp = ep->e_sess->s_ttyp)) { 2186 ep->e_tdev = tp->t_dev; 2187 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 2188 ep->e_tsess = tp->t_session; 2189 } else 2190 ep->e_tdev = (uint32_t)NODEV; 2191 ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0; 2192 if (SESS_LEADER(p)) 2193 ep->e_flag |= EPROC_SLEADER; 2194 strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME); 2195 } 2196 ep->e_xsize = ep->e_xrssize = 0; 2197 ep->e_xccount = ep->e_xswrss = 0; 2198 } 2199 2200 /* 2201 * Fill in a kinfo_proc2 structure for the specified process. 2202 */ 2203 static void 2204 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie) 2205 { 2206 struct tty *tp; 2207 struct lwp *l, *l2; 2208 struct timeval ut, st, rt; 2209 sigset_t ss1, ss2; 2210 struct rusage ru; 2211 struct vmspace *vm; 2212 2213 KASSERT(mutex_owned(proc_lock)); 2214 KASSERT(mutex_owned(p->p_lock)); 2215 2216 sigemptyset(&ss1); 2217 sigemptyset(&ss2); 2218 memset(ki, 0, sizeof(*ki)); 2219 2220 ki->p_paddr = PTRTOUINT64(p); 2221 ki->p_fd = PTRTOUINT64(p->p_fd); 2222 ki->p_cwdi = PTRTOUINT64(p->p_cwdi); 2223 ki->p_stats = PTRTOUINT64(p->p_stats); 2224 ki->p_limit = PTRTOUINT64(p->p_limit); 2225 ki->p_vmspace = PTRTOUINT64(p->p_vmspace); 2226 ki->p_sigacts = PTRTOUINT64(p->p_sigacts); 2227 ki->p_sess = PTRTOUINT64(p->p_session); 2228 ki->p_tsess = 0; /* may be changed if controlling tty below */ 2229 ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru); 2230 ki->p_eflag = 0; 2231 ki->p_exitsig = p->p_exitsig; 2232 ki->p_flag = L_INMEM; /* Process never swapped out */ 2233 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag); 2234 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag); 2235 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag); 2236 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag); 2237 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag); 2238 ki->p_pid = p->p_pid; 2239 if (p->p_pptr) 2240 ki->p_ppid = p->p_pptr->p_pid; 2241 else 2242 ki->p_ppid = 0; 2243 ki->p_uid = kauth_cred_geteuid(p->p_cred); 2244 ki->p_ruid = kauth_cred_getuid(p->p_cred); 2245 ki->p_gid = kauth_cred_getegid(p->p_cred); 2246 ki->p_rgid = kauth_cred_getgid(p->p_cred); 2247 ki->p_svuid = kauth_cred_getsvuid(p->p_cred); 2248 ki->p_svgid = kauth_cred_getsvgid(p->p_cred); 2249 ki->p_ngroups = kauth_cred_ngroups(p->p_cred); 2250 
/*
 * Fill in a kinfo_proc2 structure for the specified process.
 */
static void
fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie)
{
	struct tty *tp;
	struct lwp *l, *l2;
	struct timeval ut, st, rt;
	sigset_t ss1, ss2;
	struct rusage ru;
	struct vmspace *vm;

	KASSERT(mutex_owned(proc_lock));
	KASSERT(mutex_owned(p->p_lock));

	sigemptyset(&ss1);
	sigemptyset(&ss2);
	memset(ki, 0, sizeof(*ki));

	ki->p_paddr = PTRTOUINT64(p);
	ki->p_fd = PTRTOUINT64(p->p_fd);
	ki->p_cwdi = PTRTOUINT64(p->p_cwdi);
	ki->p_stats = PTRTOUINT64(p->p_stats);
	ki->p_limit = PTRTOUINT64(p->p_limit);
	ki->p_vmspace = PTRTOUINT64(p->p_vmspace);
	ki->p_sigacts = PTRTOUINT64(p->p_sigacts);
	ki->p_sess = PTRTOUINT64(p->p_session);
	ki->p_tsess = 0;	/* may be changed if controlling tty below */
	ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru);
	ki->p_eflag = 0;
	ki->p_exitsig = p->p_exitsig;
	ki->p_flag = L_INMEM;	/* Process never swapped out */
	ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
	ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
	ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
	ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
	ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
	ki->p_pid = p->p_pid;
	if (p->p_pptr)
		ki->p_ppid = p->p_pptr->p_pid;
	else
		ki->p_ppid = 0;
	ki->p_uid = kauth_cred_geteuid(p->p_cred);
	ki->p_ruid = kauth_cred_getuid(p->p_cred);
	ki->p_gid = kauth_cred_getegid(p->p_cred);
	ki->p_rgid = kauth_cred_getgid(p->p_cred);
	ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
	ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
	ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
	kauth_cred_getgroups(p->p_cred, ki->p_groups,
	    min(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
	    UIO_SYSSPACE);

	ki->p_uticks = p->p_uticks;
	ki->p_sticks = p->p_sticks;
	ki->p_iticks = p->p_iticks;
	ki->p_tpgid = NO_PGID;	/* may be changed if controlling tty below */
	ki->p_tracep = PTRTOUINT64(p->p_tracep);
	ki->p_traceflag = p->p_traceflag;

	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore, sizeof(ki_sigset_t));
	memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));

	ki->p_cpticks = 0;
	ki->p_pctcpu = p->p_pctcpu;
	ki->p_estcpu = 0;
	ki->p_stat = p->p_stat;	/* Will likely be overridden by LWP status */
	ki->p_realstat = p->p_stat;
	ki->p_nice = p->p_nice;
	ki->p_xstat = p->p_xstat;
	ki->p_acflag = p->p_acflag;

	strncpy(ki->p_comm, p->p_comm,
	    min(sizeof(ki->p_comm), sizeof(p->p_comm)));
	strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));

	ki->p_nlwps = p->p_nlwps;
	ki->p_realflag = ki->p_flag;

	if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
		vm = p->p_vmspace;
		ki->p_vm_rssize = vm_resident_count(vm);
		ki->p_vm_tsize = vm->vm_tsize;
		ki->p_vm_dsize = vm->vm_dsize;
		ki->p_vm_ssize = vm->vm_ssize;
		ki->p_vm_vsize = atop(vm->vm_map.size);
		/*
		 * Since the stack is initially mapped mostly with
		 * PROT_NONE and grown as needed, adjust the "mapped size"
		 * to skip the unused stack portion.
		 */
		ki->p_vm_msize =
		    atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;

		/* Pick the primary (first) LWP */
		l = proc_active_lwp(p);
		KASSERT(l != NULL);
		lwp_lock(l);
		ki->p_nrlwps = p->p_nrlwps;
		ki->p_forw = 0;
		ki->p_back = 0;
		ki->p_addr = PTRTOUINT64(l->l_addr);
		ki->p_stat = l->l_stat;
		ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
		ki->p_swtime = l->l_swtime;
		ki->p_slptime = l->l_slptime;
		if (l->l_stat == LSONPROC)
			ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
		else
			ki->p_schedflags = 0;
		ki->p_priority = lwp_eprio(l);
		ki->p_usrpri = l->l_priority;
		if (l->l_wchan)
			strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
		ki->p_wchan = PTRTOUINT64(l->l_wchan);
		ki->p_cpuid = cpu_index(l->l_cpu);
		lwp_unlock(l);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			/* This is hardly correct, but... */
			sigplusset(&l->l_sigpend.sp_set, &ss1);
			sigplusset(&l->l_sigmask, &ss2);
			ki->p_cpticks += l->l_cpticks;
			ki->p_pctcpu += l->l_pctcpu;
			ki->p_estcpu += l->l_estcpu;
		}
	}
	sigplusset(&p->p_sigpend.sp_set, &ss2);
	memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
	memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));

	if (p->p_session != NULL) {
		ki->p_sid = p->p_session->s_sid;
		ki->p__pgid = p->p_pgrp->pg_id;
		if (p->p_session->s_ttyvp)
			ki->p_eflag |= EPROC_CTTY;
		if (SESS_LEADER(p))
			ki->p_eflag |= EPROC_SLEADER;
		strncpy(ki->p_login, p->p_session->s_login,
		    min(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
		ki->p_jobc = p->p_pgrp->pg_jobc;
		if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
			ki->p_tdev = tp->t_dev;
			ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
			ki->p_tsess = PTRTOUINT64(tp->t_session);
		} else {
			ki->p_tdev = (int32_t)NODEV;
		}
	}

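	/*
	 * Timing and resource usage are reported only while the process
	 * is still alive; ki->p_uvalid below tells userland whether the
	 * remaining fields were filled in.
	 */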
	if (!P_ZOMBIE(p) && !zombie) {
		ki->p_uvalid = 1;
		ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
		ki->p_ustart_usec = p->p_stats->p_start.tv_usec;

		calcru(p, &ut, &st, NULL, &rt);
		ki->p_rtime_sec = rt.tv_sec;
		ki->p_rtime_usec = rt.tv_usec;
		ki->p_uutime_sec = ut.tv_sec;
		ki->p_uutime_usec = ut.tv_usec;
		ki->p_ustime_sec = st.tv_sec;
		ki->p_ustime_usec = st.tv_usec;

		memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
		ki->p_uru_nvcsw = 0;
		ki->p_uru_nivcsw = 0;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
			ki->p_uru_nivcsw += l2->l_nivcsw;
			ruadd(&ru, &l2->l_ru);
		}
		ki->p_uru_maxrss = ru.ru_maxrss;
		ki->p_uru_ixrss = ru.ru_ixrss;
		ki->p_uru_idrss = ru.ru_idrss;
		ki->p_uru_isrss = ru.ru_isrss;
		ki->p_uru_minflt = ru.ru_minflt;
		ki->p_uru_majflt = ru.ru_majflt;
		ki->p_uru_nswap = ru.ru_nswap;
		ki->p_uru_inblock = ru.ru_inblock;
		ki->p_uru_oublock = ru.ru_oublock;
		ki->p_uru_msgsnd = ru.ru_msgsnd;
		ki->p_uru_msgrcv = ru.ru_msgrcv;
		ki->p_uru_nsignals = ru.ru_nsignals;

		timeradd(&p->p_stats->p_cru.ru_utime,
		    &p->p_stats->p_cru.ru_stime, &ut);
		ki->p_uctime_sec = ut.tv_sec;
		ki->p_uctime_usec = ut.tv_usec;
	}
}
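
/*
 * Illustrative sketch, not part of the original source: the in-kernel
 * consumer of fill_kproc2() (sysctl_doeproc() earlier in this file)
 * fills a temporary record under the process locks and copies it out
 * afterwards, conceptually:
 *
 *	struct kinfo_proc2 kbuf;
 *	int error;
 *
 *	mutex_enter(proc_lock);
 *	mutex_enter(p->p_lock);
 *	fill_kproc2(p, &kbuf, false);
 *	mutex_exit(p->p_lock);
 *	mutex_exit(proc_lock);
 *	error = sysctl_copyout(l, &kbuf, oldp, sizeof(kbuf));
 *
 * The real consumer's buffer management and iteration differ; this
 * only shows the intended calling convention.
 */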