1 /* $NetBSD: kern_proc.c,v 1.259 2020/08/28 22:27:51 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1989, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.259 2020/08/28 22:27:51 riastradh Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_kstack.h" 69 #include "opt_maxuprc.h" 70 #include "opt_dtrace.h" 71 #include "opt_compat_netbsd32.h" 72 #include "opt_kaslr.h" 73 #endif 74 75 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \ 76 && !defined(_RUMPKERNEL) 77 #define COMPAT_NETBSD32 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/kernel.h> 83 #include <sys/proc.h> 84 #include <sys/resourcevar.h> 85 #include <sys/buf.h> 86 #include <sys/acct.h> 87 #include <sys/wait.h> 88 #include <sys/file.h> 89 #include <ufs/ufs/quota.h> 90 #include <sys/uio.h> 91 #include <sys/pool.h> 92 #include <sys/pset.h> 93 #include <sys/ioctl.h> 94 #include <sys/tty.h> 95 #include <sys/signalvar.h> 96 #include <sys/ras.h> 97 #include <sys/filedesc.h> 98 #include <sys/syscall_stats.h> 99 #include <sys/kauth.h> 100 #include <sys/sleepq.h> 101 #include <sys/atomic.h> 102 #include <sys/kmem.h> 103 #include <sys/namei.h> 104 #include <sys/dtrace_bsd.h> 105 #include <sys/sysctl.h> 106 #include <sys/exec.h> 107 #include <sys/cpu.h> 108 #include <sys/compat_stub.h> 109 #include <sys/futex.h> 110 #include <sys/pserialize.h> 111 112 #include <uvm/uvm_extern.h> 113 #include <uvm/uvm.h> 114 115 /* 116 * Process lists. 117 */ 118 119 struct proclist allproc __cacheline_aligned; 120 struct proclist zombproc __cacheline_aligned; 121 122 kmutex_t proc_lock __cacheline_aligned; 123 static pserialize_t proc_psz; 124 125 /* 126 * pid to lwp/proc lookup is done by indexing the pid_table array. 127 * Since pid numbers are only allocated when an empty slot 128 * has been found, there is no need to search any lists ever. 129 * (an orphaned pgrp will lock the slot, a session will lock 130 * the pgrp with the same number.) 131 * If the table is too small it is reallocated with twice the 132 * previous size and the entries 'unzipped' into the two halves. 133 * A linked list of free entries is passed through the pt_lwp 134 * field of 'free' items - set odd to be an invalid ptr. Two 135 * additional bits are also used to indicate if the slot is 136 * currently occupied by a proc or lwp, and if the PID is 137 * hidden from certain kinds of lookups. We thus require a 138 * minimum alignment for proc and lwp structures (LWPs are 139 * at least 32-byte aligned). 140 */ 141 142 struct pid_table { 143 uintptr_t pt_slot; 144 struct pgrp *pt_pgrp; 145 pid_t pt_pid; 146 }; 147 148 #define PT_F_FREE ((uintptr_t)__BIT(0)) 149 #define PT_F_LWP 0 /* pseudo-flag */ 150 #define PT_F_PROC ((uintptr_t)__BIT(1)) 151 152 #define PT_F_TYPEBITS (PT_F_FREE|PT_F_PROC) 153 #define PT_F_ALLBITS (PT_F_FREE|PT_F_PROC) 154 155 #define PT_VALID(s) (((s) & PT_F_FREE) == 0) 156 #define PT_RESERVED(s) ((s) == 0) 157 #define PT_NEXT(s) ((u_int)(s) >> 1) 158 #define PT_SET_FREE(pid) (((pid) << 1) | PT_F_FREE) 159 #define PT_SET_LWP(l) ((uintptr_t)(l)) 160 #define PT_SET_PROC(p) (((uintptr_t)(p)) | PT_F_PROC) 161 #define PT_SET_RESERVED 0 162 #define PT_GET_LWP(s) ((struct lwp *)((s) & ~PT_F_ALLBITS)) 163 #define PT_GET_PROC(s) ((struct proc *)((s) & ~PT_F_ALLBITS)) 164 #define PT_GET_TYPE(s) ((s) & PT_F_TYPEBITS) 165 #define PT_IS_LWP(s) (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0) 166 #define PT_IS_PROC(s) (PT_GET_TYPE(s) == PT_F_PROC) 167 168 #define MIN_PROC_ALIGNMENT (PT_F_ALLBITS + 1) 169 170 /* 171 * Table of process IDs (PIDs). 
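 *
 * An illustrative sketch (not part of the original code) of the slot
 * encoding described above, in terms of the PT_* macros defined earlier:
 *
 *	pt->pt_slot = PT_SET_LWP(l);	occupied by an LWP (type bits 00)
 *	pt->pt_slot = PT_SET_PROC(p);	occupied by a proc (PT_F_PROC set)
 *	pt->pt_slot = PT_SET_FREE(link);	free; PT_NEXT() recovers 'link',
 *					whose low bits index the next free entry
 *
 *	if (PT_IS_LWP(slot))
 *		l = PT_GET_LWP(slot);
 *	else if (PT_IS_PROC(slot))
 *		p = PT_GET_PROC(slot);
 *	else
 *		link = PT_NEXT(slot);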
172 */ 173 static struct pid_table *pid_table __read_mostly; 174 175 #define INITIAL_PID_TABLE_SIZE (1 << 5) 176 177 /* Table mask, threshold for growing and number of allocated PIDs. */ 178 static u_int pid_tbl_mask __read_mostly; 179 static u_int pid_alloc_lim __read_mostly; 180 static u_int pid_alloc_cnt __cacheline_aligned; 181 182 /* Next free, last free and maximum PIDs. */ 183 static u_int next_free_pt __cacheline_aligned; 184 static u_int last_free_pt __cacheline_aligned; 185 static pid_t pid_max __read_mostly; 186 187 /* Components of the first process -- never freed. */ 188 189 extern struct emul emul_netbsd; /* defined in kern_exec.c */ 190 191 struct session session0 = { 192 .s_count = 1, 193 .s_sid = 0, 194 }; 195 struct pgrp pgrp0 = { 196 .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members), 197 .pg_session = &session0, 198 }; 199 filedesc_t filedesc0; 200 struct cwdinfo cwdi0 = { 201 .cwdi_cmask = CMASK, 202 .cwdi_refcnt = 1, 203 }; 204 struct plimit limit0; 205 struct pstats pstat0; 206 struct vmspace vmspace0; 207 struct sigacts sigacts0; 208 struct proc proc0 = { 209 .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps), 210 .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters), 211 .p_nlwps = 1, 212 .p_nrlwps = 1, 213 .p_pgrp = &pgrp0, 214 .p_comm = "system", 215 /* 216 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8) 217 * when they exit. init(8) can easily wait them out for us. 218 */ 219 .p_flag = PK_SYSTEM | PK_NOCLDWAIT, 220 .p_stat = SACTIVE, 221 .p_nice = NZERO, 222 .p_emul = &emul_netbsd, 223 .p_cwdi = &cwdi0, 224 .p_limit = &limit0, 225 .p_fd = &filedesc0, 226 .p_vmspace = &vmspace0, 227 .p_stats = &pstat0, 228 .p_sigacts = &sigacts0, 229 #ifdef PROC0_MD_INITIALIZERS 230 PROC0_MD_INITIALIZERS 231 #endif 232 }; 233 kauth_cred_t cred0; 234 235 static const int nofile = NOFILE; 236 static const int maxuprc = MAXUPRC; 237 238 static int sysctl_doeproc(SYSCTLFN_PROTO); 239 static int sysctl_kern_proc_args(SYSCTLFN_PROTO); 240 static int sysctl_security_expose_address(SYSCTLFN_PROTO); 241 242 #ifdef KASLR 243 static int kern_expose_address = 0; 244 #else 245 static int kern_expose_address = 1; 246 #endif 247 /* 248 * The process list descriptors, used during pid allocation and 249 * by sysctl. No locking on this data structure is needed since 250 * it is completely static. 
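 * The array is terminated by a NULL pd_list pointer; it is walked as a
 * simple NULL-terminated array, as procinit() does below, e.g.:
 *
 *	for (pd = proclists; pd->pd_list != NULL; pd++)
 *		LIST_INIT(pd->pd_list);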
251 */ 252 const struct proclist_desc proclists[] = { 253 { &allproc }, 254 { &zombproc }, 255 { NULL }, 256 }; 257 258 static struct pgrp * pg_remove(pid_t); 259 static void pg_delete(pid_t); 260 static void orphanpg(struct pgrp *); 261 262 static specificdata_domain_t proc_specificdata_domain; 263 264 static pool_cache_t proc_cache; 265 266 static kauth_listener_t proc_listener; 267 268 static void fill_proc(const struct proc *, struct proc *, bool); 269 static int fill_pathname(struct lwp *, pid_t, void *, size_t *); 270 static int fill_cwd(struct lwp *, pid_t, void *, size_t *); 271 272 static int 273 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 274 void *arg0, void *arg1, void *arg2, void *arg3) 275 { 276 struct proc *p; 277 int result; 278 279 result = KAUTH_RESULT_DEFER; 280 p = arg0; 281 282 switch (action) { 283 case KAUTH_PROCESS_CANSEE: { 284 enum kauth_process_req req; 285 286 req = (enum kauth_process_req)(uintptr_t)arg1; 287 288 switch (req) { 289 case KAUTH_REQ_PROCESS_CANSEE_ARGS: 290 case KAUTH_REQ_PROCESS_CANSEE_ENTRY: 291 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES: 292 case KAUTH_REQ_PROCESS_CANSEE_EPROC: 293 result = KAUTH_RESULT_ALLOW; 294 break; 295 296 case KAUTH_REQ_PROCESS_CANSEE_ENV: 297 if (kauth_cred_getuid(cred) != 298 kauth_cred_getuid(p->p_cred) || 299 kauth_cred_getuid(cred) != 300 kauth_cred_getsvuid(p->p_cred)) 301 break; 302 303 result = KAUTH_RESULT_ALLOW; 304 305 break; 306 307 case KAUTH_REQ_PROCESS_CANSEE_KPTR: 308 if (!kern_expose_address) 309 break; 310 311 if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM)) 312 break; 313 314 result = KAUTH_RESULT_ALLOW; 315 316 break; 317 318 default: 319 break; 320 } 321 322 break; 323 } 324 325 case KAUTH_PROCESS_FORK: { 326 int lnprocs = (int)(unsigned long)arg2; 327 328 /* 329 * Don't allow a nonprivileged user to use the last few 330 * processes. The variable lnprocs is the current number of 331 * processes, maxproc is the limit. 332 */ 333 if (__predict_false((lnprocs >= maxproc - 5))) 334 break; 335 336 result = KAUTH_RESULT_ALLOW; 337 338 break; 339 } 340 341 case KAUTH_PROCESS_CORENAME: 342 case KAUTH_PROCESS_STOPFLAG: 343 if (proc_uidmatch(cred, p->p_cred) == 0) 344 result = KAUTH_RESULT_ALLOW; 345 346 break; 347 348 default: 349 break; 350 } 351 352 return result; 353 } 354 355 static int 356 proc_ctor(void *arg __unused, void *obj, int flags __unused) 357 { 358 memset(obj, 0, sizeof(struct proc)); 359 return 0; 360 } 361 362 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t); 363 364 /* 365 * Initialize global process hashing structures. 366 */ 367 void 368 procinit(void) 369 { 370 const struct proclist_desc *pd; 371 u_int i; 372 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 373 374 for (pd = proclists; pd->pd_list != NULL; pd++) 375 LIST_INIT(pd->pd_list); 376 377 mutex_init(&proc_lock, MUTEX_DEFAULT, IPL_NONE); 378 379 proc_psz = pserialize_create(); 380 381 pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE 382 * sizeof(struct pid_table), KM_SLEEP); 383 pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 384 pid_max = PID_MAX; 385 386 /* Set free list running through table... 387 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 388 for (i = 0; i <= pid_tbl_mask; i++) { 389 pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1); 390 pid_table[i].pt_pgrp = 0; 391 pid_table[i].pt_pid = 0; 392 } 393 /* slot 0 is just grabbed */ 394 next_free_pt = 1; 395 /* Need to fix last entry. 
*/ 396 last_free_pt = pid_tbl_mask; 397 pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY); 398 /* point at which we grow table - to avoid reusing pids too often */ 399 pid_alloc_lim = pid_tbl_mask - 1; 400 #undef LINK_EMPTY 401 402 /* Reserve PID 1 for init(8). */ /* XXX slightly gross */ 403 mutex_enter(&proc_lock); 404 if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1) 405 panic("failed to reserve PID 1 for init(8)"); 406 mutex_exit(&proc_lock); 407 408 proc_specificdata_domain = specificdata_domain_create(); 409 KASSERT(proc_specificdata_domain != NULL); 410 411 size_t proc_alignment = coherency_unit; 412 if (proc_alignment < MIN_PROC_ALIGNMENT) 413 proc_alignment = MIN_PROC_ALIGNMENT; 414 415 proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0, 416 "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL); 417 418 proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 419 proc_listener_cb, NULL); 420 } 421 422 void 423 procinit_sysctl(void) 424 { 425 static struct sysctllog *clog; 426 427 sysctl_createv(&clog, 0, NULL, NULL, 428 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 429 CTLTYPE_INT, "expose_address", 430 SYSCTL_DESCR("Enable exposing kernel addresses"), 431 sysctl_security_expose_address, 0, 432 &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL); 433 sysctl_createv(&clog, 0, NULL, NULL, 434 CTLFLAG_PERMANENT, 435 CTLTYPE_NODE, "proc", 436 SYSCTL_DESCR("System-wide process information"), 437 sysctl_doeproc, 0, NULL, 0, 438 CTL_KERN, KERN_PROC, CTL_EOL); 439 sysctl_createv(&clog, 0, NULL, NULL, 440 CTLFLAG_PERMANENT, 441 CTLTYPE_NODE, "proc2", 442 SYSCTL_DESCR("Machine-independent process information"), 443 sysctl_doeproc, 0, NULL, 0, 444 CTL_KERN, KERN_PROC2, CTL_EOL); 445 sysctl_createv(&clog, 0, NULL, NULL, 446 CTLFLAG_PERMANENT, 447 CTLTYPE_NODE, "proc_args", 448 SYSCTL_DESCR("Process argument information"), 449 sysctl_kern_proc_args, 0, NULL, 0, 450 CTL_KERN, KERN_PROC_ARGS, CTL_EOL); 451 452 /* 453 "nodes" under these: 454 455 KERN_PROC_ALL 456 KERN_PROC_PID pid 457 KERN_PROC_PGRP pgrp 458 KERN_PROC_SESSION sess 459 KERN_PROC_TTY tty 460 KERN_PROC_UID uid 461 KERN_PROC_RUID uid 462 KERN_PROC_GID gid 463 KERN_PROC_RGID gid 464 465 all in all, probably not worth the effort... 466 */ 467 } 468 469 /* 470 * Initialize process 0. 471 */ 472 void 473 proc0_init(void) 474 { 475 struct proc *p; 476 struct pgrp *pg; 477 struct rlimit *rlim; 478 rlim_t lim; 479 int i; 480 481 p = &proc0; 482 pg = &pgrp0; 483 484 mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); 485 mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); 486 p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 487 488 rw_init(&p->p_reflock); 489 cv_init(&p->p_waitcv, "wait"); 490 cv_init(&p->p_lwpcv, "lwpwait"); 491 492 LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling); 493 494 KASSERT(lwp0.l_lid == 0); 495 pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0); 496 LIST_INSERT_HEAD(&allproc, p, p_list); 497 498 pid_table[lwp0.l_lid].pt_pgrp = pg; 499 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist); 500 501 #ifdef __HAVE_SYSCALL_INTERN 502 (*p->p_emul->e_syscall_intern)(p); 503 #endif 504 505 /* Create credentials. */ 506 cred0 = kauth_cred_alloc(); 507 p->p_cred = cred0; 508 509 /* Create the CWD info. */ 510 rw_init(&cwdi0.cwdi_lock); 511 512 /* Create the limits structures. 
*/ 513 mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE); 514 515 rlim = limit0.pl_rlimit; 516 for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) { 517 rlim[i].rlim_cur = RLIM_INFINITY; 518 rlim[i].rlim_max = RLIM_INFINITY; 519 } 520 521 rlim[RLIMIT_NOFILE].rlim_max = maxfiles; 522 rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile; 523 524 rlim[RLIMIT_NPROC].rlim_max = maxproc; 525 rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc; 526 527 lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem(false))); 528 rlim[RLIMIT_RSS].rlim_max = lim; 529 rlim[RLIMIT_MEMLOCK].rlim_max = lim; 530 rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 531 532 rlim[RLIMIT_NTHR].rlim_max = maxlwp; 533 rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc; 534 535 /* Note that default core name has zero length. */ 536 limit0.pl_corename = defcorename; 537 limit0.pl_cnlen = 0; 538 limit0.pl_refcnt = 1; 539 limit0.pl_writeable = false; 540 limit0.pl_sv_limit = NULL; 541 542 /* Configure virtual memory system, set vm rlimits. */ 543 uvm_init_limits(p); 544 545 /* Initialize file descriptor table for proc0. */ 546 fd_init(&filedesc0); 547 548 /* 549 * Initialize proc0's vmspace, which uses the kernel pmap. 550 * All kernel processes (which never have user space mappings) 551 * share proc0's vmspace, and thus, the kernel pmap. 552 */ 553 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 554 trunc_page(VM_MAXUSER_ADDRESS), 555 #ifdef __USE_TOPDOWN_VM 556 true 557 #else 558 false 559 #endif 560 ); 561 562 /* Initialize signal state for proc0. XXX IPL_SCHED */ 563 mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED); 564 siginit(p); 565 566 proc_initspecific(p); 567 kdtrace_proc_ctor(NULL, p); 568 } 569 570 /* 571 * Session reference counting. 572 */ 573 574 void 575 proc_sesshold(struct session *ss) 576 { 577 578 KASSERT(mutex_owned(&proc_lock)); 579 ss->s_count++; 580 } 581 582 void 583 proc_sessrele(struct session *ss) 584 { 585 struct pgrp *pg; 586 587 KASSERT(mutex_owned(&proc_lock)); 588 KASSERT(ss->s_count > 0); 589 590 /* 591 * We keep the pgrp with the same id as the session in order to 592 * stop a process being given the same pid. Since the pgrp holds 593 * a reference to the session, it must be a 'zombie' pgrp by now. 594 */ 595 if (--ss->s_count == 0) { 596 pg = pg_remove(ss->s_sid); 597 } else { 598 pg = NULL; 599 ss = NULL; 600 } 601 602 mutex_exit(&proc_lock); 603 604 if (pg) 605 kmem_free(pg, sizeof(struct pgrp)); 606 if (ss) 607 kmem_free(ss, sizeof(struct session)); 608 } 609 610 /* 611 * Check that the specified process group is in the session of the 612 * specified process. 613 * Treats -ve ids as process ids. 614 * Used to validate TIOCSPGRP requests. 615 */ 616 int 617 pgid_in_session(struct proc *p, pid_t pg_id) 618 { 619 struct pgrp *pgrp; 620 struct session *session; 621 int error; 622 623 mutex_enter(&proc_lock); 624 if (pg_id < 0) { 625 struct proc *p1 = proc_find(-pg_id); 626 if (p1 == NULL) { 627 error = EINVAL; 628 goto fail; 629 } 630 pgrp = p1->p_pgrp; 631 } else { 632 pgrp = pgrp_find(pg_id); 633 if (pgrp == NULL) { 634 error = EINVAL; 635 goto fail; 636 } 637 } 638 session = pgrp->pg_session; 639 error = (session != p->p_pgrp->pg_session) ? EPERM : 0; 640 fail: 641 mutex_exit(&proc_lock); 642 return error; 643 } 644 645 /* 646 * p_inferior: is p an inferior of q? 
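 * In other words, true iff q is p itself or one of its ancestors: the loop
 * below climbs p->p_pptr and returns false once it reaches PID 0 without
 * having met q.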
647 */ 648 static inline bool 649 p_inferior(struct proc *p, struct proc *q) 650 { 651 652 KASSERT(mutex_owned(&proc_lock)); 653 654 for (; p != q; p = p->p_pptr) 655 if (p->p_pid == 0) 656 return false; 657 return true; 658 } 659 660 /* 661 * proc_find_lwp: locate an lwp in said proc by the ID. 662 * 663 * => Must be called with p::p_lock held. 664 * => LSIDL lwps are not returned because they are only partially 665 * constructed while occupying the slot. 666 * => Callers need to be careful about lwp::l_stat of the returned 667 * lwp. 668 */ 669 struct lwp * 670 proc_find_lwp(proc_t *p, pid_t pid) 671 { 672 struct pid_table *pt; 673 struct lwp *l = NULL; 674 uintptr_t slot; 675 int s; 676 677 KASSERT(mutex_owned(p->p_lock)); 678 679 /* 680 * Look in the pid_table. This is done unlocked inside a pserialize 681 * read section covering pid_table's memory allocation only, so take 682 * care to read the slot atomically and only once. This issues a 683 * memory barrier for dependent loads on alpha. 684 */ 685 s = pserialize_read_enter(); 686 pt = &pid_table[pid & pid_tbl_mask]; 687 slot = atomic_load_consume(&pt->pt_slot); 688 if (__predict_false(!PT_IS_LWP(slot))) { 689 pserialize_read_exit(s); 690 return NULL; 691 } 692 693 /* 694 * Check to see if the LWP is from the correct process. We won't 695 * see entries in pid_table from a prior process that also used "p", 696 * by virtue of the fact that allocating "p" means all prior updates 697 * to dependent data structures are visible to this thread. 698 */ 699 l = PT_GET_LWP(slot); 700 if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) { 701 pserialize_read_exit(s); 702 return NULL; 703 } 704 705 /* 706 * We now know that p->p_lock holds this LWP stable. 707 * 708 * If the status is not LSIDL, it means the LWP is intended to be 709 * findable by LID and l_lid cannot change behind us. 710 * 711 * No need to acquire the LWP's lock to check for LSIDL, as 712 * p->p_lock must be held to transition in and out of LSIDL. 713 * Any other observed state is of no particular interest. 714 */ 715 pserialize_read_exit(s); 716 return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL; 717 } 718 719 /* 720 * proc_find_lwp_unlocked: locate an lwp in said proc by the ID. 721 * 722 * => Called in a pserialize read section with no locks held. 723 * => LSIDL lwps are not returned because they are only partially 724 * constructed while occupying the slot. 725 * => Callers need to be careful about lwp::l_stat of the returned 726 * lwp. 727 * => If an LWP is found, it's returned locked. 728 */ 729 struct lwp * 730 proc_find_lwp_unlocked(proc_t *p, pid_t pid) 731 { 732 struct pid_table *pt; 733 struct lwp *l = NULL; 734 uintptr_t slot; 735 736 KASSERT(pserialize_in_read_section()); 737 738 /* 739 * Look in the pid_table. This is done unlocked inside a pserialize 740 * read section covering pid_table's memory allocation only, so take 741 * care to read the slot atomically and only once. This issues a 742 * memory barrier for dependent loads on alpha. 743 */ 744 pt = &pid_table[pid & pid_tbl_mask]; 745 slot = atomic_load_consume(&pt->pt_slot); 746 if (__predict_false(!PT_IS_LWP(slot))) { 747 return NULL; 748 } 749 750 /* 751 * Lock the LWP we found to get it stable. If it's embryonic or 752 * reaped (LSIDL) then none of the other fields can safely be 753 * checked.
754 */ 755 l = PT_GET_LWP(slot); 756 lwp_lock(l); 757 if (__predict_false(l->l_stat == LSIDL)) { 758 lwp_unlock(l); 759 return NULL; 760 } 761 762 /* 763 * l_proc and l_lid are now known stable because the LWP is not 764 * LSIDL, so check those fields too to make sure we found the 765 * right thing. 766 */ 767 if (__predict_false(l->l_proc != p || l->l_lid != pid)) { 768 lwp_unlock(l); 769 return NULL; 770 } 771 772 /* Everything checks out, return it locked. */ 773 return l; 774 } 775 776 /* 777 * proc_find_lwp_acquire_proc: locate an lwp and acquire a lock 778 * on its containing proc. 779 * 780 * => Similar to proc_find_lwp(), but does not require you to have 781 * the proc a priori. 782 * => Also returns proc * to caller, with p::p_lock held. 783 * => Same caveats apply. 784 */ 785 struct lwp * 786 proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp) 787 { 788 struct pid_table *pt; 789 struct proc *p = NULL; 790 struct lwp *l = NULL; 791 uintptr_t slot; 792 793 KASSERT(pp != NULL); 794 mutex_enter(&proc_lock); 795 pt = &pid_table[pid & pid_tbl_mask]; 796 797 slot = pt->pt_slot; 798 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) { 799 l = PT_GET_LWP(slot); 800 p = l->l_proc; 801 mutex_enter(p->p_lock); 802 if (__predict_false(l->l_stat == LSIDL)) { 803 mutex_exit(p->p_lock); 804 l = NULL; 805 p = NULL; 806 } 807 } 808 mutex_exit(&proc_lock); 809 810 KASSERT(p == NULL || mutex_owned(p->p_lock)); 811 *pp = p; 812 return l; 813 } 814 815 /* 816 * proc_find_raw_pid_table_locked: locate a process by the ID. 817 * 818 * => Must be called with proc_lock held. 819 */ 820 static proc_t * 821 proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid) 822 { 823 struct pid_table *pt; 824 proc_t *p = NULL; 825 uintptr_t slot; 826 827 /* No - used by DDB. KASSERT(mutex_owned(&proc_lock)); */ 828 pt = &pid_table[pid & pid_tbl_mask]; 829 830 slot = pt->pt_slot; 831 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) { 832 /* 833 * When looking up processes, require a direct match 834 * on the PID assigned to the proc, not just one of 835 * its LWPs. 836 * 837 * N.B. We require lwp::l_proc of LSIDL LWPs to be 838 * valid here. 839 */ 840 p = PT_GET_LWP(slot)->l_proc; 841 if (__predict_false(p->p_pid != pid && !any_lwpid)) 842 p = NULL; 843 } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) { 844 p = PT_GET_PROC(slot); 845 } 846 return p; 847 } 848 849 proc_t * 850 proc_find_raw(pid_t pid) 851 { 852 853 return proc_find_raw_pid_table_locked(pid, false); 854 } 855 856 static proc_t * 857 proc_find_internal(pid_t pid, bool any_lwpid) 858 { 859 proc_t *p; 860 861 KASSERT(mutex_owned(&proc_lock)); 862 863 p = proc_find_raw_pid_table_locked(pid, any_lwpid); 864 if (__predict_false(p == NULL)) { 865 return NULL; 866 } 867 868 /* 869 * Only allow live processes to be found by PID. 870 * XXX: p_stat might change, since proc unlocked. 871 */ 872 if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) { 873 return p; 874 } 875 return NULL; 876 } 877 878 proc_t * 879 proc_find(pid_t pid) 880 { 881 return proc_find_internal(pid, false); 882 } 883 884 proc_t * 885 proc_find_lwpid(pid_t pid) 886 { 887 return proc_find_internal(pid, true); 888 } 889 890 /* 891 * pgrp_find: locate a process group by the ID. 892 * 893 * => Must be called with proc_lock held. 
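 * => Returns NULL for a group that survives only because its session
 *    has not died yet; see the pg_members check in the body.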
894 */ 895 struct pgrp * 896 pgrp_find(pid_t pgid) 897 { 898 struct pgrp *pg; 899 900 KASSERT(mutex_owned(&proc_lock)); 901 902 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 903 904 /* 905 * Cannot look up a process group that only exists because the 906 * session has not died yet (traditional). 907 */ 908 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 909 return NULL; 910 } 911 return pg; 912 } 913 914 static void 915 expand_pid_table(void) 916 { 917 size_t pt_size, tsz; 918 struct pid_table *n_pt, *new_pt; 919 uintptr_t slot; 920 struct pgrp *pgrp; 921 pid_t pid, rpid; 922 u_int i; 923 uint new_pt_mask; 924 925 KASSERT(mutex_owned(&proc_lock)); 926 927 /* Unlock the pid_table briefly to allocate memory. */ 928 pt_size = pid_tbl_mask + 1; 929 mutex_exit(&proc_lock); 930 931 tsz = pt_size * 2 * sizeof(struct pid_table); 932 new_pt = kmem_alloc(tsz, KM_SLEEP); 933 new_pt_mask = pt_size * 2 - 1; 934 935 /* XXX For now. The pratical limit is much lower anyway. */ 936 KASSERT(new_pt_mask <= FUTEX_TID_MASK); 937 938 mutex_enter(&proc_lock); 939 if (pt_size != pid_tbl_mask + 1) { 940 /* Another process beat us to it... */ 941 mutex_exit(&proc_lock); 942 kmem_free(new_pt, tsz); 943 goto out; 944 } 945 946 /* 947 * Copy entries from old table into new one. 948 * If 'pid' is 'odd' we need to place in the upper half, 949 * even pid's to the lower half. 950 * Free items stay in the low half so we don't have to 951 * fixup the reference to them. 952 * We stuff free items on the front of the freelist 953 * because we can't write to unmodified entries. 954 * Processing the table backwards maintains a semblance 955 * of issuing pid numbers that increase with time. 956 */ 957 i = pt_size - 1; 958 n_pt = new_pt + i; 959 for (; ; i--, n_pt--) { 960 slot = pid_table[i].pt_slot; 961 pgrp = pid_table[i].pt_pgrp; 962 if (!PT_VALID(slot)) { 963 /* Up 'use count' so that link is valid */ 964 pid = (PT_NEXT(slot) + pt_size) & ~pt_size; 965 rpid = 0; 966 slot = PT_SET_FREE(pid); 967 if (pgrp) 968 pid = pgrp->pg_id; 969 } else { 970 pid = pid_table[i].pt_pid; 971 rpid = pid; 972 } 973 974 /* Save entry in appropriate half of table */ 975 n_pt[pid & pt_size].pt_slot = slot; 976 n_pt[pid & pt_size].pt_pgrp = pgrp; 977 n_pt[pid & pt_size].pt_pid = rpid; 978 979 /* Put other piece on start of free list */ 980 pid = (pid ^ pt_size) & ~pid_tbl_mask; 981 n_pt[pid & pt_size].pt_slot = 982 PT_SET_FREE((pid & ~pt_size) | next_free_pt); 983 n_pt[pid & pt_size].pt_pgrp = 0; 984 n_pt[pid & pt_size].pt_pid = 0; 985 986 next_free_pt = i | (pid & pt_size); 987 if (i == 0) 988 break; 989 } 990 991 /* Save old table size and switch tables */ 992 tsz = pt_size * sizeof(struct pid_table); 993 n_pt = pid_table; 994 pid_table = new_pt; 995 pid_tbl_mask = new_pt_mask; 996 997 /* 998 * pid_max starts as PID_MAX (= 30000), once we have 16384 999 * allocated pids we need it to be larger! 1000 */ 1001 if (pid_tbl_mask > PID_MAX) { 1002 pid_max = pid_tbl_mask * 2 + 1; 1003 pid_alloc_lim |= pid_alloc_lim << 1; 1004 } else 1005 pid_alloc_lim <<= 1; /* doubles number of free slots... */ 1006 1007 mutex_exit(&proc_lock); 1008 1009 /* 1010 * Make sure that unlocked access to the old pid_table is complete 1011 * and then free it. 1012 */ 1013 pserialize_perform(proc_psz); 1014 kmem_free(n_pt, tsz); 1015 1016 out: /* Return with proc_lock held again. 
*/ 1017 mutex_enter(&proc_lock); 1018 } 1019 1020 struct proc * 1021 proc_alloc(void) 1022 { 1023 struct proc *p; 1024 1025 p = pool_cache_get(proc_cache, PR_WAITOK); 1026 p->p_stat = SIDL; /* protect against others */ 1027 proc_initspecific(p); 1028 kdtrace_proc_ctor(NULL, p); 1029 1030 /* 1031 * Allocate a placeholder in the pid_table. When we create the 1032 * first LWP for this process, it will take ownership of the 1033 * slot. 1034 */ 1035 if (__predict_false(proc_alloc_pid(p) == -1)) { 1036 /* Allocating the PID failed; unwind. */ 1037 proc_finispecific(p); 1038 proc_free_mem(p); 1039 p = NULL; 1040 } 1041 return p; 1042 } 1043 1044 /* 1045 * proc_alloc_pid_slot: allocate PID and record the occupant so that 1046 * proc_find_raw() can find it by the PID. 1047 */ 1048 static pid_t __noinline 1049 proc_alloc_pid_slot(struct proc *p, uintptr_t slot) 1050 { 1051 struct pid_table *pt; 1052 pid_t pid; 1053 int nxt; 1054 1055 KASSERT(mutex_owned(&proc_lock)); 1056 1057 for (;;expand_pid_table()) { 1058 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) { 1059 /* ensure pids cycle through 2000+ values */ 1060 continue; 1061 } 1062 /* 1063 * The first user process *must* be given PID 1. 1064 * It has already been reserved for us. This 1065 * will be coming in from the proc_alloc() call 1066 * above, and the entry will be usurped later when 1067 * the first user LWP is created. 1068 * XXX this is slightly gross. 1069 */ 1070 if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) && 1071 p != &proc0)) { 1072 KASSERT(PT_IS_PROC(slot)); 1073 pt = &pid_table[1]; 1074 pt->pt_slot = slot; 1075 return 1; 1076 } 1077 pt = &pid_table[next_free_pt]; 1078 #ifdef DIAGNOSTIC 1079 if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp)) 1080 panic("proc_alloc: slot busy"); 1081 #endif 1082 nxt = PT_NEXT(pt->pt_slot); 1083 if (nxt & pid_tbl_mask) 1084 break; 1085 /* Table full - expand (NB last entry not used....) */ 1086 } 1087 1088 /* pid is 'saved use count' + 'size' + entry */ 1089 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 1090 if ((uint)pid > (uint)pid_max) 1091 pid &= pid_tbl_mask; 1092 next_free_pt = nxt & pid_tbl_mask; 1093 1094 /* XXX For now. The practical limit is much lower anyway. */ 1095 KASSERT(pid <= FUTEX_TID_MASK); 1096 1097 /* Grab table slot */ 1098 pt->pt_slot = slot; 1099 1100 KASSERT(pt->pt_pid == 0); 1101 pt->pt_pid = pid; 1102 pid_alloc_cnt++; 1103 1104 return pid; 1105 } 1106 1107 pid_t 1108 proc_alloc_pid(struct proc *p) 1109 { 1110 pid_t pid; 1111 1112 KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0); 1113 KASSERT(p->p_stat == SIDL); 1114 1115 mutex_enter(&proc_lock); 1116 pid = proc_alloc_pid_slot(p, PT_SET_PROC(p)); 1117 if (pid != -1) 1118 p->p_pid = pid; 1119 mutex_exit(&proc_lock); 1120 1121 return pid; 1122 } 1123 1124 pid_t 1125 proc_alloc_lwpid(struct proc *p, struct lwp *l) 1126 { 1127 struct pid_table *pt; 1128 pid_t pid; 1129 1130 KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0); 1131 KASSERT(l->l_proc == p); 1132 KASSERT(l->l_stat == LSIDL); 1133 1134 /* 1135 * For unlocked lookup in proc_find_lwp(), make sure l->l_proc 1136 * is globally visible before the LWP becomes visible via the 1137 * pid_table. 1138 */ 1139 #ifndef __HAVE_ATOMIC_AS_MEMBAR 1140 membar_producer(); 1141 #endif 1142 1143 /* 1144 * If the slot for p->p_pid currently points to the proc, 1145 * then we should usurp this ID for the LWP.
This happens 1146 * at least once per process (for the first LWP), and can 1147 * happen again if the first LWP for a process exits and 1148 * before the process creates another. 1149 */ 1150 mutex_enter(&proc_lock); 1151 pid = p->p_pid; 1152 pt = &pid_table[pid & pid_tbl_mask]; 1153 KASSERT(pt->pt_pid == pid); 1154 if (PT_IS_PROC(pt->pt_slot)) { 1155 KASSERT(PT_GET_PROC(pt->pt_slot) == p); 1156 l->l_lid = pid; 1157 pt->pt_slot = PT_SET_LWP(l); 1158 } else { 1159 /* Need to allocate a new slot. */ 1160 pid = proc_alloc_pid_slot(p, PT_SET_LWP(l)); 1161 if (pid != -1) 1162 l->l_lid = pid; 1163 } 1164 mutex_exit(&proc_lock); 1165 1166 return pid; 1167 } 1168 1169 static void __noinline 1170 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused) 1171 { 1172 struct pid_table *pt; 1173 1174 pt = &pid_table[pid & pid_tbl_mask]; 1175 1176 KASSERT(PT_GET_TYPE(pt->pt_slot) == type); 1177 KASSERT(pt->pt_pid == pid); 1178 1179 /* save pid use count in slot */ 1180 pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask); 1181 pt->pt_pid = 0; 1182 1183 if (pt->pt_pgrp == NULL) { 1184 /* link last freed entry onto ours */ 1185 pid &= pid_tbl_mask; 1186 pt = &pid_table[last_free_pt]; 1187 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid); 1188 pt->pt_pid = 0; 1189 last_free_pt = pid; 1190 pid_alloc_cnt--; 1191 } 1192 } 1193 1194 /* 1195 * Free a process id - called from proc_free (in kern_exit.c) 1196 * 1197 * Called with the proc_lock held. 1198 */ 1199 void 1200 proc_free_pid(pid_t pid) 1201 { 1202 1203 KASSERT(mutex_owned(&proc_lock)); 1204 proc_free_pid_internal(pid, PT_F_PROC); 1205 } 1206 1207 /* 1208 * Free a process id used by an LWP. If this was the process's 1209 * first LWP, we convert the slot to point to the process; the 1210 * entry will get cleaned up later when the process finishes exiting. 1211 * 1212 * If not, then it's the same as proc_free_pid(). 1213 */ 1214 void 1215 proc_free_lwpid(struct proc *p, pid_t pid) 1216 { 1217 1218 KASSERT(mutex_owned(&proc_lock)); 1219 1220 if (__predict_true(p->p_pid == pid)) { 1221 struct pid_table *pt; 1222 1223 pt = &pid_table[pid & pid_tbl_mask]; 1224 1225 KASSERT(pt->pt_pid == pid); 1226 KASSERT(PT_IS_LWP(pt->pt_slot)); 1227 KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p); 1228 1229 pt->pt_slot = PT_SET_PROC(p); 1230 return; 1231 } 1232 proc_free_pid_internal(pid, PT_F_LWP); 1233 } 1234 1235 void 1236 proc_free_mem(struct proc *p) 1237 { 1238 1239 kdtrace_proc_dtor(NULL, p); 1240 pool_cache_put(proc_cache, p); 1241 } 1242 1243 /* 1244 * proc_enterpgrp: move p to a new or existing process group (and session). 1245 * 1246 * If we are creating a new pgrp, the pgid should equal 1247 * the calling process' pid. 1248 * If is only valid to enter a process group that is in the session 1249 * of the process. 1250 * Also mksess should only be set if we are creating a process group 1251 * 1252 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return. 1253 */ 1254 int 1255 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess) 1256 { 1257 struct pgrp *new_pgrp, *pgrp; 1258 struct session *sess; 1259 struct proc *p; 1260 int rval; 1261 pid_t pg_id = NO_PGID; 1262 1263 /* Allocate data areas we might need before doing any validity checks */ 1264 sess = mksess ? 
kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL; 1265 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP); 1266 1267 mutex_enter(&proc_lock); 1268 rval = EPERM; /* most common error (to save typing) */ 1269 1270 /* Check pgrp exists or can be created */ 1271 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 1272 if (pgrp != NULL && pgrp->pg_id != pgid) 1273 goto done; 1274 1275 /* Can only set another process under restricted circumstances. */ 1276 if (pid != curp->p_pid) { 1277 /* Must exist and be one of our children... */ 1278 p = proc_find_internal(pid, false); 1279 if (p == NULL || !p_inferior(p, curp)) { 1280 rval = ESRCH; 1281 goto done; 1282 } 1283 /* ... in the same session... */ 1284 if (sess != NULL || p->p_session != curp->p_session) 1285 goto done; 1286 /* ... existing pgid must be in same session ... */ 1287 if (pgrp != NULL && pgrp->pg_session != p->p_session) 1288 goto done; 1289 /* ... and not done an exec. */ 1290 if (p->p_flag & PK_EXEC) { 1291 rval = EACCES; 1292 goto done; 1293 } 1294 } else { 1295 /* ... setsid() cannot re-enter a pgrp */ 1296 if (mksess && (curp->p_pgid == curp->p_pid || 1297 pgrp_find(curp->p_pid))) 1298 goto done; 1299 p = curp; 1300 } 1301 1302 /* Changing the process group/session of a session 1303 leader is definitely off limits. */ 1304 if (SESS_LEADER(p)) { 1305 if (sess == NULL && p->p_pgrp == pgrp) 1306 /* unless it's a definite noop */ 1307 rval = 0; 1308 goto done; 1309 } 1310 1311 /* Can only create a process group with id of process */ 1312 if (pgrp == NULL && pgid != pid) 1313 goto done; 1314 1315 /* Can only create a session if creating pgrp */ 1316 if (sess != NULL && pgrp != NULL) 1317 goto done; 1318 1319 /* Check we allocated memory for a pgrp... */ 1320 if (pgrp == NULL && new_pgrp == NULL) 1321 goto done; 1322 1323 /* Don't attach to 'zombie' pgrp */ 1324 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 1325 goto done; 1326 1327 /* Expect to succeed now */ 1328 rval = 0; 1329 1330 if (pgrp == p->p_pgrp) 1331 /* nothing to do */ 1332 goto done; 1333 1334 /* Ok all setup, link up required structures */ 1335 1336 if (pgrp == NULL) { 1337 pgrp = new_pgrp; 1338 new_pgrp = NULL; 1339 if (sess != NULL) { 1340 sess->s_sid = p->p_pid; 1341 sess->s_leader = p; 1342 sess->s_count = 1; 1343 sess->s_ttyvp = NULL; 1344 sess->s_ttyp = NULL; 1345 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 1346 memcpy(sess->s_login, p->p_session->s_login, 1347 sizeof(sess->s_login)); 1348 p->p_lflag &= ~PL_CONTROLT; 1349 } else { 1350 sess = p->p_pgrp->pg_session; 1351 proc_sesshold(sess); 1352 } 1353 pgrp->pg_session = sess; 1354 sess = NULL; 1355 1356 pgrp->pg_id = pgid; 1357 LIST_INIT(&pgrp->pg_members); 1358 #ifdef DIAGNOSTIC 1359 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 1360 panic("enterpgrp: pgrp table slot in use"); 1361 if (__predict_false(mksess && p != curp)) 1362 panic("enterpgrp: mksession and p != curproc"); 1363 #endif 1364 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 1365 pgrp->pg_jobc = 0; 1366 } 1367 1368 /* 1369 * Adjust eligibility of affected pgrps to participate in job control. 1370 * Increment eligibility counts before decrementing, otherwise we 1371 * could reach 0 spuriously during the first call. 1372 */ 1373 fixjobc(p, pgrp, 1); 1374 fixjobc(p, p->p_pgrp, 0); 1375 1376 /* Interlock with ttread(). */ 1377 mutex_spin_enter(&tty_lock); 1378 1379 /* Move process to requested group. 
*/ 1380 LIST_REMOVE(p, p_pglist); 1381 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 1382 /* defer delete until we've dumped the lock */ 1383 pg_id = p->p_pgrp->pg_id; 1384 p->p_pgrp = pgrp; 1385 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 1386 1387 /* Done with the swap; we can release the tty mutex. */ 1388 mutex_spin_exit(&tty_lock); 1389 1390 done: 1391 if (pg_id != NO_PGID) { 1392 /* Releases proc_lock. */ 1393 pg_delete(pg_id); 1394 } else { 1395 mutex_exit(&proc_lock); 1396 } 1397 if (sess != NULL) 1398 kmem_free(sess, sizeof(*sess)); 1399 if (new_pgrp != NULL) 1400 kmem_free(new_pgrp, sizeof(*new_pgrp)); 1401 #ifdef DEBUG_PGRP 1402 if (__predict_false(rval)) 1403 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 1404 pid, pgid, mksess, curp->p_pid, rval); 1405 #endif 1406 return rval; 1407 } 1408 1409 /* 1410 * proc_leavepgrp: remove a process from its process group. 1411 * => must be called with the proc_lock held, which will be released; 1412 */ 1413 void 1414 proc_leavepgrp(struct proc *p) 1415 { 1416 struct pgrp *pgrp; 1417 1418 KASSERT(mutex_owned(&proc_lock)); 1419 1420 /* Interlock with ttread() */ 1421 mutex_spin_enter(&tty_lock); 1422 pgrp = p->p_pgrp; 1423 LIST_REMOVE(p, p_pglist); 1424 p->p_pgrp = NULL; 1425 mutex_spin_exit(&tty_lock); 1426 1427 if (LIST_EMPTY(&pgrp->pg_members)) { 1428 /* Releases proc_lock. */ 1429 pg_delete(pgrp->pg_id); 1430 } else { 1431 mutex_exit(&proc_lock); 1432 } 1433 } 1434 1435 /* 1436 * pg_remove: remove a process group from the table. 1437 * => must be called with the proc_lock held; 1438 * => returns process group to free; 1439 */ 1440 static struct pgrp * 1441 pg_remove(pid_t pg_id) 1442 { 1443 struct pgrp *pgrp; 1444 struct pid_table *pt; 1445 1446 KASSERT(mutex_owned(&proc_lock)); 1447 1448 pt = &pid_table[pg_id & pid_tbl_mask]; 1449 pgrp = pt->pt_pgrp; 1450 1451 KASSERT(pgrp != NULL); 1452 KASSERT(pgrp->pg_id == pg_id); 1453 KASSERT(LIST_EMPTY(&pgrp->pg_members)); 1454 1455 pt->pt_pgrp = NULL; 1456 1457 if (!PT_VALID(pt->pt_slot)) { 1458 /* Orphaned pgrp, put slot onto free list. */ 1459 KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0); 1460 pg_id &= pid_tbl_mask; 1461 pt = &pid_table[last_free_pt]; 1462 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id); 1463 KASSERT(pt->pt_pid == 0); 1464 last_free_pt = pg_id; 1465 pid_alloc_cnt--; 1466 } 1467 return pgrp; 1468 } 1469 1470 /* 1471 * pg_delete: delete and free a process group. 1472 * => must be called with the proc_lock held, which will be released. 1473 */ 1474 static void 1475 pg_delete(pid_t pg_id) 1476 { 1477 struct pgrp *pg; 1478 struct tty *ttyp; 1479 struct session *ss; 1480 1481 KASSERT(mutex_owned(&proc_lock)); 1482 1483 pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 1484 if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) { 1485 mutex_exit(&proc_lock); 1486 return; 1487 } 1488 1489 ss = pg->pg_session; 1490 1491 /* Remove reference (if any) from tty to this process group */ 1492 mutex_spin_enter(&tty_lock); 1493 ttyp = ss->s_ttyp; 1494 if (ttyp != NULL && ttyp->t_pgrp == pg) { 1495 ttyp->t_pgrp = NULL; 1496 KASSERT(ttyp->t_session == ss); 1497 } 1498 mutex_spin_exit(&tty_lock); 1499 1500 /* 1501 * The leading process group in a session is freed by proc_sessrele(), 1502 * if last reference. It will also release the locks. 1503 */ 1504 pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL; 1505 proc_sessrele(ss); 1506 1507 if (pg != NULL) { 1508 /* Free it, if was not done above. 
*/ 1509 kmem_free(pg, sizeof(struct pgrp)); 1510 } 1511 } 1512 1513 /* 1514 * Adjust pgrp jobc counters when specified process changes process group. 1515 * We count the number of processes in each process group that "qualify" 1516 * the group for terminal job control (those with a parent in a different 1517 * process group of the same session). If that count reaches zero, the 1518 * process group becomes orphaned. Check both the specified process' 1519 * process group and that of its children. 1520 * entering == 0 => p is leaving specified group. 1521 * entering == 1 => p is entering specified group. 1522 * 1523 * Call with proc_lock held. 1524 */ 1525 void 1526 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 1527 { 1528 struct pgrp *hispgrp; 1529 struct session *mysession = pgrp->pg_session; 1530 struct proc *child; 1531 1532 KASSERT(mutex_owned(&proc_lock)); 1533 1534 /* 1535 * Check p's parent to see whether p qualifies its own process 1536 * group; if so, adjust count for p's process group. 1537 */ 1538 hispgrp = p->p_pptr->p_pgrp; 1539 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 1540 if (entering) { 1541 pgrp->pg_jobc++; 1542 p->p_lflag &= ~PL_ORPHANPG; 1543 } else { 1544 KASSERT(pgrp->pg_jobc > 0); 1545 if (--pgrp->pg_jobc == 0) 1546 orphanpg(pgrp); 1547 } 1548 } 1549 1550 /* 1551 * Check this process' children to see whether they qualify 1552 * their process groups; if so, adjust counts for children's 1553 * process groups. 1554 */ 1555 LIST_FOREACH(child, &p->p_children, p_sibling) { 1556 hispgrp = child->p_pgrp; 1557 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1558 !P_ZOMBIE(child)) { 1559 if (entering) { 1560 child->p_lflag &= ~PL_ORPHANPG; 1561 hispgrp->pg_jobc++; 1562 } else { 1563 KASSERT(hispgrp->pg_jobc > 0); 1564 if (--hispgrp->pg_jobc == 0) 1565 orphanpg(hispgrp); 1566 } 1567 } 1568 } 1569 } 1570 1571 /* 1572 * A process group has become orphaned; 1573 * if there are any stopped processes in the group, 1574 * hang up all processes in that group. 1575 * 1576 * Call with proc_lock held.
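 * (Each stopped member is sent SIGHUP followed by SIGCONT, the usual
 * treatment for an orphaned process group that contains stopped jobs.)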
1577 */ 1578 static void 1579 orphanpg(struct pgrp *pg) 1580 { 1581 struct proc *p; 1582 1583 KASSERT(mutex_owned(&proc_lock)); 1584 1585 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1586 if (p->p_stat == SSTOP) { 1587 p->p_lflag |= PL_ORPHANPG; 1588 psignal(p, SIGHUP); 1589 psignal(p, SIGCONT); 1590 } 1591 } 1592 } 1593 1594 #ifdef DDB 1595 #include <ddb/db_output.h> 1596 void pidtbl_dump(void); 1597 void 1598 pidtbl_dump(void) 1599 { 1600 struct pid_table *pt; 1601 struct proc *p; 1602 struct pgrp *pgrp; 1603 uintptr_t slot; 1604 int id; 1605 1606 db_printf("pid table %p size %x, next %x, last %x\n", 1607 pid_table, pid_tbl_mask+1, 1608 next_free_pt, last_free_pt); 1609 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1610 slot = pt->pt_slot; 1611 if (!PT_VALID(slot) && !pt->pt_pgrp) 1612 continue; 1613 if (PT_IS_LWP(slot)) { 1614 p = PT_GET_LWP(slot)->l_proc; 1615 } else if (PT_IS_PROC(slot)) { 1616 p = PT_GET_PROC(slot); 1617 } else { 1618 p = NULL; 1619 } 1620 db_printf(" id %x: ", id); 1621 if (p != NULL) 1622 db_printf("slotpid %d proc %p id %d (0x%x) %s\n", 1623 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm); 1624 else 1625 db_printf("next %x use %x\n", 1626 PT_NEXT(slot) & pid_tbl_mask, 1627 PT_NEXT(slot) & ~pid_tbl_mask); 1628 if ((pgrp = pt->pt_pgrp)) { 1629 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1630 pgrp->pg_session, pgrp->pg_session->s_sid, 1631 pgrp->pg_session->s_count, 1632 pgrp->pg_session->s_login); 1633 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1634 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1635 LIST_FIRST(&pgrp->pg_members)); 1636 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { 1637 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1638 p->p_pid, p, p->p_pgrp, p->p_comm); 1639 } 1640 } 1641 } 1642 } 1643 #endif /* DDB */ 1644 1645 #ifdef KSTACK_CHECK_MAGIC 1646 1647 #define KSTACK_MAGIC 0xdeadbeaf 1648 1649 /* XXX should be per process basis? */ 1650 static int kstackleftmin = KSTACK_SIZE; 1651 static int kstackleftthres = KSTACK_SIZE / 8; 1652 1653 void 1654 kstack_setup_magic(const struct lwp *l) 1655 { 1656 uint32_t *ip; 1657 uint32_t const *end; 1658 1659 KASSERT(l != NULL); 1660 KASSERT(l != &lwp0); 1661 1662 /* 1663 * fill all the stack with magic number 1664 * so that later modification on it can be detected. 1665 */ 1666 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1667 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1668 for (; ip < end; ip++) { 1669 *ip = KSTACK_MAGIC; 1670 } 1671 } 1672 1673 void 1674 kstack_check_magic(const struct lwp *l) 1675 { 1676 uint32_t const *ip, *end; 1677 int stackleft; 1678 1679 KASSERT(l != NULL); 1680 1681 /* don't check proc0 */ /*XXX*/ 1682 if (l == &lwp0) 1683 return; 1684 1685 #ifdef __MACHINE_STACK_GROWS_UP 1686 /* stack grows upwards (eg. hppa) */ 1687 ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1688 end = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1689 for (ip--; ip >= end; ip--) 1690 if (*ip != KSTACK_MAGIC) 1691 break; 1692 1693 stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip; 1694 #else /* __MACHINE_STACK_GROWS_UP */ 1695 /* stack grows downwards (eg. 
i386) */ 1696 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1697 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1698 for (; ip < end; ip++) 1699 if (*ip != KSTACK_MAGIC) 1700 break; 1701 1702 stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l); 1703 #endif /* __MACHINE_STACK_GROWS_UP */ 1704 1705 if (kstackleftmin > stackleft) { 1706 kstackleftmin = stackleft; 1707 if (stackleft < kstackleftthres) 1708 printf("warning: kernel stack left %d bytes" 1709 "(pid %u:lid %u)\n", stackleft, 1710 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1711 } 1712 1713 if (stackleft <= 0) { 1714 panic("magic on the top of kernel stack changed for " 1715 "pid %u, lid %u: maybe kernel stack overflow", 1716 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1717 } 1718 } 1719 #endif /* KSTACK_CHECK_MAGIC */ 1720 1721 int 1722 proclist_foreach_call(struct proclist *list, 1723 int (*callback)(struct proc *, void *arg), void *arg) 1724 { 1725 struct proc marker; 1726 struct proc *p; 1727 int ret = 0; 1728 1729 marker.p_flag = PK_MARKER; 1730 mutex_enter(&proc_lock); 1731 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) { 1732 if (p->p_flag & PK_MARKER) { 1733 p = LIST_NEXT(p, p_list); 1734 continue; 1735 } 1736 LIST_INSERT_AFTER(p, &marker, p_list); 1737 ret = (*callback)(p, arg); 1738 KASSERT(mutex_owned(&proc_lock)); 1739 p = LIST_NEXT(&marker, p_list); 1740 LIST_REMOVE(&marker, p_list); 1741 } 1742 mutex_exit(&proc_lock); 1743 1744 return ret; 1745 } 1746 1747 int 1748 proc_vmspace_getref(struct proc *p, struct vmspace **vm) 1749 { 1750 1751 /* XXXCDC: how should locking work here? */ 1752 1753 /* curproc exception is for coredump. */ 1754 1755 if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) || 1756 (p->p_vmspace->vm_refcnt < 1)) { 1757 return EFAULT; 1758 } 1759 1760 uvmspace_addref(p->p_vmspace); 1761 *vm = p->p_vmspace; 1762 1763 return 0; 1764 } 1765 1766 /* 1767 * Acquire a write lock on the process credential. 1768 */ 1769 void 1770 proc_crmod_enter(void) 1771 { 1772 struct lwp *l = curlwp; 1773 struct proc *p = l->l_proc; 1774 kauth_cred_t oc; 1775 1776 /* Reset what needs to be reset in plimit. */ 1777 if (p->p_limit->pl_corename != defcorename) { 1778 lim_setcorename(p, defcorename, 0); 1779 } 1780 1781 mutex_enter(p->p_lock); 1782 1783 /* Ensure the LWP cached credentials are up to date. */ 1784 if ((oc = l->l_cred) != p->p_cred) { 1785 kauth_cred_hold(p->p_cred); 1786 l->l_cred = p->p_cred; 1787 kauth_cred_free(oc); 1788 } 1789 } 1790 1791 /* 1792 * Set in a new process credential, and drop the write lock. The credential 1793 * must have a reference already. Optionally, free a no-longer required 1794 * credential. The scheduler also needs to inspect p_cred, so we also 1795 * briefly acquire the sched state mutex. 1796 */ 1797 void 1798 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid) 1799 { 1800 struct lwp *l = curlwp, *l2; 1801 struct proc *p = l->l_proc; 1802 kauth_cred_t oc; 1803 1804 KASSERT(mutex_owned(p->p_lock)); 1805 1806 /* Is there a new credential to set in? */ 1807 if (scred != NULL) { 1808 p->p_cred = scred; 1809 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 1810 if (l2 != l) 1811 l2->l_prflag |= LPR_CRMOD; 1812 } 1813 1814 /* Ensure the LWP cached credentials are up to date. */ 1815 if ((oc = l->l_cred) != scred) { 1816 kauth_cred_hold(scred); 1817 l->l_cred = scred; 1818 } 1819 } else 1820 oc = NULL; /* XXXgcc */ 1821 1822 if (sugid) { 1823 /* 1824 * Mark process as having changed credentials, stops 1825 * tracing etc. 
1826 */ 1827 p->p_flag |= PK_SUGID; 1828 } 1829 1830 mutex_exit(p->p_lock); 1831 1832 /* If there is a credential to be released, free it now. */ 1833 if (fcred != NULL) { 1834 KASSERT(scred != NULL); 1835 kauth_cred_free(fcred); 1836 if (oc != scred) 1837 kauth_cred_free(oc); 1838 } 1839 } 1840 1841 /* 1842 * proc_specific_key_create -- 1843 * Create a key for subsystem proc-specific data. 1844 */ 1845 int 1846 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1847 { 1848 1849 return (specificdata_key_create(proc_specificdata_domain, keyp, dtor)); 1850 } 1851 1852 /* 1853 * proc_specific_key_delete -- 1854 * Delete a key for subsystem proc-specific data. 1855 */ 1856 void 1857 proc_specific_key_delete(specificdata_key_t key) 1858 { 1859 1860 specificdata_key_delete(proc_specificdata_domain, key); 1861 } 1862 1863 /* 1864 * proc_initspecific -- 1865 * Initialize a proc's specificdata container. 1866 */ 1867 void 1868 proc_initspecific(struct proc *p) 1869 { 1870 int error __diagused; 1871 1872 error = specificdata_init(proc_specificdata_domain, &p->p_specdataref); 1873 KASSERT(error == 0); 1874 } 1875 1876 /* 1877 * proc_finispecific -- 1878 * Finalize a proc's specificdata container. 1879 */ 1880 void 1881 proc_finispecific(struct proc *p) 1882 { 1883 1884 specificdata_fini(proc_specificdata_domain, &p->p_specdataref); 1885 } 1886 1887 /* 1888 * proc_getspecific -- 1889 * Return proc-specific data corresponding to the specified key. 1890 */ 1891 void * 1892 proc_getspecific(struct proc *p, specificdata_key_t key) 1893 { 1894 1895 return (specificdata_getspecific(proc_specificdata_domain, 1896 &p->p_specdataref, key)); 1897 } 1898 1899 /* 1900 * proc_setspecific -- 1901 * Set proc-specific data corresponding to the specified key. 
1902 */ 1903 void 1904 proc_setspecific(struct proc *p, specificdata_key_t key, void *data) 1905 { 1906 1907 specificdata_setspecific(proc_specificdata_domain, 1908 &p->p_specdataref, key, data); 1909 } 1910 1911 int 1912 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target) 1913 { 1914 int r = 0; 1915 1916 if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) || 1917 kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) { 1918 /* 1919 * suid proc of ours or proc not ours 1920 */ 1921 r = EPERM; 1922 } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) { 1923 /* 1924 * sgid proc has sgid back to us temporarily 1925 */ 1926 r = EPERM; 1927 } else { 1928 /* 1929 * our rgid must be in target's group list (ie, 1930 * sub-processes started by a sgid process) 1931 */ 1932 int ismember = 0; 1933 1934 if (kauth_cred_ismember_gid(cred, 1935 kauth_cred_getgid(target), &ismember) != 0 || 1936 !ismember) 1937 r = EPERM; 1938 } 1939 1940 return (r); 1941 } 1942 1943 /* 1944 * sysctl stuff 1945 */ 1946 1947 #define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc)) 1948 1949 static const u_int sysctl_flagmap[] = { 1950 PK_ADVLOCK, P_ADVLOCK, 1951 PK_EXEC, P_EXEC, 1952 PK_NOCLDWAIT, P_NOCLDWAIT, 1953 PK_32, P_32, 1954 PK_CLDSIGIGN, P_CLDSIGIGN, 1955 PK_SUGID, P_SUGID, 1956 0 1957 }; 1958 1959 static const u_int sysctl_sflagmap[] = { 1960 PS_NOCLDSTOP, P_NOCLDSTOP, 1961 PS_WEXIT, P_WEXIT, 1962 PS_STOPFORK, P_STOPFORK, 1963 PS_STOPEXEC, P_STOPEXEC, 1964 PS_STOPEXIT, P_STOPEXIT, 1965 0 1966 }; 1967 1968 static const u_int sysctl_slflagmap[] = { 1969 PSL_TRACED, P_TRACED, 1970 PSL_CHTRACED, P_CHTRACED, 1971 PSL_SYSCALL, P_SYSCALL, 1972 0 1973 }; 1974 1975 static const u_int sysctl_lflagmap[] = { 1976 PL_CONTROLT, P_CONTROLT, 1977 PL_PPWAIT, P_PPWAIT, 1978 0 1979 }; 1980 1981 static const u_int sysctl_stflagmap[] = { 1982 PST_PROFIL, P_PROFIL, 1983 0 1984 1985 }; 1986 1987 /* used by kern_lwp also */ 1988 const u_int sysctl_lwpflagmap[] = { 1989 LW_SINTR, L_SINTR, 1990 LW_SYSTEM, L_SYSTEM, 1991 0 1992 }; 1993 1994 /* 1995 * Find the most ``active'' lwp of a process and return it for ps display 1996 * purposes 1997 */ 1998 static struct lwp * 1999 proc_active_lwp(struct proc *p) 2000 { 2001 static const int ostat[] = { 2002 0, 2003 2, /* LSIDL */ 2004 6, /* LSRUN */ 2005 5, /* LSSLEEP */ 2006 4, /* LSSTOP */ 2007 0, /* LSZOMB */ 2008 1, /* LSDEAD */ 2009 7, /* LSONPROC */ 2010 3 /* LSSUSPENDED */ 2011 }; 2012 2013 struct lwp *l, *lp = NULL; 2014 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 2015 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat)); 2016 if (lp == NULL || 2017 ostat[l->l_stat] > ostat[lp->l_stat] || 2018 (ostat[l->l_stat] == ostat[lp->l_stat] && 2019 l->l_cpticks > lp->l_cpticks)) { 2020 lp = l; 2021 continue; 2022 } 2023 } 2024 return lp; 2025 } 2026 2027 static int 2028 sysctl_doeproc(SYSCTLFN_ARGS) 2029 { 2030 union { 2031 struct kinfo_proc kproc; 2032 struct kinfo_proc2 kproc2; 2033 } *kbuf; 2034 struct proc *p, *next, *marker; 2035 char *where, *dp; 2036 int type, op, arg, error; 2037 u_int elem_size, kelem_size, elem_count; 2038 size_t buflen, needed; 2039 bool match, zombie, mmmbrains; 2040 const bool allowaddr = get_expose_address(curproc); 2041 2042 if (namelen == 1 && name[0] == CTL_QUERY) 2043 return (sysctl_query(SYSCTLFN_CALL(rnode))); 2044 2045 dp = where = oldp; 2046 buflen = where != NULL ? 
*oldlenp : 0;
2047 error = 0;
2048 needed = 0;
2049 type = rnode->sysctl_num;
2050
2051 if (type == KERN_PROC) {
2052 if (namelen == 0)
2053 return EINVAL;
2054 switch (op = name[0]) {
2055 case KERN_PROC_ALL:
2056 if (namelen != 1)
2057 return EINVAL;
2058 arg = 0;
2059 break;
2060 default:
2061 if (namelen != 2)
2062 return EINVAL;
2063 arg = name[1];
2064 break;
2065 }
2066 elem_count = 0; /* Hush little compiler, don't you cry */
2067 kelem_size = elem_size = sizeof(kbuf->kproc);
2068 } else {
2069 if (namelen != 4)
2070 return EINVAL;
2071 op = name[0];
2072 arg = name[1];
2073 elem_size = name[2];
2074 elem_count = name[3];
2075 kelem_size = sizeof(kbuf->kproc2);
2076 }
2077
2078 sysctl_unlock();
2079
2080 kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP);
2081 marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
2082 marker->p_flag = PK_MARKER;
2083
2084 mutex_enter(&proc_lock);
2085 /*
2086 * Start with zombies to prevent reporting processes twice, in case they
2087 * are dying and being moved from the list of alive processes to zombies.
2088 */
2089 mmmbrains = true;
2090 for (p = LIST_FIRST(&zombproc);; p = next) {
2091 if (p == NULL) {
2092 if (mmmbrains) {
2093 p = LIST_FIRST(&allproc);
2094 mmmbrains = false;
2095 }
2096 if (p == NULL)
2097 break;
2098 }
2099 next = LIST_NEXT(p, p_list);
2100 if ((p->p_flag & PK_MARKER) != 0)
2101 continue;
2102
2103 /*
2104 * Skip embryonic processes.
2105 */
2106 if (p->p_stat == SIDL)
2107 continue;
2108
2109 mutex_enter(p->p_lock);
2110 error = kauth_authorize_process(l->l_cred,
2111 KAUTH_PROCESS_CANSEE, p,
2112 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL);
2113 if (error != 0) {
2114 mutex_exit(p->p_lock);
2115 continue;
2116 }
2117
2118 /*
2119 * Handling all the operations in one switch, at the cost of some
2120 * algorithmic complexity, is deliberate.  Splitting this function
2121 * into several similar copies would add maintenance burden and
2122 * code growth for a gain that is negligible on practical systems.
2123 */
2124 switch (op) {
2125 case KERN_PROC_PID:
2126 match = (p->p_pid == (pid_t)arg);
2127 break;
2128
2129 case KERN_PROC_PGRP:
2130 match = (p->p_pgrp->pg_id == (pid_t)arg);
2131 break;
2132
2133 case KERN_PROC_SESSION:
2134 match = (p->p_session->s_sid == (pid_t)arg);
2135 break;
2136
2137 case KERN_PROC_TTY:
2138 match = true;
2139 if (arg == (int) KERN_PROC_TTY_REVOKE) {
2140 if ((p->p_lflag & PL_CONTROLT) == 0 ||
2141 p->p_session->s_ttyp == NULL ||
2142 p->p_session->s_ttyvp != NULL) {
2143 match = false;
2144 }
2145 } else if ((p->p_lflag & PL_CONTROLT) == 0 ||
2146 p->p_session->s_ttyp == NULL) {
2147 if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
2148 match = false;
2149 }
2150 } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
2151 match = false;
2152 }
2153 break;
2154
2155 case KERN_PROC_UID:
2156 match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
2157 break;
2158
2159 case KERN_PROC_RUID:
2160 match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
2161 break;
2162
2163 case KERN_PROC_GID:
2164 match = (kauth_cred_getegid(p->p_cred) == (gid_t)arg);
2165 break;
2166
2167 case KERN_PROC_RGID:
2168 match = (kauth_cred_getgid(p->p_cred) == (gid_t)arg);
2169 break;
2170
2171 case KERN_PROC_ALL:
2172 match = true;
2173 /* allow everything */
2174 break;
2175
2176 default:
2177 error = EINVAL;
2178 mutex_exit(p->p_lock);
2179 goto cleanup;
2180 }
2181 if (!match) {
2182 mutex_exit(p->p_lock);
2183 continue;
2184 }
2185
2186 /*
2187 * Grab a hold on the process.
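 * Zombies cannot be referenced through p_reflock, so a marker entry
 * is inserted after them instead; it keeps our place in the list
 * while proc_lock is dropped for the copyout below.  For live
 * processes rw_tryenter() on p_reflock provides the hold, and a
 * process whose reflock cannot be taken is treated like a zombie.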
2188 */ 2189 if (mmmbrains) { 2190 zombie = true; 2191 } else { 2192 zombie = !rw_tryenter(&p->p_reflock, RW_READER); 2193 } 2194 if (zombie) { 2195 LIST_INSERT_AFTER(p, marker, p_list); 2196 } 2197 2198 if (buflen >= elem_size && 2199 (type == KERN_PROC || elem_count > 0)) { 2200 ruspace(p); /* Update process vm resource use */ 2201 2202 if (type == KERN_PROC) { 2203 fill_proc(p, &kbuf->kproc.kp_proc, allowaddr); 2204 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie, 2205 allowaddr); 2206 } else { 2207 fill_kproc2(p, &kbuf->kproc2, zombie, 2208 allowaddr); 2209 elem_count--; 2210 } 2211 mutex_exit(p->p_lock); 2212 mutex_exit(&proc_lock); 2213 /* 2214 * Copy out elem_size, but not larger than kelem_size 2215 */ 2216 error = sysctl_copyout(l, kbuf, dp, 2217 uimin(kelem_size, elem_size)); 2218 mutex_enter(&proc_lock); 2219 if (error) { 2220 goto bah; 2221 } 2222 dp += elem_size; 2223 buflen -= elem_size; 2224 } else { 2225 mutex_exit(p->p_lock); 2226 } 2227 needed += elem_size; 2228 2229 /* 2230 * Release reference to process. 2231 */ 2232 if (zombie) { 2233 next = LIST_NEXT(marker, p_list); 2234 LIST_REMOVE(marker, p_list); 2235 } else { 2236 rw_exit(&p->p_reflock); 2237 next = LIST_NEXT(p, p_list); 2238 } 2239 2240 /* 2241 * Short-circuit break quickly! 2242 */ 2243 if (op == KERN_PROC_PID) 2244 break; 2245 } 2246 mutex_exit(&proc_lock); 2247 2248 if (where != NULL) { 2249 *oldlenp = dp - where; 2250 if (needed > *oldlenp) { 2251 error = ENOMEM; 2252 goto out; 2253 } 2254 } else { 2255 needed += KERN_PROCSLOP; 2256 *oldlenp = needed; 2257 } 2258 kmem_free(kbuf, sizeof(*kbuf)); 2259 kmem_free(marker, sizeof(*marker)); 2260 sysctl_relock(); 2261 return 0; 2262 bah: 2263 if (zombie) 2264 LIST_REMOVE(marker, p_list); 2265 else 2266 rw_exit(&p->p_reflock); 2267 cleanup: 2268 mutex_exit(&proc_lock); 2269 out: 2270 kmem_free(kbuf, sizeof(*kbuf)); 2271 kmem_free(marker, sizeof(*marker)); 2272 sysctl_relock(); 2273 return error; 2274 } 2275 2276 int 2277 copyin_psstrings(struct proc *p, struct ps_strings *arginfo) 2278 { 2279 #if !defined(_RUMPKERNEL) 2280 int retval; 2281 2282 if (p->p_flag & PK_32) { 2283 MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo), 2284 enosys(), retval); 2285 return retval; 2286 } 2287 #endif /* !defined(_RUMPKERNEL) */ 2288 2289 return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo)); 2290 } 2291 2292 static int 2293 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len) 2294 { 2295 void **cookie = cookie_; 2296 struct lwp *l = cookie[0]; 2297 char *dst = cookie[1]; 2298 2299 return sysctl_copyout(l, src, dst + off, len); 2300 } 2301 2302 /* 2303 * sysctl helper routine for kern.proc_args pseudo-subtree. 
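 *
 * The two name components are the target pid (name[0]) and the
 * request type (name[1]): KERN_PROC_{ARGV,NARGV,ENV,NENV} for the
 * argument/environment vectors or their counts, plus
 * KERN_PROC_PATHNAME and KERN_PROC_CWD.  A userland caller typically
 * reaches this through the kern.proc_args node, roughly as follows
 * (illustrative sketch only; error handling omitted):
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *	size_t len = ARG_MAX;
 *	char *buf = malloc(len);
 *
 *	sysctl(mib, 4, buf, &len, NULL, 0);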
2304 */
2305 static int
2306 sysctl_kern_proc_args(SYSCTLFN_ARGS)
2307 {
2308 struct ps_strings pss;
2309 struct proc *p;
2310 pid_t pid;
2311 int type, error;
2312 void *cookie[2];
2313
2314 if (namelen == 1 && name[0] == CTL_QUERY)
2315 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2316
2317 if (newp != NULL || namelen != 2)
2318 return (EINVAL);
2319 pid = name[0];
2320 type = name[1];
2321
2322 switch (type) {
2323 case KERN_PROC_PATHNAME:
2324 sysctl_unlock();
2325 error = fill_pathname(l, pid, oldp, oldlenp);
2326 sysctl_relock();
2327 return error;
2328
2329 case KERN_PROC_CWD:
2330 sysctl_unlock();
2331 error = fill_cwd(l, pid, oldp, oldlenp);
2332 sysctl_relock();
2333 return error;
2334
2335 case KERN_PROC_ARGV:
2336 case KERN_PROC_NARGV:
2337 case KERN_PROC_ENV:
2338 case KERN_PROC_NENV:
2339 /* ok */
2340 break;
2341 default:
2342 return (EINVAL);
2343 }
2344
2345 sysctl_unlock();
2346
2347 /* check pid */
2348 mutex_enter(&proc_lock);
2349 if ((p = proc_find(pid)) == NULL) {
2350 error = EINVAL;
2351 goto out_locked;
2352 }
2353 mutex_enter(p->p_lock);
2354
2355 /* Check permission. */
2356 if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
2357 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2358 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
2359 else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
2360 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2361 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
2362 else
2363 error = EINVAL; /* XXXGCC */
2364 if (error) {
2365 mutex_exit(p->p_lock);
2366 goto out_locked;
2367 }
2368
2369 if (oldp == NULL) {
2370 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
2371 *oldlenp = sizeof (int);
2372 else
2373 *oldlenp = ARG_MAX; /* XXX XXX XXX */
2374 error = 0;
2375 mutex_exit(p->p_lock);
2376 goto out_locked;
2377 }
2378
2379 /*
2380 * Zombies don't have a stack, so we can't read their psstrings.
2381 * System processes also don't have a user stack.
2382 */
2383 if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
2384 error = EINVAL;
2385 mutex_exit(p->p_lock);
2386 goto out_locked;
2387 }
2388
2389 error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
2390 mutex_exit(p->p_lock);
2391 if (error) {
2392 goto out_locked;
2393 }
2394 mutex_exit(&proc_lock);
2395
2396 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
2397 int value;
2398 if ((error = copyin_psstrings(p, &pss)) == 0) {
2399 if (type == KERN_PROC_NARGV)
2400 value = pss.ps_nargvstr;
2401 else
2402 value = pss.ps_nenvstr;
2403 error = sysctl_copyout(l, &value, oldp, sizeof(value));
2404 *oldlenp = sizeof(value);
2405 }
2406 } else {
2407 cookie[0] = l;
2408 cookie[1] = oldp;
2409 error = copy_procargs(p, type, oldlenp,
2410 copy_procargs_sysctl_cb, cookie);
2411 }
2412 rw_exit(&p->p_reflock);
2413 sysctl_relock();
2414 return error;
2415
2416 out_locked:
2417 mutex_exit(&proc_lock);
2418 sysctl_relock();
2419 return error;
2420 }
2421
2422 int
2423 copy_procargs(struct proc *p, int oid, size_t *limit,
2424 int (*cb)(void *, const void *, size_t, size_t), void *cookie)
2425 {
2426 struct ps_strings pss;
2427 size_t len, i, loaded, entry_len;
2428 struct uio auio;
2429 struct iovec aiov;
2430 int error, argvlen;
2431 char *arg;
2432 char **argv;
2433 vaddr_t user_argv;
2434 struct vmspace *vmspace;
2435
2436 /*
2437 * Allocate a temporary buffer to hold the argument vector and
2438 * the arguments themselves.
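 * The argument vector is then paged in from the target's address
 * space with copyin_vmspace() in at most PAGE_SIZE chunks, and each
 * string it points at is read with uvm_io() and handed to the
 * caller's callback in at most page-sized pieces.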
2439 */ 2440 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2441 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2442 2443 /* 2444 * Lock the process down in memory. 2445 */ 2446 vmspace = p->p_vmspace; 2447 uvmspace_addref(vmspace); 2448 2449 /* 2450 * Read in the ps_strings structure. 2451 */ 2452 if ((error = copyin_psstrings(p, &pss)) != 0) 2453 goto done; 2454 2455 /* 2456 * Now read the address of the argument vector. 2457 */ 2458 switch (oid) { 2459 case KERN_PROC_ARGV: 2460 user_argv = (uintptr_t)pss.ps_argvstr; 2461 argvlen = pss.ps_nargvstr; 2462 break; 2463 case KERN_PROC_ENV: 2464 user_argv = (uintptr_t)pss.ps_envstr; 2465 argvlen = pss.ps_nenvstr; 2466 break; 2467 default: 2468 error = EINVAL; 2469 goto done; 2470 } 2471 2472 if (argvlen < 0) { 2473 error = EIO; 2474 goto done; 2475 } 2476 2477 2478 /* 2479 * Now copy each string. 2480 */ 2481 len = 0; /* bytes written to user buffer */ 2482 loaded = 0; /* bytes from argv already processed */ 2483 i = 0; /* To make compiler happy */ 2484 entry_len = PROC_PTRSZ(p); 2485 2486 for (; argvlen; --argvlen) { 2487 int finished = 0; 2488 vaddr_t base; 2489 size_t xlen; 2490 int j; 2491 2492 if (loaded == 0) { 2493 size_t rem = entry_len * argvlen; 2494 loaded = MIN(rem, PAGE_SIZE); 2495 error = copyin_vmspace(vmspace, 2496 (const void *)user_argv, argv, loaded); 2497 if (error) 2498 break; 2499 user_argv += loaded; 2500 i = 0; 2501 } 2502 2503 #if !defined(_RUMPKERNEL) 2504 if (p->p_flag & PK_32) 2505 MODULE_HOOK_CALL(kern_proc32_base_hook, 2506 (argv, i++), 0, base); 2507 else 2508 #endif /* !defined(_RUMPKERNEL) */ 2509 base = (vaddr_t)argv[i++]; 2510 loaded -= entry_len; 2511 2512 /* 2513 * The program has messed around with its arguments, 2514 * possibly deleting some, and replacing them with 2515 * NULL's. Treat this as the last argument and not 2516 * a failure. 2517 */ 2518 if (base == 0) 2519 break; 2520 2521 while (!finished) { 2522 xlen = PAGE_SIZE - (base & PAGE_MASK); 2523 2524 aiov.iov_base = arg; 2525 aiov.iov_len = PAGE_SIZE; 2526 auio.uio_iov = &aiov; 2527 auio.uio_iovcnt = 1; 2528 auio.uio_offset = base; 2529 auio.uio_resid = xlen; 2530 auio.uio_rw = UIO_READ; 2531 UIO_SETUP_SYSSPACE(&auio); 2532 error = uvm_io(&vmspace->vm_map, &auio, 0); 2533 if (error) 2534 goto done; 2535 2536 /* Look for the end of the string */ 2537 for (j = 0; j < xlen; j++) { 2538 if (arg[j] == '\0') { 2539 xlen = j + 1; 2540 finished = 1; 2541 break; 2542 } 2543 } 2544 2545 /* Check for user buffer overflow */ 2546 if (len + xlen > *limit) { 2547 finished = 1; 2548 if (len > *limit) 2549 xlen = 0; 2550 else 2551 xlen = *limit - len; 2552 } 2553 2554 /* Copyout the page */ 2555 error = (*cb)(cookie, arg, len, xlen); 2556 if (error) 2557 goto done; 2558 2559 len += xlen; 2560 base += xlen; 2561 } 2562 } 2563 *limit = len; 2564 2565 done: 2566 kmem_free(argv, PAGE_SIZE); 2567 kmem_free(arg, PAGE_SIZE); 2568 uvmspace_free(vmspace); 2569 return error; 2570 } 2571 2572 /* 2573 * Fill in a proc structure for the specified process. 
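 * Kernel addresses are copied only when the caller is allowed to see
 * them (allowaddr, cf. get_expose_address()); otherwise the
 * corresponding fields end up as zero, so no kernel pointers leak to
 * unprivileged callers.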
2574 */ 2575 static void 2576 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr) 2577 { 2578 COND_SET_STRUCT(p->p_list, psrc->p_list, allowaddr); 2579 memset(&p->p_auxlock, 0, sizeof(p->p_auxlock)); 2580 COND_SET_STRUCT(p->p_lock, psrc->p_lock, allowaddr); 2581 memset(&p->p_stmutex, 0, sizeof(p->p_stmutex)); 2582 memset(&p->p_reflock, 0, sizeof(p->p_reflock)); 2583 COND_SET_STRUCT(p->p_waitcv, psrc->p_waitcv, allowaddr); 2584 COND_SET_STRUCT(p->p_lwpcv, psrc->p_lwpcv, allowaddr); 2585 COND_SET_PTR(p->p_cred, psrc->p_cred, allowaddr); 2586 COND_SET_PTR(p->p_fd, psrc->p_fd, allowaddr); 2587 COND_SET_PTR(p->p_cwdi, psrc->p_cwdi, allowaddr); 2588 COND_SET_PTR(p->p_stats, psrc->p_stats, allowaddr); 2589 COND_SET_PTR(p->p_limit, psrc->p_limit, allowaddr); 2590 COND_SET_PTR(p->p_vmspace, psrc->p_vmspace, allowaddr); 2591 COND_SET_PTR(p->p_sigacts, psrc->p_sigacts, allowaddr); 2592 COND_SET_PTR(p->p_aio, psrc->p_aio, allowaddr); 2593 p->p_mqueue_cnt = psrc->p_mqueue_cnt; 2594 memset(&p->p_specdataref, 0, sizeof(p->p_specdataref)); 2595 p->p_exitsig = psrc->p_exitsig; 2596 p->p_flag = psrc->p_flag; 2597 p->p_sflag = psrc->p_sflag; 2598 p->p_slflag = psrc->p_slflag; 2599 p->p_lflag = psrc->p_lflag; 2600 p->p_stflag = psrc->p_stflag; 2601 p->p_stat = psrc->p_stat; 2602 p->p_trace_enabled = psrc->p_trace_enabled; 2603 p->p_pid = psrc->p_pid; 2604 COND_SET_STRUCT(p->p_pglist, psrc->p_pglist, allowaddr); 2605 COND_SET_PTR(p->p_pptr, psrc->p_pptr, allowaddr); 2606 COND_SET_STRUCT(p->p_sibling, psrc->p_sibling, allowaddr); 2607 COND_SET_STRUCT(p->p_children, psrc->p_children, allowaddr); 2608 COND_SET_STRUCT(p->p_lwps, psrc->p_lwps, allowaddr); 2609 COND_SET_PTR(p->p_raslist, psrc->p_raslist, allowaddr); 2610 p->p_nlwps = psrc->p_nlwps; 2611 p->p_nzlwps = psrc->p_nzlwps; 2612 p->p_nrlwps = psrc->p_nrlwps; 2613 p->p_nlwpwait = psrc->p_nlwpwait; 2614 p->p_ndlwps = psrc->p_ndlwps; 2615 p->p_nstopchild = psrc->p_nstopchild; 2616 p->p_waited = psrc->p_waited; 2617 COND_SET_PTR(p->p_zomblwp, psrc->p_zomblwp, allowaddr); 2618 COND_SET_PTR(p->p_vforklwp, psrc->p_vforklwp, allowaddr); 2619 COND_SET_PTR(p->p_sched_info, psrc->p_sched_info, allowaddr); 2620 p->p_estcpu = psrc->p_estcpu; 2621 p->p_estcpu_inherited = psrc->p_estcpu_inherited; 2622 p->p_forktime = psrc->p_forktime; 2623 p->p_pctcpu = psrc->p_pctcpu; 2624 COND_SET_PTR(p->p_opptr, psrc->p_opptr, allowaddr); 2625 COND_SET_PTR(p->p_timers, psrc->p_timers, allowaddr); 2626 p->p_rtime = psrc->p_rtime; 2627 p->p_uticks = psrc->p_uticks; 2628 p->p_sticks = psrc->p_sticks; 2629 p->p_iticks = psrc->p_iticks; 2630 p->p_xutime = psrc->p_xutime; 2631 p->p_xstime = psrc->p_xstime; 2632 p->p_traceflag = psrc->p_traceflag; 2633 COND_SET_PTR(p->p_tracep, psrc->p_tracep, allowaddr); 2634 COND_SET_PTR(p->p_textvp, psrc->p_textvp, allowaddr); 2635 COND_SET_PTR(p->p_emul, psrc->p_emul, allowaddr); 2636 COND_SET_PTR(p->p_emuldata, psrc->p_emuldata, allowaddr); 2637 COND_SET_CPTR(p->p_execsw, psrc->p_execsw, allowaddr); 2638 COND_SET_STRUCT(p->p_klist, psrc->p_klist, allowaddr); 2639 COND_SET_STRUCT(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr); 2640 COND_SET_STRUCT(p->p_sigpend.sp_info, psrc->p_sigpend.sp_info, 2641 allowaddr); 2642 p->p_sigpend.sp_set = psrc->p_sigpend.sp_set; 2643 COND_SET_PTR(p->p_lwpctl, psrc->p_lwpctl, allowaddr); 2644 p->p_ppid = psrc->p_ppid; 2645 p->p_oppid = psrc->p_oppid; 2646 COND_SET_PTR(p->p_path, psrc->p_path, allowaddr); 2647 p->p_sigctx = psrc->p_sigctx; 2648 p->p_nice = psrc->p_nice; 2649 memcpy(p->p_comm, psrc->p_comm, 
sizeof(p->p_comm)); 2650 COND_SET_PTR(p->p_pgrp, psrc->p_pgrp, allowaddr); 2651 COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr); 2652 p->p_pax = psrc->p_pax; 2653 p->p_xexit = psrc->p_xexit; 2654 p->p_xsig = psrc->p_xsig; 2655 p->p_acflag = psrc->p_acflag; 2656 COND_SET_STRUCT(p->p_md, psrc->p_md, allowaddr); 2657 p->p_stackbase = psrc->p_stackbase; 2658 COND_SET_PTR(p->p_dtrace, psrc->p_dtrace, allowaddr); 2659 } 2660 2661 /* 2662 * Fill in an eproc structure for the specified process. 2663 */ 2664 void 2665 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr) 2666 { 2667 struct tty *tp; 2668 struct lwp *l; 2669 2670 KASSERT(mutex_owned(&proc_lock)); 2671 KASSERT(mutex_owned(p->p_lock)); 2672 2673 COND_SET_PTR(ep->e_paddr, p, allowaddr); 2674 COND_SET_PTR(ep->e_sess, p->p_session, allowaddr); 2675 if (p->p_cred) { 2676 kauth_cred_topcred(p->p_cred, &ep->e_pcred); 2677 kauth_cred_toucred(p->p_cred, &ep->e_ucred); 2678 } 2679 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) { 2680 struct vmspace *vm = p->p_vmspace; 2681 2682 ep->e_vm.vm_rssize = vm_resident_count(vm); 2683 ep->e_vm.vm_tsize = vm->vm_tsize; 2684 ep->e_vm.vm_dsize = vm->vm_dsize; 2685 ep->e_vm.vm_ssize = vm->vm_ssize; 2686 ep->e_vm.vm_map.size = vm->vm_map.size; 2687 2688 /* Pick the primary (first) LWP */ 2689 l = proc_active_lwp(p); 2690 KASSERT(l != NULL); 2691 lwp_lock(l); 2692 if (l->l_wchan) 2693 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN); 2694 lwp_unlock(l); 2695 } 2696 ep->e_ppid = p->p_ppid; 2697 if (p->p_pgrp && p->p_session) { 2698 ep->e_pgid = p->p_pgrp->pg_id; 2699 ep->e_jobc = p->p_pgrp->pg_jobc; 2700 ep->e_sid = p->p_session->s_sid; 2701 if ((p->p_lflag & PL_CONTROLT) && 2702 (tp = p->p_session->s_ttyp)) { 2703 ep->e_tdev = tp->t_dev; 2704 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 2705 COND_SET_PTR(ep->e_tsess, tp->t_session, allowaddr); 2706 } else 2707 ep->e_tdev = (uint32_t)NODEV; 2708 ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0; 2709 if (SESS_LEADER(p)) 2710 ep->e_flag |= EPROC_SLEADER; 2711 strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME); 2712 } 2713 ep->e_xsize = ep->e_xrssize = 0; 2714 ep->e_xccount = ep->e_xswrss = 0; 2715 } 2716 2717 /* 2718 * Fill in a kinfo_proc2 structure for the specified process. 
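 *
 * This is the record returned by the KERN_PROC2 sysctl handled in
 * sysctl_doeproc() above.  A userland consumer typically sizes the
 * buffer first and then fetches the entries, roughly as follows
 * (illustrative sketch only; error handling omitted):
 *
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_ALL, 0,
 *	    sizeof(struct kinfo_proc2), 0 };
 *	size_t len;
 *
 *	sysctl(mib, 6, NULL, &len, NULL, 0);
 *	mib[5] = len / sizeof(struct kinfo_proc2);
 *	struct kinfo_proc2 *kp = malloc(len);
 *	sysctl(mib, 6, kp, &len, NULL, 0);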
2719 */ 2720 void 2721 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr) 2722 { 2723 struct tty *tp; 2724 struct lwp *l, *l2; 2725 struct timeval ut, st, rt; 2726 sigset_t ss1, ss2; 2727 struct rusage ru; 2728 struct vmspace *vm; 2729 2730 KASSERT(mutex_owned(&proc_lock)); 2731 KASSERT(mutex_owned(p->p_lock)); 2732 2733 sigemptyset(&ss1); 2734 sigemptyset(&ss2); 2735 2736 COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr); 2737 COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr); 2738 COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr); 2739 COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr); 2740 COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr); 2741 COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr); 2742 COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr); 2743 COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr); 2744 ki->p_tsess = 0; /* may be changed if controlling tty below */ 2745 COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr); 2746 ki->p_eflag = 0; 2747 ki->p_exitsig = p->p_exitsig; 2748 ki->p_flag = L_INMEM; /* Process never swapped out */ 2749 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag); 2750 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag); 2751 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag); 2752 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag); 2753 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag); 2754 ki->p_pid = p->p_pid; 2755 ki->p_ppid = p->p_ppid; 2756 ki->p_uid = kauth_cred_geteuid(p->p_cred); 2757 ki->p_ruid = kauth_cred_getuid(p->p_cred); 2758 ki->p_gid = kauth_cred_getegid(p->p_cred); 2759 ki->p_rgid = kauth_cred_getgid(p->p_cred); 2760 ki->p_svuid = kauth_cred_getsvuid(p->p_cred); 2761 ki->p_svgid = kauth_cred_getsvgid(p->p_cred); 2762 ki->p_ngroups = kauth_cred_ngroups(p->p_cred); 2763 kauth_cred_getgroups(p->p_cred, ki->p_groups, 2764 uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])), 2765 UIO_SYSSPACE); 2766 2767 ki->p_uticks = p->p_uticks; 2768 ki->p_sticks = p->p_sticks; 2769 ki->p_iticks = p->p_iticks; 2770 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */ 2771 COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr); 2772 ki->p_traceflag = p->p_traceflag; 2773 2774 memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t)); 2775 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t)); 2776 2777 ki->p_cpticks = 0; 2778 ki->p_pctcpu = p->p_pctcpu; 2779 ki->p_estcpu = 0; 2780 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */ 2781 ki->p_realstat = p->p_stat; 2782 ki->p_nice = p->p_nice; 2783 ki->p_xstat = P_WAITSTATUS(p); 2784 ki->p_acflag = p->p_acflag; 2785 2786 strncpy(ki->p_comm, p->p_comm, 2787 uimin(sizeof(ki->p_comm), sizeof(p->p_comm))); 2788 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename)); 2789 2790 ki->p_nlwps = p->p_nlwps; 2791 ki->p_realflag = ki->p_flag; 2792 2793 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) { 2794 vm = p->p_vmspace; 2795 ki->p_vm_rssize = vm_resident_count(vm); 2796 ki->p_vm_tsize = vm->vm_tsize; 2797 ki->p_vm_dsize = vm->vm_dsize; 2798 ki->p_vm_ssize = vm->vm_ssize; 2799 ki->p_vm_vsize = atop(vm->vm_map.size); 2800 /* 2801 * Since the stack is initially mapped mostly with 2802 * PROT_NONE and grown as needed, adjust the "mapped size" 2803 * to skip the unused stack portion. 
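 * That is, the reserved-but-untouched part of the stack,
 * vm_issize - vm_ssize (in pages), is subtracted from the total
 * map size below.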
2804 */ 2805 ki->p_vm_msize = 2806 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize; 2807 2808 /* Pick the primary (first) LWP */ 2809 l = proc_active_lwp(p); 2810 KASSERT(l != NULL); 2811 lwp_lock(l); 2812 ki->p_nrlwps = p->p_nrlwps; 2813 ki->p_forw = 0; 2814 ki->p_back = 0; 2815 COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr); 2816 ki->p_stat = l->l_stat; 2817 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag); 2818 ki->p_swtime = l->l_swtime; 2819 ki->p_slptime = l->l_slptime; 2820 if (l->l_stat == LSONPROC) 2821 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags; 2822 else 2823 ki->p_schedflags = 0; 2824 ki->p_priority = lwp_eprio(l); 2825 ki->p_usrpri = l->l_priority; 2826 if (l->l_wchan) 2827 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg)); 2828 COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr); 2829 ki->p_cpuid = cpu_index(l->l_cpu); 2830 lwp_unlock(l); 2831 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 2832 /* This is hardly correct, but... */ 2833 sigplusset(&l->l_sigpend.sp_set, &ss1); 2834 sigplusset(&l->l_sigmask, &ss2); 2835 ki->p_cpticks += l->l_cpticks; 2836 ki->p_pctcpu += l->l_pctcpu; 2837 ki->p_estcpu += l->l_estcpu; 2838 } 2839 } 2840 sigplusset(&p->p_sigpend.sp_set, &ss1); 2841 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t)); 2842 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t)); 2843 2844 if (p->p_session != NULL) { 2845 ki->p_sid = p->p_session->s_sid; 2846 ki->p__pgid = p->p_pgrp->pg_id; 2847 if (p->p_session->s_ttyvp) 2848 ki->p_eflag |= EPROC_CTTY; 2849 if (SESS_LEADER(p)) 2850 ki->p_eflag |= EPROC_SLEADER; 2851 strncpy(ki->p_login, p->p_session->s_login, 2852 uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login)); 2853 ki->p_jobc = p->p_pgrp->pg_jobc; 2854 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) { 2855 ki->p_tdev = tp->t_dev; 2856 ki->p_tpgid = tp->t_pgrp ? 
tp->t_pgrp->pg_id : NO_PGID; 2857 COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session), 2858 allowaddr); 2859 } else { 2860 ki->p_tdev = (int32_t)NODEV; 2861 } 2862 } 2863 2864 if (!P_ZOMBIE(p) && !zombie) { 2865 ki->p_uvalid = 1; 2866 ki->p_ustart_sec = p->p_stats->p_start.tv_sec; 2867 ki->p_ustart_usec = p->p_stats->p_start.tv_usec; 2868 2869 calcru(p, &ut, &st, NULL, &rt); 2870 ki->p_rtime_sec = rt.tv_sec; 2871 ki->p_rtime_usec = rt.tv_usec; 2872 ki->p_uutime_sec = ut.tv_sec; 2873 ki->p_uutime_usec = ut.tv_usec; 2874 ki->p_ustime_sec = st.tv_sec; 2875 ki->p_ustime_usec = st.tv_usec; 2876 2877 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru)); 2878 ki->p_uru_nvcsw = 0; 2879 ki->p_uru_nivcsw = 0; 2880 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 2881 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw); 2882 ki->p_uru_nivcsw += l2->l_nivcsw; 2883 ruadd(&ru, &l2->l_ru); 2884 } 2885 ki->p_uru_maxrss = ru.ru_maxrss; 2886 ki->p_uru_ixrss = ru.ru_ixrss; 2887 ki->p_uru_idrss = ru.ru_idrss; 2888 ki->p_uru_isrss = ru.ru_isrss; 2889 ki->p_uru_minflt = ru.ru_minflt; 2890 ki->p_uru_majflt = ru.ru_majflt; 2891 ki->p_uru_nswap = ru.ru_nswap; 2892 ki->p_uru_inblock = ru.ru_inblock; 2893 ki->p_uru_oublock = ru.ru_oublock; 2894 ki->p_uru_msgsnd = ru.ru_msgsnd; 2895 ki->p_uru_msgrcv = ru.ru_msgrcv; 2896 ki->p_uru_nsignals = ru.ru_nsignals; 2897 2898 timeradd(&p->p_stats->p_cru.ru_utime, 2899 &p->p_stats->p_cru.ru_stime, &ut); 2900 ki->p_uctime_sec = ut.tv_sec; 2901 ki->p_uctime_usec = ut.tv_usec; 2902 } 2903 } 2904 2905 2906 int 2907 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid) 2908 { 2909 int error; 2910 2911 mutex_enter(&proc_lock); 2912 if (pid == -1) 2913 *p = l->l_proc; 2914 else 2915 *p = proc_find(pid); 2916 2917 if (*p == NULL) { 2918 if (pid != -1) 2919 mutex_exit(&proc_lock); 2920 return ESRCH; 2921 } 2922 if (pid != -1) 2923 mutex_enter((*p)->p_lock); 2924 mutex_exit(&proc_lock); 2925 2926 error = kauth_authorize_process(l->l_cred, 2927 KAUTH_PROCESS_CANSEE, *p, 2928 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 2929 if (error) { 2930 if (pid != -1) 2931 mutex_exit((*p)->p_lock); 2932 } 2933 return error; 2934 } 2935 2936 static int 2937 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp) 2938 { 2939 int error; 2940 struct proc *p; 2941 2942 if ((error = proc_find_locked(l, &p, pid)) != 0) 2943 return error; 2944 2945 if (p->p_path == NULL) { 2946 if (pid != -1) 2947 mutex_exit(p->p_lock); 2948 return ENOENT; 2949 } 2950 2951 size_t len = strlen(p->p_path) + 1; 2952 if (oldp != NULL) { 2953 size_t copylen = uimin(len, *oldlenp); 2954 error = sysctl_copyout(l, p->p_path, oldp, copylen); 2955 if (error == 0 && *oldlenp < len) 2956 error = ENOSPC; 2957 } 2958 *oldlenp = len; 2959 if (pid != -1) 2960 mutex_exit(p->p_lock); 2961 return error; 2962 } 2963 2964 static int 2965 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp) 2966 { 2967 int error; 2968 struct proc *p; 2969 char *path; 2970 char *bp, *bend; 2971 struct cwdinfo *cwdi; 2972 struct vnode *vp; 2973 size_t len, lenused; 2974 2975 if ((error = proc_find_locked(l, &p, pid)) != 0) 2976 return error; 2977 2978 len = MAXPATHLEN * 4; 2979 2980 path = kmem_alloc(len, KM_SLEEP); 2981 2982 bp = &path[len]; 2983 bend = bp; 2984 *(--bp) = '\0'; 2985 2986 cwdi = p->p_cwdi; 2987 rw_enter(&cwdi->cwdi_lock, RW_READER); 2988 vp = cwdi->cwdi_cdir; 2989 error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l); 2990 rw_exit(&cwdi->cwdi_lock); 2991 2992 if (error) 2993 goto out; 2994 2995 lenused = bend - bp; 2996 2997 
if (oldp != NULL) { 2998 size_t copylen = uimin(lenused, *oldlenp); 2999 error = sysctl_copyout(l, bp, oldp, copylen); 3000 if (error == 0 && *oldlenp < lenused) 3001 error = ENOSPC; 3002 } 3003 *oldlenp = lenused; 3004 out: 3005 if (pid != -1) 3006 mutex_exit(p->p_lock); 3007 kmem_free(path, len); 3008 return error; 3009 } 3010 3011 int 3012 proc_getauxv(struct proc *p, void **buf, size_t *len) 3013 { 3014 struct ps_strings pss; 3015 int error; 3016 void *uauxv, *kauxv; 3017 size_t size; 3018 3019 if ((error = copyin_psstrings(p, &pss)) != 0) 3020 return error; 3021 if (pss.ps_envstr == NULL) 3022 return EIO; 3023 3024 size = p->p_execsw->es_arglen; 3025 if (size == 0) 3026 return EIO; 3027 3028 size_t ptrsz = PROC_PTRSZ(p); 3029 uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz); 3030 3031 kauxv = kmem_alloc(size, KM_SLEEP); 3032 3033 error = copyin_proc(p, uauxv, kauxv, size); 3034 if (error) { 3035 kmem_free(kauxv, size); 3036 return error; 3037 } 3038 3039 *buf = kauxv; 3040 *len = size; 3041 3042 return 0; 3043 } 3044 3045 3046 static int 3047 sysctl_security_expose_address(SYSCTLFN_ARGS) 3048 { 3049 int expose_address, error; 3050 struct sysctlnode node; 3051 3052 node = *rnode; 3053 node.sysctl_data = &expose_address; 3054 expose_address = *(int *)rnode->sysctl_data; 3055 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 3056 if (error || newp == NULL) 3057 return error; 3058 3059 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR, 3060 0, NULL, NULL, NULL)) 3061 return EPERM; 3062 3063 switch (expose_address) { 3064 case 0: 3065 case 1: 3066 case 2: 3067 break; 3068 default: 3069 return EINVAL; 3070 } 3071 3072 *(int *)rnode->sysctl_data = expose_address; 3073 3074 return 0; 3075 } 3076 3077 bool 3078 get_expose_address(struct proc *p) 3079 { 3080 /* allow only if sysctl variable is set or privileged */ 3081 return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE, 3082 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0; 3083 } 3084