1 /* $NetBSD: kern_proc.c,v 1.262 2020/12/24 12:14:50 nia Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1989, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.262 2020/12/24 12:14:50 nia Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_kstack.h" 69 #include "opt_maxuprc.h" 70 #include "opt_dtrace.h" 71 #include "opt_compat_netbsd32.h" 72 #include "opt_kaslr.h" 73 #endif 74 75 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \ 76 && !defined(_RUMPKERNEL) 77 #define COMPAT_NETBSD32 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/kernel.h> 83 #include <sys/proc.h> 84 #include <sys/resourcevar.h> 85 #include <sys/buf.h> 86 #include <sys/acct.h> 87 #include <sys/wait.h> 88 #include <sys/file.h> 89 #include <ufs/ufs/quota.h> 90 #include <sys/uio.h> 91 #include <sys/pool.h> 92 #include <sys/pset.h> 93 #include <sys/ioctl.h> 94 #include <sys/tty.h> 95 #include <sys/signalvar.h> 96 #include <sys/ras.h> 97 #include <sys/filedesc.h> 98 #include <sys/syscall_stats.h> 99 #include <sys/kauth.h> 100 #include <sys/sleepq.h> 101 #include <sys/atomic.h> 102 #include <sys/kmem.h> 103 #include <sys/namei.h> 104 #include <sys/dtrace_bsd.h> 105 #include <sys/sysctl.h> 106 #include <sys/exec.h> 107 #include <sys/cpu.h> 108 #include <sys/compat_stub.h> 109 #include <sys/futex.h> 110 #include <sys/pserialize.h> 111 112 #include <uvm/uvm_extern.h> 113 114 /* 115 * Process lists. 116 */ 117 118 struct proclist allproc __cacheline_aligned; 119 struct proclist zombproc __cacheline_aligned; 120 121 kmutex_t proc_lock __cacheline_aligned; 122 static pserialize_t proc_psz; 123 124 /* 125 * pid to lwp/proc lookup is done by indexing the pid_table array. 126 * Since pid numbers are only allocated when an empty slot 127 * has been found, there is no need to search any lists ever. 128 * (an orphaned pgrp will lock the slot, a session will lock 129 * the pgrp with the same number.) 130 * If the table is too small it is reallocated with twice the 131 * previous size and the entries 'unzipped' into the two halves. 132 * A linked list of free entries is passed through the pt_lwp 133 * field of 'free' items - set odd to be an invalid ptr. Two 134 * additional bits are also used to indicate if the slot is 135 * currently occupied by a proc or lwp, and if the PID is 136 * hidden from certain kinds of lookups. We thus require a 137 * minimum alignment for proc and lwp structures (LWPs are 138 * at least 32-byte aligned). 139 */ 140 141 struct pid_table { 142 uintptr_t pt_slot; 143 struct pgrp *pt_pgrp; 144 pid_t pt_pid; 145 }; 146 147 #define PT_F_FREE ((uintptr_t)__BIT(0)) 148 #define PT_F_LWP 0 /* pseudo-flag */ 149 #define PT_F_PROC ((uintptr_t)__BIT(1)) 150 151 #define PT_F_TYPEBITS (PT_F_FREE|PT_F_PROC) 152 #define PT_F_ALLBITS (PT_F_FREE|PT_F_PROC) 153 154 #define PT_VALID(s) (((s) & PT_F_FREE) == 0) 155 #define PT_RESERVED(s) ((s) == 0) 156 #define PT_NEXT(s) ((u_int)(s) >> 1) 157 #define PT_SET_FREE(pid) (((pid) << 1) | PT_F_FREE) 158 #define PT_SET_LWP(l) ((uintptr_t)(l)) 159 #define PT_SET_PROC(p) (((uintptr_t)(p)) | PT_F_PROC) 160 #define PT_SET_RESERVED 0 161 #define PT_GET_LWP(s) ((struct lwp *)((s) & ~PT_F_ALLBITS)) 162 #define PT_GET_PROC(s) ((struct proc *)((s) & ~PT_F_ALLBITS)) 163 #define PT_GET_TYPE(s) ((s) & PT_F_TYPEBITS) 164 #define PT_IS_LWP(s) (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0) 165 #define PT_IS_PROC(s) (PT_GET_TYPE(s) == PT_F_PROC) 166 167 #define MIN_PROC_ALIGNMENT (PT_F_ALLBITS + 1) 168 169 /* 170 * Table of process IDs (PIDs). 171 */ 172 static struct pid_table *pid_table __read_mostly; 173 174 #define INITIAL_PID_TABLE_SIZE (1 << 5) 175 176 /* Table mask, threshold for growing and number of allocated PIDs. */ 177 static u_int pid_tbl_mask __read_mostly; 178 static u_int pid_alloc_lim __read_mostly; 179 static u_int pid_alloc_cnt __cacheline_aligned; 180 181 /* Next free, last free and maximum PIDs. */ 182 static u_int next_free_pt __cacheline_aligned; 183 static u_int last_free_pt __cacheline_aligned; 184 static pid_t pid_max __read_mostly; 185 186 /* Components of the first process -- never freed. */ 187 188 extern struct emul emul_netbsd; /* defined in kern_exec.c */ 189 190 struct session session0 = { 191 .s_count = 1, 192 .s_sid = 0, 193 }; 194 struct pgrp pgrp0 = { 195 .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members), 196 .pg_session = &session0, 197 }; 198 filedesc_t filedesc0; 199 struct cwdinfo cwdi0 = { 200 .cwdi_cmask = CMASK, 201 .cwdi_refcnt = 1, 202 }; 203 struct plimit limit0; 204 struct pstats pstat0; 205 struct vmspace vmspace0; 206 struct sigacts sigacts0; 207 struct proc proc0 = { 208 .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps), 209 .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters), 210 .p_nlwps = 1, 211 .p_nrlwps = 1, 212 .p_pgrp = &pgrp0, 213 .p_comm = "system", 214 /* 215 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8) 216 * when they exit. init(8) can easily wait them out for us. 217 */ 218 .p_flag = PK_SYSTEM | PK_NOCLDWAIT, 219 .p_stat = SACTIVE, 220 .p_nice = NZERO, 221 .p_emul = &emul_netbsd, 222 .p_cwdi = &cwdi0, 223 .p_limit = &limit0, 224 .p_fd = &filedesc0, 225 .p_vmspace = &vmspace0, 226 .p_stats = &pstat0, 227 .p_sigacts = &sigacts0, 228 #ifdef PROC0_MD_INITIALIZERS 229 PROC0_MD_INITIALIZERS 230 #endif 231 }; 232 kauth_cred_t cred0; 233 234 static const int nofile = NOFILE; 235 static const int maxuprc = MAXUPRC; 236 237 static int sysctl_doeproc(SYSCTLFN_PROTO); 238 static int sysctl_kern_proc_args(SYSCTLFN_PROTO); 239 static int sysctl_security_expose_address(SYSCTLFN_PROTO); 240 241 #ifdef KASLR 242 static int kern_expose_address = 0; 243 #else 244 static int kern_expose_address = 1; 245 #endif 246 /* 247 * The process list descriptors, used during pid allocation and 248 * by sysctl. No locking on this data structure is needed since 249 * it is completely static. 250 */ 251 const struct proclist_desc proclists[] = { 252 { &allproc }, 253 { &zombproc }, 254 { NULL }, 255 }; 256 257 static struct pgrp * pg_remove(pid_t); 258 static void pg_delete(pid_t); 259 static void orphanpg(struct pgrp *); 260 261 static specificdata_domain_t proc_specificdata_domain; 262 263 static pool_cache_t proc_cache; 264 265 static kauth_listener_t proc_listener; 266 267 static void fill_proc(const struct proc *, struct proc *, bool); 268 static int fill_pathname(struct lwp *, pid_t, void *, size_t *); 269 static int fill_cwd(struct lwp *, pid_t, void *, size_t *); 270 271 static int 272 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 273 void *arg0, void *arg1, void *arg2, void *arg3) 274 { 275 struct proc *p; 276 int result; 277 278 result = KAUTH_RESULT_DEFER; 279 p = arg0; 280 281 switch (action) { 282 case KAUTH_PROCESS_CANSEE: { 283 enum kauth_process_req req; 284 285 req = (enum kauth_process_req)(uintptr_t)arg1; 286 287 switch (req) { 288 case KAUTH_REQ_PROCESS_CANSEE_ARGS: 289 case KAUTH_REQ_PROCESS_CANSEE_ENTRY: 290 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES: 291 case KAUTH_REQ_PROCESS_CANSEE_EPROC: 292 result = KAUTH_RESULT_ALLOW; 293 break; 294 295 case KAUTH_REQ_PROCESS_CANSEE_ENV: 296 if (kauth_cred_getuid(cred) != 297 kauth_cred_getuid(p->p_cred) || 298 kauth_cred_getuid(cred) != 299 kauth_cred_getsvuid(p->p_cred)) 300 break; 301 302 result = KAUTH_RESULT_ALLOW; 303 304 break; 305 306 case KAUTH_REQ_PROCESS_CANSEE_KPTR: 307 if (!kern_expose_address) 308 break; 309 310 if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM)) 311 break; 312 313 result = KAUTH_RESULT_ALLOW; 314 315 break; 316 317 default: 318 break; 319 } 320 321 break; 322 } 323 324 case KAUTH_PROCESS_FORK: { 325 int lnprocs = (int)(unsigned long)arg2; 326 327 /* 328 * Don't allow a nonprivileged user to use the last few 329 * processes. The variable lnprocs is the current number of 330 * processes, maxproc is the limit. 331 */ 332 if (__predict_false((lnprocs >= maxproc - 5))) 333 break; 334 335 result = KAUTH_RESULT_ALLOW; 336 337 break; 338 } 339 340 case KAUTH_PROCESS_CORENAME: 341 case KAUTH_PROCESS_STOPFLAG: 342 if (proc_uidmatch(cred, p->p_cred) == 0) 343 result = KAUTH_RESULT_ALLOW; 344 345 break; 346 347 default: 348 break; 349 } 350 351 return result; 352 } 353 354 static int 355 proc_ctor(void *arg __unused, void *obj, int flags __unused) 356 { 357 memset(obj, 0, sizeof(struct proc)); 358 return 0; 359 } 360 361 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t); 362 363 /* 364 * Initialize global process hashing structures. 365 */ 366 void 367 procinit(void) 368 { 369 const struct proclist_desc *pd; 370 u_int i; 371 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1)) 372 373 for (pd = proclists; pd->pd_list != NULL; pd++) 374 LIST_INIT(pd->pd_list); 375 376 mutex_init(&proc_lock, MUTEX_DEFAULT, IPL_NONE); 377 378 proc_psz = pserialize_create(); 379 380 pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE 381 * sizeof(struct pid_table), KM_SLEEP); 382 pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1; 383 pid_max = PID_MAX; 384 385 /* Set free list running through table... 386 Preset 'use count' above PID_MAX so we allocate pid 1 next. */ 387 for (i = 0; i <= pid_tbl_mask; i++) { 388 pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1); 389 pid_table[i].pt_pgrp = 0; 390 pid_table[i].pt_pid = 0; 391 } 392 /* slot 0 is just grabbed */ 393 next_free_pt = 1; 394 /* Need to fix last entry. */ 395 last_free_pt = pid_tbl_mask; 396 pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY); 397 /* point at which we grow table - to avoid reusing pids too often */ 398 pid_alloc_lim = pid_tbl_mask - 1; 399 #undef LINK_EMPTY 400 401 /* Reserve PID 1 for init(8). */ /* XXX slightly gross */ 402 mutex_enter(&proc_lock); 403 if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1) 404 panic("failed to reserve PID 1 for init(8)"); 405 mutex_exit(&proc_lock); 406 407 proc_specificdata_domain = specificdata_domain_create(); 408 KASSERT(proc_specificdata_domain != NULL); 409 410 size_t proc_alignment = coherency_unit; 411 if (proc_alignment < MIN_PROC_ALIGNMENT) 412 proc_alignment = MIN_PROC_ALIGNMENT; 413 414 proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0, 415 "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL); 416 417 proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS, 418 proc_listener_cb, NULL); 419 } 420 421 void 422 procinit_sysctl(void) 423 { 424 static struct sysctllog *clog; 425 426 sysctl_createv(&clog, 0, NULL, NULL, 427 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 428 CTLTYPE_INT, "expose_address", 429 SYSCTL_DESCR("Enable exposing kernel addresses"), 430 sysctl_security_expose_address, 0, 431 &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL); 432 sysctl_createv(&clog, 0, NULL, NULL, 433 CTLFLAG_PERMANENT, 434 CTLTYPE_NODE, "proc", 435 SYSCTL_DESCR("System-wide process information"), 436 sysctl_doeproc, 0, NULL, 0, 437 CTL_KERN, KERN_PROC, CTL_EOL); 438 sysctl_createv(&clog, 0, NULL, NULL, 439 CTLFLAG_PERMANENT, 440 CTLTYPE_NODE, "proc2", 441 SYSCTL_DESCR("Machine-independent process information"), 442 sysctl_doeproc, 0, NULL, 0, 443 CTL_KERN, KERN_PROC2, CTL_EOL); 444 sysctl_createv(&clog, 0, NULL, NULL, 445 CTLFLAG_PERMANENT, 446 CTLTYPE_NODE, "proc_args", 447 SYSCTL_DESCR("Process argument information"), 448 sysctl_kern_proc_args, 0, NULL, 0, 449 CTL_KERN, KERN_PROC_ARGS, CTL_EOL); 450 451 /* 452 "nodes" under these: 453 454 KERN_PROC_ALL 455 KERN_PROC_PID pid 456 KERN_PROC_PGRP pgrp 457 KERN_PROC_SESSION sess 458 KERN_PROC_TTY tty 459 KERN_PROC_UID uid 460 KERN_PROC_RUID uid 461 KERN_PROC_GID gid 462 KERN_PROC_RGID gid 463 464 all in all, probably not worth the effort... 465 */ 466 } 467 468 /* 469 * Initialize process 0. 470 */ 471 void 472 proc0_init(void) 473 { 474 struct proc *p; 475 struct pgrp *pg; 476 struct rlimit *rlim; 477 rlim_t lim; 478 int i; 479 480 p = &proc0; 481 pg = &pgrp0; 482 483 mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); 484 mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); 485 p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 486 487 rw_init(&p->p_reflock); 488 cv_init(&p->p_waitcv, "wait"); 489 cv_init(&p->p_lwpcv, "lwpwait"); 490 491 LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling); 492 493 KASSERT(lwp0.l_lid == 0); 494 pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0); 495 LIST_INSERT_HEAD(&allproc, p, p_list); 496 497 pid_table[lwp0.l_lid].pt_pgrp = pg; 498 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist); 499 500 #ifdef __HAVE_SYSCALL_INTERN 501 (*p->p_emul->e_syscall_intern)(p); 502 #endif 503 504 /* Create credentials. */ 505 cred0 = kauth_cred_alloc(); 506 p->p_cred = cred0; 507 508 /* Create the CWD info. */ 509 rw_init(&cwdi0.cwdi_lock); 510 511 /* Create the limits structures. */ 512 mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE); 513 514 rlim = limit0.pl_rlimit; 515 for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) { 516 rlim[i].rlim_cur = RLIM_INFINITY; 517 rlim[i].rlim_max = RLIM_INFINITY; 518 } 519 520 rlim[RLIMIT_NOFILE].rlim_max = maxfiles; 521 rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile; 522 523 rlim[RLIMIT_NPROC].rlim_max = maxproc; 524 rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc; 525 526 lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem(false))); 527 rlim[RLIMIT_RSS].rlim_max = lim; 528 rlim[RLIMIT_MEMLOCK].rlim_max = lim; 529 rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 530 531 rlim[RLIMIT_NTHR].rlim_max = maxlwp; 532 rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc; 533 534 /* Note that default core name has zero length. */ 535 limit0.pl_corename = defcorename; 536 limit0.pl_cnlen = 0; 537 limit0.pl_refcnt = 1; 538 limit0.pl_writeable = false; 539 limit0.pl_sv_limit = NULL; 540 541 /* Configure virtual memory system, set vm rlimits. */ 542 uvm_init_limits(p); 543 544 /* Initialize file descriptor table for proc0. */ 545 fd_init(&filedesc0); 546 547 /* 548 * Initialize proc0's vmspace, which uses the kernel pmap. 549 * All kernel processes (which never have user space mappings) 550 * share proc0's vmspace, and thus, the kernel pmap. 551 */ 552 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), 553 trunc_page(VM_MAXUSER_ADDRESS), 554 #ifdef __USE_TOPDOWN_VM 555 true 556 #else 557 false 558 #endif 559 ); 560 561 /* Initialize signal state for proc0. XXX IPL_SCHED */ 562 mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED); 563 siginit(p); 564 565 proc_initspecific(p); 566 kdtrace_proc_ctor(NULL, p); 567 } 568 569 /* 570 * Session reference counting. 571 */ 572 573 void 574 proc_sesshold(struct session *ss) 575 { 576 577 KASSERT(mutex_owned(&proc_lock)); 578 ss->s_count++; 579 } 580 581 void 582 proc_sessrele(struct session *ss) 583 { 584 struct pgrp *pg; 585 586 KASSERT(mutex_owned(&proc_lock)); 587 KASSERT(ss->s_count > 0); 588 589 /* 590 * We keep the pgrp with the same id as the session in order to 591 * stop a process being given the same pid. Since the pgrp holds 592 * a reference to the session, it must be a 'zombie' pgrp by now. 593 */ 594 if (--ss->s_count == 0) { 595 pg = pg_remove(ss->s_sid); 596 } else { 597 pg = NULL; 598 ss = NULL; 599 } 600 601 mutex_exit(&proc_lock); 602 603 if (pg) 604 kmem_free(pg, sizeof(struct pgrp)); 605 if (ss) 606 kmem_free(ss, sizeof(struct session)); 607 } 608 609 /* 610 * Check that the specified process group is in the session of the 611 * specified process. 612 * Treats -ve ids as process ids. 613 * Used to validate TIOCSPGRP requests. 614 */ 615 int 616 pgid_in_session(struct proc *p, pid_t pg_id) 617 { 618 struct pgrp *pgrp; 619 struct session *session; 620 int error; 621 622 if (pg_id == INT_MIN) 623 return EINVAL; 624 625 mutex_enter(&proc_lock); 626 if (pg_id < 0) { 627 struct proc *p1 = proc_find(-pg_id); 628 if (p1 == NULL) { 629 error = EINVAL; 630 goto fail; 631 } 632 pgrp = p1->p_pgrp; 633 } else { 634 pgrp = pgrp_find(pg_id); 635 if (pgrp == NULL) { 636 error = EINVAL; 637 goto fail; 638 } 639 } 640 session = pgrp->pg_session; 641 error = (session != p->p_pgrp->pg_session) ? EPERM : 0; 642 fail: 643 mutex_exit(&proc_lock); 644 return error; 645 } 646 647 /* 648 * p_inferior: is p an inferior of q? 649 */ 650 static inline bool 651 p_inferior(struct proc *p, struct proc *q) 652 { 653 654 KASSERT(mutex_owned(&proc_lock)); 655 656 for (; p != q; p = p->p_pptr) 657 if (p->p_pid == 0) 658 return false; 659 return true; 660 } 661 662 /* 663 * proc_find_lwp: locate an lwp in said proc by the ID. 664 * 665 * => Must be called with p::p_lock held. 666 * => LSIDL lwps are not returned because they are only partially 667 * constructed while occupying the slot. 668 * => Callers need to be careful about lwp::l_stat of the returned 669 * lwp. 670 */ 671 struct lwp * 672 proc_find_lwp(proc_t *p, pid_t pid) 673 { 674 struct pid_table *pt; 675 struct lwp *l = NULL; 676 uintptr_t slot; 677 int s; 678 679 KASSERT(mutex_owned(p->p_lock)); 680 681 /* 682 * Look in the pid_table. This is done unlocked inside a pserialize 683 * read section covering pid_table's memory allocation only, so take 684 * care to read the slot atomically and only once. This issues a 685 * memory barrier for dependent loads on alpha. 686 */ 687 s = pserialize_read_enter(); 688 pt = &pid_table[pid & pid_tbl_mask]; 689 slot = atomic_load_consume(&pt->pt_slot); 690 if (__predict_false(!PT_IS_LWP(slot))) { 691 pserialize_read_exit(s); 692 return NULL; 693 } 694 695 /* 696 * Check to see if the LWP is from the correct process. We won't 697 * see entries in pid_table from a prior process that also used "p", 698 * by virtue of the fact that allocating "p" means all prior updates 699 * to dependant data structures are visible to this thread. 700 */ 701 l = PT_GET_LWP(slot); 702 if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) { 703 pserialize_read_exit(s); 704 return NULL; 705 } 706 707 /* 708 * We now know that p->p_lock holds this LWP stable. 709 * 710 * If the status is not LSIDL, it means the LWP is intended to be 711 * findable by LID and l_lid cannot change behind us. 712 * 713 * No need to acquire the LWP's lock to check for LSIDL, as 714 * p->p_lock must be held to transition in and out of LSIDL. 715 * Any other observed state of is no particular interest. 716 */ 717 pserialize_read_exit(s); 718 return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL; 719 } 720 721 /* 722 * proc_find_lwp_unlocked: locate an lwp in said proc by the ID. 723 * 724 * => Called in a pserialize read section with no locks held. 725 * => LSIDL lwps are not returned because they are only partially 726 * constructed while occupying the slot. 727 * => Callers need to be careful about lwp::l_stat of the returned 728 * lwp. 729 * => If an LWP is found, it's returned locked. 730 */ 731 struct lwp * 732 proc_find_lwp_unlocked(proc_t *p, pid_t pid) 733 { 734 struct pid_table *pt; 735 struct lwp *l = NULL; 736 uintptr_t slot; 737 738 KASSERT(pserialize_in_read_section()); 739 740 /* 741 * Look in the pid_table. This is done unlocked inside a pserialize 742 * read section covering pid_table's memory allocation only, so take 743 * care to read the slot atomically and only once. This issues a 744 * memory barrier for dependent loads on alpha. 745 */ 746 pt = &pid_table[pid & pid_tbl_mask]; 747 slot = atomic_load_consume(&pt->pt_slot); 748 if (__predict_false(!PT_IS_LWP(slot))) { 749 return NULL; 750 } 751 752 /* 753 * Lock the LWP we found to get it stable. If it's embryonic or 754 * reaped (LSIDL) then none of the other fields can safely be 755 * checked. 756 */ 757 l = PT_GET_LWP(slot); 758 lwp_lock(l); 759 if (__predict_false(l->l_stat == LSIDL)) { 760 lwp_unlock(l); 761 return NULL; 762 } 763 764 /* 765 * l_proc and l_lid are now known stable because the LWP is not 766 * LSIDL, so check those fields too to make sure we found the 767 * right thing. 768 */ 769 if (__predict_false(l->l_proc != p || l->l_lid != pid)) { 770 lwp_unlock(l); 771 return NULL; 772 } 773 774 /* Everything checks out, return it locked. */ 775 return l; 776 } 777 778 /* 779 * proc_find_lwp_acquire_proc: locate an lwp and acquire a lock 780 * on its containing proc. 781 * 782 * => Similar to proc_find_lwp(), but does not require you to have 783 * the proc a priori. 784 * => Also returns proc * to caller, with p::p_lock held. 785 * => Same caveats apply. 786 */ 787 struct lwp * 788 proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp) 789 { 790 struct pid_table *pt; 791 struct proc *p = NULL; 792 struct lwp *l = NULL; 793 uintptr_t slot; 794 795 KASSERT(pp != NULL); 796 mutex_enter(&proc_lock); 797 pt = &pid_table[pid & pid_tbl_mask]; 798 799 slot = pt->pt_slot; 800 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) { 801 l = PT_GET_LWP(slot); 802 p = l->l_proc; 803 mutex_enter(p->p_lock); 804 if (__predict_false(l->l_stat == LSIDL)) { 805 mutex_exit(p->p_lock); 806 l = NULL; 807 p = NULL; 808 } 809 } 810 mutex_exit(&proc_lock); 811 812 KASSERT(p == NULL || mutex_owned(p->p_lock)); 813 *pp = p; 814 return l; 815 } 816 817 /* 818 * proc_find_raw_pid_table_locked: locate a process by the ID. 819 * 820 * => Must be called with proc_lock held. 821 */ 822 static proc_t * 823 proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid) 824 { 825 struct pid_table *pt; 826 proc_t *p = NULL; 827 uintptr_t slot; 828 829 /* No - used by DDB. KASSERT(mutex_owned(&proc_lock)); */ 830 pt = &pid_table[pid & pid_tbl_mask]; 831 832 slot = pt->pt_slot; 833 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) { 834 /* 835 * When looking up processes, require a direct match 836 * on the PID assigned to the proc, not just one of 837 * its LWPs. 838 * 839 * N.B. We require lwp::l_proc of LSIDL LWPs to be 840 * valid here. 841 */ 842 p = PT_GET_LWP(slot)->l_proc; 843 if (__predict_false(p->p_pid != pid && !any_lwpid)) 844 p = NULL; 845 } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) { 846 p = PT_GET_PROC(slot); 847 } 848 return p; 849 } 850 851 proc_t * 852 proc_find_raw(pid_t pid) 853 { 854 855 return proc_find_raw_pid_table_locked(pid, false); 856 } 857 858 static proc_t * 859 proc_find_internal(pid_t pid, bool any_lwpid) 860 { 861 proc_t *p; 862 863 KASSERT(mutex_owned(&proc_lock)); 864 865 p = proc_find_raw_pid_table_locked(pid, any_lwpid); 866 if (__predict_false(p == NULL)) { 867 return NULL; 868 } 869 870 /* 871 * Only allow live processes to be found by PID. 872 * XXX: p_stat might change, since proc unlocked. 873 */ 874 if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) { 875 return p; 876 } 877 return NULL; 878 } 879 880 proc_t * 881 proc_find(pid_t pid) 882 { 883 return proc_find_internal(pid, false); 884 } 885 886 proc_t * 887 proc_find_lwpid(pid_t pid) 888 { 889 return proc_find_internal(pid, true); 890 } 891 892 /* 893 * pgrp_find: locate a process group by the ID. 894 * 895 * => Must be called with proc_lock held. 896 */ 897 struct pgrp * 898 pgrp_find(pid_t pgid) 899 { 900 struct pgrp *pg; 901 902 KASSERT(mutex_owned(&proc_lock)); 903 904 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp; 905 906 /* 907 * Cannot look up a process group that only exists because the 908 * session has not died yet (traditional). 909 */ 910 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) { 911 return NULL; 912 } 913 return pg; 914 } 915 916 static void 917 expand_pid_table(void) 918 { 919 size_t pt_size, tsz; 920 struct pid_table *n_pt, *new_pt; 921 uintptr_t slot; 922 struct pgrp *pgrp; 923 pid_t pid, rpid; 924 u_int i; 925 uint new_pt_mask; 926 927 KASSERT(mutex_owned(&proc_lock)); 928 929 /* Unlock the pid_table briefly to allocate memory. */ 930 pt_size = pid_tbl_mask + 1; 931 mutex_exit(&proc_lock); 932 933 tsz = pt_size * 2 * sizeof(struct pid_table); 934 new_pt = kmem_alloc(tsz, KM_SLEEP); 935 new_pt_mask = pt_size * 2 - 1; 936 937 /* XXX For now. The pratical limit is much lower anyway. */ 938 KASSERT(new_pt_mask <= FUTEX_TID_MASK); 939 940 mutex_enter(&proc_lock); 941 if (pt_size != pid_tbl_mask + 1) { 942 /* Another process beat us to it... */ 943 mutex_exit(&proc_lock); 944 kmem_free(new_pt, tsz); 945 goto out; 946 } 947 948 /* 949 * Copy entries from old table into new one. 950 * If 'pid' is 'odd' we need to place in the upper half, 951 * even pid's to the lower half. 952 * Free items stay in the low half so we don't have to 953 * fixup the reference to them. 954 * We stuff free items on the front of the freelist 955 * because we can't write to unmodified entries. 956 * Processing the table backwards maintains a semblance 957 * of issuing pid numbers that increase with time. 958 */ 959 i = pt_size - 1; 960 n_pt = new_pt + i; 961 for (; ; i--, n_pt--) { 962 slot = pid_table[i].pt_slot; 963 pgrp = pid_table[i].pt_pgrp; 964 if (!PT_VALID(slot)) { 965 /* Up 'use count' so that link is valid */ 966 pid = (PT_NEXT(slot) + pt_size) & ~pt_size; 967 rpid = 0; 968 slot = PT_SET_FREE(pid); 969 if (pgrp) 970 pid = pgrp->pg_id; 971 } else { 972 pid = pid_table[i].pt_pid; 973 rpid = pid; 974 } 975 976 /* Save entry in appropriate half of table */ 977 n_pt[pid & pt_size].pt_slot = slot; 978 n_pt[pid & pt_size].pt_pgrp = pgrp; 979 n_pt[pid & pt_size].pt_pid = rpid; 980 981 /* Put other piece on start of free list */ 982 pid = (pid ^ pt_size) & ~pid_tbl_mask; 983 n_pt[pid & pt_size].pt_slot = 984 PT_SET_FREE((pid & ~pt_size) | next_free_pt); 985 n_pt[pid & pt_size].pt_pgrp = 0; 986 n_pt[pid & pt_size].pt_pid = 0; 987 988 next_free_pt = i | (pid & pt_size); 989 if (i == 0) 990 break; 991 } 992 993 /* Save old table size and switch tables */ 994 tsz = pt_size * sizeof(struct pid_table); 995 n_pt = pid_table; 996 pid_table = new_pt; 997 pid_tbl_mask = new_pt_mask; 998 999 /* 1000 * pid_max starts as PID_MAX (= 30000), once we have 16384 1001 * allocated pids we need it to be larger! 1002 */ 1003 if (pid_tbl_mask > PID_MAX) { 1004 pid_max = pid_tbl_mask * 2 + 1; 1005 pid_alloc_lim |= pid_alloc_lim << 1; 1006 } else 1007 pid_alloc_lim <<= 1; /* doubles number of free slots... */ 1008 1009 mutex_exit(&proc_lock); 1010 1011 /* 1012 * Make sure that unlocked access to the old pid_table is complete 1013 * and then free it. 1014 */ 1015 pserialize_perform(proc_psz); 1016 kmem_free(n_pt, tsz); 1017 1018 out: /* Return with proc_lock held again. */ 1019 mutex_enter(&proc_lock); 1020 } 1021 1022 struct proc * 1023 proc_alloc(void) 1024 { 1025 struct proc *p; 1026 1027 p = pool_cache_get(proc_cache, PR_WAITOK); 1028 p->p_stat = SIDL; /* protect against others */ 1029 proc_initspecific(p); 1030 kdtrace_proc_ctor(NULL, p); 1031 1032 /* 1033 * Allocate a placeholder in the pid_table. When we create the 1034 * first LWP for this process, it will take ownership of the 1035 * slot. 1036 */ 1037 if (__predict_false(proc_alloc_pid(p) == -1)) { 1038 /* Allocating the PID failed; unwind. */ 1039 proc_finispecific(p); 1040 proc_free_mem(p); 1041 p = NULL; 1042 } 1043 return p; 1044 } 1045 1046 /* 1047 * proc_alloc_pid_slot: allocate PID and record the occcupant so that 1048 * proc_find_raw() can find it by the PID. 1049 */ 1050 static pid_t __noinline 1051 proc_alloc_pid_slot(struct proc *p, uintptr_t slot) 1052 { 1053 struct pid_table *pt; 1054 pid_t pid; 1055 int nxt; 1056 1057 KASSERT(mutex_owned(&proc_lock)); 1058 1059 for (;;expand_pid_table()) { 1060 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) { 1061 /* ensure pids cycle through 2000+ values */ 1062 continue; 1063 } 1064 /* 1065 * The first user process *must* be given PID 1. 1066 * it has already been reserved for us. This 1067 * will be coming in from the proc_alloc() call 1068 * above, and the entry will be usurped later when 1069 * the first user LWP is created. 1070 * XXX this is slightly gross. 1071 */ 1072 if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) && 1073 p != &proc0)) { 1074 KASSERT(PT_IS_PROC(slot)); 1075 pt = &pid_table[1]; 1076 pt->pt_slot = slot; 1077 return 1; 1078 } 1079 pt = &pid_table[next_free_pt]; 1080 #ifdef DIAGNOSTIC 1081 if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp)) 1082 panic("proc_alloc: slot busy"); 1083 #endif 1084 nxt = PT_NEXT(pt->pt_slot); 1085 if (nxt & pid_tbl_mask) 1086 break; 1087 /* Table full - expand (NB last entry not used....) */ 1088 } 1089 1090 /* pid is 'saved use count' + 'size' + entry */ 1091 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt; 1092 if ((uint)pid > (uint)pid_max) 1093 pid &= pid_tbl_mask; 1094 next_free_pt = nxt & pid_tbl_mask; 1095 1096 /* XXX For now. The pratical limit is much lower anyway. */ 1097 KASSERT(pid <= FUTEX_TID_MASK); 1098 1099 /* Grab table slot */ 1100 pt->pt_slot = slot; 1101 1102 KASSERT(pt->pt_pid == 0); 1103 pt->pt_pid = pid; 1104 pid_alloc_cnt++; 1105 1106 return pid; 1107 } 1108 1109 pid_t 1110 proc_alloc_pid(struct proc *p) 1111 { 1112 pid_t pid; 1113 1114 KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0); 1115 KASSERT(p->p_stat == SIDL); 1116 1117 mutex_enter(&proc_lock); 1118 pid = proc_alloc_pid_slot(p, PT_SET_PROC(p)); 1119 if (pid != -1) 1120 p->p_pid = pid; 1121 mutex_exit(&proc_lock); 1122 1123 return pid; 1124 } 1125 1126 pid_t 1127 proc_alloc_lwpid(struct proc *p, struct lwp *l) 1128 { 1129 struct pid_table *pt; 1130 pid_t pid; 1131 1132 KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0); 1133 KASSERT(l->l_proc == p); 1134 KASSERT(l->l_stat == LSIDL); 1135 1136 /* 1137 * For unlocked lookup in proc_find_lwp(), make sure l->l_proc 1138 * is globally visible before the LWP becomes visible via the 1139 * pid_table. 1140 */ 1141 #ifndef __HAVE_ATOMIC_AS_MEMBAR 1142 membar_producer(); 1143 #endif 1144 1145 /* 1146 * If the slot for p->p_pid currently points to the proc, 1147 * then we should usurp this ID for the LWP. This happens 1148 * at least once per process (for the first LWP), and can 1149 * happen again if the first LWP for a process exits and 1150 * before the process creates another. 1151 */ 1152 mutex_enter(&proc_lock); 1153 pid = p->p_pid; 1154 pt = &pid_table[pid & pid_tbl_mask]; 1155 KASSERT(pt->pt_pid == pid); 1156 if (PT_IS_PROC(pt->pt_slot)) { 1157 KASSERT(PT_GET_PROC(pt->pt_slot) == p); 1158 l->l_lid = pid; 1159 pt->pt_slot = PT_SET_LWP(l); 1160 } else { 1161 /* Need to allocate a new slot. */ 1162 pid = proc_alloc_pid_slot(p, PT_SET_LWP(l)); 1163 if (pid != -1) 1164 l->l_lid = pid; 1165 } 1166 mutex_exit(&proc_lock); 1167 1168 return pid; 1169 } 1170 1171 static void __noinline 1172 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused) 1173 { 1174 struct pid_table *pt; 1175 1176 pt = &pid_table[pid & pid_tbl_mask]; 1177 1178 KASSERT(PT_GET_TYPE(pt->pt_slot) == type); 1179 KASSERT(pt->pt_pid == pid); 1180 1181 /* save pid use count in slot */ 1182 pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask); 1183 pt->pt_pid = 0; 1184 1185 if (pt->pt_pgrp == NULL) { 1186 /* link last freed entry onto ours */ 1187 pid &= pid_tbl_mask; 1188 pt = &pid_table[last_free_pt]; 1189 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid); 1190 pt->pt_pid = 0; 1191 last_free_pt = pid; 1192 pid_alloc_cnt--; 1193 } 1194 } 1195 1196 /* 1197 * Free a process id - called from proc_free (in kern_exit.c) 1198 * 1199 * Called with the proc_lock held. 1200 */ 1201 void 1202 proc_free_pid(pid_t pid) 1203 { 1204 1205 KASSERT(mutex_owned(&proc_lock)); 1206 proc_free_pid_internal(pid, PT_F_PROC); 1207 } 1208 1209 /* 1210 * Free a process id used by an LWP. If this was the process's 1211 * first LWP, we convert the slot to point to the process; the 1212 * entry will get cleaned up later when the process finishes exiting. 1213 * 1214 * If not, then it's the same as proc_free_pid(). 1215 */ 1216 void 1217 proc_free_lwpid(struct proc *p, pid_t pid) 1218 { 1219 1220 KASSERT(mutex_owned(&proc_lock)); 1221 1222 if (__predict_true(p->p_pid == pid)) { 1223 struct pid_table *pt; 1224 1225 pt = &pid_table[pid & pid_tbl_mask]; 1226 1227 KASSERT(pt->pt_pid == pid); 1228 KASSERT(PT_IS_LWP(pt->pt_slot)); 1229 KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p); 1230 1231 pt->pt_slot = PT_SET_PROC(p); 1232 return; 1233 } 1234 proc_free_pid_internal(pid, PT_F_LWP); 1235 } 1236 1237 void 1238 proc_free_mem(struct proc *p) 1239 { 1240 1241 kdtrace_proc_dtor(NULL, p); 1242 pool_cache_put(proc_cache, p); 1243 } 1244 1245 /* 1246 * proc_enterpgrp: move p to a new or existing process group (and session). 1247 * 1248 * If we are creating a new pgrp, the pgid should equal 1249 * the calling process' pid. 1250 * If is only valid to enter a process group that is in the session 1251 * of the process. 1252 * Also mksess should only be set if we are creating a process group 1253 * 1254 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return. 1255 */ 1256 int 1257 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess) 1258 { 1259 struct pgrp *new_pgrp, *pgrp; 1260 struct session *sess; 1261 struct proc *p; 1262 int rval; 1263 pid_t pg_id = NO_PGID; 1264 1265 /* Allocate data areas we might need before doing any validity checks */ 1266 sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL; 1267 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP); 1268 1269 mutex_enter(&proc_lock); 1270 rval = EPERM; /* most common error (to save typing) */ 1271 1272 /* Check pgrp exists or can be created */ 1273 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; 1274 if (pgrp != NULL && pgrp->pg_id != pgid) 1275 goto done; 1276 1277 /* Can only set another process under restricted circumstances. */ 1278 if (pid != curp->p_pid) { 1279 /* Must exist and be one of our children... */ 1280 p = proc_find_internal(pid, false); 1281 if (p == NULL || !p_inferior(p, curp)) { 1282 rval = ESRCH; 1283 goto done; 1284 } 1285 /* ... in the same session... */ 1286 if (sess != NULL || p->p_session != curp->p_session) 1287 goto done; 1288 /* ... existing pgid must be in same session ... */ 1289 if (pgrp != NULL && pgrp->pg_session != p->p_session) 1290 goto done; 1291 /* ... and not done an exec. */ 1292 if (p->p_flag & PK_EXEC) { 1293 rval = EACCES; 1294 goto done; 1295 } 1296 } else { 1297 /* ... setsid() cannot re-enter a pgrp */ 1298 if (mksess && (curp->p_pgid == curp->p_pid || 1299 pgrp_find(curp->p_pid))) 1300 goto done; 1301 p = curp; 1302 } 1303 1304 /* Changing the process group/session of a session 1305 leader is definitely off limits. */ 1306 if (SESS_LEADER(p)) { 1307 if (sess == NULL && p->p_pgrp == pgrp) 1308 /* unless it's a definite noop */ 1309 rval = 0; 1310 goto done; 1311 } 1312 1313 /* Can only create a process group with id of process */ 1314 if (pgrp == NULL && pgid != pid) 1315 goto done; 1316 1317 /* Can only create a session if creating pgrp */ 1318 if (sess != NULL && pgrp != NULL) 1319 goto done; 1320 1321 /* Check we allocated memory for a pgrp... */ 1322 if (pgrp == NULL && new_pgrp == NULL) 1323 goto done; 1324 1325 /* Don't attach to 'zombie' pgrp */ 1326 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members)) 1327 goto done; 1328 1329 /* Expect to succeed now */ 1330 rval = 0; 1331 1332 if (pgrp == p->p_pgrp) 1333 /* nothing to do */ 1334 goto done; 1335 1336 /* Ok all setup, link up required structures */ 1337 1338 if (pgrp == NULL) { 1339 pgrp = new_pgrp; 1340 new_pgrp = NULL; 1341 if (sess != NULL) { 1342 sess->s_sid = p->p_pid; 1343 sess->s_leader = p; 1344 sess->s_count = 1; 1345 sess->s_ttyvp = NULL; 1346 sess->s_ttyp = NULL; 1347 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET; 1348 memcpy(sess->s_login, p->p_session->s_login, 1349 sizeof(sess->s_login)); 1350 p->p_lflag &= ~PL_CONTROLT; 1351 } else { 1352 sess = p->p_pgrp->pg_session; 1353 proc_sesshold(sess); 1354 } 1355 pgrp->pg_session = sess; 1356 sess = NULL; 1357 1358 pgrp->pg_id = pgid; 1359 LIST_INIT(&pgrp->pg_members); 1360 #ifdef DIAGNOSTIC 1361 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp)) 1362 panic("enterpgrp: pgrp table slot in use"); 1363 if (__predict_false(mksess && p != curp)) 1364 panic("enterpgrp: mksession and p != curproc"); 1365 #endif 1366 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; 1367 pgrp->pg_jobc = 0; 1368 } 1369 1370 /* 1371 * Adjust eligibility of affected pgrps to participate in job control. 1372 * Increment eligibility counts before decrementing, otherwise we 1373 * could reach 0 spuriously during the first call. 1374 */ 1375 fixjobc(p, pgrp, 1); 1376 fixjobc(p, p->p_pgrp, 0); 1377 1378 /* Interlock with ttread(). */ 1379 mutex_spin_enter(&tty_lock); 1380 1381 /* Move process to requested group. */ 1382 LIST_REMOVE(p, p_pglist); 1383 if (LIST_EMPTY(&p->p_pgrp->pg_members)) 1384 /* defer delete until we've dumped the lock */ 1385 pg_id = p->p_pgrp->pg_id; 1386 p->p_pgrp = pgrp; 1387 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); 1388 1389 /* Done with the swap; we can release the tty mutex. */ 1390 mutex_spin_exit(&tty_lock); 1391 1392 done: 1393 if (pg_id != NO_PGID) { 1394 /* Releases proc_lock. */ 1395 pg_delete(pg_id); 1396 } else { 1397 mutex_exit(&proc_lock); 1398 } 1399 if (sess != NULL) 1400 kmem_free(sess, sizeof(*sess)); 1401 if (new_pgrp != NULL) 1402 kmem_free(new_pgrp, sizeof(*new_pgrp)); 1403 #ifdef DEBUG_PGRP 1404 if (__predict_false(rval)) 1405 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n", 1406 pid, pgid, mksess, curp->p_pid, rval); 1407 #endif 1408 return rval; 1409 } 1410 1411 /* 1412 * proc_leavepgrp: remove a process from its process group. 1413 * => must be called with the proc_lock held, which will be released; 1414 */ 1415 void 1416 proc_leavepgrp(struct proc *p) 1417 { 1418 struct pgrp *pgrp; 1419 1420 KASSERT(mutex_owned(&proc_lock)); 1421 1422 /* Interlock with ttread() */ 1423 mutex_spin_enter(&tty_lock); 1424 pgrp = p->p_pgrp; 1425 LIST_REMOVE(p, p_pglist); 1426 p->p_pgrp = NULL; 1427 mutex_spin_exit(&tty_lock); 1428 1429 if (LIST_EMPTY(&pgrp->pg_members)) { 1430 /* Releases proc_lock. */ 1431 pg_delete(pgrp->pg_id); 1432 } else { 1433 mutex_exit(&proc_lock); 1434 } 1435 } 1436 1437 /* 1438 * pg_remove: remove a process group from the table. 1439 * => must be called with the proc_lock held; 1440 * => returns process group to free; 1441 */ 1442 static struct pgrp * 1443 pg_remove(pid_t pg_id) 1444 { 1445 struct pgrp *pgrp; 1446 struct pid_table *pt; 1447 1448 KASSERT(mutex_owned(&proc_lock)); 1449 1450 pt = &pid_table[pg_id & pid_tbl_mask]; 1451 pgrp = pt->pt_pgrp; 1452 1453 KASSERT(pgrp != NULL); 1454 KASSERT(pgrp->pg_id == pg_id); 1455 KASSERT(LIST_EMPTY(&pgrp->pg_members)); 1456 1457 pt->pt_pgrp = NULL; 1458 1459 if (!PT_VALID(pt->pt_slot)) { 1460 /* Orphaned pgrp, put slot onto free list. */ 1461 KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0); 1462 pg_id &= pid_tbl_mask; 1463 pt = &pid_table[last_free_pt]; 1464 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id); 1465 KASSERT(pt->pt_pid == 0); 1466 last_free_pt = pg_id; 1467 pid_alloc_cnt--; 1468 } 1469 return pgrp; 1470 } 1471 1472 /* 1473 * pg_delete: delete and free a process group. 1474 * => must be called with the proc_lock held, which will be released. 1475 */ 1476 static void 1477 pg_delete(pid_t pg_id) 1478 { 1479 struct pgrp *pg; 1480 struct tty *ttyp; 1481 struct session *ss; 1482 1483 KASSERT(mutex_owned(&proc_lock)); 1484 1485 pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp; 1486 if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) { 1487 mutex_exit(&proc_lock); 1488 return; 1489 } 1490 1491 ss = pg->pg_session; 1492 1493 /* Remove reference (if any) from tty to this process group */ 1494 mutex_spin_enter(&tty_lock); 1495 ttyp = ss->s_ttyp; 1496 if (ttyp != NULL && ttyp->t_pgrp == pg) { 1497 ttyp->t_pgrp = NULL; 1498 KASSERT(ttyp->t_session == ss); 1499 } 1500 mutex_spin_exit(&tty_lock); 1501 1502 /* 1503 * The leading process group in a session is freed by proc_sessrele(), 1504 * if last reference. It will also release the locks. 1505 */ 1506 pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL; 1507 proc_sessrele(ss); 1508 1509 if (pg != NULL) { 1510 /* Free it, if was not done above. */ 1511 kmem_free(pg, sizeof(struct pgrp)); 1512 } 1513 } 1514 1515 /* 1516 * Adjust pgrp jobc counters when specified process changes process group. 1517 * We count the number of processes in each process group that "qualify" 1518 * the group for terminal job control (those with a parent in a different 1519 * process group of the same session). If that count reaches zero, the 1520 * process group becomes orphaned. Check both the specified process' 1521 * process group and that of its children. 1522 * entering == 0 => p is leaving specified group. 1523 * entering == 1 => p is entering specified group. 1524 * 1525 * Call with proc_lock held. 1526 */ 1527 void 1528 fixjobc(struct proc *p, struct pgrp *pgrp, int entering) 1529 { 1530 struct pgrp *hispgrp; 1531 struct session *mysession = pgrp->pg_session; 1532 struct proc *child; 1533 1534 KASSERT(mutex_owned(&proc_lock)); 1535 1536 /* 1537 * Check p's parent to see whether p qualifies its own process 1538 * group; if so, adjust count for p's process group. 1539 */ 1540 hispgrp = p->p_pptr->p_pgrp; 1541 if (hispgrp != pgrp && hispgrp->pg_session == mysession) { 1542 if (entering) { 1543 pgrp->pg_jobc++; 1544 p->p_lflag &= ~PL_ORPHANPG; 1545 } else { 1546 /* KASSERT(pgrp->pg_jobc > 0); */ 1547 if (--pgrp->pg_jobc == 0) 1548 orphanpg(pgrp); 1549 } 1550 } 1551 1552 /* 1553 * Check this process' children to see whether they qualify 1554 * their process groups; if so, adjust counts for children's 1555 * process groups. 1556 */ 1557 LIST_FOREACH(child, &p->p_children, p_sibling) { 1558 hispgrp = child->p_pgrp; 1559 if (hispgrp != pgrp && hispgrp->pg_session == mysession && 1560 !P_ZOMBIE(child)) { 1561 if (entering) { 1562 child->p_lflag &= ~PL_ORPHANPG; 1563 hispgrp->pg_jobc++; 1564 } else { 1565 KASSERT(hispgrp->pg_jobc > 0); 1566 if (--hispgrp->pg_jobc == 0) 1567 orphanpg(hispgrp); 1568 } 1569 } 1570 } 1571 } 1572 1573 /* 1574 * A process group has become orphaned; 1575 * if there are any stopped processes in the group, 1576 * hang-up all process in that group. 1577 * 1578 * Call with proc_lock held. 1579 */ 1580 static void 1581 orphanpg(struct pgrp *pg) 1582 { 1583 struct proc *p; 1584 1585 KASSERT(mutex_owned(&proc_lock)); 1586 1587 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 1588 if (p->p_stat == SSTOP) { 1589 p->p_lflag |= PL_ORPHANPG; 1590 psignal(p, SIGHUP); 1591 psignal(p, SIGCONT); 1592 } 1593 } 1594 } 1595 1596 #ifdef DDB 1597 #include <ddb/db_output.h> 1598 void pidtbl_dump(void); 1599 void 1600 pidtbl_dump(void) 1601 { 1602 struct pid_table *pt; 1603 struct proc *p; 1604 struct pgrp *pgrp; 1605 uintptr_t slot; 1606 int id; 1607 1608 db_printf("pid table %p size %x, next %x, last %x\n", 1609 pid_table, pid_tbl_mask+1, 1610 next_free_pt, last_free_pt); 1611 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) { 1612 slot = pt->pt_slot; 1613 if (!PT_VALID(slot) && !pt->pt_pgrp) 1614 continue; 1615 if (PT_IS_LWP(slot)) { 1616 p = PT_GET_LWP(slot)->l_proc; 1617 } else if (PT_IS_PROC(slot)) { 1618 p = PT_GET_PROC(slot); 1619 } else { 1620 p = NULL; 1621 } 1622 db_printf(" id %x: ", id); 1623 if (p != NULL) 1624 db_printf("slotpid %d proc %p id %d (0x%x) %s\n", 1625 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm); 1626 else 1627 db_printf("next %x use %x\n", 1628 PT_NEXT(slot) & pid_tbl_mask, 1629 PT_NEXT(slot) & ~pid_tbl_mask); 1630 if ((pgrp = pt->pt_pgrp)) { 1631 db_printf("\tsession %p, sid %d, count %d, login %s\n", 1632 pgrp->pg_session, pgrp->pg_session->s_sid, 1633 pgrp->pg_session->s_count, 1634 pgrp->pg_session->s_login); 1635 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", 1636 pgrp, pgrp->pg_id, pgrp->pg_jobc, 1637 LIST_FIRST(&pgrp->pg_members)); 1638 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { 1639 db_printf("\t\tpid %d addr %p pgrp %p %s\n", 1640 p->p_pid, p, p->p_pgrp, p->p_comm); 1641 } 1642 } 1643 } 1644 } 1645 #endif /* DDB */ 1646 1647 #ifdef KSTACK_CHECK_MAGIC 1648 1649 #define KSTACK_MAGIC 0xdeadbeaf 1650 1651 /* XXX should be per process basis? */ 1652 static int kstackleftmin = KSTACK_SIZE; 1653 static int kstackleftthres = KSTACK_SIZE / 8; 1654 1655 void 1656 kstack_setup_magic(const struct lwp *l) 1657 { 1658 uint32_t *ip; 1659 uint32_t const *end; 1660 1661 KASSERT(l != NULL); 1662 KASSERT(l != &lwp0); 1663 1664 /* 1665 * fill all the stack with magic number 1666 * so that later modification on it can be detected. 1667 */ 1668 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1669 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1670 for (; ip < end; ip++) { 1671 *ip = KSTACK_MAGIC; 1672 } 1673 } 1674 1675 void 1676 kstack_check_magic(const struct lwp *l) 1677 { 1678 uint32_t const *ip, *end; 1679 int stackleft; 1680 1681 KASSERT(l != NULL); 1682 1683 /* don't check proc0 */ /*XXX*/ 1684 if (l == &lwp0) 1685 return; 1686 1687 #ifdef __MACHINE_STACK_GROWS_UP 1688 /* stack grows upwards (eg. hppa) */ 1689 ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1690 end = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1691 for (ip--; ip >= end; ip--) 1692 if (*ip != KSTACK_MAGIC) 1693 break; 1694 1695 stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip; 1696 #else /* __MACHINE_STACK_GROWS_UP */ 1697 /* stack grows downwards (eg. i386) */ 1698 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l); 1699 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE); 1700 for (; ip < end; ip++) 1701 if (*ip != KSTACK_MAGIC) 1702 break; 1703 1704 stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l); 1705 #endif /* __MACHINE_STACK_GROWS_UP */ 1706 1707 if (kstackleftmin > stackleft) { 1708 kstackleftmin = stackleft; 1709 if (stackleft < kstackleftthres) 1710 printf("warning: kernel stack left %d bytes" 1711 "(pid %u:lid %u)\n", stackleft, 1712 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1713 } 1714 1715 if (stackleft <= 0) { 1716 panic("magic on the top of kernel stack changed for " 1717 "pid %u, lid %u: maybe kernel stack overflow", 1718 (u_int)l->l_proc->p_pid, (u_int)l->l_lid); 1719 } 1720 } 1721 #endif /* KSTACK_CHECK_MAGIC */ 1722 1723 int 1724 proclist_foreach_call(struct proclist *list, 1725 int (*callback)(struct proc *, void *arg), void *arg) 1726 { 1727 struct proc marker; 1728 struct proc *p; 1729 int ret = 0; 1730 1731 marker.p_flag = PK_MARKER; 1732 mutex_enter(&proc_lock); 1733 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) { 1734 if (p->p_flag & PK_MARKER) { 1735 p = LIST_NEXT(p, p_list); 1736 continue; 1737 } 1738 LIST_INSERT_AFTER(p, &marker, p_list); 1739 ret = (*callback)(p, arg); 1740 KASSERT(mutex_owned(&proc_lock)); 1741 p = LIST_NEXT(&marker, p_list); 1742 LIST_REMOVE(&marker, p_list); 1743 } 1744 mutex_exit(&proc_lock); 1745 1746 return ret; 1747 } 1748 1749 int 1750 proc_vmspace_getref(struct proc *p, struct vmspace **vm) 1751 { 1752 1753 /* XXXCDC: how should locking work here? */ 1754 1755 /* curproc exception is for coredump. */ 1756 1757 if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) || 1758 (p->p_vmspace->vm_refcnt < 1)) { 1759 return EFAULT; 1760 } 1761 1762 uvmspace_addref(p->p_vmspace); 1763 *vm = p->p_vmspace; 1764 1765 return 0; 1766 } 1767 1768 /* 1769 * Acquire a write lock on the process credential. 1770 */ 1771 void 1772 proc_crmod_enter(void) 1773 { 1774 struct lwp *l = curlwp; 1775 struct proc *p = l->l_proc; 1776 kauth_cred_t oc; 1777 1778 /* Reset what needs to be reset in plimit. */ 1779 if (p->p_limit->pl_corename != defcorename) { 1780 lim_setcorename(p, defcorename, 0); 1781 } 1782 1783 mutex_enter(p->p_lock); 1784 1785 /* Ensure the LWP cached credentials are up to date. */ 1786 if ((oc = l->l_cred) != p->p_cred) { 1787 kauth_cred_hold(p->p_cred); 1788 l->l_cred = p->p_cred; 1789 kauth_cred_free(oc); 1790 } 1791 } 1792 1793 /* 1794 * Set in a new process credential, and drop the write lock. The credential 1795 * must have a reference already. Optionally, free a no-longer required 1796 * credential. The scheduler also needs to inspect p_cred, so we also 1797 * briefly acquire the sched state mutex. 1798 */ 1799 void 1800 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid) 1801 { 1802 struct lwp *l = curlwp, *l2; 1803 struct proc *p = l->l_proc; 1804 kauth_cred_t oc; 1805 1806 KASSERT(mutex_owned(p->p_lock)); 1807 1808 /* Is there a new credential to set in? */ 1809 if (scred != NULL) { 1810 p->p_cred = scred; 1811 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 1812 if (l2 != l) 1813 l2->l_prflag |= LPR_CRMOD; 1814 } 1815 1816 /* Ensure the LWP cached credentials are up to date. */ 1817 if ((oc = l->l_cred) != scred) { 1818 kauth_cred_hold(scred); 1819 l->l_cred = scred; 1820 } 1821 } else 1822 oc = NULL; /* XXXgcc */ 1823 1824 if (sugid) { 1825 /* 1826 * Mark process as having changed credentials, stops 1827 * tracing etc. 1828 */ 1829 p->p_flag |= PK_SUGID; 1830 } 1831 1832 mutex_exit(p->p_lock); 1833 1834 /* If there is a credential to be released, free it now. */ 1835 if (fcred != NULL) { 1836 KASSERT(scred != NULL); 1837 kauth_cred_free(fcred); 1838 if (oc != scred) 1839 kauth_cred_free(oc); 1840 } 1841 } 1842 1843 /* 1844 * proc_specific_key_create -- 1845 * Create a key for subsystem proc-specific data. 1846 */ 1847 int 1848 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1849 { 1850 1851 return (specificdata_key_create(proc_specificdata_domain, keyp, dtor)); 1852 } 1853 1854 /* 1855 * proc_specific_key_delete -- 1856 * Delete a key for subsystem proc-specific data. 1857 */ 1858 void 1859 proc_specific_key_delete(specificdata_key_t key) 1860 { 1861 1862 specificdata_key_delete(proc_specificdata_domain, key); 1863 } 1864 1865 /* 1866 * proc_initspecific -- 1867 * Initialize a proc's specificdata container. 1868 */ 1869 void 1870 proc_initspecific(struct proc *p) 1871 { 1872 int error __diagused; 1873 1874 error = specificdata_init(proc_specificdata_domain, &p->p_specdataref); 1875 KASSERT(error == 0); 1876 } 1877 1878 /* 1879 * proc_finispecific -- 1880 * Finalize a proc's specificdata container. 1881 */ 1882 void 1883 proc_finispecific(struct proc *p) 1884 { 1885 1886 specificdata_fini(proc_specificdata_domain, &p->p_specdataref); 1887 } 1888 1889 /* 1890 * proc_getspecific -- 1891 * Return proc-specific data corresponding to the specified key. 1892 */ 1893 void * 1894 proc_getspecific(struct proc *p, specificdata_key_t key) 1895 { 1896 1897 return (specificdata_getspecific(proc_specificdata_domain, 1898 &p->p_specdataref, key)); 1899 } 1900 1901 /* 1902 * proc_setspecific -- 1903 * Set proc-specific data corresponding to the specified key. 1904 */ 1905 void 1906 proc_setspecific(struct proc *p, specificdata_key_t key, void *data) 1907 { 1908 1909 specificdata_setspecific(proc_specificdata_domain, 1910 &p->p_specdataref, key, data); 1911 } 1912 1913 int 1914 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target) 1915 { 1916 int r = 0; 1917 1918 if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) || 1919 kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) { 1920 /* 1921 * suid proc of ours or proc not ours 1922 */ 1923 r = EPERM; 1924 } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) { 1925 /* 1926 * sgid proc has sgid back to us temporarily 1927 */ 1928 r = EPERM; 1929 } else { 1930 /* 1931 * our rgid must be in target's group list (ie, 1932 * sub-processes started by a sgid process) 1933 */ 1934 int ismember = 0; 1935 1936 if (kauth_cred_ismember_gid(cred, 1937 kauth_cred_getgid(target), &ismember) != 0 || 1938 !ismember) 1939 r = EPERM; 1940 } 1941 1942 return (r); 1943 } 1944 1945 /* 1946 * sysctl stuff 1947 */ 1948 1949 #define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc)) 1950 1951 static const u_int sysctl_flagmap[] = { 1952 PK_ADVLOCK, P_ADVLOCK, 1953 PK_EXEC, P_EXEC, 1954 PK_NOCLDWAIT, P_NOCLDWAIT, 1955 PK_32, P_32, 1956 PK_CLDSIGIGN, P_CLDSIGIGN, 1957 PK_SUGID, P_SUGID, 1958 0 1959 }; 1960 1961 static const u_int sysctl_sflagmap[] = { 1962 PS_NOCLDSTOP, P_NOCLDSTOP, 1963 PS_WEXIT, P_WEXIT, 1964 PS_STOPFORK, P_STOPFORK, 1965 PS_STOPEXEC, P_STOPEXEC, 1966 PS_STOPEXIT, P_STOPEXIT, 1967 0 1968 }; 1969 1970 static const u_int sysctl_slflagmap[] = { 1971 PSL_TRACED, P_TRACED, 1972 PSL_CHTRACED, P_CHTRACED, 1973 PSL_SYSCALL, P_SYSCALL, 1974 0 1975 }; 1976 1977 static const u_int sysctl_lflagmap[] = { 1978 PL_CONTROLT, P_CONTROLT, 1979 PL_PPWAIT, P_PPWAIT, 1980 0 1981 }; 1982 1983 static const u_int sysctl_stflagmap[] = { 1984 PST_PROFIL, P_PROFIL, 1985 0 1986 1987 }; 1988 1989 /* used by kern_lwp also */ 1990 const u_int sysctl_lwpflagmap[] = { 1991 LW_SINTR, L_SINTR, 1992 LW_SYSTEM, L_SYSTEM, 1993 0 1994 }; 1995 1996 /* 1997 * Find the most ``active'' lwp of a process and return it for ps display 1998 * purposes 1999 */ 2000 static struct lwp * 2001 proc_active_lwp(struct proc *p) 2002 { 2003 static const int ostat[] = { 2004 0, 2005 2, /* LSIDL */ 2006 6, /* LSRUN */ 2007 5, /* LSSLEEP */ 2008 4, /* LSSTOP */ 2009 0, /* LSZOMB */ 2010 1, /* LSDEAD */ 2011 7, /* LSONPROC */ 2012 3 /* LSSUSPENDED */ 2013 }; 2014 2015 struct lwp *l, *lp = NULL; 2016 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 2017 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat)); 2018 if (lp == NULL || 2019 ostat[l->l_stat] > ostat[lp->l_stat] || 2020 (ostat[l->l_stat] == ostat[lp->l_stat] && 2021 l->l_cpticks > lp->l_cpticks)) { 2022 lp = l; 2023 continue; 2024 } 2025 } 2026 return lp; 2027 } 2028 2029 static int 2030 sysctl_doeproc(SYSCTLFN_ARGS) 2031 { 2032 union { 2033 struct kinfo_proc kproc; 2034 struct kinfo_proc2 kproc2; 2035 } *kbuf; 2036 struct proc *p, *next, *marker; 2037 char *where, *dp; 2038 int type, op, arg, error; 2039 u_int elem_size, kelem_size, elem_count; 2040 size_t buflen, needed; 2041 bool match, zombie, mmmbrains; 2042 const bool allowaddr = get_expose_address(curproc); 2043 2044 if (namelen == 1 && name[0] == CTL_QUERY) 2045 return (sysctl_query(SYSCTLFN_CALL(rnode))); 2046 2047 dp = where = oldp; 2048 buflen = where != NULL ? *oldlenp : 0; 2049 error = 0; 2050 needed = 0; 2051 type = rnode->sysctl_num; 2052 2053 if (type == KERN_PROC) { 2054 if (namelen == 0) 2055 return EINVAL; 2056 switch (op = name[0]) { 2057 case KERN_PROC_ALL: 2058 if (namelen != 1) 2059 return EINVAL; 2060 arg = 0; 2061 break; 2062 default: 2063 if (namelen != 2) 2064 return EINVAL; 2065 arg = name[1]; 2066 break; 2067 } 2068 elem_count = 0; /* Hush little compiler, don't you cry */ 2069 kelem_size = elem_size = sizeof(kbuf->kproc); 2070 } else { 2071 if (namelen != 4) 2072 return EINVAL; 2073 op = name[0]; 2074 arg = name[1]; 2075 elem_size = name[2]; 2076 elem_count = name[3]; 2077 kelem_size = sizeof(kbuf->kproc2); 2078 } 2079 2080 sysctl_unlock(); 2081 2082 kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP); 2083 marker = kmem_alloc(sizeof(*marker), KM_SLEEP); 2084 marker->p_flag = PK_MARKER; 2085 2086 mutex_enter(&proc_lock); 2087 /* 2088 * Start with zombies to prevent reporting processes twice, in case they 2089 * are dying and being moved from the list of alive processes to zombies. 2090 */ 2091 mmmbrains = true; 2092 for (p = LIST_FIRST(&zombproc);; p = next) { 2093 if (p == NULL) { 2094 if (mmmbrains) { 2095 p = LIST_FIRST(&allproc); 2096 mmmbrains = false; 2097 } 2098 if (p == NULL) 2099 break; 2100 } 2101 next = LIST_NEXT(p, p_list); 2102 if ((p->p_flag & PK_MARKER) != 0) 2103 continue; 2104 2105 /* 2106 * Skip embryonic processes. 2107 */ 2108 if (p->p_stat == SIDL) 2109 continue; 2110 2111 mutex_enter(p->p_lock); 2112 error = kauth_authorize_process(l->l_cred, 2113 KAUTH_PROCESS_CANSEE, p, 2114 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL); 2115 if (error != 0) { 2116 mutex_exit(p->p_lock); 2117 continue; 2118 } 2119 2120 /* 2121 * Hande all the operations in one switch on the cost of 2122 * algorithm complexity is on purpose. The win splitting this 2123 * function into several similar copies makes maintenance burden 2124 * burden, code grow and boost is neglible in practical systems. 2125 */ 2126 switch (op) { 2127 case KERN_PROC_PID: 2128 match = (p->p_pid == (pid_t)arg); 2129 break; 2130 2131 case KERN_PROC_PGRP: 2132 match = (p->p_pgrp->pg_id == (pid_t)arg); 2133 break; 2134 2135 case KERN_PROC_SESSION: 2136 match = (p->p_session->s_sid == (pid_t)arg); 2137 break; 2138 2139 case KERN_PROC_TTY: 2140 match = true; 2141 if (arg == (int) KERN_PROC_TTY_REVOKE) { 2142 if ((p->p_lflag & PL_CONTROLT) == 0 || 2143 p->p_session->s_ttyp == NULL || 2144 p->p_session->s_ttyvp != NULL) { 2145 match = false; 2146 } 2147 } else if ((p->p_lflag & PL_CONTROLT) == 0 || 2148 p->p_session->s_ttyp == NULL) { 2149 if ((dev_t)arg != KERN_PROC_TTY_NODEV) { 2150 match = false; 2151 } 2152 } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) { 2153 match = false; 2154 } 2155 break; 2156 2157 case KERN_PROC_UID: 2158 match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg); 2159 break; 2160 2161 case KERN_PROC_RUID: 2162 match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg); 2163 break; 2164 2165 case KERN_PROC_GID: 2166 match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg); 2167 break; 2168 2169 case KERN_PROC_RGID: 2170 match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg); 2171 break; 2172 2173 case KERN_PROC_ALL: 2174 match = true; 2175 /* allow everything */ 2176 break; 2177 2178 default: 2179 error = EINVAL; 2180 mutex_exit(p->p_lock); 2181 goto cleanup; 2182 } 2183 if (!match) { 2184 mutex_exit(p->p_lock); 2185 continue; 2186 } 2187 2188 /* 2189 * Grab a hold on the process. 2190 */ 2191 if (mmmbrains) { 2192 zombie = true; 2193 } else { 2194 zombie = !rw_tryenter(&p->p_reflock, RW_READER); 2195 } 2196 if (zombie) { 2197 LIST_INSERT_AFTER(p, marker, p_list); 2198 } 2199 2200 if (buflen >= elem_size && 2201 (type == KERN_PROC || elem_count > 0)) { 2202 ruspace(p); /* Update process vm resource use */ 2203 2204 if (type == KERN_PROC) { 2205 fill_proc(p, &kbuf->kproc.kp_proc, allowaddr); 2206 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie, 2207 allowaddr); 2208 } else { 2209 fill_kproc2(p, &kbuf->kproc2, zombie, 2210 allowaddr); 2211 elem_count--; 2212 } 2213 mutex_exit(p->p_lock); 2214 mutex_exit(&proc_lock); 2215 /* 2216 * Copy out elem_size, but not larger than kelem_size 2217 */ 2218 error = sysctl_copyout(l, kbuf, dp, 2219 uimin(kelem_size, elem_size)); 2220 mutex_enter(&proc_lock); 2221 if (error) { 2222 goto bah; 2223 } 2224 dp += elem_size; 2225 buflen -= elem_size; 2226 } else { 2227 mutex_exit(p->p_lock); 2228 } 2229 needed += elem_size; 2230 2231 /* 2232 * Release reference to process. 2233 */ 2234 if (zombie) { 2235 next = LIST_NEXT(marker, p_list); 2236 LIST_REMOVE(marker, p_list); 2237 } else { 2238 rw_exit(&p->p_reflock); 2239 next = LIST_NEXT(p, p_list); 2240 } 2241 2242 /* 2243 * Short-circuit break quickly! 2244 */ 2245 if (op == KERN_PROC_PID) 2246 break; 2247 } 2248 mutex_exit(&proc_lock); 2249 2250 if (where != NULL) { 2251 *oldlenp = dp - where; 2252 if (needed > *oldlenp) { 2253 error = ENOMEM; 2254 goto out; 2255 } 2256 } else { 2257 needed += KERN_PROCSLOP; 2258 *oldlenp = needed; 2259 } 2260 kmem_free(kbuf, sizeof(*kbuf)); 2261 kmem_free(marker, sizeof(*marker)); 2262 sysctl_relock(); 2263 return 0; 2264 bah: 2265 if (zombie) 2266 LIST_REMOVE(marker, p_list); 2267 else 2268 rw_exit(&p->p_reflock); 2269 cleanup: 2270 mutex_exit(&proc_lock); 2271 out: 2272 kmem_free(kbuf, sizeof(*kbuf)); 2273 kmem_free(marker, sizeof(*marker)); 2274 sysctl_relock(); 2275 return error; 2276 } 2277 2278 int 2279 copyin_psstrings(struct proc *p, struct ps_strings *arginfo) 2280 { 2281 #if !defined(_RUMPKERNEL) 2282 int retval; 2283 2284 if (p->p_flag & PK_32) { 2285 MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo), 2286 enosys(), retval); 2287 return retval; 2288 } 2289 #endif /* !defined(_RUMPKERNEL) */ 2290 2291 return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo)); 2292 } 2293 2294 static int 2295 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len) 2296 { 2297 void **cookie = cookie_; 2298 struct lwp *l = cookie[0]; 2299 char *dst = cookie[1]; 2300 2301 return sysctl_copyout(l, src, dst + off, len); 2302 } 2303 2304 /* 2305 * sysctl helper routine for kern.proc_args pseudo-subtree. 2306 */ 2307 static int 2308 sysctl_kern_proc_args(SYSCTLFN_ARGS) 2309 { 2310 struct ps_strings pss; 2311 struct proc *p; 2312 pid_t pid; 2313 int type, error; 2314 void *cookie[2]; 2315 2316 if (namelen == 1 && name[0] == CTL_QUERY) 2317 return (sysctl_query(SYSCTLFN_CALL(rnode))); 2318 2319 if (newp != NULL || namelen != 2) 2320 return (EINVAL); 2321 pid = name[0]; 2322 type = name[1]; 2323 2324 switch (type) { 2325 case KERN_PROC_PATHNAME: 2326 sysctl_unlock(); 2327 error = fill_pathname(l, pid, oldp, oldlenp); 2328 sysctl_relock(); 2329 return error; 2330 2331 case KERN_PROC_CWD: 2332 sysctl_unlock(); 2333 error = fill_cwd(l, pid, oldp, oldlenp); 2334 sysctl_relock(); 2335 return error; 2336 2337 case KERN_PROC_ARGV: 2338 case KERN_PROC_NARGV: 2339 case KERN_PROC_ENV: 2340 case KERN_PROC_NENV: 2341 /* ok */ 2342 break; 2343 default: 2344 return (EINVAL); 2345 } 2346 2347 sysctl_unlock(); 2348 2349 /* check pid */ 2350 mutex_enter(&proc_lock); 2351 if ((p = proc_find(pid)) == NULL) { 2352 error = EINVAL; 2353 goto out_locked; 2354 } 2355 mutex_enter(p->p_lock); 2356 2357 /* Check permission. */ 2358 if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV) 2359 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, 2360 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL); 2361 else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV) 2362 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, 2363 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL); 2364 else 2365 error = EINVAL; /* XXXGCC */ 2366 if (error) { 2367 mutex_exit(p->p_lock); 2368 goto out_locked; 2369 } 2370 2371 if (oldp == NULL) { 2372 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) 2373 *oldlenp = sizeof (int); 2374 else 2375 *oldlenp = ARG_MAX; /* XXX XXX XXX */ 2376 error = 0; 2377 mutex_exit(p->p_lock); 2378 goto out_locked; 2379 } 2380 2381 /* 2382 * Zombies don't have a stack, so we can't read their psstrings. 2383 * System processes also don't have a user stack. 2384 */ 2385 if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) { 2386 error = EINVAL; 2387 mutex_exit(p->p_lock); 2388 goto out_locked; 2389 } 2390 2391 error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY; 2392 mutex_exit(p->p_lock); 2393 if (error) { 2394 goto out_locked; 2395 } 2396 mutex_exit(&proc_lock); 2397 2398 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) { 2399 int value; 2400 if ((error = copyin_psstrings(p, &pss)) == 0) { 2401 if (type == KERN_PROC_NARGV) 2402 value = pss.ps_nargvstr; 2403 else 2404 value = pss.ps_nenvstr; 2405 error = sysctl_copyout(l, &value, oldp, sizeof(value)); 2406 *oldlenp = sizeof(value); 2407 } 2408 } else { 2409 cookie[0] = l; 2410 cookie[1] = oldp; 2411 error = copy_procargs(p, type, oldlenp, 2412 copy_procargs_sysctl_cb, cookie); 2413 } 2414 rw_exit(&p->p_reflock); 2415 sysctl_relock(); 2416 return error; 2417 2418 out_locked: 2419 mutex_exit(&proc_lock); 2420 sysctl_relock(); 2421 return error; 2422 } 2423 2424 int 2425 copy_procargs(struct proc *p, int oid, size_t *limit, 2426 int (*cb)(void *, const void *, size_t, size_t), void *cookie) 2427 { 2428 struct ps_strings pss; 2429 size_t len, i, loaded, entry_len; 2430 struct uio auio; 2431 struct iovec aiov; 2432 int error, argvlen; 2433 char *arg; 2434 char **argv; 2435 vaddr_t user_argv; 2436 struct vmspace *vmspace; 2437 2438 /* 2439 * Allocate a temporary buffer to hold the argument vector and 2440 * the arguments themselve. 2441 */ 2442 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2443 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP); 2444 2445 /* 2446 * Lock the process down in memory. 2447 */ 2448 vmspace = p->p_vmspace; 2449 uvmspace_addref(vmspace); 2450 2451 /* 2452 * Read in the ps_strings structure. 2453 */ 2454 if ((error = copyin_psstrings(p, &pss)) != 0) 2455 goto done; 2456 2457 /* 2458 * Now read the address of the argument vector. 2459 */ 2460 switch (oid) { 2461 case KERN_PROC_ARGV: 2462 user_argv = (uintptr_t)pss.ps_argvstr; 2463 argvlen = pss.ps_nargvstr; 2464 break; 2465 case KERN_PROC_ENV: 2466 user_argv = (uintptr_t)pss.ps_envstr; 2467 argvlen = pss.ps_nenvstr; 2468 break; 2469 default: 2470 error = EINVAL; 2471 goto done; 2472 } 2473 2474 if (argvlen < 0) { 2475 error = EIO; 2476 goto done; 2477 } 2478 2479 2480 /* 2481 * Now copy each string. 2482 */ 2483 len = 0; /* bytes written to user buffer */ 2484 loaded = 0; /* bytes from argv already processed */ 2485 i = 0; /* To make compiler happy */ 2486 entry_len = PROC_PTRSZ(p); 2487 2488 for (; argvlen; --argvlen) { 2489 int finished = 0; 2490 vaddr_t base; 2491 size_t xlen; 2492 int j; 2493 2494 if (loaded == 0) { 2495 size_t rem = entry_len * argvlen; 2496 loaded = MIN(rem, PAGE_SIZE); 2497 error = copyin_vmspace(vmspace, 2498 (const void *)user_argv, argv, loaded); 2499 if (error) 2500 break; 2501 user_argv += loaded; 2502 i = 0; 2503 } 2504 2505 #if !defined(_RUMPKERNEL) 2506 if (p->p_flag & PK_32) 2507 MODULE_HOOK_CALL(kern_proc32_base_hook, 2508 (argv, i++), 0, base); 2509 else 2510 #endif /* !defined(_RUMPKERNEL) */ 2511 base = (vaddr_t)argv[i++]; 2512 loaded -= entry_len; 2513 2514 /* 2515 * The program has messed around with its arguments, 2516 * possibly deleting some, and replacing them with 2517 * NULL's. Treat this as the last argument and not 2518 * a failure. 2519 */ 2520 if (base == 0) 2521 break; 2522 2523 while (!finished) { 2524 xlen = PAGE_SIZE - (base & PAGE_MASK); 2525 2526 aiov.iov_base = arg; 2527 aiov.iov_len = PAGE_SIZE; 2528 auio.uio_iov = &aiov; 2529 auio.uio_iovcnt = 1; 2530 auio.uio_offset = base; 2531 auio.uio_resid = xlen; 2532 auio.uio_rw = UIO_READ; 2533 UIO_SETUP_SYSSPACE(&auio); 2534 error = uvm_io(&vmspace->vm_map, &auio, 0); 2535 if (error) 2536 goto done; 2537 2538 /* Look for the end of the string */ 2539 for (j = 0; j < xlen; j++) { 2540 if (arg[j] == '\0') { 2541 xlen = j + 1; 2542 finished = 1; 2543 break; 2544 } 2545 } 2546 2547 /* Check for user buffer overflow */ 2548 if (len + xlen > *limit) { 2549 finished = 1; 2550 if (len > *limit) 2551 xlen = 0; 2552 else 2553 xlen = *limit - len; 2554 } 2555 2556 /* Copyout the page */ 2557 error = (*cb)(cookie, arg, len, xlen); 2558 if (error) 2559 goto done; 2560 2561 len += xlen; 2562 base += xlen; 2563 } 2564 } 2565 *limit = len; 2566 2567 done: 2568 kmem_free(argv, PAGE_SIZE); 2569 kmem_free(arg, PAGE_SIZE); 2570 uvmspace_free(vmspace); 2571 return error; 2572 } 2573 2574 /* 2575 * Fill in a proc structure for the specified process. 2576 */ 2577 static void 2578 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr) 2579 { 2580 COND_SET_STRUCT(p->p_list, psrc->p_list, allowaddr); 2581 memset(&p->p_auxlock, 0, sizeof(p->p_auxlock)); 2582 COND_SET_STRUCT(p->p_lock, psrc->p_lock, allowaddr); 2583 memset(&p->p_stmutex, 0, sizeof(p->p_stmutex)); 2584 memset(&p->p_reflock, 0, sizeof(p->p_reflock)); 2585 COND_SET_STRUCT(p->p_waitcv, psrc->p_waitcv, allowaddr); 2586 COND_SET_STRUCT(p->p_lwpcv, psrc->p_lwpcv, allowaddr); 2587 COND_SET_PTR(p->p_cred, psrc->p_cred, allowaddr); 2588 COND_SET_PTR(p->p_fd, psrc->p_fd, allowaddr); 2589 COND_SET_PTR(p->p_cwdi, psrc->p_cwdi, allowaddr); 2590 COND_SET_PTR(p->p_stats, psrc->p_stats, allowaddr); 2591 COND_SET_PTR(p->p_limit, psrc->p_limit, allowaddr); 2592 COND_SET_PTR(p->p_vmspace, psrc->p_vmspace, allowaddr); 2593 COND_SET_PTR(p->p_sigacts, psrc->p_sigacts, allowaddr); 2594 COND_SET_PTR(p->p_aio, psrc->p_aio, allowaddr); 2595 p->p_mqueue_cnt = psrc->p_mqueue_cnt; 2596 memset(&p->p_specdataref, 0, sizeof(p->p_specdataref)); 2597 p->p_exitsig = psrc->p_exitsig; 2598 p->p_flag = psrc->p_flag; 2599 p->p_sflag = psrc->p_sflag; 2600 p->p_slflag = psrc->p_slflag; 2601 p->p_lflag = psrc->p_lflag; 2602 p->p_stflag = psrc->p_stflag; 2603 p->p_stat = psrc->p_stat; 2604 p->p_trace_enabled = psrc->p_trace_enabled; 2605 p->p_pid = psrc->p_pid; 2606 COND_SET_STRUCT(p->p_pglist, psrc->p_pglist, allowaddr); 2607 COND_SET_PTR(p->p_pptr, psrc->p_pptr, allowaddr); 2608 COND_SET_STRUCT(p->p_sibling, psrc->p_sibling, allowaddr); 2609 COND_SET_STRUCT(p->p_children, psrc->p_children, allowaddr); 2610 COND_SET_STRUCT(p->p_lwps, psrc->p_lwps, allowaddr); 2611 COND_SET_PTR(p->p_raslist, psrc->p_raslist, allowaddr); 2612 p->p_nlwps = psrc->p_nlwps; 2613 p->p_nzlwps = psrc->p_nzlwps; 2614 p->p_nrlwps = psrc->p_nrlwps; 2615 p->p_nlwpwait = psrc->p_nlwpwait; 2616 p->p_ndlwps = psrc->p_ndlwps; 2617 p->p_nstopchild = psrc->p_nstopchild; 2618 p->p_waited = psrc->p_waited; 2619 COND_SET_PTR(p->p_zomblwp, psrc->p_zomblwp, allowaddr); 2620 COND_SET_PTR(p->p_vforklwp, psrc->p_vforklwp, allowaddr); 2621 COND_SET_PTR(p->p_sched_info, psrc->p_sched_info, allowaddr); 2622 p->p_estcpu = psrc->p_estcpu; 2623 p->p_estcpu_inherited = psrc->p_estcpu_inherited; 2624 p->p_forktime = psrc->p_forktime; 2625 p->p_pctcpu = psrc->p_pctcpu; 2626 COND_SET_PTR(p->p_opptr, psrc->p_opptr, allowaddr); 2627 COND_SET_PTR(p->p_timers, psrc->p_timers, allowaddr); 2628 p->p_rtime = psrc->p_rtime; 2629 p->p_uticks = psrc->p_uticks; 2630 p->p_sticks = psrc->p_sticks; 2631 p->p_iticks = psrc->p_iticks; 2632 p->p_xutime = psrc->p_xutime; 2633 p->p_xstime = psrc->p_xstime; 2634 p->p_traceflag = psrc->p_traceflag; 2635 COND_SET_PTR(p->p_tracep, psrc->p_tracep, allowaddr); 2636 COND_SET_PTR(p->p_textvp, psrc->p_textvp, allowaddr); 2637 COND_SET_PTR(p->p_emul, psrc->p_emul, allowaddr); 2638 COND_SET_PTR(p->p_emuldata, psrc->p_emuldata, allowaddr); 2639 COND_SET_CPTR(p->p_execsw, psrc->p_execsw, allowaddr); 2640 COND_SET_STRUCT(p->p_klist, psrc->p_klist, allowaddr); 2641 COND_SET_STRUCT(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr); 2642 COND_SET_STRUCT(p->p_sigpend.sp_info, psrc->p_sigpend.sp_info, 2643 allowaddr); 2644 p->p_sigpend.sp_set = psrc->p_sigpend.sp_set; 2645 COND_SET_PTR(p->p_lwpctl, psrc->p_lwpctl, allowaddr); 2646 p->p_ppid = psrc->p_ppid; 2647 p->p_oppid = psrc->p_oppid; 2648 COND_SET_PTR(p->p_path, psrc->p_path, allowaddr); 2649 p->p_sigctx = psrc->p_sigctx; 2650 p->p_nice = psrc->p_nice; 2651 memcpy(p->p_comm, psrc->p_comm, sizeof(p->p_comm)); 2652 COND_SET_PTR(p->p_pgrp, psrc->p_pgrp, allowaddr); 2653 COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr); 2654 p->p_pax = psrc->p_pax; 2655 p->p_xexit = psrc->p_xexit; 2656 p->p_xsig = psrc->p_xsig; 2657 p->p_acflag = psrc->p_acflag; 2658 COND_SET_STRUCT(p->p_md, psrc->p_md, allowaddr); 2659 p->p_stackbase = psrc->p_stackbase; 2660 COND_SET_PTR(p->p_dtrace, psrc->p_dtrace, allowaddr); 2661 } 2662 2663 /* 2664 * Fill in an eproc structure for the specified process. 2665 */ 2666 void 2667 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr) 2668 { 2669 struct tty *tp; 2670 struct lwp *l; 2671 2672 KASSERT(mutex_owned(&proc_lock)); 2673 KASSERT(mutex_owned(p->p_lock)); 2674 2675 COND_SET_PTR(ep->e_paddr, p, allowaddr); 2676 COND_SET_PTR(ep->e_sess, p->p_session, allowaddr); 2677 if (p->p_cred) { 2678 kauth_cred_topcred(p->p_cred, &ep->e_pcred); 2679 kauth_cred_toucred(p->p_cred, &ep->e_ucred); 2680 } 2681 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) { 2682 struct vmspace *vm = p->p_vmspace; 2683 2684 ep->e_vm.vm_rssize = vm_resident_count(vm); 2685 ep->e_vm.vm_tsize = vm->vm_tsize; 2686 ep->e_vm.vm_dsize = vm->vm_dsize; 2687 ep->e_vm.vm_ssize = vm->vm_ssize; 2688 ep->e_vm.vm_map.size = vm->vm_map.size; 2689 2690 /* Pick the primary (first) LWP */ 2691 l = proc_active_lwp(p); 2692 KASSERT(l != NULL); 2693 lwp_lock(l); 2694 if (l->l_wchan) 2695 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN); 2696 lwp_unlock(l); 2697 } 2698 ep->e_ppid = p->p_ppid; 2699 if (p->p_pgrp && p->p_session) { 2700 ep->e_pgid = p->p_pgrp->pg_id; 2701 ep->e_jobc = p->p_pgrp->pg_jobc; 2702 ep->e_sid = p->p_session->s_sid; 2703 if ((p->p_lflag & PL_CONTROLT) && 2704 (tp = p->p_session->s_ttyp)) { 2705 ep->e_tdev = tp->t_dev; 2706 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 2707 COND_SET_PTR(ep->e_tsess, tp->t_session, allowaddr); 2708 } else 2709 ep->e_tdev = (uint32_t)NODEV; 2710 ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0; 2711 if (SESS_LEADER(p)) 2712 ep->e_flag |= EPROC_SLEADER; 2713 strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME); 2714 } 2715 ep->e_xsize = ep->e_xrssize = 0; 2716 ep->e_xccount = ep->e_xswrss = 0; 2717 } 2718 2719 /* 2720 * Fill in a kinfo_proc2 structure for the specified process. 2721 */ 2722 void 2723 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr) 2724 { 2725 struct tty *tp; 2726 struct lwp *l, *l2; 2727 struct timeval ut, st, rt; 2728 sigset_t ss1, ss2; 2729 struct rusage ru; 2730 struct vmspace *vm; 2731 2732 KASSERT(mutex_owned(&proc_lock)); 2733 KASSERT(mutex_owned(p->p_lock)); 2734 2735 sigemptyset(&ss1); 2736 sigemptyset(&ss2); 2737 2738 COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr); 2739 COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr); 2740 COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr); 2741 COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr); 2742 COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr); 2743 COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr); 2744 COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr); 2745 COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr); 2746 ki->p_tsess = 0; /* may be changed if controlling tty below */ 2747 COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr); 2748 ki->p_eflag = 0; 2749 ki->p_exitsig = p->p_exitsig; 2750 ki->p_flag = L_INMEM; /* Process never swapped out */ 2751 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag); 2752 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag); 2753 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag); 2754 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag); 2755 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag); 2756 ki->p_pid = p->p_pid; 2757 ki->p_ppid = p->p_ppid; 2758 ki->p_uid = kauth_cred_geteuid(p->p_cred); 2759 ki->p_ruid = kauth_cred_getuid(p->p_cred); 2760 ki->p_gid = kauth_cred_getegid(p->p_cred); 2761 ki->p_rgid = kauth_cred_getgid(p->p_cred); 2762 ki->p_svuid = kauth_cred_getsvuid(p->p_cred); 2763 ki->p_svgid = kauth_cred_getsvgid(p->p_cred); 2764 ki->p_ngroups = kauth_cred_ngroups(p->p_cred); 2765 kauth_cred_getgroups(p->p_cred, ki->p_groups, 2766 uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])), 2767 UIO_SYSSPACE); 2768 2769 ki->p_uticks = p->p_uticks; 2770 ki->p_sticks = p->p_sticks; 2771 ki->p_iticks = p->p_iticks; 2772 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */ 2773 COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr); 2774 ki->p_traceflag = p->p_traceflag; 2775 2776 memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t)); 2777 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t)); 2778 2779 ki->p_cpticks = 0; 2780 ki->p_pctcpu = p->p_pctcpu; 2781 ki->p_estcpu = 0; 2782 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */ 2783 ki->p_realstat = p->p_stat; 2784 ki->p_nice = p->p_nice; 2785 ki->p_xstat = P_WAITSTATUS(p); 2786 ki->p_acflag = p->p_acflag; 2787 2788 strncpy(ki->p_comm, p->p_comm, 2789 uimin(sizeof(ki->p_comm), sizeof(p->p_comm))); 2790 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename)); 2791 2792 ki->p_nlwps = p->p_nlwps; 2793 ki->p_realflag = ki->p_flag; 2794 2795 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) { 2796 vm = p->p_vmspace; 2797 ki->p_vm_rssize = vm_resident_count(vm); 2798 ki->p_vm_tsize = vm->vm_tsize; 2799 ki->p_vm_dsize = vm->vm_dsize; 2800 ki->p_vm_ssize = vm->vm_ssize; 2801 ki->p_vm_vsize = atop(vm->vm_map.size); 2802 /* 2803 * Since the stack is initially mapped mostly with 2804 * PROT_NONE and grown as needed, adjust the "mapped size" 2805 * to skip the unused stack portion. 2806 */ 2807 ki->p_vm_msize = 2808 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize; 2809 2810 /* Pick the primary (first) LWP */ 2811 l = proc_active_lwp(p); 2812 KASSERT(l != NULL); 2813 lwp_lock(l); 2814 ki->p_nrlwps = p->p_nrlwps; 2815 ki->p_forw = 0; 2816 ki->p_back = 0; 2817 COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr); 2818 ki->p_stat = l->l_stat; 2819 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag); 2820 ki->p_swtime = l->l_swtime; 2821 ki->p_slptime = l->l_slptime; 2822 if (l->l_stat == LSONPROC) 2823 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags; 2824 else 2825 ki->p_schedflags = 0; 2826 ki->p_priority = lwp_eprio(l); 2827 ki->p_usrpri = l->l_priority; 2828 if (l->l_wchan) 2829 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg)); 2830 COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr); 2831 ki->p_cpuid = cpu_index(l->l_cpu); 2832 lwp_unlock(l); 2833 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 2834 /* This is hardly correct, but... */ 2835 sigplusset(&l->l_sigpend.sp_set, &ss1); 2836 sigplusset(&l->l_sigmask, &ss2); 2837 ki->p_cpticks += l->l_cpticks; 2838 ki->p_pctcpu += l->l_pctcpu; 2839 ki->p_estcpu += l->l_estcpu; 2840 } 2841 } 2842 sigplusset(&p->p_sigpend.sp_set, &ss1); 2843 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t)); 2844 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t)); 2845 2846 if (p->p_session != NULL) { 2847 ki->p_sid = p->p_session->s_sid; 2848 ki->p__pgid = p->p_pgrp->pg_id; 2849 if (p->p_session->s_ttyvp) 2850 ki->p_eflag |= EPROC_CTTY; 2851 if (SESS_LEADER(p)) 2852 ki->p_eflag |= EPROC_SLEADER; 2853 strncpy(ki->p_login, p->p_session->s_login, 2854 uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login)); 2855 ki->p_jobc = p->p_pgrp->pg_jobc; 2856 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) { 2857 ki->p_tdev = tp->t_dev; 2858 ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 2859 COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session), 2860 allowaddr); 2861 } else { 2862 ki->p_tdev = (int32_t)NODEV; 2863 } 2864 } 2865 2866 if (!P_ZOMBIE(p) && !zombie) { 2867 ki->p_uvalid = 1; 2868 ki->p_ustart_sec = p->p_stats->p_start.tv_sec; 2869 ki->p_ustart_usec = p->p_stats->p_start.tv_usec; 2870 2871 calcru(p, &ut, &st, NULL, &rt); 2872 ki->p_rtime_sec = rt.tv_sec; 2873 ki->p_rtime_usec = rt.tv_usec; 2874 ki->p_uutime_sec = ut.tv_sec; 2875 ki->p_uutime_usec = ut.tv_usec; 2876 ki->p_ustime_sec = st.tv_sec; 2877 ki->p_ustime_usec = st.tv_usec; 2878 2879 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru)); 2880 ki->p_uru_nvcsw = 0; 2881 ki->p_uru_nivcsw = 0; 2882 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 2883 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw); 2884 ki->p_uru_nivcsw += l2->l_nivcsw; 2885 ruadd(&ru, &l2->l_ru); 2886 } 2887 ki->p_uru_maxrss = ru.ru_maxrss; 2888 ki->p_uru_ixrss = ru.ru_ixrss; 2889 ki->p_uru_idrss = ru.ru_idrss; 2890 ki->p_uru_isrss = ru.ru_isrss; 2891 ki->p_uru_minflt = ru.ru_minflt; 2892 ki->p_uru_majflt = ru.ru_majflt; 2893 ki->p_uru_nswap = ru.ru_nswap; 2894 ki->p_uru_inblock = ru.ru_inblock; 2895 ki->p_uru_oublock = ru.ru_oublock; 2896 ki->p_uru_msgsnd = ru.ru_msgsnd; 2897 ki->p_uru_msgrcv = ru.ru_msgrcv; 2898 ki->p_uru_nsignals = ru.ru_nsignals; 2899 2900 timeradd(&p->p_stats->p_cru.ru_utime, 2901 &p->p_stats->p_cru.ru_stime, &ut); 2902 ki->p_uctime_sec = ut.tv_sec; 2903 ki->p_uctime_usec = ut.tv_usec; 2904 } 2905 } 2906 2907 2908 int 2909 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid) 2910 { 2911 int error; 2912 2913 mutex_enter(&proc_lock); 2914 if (pid == -1) 2915 *p = l->l_proc; 2916 else 2917 *p = proc_find(pid); 2918 2919 if (*p == NULL) { 2920 if (pid != -1) 2921 mutex_exit(&proc_lock); 2922 return ESRCH; 2923 } 2924 if (pid != -1) 2925 mutex_enter((*p)->p_lock); 2926 mutex_exit(&proc_lock); 2927 2928 error = kauth_authorize_process(l->l_cred, 2929 KAUTH_PROCESS_CANSEE, *p, 2930 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 2931 if (error) { 2932 if (pid != -1) 2933 mutex_exit((*p)->p_lock); 2934 } 2935 return error; 2936 } 2937 2938 static int 2939 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp) 2940 { 2941 int error; 2942 struct proc *p; 2943 2944 if ((error = proc_find_locked(l, &p, pid)) != 0) 2945 return error; 2946 2947 if (p->p_path == NULL) { 2948 if (pid != -1) 2949 mutex_exit(p->p_lock); 2950 return ENOENT; 2951 } 2952 2953 size_t len = strlen(p->p_path) + 1; 2954 if (oldp != NULL) { 2955 size_t copylen = uimin(len, *oldlenp); 2956 error = sysctl_copyout(l, p->p_path, oldp, copylen); 2957 if (error == 0 && *oldlenp < len) 2958 error = ENOSPC; 2959 } 2960 *oldlenp = len; 2961 if (pid != -1) 2962 mutex_exit(p->p_lock); 2963 return error; 2964 } 2965 2966 static int 2967 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp) 2968 { 2969 int error; 2970 struct proc *p; 2971 char *path; 2972 char *bp, *bend; 2973 struct cwdinfo *cwdi; 2974 struct vnode *vp; 2975 size_t len, lenused; 2976 2977 if ((error = proc_find_locked(l, &p, pid)) != 0) 2978 return error; 2979 2980 len = MAXPATHLEN * 4; 2981 2982 path = kmem_alloc(len, KM_SLEEP); 2983 2984 bp = &path[len]; 2985 bend = bp; 2986 *(--bp) = '\0'; 2987 2988 cwdi = p->p_cwdi; 2989 rw_enter(&cwdi->cwdi_lock, RW_READER); 2990 vp = cwdi->cwdi_cdir; 2991 error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l); 2992 rw_exit(&cwdi->cwdi_lock); 2993 2994 if (error) 2995 goto out; 2996 2997 lenused = bend - bp; 2998 2999 if (oldp != NULL) { 3000 size_t copylen = uimin(lenused, *oldlenp); 3001 error = sysctl_copyout(l, bp, oldp, copylen); 3002 if (error == 0 && *oldlenp < lenused) 3003 error = ENOSPC; 3004 } 3005 *oldlenp = lenused; 3006 out: 3007 if (pid != -1) 3008 mutex_exit(p->p_lock); 3009 kmem_free(path, len); 3010 return error; 3011 } 3012 3013 int 3014 proc_getauxv(struct proc *p, void **buf, size_t *len) 3015 { 3016 struct ps_strings pss; 3017 int error; 3018 void *uauxv, *kauxv; 3019 size_t size; 3020 3021 if ((error = copyin_psstrings(p, &pss)) != 0) 3022 return error; 3023 if (pss.ps_envstr == NULL) 3024 return EIO; 3025 3026 size = p->p_execsw->es_arglen; 3027 if (size == 0) 3028 return EIO; 3029 3030 size_t ptrsz = PROC_PTRSZ(p); 3031 uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz); 3032 3033 kauxv = kmem_alloc(size, KM_SLEEP); 3034 3035 error = copyin_proc(p, uauxv, kauxv, size); 3036 if (error) { 3037 kmem_free(kauxv, size); 3038 return error; 3039 } 3040 3041 *buf = kauxv; 3042 *len = size; 3043 3044 return 0; 3045 } 3046 3047 3048 static int 3049 sysctl_security_expose_address(SYSCTLFN_ARGS) 3050 { 3051 int expose_address, error; 3052 struct sysctlnode node; 3053 3054 node = *rnode; 3055 node.sysctl_data = &expose_address; 3056 expose_address = *(int *)rnode->sysctl_data; 3057 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 3058 if (error || newp == NULL) 3059 return error; 3060 3061 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR, 3062 0, NULL, NULL, NULL)) 3063 return EPERM; 3064 3065 switch (expose_address) { 3066 case 0: 3067 case 1: 3068 case 2: 3069 break; 3070 default: 3071 return EINVAL; 3072 } 3073 3074 *(int *)rnode->sysctl_data = expose_address; 3075 3076 return 0; 3077 } 3078 3079 bool 3080 get_expose_address(struct proc *p) 3081 { 3082 /* allow only if sysctl variable is set or privileged */ 3083 return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE, 3084 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0; 3085 } 3086