1 /* $NetBSD: kern_lwp.c,v 1.243 2021/01/13 07:36:56 skrll Exp $ */ 2 3 /*- 4 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Nathan J. Williams, and Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Overview 35 * 36 * Lightweight processes (LWPs) are the basic unit or thread of 37 * execution within the kernel. The core state of an LWP is described 38 * by "struct lwp", also known as lwp_t. 39 * 40 * Each LWP is contained within a process (described by "struct proc"). 41 * Every process contains at least one LWP, but may contain more. The 42 * process describes attributes shared among all of its LWPs such as a 43 * private address space, global execution state (stopped, active, 44 * zombie, ...), signal disposition and so on. On a multiprocessor 45 * machine, multiple LWPs may be executing concurrently in the kernel. 46 * 47 * Execution states 48 * 49 * At any given time, an LWP has overall state that is described by 50 * lwp::l_stat. The states are broken into two sets below. The first 51 * set is guaranteed to represent the absolute, current state of the 52 * LWP: 53 * 54 * LSONPROC 55 * 56 * On processor: the LWP is executing on a CPU, either in the 57 * kernel or in user space. 58 * 59 * LSRUN 60 * 61 * Runnable: the LWP is parked on a run queue, and may soon be 62 * chosen to run by an idle processor, or by a processor that 63 * has been asked to preempt a currently running but lower 64 * priority LWP. 65 * 66 * LSIDL 67 * 68 * Idle: the LWP has been created but has not yet executed, or 69 * it has ceased executing a unit of work and is waiting to be 70 * started again. This state exists so that the LWP can occupy 71 * a slot in the process & PID table, but without having to 72 * worry about being touched; lookups of the LWP by ID will 73 * fail while in this state. The LWP will become visible for 74 * lookup once its state transitions further. Some special 75 * kernel threads also (ab)use this state to indicate that they 76 * are idle (soft interrupts and idle LWPs).
77 * 78 * LSSUSPENDED: 79 * 80 * Suspended: the LWP has had its execution suspended by 81 * another LWP in the same process using the _lwp_suspend() 82 * system call. User-level LWPs also enter the suspended 83 * state when the system is shutting down. 84 * 85 * The second set represents a "statement of intent" on behalf of the 86 * LWP. The LWP may in fact be executing on a processor, or may be 87 * sleeping or idle. It is expected to take the necessary action to 88 * stop executing or become "running" again within a short timeframe. 89 * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. 90 * Importantly, it indicates that its state is tied to a CPU. 91 * 92 * LSZOMB: 93 * 94 * Dead or dying: the LWP has released most of its resources 95 * and is about to switch away into oblivion, or has already 96 * switched away. When it switches away, its few remaining 97 * resources can be collected. 98 * 99 * LSSLEEP: 100 * 101 * Sleeping: the LWP has entered itself onto a sleep queue, and 102 * has switched away or will switch away shortly to allow other 103 * LWPs to run on the CPU. 104 * 105 * LSSTOP: 106 * 107 * Stopped: the LWP has been stopped as a result of a job 108 * control signal, or as a result of the ptrace() interface. 109 * 110 * Stopped LWPs may run briefly within the kernel to handle 111 * signals that they receive, but will not return to user space 112 * until their process' state is changed away from stopped. 113 * 114 * Single LWPs within a process can not be set stopped 115 * selectively: all actions that can stop or continue LWPs 116 * occur at the process level. 117 * 118 * State transitions 119 * 120 * Note that the LSSTOP state may only be set when returning to 121 * user space in userret(), or when sleeping interruptibly. The 122 * LSSUSPENDED state may only be set in userret(). Before setting 123 * those states, we try to ensure that the LWPs will release all 124 * locks that they hold, and at a minimum try to ensure that the 125 * LWP can be set runnable again by a signal. 126 * 127 * LWPs may transition states in the following ways: 128 * 129 * RUN -------> ONPROC ONPROC -----> RUN 130 * > SLEEP 131 * > STOPPED 132 * > SUSPENDED 133 * > ZOMB 134 * > IDL (special cases) 135 * 136 * STOPPED ---> RUN SUSPENDED --> RUN 137 * > SLEEP 138 * 139 * SLEEP -----> ONPROC IDL --------> RUN 140 * > RUN > SUSPENDED 141 * > STOPPED > STOPPED 142 * > ONPROC (special cases) 143 * 144 * Some state transitions are only possible with kernel threads (eg 145 * ONPROC -> IDL) and happen under tightly controlled circumstances 146 * free of unwanted side effects. 147 * 148 * Migration 149 * 150 * Migration of threads from one CPU to another may be performed 151 * internally by the scheduler via the sched_takecpu() or sched_catchlwp() 152 * functions. The universal lwp_migrate() function should be used for 153 * any other cases. Subsystems in the kernel must be aware that the CPU 154 * of an LWP may change while it is not locked. 155 * 156 * Locking 157 * 158 * The majority of fields in 'struct lwp' are covered by a single, 159 * general spin lock pointed to by lwp::l_mutex. The locks covering 160 * each field are documented in sys/lwp.h. 161 * 162 * State transitions must be made with the LWP's general lock held, 163 * and may cause the LWP's lock pointer to change. Manipulation of 164 * the general lock is not performed directly, but through calls to 165 * lwp_lock(), lwp_unlock() and others.
It should be noted that the 166 * adaptive locks are not allowed to be released while the LWP's lock 167 * is being held (unlike for other spin-locks). 168 * 169 * States and their associated locks: 170 * 171 * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: 172 * 173 * Always covered by spc_lwplock, which protects LWPs not 174 * associated with any other sync object. This is a per-CPU 175 * lock and matches lwp::l_cpu. 176 * 177 * LSRUN: 178 * 179 * Always covered by spc_mutex, which protects the run queues. 180 * This is a per-CPU lock and matches lwp::l_cpu. 181 * 182 * LSSLEEP: 183 * 184 * Covered by a lock associated with the sleep queue (sometimes 185 * a turnstile sleep queue) that the LWP resides on. This can 186 * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). 187 * 188 * LSSTOP: 189 * 190 * If the LWP was previously sleeping (l_wchan != NULL), then 191 * l_mutex references the sleep queue lock. If the LWP was 192 * runnable or on the CPU when halted, or has been removed from 193 * the sleep queue since halted, then the lock is spc_lwplock. 194 * 195 * The lock order is as follows: 196 * 197 * sleepq -> turnstile -> spc_lwplock -> spc_mutex 198 * 199 * Each process has a scheduler state lock (proc::p_lock), and a 200 * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and 201 * so on. When an LWP is to be entered into or removed from one of the 202 * following states, p_lock must be held and the process wide counters 203 * adjusted: 204 * 205 * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED 206 * 207 * (But not always for kernel threads. There are some special cases 208 * as mentioned above: soft interrupts, and the idle loops.) 209 * 210 * Note that an LWP is considered running or likely to run soon if in 211 * one of the following states. This affects the value of p_nrlwps: 212 * 213 * LSRUN, LSONPROC, LSSLEEP 214 * 215 * p_lock does not need to be held when transitioning among these 216 * three states, hence p_lock is rarely taken for state transitions.
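 *
 * As an illustrative sketch only (not a verbatim excerpt of the code
 * below), the usual shape of a state inspection or transition is to
 * take the process lock first, then the LWP's general lock:
 *
 *	mutex_enter(p->p_lock);
 *	lwp_lock(l);
 *	if (l->l_stat == LSSLEEP) {
 *		... adjust per-LWP state and counters here ...
 *	}
 *	lwp_unlock(l);
 *	mutex_exit(p->p_lock);
 *
 * Because the lock pointer can change, acquisition always goes through
 * lwp_lock(), which re-checks lwp::l_mutex after taking the mutex.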
217 */ 218 219 #include <sys/cdefs.h> 220 __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.243 2021/01/13 07:36:56 skrll Exp $"); 221 222 #include "opt_ddb.h" 223 #include "opt_lockdebug.h" 224 #include "opt_dtrace.h" 225 226 #define _LWP_API_PRIVATE 227 228 #include <sys/param.h> 229 #include <sys/systm.h> 230 #include <sys/cpu.h> 231 #include <sys/pool.h> 232 #include <sys/proc.h> 233 #include <sys/syscallargs.h> 234 #include <sys/syscall_stats.h> 235 #include <sys/kauth.h> 236 #include <sys/sleepq.h> 237 #include <sys/lockdebug.h> 238 #include <sys/kmem.h> 239 #include <sys/pset.h> 240 #include <sys/intr.h> 241 #include <sys/lwpctl.h> 242 #include <sys/atomic.h> 243 #include <sys/filedesc.h> 244 #include <sys/fstrans.h> 245 #include <sys/dtrace_bsd.h> 246 #include <sys/sdt.h> 247 #include <sys/ptrace.h> 248 #include <sys/xcall.h> 249 #include <sys/uidinfo.h> 250 #include <sys/sysctl.h> 251 #include <sys/psref.h> 252 #include <sys/msan.h> 253 #include <sys/kcov.h> 254 #include <sys/cprng.h> 255 #include <sys/futex.h> 256 257 #include <uvm/uvm_extern.h> 258 #include <uvm/uvm_object.h> 259 260 static pool_cache_t lwp_cache __read_mostly; 261 struct lwplist alllwp __cacheline_aligned; 262 263 static int lwp_ctor(void *, void *, int); 264 static void lwp_dtor(void *, void *); 265 266 /* DTrace proc provider probes */ 267 SDT_PROVIDER_DEFINE(proc); 268 269 SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); 270 SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); 271 SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); 272 273 struct turnstile turnstile0 __cacheline_aligned; 274 struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { 275 #ifdef LWP0_CPU_INFO 276 .l_cpu = LWP0_CPU_INFO, 277 #endif 278 #ifdef LWP0_MD_INITIALIZER 279 .l_md = LWP0_MD_INITIALIZER, 280 #endif 281 .l_proc = &proc0, 282 .l_lid = 0, /* we own proc0's slot in the pid table */ 283 .l_flag = LW_SYSTEM, 284 .l_stat = LSONPROC, 285 .l_ts = &turnstile0, 286 .l_syncobj = &sched_syncobj, 287 .l_refcnt = 0, 288 .l_priority = PRI_USER + NPRI_USER - 1, 289 .l_inheritedprio = -1, 290 .l_class = SCHED_OTHER, 291 .l_psid = PS_NONE, 292 .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), 293 .l_name = __UNCONST("swapper"), 294 .l_fd = &filedesc0, 295 }; 296 297 static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); 298 299 /* 300 * sysctl helper routine for kern.maxlwp. Ensures that the new 301 * values are not too low or too high. 
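 *
 * For illustration only (a hypothetical userland consumer, not part of
 * this file), the limit guarded here can be read with sysctlbyname(3):
 *
 *	int cur;
 *	size_t len = sizeof(cur);
 *	if (sysctlbyname("kern.maxlwp", &cur, &len, NULL, 0) == 0)
 *		printf("kern.maxlwp = %d\n", cur);
 *
 * Writes come back through this handler, which rejects negative values,
 * values of 65536 or more, and anything above cpu_maxlwp().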
302 */ 303 static int 304 sysctl_kern_maxlwp(SYSCTLFN_ARGS) 305 { 306 int error, nmaxlwp; 307 struct sysctlnode node; 308 309 nmaxlwp = maxlwp; 310 node = *rnode; 311 node.sysctl_data = &nmaxlwp; 312 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 313 if (error || newp == NULL) 314 return error; 315 316 if (nmaxlwp < 0 || nmaxlwp >= 65536) 317 return EINVAL; 318 if (nmaxlwp > cpu_maxlwp()) 319 return EINVAL; 320 maxlwp = nmaxlwp; 321 322 return 0; 323 } 324 325 static void 326 sysctl_kern_lwp_setup(void) 327 { 328 sysctl_createv(NULL, 0, NULL, NULL, 329 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 330 CTLTYPE_INT, "maxlwp", 331 SYSCTL_DESCR("Maximum number of simultaneous threads"), 332 sysctl_kern_maxlwp, 0, NULL, 0, 333 CTL_KERN, CTL_CREATE, CTL_EOL); 334 } 335 336 void 337 lwpinit(void) 338 { 339 340 LIST_INIT(&alllwp); 341 lwpinit_specificdata(); 342 lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, 343 "lwppl", NULL, IPL_NONE, lwp_ctor, lwp_dtor, NULL); 344 345 maxlwp = cpu_maxlwp(); 346 sysctl_kern_lwp_setup(); 347 } 348 349 void 350 lwp0_init(void) 351 { 352 struct lwp *l = &lwp0; 353 354 KASSERT((void *)uvm_lwp_getuarea(l) != NULL); 355 356 LIST_INSERT_HEAD(&alllwp, l, l_list); 357 358 callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); 359 callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); 360 cv_init(&l->l_sigcv, "sigwait"); 361 cv_init(&l->l_waitcv, "vfork"); 362 363 kauth_cred_hold(proc0.p_cred); 364 l->l_cred = proc0.p_cred; 365 366 kdtrace_thread_ctor(NULL, l); 367 lwp_initspecific(l); 368 369 SYSCALL_TIME_LWP_INIT(l); 370 } 371 372 /* 373 * Initialize the non-zeroed portion of an lwp_t. 374 */ 375 static int 376 lwp_ctor(void *arg, void *obj, int flags) 377 { 378 lwp_t *l = obj; 379 380 l->l_stat = LSIDL; 381 l->l_cpu = curcpu(); 382 l->l_mutex = l->l_cpu->ci_schedstate.spc_lwplock; 383 l->l_ts = pool_get(&turnstile_pool, flags); 384 385 if (l->l_ts == NULL) { 386 return ENOMEM; 387 } else { 388 turnstile_ctor(l->l_ts); 389 return 0; 390 } 391 } 392 393 static void 394 lwp_dtor(void *arg, void *obj) 395 { 396 lwp_t *l = obj; 397 (void)l; 398 399 /* 400 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() 401 * calls will exit before memory of LWP is returned to the pool, where 402 * KVA of LWP structure might be freed and re-used for other purposes. 403 * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu() 404 * callers, therefore cross-call to all CPUs will do the job. Also, 405 * the value of l->l_cpu must be still valid at this point. 406 * 407 * XXX should use epoch based reclamation. 408 */ 409 KASSERT(l->l_cpu != NULL); 410 xc_barrier(0); 411 412 /* 413 * We can't return turnstile0 to the pool (it didn't come from it), 414 * so if it comes up just drop it quietly and move on. 415 */ 416 if (l->l_ts != &turnstile0) 417 pool_put(&turnstile_pool, l->l_ts); 418 } 419 420 /* 421 * Set an LWP suspended. 422 * 423 * Must be called with p_lock held, and the LWP locked. Will unlock the 424 * LWP before return. 425 */ 426 int 427 lwp_suspend(struct lwp *curl, struct lwp *t) 428 { 429 int error; 430 431 KASSERT(mutex_owned(t->l_proc->p_lock)); 432 KASSERT(lwp_locked(t, NULL)); 433 434 KASSERT(curl != t || curl->l_stat == LSONPROC); 435 436 /* 437 * If the current LWP has been told to exit, we must not suspend anyone 438 * else or deadlock could occur. We won't return to userspace. 
439 */ 440 if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { 441 lwp_unlock(t); 442 return (EDEADLK); 443 } 444 445 if ((t->l_flag & LW_DBGSUSPEND) != 0) { 446 lwp_unlock(t); 447 return 0; 448 } 449 450 error = 0; 451 452 switch (t->l_stat) { 453 case LSRUN: 454 case LSONPROC: 455 t->l_flag |= LW_WSUSPEND; 456 lwp_need_userret(t); 457 lwp_unlock(t); 458 break; 459 460 case LSSLEEP: 461 t->l_flag |= LW_WSUSPEND; 462 463 /* 464 * Kick the LWP and try to get it to the kernel boundary 465 * so that it will release any locks that it holds. 466 * setrunnable() will release the lock. 467 */ 468 if ((t->l_flag & LW_SINTR) != 0) 469 setrunnable(t); 470 else 471 lwp_unlock(t); 472 break; 473 474 case LSSUSPENDED: 475 lwp_unlock(t); 476 break; 477 478 case LSSTOP: 479 t->l_flag |= LW_WSUSPEND; 480 setrunnable(t); 481 break; 482 483 case LSIDL: 484 case LSZOMB: 485 error = EINTR; /* It's what Solaris does..... */ 486 lwp_unlock(t); 487 break; 488 } 489 490 return (error); 491 } 492 493 /* 494 * Restart a suspended LWP. 495 * 496 * Must be called with p_lock held, and the LWP locked. Will unlock the 497 * LWP before return. 498 */ 499 void 500 lwp_continue(struct lwp *l) 501 { 502 503 KASSERT(mutex_owned(l->l_proc->p_lock)); 504 KASSERT(lwp_locked(l, NULL)); 505 506 /* If rebooting or not suspended, then just bail out. */ 507 if ((l->l_flag & LW_WREBOOT) != 0) { 508 lwp_unlock(l); 509 return; 510 } 511 512 l->l_flag &= ~LW_WSUSPEND; 513 514 if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { 515 lwp_unlock(l); 516 return; 517 } 518 519 /* setrunnable() will release the lock. */ 520 setrunnable(l); 521 } 522 523 /* 524 * Restart a stopped LWP. 525 * 526 * Must be called with p_lock held, and the LWP NOT locked. Will unlock the 527 * LWP before return. 528 */ 529 void 530 lwp_unstop(struct lwp *l) 531 { 532 struct proc *p = l->l_proc; 533 534 KASSERT(mutex_owned(&proc_lock)); 535 KASSERT(mutex_owned(p->p_lock)); 536 537 lwp_lock(l); 538 539 KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); 540 541 /* If not stopped, then just bail out. */ 542 if (l->l_stat != LSSTOP) { 543 lwp_unlock(l); 544 return; 545 } 546 547 p->p_stat = SACTIVE; 548 p->p_sflag &= ~PS_STOPPING; 549 550 if (!p->p_waited) 551 p->p_pptr->p_nstopchild--; 552 553 if (l->l_wchan == NULL) { 554 /* setrunnable() will release the lock. */ 555 setrunnable(l); 556 } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { 557 /* setrunnable() so we can receive the signal */ 558 setrunnable(l); 559 } else { 560 l->l_stat = LSSLEEP; 561 p->p_nrlwps++; 562 lwp_unlock(l); 563 } 564 } 565 566 /* 567 * Wait for an LWP within the current process to exit. If 'lid' is 568 * non-zero, we are waiting for a specific LWP. 569 * 570 * Must be called with p->p_lock held. 571 */ 572 int 573 lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) 574 { 575 const lwpid_t curlid = l->l_lid; 576 proc_t *p = l->l_proc; 577 lwp_t *l2, *next; 578 int error; 579 580 KASSERT(mutex_owned(p->p_lock)); 581 582 p->p_nlwpwait++; 583 l->l_waitingfor = lid; 584 585 for (;;) { 586 int nfound; 587 588 /* 589 * Avoid a race between exit1() and sigexit(): if the 590 * process is dumping core, then we need to bail out: call 591 * into lwp_userret() where we will be suspended until the 592 * deed is done. 593 */ 594 if ((p->p_sflag & PS_WCORE) != 0) { 595 mutex_exit(p->p_lock); 596 lwp_userret(l); 597 KASSERT(false); 598 } 599 600 /* 601 * First off, drain any detached LWP that is waiting to be 602 * reaped. 
603 */ 604 while ((l2 = p->p_zomblwp) != NULL) { 605 p->p_zomblwp = NULL; 606 lwp_free(l2, false, false);/* releases proc mutex */ 607 mutex_enter(p->p_lock); 608 } 609 610 /* 611 * Now look for an LWP to collect. If the whole process is 612 * exiting, count detached LWPs as eligible to be collected, 613 * but don't drain them here. 614 */ 615 nfound = 0; 616 error = 0; 617 618 /* 619 * If given a specific LID, go via pid_table and make sure 620 * it's not detached. 621 */ 622 if (lid != 0) { 623 l2 = proc_find_lwp(p, lid); 624 if (l2 == NULL) { 625 error = ESRCH; 626 break; 627 } 628 KASSERT(l2->l_lid == lid); 629 if ((l2->l_prflag & LPR_DETACHED) != 0) { 630 error = EINVAL; 631 break; 632 } 633 } else { 634 l2 = LIST_FIRST(&p->p_lwps); 635 } 636 for (; l2 != NULL; l2 = next) { 637 next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling)); 638 639 /* 640 * If a specific wait and the target is waiting on 641 * us, then avoid deadlock. This also traps LWPs 642 * that try to wait on themselves. 643 * 644 * Note that this does not handle more complicated 645 * cycles, like: t1 -> t2 -> t3 -> t1. The process 646 * can still be killed so it is not a major problem. 647 */ 648 if (l2->l_lid == lid && l2->l_waitingfor == curlid) { 649 error = EDEADLK; 650 break; 651 } 652 if (l2 == l) 653 continue; 654 if ((l2->l_prflag & LPR_DETACHED) != 0) { 655 nfound += exiting; 656 continue; 657 } 658 if (lid != 0) { 659 /* 660 * Mark this LWP as the first waiter, if there 661 * is no other. 662 */ 663 if (l2->l_waiter == 0) 664 l2->l_waiter = curlid; 665 } else if (l2->l_waiter != 0) { 666 /* 667 * It already has a waiter - so don't 668 * collect it. If the waiter doesn't 669 * grab it we'll get another chance 670 * later. 671 */ 672 nfound++; 673 continue; 674 } 675 nfound++; 676 677 /* No need to lock the LWP in order to see LSZOMB. */ 678 if (l2->l_stat != LSZOMB) 679 continue; 680 681 /* 682 * We're no longer waiting. Reset the "first waiter" 683 * pointer on the target, in case it was us. 684 */ 685 l->l_waitingfor = 0; 686 l2->l_waiter = 0; 687 p->p_nlwpwait--; 688 if (departed) 689 *departed = l2->l_lid; 690 sched_lwp_collect(l2); 691 692 /* lwp_free() releases the proc lock. */ 693 lwp_free(l2, false, false); 694 mutex_enter(p->p_lock); 695 return 0; 696 } 697 698 if (error != 0) 699 break; 700 if (nfound == 0) { 701 error = ESRCH; 702 break; 703 } 704 705 /* 706 * Note: since the lock will be dropped, need to restart on 707 * wakeup to run all LWPs again, e.g. there may be new LWPs. 708 */ 709 if (exiting) { 710 KASSERT(p->p_nlwps > 1); 711 error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); 712 break; 713 } 714 715 /* 716 * Break out if all LWPs are in _lwp_wait(). There are 717 * other ways to hang the process with _lwp_wait(), but the 718 * sleep is interruptable so little point checking for them. 719 */ 720 if (p->p_nlwpwait == p->p_nlwps) { 721 error = EDEADLK; 722 break; 723 } 724 725 /* 726 * Sit around and wait for something to happen. We'll be 727 * awoken if any of the conditions examined change: if an 728 * LWP exits, is collected, or is detached. 729 */ 730 if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) 731 break; 732 } 733 734 /* 735 * We didn't find any LWPs to collect, we may have received a 736 * signal, or some other condition has caused us to bail out. 737 * 738 * If waiting on a specific LWP, clear the waiters marker: some 739 * other LWP may want it. Then, kick all the remaining waiters 740 * so that they can re-check for zombies and for deadlock. 
741 */ 742 if (lid != 0) { 743 l2 = proc_find_lwp(p, lid); 744 KASSERT(l2 == NULL || l2->l_lid == lid); 745 746 if (l2 != NULL && l2->l_waiter == curlid) 747 l2->l_waiter = 0; 748 } 749 p->p_nlwpwait--; 750 l->l_waitingfor = 0; 751 cv_broadcast(&p->p_lwpcv); 752 753 return error; 754 } 755 756 /* 757 * Create a new LWP within process 'p2', using LWP 'l1' as a template. 758 * The new LWP is created in state LSIDL and must be set running, 759 * suspended, or stopped by the caller. 760 */ 761 int 762 lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, 763 void *stack, size_t stacksize, void (*func)(void *), void *arg, 764 lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, 765 const stack_t *sigstk) 766 { 767 struct lwp *l2; 768 769 KASSERT(l1 == curlwp || l1->l_proc == &proc0); 770 771 /* 772 * Enforce limits, excluding the first lwp and kthreads. We must 773 * use the process credentials here when adjusting the limit, as 774 * they are what's tied to the accounting entity. However for 775 * authorizing the action, we'll use the LWP's credentials. 776 */ 777 mutex_enter(p2->p_lock); 778 if (p2->p_nlwps != 0 && p2 != &proc0) { 779 uid_t uid = kauth_cred_getuid(p2->p_cred); 780 int count = chglwpcnt(uid, 1); 781 if (__predict_false(count > 782 p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { 783 if (kauth_authorize_process(l1->l_cred, 784 KAUTH_PROCESS_RLIMIT, p2, 785 KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), 786 &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) 787 != 0) { 788 (void)chglwpcnt(uid, -1); 789 mutex_exit(p2->p_lock); 790 return EAGAIN; 791 } 792 } 793 } 794 795 /* 796 * First off, reap any detached LWP waiting to be collected. 797 * We can re-use its LWP structure and turnstile. 798 */ 799 if ((l2 = p2->p_zomblwp) != NULL) { 800 p2->p_zomblwp = NULL; 801 lwp_free(l2, true, false); 802 /* p2 now unlocked by lwp_free() */ 803 KASSERT(l2->l_ts != NULL); 804 KASSERT(l2->l_inheritedprio == -1); 805 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); 806 memset(&l2->l_startzero, 0, sizeof(*l2) - 807 offsetof(lwp_t, l_startzero)); 808 } else { 809 mutex_exit(p2->p_lock); 810 l2 = pool_cache_get(lwp_cache, PR_WAITOK); 811 memset(&l2->l_startzero, 0, sizeof(*l2) - 812 offsetof(lwp_t, l_startzero)); 813 SLIST_INIT(&l2->l_pi_lenders); 814 } 815 816 /* 817 * Because of lockless lookup via pid_table, the LWP can be locked 818 * and inspected briefly even after it's freed, so a few fields are 819 * kept stable. 820 */ 821 KASSERT(l2->l_stat == LSIDL); 822 KASSERT(l2->l_cpu != NULL); 823 KASSERT(l2->l_ts != NULL); 824 KASSERT(l2->l_mutex == l2->l_cpu->ci_schedstate.spc_lwplock); 825 826 l2->l_proc = p2; 827 l2->l_refcnt = 0; 828 l2->l_class = sclass; 829 830 /* 831 * Allocate a process ID for this LWP. We need to do this now 832 * while we can still unwind if it fails. Because we're marked 833 * as LSIDL, no lookups by the ID will succeed. 834 * 835 * N.B. this will always succeed for the first LWP in a process, 836 * because proc_alloc_lwpid() will usurp the slot. Also note 837 * that l2->l_proc MUST be valid so that lookups of the proc 838 * will succeed, even if the LWP itself is not visible. 839 */ 840 if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) { 841 pool_cache_put(lwp_cache, l2); 842 return EAGAIN; 843 } 844 845 /* 846 * If vfork(), we want the LWP to run fast and on the same CPU 847 * as its parent, so that it can reuse the VM context and cache 848 * footprint on the local CPU. 849 */ 850 l2->l_kpriority = ((flags & LWP_VFORK) ?
true : false); 851 l2->l_kpribase = PRI_KERNEL; 852 l2->l_priority = l1->l_priority; 853 l2->l_inheritedprio = -1; 854 l2->l_protectprio = -1; 855 l2->l_auxprio = -1; 856 l2->l_flag = 0; 857 l2->l_pflag = LP_MPSAFE; 858 TAILQ_INIT(&l2->l_ld_locks); 859 l2->l_psrefs = 0; 860 kmsan_lwp_alloc(l2); 861 862 /* 863 * For vfork, borrow parent's lwpctl context if it exists. 864 * This also causes us to return via lwp_userret. 865 */ 866 if (flags & LWP_VFORK && l1->l_lwpctl) { 867 l2->l_lwpctl = l1->l_lwpctl; 868 l2->l_flag |= LW_LWPCTL; 869 } 870 871 /* 872 * If not the first LWP in the process, grab a reference to the 873 * descriptor table. 874 */ 875 l2->l_fd = p2->p_fd; 876 if (p2->p_nlwps != 0) { 877 KASSERT(l1->l_proc == p2); 878 fd_hold(l2); 879 } else { 880 KASSERT(l1->l_proc != p2); 881 } 882 883 if (p2->p_flag & PK_SYSTEM) { 884 /* Mark it as a system LWP. */ 885 l2->l_flag |= LW_SYSTEM; 886 } 887 888 kdtrace_thread_ctor(NULL, l2); 889 lwp_initspecific(l2); 890 sched_lwp_fork(l1, l2); 891 lwp_update_creds(l2); 892 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); 893 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); 894 cv_init(&l2->l_sigcv, "sigwait"); 895 cv_init(&l2->l_waitcv, "vfork"); 896 l2->l_syncobj = &sched_syncobj; 897 PSREF_DEBUG_INIT_LWP(l2); 898 899 if (rnewlwpp != NULL) 900 *rnewlwpp = l2; 901 902 /* 903 * PCU state needs to be saved before calling uvm_lwp_fork() so that 904 * the MD cpu_lwp_fork() can copy the saved state to the new LWP. 905 */ 906 pcu_save_all(l1); 907 #if PCU_UNIT_COUNT > 0 908 l2->l_pcu_valid = l1->l_pcu_valid; 909 #endif 910 911 uvm_lwp_setuarea(l2, uaddr); 912 uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); 913 914 mutex_enter(p2->p_lock); 915 if ((flags & LWP_DETACHED) != 0) { 916 l2->l_prflag = LPR_DETACHED; 917 p2->p_ndlwps++; 918 } else 919 l2->l_prflag = 0; 920 921 if (l1->l_proc == p2) { 922 /* 923 * These flags are set while p_lock is held. Copy with 924 * p_lock held too, so the LWP doesn't sneak into the 925 * process without them being set. 926 */ 927 l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); 928 } else { 929 /* fork(): pending core/exit doesn't apply to child. */ 930 l2->l_flag |= (l1->l_flag & LW_WREBOOT); 931 } 932 933 l2->l_sigstk = *sigstk; 934 l2->l_sigmask = *sigmask; 935 TAILQ_INIT(&l2->l_sigpend.sp_info); 936 sigemptyset(&l2->l_sigpend.sp_set); 937 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); 938 p2->p_nlwps++; 939 p2->p_nrlwps++; 940 941 KASSERT(l2->l_affinity == NULL); 942 943 /* Inherit the affinity mask. */ 944 if (l1->l_affinity) { 945 /* 946 * Note that we hold the state lock while inheriting 947 * the affinity to avoid race with sched_setaffinity(). 948 */ 949 lwp_lock(l1); 950 if (l1->l_affinity) { 951 kcpuset_use(l1->l_affinity); 952 l2->l_affinity = l1->l_affinity; 953 } 954 lwp_unlock(l1); 955 } 956 957 /* This marks the end of the "must be atomic" section. */ 958 mutex_exit(p2->p_lock); 959 960 SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); 961 962 mutex_enter(&proc_lock); 963 LIST_INSERT_HEAD(&alllwp, l2, l_list); 964 /* Inherit a processor-set */ 965 l2->l_psid = l1->l_psid; 966 mutex_exit(&proc_lock); 967 968 SYSCALL_TIME_LWP_INIT(l2); 969 970 if (p2->p_emul->e_lwp_fork) 971 (*p2->p_emul->e_lwp_fork)(l1, l2); 972 973 return (0); 974 } 975 976 /* 977 * Set a new LWP running. If the process is stopping, then the LWP is 978 * created stopped. 
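 *
 * Illustrative sketch of a hypothetical caller (start_func, arg, uaddr,
 * sigmask and sigstk are placeholder names, not taken from this file):
 *
 *	error = lwp_create(curlwp, p, uaddr, 0, NULL, 0,
 *	    start_func, arg, &l2, SCHED_OTHER, &sigmask, &sigstk);
 *	if (error == 0)
 *		lwp_start(l2, 0);
 *
 * Passing LWP_SUSPENDED in the flags instead asks the new LWP to
 * suspend itself in lwp_userret() before it runs user code.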
979 */ 980 void 981 lwp_start(lwp_t *l, int flags) 982 { 983 proc_t *p = l->l_proc; 984 985 mutex_enter(p->p_lock); 986 lwp_lock(l); 987 KASSERT(l->l_stat == LSIDL); 988 if ((flags & LWP_SUSPENDED) != 0) { 989 /* It'll suspend itself in lwp_userret(). */ 990 l->l_flag |= LW_WSUSPEND; 991 } 992 if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { 993 KASSERT(l->l_wchan == NULL); 994 l->l_stat = LSSTOP; 995 p->p_nrlwps--; 996 lwp_unlock(l); 997 } else { 998 setrunnable(l); 999 /* LWP now unlocked */ 1000 } 1001 mutex_exit(p->p_lock); 1002 } 1003 1004 /* 1005 * Called by MD code when a new LWP begins execution. Must be called 1006 * with the previous LWP locked (so at splsched), or if there is no 1007 * previous LWP, at splsched. 1008 */ 1009 void 1010 lwp_startup(struct lwp *prev, struct lwp *new_lwp) 1011 { 1012 kmutex_t *lock; 1013 1014 KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); 1015 KASSERT(kpreempt_disabled()); 1016 KASSERT(prev != NULL); 1017 KASSERT((prev->l_pflag & LP_RUNNING) != 0); 1018 KASSERT(curcpu()->ci_mtx_count == -2); 1019 1020 /* 1021 * Immediately mark the previous LWP as no longer running and unlock 1022 * (to keep lock wait times short as possible). If a zombie, don't 1023 * touch after clearing LP_RUNNING as it could be reaped by another 1024 * CPU. Issue a memory barrier to ensure this. 1025 */ 1026 lock = prev->l_mutex; 1027 if (__predict_false(prev->l_stat == LSZOMB)) { 1028 membar_sync(); 1029 } 1030 prev->l_pflag &= ~LP_RUNNING; 1031 mutex_spin_exit(lock); 1032 1033 /* Correct spin mutex count after mi_switch(). */ 1034 curcpu()->ci_mtx_count = 0; 1035 1036 /* Install new VM context. */ 1037 if (__predict_true(new_lwp->l_proc->p_vmspace)) { 1038 pmap_activate(new_lwp); 1039 } 1040 1041 /* We remain at IPL_SCHED from mi_switch() - reset it. */ 1042 spl0(); 1043 1044 LOCKDEBUG_BARRIER(NULL, 0); 1045 SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); 1046 1047 /* For kthreads, acquire kernel lock if not MPSAFE. */ 1048 if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { 1049 KERNEL_LOCK(1, new_lwp); 1050 } 1051 } 1052 1053 /* 1054 * Exit an LWP. 1055 * 1056 * *** WARNING *** This can be called with (l != curlwp) in error paths. 1057 */ 1058 void 1059 lwp_exit(struct lwp *l) 1060 { 1061 struct proc *p = l->l_proc; 1062 struct lwp *l2; 1063 bool current; 1064 1065 current = (l == curlwp); 1066 1067 KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); 1068 KASSERT(p == curproc); 1069 1070 SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); 1071 1072 /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ 1073 LOCKDEBUG_BARRIER(NULL, 0); 1074 KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); 1075 1076 /* 1077 * If we are the last live LWP in a process, we need to exit the 1078 * entire process. We do so with an exit status of zero, because 1079 * it's a "controlled" exit, and because that's what Solaris does. 1080 * 1081 * We are not quite a zombie yet, but for accounting purposes we 1082 * must increment the count of zombies here. 1083 * 1084 * Note: the last LWP's specificdata will be deleted here. 1085 */ 1086 mutex_enter(p->p_lock); 1087 if (p->p_nlwps - p->p_nzlwps == 1) { 1088 KASSERT(current == true); 1089 KASSERT(p != &proc0); 1090 exit1(l, 0, 0); 1091 /* NOTREACHED */ 1092 } 1093 p->p_nzlwps++; 1094 1095 /* 1096 * Perform any required thread cleanup. 
Do this early so 1097 * anyone wanting to look us up with lwp_getref_lwpid() will 1098 * fail to find us before we become a zombie. 1099 * 1100 * N.B. this will unlock p->p_lock on our behalf. 1101 */ 1102 lwp_thread_cleanup(l); 1103 1104 if (p->p_emul->e_lwp_exit) 1105 (*p->p_emul->e_lwp_exit)(l); 1106 1107 /* Drop filedesc reference. */ 1108 fd_free(); 1109 1110 /* Release fstrans private data. */ 1111 fstrans_lwp_dtor(l); 1112 1113 /* Delete the specificdata while it's still safe to sleep. */ 1114 lwp_finispecific(l); 1115 1116 /* 1117 * Release our cached credentials. 1118 */ 1119 kauth_cred_free(l->l_cred); 1120 callout_destroy(&l->l_timeout_ch); 1121 1122 /* 1123 * If traced, report LWP exit event to the debugger. 1124 * 1125 * Remove the LWP from the global list. 1126 * Free its LID from the PID namespace if needed. 1127 */ 1128 mutex_enter(&proc_lock); 1129 1130 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == 1131 (PSL_TRACED|PSL_TRACELWP_EXIT)) { 1132 mutex_enter(p->p_lock); 1133 if (ISSET(p->p_sflag, PS_WEXIT)) { 1134 mutex_exit(p->p_lock); 1135 /* 1136 * We are exiting, bail out without informing parent 1137 * about a terminating LWP as it would deadlock. 1138 */ 1139 } else { 1140 eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); 1141 mutex_enter(&proc_lock); 1142 } 1143 } 1144 1145 LIST_REMOVE(l, l_list); 1146 mutex_exit(&proc_lock); 1147 1148 /* 1149 * Get rid of all references to the LWP that others (e.g. procfs) 1150 * may have, and mark the LWP as a zombie. If the LWP is detached, 1151 * mark it waiting for collection in the proc structure. Note that 1152 * before we can do that, we need to free any other dead, detached 1153 * LWP waiting to meet its maker. 1154 * 1155 * All conditions need to be observed under the same hold of 1156 * p_lock, because if the lock is dropped any of them can change. 1157 */ 1158 mutex_enter(p->p_lock); 1159 for (;;) { 1160 if (lwp_drainrefs(l)) 1161 continue; 1162 if ((l->l_prflag & LPR_DETACHED) != 0) { 1163 if ((l2 = p->p_zomblwp) != NULL) { 1164 p->p_zomblwp = NULL; 1165 lwp_free(l2, false, false); 1166 /* proc now unlocked */ 1167 mutex_enter(p->p_lock); 1168 continue; 1169 } 1170 p->p_zomblwp = l; 1171 } 1172 break; 1173 } 1174 1175 /* 1176 * If we find a pending signal for the process and we have been 1177 * asked to check for signals, then we lose: arrange to have 1178 * all other LWPs in the process check for signals. 1179 */ 1180 if ((l->l_flag & LW_PENDSIG) != 0 && 1181 firstsig(&p->p_sigpend.sp_set) != 0) { 1182 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 1183 lwp_lock(l2); 1184 signotify(l2); 1185 lwp_unlock(l2); 1186 } 1187 } 1188 1189 /* 1190 * Release any PCU resources before becoming a zombie. 1191 */ 1192 pcu_discard_all(l); 1193 1194 lwp_lock(l); 1195 l->l_stat = LSZOMB; 1196 if (l->l_name != NULL) { 1197 strcpy(l->l_name, "(zombie)"); 1198 } 1199 lwp_unlock(l); 1200 p->p_nrlwps--; 1201 cv_broadcast(&p->p_lwpcv); 1202 if (l->l_lwpctl != NULL) 1203 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; 1204 mutex_exit(p->p_lock); 1205 1206 /* 1207 * We can no longer block. At this point, lwp_free() may already 1208 * be gunning for us. On a multi-CPU system, we may be off p_lwps. 1209 * 1210 * Free MD LWP resources. 1211 */ 1212 cpu_lwp_free(l, 0); 1213 1214 if (current) { 1215 /* Switch away into oblivion. */ 1216 lwp_lock(l); 1217 spc_lock(l->l_cpu); 1218 mi_switch(l); 1219 panic("lwp_exit"); 1220 } 1221 } 1222 1223 /* 1224 * Free a dead LWP's remaining resources. 1225 * 1226 * XXXLWP limits.
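 *
 * Illustrative sketch of the reaping pattern used by callers such as
 * lwp_wait() and lwp_exit() above (not a verbatim excerpt): lwp_free()
 * is entered with p_lock held and, unless 'last' is true, drops it,
 * hence the re-acquisition inside the loop:
 *
 *	mutex_enter(p->p_lock);
 *	while ((l2 = p->p_zomblwp) != NULL) {
 *		p->p_zomblwp = NULL;
 *		lwp_free(l2, false, false);
 *		mutex_enter(p->p_lock);
 *	}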
1227 */ 1228 void 1229 lwp_free(struct lwp *l, bool recycle, bool last) 1230 { 1231 struct proc *p = l->l_proc; 1232 struct rusage *ru; 1233 ksiginfoq_t kq; 1234 1235 KASSERT(l != curlwp); 1236 KASSERT(last || mutex_owned(p->p_lock)); 1237 1238 /* 1239 * We use the process credentials instead of the lwp credentials here 1240 * because the lwp credentials may be cached (just after a setuid call) 1241 * and we don't want to pay for syncing, since the lwp is going away 1242 * anyway. 1243 */ 1244 if (p != &proc0 && p->p_nlwps != 1) 1245 (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); 1246 1247 /* 1248 * In the unlikely event that the LWP is still on the CPU, 1249 * then spin until it has switched away. 1250 */ 1251 membar_consumer(); 1252 while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) { 1253 SPINLOCK_BACKOFF_HOOK; 1254 } 1255 1256 /* 1257 * Now that the LWP's known off the CPU, reset its state back to 1258 * LSIDL, which defeats anything that might have gotten a hold on 1259 * the LWP via pid_table before the ID was freed. It's important 1260 * to do this with both the LWP locked and p_lock held. 1261 * 1262 * Also reset the CPU and lock pointer back to curcpu(), since the 1263 * LWP will in all likelihood be cached with the current CPU in 1264 * lwp_cache when we free it and later allocated from there again 1265 * (avoid incidental lock contention). 1266 */ 1267 lwp_lock(l); 1268 l->l_stat = LSIDL; 1269 l->l_cpu = curcpu(); 1270 lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_lwplock); 1271 1272 /* 1273 * If this was not the last LWP in the process, then adjust counters 1274 * and unlock. This is done differently for the last LWP in exit1(). 1275 */ 1276 if (!last) { 1277 /* 1278 * Add the LWP's run time to the process' base value. 1279 * This needs to coincide with coming off p_lwps. 1280 */ 1281 bintime_add(&p->p_rtime, &l->l_rtime); 1282 p->p_pctcpu += l->l_pctcpu; 1283 ru = &p->p_stats->p_ru; 1284 ruadd(ru, &l->l_ru); 1285 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 1286 ru->ru_nivcsw += l->l_nivcsw; 1287 LIST_REMOVE(l, l_sibling); 1288 p->p_nlwps--; 1289 p->p_nzlwps--; 1290 if ((l->l_prflag & LPR_DETACHED) != 0) 1291 p->p_ndlwps--; 1292 1293 /* 1294 * Have any LWPs sleeping in lwp_wait() recheck for 1295 * deadlock. 1296 */ 1297 cv_broadcast(&p->p_lwpcv); 1298 mutex_exit(p->p_lock); 1299 1300 /* Free the LWP ID. */ 1301 mutex_enter(&proc_lock); 1302 proc_free_lwpid(p, l->l_lid); 1303 mutex_exit(&proc_lock); 1304 } 1305 1306 /* 1307 * Destroy the LWP's remaining signal information. 1308 */ 1309 ksiginfo_queue_init(&kq); 1310 sigclear(&l->l_sigpend, NULL, &kq); 1311 ksiginfo_queue_drain(&kq); 1312 cv_destroy(&l->l_sigcv); 1313 cv_destroy(&l->l_waitcv); 1314 1315 /* 1316 * Free lwpctl structure and affinity. 1317 */ 1318 if (l->l_lwpctl) { 1319 lwp_ctl_free(l); 1320 } 1321 if (l->l_affinity) { 1322 kcpuset_unuse(l->l_affinity, NULL); 1323 l->l_affinity = NULL; 1324 } 1325 1326 /* 1327 * Free remaining data structures and the LWP itself unless the 1328 * caller wants to recycle. 1329 */ 1330 if (l->l_name != NULL) 1331 kmem_free(l->l_name, MAXCOMLEN); 1332 1333 kmsan_lwp_free(l); 1334 kcov_lwp_free(l); 1335 cpu_lwp_free2(l); 1336 uvm_lwp_exit(l); 1337 1338 KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); 1339 KASSERT(l->l_inheritedprio == -1); 1340 KASSERT(l->l_blcnt == 0); 1341 kdtrace_thread_dtor(NULL, l); 1342 if (!recycle) 1343 pool_cache_put(lwp_cache, l); 1344 } 1345 1346 /* 1347 * Migrate the LWP to another CPU. Unlocks the LWP.
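 *
 * Illustrative sketch of the calling convention (not a verbatim
 * excerpt; 'tci' is simply a placeholder for the target CPU):
 *
 *	lwp_lock(l);
 *	lwp_migrate(l, tci);
 *
 * For LSRUN and LSONPROC LWPs the move is only recorded in
 * l_target_cpu here; it is completed by the scheduler once the LWP is
 * next switched away from its current CPU.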
1348 */ 1349 void 1350 lwp_migrate(lwp_t *l, struct cpu_info *tci) 1351 { 1352 struct schedstate_percpu *tspc; 1353 int lstat = l->l_stat; 1354 1355 KASSERT(lwp_locked(l, NULL)); 1356 KASSERT(tci != NULL); 1357 1358 /* If LWP is still on the CPU, it must be handled like LSONPROC */ 1359 if ((l->l_pflag & LP_RUNNING) != 0) { 1360 lstat = LSONPROC; 1361 } 1362 1363 /* 1364 * The destination CPU could be changed while previous migration 1365 * was not finished. 1366 */ 1367 if (l->l_target_cpu != NULL) { 1368 l->l_target_cpu = tci; 1369 lwp_unlock(l); 1370 return; 1371 } 1372 1373 /* Nothing to do if trying to migrate to the same CPU */ 1374 if (l->l_cpu == tci) { 1375 lwp_unlock(l); 1376 return; 1377 } 1378 1379 KASSERT(l->l_target_cpu == NULL); 1380 tspc = &tci->ci_schedstate; 1381 switch (lstat) { 1382 case LSRUN: 1383 l->l_target_cpu = tci; 1384 break; 1385 case LSSLEEP: 1386 l->l_cpu = tci; 1387 break; 1388 case LSIDL: 1389 case LSSTOP: 1390 case LSSUSPENDED: 1391 l->l_cpu = tci; 1392 if (l->l_wchan == NULL) { 1393 lwp_unlock_to(l, tspc->spc_lwplock); 1394 return; 1395 } 1396 break; 1397 case LSONPROC: 1398 l->l_target_cpu = tci; 1399 spc_lock(l->l_cpu); 1400 sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); 1401 /* spc now unlocked */ 1402 break; 1403 } 1404 lwp_unlock(l); 1405 } 1406 1407 #define lwp_find_exclude(l) \ 1408 ((l)->l_stat == LSIDL || (l)->l_stat == LSZOMB) 1409 1410 /* 1411 * Find the LWP in the process. Arguments may be zero, in such case, 1412 * the calling process and first LWP in the list will be used. 1413 * On success - returns proc locked. 1414 * 1415 * => pid == 0 -> look in curproc. 1416 * => pid == -1 -> match any proc. 1417 * => otherwise look up the proc. 1418 * 1419 * => lid == 0 -> first LWP in the proc 1420 * => otherwise specific LWP 1421 */ 1422 struct lwp * 1423 lwp_find2(pid_t pid, lwpid_t lid) 1424 { 1425 proc_t *p; 1426 lwp_t *l; 1427 1428 /* First LWP of specified proc. */ 1429 if (lid == 0) { 1430 switch (pid) { 1431 case -1: 1432 /* No lookup keys. */ 1433 return NULL; 1434 case 0: 1435 p = curproc; 1436 mutex_enter(p->p_lock); 1437 break; 1438 default: 1439 mutex_enter(&proc_lock); 1440 p = proc_find(pid); 1441 if (__predict_false(p == NULL)) { 1442 mutex_exit(&proc_lock); 1443 return NULL; 1444 } 1445 mutex_enter(p->p_lock); 1446 mutex_exit(&proc_lock); 1447 break; 1448 } 1449 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1450 if (__predict_true(!lwp_find_exclude(l))) 1451 break; 1452 } 1453 goto out; 1454 } 1455 1456 l = proc_find_lwp_acquire_proc(lid, &p); 1457 if (l == NULL) 1458 return NULL; 1459 KASSERT(p != NULL); 1460 KASSERT(mutex_owned(p->p_lock)); 1461 1462 if (__predict_false(lwp_find_exclude(l))) { 1463 l = NULL; 1464 goto out; 1465 } 1466 1467 /* Apply proc filter, if applicable. */ 1468 switch (pid) { 1469 case -1: 1470 /* Match anything. */ 1471 break; 1472 case 0: 1473 if (p != curproc) 1474 l = NULL; 1475 break; 1476 default: 1477 if (p->p_pid != pid) 1478 l = NULL; 1479 break; 1480 } 1481 1482 out: 1483 if (__predict_false(l == NULL)) { 1484 mutex_exit(p->p_lock); 1485 } 1486 return l; 1487 } 1488 1489 /* 1490 * Look up a live LWP within the specified process. 1491 * 1492 * Must be called with p->p_lock held (as it looks at the radix tree, 1493 * and also wants to exclude idle and zombie LWPs). 
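 *
 * Illustrative sketch of a typical lookup (not a verbatim excerpt):
 *
 *	mutex_enter(p->p_lock);
 *	if ((l = lwp_find(p, lid)) != NULL) {
 *		... inspect 'l', or take a longer-term hold on it
 *		    with lwp_addref() ...
 *	}
 *	mutex_exit(p->p_lock);
 *
 * The returned LWP cannot complete its exit while p_lock is held, so
 * it remains valid until the lock is dropped.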
1494 */ 1495 struct lwp * 1496 lwp_find(struct proc *p, lwpid_t id) 1497 { 1498 struct lwp *l; 1499 1500 KASSERT(mutex_owned(p->p_lock)); 1501 1502 l = proc_find_lwp(p, id); 1503 KASSERT(l == NULL || l->l_lid == id); 1504 1505 /* 1506 * No need to lock - all of these conditions will 1507 * be visible with the process level mutex held. 1508 */ 1509 if (__predict_false(l != NULL && lwp_find_exclude(l))) 1510 l = NULL; 1511 1512 return l; 1513 } 1514 1515 /* 1516 * Update an LWP's cached credentials to mirror the process' master copy. 1517 * 1518 * This happens early in the syscall path, on user trap, and on LWP 1519 * creation. A long-running LWP can also voluntarily choose to update 1520 * its credentials by calling this routine. This may be called from 1521 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. 1522 */ 1523 void 1524 lwp_update_creds(struct lwp *l) 1525 { 1526 kauth_cred_t oc; 1527 struct proc *p; 1528 1529 p = l->l_proc; 1530 oc = l->l_cred; 1531 1532 mutex_enter(p->p_lock); 1533 kauth_cred_hold(p->p_cred); 1534 l->l_cred = p->p_cred; 1535 l->l_prflag &= ~LPR_CRMOD; 1536 mutex_exit(p->p_lock); 1537 if (oc != NULL) 1538 kauth_cred_free(oc); 1539 } 1540 1541 /* 1542 * Verify that an LWP is locked, and optionally verify that the lock matches 1543 * one we specify. 1544 */ 1545 int 1546 lwp_locked(struct lwp *l, kmutex_t *mtx) 1547 { 1548 kmutex_t *cur = l->l_mutex; 1549 1550 return mutex_owned(cur) && (mtx == cur || mtx == NULL); 1551 } 1552 1553 /* 1554 * Lend a new mutex to an LWP. The old mutex must be held. 1555 */ 1556 kmutex_t * 1557 lwp_setlock(struct lwp *l, kmutex_t *mtx) 1558 { 1559 kmutex_t *oldmtx = l->l_mutex; 1560 1561 KASSERT(mutex_owned(oldmtx)); 1562 1563 membar_exit(); 1564 l->l_mutex = mtx; 1565 return oldmtx; 1566 } 1567 1568 /* 1569 * Lend a new mutex to an LWP, and release the old mutex. The old mutex 1570 * must be held. 1571 */ 1572 void 1573 lwp_unlock_to(struct lwp *l, kmutex_t *mtx) 1574 { 1575 kmutex_t *old; 1576 1577 KASSERT(lwp_locked(l, NULL)); 1578 1579 old = l->l_mutex; 1580 membar_exit(); 1581 l->l_mutex = mtx; 1582 mutex_spin_exit(old); 1583 } 1584 1585 int 1586 lwp_trylock(struct lwp *l) 1587 { 1588 kmutex_t *old; 1589 1590 for (;;) { 1591 if (!mutex_tryenter(old = l->l_mutex)) 1592 return 0; 1593 if (__predict_true(l->l_mutex == old)) 1594 return 1; 1595 mutex_spin_exit(old); 1596 } 1597 } 1598 1599 void 1600 lwp_unsleep(lwp_t *l, bool unlock) 1601 { 1602 1603 KASSERT(mutex_owned(l->l_mutex)); 1604 (*l->l_syncobj->sobj_unsleep)(l, unlock); 1605 } 1606 1607 /* 1608 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is 1609 * set. 1610 */ 1611 void 1612 lwp_userret(struct lwp *l) 1613 { 1614 struct proc *p; 1615 int sig; 1616 1617 KASSERT(l == curlwp); 1618 KASSERT(l->l_stat == LSONPROC); 1619 p = l->l_proc; 1620 1621 /* 1622 * It is safe to do this read unlocked on a MP system.. 1623 */ 1624 while ((l->l_flag & LW_USERRET) != 0) { 1625 /* 1626 * Process pending signals first, unless the process 1627 * is dumping core or exiting, where we will instead 1628 * enter the LW_WSUSPEND case below. 1629 */ 1630 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == 1631 LW_PENDSIG) { 1632 mutex_enter(p->p_lock); 1633 while ((sig = issignal(l)) != 0) 1634 postsig(sig); 1635 mutex_exit(p->p_lock); 1636 } 1637 1638 /* 1639 * Core-dump or suspend pending. 
1640 * 1641 * In case of core dump, suspend ourselves, so that the kernel 1642 * stack and therefore the userland registers saved in the 1643 * trapframe are around for coredump() to write them out. 1644 * We also need to save any PCU resources that we have so that 1645 * they are accessible for coredump(). We issue a wakeup on 1646 * p->p_lwpcv so that sigexit() will write the core file out 1647 * once all other LWPs are suspended. 1648 */ 1649 if ((l->l_flag & LW_WSUSPEND) != 0) { 1650 pcu_save_all(l); 1651 mutex_enter(p->p_lock); 1652 p->p_nrlwps--; 1653 cv_broadcast(&p->p_lwpcv); 1654 lwp_lock(l); 1655 l->l_stat = LSSUSPENDED; 1656 lwp_unlock(l); 1657 mutex_exit(p->p_lock); 1658 lwp_lock(l); 1659 spc_lock(l->l_cpu); 1660 mi_switch(l); 1661 } 1662 1663 /* Process is exiting. */ 1664 if ((l->l_flag & LW_WEXIT) != 0) { 1665 lwp_exit(l); 1666 KASSERT(0); 1667 /* NOTREACHED */ 1668 } 1669 1670 /* update lwpctl processor (for vfork child_return) */ 1671 if (l->l_flag & LW_LWPCTL) { 1672 lwp_lock(l); 1673 KASSERT(kpreempt_disabled()); 1674 l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); 1675 l->l_lwpctl->lc_pctr++; 1676 l->l_flag &= ~LW_LWPCTL; 1677 lwp_unlock(l); 1678 } 1679 } 1680 } 1681 1682 /* 1683 * Force an LWP to enter the kernel, to take a trip through lwp_userret(). 1684 */ 1685 void 1686 lwp_need_userret(struct lwp *l) 1687 { 1688 1689 KASSERT(!cpu_intr_p()); 1690 KASSERT(lwp_locked(l, NULL)); 1691 1692 /* 1693 * If the LWP is in any state other than LSONPROC, we know that it 1694 * is executing in-kernel and will hit userret() on the way out. 1695 * 1696 * If the LWP is curlwp, then we know we'll be back out to userspace 1697 * soon (can't be called from a hardware interrupt here). 1698 * 1699 * Otherwise, we can't be sure what the LWP is doing, so first make 1700 * sure the update to l_flag will be globally visible, and then 1701 * force the LWP to take a trip through trap() where it will do 1702 * userret(). 1703 */ 1704 if (l->l_stat == LSONPROC && l != curlwp) { 1705 membar_producer(); 1706 cpu_signotify(l); 1707 } 1708 } 1709 1710 /* 1711 * Add one reference to an LWP. This will prevent the LWP from 1712 * exiting, thus keeping the lwp structure and PCB around to inspect. 1713 */ 1714 void 1715 lwp_addref(struct lwp *l) 1716 { 1717 KASSERT(mutex_owned(l->l_proc->p_lock)); 1718 KASSERT(l->l_stat != LSZOMB); 1719 l->l_refcnt++; 1720 } 1721 1722 /* 1723 * Remove one reference to an LWP. If this is the last reference, 1724 * then we must finalize the LWP's death. 1725 */ 1726 void 1727 lwp_delref(struct lwp *l) 1728 { 1729 struct proc *p = l->l_proc; 1730 1731 mutex_enter(p->p_lock); 1732 lwp_delref2(l); 1733 mutex_exit(p->p_lock); 1734 } 1735 1736 /* 1737 * Remove one reference to an LWP. If this is the last reference, 1738 * then we must finalize the LWP's death. The proc mutex is held 1739 * on entry. 1740 */ 1741 void 1742 lwp_delref2(struct lwp *l) 1743 { 1744 struct proc *p = l->l_proc; 1745 1746 KASSERT(mutex_owned(p->p_lock)); 1747 KASSERT(l->l_stat != LSZOMB); 1748 KASSERT(l->l_refcnt > 0); 1749 1750 if (--l->l_refcnt == 0) 1751 cv_broadcast(&p->p_lwpcv); 1752 } 1753 1754 /* 1755 * Drain all references to the current LWP. Returns true if 1756 * we blocked.
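 *
 * Illustrative sketch of the reference protocol this pairs with (not a
 * verbatim excerpt): a holder such as procfs does
 *
 *	mutex_enter(p->p_lock);
 *	lwp_addref(l);
 *	mutex_exit(p->p_lock);
 *	... inspect the LWP and its PCB ...
 *	lwp_delref(l);
 *
 * while lwp_exit() loops over lwp_drainrefs() to wait for such holders
 * to go away before the LWP is marked LSZOMB.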
1757 */ 1758 bool 1759 lwp_drainrefs(struct lwp *l) 1760 { 1761 struct proc *p = l->l_proc; 1762 bool rv = false; 1763 1764 KASSERT(mutex_owned(p->p_lock)); 1765 1766 l->l_prflag |= LPR_DRAINING; 1767 1768 while (l->l_refcnt > 0) { 1769 rv = true; 1770 cv_wait(&p->p_lwpcv, p->p_lock); 1771 } 1772 return rv; 1773 } 1774 1775 /* 1776 * Return true if the specified LWP is 'alive'. Only p->p_lock need 1777 * be held. 1778 */ 1779 bool 1780 lwp_alive(lwp_t *l) 1781 { 1782 1783 KASSERT(mutex_owned(l->l_proc->p_lock)); 1784 1785 switch (l->l_stat) { 1786 case LSSLEEP: 1787 case LSRUN: 1788 case LSONPROC: 1789 case LSSTOP: 1790 case LSSUSPENDED: 1791 return true; 1792 default: 1793 return false; 1794 } 1795 } 1796 1797 /* 1798 * Return first live LWP in the process. 1799 */ 1800 lwp_t * 1801 lwp_find_first(proc_t *p) 1802 { 1803 lwp_t *l; 1804 1805 KASSERT(mutex_owned(p->p_lock)); 1806 1807 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1808 if (lwp_alive(l)) { 1809 return l; 1810 } 1811 } 1812 1813 return NULL; 1814 } 1815 1816 /* 1817 * Allocate a new lwpctl structure for a user LWP. 1818 */ 1819 int 1820 lwp_ctl_alloc(vaddr_t *uaddr) 1821 { 1822 lcproc_t *lp; 1823 u_int bit, i, offset; 1824 struct uvm_object *uao; 1825 int error; 1826 lcpage_t *lcp; 1827 proc_t *p; 1828 lwp_t *l; 1829 1830 l = curlwp; 1831 p = l->l_proc; 1832 1833 /* don't allow a vforked process to create lwp ctls */ 1834 if (p->p_lflag & PL_PPWAIT) 1835 return EBUSY; 1836 1837 if (l->l_lcpage != NULL) { 1838 lcp = l->l_lcpage; 1839 *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr; 1840 return 0; 1841 } 1842 1843 /* First time around, allocate header structure for the process. */ 1844 if ((lp = p->p_lwpctl) == NULL) { 1845 lp = kmem_alloc(sizeof(*lp), KM_SLEEP); 1846 mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); 1847 lp->lp_uao = NULL; 1848 TAILQ_INIT(&lp->lp_pages); 1849 mutex_enter(p->p_lock); 1850 if (p->p_lwpctl == NULL) { 1851 p->p_lwpctl = lp; 1852 mutex_exit(p->p_lock); 1853 } else { 1854 mutex_exit(p->p_lock); 1855 mutex_destroy(&lp->lp_lock); 1856 kmem_free(lp, sizeof(*lp)); 1857 lp = p->p_lwpctl; 1858 } 1859 } 1860 1861 /* 1862 * Set up an anonymous memory region to hold the shared pages. 1863 * Map them into the process' address space. The user vmspace 1864 * gets the first reference on the UAO. 1865 */ 1866 mutex_enter(&lp->lp_lock); 1867 if (lp->lp_uao == NULL) { 1868 lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); 1869 lp->lp_cur = 0; 1870 lp->lp_max = LWPCTL_UAREA_SZ; 1871 lp->lp_uva = p->p_emul->e_vm_default_addr(p, 1872 (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ, 1873 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 1874 error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, 1875 LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, 1876 UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); 1877 if (error != 0) { 1878 uao_detach(lp->lp_uao); 1879 lp->lp_uao = NULL; 1880 mutex_exit(&lp->lp_lock); 1881 return error; 1882 } 1883 } 1884 1885 /* Get a free block and allocate for this LWP. */ 1886 TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { 1887 if (lcp->lcp_nfree != 0) 1888 break; 1889 } 1890 if (lcp == NULL) { 1891 /* Nothing available - try to set up a free page. */ 1892 if (lp->lp_cur == lp->lp_max) { 1893 mutex_exit(&lp->lp_lock); 1894 return ENOMEM; 1895 } 1896 lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); 1897 1898 /* 1899 * Wire the next page down in kernel space. Since this 1900 * is a new mapping, we must add a reference. 
1901 */ 1902 uao = lp->lp_uao; 1903 (*uao->pgops->pgo_reference)(uao); 1904 lcp->lcp_kaddr = vm_map_min(kernel_map); 1905 error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE, 1906 uao, lp->lp_cur, PAGE_SIZE, 1907 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 1908 UVM_INH_NONE, UVM_ADV_RANDOM, 0)); 1909 if (error != 0) { 1910 mutex_exit(&lp->lp_lock); 1911 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1912 (*uao->pgops->pgo_detach)(uao); 1913 return error; 1914 } 1915 error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr, 1916 lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0); 1917 if (error != 0) { 1918 mutex_exit(&lp->lp_lock); 1919 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1920 lcp->lcp_kaddr + PAGE_SIZE); 1921 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1922 return error; 1923 } 1924 /* Prepare the page descriptor and link into the list. */ 1925 lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur; 1926 lp->lp_cur += PAGE_SIZE; 1927 lcp->lcp_nfree = LWPCTL_PER_PAGE; 1928 lcp->lcp_rotor = 0; 1929 memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ); 1930 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1931 } 1932 for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) { 1933 if (++i >= LWPCTL_BITMAP_ENTRIES) 1934 i = 0; 1935 } 1936 bit = ffs(lcp->lcp_bitmap[i]) - 1; 1937 lcp->lcp_bitmap[i] ^= (1U << bit); 1938 lcp->lcp_rotor = i; 1939 lcp->lcp_nfree--; 1940 l->l_lcpage = lcp; 1941 offset = (i << 5) + bit; 1942 l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset; 1943 *uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t); 1944 mutex_exit(&lp->lp_lock); 1945 1946 KPREEMPT_DISABLE(l); 1947 l->l_lwpctl->lc_curcpu = (int)cpu_index(curcpu()); 1948 KPREEMPT_ENABLE(l); 1949 1950 return 0; 1951 } 1952 1953 /* 1954 * Free an lwpctl structure back to the per-process list. 1955 */ 1956 void 1957 lwp_ctl_free(lwp_t *l) 1958 { 1959 struct proc *p = l->l_proc; 1960 lcproc_t *lp; 1961 lcpage_t *lcp; 1962 u_int map, offset; 1963 1964 /* don't free a lwp context we borrowed for vfork */ 1965 if (p->p_lflag & PL_PPWAIT) { 1966 l->l_lwpctl = NULL; 1967 return; 1968 } 1969 1970 lp = p->p_lwpctl; 1971 KASSERT(lp != NULL); 1972 1973 lcp = l->l_lcpage; 1974 offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr); 1975 KASSERT(offset < LWPCTL_PER_PAGE); 1976 1977 mutex_enter(&lp->lp_lock); 1978 lcp->lcp_nfree++; 1979 map = offset >> 5; 1980 lcp->lcp_bitmap[map] |= (1U << (offset & 31)); 1981 if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0) 1982 lcp->lcp_rotor = map; 1983 if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) { 1984 TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain); 1985 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1986 } 1987 mutex_exit(&lp->lp_lock); 1988 } 1989 1990 /* 1991 * Process is exiting; tear down lwpctl state. This can only be safely 1992 * called by the last LWP in the process. 
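 *
 * For orientation (an illustrative summary, not a verbatim excerpt):
 * lwp_ctl_alloc() above is reached from the _lwp_ctl(2) system call,
 * lwp_ctl_free() is called from lwp_free(), and this routine tears the
 * whole mapping down. Userland sees the shared page roughly as:
 *
 *	static struct lwpctl *lc;
 *	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &lc) == 0)
 *		... read lc->lc_curcpu without entering the kernel ...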
1993 */ 1994 void 1995 lwp_ctl_exit(void) 1996 { 1997 lcpage_t *lcp, *next; 1998 lcproc_t *lp; 1999 proc_t *p; 2000 lwp_t *l; 2001 2002 l = curlwp; 2003 l->l_lwpctl = NULL; 2004 l->l_lcpage = NULL; 2005 p = l->l_proc; 2006 lp = p->p_lwpctl; 2007 2008 KASSERT(lp != NULL); 2009 KASSERT(p->p_nlwps == 1); 2010 2011 for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) { 2012 next = TAILQ_NEXT(lcp, lcp_chain); 2013 uvm_unmap(kernel_map, lcp->lcp_kaddr, 2014 lcp->lcp_kaddr + PAGE_SIZE); 2015 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 2016 } 2017 2018 if (lp->lp_uao != NULL) { 2019 uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva, 2020 lp->lp_uva + LWPCTL_UAREA_SZ); 2021 } 2022 2023 mutex_destroy(&lp->lp_lock); 2024 kmem_free(lp, sizeof(*lp)); 2025 p->p_lwpctl = NULL; 2026 } 2027 2028 /* 2029 * Return the current LWP's "preemption counter". Used to detect 2030 * preemption across operations that can tolerate preemption without 2031 * crashing, but which may generate incorrect results if preempted. 2032 */ 2033 uint64_t 2034 lwp_pctr(void) 2035 { 2036 2037 return curlwp->l_ncsw; 2038 } 2039 2040 /* 2041 * Set an LWP's private data pointer. 2042 */ 2043 int 2044 lwp_setprivate(struct lwp *l, void *ptr) 2045 { 2046 int error = 0; 2047 2048 l->l_private = ptr; 2049 #ifdef __HAVE_CPU_LWP_SETPRIVATE 2050 error = cpu_lwp_setprivate(l, ptr); 2051 #endif 2052 return error; 2053 } 2054 2055 /* 2056 * Perform any thread-related cleanup on LWP exit. 2057 * N.B. l->l_proc->p_lock must be HELD on entry but will 2058 * be released before returning! 2059 */ 2060 void 2061 lwp_thread_cleanup(struct lwp *l) 2062 { 2063 const lwpid_t tid = l->l_lid; 2064 2065 KASSERT((tid & FUTEX_TID_MASK) == tid); 2066 KASSERT(mutex_owned(l->l_proc->p_lock)); 2067 2068 mutex_exit(l->l_proc->p_lock); 2069 2070 /* 2071 * If the LWP has robust futexes, release them all 2072 * now. 2073 */ 2074 if (__predict_false(l->l_robust_head != 0)) { 2075 futex_release_all_lwp(l, tid); 2076 } 2077 } 2078 2079 #if defined(DDB) 2080 #include <machine/pcb.h> 2081 2082 void 2083 lwp_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2084 { 2085 lwp_t *l; 2086 2087 LIST_FOREACH(l, &alllwp, l_list) { 2088 uintptr_t stack = (uintptr_t)KSTACK_LOWEST_ADDR(l); 2089 2090 if (addr < stack || stack + KSTACK_SIZE <= addr) { 2091 continue; 2092 } 2093 (*pr)("%p is %p+%zu, LWP %p's stack\n", 2094 (void *)addr, (void *)stack, 2095 (size_t)(addr - stack), l); 2096 } 2097 } 2098 #endif /* defined(DDB) */ 2099