1 /* $NetBSD: kern_lwp.c,v 1.228 2020/02/27 20:52:25 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Nathan J. Williams, and Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Overview 35 * 36 * Lightweight processes (LWPs) are the basic unit or thread of 37 * execution within the kernel. The core state of an LWP is described 38 * by "struct lwp", also known as lwp_t. 39 * 40 * Each LWP is contained within a process (described by "struct proc"). 41 * Every process contains at least one LWP, but may contain more. The 42 * process describes attributes shared among all of its LWPs such as a 43 * private address space, global execution state (stopped, active, 44 * zombie, ...), signal disposition and so on. On a multiprocessor 45 * machine, multiple LWPs may be executing concurrently in the kernel. 46 * 47 * Execution states 48 * 49 * At any given time, an LWP has overall state that is described by 50 * lwp::l_stat. The states are broken into two sets below. The first 51 * set is guaranteed to represent the absolute, current state of the 52 * LWP: 53 * 54 * LSONPROC 55 * 56 * On processor: the LWP is executing on a CPU, either in the 57 * kernel or in user space. 58 * 59 * LSRUN 60 * 61 * Runnable: the LWP is parked on a run queue, and may soon be 62 * chosen to run by an idle processor, or by a processor that 63 * has been asked to preempt a currently running but lower 64 * priority LWP. 65 * 66 * LSIDL 67 * 68 * Idle: the LWP has been created but has not yet executed, 69 * or it has ceased executing a unit of work and is waiting 70 * to be started again. 71 * 72 * LSSUSPENDED: 73 * 74 * Suspended: the LWP has had its execution suspended by 75 * another LWP in the same process using the _lwp_suspend() 76 * system call. User-level LWPs also enter the suspended 77 * state when the system is shutting down. 78 * 79 * The second set represents a "statement of intent" on behalf of the 80 * LWP. The LWP may in fact be executing on a processor, may be 81 * sleeping or idle.
It is expected to take the necessary action to 82 * stop executing or become "running" again within a short timeframe. 83 * The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running. 84 * Importantly, it indicates that its state is tied to a CPU. 85 * 86 * LSZOMB: 87 * 88 * Dead or dying: the LWP has released most of its resources 89 * and is about to switch away into oblivion, or has already 90 * switched away. When it switches away, its few remaining 91 * resources can be collected. 92 * 93 * LSSLEEP: 94 * 95 * Sleeping: the LWP has entered itself onto a sleep queue, and 96 * has switched away or will switch away shortly to allow other 97 * LWPs to run on the CPU. 98 * 99 * LSSTOP: 100 * 101 * Stopped: the LWP has been stopped as a result of a job 102 * control signal, or as a result of the ptrace() interface. 103 * 104 * Stopped LWPs may run briefly within the kernel to handle 105 * signals that they receive, but will not return to user space 106 * until their process' state is changed away from stopped. 107 * 108 * Single LWPs within a process cannot be stopped 109 * selectively: all actions that can stop or continue LWPs 110 * occur at the process level. 111 * 112 * State transitions 113 * 114 * Note that the LSSTOP state may only be set when returning to 115 * user space in userret(), or when sleeping interruptibly. The 116 * LSSUSPENDED state may only be set in userret(). Before setting 117 * those states, we try to ensure that the LWPs will release all 118 * locks that they hold, and at a minimum try to ensure that the 119 * LWP can be set runnable again by a signal. 120 * 121 * LWPs may transition states in the following ways: 122 * 123 * RUN -------> ONPROC ONPROC -----> RUN 124 * > SLEEP 125 * > STOPPED 126 * > SUSPENDED 127 * > ZOMB 128 * > IDL (special cases) 129 * 130 * STOPPED ---> RUN SUSPENDED --> RUN 131 * > SLEEP 132 * 133 * SLEEP -----> ONPROC IDL --------> RUN 134 * > RUN > SUSPENDED 135 * > STOPPED > STOPPED 136 * > ONPROC (special cases) 137 * 138 * Some state transitions are only possible with kernel threads (e.g. 139 * ONPROC -> IDL) and happen under tightly controlled circumstances 140 * free of unwanted side effects. 141 * 142 * Migration 143 * 144 * Migration of threads from one CPU to another may be performed 145 * internally by the scheduler via the sched_takecpu() or sched_catchlwp() 146 * functions. The universal lwp_migrate() function should be used for 147 * any other cases. Subsystems in the kernel must be aware that an 148 * LWP's CPU may change while the LWP is not locked. 149 * 150 * Locking 151 * 152 * The majority of fields in 'struct lwp' are covered by a single, 153 * general spin lock pointed to by lwp::l_mutex. The locks covering 154 * each field are documented in sys/lwp.h. 155 * 156 * State transitions must be made with the LWP's general lock held, 157 * and may cause the LWP's lock pointer to change. Manipulation of 158 * the general lock is not performed directly, but through calls to 159 * lwp_lock(), lwp_unlock() and others. It should be noted that the 160 * adaptive locks are not allowed to be released while the LWP's lock 161 * is being held (unlike for other spin-locks).
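 *
 *	Illustrative only, a minimal sketch of the usual pattern (not taken
 *	verbatim from any one caller in this file) for examining or changing
 *	per-LWP state that is covered by the general lock:
 *
 *		lwp_lock(l);
 *		(examine or update l->l_stat, l->l_flag, ...)
 *		lwp_unlock(l);
 *
 *	Because a state transition may change which mutex lwp::l_mutex
 *	points at, callers always go through lwp_lock()/lwp_unlock()
 *	rather than caching the lock pointer themselves.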
162 * 163 * States and their associated locks: 164 * 165 * LSIDL, LSONPROC, LSZOMB, LSSUSPENDED: 166 * 167 * Always covered by spc_lwplock, which protects LWPs not 168 * associated with any other sync object. This is a per-CPU 169 * lock and matches lwp::l_cpu. 170 * 171 * LSRUN: 172 * 173 * Always covered by spc_mutex, which protects the run queues.
174 * This is a per-CPU lock and matches lwp::l_cpu. 175 * 176 * LSSLEEP: 177 * 178 * Covered by a lock associated with the sleep queue (sometimes 179 * a turnstile sleep queue) that the LWP resides on. This can 180 * be spc_lwplock for SOBJ_SLEEPQ_NULL (an "untracked" sleep). 181 * 182 * LSSTOP: 183 * 184 * If the LWP was previously sleeping (l_wchan != NULL), then 185 * l_mutex references the sleep queue lock. If the LWP was 186 * runnable or on the CPU when halted, or has been removed from 187 * the sleep queue since halted, then the lock is spc_lwplock. 188 * 189 * The lock order is as follows: 190 * 191 * sleepq -> turnstile -> spc_lwplock -> spc_mutex 192 * 193 * Each process has an scheduler state lock (proc::p_lock), and a 194 * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and 195 * so on. When an LWP is to be entered into or removed from one of the 196 * following states, p_lock must be held and the process wide counters 197 * adjusted: 198 * 199 * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED 200 * 201 * (But not always for kernel threads. There are some special cases 202 * as mentioned above: soft interrupts, and the idle loops.) 203 * 204 * Note that an LWP is considered running or likely to run soon if in 205 * one of the following states. This affects the value of p_nrlwps: 206 * 207 * LSRUN, LSONPROC, LSSLEEP 208 * 209 * p_lock does not need to be held when transitioning among these 210 * three states, hence p_lock is rarely taken for state transitions. 211 */ 212 213 #include <sys/cdefs.h> 214 __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.228 2020/02/27 20:52:25 ad Exp $"); 215 216 #include "opt_ddb.h" 217 #include "opt_lockdebug.h" 218 #include "opt_dtrace.h" 219 220 #define _LWP_API_PRIVATE 221 222 #include <sys/param.h> 223 #include <sys/systm.h> 224 #include <sys/cpu.h> 225 #include <sys/pool.h> 226 #include <sys/proc.h> 227 #include <sys/syscallargs.h> 228 #include <sys/syscall_stats.h> 229 #include <sys/kauth.h> 230 #include <sys/sleepq.h> 231 #include <sys/lockdebug.h> 232 #include <sys/kmem.h> 233 #include <sys/pset.h> 234 #include <sys/intr.h> 235 #include <sys/lwpctl.h> 236 #include <sys/atomic.h> 237 #include <sys/filedesc.h> 238 #include <sys/fstrans.h> 239 #include <sys/dtrace_bsd.h> 240 #include <sys/sdt.h> 241 #include <sys/ptrace.h> 242 #include <sys/xcall.h> 243 #include <sys/uidinfo.h> 244 #include <sys/sysctl.h> 245 #include <sys/psref.h> 246 #include <sys/msan.h> 247 248 #include <uvm/uvm_extern.h> 249 #include <uvm/uvm_object.h> 250 251 static pool_cache_t lwp_cache __read_mostly; 252 struct lwplist alllwp __cacheline_aligned; 253 254 static void lwp_dtor(void *, void *); 255 256 /* DTrace proc provider probes */ 257 SDT_PROVIDER_DEFINE(proc); 258 259 SDT_PROBE_DEFINE1(proc, kernel, , lwp__create, "struct lwp *"); 260 SDT_PROBE_DEFINE1(proc, kernel, , lwp__start, "struct lwp *"); 261 SDT_PROBE_DEFINE1(proc, kernel, , lwp__exit, "struct lwp *"); 262 263 struct turnstile turnstile0 __cacheline_aligned; 264 struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = { 265 #ifdef LWP0_CPU_INFO 266 .l_cpu = LWP0_CPU_INFO, 267 #endif 268 #ifdef LWP0_MD_INITIALIZER 269 .l_md = LWP0_MD_INITIALIZER, 270 #endif 271 .l_proc = &proc0, 272 .l_lid = 1, 273 .l_flag = LW_SYSTEM, 274 .l_stat = LSONPROC, 275 .l_ts = &turnstile0, 276 .l_syncobj = &sched_syncobj, 277 .l_refcnt = 1, 278 .l_priority = PRI_USER + NPRI_USER - 1, 279 .l_inheritedprio = -1, 280 .l_class = SCHED_OTHER, 281 .l_psid = PS_NONE, 282 .l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders), 283 .l_name = 
__UNCONST("swapper"), 284 .l_fd = &filedesc0, 285 }; 286 287 static int sysctl_kern_maxlwp(SYSCTLFN_PROTO); 288 289 /* 290 * sysctl helper routine for kern.maxlwp. Ensures that the new 291 * values are not too low or too high. 292 */ 293 static int 294 sysctl_kern_maxlwp(SYSCTLFN_ARGS) 295 { 296 int error, nmaxlwp; 297 struct sysctlnode node; 298 299 nmaxlwp = maxlwp; 300 node = *rnode; 301 node.sysctl_data = &nmaxlwp; 302 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 303 if (error || newp == NULL) 304 return error; 305 306 if (nmaxlwp < 0 || nmaxlwp >= 65536) 307 return EINVAL; 308 if (nmaxlwp > cpu_maxlwp()) 309 return EINVAL; 310 maxlwp = nmaxlwp; 311 312 return 0; 313 } 314 315 static void 316 sysctl_kern_lwp_setup(void) 317 { 318 struct sysctllog *clog = NULL; 319 320 sysctl_createv(&clog, 0, NULL, NULL, 321 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 322 CTLTYPE_INT, "maxlwp", 323 SYSCTL_DESCR("Maximum number of simultaneous threads"), 324 sysctl_kern_maxlwp, 0, NULL, 0, 325 CTL_KERN, CTL_CREATE, CTL_EOL); 326 } 327 328 void 329 lwpinit(void) 330 { 331 332 LIST_INIT(&alllwp); 333 lwpinit_specificdata(); 334 lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, 335 "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL); 336 337 maxlwp = cpu_maxlwp(); 338 sysctl_kern_lwp_setup(); 339 } 340 341 void 342 lwp0_init(void) 343 { 344 struct lwp *l = &lwp0; 345 346 KASSERT((void *)uvm_lwp_getuarea(l) != NULL); 347 KASSERT(l->l_lid == proc0.p_nlwpid); 348 349 LIST_INSERT_HEAD(&alllwp, l, l_list); 350 351 callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE); 352 callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l); 353 cv_init(&l->l_sigcv, "sigwait"); 354 cv_init(&l->l_waitcv, "vfork"); 355 356 kauth_cred_hold(proc0.p_cred); 357 l->l_cred = proc0.p_cred; 358 359 kdtrace_thread_ctor(NULL, l); 360 lwp_initspecific(l); 361 362 SYSCALL_TIME_LWP_INIT(l); 363 } 364 365 static void 366 lwp_dtor(void *arg, void *obj) 367 { 368 lwp_t *l = obj; 369 (void)l; 370 371 /* 372 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu() 373 * calls will exit before memory of LWP is returned to the pool, where 374 * KVA of LWP structure might be freed and re-used for other purposes. 375 * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu() 376 * callers, therefore cross-call to all CPUs will do the job. Also, 377 * the value of l->l_cpu must be still valid at this point. 378 */ 379 KASSERT(l->l_cpu != NULL); 380 xc_barrier(0); 381 } 382 383 /* 384 * Set an suspended. 385 * 386 * Must be called with p_lock held, and the LWP locked. Will unlock the 387 * LWP before return. 388 */ 389 int 390 lwp_suspend(struct lwp *curl, struct lwp *t) 391 { 392 int error; 393 394 KASSERT(mutex_owned(t->l_proc->p_lock)); 395 KASSERT(lwp_locked(t, NULL)); 396 397 KASSERT(curl != t || curl->l_stat == LSONPROC); 398 399 /* 400 * If the current LWP has been told to exit, we must not suspend anyone 401 * else or deadlock could occur. We won't return to userspace. 402 */ 403 if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { 404 lwp_unlock(t); 405 return (EDEADLK); 406 } 407 408 if ((t->l_flag & LW_DBGSUSPEND) != 0) { 409 lwp_unlock(t); 410 return 0; 411 } 412 413 error = 0; 414 415 switch (t->l_stat) { 416 case LSRUN: 417 case LSONPROC: 418 t->l_flag |= LW_WSUSPEND; 419 lwp_need_userret(t); 420 lwp_unlock(t); 421 break; 422 423 case LSSLEEP: 424 t->l_flag |= LW_WSUSPEND; 425 426 /* 427 * Kick the LWP and try to get it to the kernel boundary 428 * so that it will release any locks that it holds. 
429 * setrunnable() will release the lock. 430 */ 431 if ((t->l_flag & LW_SINTR) != 0) 432 setrunnable(t); 433 else 434 lwp_unlock(t); 435 break; 436 437 case LSSUSPENDED: 438 lwp_unlock(t); 439 break; 440 441 case LSSTOP: 442 t->l_flag |= LW_WSUSPEND; 443 setrunnable(t); 444 break; 445 446 case LSIDL: 447 case LSZOMB: 448 error = EINTR; /* It's what Solaris does..... */ 449 lwp_unlock(t); 450 break; 451 } 452 453 return (error); 454 } 455 456 /* 457 * Restart a suspended LWP. 458 * 459 * Must be called with p_lock held, and the LWP locked. Will unlock the 460 * LWP before return. 461 */ 462 void 463 lwp_continue(struct lwp *l) 464 { 465 466 KASSERT(mutex_owned(l->l_proc->p_lock)); 467 KASSERT(lwp_locked(l, NULL)); 468 469 /* If rebooting or not suspended, then just bail out. */ 470 if ((l->l_flag & LW_WREBOOT) != 0) { 471 lwp_unlock(l); 472 return; 473 } 474 475 l->l_flag &= ~LW_WSUSPEND; 476 477 if (l->l_stat != LSSUSPENDED || (l->l_flag & LW_DBGSUSPEND) != 0) { 478 lwp_unlock(l); 479 return; 480 } 481 482 /* setrunnable() will release the lock. */ 483 setrunnable(l); 484 } 485 486 /* 487 * Restart a stopped LWP. 488 * 489 * Must be called with p_lock held, and the LWP NOT locked. Will unlock the 490 * LWP before return. 491 */ 492 void 493 lwp_unstop(struct lwp *l) 494 { 495 struct proc *p = l->l_proc; 496 497 KASSERT(mutex_owned(proc_lock)); 498 KASSERT(mutex_owned(p->p_lock)); 499 500 lwp_lock(l); 501 502 KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); 503 504 /* If not stopped, then just bail out. */ 505 if (l->l_stat != LSSTOP) { 506 lwp_unlock(l); 507 return; 508 } 509 510 p->p_stat = SACTIVE; 511 p->p_sflag &= ~PS_STOPPING; 512 513 if (!p->p_waited) 514 p->p_pptr->p_nstopchild--; 515 516 if (l->l_wchan == NULL) { 517 /* setrunnable() will release the lock. */ 518 setrunnable(l); 519 } else if (p->p_xsig && (l->l_flag & LW_SINTR) != 0) { 520 /* setrunnable() so we can receive the signal */ 521 setrunnable(l); 522 } else { 523 l->l_stat = LSSLEEP; 524 p->p_nrlwps++; 525 lwp_unlock(l); 526 } 527 } 528 529 /* 530 * Wait for an LWP within the current process to exit. If 'lid' is 531 * non-zero, we are waiting for a specific LWP. 532 * 533 * Must be called with p->p_lock held. 534 */ 535 int 536 lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) 537 { 538 const lwpid_t curlid = l->l_lid; 539 proc_t *p = l->l_proc; 540 lwp_t *l2, *next; 541 int error; 542 543 KASSERT(mutex_owned(p->p_lock)); 544 545 p->p_nlwpwait++; 546 l->l_waitingfor = lid; 547 548 for (;;) { 549 int nfound; 550 551 /* 552 * Avoid a race between exit1() and sigexit(): if the 553 * process is dumping core, then we need to bail out: call 554 * into lwp_userret() where we will be suspended until the 555 * deed is done. 556 */ 557 if ((p->p_sflag & PS_WCORE) != 0) { 558 mutex_exit(p->p_lock); 559 lwp_userret(l); 560 KASSERT(false); 561 } 562 563 /* 564 * First off, drain any detached LWP that is waiting to be 565 * reaped. 566 */ 567 while ((l2 = p->p_zomblwp) != NULL) { 568 p->p_zomblwp = NULL; 569 lwp_free(l2, false, false);/* releases proc mutex */ 570 mutex_enter(p->p_lock); 571 } 572 573 /* 574 * Now look for an LWP to collect. If the whole process is 575 * exiting, count detached LWPs as eligible to be collected, 576 * but don't drain them here. 577 */ 578 nfound = 0; 579 error = 0; 580 581 /* 582 * If given a specific LID, go via the tree and make sure 583 * it's not detached. 
584 */ 585 if (lid != 0) { 586 l2 = radix_tree_lookup_node(&p->p_lwptree, 587 (uint64_t)(lid - 1)); 588 if (l2 == NULL) { 589 error = ESRCH; 590 break; 591 } 592 KASSERT(l2->l_lid == lid); 593 if ((l2->l_prflag & LPR_DETACHED) != 0) { 594 error = EINVAL; 595 break; 596 } 597 } else { 598 l2 = LIST_FIRST(&p->p_lwps); 599 } 600 for (; l2 != NULL; l2 = next) { 601 next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling)); 602 603 /* 604 * If a specific wait and the target is waiting on 605 * us, then avoid deadlock. This also traps LWPs 606 * that try to wait on themselves. 607 * 608 * Note that this does not handle more complicated 609 * cycles, like: t1 -> t2 -> t3 -> t1. The process 610 * can still be killed so it is not a major problem. 611 */ 612 if (l2->l_lid == lid && l2->l_waitingfor == curlid) { 613 error = EDEADLK; 614 break; 615 } 616 if (l2 == l) 617 continue; 618 if ((l2->l_prflag & LPR_DETACHED) != 0) { 619 nfound += exiting; 620 continue; 621 } 622 if (lid != 0) { 623 /* 624 * Mark this LWP as the first waiter, if there 625 * is no other. 626 */ 627 if (l2->l_waiter == 0) 628 l2->l_waiter = curlid; 629 } else if (l2->l_waiter != 0) { 630 /* 631 * It already has a waiter - so don't 632 * collect it. If the waiter doesn't 633 * grab it we'll get another chance 634 * later. 635 */ 636 nfound++; 637 continue; 638 } 639 nfound++; 640 641 /* No need to lock the LWP in order to see LSZOMB. */ 642 if (l2->l_stat != LSZOMB) 643 continue; 644 645 /* 646 * We're no longer waiting. Reset the "first waiter" 647 * pointer on the target, in case it was us. 648 */ 649 l->l_waitingfor = 0; 650 l2->l_waiter = 0; 651 p->p_nlwpwait--; 652 if (departed) 653 *departed = l2->l_lid; 654 sched_lwp_collect(l2); 655 656 /* lwp_free() releases the proc lock. */ 657 lwp_free(l2, false, false); 658 mutex_enter(p->p_lock); 659 return 0; 660 } 661 662 if (error != 0) 663 break; 664 if (nfound == 0) { 665 error = ESRCH; 666 break; 667 } 668 669 /* 670 * Note: since the lock will be dropped, need to restart on 671 * wakeup to run all LWPs again, e.g. there may be new LWPs. 672 */ 673 if (exiting) { 674 KASSERT(p->p_nlwps > 1); 675 error = cv_timedwait(&p->p_lwpcv, p->p_lock, 1); 676 break; 677 } 678 679 /* 680 * If all other LWPs are waiting for exits or suspends 681 * and the supply of zombies and potential zombies is 682 * exhausted, then we are about to deadlock. 683 */ 684 if ((p->p_sflag & PS_WEXIT) != 0 || 685 p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) { 686 error = EDEADLK; 687 break; 688 } 689 690 /* 691 * Sit around and wait for something to happen. We'll be 692 * awoken if any of the conditions examined change: if an 693 * LWP exits, is collected, or is detached. 694 */ 695 if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) 696 break; 697 } 698 699 /* 700 * We didn't find any LWPs to collect, we may have received a 701 * signal, or some other condition has caused us to bail out. 702 * 703 * If waiting on a specific LWP, clear the waiters marker: some 704 * other LWP may want it. Then, kick all the remaining waiters 705 * so that they can re-check for zombies and for deadlock. 706 */ 707 if (lid != 0) { 708 l2 = radix_tree_lookup_node(&p->p_lwptree, 709 (uint64_t)(lid - 1)); 710 KASSERT(l2 == NULL || l2->l_lid == lid); 711 712 if (l2 != NULL && l2->l_waiter == curlid) 713 l2->l_waiter = 0; 714 } 715 p->p_nlwpwait--; 716 l->l_waitingfor = 0; 717 cv_broadcast(&p->p_lwpcv); 718 719 return error; 720 } 721 722 /* 723 * Find an unused LID for a new LWP. 
724 */ 725 static lwpid_t 726 lwp_find_free_lid(struct proc *p) 727 { 728 struct lwp *gang[32]; 729 lwpid_t lid; 730 unsigned n; 731 732 KASSERT(mutex_owned(p->p_lock)); 733 KASSERT(p->p_nlwpid > 0); 734 735 /* 736 * Scoot forward through the tree in blocks of LIDs doing gang 737 * lookup with dense=true, meaning the lookup will terminate the 738 * instant a hole is encountered. Most of the time the first entry 739 * (p->p_nlwpid) is free and the lookup fails fast. 740 */ 741 for (lid = p->p_nlwpid;;) { 742 n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1, 743 (void **)gang, __arraycount(gang), true); 744 if (n == 0) { 745 /* Start point was empty. */ 746 break; 747 } 748 KASSERT(gang[0]->l_lid == lid); 749 lid = gang[n - 1]->l_lid + 1; 750 if (n < __arraycount(gang)) { 751 /* Scan encountered a hole. */ 752 break; 753 } 754 } 755 756 return (lwpid_t)lid; 757 } 758 759 /* 760 * Create a new LWP within process 'p2', using LWP 'l1' as a template. 761 * The new LWP is created in state LSIDL and must be set running, 762 * suspended, or stopped by the caller. 763 */ 764 int 765 lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags, 766 void *stack, size_t stacksize, void (*func)(void *), void *arg, 767 lwp_t **rnewlwpp, int sclass, const sigset_t *sigmask, 768 const stack_t *sigstk) 769 { 770 struct lwp *l2; 771 turnstile_t *ts; 772 lwpid_t lid; 773 774 KASSERT(l1 == curlwp || l1->l_proc == &proc0); 775 776 /* 777 * Enforce limits, excluding the first lwp and kthreads. We must 778 * use the process credentials here when adjusting the limit, as 779 * they are what's tied to the accounting entity. However for 780 * authorizing the action, we'll use the LWP's credentials. 781 */ 782 mutex_enter(p2->p_lock); 783 if (p2->p_nlwps != 0 && p2 != &proc0) { 784 uid_t uid = kauth_cred_getuid(p2->p_cred); 785 int count = chglwpcnt(uid, 1); 786 if (__predict_false(count > 787 p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) { 788 if (kauth_authorize_process(l1->l_cred, 789 KAUTH_PROCESS_RLIMIT, p2, 790 KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), 791 &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR)) 792 != 0) { 793 (void)chglwpcnt(uid, -1); 794 mutex_exit(p2->p_lock); 795 return EAGAIN; 796 } 797 } 798 } 799 800 /* 801 * First off, reap any detached LWP waiting to be collected. 802 * We can re-use its LWP structure and turnstile. 803 */ 804 if ((l2 = p2->p_zomblwp) != NULL) { 805 p2->p_zomblwp = NULL; 806 lwp_free(l2, true, false); 807 /* p2 now unlocked by lwp_free() */ 808 ts = l2->l_ts; 809 KASSERT(l2->l_inheritedprio == -1); 810 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); 811 memset(l2, 0, sizeof(*l2)); 812 l2->l_ts = ts; 813 } else { 814 mutex_exit(p2->p_lock); 815 l2 = pool_cache_get(lwp_cache, PR_WAITOK); 816 memset(l2, 0, sizeof(*l2)); 817 l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); 818 SLIST_INIT(&l2->l_pi_lenders); 819 } 820 821 l2->l_stat = LSIDL; 822 l2->l_proc = p2; 823 l2->l_refcnt = 1; 824 l2->l_class = sclass; 825 826 /* 827 * If vfork(), we want the LWP to run fast and on the same CPU 828 * as its parent, so that it can reuse the VM context and cache 829 * footprint on the local CPU. 830 */ 831 l2->l_kpriority = ((flags & LWP_VFORK) ? 
true : false); 832 l2->l_kpribase = PRI_KERNEL; 833 l2->l_priority = l1->l_priority; 834 l2->l_inheritedprio = -1; 835 l2->l_protectprio = -1; 836 l2->l_auxprio = -1; 837 l2->l_flag = 0; 838 l2->l_pflag = LP_MPSAFE; 839 TAILQ_INIT(&l2->l_ld_locks); 840 l2->l_psrefs = 0; 841 kmsan_lwp_alloc(l2); 842 843 /* 844 * For vfork, borrow parent's lwpctl context if it exists. 845 * This also causes us to return via lwp_userret. 846 */ 847 if (flags & LWP_VFORK && l1->l_lwpctl) { 848 l2->l_lwpctl = l1->l_lwpctl; 849 l2->l_flag |= LW_LWPCTL; 850 } 851 852 /* 853 * If not the first LWP in the process, grab a reference to the 854 * descriptor table. 855 */ 856 l2->l_fd = p2->p_fd; 857 if (p2->p_nlwps != 0) { 858 KASSERT(l1->l_proc == p2); 859 fd_hold(l2); 860 } else { 861 KASSERT(l1->l_proc != p2); 862 } 863 864 if (p2->p_flag & PK_SYSTEM) { 865 /* Mark it as a system LWP. */ 866 l2->l_flag |= LW_SYSTEM; 867 } 868 869 kpreempt_disable(); 870 l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; 871 l2->l_cpu = l1->l_cpu; 872 kpreempt_enable(); 873 874 kdtrace_thread_ctor(NULL, l2); 875 lwp_initspecific(l2); 876 sched_lwp_fork(l1, l2); 877 lwp_update_creds(l2); 878 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); 879 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); 880 cv_init(&l2->l_sigcv, "sigwait"); 881 cv_init(&l2->l_waitcv, "vfork"); 882 l2->l_syncobj = &sched_syncobj; 883 PSREF_DEBUG_INIT_LWP(l2); 884 885 if (rnewlwpp != NULL) 886 *rnewlwpp = l2; 887 888 /* 889 * PCU state needs to be saved before calling uvm_lwp_fork() so that 890 * the MD cpu_lwp_fork() can copy the saved state to the new LWP. 891 */ 892 pcu_save_all(l1); 893 #if PCU_UNIT_COUNT > 0 894 l2->l_pcu_valid = l1->l_pcu_valid; 895 #endif 896 897 uvm_lwp_setuarea(l2, uaddr); 898 uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2); 899 900 if ((flags & LWP_PIDLID) != 0) { 901 /* Linux threads: use a PID. */ 902 lid = proc_alloc_pid(p2); 903 l2->l_pflag |= LP_PIDLID; 904 } else if (p2->p_nlwps == 0) { 905 /* 906 * First LWP in process. Copy the parent's LID to avoid 907 * causing problems for fork() + threads. Don't give 908 * subsequent threads the distinction of using LID 1. 909 */ 910 lid = l1->l_lid; 911 p2->p_nlwpid = 2; 912 } else { 913 /* Scan the radix tree for a free LID. */ 914 lid = 0; 915 } 916 917 /* 918 * Allocate LID if needed, and insert into the radix tree. The 919 * first LWP in most processes has a LID of 1. It turns out that if 920 * you insert an item with a key of zero to a radixtree, it's stored 921 * directly in the root (p_lwptree) and no extra memory is 922 * allocated. We therefore always subtract 1 from the LID, which 923 * means no memory is allocated for the tree unless the program is 924 * using threads. NB: the allocation and insert must take place 925 * under the same hold of p_lock. 926 */ 927 mutex_enter(p2->p_lock); 928 for (;;) { 929 int error; 930 931 l2->l_lid = (lid == 0 ? 
lwp_find_free_lid(p2) : lid); 932 933 rw_enter(&p2->p_treelock, RW_WRITER); 934 error = radix_tree_insert_node(&p2->p_lwptree, 935 (uint64_t)(l2->l_lid - 1), l2); 936 rw_exit(&p2->p_treelock); 937 938 if (__predict_true(error == 0)) { 939 if (lid == 0) 940 p2->p_nlwpid = l2->l_lid + 1; 941 break; 942 } 943 944 KASSERT(error == ENOMEM); 945 mutex_exit(p2->p_lock); 946 radix_tree_await_memory(); 947 mutex_enter(p2->p_lock); 948 } 949 950 if ((flags & LWP_DETACHED) != 0) { 951 l2->l_prflag = LPR_DETACHED; 952 p2->p_ndlwps++; 953 } else 954 l2->l_prflag = 0; 955 956 if (l1->l_proc == p2) { 957 /* 958 * These flags are set while p_lock is held. Copy with 959 * p_lock held too, so the LWP doesn't sneak into the 960 * process without them being set. 961 */ 962 l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); 963 } else { 964 /* fork(): pending core/exit doesn't apply to child. */ 965 l2->l_flag |= (l1->l_flag & LW_WREBOOT); 966 } 967 968 l2->l_sigstk = *sigstk; 969 l2->l_sigmask = *sigmask; 970 TAILQ_INIT(&l2->l_sigpend.sp_info); 971 sigemptyset(&l2->l_sigpend.sp_set); 972 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); 973 p2->p_nlwps++; 974 p2->p_nrlwps++; 975 976 KASSERT(l2->l_affinity == NULL); 977 978 /* Inherit the affinity mask. */ 979 if (l1->l_affinity) { 980 /* 981 * Note that we hold the state lock while inheriting 982 * the affinity to avoid race with sched_setaffinity(). 983 */ 984 lwp_lock(l1); 985 if (l1->l_affinity) { 986 kcpuset_use(l1->l_affinity); 987 l2->l_affinity = l1->l_affinity; 988 } 989 lwp_unlock(l1); 990 } 991 992 /* This marks the end of the "must be atomic" section. */ 993 mutex_exit(p2->p_lock); 994 995 SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0); 996 997 mutex_enter(proc_lock); 998 LIST_INSERT_HEAD(&alllwp, l2, l_list); 999 /* Inherit a processor-set */ 1000 l2->l_psid = l1->l_psid; 1001 mutex_exit(proc_lock); 1002 1003 SYSCALL_TIME_LWP_INIT(l2); 1004 1005 if (p2->p_emul->e_lwp_fork) 1006 (*p2->p_emul->e_lwp_fork)(l1, l2); 1007 1008 return (0); 1009 } 1010 1011 /* 1012 * Set a new LWP running. If the process is stopping, then the LWP is 1013 * created stopped. 1014 */ 1015 void 1016 lwp_start(lwp_t *l, int flags) 1017 { 1018 proc_t *p = l->l_proc; 1019 1020 mutex_enter(p->p_lock); 1021 lwp_lock(l); 1022 KASSERT(l->l_stat == LSIDL); 1023 if ((flags & LWP_SUSPENDED) != 0) { 1024 /* It'll suspend itself in lwp_userret(). */ 1025 l->l_flag |= LW_WSUSPEND; 1026 } 1027 if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { 1028 KASSERT(l->l_wchan == NULL); 1029 l->l_stat = LSSTOP; 1030 p->p_nrlwps--; 1031 lwp_unlock(l); 1032 } else { 1033 setrunnable(l); 1034 /* LWP now unlocked */ 1035 } 1036 mutex_exit(p->p_lock); 1037 } 1038 1039 /* 1040 * Called by MD code when a new LWP begins execution. Must be called 1041 * with the previous LWP locked (so at splsched), or if there is no 1042 * previous LWP, at splsched. 1043 */ 1044 void 1045 lwp_startup(struct lwp *prev, struct lwp *new_lwp) 1046 { 1047 kmutex_t *lock; 1048 1049 KASSERTMSG(new_lwp == curlwp, "l %p curlwp %p prevlwp %p", new_lwp, curlwp, prev); 1050 KASSERT(kpreempt_disabled()); 1051 KASSERT(prev != NULL); 1052 KASSERT((prev->l_pflag & LP_RUNNING) != 0); 1053 KASSERT(curcpu()->ci_mtx_count == -2); 1054 1055 /* 1056 * Immediately mark the previous LWP as no longer running and unlock 1057 * (to keep lock wait times short as possible). If a zombie, don't 1058 * touch after clearing LP_RUNNING as it could be reaped by another 1059 * CPU. Issue a memory barrier to ensure this. 
1060 */ 1061 lock = prev->l_mutex; 1062 if (__predict_false(prev->l_stat == LSZOMB)) { 1063 membar_sync(); 1064 } 1065 prev->l_pflag &= ~LP_RUNNING; 1066 mutex_spin_exit(lock); 1067 1068 /* Correct spin mutex count after mi_switch(). */ 1069 curcpu()->ci_mtx_count = 0; 1070 1071 /* Install new VM context. */ 1072 if (__predict_true(new_lwp->l_proc->p_vmspace)) { 1073 pmap_activate(new_lwp); 1074 } 1075 1076 /* We remain at IPL_SCHED from mi_switch() - reset it. */ 1077 spl0(); 1078 1079 LOCKDEBUG_BARRIER(NULL, 0); 1080 SDT_PROBE(proc, kernel, , lwp__start, new_lwp, 0, 0, 0, 0); 1081 1082 /* For kthreads, acquire kernel lock if not MPSAFE. */ 1083 if (__predict_false((new_lwp->l_pflag & LP_MPSAFE) == 0)) { 1084 KERNEL_LOCK(1, new_lwp); 1085 } 1086 } 1087 1088 /* 1089 * Exit an LWP. 1090 */ 1091 void 1092 lwp_exit(struct lwp *l) 1093 { 1094 struct proc *p = l->l_proc; 1095 struct lwp *l2; 1096 bool current; 1097 1098 current = (l == curlwp); 1099 1100 KASSERT(current || (l->l_stat == LSIDL && l->l_target_cpu == NULL)); 1101 KASSERT(p == curproc); 1102 1103 SDT_PROBE(proc, kernel, , lwp__exit, l, 0, 0, 0, 0); 1104 1105 /* Verify that we hold no locks; for DIAGNOSTIC check kernel_lock. */ 1106 LOCKDEBUG_BARRIER(NULL, 0); 1107 KASSERTMSG(curcpu()->ci_biglock_count == 0, "kernel_lock leaked"); 1108 1109 /* 1110 * If we are the last live LWP in a process, we need to exit the 1111 * entire process. We do so with an exit status of zero, because 1112 * it's a "controlled" exit, and because that's what Solaris does. 1113 * 1114 * We are not quite a zombie yet, but for accounting purposes we 1115 * must increment the count of zombies here. 1116 * 1117 * Note: the last LWP's specificdata will be deleted here. 1118 */ 1119 mutex_enter(p->p_lock); 1120 if (p->p_nlwps - p->p_nzlwps == 1) { 1121 KASSERT(current == true); 1122 KASSERT(p != &proc0); 1123 exit1(l, 0, 0); 1124 /* NOTREACHED */ 1125 } 1126 p->p_nzlwps++; 1127 mutex_exit(p->p_lock); 1128 1129 if (p->p_emul->e_lwp_exit) 1130 (*p->p_emul->e_lwp_exit)(l); 1131 1132 /* Drop filedesc reference. */ 1133 fd_free(); 1134 1135 /* Release fstrans private data. */ 1136 fstrans_lwp_dtor(l); 1137 1138 /* Delete the specificdata while it's still safe to sleep. */ 1139 lwp_finispecific(l); 1140 1141 /* 1142 * Release our cached credentials. 1143 */ 1144 kauth_cred_free(l->l_cred); 1145 callout_destroy(&l->l_timeout_ch); 1146 1147 /* 1148 * If traced, report LWP exit event to the debugger. 1149 * 1150 * Remove the LWP from the global list. 1151 * Free its LID from the PID namespace if needed. 1152 */ 1153 mutex_enter(proc_lock); 1154 1155 if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_EXIT)) == 1156 (PSL_TRACED|PSL_TRACELWP_EXIT)) { 1157 mutex_enter(p->p_lock); 1158 if (ISSET(p->p_sflag, PS_WEXIT)) { 1159 mutex_exit(p->p_lock); 1160 /* 1161 * We are exiting, bail out without informing parent 1162 * about a terminating LWP as it would deadlock. 1163 */ 1164 } else { 1165 eventswitch(TRAP_LWP, PTRACE_LWP_EXIT, l->l_lid); 1166 mutex_enter(proc_lock); 1167 } 1168 } 1169 1170 LIST_REMOVE(l, l_list); 1171 if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) { 1172 proc_free_pid(l->l_lid); 1173 } 1174 mutex_exit(proc_lock); 1175 1176 /* 1177 * Get rid of all references to the LWP that others (e.g. procfs) 1178 * may have, and mark the LWP as a zombie. If the LWP is detached, 1179 * mark it waiting for collection in the proc structure. Note that 1180 * before we can do that, we need to free any other dead, deatched 1181 * LWP waiting to meet its maker. 
1182 */ 1183 mutex_enter(p->p_lock); 1184 lwp_drainrefs(l); 1185 1186 if ((l->l_prflag & LPR_DETACHED) != 0) { 1187 while ((l2 = p->p_zomblwp) != NULL) { 1188 p->p_zomblwp = NULL; 1189 lwp_free(l2, false, false);/* releases proc mutex */ 1190 mutex_enter(p->p_lock); 1191 l->l_refcnt++; 1192 lwp_drainrefs(l); 1193 } 1194 p->p_zomblwp = l; 1195 } 1196 1197 /* 1198 * If we find a pending signal for the process and we have been 1199 * asked to check for signals, then we lose: arrange to have 1200 * all other LWPs in the process check for signals. 1201 */ 1202 if ((l->l_flag & LW_PENDSIG) != 0 && 1203 firstsig(&p->p_sigpend.sp_set) != 0) { 1204 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 1205 lwp_lock(l2); 1206 signotify(l2); 1207 lwp_unlock(l2); 1208 } 1209 } 1210 1211 /* 1212 * Release any PCU resources before becoming a zombie. 1213 */ 1214 pcu_discard_all(l); 1215 1216 lwp_lock(l); 1217 l->l_stat = LSZOMB; 1218 if (l->l_name != NULL) { 1219 strcpy(l->l_name, "(zombie)"); 1220 } 1221 lwp_unlock(l); 1222 p->p_nrlwps--; 1223 cv_broadcast(&p->p_lwpcv); 1224 if (l->l_lwpctl != NULL) 1225 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; 1226 mutex_exit(p->p_lock); 1227 1228 /* 1229 * We can no longer block. At this point, lwp_free() may already 1230 * be gunning for us. On a multi-CPU system, we may be off p_lwps. 1231 * 1232 * Free MD LWP resources. 1233 */ 1234 cpu_lwp_free(l, 0); 1235 1236 if (current) { 1237 /* Switch away into oblivion. */ 1238 lwp_lock(l); 1239 spc_lock(l->l_cpu); 1240 mi_switch(l); 1241 panic("lwp_exit"); 1242 } 1243 } 1244 1245 /* 1246 * Free a dead LWP's remaining resources. 1247 * 1248 * XXXLWP limits. 1249 */ 1250 void 1251 lwp_free(struct lwp *l, bool recycle, bool last) 1252 { 1253 struct proc *p = l->l_proc; 1254 struct rusage *ru; 1255 struct lwp *l2 __diagused; 1256 ksiginfoq_t kq; 1257 1258 KASSERT(l != curlwp); 1259 KASSERT(last || mutex_owned(p->p_lock)); 1260 1261 /* 1262 * We use the process credentials instead of the lwp credentials here 1263 * because the lwp credentials may be cached (just after a setuid call) 1264 * and we don't want to pay for syncing, since the lwp is going away 1265 * anyway. 1266 */ 1267 if (p != &proc0 && p->p_nlwps != 1) 1268 (void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1); 1269 1270 /* 1271 * If this was not the last LWP in the process, then adjust counters 1272 * and unlock. This is done differently for the last LWP in exit1(). 1273 */ 1274 if (!last) { 1275 /* 1276 * Add the LWP's run time to the process' base value. 1277 * This needs to coincide with coming off p_lwps. 1278 */ 1279 bintime_add(&p->p_rtime, &l->l_rtime); 1280 p->p_pctcpu += l->l_pctcpu; 1281 ru = &p->p_stats->p_ru; 1282 ruadd(ru, &l->l_ru); 1283 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 1284 ru->ru_nivcsw += l->l_nivcsw; 1285 LIST_REMOVE(l, l_sibling); 1286 p->p_nlwps--; 1287 p->p_nzlwps--; 1288 if ((l->l_prflag & LPR_DETACHED) != 0) 1289 p->p_ndlwps--; 1290 1291 /* Make note of the LID being free, and remove from tree. */ 1292 if (l->l_lid < p->p_nlwpid) 1293 p->p_nlwpid = l->l_lid; 1294 rw_enter(&p->p_treelock, RW_WRITER); 1295 l2 = radix_tree_remove_node(&p->p_lwptree, 1296 (uint64_t)(l->l_lid - 1)); 1297 KASSERT(l2 == l); 1298 rw_exit(&p->p_treelock); 1299 1300 /* 1301 * Have any LWPs sleeping in lwp_wait() recheck for 1302 * deadlock. 1303 */ 1304 cv_broadcast(&p->p_lwpcv); 1305 mutex_exit(p->p_lock); 1306 } 1307 1308 /* 1309 * In the unlikely event that the LWP is still on the CPU, 1310 * then spin until it has switched away. 1311 */ 1312 membar_consumer(); 1313 while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) { 1314 SPINLOCK_BACKOFF_HOOK; 1315 } 1316 1317 /* 1318 * Destroy the LWP's remaining signal information. 1319 */ 1320 ksiginfo_queue_init(&kq); 1321 sigclear(&l->l_sigpend, NULL, &kq); 1322 ksiginfo_queue_drain(&kq); 1323 cv_destroy(&l->l_sigcv); 1324 cv_destroy(&l->l_waitcv); 1325 1326 /* 1327 * Free lwpctl structure and affinity. 1328 */ 1329 if (l->l_lwpctl) { 1330 lwp_ctl_free(l); 1331 } 1332 if (l->l_affinity) { 1333 kcpuset_unuse(l->l_affinity, NULL); 1334 l->l_affinity = NULL; 1335 } 1336 1337 /* 1338 * Free the LWP's turnstile and the LWP structure itself unless the 1339 * caller wants to recycle them. Also, free the scheduler specific 1340 * data. 1341 * 1342 * We can't return turnstile0 to the pool (it didn't come from it), 1343 * so if it comes up just drop it quietly and move on. 1344 * 1345 * We don't recycle the VM resources at this time. 1346 */ 1347 1348 if (!recycle && l->l_ts != &turnstile0) 1349 pool_cache_put(turnstile_cache, l->l_ts); 1350 if (l->l_name != NULL) 1351 kmem_free(l->l_name, MAXCOMLEN); 1352 1353 kmsan_lwp_free(l); 1354 cpu_lwp_free2(l); 1355 uvm_lwp_exit(l); 1356 1357 KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); 1358 KASSERT(l->l_inheritedprio == -1); 1359 KASSERT(l->l_blcnt == 0); 1360 kdtrace_thread_dtor(NULL, l); 1361 if (!recycle) 1362 pool_cache_put(lwp_cache, l); 1363 } 1364
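/*
 * Illustrative only: a minimal sketch (not an existing caller in this file)
 * of how a subsystem would normally use lwp_migrate() below.  The LWP must
 * be locked on entry, and lwp_migrate() consumes the lock on all paths;
 * "l" and "tci" are hypothetical variables for this sketch.
 *
 *	lwp_lock(l);
 *	lwp_migrate(l, tci);		// tci: the target struct cpu_info
 */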
1365 /* 1366 * Migrate the LWP to another CPU. Unlocks the LWP. 1367 */ 1368 void 1369 lwp_migrate(lwp_t *l, struct cpu_info *tci) 1370 { 1371 struct schedstate_percpu *tspc; 1372 int lstat = l->l_stat; 1373 1374 KASSERT(lwp_locked(l, NULL)); 1375 KASSERT(tci != NULL); 1376 1377 /* If the LWP is still on the CPU, it must be handled like LSONPROC */ 1378 if ((l->l_pflag & LP_RUNNING) != 0) { 1379 lstat = LSONPROC; 1380 } 1381 1382 /* 1383 * The destination CPU could have been changed while the previous 1384 * migration was not yet finished. 1385 */ 1386 if (l->l_target_cpu != NULL) { 1387 l->l_target_cpu = tci; 1388 lwp_unlock(l); 1389 return; 1390 } 1391 1392 /* Nothing to do if trying to migrate to the same CPU */ 1393 if (l->l_cpu == tci) { 1394 lwp_unlock(l); 1395 return; 1396 } 1397 1398 KASSERT(l->l_target_cpu == NULL); 1399 tspc = &tci->ci_schedstate; 1400 switch (lstat) { 1401 case LSRUN: 1402 l->l_target_cpu = tci; 1403 break; 1404 case LSSLEEP: 1405 l->l_cpu = tci; 1406 break; 1407 case LSIDL: 1408 case LSSTOP: 1409 case LSSUSPENDED: 1410 l->l_cpu = tci; 1411 if (l->l_wchan == NULL) { 1412 lwp_unlock_to(l, tspc->spc_lwplock); 1413 return; 1414 } 1415 break; 1416 case LSONPROC: 1417 l->l_target_cpu = tci; 1418 spc_lock(l->l_cpu); 1419 sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); 1420 /* spc now unlocked */ 1421 break; 1422 } 1423 lwp_unlock(l); 1424 } 1425 1426 /* 1427 * Find an LWP in the process. Either argument may be zero, in which case 1428 * the calling process and/or the first LWP in the list are used. 1429 * On success, returns with the proc locked. 1430 */ 1431 struct lwp * 1432 lwp_find2(pid_t pid, lwpid_t lid) 1433 { 1434 proc_t *p; 1435 lwp_t *l; 1436 1437 /* Find the process. */ 1438 if (pid != 0) { 1439 mutex_enter(proc_lock); 1440 p = proc_find(pid); 1441 if (p == NULL) { 1442 mutex_exit(proc_lock); 1443 return NULL; 1444 } 1445 mutex_enter(p->p_lock); 1446 mutex_exit(proc_lock); 1447 } else { 1448 p = curlwp->l_proc; 1449 mutex_enter(p->p_lock); 1450 } 1451 /* Find the thread.
*/ 1452 if (lid != 0) { 1453 l = lwp_find(p, lid); 1454 } else { 1455 l = LIST_FIRST(&p->p_lwps); 1456 } 1457 if (l == NULL) { 1458 mutex_exit(p->p_lock); 1459 } 1460 return l; 1461 } 1462 1463 /* 1464 * Look up a live LWP within the specified process. 1465 * 1466 * Must be called with p->p_lock held (as it looks at the radix tree, 1467 * and also wants to exclude idle and zombie LWPs). 1468 */ 1469 struct lwp * 1470 lwp_find(struct proc *p, lwpid_t id) 1471 { 1472 struct lwp *l; 1473 1474 KASSERT(mutex_owned(p->p_lock)); 1475 1476 l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1)); 1477 KASSERT(l == NULL || l->l_lid == id); 1478 1479 /* 1480 * No need to lock - all of these conditions will 1481 * be visible with the process level mutex held. 1482 */ 1483 if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) 1484 l = NULL; 1485 1486 return l; 1487 } 1488 1489 /* 1490 * Update an LWP's cached credentials to mirror the process' master copy. 1491 * 1492 * This happens early in the syscall path, on user trap, and on LWP 1493 * creation. A long-running LWP can also voluntarily choose to update 1494 * its credentials by calling this routine. This may be called from 1495 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. 1496 */ 1497 void 1498 lwp_update_creds(struct lwp *l) 1499 { 1500 kauth_cred_t oc; 1501 struct proc *p; 1502 1503 p = l->l_proc; 1504 oc = l->l_cred; 1505 1506 mutex_enter(p->p_lock); 1507 kauth_cred_hold(p->p_cred); 1508 l->l_cred = p->p_cred; 1509 l->l_prflag &= ~LPR_CRMOD; 1510 mutex_exit(p->p_lock); 1511 if (oc != NULL) 1512 kauth_cred_free(oc); 1513 } 1514 1515 /* 1516 * Verify that an LWP is locked, and optionally verify that the lock matches 1517 * one we specify. 1518 */ 1519 int 1520 lwp_locked(struct lwp *l, kmutex_t *mtx) 1521 { 1522 kmutex_t *cur = l->l_mutex; 1523 1524 return mutex_owned(cur) && (mtx == cur || mtx == NULL); 1525 } 1526 1527 /* 1528 * Lend a new mutex to an LWP. The old mutex must be held. 1529 */ 1530 kmutex_t * 1531 lwp_setlock(struct lwp *l, kmutex_t *mtx) 1532 { 1533 kmutex_t *oldmtx = l->l_mutex; 1534 1535 KASSERT(mutex_owned(oldmtx)); 1536 1537 membar_exit(); 1538 l->l_mutex = mtx; 1539 return oldmtx; 1540 } 1541 1542 /* 1543 * Lend a new mutex to an LWP, and release the old mutex. The old mutex 1544 * must be held. 1545 */ 1546 void 1547 lwp_unlock_to(struct lwp *l, kmutex_t *mtx) 1548 { 1549 kmutex_t *old; 1550 1551 KASSERT(lwp_locked(l, NULL)); 1552 1553 old = l->l_mutex; 1554 membar_exit(); 1555 l->l_mutex = mtx; 1556 mutex_spin_exit(old); 1557 } 1558 1559 int 1560 lwp_trylock(struct lwp *l) 1561 { 1562 kmutex_t *old; 1563 1564 for (;;) { 1565 if (!mutex_tryenter(old = l->l_mutex)) 1566 return 0; 1567 if (__predict_true(l->l_mutex == old)) 1568 return 1; 1569 mutex_spin_exit(old); 1570 } 1571 } 1572 1573 void 1574 lwp_unsleep(lwp_t *l, bool unlock) 1575 { 1576 1577 KASSERT(mutex_owned(l->l_mutex)); 1578 (*l->l_syncobj->sobj_unsleep)(l, unlock); 1579 } 1580 1581 /* 1582 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is 1583 * set. 1584 */ 1585 void 1586 lwp_userret(struct lwp *l) 1587 { 1588 struct proc *p; 1589 int sig; 1590 1591 KASSERT(l == curlwp); 1592 KASSERT(l->l_stat == LSONPROC); 1593 p = l->l_proc; 1594 1595 #ifndef __HAVE_FAST_SOFTINTS 1596 /* Run pending soft interrupts. */ 1597 if (l->l_cpu->ci_data.cpu_softints != 0) 1598 softint_overlay(); 1599 #endif 1600 1601 /* 1602 * It is safe to do this read unlocked on a MP system.. 
1603 */ 1604 while ((l->l_flag & LW_USERRET) != 0) { 1605 /* 1606 * Process pending signals first, unless the process 1607 * is dumping core or exiting, where we will instead 1608 * enter the LW_WSUSPEND case below. 1609 */ 1610 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == 1611 LW_PENDSIG) { 1612 mutex_enter(p->p_lock); 1613 while ((sig = issignal(l)) != 0) 1614 postsig(sig); 1615 mutex_exit(p->p_lock); 1616 } 1617 1618 /* 1619 * Core-dump or suspend pending. 1620 * 1621 * In case of core dump, suspend ourselves, so that the kernel 1622 * stack and therefore the userland registers saved in the 1623 * trapframe are around for coredump() to write them out. 1624 * We also need to save any PCU resources that we have so that 1625 * they accessible for coredump(). We issue a wakeup on 1626 * p->p_lwpcv so that sigexit() will write the core file out 1627 * once all other LWPs are suspended. 1628 */ 1629 if ((l->l_flag & LW_WSUSPEND) != 0) { 1630 pcu_save_all(l); 1631 mutex_enter(p->p_lock); 1632 p->p_nrlwps--; 1633 cv_broadcast(&p->p_lwpcv); 1634 lwp_lock(l); 1635 l->l_stat = LSSUSPENDED; 1636 lwp_unlock(l); 1637 mutex_exit(p->p_lock); 1638 lwp_lock(l); 1639 spc_lock(l->l_cpu); 1640 mi_switch(l); 1641 } 1642 1643 /* Process is exiting. */ 1644 if ((l->l_flag & LW_WEXIT) != 0) { 1645 lwp_exit(l); 1646 KASSERT(0); 1647 /* NOTREACHED */ 1648 } 1649 1650 /* update lwpctl processor (for vfork child_return) */ 1651 if (l->l_flag & LW_LWPCTL) { 1652 lwp_lock(l); 1653 KASSERT(kpreempt_disabled()); 1654 l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); 1655 l->l_lwpctl->lc_pctr++; 1656 l->l_flag &= ~LW_LWPCTL; 1657 lwp_unlock(l); 1658 } 1659 } 1660 } 1661 1662 /* 1663 * Force an LWP to enter the kernel, to take a trip through lwp_userret(). 1664 */ 1665 void 1666 lwp_need_userret(struct lwp *l) 1667 { 1668 1669 KASSERT(!cpu_intr_p()); 1670 KASSERT(lwp_locked(l, NULL)); 1671 1672 /* 1673 * If the LWP is in any state other than LSONPROC, we know that it 1674 * is executing in-kernel and will hit userret() on the way out. 1675 * 1676 * If the LWP is curlwp, then we know we'll be back out to userspace 1677 * soon (can't be called from a hardware interrupt here). 1678 * 1679 * Otherwise, we can't be sure what the LWP is doing, so first make 1680 * sure the update to l_flag will be globally visible, and then 1681 * force the LWP to take a trip through trap() where it will do 1682 * userret(). 1683 */ 1684 if (l->l_stat == LSONPROC && l != curlwp) { 1685 membar_producer(); 1686 cpu_signotify(l); 1687 } 1688 } 1689 1690 /* 1691 * Add one reference to an LWP. This will prevent the LWP from 1692 * exiting, thus keep the lwp structure and PCB around to inspect. 1693 */ 1694 void 1695 lwp_addref(struct lwp *l) 1696 { 1697 1698 KASSERT(mutex_owned(l->l_proc->p_lock)); 1699 KASSERT(l->l_stat != LSZOMB); 1700 KASSERT(l->l_refcnt != 0); 1701 1702 l->l_refcnt++; 1703 } 1704 1705 /* 1706 * Remove one reference to an LWP. If this is the last reference, 1707 * then we must finalize the LWP's death. 1708 */ 1709 void 1710 lwp_delref(struct lwp *l) 1711 { 1712 struct proc *p = l->l_proc; 1713 1714 mutex_enter(p->p_lock); 1715 lwp_delref2(l); 1716 mutex_exit(p->p_lock); 1717 } 1718 1719 /* 1720 * Remove one reference to an LWP. If this is the last reference, 1721 * then we must finalize the LWP's death. The proc mutex is held 1722 * on entry. 
1723 */ 1724 void 1725 lwp_delref2(struct lwp *l) 1726 { 1727 struct proc *p = l->l_proc; 1728 1729 KASSERT(mutex_owned(p->p_lock)); 1730 KASSERT(l->l_stat != LSZOMB); 1731 KASSERT(l->l_refcnt > 0); 1732 if (--l->l_refcnt == 0) 1733 cv_broadcast(&p->p_lwpcv); 1734 } 1735 1736 /* 1737 * Drain all references to the current LWP. 1738 */ 1739 void 1740 lwp_drainrefs(struct lwp *l) 1741 { 1742 struct proc *p = l->l_proc; 1743 1744 KASSERT(mutex_owned(p->p_lock)); 1745 KASSERT(l->l_refcnt != 0); 1746 1747 l->l_refcnt--; 1748 while (l->l_refcnt != 0) 1749 cv_wait(&p->p_lwpcv, p->p_lock); 1750 } 1751 1752 /* 1753 * Return true if the specified LWP is 'alive'. Only p->p_lock need 1754 * be held. 1755 */ 1756 bool 1757 lwp_alive(lwp_t *l) 1758 { 1759 1760 KASSERT(mutex_owned(l->l_proc->p_lock)); 1761 1762 switch (l->l_stat) { 1763 case LSSLEEP: 1764 case LSRUN: 1765 case LSONPROC: 1766 case LSSTOP: 1767 case LSSUSPENDED: 1768 return true; 1769 default: 1770 return false; 1771 } 1772 } 1773 1774 /* 1775 * Return first live LWP in the process. 1776 */ 1777 lwp_t * 1778 lwp_find_first(proc_t *p) 1779 { 1780 lwp_t *l; 1781 1782 KASSERT(mutex_owned(p->p_lock)); 1783 1784 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1785 if (lwp_alive(l)) { 1786 return l; 1787 } 1788 } 1789 1790 return NULL; 1791 } 1792 1793 /* 1794 * Allocate a new lwpctl structure for a user LWP. 1795 */ 1796 int 1797 lwp_ctl_alloc(vaddr_t *uaddr) 1798 { 1799 lcproc_t *lp; 1800 u_int bit, i, offset; 1801 struct uvm_object *uao; 1802 int error; 1803 lcpage_t *lcp; 1804 proc_t *p; 1805 lwp_t *l; 1806 1807 l = curlwp; 1808 p = l->l_proc; 1809 1810 /* don't allow a vforked process to create lwp ctls */ 1811 if (p->p_lflag & PL_PPWAIT) 1812 return EBUSY; 1813 1814 if (l->l_lcpage != NULL) { 1815 lcp = l->l_lcpage; 1816 *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr; 1817 return 0; 1818 } 1819 1820 /* First time around, allocate header structure for the process. */ 1821 if ((lp = p->p_lwpctl) == NULL) { 1822 lp = kmem_alloc(sizeof(*lp), KM_SLEEP); 1823 mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); 1824 lp->lp_uao = NULL; 1825 TAILQ_INIT(&lp->lp_pages); 1826 mutex_enter(p->p_lock); 1827 if (p->p_lwpctl == NULL) { 1828 p->p_lwpctl = lp; 1829 mutex_exit(p->p_lock); 1830 } else { 1831 mutex_exit(p->p_lock); 1832 mutex_destroy(&lp->lp_lock); 1833 kmem_free(lp, sizeof(*lp)); 1834 lp = p->p_lwpctl; 1835 } 1836 } 1837 1838 /* 1839 * Set up an anonymous memory region to hold the shared pages. 1840 * Map them into the process' address space. The user vmspace 1841 * gets the first reference on the UAO. 1842 */ 1843 mutex_enter(&lp->lp_lock); 1844 if (lp->lp_uao == NULL) { 1845 lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); 1846 lp->lp_cur = 0; 1847 lp->lp_max = LWPCTL_UAREA_SZ; 1848 lp->lp_uva = p->p_emul->e_vm_default_addr(p, 1849 (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ, 1850 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 1851 error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, 1852 LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, 1853 UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); 1854 if (error != 0) { 1855 uao_detach(lp->lp_uao); 1856 lp->lp_uao = NULL; 1857 mutex_exit(&lp->lp_lock); 1858 return error; 1859 } 1860 } 1861 1862 /* Get a free block and allocate for this LWP. */ 1863 TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { 1864 if (lcp->lcp_nfree != 0) 1865 break; 1866 } 1867 if (lcp == NULL) { 1868 /* Nothing available - try to set up a free page. 
*/ 1869 if (lp->lp_cur == lp->lp_max) { 1870 mutex_exit(&lp->lp_lock); 1871 return ENOMEM; 1872 } 1873 lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); 1874 1875 /* 1876 * Wire the next page down in kernel space. Since this 1877 * is a new mapping, we must add a reference. 1878 */ 1879 uao = lp->lp_uao; 1880 (*uao->pgops->pgo_reference)(uao); 1881 lcp->lcp_kaddr = vm_map_min(kernel_map); 1882 error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE, 1883 uao, lp->lp_cur, PAGE_SIZE, 1884 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 1885 UVM_INH_NONE, UVM_ADV_RANDOM, 0)); 1886 if (error != 0) { 1887 mutex_exit(&lp->lp_lock); 1888 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1889 (*uao->pgops->pgo_detach)(uao); 1890 return error; 1891 } 1892 error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr, 1893 lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0); 1894 if (error != 0) { 1895 mutex_exit(&lp->lp_lock); 1896 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1897 lcp->lcp_kaddr + PAGE_SIZE); 1898 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1899 return error; 1900 } 1901 /* Prepare the page descriptor and link into the list. */ 1902 lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur; 1903 lp->lp_cur += PAGE_SIZE; 1904 lcp->lcp_nfree = LWPCTL_PER_PAGE; 1905 lcp->lcp_rotor = 0; 1906 memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ); 1907 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1908 } 1909 for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) { 1910 if (++i >= LWPCTL_BITMAP_ENTRIES) 1911 i = 0; 1912 } 1913 bit = ffs(lcp->lcp_bitmap[i]) - 1; 1914 lcp->lcp_bitmap[i] ^= (1U << bit); 1915 lcp->lcp_rotor = i; 1916 lcp->lcp_nfree--; 1917 l->l_lcpage = lcp; 1918 offset = (i << 5) + bit; 1919 l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset; 1920 *uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t); 1921 mutex_exit(&lp->lp_lock); 1922 1923 KPREEMPT_DISABLE(l); 1924 l->l_lwpctl->lc_curcpu = (int)cpu_index(curcpu()); 1925 KPREEMPT_ENABLE(l); 1926 1927 return 0; 1928 } 1929 1930 /* 1931 * Free an lwpctl structure back to the per-process list. 1932 */ 1933 void 1934 lwp_ctl_free(lwp_t *l) 1935 { 1936 struct proc *p = l->l_proc; 1937 lcproc_t *lp; 1938 lcpage_t *lcp; 1939 u_int map, offset; 1940 1941 /* don't free a lwp context we borrowed for vfork */ 1942 if (p->p_lflag & PL_PPWAIT) { 1943 l->l_lwpctl = NULL; 1944 return; 1945 } 1946 1947 lp = p->p_lwpctl; 1948 KASSERT(lp != NULL); 1949 1950 lcp = l->l_lcpage; 1951 offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr); 1952 KASSERT(offset < LWPCTL_PER_PAGE); 1953 1954 mutex_enter(&lp->lp_lock); 1955 lcp->lcp_nfree++; 1956 map = offset >> 5; 1957 lcp->lcp_bitmap[map] |= (1U << (offset & 31)); 1958 if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0) 1959 lcp->lcp_rotor = map; 1960 if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) { 1961 TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain); 1962 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1963 } 1964 mutex_exit(&lp->lp_lock); 1965 } 1966 1967 /* 1968 * Process is exiting; tear down lwpctl state. This can only be safely 1969 * called by the last LWP in the process. 
1970 */ 1971 void 1972 lwp_ctl_exit(void) 1973 { 1974 lcpage_t *lcp, *next; 1975 lcproc_t *lp; 1976 proc_t *p; 1977 lwp_t *l; 1978 1979 l = curlwp; 1980 l->l_lwpctl = NULL; 1981 l->l_lcpage = NULL; 1982 p = l->l_proc; 1983 lp = p->p_lwpctl; 1984 1985 KASSERT(lp != NULL); 1986 KASSERT(p->p_nlwps == 1); 1987 1988 for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) { 1989 next = TAILQ_NEXT(lcp, lcp_chain); 1990 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1991 lcp->lcp_kaddr + PAGE_SIZE); 1992 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1993 } 1994 1995 if (lp->lp_uao != NULL) { 1996 uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva, 1997 lp->lp_uva + LWPCTL_UAREA_SZ); 1998 } 1999 2000 mutex_destroy(&lp->lp_lock); 2001 kmem_free(lp, sizeof(*lp)); 2002 p->p_lwpctl = NULL; 2003 } 2004 2005 /* 2006 * Return the current LWP's "preemption counter". Used to detect 2007 * preemption across operations that can tolerate preemption without 2008 * crashing, but which may generate incorrect results if preempted. 2009 */ 2010 uint64_t 2011 lwp_pctr(void) 2012 { 2013 2014 return curlwp->l_ncsw; 2015 } 2016 2017 /* 2018 * Set an LWP's private data pointer. 2019 */ 2020 int 2021 lwp_setprivate(struct lwp *l, void *ptr) 2022 { 2023 int error = 0; 2024 2025 l->l_private = ptr; 2026 #ifdef __HAVE_CPU_LWP_SETPRIVATE 2027 error = cpu_lwp_setprivate(l, ptr); 2028 #endif 2029 return error; 2030 } 2031 2032 /* 2033 * Renumber the first and only LWP in a process on exec() or fork(). 2034 * Don't bother with p_treelock here as this is the only live LWP in 2035 * the proc right now. 2036 */ 2037 void 2038 lwp_renumber(lwp_t *l, lwpid_t lid) 2039 { 2040 lwp_t *l2 __diagused; 2041 proc_t *p = l->l_proc; 2042 int error; 2043 2044 KASSERT(p->p_nlwps == 1); 2045 2046 while (l->l_lid != lid) { 2047 mutex_enter(p->p_lock); 2048 error = radix_tree_insert_node(&p->p_lwptree, lid - 1, l); 2049 if (error == 0) { 2050 l2 = radix_tree_remove_node(&p->p_lwptree, 2051 (uint64_t)(l->l_lid - 1)); 2052 KASSERT(l2 == l); 2053 p->p_nlwpid = lid + 1; 2054 l->l_lid = lid; 2055 } 2056 mutex_exit(p->p_lock); 2057 2058 if (error == 0) 2059 break; 2060 2061 KASSERT(error == ENOMEM); 2062 radix_tree_await_memory(); 2063 } 2064 } 2065 2066 #if defined(DDB) 2067 #include <machine/pcb.h> 2068 2069 void 2070 lwp_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 2071 { 2072 lwp_t *l; 2073 2074 LIST_FOREACH(l, &alllwp, l_list) { 2075 uintptr_t stack = (uintptr_t)KSTACK_LOWEST_ADDR(l); 2076 2077 if (addr < stack || stack + KSTACK_SIZE <= addr) { 2078 continue; 2079 } 2080 (*pr)("%p is %p+%zu, LWP %p's stack\n", 2081 (void *)addr, (void *)stack, 2082 (size_t)(addr - stack), l); 2083 } 2084 } 2085 #endif /* defined(DDB) */ 2086
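/*
 * Illustrative only: a minimal sketch (not part of the kernel proper) of how
 * a subsystem such as procfs might look up and pin another LWP using the
 * interfaces above.  "pid" and "lid" are hypothetical parameters.
 *
 *	struct lwp *l;
 *
 *	l = lwp_find2(pid, lid);		// returns with p_lock held
 *	if (l != NULL) {
 *		lwp_addref(l);			// prevent the LWP from exiting
 *		mutex_exit(l->l_proc->p_lock);
 *		// ... inspect the LWP ...
 *		lwp_delref(l);			// takes and drops p_lock itself
 *	}
 */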