1 /* $NetBSD: kern_lwp.c,v 1.108 2008/04/28 20:24:03 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Nathan J. Williams, and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Overview 34 * 35 * Lightweight processes (LWPs) are the basic unit or thread of 36 * execution within the kernel. The core state of an LWP is described 37 * by "struct lwp", also known as lwp_t. 38 * 39 * Each LWP is contained within a process (described by "struct proc"). 40 * Every process contains at least one LWP, but may contain more. The 41 * process describes attributes shared among all of its LWPs such as a 42 * private address space, global execution state (stopped, active, 43 * zombie, ...), signal disposition and so on. On a multiprocessor 44 * machine, multiple LWPs may be executing concurrently in the kernel. 45 * 46 * Execution states 47 * 48 * At any given time, an LWP has overall state that is described by 49 * lwp::l_stat. The states are broken into two sets below. The first 50 * set is guaranteed to represent the absolute, current state of the 51 * LWP: 52 * 53 * LSONPROC 54 * 55 * On processor: the LWP is executing on a CPU, either in the 56 * kernel or in user space. 57 * 58 * LSRUN 59 * 60 * Runnable: the LWP is parked on a run queue, and may soon be 61 * chosen to run by an idle processor, or by a processor that 62 * has been asked to preempt a currently running but lower 63 * priority LWP. If the LWP is not swapped in (LW_INMEM == 0) 64 * then the LWP is not on a run queue, but may be soon. 65 * 66 * LSIDL 67 * 68 * Idle: the LWP has been created but has not yet executed, 69 * or it has ceased executing a unit of work and is waiting 70 * to be started again. 71 * 72 * LSSUSPENDED: 73 * 74 * Suspended: the LWP has had its execution suspended by 75 * another LWP in the same process using the _lwp_suspend() 76 * system call. User-level LWPs also enter the suspended 77 * state when the system is shutting down. 78 * 79 * The second set represents a "statement of intent" on behalf of the 80 * LWP.
The LWP may in fact be executing on a processor, may be 81 * sleeping or idle. It is expected to take the necessary action to 82 * stop executing or become "running" again within a short timeframe. 83 * The LW_RUNNING flag in lwp::l_flag indicates that an LWP is running. 84 * Importantly, it indicates that its state is tied to a CPU. 85 * 86 * LSZOMB: 87 * 88 * Dead or dying: the LWP has released most of its resources 89 * and is: a) about to switch away into oblivion b) has already 90 * switched away. When it switches away, its few remaining 91 * resources can be collected. 92 * 93 * LSSLEEP: 94 * 95 * Sleeping: the LWP has entered itself onto a sleep queue, and 96 * has switched away or will switch away shortly to allow other 97 * LWPs to run on the CPU. 98 * 99 * LSSTOP: 100 * 101 * Stopped: the LWP has been stopped as a result of a job 102 * control signal, or as a result of the ptrace() interface. 103 * 104 * Stopped LWPs may run briefly within the kernel to handle 105 * signals that they receive, but will not return to user space 106 * until their process' state is changed away from stopped. 107 * 108 * Single LWPs within a process cannot be set stopped 109 * selectively: all actions that can stop or continue LWPs 110 * occur at the process level. 111 * 112 * State transitions 113 * 114 * Note that the LSSTOP state may only be set when returning to 115 * user space in userret(), or when sleeping interruptibly. The 116 * LSSUSPENDED state may only be set in userret(). Before setting 117 * those states, we try to ensure that the LWPs will release all 118 * locks that they hold, and at a minimum try to ensure that the 119 * LWP can be set runnable again by a signal. 120 * 121 * LWPs may transition states in the following ways: 122 * 123 * RUN -------> ONPROC ONPROC -----> RUN 124 * > STOPPED > SLEEP 125 * > SUSPENDED > STOPPED 126 * > SUSPENDED 127 * > ZOMB 128 * 129 * STOPPED ---> RUN SUSPENDED --> RUN 130 * > SLEEP > SLEEP 131 * 132 * SLEEP -----> ONPROC IDL --------> RUN 133 * > RUN > SUSPENDED 134 * > STOPPED > STOPPED 135 * > SUSPENDED 136 * 137 * Other state transitions are possible with kernel threads (eg 138 * ONPROC -> IDL), but only happen under tightly controlled 139 * circumstances where the side effects are understood. 140 * 141 * Locking 142 * 143 * The majority of fields in 'struct lwp' are covered by a single, 144 * general spin lock pointed to by lwp::l_mutex. The locks covering 145 * each field are documented in sys/lwp.h. 146 * 147 * State transitions must be made with the LWP's general lock held, 148 * and may cause the LWP's lock pointer to change. Manipulation of 149 * the general lock is not performed directly, but through calls to 150 * lwp_lock(), lwp_relock() and similar. 151 * 152 * States and their associated locks: 153 * 154 * LSONPROC, LSZOMB: 155 * 156 * Always covered by spc_lwplock, which protects running LWPs. 157 * This is a per-CPU lock. 158 * 159 * LSIDL, LSRUN: 160 * 161 * Always covered by spc_mutex, which protects the run queues. 162 * This is a per-CPU lock. 163 * 164 * LSSLEEP: 165 * 166 * Covered by a lock associated with the sleep queue that the 167 * LWP resides on, indirectly referenced by l_sleepq->sq_mutex. 168 * 169 * LSSTOP, LSSUSPENDED: 170 * 171 * If the LWP was previously sleeping (l_wchan != NULL), then 172 * l_mutex references the sleep queue lock. If the LWP was 173 * runnable or on the CPU when halted, or has been removed from 174 * the sleep queue since halted, then the lock is spc_lwplock.
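 *
 *	As an illustration (an editor-added sketch, not part of the original
 *	comment): because the lock that l_mutex points at depends on the
 *	state, code that wants to examine an LWP takes the general lock
 *	first and only then looks at l_stat, e.g.:
 *
 *		lwp_lock(l);
 *		sleeping = (l->l_stat == LSSLEEP);
 *		lwp_unlock(l);
 *
 *	While the lock is held, l_mutex is whichever lock the list above
 *	names for the observed state.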
175 * 176 * The lock order is as follows: 177 * 178 * spc::spc_lwplock -> 179 * sleepq_t::sq_mutex -> 180 * tschain_t::tc_mutex -> 181 * spc::spc_mutex 182 * 183 * Each process has a scheduler state lock (proc::p_lock), and a 184 * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and 185 * so on. When an LWP is to be entered into or removed from one of the 186 * following states, p_lock must be held and the process wide counters 187 * adjusted: 188 * 189 * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED 190 * 191 * Note that an LWP is considered running or likely to run soon if in 192 * one of the following states. This affects the value of p_nrlwps: 193 * 194 * LSRUN, LSONPROC, LSSLEEP 195 * 196 * p_lock does not need to be held when transitioning among these 197 * three states. 198 */ 199 200 #include <sys/cdefs.h> 201 __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.108 2008/04/28 20:24:03 martin Exp $"); 202 203 #include "opt_ddb.h" 204 #include "opt_multiprocessor.h" 205 #include "opt_lockdebug.h" 206 207 #define _LWP_API_PRIVATE 208 209 #include <sys/param.h> 210 #include <sys/systm.h> 211 #include <sys/cpu.h> 212 #include <sys/pool.h> 213 #include <sys/proc.h> 214 #include <sys/syscallargs.h> 215 #include <sys/syscall_stats.h> 216 #include <sys/kauth.h> 217 #include <sys/sleepq.h> 218 #include <sys/user.h> 219 #include <sys/lockdebug.h> 220 #include <sys/kmem.h> 221 #include <sys/pset.h> 222 #include <sys/intr.h> 223 #include <sys/lwpctl.h> 224 #include <sys/atomic.h> 225 226 #include <uvm/uvm_extern.h> 227 #include <uvm/uvm_object.h> 228 229 struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp); 230 231 POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl", 232 &pool_allocator_nointr, IPL_NONE); 233 234 static pool_cache_t lwp_cache; 235 static specificdata_domain_t lwp_specificdata_domain; 236 237 void 238 lwpinit(void) 239 { 240 241 lwp_specificdata_domain = specificdata_domain_create(); 242 KASSERT(lwp_specificdata_domain != NULL); 243 lwp_sys_init(); 244 lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0, 245 "lwppl", NULL, IPL_NONE, NULL, NULL, NULL); 246 } 247 248 /* 249 * Set an LWP suspended. 250 * 251 * Must be called with p_lock held, and the LWP locked. Will unlock the 252 * LWP before return. 253 */ 254 int 255 lwp_suspend(struct lwp *curl, struct lwp *t) 256 { 257 int error; 258 259 KASSERT(mutex_owned(t->l_proc->p_lock)); 260 KASSERT(lwp_locked(t, NULL)); 261 262 KASSERT(curl != t || curl->l_stat == LSONPROC); 263 264 /* 265 * If the current LWP has been told to exit, we must not suspend anyone 266 * else or deadlock could occur. We won't return to userspace. 267 */ 268 if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) { 269 lwp_unlock(t); 270 return (EDEADLK); 271 } 272 273 error = 0; 274 275 switch (t->l_stat) { 276 case LSRUN: 277 case LSONPROC: 278 t->l_flag |= LW_WSUSPEND; 279 lwp_need_userret(t); 280 lwp_unlock(t); 281 break; 282 283 case LSSLEEP: 284 t->l_flag |= LW_WSUSPEND; 285 286 /* 287 * Kick the LWP and try to get it to the kernel boundary 288 * so that it will release any locks that it holds. 289 * setrunnable() will release the lock. 290 */ 291 if ((t->l_flag & LW_SINTR) != 0) 292 setrunnable(t); 293 else 294 lwp_unlock(t); 295 break; 296 297 case LSSUSPENDED: 298 lwp_unlock(t); 299 break; 300 301 case LSSTOP: 302 t->l_flag |= LW_WSUSPEND; 303 setrunnable(t); 304 break; 305 306 case LSIDL: 307 case LSZOMB: 308 error = EINTR; /* It's what Solaris does.....
*/ 309 lwp_unlock(t); 310 break; 311 } 312 313 return (error); 314 } 315 316 /* 317 * Restart a suspended LWP. 318 * 319 * Must be called with p_lock held, and the LWP locked. Will unlock the 320 * LWP before return. 321 */ 322 void 323 lwp_continue(struct lwp *l) 324 { 325 326 KASSERT(mutex_owned(l->l_proc->p_lock)); 327 KASSERT(lwp_locked(l, NULL)); 328 329 /* If rebooting or not suspended, then just bail out. */ 330 if ((l->l_flag & LW_WREBOOT) != 0) { 331 lwp_unlock(l); 332 return; 333 } 334 335 l->l_flag &= ~LW_WSUSPEND; 336 337 if (l->l_stat != LSSUSPENDED) { 338 lwp_unlock(l); 339 return; 340 } 341 342 /* setrunnable() will release the lock. */ 343 setrunnable(l); 344 } 345 346 /* 347 * Wait for an LWP within the current process to exit. If 'lid' is 348 * non-zero, we are waiting for a specific LWP. 349 * 350 * Must be called with p->p_lock held. 351 */ 352 int 353 lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags) 354 { 355 struct proc *p = l->l_proc; 356 struct lwp *l2; 357 int nfound, error; 358 lwpid_t curlid; 359 bool exiting; 360 361 KASSERT(mutex_owned(p->p_lock)); 362 363 p->p_nlwpwait++; 364 l->l_waitingfor = lid; 365 curlid = l->l_lid; 366 exiting = ((flags & LWPWAIT_EXITCONTROL) != 0); 367 368 for (;;) { 369 /* 370 * Avoid a race between exit1() and sigexit(): if the 371 * process is dumping core, then we need to bail out: call 372 * into lwp_userret() where we will be suspended until the 373 * deed is done. 374 */ 375 if ((p->p_sflag & PS_WCORE) != 0) { 376 mutex_exit(p->p_lock); 377 lwp_userret(l); 378 #ifdef DIAGNOSTIC 379 panic("lwp_wait1"); 380 #endif 381 /* NOTREACHED */ 382 } 383 384 /* 385 * First off, drain any detached LWP that is waiting to be 386 * reaped. 387 */ 388 while ((l2 = p->p_zomblwp) != NULL) { 389 p->p_zomblwp = NULL; 390 lwp_free(l2, false, false);/* releases proc mutex */ 391 mutex_enter(p->p_lock); 392 } 393 394 /* 395 * Now look for an LWP to collect. If the whole process is 396 * exiting, count detached LWPs as eligible to be collected, 397 * but don't drain them here. 398 */ 399 nfound = 0; 400 error = 0; 401 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 402 /* 403 * If a specific wait and the target is waiting on 404 * us, then avoid deadlock. This also traps LWPs 405 * that try to wait on themselves. 406 * 407 * Note that this does not handle more complicated 408 * cycles, like: t1 -> t2 -> t3 -> t1. The process 409 * can still be killed so it is not a major problem. 410 */ 411 if (l2->l_lid == lid && l2->l_waitingfor == curlid) { 412 error = EDEADLK; 413 break; 414 } 415 if (l2 == l) 416 continue; 417 if ((l2->l_prflag & LPR_DETACHED) != 0) { 418 nfound += exiting; 419 continue; 420 } 421 if (lid != 0) { 422 if (l2->l_lid != lid) 423 continue; 424 /* 425 * Mark this LWP as the first waiter, if there 426 * is no other. 427 */ 428 if (l2->l_waiter == 0) 429 l2->l_waiter = curlid; 430 } else if (l2->l_waiter != 0) { 431 /* 432 * It already has a waiter - so don't 433 * collect it. If the waiter doesn't 434 * grab it we'll get another chance 435 * later. 436 */ 437 nfound++; 438 continue; 439 } 440 nfound++; 441 442 /* No need to lock the LWP in order to see LSZOMB. */ 443 if (l2->l_stat != LSZOMB) 444 continue; 445 446 /* 447 * We're no longer waiting. Reset the "first waiter" 448 * pointer on the target, in case it was us. 449 */ 450 l->l_waitingfor = 0; 451 l2->l_waiter = 0; 452 p->p_nlwpwait--; 453 if (departed) 454 *departed = l2->l_lid; 455 sched_lwp_collect(l2); 456 457 /* lwp_free() releases the proc lock. 
*/ 458 lwp_free(l2, false, false); 459 mutex_enter(p->p_lock); 460 return 0; 461 } 462 463 if (error != 0) 464 break; 465 if (nfound == 0) { 466 error = ESRCH; 467 break; 468 } 469 470 /* 471 * The kernel is careful to ensure that it can not deadlock 472 * when exiting - just keep waiting. 473 */ 474 if (exiting) { 475 KASSERT(p->p_nlwps > 1); 476 cv_wait(&p->p_lwpcv, p->p_lock); 477 continue; 478 } 479 480 /* 481 * If all other LWPs are waiting for exits or suspends 482 * and the supply of zombies and potential zombies is 483 * exhausted, then we are about to deadlock. 484 * 485 * If the process is exiting (and this LWP is not the one 486 * that is coordinating the exit) then bail out now. 487 */ 488 if ((p->p_sflag & PS_WEXIT) != 0 || 489 p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) { 490 error = EDEADLK; 491 break; 492 } 493 494 /* 495 * Sit around and wait for something to happen. We'll be 496 * awoken if any of the conditions examined change: if an 497 * LWP exits, is collected, or is detached. 498 */ 499 if ((error = cv_wait_sig(&p->p_lwpcv, p->p_lock)) != 0) 500 break; 501 } 502 503 /* 504 * We didn't find any LWPs to collect, we may have received a 505 * signal, or some other condition has caused us to bail out. 506 * 507 * If waiting on a specific LWP, clear the waiters marker: some 508 * other LWP may want it. Then, kick all the remaining waiters 509 * so that they can re-check for zombies and for deadlock. 510 */ 511 if (lid != 0) { 512 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 513 if (l2->l_lid == lid) { 514 if (l2->l_waiter == curlid) 515 l2->l_waiter = 0; 516 break; 517 } 518 } 519 } 520 p->p_nlwpwait--; 521 l->l_waitingfor = 0; 522 cv_broadcast(&p->p_lwpcv); 523 524 return error; 525 } 526 527 /* 528 * Create a new LWP within process 'p2', using LWP 'l1' as a template. 529 * The new LWP is created in state LSIDL and must be set running, 530 * suspended, or stopped by the caller. 531 */ 532 int 533 lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, 534 void *stack, size_t stacksize, void (*func)(void *), void *arg, 535 lwp_t **rnewlwpp, int sclass) 536 { 537 struct lwp *l2, *isfree; 538 turnstile_t *ts; 539 540 KASSERT(l1 == curlwp || l1->l_proc == &proc0); 541 542 /* 543 * First off, reap any detached LWP waiting to be collected. 544 * We can re-use its LWP structure and turnstile. 545 */ 546 isfree = NULL; 547 if (p2->p_zomblwp != NULL) { 548 mutex_enter(p2->p_lock); 549 if ((isfree = p2->p_zomblwp) != NULL) { 550 p2->p_zomblwp = NULL; 551 lwp_free(isfree, true, false);/* releases proc mutex */ 552 } else 553 mutex_exit(p2->p_lock); 554 } 555 if (isfree == NULL) { 556 l2 = pool_cache_get(lwp_cache, PR_WAITOK); 557 memset(l2, 0, sizeof(*l2)); 558 l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); 559 SLIST_INIT(&l2->l_pi_lenders); 560 } else { 561 l2 = isfree; 562 ts = l2->l_ts; 563 KASSERT(l2->l_inheritedprio == -1); 564 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); 565 memset(l2, 0, sizeof(*l2)); 566 l2->l_ts = ts; 567 } 568 569 l2->l_stat = LSIDL; 570 l2->l_proc = p2; 571 l2->l_refcnt = 1; 572 l2->l_class = sclass; 573 l2->l_kpriority = l1->l_kpriority; 574 l2->l_kpribase = PRI_KERNEL; 575 l2->l_priority = l1->l_priority; 576 l2->l_inheritedprio = -1; 577 l2->l_flag = inmem ? 
LW_INMEM : 0; 578 l2->l_pflag = LP_MPSAFE; 579 l2->l_fd = p2->p_fd; 580 581 if (p2->p_flag & PK_SYSTEM) { 582 /* Mark it as a system LWP and not a candidate for swapping */ 583 l2->l_flag |= LW_SYSTEM; 584 } 585 586 kpreempt_disable(); 587 l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; 588 l2->l_cpu = l1->l_cpu; 589 kpreempt_enable(); 590 591 lwp_initspecific(l2); 592 sched_lwp_fork(l1, l2); 593 lwp_update_creds(l2); 594 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); 595 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); 596 mutex_init(&l2->l_swaplock, MUTEX_DEFAULT, IPL_NONE); 597 cv_init(&l2->l_sigcv, "sigwait"); 598 l2->l_syncobj = &sched_syncobj; 599 600 if (rnewlwpp != NULL) 601 *rnewlwpp = l2; 602 603 l2->l_addr = UAREA_TO_USER(uaddr); 604 uvm_lwp_fork(l1, l2, stack, stacksize, func, 605 (arg != NULL) ? arg : l2); 606 607 mutex_enter(p2->p_lock); 608 609 if ((flags & LWP_DETACHED) != 0) { 610 l2->l_prflag = LPR_DETACHED; 611 p2->p_ndlwps++; 612 } else 613 l2->l_prflag = 0; 614 615 l2->l_sigmask = l1->l_sigmask; 616 CIRCLEQ_INIT(&l2->l_sigpend.sp_info); 617 sigemptyset(&l2->l_sigpend.sp_set); 618 619 p2->p_nlwpid++; 620 if (p2->p_nlwpid == 0) 621 p2->p_nlwpid++; 622 l2->l_lid = p2->p_nlwpid; 623 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); 624 p2->p_nlwps++; 625 626 mutex_exit(p2->p_lock); 627 628 mutex_enter(proc_lock); 629 LIST_INSERT_HEAD(&alllwp, l2, l_list); 630 mutex_exit(proc_lock); 631 632 if ((p2->p_flag & PK_SYSTEM) == 0) { 633 /* Locking is needed, since LWP is in the list of all LWPs */ 634 lwp_lock(l2); 635 /* Inherit a processor-set */ 636 l2->l_psid = l1->l_psid; 637 /* Inherit an affinity */ 638 memcpy(&l2->l_affinity, &l1->l_affinity, sizeof(cpuset_t)); 639 /* Look for a CPU to start */ 640 l2->l_cpu = sched_takecpu(l2); 641 lwp_unlock_to(l2, l2->l_cpu->ci_schedstate.spc_mutex); 642 } 643 644 SYSCALL_TIME_LWP_INIT(l2); 645 646 if (p2->p_emul->e_lwp_fork) 647 (*p2->p_emul->e_lwp_fork)(l1, l2); 648 649 return (0); 650 } 651 652 /* 653 * Called by MD code when a new LWP begins execution. Must be called 654 * with the previous LWP locked (so at splsched), or if there is no 655 * previous LWP, at splsched. 656 */ 657 void 658 lwp_startup(struct lwp *prev, struct lwp *new) 659 { 660 661 KASSERT(kpreempt_disabled()); 662 if (prev != NULL) { 663 /* 664 * Normalize the count of the spin-mutexes, it was 665 * increased in mi_switch(). Unmark the state of 666 * context switch - it is finished for previous LWP. 667 */ 668 curcpu()->ci_mtx_count++; 669 membar_exit(); 670 prev->l_ctxswtch = 0; 671 } 672 KPREEMPT_DISABLE(new); 673 spl0(); 674 pmap_activate(new); 675 LOCKDEBUG_BARRIER(NULL, 0); 676 KPREEMPT_ENABLE(new); 677 if ((new->l_pflag & LP_MPSAFE) == 0) { 678 KERNEL_LOCK(1, new); 679 } 680 } 681 682 /* 683 * Exit an LWP. 684 */ 685 void 686 lwp_exit(struct lwp *l) 687 { 688 struct proc *p = l->l_proc; 689 struct lwp *l2; 690 bool current; 691 692 current = (l == curlwp); 693 694 KASSERT(current || l->l_stat == LSIDL); 695 696 /* 697 * Verify that we hold no locks other than the kernel lock. 698 */ 699 #ifdef MULTIPROCESSOR 700 LOCKDEBUG_BARRIER(&kernel_lock, 0); 701 #else 702 LOCKDEBUG_BARRIER(NULL, 0); 703 #endif 704 705 /* 706 * If we are the last live LWP in a process, we need to exit the 707 * entire process. We do so with an exit status of zero, because 708 * it's a "controlled" exit, and because that's what Solaris does. 709 * 710 * We are not quite a zombie yet, but for accounting purposes we 711 * must increment the count of zombies here. 
712 * 713 * Note: the last LWP's specificdata will be deleted here. 714 */ 715 mutex_enter(p->p_lock); 716 if (p->p_nlwps - p->p_nzlwps == 1) { 717 KASSERT(current == true); 718 /* XXXSMP kernel_lock not held */ 719 exit1(l, 0); 720 /* NOTREACHED */ 721 } 722 p->p_nzlwps++; 723 mutex_exit(p->p_lock); 724 725 if (p->p_emul->e_lwp_exit) 726 (*p->p_emul->e_lwp_exit)(l); 727 728 /* Delete the specificdata while it's still safe to sleep. */ 729 specificdata_fini(lwp_specificdata_domain, &l->l_specdataref); 730 731 /* 732 * Release our cached credentials. 733 */ 734 kauth_cred_free(l->l_cred); 735 callout_destroy(&l->l_timeout_ch); 736 737 /* 738 * While we can still block, mark the LWP as unswappable to 739 * prevent conflicts with the with the swapper. 740 */ 741 if (current) 742 uvm_lwp_hold(l); 743 744 /* 745 * Remove the LWP from the global list. 746 */ 747 mutex_enter(proc_lock); 748 LIST_REMOVE(l, l_list); 749 mutex_exit(proc_lock); 750 751 /* 752 * Get rid of all references to the LWP that others (e.g. procfs) 753 * may have, and mark the LWP as a zombie. If the LWP is detached, 754 * mark it waiting for collection in the proc structure. Note that 755 * before we can do that, we need to free any other dead, deatched 756 * LWP waiting to meet its maker. 757 */ 758 mutex_enter(p->p_lock); 759 lwp_drainrefs(l); 760 761 if ((l->l_prflag & LPR_DETACHED) != 0) { 762 while ((l2 = p->p_zomblwp) != NULL) { 763 p->p_zomblwp = NULL; 764 lwp_free(l2, false, false);/* releases proc mutex */ 765 mutex_enter(p->p_lock); 766 l->l_refcnt++; 767 lwp_drainrefs(l); 768 } 769 p->p_zomblwp = l; 770 } 771 772 /* 773 * If we find a pending signal for the process and we have been 774 * asked to check for signals, then we loose: arrange to have 775 * all other LWPs in the process check for signals. 776 */ 777 if ((l->l_flag & LW_PENDSIG) != 0 && 778 firstsig(&p->p_sigpend.sp_set) != 0) { 779 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 780 lwp_lock(l2); 781 l2->l_flag |= LW_PENDSIG; 782 lwp_unlock(l2); 783 } 784 } 785 786 lwp_lock(l); 787 l->l_stat = LSZOMB; 788 if (l->l_name != NULL) 789 strcpy(l->l_name, "(zombie)"); 790 lwp_unlock(l); 791 p->p_nrlwps--; 792 cv_broadcast(&p->p_lwpcv); 793 if (l->l_lwpctl != NULL) 794 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED; 795 mutex_exit(p->p_lock); 796 797 /* 798 * We can no longer block. At this point, lwp_free() may already 799 * be gunning for us. On a multi-CPU system, we may be off p_lwps. 800 * 801 * Free MD LWP resources. 802 */ 803 #ifndef __NO_CPU_LWP_FREE 804 cpu_lwp_free(l, 0); 805 #endif 806 807 if (current) { 808 pmap_deactivate(l); 809 810 /* 811 * Release the kernel lock, and switch away into 812 * oblivion. 813 */ 814 #ifdef notyet 815 /* XXXSMP hold in lwp_userret() */ 816 KERNEL_UNLOCK_LAST(l); 817 #else 818 KERNEL_UNLOCK_ALL(l, NULL); 819 #endif 820 lwp_exit_switchaway(l); 821 } 822 } 823 824 void 825 lwp_exit_switchaway(struct lwp *l) 826 { 827 struct cpu_info *ci; 828 struct lwp *idlelwp; 829 830 (void)splsched(); 831 l->l_flag &= ~LW_RUNNING; 832 ci = curcpu(); 833 ci->ci_data.cpu_nswtch++; 834 idlelwp = ci->ci_data.cpu_idlelwp; 835 idlelwp->l_stat = LSONPROC; 836 837 /* 838 * cpu_onproc must be updated with the CPU locked, as 839 * aston() may try to set a AST pending on the LWP (and 840 * it does so with the CPU locked). Otherwise, the LWP 841 * may be destroyed before the AST can be set, leading 842 * to a user-after-free. 
843 */ 844 spc_lock(ci); 845 ci->ci_data.cpu_onproc = idlelwp; 846 spc_unlock(ci); 847 cpu_switchto(NULL, idlelwp, false); 848 } 849 850 /* 851 * Free a dead LWP's remaining resources. 852 * 853 * XXXLWP limits. 854 */ 855 void 856 lwp_free(struct lwp *l, bool recycle, bool last) 857 { 858 struct proc *p = l->l_proc; 859 struct rusage *ru; 860 ksiginfoq_t kq; 861 862 KASSERT(l != curlwp); 863 864 /* 865 * If this was not the last LWP in the process, then adjust 866 * counters and unlock. 867 */ 868 if (!last) { 869 /* 870 * Add the LWP's run time to the process' base value. 871 * This needs to co-incide with coming off p_lwps. 872 */ 873 bintime_add(&p->p_rtime, &l->l_rtime); 874 p->p_pctcpu += l->l_pctcpu; 875 ru = &p->p_stats->p_ru; 876 ruadd(ru, &l->l_ru); 877 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 878 ru->ru_nivcsw += l->l_nivcsw; 879 LIST_REMOVE(l, l_sibling); 880 p->p_nlwps--; 881 p->p_nzlwps--; 882 if ((l->l_prflag & LPR_DETACHED) != 0) 883 p->p_ndlwps--; 884 885 /* 886 * Have any LWPs sleeping in lwp_wait() recheck for 887 * deadlock. 888 */ 889 cv_broadcast(&p->p_lwpcv); 890 mutex_exit(p->p_lock); 891 } 892 893 #ifdef MULTIPROCESSOR 894 /* 895 * In the unlikely event that the LWP is still on the CPU, 896 * then spin until it has switched away. We need to release 897 * all locks to avoid deadlock against interrupt handlers on 898 * the target CPU. 899 */ 900 if ((l->l_flag & LW_RUNNING) != 0 || l->l_cpu->ci_curlwp == l) { 901 int count; 902 (void)count; /* XXXgcc */ 903 KERNEL_UNLOCK_ALL(curlwp, &count); 904 while ((l->l_flag & LW_RUNNING) != 0 || 905 l->l_cpu->ci_curlwp == l) 906 SPINLOCK_BACKOFF_HOOK; 907 KERNEL_LOCK(count, curlwp); 908 } 909 #endif 910 911 /* 912 * Destroy the LWP's remaining signal information. 913 */ 914 ksiginfo_queue_init(&kq); 915 sigclear(&l->l_sigpend, NULL, &kq); 916 ksiginfo_queue_drain(&kq); 917 cv_destroy(&l->l_sigcv); 918 mutex_destroy(&l->l_swaplock); 919 920 /* 921 * Free the LWP's turnstile and the LWP structure itself unless the 922 * caller wants to recycle them. Also, free the scheduler specific 923 * data. 924 * 925 * We can't return turnstile0 to the pool (it didn't come from it), 926 * so if it comes up just drop it quietly and move on. 927 * 928 * We don't recycle the VM resources at this time. 929 */ 930 if (l->l_lwpctl != NULL) 931 lwp_ctl_free(l); 932 sched_lwp_exit(l); 933 934 if (!recycle && l->l_ts != &turnstile0) 935 pool_cache_put(turnstile_cache, l->l_ts); 936 if (l->l_name != NULL) 937 kmem_free(l->l_name, MAXCOMLEN); 938 #ifndef __NO_CPU_LWP_FREE 939 cpu_lwp_free2(l); 940 #endif 941 KASSERT((l->l_flag & LW_INMEM) != 0); 942 uvm_lwp_exit(l); 943 KASSERT(SLIST_EMPTY(&l->l_pi_lenders)); 944 KASSERT(l->l_inheritedprio == -1); 945 if (!recycle) 946 pool_cache_put(lwp_cache, l); 947 } 948 949 /* 950 * Pick a LWP to represent the process for those operations which 951 * want information about a "process" that is actually associated 952 * with a LWP. 953 * 954 * If 'locking' is false, no locking or lock checks are performed. 955 * This is intended for use by DDB. 956 * 957 * We don't bother locking the LWP here, since code that uses this 958 * interface is broken by design and an exact match is not required. 
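 *
 *	An illustrative call (editor's addition, not part of the original
 *	comment), with 'locking' non-zero so the p_lock assertion applies:
 *
 *		mutex_enter(p->p_lock);
 *		l = proc_representative_lwp(p, &nrlwps, 1);
 *		mutex_exit(p->p_lock);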
959 */ 960 struct lwp * 961 proc_representative_lwp(struct proc *p, int *nrlwps, int locking) 962 { 963 struct lwp *l, *onproc, *running, *sleeping, *stopped, *suspended; 964 struct lwp *signalled; 965 int cnt; 966 967 if (locking) { 968 KASSERT(mutex_owned(p->p_lock)); 969 } 970 971 /* Trivial case: only one LWP */ 972 if (p->p_nlwps == 1) { 973 l = LIST_FIRST(&p->p_lwps); 974 if (nrlwps) 975 *nrlwps = (l->l_stat == LSONPROC || l->l_stat == LSRUN); 976 return l; 977 } 978 979 cnt = 0; 980 switch (p->p_stat) { 981 case SSTOP: 982 case SACTIVE: 983 /* Pick the most live LWP */ 984 onproc = running = sleeping = stopped = suspended = NULL; 985 signalled = NULL; 986 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 987 if ((l->l_flag & LW_IDLE) != 0) { 988 continue; 989 } 990 if (l->l_lid == p->p_sigctx.ps_lwp) 991 signalled = l; 992 switch (l->l_stat) { 993 case LSONPROC: 994 onproc = l; 995 cnt++; 996 break; 997 case LSRUN: 998 running = l; 999 cnt++; 1000 break; 1001 case LSSLEEP: 1002 sleeping = l; 1003 break; 1004 case LSSTOP: 1005 stopped = l; 1006 break; 1007 case LSSUSPENDED: 1008 suspended = l; 1009 break; 1010 } 1011 } 1012 if (nrlwps) 1013 *nrlwps = cnt; 1014 if (signalled) 1015 l = signalled; 1016 else if (onproc) 1017 l = onproc; 1018 else if (running) 1019 l = running; 1020 else if (sleeping) 1021 l = sleeping; 1022 else if (stopped) 1023 l = stopped; 1024 else if (suspended) 1025 l = suspended; 1026 else 1027 break; 1028 return l; 1029 #ifdef DIAGNOSTIC 1030 case SIDL: 1031 case SZOMB: 1032 case SDYING: 1033 case SDEAD: 1034 if (locking) 1035 mutex_exit(p->p_lock); 1036 /* We have more than one LWP and we're in SIDL? 1037 * How'd that happen? 1038 */ 1039 panic("Too many LWPs in idle/dying process %d (%s) stat = %d", 1040 p->p_pid, p->p_comm, p->p_stat); 1041 break; 1042 default: 1043 if (locking) 1044 mutex_exit(p->p_lock); 1045 panic("Process %d (%s) in unknown state %d", 1046 p->p_pid, p->p_comm, p->p_stat); 1047 #endif 1048 } 1049 1050 if (locking) 1051 mutex_exit(p->p_lock); 1052 panic("proc_representative_lwp: couldn't find a lwp for process" 1053 " %d (%s)", p->p_pid, p->p_comm); 1054 /* NOTREACHED */ 1055 return NULL; 1056 } 1057 1058 /* 1059 * Migrate the LWP to another CPU. Unlocks the LWP. 1060 */ 1061 void 1062 lwp_migrate(lwp_t *l, struct cpu_info *ci) 1063 { 1064 struct schedstate_percpu *spc; 1065 KASSERT(lwp_locked(l, NULL)); 1066 1067 if (l->l_cpu == ci) { 1068 lwp_unlock(l); 1069 return; 1070 } 1071 1072 spc = &ci->ci_schedstate; 1073 switch (l->l_stat) { 1074 case LSRUN: 1075 if (l->l_flag & LW_INMEM) { 1076 l->l_target_cpu = ci; 1077 break; 1078 } 1079 case LSIDL: 1080 l->l_cpu = ci; 1081 lwp_unlock_to(l, spc->spc_mutex); 1082 KASSERT(!mutex_owned(spc->spc_mutex)); 1083 return; 1084 case LSSLEEP: 1085 l->l_cpu = ci; 1086 break; 1087 case LSSTOP: 1088 case LSSUSPENDED: 1089 if (l->l_wchan != NULL) { 1090 l->l_cpu = ci; 1091 break; 1092 } 1093 case LSONPROC: 1094 l->l_target_cpu = ci; 1095 break; 1096 } 1097 lwp_unlock(l); 1098 } 1099 1100 /* 1101 * Find the LWP in the process. Arguments may be zero, in which case 1102 * the calling process and the first LWP in the list will be used. 1103 * On success, returns with the proc locked. 1104 */ 1105 struct lwp * 1106 lwp_find2(pid_t pid, lwpid_t lid) 1107 { 1108 proc_t *p; 1109 lwp_t *l; 1110 1111 /* Find the process */ 1112 p = (pid == 0) ?
curlwp->l_proc : p_find(pid, PFIND_UNLOCK_FAIL); 1113 if (p == NULL) 1114 return NULL; 1115 mutex_enter(p->p_lock); 1116 if (pid != 0) { 1117 /* Case of p_find */ 1118 mutex_exit(proc_lock); 1119 } 1120 1121 /* Find the thread */ 1122 l = (lid == 0) ? LIST_FIRST(&p->p_lwps) : lwp_find(p, lid); 1123 if (l == NULL) { 1124 mutex_exit(p->p_lock); 1125 } 1126 1127 return l; 1128 } 1129 1130 /* 1131 * Look up a live LWP within the specified process, and return it locked. 1132 * 1133 * Must be called with p->p_lock held. 1134 */ 1135 struct lwp * 1136 lwp_find(struct proc *p, int id) 1137 { 1138 struct lwp *l; 1139 1140 KASSERT(mutex_owned(p->p_lock)); 1141 1142 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1143 if (l->l_lid == id) 1144 break; 1145 } 1146 1147 /* 1148 * No need to lock - all of these conditions will 1149 * be visible with the process level mutex held. 1150 */ 1151 if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB)) 1152 l = NULL; 1153 1154 return l; 1155 } 1156 1157 /* 1158 * Update an LWP's cached credentials to mirror the process' master copy. 1159 * 1160 * This happens early in the syscall path, on user trap, and on LWP 1161 * creation. A long-running LWP can also voluntarily choose to update 1162 * its credentials by calling this routine. This may be called from 1163 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand. 1164 */ 1165 void 1166 lwp_update_creds(struct lwp *l) 1167 { 1168 kauth_cred_t oc; 1169 struct proc *p; 1170 1171 p = l->l_proc; 1172 oc = l->l_cred; 1173 1174 mutex_enter(p->p_lock); 1175 kauth_cred_hold(p->p_cred); 1176 l->l_cred = p->p_cred; 1177 l->l_prflag &= ~LPR_CRMOD; 1178 mutex_exit(p->p_lock); 1179 if (oc != NULL) 1180 kauth_cred_free(oc); 1181 } 1182 1183 /* 1184 * Verify that an LWP is locked, and optionally verify that the lock matches 1185 * one we specify. 1186 */ 1187 int 1188 lwp_locked(struct lwp *l, kmutex_t *mtx) 1189 { 1190 kmutex_t *cur = l->l_mutex; 1191 1192 return mutex_owned(cur) && (mtx == cur || mtx == NULL); 1193 } 1194 1195 /* 1196 * Lock an LWP. 1197 */ 1198 void 1199 lwp_lock_retry(struct lwp *l, kmutex_t *old) 1200 { 1201 1202 /* 1203 * XXXgcc ignoring kmutex_t * volatile on i386 1204 * 1205 * gcc version 4.1.2 20061021 prerelease (NetBSD nb1 20061021) 1206 */ 1207 #if 1 1208 while (l->l_mutex != old) { 1209 #else 1210 for (;;) { 1211 #endif 1212 mutex_spin_exit(old); 1213 old = l->l_mutex; 1214 mutex_spin_enter(old); 1215 1216 /* 1217 * mutex_enter() will have posted a read barrier. Re-test 1218 * l->l_mutex. If it has changed, we need to try again. 1219 */ 1220 #if 1 1221 } 1222 #else 1223 } while (__predict_false(l->l_mutex != old)); 1224 #endif 1225 } 1226 1227 /* 1228 * Lend a new mutex to an LWP. The old mutex must be held. 1229 */ 1230 void 1231 lwp_setlock(struct lwp *l, kmutex_t *new) 1232 { 1233 1234 KASSERT(mutex_owned(l->l_mutex)); 1235 1236 membar_exit(); 1237 l->l_mutex = new; 1238 } 1239 1240 /* 1241 * Lend a new mutex to an LWP, and release the old mutex. The old mutex 1242 * must be held. 1243 */ 1244 void 1245 lwp_unlock_to(struct lwp *l, kmutex_t *new) 1246 { 1247 kmutex_t *old; 1248 1249 KASSERT(mutex_owned(l->l_mutex)); 1250 1251 old = l->l_mutex; 1252 membar_exit(); 1253 l->l_mutex = new; 1254 mutex_spin_exit(old); 1255 } 1256 1257 /* 1258 * Acquire a new mutex, and donate it to an LWP. The LWP must already be 1259 * locked.
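 *
 *	For example (an editor-added sketch, not part of the original
 *	comment), code holding an LWP's current lock that must hand the
 *	LWP over to its CPU's run queue lock could use:
 *
 *		lwp_relock(l, l->l_cpu->ci_schedstate.spc_mutex);
 *
 *	after which the LWP remains locked, now by spc_mutex.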
1260 */ 1261 void 1262 lwp_relock(struct lwp *l, kmutex_t *new) 1263 { 1264 kmutex_t *old; 1265 1266 KASSERT(mutex_owned(l->l_mutex)); 1267 1268 old = l->l_mutex; 1269 if (old != new) { 1270 mutex_spin_enter(new); 1271 l->l_mutex = new; 1272 mutex_spin_exit(old); 1273 } 1274 } 1275 1276 int 1277 lwp_trylock(struct lwp *l) 1278 { 1279 kmutex_t *old; 1280 1281 for (;;) { 1282 if (!mutex_tryenter(old = l->l_mutex)) 1283 return 0; 1284 if (__predict_true(l->l_mutex == old)) 1285 return 1; 1286 mutex_spin_exit(old); 1287 } 1288 } 1289 1290 u_int 1291 lwp_unsleep(lwp_t *l, bool cleanup) 1292 { 1293 1294 KASSERT(mutex_owned(l->l_mutex)); 1295 1296 return (*l->l_syncobj->sobj_unsleep)(l, cleanup); 1297 } 1298 1299 1300 /* 1301 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is 1302 * set. 1303 */ 1304 void 1305 lwp_userret(struct lwp *l) 1306 { 1307 struct proc *p; 1308 void (*hook)(void); 1309 int sig; 1310 1311 p = l->l_proc; 1312 1313 #ifndef __HAVE_FAST_SOFTINTS 1314 /* Run pending soft interrupts. */ 1315 if (l->l_cpu->ci_data.cpu_softints != 0) 1316 softint_overlay(); 1317 #endif 1318 1319 /* 1320 * It should be safe to do this read unlocked on a multiprocessor 1321 * system.. 1322 */ 1323 while ((l->l_flag & LW_USERRET) != 0) { 1324 /* 1325 * Process pending signals first, unless the process 1326 * is dumping core or exiting, where we will instead 1327 * enter the LW_WSUSPEND case below. 1328 */ 1329 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == 1330 LW_PENDSIG) { 1331 mutex_enter(p->p_lock); 1332 while ((sig = issignal(l)) != 0) 1333 postsig(sig); 1334 mutex_exit(p->p_lock); 1335 } 1336 1337 /* 1338 * Core-dump or suspend pending. 1339 * 1340 * In case of core dump, suspend ourselves, so that the 1341 * kernel stack and therefore the userland registers saved 1342 * in the trapframe are around for coredump() to write them 1343 * out. We issue a wakeup on p->p_lwpcv so that sigexit() 1344 * will write the core file out once all other LWPs are 1345 * suspended. 1346 */ 1347 if ((l->l_flag & LW_WSUSPEND) != 0) { 1348 mutex_enter(p->p_lock); 1349 p->p_nrlwps--; 1350 cv_broadcast(&p->p_lwpcv); 1351 lwp_lock(l); 1352 l->l_stat = LSSUSPENDED; 1353 lwp_unlock(l); 1354 mutex_exit(p->p_lock); 1355 lwp_lock(l); 1356 mi_switch(l); 1357 } 1358 1359 /* Process is exiting. */ 1360 if ((l->l_flag & LW_WEXIT) != 0) { 1361 lwp_exit(l); 1362 KASSERT(0); 1363 /* NOTREACHED */ 1364 } 1365 1366 /* Call userret hook; used by Linux emulation. */ 1367 if ((l->l_flag & LW_WUSERRET) != 0) { 1368 lwp_lock(l); 1369 l->l_flag &= ~LW_WUSERRET; 1370 lwp_unlock(l); 1371 hook = p->p_userret; 1372 p->p_userret = NULL; 1373 (*hook)(); 1374 } 1375 } 1376 } 1377 1378 /* 1379 * Force an LWP to enter the kernel, to take a trip through lwp_userret(). 1380 */ 1381 void 1382 lwp_need_userret(struct lwp *l) 1383 { 1384 KASSERT(lwp_locked(l, NULL)); 1385 1386 /* 1387 * Since the tests in lwp_userret() are done unlocked, make sure 1388 * that the condition will be seen before forcing the LWP to enter 1389 * kernel mode. 1390 */ 1391 membar_producer(); 1392 cpu_signotify(l); 1393 } 1394 1395 /* 1396 * Add one reference to an LWP. This will prevent the LWP from 1397 * exiting, thus keep the lwp structure and PCB around to inspect. 1398 */ 1399 void 1400 lwp_addref(struct lwp *l) 1401 { 1402 1403 KASSERT(mutex_owned(l->l_proc->p_lock)); 1404 KASSERT(l->l_stat != LSZOMB); 1405 KASSERT(l->l_refcnt != 0); 1406 1407 l->l_refcnt++; 1408 } 1409 1410 /* 1411 * Remove one reference to an LWP. 
If this is the last reference, 1412 * then we must finalize the LWP's death. 1413 */ 1414 void 1415 lwp_delref(struct lwp *l) 1416 { 1417 struct proc *p = l->l_proc; 1418 1419 mutex_enter(p->p_lock); 1420 KASSERT(l->l_stat != LSZOMB); 1421 KASSERT(l->l_refcnt > 0); 1422 if (--l->l_refcnt == 0) 1423 cv_broadcast(&p->p_lwpcv); 1424 mutex_exit(p->p_lock); 1425 } 1426 1427 /* 1428 * Drain all references to the current LWP. 1429 */ 1430 void 1431 lwp_drainrefs(struct lwp *l) 1432 { 1433 struct proc *p = l->l_proc; 1434 1435 KASSERT(mutex_owned(p->p_lock)); 1436 KASSERT(l->l_refcnt != 0); 1437 1438 l->l_refcnt--; 1439 while (l->l_refcnt != 0) 1440 cv_wait(&p->p_lwpcv, p->p_lock); 1441 } 1442 1443 /* 1444 * lwp_specific_key_create -- 1445 * Create a key for subsystem lwp-specific data. 1446 */ 1447 int 1448 lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1449 { 1450 1451 return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor)); 1452 } 1453 1454 /* 1455 * lwp_specific_key_delete -- 1456 * Delete a key for subsystem lwp-specific data. 1457 */ 1458 void 1459 lwp_specific_key_delete(specificdata_key_t key) 1460 { 1461 1462 specificdata_key_delete(lwp_specificdata_domain, key); 1463 } 1464 1465 /* 1466 * lwp_initspecific -- 1467 * Initialize an LWP's specificdata container. 1468 */ 1469 void 1470 lwp_initspecific(struct lwp *l) 1471 { 1472 int error; 1473 1474 error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref); 1475 KASSERT(error == 0); 1476 } 1477 1478 /* 1479 * lwp_finispecific -- 1480 * Finalize an LWP's specificdata container. 1481 */ 1482 void 1483 lwp_finispecific(struct lwp *l) 1484 { 1485 1486 specificdata_fini(lwp_specificdata_domain, &l->l_specdataref); 1487 } 1488 1489 /* 1490 * lwp_getspecific -- 1491 * Return lwp-specific data corresponding to the specified key. 1492 * 1493 * Note: LWP specific data is NOT INTERLOCKED. An LWP should access 1494 * only its OWN SPECIFIC DATA. If it is necessary to access another 1495 * LWP's specifc data, care must be taken to ensure that doing so 1496 * would not cause internal data structure inconsistency (i.e. caller 1497 * can guarantee that the target LWP is not inside an lwp_getspecific() 1498 * or lwp_setspecific() call). 1499 */ 1500 void * 1501 lwp_getspecific(specificdata_key_t key) 1502 { 1503 1504 return (specificdata_getspecific_unlocked(lwp_specificdata_domain, 1505 &curlwp->l_specdataref, key)); 1506 } 1507 1508 void * 1509 _lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key) 1510 { 1511 1512 return (specificdata_getspecific_unlocked(lwp_specificdata_domain, 1513 &l->l_specdataref, key)); 1514 } 1515 1516 /* 1517 * lwp_setspecific -- 1518 * Set lwp-specific data corresponding to the specified key. 1519 */ 1520 void 1521 lwp_setspecific(specificdata_key_t key, void *data) 1522 { 1523 1524 specificdata_setspecific(lwp_specificdata_domain, 1525 &curlwp->l_specdataref, key, data); 1526 } 1527 1528 /* 1529 * Allocate a new lwpctl structure for a user LWP. 1530 */ 1531 int 1532 lwp_ctl_alloc(vaddr_t *uaddr) 1533 { 1534 lcproc_t *lp; 1535 u_int bit, i, offset; 1536 struct uvm_object *uao; 1537 int error; 1538 lcpage_t *lcp; 1539 proc_t *p; 1540 lwp_t *l; 1541 1542 l = curlwp; 1543 p = l->l_proc; 1544 1545 if (l->l_lcpage != NULL) { 1546 lcp = l->l_lcpage; 1547 *uaddr = lcp->lcp_uaddr + (vaddr_t)l->l_lwpctl - lcp->lcp_kaddr; 1548 return (EINVAL); 1549 } 1550 1551 /* First time around, allocate header structure for the process. 
*/ 1552 if ((lp = p->p_lwpctl) == NULL) { 1553 lp = kmem_alloc(sizeof(*lp), KM_SLEEP); 1554 mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); 1555 lp->lp_uao = NULL; 1556 TAILQ_INIT(&lp->lp_pages); 1557 mutex_enter(p->p_lock); 1558 if (p->p_lwpctl == NULL) { 1559 p->p_lwpctl = lp; 1560 mutex_exit(p->p_lock); 1561 } else { 1562 mutex_exit(p->p_lock); 1563 mutex_destroy(&lp->lp_lock); 1564 kmem_free(lp, sizeof(*lp)); 1565 lp = p->p_lwpctl; 1566 } 1567 } 1568 1569 /* 1570 * Set up an anonymous memory region to hold the shared pages. 1571 * Map them into the process' address space. The user vmspace 1572 * gets the first reference on the UAO. 1573 */ 1574 mutex_enter(&lp->lp_lock); 1575 if (lp->lp_uao == NULL) { 1576 lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); 1577 lp->lp_cur = 0; 1578 lp->lp_max = LWPCTL_UAREA_SZ; 1579 lp->lp_uva = p->p_emul->e_vm_default_addr(p, 1580 (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ); 1581 error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, 1582 LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, 1583 UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); 1584 if (error != 0) { 1585 uao_detach(lp->lp_uao); 1586 lp->lp_uao = NULL; 1587 mutex_exit(&lp->lp_lock); 1588 return error; 1589 } 1590 } 1591 1592 /* Get a free block and allocate for this LWP. */ 1593 TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { 1594 if (lcp->lcp_nfree != 0) 1595 break; 1596 } 1597 if (lcp == NULL) { 1598 /* Nothing available - try to set up a free page. */ 1599 if (lp->lp_cur == lp->lp_max) { 1600 mutex_exit(&lp->lp_lock); 1601 return ENOMEM; 1602 } 1603 lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); 1604 if (lcp == NULL) { 1605 mutex_exit(&lp->lp_lock); 1606 return ENOMEM; 1607 } 1608 /* 1609 * Wire the next page down in kernel space. Since this 1610 * is a new mapping, we must add a reference. 1611 */ 1612 uao = lp->lp_uao; 1613 (*uao->pgops->pgo_reference)(uao); 1614 lcp->lcp_kaddr = vm_map_min(kernel_map); 1615 error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE, 1616 uao, lp->lp_cur, PAGE_SIZE, 1617 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 1618 UVM_INH_NONE, UVM_ADV_RANDOM, 0)); 1619 if (error != 0) { 1620 mutex_exit(&lp->lp_lock); 1621 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1622 (*uao->pgops->pgo_detach)(uao); 1623 return error; 1624 } 1625 error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr, 1626 lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0); 1627 if (error != 0) { 1628 mutex_exit(&lp->lp_lock); 1629 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1630 lcp->lcp_kaddr + PAGE_SIZE); 1631 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1632 return error; 1633 } 1634 /* Prepare the page descriptor and link into the list. 
*/ 1635 lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur; 1636 lp->lp_cur += PAGE_SIZE; 1637 lcp->lcp_nfree = LWPCTL_PER_PAGE; 1638 lcp->lcp_rotor = 0; 1639 memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ); 1640 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1641 } 1642 for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) { 1643 if (++i >= LWPCTL_BITMAP_ENTRIES) 1644 i = 0; 1645 } 1646 bit = ffs(lcp->lcp_bitmap[i]) - 1; 1647 lcp->lcp_bitmap[i] ^= (1 << bit); 1648 lcp->lcp_rotor = i; 1649 lcp->lcp_nfree--; 1650 l->l_lcpage = lcp; 1651 offset = (i << 5) + bit; 1652 l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset; 1653 *uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t); 1654 mutex_exit(&lp->lp_lock); 1655 1656 KPREEMPT_DISABLE(l); 1657 l->l_lwpctl->lc_curcpu = (short)curcpu()->ci_data.cpu_index; 1658 KPREEMPT_ENABLE(l); 1659 1660 return 0; 1661 } 1662 1663 /* 1664 * Free an lwpctl structure back to the per-process list. 1665 */ 1666 void 1667 lwp_ctl_free(lwp_t *l) 1668 { 1669 lcproc_t *lp; 1670 lcpage_t *lcp; 1671 u_int map, offset; 1672 1673 lp = l->l_proc->p_lwpctl; 1674 KASSERT(lp != NULL); 1675 1676 lcp = l->l_lcpage; 1677 offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr); 1678 KASSERT(offset < LWPCTL_PER_PAGE); 1679 1680 mutex_enter(&lp->lp_lock); 1681 lcp->lcp_nfree++; 1682 map = offset >> 5; 1683 lcp->lcp_bitmap[map] |= (1 << (offset & 31)); 1684 if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0) 1685 lcp->lcp_rotor = map; 1686 if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) { 1687 TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain); 1688 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1689 } 1690 mutex_exit(&lp->lp_lock); 1691 } 1692 1693 /* 1694 * Process is exiting; tear down lwpctl state. This can only be safely 1695 * called by the last LWP in the process. 1696 */ 1697 void 1698 lwp_ctl_exit(void) 1699 { 1700 lcpage_t *lcp, *next; 1701 lcproc_t *lp; 1702 proc_t *p; 1703 lwp_t *l; 1704 1705 l = curlwp; 1706 l->l_lwpctl = NULL; 1707 l->l_lcpage = NULL; 1708 p = l->l_proc; 1709 lp = p->p_lwpctl; 1710 1711 KASSERT(lp != NULL); 1712 KASSERT(p->p_nlwps == 1); 1713 1714 for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) { 1715 next = TAILQ_NEXT(lcp, lcp_chain); 1716 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1717 lcp->lcp_kaddr + PAGE_SIZE); 1718 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1719 } 1720 1721 if (lp->lp_uao != NULL) { 1722 uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva, 1723 lp->lp_uva + LWPCTL_UAREA_SZ); 1724 } 1725 1726 mutex_destroy(&lp->lp_lock); 1727 kmem_free(lp, sizeof(*lp)); 1728 p->p_lwpctl = NULL; 1729 } 1730 1731 #if defined(DDB) 1732 void 1733 lwp_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1734 { 1735 lwp_t *l; 1736 1737 LIST_FOREACH(l, &alllwp, l_list) { 1738 uintptr_t stack = (uintptr_t)KSTACK_LOWEST_ADDR(l); 1739 1740 if (addr < stack || stack + KSTACK_SIZE <= addr) { 1741 continue; 1742 } 1743 (*pr)("%p is %p+%zu, LWP %p's stack\n", 1744 (void *)addr, (void *)stack, 1745 (size_t)(addr - stack), l); 1746 } 1747 } 1748 #endif /* defined(DDB) */ 1749
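
#if 0
/*
 * Editor's illustration (not part of the original file): a minimal sketch
 * of how a kernel subsystem might use the LWP specificdata interface
 * defined above.  The key, destructor and "example_" names are
 * hypothetical.
 */
static specificdata_key_t example_key;

static void
example_dtor(void *data)
{

	/* Called for the attached data when the owning LWP is finalized. */
	kmem_free(data, sizeof(int));
}

static int
example_init(void)
{

	/* Create the key once, e.g. at subsystem initialization time. */
	return lwp_specific_key_create(&example_key, example_dtor);
}

static void
example_set(int value)
{
	int *ip;

	/* Attach per-LWP data; only the owning LWP should touch it. */
	ip = kmem_alloc(sizeof(*ip), KM_SLEEP);
	*ip = value;
	lwp_setspecific(example_key, ip);
}

static int
example_get(void)
{
	int *ip;

	/* Fetch the calling LWP's private copy, if any. */
	ip = lwp_getspecific(example_key);
	return (ip != NULL) ? *ip : 0;
}
#endif	/* 0 */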