/*	$NetBSD: kern_lwp.c,v 1.77 2007/11/11 23:22:23 matt Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Overview
 *
 *	Lightweight processes (LWPs) are the basic unit or thread of
 *	execution within the kernel.  The core state of an LWP is described
 *	by "struct lwp", also known as lwp_t.
 *
 *	Each LWP is contained within a process (described by "struct proc").
 *	Every process contains at least one LWP, but may contain more.  The
 *	process describes attributes shared among all of its LWPs such as a
 *	private address space, global execution state (stopped, active,
 *	zombie, ...), signal disposition and so on.  On a multiprocessor
 *	machine, multiple LWPs may be executing concurrently in the kernel.
 *
 * Execution states
 *
 *	At any given time, an LWP has overall state that is described by
 *	lwp::l_stat.  The states are broken into two sets below.  The first
 *	set is guaranteed to represent the absolute, current state of the
 *	LWP:
 *
 *	LSONPROC
 *
 *		On processor: the LWP is executing on a CPU, either in the
 *		kernel or in user space.
 *
 *	LSRUN
 *
 *		Runnable: the LWP is parked on a run queue, and may soon be
 *		chosen to run by an idle processor, or by a processor that
 *		has been asked to preempt a currently running but lower
 *		priority LWP.  If the LWP is not swapped in (L_INMEM == 0)
 *		then the LWP is not on a run queue, but may be soon.
 *
 *	LSIDL
 *
 *		Idle: the LWP has been created but has not yet executed,
 *		or it has ceased executing a unit of work and is waiting
 *		to be started again.
 *
 *	LSSUSPENDED:
 *
 *		Suspended: the LWP has had its execution suspended by
 *		another LWP in the same process using the _lwp_suspend()
 *		system call.  User-level LWPs also enter the suspended
 *		state when the system is shutting down.
 *
 *	The second set represents a "statement of intent" on behalf of the
 *	LWP.  The LWP may in fact be executing on a processor, or may be
 *	sleeping or idle.  It is expected to take the necessary action to
 *	stop executing or become "running" again within a short timeframe.
 *	The LW_RUNNING flag in lwp::l_flag indicates that an LWP is running.
 *	Importantly, it indicates that its state is tied to a CPU.
 *
 *	LSZOMB:
 *
 *		Dead or dying: the LWP has released most of its resources
 *		and is a) about to switch away into oblivion b) has already
 *		switched away.  When it switches away, its few remaining
 *		resources can be collected.
 *
 *	LSSLEEP:
 *
 *		Sleeping: the LWP has entered itself onto a sleep queue, and
 *		has switched away or will switch away shortly to allow other
 *		LWPs to run on the CPU.
 *
 *	LSSTOP:
 *
 *		Stopped: the LWP has been stopped as a result of a job
 *		control signal, or as a result of the ptrace() interface.
 *
 *		Stopped LWPs may run briefly within the kernel to handle
 *		signals that they receive, but will not return to user space
 *		until their process' state is changed away from stopped.
 *
 *		Single LWPs within a process can not be set stopped
 *		selectively: all actions that can stop or continue LWPs
 *		occur at the process level.
 *
 * State transitions
 *
 *	Note that the LSSTOP state may only be set when returning to
 *	user space in userret(), or when sleeping interruptibly.  The
 *	LSSUSPENDED state may only be set in userret().  Before setting
 *	those states, we try to ensure that the LWPs will release all
 *	locks that they hold, and at a minimum try to ensure that the
 *	LWP can be set runnable again by a signal.
 *
 *	LWPs may transition states in the following ways:
 *
 *	 RUN -------> ONPROC		ONPROC -----> RUN
 *		    > STOPPED			    > SLEEP
 *		    > SUSPENDED			    > STOPPED
 *						    > SUSPENDED
 *						    > ZOMB
 *
 *	 STOPPED ---> RUN		SUSPENDED --> RUN
 *		    > SLEEP			    > SLEEP
 *
 *	 SLEEP -----> ONPROC		IDL --------> RUN
 *		    > RUN			    > SUSPENDED
 *		    > STOPPED			    > STOPPED
 *		    > SUSPENDED
 *
 *	Other state transitions are possible with kernel threads (eg
 *	ONPROC -> IDL), but only happen under tightly controlled
 *	circumstances and when the side effects are understood.
 *
 * Locking
 *
 *	The majority of fields in 'struct lwp' are covered by a single,
 *	general spin lock pointed to by lwp::l_mutex.  The locks covering
 *	each field are documented in sys/lwp.h.
 *
 *	State transitions must be made with the LWP's general lock held,
 *	and may cause the LWP's lock pointer to change.  Manipulation of
 *	the general lock is not performed directly, but through calls to
 *	lwp_lock(), lwp_relock() and similar.
 *
 *	States and their associated locks:
 *
 *	LSONPROC, LSZOMB:
 *
 *		Always covered by spc_lwplock, which protects running LWPs.
 *		This is a per-CPU lock.
 *
 *	LSIDL, LSRUN:
 *
 *		Always covered by spc_mutex, which protects the run queues.
 *		This may be a per-CPU lock, depending on the scheduler.
 *
 *	LSSLEEP:
 *
 *		Covered by a lock associated with the sleep queue that the
 *		LWP resides on, indirectly referenced by l_sleepq->sq_mutex.
 *
 *	LSSTOP, LSSUSPENDED:
 *
 *		If the LWP was previously sleeping (l_wchan != NULL), then
 *		l_mutex references the sleep queue lock.  If the LWP was
 *		runnable or on the CPU when halted, or has been removed from
 *		the sleep queue since halted, then the lock is spc_lwplock.
 *
 *	The lock order is as follows:
 *
 *		spc::spc_lwplock ->
 *		    sleepq_t::sq_mutex ->
 *			tschain_t::tc_mutex ->
 *			    spc::spc_mutex
 *
 *	Each process has a scheduler state lock (proc::p_smutex), and a
 *	number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
 *	so on.  When an LWP is to be entered into or removed from one of the
 *	following states, p_smutex must be held and the process wide counters
 *	adjusted:
 *
 *		LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
 *
 *	Note that an LWP is considered running or likely to run soon if in
 *	one of the following states.  This affects the value of p_nrlwps:
 *
 *		LSRUN, LSONPROC, LSSLEEP
 *
 *	p_smutex does not need to be held when transitioning among these
 *	three states.
 */
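
/*
 * Illustrative sketch of the locking convention described above: code
 * that examines or changes per-LWP state takes the general lock with
 * lwp_lock(), which follows l_mutex wherever it currently points, and
 * releases it with lwp_unlock():
 *
 *	lwp_lock(l);
 *	if (l->l_stat == LSSLEEP)
 *		... inspect or modify fields covered by l_mutex ...
 *	lwp_unlock(l);
 *
 * The pattern above is only a sketch; see lwp_lock_retry() and the lock
 * lending primitives (lwp_setlock(), lwp_unlock_to(), lwp_relock())
 * later in this file for how the lock pointer itself is managed.
 */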

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.77 2007/11/11 23:22:23 matt Exp $");

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#define _LWP_API_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/syscallargs.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>
#include <sys/kmem.h>
#include <sys/intr.h>

#include <uvm/uvm_extern.h>

struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp);

POOL_INIT(lwp_pool, sizeof(struct lwp), MIN_LWP_ALIGNMENT, 0, 0, "lwppl",
    &pool_allocator_nointr, IPL_NONE);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr, IPL_NONE);

static specificdata_domain_t lwp_specificdata_domain;

#define LWP_DEBUG

#ifdef LWP_DEBUG
int lwp_debug = 0;
#define DPRINTF(x) if (lwp_debug) printf x
#else
#define DPRINTF(x)
#endif

void
lwpinit(void)
{

	lwp_specificdata_domain = specificdata_domain_create();
	KASSERT(lwp_specificdata_domain != NULL);
	lwp_sys_init();
}

/*
 * Set an LWP suspended.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
int
lwp_suspend(struct lwp *curl, struct lwp *t)
{
	int error;

	KASSERT(mutex_owned(&t->l_proc->p_smutex));
	KASSERT(lwp_locked(t, NULL));

	KASSERT(curl != t || curl->l_stat == LSONPROC);

	/*
	 * If the current LWP has been told to exit, we must not suspend anyone
	 * else or deadlock could occur.  We won't return to userspace.
	 */
	if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
		lwp_unlock(t);
		return (EDEADLK);
	}

	error = 0;

	switch (t->l_stat) {
	case LSRUN:
	case LSONPROC:
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);
		lwp_unlock(t);
		break;

	case LSSLEEP:
		t->l_flag |= LW_WSUSPEND;

		/*
		 * Kick the LWP and try to get it to the kernel boundary
		 * so that it will release any locks that it holds.
		 * setrunnable() will release the lock.
		 */
		if ((t->l_flag & LW_SINTR) != 0)
			setrunnable(t);
		else
			lwp_unlock(t);
		break;

	case LSSUSPENDED:
		lwp_unlock(t);
		break;

	case LSSTOP:
		t->l_flag |= LW_WSUSPEND;
		setrunnable(t);
		break;

	case LSIDL:
	case LSZOMB:
		error = EINTR; /* It's what Solaris does..... */
		lwp_unlock(t);
		break;
	}

	return (error);
}
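
/*
 * Illustrative sketch of the lwp_suspend() calling convention: a caller
 * in the style of the _lwp_suspend() system call holds p_smutex, locks
 * the target, and relies on lwp_suspend() dropping the target's lock.
 * "target_lid" below is a placeholder for the LWP ID supplied by the
 * caller:
 *
 *	mutex_enter(&p->p_smutex);
 *	if ((t = lwp_find(p, target_lid)) == NULL) {
 *		mutex_exit(&p->p_smutex);
 *		return ESRCH;
 *	}
 *	lwp_lock(t);
 *	error = lwp_suspend(curlwp, t);		// unlocks t
 *	mutex_exit(&p->p_smutex);
 */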

/*
 * Restart a suspended LWP.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
void
lwp_continue(struct lwp *l)
{

	KASSERT(mutex_owned(&l->l_proc->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	DPRINTF(("lwp_continue of %d.%d (%s), state %d, wchan %p\n",
	    l->l_proc->p_pid, l->l_lid, l->l_proc->p_comm, l->l_stat,
	    l->l_wchan));

	/* If rebooting or not suspended, then just bail out. */
	if ((l->l_flag & LW_WREBOOT) != 0) {
		lwp_unlock(l);
		return;
	}

	l->l_flag &= ~LW_WSUSPEND;

	if (l->l_stat != LSSUSPENDED) {
		lwp_unlock(l);
		return;
	}

	/* setrunnable() will release the lock. */
	setrunnable(l);
}

/*
 * Wait for an LWP within the current process to exit.  If 'lid' is
 * non-zero, we are waiting for a specific LWP.
 *
 * Must be called with p->p_smutex held.
 */
int
lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
{
	struct proc *p = l->l_proc;
	struct lwp *l2;
	int nfound, error;
	lwpid_t curlid;
	bool exiting;

	DPRINTF(("lwp_wait1: %d.%d waiting for %d.\n",
	    p->p_pid, l->l_lid, lid));

	KASSERT(mutex_owned(&p->p_smutex));

	p->p_nlwpwait++;
	l->l_waitingfor = lid;
	curlid = l->l_lid;
	exiting = ((flags & LWPWAIT_EXITCONTROL) != 0);

	for (;;) {
		/*
		 * Avoid a race between exit1() and sigexit(): if the
		 * process is dumping core, then we need to bail out: call
		 * into lwp_userret() where we will be suspended until the
		 * deed is done.
		 */
		if ((p->p_sflag & PS_WCORE) != 0) {
			mutex_exit(&p->p_smutex);
			lwp_userret(l);
#ifdef DIAGNOSTIC
			panic("lwp_wait1");
#endif
			/* NOTREACHED */
		}

		/*
		 * First off, drain any detached LWP that is waiting to be
		 * reaped.
		 */
		while ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);	/* releases proc mutex */
			mutex_enter(&p->p_smutex);
		}

		/*
		 * Now look for an LWP to collect.  If the whole process is
		 * exiting, count detached LWPs as eligible to be collected,
		 * but don't drain them here.
		 */
		nfound = 0;
		error = 0;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			/*
			 * If a specific wait and the target is waiting on
			 * us, then avoid deadlock.  This also traps LWPs
			 * that try to wait on themselves.
			 *
			 * Note that this does not handle more complicated
			 * cycles, like: t1 -> t2 -> t3 -> t1.  The process
			 * can still be killed so it is not a major problem.
			 */
			if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
				error = EDEADLK;
				break;
			}
			if (l2 == l)
				continue;
			if ((l2->l_prflag & LPR_DETACHED) != 0) {
				nfound += exiting;
				continue;
			}
			if (lid != 0) {
				if (l2->l_lid != lid)
					continue;
				/*
				 * Mark this LWP as the first waiter, if there
				 * is no other.
				 */
				if (l2->l_waiter == 0)
					l2->l_waiter = curlid;
			} else if (l2->l_waiter != 0) {
				/*
				 * It already has a waiter - so don't
				 * collect it.  If the waiter doesn't
				 * grab it we'll get another chance
				 * later.
				 */
				nfound++;
				continue;
			}
			nfound++;

			/* No need to lock the LWP in order to see LSZOMB. */
			if (l2->l_stat != LSZOMB)
				continue;

			/*
			 * We're no longer waiting.  Reset the "first waiter"
			 * pointer on the target, in case it was us.
			 */
			l->l_waitingfor = 0;
			l2->l_waiter = 0;
			p->p_nlwpwait--;
			if (departed)
				*departed = l2->l_lid;
			sched_lwp_collect(l2);

			/* lwp_free() releases the proc lock. */
			lwp_free(l2, false, false);
			mutex_enter(&p->p_smutex);
			return 0;
		}

		if (error != 0)
			break;
		if (nfound == 0) {
			error = ESRCH;
			break;
		}

		/*
		 * The kernel is careful to ensure that it can not deadlock
		 * when exiting - just keep waiting.
		 */
		if (exiting) {
			KASSERT(p->p_nlwps > 1);
			cv_wait(&p->p_lwpcv, &p->p_smutex);
			continue;
		}

		/*
		 * If all other LWPs are waiting for exits or suspends
		 * and the supply of zombies and potential zombies is
		 * exhausted, then we are about to deadlock.
		 *
		 * If the process is exiting (and this LWP is not the one
		 * that is coordinating the exit) then bail out now.
		 */
		if ((p->p_sflag & PS_WEXIT) != 0 ||
		    p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) {
			error = EDEADLK;
			break;
		}

		/*
		 * Sit around and wait for something to happen.  We'll be
		 * awoken if any of the conditions examined change: if an
		 * LWP exits, is collected, or is detached.
		 */
		if ((error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex)) != 0)
			break;
	}

	/*
	 * We didn't find any LWPs to collect, we may have received a
	 * signal, or some other condition has caused us to bail out.
	 *
	 * If waiting on a specific LWP, clear the waiters marker: some
	 * other LWP may want it.  Then, kick all the remaining waiters
	 * so that they can re-check for zombies and for deadlock.
	 */
	if (lid != 0) {
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2->l_lid == lid) {
				if (l2->l_waiter == curlid)
					l2->l_waiter = 0;
				break;
			}
		}
	}
	p->p_nlwpwait--;
	l->l_waitingfor = 0;
	cv_broadcast(&p->p_lwpcv);

	return error;
}
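
/*
 * Illustrative sketch of a lwp_wait1() caller, modelled on the
 * _lwp_wait() system call: the caller supplies p_smutex and decides what
 * to do with the departed LWP's ID afterwards.  "wait_for" is a
 * placeholder for the LWP ID being waited on (0 means any LWP):
 *
 *	lwpid_t departed;
 *
 *	mutex_enter(&p->p_smutex);
 *	error = lwp_wait1(curlwp, wait_for, &departed, 0);
 *	mutex_exit(&p->p_smutex);
 *
 * On success, 'departed' holds the ID of the LWP that was collected.
 */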

/*
 * Create a new LWP within process 'p2', using LWP 'l1' as a template.
 * The new LWP is created in state LSIDL and must be set running,
 * suspended, or stopped by the caller.
 */
int
lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags,
    void *stack, size_t stacksize, void (*func)(void *), void *arg,
    lwp_t **rnewlwpp, int sclass)
{
	struct lwp *l2, *isfree;
	turnstile_t *ts;

	/*
	 * First off, reap any detached LWP waiting to be collected.
	 * We can re-use its LWP structure and turnstile.
	 */
	isfree = NULL;
	if (p2->p_zomblwp != NULL) {
		mutex_enter(&p2->p_smutex);
		if ((isfree = p2->p_zomblwp) != NULL) {
			p2->p_zomblwp = NULL;
			lwp_free(isfree, true, false);	/* releases proc mutex */
		} else
			mutex_exit(&p2->p_smutex);
	}
	if (isfree == NULL) {
		l2 = pool_get(&lwp_pool, PR_WAITOK);
		memset(l2, 0, sizeof(*l2));
		l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
		SLIST_INIT(&l2->l_pi_lenders);
	} else {
		l2 = isfree;
		ts = l2->l_ts;
		KASSERT(l2->l_inheritedprio == -1);
		KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
		memset(l2, 0, sizeof(*l2));
		l2->l_ts = ts;
	}

	l2->l_stat = LSIDL;
	l2->l_proc = p2;
	l2->l_refcnt = 1;
	l2->l_class = sclass;
	l2->l_kpriority = l1->l_kpriority;
	l2->l_priority = l1->l_priority;
	l2->l_inheritedprio = -1;
	l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex;
	l2->l_cpu = l1->l_cpu;
	l2->l_flag = inmem ? LW_INMEM : 0;

	if (p2->p_flag & PK_SYSTEM) {
		/*
		 * Mark it as a system process and not a candidate for
		 * swapping.
		 */
		l2->l_flag |= LW_SYSTEM;
	} else {
		/* Look for a CPU to start */
		l2->l_cpu = sched_takecpu(l2);
		l2->l_mutex = l2->l_cpu->ci_schedstate.spc_mutex;
	}

	lwp_initspecific(l2);
	sched_lwp_fork(l1, l2);
	lwp_update_creds(l2);
	callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE);
	callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2);
	mutex_init(&l2->l_swaplock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&l2->l_sigcv, "sigwait");
	l2->l_syncobj = &sched_syncobj;

	if (rnewlwpp != NULL)
		*rnewlwpp = l2;

	l2->l_addr = UAREA_TO_USER(uaddr);
	uvm_lwp_fork(l1, l2, stack, stacksize, func,
	    (arg != NULL) ? arg : l2);

	mutex_enter(&p2->p_smutex);

	if ((flags & LWP_DETACHED) != 0) {
		l2->l_prflag = LPR_DETACHED;
		p2->p_ndlwps++;
	} else
		l2->l_prflag = 0;

	l2->l_sigmask = l1->l_sigmask;
	CIRCLEQ_INIT(&l2->l_sigpend.sp_info);
	sigemptyset(&l2->l_sigpend.sp_set);

	p2->p_nlwpid++;
	if (p2->p_nlwpid == 0)
		p2->p_nlwpid++;
	l2->l_lid = p2->p_nlwpid;
	LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
	p2->p_nlwps++;

	mutex_exit(&p2->p_smutex);

	mutex_enter(&proclist_lock);
	mutex_enter(&proclist_mutex);
	LIST_INSERT_HEAD(&alllwp, l2, l_list);
	mutex_exit(&proclist_mutex);
	mutex_exit(&proclist_lock);

	SYSCALL_TIME_LWP_INIT(l2);

	if (p2->p_emul->e_lwp_fork)
		(*p2->p_emul->e_lwp_fork)(l1, l2);

	return (0);
}
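
/*
 * Illustrative sketch of a lwp_create() caller (for example fork1() or
 * the _lwp_create() system call).  "uaddr"/"inmem" stand for the result
 * of a u-area allocation and "entry"/"arg" for the function the new LWP
 * should run; all are placeholders here:
 *
 *	error = lwp_create(curlwp, p, uaddr, inmem, 0, NULL, 0,
 *	    entry, arg, &l2, curlwp->l_class);
 *	if (error != 0)
 *		return error;
 *
 * The new LWP is returned in LSIDL; as noted above, the caller must then
 * lock it and move it to LSRUN, LSSUSPENDED or LSSTOP itself.
 */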

/*
 * Called by MD code when a new LWP begins execution.  Must be called
 * with the previous LWP locked (so at splsched), or if there is no
 * previous LWP, at splsched.
 */
void
lwp_startup(struct lwp *prev, struct lwp *new)
{

	if (prev != NULL) {
		lwp_unlock(prev);
	}
	spl0();
	pmap_activate(new);
	LOCKDEBUG_BARRIER(NULL, 0);
	if ((new->l_pflag & LP_MPSAFE) == 0) {
		KERNEL_LOCK(1, new);
	}
}

/*
 * Exit an LWP.
 */
void
lwp_exit(struct lwp *l)
{
	struct proc *p = l->l_proc;
	struct lwp *l2;
	bool current;

	current = (l == curlwp);

	DPRINTF(("lwp_exit: %d.%d exiting.\n", p->p_pid, l->l_lid));
	DPRINTF((" nlwps: %d nzlwps: %d\n", p->p_nlwps, p->p_nzlwps));
	KASSERT(current || l->l_stat == LSIDL);

	/*
	 * Verify that we hold no locks other than the kernel lock.
	 */
#ifdef MULTIPROCESSOR
	LOCKDEBUG_BARRIER(&kernel_lock, 0);
#else
	LOCKDEBUG_BARRIER(NULL, 0);
#endif

	/*
	 * If we are the last live LWP in a process, we need to exit the
	 * entire process.  We do so with an exit status of zero, because
	 * it's a "controlled" exit, and because that's what Solaris does.
	 *
	 * We are not quite a zombie yet, but for accounting purposes we
	 * must increment the count of zombies here.
	 *
	 * Note: the last LWP's specificdata will be deleted here.
	 */
	mutex_enter(&p->p_smutex);
	if (p->p_nlwps - p->p_nzlwps == 1) {
		KASSERT(current == true);
		DPRINTF(("lwp_exit: %d.%d calling exit1()\n",
		    p->p_pid, l->l_lid));
		exit1(l, 0);
		/* NOTREACHED */
	}
	p->p_nzlwps++;
	mutex_exit(&p->p_smutex);

	if (p->p_emul->e_lwp_exit)
		(*p->p_emul->e_lwp_exit)(l);

	/* Delete the specificdata while it's still safe to sleep. */
	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);

	/*
	 * Release our cached credentials.
	 */
	kauth_cred_free(l->l_cred);
	callout_destroy(&l->l_timeout_ch);

	/*
	 * While we can still block, mark the LWP as unswappable to
	 * prevent conflicts with the swapper.
	 */
	if (current)
		uvm_lwp_hold(l);

	/*
	 * Remove the LWP from the global list.
	 */
	mutex_enter(&proclist_lock);
	mutex_enter(&proclist_mutex);
	LIST_REMOVE(l, l_list);
	mutex_exit(&proclist_mutex);
	mutex_exit(&proclist_lock);

	/*
	 * Get rid of all references to the LWP that others (e.g. procfs)
	 * may have, and mark the LWP as a zombie.  If the LWP is detached,
	 * mark it waiting for collection in the proc structure.  Note that
	 * before we can do that, we need to free any other dead, detached
	 * LWP waiting to meet its maker.
	 *
	 * XXXSMP disable preemption.
	 */
	mutex_enter(&p->p_smutex);
	lwp_drainrefs(l);

	if ((l->l_prflag & LPR_DETACHED) != 0) {
		while ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);	/* releases proc mutex */
			mutex_enter(&p->p_smutex);
			l->l_refcnt++;
			lwp_drainrefs(l);
		}
		p->p_zomblwp = l;
	}

	/*
	 * If we find a pending signal for the process and we have been
	 * asked to check for signals, then we lose: arrange to have
	 * all other LWPs in the process check for signals.
	 */
	if ((l->l_flag & LW_PENDSIG) != 0 &&
	    firstsig(&p->p_sigpend.sp_set) != 0) {
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			lwp_lock(l2);
			l2->l_flag |= LW_PENDSIG;
			lwp_unlock(l2);
		}
	}

	lwp_lock(l);
	l->l_stat = LSZOMB;
	lwp_unlock(l);
	p->p_nrlwps--;
	cv_broadcast(&p->p_lwpcv);
	mutex_exit(&p->p_smutex);

	/*
	 * We can no longer block.  At this point, lwp_free() may already
	 * be gunning for us.  On a multi-CPU system, we may be off p_lwps.
	 *
	 * Free MD LWP resources.
	 */
#ifndef __NO_CPU_LWP_FREE
	cpu_lwp_free(l, 0);
#endif

	if (current) {
		pmap_deactivate(l);

		/*
		 * Release the kernel lock, and switch away into
		 * oblivion.
		 */
#ifdef notyet
		/* XXXSMP hold in lwp_userret() */
		KERNEL_UNLOCK_LAST(l);
#else
		KERNEL_UNLOCK_ALL(l, NULL);
#endif
		lwp_exit_switchaway(l);
	}
}
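
/*
 * Illustrative sketch: a kernel thread that has finished its work
 * terminates itself by calling lwp_exit() on its own LWP, typically via
 * a wrapper such as kthread_exit().  "example_thread", "workqueue_empty"
 * and "process_work" below are placeholders, not real interfaces:
 *
 *	void
 *	example_thread(void *arg)
 *	{
 *
 *		while (!workqueue_empty(arg))
 *			process_work(arg);
 *		lwp_exit(curlwp);	// never returns
 *	}
 */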

void
lwp_exit_switchaway(struct lwp *l)
{
	struct cpu_info *ci;
	struct lwp *idlelwp;

	/* Unlocked, but is for statistics only. */
	uvmexp.swtch++;

	(void)splsched();
	l->l_flag &= ~LW_RUNNING;
	ci = curcpu();
	idlelwp = ci->ci_data.cpu_idlelwp;
	idlelwp->l_stat = LSONPROC;

	/*
	 * cpu_onproc must be updated with the CPU locked, as
	 * aston() may try to set an AST pending on the LWP (and
	 * it does so with the CPU locked).  Otherwise, the LWP
	 * may be destroyed before the AST can be set, leading
	 * to a use-after-free.
	 */
	spc_lock(ci);
	ci->ci_data.cpu_onproc = idlelwp;
	spc_unlock(ci);
	cpu_switchto(NULL, idlelwp, false);
}

/*
 * Free a dead LWP's remaining resources.
 *
 * XXXLWP limits.
 */
void
lwp_free(struct lwp *l, bool recycle, bool last)
{
	struct proc *p = l->l_proc;
	ksiginfoq_t kq;

	/*
	 * If this was not the last LWP in the process, then adjust
	 * counters and unlock.
	 */
	if (!last) {
		/*
		 * Add the LWP's run time to the process' base value.
		 * This needs to coincide with coming off p_lwps.
		 */
		timeradd(&l->l_rtime, &p->p_rtime, &p->p_rtime);
		p->p_pctcpu += l->l_pctcpu;
		LIST_REMOVE(l, l_sibling);
		p->p_nlwps--;
		p->p_nzlwps--;
		if ((l->l_prflag & LPR_DETACHED) != 0)
			p->p_ndlwps--;

		/*
		 * Have any LWPs sleeping in lwp_wait() recheck for
		 * deadlock.
		 */
		cv_broadcast(&p->p_lwpcv);
		mutex_exit(&p->p_smutex);
	}

#ifdef MULTIPROCESSOR
	/*
	 * In the unlikely event that the LWP is still on the CPU,
	 * then spin until it has switched away.  We need to release
	 * all locks to avoid deadlock against interrupt handlers on
	 * the target CPU.
	 */
	if ((l->l_flag & LW_RUNNING) != 0 || l->l_cpu->ci_curlwp == l) {
		int count;
		(void)count; /* XXXgcc */
		KERNEL_UNLOCK_ALL(curlwp, &count);
		while ((l->l_flag & LW_RUNNING) != 0 ||
		    l->l_cpu->ci_curlwp == l)
			SPINLOCK_BACKOFF_HOOK;
		KERNEL_LOCK(count, curlwp);
	}
#endif

	/*
	 * Destroy the LWP's remaining signal information.
	 */
	ksiginfo_queue_init(&kq);
	sigclear(&l->l_sigpend, NULL, &kq);
	ksiginfo_queue_drain(&kq);
	cv_destroy(&l->l_sigcv);
	mutex_destroy(&l->l_swaplock);

	/*
	 * Free the LWP's turnstile and the LWP structure itself unless the
	 * caller wants to recycle them.  Also, free the scheduler specific
	 * data.
	 *
	 * We can't return turnstile0 to the pool (it didn't come from it),
	 * so if it comes up just drop it quietly and move on.
	 *
	 * We don't recycle the VM resources at this time.
	 */
	KERNEL_LOCK(1, curlwp);		/* XXXSMP */

	sched_lwp_exit(l);

	if (!recycle && l->l_ts != &turnstile0)
		pool_cache_put(turnstile_cache, l->l_ts);
#ifndef __NO_CPU_LWP_FREE
	cpu_lwp_free2(l);
#endif
	uvm_lwp_exit(l);
	KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
	KASSERT(l->l_inheritedprio == -1);
	if (!recycle)
		pool_put(&lwp_pool, l);
	KERNEL_UNLOCK_ONE(curlwp);	/* XXXSMP */
}
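
/*
 * Sketch of the reaping convention used with lwp_free() above: callers
 * such as lwp_wait1(), lwp_exit() and lwp_create() enter with p_smutex
 * held and rely on lwp_free() dropping it when 'last' is false:
 *
 *	KASSERT(mutex_owned(&p->p_smutex));
 *	if ((l2 = p->p_zomblwp) != NULL) {
 *		p->p_zomblwp = NULL;
 *		lwp_free(l2, false, false);	// releases p_smutex
 *		mutex_enter(&p->p_smutex);
 *	}
 */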

/*
 * Pick a LWP to represent the process for those operations which
 * want information about a "process" that is actually associated
 * with a LWP.
 *
 * If 'locking' is false, no locking or lock checks are performed.
 * This is intended for use by DDB.
 *
 * We don't bother locking the LWP here, since code that uses this
 * interface is broken by design and an exact match is not required.
 */
struct lwp *
proc_representative_lwp(struct proc *p, int *nrlwps, int locking)
{
	struct lwp *l, *onproc, *running, *sleeping, *stopped, *suspended;
	struct lwp *signalled;
	int cnt;

	if (locking) {
		KASSERT(mutex_owned(&p->p_smutex));
	}

	/* Trivial case: only one LWP */
	if (p->p_nlwps == 1) {
		l = LIST_FIRST(&p->p_lwps);
		if (nrlwps)
			*nrlwps = (l->l_stat == LSONPROC || l->l_stat == LSRUN);
		return l;
	}

	cnt = 0;
	switch (p->p_stat) {
	case SSTOP:
	case SACTIVE:
		/* Pick the most live LWP */
		onproc = running = sleeping = stopped = suspended = NULL;
		signalled = NULL;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0) {
				continue;
			}
			if (l->l_lid == p->p_sigctx.ps_lwp)
				signalled = l;
			switch (l->l_stat) {
			case LSONPROC:
				onproc = l;
				cnt++;
				break;
			case LSRUN:
				running = l;
				cnt++;
				break;
			case LSSLEEP:
				sleeping = l;
				break;
			case LSSTOP:
				stopped = l;
				break;
			case LSSUSPENDED:
				suspended = l;
				break;
			}
		}
		if (nrlwps)
			*nrlwps = cnt;
		if (signalled)
			l = signalled;
		else if (onproc)
			l = onproc;
		else if (running)
			l = running;
		else if (sleeping)
			l = sleeping;
		else if (stopped)
			l = stopped;
		else if (suspended)
			l = suspended;
		else
			break;
		return l;
#ifdef DIAGNOSTIC
	case SIDL:
	case SZOMB:
	case SDYING:
	case SDEAD:
		if (locking)
			mutex_exit(&p->p_smutex);
		/*
		 * We have more than one LWP and we're in SIDL?
		 * How'd that happen?
		 */
		panic("Too many LWPs in idle/dying process %d (%s) stat = %d",
		    p->p_pid, p->p_comm, p->p_stat);
		break;
	default:
		if (locking)
			mutex_exit(&p->p_smutex);
		panic("Process %d (%s) in unknown state %d",
		    p->p_pid, p->p_comm, p->p_stat);
#endif
	}

	if (locking)
		mutex_exit(&p->p_smutex);
	panic("proc_representative_lwp: couldn't find a lwp for process"
	    " %d (%s)", p->p_pid, p->p_comm);
	/* NOTREACHED */
	return NULL;
}

/*
 * Look up a live LWP within the specified process.  The LWP is not
 * locked by this routine; callers that need the LWP lock must take it
 * themselves.
 *
 * Must be called with p->p_smutex held.
 */
struct lwp *
lwp_find(struct proc *p, int id)
{
	struct lwp *l;

	KASSERT(mutex_owned(&p->p_smutex));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_lid == id)
			break;
	}

	/*
	 * No need to lock - all of these conditions will
	 * be visible with the process level mutex held.
	 */
	if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB))
		l = NULL;

	return l;
}

/*
 * Update an LWP's cached credentials to mirror the process' master copy.
 *
 * This happens early in the syscall path, on user trap, and on LWP
 * creation.  A long-running LWP can also voluntarily choose to update
 * its credentials by calling this routine.  This may be called from
 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
 */
void
lwp_update_creds(struct lwp *l)
{
	kauth_cred_t oc;
	struct proc *p;

	p = l->l_proc;
	oc = l->l_cred;

	mutex_enter(&p->p_mutex);
	kauth_cred_hold(p->p_cred);
	l->l_cred = p->p_cred;
	mutex_exit(&p->p_mutex);
	if (oc != NULL) {
		KERNEL_LOCK(1, l);	/* XXXSMP */
		kauth_cred_free(oc);
		KERNEL_UNLOCK_ONE(l);	/* XXXSMP */
	}
}
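
/*
 * Illustrative sketch: a long-running LWP that wants to notice
 * credential changes refreshes its cached credentials as described
 * above.  The check-then-call step is what the LWP_CACHE_CREDS()
 * wrapper performs:
 *
 *	if (l->l_cred != p->p_cred)
 *		lwp_update_creds(l);
 */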

/*
 * Verify that an LWP is locked, and optionally verify that the lock matches
 * one we specify.
 */
int
lwp_locked(struct lwp *l, kmutex_t *mtx)
{
	kmutex_t *cur = l->l_mutex;

	return mutex_owned(cur) && (mtx == cur || mtx == NULL);
}

/*
 * Lock an LWP.
 */
void
lwp_lock_retry(struct lwp *l, kmutex_t *old)
{

	/*
	 * XXXgcc ignoring kmutex_t * volatile on i386
	 *
	 * gcc version 4.1.2 20061021 prerelease (NetBSD nb1 20061021)
	 */
#if 1
	while (l->l_mutex != old) {
#else
	for (;;) {
#endif
		mutex_spin_exit(old);
		old = l->l_mutex;
		mutex_spin_enter(old);

		/*
		 * mutex_enter() will have posted a read barrier.  Re-test
		 * l->l_mutex.  If it has changed, we need to try again.
		 */
#if 1
	}
#else
	} while (__predict_false(l->l_mutex != old));
#endif
}

/*
 * Lend a new mutex to an LWP.  The old mutex must be held.
 */
void
lwp_setlock(struct lwp *l, kmutex_t *new)
{

	KASSERT(mutex_owned(l->l_mutex));

	mb_write();
	l->l_mutex = new;
}

/*
 * Lend a new mutex to an LWP, and release the old mutex.  The old mutex
 * must be held.
 */
void
lwp_unlock_to(struct lwp *l, kmutex_t *new)
{
	kmutex_t *old;

	KASSERT(mutex_owned(l->l_mutex));

	old = l->l_mutex;
	mb_write();
	l->l_mutex = new;
	mutex_spin_exit(old);
}

/*
 * Acquire a new mutex, and donate it to an LWP.  The LWP must already be
 * locked.
 */
void
lwp_relock(struct lwp *l, kmutex_t *new)
{
	kmutex_t *old;

	KASSERT(mutex_owned(l->l_mutex));

	old = l->l_mutex;
	if (old != new) {
		mutex_spin_enter(new);
		l->l_mutex = new;
		mutex_spin_exit(old);
	}
}

int
lwp_trylock(struct lwp *l)
{
	kmutex_t *old;

	for (;;) {
		if (!mutex_tryenter(old = l->l_mutex))
			return 0;
		if (__predict_true(l->l_mutex == old))
			return 1;
		mutex_spin_exit(old);
	}
}
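
/*
 * Illustrative sketch (the real inline wrappers live in sys/lwp.h): a
 * lwp_lock() style wrapper must allow for l_mutex changing between the
 * fetch of the pointer and the acquisition of the mutex, which is what
 * lwp_lock_retry() above handles:
 *
 *	kmutex_t *old = l->l_mutex;
 *
 *	mutex_spin_enter(old);
 *	if (__predict_false(l->l_mutex != old))
 *		lwp_lock_retry(l, old);	// chase the moving lock pointer
 */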

/*
 * Handle exceptions for mi_userret().  Called if a member of LW_USERRET is
 * set.
 */
void
lwp_userret(struct lwp *l)
{
	struct proc *p;
	void (*hook)(void);
	int sig;

	p = l->l_proc;

#ifndef __HAVE_FAST_SOFTINTS
	/* Run pending soft interrupts. */
	if (l->l_cpu->ci_data.cpu_softints != 0)
		softint_overlay();
#endif

	/*
	 * It should be safe to do this read unlocked on a multiprocessor
	 * system.
	 */
	while ((l->l_flag & LW_USERRET) != 0) {
		/*
		 * Process pending signals first, unless the process
		 * is dumping core or exiting, where we will instead
		 * enter the LW_WSUSPEND case below.
		 */
		if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) ==
		    LW_PENDSIG) {
			KERNEL_LOCK(1, l);	/* XXXSMP pool_put() below */
			mutex_enter(&p->p_smutex);
			while ((sig = issignal(l)) != 0)
				postsig(sig);
			mutex_exit(&p->p_smutex);
			KERNEL_UNLOCK_LAST(l);	/* XXXSMP */
		}

		/*
		 * Core-dump or suspend pending.
		 *
		 * In case of core dump, suspend ourselves, so that the
		 * kernel stack and therefore the userland registers saved
		 * in the trapframe are around for coredump() to write them
		 * out.  We issue a wakeup on p->p_lwpcv so that sigexit()
		 * will write the core file out once all other LWPs are
		 * suspended.
		 */
		if ((l->l_flag & LW_WSUSPEND) != 0) {
			mutex_enter(&p->p_smutex);
			p->p_nrlwps--;
			cv_broadcast(&p->p_lwpcv);
			lwp_lock(l);
			l->l_stat = LSSUSPENDED;
			mutex_exit(&p->p_smutex);
			mi_switch(l);
		}

		/* Process is exiting. */
		if ((l->l_flag & LW_WEXIT) != 0) {
			KERNEL_LOCK(1, l);
			lwp_exit(l);
			KASSERT(0);
			/* NOTREACHED */
		}

		/* Call userret hook; used by Linux emulation. */
		if ((l->l_flag & LW_WUSERRET) != 0) {
			lwp_lock(l);
			l->l_flag &= ~LW_WUSERRET;
			lwp_unlock(l);
			hook = p->p_userret;
			p->p_userret = NULL;
			(*hook)();
		}
	}
}

/*
 * Force an LWP to enter the kernel, to take a trip through lwp_userret().
 */
void
lwp_need_userret(struct lwp *l)
{
	KASSERT(lwp_locked(l, NULL));

	/*
	 * Since the tests in lwp_userret() are done unlocked, make sure
	 * that the condition will be seen before forcing the LWP to enter
	 * kernel mode.
	 */
	mb_write();
	cpu_signotify(l);
}

/*
 * Add one reference to an LWP.  This will prevent the LWP from
 * exiting, thus keeping the lwp structure and PCB around to inspect.
 */
void
lwp_addref(struct lwp *l)
{

	KASSERT(mutex_owned(&l->l_proc->p_smutex));
	KASSERT(l->l_stat != LSZOMB);
	KASSERT(l->l_refcnt != 0);

	l->l_refcnt++;
}

/*
 * Remove one reference to an LWP.  If this is the last reference,
 * then we must finalize the LWP's death.
 */
void
lwp_delref(struct lwp *l)
{
	struct proc *p = l->l_proc;

	mutex_enter(&p->p_smutex);
	KASSERT(l->l_stat != LSZOMB);
	KASSERT(l->l_refcnt > 0);
	if (--l->l_refcnt == 0)
		cv_broadcast(&p->p_lwpcv);
	mutex_exit(&p->p_smutex);
}

/*
 * Drain all references to the current LWP.
 */
void
lwp_drainrefs(struct lwp *l)
{
	struct proc *p = l->l_proc;

	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(l->l_refcnt != 0);

	l->l_refcnt--;
	while (l->l_refcnt != 0)
		cv_wait(&p->p_lwpcv, &p->p_smutex);
}
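
/*
 * Sketch of the reference-holding pattern (illustrative; code like
 * procfs is the intended user): take the reference with p_smutex held,
 * drop the mutex while inspecting the LWP, and release the reference
 * afterwards.  "lid" is a placeholder for the LWP ID of interest:
 *
 *	mutex_enter(&p->p_smutex);
 *	if ((t = lwp_find(p, lid)) == NULL) {
 *		mutex_exit(&p->p_smutex);
 *		return ESRCH;
 *	}
 *	lwp_addref(t);
 *	mutex_exit(&p->p_smutex);
 *
 *	... inspect t's PCB or statistics ...
 *
 *	lwp_delref(t);			// may be the final reference
 */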

/*
 * lwp_specific_key_create --
 *	Create a key for subsystem lwp-specific data.
 */
int
lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor));
}

/*
 * lwp_specific_key_delete --
 *	Delete a key for subsystem lwp-specific data.
 */
void
lwp_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(lwp_specificdata_domain, key);
}

/*
 * lwp_initspecific --
 *	Initialize an LWP's specificdata container.
 */
void
lwp_initspecific(struct lwp *l)
{
	int error;

	error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref);
	KASSERT(error == 0);
}

/*
 * lwp_finispecific --
 *	Finalize an LWP's specificdata container.
 */
void
lwp_finispecific(struct lwp *l)
{

	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);
}

/*
 * lwp_getspecific --
 *	Return lwp-specific data corresponding to the specified key.
 *
 *	Note: LWP specific data is NOT INTERLOCKED.  An LWP should access
 *	only its OWN SPECIFIC DATA.  If it is necessary to access another
 *	LWP's specific data, care must be taken to ensure that doing so
 *	would not cause internal data structure inconsistency (i.e. caller
 *	can guarantee that the target LWP is not inside an lwp_getspecific()
 *	or lwp_setspecific() call).
 */
void *
lwp_getspecific(specificdata_key_t key)
{

	return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
	    &curlwp->l_specdataref, key));
}

void *
_lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key)
{

	return (specificdata_getspecific_unlocked(lwp_specificdata_domain,
	    &l->l_specdataref, key));
}

/*
 * lwp_setspecific --
 *	Set lwp-specific data corresponding to the specified key.
 */
void
lwp_setspecific(specificdata_key_t key, void *data)
{

	specificdata_setspecific(lwp_specificdata_domain,
	    &curlwp->l_specdataref, key, data);
}
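
/*
 * Illustrative sketch of how a subsystem would use the lwp-specific data
 * interfaces above.  "example_key", "example_dtor" and "data" are
 * placeholders, not real interfaces:
 *
 *	static specificdata_key_t example_key;
 *
 *	// once, at subsystem initialization
 *	error = lwp_specific_key_create(&example_key, example_dtor);
 *
 *	// later, from the owning LWP only
 *	lwp_setspecific(example_key, data);
 *	data = lwp_getspecific(example_key);
 */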