/*	$NetBSD: kern_lwp.c,v 1.80 2007/11/13 11:38:35 skrll Exp $	*/

/*-
 * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Overview
 *
 *	Lightweight processes (LWPs) are the basic unit or thread of
 *	execution within the kernel.  The core state of an LWP is described
 *	by "struct lwp", also known as lwp_t.
 *
 *	Each LWP is contained within a process (described by "struct proc").
 *	Every process contains at least one LWP, but may contain more.  The
 *	process describes attributes shared among all of its LWPs such as a
 *	private address space, global execution state (stopped, active,
 *	zombie, ...), signal disposition and so on.  On a multiprocessor
 *	machine, multiple LWPs may be executing concurrently in the kernel.
 *
 * Execution states
 *
 *	At any given time, an LWP has overall state that is described by
 *	lwp::l_stat.  The states are broken into two sets below.  The first
 *	set is guaranteed to represent the absolute, current state of the
 *	LWP:
 *
 *	LSONPROC
 *
 *		On processor: the LWP is executing on a CPU, either in the
 *		kernel or in user space.
 *
 *	LSRUN
 *
 *		Runnable: the LWP is parked on a run queue, and may soon be
 *		chosen to run by an idle processor, or by a processor that
 *		has been asked to preempt a currently running but lower
 *		priority LWP.  If the LWP is not swapped in (L_INMEM == 0)
 *		then the LWP is not on a run queue, but may be soon.
 *
 *	LSIDL
 *
 *		Idle: the LWP has been created but has not yet executed,
 *		or it has ceased executing a unit of work and is waiting
 *		to be started again.
 *
 *	LSSUSPENDED:
 *
 *		Suspended: the LWP has had its execution suspended by
 *		another LWP in the same process using the _lwp_suspend()
 *		system call.  User-level LWPs also enter the suspended
 *		state when the system is shutting down.
 *
 *	The second set represents a "statement of intent" on behalf of the
 *	LWP.  The LWP may in fact be executing on a processor, or may be
 *	sleeping or idle.  It is expected to take the necessary action to
 *	stop executing or become "running" again within a short timeframe.
 *	The LW_RUNNING flag in lwp::l_flag indicates that an LWP is running.
 *	Importantly, it indicates that its state is tied to a CPU.
 *
 *	LSZOMB:
 *
 *		Dead or dying: the LWP has released most of its resources
 *		and is a) about to switch away into oblivion b) has already
 *		switched away.  When it switches away, its few remaining
 *		resources can be collected.
 *
 *	LSSLEEP:
 *
 *		Sleeping: the LWP has entered itself onto a sleep queue, and
 *		has switched away or will switch away shortly to allow other
 *		LWPs to run on the CPU.
 *
 *	LSSTOP:
 *
 *		Stopped: the LWP has been stopped as a result of a job
 *		control signal, or as a result of the ptrace() interface.
 *
 *		Stopped LWPs may run briefly within the kernel to handle
 *		signals that they receive, but will not return to user space
 *		until their process' state is changed away from stopped.
 *
 *		Individual LWPs within a process cannot be stopped
 *		selectively: all actions that can stop or continue LWPs
 *		occur at the process level.
 *
 * State transitions
 *
 *	Note that the LSSTOP state may only be set when returning to
 *	user space in userret(), or when sleeping interruptibly.  The
 *	LSSUSPENDED state may only be set in userret().  Before setting
 *	those states, we try to ensure that the LWPs will release all
 *	locks that they hold, and at a minimum try to ensure that the
 *	LWP can be set runnable again by a signal.
 *
 *	LWPs may transition states in the following ways:
 *
 *	 RUN -------> ONPROC		ONPROC -----> RUN
 *		    > STOPPED			    > SLEEP
 *		    > SUSPENDED			    > STOPPED
 *						    > SUSPENDED
 *						    > ZOMB
 *
 *	 STOPPED ---> RUN		SUSPENDED --> RUN
 *		    > SLEEP			    > SLEEP
 *
 *	 SLEEP -----> ONPROC		IDL --------> RUN
 *		    > RUN			    > SUSPENDED
 *		    > STOPPED			    > STOPPED
 *		    > SUSPENDED
 *
 *	Other state transitions are possible with kernel threads (e.g.
 *	ONPROC -> IDL), but only happen under tightly controlled
 *	circumstances where the side effects are understood.
 *
 * Locking
 *
 *	The majority of fields in 'struct lwp' are covered by a single,
 *	general spin lock pointed to by lwp::l_mutex.  The locks covering
 *	each field are documented in sys/lwp.h.
 *
 *	State transitions must be made with the LWP's general lock held,
 *	and may cause the LWP's lock pointer to change.  Manipulation of
 *	the general lock is not performed directly, but through calls to
 *	lwp_lock(), lwp_relock() and similar.
 *
 *	States and their associated locks:
 *
 *	LSONPROC, LSZOMB:
 *
 *		Always covered by spc_lwplock, which protects running LWPs.
 *		This is a per-CPU lock.
 *
 *	LSIDL, LSRUN:
 *
 *		Always covered by spc_mutex, which protects the run queues.
 *		This may be a per-CPU lock, depending on the scheduler.
 *
 *	LSSLEEP:
 *
 *		Covered by a lock associated with the sleep queue that the
 *		LWP resides on, indirectly referenced by l_sleepq->sq_mutex.
 *
 *	LSSTOP, LSSUSPENDED:
 *
 *		If the LWP was previously sleeping (l_wchan != NULL), then
 *		l_mutex references the sleep queue lock.  If the LWP was
 *		runnable or on the CPU when halted, or has been removed from
 *		the sleep queue since halted, then the lock is spc_lwplock.
 *
 *	The lock order is as follows:
 *
 *		spc::spc_lwplock ->
 *		    sleepq_t::sq_mutex ->
 *			tschain_t::tc_mutex ->
 *			    spc::spc_mutex
 *
 *	Each process has a scheduler state lock (proc::p_smutex), and a
 *	number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and
 *	so on.  When an LWP is to be entered into or removed from one of the
 *	following states, p_smutex must be held and the process wide counters
 *	adjusted:
 *
 *		LSIDL, LSZOMB, LSSTOP, LSSUSPENDED
 *
 *	Note that an LWP is considered running or likely to run soon if in
 *	one of the following states.  This affects the value of p_nrlwps:
 *
 *		LSRUN, LSONPROC, LSSLEEP
 *
 *	p_smutex does not need to be held when transitioning among these
 *	three states.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.80 2007/11/13 11:38:35 skrll Exp $");

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#define _LWP_API_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/syscallargs.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>
#include <sys/kmem.h>
#include <sys/intr.h>
#include <sys/lwpctl.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

struct lwplist alllwp = LIST_HEAD_INITIALIZER(alllwp);

POOL_INIT(lwp_pool, sizeof(struct lwp), MIN_LWP_ALIGNMENT, 0, 0, "lwppl",
    &pool_allocator_nointr, IPL_NONE);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr, IPL_NONE);

static specificdata_domain_t lwp_specificdata_domain;

void
lwpinit(void)
{

	lwp_specificdata_domain = specificdata_domain_create();
	KASSERT(lwp_specificdata_domain != NULL);
	lwp_sys_init();
}
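/*
 * Illustrative sketch (not compiled): how a caller is expected to drive
 * lwp_find() and lwp_suspend() below while honouring the locking rules
 * described at the top of this file - take the process' p_smutex first,
 * then the target LWP's general lock; lwp_suspend() consumes the LWP lock
 * but leaves p_smutex to the caller.  "example_suspend_lwp" is a
 * hypothetical helper, not part of this file.
 */
#if 0
static int
example_suspend_lwp(struct proc *p, lwpid_t lid)
{
	struct lwp *t;
	int error;

	mutex_enter(&p->p_smutex);		/* scheduler state lock */
	if ((t = lwp_find(p, lid)) == NULL) {	/* requires p_smutex held */
		mutex_exit(&p->p_smutex);
		return ESRCH;
	}
	lwp_lock(t);				/* take the LWP's general lock */
	error = lwp_suspend(curlwp, t);		/* unlocks t before returning */
	mutex_exit(&p->p_smutex);
	return error;
}
#endif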
/*
 * Set an LWP suspended.
 *
 * Must be called with p_smutex held, and the LWP locked.  Will unlock the
 * LWP before return.
 */
int
lwp_suspend(struct lwp *curl, struct lwp *t)
{
	int error;

	KASSERT(mutex_owned(&t->l_proc->p_smutex));
	KASSERT(lwp_locked(t, NULL));

	KASSERT(curl != t || curl->l_stat == LSONPROC);

	/*
	 * If the current LWP has been told to exit, we must not suspend anyone
	 * else or deadlock could occur.  We won't return to userspace.
	 */
	if ((curl->l_flag & (LW_WEXIT | LW_WCORE)) != 0) {
		lwp_unlock(t);
		return (EDEADLK);
	}

	error = 0;

	switch (t->l_stat) {
	case LSRUN:
	case LSONPROC:
		t->l_flag |= LW_WSUSPEND;
		lwp_need_userret(t);
		lwp_unlock(t);
		break;

	case LSSLEEP:
		t->l_flag |= LW_WSUSPEND;

		/*
		 * Kick the LWP and try to get it to the kernel boundary
		 * so that it will release any locks that it holds.
		 * setrunnable() will release the lock.
292 */ 293 if ((t->l_flag & LW_SINTR) != 0) 294 setrunnable(t); 295 else 296 lwp_unlock(t); 297 break; 298 299 case LSSUSPENDED: 300 lwp_unlock(t); 301 break; 302 303 case LSSTOP: 304 t->l_flag |= LW_WSUSPEND; 305 setrunnable(t); 306 break; 307 308 case LSIDL: 309 case LSZOMB: 310 error = EINTR; /* It's what Solaris does..... */ 311 lwp_unlock(t); 312 break; 313 } 314 315 return (error); 316 } 317 318 /* 319 * Restart a suspended LWP. 320 * 321 * Must be called with p_smutex held, and the LWP locked. Will unlock the 322 * LWP before return. 323 */ 324 void 325 lwp_continue(struct lwp *l) 326 { 327 328 KASSERT(mutex_owned(&l->l_proc->p_smutex)); 329 KASSERT(lwp_locked(l, NULL)); 330 331 /* If rebooting or not suspended, then just bail out. */ 332 if ((l->l_flag & LW_WREBOOT) != 0) { 333 lwp_unlock(l); 334 return; 335 } 336 337 l->l_flag &= ~LW_WSUSPEND; 338 339 if (l->l_stat != LSSUSPENDED) { 340 lwp_unlock(l); 341 return; 342 } 343 344 /* setrunnable() will release the lock. */ 345 setrunnable(l); 346 } 347 348 /* 349 * Wait for an LWP within the current process to exit. If 'lid' is 350 * non-zero, we are waiting for a specific LWP. 351 * 352 * Must be called with p->p_smutex held. 353 */ 354 int 355 lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags) 356 { 357 struct proc *p = l->l_proc; 358 struct lwp *l2; 359 int nfound, error; 360 lwpid_t curlid; 361 bool exiting; 362 363 KASSERT(mutex_owned(&p->p_smutex)); 364 365 p->p_nlwpwait++; 366 l->l_waitingfor = lid; 367 curlid = l->l_lid; 368 exiting = ((flags & LWPWAIT_EXITCONTROL) != 0); 369 370 for (;;) { 371 /* 372 * Avoid a race between exit1() and sigexit(): if the 373 * process is dumping core, then we need to bail out: call 374 * into lwp_userret() where we will be suspended until the 375 * deed is done. 376 */ 377 if ((p->p_sflag & PS_WCORE) != 0) { 378 mutex_exit(&p->p_smutex); 379 lwp_userret(l); 380 #ifdef DIAGNOSTIC 381 panic("lwp_wait1"); 382 #endif 383 /* NOTREACHED */ 384 } 385 386 /* 387 * First off, drain any detached LWP that is waiting to be 388 * reaped. 389 */ 390 while ((l2 = p->p_zomblwp) != NULL) { 391 p->p_zomblwp = NULL; 392 lwp_free(l2, false, false);/* releases proc mutex */ 393 mutex_enter(&p->p_smutex); 394 } 395 396 /* 397 * Now look for an LWP to collect. If the whole process is 398 * exiting, count detached LWPs as eligible to be collected, 399 * but don't drain them here. 400 */ 401 nfound = 0; 402 error = 0; 403 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 404 /* 405 * If a specific wait and the target is waiting on 406 * us, then avoid deadlock. This also traps LWPs 407 * that try to wait on themselves. 408 * 409 * Note that this does not handle more complicated 410 * cycles, like: t1 -> t2 -> t3 -> t1. The process 411 * can still be killed so it is not a major problem. 412 */ 413 if (l2->l_lid == lid && l2->l_waitingfor == curlid) { 414 error = EDEADLK; 415 break; 416 } 417 if (l2 == l) 418 continue; 419 if ((l2->l_prflag & LPR_DETACHED) != 0) { 420 nfound += exiting; 421 continue; 422 } 423 if (lid != 0) { 424 if (l2->l_lid != lid) 425 continue; 426 /* 427 * Mark this LWP as the first waiter, if there 428 * is no other. 429 */ 430 if (l2->l_waiter == 0) 431 l2->l_waiter = curlid; 432 } else if (l2->l_waiter != 0) { 433 /* 434 * It already has a waiter - so don't 435 * collect it. If the waiter doesn't 436 * grab it we'll get another chance 437 * later. 438 */ 439 nfound++; 440 continue; 441 } 442 nfound++; 443 444 /* No need to lock the LWP in order to see LSZOMB. 
*/ 445 if (l2->l_stat != LSZOMB) 446 continue; 447 448 /* 449 * We're no longer waiting. Reset the "first waiter" 450 * pointer on the target, in case it was us. 451 */ 452 l->l_waitingfor = 0; 453 l2->l_waiter = 0; 454 p->p_nlwpwait--; 455 if (departed) 456 *departed = l2->l_lid; 457 sched_lwp_collect(l2); 458 459 /* lwp_free() releases the proc lock. */ 460 lwp_free(l2, false, false); 461 mutex_enter(&p->p_smutex); 462 return 0; 463 } 464 465 if (error != 0) 466 break; 467 if (nfound == 0) { 468 error = ESRCH; 469 break; 470 } 471 472 /* 473 * The kernel is careful to ensure that it can not deadlock 474 * when exiting - just keep waiting. 475 */ 476 if (exiting) { 477 KASSERT(p->p_nlwps > 1); 478 cv_wait(&p->p_lwpcv, &p->p_smutex); 479 continue; 480 } 481 482 /* 483 * If all other LWPs are waiting for exits or suspends 484 * and the supply of zombies and potential zombies is 485 * exhausted, then we are about to deadlock. 486 * 487 * If the process is exiting (and this LWP is not the one 488 * that is coordinating the exit) then bail out now. 489 */ 490 if ((p->p_sflag & PS_WEXIT) != 0 || 491 p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) { 492 error = EDEADLK; 493 break; 494 } 495 496 /* 497 * Sit around and wait for something to happen. We'll be 498 * awoken if any of the conditions examined change: if an 499 * LWP exits, is collected, or is detached. 500 */ 501 if ((error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex)) != 0) 502 break; 503 } 504 505 /* 506 * We didn't find any LWPs to collect, we may have received a 507 * signal, or some other condition has caused us to bail out. 508 * 509 * If waiting on a specific LWP, clear the waiters marker: some 510 * other LWP may want it. Then, kick all the remaining waiters 511 * so that they can re-check for zombies and for deadlock. 512 */ 513 if (lid != 0) { 514 LIST_FOREACH(l2, &p->p_lwps, l_sibling) { 515 if (l2->l_lid == lid) { 516 if (l2->l_waiter == curlid) 517 l2->l_waiter = 0; 518 break; 519 } 520 } 521 } 522 p->p_nlwpwait--; 523 l->l_waitingfor = 0; 524 cv_broadcast(&p->p_lwpcv); 525 526 return error; 527 } 528 529 /* 530 * Create a new LWP within process 'p2', using LWP 'l1' as a template. 531 * The new LWP is created in state LSIDL and must be set running, 532 * suspended, or stopped by the caller. 533 */ 534 int 535 lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, 536 void *stack, size_t stacksize, void (*func)(void *), void *arg, 537 lwp_t **rnewlwpp, int sclass) 538 { 539 struct lwp *l2, *isfree; 540 turnstile_t *ts; 541 542 /* 543 * First off, reap any detached LWP waiting to be collected. 544 * We can re-use its LWP structure and turnstile. 
545 */ 546 isfree = NULL; 547 if (p2->p_zomblwp != NULL) { 548 mutex_enter(&p2->p_smutex); 549 if ((isfree = p2->p_zomblwp) != NULL) { 550 p2->p_zomblwp = NULL; 551 lwp_free(isfree, true, false);/* releases proc mutex */ 552 } else 553 mutex_exit(&p2->p_smutex); 554 } 555 if (isfree == NULL) { 556 l2 = pool_get(&lwp_pool, PR_WAITOK); 557 memset(l2, 0, sizeof(*l2)); 558 l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK); 559 SLIST_INIT(&l2->l_pi_lenders); 560 } else { 561 l2 = isfree; 562 ts = l2->l_ts; 563 KASSERT(l2->l_inheritedprio == -1); 564 KASSERT(SLIST_EMPTY(&l2->l_pi_lenders)); 565 memset(l2, 0, sizeof(*l2)); 566 l2->l_ts = ts; 567 } 568 569 l2->l_stat = LSIDL; 570 l2->l_proc = p2; 571 l2->l_refcnt = 1; 572 l2->l_class = sclass; 573 l2->l_kpriority = l1->l_kpriority; 574 l2->l_priority = l1->l_priority; 575 l2->l_inheritedprio = -1; 576 l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; 577 l2->l_cpu = l1->l_cpu; 578 l2->l_flag = inmem ? LW_INMEM : 0; 579 580 if (p2->p_flag & PK_SYSTEM) { 581 /* 582 * Mark it as a system process and not a candidate for 583 * swapping. 584 */ 585 l2->l_flag |= LW_SYSTEM; 586 } else { 587 /* Look for a CPU to start */ 588 l2->l_cpu = sched_takecpu(l2); 589 l2->l_mutex = l2->l_cpu->ci_schedstate.spc_mutex; 590 } 591 592 lwp_initspecific(l2); 593 sched_lwp_fork(l1, l2); 594 lwp_update_creds(l2); 595 callout_init(&l2->l_timeout_ch, CALLOUT_MPSAFE); 596 callout_setfunc(&l2->l_timeout_ch, sleepq_timeout, l2); 597 mutex_init(&l2->l_swaplock, MUTEX_DEFAULT, IPL_NONE); 598 cv_init(&l2->l_sigcv, "sigwait"); 599 l2->l_syncobj = &sched_syncobj; 600 601 if (rnewlwpp != NULL) 602 *rnewlwpp = l2; 603 604 l2->l_addr = UAREA_TO_USER(uaddr); 605 uvm_lwp_fork(l1, l2, stack, stacksize, func, 606 (arg != NULL) ? arg : l2); 607 608 mutex_enter(&p2->p_smutex); 609 610 if ((flags & LWP_DETACHED) != 0) { 611 l2->l_prflag = LPR_DETACHED; 612 p2->p_ndlwps++; 613 } else 614 l2->l_prflag = 0; 615 616 l2->l_sigmask = l1->l_sigmask; 617 CIRCLEQ_INIT(&l2->l_sigpend.sp_info); 618 sigemptyset(&l2->l_sigpend.sp_set); 619 620 p2->p_nlwpid++; 621 if (p2->p_nlwpid == 0) 622 p2->p_nlwpid++; 623 l2->l_lid = p2->p_nlwpid; 624 LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling); 625 p2->p_nlwps++; 626 627 mutex_exit(&p2->p_smutex); 628 629 mutex_enter(&proclist_lock); 630 mutex_enter(&proclist_mutex); 631 LIST_INSERT_HEAD(&alllwp, l2, l_list); 632 mutex_exit(&proclist_mutex); 633 mutex_exit(&proclist_lock); 634 635 SYSCALL_TIME_LWP_INIT(l2); 636 637 if (p2->p_emul->e_lwp_fork) 638 (*p2->p_emul->e_lwp_fork)(l1, l2); 639 640 return (0); 641 } 642 643 /* 644 * Called by MD code when a new LWP begins execution. Must be called 645 * with the previous LWP locked (so at splsched), or if there is no 646 * previous LWP, at splsched. 647 */ 648 void 649 lwp_startup(struct lwp *prev, struct lwp *new) 650 { 651 652 if (prev != NULL) { 653 lwp_unlock(prev); 654 } 655 spl0(); 656 pmap_activate(new); 657 LOCKDEBUG_BARRIER(NULL, 0); 658 if ((new->l_pflag & LP_MPSAFE) == 0) { 659 KERNEL_LOCK(1, new); 660 } 661 } 662 663 /* 664 * Exit an LWP. 665 */ 666 void 667 lwp_exit(struct lwp *l) 668 { 669 struct proc *p = l->l_proc; 670 struct lwp *l2; 671 bool current; 672 673 current = (l == curlwp); 674 675 KASSERT(current || l->l_stat == LSIDL); 676 677 /* 678 * Verify that we hold no locks other than the kernel lock. 
	 */
#ifdef MULTIPROCESSOR
	LOCKDEBUG_BARRIER(&kernel_lock, 0);
#else
	LOCKDEBUG_BARRIER(NULL, 0);
#endif

	/*
	 * If we are the last live LWP in a process, we need to exit the
	 * entire process.  We do so with an exit status of zero, because
	 * it's a "controlled" exit, and because that's what Solaris does.
	 *
	 * We are not quite a zombie yet, but for accounting purposes we
	 * must increment the count of zombies here.
	 *
	 * Note: the last LWP's specificdata will be deleted here.
	 */
	mutex_enter(&p->p_smutex);
	if (p->p_nlwps - p->p_nzlwps == 1) {
		KASSERT(current == true);
		exit1(l, 0);
		/* NOTREACHED */
	}
	p->p_nzlwps++;
	mutex_exit(&p->p_smutex);

	if (p->p_emul->e_lwp_exit)
		(*p->p_emul->e_lwp_exit)(l);

	/* Delete the specificdata while it's still safe to sleep. */
	specificdata_fini(lwp_specificdata_domain, &l->l_specdataref);

	/*
	 * Release our cached credentials.
	 */
	kauth_cred_free(l->l_cred);
	callout_destroy(&l->l_timeout_ch);

	/*
	 * While we can still block, mark the LWP as unswappable to
	 * prevent conflicts with the swapper.
	 */
	if (current)
		uvm_lwp_hold(l);

	/*
	 * Remove the LWP from the global list.
	 */
	mutex_enter(&proclist_lock);
	mutex_enter(&proclist_mutex);
	LIST_REMOVE(l, l_list);
	mutex_exit(&proclist_mutex);
	mutex_exit(&proclist_lock);

	/*
	 * Get rid of all references to the LWP that others (e.g. procfs)
	 * may have, and mark the LWP as a zombie.  If the LWP is detached,
	 * mark it waiting for collection in the proc structure.  Note that
	 * before we can do that, we need to free any other dead, detached
	 * LWP waiting to meet its maker.
	 *
	 * XXXSMP disable preemption.
	 */
	mutex_enter(&p->p_smutex);
	lwp_drainrefs(l);

	if ((l->l_prflag & LPR_DETACHED) != 0) {
		while ((l2 = p->p_zomblwp) != NULL) {
			p->p_zomblwp = NULL;
			lwp_free(l2, false, false);/* releases proc mutex */
			mutex_enter(&p->p_smutex);
			l->l_refcnt++;
			lwp_drainrefs(l);
		}
		p->p_zomblwp = l;
	}

	/*
	 * If we find a pending signal for the process and we have been
	 * asked to check for signals, then we lose: arrange to have
	 * all other LWPs in the process check for signals.
	 */
	if ((l->l_flag & LW_PENDSIG) != 0 &&
	    firstsig(&p->p_sigpend.sp_set) != 0) {
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			lwp_lock(l2);
			l2->l_flag |= LW_PENDSIG;
			lwp_unlock(l2);
		}
	}

	lwp_lock(l);
	l->l_stat = LSZOMB;
	lwp_unlock(l);
	p->p_nrlwps--;
	cv_broadcast(&p->p_lwpcv);
	if (l->l_lwpctl != NULL)
		l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
	mutex_exit(&p->p_smutex);

	/*
	 * We can no longer block.  At this point, lwp_free() may already
	 * be gunning for us.  On a multi-CPU system, we may be off p_lwps.
	 *
	 * Free MD LWP resources.
	 */
#ifndef __NO_CPU_LWP_FREE
	cpu_lwp_free(l, 0);
#endif

	if (current) {
		pmap_deactivate(l);

		/*
		 * Release the kernel lock, and switch away into
		 * oblivion.
		 */
#ifdef notyet
		/* XXXSMP hold in lwp_userret() */
		KERNEL_UNLOCK_LAST(l);
#else
		KERNEL_UNLOCK_ALL(l, NULL);
#endif
		lwp_exit_switchaway(l);
	}
}
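/*
 * Illustrative sketch (not compiled): the zombie reap idiom used in
 * lwp_exit() above and in lwp_create()/lwp_wait1().  The subtle part of
 * the contract is that lwp_free() is entered with p_smutex held and
 * releases it, so the mutex must be re-acquired on every iteration.
 * "example_reap_zombie" is a hypothetical helper, not part of this file.
 */
#if 0
static void
example_reap_zombie(struct proc *p)
{
	struct lwp *l2;

	mutex_enter(&p->p_smutex);
	while ((l2 = p->p_zomblwp) != NULL) {
		p->p_zomblwp = NULL;
		lwp_free(l2, false, false);	/* releases p_smutex */
		mutex_enter(&p->p_smutex);
	}
	mutex_exit(&p->p_smutex);
}
#endif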
void
lwp_exit_switchaway(struct lwp *l)
{
	struct cpu_info *ci;
	struct lwp *idlelwp;

	/* Unlocked, but is for statistics only. */
	uvmexp.swtch++;

	(void)splsched();
	l->l_flag &= ~LW_RUNNING;
	ci = curcpu();
	idlelwp = ci->ci_data.cpu_idlelwp;
	idlelwp->l_stat = LSONPROC;

	/*
	 * cpu_onproc must be updated with the CPU locked, as
	 * aston() may try to set an AST pending on the LWP (and
	 * it does so with the CPU locked).  Otherwise, the LWP
	 * may be destroyed before the AST can be set, leading
	 * to a use-after-free.
	 */
	spc_lock(ci);
	ci->ci_data.cpu_onproc = idlelwp;
	spc_unlock(ci);
	cpu_switchto(NULL, idlelwp, false);
}

/*
 * Free a dead LWP's remaining resources.
 *
 * XXXLWP limits.
 */
void
lwp_free(struct lwp *l, bool recycle, bool last)
{
	struct proc *p = l->l_proc;
	ksiginfoq_t kq;

	/*
	 * If this was not the last LWP in the process, then adjust
	 * counters and unlock.
	 */
	if (!last) {
		/*
		 * Add the LWP's run time to the process' base value.
		 * This needs to coincide with coming off p_lwps.
		 */
		timeradd(&l->l_rtime, &p->p_rtime, &p->p_rtime);
		p->p_pctcpu += l->l_pctcpu;
		LIST_REMOVE(l, l_sibling);
		p->p_nlwps--;
		p->p_nzlwps--;
		if ((l->l_prflag & LPR_DETACHED) != 0)
			p->p_ndlwps--;

		/*
		 * Have any LWPs sleeping in lwp_wait() recheck for
		 * deadlock.
		 */
		cv_broadcast(&p->p_lwpcv);
		mutex_exit(&p->p_smutex);
	}

#ifdef MULTIPROCESSOR
	/*
	 * In the unlikely event that the LWP is still on the CPU,
	 * then spin until it has switched away.  We need to release
	 * all locks to avoid deadlock against interrupt handlers on
	 * the target CPU.
	 */
	if ((l->l_flag & LW_RUNNING) != 0 || l->l_cpu->ci_curlwp == l) {
		int count;
		(void)count; /* XXXgcc */
		KERNEL_UNLOCK_ALL(curlwp, &count);
		while ((l->l_flag & LW_RUNNING) != 0 ||
		    l->l_cpu->ci_curlwp == l)
			SPINLOCK_BACKOFF_HOOK;
		KERNEL_LOCK(count, curlwp);
	}
#endif

	/*
	 * Destroy the LWP's remaining signal information.
	 */
	ksiginfo_queue_init(&kq);
	sigclear(&l->l_sigpend, NULL, &kq);
	ksiginfo_queue_drain(&kq);
	cv_destroy(&l->l_sigcv);
	mutex_destroy(&l->l_swaplock);

	/*
	 * Free the LWP's turnstile and the LWP structure itself unless the
	 * caller wants to recycle them.  Also, free the scheduler specific
	 * data.
	 *
	 * We can't return turnstile0 to the pool (it didn't come from it),
	 * so if it comes up just drop it quietly and move on.
	 *
	 * We don't recycle the VM resources at this time.
	 */
	KERNEL_LOCK(1, curlwp);		/* XXXSMP */

	if (l->l_lwpctl != NULL)
		lwp_ctl_free(l);
	sched_lwp_exit(l);

	if (!recycle && l->l_ts != &turnstile0)
		pool_cache_put(turnstile_cache, l->l_ts);
#ifndef __NO_CPU_LWP_FREE
	cpu_lwp_free2(l);
#endif
	uvm_lwp_exit(l);
	KASSERT(SLIST_EMPTY(&l->l_pi_lenders));
	KASSERT(l->l_inheritedprio == -1);
	if (!recycle)
		pool_put(&lwp_pool, l);
	KERNEL_UNLOCK_ONE(curlwp);	/* XXXSMP */
}
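/*
 * Illustrative sketch (not compiled): how code outside the LWP itself,
 * such as procfs, might pin an LWP for inspection so that lwp_exit() and
 * lwp_free() cannot tear it down underneath the caller.  It relies only
 * on lwp_find(), lwp_addref() and lwp_delref() elsewhere in this file;
 * "example_lwp_pin" is a hypothetical helper, not part of this file.
 */
#if 0
static struct lwp *
example_lwp_pin(struct proc *p, lwpid_t lid)
{
	struct lwp *t;

	mutex_enter(&p->p_smutex);
	if ((t = lwp_find(p, lid)) != NULL)	/* skips LSIDL/LSZOMB LWPs */
		lwp_addref(t);			/* holds off lwp_drainrefs() */
	mutex_exit(&p->p_smutex);

	/* ... inspect *t, then drop the reference with lwp_delref(t). */
	return t;
}
#endif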
/*
 * Pick an LWP to represent the process for those operations which
 * want information about a "process" that is actually associated
 * with an LWP.
 *
 * If 'locking' is false, no locking or lock checks are performed.
 * This is intended for use by DDB.
 *
 * We don't bother locking the LWP here, since code that uses this
 * interface is broken by design and an exact match is not required.
 */
struct lwp *
proc_representative_lwp(struct proc *p, int *nrlwps, int locking)
{
	struct lwp *l, *onproc, *running, *sleeping, *stopped, *suspended;
	struct lwp *signalled;
	int cnt;

	if (locking) {
		KASSERT(mutex_owned(&p->p_smutex));
	}

	/* Trivial case: only one LWP */
	if (p->p_nlwps == 1) {
		l = LIST_FIRST(&p->p_lwps);
		if (nrlwps)
			*nrlwps = (l->l_stat == LSONPROC || l->l_stat == LSRUN);
		return l;
	}

	cnt = 0;
	switch (p->p_stat) {
	case SSTOP:
	case SACTIVE:
		/* Pick the most live LWP */
		onproc = running = sleeping = stopped = suspended = NULL;
		signalled = NULL;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0) {
				continue;
			}
			if (l->l_lid == p->p_sigctx.ps_lwp)
				signalled = l;
			switch (l->l_stat) {
			case LSONPROC:
				onproc = l;
				cnt++;
				break;
			case LSRUN:
				running = l;
				cnt++;
				break;
			case LSSLEEP:
				sleeping = l;
				break;
			case LSSTOP:
				stopped = l;
				break;
			case LSSUSPENDED:
				suspended = l;
				break;
			}
		}
		if (nrlwps)
			*nrlwps = cnt;
		if (signalled)
			l = signalled;
		else if (onproc)
			l = onproc;
		else if (running)
			l = running;
		else if (sleeping)
			l = sleeping;
		else if (stopped)
			l = stopped;
		else if (suspended)
			l = suspended;
		else
			break;
		return l;
#ifdef DIAGNOSTIC
	case SIDL:
	case SZOMB:
	case SDYING:
	case SDEAD:
		if (locking)
			mutex_exit(&p->p_smutex);
		/* We have more than one LWP and we're in SIDL?
		 * How'd that happen?
		 */
		panic("Too many LWPs in idle/dying process %d (%s) stat = %d",
		    p->p_pid, p->p_comm, p->p_stat);
		break;
	default:
		if (locking)
			mutex_exit(&p->p_smutex);
		panic("Process %d (%s) in unknown state %d",
		    p->p_pid, p->p_comm, p->p_stat);
#endif
	}

	if (locking)
		mutex_exit(&p->p_smutex);
	panic("proc_representative_lwp: couldn't find an lwp for process"
	    " %d (%s)", p->p_pid, p->p_comm);
	/* NOTREACHED */
	return NULL;
}

/*
 * Look up a live LWP within the specified process.  The LWP is not
 * locked on return; the caller must lock it if required.
 *
 * Must be called with p->p_smutex held.
 */
struct lwp *
lwp_find(struct proc *p, int id)
{
	struct lwp *l;

	KASSERT(mutex_owned(&p->p_smutex));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_lid == id)
			break;
	}

	/*
	 * No need to lock - all of these conditions will
	 * be visible with the process level mutex held.
	 */
	if (l != NULL && (l->l_stat == LSIDL || l->l_stat == LSZOMB))
		l = NULL;

	return l;
}

/*
 * Update an LWP's cached credentials to mirror the process' master copy.
 *
 * This happens early in the syscall path, on user trap, and on LWP
 * creation.  A long-running LWP can also voluntarily choose to update
 * its credentials by calling this routine.  This may be called from
 * LWP_CACHE_CREDS(), which checks l->l_cred != p->p_cred beforehand.
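 *
 *	The caller-side pattern is roughly the following (see the
 *	LWP_CACHE_CREDS() definition in sys/proc.h for the authoritative
 *	form):
 *
 *		if (l->l_cred != p->p_cred)
 *			lwp_update_creds(l);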
1068 */ 1069 void 1070 lwp_update_creds(struct lwp *l) 1071 { 1072 kauth_cred_t oc; 1073 struct proc *p; 1074 1075 p = l->l_proc; 1076 oc = l->l_cred; 1077 1078 mutex_enter(&p->p_mutex); 1079 kauth_cred_hold(p->p_cred); 1080 l->l_cred = p->p_cred; 1081 mutex_exit(&p->p_mutex); 1082 if (oc != NULL) { 1083 KERNEL_LOCK(1, l); /* XXXSMP */ 1084 kauth_cred_free(oc); 1085 KERNEL_UNLOCK_ONE(l); /* XXXSMP */ 1086 } 1087 } 1088 1089 /* 1090 * Verify that an LWP is locked, and optionally verify that the lock matches 1091 * one we specify. 1092 */ 1093 int 1094 lwp_locked(struct lwp *l, kmutex_t *mtx) 1095 { 1096 kmutex_t *cur = l->l_mutex; 1097 1098 return mutex_owned(cur) && (mtx == cur || mtx == NULL); 1099 } 1100 1101 /* 1102 * Lock an LWP. 1103 */ 1104 void 1105 lwp_lock_retry(struct lwp *l, kmutex_t *old) 1106 { 1107 1108 /* 1109 * XXXgcc ignoring kmutex_t * volatile on i386 1110 * 1111 * gcc version 4.1.2 20061021 prerelease (NetBSD nb1 20061021) 1112 */ 1113 #if 1 1114 while (l->l_mutex != old) { 1115 #else 1116 for (;;) { 1117 #endif 1118 mutex_spin_exit(old); 1119 old = l->l_mutex; 1120 mutex_spin_enter(old); 1121 1122 /* 1123 * mutex_enter() will have posted a read barrier. Re-test 1124 * l->l_mutex. If it has changed, we need to try again. 1125 */ 1126 #if 1 1127 } 1128 #else 1129 } while (__predict_false(l->l_mutex != old)); 1130 #endif 1131 } 1132 1133 /* 1134 * Lend a new mutex to an LWP. The old mutex must be held. 1135 */ 1136 void 1137 lwp_setlock(struct lwp *l, kmutex_t *new) 1138 { 1139 1140 KASSERT(mutex_owned(l->l_mutex)); 1141 1142 mb_write(); 1143 l->l_mutex = new; 1144 } 1145 1146 /* 1147 * Lend a new mutex to an LWP, and release the old mutex. The old mutex 1148 * must be held. 1149 */ 1150 void 1151 lwp_unlock_to(struct lwp *l, kmutex_t *new) 1152 { 1153 kmutex_t *old; 1154 1155 KASSERT(mutex_owned(l->l_mutex)); 1156 1157 old = l->l_mutex; 1158 mb_write(); 1159 l->l_mutex = new; 1160 mutex_spin_exit(old); 1161 } 1162 1163 /* 1164 * Acquire a new mutex, and donate it to an LWP. The LWP must already be 1165 * locked. 1166 */ 1167 void 1168 lwp_relock(struct lwp *l, kmutex_t *new) 1169 { 1170 kmutex_t *old; 1171 1172 KASSERT(mutex_owned(l->l_mutex)); 1173 1174 old = l->l_mutex; 1175 if (old != new) { 1176 mutex_spin_enter(new); 1177 l->l_mutex = new; 1178 mutex_spin_exit(old); 1179 } 1180 } 1181 1182 int 1183 lwp_trylock(struct lwp *l) 1184 { 1185 kmutex_t *old; 1186 1187 for (;;) { 1188 if (!mutex_tryenter(old = l->l_mutex)) 1189 return 0; 1190 if (__predict_true(l->l_mutex == old)) 1191 return 1; 1192 mutex_spin_exit(old); 1193 } 1194 } 1195 1196 /* 1197 * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is 1198 * set. 1199 */ 1200 void 1201 lwp_userret(struct lwp *l) 1202 { 1203 struct proc *p; 1204 void (*hook)(void); 1205 int sig; 1206 1207 p = l->l_proc; 1208 1209 #ifndef __HAVE_FAST_SOFTINTS 1210 /* Run pending soft interrupts. */ 1211 if (l->l_cpu->ci_data.cpu_softints != 0) 1212 softint_overlay(); 1213 #endif 1214 1215 /* 1216 * It should be safe to do this read unlocked on a multiprocessor 1217 * system.. 1218 */ 1219 while ((l->l_flag & LW_USERRET) != 0) { 1220 /* 1221 * Process pending signals first, unless the process 1222 * is dumping core or exiting, where we will instead 1223 * enter the L_WSUSPEND case below. 
1224 */ 1225 if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == 1226 LW_PENDSIG) { 1227 KERNEL_LOCK(1, l); /* XXXSMP pool_put() below */ 1228 mutex_enter(&p->p_smutex); 1229 while ((sig = issignal(l)) != 0) 1230 postsig(sig); 1231 mutex_exit(&p->p_smutex); 1232 KERNEL_UNLOCK_LAST(l); /* XXXSMP */ 1233 } 1234 1235 /* 1236 * Core-dump or suspend pending. 1237 * 1238 * In case of core dump, suspend ourselves, so that the 1239 * kernel stack and therefore the userland registers saved 1240 * in the trapframe are around for coredump() to write them 1241 * out. We issue a wakeup on p->p_lwpcv so that sigexit() 1242 * will write the core file out once all other LWPs are 1243 * suspended. 1244 */ 1245 if ((l->l_flag & LW_WSUSPEND) != 0) { 1246 mutex_enter(&p->p_smutex); 1247 p->p_nrlwps--; 1248 cv_broadcast(&p->p_lwpcv); 1249 lwp_lock(l); 1250 l->l_stat = LSSUSPENDED; 1251 mutex_exit(&p->p_smutex); 1252 mi_switch(l); 1253 } 1254 1255 /* Process is exiting. */ 1256 if ((l->l_flag & LW_WEXIT) != 0) { 1257 KERNEL_LOCK(1, l); 1258 lwp_exit(l); 1259 KASSERT(0); 1260 /* NOTREACHED */ 1261 } 1262 1263 /* Call userret hook; used by Linux emulation. */ 1264 if ((l->l_flag & LW_WUSERRET) != 0) { 1265 lwp_lock(l); 1266 l->l_flag &= ~LW_WUSERRET; 1267 lwp_unlock(l); 1268 hook = p->p_userret; 1269 p->p_userret = NULL; 1270 (*hook)(); 1271 } 1272 } 1273 } 1274 1275 /* 1276 * Force an LWP to enter the kernel, to take a trip through lwp_userret(). 1277 */ 1278 void 1279 lwp_need_userret(struct lwp *l) 1280 { 1281 KASSERT(lwp_locked(l, NULL)); 1282 1283 /* 1284 * Since the tests in lwp_userret() are done unlocked, make sure 1285 * that the condition will be seen before forcing the LWP to enter 1286 * kernel mode. 1287 */ 1288 mb_write(); 1289 cpu_signotify(l); 1290 } 1291 1292 /* 1293 * Add one reference to an LWP. This will prevent the LWP from 1294 * exiting, thus keep the lwp structure and PCB around to inspect. 1295 */ 1296 void 1297 lwp_addref(struct lwp *l) 1298 { 1299 1300 KASSERT(mutex_owned(&l->l_proc->p_smutex)); 1301 KASSERT(l->l_stat != LSZOMB); 1302 KASSERT(l->l_refcnt != 0); 1303 1304 l->l_refcnt++; 1305 } 1306 1307 /* 1308 * Remove one reference to an LWP. If this is the last reference, 1309 * then we must finalize the LWP's death. 1310 */ 1311 void 1312 lwp_delref(struct lwp *l) 1313 { 1314 struct proc *p = l->l_proc; 1315 1316 mutex_enter(&p->p_smutex); 1317 KASSERT(l->l_stat != LSZOMB); 1318 KASSERT(l->l_refcnt > 0); 1319 if (--l->l_refcnt == 0) 1320 cv_broadcast(&p->p_lwpcv); 1321 mutex_exit(&p->p_smutex); 1322 } 1323 1324 /* 1325 * Drain all references to the current LWP. 1326 */ 1327 void 1328 lwp_drainrefs(struct lwp *l) 1329 { 1330 struct proc *p = l->l_proc; 1331 1332 KASSERT(mutex_owned(&p->p_smutex)); 1333 KASSERT(l->l_refcnt != 0); 1334 1335 l->l_refcnt--; 1336 while (l->l_refcnt != 0) 1337 cv_wait(&p->p_lwpcv, &p->p_smutex); 1338 } 1339 1340 /* 1341 * lwp_specific_key_create -- 1342 * Create a key for subsystem lwp-specific data. 1343 */ 1344 int 1345 lwp_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1346 { 1347 1348 return (specificdata_key_create(lwp_specificdata_domain, keyp, dtor)); 1349 } 1350 1351 /* 1352 * lwp_specific_key_delete -- 1353 * Delete a key for subsystem lwp-specific data. 1354 */ 1355 void 1356 lwp_specific_key_delete(specificdata_key_t key) 1357 { 1358 1359 specificdata_key_delete(lwp_specificdata_domain, key); 1360 } 1361 1362 /* 1363 * lwp_initspecific -- 1364 * Initialize an LWP's specificdata container. 
1365 */ 1366 void 1367 lwp_initspecific(struct lwp *l) 1368 { 1369 int error; 1370 1371 error = specificdata_init(lwp_specificdata_domain, &l->l_specdataref); 1372 KASSERT(error == 0); 1373 } 1374 1375 /* 1376 * lwp_finispecific -- 1377 * Finalize an LWP's specificdata container. 1378 */ 1379 void 1380 lwp_finispecific(struct lwp *l) 1381 { 1382 1383 specificdata_fini(lwp_specificdata_domain, &l->l_specdataref); 1384 } 1385 1386 /* 1387 * lwp_getspecific -- 1388 * Return lwp-specific data corresponding to the specified key. 1389 * 1390 * Note: LWP specific data is NOT INTERLOCKED. An LWP should access 1391 * only its OWN SPECIFIC DATA. If it is necessary to access another 1392 * LWP's specifc data, care must be taken to ensure that doing so 1393 * would not cause internal data structure inconsistency (i.e. caller 1394 * can guarantee that the target LWP is not inside an lwp_getspecific() 1395 * or lwp_setspecific() call). 1396 */ 1397 void * 1398 lwp_getspecific(specificdata_key_t key) 1399 { 1400 1401 return (specificdata_getspecific_unlocked(lwp_specificdata_domain, 1402 &curlwp->l_specdataref, key)); 1403 } 1404 1405 void * 1406 _lwp_getspecific_by_lwp(struct lwp *l, specificdata_key_t key) 1407 { 1408 1409 return (specificdata_getspecific_unlocked(lwp_specificdata_domain, 1410 &l->l_specdataref, key)); 1411 } 1412 1413 /* 1414 * lwp_setspecific -- 1415 * Set lwp-specific data corresponding to the specified key. 1416 */ 1417 void 1418 lwp_setspecific(specificdata_key_t key, void *data) 1419 { 1420 1421 specificdata_setspecific(lwp_specificdata_domain, 1422 &curlwp->l_specdataref, key, data); 1423 } 1424 1425 /* 1426 * Allocate a new lwpctl structure for a user LWP. 1427 */ 1428 int 1429 lwp_ctl_alloc(vaddr_t *uaddr) 1430 { 1431 lcproc_t *lp; 1432 u_int bit, i, offset; 1433 struct uvm_object *uao; 1434 int error; 1435 lcpage_t *lcp; 1436 proc_t *p; 1437 lwp_t *l; 1438 1439 l = curlwp; 1440 p = l->l_proc; 1441 1442 if (l->l_lcpage != NULL) 1443 return (EINVAL); 1444 1445 /* First time around, allocate header structure for the process. */ 1446 if ((lp = p->p_lwpctl) == NULL) { 1447 lp = kmem_alloc(sizeof(*lp), KM_SLEEP); 1448 mutex_init(&lp->lp_lock, MUTEX_DEFAULT, IPL_NONE); 1449 lp->lp_uao = NULL; 1450 TAILQ_INIT(&lp->lp_pages); 1451 mutex_enter(&p->p_mutex); 1452 if (p->p_lwpctl == NULL) { 1453 p->p_lwpctl = lp; 1454 mutex_exit(&p->p_mutex); 1455 } else { 1456 mutex_exit(&p->p_mutex); 1457 mutex_destroy(&lp->lp_lock); 1458 kmem_free(lp, sizeof(*lp)); 1459 lp = p->p_lwpctl; 1460 } 1461 } 1462 1463 /* 1464 * Set up an anonymous memory region to hold the shared pages. 1465 * Map them into the process' address space. The user vmspace 1466 * gets the first reference on the UAO. 1467 */ 1468 mutex_enter(&lp->lp_lock); 1469 if (lp->lp_uao == NULL) { 1470 lp->lp_uao = uao_create(LWPCTL_UAREA_SZ, 0); 1471 lp->lp_cur = 0; 1472 lp->lp_max = LWPCTL_UAREA_SZ; 1473 lp->lp_uva = p->p_emul->e_vm_default_addr(p, 1474 (vaddr_t)p->p_vmspace->vm_daddr, LWPCTL_UAREA_SZ); 1475 error = uvm_map(&p->p_vmspace->vm_map, &lp->lp_uva, 1476 LWPCTL_UAREA_SZ, lp->lp_uao, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, 1477 UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, 0)); 1478 if (error != 0) { 1479 uao_detach(lp->lp_uao); 1480 lp->lp_uao = NULL; 1481 mutex_exit(&lp->lp_lock); 1482 return error; 1483 } 1484 } 1485 1486 /* Get a free block and allocate for this LWP. 
*/ 1487 TAILQ_FOREACH(lcp, &lp->lp_pages, lcp_chain) { 1488 if (lcp->lcp_nfree != 0) 1489 break; 1490 } 1491 if (lcp == NULL) { 1492 /* Nothing available - try to set up a free page. */ 1493 if (lp->lp_cur == lp->lp_max) { 1494 mutex_exit(&lp->lp_lock); 1495 return ENOMEM; 1496 } 1497 lcp = kmem_alloc(LWPCTL_LCPAGE_SZ, KM_SLEEP); 1498 if (lcp == NULL) { 1499 mutex_exit(&lp->lp_lock); 1500 return ENOMEM; 1501 } 1502 /* 1503 * Wire the next page down in kernel space. Since this 1504 * is a new mapping, we must add a reference. 1505 */ 1506 uao = lp->lp_uao; 1507 (*uao->pgops->pgo_reference)(uao); 1508 error = uvm_map(kernel_map, &lcp->lcp_kaddr, PAGE_SIZE, 1509 uao, lp->lp_cur, PAGE_SIZE, 1510 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 1511 UVM_INH_NONE, UVM_ADV_RANDOM, 0)); 1512 if (error == 0) 1513 error = uvm_map_pageable(kernel_map, lcp->lcp_kaddr, 1514 lcp->lcp_kaddr + PAGE_SIZE, FALSE, 0); 1515 if (error != 0) { 1516 mutex_exit(&lp->lp_lock); 1517 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1518 (*uao->pgops->pgo_detach)(uao); 1519 return error; 1520 } 1521 /* Prepare the page descriptor and link into the list. */ 1522 lcp->lcp_uaddr = lp->lp_uva + lp->lp_cur; 1523 lp->lp_cur += PAGE_SIZE; 1524 lcp->lcp_nfree = LWPCTL_PER_PAGE; 1525 lcp->lcp_rotor = 0; 1526 memset(lcp->lcp_bitmap, 0xff, LWPCTL_BITMAP_SZ); 1527 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1528 } 1529 for (i = lcp->lcp_rotor; lcp->lcp_bitmap[i] == 0;) { 1530 if (++i >= LWPCTL_BITMAP_ENTRIES) 1531 i = 0; 1532 } 1533 bit = ffs(lcp->lcp_bitmap[i]) - 1; 1534 lcp->lcp_bitmap[i] ^= (1 << bit); 1535 lcp->lcp_rotor = i; 1536 lcp->lcp_nfree--; 1537 l->l_lcpage = lcp; 1538 offset = (i << 5) + bit; 1539 l->l_lwpctl = (lwpctl_t *)lcp->lcp_kaddr + offset; 1540 *uaddr = lcp->lcp_uaddr + offset * sizeof(lwpctl_t); 1541 mutex_exit(&lp->lp_lock); 1542 1543 l->l_lwpctl->lc_curcpu = (short)curcpu()->ci_data.cpu_index; 1544 1545 return 0; 1546 } 1547 1548 /* 1549 * Free an lwpctl structure back to the per-process list. 1550 */ 1551 void 1552 lwp_ctl_free(lwp_t *l) 1553 { 1554 lcproc_t *lp; 1555 lcpage_t *lcp; 1556 u_int map, offset; 1557 1558 lp = l->l_proc->p_lwpctl; 1559 KASSERT(lp != NULL); 1560 1561 lcp = l->l_lcpage; 1562 offset = (u_int)((lwpctl_t *)l->l_lwpctl - (lwpctl_t *)lcp->lcp_kaddr); 1563 KASSERT(offset < LWPCTL_PER_PAGE); 1564 1565 mutex_enter(&lp->lp_lock); 1566 lcp->lcp_nfree++; 1567 map = offset >> 5; 1568 lcp->lcp_bitmap[map] |= (1 << (offset & 31)); 1569 if (lcp->lcp_bitmap[lcp->lcp_rotor] == 0) 1570 lcp->lcp_rotor = map; 1571 if (TAILQ_FIRST(&lp->lp_pages)->lcp_nfree == 0) { 1572 TAILQ_REMOVE(&lp->lp_pages, lcp, lcp_chain); 1573 TAILQ_INSERT_HEAD(&lp->lp_pages, lcp, lcp_chain); 1574 } 1575 mutex_exit(&lp->lp_lock); 1576 } 1577 1578 /* 1579 * Process is exiting; tear down lwpctl state. This can only be safely 1580 * called by the last LWP in the process. 
1581 */ 1582 void 1583 lwp_ctl_exit(void) 1584 { 1585 lcpage_t *lcp, *next; 1586 lcproc_t *lp; 1587 proc_t *p; 1588 lwp_t *l; 1589 1590 l = curlwp; 1591 l->l_lwpctl = NULL; 1592 p = l->l_proc; 1593 lp = p->p_lwpctl; 1594 1595 KASSERT(lp != NULL); 1596 KASSERT(p->p_nlwps == 1); 1597 1598 for (lcp = TAILQ_FIRST(&lp->lp_pages); lcp != NULL; lcp = next) { 1599 next = TAILQ_NEXT(lcp, lcp_chain); 1600 uvm_unmap(kernel_map, lcp->lcp_kaddr, 1601 lcp->lcp_kaddr + PAGE_SIZE); 1602 kmem_free(lcp, LWPCTL_LCPAGE_SZ); 1603 } 1604 1605 if (lp->lp_uao != NULL) { 1606 uvm_unmap(&p->p_vmspace->vm_map, lp->lp_uva, 1607 lp->lp_uva + LWPCTL_UAREA_SZ); 1608 } 1609 1610 mutex_destroy(&lp->lp_lock); 1611 kmem_free(lp, sizeof(*lp)); 1612 p->p_lwpctl = NULL; 1613 } 1614