/*
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.47 2003/12/30 03:19:02 dillon Exp $
 */

/*
 * Each cpu in a system has its own self-contained light weight kernel
 * thread scheduler, which means that generally speaking we only need
 * to use a critical section to avoid problems.  Foreign thread
 * scheduling is queued via (async) IPIs.
 *
 * NOTE: on UP machines smp_active is defined to be 0.  On SMP machines
 * smp_active is 0 prior to SMP activation, then it is 1.  The LWKT module
 * uses smp_active to optimize UP builds and to avoid sending IPIs during
 * early boot (primarily interrupt and network thread initialization).
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>

#define THREAD_STACK    (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/cpufunc.h>
#include <machine/lock.h>

#endif

static int untimely_switch = 0;
#ifdef INVARIANTS
static int token_debug = 0;
#endif
static __int64_t switch_count = 0;
static __int64_t preempt_hit = 0;
static __int64_t preempt_miss = 0;
static __int64_t preempt_weird = 0;
#ifdef SMP
static __int64_t ipiq_count = 0;
static __int64_t ipiq_fifofull = 0;
#endif

#ifdef _KERNEL

SYSCTL_INT(_lwkt, OID_AUTO, untimely_switch, CTLFLAG_RW, &untimely_switch, 0, "");
#ifdef INVARIANTS
SYSCTL_INT(_lwkt, OID_AUTO, token_debug, CTLFLAG_RW, &token_debug, 0, "");
#endif
SYSCTL_QUAD(_lwkt, OID_AUTO, switch_count, CTLFLAG_RW, &switch_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_hit, CTLFLAG_RW, &preempt_hit, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_miss, CTLFLAG_RW, &preempt_miss, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, preempt_weird, CTLFLAG_RW, &preempt_weird, 0, "");
#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
#endif

#endif

/*
 * These helper procedures handle the runq, they can only be called from
 * within a critical section.
 *
 * WARNING!  Prior to SMP being brought up it is possible to enqueue and
 * dequeue threads belonging to other cpus, so be sure to use td->td_gd
 * instead of 'mycpu' when referencing the globaldata structure.  Once
 * SMP is live, enqueuing and dequeuing only occur on the current cpu.
 */
static __inline
void
_lwkt_dequeue(thread_t td)
{
    if (td->td_flags & TDF_RUNQ) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags &= ~TDF_RUNQ;
        TAILQ_REMOVE(&gd->gd_tdrunq[nq], td, td_threadq);
        /* runqmask is passively cleaned up by the switcher */
    }
}

static __inline
void
_lwkt_enqueue(thread_t td)
{
    if ((td->td_flags & TDF_RUNQ) == 0) {
        int nq = td->td_pri & TDPRI_MASK;
        struct globaldata *gd = td->td_gd;

        td->td_flags |= TDF_RUNQ;
        TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], td, td_threadq);
        gd->gd_runqmask |= 1 << nq;
    }
}

static __inline
int
_lwkt_wantresched(thread_t ntd, thread_t cur)
{
    return((ntd->td_pri & TDPRI_MASK) > (cur->td_pri & TDPRI_MASK));
}

#ifdef _KERNEL

/*
 * LWKTs operate on a per-cpu basis
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
lwkt_gdinit(struct globaldata *gd)
{
    int i;

    for (i = 0; i < sizeof(gd->gd_tdrunq)/sizeof(gd->gd_tdrunq[0]); ++i)
        TAILQ_INIT(&gd->gd_tdrunq[i]);
    gd->gd_runqmask = 0;
    TAILQ_INIT(&gd->gd_tdallq);
}

#endif /* _KERNEL */

/*
 * Initialize a thread wait structure prior to first use.
 *
 * NOTE!  called from low level boot code, we cannot do anything fancy!
 */
void
lwkt_init_wait(lwkt_wait_t w)
{
    TAILQ_INIT(&w->wa_waitq);
}

/*
 * Create a new thread.  The thread must be associated with a process context
 * or LWKT start address before it can be scheduled.  If the target cpu is
 * -1 the thread will be created on the current cpu.
 *
 * If you intend to create a thread without a process context this function
 * does everything except load the startup and switcher function.
 */
thread_t
lwkt_alloc_thread(struct thread *td, int cpu)
{
    void *stack;
    int flags = 0;

    if (td == NULL) {
        crit_enter();
        if (mycpu->gd_tdfreecount > 0) {
            --mycpu->gd_tdfreecount;
            td = TAILQ_FIRST(&mycpu->gd_tdfreeq);
            KASSERT(td != NULL && (td->td_flags & TDF_RUNNING) == 0,
                ("lwkt_alloc_thread: unexpected NULL or corrupted td"));
            TAILQ_REMOVE(&mycpu->gd_tdfreeq, td, td_threadq);
            crit_exit();
            stack = td->td_kstack;
            flags = td->td_flags & (TDF_ALLOCATED_STACK|TDF_ALLOCATED_THREAD);
        } else {
            crit_exit();
#ifdef _KERNEL
            td = zalloc(thread_zone);
#else
            td = malloc(sizeof(struct thread));
#endif
            td->td_kstack = NULL;
            flags |= TDF_ALLOCATED_THREAD;
        }
    }
    if ((stack = td->td_kstack) == NULL) {
#ifdef _KERNEL
        stack = (void *)kmem_alloc(kernel_map, THREAD_STACK);
#else
        stack = libcaps_alloc_stack(THREAD_STACK);
#endif
        flags |= TDF_ALLOCATED_STACK;
    }
    if (cpu < 0)
        lwkt_init_thread(td, stack, flags, mycpu);
    else
        lwkt_init_thread(td, stack, flags, globaldata_find(cpu));
    return(td);
}

#ifdef _KERNEL

/*
 * Initialize a preexisting thread structure.  This function is used by
 * lwkt_alloc_thread() and also used to initialize the per-cpu idlethread.
 *
 * All threads start out in a critical section at a priority of
 * TDPRI_KERN_DAEMON.  Higher level code will modify the priority as
 * appropriate.  This function may send an IPI message when the
 * requested cpu is not the current cpu and consequently gd_tdallq may
 * not be initialized synchronously from the point of view of the originating
 * cpu.
 *
 * NOTE! we have to be careful in regards to creating threads for other cpus
 * if SMP has not yet been activated.
 */
static void
lwkt_init_thread_remote(void *arg)
{
    thread_t td = arg;

    TAILQ_INSERT_TAIL(&td->td_gd->gd_tdallq, td, td_allq);
}

void
lwkt_init_thread(thread_t td, void *stack, int flags, struct globaldata *gd)
{
    bzero(td, sizeof(struct thread));
    td->td_kstack = stack;
    td->td_flags |= flags;
    td->td_gd = gd;
    td->td_pri = TDPRI_KERN_DAEMON + TDPRI_CRIT;
    lwkt_initport(&td->td_msgport, td);
    pmap_init_thread(td);
    if (smp_active == 0 || gd == mycpu) {
        crit_enter();
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq);
        crit_exit();
    } else {
        lwkt_send_ipiq(gd->gd_cpuid, lwkt_init_thread_remote, td);
    }
}

#endif /* _KERNEL */

void
lwkt_set_comm(thread_t td, const char *ctl, ...)
{
    __va_list va;

    __va_start(va, ctl);
    vsnprintf(td->td_comm, sizeof(td->td_comm), ctl, va);
    __va_end(va);
}

void
lwkt_hold(thread_t td)
{
    ++td->td_refs;
}

void
lwkt_rele(thread_t td)
{
    KKASSERT(td->td_refs > 0);
    --td->td_refs;
}

#ifdef _KERNEL

void
lwkt_wait_free(thread_t td)
{
    while (td->td_refs)
        tsleep(td, 0, "tdreap", hz);
}

#endif

void
lwkt_free_thread(thread_t td)
{
    struct globaldata *gd = mycpu;

    KASSERT((td->td_flags & TDF_RUNNING) == 0,
        ("lwkt_free_thread: did not exit! %p", td));

    crit_enter();
    TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
    if (gd->gd_tdfreecount < CACHE_NTHREADS &&
        (td->td_flags & TDF_ALLOCATED_THREAD)
    ) {
        ++gd->gd_tdfreecount;
        TAILQ_INSERT_HEAD(&gd->gd_tdfreeq, td, td_threadq);
        crit_exit();
    } else {
        crit_exit();
        if (td->td_kstack && (td->td_flags & TDF_ALLOCATED_STACK)) {
#ifdef _KERNEL
            kmem_free(kernel_map, (vm_offset_t)td->td_kstack, THREAD_STACK);
#else
            libcaps_free_stack(td->td_kstack, THREAD_STACK);
#endif
            /* gd invalid */
            td->td_kstack = NULL;
        }
        if (td->td_flags & TDF_ALLOCATED_THREAD) {
#ifdef _KERNEL
            zfree(thread_zone, td);
#else
            free(td);
#endif
        }
    }
}

/*
 * Switch to the next runnable lwkt.  If no LWKTs are runnable then
 * switch to the idlethread.  Switching must occur within a critical
 * section to avoid races with the scheduling queue.
 *
 * We always have full control over our cpu's run queue.  Other cpus
 * that wish to manipulate our queue must use the cpu_*msg() calls to
 * talk to our cpu, so a critical section is all that is needed and
 * the result is very, very fast thread switching.
 *
 * The LWKT scheduler uses a fixed priority model and round-robins at
 * each priority level.  User process scheduling is a totally
 * different beast and LWKT priorities should not be confused with
 * user process priorities.
 *
 * The MP lock may be out of sync with the thread's td_mpcount.  lwkt_switch()
 * cleans it up.  Note that the td_switch() function cannot do anything that
 * requires the MP lock since the MP lock will have already been set up for
 * the target thread (not the current thread).  It's nice to have a scheduler
 * that does not need the MP lock to work because it allows us to do some
 * really cool high-performance MP lock optimizations.
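 *
 * Illustrative sketch only (not code from this file): a thread that wants
 * to block itself typically deschedules and then calls lwkt_switch() from
 * within a critical section, resuming only after some other entity
 * lwkt_schedule()s it again:
 *
 *	crit_enter();
 *	lwkt_deschedule_self();
 *	lwkt_switch();			(resumes here once rescheduled)
 *	crit_exit();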
 */
void
lwkt_switch(void)
{
    struct globaldata *gd;
    thread_t td = curthread;
    thread_t ntd;
#ifdef SMP
    int mpheld;
#endif

    /*
     * Switching from within a 'fast' (non thread switched) interrupt is
     * illegal.
     */
    if (mycpu->gd_intr_nesting_level && panicstr == NULL) {
        panic("lwkt_switch: cannot switch from within a fast interrupt, yet\n");
    }

    /*
     * Passive release (used to transition from user to kernel mode
     * when we block or switch rather than when we enter the kernel).
     * This function is NOT called if we are switching into a preemption
     * or returning from a preemption.  Typically this causes us to lose
     * our P_CURPROC designation (if we have one) and become a true LWKT
     * thread, and may also hand P_CURPROC to another process and schedule
     * its thread.
     */
    if (td->td_release)
        td->td_release(td);

    crit_enter();
    ++switch_count;

#ifdef SMP
    /*
     * td_mpcount cannot be used to determine if we currently hold the
     * MP lock because get_mplock() will increment it prior to attempting
     * to get the lock, and switch out if it can't.  Our ownership of
     * the actual lock will remain stable while we are in a critical section
     * (but, of course, another cpu may own or release the lock so the
     * actual value of mp_lock is not stable).
     */
    mpheld = MP_LOCK_HELD();
#endif
    if ((ntd = td->td_preempted) != NULL) {
        /*
         * We had preempted another thread on this cpu, resume the preempted
         * thread.  This occurs transparently, whether the preempted thread
         * was scheduled or not (it may have been preempted after descheduling
         * itself).
         *
         * We have to set up the MP lock for the original thread after backing
         * out the adjustment that was made to curthread when the original
         * was preempted.
         */
        KKASSERT(ntd->td_flags & TDF_PREEMPT_LOCK);
#ifdef SMP
        if (ntd->td_mpcount && mpheld == 0) {
            panic("MPLOCK NOT HELD ON RETURN: %p %p %d %d\n",
                td, ntd, td->td_mpcount, ntd->td_mpcount);
        }
        if (ntd->td_mpcount) {
            td->td_mpcount -= ntd->td_mpcount;
            KKASSERT(td->td_mpcount >= 0);
        }
#endif
        ntd->td_flags |= TDF_PREEMPT_DONE;
        /* YYY release mp lock on switchback if original doesn't need it */
    } else {
        /*
         * Priority queue / round-robin at each priority.  Note that user
         * processes run at a fixed, low priority and the user process
         * scheduler deals with interactions between user processes
         * by scheduling and descheduling them from the LWKT queue as
         * necessary.
         *
         * We have to adjust the MP lock for the target thread.  If we
         * need the MP lock and cannot obtain it we try to locate a
         * thread that does not need the MP lock.
         */
        gd = mycpu;
again:
        if (gd->gd_runqmask) {
            int nq = bsrl(gd->gd_runqmask);
            if ((ntd = TAILQ_FIRST(&gd->gd_tdrunq[nq])) == NULL) {
                gd->gd_runqmask &= ~(1 << nq);
                goto again;
            }
#ifdef SMP
            if (ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) {
                /*
                 * Target needs MP lock and we couldn't get it, try
                 * to locate a thread which does not need the MP lock
                 * to run.  If we cannot locate a thread spin in idle.
                 */
                u_int32_t rqmask = gd->gd_runqmask;
                while (rqmask) {
                    TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
                        if (ntd->td_mpcount == 0)
                            break;
                    }
                    if (ntd)
                        break;
                    rqmask &= ~(1 << nq);
                    nq = bsrl(rqmask);
                }
                if (ntd == NULL) {
                    ntd = &gd->gd_idlethread;
                    ntd->td_flags |= TDF_IDLE_NOHLT;
                } else {
                    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
                }
            } else {
                TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
                TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
            }
#else
            TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
            TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
#endif
        } else {
            /*
             * We have nothing to run but only let the idle loop halt
             * the cpu if there are no pending interrupts.
             */
            ntd = &gd->gd_idlethread;
            if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
                ntd->td_flags |= TDF_IDLE_NOHLT;
        }
    }
    KASSERT(ntd->td_pri >= TDPRI_CRIT,
        ("priority problem in lwkt_switch %d %d", td->td_pri, ntd->td_pri));

    /*
     * Do the actual switch.  If the new target does not need the MP lock
     * and we are holding it, release the MP lock.  If the new target requires
     * the MP lock we have already acquired it for the target.
     */
#ifdef SMP
    if (ntd->td_mpcount == 0) {
        if (MP_LOCK_HELD())
            cpu_rel_mplock();
    } else {
        ASSERT_MP_LOCK_HELD();
    }
#endif
    if (td != ntd) {
        td->td_switch(ntd);
    }

    crit_exit();
}

/*
 * Switch if another thread has a higher priority.  Do not switch to other
 * threads at the same priority.
 */
void
lwkt_maybe_switch(void)
{
    struct globaldata *gd = mycpu;
    struct thread *td = gd->gd_curthread;

    if ((td->td_pri & TDPRI_MASK) < bsrl(gd->gd_runqmask)) {
        lwkt_switch();
    }
}

/*
 * Request that the target thread preempt the current thread.  Preemption
 * only works under a specific set of conditions:
 *
 *	- We are not preempting ourselves
 *	- The target thread is owned by the current cpu
 *	- We are not currently being preempted
 *	- The target is not currently being preempted
 *	- We are able to satisfy the target's MP lock requirements (if any).
 *
 * THE CALLER OF LWKT_PREEMPT() MUST BE IN A CRITICAL SECTION.  Typically
 * this is called via lwkt_schedule() through the td_preemptable callback.
 * critpri is the managed critical priority that we should ignore in order
 * to determine whether preemption is possible (aka usually just the crit
 * priority of lwkt_schedule() itself).
 *
 * XXX at the moment we run the target thread in a critical section during
 * the preemption in order to prevent the target from taking interrupts
 * that *WE* can't.  Preemption is strictly limited to interrupt threads
 * and interrupt-like threads, outside of a critical section, and the
 * preempted source thread will be resumed the instant the target blocks
 * whether or not the source is scheduled (i.e. preemption is supposed to
 * be as transparent as possible).
 *
 * The target thread inherits our MP count (added to its own) for the
 * duration of the preemption in order to preserve the atomicity of the
 * MP lock during the preemption.  Therefore, any preempting targets must be
 * careful with regard to MP assertions.  Note that the MP count may be
 * out of sync with the physical mp_lock, but we do not have to preserve
 * the original ownership of the lock if it was out of sync (that is, we
 * can leave it synchronized on return).
 */
void
lwkt_preempt(thread_t ntd, int critpri)
{
    struct globaldata *gd = mycpu;
    thread_t td = gd->gd_curthread;
#ifdef SMP
    int mpheld;
    int savecnt;
#endif

    /*
     * The caller has put us in a critical section.  We can only preempt
     * if the caller of the caller was not in a critical section (basically
     * a local interrupt), as determined by the 'critpri' parameter.  If
     * we are unable to preempt we simply return without switching and the
     * appropriate miss counter is bumped.
     *
     * YYY The target thread must be in a critical section (else it must
     * inherit our critical section?  I dunno yet).
     */
    KASSERT(ntd->td_pri >= TDPRI_CRIT, ("BADCRIT0 %d", ntd->td_pri));

    need_resched();
    if (!_lwkt_wantresched(ntd, td)) {
        ++preempt_miss;
        return;
    }
    if ((td->td_pri & ~TDPRI_MASK) > critpri) {
        ++preempt_miss;
        return;
    }
#ifdef SMP
    if (ntd->td_gd != gd) {
        ++preempt_miss;
        return;
    }
#endif
    if (td == ntd || ((td->td_flags | ntd->td_flags) & TDF_PREEMPT_LOCK)) {
        ++preempt_weird;
        return;
    }
    if (ntd->td_preempted) {
        ++preempt_hit;
        return;
    }
#ifdef SMP
    /*
     * note: an interrupt might have occurred just as we were transitioning
     * to or from the MP lock.  In this case td_mpcount will be pre-disposed
     * (non-zero) but not actually synchronized with the actual state of the
     * lock.  We can use it to imply an MP lock requirement for the
     * preemption but we cannot use it to test whether we hold the MP lock
     * or not.
     */
    savecnt = td->td_mpcount;
    mpheld = MP_LOCK_HELD();
    ntd->td_mpcount += td->td_mpcount;
    if (mpheld == 0 && ntd->td_mpcount && !cpu_try_mplock()) {
        ntd->td_mpcount -= td->td_mpcount;
        ++preempt_miss;
        return;
    }
#endif

    ++preempt_hit;
    ntd->td_preempted = td;
    td->td_flags |= TDF_PREEMPT_LOCK;
    td->td_switch(ntd);
    KKASSERT(ntd->td_preempted && (td->td_flags & TDF_PREEMPT_DONE));
#ifdef SMP
    KKASSERT(savecnt == td->td_mpcount);
    mpheld = MP_LOCK_HELD();
    if (mpheld && td->td_mpcount == 0)
        cpu_rel_mplock();
    else if (mpheld == 0 && td->td_mpcount)
        panic("lwkt_preempt(): MP lock was not held through");
#endif
    ntd->td_preempted = NULL;
    td->td_flags &= ~(TDF_PREEMPT_LOCK|TDF_PREEMPT_DONE);
}

/*
 * Yield our thread while higher priority threads are pending.  This is
 * typically called when we leave a critical section but it can be safely
 * called while we are in a critical section.
 *
 * This function will not generally yield to equal priority threads but it
 * can occur as a side effect.  Note that lwkt_switch() is called from
 * inside the critical section to prevent its own crit_exit() from reentering
 * lwkt_yield_quick().
 *
 * gd_reqflags indicates that *something* changed, e.g. an interrupt or softint
 * came along but was blocked and made pending.
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield_quick(void)
{
    globaldata_t gd = mycpu;
    thread_t td = gd->gd_curthread;

    /*
     * gd_reqflags is cleared in splz if the cpl is 0.  If we were to clear
     * it with a non-zero cpl then we might not wind up calling splz after
     * a task switch when the critical section is exited even though the
     * new task could accept the interrupt.
     *
     * XXX from crit_exit() only called after last crit section is released.
     * If called directly will run splz() even if in a critical section.
     *
     * td_nest_count prevents deep nesting via splz() or doreti().  Note that
     * except for this special case, we MUST call splz() here to handle any
     * pending ints, particularly after we switch, or we might accidentally
     * halt the cpu with interrupts pending.
     */
    if (gd->gd_reqflags && td->td_nest_count < 2)
        splz();

    /*
     * YYY enabling will cause wakeup() to task-switch, which really
     * confused the old 4.x code.  This is a good way to simulate
     * preemption and MP without actually doing preemption or MP, because a
     * lot of code assumes that wakeup() does not block.
     */
    if (untimely_switch && td->td_nest_count == 0 &&
        gd->gd_intr_nesting_level == 0
    ) {
        crit_enter();
        /*
         * YYY temporary hacks until we disassociate the userland scheduler
         * from the LWKT scheduler.
         */
        if (td->td_flags & TDF_RUNQ) {
            lwkt_switch();              /* will not reenter yield function */
        } else {
            lwkt_schedule_self();       /* make sure we are scheduled */
            lwkt_switch();              /* will not reenter yield function */
            lwkt_deschedule_self();     /* make sure we are descheduled */
        }
        crit_exit_noyield(td);
    }
}

/*
 * This implements a normal yield which, unlike _quick, will yield to equal
 * priority threads as well.  Note that gd_reqflags tests will be handled by
 * the crit_exit() call in lwkt_switch().
 *
 * (self contained on a per cpu basis)
 */
void
lwkt_yield(void)
{
    lwkt_schedule_self();
    lwkt_switch();
}

/*
 * Schedule a thread to run.  As the current thread we can always safely
 * schedule ourselves, and a shortcut procedure is provided for that
 * function.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_schedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
    _lwkt_enqueue(td);
#ifdef _KERNEL
    if (td->td_proc && td->td_proc->p_stat == SSLEEP)
        panic("SCHED SELF PANIC");
#endif
    crit_exit();
}

/*
 * Generic schedule.  Possibly schedule threads belonging to other cpus and
 * deal with threads that might be blocked on a wait queue.
 *
 * YYY this is one of the best places to implement load balancing code.
 * Load balancing can be accomplished by requesting other sorts of actions
 * for the thread in question.
 */
void
lwkt_schedule(thread_t td)
{
#ifdef INVARIANTS
    if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc
        && td->td_proc->p_stat == SSLEEP
    ) {
        printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
            curthread,
            curthread->td_proc ? curthread->td_proc->p_pid : -1,
            curthread->td_proc ? curthread->td_proc->p_stat : -1,
            td,
            td->td_proc ? td->td_proc->p_pid : -1,
            td->td_proc ? td->td_proc->p_stat : -1
        );
        panic("SCHED PANIC");
    }
#endif
    crit_enter();
    if (td == curthread) {
        _lwkt_enqueue(td);
    } else {
        lwkt_wait_t w;

        /*
         * If the thread is on a wait list we have to send our scheduling
         * request to the owner of the wait structure.  Otherwise we send
         * the scheduling request to the cpu owning the thread.  Races
         * are ok, the target will forward the message as necessary (the
         * message may chase the thread around before it finally gets
         * acted upon).
         *
         * (remember, wait structures use stable storage)
         */
        if ((w = td->td_wait) != NULL) {
            if (lwkt_trytoken(&w->wa_token)) {
                TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
                --w->wa_count;
                td->td_wait = NULL;
                if (smp_active == 0 || td->td_gd == mycpu) {
                    _lwkt_enqueue(td);
                    if (td->td_preemptable) {
                        td->td_preemptable(td, TDPRI_CRIT*2); /* YYY +token */
                    } else if (_lwkt_wantresched(td, curthread)) {
                        need_resched();
                    }
                } else {
                    lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
                }
                lwkt_reltoken(&w->wa_token);
            } else {
                lwkt_send_ipiq(w->wa_token.t_cpu, (ipifunc_t)lwkt_schedule, td);
            }
        } else {
            /*
             * If the wait structure is NULL and we own the thread, there
             * is no race (since we are in a critical section).  If we
             * do not own the thread there might be a race but the
             * target cpu will deal with it.
             */
            if (smp_active == 0 || td->td_gd == mycpu) {
                _lwkt_enqueue(td);
                if (td->td_preemptable) {
                    td->td_preemptable(td, TDPRI_CRIT);
                } else if (_lwkt_wantresched(td, curthread)) {
                    need_resched();
                }
            } else {
                lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
            }
        }
    }
    crit_exit();
}

/*
 * Managed acquisition.  This code assumes that the MP lock is held for
 * the tdallq operation and that the thread has been descheduled from its
 * original cpu.  We also have to wait for the thread to be entirely switched
 * out on its original cpu (this is usually fast enough that we never loop)
 * since the LWKT system does not have to hold the MP lock while switching
 * and the target may have released it before switching.
 */
void
lwkt_acquire(thread_t td)
{
    struct globaldata *gd;

    gd = td->td_gd;
    KKASSERT((td->td_flags & TDF_RUNQ) == 0);
    while (td->td_flags & TDF_RUNNING)  /* XXX spin */
        ;
    if (gd != mycpu) {
        crit_enter();
        TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);      /* protected by BGL */
        gd = mycpu;
        td->td_gd = gd;
        TAILQ_INSERT_TAIL(&gd->gd_tdallq, td, td_allq); /* protected by BGL */
        crit_exit();
    }
}

/*
 * Deschedule a thread.
 *
 * (non-blocking, self contained on a per cpu basis)
 */
void
lwkt_deschedule_self(void)
{
    thread_t td = curthread;

    crit_enter();
    KASSERT(td->td_wait == NULL, ("lwkt_deschedule_self(): td_wait not NULL!"));
    _lwkt_dequeue(td);
    crit_exit();
}

/*
 * Generic deschedule.  Descheduling threads other than your own should be
 * done only in carefully controlled circumstances.  Descheduling is
 * asynchronous.
 *
 * This function may block if the cpu has run out of messages.
 */
void
lwkt_deschedule(thread_t td)
{
    crit_enter();
    if (td == curthread) {
        _lwkt_dequeue(td);
    } else {
        if (td->td_gd == mycpu) {
            _lwkt_dequeue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_deschedule, td);
        }
    }
    crit_exit();
}

/*
 * Set the target thread's priority.  This routine does not automatically
 * switch to a higher priority thread; LWKT threads are not designed for
 * continuous priority changes.  Yield if you want to switch.
 *
 * We have to retain the critical section count which uses the high bits
 * of the td_pri field.  The specified priority may also indicate zero or
 * more critical sections by adding TDPRI_CRIT*N.
 */
void
lwkt_setpri(thread_t td, int pri)
{
    KKASSERT(pri >= 0);
    KKASSERT(td->td_gd == mycpu);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

void
lwkt_setpri_self(int pri)
{
    thread_t td = curthread;

    KKASSERT(pri >= 0 && pri <= TDPRI_MAX);
    crit_enter();
    if (td->td_flags & TDF_RUNQ) {
        _lwkt_dequeue(td);
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
        _lwkt_enqueue(td);
    } else {
        td->td_pri = (td->td_pri & ~TDPRI_MASK) + pri;
    }
    crit_exit();
}

struct proc *
lwkt_preempted_proc(void)
{
    thread_t td = curthread;
    while (td->td_preempted)
        td = td->td_preempted;
    return(td->td_proc);
}

typedef struct lwkt_gettoken_req {
    lwkt_token_t tok;
    int cpu;
} lwkt_gettoken_req;

#if 0

/*
 * This function deschedules the current thread and blocks on the specified
 * wait queue.  We obtain ownership of the wait queue in order to block
 * on it.  A generation number is used to interlock the wait queue in case
 * it gets signalled while we are blocked waiting on the token.
 *
 * Note: alternatively we could dequeue our thread and then message the
 * target cpu owning the wait queue.  YYY implement as sysctl.
 *
 * Note: wait queue signals normally ping-pong the cpu as an optimization.
 */

void
lwkt_block(lwkt_wait_t w, const char *wmesg, int *gen)
{
    thread_t td = curthread;

    lwkt_gettoken(&w->wa_token);
    if (w->wa_gen == *gen) {
        _lwkt_dequeue(td);
        TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
        ++w->wa_count;
        td->td_wait = w;
        td->td_wmesg = wmesg;
again:
        lwkt_switch();
        lwkt_regettoken(&w->wa_token);
        if (td->td_wmesg != NULL) {
            _lwkt_dequeue(td);
            goto again;
        }
    }
    /* token might be lost, doesn't matter for gen update */
    *gen = w->wa_gen;
    lwkt_reltoken(&w->wa_token);
}

/*
 * Signal a wait queue.  We gain ownership of the wait queue in order to
 * signal it.  Once a thread is removed from the wait queue we have to
 * deal with the cpu owning the thread.
 *
 * Note: alternatively we could message the target cpu owning the wait
 * queue.  YYY implement as sysctl.
 */
void
lwkt_signal(lwkt_wait_t w, int count)
{
    thread_t td;

    lwkt_gettoken(&w->wa_token);
    ++w->wa_gen;
    if (count < 0)
        count = w->wa_count;
    while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
        --count;
        --w->wa_count;
        TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
        td->td_wait = NULL;
        td->td_wmesg = NULL;
        if (td->td_gd == mycpu) {
            _lwkt_enqueue(td);
        } else {
            lwkt_send_ipiq(td->td_gd->gd_cpuid, (ipifunc_t)lwkt_schedule, td);
        }
        lwkt_regettoken(&w->wa_token);
    }
    lwkt_reltoken(&w->wa_token);
}

#endif

/*
 * Acquire ownership of a token
 *
 * Acquire ownership of a token.  The token may have spl and/or critical
 * section side effects, depending on its purpose.  These side effects
 * guarantee that you will maintain ownership of the token as long as you
 * do not block.  If you block you may lose access to the token (but you
 * must still release it even if you lose your access to it).
 *
 * YYY for now we use a critical section to prevent IPIs from taking away
 * a token, but do we really only need to disable IPIs?
 *
 * YYY certain tokens could be made to act like mutexes when performance
 * would be better (e.g. t_cpu == -1).  This is not yet implemented.
 *
 * YYY the tokens replace 4.x's simplelocks for the most part, but this
 * means that 4.x does not expect a switch so for now we cannot switch
 * when waiting for an IPI to be returned.
 *
 * YYY If the token is owned by another cpu we may have to send an IPI to
 * it and then block.  The IPI causes the token to be given away to the
 * requesting cpu, unless it has already changed hands.  Since only the
 * current cpu can give away a token it owns we do not need a memory barrier.
 * This needs serious optimization.
 */

#ifdef SMP

static
void
lwkt_gettoken_remote(void *arg)
{
    lwkt_gettoken_req *req = arg;
    if (req->tok->t_cpu == mycpu->gd_cpuid) {
#ifdef INVARIANTS
        if (token_debug)
            printf("GT(%d,%d) ", req->tok->t_cpu, req->cpu);
#endif
        req->tok->t_cpu = req->cpu;
        req->tok->t_reqcpu = req->cpu;  /* YYY leave owned by target cpu */
        /* else set reqcpu to point to current cpu for release */
    }
}

#endif

int
lwkt_gettoken(lwkt_token_t tok)
{
    /*
     * Prevent preemption so the token can't be taken away from us once
     * we gain ownership of it.  Use a synchronous request which might
     * block.  The request will be forwarded as necessary playing catchup
     * to the token.
     */

    crit_enter();
#ifdef INVARIANTS
    if (curthread->td_pri > 1800) {
        printf("lwkt_gettoken: %p called from %p: crit sect nesting warning\n",
            tok, ((int **)&tok)[-1]);
    }
    if (curthread->td_pri > 2000) {
        curthread->td_pri = 1000;
        panic("too HIGH!");
    }
#endif
#ifdef SMP
    while (tok->t_cpu != mycpu->gd_cpuid) {
        struct lwkt_gettoken_req req;
        int seq;
        int dcpu;

        req.cpu = mycpu->gd_cpuid;
        req.tok = tok;
        dcpu = (volatile int)tok->t_cpu;
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQT%d ", dcpu);
#endif
        seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
        lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
        if (token_debug)
            printf("REQR%d ", tok->t_cpu);
#endif
    }
#endif
    /*
     * Leave us in a critical section on return.  This will be undone
     * by lwkt_reltoken().  Bump the generation number.
     */
    return(++tok->t_gen);
}

/*
 * Attempt to acquire ownership of a token.  Returns 1 on success, 0 on
 * failure.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
    crit_enter();
#ifdef SMP
    if (tok->t_cpu != mycpu->gd_cpuid) {
        crit_exit();
        return(0);
    }
#endif
    /* leave us in the critical section */
    ++tok->t_gen;
    return(1);
}

/*
 * Release your ownership of a token.  Releases must occur in reverse
 * order to acquisitions, eventually so priorities can be unwound properly
 * like SPLs.  At the moment the actual implementation doesn't care.
 *
 * We can safely hand a token that we own to another cpu without notifying
 * it, but once we do we can't get it back without requesting it (unless
 * the other cpu hands it back to us before we check).
 *
 * We might have lost the token, so check that.
 *
 * Return the token's generation number.  The number is useful to callers
 * who may want to know if the token was stolen during potential blockages.
 */
int
lwkt_reltoken(lwkt_token_t tok)
{
    int gen;

    if (tok->t_cpu == mycpu->gd_cpuid) {
        tok->t_cpu = tok->t_reqcpu;
    }
    gen = tok->t_gen;
    crit_exit();
    return(gen);
}

/*
 * Reacquire a token that might have been lost.  0 is returned if the
 * generation has not changed (nobody stole the token from us), -1 is
 * returned otherwise.  The token is reacquired regardless but the
 * generation number is not bumped further if we already own the token.
 *
 * For efficiency we inline the best-case situation for lwkt_regettoken()
 * (i.e. we still own the token).
 */
int
lwkt_gentoken(lwkt_token_t tok, int *gen)
{
    if (tok->t_cpu == mycpu->gd_cpuid && tok->t_gen == *gen)
        return(0);
    *gen = lwkt_regettoken(tok);
    return(-1);
}

/*
 * Re-acquire a token that might have been lost.  The generation number
 * is bumped and returned regardless of whether the token had been lost
 * or not (because we only have cpu granularity we have to bump the token
 * either way).
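 *
 * Illustrative sketch only (hypothetical caller, not code from this file):
 * the generation number is typically used to detect whether the token was
 * stolen across a potential blocking point:
 *
 *	int gen = lwkt_gettoken(&some_token);
 *	...a potential blocking point, e.g. lwkt_switch()...
 *	if (lwkt_gentoken(&some_token, &gen) != 0)
 *		...token was lost while we were away, revalidate state...
 *	lwkt_reltoken(&some_token);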
 */
int
lwkt_regettoken(lwkt_token_t tok)
{
    /* assert we are in a critical section */
    if (tok->t_cpu != mycpu->gd_cpuid) {
#ifdef SMP
        while (tok->t_cpu != mycpu->gd_cpuid) {
            struct lwkt_gettoken_req req;
            int seq;
            int dcpu;

            req.cpu = mycpu->gd_cpuid;
            req.tok = tok;
            dcpu = (volatile int)tok->t_cpu;
            KKASSERT(dcpu >= 0 && dcpu < ncpus);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQT%d ", dcpu);
#endif
            seq = lwkt_send_ipiq(dcpu, lwkt_gettoken_remote, &req);
            lwkt_wait_ipiq(dcpu, seq);
#ifdef INVARIANTS
            if (token_debug)
                printf("REQR%d ", tok->t_cpu);
#endif
        }
#endif
    }
    ++tok->t_gen;
    return(tok->t_gen);
}

void
lwkt_inittoken(lwkt_token_t tok)
{
    /*
     * Zero structure and set cpu owner and reqcpu to cpu 0.
     */
    bzero(tok, sizeof(*tok));
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.
 *
 * NOTE!  By default new threads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
lwkt_create(void (*func)(void *), void *arg,
    struct thread **tdp, thread_t template, int tdflags, int cpu,
    const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(template, cpu);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, lwkt_exit, func, arg);
    td->td_flags |= TDF_VERBOSE | tdflags;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    if ((td->td_flags & TDF_STOPREQ) == 0)
        lwkt_schedule(td);
    else
        td->td_flags &= ~TDF_STOPREQ;
    return 0;
}

/*
 * kthread_* is specific to the kernel and is not needed by userland.
 */
#ifdef _KERNEL

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 */
void
lwkt_exit(void)
{
    thread_t td = curthread;

    if (td->td_flags & TDF_VERBOSE)
        printf("kthread %p %s has exited\n", td, td->td_comm);
    crit_enter();
    lwkt_deschedule_self();
    ++mycpu->gd_tdfreecount;
    TAILQ_INSERT_TAIL(&mycpu->gd_tdfreeq, td, td_threadq);
    cpu_thread_exit();
}

/*
 * Create a kernel process/thread/whatever.  It shares its address space
 * with proc0 - ie: kernel only.  5.x compatible.
 *
 * NOTE!  By default kthreads are created with the MP lock held.  A
 * thread which does not require the MP lock should release it by calling
 * rel_mplock() at the start of the new thread.
 */
int
kthread_create(void (*func)(void *), void *arg,
    struct thread **tdp, const char *fmt, ...)
{
    thread_t td;
    __va_list ap;

    td = lwkt_alloc_thread(NULL, -1);
    if (tdp)
        *tdp = td;
    cpu_set_thread_handler(td, kthread_exit, func, arg);
    td->td_flags |= TDF_VERBOSE;
#ifdef SMP
    td->td_mpcount = 1;
#endif

    /*
     * Set up arg0 for 'ps' etc
     */
    __va_start(ap, fmt);
    vsnprintf(td->td_comm, sizeof(td->td_comm), fmt, ap);
    __va_end(ap);

    /*
     * Schedule the thread to run
     */
    lwkt_schedule(td);
    return 0;
}

/*
 * Destroy an LWKT thread.  Warning!  This function is not called when
 * a process exits, cpu_proc_exit() directly calls cpu_thread_exit() and
 * uses a different reaping mechanism.
 *
 * XXX duplicates lwkt_exit()
 */
void
kthread_exit(void)
{
    lwkt_exit();
}

#endif /* _KERNEL */

void
crit_panic(void)
{
    thread_t td = curthread;
    int lpri = td->td_pri;

    td->td_pri = 0;
    panic("td_pri is/would-go negative! %p %d", td, lpri);
}

#ifdef SMP

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * YYY If the FIFO fills up we have to enable interrupts and process the
 * IPIQ while waiting for it to empty or we may deadlock with another cpu.
 * Create a CPU_*() function to do this!
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * Must be called from a critical section.
 */
int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (dcpu == gd->gd_cpuid) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    KKASSERT(dcpu >= 0 && dcpu < ncpus);
    ++ipiq_count;
    ip = &gd->gd_ipiq[dcpu];

    /*
     * We always drain before the FIFO becomes full so it should never
     * become full.  We need to leave enough entries to deal with
     * reentrancy.
     */
    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg[windex] = arg;
    /* YYY memory barrier */
    ++ip->ip_windex;
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }
    --gd->gd_intr_nesting_level;
    cpu_send_ipiq(dcpu);        /* issues memory barrier if appropriate */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
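 *
 * Illustrative sketch only ('target_mask' and 'handler' are hypothetical,
 * not part of this file): ask every other cpu in a mask to run a handler:
 *
 *	lwkt_send_ipiq_mask(target_mask & ~(1 << mycpu->gd_cpuid),
 *			    handler, arg);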
 */
void
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(cpuid, func, arg);
        mask &= ~(1 << cpuid);
    }
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * Must be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(int dcpu, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (dcpu != mycpu->gd_cpuid) {
        KKASSERT(dcpu >= 0 && dcpu < ncpus);
        ip = &mycpu->gd_ipiq[dcpu];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                lwkt_process_ipiq();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, dcpu, ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
            }
            write_eflags(eflags);
        }
    }
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 */
void
lwkt_process_ipiq(void)
{
    int n;
    int cpuid = mycpu->gd_cpuid;

    for (n = 0; n < ncpus; ++n) {
        lwkt_ipiq_t ip;
        int ri;

        if (n == cpuid)
            continue;
        ip = globaldata_find(n)->gd_ipiq;
        if (ip == NULL)
            continue;
        ip = &ip[cpuid];

        /*
         * Note: xindex is only updated after we are sure the function has
         * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
         * function may send an IPI which may block/drain.
         */
        while (ip->ip_rindex != ip->ip_windex) {
            ri = ip->ip_rindex & MAXCPUFIFO_MASK;
            ++ip->ip_rindex;
            ip->ip_func[ri](ip->ip_arg[ri]);
            /* YYY memory barrier */
            ip->ip_xindex = ip->ip_rindex;
        }
    }
}

#else

int
lwkt_send_ipiq(int dcpu, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", dcpu, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(int dcpu, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", dcpu, seq);
}

#endif
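
/*
 * Illustrative sketch only (hypothetical names, not part of this file):
 * creating and naming a kernel thread with the 5.x-compatible interface.
 * Per the notes above, the new thread starts with the MP lock held and
 * should call rel_mplock() if it does not need it.
 *
 *	static struct thread *example_td;
 *
 *	static void
 *	example_thread(void *dummy)
 *	{
 *		rel_mplock();
 *		for (;;) {
 *			(do work, block via lwkt_deschedule_self() +
 *			 lwkt_switch(), or exit by simply returning)
 *		}
 *	}
 *
 *	kthread_create(example_thread, NULL, &example_td, "example");
 */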