/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.11 2005/06/03 23:57:32 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */
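
/*
 * Illustrative usage sketch (added for clarity, not part of the original
 * code): a caller typically queues a function for execution on another
 * cpu and may then wait for that queue entry to be consumed, e.g.
 *
 *      seq = lwkt_send_ipiq(globaldata_find(cpuid), some_func, some_arg);
 *      crit_enter();
 *      lwkt_wait_ipiq(globaldata_find(cpuid), seq);
 *      crit_exit();
 *
 * where some_func and some_arg are hypothetical placeholders.
 * lwkt_wait_ipiq() must be called from a critical section, hence the
 * crit_enter()/crit_exit() bracket.
 */
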
#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK    (UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;    /* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull; /* number of fifo full conditions detected */
static __int64_t ipiq_avoided;  /* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;  /* passive IPI messages */
static __int64_t ipiq_cscount;  /* number of cpu synchronizations */
static int ipiq_optimized = 1;  /* XXX temporary sysctl */
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#endif

#endif

#ifdef SMP

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
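
/*
 * Reader's note (added for clarity, not part of the original code): each
 * lwkt_ipiq FIFO is driven by two free-running counters.  The sender
 * advances ip_windex, the target advances ip_rindex, an entry lives at
 * (counter & MAXCPUFIFO_MASK), and the difference (ip_windex - ip_rindex)
 * is the number of in-flight messages.  The senders below keep that
 * difference well under MAXCPUFIFO so queued entries are never
 * overwritten before they are consumed.
 */
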
/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu, and only the owning (source) cpu writes entries
 * into it.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
        func(arg);
        return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
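
/*
 * Reader's note (added for clarity, not part of the original code):
 * ip_npoll is the sender/target interlock mentioned above.  A hardware
 * IPI is raised only when atomic_poll_acquire_int() actually acquires
 * ip_npoll; if the target still holds it because it is draining the FIFO
 * from an earlier IPI, the message is merely queued and ipiq_avoided is
 * bumped (unless the ipiq_optimized sysctl is turned off).
 */
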
/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
        panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
        unsigned int eflags = read_eflags();

        if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        cpu_enable_intr();
        ++ipiq_fifofull;
        while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
            KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
            lwkt_process_ipiq();
        }
        write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking; returns 0 on success or ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs, but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
        func(arg);
        return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3)
        return(ENOENT);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
        cpu_send_ipiq(target->gd_cpuid);
    } else {
        if (ipiq_optimized == 0)
            cpu_send_ipiq(target->gd_cpuid);
        ++ipiq_avoided;
    }
    return(0);
}

/*
 * Deprecated, used only by fast interrupt forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
        cpuid = bsfl(mask);
        lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
        mask &= ~(1 << cpuid);
        ++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
        ip = &mycpu->gd_ipiq[target->gd_cpuid];
        if ((int)(ip->ip_xindex - seq) < 0) {
            unsigned int eflags = read_eflags();
            cpu_enable_intr();
            while ((int)(ip->ip_xindex - seq) < 0) {
                crit_enter();
                lwkt_process_ipiq();
                crit_exit();
                if (--maxc == 0)
                    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
                        mycpu->gd_cpuid, target->gd_cpuid,
                        ip->ip_xindex - seq);
                if (maxc < -1000000)
                    panic("LWKT_WAIT_IPIQ");
                /*
                 * xindex may be modified by another cpu, use a load fence
                 * to ensure that the loop does not use a speculative value
                 * (which may improve performance).
                 */
                cpu_lfence();
            }
            write_eflags(eflags);
        }
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
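
/*
 * Reader's note (added for clarity, not part of the original code):
 * lwkt_wait_ipiq() polls ip_xindex rather than ip_rindex because the
 * target cpu only advances ip_xindex after a queued function has finished
 * executing, whereas ip_rindex is advanced before dispatch.
 * lwkt_seq_ipiq() returns the current ip_windex, which is the sequence
 * number lwkt_wait_ipiq() compares against ip_xindex.
 */
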
/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
        if (n != gd->gd_cpuid) {
            ip = globaldata_find(n)->gd_ipiq;
            if (ip != NULL) {
                while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
                    ;
            }
        }
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
        if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) {
            if (gd->gd_curthread->td_cscount == 0)
                goto again;
            need_ipiq();
        }
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
        ip->ip_rindex = ri + 1;
        ri &= MAXCPUFIFO_MASK;
        ip->ip_func[ri](ip->ip_arg[ri], frame);
        cpu_sfence();
        ip->ip_xindex = ip->ip_rindex;
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}
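
/*
 * Reader's note (added for clarity, not part of the original code): the
 * ipiq matrix is indexed by sender.  Each cpu allocates one lwkt_ipiq per
 * possible peer in its own gd_ipiq[] array, so the processing loops above
 * read &globaldata_find(n)->gd_ipiq[mycpu->gd_cpuid], i.e. the queue that
 * cpu n owns for messages destined to this cpu.  Also note that ip_npoll
 * is released before ip_windex is re-checked, so any message queued after
 * the release raises a fresh hardware IPI.
 */
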
#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *    The function is executed synchronously before return on remote cpus.
 *    A lwkt_cpusync_t pointer is passed as an argument.  The data can
 *    be accessed via arg->cs_data.
 *
 *    XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 *    The function is executed in tandem with return on remote cpus.
 *    The data is directly passed as an argument.  Do not pass pointers
 *    to temporary storage as the storage might have gone poof by the
 *    time the target cpu executes the function.
 *
 *    At the moment lwkt_cpusync is declared on the stack and we must wait
 *    for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *    optimization we should be able to put a counter in the globaldata
 *    structure (if it is not otherwise being used) and just poke it and
 *    return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
        func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 *    Start synchronization with a set of target cpus, return once they are
 *    known to be in a synchronization loop.  The target cpus will execute
 *    poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *    XXX future: add lwkt_cpusync_start_quick() and require a call to
 *    lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *    potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        ++ipiq_cscount;
        ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
            mask & gd->gd_other_cpus & smp_active_mask,
            (ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
        if (poll->cs_run_func)
            poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
        if (poll->cs_maxcount == count)
            ++gd->gd_curthread->td_cscount;
        while (poll->cs_count != poll->cs_maxcount) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
    }
#endif
}
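
/*
 * Illustrative usage sketch (added for clarity, not part of the original
 * code): the lower level start/finish interface brackets an operation
 * while the target cpus are held in their synchronization loop, e.g.
 *
 *      struct lwkt_cpusync cmd;
 *
 *      cmd.cs_run_func = NULL;
 *      cmd.cs_fin1_func = NULL;
 *      cmd.cs_fin2_func = NULL;
 *      cmd.cs_data = NULL;
 *      lwkt_cpusync_start(some_mask, &cmd);
 *      (operate while the targets are held in the sync loop)
 *      lwkt_cpusync_finish(&cmd);
 *
 * where some_mask is a hypothetical cpumask_t chosen by the caller.
 */
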
/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func)
            poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
        while (poll->cs_count != -(poll->cs_maxcount + 1)) {
            crit_enter();
            lwkt_process_ipiq();
            crit_exit();
        }
        --gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
        poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
        cpusync_func2_t savef;
        void *saved;

        if (poll->cs_fin1_func)
            poll->cs_fin1_func(poll);
        if (poll->cs_fin2_func) {
            savef = poll->cs_fin2_func;
            saved = poll->cs_data;
            atomic_add_int(&poll->cs_count, -1);
            savef(saved);
        } else {
            atomic_add_int(&poll->cs_count, -1);
        }
    } else {
        globaldata_t gd = mycpu;
        lwkt_ipiq_t ip;
        int wi;

        ip = &gd->gd_cpusyncq;
        wi = ip->ip_windex & MAXCPUFIFO_MASK;
        ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
        ip->ip_arg[wi] = poll;
        cpu_sfence();
        ++ip->ip_windex;
    }
}

#endif
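
/*
 * Reader's summary (added for clarity, not part of the original code):
 * the cpusync handshake above works as follows.  The originator IPIs
 * lwkt_cpusync_remote1() to each target; every target runs cs_run_func(),
 * increments cs_count and requeues lwkt_cpusync_remote2() on its
 * gd_cpusyncq.  The originator spins until cs_count reaches cs_maxcount,
 * does its work, and lwkt_cpusync_finish() then sets cs_count to -1.
 * Each target's remote2 sees the negative count, runs cs_fin1_func() and
 * cs_fin2_func() and decrements cs_count; the originator returns once
 * cs_count reaches -(cs_maxcount + 1).
 */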