/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.13 2005/06/21 05:25:17 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */
#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");

#define IPIQ_STRING	"func=%p arg=%p scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 2)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg, sgd->gd_cpuid, dgd->gd_cpuid)

#endif	/* SMP */
#endif	/* KERNEL */

#ifdef SMP

static int lwkt_process_ipiq1(globaldata_t sgd, lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);
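/*
 * Editorial sketch (not in the original source): the per-target FIFOs used
 * below are indexed with free-running counters.  ip_windex is advanced by
 * the sending cpu, ip_rindex/ip_xindex by the receiving cpu, and only the
 * low bits (MAXCPUFIFO_MASK) select a slot, so the difference
 * (ip_windex - ip_rindex) is the queue depth even after the counters wrap.
 * A minimal illustration of that convention, using a hypothetical helper
 * name:
 *
 *	static __inline int
 *	ipiq_depth(lwkt_ipiq_t ip)
 *	{
 *		return (ip->ip_windex - ip->ip_rindex);
 *	}
 *
 * The blocking send paths below drain the queue when this depth exceeds
 * MAXCPUFIFO / 2, and the nowait path refuses to queue once it reaches
 * two thirds of the FIFO.
 */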
/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg, gd, target);

    if (target == gd) {
	func(arg);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
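/*
 * Illustrative sketch (not part of the original file): a typical caller
 * queues a small handler on another cpu and, if it needs completion
 * semantics, waits on the sequence number that lwkt_send_ipiq() returns.
 * The handler and counter names here are hypothetical.
 *
 *	static void
 *	example_bump_counter(void *arg)
 *	{
 *		atomic_add_int((int *)arg, 1);
 *	}
 *
 *	int seq = lwkt_send_ipiq(globaldata_find(dcpu),
 *				 example_bump_counter, &some_counter);
 *	crit_enter();
 *	lwkt_wait_ipiq(globaldata_find(dcpu), seq);
 *	crit_exit();
 *
 * lwkt_wait_ipiq() must be called from a critical section, hence the
 * crit_enter()/crit_exit() bracket.
 */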
/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg, gd, target);
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
	func(arg);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}
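/*
 * Illustrative sketch (not part of the original file): a caller that cannot
 * afford the potential liveloop in lwkt_send_ipiq() can try the non-blocking
 * path first and fall back to the blocking path only when the target FIFO is
 * too full (ENOENT).  The handler name is hypothetical.
 *
 *	if (lwkt_send_ipiq_nowait(target, example_handler, arg) == ENOENT)
 *		lwkt_send_ipiq(target, example_handler, arg);
 */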
/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure that the loop does not use a speculative value
		 * (which may improve performance).
		 */
		cpu_lfence();
	    }
	    write_eflags(eflags);
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(sgd, &ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(gd, &gd->gd_cpusyncq, &frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}
#endif
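/*
 * Illustrative sketch (not part of the original file): lwkt_seq_ipiq()
 * snapshots the write index of our FIFO to a target cpu, and
 * lwkt_wait_ipiq() spins until the target's execution index (ip_xindex)
 * has passed that point.  The pair can therefore be used to drain
 * everything previously queued to a remote cpu before proceeding:
 *
 *	int seq = lwkt_seq_ipiq(target);
 *	crit_enter();
 *	lwkt_wait_ipiq(target, seq);
 *	crit_exit();
 */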
static int
lwkt_process_ipiq1(globaldata_t sgd, lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi;
    void (*copy_func)(void *data, struct intrframe *frame);
    void *copy_arg;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
	ri &= MAXCPUFIFO_MASK;
	copy_func = ip->ip_func[ri];
	copy_arg = ip->ip_arg[ri];
	cpu_mfence();
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) == ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg, sgd, mycpu);
	copy_func(copy_arg, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#else

/*
 * !SMP dummy routines
 */

int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0);	/* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 * The function is executed synchronously before return on remote cpus.
 * A lwkt_cpusync_t pointer is passed as an argument.  The data can
 * be accessed via arg->cs_data.
 *
 * XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 * The function is executed in tandem with return on remote cpus.
 * The data is directly passed as an argument.  Do not pass pointers to
 * temporary storage as the storage might have gone poof by the time the
 * target cpu executes the function.
 *
 * At the moment lwkt_cpusync is declared on the stack and we must wait
 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 * optimization we should be able to put a counter in the globaldata
 * structure (if it is not otherwise being used) and just poke it and
 * return without waiting.  XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
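/*
 * Illustrative sketch (not part of the original file): a caller that needs
 * every cpu in a mask to run a small handler before it proceeds can use
 * lwkt_cpusync_simple(); the handler receives the lwkt_cpusync_t and reaches
 * its payload through cs_data.  The handler and payload names here are
 * hypothetical.
 *
 *	static void
 *	example_sync_func(lwkt_cpusync_t poll)
 *	{
 *		struct example_payload *pay = poll->cs_data;
 *
 *		... operate on pay on this cpu ...
 *	}
 *
 *	lwkt_cpusync_simple(smp_active_mask, example_sync_func, &payload);
 */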
/*
 * lwkt_cpusync_start()
 *
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 * XXX future: add lwkt_cpusync_start_quick() and require a call to
 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 * potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}
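/*
 * Illustrative sketch (not part of the original file): the lower-level API
 * brackets a critical operation with start/finish so the other cpus in the
 * mask are known to be spinning in the synchronization loop while the master
 * does its work.  The field assignments mirror what lwkt_cpusync_simple()
 * does above; the handler and payload names are hypothetical.
 *
 *	struct lwkt_cpusync cmd;
 *
 *	cmd.cs_run_func = NULL;
 *	cmd.cs_fin1_func = example_fin1;   (runs on each cpu before finish returns)
 *	cmd.cs_fin2_func = NULL;
 *	cmd.cs_data = &payload;
 *	lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
 *	... master-only work while the target cpus spin ...
 *	lwkt_cpusync_finish(&cmd);
 */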
/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
	ip->ip_arg[wi] = poll;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif