/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.19 2006/12/23 00:35:04 swildner Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#ifdef _KERNEL

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef _KERNEL

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 2)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)

#endif	/* SMP */
#endif	/* KERNEL */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
	func(arg1, arg2, NULL);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
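
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * queue a function on another cpu with lwkt_send_ipiq3().  The callback
 * matches the ipifunc3_t signature used for ip_func[] above; the names
 * example_remote_add() and example_send_add() are hypothetical.
 */
#if 0
static void
example_remote_add(void *arg1, int arg2, struct intrframe *frame)
{
    /* Runs on the target cpu from its IPI/ipiq processing path */
    atomic_add_int((int *)arg1, arg2);
}

static void
example_send_add(globaldata_t target, int *counterp)
{
    /*
     * Queues the callback on our per-target FIFO; the hardware IPI is
     * skipped if the target is already draining its queue (ip_npoll).
     */
    lwkt_send_ipiq3(target, example_remote_add, counterp, 1);
}
#endif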

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
	func(arg1, arg2, NULL);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg1, arg2, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    return(0);
}
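
/*
 * Illustrative sketch (not part of the original file): a caller that cannot
 * block can try lwkt_send_ipiq3_nowait() and fall back to deferring the work
 * when the FIFO is too full.  example_try_send() and example_defer_locally()
 * are hypothetical names.
 */
#if 0
static void example_defer_locally(ipifunc3_t func, void *arg1, int arg2);

static void
example_try_send(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    if (lwkt_send_ipiq3_nowait(target, func, arg1, arg2) == ENOENT) {
	/* FIFO was >= 2/3 full; arrange to retry from a blockable context */
	example_defer_locally(func, arg1, arg2);
    }
}
#endif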

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(u_int32_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n", mycpu->gd_cpuid, target->gd_cpuid, ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure that the loop does not use a speculative value
		 * (which may improve performance).
		 */
		cpu_lfence();
	    }
	    write_eflags(eflags);
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
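
/*
 * Illustrative sketch (not part of the original file): pairing the sequence
 * number returned by lwkt_send_ipiq3() with lwkt_wait_ipiq() to wait for the
 * remote cpu to finish executing the function.  lwkt_wait_ipiq() must be
 * entered from a critical section.  example_send_and_wait() is a
 * hypothetical name.
 */
#if 0
static void
example_send_and_wait(globaldata_t target, ipifunc3_t func, void *arg1)
{
    int seq;

    crit_enter();
    seq = lwkt_send_ipiq3(target, func, arg1, 0);
    lwkt_wait_ipiq(target, seq);	/* spins, draining our own ipiqs */
    crit_exit();
}
#endif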

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, &frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}
#endif

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    wi = ip->ip_windex;
    cpu_lfence();

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     *
     * Note: due to additional IPI operations that the callback function
     * may make, it is possible for both rindex and windex to advance and
     * thus for rindex to advance past our cached windex.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	copy_func = ip->ip_func[ri];
	copy_arg1 = ip->ip_arg1[ri];
	copy_arg2 = ip->ip_arg2[ri];
	cpu_mfence();
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) == ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
	copy_func(copy_arg1, copy_arg2, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously before return on remote
 *	cpus.  A lwkt_cpusync_t pointer is passed as an argument.  The data
 *	can be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed in tandem with return on remote cpus.
 *	The data is directly passed as an argument.  Do not pass pointers to
 *	temporary storage as the storage might have gone poof by the time
 *	the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must wait
 *	for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *	optimization we should be able to put a counter in the globaldata
 *	structure (if it is not otherwise being used) and just poke it and
 *	return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus, return once they are
 *	known to be in a synchronization loop.  The target cpus will execute
 *	poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
	ip->ip_arg1[wi] = poll;
	ip->ip_arg2[wi] = 0;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif
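
/*
 * Illustrative sketch (not part of the original file): running a function
 * on every active cpu via lwkt_cpusync_simple().  The callback receives the
 * lwkt_cpusync_t and reaches its data through cs_data, as documented above.
 * example_counter, example_count_one() and example_count_cpus() are
 * hypothetical names.
 */
#if 0
static int example_counter;

static void
example_count_one(lwkt_cpusync_t poll)
{
    /* Executed on each cpu in the mask before lwkt_cpusync_simple returns */
    atomic_add_int((int *)poll->cs_data, 1);
}

static void
example_count_cpus(void)
{
    lwkt_cpusync_simple(smp_active_mask, example_count_one, &example_counter);
}
#endif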