/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.27 2008/05/18 20:57:56 nth Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
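 *
 * Illustrative usage sketch (not taken from this file; remote_ready_intr
 * and its arguments are hypothetical):
 *
 *	static void
 *	remote_ready_intr(void *arg1, int arg2, struct intrframe *frame)
 *	{
 *	    ... runs on the target cpu, inside a critical section ...
 *	}
 *
 *	seq = lwkt_send_ipiq3(globaldata_find(dcpu), remote_ready_intr,
 *			      arg1, arg2);
 *	lwkt_wait_ipiq(globaldata_find(dcpu), seq);	... optional ...
 *
 * The send queues the function for execution on the target cpu and returns
 * without waiting for it to run; lwkt_wait_ipiq() can be used with the
 * returned sequence number when completion must be waited on.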
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 3)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, sync_add, 6, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)	\
	KTR_LOG(ipiq_ ## name, arg)

#endif	/* SMP */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO is written only by the source cpu and
 * drained only by the target cpu.
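 *
 * Index handling sketch (assumption: MAXCPUFIFO is a power of two and
 * MAXCPUFIFO_MASK == MAXCPUFIFO - 1):
 *
 *	ip_windex - ip_rindex		entries currently queued
 *	ip_windex & MAXCPUFIFO_MASK	slot the next message is written to
 *
 * Only the sender advances ip_windex and only the target advances
 * ip_rindex, so the FIFO itself needs no lock, only the memory fences
 * used below.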
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
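 *
 * Illustrative use (free_remote_hook is a hypothetical callback):
 *
 *	lwkt_send_ipiq3_passive(ptr_gd, free_remote_hook, ptr, 0);
 *
 * No hardware IPI is generated; the message simply waits in the FIFO until
 * the target cpu polls its queues or the FIFO rises past the high-water
 * mark handled below.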
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_critcount);
    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg1, arg2, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(0);
}

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(u_int32_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
#if defined(__i386__)
	    unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	    unsigned long rflags = read_rflags();
#endif
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			    mycpu->gd_cpuid, target->gd_cpuid,
			    ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure that the loop does not use a speculative value
		 * (which may improve performance).
		 */
		cpu_lfence();
	    }
#if defined(__i386__)
	    write_eflags(eflags);
#elif defined(__x86_64__)
	    write_rflags(rflags);
#endif
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
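 *
 * (As implied above, lwkt_process_ipiq() hands a NULL frame to
 * lwkt_process_ipiq_core() while lwkt_process_ipiq_frame() passes along
 * the interrupt frame it was given, so ip_func[] callbacks only see a
 * frame when one actually exists.)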
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	    need_ipiq();
	}
    }
}

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    globaldata_t mygd = mycpu;
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_critcount);
    wi = ip->ip_windex;
    cpu_lfence();
    ++mygd->gd_intr_nesting_level;

    /*
     * NOTE: xindex is only updated after we are sure the function has
     *	     finished execution.  Beware lwkt_process_ipiq() reentrancy!
     *	     The function may send an IPI which may block/drain.
     *
     * NOTE: Due to additional IPI operations that the callback function
     *	     may make, it is possible for both rindex and windex to advance
     *	     and thus for rindex to advance past our cached windex.
     *
     * NOTE: A memory fence is required to prevent speculative loads prior
     *	     to the loading of ip_rindex.  Even though stores might be
     *	     ordered, loads are probably not.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	cpu_mfence();
	copy_func = ip->ip_func[ri];
	copy_arg1 = ip->ip_arg1[ri];
	copy_arg2 = ip->ip_arg2[ri];
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
		 ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
	copy_func(copy_arg1, copy_arg2, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }
    --mygd->gd_intr_nesting_level;

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
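     *
     * (Reasoning sketch: a sender that fails atomic_poll_acquire_int()
     * skips the hardware IPI on the assumption that we are still draining
     * the queue.  Clearing ip_npoll first and only then re-reading
     * ip_windex guarantees that a message queued in that window is either
     * seen by this re-check or causes the sender to acquire ip_npoll and
     * send a fresh IPI.)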
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

static void
lwkt_sync_ipiq(void *arg)
{
    cpumask_t *cpumask = arg;

    atomic_clear_int(cpumask, mycpu->gd_cpumask);
    if (*cpumask == 0)
	wakeup(cpumask);
}

void
lwkt_synchronize_ipiqs(const char *wmesg)
{
    cpumask_t other_cpumask;

    other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
    lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);

    while (other_cpumask != 0) {
	tsleep_interlock(&other_cpumask, 0);
	if (other_cpumask != 0)
	    tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
    }
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 * The function is executed synchronously before return on remote cpus.
 * A lwkt_cpusync_t pointer is passed as an argument.  The data can
 * be accessed via arg->cs_data.
 *
 * XXX should I just pass the data as an argument to be consistent?
 */

void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_fastdata()
 *
 * The function is executed in tandem with return on remote cpus.
 * The data is directly passed as an argument.  Do not pass pointers to
 * temporary storage as the storage might have gone poof by the time
 * the target cpu executes the function.
 *
 * At the moment lwkt_cpusync is declared on the stack and we must wait
 * for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 * optimization we should be able to put a counter in the globaldata
 * structure (if it is not otherwise being used) and just poke it and
 * return without waiting.  XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = NULL;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}

/*
 * lwkt_cpusync_start()
 *
 * Start synchronization with a set of target cpus, return once they are
 * known to be in a synchronization loop.  The target cpus will execute
 * poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 * XXX future: add lwkt_cpusync_start_quick() and require a call to
 * lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 * potentially absorb the IPI latency doing something useful.
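 *
 * Illustrative use of the start/finish pair (my_sync_hook and my_data are
 * hypothetical):
 *
 *	struct lwkt_cpusync cmd;
 *
 *	cmd.cs_run_func = my_sync_hook;
 *	cmd.cs_fin1_func = NULL;
 *	cmd.cs_fin2_func = NULL;
 *	cmd.cs_data = my_data;
 *	lwkt_cpusync_start(mask, &cmd);
 *	... operate while the target cpus spin in their sync loop ...
 *	lwkt_cpusync_finish(&cmd);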
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    logipiq2(sync_start, mask & gd->gd_other_cpus);
    poll->cs_maxcount = lwkt_send_ipiq_mask(
			    mask & gd->gd_other_cpus & smp_active_mask,
			    (ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    logipiq2(sync_add, mask & gd->gd_other_cpus);
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc1_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * Finish synchronization with a set of target cpus.  The target cpus will
 * execute cs_fin1_func(poll) prior to this function returning, and will
 * execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 * If cs_maxcount is non-zero then we are mastering a cpusync with one or
 * more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
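 *
 * (The requeue goes onto our private gd_cpusyncq rather than a normal
 * per-cpu ipiq, so lwkt_process_ipiq() keeps re-running this function on
 * every pass without any further hardware IPIs.)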
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
	ip->ip_arg1[wi] = poll;
	ip->ip_arg2[wi] = 0;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif