/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.27 2008/05/18 20:57:56 nth Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/caps.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#ifdef SMP
static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */
static int ipiq_debug;		/* set to 1 for debug */
#ifdef PANIC_DEBUG
static int panic_ipiq_cpu = -1;
static int panic_ipiq_count = 100;
#endif
#endif

#ifdef SMP
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0,
    "Number of IPI's sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0,
    "Number of fifo full conditions detected");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0,
    "Number of IPI's avoided by interlock with target cpu");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0,
    "Number of passive IPI messages sent");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0,
    "Number of cpu synchronizations");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0,
    "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_debug, CTLFLAG_RW, &ipiq_debug, 0,
    "");
#ifdef PANIC_DEBUG
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_cpu, CTLFLAG_RW, &panic_ipiq_cpu, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, panic_ipiq_count, CTLFLAG_RW, &panic_ipiq_count, 0, "");
#endif

#define IPIQ_STRING	"func=%p arg1=%p arg2=%d scpu=%d dcpu=%d"
#define IPIQ_ARG_SIZE	(sizeof(void *) * 2 + sizeof(int) * 3)

#if !defined(KTR_IPIQ)
#define KTR_IPIQ	KTR_ALL
#endif
KTR_INFO_MASTER(ipiq);
KTR_INFO(KTR_IPIQ, ipiq, send_norm, 0, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_pasv, 1, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_nbio, 2, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_fail, 3, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, receive, 4, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, sync_start, 5, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, sync_end, 6, "cpumask=%08x", sizeof(cpumask_t));
KTR_INFO(KTR_IPIQ, ipiq, cpu_send, 7, IPIQ_STRING, IPIQ_ARG_SIZE);
KTR_INFO(KTR_IPIQ, ipiq, send_end, 8, IPIQ_STRING, IPIQ_ARG_SIZE);

#define logipiq(name, func, arg1, arg2, sgd, dgd)	\
	KTR_LOG(ipiq_ ## name, func, arg1, arg2, sgd->gd_cpuid, dgd->gd_cpuid)
#define logipiq2(name, arg)	\
	KTR_LOG(ipiq_ ## name, arg)

#endif	/* SMP */

#ifdef SMP

static int lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
				  struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t cs);
static void lwkt_cpusync_remote2(lwkt_cpusync_t cs);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  Only the owning (source) cpu writes to a given
 * FIFO; the target cpu drains it.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3(globaldata_t target, ipifunc3_t func, void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_norm, func, arg1, arg2, gd, target);

    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}
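/*
 * Illustrative usage sketch (not part of the original file): how a caller
 * might queue work on another cpu with lwkt_send_ipiq3().  The callback and
 * helper names below are hypothetical.  The callback runs on the target cpu
 * from its IPI processing path, inside a critical section, and receives
 * (arg1, arg2, frame).
 */
#if 0
static void
example_remote_add(void *arg1, int arg2, struct intrframe *frame __unused)
{
    /* Executes on the target cpu. */
    atomic_add_int((u_int *)arg1, arg2);
}

static void
example_bump_counter(globaldata_t target, u_int *counter)
{
    /*
     * If target is the current cpu the function runs synchronously,
     * otherwise it is queued on the per-target FIFO and a hardware IPI
     * is sent only if the target is not already draining the queue.
     */
    lwkt_send_ipiq3(target, example_remote_add, counter, 1);
}
#endif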
/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq3_passive(globaldata_t target, ipifunc3_t func,
			void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    logipiq(send_pasv, func, arg1, arg2, gd, target);
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_critcount);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
#if defined(__i386__)
	unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	unsigned long rflags = read_rflags();
#endif

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	}
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
#if defined(__i386__)
	write_eflags(eflags);
#elif defined(__x86_64__)
	write_rflags(rflags);
#endif
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(ip->ip_windex);
}
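/*
 * Illustrative usage sketch (not part of the original file): a passive IPI
 * defers non-critical work to the target cpu without raising a hardware
 * interrupt; the message sits in the FIFO until the target next polls it
 * (typically on its clock tick).  The names below are hypothetical.
 */
#if 0
static void
example_remote_release(void *arg1, int arg2 __unused,
		       struct intrframe *frame __unused)
{
    example_free_on_home_cpu(arg1);	/* hypothetical per-cpu free routine */
}

static void
example_defer_release(globaldata_t home, void *obj)
{
    /* No hardware IPI is initiated unless the FIFO is nearly full. */
    lwkt_send_ipiq3_passive(home, example_remote_release, obj, 0);
}
#endif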
/*
 * Send an IPI request without blocking.  Returns 0 on success, or ENOENT
 * if the target FIFO is too full to accept the message.  The actual
 * queueing of the hardware IPI may still force us to spin and process
 * incoming IPIs but that will eventually go away when we've gotten rid of
 * the other general IPIs.
 */
int
lwkt_send_ipiq3_nowait(globaldata_t target, ipifunc3_t func,
		       void *arg1, int arg2)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    logipiq(send_nbio, func, arg1, arg2, gd, target);
    KKASSERT(curthread->td_critcount);
    if (target == gd) {
	func(arg1, arg2, NULL);
	logipiq(send_end, func, arg1, arg2, gd, target);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3) {
	logipiq(send_fail, func, arg1, arg2, gd, target);
	return(ENOENT);
    }
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = func;
    ip->ip_arg1[windex] = arg1;
    ip->ip_arg2[windex] = arg2;
    cpu_sfence();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	logipiq(cpu_send, func, arg1, arg2, gd, target);
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0) {
	    logipiq(cpu_send, func, arg1, arg2, gd, target);
	    cpu_send_ipiq(target->gd_cpuid);
	} else {
	    ++ipiq_avoided;
	}
    }

    logipiq(send_end, func, arg1, arg2, gd, target);
    return(0);
}

/*
 * Deprecated; used only by fast interrupt forwarding.
 */
int
lwkt_send_ipiq3_bycpu(int dcpu, ipifunc3_t func, void *arg1, int arg2)
{
    return(lwkt_send_ipiq3(globaldata_find(dcpu), func, arg1, arg2));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq3_mask(cpumask_t mask, ipifunc3_t func, void *arg1, int arg2)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = BSFCPUMASK(mask);
	lwkt_send_ipiq3(globaldata_find(cpuid), func, arg1, arg2);
	mask &= ~CPUMASK(cpuid);
	++count;
    }
    return(count);
}

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
#if defined(__i386__)
	    unsigned int eflags = read_eflags();
#elif defined(__x86_64__)
	    unsigned long rflags = read_rflags();
#endif
	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    kprintf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			    mycpu->gd_cpuid, target->gd_cpuid,
			    ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
		/*
		 * xindex may be modified by another cpu, use a load fence
		 * to ensure the loop re-reads the current value instead of
		 * spinning on a stale, speculatively loaded one.
		 */
		cpu_lfence();
	    }
#if defined(__i386__)
	    write_eflags(eflags);
#elif defined(__x86_64__)
	    write_rflags(rflags);
#endif
	}
    }
}

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}
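/*
 * Illustrative usage sketch (not part of the original file): the sequence
 * number returned by lwkt_send_ipiq3() can be handed to lwkt_wait_ipiq()
 * to spin until the target cpu has actually executed the queued function.
 * The helper name is hypothetical; note the wait must be done from a
 * critical section.
 */
#if 0
static void
example_send_and_wait(globaldata_t target, ipifunc3_t func, void *arg)
{
    int seq;

    crit_enter();
    seq = lwkt_send_ipiq3(target, func, arg, 0);
    /* Returns once the target's ip_xindex has caught up with seq. */
    lwkt_wait_ipiq(target, seq);
    crit_exit();
}
#endif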
/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 *
 * When the current cpu is mastering a cpusync we do NOT internally loop
 * on the cpusyncq poll.  We also do not re-flag a pending ipi due to
 * the cpusyncq poll because this can cause doreti/splz to loop internally.
 * The cpusync master's own loop must be allowed to run to avoid a deadlock.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	}
    }
}

void
lwkt_process_ipiq_frame(struct intrframe *frame)
{
    globaldata_t gd = mycpu;
    globaldata_t sgd;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    sgd = globaldata_find(n);
	    ip = sgd->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq_core(sgd, &ip[gd->gd_cpuid], frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq_core(gd, &gd->gd_cpusyncq, frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	}
    }
}

static int
lwkt_process_ipiq_core(globaldata_t sgd, lwkt_ipiq_t ip,
		       struct intrframe *frame)
{
    globaldata_t mygd = mycpu;
    int ri;
    int wi;
    ipifunc3_t copy_func;
    void *copy_arg1;
    int copy_arg2;

    /*
     * Obtain the current write index, which is modified by a remote cpu.
     * Issue a load fence to prevent speculative reads of e.g. data written
     * by the other cpu prior to it updating the index.
     */
    KKASSERT(curthread->td_critcount);
    wi = ip->ip_windex;
    cpu_lfence();
    ++mygd->gd_intr_nesting_level;

    /*
     * NOTE: xindex is only updated after we are sure the function has
     *	     finished execution.  Beware lwkt_process_ipiq() reentrancy!
     *	     The function may send an IPI which may block/drain.
     *
     * NOTE: Due to additional IPI operations that the callback function
     *	     may make, it is possible for both rindex and windex to advance
     *	     and thus for rindex to advance past our cached windex.
     *
     * NOTE: A load fence is required to prevent speculative loads prior
     *	     to the loading of ip_rindex.  Even though stores might be
     *	     ordered, loads are probably not.  A memory fence is required
     *	     to prevent reordering of the loads after the ip_rindex update.
     */
    while (wi - (ri = ip->ip_rindex) > 0) {
	ri &= MAXCPUFIFO_MASK;
	cpu_lfence();
	copy_func = ip->ip_func[ri];
	copy_arg1 = ip->ip_arg1[ri];
	copy_arg2 = ip->ip_arg2[ri];
	cpu_mfence();
	++ip->ip_rindex;
	KKASSERT((ip->ip_rindex & MAXCPUFIFO_MASK) ==
		 ((ri + 1) & MAXCPUFIFO_MASK));
	logipiq(receive, copy_func, copy_arg1, copy_arg2, sgd, mycpu);
#ifdef INVARIANTS
	if (ipiq_debug && (ip->ip_rindex & 0xFFFFFF) == 0) {
	    kprintf("cpu %d ipifunc %p %p %d (frame %p)\n",
		    mycpu->gd_cpuid,
		    copy_func, copy_arg1, copy_arg2,
#if defined(__i386__)
		    (frame ? (void *)frame->if_eip : NULL));
#elif defined(__x86_64__)
		    (frame ? (void *)frame->if_rip : NULL));
#else
		    NULL);
#endif
	}
#endif
	copy_func(copy_arg1, copy_arg2, frame);
	cpu_sfence();
	ip->ip_xindex = ip->ip_rindex;

#ifdef PANIC_DEBUG
	/*
	 * Simulate panics during the processing of an IPI
	 */
	if (mycpu->gd_cpuid == panic_ipiq_cpu && panic_ipiq_count) {
	    if (--panic_ipiq_count == 0) {
#ifdef DDB
		Debugger("PANIC_DEBUG");
#else
		panic("PANIC_DEBUG");
#endif
	    }
	}
#endif
    }
    --mygd->gd_intr_nesting_level;

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

static void
lwkt_sync_ipiq(void *arg)
{
    cpumask_t *cpumask = arg;

    atomic_clear_cpumask(cpumask, mycpu->gd_cpumask);
    if (*cpumask == 0)
	wakeup(cpumask);
}

void
lwkt_synchronize_ipiqs(const char *wmesg)
{
    cpumask_t other_cpumask;

    other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
    lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);

    while (other_cpumask != 0) {
	tsleep_interlock(&other_cpumask, 0);
	if (other_cpumask != 0)
	    tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
    }
}

#endif

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_interlock()	- Place specified cpus in a quiescent state.
 *				  The current cpu is placed in a hard critical
 *				  section.
 *
 * lwkt_cpusync_deinterlock()	- Execute cs_func on specified cpus, including
 *				  the current cpu if specified, then return.
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *arg)
{
    struct lwkt_cpusync cs;

    lwkt_cpusync_init(&cs, mask, func, arg);
    lwkt_cpusync_interlock(&cs);
    lwkt_cpusync_deinterlock(&cs);
}

void
lwkt_cpusync_interlock(lwkt_cpusync_t cs)
{
#ifdef SMP
    globaldata_t gd = mycpu;
    cpumask_t mask;

    /*
     * mask acknowledge (cs_mack): 0->mask for stage 1
     *
     * mack does not include the current cpu.
     */
    mask = cs->cs_mask & gd->gd_other_cpus & smp_active_mask;
    cs->cs_mack = 0;
    crit_enter_id("cpusync");
    if (mask) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	lwkt_send_ipiq_mask(mask, (ipifunc1_t)lwkt_cpusync_remote1, cs);
	logipiq2(sync_start, mask);
	while (cs->cs_mack != mask) {
	    lwkt_process_ipiq();
	    cpu_pause();
	}
    }
#else
    cs->cs_mack = 0;
#endif
}

/*
 * Interlocked cpus have executed remote1 and are polling in remote2.
 * To deinterlock we clear cs_mack and wait for the cpus to execute
 * the func and set their bit in cs_mack again.
 */
void
lwkt_cpusync_deinterlock(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    cpumask_t mask;

    /*
     * mask acknowledge (cs_mack): mack->0->mack for stage 2
     *
     * Clearing cpu bits for polling cpus in cs_mack will cause them to
     * execute stage 2, which executes the cs_func(cs_data) and then sets
     * their bit in cs_mack again.
     *
     * mack does not include the current cpu.
     */
    mask = cs->cs_mack;
    cpu_ccfence();
    cs->cs_mack = 0;
    if (cs->cs_func && (cs->cs_mask & gd->gd_cpumask))
	cs->cs_func(cs->cs_data);
    if (mask) {
	while (cs->cs_mack != mask) {
	    lwkt_process_ipiq();
	    cpu_pause();
	}
	--gd->gd_curthread->td_cscount;
	lwkt_process_ipiq();
	logipiq2(sync_end, mask);
    }
    crit_exit_id("cpusync");
#else
    if (cs->cs_func && (cs->cs_mask & gd->gd_cpumask))
	cs->cs_func(cs->cs_data);
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on the remote cpu when a new cpu synchronization request has been
 * sent to us.  Acknowledge the interlock by setting our bit in cs_mack,
 * then enter the stage-2 poll via lwkt_cpusync_remote2().
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;

    atomic_set_cpumask(&cs->cs_mack, gd->gd_cpumask);
    lwkt_cpusync_remote2(cs);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish by clearing our bit in
 * cs_mack.  Until it does, requeue our request so we keep spinning on it;
 * once cleared, execute the function and set our bit again.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t cs)
{
    globaldata_t gd = mycpu;

    if ((cs->cs_mack & gd->gd_cpumask) == 0) {
	if (cs->cs_func)
	    cs->cs_func(cs->cs_data);
	atomic_set_cpumask(&cs->cs_mack, gd->gd_cpumask);
    } else {
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc3_t)(ipifunc1_t)lwkt_cpusync_remote2;
	ip->ip_arg1[wi] = cs;
	ip->ip_arg2[wi] = 0;
	cpu_sfence();
	++ip->ip_windex;
    }
}

#endif
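/*
 * Illustrative usage sketch (not part of the original file): a caller can
 * use lwkt_cpusync_simple() to run a function on a set of cpus while they
 * are all held in the interlocked (quiescent) state.  The callback and
 * variable names below are hypothetical; smp_active_mask is used here
 * simply as "all running cpus".
 */
#if 0
static int example_mode;

static void
example_set_mode(void *arg)
{
    /* Runs on every cpu in the mask, and on the current cpu if included. */
    example_mode = (int)(intptr_t)arg;
}

static void
example_change_mode_everywhere(int mode)
{
    lwkt_cpusync_simple(smp_active_mask, example_set_mode,
			(void *)(intptr_t)mode);
}
#endif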