1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/machintr.h>
#include <sys/sysctl.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>

/* KTR tracing of LAPIC EOI operations (used via log_lapic() below). */
#if !defined(KTR_LAPIC)
#define KTR_LAPIC	KTR_ALL
#endif
KTR_INFO_MASTER(lapic);
KTR_INFO(KTR_LAPIC, lapic, mem_eoi, 0, "mem_eoi");
KTR_INFO(KTR_LAPIC, lapic, msr_eoi, 0, "msr_eoi");
#define log_lapic(name)	KTR_LOG(lapic_ ## name)

/* Number of application processors (APs); defined elsewhere. */
extern int naps;

/* Memory-mapped LAPIC register window (set by lapic_map()). */
volatile lapic_t *lapic_mem;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);

/* Use the LAPIC timer as the cputimer interrupt source (tunable). */
static int lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

/* Prefer TSC-deadline timer mode when the CPU supports it (tunable). */
static int lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

/* When > 0, run extra calibration passes and print diagnostics. */
static int lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

/* Use the TSC-based calibration path when the TSC is usable (tunable). */
static int lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_mem_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_msr_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

/*
 * cputimer_intr driver backed by the local APIC timer.  The .reload
 * method is replaced with the TSC-deadline variant in lapic_init()
 * or the x2APIC MSR variant in lapic_x2apic_enter() as appropriate;
 * .freq is filled in by lapic_timer_calibrate().
 */
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_mem_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

/*
 * Timer divide-configuration values tried in order by
 * lapic_timer_calibrate(); lapic_timer_divisor_idx stays -1 until
 * calibration has selected one.
 */
static int lapic_timer_divisor_idx = -1;
static const uint32_t lapic_timer_divisors[] = {
	APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128, APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))

/* Non-zero once TSC-deadline mode has been selected in lapic_init(). */
static int lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int cpu_id_to_apic_id[NAPICID];
int apic_id_to_cpu_id[NAPICID];
int lapic_enable = 1;
int lapic_usable = 0;
int x2apic_enable = 1;

SYSCTL_INT(_hw, OID_AUTO, x2apic_enable, CTLFLAG_RD, &x2apic_enable, 0, "");

/* Separate cachelines for each cpu's info.
 */
struct deadlines {
	uint64_t timestamp;		/* TSC deadline currently armed */
	uint64_t downcount_time;	/* set_apic_timer() expiry, in TSC */
	uint64_t padding[6];		/* pad entry to 64 bytes */
};
struct deadlines *tsc_deadlines = NULL;

static void	lapic_mem_eoi(void);
static int	lapic_mem_ipi(int dest_type, int vector, int delivery_mode);
static void	lapic_mem_single_ipi(int cpu, int vector, int delivery_mode);

static void	lapic_msr_eoi(void);
static int	lapic_msr_ipi(int dest_type, int vector, int delivery_mode);
static void	lapic_msr_single_ipi(int cpu, int vector, int delivery_mode);

/*
 * Operation vectors; point at the mem (xAPIC MMIO) or msr (x2APIC)
 * implementations depending on the configured mode.
 */
void	(*lapic_eoi)(void);
int	(*apic_ipi)(int dest_type, int vector, int delivery_mode);
void	(*single_apic_ipi)(int cpu, int vector, int delivery_mode);

/*
 * Issue an IPI command via the memory-mapped ICR.  The destination is
 * written to icr_hi first; the icr_lo write is what actually sends
 * the command, so ordering here matters.
 */
static __inline void
lapic_mem_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	uint32_t icr_lo, icr_hi;

	icr_hi = (LAPIC_MEM_READ(icr_hi) & ~APIC_ID_MASK) |
	    (apic_id << APIC_ID_SHIFT);
	icr_lo = (LAPIC_MEM_READ(icr_lo) & APIC_ICRLO_RESV_MASK) | icr_lo_val;

	LAPIC_MEM_WRITE(icr_hi, icr_hi);
	LAPIC_MEM_WRITE(icr_lo, icr_lo);
}

/*
 * x2APIC ICR is a single 64-bit MSR: destination in the high 32 bits,
 * command in the low 32 bits, written in one shot.
 */
static __inline void
lapic_msr_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	LAPIC_MSR_WRITE(MSR_X2APIC_ICR,
	    ((uint64_t)apic_id << 32) | ((uint64_t)icr_lo_val));
}

/*
 * Enable LAPIC, configure interrupts.
 *
 * Called once on the BSP (bsp == TRUE), which also installs the shared
 * IDT vectors and registers the timer, and once on each AP.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			/* Per-cpu, cacheline-aligned deadline tracking. */
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) |	/* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for sniffing cpu state */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = LAPIC_READ(lvt_lint0);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
	    APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	LAPIC_WRITE(lvt_lint0, temp);

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = LAPIC_READ(lvt_lint1);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
	    APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	LAPIC_WRITE(lvt_error, LAPIC_READ(lvt_error) | APIC_LVT_MASKED);
	LAPIC_WRITE(lvt_pcint, LAPIC_READ(lvt_pcint) | APIC_LVT_MASKED);

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = LAPIC_READ(lvt_timer);
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	LAPIC_WRITE(lvt_timer, timer);

	/*
	 * Set the Task Priority Register as needed.   At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = LAPIC_READ(tpr);
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	LAPIC_WRITE(tpr, temp);

	/*
	 * AMD specific setup
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD && lapic_mem != NULL &&
	    (LAPIC_MEM_READ(version) & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		/*
		 * Mask all of the AMD extended LVT entries, clamping the
		 * reported count to the size of our register layout.
		 */
		ext_feat = LAPIC_MEM_READ(ext_feat);
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic_mem->ext_lvt) /
		    sizeof(lapic_mem->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = LAPIC_MEM_READ(ext_lvt[i].lvt);

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
			    APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
				    i, LAPIC_MEM_READ(ext_lvt[i].lvt));
				if (LAPIC_MEM_READ(ext_lvt[i].lvt) != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			LAPIC_MEM_WRITE(ext_lvt[i].lvt, lvt);
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = LAPIC_READ(svr);
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (LAPIC_READ(version) & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	LAPIC_WRITE(svr, temp);

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* APs reuse the divisor the BSP's calibration selected. */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}

/*
 * Program the timer divide-configuration register from the divisor
 * table; divisor_idx must have been selected by calibration.
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	LAPIC_WRITE(dcr_timer, lapic_timer_divisors[divisor_idx]);
}

/*
 * Arm the timer in one-shot mode with the given initial count,
 * clearing the periodic and TSC-deadline mode bits first.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = LAPIC_READ(lvt_timer);
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	LAPIC_WRITE(lvt_timer, value);
	LAPIC_WRITE(icr_timer, count);
}

/*
 * Arm the timer assuming the LVT mode bits are already correct.
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	LAPIC_WRITE(icr_timer, count);
}

/*
 * Arm the TSC-deadline timer 'diff' TSC ticks in the future and
 * record the armed deadline for this cpu.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

/*
 * Convert 'value' units of 1/scale seconds into TSC ticks,
 * rounding up.
 */
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val +=
	    (scale - 1);	/* round up */
	val /= scale;
	return val;
}

#define MAX_MEASURE_RETRIES	100

/*
 * Measure the LAPIC timer frequency using the TSC as the reference:
 * start a full-count one-shot, wait 'us' microseconds, then compute
 * counts-per-second from the elapsed TSC ticks.  Each current-count
 * read is bracketed by rdtsc_ordered() and retried if it took more
 * than twice 'apic_delay_tsc' (the measured best-case register read
 * latency), to reject samples disturbed by SMIs or hypervisor exits.
 * Returns the timer frequency in Hz, or 0 if the measurement is
 * unusable at the current divisor.
 */
static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = LAPIC_READ(ccr_timer);
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = LAPIC_READ(ccr_timer);
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}

/*
 * Measure the LAPIC timer frequency against sys_cputimer over a
 * 'us' microsecond delay.  Returns the timer frequency in Hz, or 0
 * if the timer wrapped to zero or the reference did not advance.
 */
static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = LAPIC_READ(ccr_timer);
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = LAPIC_READ(ccr_timer);
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}

/*
 * Determine the LAPIC timer frequency (lapic_cputimer_intr.freq) and
 * a usable divisor.  In TSC-deadline mode no timer calibration is
 * needed; only a shift is computed so the frequency fits sysclock_t.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		sysclock_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = LAPIC_READ(ccr_timer);

		/*
		 * Sample the latency of a current-count register read
		 * 100 times; the minimum is used as the retry threshold
		 * in do_tsc_calibration().
		 */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = LAPIC_READ(ccr_timer);
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least some
		 * virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer.
	 */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
	    lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	/* Optional sanity runs over a range of delays, for diagnostics. */
	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}

/*
 * cputimer_intr reload method for TSC-deadline mode.  Converts the
 * sysclock reload count to TSC ticks and only rearms an already
 * running timer when the new deadline is earlier than (or the stored
 * deadline is stale relative to) the currently armed one.
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	/* Clamp the reload count before converting to TSC ticks. */
	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	/* Fence so rdtsc() below is not reordered with prior loads/stores. */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

/*
 * cputimer_intr reload method, xAPIC MMIO flavor.  Converts sysclock
 * ticks to lapic timer ticks; only shortens a running countdown,
 * never lengthens it.
 */
static void
lapic_mem_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MEM_READ(ccr_timer))
			LAPIC_MEM_WRITE(icr_timer, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MEM_WRITE(icr_timer, reload);
	}
}

/*
 * cputimer_intr reload method, x2APIC MSR flavor; same logic as the
 * MMIO version above.
 */
static void
lapic_msr_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER))
			LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	}
}

/*
 * Unmask the LVT timer entry, selecting TSC-deadline mode when
 * configured, then run the AMD C1E fixup.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = LAPIC_READ(lvt_timer);
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	LAPIC_WRITE(lvt_timer, timer);
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Disable AMD C1E (which stops the LAPIC timer) if present.  If the
 * fixup had to kick-start the timer, *arg (when non-NULL) is set to 1
 * so the caller knows a restart already happened.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family. This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu restart handler: apply the C1E fixup and, if that did not
 * already kick-start the timer, arm a near-immediate expiry.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	/* Run the C1E fixup on every active cpu. */
	lwkt_send_ipiq_mask(smp_active_mask,
	    lapic_timer_fixup_handler, NULL);
}

/*
 * Restart the LAPIC timer on every active cpu.
 */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    LAPIC_READ(lvt_lint0), LAPIC_READ(lvt_lint1), LAPIC_READ(tpr),
	    LAPIC_READ(svr));
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Wait for any previously issued ICR command to finish delivery.
 * Complains once per second (one tsc_frequency interval) while
 * stalled and panics after 30 such complaints.
 */
static __inline void
lapic_mem_icr_unpend(const char *func)
{
	if (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
		int64_t tsc;
		int loops = 1;

		tsc = rdtsc();
		while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				tsc = rdtsc();
				if (++loops > 30) {
					panic("%s: cpu%d apic stalled",
					    func, mycpuid);
				} else {
					kprintf("%s: cpu%d apic stalled\n",
					    func, mycpuid);
				}
			}
		}
	}
}

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
static int
lapic_mem_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * x2APIC flavor of the above; no pending-wait is performed here
 * (the MSR write path does not poll delivery status).
 */
static int
lapic_msr_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_msr_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * Interrupts must be hard-disabled by caller
 */
static void
lapic_mem_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * x2APIC flavor; interrupts must be hard-disabled by caller.
 */
static void
lapic_msr_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_msr_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}


/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);	/* round up */
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = LAPIC_READ(ccr_timer);
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);	/* round up */
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

/*
 * Return the first APIC ID >= 'start' that is not yet assigned to a
 * cpu, or NAPICID if none is free below APICID_MAX.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/*
 * Map the LAPIC register page uncacheable at 'lapic_addr'.
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic_mem = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

/*
 * Switch this cpu into x2APIC mode.  On the BSP also repoint the
 * EOI/IPI operation vectors and the timer reload method at their
 * MSR-based implementations.
 */
void
lapic_x2apic_enter(boolean_t bsp)
{
	uint64_t apic_base;

	KASSERT(x2apic_enable, ("X2APIC mode is not enabled"));

	/*
	 * X2APIC mode is requested, if it has not been enabled by the BIOS,
	 * enable it now.
	 */
	apic_base = rdmsr(MSR_APICBASE);
	if ((apic_base & APICBASE_X2APIC) == 0) {
		wrmsr(MSR_APICBASE,
		    apic_base | APICBASE_X2APIC | APICBASE_ENABLED);
	}
	if (bsp) {
		lapic_eoi = lapic_msr_eoi;
		apic_ipi = lapic_msr_ipi;
		single_apic_ipi = lapic_msr_single_ipi;
		lapic_cputimer_intr.reload = lapic_msr_timer_intr_reload;
	}
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe and enumerate the LAPICs via the registered enumerators,
 * choosing x2APIC or xAPIC mode, and clamp the number of usable APs
 * to hw.ap_max.  Returns 0 on success or ENXIO.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	uint64_t apic_base;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	/* Default to the xAPIC (memory-mapped) operations. */
	lapic_eoi = lapic_mem_eoi;
	apic_ipi = lapic_mem_ipi;
	single_apic_ipi = lapic_mem_single_ipi;

	TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable);
	if (x2apic_enable < 0)
		x2apic_enable = 1;

	if ((cpu_feature2 & CPUID2_X2APIC) == 0) {
		/* X2APIC is not supported. */
		x2apic_enable = 0;
	} else if (!x2apic_enable) {
		/*
		 * If the BIOS enabled the X2APIC mode, then we would stick
		 * with the X2APIC mode.
		 */
		apic_base = rdmsr(MSR_APICBASE);
		if (apic_base & APICBASE_X2APIC) {
			kprintf("LAPIC: BIOS enabled X2APIC mode\n");
			x2apic_enable = 1;
		}
	}

	if (x2apic_enable) {
		/*
		 * Enter X2APIC mode.
		 */
		kprintf("LAPIC: enter X2APIC mode\n");
		lapic_x2apic_enter(TRUE);
	}

	/* Reset the APIC ID -> cpu id map before enumeration fills it. */
	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	/* LAPIC is usable now. */
	lapic_usable = 1;

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Register a LAPIC enumerator, keeping the list ordered by descending
 * lapic_prio.
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/*
 * Record a bidirectional cpu id <-> APIC id mapping.
 */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * Re-setup LINT0/LINT1 when running without an I/O APIC: unmask LINT0
 * (ExtINT from the 8259) and mask LINT1 on the BSP.
 */
void
lapic_fixup_noioapic(void)
{
	u_int temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = LAPIC_READ(lvt_lint0);
	temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint0, temp);

	temp = LAPIC_READ(lvt_lint1);
	temp |= APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);
}

/* EOI via the memory-mapped register. */
static void
lapic_mem_eoi(void)
{
	log_lapic(mem_eoi);
	LAPIC_MEM_WRITE(eoi, 0);
}

/* EOI via the x2APIC MSR. */
static void
lapic_msr_eoi(void)
{
	log_lapic(msr_eoi);
	LAPIC_MSR_WRITE(MSR_X2APIC_EOI, 0);
}

/*
 * Issue an ICR command and busy-wait until delivery completes.
 */
static void
lapic_mem_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	lapic_mem_icr_set(apic_id, icr_lo_val);
	while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND)
		/* spin */;
}

/*
 * Mode-independent synchronous ICR write.  The x2APIC path performs
 * no delivery-status wait (there is no DELSTAT polling in the MSR
 * implementation here).
 */
void
lapic_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	if (x2apic_enable)
		lapic_msr_icr_set(apic_id, icr_lo_val);
	else
		lapic_mem_seticr_sync(apic_id, icr_lo_val);
}

/*
 * Boot-time LAPIC setup: configure the LAPIC and initialize the BSP's
 * local APIC; on failure fall back to non-LAPIC operation (and re-init
 * the ICU if the I/O APIC had been enabled).
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}
	if (!lapic_enable)
		x2apic_enable = 0;

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);