1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
 */

#include <sys/cdefs.h>
#include "opt_atpic.h"

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>
#include <sys/timetc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/fpu.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <x86/init.h>

#ifdef DDB
#include <sys/interrupt.h>
#include <ddb/ddb.h>
#endif

/* IDT gate descriptor type and code selector used for APIC vectors. */
#ifdef __amd64__
#define SDT_APIC        SDT_SYSIGT
#define GSEL_APIC       0
#else
#define SDT_APIC        SDT_SYS386IGT
#define GSEL_APIC       GSEL(GCODE_SEL, SEL_KPL)
#endif

static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);

/*
 * I/O interrupts use non-negative IRQ values.  These values are used
 * to mark unused IDT entries or IDT entries reserved for a non-I/O
 * interrupt.
 */
#define IRQ_FREE        -1
#define IRQ_TIMER       -2
#define IRQ_SYSCALL     -3
#define IRQ_DTRACE_RET  -4
#define IRQ_EVTCHN      -5

/* Operating mode of the local APIC timer on a given CPU. */
enum lat_timer_mode {
        LAT_MODE_UNDEF =        0,
        LAT_MODE_PERIODIC =     1,
        LAT_MODE_ONESHOT =      2,
        LAT_MODE_DEADLINE =     3,
};

/*
 * Support for local APICs.  Local APICs manage interrupts on each
 * individual processor as opposed to I/O APICs which receive interrupts
 * from I/O devices and then forward them on to the local APICs.
 *
 * Local APICs can also send interrupts to each other thus providing the
 * mechanism for IPIs.
 */

/* Software shadow of one local vector table (LVT) entry. */
struct lvt {
        u_int lvt_edgetrigger:1;        /* edge (1) vs level (0) trigger */
        u_int lvt_activehi:1;           /* active-high (1) polarity */
        u_int lvt_masked:1;             /* entry currently masked */
        u_int lvt_active:1;             /* per-CPU override is in effect */
        u_int lvt_mode:16;              /* APIC_LVT_DM_* delivery mode */
        u_int lvt_vector:8;             /* IDT vector for DM_FIXED */
};

/* Per-CPU local APIC state; the lapics[] array is indexed by APIC ID. */
struct lapic {
        struct lvt la_lvts[APIC_LVT_MAX + 1];
        struct lvt la_elvts[APIC_ELVT_MAX + 1];
        u_int la_id:8;                  /* local APIC ID */
        u_int la_cluster:4;             /* logical destination cluster */
        u_int la_cluster_id:2;          /* ID within the cluster */
        u_int la_present:1;             /* this APIC was enumerated */
        u_long *la_timer_count;         /* interrupt counter for the timer */
        uint64_t la_timer_period;
        enum lat_timer_mode la_timer_mode;
        uint32_t lvt_timer_base;        /* timer LVT sans mode bits */
        uint32_t lvt_timer_last;        /* last value written to timer LVT */
        /* Include IDT_SYSCALL to make indexing easier. */
        int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;

/* Global defaults for local APIC LVT entries. */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
        { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },  /* LINT0: masked ExtINT */
        { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },     /* LINT1: NMI */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },      /* Timer */
        { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },      /* Error */
        { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },     /* PMC */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },    /* Thermal */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },        /* CMCI */
};

/* Global defaults for AMD local APIC ELVT entries. */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/* ISR entry points, one per 32-vector bank of I/O interrupt vectors. */
static inthand_t *ioint_handlers[] = {
        NULL,                   /* 0 - 31 */
        IDTVEC(apic_isr1),      /* 32 - 63 */
        IDTVEC(apic_isr2),      /* 64 - 95 */
        IDTVEC(apic_isr3),      /* 96 - 127 */
        IDTVEC(apic_isr4),      /* 128 - 159 */
        IDTVEC(apic_isr5),      /* 160 - 191 */
        IDTVEC(apic_isr6),      /* 192 - 223 */
        IDTVEC(apic_isr7),      /* 224 - 255 */
};

/* PTI (page table isolation) variants of the above entry points. */
static inthand_t *ioint_pti_handlers[] = {
        NULL,                   /* 0 - 31 */
        IDTVEC(apic_isr1_pti),  /* 32 - 63 */
        IDTVEC(apic_isr2_pti),  /* 64 - 95 */
        IDTVEC(apic_isr3_pti),  /* 96 - 127 */
        IDTVEC(apic_isr4_pti),  /* 128 - 159 */
        IDTVEC(apic_isr5_pti),  /* 160 - 191 */
        IDTVEC(apic_isr6_pti),  /* 192 - 223 */
        IDTVEC(apic_isr7_pti),  /* 224 - 255 */
};

/* Timer divide configuration register values, in power-of-two order. */
static u_int32_t lapic_timer_divisors[] = {
        APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
        APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;       /* xAPIC MMIO mapping; NULL in x2APIC mode */
vm_paddr_t lapic_paddr = DEFAULT_APIC_BASE;
int x2apic_mode;
int lapic_eoi_suppression;
static int lapic_timer_tsc_deadline;
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
static uint64_t lapic_ipi_wait_mult;
static int __read_mostly lapic_ds_idle_timeout = 1000000;
#endif
unsigned int max_apic_id;
/* Number of active users of the PMC LVT entry (see lapic_enable_pcint()). */
static int pcint_refcnt = 0;

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");
#ifdef SMP
SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN,
    &lapic_ds_idle_timeout, 0,
    "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)");
#endif

static void lapic_calibrate_initcount(struct lapic *la);

/*
 * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan
 * instrumentation. Otherwise, if x2APIC is not available, use of the global
 * lapic_map will generate a KCSan false positive. While the mapping is
 * shared among all CPUs, the physical access will always take place on the
 * local CPU's APIC, so there isn't in fact a race here. Furthermore, the
 * KCSan warning printf can cause a panic if issued during LAPIC access,
 * due to attempted recursive use of event timer resources.
 */

/* Read a 32-bit local APIC register, via MSR (x2APIC) or MMIO (xAPIC). */
static uint32_t __nosanitizethread
lapic_read32(enum LAPIC_REGISTERS reg)
{
        uint32_t res;

        if (x2apic_mode) {
                res = rdmsr32(MSR_APIC_000 + reg);
        } else {
                res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
        }
        return (res);
}

/* Write a 32-bit local APIC register, fencing before MSR writes. */
static void __nosanitizethread
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

        if (x2apic_mode) {
                /*
                 * Order prior memory accesses before the WRMSR, since
                 * x2APIC MSR writes are not serializing.
                 */
                mfence();
                lfence();
                wrmsr(MSR_APIC_000 + reg, val);
        } else {
                *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
        }
}

/* As lapic_write32(), but without the ordering fences in x2APIC mode. */
static void __nosanitizethread
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

        if (x2apic_mode) {
                wrmsr(MSR_APIC_000 + reg, val);
        } else {
                *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
        }
}

#ifdef SMP
static uint64_t
lapic_read_icr_lo(void)
{

        return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write both halves of the interrupt command register.  In xAPIC mode
 * the two 32-bit writes must not be interleaved with another ICR use,
 * so they are done with interrupts disabled.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
        register_t saveintr;
        uint64_t v;

        if (x2apic_mode) {
                v = ((uint64_t)vhi << 32) | vlo;
                mfence();
                wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
        } else {
                saveintr = intr_disable();
                lapic_write32(LAPIC_ICR_HI, vhi);
                lapic_write32(LAPIC_ICR_LO, vlo);
                intr_restore(saveintr);
        }
}

static void
lapic_write_icr_lo(uint32_t vlo)
{

        if (x2apic_mode) {
                mfence();
                wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
        } else {
                lapic_write32(LAPIC_ICR_LO, vlo);
        }
}

/* Send a self-IPI using the x2APIC-only SELF IPI register. */
static void
lapic_write_self_ipi(uint32_t vector)
{

        KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
        wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
}
#endif /* SMP */

/* Turn on x2APIC mode in the APIC base MSR. */
static void
lapic_enable_x2apic(void)
{
        uint64_t apic_base;

        apic_base = rdmsr(MSR_APICBASE);
        apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
        wrmsr(MSR_APICBASE, apic_base);
}

bool
lapic_is_x2apic(void)
{
        uint64_t apic_base;

        apic_base = rdmsr(MSR_APICBASE);
        return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
            (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void     lapic_enable(void);
static void     lapic_resume(struct pic *pic, bool suspend_cancelled);
static void     lapic_timer_oneshot(struct lapic *);
static void     lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void     lapic_timer_periodic(struct lapic *);
static void     lapic_timer_deadline(struct lapic *);
static void     lapic_timer_stop(struct lapic *);
static void     lapic_timer_set_divisor(u_int divisor);
static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int      lapic_et_start(struct eventtimer *et,
                    sbintime_t first, sbintime_t period);
static int      lapic_et_stop(struct eventtimer *et);
static u_int    apic_idt_to_irq(u_int apic_id, u_int vector);
static void     lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/*
 * Rebuild a hardware LVT register value from a software struct lvt,
 * preserving bits outside the mask/trigger/polarity/mode/vector fields.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin,
    uint32_t value)
{

        value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
            APIC_LVT_VECTOR);
        if (lvt->lvt_edgetrigger == 0)
                value |= APIC_LVT_TM;
        if (lvt->lvt_activehi == 0)
                value |= APIC_LVT_IIPP_INTALO;
        if (lvt->lvt_masked)
                value |= APIC_LVT_M;
        value |= lvt->lvt_mode;
        switch (lvt->lvt_mode) {
        case APIC_LVT_DM_NMI:
        case APIC_LVT_DM_SMI:
        case APIC_LVT_DM_INIT:
        case APIC_LVT_DM_EXTINT:
                /*
                 * NOTE(review): APIC_LVT_TM is only cleared inside the
                 * bootverbose branch, so the forcing to edge trigger
                 * happens only in verbose boots — confirm intentional.
                 */
                if (!lvt->lvt_edgetrigger && bootverbose) {
                        printf("lapic%u: Forcing LINT%u to edge trigger\n",
                            la->la_id, pin);
                        value &= ~APIC_LVT_TM;
                }
                /* Use a vector of 0. */
                break;
        case APIC_LVT_DM_FIXED:
                value |= lvt->lvt_vector;
                break;
        default:
                panic("bad APIC LVT delivery mode: %#x\n", value);
        }
        return (value);
}

/* Build an LVT value, preferring a per-CPU override over the defaults. */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
        struct lvt *lvt;

        KASSERT(pin <= APIC_LVT_MAX,
            ("%s: pin %u out of range", __func__, pin));
        if (la->la_lvts[pin].lvt_active)
                lvt = &la->la_lvts[pin];
        else
                lvt = &lvts[pin];

        return (lvt_mode_impl(la, lvt, pin, value));
}

/* Build an AMD extended-LVT value; entries must be active, edge, hi. */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
        struct lvt *elvt;

        KASSERT(idx <= APIC_ELVT_MAX,
            ("%s: idx %u out of range", __func__, idx));

        elvt = &la->la_elvts[idx];
        KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
        KASSERT(elvt->lvt_edgetrigger,
            ("%s: ELVT%u is not edge triggered", __func__, idx));
        KASSERT(elvt->lvt_activehi,
            ("%s: ELVT%u is not active high", __func__, idx));
        return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local APIC and setup necessary interrupt vectors.
 */
void
lapic_init(vm_paddr_t addr)
{
#ifdef SMP
        uint64_t r, r1, r2, rx;
#endif
        uint32_t ver;
        int i;
        bool arat;

        TSENTER();

        /*
         * Enable x2APIC mode if possible. Map the local APIC
         * registers page.
         *
         * Keep the LAPIC registers page mapped uncached for x2APIC
         * mode too, to have direct map page attribute set to
         * uncached.  This is needed to work around CPU errata present
         * on all Intel processors.
         */
        KASSERT(trunc_page(addr) == addr,
            ("local APIC not aligned on a page boundary"));
        lapic_paddr = addr;
        lapic_map = pmap_mapdev(addr, PAGE_SIZE);
        if (x2apic_mode) {
                lapic_enable_x2apic();
                /* MMIO mapping is unused in x2APIC mode. */
                lapic_map = NULL;
        }

        /* Setup the spurious interrupt handler. */
        setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
            GSEL_APIC);

        /* Perform basic initialization of the BSP's local APIC. */
        lapic_enable();

        /* Set BSP's per-CPU local APIC ID. */
        PCPU_SET(apic_id, lapic_id());

        /* Local APIC timer interrupt. */
        setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* Local APIC error interrupt. */
        setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* XXX: Thermal interrupt */

        /* Local APIC CMCI. */
        setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* Register the event timer unless disabled by the apic.clock hint. */
        if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
                /* Set if APIC timer runs in C3. */
                arat = (cpu_power_eax & CPUTPM1_ARAT);

                bzero(&lapic_et, sizeof(lapic_et));
                lapic_et.et_name = "LAPIC";
                lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
                    ET_FLAGS_PERCPU;
                lapic_et.et_quality = 600;
                if (!arat) {
                        /* Timer stops in deep C-states: lower the quality. */
                        lapic_et.et_flags |= ET_FLAGS_C3STOP;
                        lapic_et.et_quality = 100;
                }
                if ((cpu_feature & CPUID_TSC) != 0 &&
                    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
                    tsc_is_invariant && tsc_freq != 0) {
                        lapic_timer_tsc_deadline = 1;
                        TUNABLE_INT_FETCH("hw.apic.timer_tsc_deadline",
                            &lapic_timer_tsc_deadline);
                }

                lapic_et.et_frequency = 0;
                /* We don't know frequency yet, so trying to guess. */
                lapic_et.et_min_period = 0x00001000LL;
                lapic_et.et_max_period = SBT_1S;
                lapic_et.et_start = lapic_et_start;
                lapic_et.et_stop = lapic_et_stop;
                lapic_et.et_priv = NULL;
                et_register(&lapic_et);
        }

        /*
         * Set lapic_eoi_suppression after lapic_enable(), to not
         * enable suppression in the hardware prematurely.  Note that
         * we by default enable suppression even when system only has
         * one IO-APIC, since EOI is broadcasted to all APIC agents,
         * including CPUs, otherwise.
         *
         * It seems that at least some KVM versions report
         * EOI_SUPPRESSION bit, but auto-EOI does not work.
         */
        ver = lapic_read32(LAPIC_VERSION);
        if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
                lapic_eoi_suppression = 1;
                if (vm_guest == VM_GUEST_KVM) {
                        if (bootverbose)
                                printf(
                            "KVM -- disabling lapic eoi suppression\n");
                        lapic_eoi_suppression = 0;
                }
                TUNABLE_INT_FETCH("hw.apic.eoi_suppression",
                    &lapic_eoi_suppression);
        }

#ifdef SMP
#define LOOPS   1000
        /*
         * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
         * lapic_ipi_wait_mult contains the number of iterations which
         * approximately delay execution for 1 microsecond (the
         * argument to lapic_ipi_wait() is in microseconds).
         *
         * We assume that TSC is present and already measured.
         * Possible TSC frequency jumps are irrelevant to the
         * calibration loop below, the CPU clock management code is
         * not yet started, and we do not enter sleep states.
         */
        KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
            ("TSC not initialized"));
        if (!x2apic_mode) {
                r = rdtsc();
                for (rx = 0; rx < LOOPS; rx++) {
                        (void)lapic_read_icr_lo();
                        ia32_pause();
                }
                r = rdtsc() - r;
                r1 = tsc_freq * LOOPS;
                r2 = r * 1000000;
                lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
                if (bootverbose) {
                        printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
                            "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
                            (uintmax_t)r, (uintmax_t)tsc_freq);
                }
        }
#undef LOOPS
#endif /* SMP */

        TSEXIT();
}

/*
 * Create a local APIC instance.
 */
void
lapic_create(u_int apic_id, int boot_cpu)
{
        int i;

        if (apic_id > max_apic_id) {
                printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
                if (boot_cpu)
                        panic("Can't ignore BSP");
                return;
        }
        KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
            apic_id));

        /*
         * Assume no local LVT overrides and a cluster of 0 and
         * intra-cluster ID of 0.
         */
        lapics[apic_id].la_present = 1;
        lapics[apic_id].la_id = apic_id;
        for (i = 0; i <= APIC_LVT_MAX; i++) {
                lapics[apic_id].la_lvts[i] = lvts[i];
                lapics[apic_id].la_lvts[i].lvt_active = 0;
        }
        for (i = 0; i <= APIC_ELVT_MAX; i++) {
                lapics[apic_id].la_elvts[i] = elvts[i];
                lapics[apic_id].la_elvts[i].lvt_active = 0;
        }
        for (i = 0; i <= APIC_NUM_IOINTS; i++)
                lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
        /* Reserve vectors used for non-I/O purposes. */
        lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
        lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
            IRQ_TIMER;
#ifdef KDTRACE_HOOKS
        lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
            IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
        lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
        cpu_add(apic_id, boot_cpu);
#endif
}

/*
 * Return the AMD extended APIC feature register, or 0 on CPUs that are
 * not AMD/Hygon or lack the extended register space.
 */
static inline uint32_t
amd_read_ext_features(void)
{
        uint32_t version;

        if (cpu_vendor_id != CPU_VENDOR_AMD &&
            cpu_vendor_id != CPU_VENDOR_HYGON)
                return (0);
        version = lapic_read32(LAPIC_VERSION);
        if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
                return (lapic_read32(LAPIC_EXT_FEATURES));
        else
                return (0);
}

/* Number of AMD extended LVT entries, clamped to APIC_ELVT_MAX + 1. */
static inline uint32_t
amd_read_elvt_count(void)
{
        uint32_t extf;
        uint32_t count;

        extf = amd_read_ext_features();
        count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
        count = min(count, APIC_ELVT_MAX + 1);
        return (count);
}

/*
 * Dump contents of local APIC registers
 */
void
lapic_dump(const char* str)
{
        uint32_t version;
        uint32_t maxlvt;
        uint32_t extf;
        int elvt_count;
        int i;

        version = lapic_read32(LAPIC_VERSION);
        maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
        printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
        printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
            lapic_read32(LAPIC_ID),
            version,
            lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
        if ((cpu_feature2 & CPUID2_X2APIC) != 0)
                printf(" x2APIC: %d", x2apic_mode);
        printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
            lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
            lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
        printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
            lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
            lapic_read32(LAPIC_LVT_ERROR));
        if (maxlvt >= APIC_LVT_PMC)
                printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
        printf("\n");
        if (maxlvt >= APIC_LVT_CMCI)
                printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
        extf = amd_read_ext_features();
        if (extf != 0) {
                printf(" AMD ext features: 0x%08x", extf);
                elvt_count = amd_read_elvt_count();
                for (i = 0; i < elvt_count; i++)
                        printf("%s elvt%d: 0x%08x", (i % 4) ? "" : "\n ", i,
                            lapic_read32(LAPIC_EXT_LVT0 + i));
                printf("\n");
        }
}

/* Re-assert x2APIC mode (no-op when running in xAPIC mode). */
void
lapic_xapic_mode(void)
{
        register_t saveintr;

        saveintr = intr_disable();
        if (x2apic_mode)
                lapic_enable_x2apic();
        intr_restore(saveintr);
}

/*
 * Program the current CPU's local APIC: TPR, SVR, LVT entries and, on
 * non-boot invocations (e.g. resume), restore the timer mode.
 */
void
lapic_setup(int boot)
{
        struct lapic *la;
        uint32_t version;
        uint32_t maxlvt;
        register_t saveintr;
        int elvt_count;
        int i;

        saveintr = intr_disable();

        la = &lapics[lapic_id()];
        KASSERT(la->la_present, ("missing APIC structure"));
        version = lapic_read32(LAPIC_VERSION);
        maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

        /* Initialize the TPR to allow all interrupts. */
        lapic_set_tpr(0);

        /* Setup spurious vector and enable the local APIC. */
        lapic_enable();

        /* Program LINT[01] LVT entries. */
        lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
            lapic_read32(LAPIC_LVT_LINT0)));
        lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
            lapic_read32(LAPIC_LVT_LINT1)));

        /* Program the PMC LVT entry if present. */
        if (maxlvt >= APIC_LVT_PMC) {
                /*
                 * NOTE(review): the register offset LAPIC_LVT_PCINT is
                 * passed as the "value" argument rather than a register
                 * read; lvt_mode() rewrites all relevant bits — confirm
                 * this is intentional.
                 */
                lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
                    LAPIC_LVT_PCINT));
        }

        /*
         * Program the timer LVT.  Calibration is deferred until it is certain
         * that we have a reliable timecounter.
         */
        la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
            lapic_read32(LAPIC_LVT_TIMER));
        la->lvt_timer_last = la->lvt_timer_base;
        lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

        if (boot)
                la->la_timer_mode = LAT_MODE_UNDEF;
        else if (la->la_timer_mode != LAT_MODE_UNDEF) {
                /* Restore the timer configuration that was active. */
                KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
                    lapic_id()));
                switch (la->la_timer_mode) {
                case LAT_MODE_PERIODIC:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_periodic(la);
                        break;
                case LAT_MODE_ONESHOT:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_oneshot(la);
                        break;
                case LAT_MODE_DEADLINE:
                        lapic_timer_deadline(la);
                        break;
                default:
                        panic("corrupted la_timer_mode %p %d", la,
                            la->la_timer_mode);
                }
        }

        /* Program error LVT and clear any existing errors. */
        lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
            lapic_read32(LAPIC_LVT_ERROR)));
        lapic_write32(LAPIC_ESR, 0);

        /* XXX: Thermal LVT */

        /* Program the CMCI LVT entry if present. */
        if (maxlvt >= APIC_LVT_CMCI) {
                lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
                    lapic_read32(LAPIC_LVT_CMCI)));
        }

        /* Program any active AMD extended LVT entries. */
        elvt_count = amd_read_elvt_count();
        for (i = 0; i < elvt_count; i++) {
                if (la->la_elvts[i].lvt_active)
                        lapic_write32(LAPIC_EXT_LVT0 + i,
                            elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
        }

        intr_restore(saveintr);
}

/* Register a per-CPU "cpuN:timer" interrupt counter for each APIC. */
static void
lapic_intrcnt(void *dummy __unused)
{
        struct pcpu *pc;
        struct lapic *la;
        char buf[MAXCOMLEN + 1];

        /* If there are no APICs, skip this function. */
        if (lapics == NULL)
                return;

        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
                la = &lapics[pc->pc_apic_id];
                if (!la->la_present)
                        continue;

                snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
                intrcnt_add(buf, &la->la_timer_count);
        }
}
SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL);

/* Unmask the PMC LVT again, but only if it has active users. */
void
lapic_reenable_pcint(void)
{
        uint32_t value;

        if (refcount_load(&pcint_refcnt) == 0)
                return;
        value = lapic_read32(LAPIC_LVT_PCINT);
        value &= ~APIC_LVT_M;
        lapic_write32(LAPIC_LVT_PCINT, value);
}

/* Rendezvous callback: reprogram the local CPU's PMC LVT entry. */
static void
lapic_update_pcint(void *dummy)
{
        struct lapic *la;

        la = &lapics[lapic_id()];
        lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
            lapic_read32(LAPIC_LVT_PCINT)));
}

/* Calibrate the local APIC timer frequency for the event timer. */
void
lapic_calibrate_timer(void)
{
        struct lapic *la;
        register_t intr;

#ifdef DEV_ATPIC
        /* Fail if the local APIC is not present.
*/ 842 if (!x2apic_mode && lapic_map == NULL) 843 return; 844 #endif 845 846 intr = intr_disable(); 847 la = &lapics[lapic_id()]; 848 849 lapic_calibrate_initcount(la); 850 851 intr_restore(intr); 852 853 if (lapic_timer_tsc_deadline && bootverbose) { 854 printf("lapic: deadline tsc mode, Frequency %ju Hz\n", 855 (uintmax_t)tsc_freq); 856 } 857 } 858 859 int 860 lapic_enable_pcint(void) 861 { 862 u_int32_t maxlvt; 863 864 #ifdef DEV_ATPIC 865 /* Fail if the local APIC is not present. */ 866 if (!x2apic_mode && lapic_map == NULL) 867 return (0); 868 #endif 869 870 /* Fail if the PMC LVT is not present. */ 871 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 872 if (maxlvt < APIC_LVT_PMC) 873 return (0); 874 if (refcount_acquire(&pcint_refcnt) > 0) 875 return (1); 876 lvts[APIC_LVT_PMC].lvt_masked = 0; 877 878 MPASS(mp_ncpus == 1 || smp_started); 879 smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL); 880 return (1); 881 } 882 883 void 884 lapic_disable_pcint(void) 885 { 886 u_int32_t maxlvt; 887 888 #ifdef DEV_ATPIC 889 /* Fail if the local APIC is not present. */ 890 if (!x2apic_mode && lapic_map == NULL) 891 return; 892 #endif 893 894 /* Fail if the PMC LVT is not present. */ 895 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 896 if (maxlvt < APIC_LVT_PMC) 897 return; 898 if (refcount_release(&pcint_refcnt)) 899 return; 900 lvts[APIC_LVT_PMC].lvt_masked = 1; 901 902 #ifdef SMP 903 /* The APs should always be started when hwpmc is unloaded. */ 904 KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); 905 #endif 906 smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL); 907 } 908 909 static int 910 lapic_calibrate_initcount_cpuid_vm(void) 911 { 912 u_int regs[4]; 913 uint64_t freq; 914 915 /* Get value from CPUID leaf if possible. 
         */
        if (vm_guest == VM_GUEST_NO)
                return (false);
        if (hv_high < 0x40000010)
                return (false);
        do_cpuid(0x40000010, regs);
        /* EBX reports the APIC timer frequency in kHz. */
        freq = (uint64_t)(regs[1]) * 1000;

        /* Pick timer divisor. */
        lapic_timer_divisor = 2;
        do {
                if (freq / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
                        break;
                lapic_timer_divisor <<= 1;
        } while (lapic_timer_divisor <= 128);
        if (lapic_timer_divisor > 128)
                return (false);

        /* Record divided frequency. */
        count_freq = freq / lapic_timer_divisor;
        return (count_freq != 0);
}

/* clockcalib() callback: the APIC timer counts down from the max count. */
static uint64_t
cb_lapic_getcount(void)
{

        return (APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER));
}

/*
 * Measure the APIC timer frequency (unless a hypervisor reported it)
 * and select a divisor so the count fits in the 32-bit count register.
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
        uint64_t freq;

        if (lapic_calibrate_initcount_cpuid_vm())
                goto done;

        /* Calibrate the APIC timer frequency. */
        lapic_timer_set_divisor(2);
        lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
        fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
        freq = clockcalib(cb_lapic_getcount, "lapic");
        fpu_kern_leave(curthread, NULL);

        /* Pick a different divisor if necessary. */
        lapic_timer_divisor = 2;
        do {
                /* freq was measured with divisor 2, hence the * 2. */
                if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
                        break;
                lapic_timer_divisor <<= 1;
        } while (lapic_timer_divisor <= 128);
        if (lapic_timer_divisor > 128)
                panic("lapic: Divisor too big");
        count_freq = freq * 2 / lapic_timer_divisor;
done:
        if (bootverbose) {
                printf("lapic: Divisor %lu, Frequency %lu Hz\n",
                    lapic_timer_divisor, count_freq);
        }
}

/*
 * Switch the event timer between periodic, one-shot and TSC-deadline
 * modes, updating the frequency and period bounds accordingly.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{
        if (la->la_timer_mode == newmode)
                return;
        switch (newmode) {
        case LAT_MODE_PERIODIC:
                lapic_timer_set_divisor(lapic_timer_divisor);
                et->et_frequency = count_freq;
                break;
        case LAT_MODE_DEADLINE:
                et->et_frequency = tsc_freq;
                break;
        case LAT_MODE_ONESHOT:
                lapic_timer_set_divisor(lapic_timer_divisor);
                et->et_frequency = count_freq;
                break;
        default:
                panic("lapic_change_mode %d", newmode);
        }
        la->la_timer_mode = newmode;
        et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
        et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/* Event timer start method: arm the local CPU's APIC timer. */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
        struct lapic *la;

        la = &lapics[PCPU_GET(apic_id)];
        if (period != 0) {
                lapic_change_mode(et, la, LAT_MODE_PERIODIC);
                /* Convert sbintime_t to timer ticks (frequency * sbt >> 32). */
                la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
                    32;
                lapic_timer_periodic(la);
        } else if (lapic_timer_tsc_deadline) {
                lapic_change_mode(et, la, LAT_MODE_DEADLINE);
                la->la_timer_period = (et->et_frequency * first) >> 32;
                lapic_timer_deadline(la);
        } else {
                lapic_change_mode(et, la, LAT_MODE_ONESHOT);
                la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
                    32;
                lapic_timer_oneshot(la);
        }
        return (0);
}

/* Event timer stop method: stop the local CPU's APIC timer. */
static int
lapic_et_stop(struct eventtimer
    *et)
{
        struct lapic *la;

        la = &lapics[PCPU_GET(apic_id)];
        lapic_timer_stop(la);
        la->la_timer_mode = LAT_MODE_UNDEF;
        return (0);
}

/* Software-disable the local APIC by clearing APIC_SVR_SWEN. */
void
lapic_disable(void)
{
        uint32_t value;

        /* Software disable the local APIC. */
        value = lapic_read32(LAPIC_SVR);
        value &= ~APIC_SVR_SWEN;
        lapic_write32(LAPIC_SVR, value);
}

/* Software-enable the local APIC and program the spurious vector. */
static void
lapic_enable(void)
{
        uint32_t value;

        /* Program the spurious vector to enable the local APIC. */
        value = lapic_read32(LAPIC_SVR);
        value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
        value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
        if (lapic_eoi_suppression)
                value |= APIC_SVR_EOI_SUPPRESSION;
        lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

        lapic_setup(0);
}

/* Return the current CPU's local APIC ID. */
int
lapic_id(void)
{
        uint32_t v;

        KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
        v = lapic_read32(LAPIC_ID);
        /* In xAPIC mode the ID lives in the top byte of the register. */
        if (!x2apic_mode)
                v >>= APIC_ID_SHIFT;
        return (v);
}

/* Return non-zero if the given vector is pending in the IRR. */
int
lapic_intr_pending(u_int vector)
{
        uint32_t irr;

        /*
         * The IRR registers are an array of registers each of which
         * only describes 32 interrupts in the low 32 bits.  Thus, we
         * divide the vector by 32 to get the register index.
         * Finally, we modulus the vector by 32 to determine the
         * individual bit to test.
         */
        irr = lapic_read32(LAPIC_IRR0 + vector / 32);
        return (irr & 1 << (vector % 32));
}

/* Record the logical destination (cluster/intra-cluster ID) for an APIC. */
void
lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
        struct lapic *la;

        KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
            __func__, apic_id));
        KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
            __func__, cluster));
        KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
            ("%s: intra cluster id %u too big", __func__, cluster_id));
        la = &lapics[apic_id];
        la->la_cluster = cluster;
        la->la_cluster_id = cluster_id;
}

/*
 * Set the mask bit of an LVT entry, either globally (APIC_ID_ALL) or
 * as a per-CPU override.  Returns EINVAL for an out-of-range pin.
 */
int
lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

        if (pin > APIC_LVT_MAX)
                return (EINVAL);
        if (apic_id == APIC_ID_ALL) {
                lvts[pin].lvt_masked = masked;
                if (bootverbose)
                        printf("lapic:");
        } else {
                KASSERT(lapics[apic_id].la_present,
                    ("%s: missing APIC %u", __func__, apic_id));
                lapics[apic_id].la_lvts[pin].lvt_masked = masked;
                lapics[apic_id].la_lvts[pin].lvt_active = 1;
                if (bootverbose)
                        printf("lapic%u:", apic_id);
        }
        if (bootverbose)
                printf(" LINT%u %s\n", pin, masked ?
"masked" : "unmasked"); 1135 return (0); 1136 } 1137 1138 int 1139 lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1140 { 1141 struct lvt *lvt; 1142 1143 if (pin > APIC_LVT_MAX) 1144 return (EINVAL); 1145 if (apic_id == APIC_ID_ALL) { 1146 lvt = &lvts[pin]; 1147 if (bootverbose) 1148 printf("lapic:"); 1149 } else { 1150 KASSERT(lapics[apic_id].la_present, 1151 ("%s: missing APIC %u", __func__, apic_id)); 1152 lvt = &lapics[apic_id].la_lvts[pin]; 1153 lvt->lvt_active = 1; 1154 if (bootverbose) 1155 printf("lapic%u:", apic_id); 1156 } 1157 lvt->lvt_mode = mode; 1158 switch (mode) { 1159 case APIC_LVT_DM_NMI: 1160 case APIC_LVT_DM_SMI: 1161 case APIC_LVT_DM_INIT: 1162 case APIC_LVT_DM_EXTINT: 1163 lvt->lvt_edgetrigger = 1; 1164 lvt->lvt_activehi = 1; 1165 if (mode == APIC_LVT_DM_EXTINT) 1166 lvt->lvt_masked = 1; 1167 else 1168 lvt->lvt_masked = 0; 1169 break; 1170 default: 1171 panic("Unsupported delivery mode: 0x%x\n", mode); 1172 } 1173 if (bootverbose) { 1174 printf(" Routing "); 1175 switch (mode) { 1176 case APIC_LVT_DM_NMI: 1177 printf("NMI"); 1178 break; 1179 case APIC_LVT_DM_SMI: 1180 printf("SMI"); 1181 break; 1182 case APIC_LVT_DM_INIT: 1183 printf("INIT"); 1184 break; 1185 case APIC_LVT_DM_EXTINT: 1186 printf("ExtINT"); 1187 break; 1188 } 1189 printf(" -> LINT%u\n", pin); 1190 } 1191 return (0); 1192 } 1193 1194 int 1195 lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1196 { 1197 1198 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1199 return (EINVAL); 1200 if (apic_id == APIC_ID_ALL) { 1201 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1202 if (bootverbose) 1203 printf("lapic:"); 1204 } else { 1205 KASSERT(lapics[apic_id].la_present, 1206 ("%s: missing APIC %u", __func__, apic_id)); 1207 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1208 lapics[apic_id].la_lvts[pin].lvt_activehi = 1209 (pol == INTR_POLARITY_HIGH); 1210 if (bootverbose) 1211 printf("lapic%u:", apic_id); 1212 } 1213 if (bootverbose) 1214 
printf(" LINT%u polarity: %s\n", pin, 1215 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1216 return (0); 1217 } 1218 1219 int 1220 lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1221 enum intr_trigger trigger) 1222 { 1223 1224 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1225 return (EINVAL); 1226 if (apic_id == APIC_ID_ALL) { 1227 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1228 if (bootverbose) 1229 printf("lapic:"); 1230 } else { 1231 KASSERT(lapics[apic_id].la_present, 1232 ("%s: missing APIC %u", __func__, apic_id)); 1233 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1234 (trigger == INTR_TRIGGER_EDGE); 1235 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1236 if (bootverbose) 1237 printf("lapic%u:", apic_id); 1238 } 1239 if (bootverbose) 1240 printf(" LINT%u trigger: %s\n", pin, 1241 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1242 return (0); 1243 } 1244 1245 /* 1246 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1247 * the passed in vector. 
1248 */ 1249 static void 1250 lapic_set_tpr(u_int vector) 1251 { 1252 #ifdef CHEAP_TPR 1253 lapic_write32(LAPIC_TPR, vector); 1254 #else 1255 uint32_t tpr; 1256 1257 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1258 tpr |= vector; 1259 lapic_write32(LAPIC_TPR, tpr); 1260 #endif 1261 } 1262 1263 void 1264 lapic_eoi(void) 1265 { 1266 1267 lapic_write32_nofence(LAPIC_EOI, 0); 1268 } 1269 1270 void 1271 lapic_handle_intr(int vector, struct trapframe *frame) 1272 { 1273 struct intsrc *isrc; 1274 1275 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1276 kmsan_mark(&vector, sizeof(vector), KMSAN_STATE_INITED); 1277 kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); 1278 trap_check_kstack(); 1279 1280 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1281 vector)); 1282 intr_execute_handlers(isrc, frame); 1283 } 1284 1285 void 1286 lapic_handle_timer(struct trapframe *frame) 1287 { 1288 struct lapic *la; 1289 struct trapframe *oldframe; 1290 struct thread *td; 1291 1292 /* Send EOI first thing. */ 1293 lapic_eoi(); 1294 1295 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1296 kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); 1297 trap_check_kstack(); 1298 1299 #if defined(SMP) && !defined(SCHED_ULE) 1300 /* 1301 * Don't do any accounting for the disabled HTT cores, since it 1302 * will provide misleading numbers for the userland. 1303 * 1304 * No locking is necessary here, since even if we lose the race 1305 * when hlt_cpus_mask changes it is not a big deal, really. 1306 * 1307 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask 1308 * and unlike other schedulers it actually schedules threads to 1309 * those CPUs. 1310 */ 1311 if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) 1312 return; 1313 #endif 1314 1315 /* Look up our local APIC structure for the tick counters. 
*/ 1316 la = &lapics[PCPU_GET(apic_id)]; 1317 (*la->la_timer_count)++; 1318 critical_enter(); 1319 if (lapic_et.et_active) { 1320 td = curthread; 1321 td->td_intr_nesting_level++; 1322 oldframe = td->td_intr_frame; 1323 td->td_intr_frame = frame; 1324 lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); 1325 td->td_intr_frame = oldframe; 1326 td->td_intr_nesting_level--; 1327 } 1328 critical_exit(); 1329 } 1330 1331 static void 1332 lapic_timer_set_divisor(u_int divisor) 1333 { 1334 1335 KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); 1336 KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), 1337 ("lapic: invalid divisor %u", divisor)); 1338 lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); 1339 } 1340 1341 static void 1342 lapic_timer_oneshot(struct lapic *la) 1343 { 1344 uint32_t value; 1345 1346 value = la->lvt_timer_base; 1347 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1348 value |= APIC_LVTT_TM_ONE_SHOT; 1349 la->lvt_timer_last = value; 1350 lapic_write32(LAPIC_LVT_TIMER, value); 1351 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1352 } 1353 1354 static void 1355 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) 1356 { 1357 uint32_t value; 1358 1359 value = la->lvt_timer_base; 1360 value &= ~APIC_LVTT_TM; 1361 value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; 1362 la->lvt_timer_last = value; 1363 lapic_write32(LAPIC_LVT_TIMER, value); 1364 lapic_write32(LAPIC_ICR_TIMER, count); 1365 } 1366 1367 static void 1368 lapic_timer_periodic(struct lapic *la) 1369 { 1370 uint32_t value; 1371 1372 value = la->lvt_timer_base; 1373 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1374 value |= APIC_LVTT_TM_PERIODIC; 1375 la->lvt_timer_last = value; 1376 lapic_write32(LAPIC_LVT_TIMER, value); 1377 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1378 } 1379 1380 static void 1381 lapic_timer_deadline(struct lapic *la) 1382 { 1383 uint32_t value; 1384 1385 value = la->lvt_timer_base; 1386 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1387 
value |= APIC_LVTT_TM_TSCDLT; 1388 if (value != la->lvt_timer_last) { 1389 la->lvt_timer_last = value; 1390 lapic_write32_nofence(LAPIC_LVT_TIMER, value); 1391 if (!x2apic_mode) 1392 mfence(); 1393 } 1394 wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); 1395 } 1396 1397 static void 1398 lapic_timer_stop(struct lapic *la) 1399 { 1400 uint32_t value; 1401 1402 if (la->la_timer_mode == LAT_MODE_DEADLINE) { 1403 wrmsr(MSR_TSC_DEADLINE, 0); 1404 mfence(); 1405 } else { 1406 value = la->lvt_timer_base; 1407 value &= ~APIC_LVTT_TM; 1408 value |= APIC_LVT_M; 1409 la->lvt_timer_last = value; 1410 lapic_write32(LAPIC_LVT_TIMER, value); 1411 } 1412 } 1413 1414 void 1415 lapic_handle_cmc(void) 1416 { 1417 trap_check_kstack(); 1418 1419 lapic_eoi(); 1420 cmc_intr(); 1421 } 1422 1423 /* 1424 * Called from the mca_init() to activate the CMC interrupt if this CPU is 1425 * responsible for monitoring any MC banks for CMC events. Since mca_init() 1426 * is called prior to lapic_setup() during boot, this just needs to unmask 1427 * this CPU's LVT_CMCI entry. 
1428 */ 1429 void 1430 lapic_enable_cmc(void) 1431 { 1432 u_int apic_id; 1433 1434 #ifdef DEV_ATPIC 1435 if (!x2apic_mode && lapic_map == NULL) 1436 return; 1437 #endif 1438 apic_id = PCPU_GET(apic_id); 1439 KASSERT(lapics[apic_id].la_present, 1440 ("%s: missing APIC %u", __func__, apic_id)); 1441 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; 1442 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; 1443 } 1444 1445 int 1446 lapic_enable_mca_elvt(void) 1447 { 1448 u_int apic_id; 1449 uint32_t value; 1450 int elvt_count; 1451 1452 #ifdef DEV_ATPIC 1453 if (lapic_map == NULL) 1454 return (-1); 1455 #endif 1456 1457 apic_id = PCPU_GET(apic_id); 1458 KASSERT(lapics[apic_id].la_present, 1459 ("%s: missing APIC %u", __func__, apic_id)); 1460 elvt_count = amd_read_elvt_count(); 1461 if (elvt_count <= APIC_ELVT_MCA) 1462 return (-1); 1463 1464 value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); 1465 if ((value & APIC_LVT_M) == 0) { 1466 if (bootverbose) 1467 printf("AMD MCE Thresholding Extended LVT is already active\n"); 1468 return (APIC_ELVT_MCA); 1469 } 1470 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; 1471 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; 1472 return (APIC_ELVT_MCA); 1473 } 1474 1475 void 1476 lapic_handle_error(void) 1477 { 1478 uint32_t esr; 1479 1480 trap_check_kstack(); 1481 1482 /* 1483 * Read the contents of the error status register. Write to 1484 * the register first before reading from it to force the APIC 1485 * to update its value to indicate any errors that have 1486 * occurred since the previous write to the register. 1487 */ 1488 lapic_write32(LAPIC_ESR, 0); 1489 esr = lapic_read32(LAPIC_ESR); 1490 1491 printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); 1492 lapic_eoi(); 1493 } 1494 1495 u_int 1496 apic_cpuid(u_int apic_id) 1497 { 1498 #ifdef SMP 1499 return apic_cpuids[apic_id]; 1500 #else 1501 return 0; 1502 #endif 1503 } 1504 1505 /* Request a free IDT vector to be used by the specified IRQ. 
 */
u_int
apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	/* 0 signals allocation failure to the caller. */
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
u_int
apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if (((vector + APIC_IO_INTS) & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/* Found a run, assign IRQs and return the first vector. */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}

/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
void
apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}

void
apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}

/* Release an APIC vector when it's no longer in use. */
void
apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	/* Negative sentinels (e.g. IRQ_FREE) are reported as IRQ 0. */
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 */
DB_SHOW_COMMAND_FLAGS(apic, db_show_apic, DB_CMD_MEMSAFE)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "show apic/v" and "show apic/vv" select increasing verbosity. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}

/* Print the set bits of 'v' as hex vector numbers offset by 'base'. */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	/* 'first' defers the prefix until a set bit is actually found. */
	first = 1;
	for (i = 0; i < 32; i++)
		if (v & (1 << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}

/* Show info from the lapic regs for this CPU. */
DB_SHOW_COMMAND_FLAGS(lapic, db_show_lapic, DB_CMD_MEMSAFE)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));

/* Helper: dump one 32-bit slice of the ISR/TMR/IRR register arrays. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
1811 */ 1812 1813 static SLIST_HEAD(, apic_enumerator) enumerators = 1814 SLIST_HEAD_INITIALIZER(enumerators); 1815 static struct apic_enumerator *best_enum; 1816 1817 void 1818 apic_register_enumerator(struct apic_enumerator *enumerator) 1819 { 1820 #ifdef INVARIANTS 1821 struct apic_enumerator *apic_enum; 1822 1823 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1824 if (apic_enum == enumerator) 1825 panic("%s: Duplicate register of %s", __func__, 1826 enumerator->apic_name); 1827 } 1828 #endif 1829 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1830 } 1831 1832 /* 1833 * We have to look for CPU's very, very early because certain subsystems 1834 * want to know how many CPU's we have extremely early on in the boot 1835 * process. 1836 */ 1837 static void 1838 apic_init(void *dummy __unused) 1839 { 1840 struct apic_enumerator *enumerator; 1841 int retval, best; 1842 1843 /* We only support built in local APICs. */ 1844 if (!(cpu_feature & CPUID_APIC)) 1845 return; 1846 1847 /* Don't probe if APIC mode is disabled. */ 1848 if (resource_disabled("apic", 0)) 1849 return; 1850 1851 /* Probe all the enumerators to find the best match. */ 1852 best_enum = NULL; 1853 best = 0; 1854 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1855 retval = enumerator->apic_probe(); 1856 if (retval > 0) 1857 continue; 1858 if (best_enum == NULL || best < retval) { 1859 best_enum = enumerator; 1860 best = retval; 1861 } 1862 } 1863 if (best_enum == NULL) { 1864 if (bootverbose) 1865 printf("APIC: Could not find any APICs.\n"); 1866 #ifndef DEV_ATPIC 1867 panic("running without device atpic requires a local APIC"); 1868 #endif 1869 return; 1870 } 1871 1872 if (bootverbose) 1873 printf("APIC: Using the %s enumerator.\n", 1874 best_enum->apic_name); 1875 1876 #ifdef I686_CPU 1877 /* 1878 * To work around an errata, we disable the local APIC on some 1879 * CPUs during early startup. We need to turn the local APIC back 1880 * on on such CPUs now. 
1881 */ 1882 ppro_reenable_apic(); 1883 #endif 1884 1885 /* Probe the CPU's in the system. */ 1886 retval = best_enum->apic_probe_cpus(); 1887 if (retval != 0) 1888 printf("%s: Failed to probe CPUs: returned %d\n", 1889 best_enum->apic_name, retval); 1890 1891 } 1892 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1893 1894 /* 1895 * Setup the local APIC. We have to do this prior to starting up the APs 1896 * in the SMP case. 1897 */ 1898 static void 1899 apic_setup_local(void *dummy __unused) 1900 { 1901 int retval; 1902 1903 if (best_enum == NULL) 1904 return; 1905 1906 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1907 M_WAITOK | M_ZERO); 1908 1909 /* Initialize the local APIC. */ 1910 retval = best_enum->apic_setup_local(); 1911 if (retval != 0) 1912 printf("%s: Failed to setup the local APIC: returned %d\n", 1913 best_enum->apic_name, retval); 1914 } 1915 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1916 1917 /* 1918 * Setup the I/O APICs. 1919 */ 1920 static void 1921 apic_setup_io(void *dummy __unused) 1922 { 1923 int retval; 1924 1925 if (best_enum == NULL) 1926 return; 1927 1928 /* 1929 * Local APIC must be registered before other PICs and pseudo PICs 1930 * for proper suspend/resume order. 1931 */ 1932 intr_register_pic(&lapic_pic); 1933 1934 retval = best_enum->apic_setup_io(); 1935 if (retval != 0) 1936 printf("%s: Failed to setup I/O APICs: returned %d\n", 1937 best_enum->apic_name, retval); 1938 1939 /* 1940 * Finish setting up the local APIC on the BSP once we know 1941 * how to properly program the LINT pins. In particular, this 1942 * enables the EOI suppression mode, if LAPIC supports it and 1943 * user did not disable the mode. 1944 */ 1945 lapic_setup(1); 1946 if (bootverbose) 1947 lapic_dump("BSP"); 1948 1949 /* Enable the MSI "pic". 
*/ 1950 msi_init(); 1951 1952 #ifdef XENHVM 1953 xen_intr_alloc_irqs(); 1954 #endif 1955 } 1956 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1957 1958 #ifdef SMP 1959 /* 1960 * Inter Processor Interrupt functions. The lapic_ipi_*() functions are 1961 * private to the MD code. The public interface for the rest of the 1962 * kernel is defined in mp_machdep.c. 1963 */ 1964 1965 /* 1966 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1967 * wait forever. 1968 */ 1969 int 1970 lapic_ipi_wait(int delay) 1971 { 1972 uint64_t rx; 1973 1974 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1975 if (x2apic_mode) 1976 return (1); 1977 1978 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1979 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1980 APIC_DELSTAT_IDLE) 1981 return (1); 1982 ia32_pause(); 1983 } 1984 return (0); 1985 } 1986 1987 void 1988 lapic_ipi_raw(register_t icrlo, u_int dest) 1989 { 1990 uint32_t icrhi; 1991 1992 /* XXX: Need more sanity checking of icrlo? 
 */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		/*
		 * Explicit destination: the destination field goes in the
		 * ICR high word (shifted in xAPIC mode), written together
		 * with the low word.
		 */
		if (x2apic_mode)
			icrhi = dest;
		else
			icrhi = dest << APIC_ID_SHIFT;
		lapic_write_icr(icrhi, icrlo);
	} else {
		/* Shorthand destinations do not use the ICR high word. */
		lapic_write_icr_lo(icrlo);
	}
}

#ifdef DETECT_DEADLOCK
/* Iterations to wait for IPI delivery before complaining (see below). */
#define AFTER_SPIN 50
#endif

/* Send the given vector (or an NMI) as an IPI to 'dest'. */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		/* x2APIC has a dedicated fast path for self-IPIs. */
		if (x2apic_mode && vector < IPI_NMI_FIRST) {
			lapic_write_self_ipi(vector);
			return;
		}
		icrlo = APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo = APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo = APIC_DEST_ALLESELF;
		break;
	default:
		icrlo = 0;
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}

/*
 * Indirect IPI entry point, initialized to the native implementation;
 * being a function pointer, it can be replaced at runtime.
 */
void (*ipi_vectored)(u_int, int) = &native_lapic_ipi_vectored;
#endif /* SMP */

/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region.  In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written.  Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */
int
lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	/* -1 is returned if no dynamic IPI slot is free. */
	vector = -1;
	mtx_lock_spin(&icu_lock);
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		/* A slot is free if its gate still points at the rsvd stub. */
		ip = &idt[idx];
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
		func -= setidt_disp;
#endif
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	return (vector);
}

/* Return a dynamic IPI vector to the free pool (point it back at rsvd). */
void
lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func __diagused;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
	func -= setidt_disp;
#endif
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}