1 /* $NetBSD: acpi_cpu_md.c,v 1.68 2011/10/18 05:08:24 jruoho Exp $ */ 2 3 /*- 4 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.68 2011/10/18 05:08:24 jruoho Exp $"); 31 32 #include <sys/param.h> 33 #include <sys/bus.h> 34 #include <sys/cpufreq.h> 35 #include <sys/device.h> 36 #include <sys/kcore.h> 37 #include <sys/sysctl.h> 38 #include <sys/xcall.h> 39 40 #include <x86/cpu.h> 41 #include <x86/cpufunc.h> 42 #include <x86/cputypes.h> 43 #include <x86/cpuvar.h> 44 #include <x86/cpu_msr.h> 45 #include <x86/machdep.h> 46 47 #include <dev/acpi/acpica.h> 48 #include <dev/acpi/acpi_cpu.h> 49 50 #include <dev/pci/pcivar.h> 51 #include <dev/pci/pcidevs.h> 52 53 #include <machine/acpi_machdep.h> 54 55 /* 56 * Intel IA32_MISC_ENABLE. 57 */ 58 #define MSR_MISC_ENABLE_EST __BIT(16) 59 #define MSR_MISC_ENABLE_TURBO __BIT(38) 60 61 /* 62 * AMD C1E. 63 */ 64 #define MSR_CMPHALT 0xc0010055 65 66 #define MSR_CMPHALT_SMI __BIT(27) 67 #define MSR_CMPHALT_C1E __BIT(28) 68 #define MSR_CMPHALT_BMSTS __BIT(29) 69 70 /* 71 * AMD families 10h, 11h, and 14h 72 */ 73 #define MSR_10H_LIMIT 0xc0010061 74 #define MSR_10H_CONTROL 0xc0010062 75 #define MSR_10H_STATUS 0xc0010063 76 #define MSR_10H_CONFIG 0xc0010064 77 78 /* 79 * AMD family 0Fh. 80 */ 81 #define MSR_0FH_CONTROL 0xc0010041 82 #define MSR_0FH_STATUS 0xc0010042 83 84 #define MSR_0FH_STATUS_CFID __BITS( 0, 5) 85 #define MSR_0FH_STATUS_CVID __BITS(32, 36) 86 #define MSR_0FH_STATUS_PENDING __BITS(31, 31) 87 88 #define MSR_0FH_CONTROL_FID __BITS( 0, 5) 89 #define MSR_0FH_CONTROL_VID __BITS( 8, 12) 90 #define MSR_0FH_CONTROL_CHG __BITS(16, 16) 91 #define MSR_0FH_CONTROL_CNT __BITS(32, 51) 92 93 #define ACPI_0FH_STATUS_FID __BITS( 0, 5) 94 #define ACPI_0FH_STATUS_VID __BITS( 6, 10) 95 96 #define ACPI_0FH_CONTROL_FID __BITS( 0, 5) 97 #define ACPI_0FH_CONTROL_VID __BITS( 6, 10) 98 #define ACPI_0FH_CONTROL_VST __BITS(11, 17) 99 #define ACPI_0FH_CONTROL_MVS __BITS(18, 19) 100 #define ACPI_0FH_CONTROL_PLL __BITS(20, 26) 101 #define ACPI_0FH_CONTROL_RVO __BITS(28, 29) 102 #define ACPI_0FH_CONTROL_IRT __BITS(30, 31) 103 104 #define FID_TO_VCO_FID(fidd) (((fid) < 8) ? (8 + ((fid) << 1)) : (fid)) 105 106 static char native_idle_text[16]; 107 void (*native_idle)(void) = NULL; 108 109 static int acpicpu_md_quirk_piix4(const struct pci_attach_args *); 110 static void acpicpu_md_quirk_amd(struct acpicpu_pstate *, uint32_t); 111 static void acpicpu_md_pstate_hwf_reset(void *, void *); 112 static int acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *, 113 uint32_t *); 114 static int acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *); 115 static int acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *); 116 static void acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t, 117 uint32_t, uint32_t); 118 static int acpicpu_md_pstate_sysctl_init(void); 119 static int acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO); 120 static int acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO); 121 static int acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO); 122 123 extern struct acpicpu_softc **acpicpu_sc; 124 static struct sysctllog *acpicpu_log = NULL; 125 126 struct cpu_info * 127 acpicpu_md_match(device_t parent, cfdata_t match, void *aux) 128 { 129 struct cpufeature_attach_args *cfaa = aux; 130 131 if (strcmp(cfaa->name, "frequency") != 0) 132 return NULL; 133 134 return cfaa->ci; 135 } 136 137 struct cpu_info * 138 acpicpu_md_attach(device_t parent, device_t self, void *aux) 139 { 140 struct cpufeature_attach_args *cfaa = aux; 141 142 return cfaa->ci; 143 } 144 145 uint32_t 146 acpicpu_md_flags(void) 147 { 148 struct cpu_info *ci = curcpu(); 149 struct pci_attach_args pa; 150 uint32_t family, val = 0; 151 uint32_t regs[4]; 152 uint64_t msr; 153 154 if (acpi_md_ncpus() == 1) 155 val |= ACPICPU_FLAG_C_BM; 156 157 if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0) 158 val |= ACPICPU_FLAG_C_FFH; 159 160 /* 161 * By default, assume that the local APIC timer 162 * as well as TSC are stalled during C3 sleep. 163 */ 164 val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC; 165 166 switch (cpu_vendor) { 167 168 case CPUVENDOR_IDT: 169 170 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0) 171 val |= ACPICPU_FLAG_P_FFH; 172 173 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0) 174 val |= ACPICPU_FLAG_T_FFH; 175 176 break; 177 178 case CPUVENDOR_INTEL: 179 180 /* 181 * Bus master control and arbitration should be 182 * available on all supported Intel CPUs (to be 183 * sure, this is double-checked later from the 184 * firmware data). These flags imply that it is 185 * not necessary to flush caches before C3 state. 186 */ 187 val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB; 188 189 /* 190 * Check if we can use "native", MSR-based, 191 * access. If not, we have to resort to I/O. 192 */ 193 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0) 194 val |= ACPICPU_FLAG_P_FFH; 195 196 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0) 197 val |= ACPICPU_FLAG_T_FFH; 198 199 /* 200 * Check whether MSR_APERF, MSR_MPERF, and Turbo 201 * Boost are available. Also see if we might have 202 * an invariant local APIC timer ("ARAT"). 203 */ 204 if (cpuid_level >= 0x06) { 205 206 x86_cpuid(0x00000006, regs); 207 208 if ((regs[2] & CPUID_DSPM_HWF) != 0) 209 val |= ACPICPU_FLAG_P_HWF; 210 211 if ((regs[0] & CPUID_DSPM_IDA) != 0) 212 val |= ACPICPU_FLAG_P_TURBO; 213 214 if ((regs[0] & CPUID_DSPM_ARAT) != 0) 215 val &= ~ACPICPU_FLAG_C_APIC; 216 } 217 218 /* 219 * Detect whether TSC is invariant. If it is not, 220 * we keep the flag to note that TSC will not run 221 * at constant rate. Depending on the CPU, this may 222 * affect P- and T-state changes, but especially 223 * relevant are C-states; with variant TSC, states 224 * larger than C1 may completely stop the counter. 225 */ 226 x86_cpuid(0x80000000, regs); 227 228 if (regs[0] >= 0x80000007) { 229 230 x86_cpuid(0x80000007, regs); 231 232 if ((regs[3] & __BIT(8)) != 0) 233 val &= ~ACPICPU_FLAG_C_TSC; 234 } 235 236 break; 237 238 case CPUVENDOR_AMD: 239 240 x86_cpuid(0x80000000, regs); 241 242 if (regs[0] < 0x80000007) 243 break; 244 245 x86_cpuid(0x80000007, regs); 246 247 family = CPUID2FAMILY(ci->ci_signature); 248 249 if (family == 0xf) 250 family += CPUID2EXTFAMILY(ci->ci_signature); 251 252 switch (family) { 253 254 case 0x0f: 255 256 /* 257 * Evaluate support for the "FID/VID 258 * algorithm" also used by powernow(4). 259 */ 260 if ((regs[3] & CPUID_APM_FID) == 0) 261 break; 262 263 if ((regs[3] & CPUID_APM_VID) == 0) 264 break; 265 266 val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID; 267 break; 268 269 case 0x10: 270 case 0x11: 271 272 if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT) 273 val |= ACPICPU_FLAG_C_C1E; 274 275 /* FALLTHROUGH */ 276 277 case 0x14: /* AMD Fusion */ 278 279 /* 280 * Like with Intel, detect invariant TSC, 281 * MSR-based P-states, and AMD's "turbo" 282 * (Core Performance Boost), respectively. 283 */ 284 if ((regs[3] & CPUID_APM_TSC) != 0) 285 val &= ~ACPICPU_FLAG_C_TSC; 286 287 if ((regs[3] & CPUID_APM_HWP) != 0) 288 val |= ACPICPU_FLAG_P_FFH; 289 290 if ((regs[3] & CPUID_APM_CPB) != 0) 291 val |= ACPICPU_FLAG_P_TURBO; 292 293 /* 294 * Also check for APERF and MPERF, 295 * first available in the family 10h. 296 */ 297 if (cpuid_level >= 0x06) { 298 299 x86_cpuid(0x00000006, regs); 300 301 if ((regs[2] & CPUID_DSPM_HWF) != 0) 302 val |= ACPICPU_FLAG_P_HWF; 303 } 304 305 break; 306 } 307 308 break; 309 } 310 311 /* 312 * There are several erratums for PIIX4. 313 */ 314 if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0) 315 val |= ACPICPU_FLAG_PIIX4; 316 317 return val; 318 } 319 320 static int 321 acpicpu_md_quirk_piix4(const struct pci_attach_args *pa) 322 { 323 324 /* 325 * XXX: The pci_find_device(9) function only 326 * deals with attached devices. Change this 327 * to use something like pci_device_foreach(). 328 */ 329 if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL) 330 return 0; 331 332 if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA || 333 PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC) 334 return 1; 335 336 return 0; 337 } 338 339 static void 340 acpicpu_md_quirk_amd(struct acpicpu_pstate *ps, uint32_t i) 341 { 342 struct cpu_info *ci = &cpu_info_primary; 343 uint32_t family, fid, freq, did, zeta; 344 uint64_t val; 345 346 if (i > 7 || cpu_vendor != CPUVENDOR_AMD) 347 return; 348 349 family = CPUID2FAMILY(ci->ci_signature); 350 351 if (family == 0xf) 352 family += CPUID2EXTFAMILY(ci->ci_signature); 353 354 switch (family) { 355 356 case 0x10: 357 zeta = 0x10; 358 break; 359 360 case 0x11: 361 zeta = 0x08; 362 break; 363 364 default: 365 return; 366 } 367 368 /* 369 * The following eight P-state control MSRs define 370 * the static per-core values; the MSB indicates 371 * whether the state is enabled, and the first eight 372 * bits define the frequency divisor and multiplier. 373 */ 374 val = rdmsr(MSR_10H_CONFIG + i); 375 376 if ((val & __BIT(63)) == 0) 377 return; 378 379 fid = __SHIFTOUT(val, __BITS(0, 5)); 380 did = __SHIFTOUT(val, __BITS(6, 8)); 381 382 freq = 100 * (fid + zeta) >> did; 383 384 if (freq != 0 && ps->ps_freq != freq) 385 ps->ps_freq = freq; 386 } 387 388 void 389 acpicpu_md_quirk_c1e(void) 390 { 391 const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E; 392 uint64_t val; 393 394 val = rdmsr(MSR_CMPHALT); 395 396 if ((val & c1e) != 0) 397 wrmsr(MSR_CMPHALT, val & ~c1e); 398 } 399 400 int 401 acpicpu_md_cstate_start(struct acpicpu_softc *sc) 402 { 403 const size_t size = sizeof(native_idle_text); 404 struct acpicpu_cstate *cs; 405 bool ipi = false; 406 int i; 407 408 /* 409 * Save the cpu_idle(9) loop used by default. 410 */ 411 x86_cpu_idle_get(&native_idle, native_idle_text, size); 412 413 for (i = 0; i < ACPI_C_STATE_COUNT; i++) { 414 415 cs = &sc->sc_cstate[i]; 416 417 if (cs->cs_method == ACPICPU_C_STATE_HALT) { 418 ipi = true; 419 break; 420 } 421 } 422 423 x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi); 424 425 return 0; 426 } 427 428 int 429 acpicpu_md_cstate_stop(void) 430 { 431 static char text[16]; 432 void (*func)(void); 433 uint64_t xc; 434 bool ipi; 435 436 x86_cpu_idle_get(&func, text, sizeof(text)); 437 438 if (func == native_idle) 439 return EALREADY; 440 441 ipi = (native_idle != x86_cpu_idle_halt) ? false : true; 442 x86_cpu_idle_set(native_idle, native_idle_text, ipi); 443 444 /* 445 * Run a cross-call to ensure that all CPUs are 446 * out from the ACPI idle-loop before detachment. 447 */ 448 xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 449 xc_wait(xc); 450 451 return 0; 452 } 453 454 /* 455 * Called with interrupts enabled. 456 */ 457 void 458 acpicpu_md_cstate_enter(int method, int state) 459 { 460 struct cpu_info *ci = curcpu(); 461 462 KASSERT(ci->ci_ilevel == IPL_NONE); 463 464 switch (method) { 465 466 case ACPICPU_C_STATE_FFH: 467 468 x86_monitor(&ci->ci_want_resched, 0, 0); 469 470 if (__predict_false(ci->ci_want_resched != 0)) 471 return; 472 473 x86_mwait((state - 1) << 4, 0); 474 break; 475 476 case ACPICPU_C_STATE_HALT: 477 478 x86_disable_intr(); 479 480 if (__predict_false(ci->ci_want_resched != 0)) { 481 x86_enable_intr(); 482 return; 483 } 484 485 x86_stihlt(); 486 break; 487 } 488 } 489 490 int 491 acpicpu_md_pstate_start(struct acpicpu_softc *sc) 492 { 493 uint64_t xc, val; 494 495 switch (cpu_vendor) { 496 497 case CPUVENDOR_IDT: 498 case CPUVENDOR_INTEL: 499 500 /* 501 * Make sure EST is enabled. 502 */ 503 if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) { 504 505 val = rdmsr(MSR_MISC_ENABLE); 506 507 if ((val & MSR_MISC_ENABLE_EST) == 0) { 508 509 val |= MSR_MISC_ENABLE_EST; 510 wrmsr(MSR_MISC_ENABLE, val); 511 val = rdmsr(MSR_MISC_ENABLE); 512 513 if ((val & MSR_MISC_ENABLE_EST) == 0) 514 return ENOTTY; 515 } 516 } 517 } 518 519 /* 520 * Reset the APERF and MPERF counters. 521 */ 522 if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) { 523 xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL); 524 xc_wait(xc); 525 } 526 527 return acpicpu_md_pstate_sysctl_init(); 528 } 529 530 int 531 acpicpu_md_pstate_stop(void) 532 { 533 534 if (acpicpu_log == NULL) 535 return EALREADY; 536 537 sysctl_teardown(&acpicpu_log); 538 acpicpu_log = NULL; 539 540 return 0; 541 } 542 543 int 544 acpicpu_md_pstate_init(struct acpicpu_softc *sc) 545 { 546 struct cpu_info *ci = sc->sc_ci; 547 struct acpicpu_pstate *ps, msr; 548 uint32_t family, i = 0; 549 550 (void)memset(&msr, 0, sizeof(struct acpicpu_pstate)); 551 552 switch (cpu_vendor) { 553 554 case CPUVENDOR_IDT: 555 case CPUVENDOR_INTEL: 556 557 /* 558 * If the so-called Turbo Boost is present, 559 * the P0-state is always the "turbo state". 560 * It is shown as the P1 frequency + 1 MHz. 561 * 562 * For discussion, see: 563 * 564 * Intel Corporation: Intel Turbo Boost Technology 565 * in Intel Core(tm) Microarchitectures (Nehalem) 566 * Based Processors. White Paper, November 2008. 567 */ 568 if (sc->sc_pstate_count >= 2 && 569 (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) { 570 571 ps = &sc->sc_pstate[0]; 572 573 if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1) 574 ps->ps_flags |= ACPICPU_FLAG_P_TURBO; 575 } 576 577 msr.ps_control_addr = MSR_PERF_CTL; 578 msr.ps_control_mask = __BITS(0, 15); 579 580 msr.ps_status_addr = MSR_PERF_STATUS; 581 msr.ps_status_mask = __BITS(0, 15); 582 break; 583 584 case CPUVENDOR_AMD: 585 586 if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0) 587 msr.ps_flags |= ACPICPU_FLAG_P_FIDVID; 588 589 family = CPUID2FAMILY(ci->ci_signature); 590 591 if (family == 0xf) 592 family += CPUID2EXTFAMILY(ci->ci_signature); 593 594 switch (family) { 595 596 case 0x0f: 597 msr.ps_control_addr = MSR_0FH_CONTROL; 598 msr.ps_status_addr = MSR_0FH_STATUS; 599 break; 600 601 case 0x10: 602 case 0x11: 603 case 0x14: /* AMD Fusion */ 604 msr.ps_control_addr = MSR_10H_CONTROL; 605 msr.ps_control_mask = __BITS(0, 2); 606 607 msr.ps_status_addr = MSR_10H_STATUS; 608 msr.ps_status_mask = __BITS(0, 2); 609 break; 610 611 default: 612 /* 613 * If we have an unknown AMD CPU, rely on XPSS. 614 */ 615 if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0) 616 return EOPNOTSUPP; 617 } 618 619 break; 620 621 default: 622 return ENODEV; 623 } 624 625 /* 626 * Fill the P-state structures with MSR addresses that are 627 * known to be correct. If we do not know the addresses, 628 * leave the values intact. If a vendor uses XPSS, we do 629 * not necessarily need to do anything to support new CPUs. 630 */ 631 while (i < sc->sc_pstate_count) { 632 633 ps = &sc->sc_pstate[i]; 634 635 if (msr.ps_flags != 0) 636 ps->ps_flags |= msr.ps_flags; 637 638 if (msr.ps_status_addr != 0) 639 ps->ps_status_addr = msr.ps_status_addr; 640 641 if (msr.ps_status_mask != 0) 642 ps->ps_status_mask = msr.ps_status_mask; 643 644 if (msr.ps_control_addr != 0) 645 ps->ps_control_addr = msr.ps_control_addr; 646 647 if (msr.ps_control_mask != 0) 648 ps->ps_control_mask = msr.ps_control_mask; 649 650 /* 651 * Some AMD systems may round the frequencies 652 * reported in the tables. Try to fix these. 653 */ 654 if (cpu_vendor == CPUVENDOR_AMD) 655 acpicpu_md_quirk_amd(ps, i); 656 657 i++; 658 } 659 660 return 0; 661 } 662 663 /* 664 * Read the IA32_APERF and IA32_MPERF counters. The first 665 * increments at the rate of the fixed maximum frequency 666 * configured during the boot, whereas APERF counts at the 667 * rate of the actual frequency. Note that the MSRs must be 668 * read without delay, and that only the ratio between 669 * IA32_APERF and IA32_MPERF is architecturally defined. 670 * 671 * The function thus returns the percentage of the actual 672 * frequency in terms of the maximum frequency of the calling 673 * CPU since the last call. A value zero implies an error. 674 * 675 * For further details, refer to: 676 * 677 * Intel Corporation: Intel 64 and IA-32 Architectures 678 * Software Developer's Manual. Section 13.2, Volume 3A: 679 * System Programming Guide, Part 1. July, 2008. 680 * 681 * Advanced Micro Devices: BIOS and Kernel Developer's 682 * Guide (BKDG) for AMD Family 10h Processors. Section 683 * 2.4.5, Revision 3.48, April 2010. 684 */ 685 uint8_t 686 acpicpu_md_pstate_hwf(struct cpu_info *ci) 687 { 688 struct acpicpu_softc *sc; 689 uint64_t aperf, mperf; 690 uint8_t rv = 0; 691 692 sc = acpicpu_sc[ci->ci_acpiid]; 693 694 if (__predict_false(sc == NULL)) 695 return 0; 696 697 if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0)) 698 return 0; 699 700 aperf = sc->sc_pstate_aperf; 701 mperf = sc->sc_pstate_mperf; 702 703 x86_disable_intr(); 704 705 sc->sc_pstate_aperf = rdmsr(MSR_APERF); 706 sc->sc_pstate_mperf = rdmsr(MSR_MPERF); 707 708 x86_enable_intr(); 709 710 aperf = sc->sc_pstate_aperf - aperf; 711 mperf = sc->sc_pstate_mperf - mperf; 712 713 if (__predict_true(mperf != 0)) 714 rv = (aperf * 100) / mperf; 715 716 return rv; 717 } 718 719 static void 720 acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2) 721 { 722 struct cpu_info *ci = curcpu(); 723 struct acpicpu_softc *sc; 724 725 sc = acpicpu_sc[ci->ci_acpiid]; 726 727 if (__predict_false(sc == NULL)) 728 return; 729 730 x86_disable_intr(); 731 732 wrmsr(MSR_APERF, 0); 733 wrmsr(MSR_MPERF, 0); 734 735 x86_enable_intr(); 736 737 sc->sc_pstate_aperf = 0; 738 sc->sc_pstate_mperf = 0; 739 } 740 741 int 742 acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq) 743 { 744 struct acpicpu_pstate *ps = NULL; 745 uint64_t val; 746 uint32_t i; 747 748 if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0) 749 return acpicpu_md_pstate_fidvid_get(sc, freq); 750 751 /* 752 * Pick any P-state for the status address. 753 */ 754 for (i = 0; i < sc->sc_pstate_count; i++) { 755 756 ps = &sc->sc_pstate[i]; 757 758 if (__predict_true(ps->ps_freq != 0)) 759 break; 760 } 761 762 if (__predict_false(ps == NULL)) 763 return ENODEV; 764 765 if (__predict_false(ps->ps_status_addr == 0)) 766 return EINVAL; 767 768 val = rdmsr(ps->ps_status_addr); 769 770 if (__predict_true(ps->ps_status_mask != 0)) 771 val = val & ps->ps_status_mask; 772 773 /* 774 * Search for the value from known P-states. 775 */ 776 for (i = 0; i < sc->sc_pstate_count; i++) { 777 778 ps = &sc->sc_pstate[i]; 779 780 if (__predict_false(ps->ps_freq == 0)) 781 continue; 782 783 if (val == ps->ps_status) { 784 *freq = ps->ps_freq; 785 return 0; 786 } 787 } 788 789 /* 790 * If the value was not found, try APERF/MPERF. 791 * The state is P0 if the return value is 100 %. 792 */ 793 if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) { 794 795 KASSERT(sc->sc_pstate_count > 0); 796 KASSERT(sc->sc_pstate[0].ps_freq != 0); 797 798 if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) { 799 *freq = sc->sc_pstate[0].ps_freq; 800 return 0; 801 } 802 } 803 804 return EIO; 805 } 806 807 int 808 acpicpu_md_pstate_set(struct acpicpu_pstate *ps) 809 { 810 uint64_t val = 0; 811 812 if (__predict_false(ps->ps_control_addr == 0)) 813 return EINVAL; 814 815 if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0) 816 return acpicpu_md_pstate_fidvid_set(ps); 817 818 /* 819 * If the mask is set, do a read-modify-write. 820 */ 821 if (__predict_true(ps->ps_control_mask != 0)) { 822 val = rdmsr(ps->ps_control_addr); 823 val &= ~ps->ps_control_mask; 824 } 825 826 val |= ps->ps_control; 827 828 wrmsr(ps->ps_control_addr, val); 829 DELAY(ps->ps_latency); 830 831 return 0; 832 } 833 834 static int 835 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq) 836 { 837 struct acpicpu_pstate *ps; 838 uint32_t fid, i, vid; 839 uint32_t cfid, cvid; 840 int rv; 841 842 /* 843 * AMD family 0Fh needs special treatment. 844 * While it wants to use ACPI, it does not 845 * comply with the ACPI specifications. 846 */ 847 rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid); 848 849 if (rv != 0) 850 return rv; 851 852 for (i = 0; i < sc->sc_pstate_count; i++) { 853 854 ps = &sc->sc_pstate[i]; 855 856 if (__predict_false(ps->ps_freq == 0)) 857 continue; 858 859 fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID); 860 vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID); 861 862 if (cfid == fid && cvid == vid) { 863 *freq = ps->ps_freq; 864 return 0; 865 } 866 } 867 868 return EIO; 869 } 870 871 static int 872 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps) 873 { 874 const uint64_t ctrl = ps->ps_control; 875 uint32_t cfid, cvid, fid, i, irt; 876 uint32_t pll, vco_cfid, vco_fid; 877 uint32_t val, vid, vst; 878 int rv; 879 880 rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid); 881 882 if (rv != 0) 883 return rv; 884 885 fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID); 886 vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID); 887 irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT); 888 vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST); 889 pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL); 890 891 vst = vst * 20; 892 pll = pll * 1000 / 5; 893 irt = 10 * __BIT(irt); 894 895 /* 896 * Phase 1. 897 */ 898 while (cvid > vid) { 899 900 val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS); 901 val = (val > cvid) ? 0 : cvid - val; 902 903 acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst); 904 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 905 906 if (rv != 0) 907 return rv; 908 } 909 910 i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO); 911 912 for (; i > 0 && cvid > 0; --i) { 913 914 acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst); 915 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 916 917 if (rv != 0) 918 return rv; 919 } 920 921 /* 922 * Phase 2. 923 */ 924 if (cfid != fid) { 925 926 vco_fid = FID_TO_VCO_FID(fid); 927 vco_cfid = FID_TO_VCO_FID(cfid); 928 929 while (abs(vco_fid - vco_cfid) > 2) { 930 931 if (fid <= cfid) 932 val = cfid - 2; 933 else { 934 val = (cfid > 6) ? cfid + 2 : 935 FID_TO_VCO_FID(cfid) + 2; 936 } 937 938 acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt); 939 rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL); 940 941 if (rv != 0) 942 return rv; 943 944 vco_cfid = FID_TO_VCO_FID(cfid); 945 } 946 947 acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt); 948 rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL); 949 950 if (rv != 0) 951 return rv; 952 } 953 954 /* 955 * Phase 3. 956 */ 957 if (cvid != vid) { 958 959 acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst); 960 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 961 962 if (rv != 0) 963 return rv; 964 } 965 966 return 0; 967 } 968 969 static int 970 acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid) 971 { 972 int i = ACPICPU_P_STATE_RETRY * 100; 973 uint64_t val; 974 975 do { 976 val = rdmsr(MSR_0FH_STATUS); 977 978 } while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0); 979 980 if (i == 0) 981 return EAGAIN; 982 983 if (cfid != NULL) 984 *cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID); 985 986 if (cvid != NULL) 987 *cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID); 988 989 return 0; 990 } 991 992 static void 993 acpicpu_md_pstate_fidvid_write(uint32_t fid, 994 uint32_t vid, uint32_t cnt, uint32_t tmo) 995 { 996 uint64_t val = 0; 997 998 val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID); 999 val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID); 1000 val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT); 1001 val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG); 1002 1003 wrmsr(MSR_0FH_CONTROL, val); 1004 DELAY(tmo); 1005 } 1006 1007 int 1008 acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent) 1009 { 1010 struct acpicpu_tstate *ts; 1011 uint64_t val; 1012 uint32_t i; 1013 1014 val = rdmsr(MSR_THERM_CONTROL); 1015 1016 for (i = 0; i < sc->sc_tstate_count; i++) { 1017 1018 ts = &sc->sc_tstate[i]; 1019 1020 if (ts->ts_percent == 0) 1021 continue; 1022 1023 if (val == ts->ts_status) { 1024 *percent = ts->ts_percent; 1025 return 0; 1026 } 1027 } 1028 1029 return EIO; 1030 } 1031 1032 int 1033 acpicpu_md_tstate_set(struct acpicpu_tstate *ts) 1034 { 1035 uint64_t val; 1036 uint8_t i; 1037 1038 val = ts->ts_control; 1039 val = val & __BITS(1, 4); 1040 1041 wrmsr(MSR_THERM_CONTROL, val); 1042 1043 if (ts->ts_status == 0) { 1044 DELAY(ts->ts_latency); 1045 return 0; 1046 } 1047 1048 for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) { 1049 1050 val = rdmsr(MSR_THERM_CONTROL); 1051 1052 if (val == ts->ts_status) 1053 return 0; 1054 1055 DELAY(ts->ts_latency); 1056 } 1057 1058 return EAGAIN; 1059 } 1060 1061 /* 1062 * A kludge for backwards compatibility. 1063 */ 1064 static int 1065 acpicpu_md_pstate_sysctl_init(void) 1066 { 1067 const struct sysctlnode *fnode, *mnode, *rnode; 1068 const char *str; 1069 int rv; 1070 1071 switch (cpu_vendor) { 1072 1073 case CPUVENDOR_IDT: 1074 case CPUVENDOR_INTEL: 1075 str = "est"; 1076 break; 1077 1078 case CPUVENDOR_AMD: 1079 str = "powernow"; 1080 break; 1081 1082 default: 1083 return ENODEV; 1084 } 1085 1086 1087 rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode, 1088 CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL, 1089 NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL); 1090 1091 if (rv != 0) 1092 goto fail; 1093 1094 rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode, 1095 0, CTLTYPE_NODE, str, NULL, 1096 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1097 1098 if (rv != 0) 1099 goto fail; 1100 1101 rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode, 1102 0, CTLTYPE_NODE, "frequency", NULL, 1103 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1104 1105 if (rv != 0) 1106 goto fail; 1107 1108 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1109 CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL, 1110 acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1111 1112 if (rv != 0) 1113 goto fail; 1114 1115 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1116 CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL, 1117 acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1118 1119 if (rv != 0) 1120 goto fail; 1121 1122 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1123 CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL, 1124 acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1125 1126 if (rv != 0) 1127 goto fail; 1128 1129 return 0; 1130 1131 fail: 1132 if (acpicpu_log != NULL) { 1133 sysctl_teardown(&acpicpu_log); 1134 acpicpu_log = NULL; 1135 } 1136 1137 return rv; 1138 } 1139 1140 static int 1141 acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS) 1142 { 1143 struct sysctlnode node; 1144 uint32_t freq; 1145 int err; 1146 1147 freq = cpufreq_get(curcpu()); 1148 1149 if (freq == 0) 1150 return ENXIO; 1151 1152 node = *rnode; 1153 node.sysctl_data = &freq; 1154 1155 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1156 1157 if (err != 0 || newp == NULL) 1158 return err; 1159 1160 return 0; 1161 } 1162 1163 static int 1164 acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS) 1165 { 1166 struct sysctlnode node; 1167 uint32_t freq; 1168 int err; 1169 1170 freq = cpufreq_get(curcpu()); 1171 1172 if (freq == 0) 1173 return ENXIO; 1174 1175 node = *rnode; 1176 node.sysctl_data = &freq; 1177 1178 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1179 1180 if (err != 0 || newp == NULL) 1181 return err; 1182 1183 cpufreq_set_all(freq); 1184 1185 return 0; 1186 } 1187 1188 static int 1189 acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS) 1190 { 1191 struct cpu_info *ci = curcpu(); 1192 struct acpicpu_softc *sc; 1193 struct sysctlnode node; 1194 char buf[1024]; 1195 size_t len; 1196 uint32_t i; 1197 int err; 1198 1199 sc = acpicpu_sc[ci->ci_acpiid]; 1200 1201 if (sc == NULL) 1202 return ENXIO; 1203 1204 (void)memset(&buf, 0, sizeof(buf)); 1205 1206 mutex_enter(&sc->sc_mtx); 1207 1208 for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) { 1209 1210 if (sc->sc_pstate[i].ps_freq == 0) 1211 continue; 1212 1213 len += snprintf(buf + len, sizeof(buf) - len, "%u%s", 1214 sc->sc_pstate[i].ps_freq, 1215 i < (sc->sc_pstate_count - 1) ? " " : ""); 1216 } 1217 1218 mutex_exit(&sc->sc_mtx); 1219 1220 node = *rnode; 1221 node.sysctl_data = buf; 1222 1223 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1224 1225 if (err != 0 || newp == NULL) 1226 return err; 1227 1228 return 0; 1229 } 1230 1231