/* $NetBSD: acpi_cpu_md.c,v 1.82 2020/03/14 13:50:46 ad Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.82 2020/03/14 13:50:46 ad Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/machdep.h>
#include <x86/x86/tsc.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
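 * (These registers implement the older FID/VID interface, in contrast
 * to the P-state MSRs of families 10h and later defined above.)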
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0,  5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0,  5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0,  5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0,  5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

#ifdef ACPICPU_ENABLE_C3
static char	  native_idle_text[16];
void		(*native_idle)(void) = NULL;
#endif

static int	 acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void	 acpicpu_md_pstate_hwf_reset(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		     uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		     uint32_t, uint32_t);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog	     *acpicpu_log = NULL;

struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	/*
	 * Detect whether the TSC is invariant. If it is not, we keep
	 * the flag to note that the TSC does not run at a constant rate.
	 * Depending on the CPU, this may affect P- and T-state changes,
	 * but it is especially relevant for C-states; with a variant
	 * TSC, states deeper than C1 may completely stop the counter.
	 */
	if (tsc_is_invariant())
		val &= ~ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect MSR-based P-states,
			 * and AMD's "turbo" (Core Performance Boost),
			 * respectively.
			 */
			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in the family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
#ifdef ACPICPU_ENABLE_C3
	/*
	 * XXX: There are performance problems with the ACPI idle loop,
	 *	and it does not enter deep sleep. Once those are resolved
	 *	it'll be re-enabled.
	 */
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);
#endif	/* ACPICPU_ENABLE_C3 */

	return 0;
}

int
acpicpu_md_cstate_stop(void)
{
#ifdef ACPICPU_ENABLE_C3
	/*
	 * XXX: There are performance problems with the ACPI idle loop,
	 *	and it does not enter deep sleep. Once those are resolved
	 *	it'll be re-enabled.
	 */
	static char text[16];
	void (*func)(void);
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

	ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc_barrier(0);
#endif	/* ACPICPU_ENABLE_C3 */

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void __nocsan
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
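	 * The counters are cleared on every CPU via a broadcast
	 * cross-call, so that subsequent ratio calculations start
	 * from a known state.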
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if (sc->sc_pstate_count >= 2 &&
		   (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. The latter
 * increments at the rate of the fixed maximum frequency
 * configured during the boot, whereas IA32_APERF counts at
 * the rate of the actual frequency. Note that the MSRs must
 * be read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
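 *
 * As an illustrative example (hypothetical numbers): if IA32_APERF
 * advanced by 80 while IA32_MPERF advanced by 100 between two reads,
 * the CPU ran at roughly 80 % of its maximum frequency over that
 * interval.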
624 * 625 * The function thus returns the percentage of the actual 626 * frequency in terms of the maximum frequency of the calling 627 * CPU since the last call. A value zero implies an error. 628 * 629 * For further details, refer to: 630 * 631 * Intel Corporation: Intel 64 and IA-32 Architectures 632 * Software Developer's Manual. Section 13.2, Volume 3A: 633 * System Programming Guide, Part 1. July, 2008. 634 * 635 * Advanced Micro Devices: BIOS and Kernel Developer's 636 * Guide (BKDG) for AMD Family 10h Processors. Section 637 * 2.4.5, Revision 3.48, April 2010. 638 */ 639 uint8_t 640 acpicpu_md_pstate_hwf(struct cpu_info *ci) 641 { 642 struct acpicpu_softc *sc; 643 uint64_t aperf, mperf; 644 uint8_t rv = 0; 645 646 sc = acpicpu_sc[ci->ci_acpiid]; 647 648 if (__predict_false(sc == NULL)) 649 return 0; 650 651 if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0)) 652 return 0; 653 654 aperf = sc->sc_pstate_aperf; 655 mperf = sc->sc_pstate_mperf; 656 657 x86_disable_intr(); 658 659 sc->sc_pstate_aperf = rdmsr(MSR_APERF); 660 sc->sc_pstate_mperf = rdmsr(MSR_MPERF); 661 662 x86_enable_intr(); 663 664 aperf = sc->sc_pstate_aperf - aperf; 665 mperf = sc->sc_pstate_mperf - mperf; 666 667 if (__predict_true(mperf != 0)) 668 rv = (aperf * 100) / mperf; 669 670 return rv; 671 } 672 673 static void 674 acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2) 675 { 676 struct cpu_info *ci = curcpu(); 677 struct acpicpu_softc *sc; 678 679 sc = acpicpu_sc[ci->ci_acpiid]; 680 681 if (__predict_false(sc == NULL)) 682 return; 683 684 x86_disable_intr(); 685 686 wrmsr(MSR_APERF, 0); 687 wrmsr(MSR_MPERF, 0); 688 689 x86_enable_intr(); 690 691 sc->sc_pstate_aperf = 0; 692 sc->sc_pstate_mperf = 0; 693 } 694 695 int 696 acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq) 697 { 698 struct acpicpu_pstate *ps = NULL; 699 uint64_t val; 700 uint32_t i; 701 702 if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0) 703 return acpicpu_md_pstate_fidvid_get(sc, freq); 704 705 /* 706 * Pick any P-state for the status address. 707 */ 708 for (i = 0; i < sc->sc_pstate_count; i++) { 709 710 ps = &sc->sc_pstate[i]; 711 712 if (__predict_true(ps->ps_freq != 0)) 713 break; 714 } 715 716 if (__predict_false(ps == NULL)) 717 return ENODEV; 718 719 if (__predict_false(ps->ps_status_addr == 0)) 720 return EINVAL; 721 722 val = rdmsr(ps->ps_status_addr); 723 724 if (__predict_true(ps->ps_status_mask != 0)) 725 val = val & ps->ps_status_mask; 726 727 /* 728 * Search for the value from known P-states. 729 */ 730 for (i = 0; i < sc->sc_pstate_count; i++) { 731 732 ps = &sc->sc_pstate[i]; 733 734 if (__predict_false(ps->ps_freq == 0)) 735 continue; 736 737 if (val == ps->ps_status) { 738 *freq = ps->ps_freq; 739 return 0; 740 } 741 } 742 743 /* 744 * If the value was not found, try APERF/MPERF. 745 * The state is P0 if the return value is 100 %. 746 */ 747 if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) { 748 749 KASSERT(sc->sc_pstate_count > 0); 750 KASSERT(sc->sc_pstate[0].ps_freq != 0); 751 752 if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) { 753 *freq = sc->sc_pstate[0].ps_freq; 754 return 0; 755 } 756 } 757 758 return EIO; 759 } 760 761 int 762 acpicpu_md_pstate_set(struct acpicpu_pstate *ps) 763 { 764 uint64_t val = 0; 765 766 if (__predict_false(ps->ps_control_addr == 0)) 767 return EINVAL; 768 769 if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0) 770 return acpicpu_md_pstate_fidvid_set(ps); 771 772 /* 773 * If the mask is set, do a read-modify-write. 
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

	/*
	 * Phase 1.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
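	 * Write the final target voltage (VID) requested by the P-state.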
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i == 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

	val = ts->ts_control;
	val = val & __BITS(0, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
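 * These nodes mimic the machdep.est.* and machdep.powernow.* sysctl
 * trees familiar from the older est(4) and powernow(4) drivers.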
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		if (len >= sizeof(buf))
			break;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}