/* $NetBSD: acpi_cpu_md.c,v 1.71 2012/02/11 22:09:47 jruoho Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.71 2012/02/11 22:09:47 jruoho Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

static char	 native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void	 acpicpu_md_pstate_hwf_reset(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
					      uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
						uint32_t, uint32_t);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog	     *acpicpu_log = NULL;

struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
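		 *
		 * ("Native" here corresponds to what ACPI calls
		 * "functional fixed hardware", hence the FFH in the
		 * flag names below.)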
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		/*
		 * Detect whether TSC is invariant. If it is not,
		 * we keep the flag to note that TSC will not run
		 * at constant rate. Depending on the CPU, this may
		 * affect P- and T-state changes, but especially
		 * relevant are C-states; with variant TSC, states
		 * larger than C1 may completely stop the counter.
		 */
		x86_cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000007) {

			x86_cpuid(0x80000007, regs);

			if ((regs[3] & __BIT(8)) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect invariant TSC,
			 * MSR-based P-states, and AMD's "turbo"
			 * (Core Performance Boost), respectively.
			 */
			if ((regs[3] & CPUID_APM_TSC) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;

			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in the family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_cstate_stop(void)
{
	static char text[16];
	void (*func)(void);
	uint64_t xc;
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

	ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
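	 *
	 * (The reset is broadcast as a cross-call so that the
	 * feedback counters of every CPU start from zero.)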
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if (sc->sc_pstate_count >= 2 &&
		   (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. IA32_MPERF
 * increments at the rate of the fixed maximum frequency
 * configured during the boot, whereas IA32_APERF counts at the
 * rate of the actual frequency. Note that the MSRs must be
 * read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
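 *
 * For instance, if IA32_APERF advanced by 1,600,000 and IA32_MPERF
 * by 2,000,000 between two consecutive calls, the CPU ran at
 * (1600000 * 100) / 2000000 = 80 % of its maximum frequency during
 * that interval.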
 *
 * The function thus returns the percentage of the actual
 * frequency in terms of the maximum frequency of the calling
 * CPU since the last call. A value zero implies an error.
 *
 * For further details, refer to:
 *
 *	Intel Corporation: Intel 64 and IA-32 Architectures
 *	Software Developer's Manual. Section 13.2, Volume 3A:
 *	System Programming Guide, Part 1. July, 2008.
 *
 *	Advanced Micro Devices: BIOS and Kernel Developer's
 *	Guide (BKDG) for AMD Family 10h Processors. Section
 *	2.4.5, Revision 3.48, April 2010.
 */
uint8_t
acpicpu_md_pstate_hwf(struct cpu_info *ci)
{
	struct acpicpu_softc *sc;
	uint64_t aperf, mperf;
	uint8_t rv = 0;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
		return 0;

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	x86_disable_intr();

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

static void
acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return;

	x86_disable_intr();

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	x86_enable_intr();

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	/*
	 * Pick any P-state for the status address.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	/*
	 * Search for the value from known P-states.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	/*
	 * If the value was not found, try APERF/MPERF.
	 * The state is P0 if the return value is 100 %.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {

		KASSERT(sc->sc_pstate_count > 0);
		KASSERT(sc->sc_pstate[0].ps_freq != 0);

		if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
			*freq = sc->sc_pstate[0].ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	uint64_t val = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	/*
	 * If the mask is set, do a read-modify-write.
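	 *
	 * (Only the bits covered by ps_control_mask are cleared, so
	 * the remaining bits of the control MSR are preserved.)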
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

	/*
	 * Phase 1.
	 *
	 * Step the current voltage (VID) towards the target value
	 * in steps limited by the MVS field, and then apply up to
	 * RVO additional steps (the ramp voltage offset).
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 *
	 * Step the frequency (FID) towards the target, keeping each
	 * intermediate step within two VCO FID units of the current
	 * value.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
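	 *
	 * With the target frequency (FID) now in place, program the
	 * final target voltage (VID).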
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i == 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

	val = ts->ts_control;
	val = val & __BITS(1, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
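 *
 * The nodes created below expose the legacy
 * machdep.<est|powernow>.frequency.{target,current,available}
 * interface on top of cpufreq(9), so that existing user-space
 * tools continue to work.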
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}