/* $NetBSD: acpi_cpu_md.c,v 1.39 2011/02/15 17:50:46 jruoho Exp $ */

/*-
 * Copyright (c) 2010 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.39 2011/02/15 17:50:46 jruoho Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h and 11h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0,  5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0,  5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0,  5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0,  5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))
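
/*
 * For illustration: with the mapping above, FID_TO_VCO_FID(2)
 * yields 12 and FID_TO_VCO_FID(10) yields 10, i.e. only FIDs
 * below 8 are translated for the VCO frequency comparison.
 */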

static char	  native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirks_piix4(struct pci_attach_args *);
static void	 acpicpu_md_pstate_status(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
                                              uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
					        uint32_t, uint32_t);
static void	 acpicpu_md_tstate_status(void *, void *);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static bool acpicpu_pstate_status = false;
static struct sysctllog *acpicpu_log = NULL;

uint32_t
acpicpu_md_cap(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t val = 0;

	if (cpu_vendor != CPUVENDOR_IDT &&
	    cpu_vendor != CPUVENDOR_INTEL)
		return val;

	/*
	 * Basic SMP C-states (required for _CST).
	 */
	val |= ACPICPU_PDC_C_C1PT | ACPICPU_PDC_C_C2C3;

	/*
	 * If MONITOR/MWAIT is available, announce
	 * support for native instructions in all C-states.
	 */
	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_PDC_C_C1_FFH | ACPICPU_PDC_C_C2C3_FFH;

	/*
	 * Set native P- and T-states, if available.
	 */
	if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
		val |= ACPICPU_PDC_P_FFH;

	if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
		val |= ACPICPU_PDC_T_FFH;

	return val;
}

uint32_t
acpicpu_md_quirks(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that both the local APIC timer
	 * and the TSC stall during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x06, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HW;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		/*
		 * Detect whether the TSC is invariant. If it is not,
		 * we keep the flag to note that the TSC will not run
		 * at a constant rate. Depending on the CPU, this may
		 * affect P- and T-state changes, but especially
		 * relevant are C-states; with a variant TSC, states
		 * deeper than C1 may completely stop the counter.
		 */
		x86_cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000007) {

			x86_cpuid(0x80000007, regs);

			if ((regs[3] & __BIT(8)) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			if ((regs[3] & CPUID_APM_TSC) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;

			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			val |= ACPICPU_FLAG_C_C1E;
			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirks_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirks_piix4(struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

void
acpicpu_md_quirks_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_idle_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	x86_cpu_idle_get(&native_idle, native_idle_text, size);

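	/*
	 * If any C-state is entered with HLT, the idle loop must
	 * be woken with an IPI; with MONITOR/MWAIT a store to the
	 * monitored ci_want_resched should be sufficient.
	 */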
	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_idle_stop(void)
{
	uint64_t xc;
	bool ipi;

	ipi = (native_idle == x86_cpu_idle_halt);
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out of the ACPI idle loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts disabled.
 * Caller should enable interrupts after return.
 */
void
acpicpu_md_idle_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	switch (method) {

	case ACPICPU_C_STATE_FFH:

		x86_enable_intr();
		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

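		/*
		 * The MWAIT hint in EAX[7:4] encodes the target
		 * C-state minus one; for example, C2 corresponds
		 * to the hint 0x10 and C3 to 0x20.
		 */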
		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(void)
{
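	/* Bit 16 of MSR_MISC_ENABLE is the Enhanced SpeedStep enable. */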
	const uint64_t est = __BIT(16);
	uint64_t val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		val = rdmsr(MSR_MISC_ENABLE);

		if ((val & est) == 0) {

			val |= est;

			wrmsr(MSR_MISC_ENABLE, val);
			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & est) == 0)
				return ENOTTY;
		}
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log != NULL)
		sysctl_teardown(&acpicpu_log);

	return 0;
}

int
acpicpu_md_pstate_pss(struct acpicpu_softc *sc)
{
	struct acpicpu_pstate *ps, msr;
	struct cpu_info *ci = curcpu();
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0)
			sc->sc_pstate[0].ps_flags |= ACPICPU_FLAG_P_TURBO;

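		/*
		 * The low 16 bits of IA32_PERF_CTL select the
		 * target P-state and the same bits of
		 * IA32_PERF_STATUS hold the current one.
		 */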
		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr  = MSR_PERF_STATUS;
		msr.ps_status_mask  = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr  = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr  = MSR_10H_STATUS;
			msr.ps_status_mask  = __BITS(0, 2);
			break;

		default:

			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	struct msr_rw_info msr;
	uint64_t xc;
	int rv = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	msr.msr_read  = false;
	msr.msr_type  = ps->ps_control_addr;
	msr.msr_value = ps->ps_control;

	if (__predict_true(ps->ps_control_mask != 0)) {
		msr.msr_mask = ps->ps_control_mask;
		msr.msr_read = true;
	}

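	/*
	 * Write the control MSR on all CPUs with a broadcast
	 * cross-call; when msr_read is set, x86_msr_xcall(9) is
	 * expected to do a read-modify-write, leaving the bits
	 * outside msr_mask intact.
	 */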
	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	/*
	 * Due to several problems, we bypass the
	 * relatively expensive status check.
	 */
	if (!acpicpu_pstate_status) {
		DELAY(ps->ps_latency);
		return 0;
	}

	xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_pstate_status, ps, &rv);
	xc_wait(xc);

	return rv;
}

static void
acpicpu_md_pstate_status(void *arg1, void *arg2)
{
	struct acpicpu_pstate *ps = arg1;
	uint64_t val;
	int i;

	for (i = val = 0; i < ACPICPU_P_STATE_RETRY; i++) {

		val = rdmsr(ps->ps_status_addr);

		if (__predict_true(ps->ps_status_mask != 0))
			val = val & ps->ps_status_mask;

		if (val == ps->ps_status)
			return;

		DELAY(ps->ps_latency);
	}

	*(uintptr_t *)arg2 = EAGAIN;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment: although the
	 * P-states come from ACPI, the current frequency and
	 * voltage must be read from the FID/VID MSRs.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

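	/*
	 * Scale the encoded delays to the microseconds expected by
	 * DELAY(9): the voltage stabilization time (VST) is in units
	 * of 20 us and the isochronous relief time (IRT) is 10 us
	 * times a power of two; the PLL lock time is scaled likewise.
	 * Cf. the AMD family 0Fh BIOS and kernel developer's guide.
	 */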
	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);
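
	/*
	 * The transition follows the three-phase algorithm from the
	 * AMD family 0Fh documentation: the voltage is first raised
	 * as needed (including the ramp voltage offset), the
	 * frequency is then stepped so that the VCO frequency delta
	 * stays bounded, and the voltage is finally lowered to its
	 * target.
	 */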

	/*
	 * Phase 1.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 */
	if (cfid != fid) {

		vco_fid  = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	if (cfid != fid || cvid != vid)
		return EIO;

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i > 0);

	if (i == 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	struct msr_rw_info msr;
	uint64_t xc;

	msr.msr_read  = false;
	msr.msr_type  = MSR_0FH_CONTROL;
	msr.msr_value = 0;

	msr.msr_value |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	msr.msr_value |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	msr.msr_value |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	msr.msr_value |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	struct msr_rw_info msr;
	uint64_t xc;
	int rv = 0;

	msr.msr_read  = true;
	msr.msr_type  = MSR_THERM_CONTROL;
	msr.msr_value = ts->ts_control;
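	/*
	 * Bits 1 to 4 of MSR_THERM_CONTROL hold the on-demand
	 * clock modulation duty cycle field and its enable bit.
	 */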
	msr.msr_mask  = __BITS(1, 4);

	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_tstate_status, ts, &rv);
	xc_wait(xc);

	return rv;
}

static void
acpicpu_md_tstate_status(void *arg1, void *arg2)
{
	struct acpicpu_tstate *ts = arg1;
	uint64_t val;
	int i;

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return;

		DELAY(ts->ts_latency);
	}

	*(uintptr_t *)arg2 = EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
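/*
 * The tree below mimics the nodes of the old est(4) and
 * powernow(4) drivers; for instance (values illustrative only):
 *
 *	machdep.est.frequency.target = 1600
 *	machdep.est.frequency.current = 1600
 *	machdep.est.frequency.available = 1600 1400 1200
 */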
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode	*fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	uint32_t freq;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	err = acpicpu_pstate_get(sc, &freq);

	if (err != 0)
		return err;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	uint32_t freq;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	err = acpicpu_pstate_get(sc, &freq);

	if (err != 0)
		return err;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	err = acpicpu_pstate_set(sc, freq);

	if (err != 0)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}