xref: /openbsd-src/sys/dev/acpi/acpicpu.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /* $OpenBSD: acpicpu.c,v 1.78 2016/09/18 23:56:45 guenther Exp $ */
2 /*
3  * Copyright (c) 2005 Marco Peereboom <marco@openbsd.org>
4  * Copyright (c) 2015 Philip Guenther <guenther@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/kernel.h>		/* for tick */
21 #include <sys/signalvar.h>
22 #include <sys/sysctl.h>
23 #include <sys/systm.h>
24 #include <sys/device.h>
25 #include <sys/malloc.h>
26 #include <sys/queue.h>
27 #include <sys/atomic.h>
28 
29 #include <machine/bus.h>
30 #include <machine/cpu.h>
31 #include <machine/cpufunc.h>
32 #include <machine/specialreg.h>
33 
34 #include <dev/acpi/acpireg.h>
35 #include <dev/acpi/acpivar.h>
36 #include <dev/acpi/acpidev.h>
37 #include <dev/acpi/amltypes.h>
38 #include <dev/acpi/dsdt.h>
39 
40 #include <sys/sensors.h>
41 
42 int	acpicpu_match(struct device *, void *, void *);
43 void	acpicpu_attach(struct device *, struct device *, void *);
44 int	acpicpu_notify(struct aml_node *, int, void *);
45 void	acpicpu_setperf(int);
46 void	acpicpu_setperf_ppc_change(struct acpicpu_pss *, int);
47 
48 #define ACPI_STATE_C0		0x00
49 #define ACPI_STATE_C1		0x01
50 #define ACPI_STATE_C2		0x02
51 #define ACPI_STATE_C3		0x03
52 
53 #define ACPI_PDC_REVID		0x1
54 #define ACPI_PDC_SMP		0xa
55 #define ACPI_PDC_MSR		0x1
56 
57 /* _PDC/_OSC Intel capabilities flags */
58 #define ACPI_PDC_P_FFH		0x0001
59 #define ACPI_PDC_C_C1_HALT	0x0002
60 #define ACPI_PDC_T_FFH		0x0004
61 #define ACPI_PDC_SMP_C1PT	0x0008
62 #define ACPI_PDC_SMP_C2C3	0x0010
63 #define ACPI_PDC_SMP_P_SWCOORD	0x0020
64 #define ACPI_PDC_SMP_C_SWCOORD	0x0040
65 #define ACPI_PDC_SMP_T_SWCOORD	0x0080
66 #define ACPI_PDC_C_C1_FFH	0x0100
67 #define ACPI_PDC_C_C2C3_FFH	0x0200
68 /* reserved			0x0400 */
69 #define ACPI_PDC_P_HWCOORD	0x0800
70 #define ACPI_PDC_PPC_NOTIFY	0x1000
71 
72 #define CST_METH_HALT		0
73 #define CST_METH_IO_HALT	1
74 #define CST_METH_MWAIT		2
75 #define CST_METH_GAS_IO		3
76 
77 /* flags on Intel's FFH mwait method */
78 #define CST_FLAG_MWAIT_HW_COORD		0x1
79 #define CST_FLAG_MWAIT_BM_AVOIDANCE	0x2
80 #define CST_FLAG_FALLBACK		0x4000	/* fallback for broken _CST */
81 #define CST_FLAG_SKIP			0x8000	/* state is worse choice */
82 
83 #define FLAGS_MWAIT_ONLY	0x02
84 #define FLAGS_BMCHECK		0x04
85 #define FLAGS_NOTHROTTLE	0x08
86 #define FLAGS_NOPSS		0x10
87 #define FLAGS_NOPCT		0x20
88 
89 #define CPU_THT_EN		(1L << 4)
90 #define CPU_MAXSTATE(sc)	(1L << (sc)->sc_duty_wid)
91 #define CPU_STATE(sc,pct)	((pct * CPU_MAXSTATE(sc) / 100) << (sc)->sc_duty_off)
92 #define CPU_STATEMASK(sc)	((CPU_MAXSTATE(sc) - 1) << (sc)->sc_duty_off)
93 
94 #define ACPI_MAX_C2_LATENCY	100
95 #define ACPI_MAX_C3_LATENCY	1000
96 
97 #define CSD_COORD_SW_ALL	0xFC
98 #define CSD_COORD_SW_ANY	0xFD
99 #define CSD_COORD_HW_ALL	0xFE
100 
101 /* Make sure throttling bits are valid,a=addr,o=offset,w=width */
102 #define valid_throttle(o,w,a)	(a && w && (o+w)<=31 && (o>4 || (o+w)<=4))
103 
/*
 * One C-state (CPU idle state) usable on this CPU.  Entries live on
 * the softc's sc_cstates list; acpicpu_add_cstate() inserts at the
 * head, so the list ends up ordered deepest-state-first.
 */
struct acpi_cstate
{
	SLIST_ENTRY(acpi_cstate) link;

	u_short		state;		/* ACPI_STATE_C0..C3 */
	short		method;		/* CST_METH_* — how to enter the state */
	u_short		flags;		/* CST_FLAG_* */
	u_short		latency;	/* wakeup latency from _CST/FADT
					   (presumably usec — per ACPI spec) */
	int		power;		/* power figure from _CST; -1 = unknown */
	u_int64_t	address;	/* I/O port address, or mwait hint */
};
115 
/*
 * Usage counters indexed by C-state number (C0..C3).
 * NOTE(review): no writer is visible in this chunk — presumably
 * updated by acpicpu_idle(); confirm against the rest of the file.
 */
unsigned long cst_stats[4] = { 0 };
117 
/* Per-CPU driver state; one instance per attached acpicpu device. */
struct acpicpu_softc {
	struct device		sc_dev;
	int			sc_cpu;		/* ACPI processor ID */

	/* FADT duty-cycle (throttling) bit-field location */
	int			sc_duty_wid;
	int			sc_duty_off;
	u_int32_t		sc_pblk_addr;	/* P_BLK I/O base address */
	int			sc_pblk_len;	/* P_BLK length in bytes */
	int			sc_flags;	/* FLAGS_* */
	unsigned long		sc_prev_sleep;
	unsigned long		sc_last_itime;

	struct cpu_info		*sc_ci;		/* matching MI cpu_info */
	SLIST_HEAD(,acpi_cstate) sc_cstates;	/* known C-states, deepest first */

	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;

	struct acpi_softc	*sc_acpi;
	struct aml_node		*sc_devnode;

	int			sc_pss_len;	/* number of valid _PSS entries */
	int			sc_ppc;		/* current _PPC limit; 0 = none */
	int			sc_level;	/* current perf level in percent */
	struct acpicpu_pss	*sc_pss;	/* cached _PSS table */

	struct acpicpu_pct	sc_pct;
	/* save compensation for pct access for lying bios' */
	u_int32_t		sc_pct_stat_as;	/* status access size (bytes) */
	u_int32_t		sc_pct_ctrl_as;	/* control access size (bytes) */
	u_int32_t		sc_pct_stat_len;
	u_int32_t		sc_pct_ctrl_len;
	/*
	 * XXX: _PPC Change listener
	 * PPC changes can occur when for example a machine is disconnected
	 * from AC power and can no longer support the highest frequency or
	 * voltage when driven from the battery.
	 * Should probably be reimplemented as a list; for now we assume only
	 * one listener
	 */
	void			(*sc_notify)(struct acpicpu_pss *, int);
};
160 
161 void	acpicpu_add_cstatepkg(struct aml_value *, void *);
162 void	acpicpu_add_cdeppkg(struct aml_value *, void *);
163 int	acpicpu_getppc(struct acpicpu_softc *);
164 int	acpicpu_getpct(struct acpicpu_softc *);
165 int	acpicpu_getpss(struct acpicpu_softc *);
166 int	acpicpu_getcst(struct acpicpu_softc *);
167 void	acpicpu_getcst_from_fadt(struct acpicpu_softc *);
168 void	acpicpu_print_one_cst(struct acpi_cstate *_cx);
169 void	acpicpu_print_cst(struct acpicpu_softc *_sc);
170 void	acpicpu_add_cstate(struct acpicpu_softc *_sc, int _state, int _method,
171 	    int _flags, int _latency, int _power, u_int64_t _address);
172 void	acpicpu_set_pdc(struct acpicpu_softc *);
173 void	acpicpu_idle(void);
174 
175 #if 0
176 void    acpicpu_set_throttle(struct acpicpu_softc *, int);
177 struct acpi_cstate *acpicpu_find_cstate(struct acpicpu_softc *, int);
178 #endif
179 
/* autoconf(9) attachment glue */
struct cfattach acpicpu_ca = {
	sizeof(struct acpicpu_softc), acpicpu_match, acpicpu_attach
};

struct cfdriver acpicpu_cd = {
	NULL, "acpicpu", DV_DULL
};
187 
188 extern int setperf_prio;
189 
190 struct acpicpu_softc *acpicpu_sc[MAXCPUS];
191 
192 #if 0
/*
 * Legacy duty-cycle throttling via the P_BLK register (currently
 * compiled out with #if 0).  'level' is a percentage; 100 means
 * throttling fully disabled.
 */
void
acpicpu_set_throttle(struct acpicpu_softc *sc, int level)
{
	uint32_t pbval;

	if (sc->sc_flags & FLAGS_NOTHROTTLE)
		return;

	/* Disable throttling control */
	pbval = inl(sc->sc_pblk_addr);
	outl(sc->sc_pblk_addr, pbval & ~CPU_THT_EN);
	if (level < 100) {
		/* program the new duty-cycle value, then re-enable */
		pbval &= ~CPU_STATEMASK(sc);
		pbval |= CPU_STATE(sc, level);
		outl(sc->sc_pblk_addr, pbval & ~CPU_THT_EN);
		outl(sc->sc_pblk_addr, pbval | CPU_THT_EN);
	}
}
211 
/*
 * Look up the acpi_cstate entry for a given C-state number
 * (currently compiled out with #if 0).  Returns NULL if absent.
 */
struct acpi_cstate *
acpicpu_find_cstate(struct acpicpu_softc *sc, int state)
{
	struct acpi_cstate	*cx;

	SLIST_FOREACH(cx, &sc->sc_cstates, link)
		if (cx->state == state)
			return cx;
	return (NULL);
}
222 #endif
223 
224 
225 void
226 acpicpu_set_pdc(struct acpicpu_softc *sc)
227 {
228 	struct aml_value cmd, osc_cmd[4];
229 	struct aml_value res;
230 	uint32_t cap;
231 	uint32_t buf[3];
232 
233 	/* 4077A616-290C-47BE-9EBD-D87058713953 */
234 	static uint8_t cpu_oscuuid[16] = { 0x16, 0xA6, 0x77, 0x40, 0x0C, 0x29,
235 					   0xBE, 0x47, 0x9E, 0xBD, 0xD8, 0x70,
236 					   0x58, 0x71, 0x39, 0x53 };
237 	cap = ACPI_PDC_C_C1_HALT | ACPI_PDC_P_FFH | ACPI_PDC_C_C1_FFH
238 	    | ACPI_PDC_C_C2C3_FFH | ACPI_PDC_SMP_P_SWCOORD | ACPI_PDC_SMP_C2C3
239 	    | ACPI_PDC_SMP_C1PT;
240 
241 	if (aml_searchname(sc->sc_devnode, "_OSC")) {
242 		/* Query _OSC */
243 		memset(&osc_cmd, 0, sizeof(cmd) * 4);
244 		osc_cmd[0].type = AML_OBJTYPE_BUFFER;
245 		osc_cmd[0].v_buffer = (uint8_t *)&cpu_oscuuid;
246 		osc_cmd[0].length = sizeof(cpu_oscuuid);
247 
248 		osc_cmd[1].type = AML_OBJTYPE_INTEGER;
249 		osc_cmd[1].v_integer = 1;
250 		osc_cmd[1].length = 1;
251 
252 		osc_cmd[2].type = AML_OBJTYPE_INTEGER;
253 		osc_cmd[2].v_integer = 2;
254 		osc_cmd[2].length = 1;
255 
256 		buf[0] = 1;
257 		buf[1] = cap;
258 		osc_cmd[3].type = AML_OBJTYPE_BUFFER;
259 		osc_cmd[3].v_buffer = (int8_t *)&buf;
260 		osc_cmd[3].length = sizeof(buf);
261 
262 		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_OSC",
263 		    4, osc_cmd, &res);
264 
265 		if (res.type != AML_OBJTYPE_BUFFER || res.length < 8) {
266 			printf(": unable to query capabilities\n");
267 			aml_freevalue(&res);
268 			return;
269 		}
270 
271 		/* Evaluate _OSC */
272 		memset(&osc_cmd, 0, sizeof(cmd) * 4);
273 		osc_cmd[0].type = AML_OBJTYPE_BUFFER;
274 		osc_cmd[0].v_buffer = (uint8_t *)&cpu_oscuuid;
275 		osc_cmd[0].length = sizeof(cpu_oscuuid);
276 
277 		osc_cmd[1].type = AML_OBJTYPE_INTEGER;
278 		osc_cmd[1].v_integer = 1;
279 		osc_cmd[1].length = 1;
280 
281 		osc_cmd[2].type = AML_OBJTYPE_INTEGER;
282 		osc_cmd[2].v_integer = 2;
283 		osc_cmd[2].length = 1;
284 
285 		buf[0] = 0;
286 		buf[1] = (*(uint32_t *)&res.v_buffer[4]) & cap;
287 		osc_cmd[3].type = AML_OBJTYPE_BUFFER;
288 		osc_cmd[3].v_buffer = (int8_t *)&buf;
289 		osc_cmd[3].length = sizeof(buf);
290 
291 		aml_freevalue(&res);
292 
293 		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_OSC",
294 		    4, osc_cmd, NULL);
295 	} else {
296 		/* Evaluate _PDC */
297 		memset(&cmd, 0, sizeof(cmd));
298 		cmd.type = AML_OBJTYPE_BUFFER;
299 		cmd.v_buffer = (uint8_t *)&buf;
300 		cmd.length = sizeof(buf);
301 
302 		buf[0] = ACPI_PDC_REVID;
303 		buf[1] = 1;
304 		buf[2] = cap;
305 
306 		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PDC",
307 		    1, &cmd, NULL);
308 	}
309 }
310 
311 /*
312  * sanity check mwait hints against what cpuid told us
313  */
314 static int
315 check_mwait_hints(int state, int hints)
316 {
317 	int cstate;
318 	int substate;
319 	int num_substates;
320 
321 	if (cpu_mwait_size == 0)
322 		return (0);
323 	cstate = ((hints >> 4) & 0xf) + 1;
324 	if (cstate == 16)
325 		cstate = 0;
326 	else if (cstate > 7) {
327 		/* out of range of test against CPUID; just trust'em */
328 		return (1);
329 	}
330 	substate = hints & 0xf;
331 	num_substates = (cpu_mwait_states >> (4 * cstate)) & 0xf;
332 	if (substate >= num_substates) {
333 		printf("\nC%d: state %d: substate %d >= num %d",
334 		    state, cstate, substate, num_substates);
335 		return (0);
336 	}
337 	return (1);
338 }
339 
340 void
341 acpicpu_add_cstate(struct acpicpu_softc *sc, int state, int method,
342     int flags, int latency, int power, u_int64_t address)
343 {
344 	struct acpi_cstate	*cx;
345 
346 	dnprintf(10," C%d: latency:.%4x power:%.4x addr:%.16llx\n",
347 	    state, latency, power, address);
348 
349 	/* add a new state, or overwrite the fallback C1 state? */
350 	if (state != ACPI_STATE_C1 ||
351 	    (cx = SLIST_FIRST(&sc->sc_cstates)) == NULL ||
352 	    (cx->flags & CST_FLAG_FALLBACK) == 0) {
353 		cx = malloc(sizeof(*cx), M_DEVBUF, M_WAITOK);
354 		SLIST_INSERT_HEAD(&sc->sc_cstates, cx, link);
355 	}
356 
357 	cx->state = state;
358 	cx->method = method;
359 	cx->flags = flags;
360 	cx->latency = latency;
361 	cx->power = power;
362 	cx->address = address;
363 }
364 
365 /* Found a _CST object, add new cstate for each entry */
366 void
367 acpicpu_add_cstatepkg(struct aml_value *val, void *arg)
368 {
369 	struct acpicpu_softc	*sc = arg;
370 	u_int64_t addr;
371 	struct acpi_grd *grd;
372 	int state, method, flags;
373 
374 #if defined(ACPI_DEBUG) && !defined(SMALL_KERNEL)
375 	aml_showvalue(val);
376 #endif
377 	if (val->type != AML_OBJTYPE_PACKAGE || val->length != 4)
378 		return;
379 
380 	/* range and sanity checks */
381 	state = val->v_package[1]->v_integer;
382 	if (state < 0 || state > 4)
383 		return;
384 	if (val->v_package[0]->type != AML_OBJTYPE_BUFFER) {
385 		printf("\nC%d: unexpected ACPI object type %d",
386 		    state, val->v_package[0]->type);
387 		return;
388 	}
389 	grd = (struct acpi_grd *)val->v_package[0]->v_buffer;
390 	if (val->v_package[0]->length != sizeof(*grd) + 2 ||
391 	    grd->grd_descriptor != LR_GENREGISTER ||
392 	    grd->grd_length != sizeof(grd->grd_gas) ||
393 	    val->v_package[0]->v_buffer[sizeof(*grd)] != SRT_ENDTAG) {
394 		printf("\nC%d: bogo buffer", state);
395 		return;
396 	}
397 
398 	flags = 0;
399 	switch (grd->grd_gas.address_space_id) {
400 	case GAS_FUNCTIONAL_FIXED:
401 		if (grd->grd_gas.register_bit_width == 0) {
402 			method = CST_METH_HALT;
403 			addr = 0;
404 		} else if (grd->grd_gas.register_bit_width == 1 ||
405 		           grd->grd_gas.register_bit_width == 8) {
406 			/*
407 			 * vendor 1 == Intel
408 			 * vendor 8 == "AML author used the bitwidth"
409 			 */
410 			switch (grd->grd_gas.register_bit_offset) {
411 			case 0x1:
412 				method = CST_METH_IO_HALT;
413 				addr = grd->grd_gas.address;
414 
415 				/* i386 and amd64 I/O space is 16bits */
416 				if (addr > 0xffff) {
417 					printf("\nC%d: bogo I/O addr %llx",
418 					    state, addr);
419 					return;
420 				}
421 				break;
422 			case 0x2:
423 				addr = grd->grd_gas.address;
424 				if (!check_mwait_hints(state, addr))
425 					return;
426 				method = CST_METH_MWAIT;
427 				flags = grd->grd_gas.access_size;
428 				break;
429 			default:
430 				printf("\nC%d: unknown FFH class %d",
431 				    state, grd->grd_gas.register_bit_offset);
432 				return;
433 			}
434 		} else {
435 			printf("\nC%d: unknown FFH vendor %d",
436 			    state, grd->grd_gas.register_bit_width);
437 			return;
438 		}
439 		break;
440 
441 	case GAS_SYSTEM_IOSPACE:
442 		addr = grd->grd_gas.address;
443 		if (grd->grd_gas.register_bit_width != 8 ||
444 		    grd->grd_gas.register_bit_offset != 0) {
445 			printf("\nC%d: unhandled %s spec: %d/%d", state,
446 			    "I/O", grd->grd_gas.register_bit_width,
447 			    grd->grd_gas.register_bit_offset);
448 			return;
449 		}
450 		method = CST_METH_GAS_IO;
451 		break;
452 
453 	default:
454 		/* dump the GAS for analysis */
455 		{
456 			int i;
457 			printf("\nC%d: unhandled GAS:", state);
458 			for (i = 0; i < sizeof(grd->grd_gas); i++)
459 				printf(" %#x", ((u_char *)&grd->grd_gas)[i]);
460 
461 		}
462 		return;
463 	}
464 
465 	acpicpu_add_cstate(sc, state, method, flags,
466 	    val->v_package[2]->v_integer, val->v_package[3]->v_integer, addr);
467 }
468 
469 
470 /* Found a _CSD object, print the dependency  */
471 void
472 acpicpu_add_cdeppkg(struct aml_value *val, void *arg)
473 {
474 	int64_t	num_proc, coord_type, domain, cindex;
475 
476 	/*
477 	 * errors: unexpected object type, bad length, mismatched length,
478 	 * and bad CSD revision
479 	 */
480 	if (val->type != AML_OBJTYPE_PACKAGE || val->length < 6 ||
481 	    val->length != val->v_package[0]->v_integer ||
482 	    val->v_package[1]->v_integer != 0) {
483 #if 1 || defined(ACPI_DEBUG) && !defined(SMALL_KERNEL)
484 		aml_showvalue(val);
485 #endif
486 		printf("bogus CSD\n");
487 		return;
488 	}
489 
490 	/* coordinating 'among' one CPU is trivial, ignore */
491 	num_proc = val->v_package[4]->v_integer;
492 	if (num_proc == 1)
493 		return;
494 
495 	/* we practically assume the hardware will coordinate, so ignore */
496 	coord_type = val->v_package[3]->v_integer;
497 	if (coord_type == CSD_COORD_HW_ALL)
498 		return;
499 
500 	domain = val->v_package[2]->v_integer;
501 	cindex = val->v_package[5]->v_integer;
502 	printf("\nCSD c=%#llx d=%lld n=%lld i=%lli\n",
503 	    coord_type, domain, num_proc, cindex);
504 }
505 
/*
 * Rebuild the C-state list from _CST.  Returns 1 if _CST is missing
 * or yielded nothing usable (caller then falls back to the FADT),
 * 0 on success.
 */
int
acpicpu_getcst(struct acpicpu_softc *sc)
{
	struct aml_value	res;
	struct acpi_cstate	*cx, *next_cx;
	int			use_nonmwait;

	/* delete the existing list */
	while ((cx = SLIST_FIRST(&sc->sc_cstates)) != NULL) {
		SLIST_REMOVE_HEAD(&sc->sc_cstates, link);
		free(cx, M_DEVBUF, sizeof(*cx));
	}

	/* provide a fallback C1-via-halt in case _CST's C1 is bogus */
	acpicpu_add_cstate(sc, ACPI_STATE_C1, CST_METH_HALT,
	    CST_FLAG_FALLBACK, 1, -1, 0);

	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_CST", 0, NULL, &res))
		return (1);

	aml_foreachpkg(&res, 1, acpicpu_add_cstatepkg, sc);
	aml_freevalue(&res);

	/* only have fallback state?  then no _CST objects were understood */
	cx = SLIST_FIRST(&sc->sc_cstates);
	if (cx->flags & CST_FLAG_FALLBACK)
		return (1);

	/*
	 * Skip states >= C2 if the CPU's LAPIC timer stops in deep
	 * states (i.e., it doesn't have the 'ARAT' bit set).
	 * Also keep track if all the states we'll use use mwait.
	 * Note the loop body never runs for the final list entry
	 * (the one with no successor): the shallowest state is
	 * deliberately left unmarked so something is always usable.
	 */
	use_nonmwait = 0;
	while ((next_cx = SLIST_NEXT(cx, link)) != NULL) {
		if (cx->state > 1 &&
		    (sc->sc_ci->ci_feature_tpmflags & TPM_ARAT) == 0)
			cx->flags |= CST_FLAG_SKIP;
		else if (cx->method != CST_METH_MWAIT)
			use_nonmwait = 1;
		cx = next_cx;
	}
	if (use_nonmwait)
		sc->sc_flags &= ~FLAGS_MWAIT_ONLY;
	else
		sc->sc_flags |= FLAGS_MWAIT_ONLY;

	/* _CSD dependencies are only reported, not acted upon */
	if (!aml_evalname(sc->sc_acpi, sc->sc_devnode, "_CSD", 0, NULL, &res)) {
		aml_foreachpkg(&res, 1, acpicpu_add_cdeppkg, sc);
		aml_freevalue(&res);
	}

	return (0);
}
560 
561 /*
562  * old-style fixed C-state info in the FADT.
563  * Note that this has extra restrictions on values and flags.
564  */
565 void
566 acpicpu_getcst_from_fadt(struct acpicpu_softc *sc)
567 {
568 	struct acpi_fadt	*fadt = sc->sc_acpi->sc_fadt;
569 	int flags;
570 
571 	/* FADT has to set flag to do C2 and higher on MP */
572 	if ((fadt->flags & FADT_P_LVL2_UP) == 0 && ncpus > 1)
573 		return;
574 
575 	/* skip these C2 and C3 states if the CPU doesn't have ARAT */
576 	flags = (sc->sc_ci->ci_feature_tpmflags & TPM_ARAT)
577 	    ? 0 : CST_FLAG_SKIP;
578 
579 	/* Some systems don't export a full PBLK; reduce functionality */
580 	if (sc->sc_pblk_len >= 5 && fadt->p_lvl2_lat <= ACPI_MAX_C2_LATENCY) {
581 		acpicpu_add_cstate(sc, ACPI_STATE_C2, CST_METH_GAS_IO, flags,
582 		    fadt->p_lvl2_lat, -1, sc->sc_pblk_addr + 4);
583 	}
584 	if (sc->sc_pblk_len >= 6 && fadt->p_lvl3_lat <= ACPI_MAX_C3_LATENCY)
585 		acpicpu_add_cstate(sc, ACPI_STATE_C3, CST_METH_GAS_IO, flags,
586 		    fadt->p_lvl3_lat, -1, sc->sc_pblk_addr + 5);
587 }
588 
589 
590 void
591 acpicpu_print_one_cst(struct acpi_cstate *cx)
592 {
593 	const char *meth = "";
594 	int show_addr = 0;
595 
596 	switch (cx->method) {
597 	case CST_METH_IO_HALT:
598 		show_addr = 1;
599 		/* fallthrough */
600 	case CST_METH_HALT:
601 		meth = " halt";
602 		break;
603 
604 	case CST_METH_MWAIT:
605 		meth = " mwait";
606 		show_addr = cx->address != 0;
607 		break;
608 
609 	case CST_METH_GAS_IO:
610 		meth = " io";
611 		show_addr = 1;
612 		break;
613 
614 	}
615 
616 	printf(" %sC%d(", (cx->flags & CST_FLAG_SKIP ? "!" : ""), cx->state);
617 	if (cx->power != -1)
618 		printf("%d", cx->power);
619 	printf("@%d%s", cx->latency, meth);
620 	if (cx->flags & ~CST_FLAG_SKIP) {
621 		if (cx->flags & CST_FLAG_FALLBACK)
622 			printf("!");
623 		else
624 			printf(".%x", (cx->flags & ~CST_FLAG_SKIP));
625 	}
626 	if (show_addr)
627 		printf("@0x%llx", cx->address);
628 	printf(")");
629 }
630 
631 void
632 acpicpu_print_cst(struct acpicpu_softc *sc)
633 {
634 	struct acpi_cstate	*cx;
635 	int i;
636 
637 	if (!SLIST_EMPTY(&sc->sc_cstates)) {
638 		printf(":");
639 
640 		i = 0;
641 		SLIST_FOREACH(cx, &sc->sc_cstates, link) {
642 			if (i++)
643 				printf(",");
644 			acpicpu_print_one_cst(cx);
645 		}
646 	}
647 }
648 
649 
650 int
651 acpicpu_match(struct device *parent, void *match, void *aux)
652 {
653 	struct acpi_attach_args	*aa = aux;
654 	struct cfdata		*cf = match;
655 
656 	/* sanity */
657 	if (aa->aaa_name == NULL ||
658 	    strcmp(aa->aaa_name, cf->cf_driver->cd_name) != 0 ||
659 	    aa->aaa_table != NULL)
660 		return (0);
661 
662 	return (1);
663 }
664 
/*
 * Attach: bind the ACPI processor node to its cpu_info, negotiate
 * capabilities with the firmware, discover C-states (_CST or FADT)
 * and P-states (_PSS/_PCT/_PPC), hook the idle loop and setperf, and
 * print a summary line.
 */
void
acpicpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct acpicpu_softc	*sc = (struct acpicpu_softc *)self;
	struct acpi_attach_args *aa = aux;
	struct aml_value	res;
	int			i;
	u_int32_t		status = 0;
	CPU_INFO_ITERATOR	cii;
	struct cpu_info		*ci;

	sc->sc_acpi = (struct acpi_softc *)parent;
	sc->sc_devnode = aa->aaa_node;
	acpicpu_sc[sc->sc_dev.dv_unit] = sc;

	SLIST_INIT(&sc->sc_cstates);

	sc->sc_pss = NULL;

	/* pull processor ID and P_BLK location from the Processor object */
	if (aml_evalnode(sc->sc_acpi, sc->sc_devnode, 0, NULL, &res) == 0) {
		if (res.type == AML_OBJTYPE_PROCESSOR) {
			sc->sc_cpu = res.v_processor.proc_id;
			sc->sc_pblk_addr = res.v_processor.proc_addr;
			sc->sc_pblk_len = res.v_processor.proc_len;
		}
		aml_freevalue(&res);
	}
	sc->sc_duty_off = sc->sc_acpi->sc_fadt->duty_offset;
	sc->sc_duty_wid = sc->sc_acpi->sc_fadt->duty_width;

	/* link in the matching cpu_info */
	CPU_INFO_FOREACH(cii, ci)
		if (ci->ci_acpi_proc_id == sc->sc_cpu) {
			ci->ci_acpicpudev = self;
			sc->sc_ci = ci;
			break;
		}
	if (ci == NULL) {
		printf(": no cpu matching ACPI ID %d\n", sc->sc_cpu);
		return;
	}

	sc->sc_prev_sleep = 1000000;

	/* advertise our capabilities to the firmware (_OSC/_PDC) */
	acpicpu_set_pdc(sc);

	if (!valid_throttle(sc->sc_duty_off, sc->sc_duty_wid, sc->sc_pblk_addr))
		sc->sc_flags |= FLAGS_NOTHROTTLE;
#ifdef ACPI_DEBUG
	printf(": %s: ", sc->sc_devnode->name);
	printf("\n: hdr:%x pblk:%x,%x duty:%x,%x pstate:%x "
	       "(%ld throttling states)\n", sc->sc_acpi->sc_fadt->hdr_revision,
		sc->sc_pblk_addr, sc->sc_pblk_len, sc->sc_duty_off,
		sc->sc_duty_wid, sc->sc_acpi->sc_fadt->pstate_cnt,
		CPU_MAXSTATE(sc));
#endif

	/* Get C-States from _CST or FADT */
	if (acpicpu_getcst(sc) || SLIST_EMPTY(&sc->sc_cstates))
		acpicpu_getcst_from_fadt(sc);
	else {
		/* Notify BIOS we use _CST objects */
		if (sc->sc_acpi->sc_fadt->cst_cnt) {
			acpi_write_pmreg(sc->sc_acpi, ACPIREG_SMICMD, 0,
			    sc->sc_acpi->sc_fadt->cst_cnt);
		}
	}
	if (!SLIST_EMPTY(&sc->sc_cstates)) {
		extern u_int32_t acpi_force_bm;

		/* we have usable C-states: take over the idle loop */
		cpu_idle_cycle_fcn = &acpicpu_idle;

		/*
		 * C3 (and maybe C2?) needs BM_RLD to be set to
		 * wake the system
		 */
		if (SLIST_FIRST(&sc->sc_cstates)->state > 1 && acpi_force_bm == 0) {
			uint16_t en = acpi_read_pmreg(sc->sc_acpi,
			    ACPIREG_PM1_CNT, 0);
			if ((en & ACPI_PM1_BM_RLD) == 0) {
				acpi_write_pmreg(sc->sc_acpi, ACPIREG_PM1_CNT,
				    0, en | ACPI_PM1_BM_RLD);
				acpi_force_bm = ACPI_PM1_BM_RLD;
			}
		}
	}

	if (acpicpu_getpss(sc)) {
		sc->sc_flags |= FLAGS_NOPSS;
	} else {
#ifdef ACPI_DEBUG
		for (i = 0; i < sc->sc_pss_len; i++) {
			dnprintf(20, "%d %d %d %d %d %d\n",
			    sc->sc_pss[i].pss_core_freq,
			    sc->sc_pss[i].pss_power,
			    sc->sc_pss[i].pss_trans_latency,
			    sc->sc_pss[i].pss_bus_latency,
			    sc->sc_pss[i].pss_ctrl,
			    sc->sc_pss[i].pss_status);
		}
		dnprintf(20, "\n");
#endif
		if (sc->sc_pss_len == 0) {
			/* this should never happen */
			printf("%s: invalid _PSS length\n", DEVNAME(sc));
			sc->sc_flags |= FLAGS_NOPSS;
		}

		acpicpu_getppc(sc);
		if (acpicpu_getpct(sc))
			sc->sc_flags |= FLAGS_NOPCT;
		else if (sc->sc_pss_len > 0) {
			/* Notify BIOS we are handling p-states */
			if (sc->sc_acpi->sc_fadt->pstate_cnt) {
				acpi_write_pmreg(sc->sc_acpi, ACPIREG_SMICMD,
				    0, sc->sc_acpi->sc_fadt->pstate_cnt);
			}

			aml_register_notify(sc->sc_devnode, NULL,
			    acpicpu_notify, sc, ACPIDEV_NOPOLL);

			/*
			 * NOTE(review): sc_pct_stat_as is passed for both
			 * size arguments; acpicpu_setperf() passes
			 * (sc_pct_stat_as, sc_pct_stat_len) — confirm
			 * whether the duplication here is intended.
			 */
			acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
			    sc->sc_pct.pct_status.grd_gas.address_space_id,
			    sc->sc_pct.pct_status.grd_gas.address,
			    sc->sc_pct_stat_as, sc->sc_pct_stat_as, &status);
			/* translate the current P-state index to a percentage */
			sc->sc_level = (100 / sc->sc_pss_len) *
			    (sc->sc_pss_len - status);
			dnprintf(20, "%s: cpu index %d, percentage %d\n",
			    DEVNAME(sc), status, sc->sc_level);
			if (setperf_prio < 30) {
				cpu_setperf = acpicpu_setperf;
				acpicpu_set_notify(acpicpu_setperf_ppc_change);
				setperf_prio = 30;
				acpi_hasprocfvs = 1;
			}
		}
	}

	/*
	 * Nicely enumerate what power management capabilities
	 * ACPI CPU provides.
	 */
	acpicpu_print_cst(sc);
	/*
	 * NOTE(review): the second clause subsumes the first — the whole
	 * condition reduces to !(sc->sc_flags & FLAGS_NOPSS).
	 */
	if (!(sc->sc_flags & (FLAGS_NOPSS | FLAGS_NOPCT)) ||
	    !(sc->sc_flags & FLAGS_NOPSS)) {
		printf("%c ", SLIST_EMPTY(&sc->sc_cstates) ? ':' : ',');

		/*
		 * If acpicpu is itself providing the capability to transition
		 * states, enumerate them in the fashion that est and powernow
		 * would.
		 */
		if (!(sc->sc_flags & (FLAGS_NOPSS | FLAGS_NOPCT))) {
			printf("FVS, ");
			for (i = 0; i < sc->sc_pss_len - 1; i++)
				printf("%d, ", sc->sc_pss[i].pss_core_freq);
			printf("%d MHz", sc->sc_pss[i].pss_core_freq);
		} else
			printf("PSS");
	}

	printf("\n");
}
828 
829 int
830 acpicpu_getppc(struct acpicpu_softc *sc)
831 {
832 	struct aml_value	res;
833 
834 	sc->sc_ppc = 0;
835 
836 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PPC", 0, NULL, &res)) {
837 		dnprintf(10, "%s: no _PPC\n", DEVNAME(sc));
838 		return (1);
839 	}
840 
841 	sc->sc_ppc = aml_val2int(&res);
842 	dnprintf(10, "%s: _PPC: %d\n", DEVNAME(sc), sc->sc_ppc);
843 	aml_freevalue(&res);
844 
845 	return (0);
846 }
847 
848 int
849 acpicpu_getpct(struct acpicpu_softc *sc)
850 {
851 	struct aml_value	res;
852 	int			rv = 1;
853 
854 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PCT", 0, NULL, &res)) {
855 		dnprintf(20, "%s: no _PCT\n", DEVNAME(sc));
856 		return (1);
857 	}
858 
859 	if (res.length != 2) {
860 		dnprintf(20, "%s: %s: invalid _PCT length\n", DEVNAME(sc),
861 		    sc->sc_devnode->name);
862 		return (1);
863 	}
864 
865 	memcpy(&sc->sc_pct.pct_ctrl, res.v_package[0]->v_buffer,
866 	    sizeof sc->sc_pct.pct_ctrl);
867 	if (sc->sc_pct.pct_ctrl.grd_gas.address_space_id ==
868 	    GAS_FUNCTIONAL_FIXED) {
869 		dnprintf(20, "CTRL GASIO is functional fixed hardware.\n");
870 		goto ffh;
871 	}
872 
873 	memcpy(&sc->sc_pct.pct_status, res.v_package[1]->v_buffer,
874 	    sizeof sc->sc_pct.pct_status);
875 	if (sc->sc_pct.pct_status.grd_gas.address_space_id ==
876 	    GAS_FUNCTIONAL_FIXED) {
877 		dnprintf(20, "CTRL GASIO is functional fixed hardware.\n");
878 		goto ffh;
879 	}
880 
881 	dnprintf(10, "_PCT(ctrl)  : %02x %04x %02x %02x %02x %02x %016llx\n",
882 	    sc->sc_pct.pct_ctrl.grd_descriptor,
883 	    sc->sc_pct.pct_ctrl.grd_length,
884 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
885 	    sc->sc_pct.pct_ctrl.grd_gas.register_bit_width,
886 	    sc->sc_pct.pct_ctrl.grd_gas.register_bit_offset,
887 	    sc->sc_pct.pct_ctrl.grd_gas.access_size,
888 	    sc->sc_pct.pct_ctrl.grd_gas.address);
889 
890 	dnprintf(10, "_PCT(status): %02x %04x %02x %02x %02x %02x %016llx\n",
891 	    sc->sc_pct.pct_status.grd_descriptor,
892 	    sc->sc_pct.pct_status.grd_length,
893 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
894 	    sc->sc_pct.pct_status.grd_gas.register_bit_width,
895 	    sc->sc_pct.pct_status.grd_gas.register_bit_offset,
896 	    sc->sc_pct.pct_status.grd_gas.access_size,
897 	    sc->sc_pct.pct_status.grd_gas.address);
898 
899 	/* if not set assume single 32 bit access */
900 	sc->sc_pct_stat_as = sc->sc_pct.pct_status.grd_gas.register_bit_width
901 	    / 8;
902 	if (sc->sc_pct_stat_as == 0)
903 		sc->sc_pct_stat_as = 4;
904 	sc->sc_pct_ctrl_as = sc->sc_pct.pct_ctrl.grd_gas.register_bit_width / 8;
905 	if (sc->sc_pct_ctrl_as == 0)
906 		sc->sc_pct_ctrl_as = 4;
907 	sc->sc_pct_stat_len = sc->sc_pct.pct_status.grd_gas.access_size;
908 	if (sc->sc_pct_stat_len == 0)
909 		sc->sc_pct_stat_len = sc->sc_pct_stat_as;
910 	sc->sc_pct_ctrl_len = sc->sc_pct.pct_ctrl.grd_gas.access_size;
911 	if (sc->sc_pct_ctrl_len == 0)
912 		sc->sc_pct_ctrl_len = sc->sc_pct_ctrl_as;
913 
914 	rv = 0;
915 ffh:
916 	aml_freevalue(&res);
917 	return (rv);
918 }
919 
920 int
921 acpicpu_getpss(struct acpicpu_softc *sc)
922 {
923 	struct aml_value	res;
924 	int			i, c, cf;
925 
926 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PSS", 0, NULL, &res)) {
927 		dprintf("%s: no _PSS\n", DEVNAME(sc));
928 		return (1);
929 	}
930 
931 	free(sc->sc_pss, M_DEVBUF, 0);
932 
933 	sc->sc_pss = mallocarray(res.length, sizeof(*sc->sc_pss), M_DEVBUF,
934 	    M_WAITOK | M_ZERO);
935 
936 	c = 0;
937 	for (i = 0; i < res.length; i++) {
938 		cf = aml_val2int(res.v_package[i]->v_package[0]);
939 
940 		/* This heuristic comes from FreeBSDs
941 		 * dev/acpica/acpi_perf.c to weed out invalid PSS entries.
942 		 */
943 		if (cf == sc->sc_pss[c].pss_core_freq) {
944 			printf("%s: struck PSS entry, core frequency equals "
945 			    " last\n", sc->sc_dev.dv_xname);
946 			continue;
947 		}
948 
949 		if (cf == 0xFFFF || cf == 0x9999 || cf == 99999 || cf == 0) {
950 			printf("%s: struck PSS entry, inappropriate core "
951 			    "frequency value\n", sc->sc_dev.dv_xname);
952 			continue;
953 		}
954 
955 		sc->sc_pss[c].pss_core_freq = cf;
956 		sc->sc_pss[c].pss_power = aml_val2int(
957 		    res.v_package[i]->v_package[1]);
958 		sc->sc_pss[c].pss_trans_latency = aml_val2int(
959 		    res.v_package[i]->v_package[2]);
960 		sc->sc_pss[c].pss_bus_latency = aml_val2int(
961 		    res.v_package[i]->v_package[3]);
962 		sc->sc_pss[c].pss_ctrl = aml_val2int(
963 		    res.v_package[i]->v_package[4]);
964 		sc->sc_pss[c].pss_status = aml_val2int(
965 		    res.v_package[i]->v_package[5]);
966 		c++;
967 	}
968 	sc->sc_pss_len = c;
969 
970 	aml_freevalue(&res);
971 
972 	return (0);
973 }
974 
975 int
976 acpicpu_fetch_pss(struct acpicpu_pss **pss)
977 {
978 	struct acpicpu_softc	*sc;
979 
980 	/*
981 	 * XXX: According to the ACPI spec in an SMP system all processors
982 	 * are supposed to support the same states. For now we pray
983 	 * the bios ensures this...
984 	 */
985 
986 	sc = acpicpu_sc[0];
987 	if (!sc)
988 		return 0;
989 	*pss = sc->sc_pss;
990 
991 	return (sc->sc_pss_len);
992 }
993 
994 int
995 acpicpu_notify(struct aml_node *node, int notify_type, void *arg)
996 {
997 	struct acpicpu_softc	*sc = arg;
998 
999 	dnprintf(10, "acpicpu_notify: %.2x %s\n", notify_type,
1000 	    sc->sc_devnode->name);
1001 
1002 	switch (notify_type) {
1003 	case 0x80:	/* _PPC changed, retrieve new values */
1004 		acpicpu_getppc(sc);
1005 		acpicpu_getpss(sc);
1006 		if (sc->sc_notify)
1007 			sc->sc_notify(sc->sc_pss, sc->sc_pss_len);
1008 		break;
1009 
1010 	case 0x81:	/* _CST changed, retrieve new values */
1011 		acpicpu_getcst(sc);
1012 		printf("%s: notify", DEVNAME(sc));
1013 		acpicpu_print_cst(sc);
1014 		printf("\n");
1015 		break;
1016 
1017 	default:
1018 		printf("%s: unhandled cpu event %x\n", DEVNAME(sc),
1019 		    notify_type);
1020 		break;
1021 	}
1022 
1023 	return (0);
1024 }
1025 
1026 void
1027 acpicpu_set_notify(void (*func)(struct acpicpu_pss *, int))
1028 {
1029 	struct acpicpu_softc    *sc;
1030 
1031 	sc = acpicpu_sc[0];
1032 	if (sc != NULL)
1033 		sc->sc_notify = func;
1034 }
1035 
1036 void
1037 acpicpu_setperf_ppc_change(struct acpicpu_pss *pss, int npss)
1038 {
1039 	struct acpicpu_softc    *sc;
1040 
1041 	sc = acpicpu_sc[0];
1042 
1043 	if (sc != NULL)
1044 		cpu_setperf(sc->sc_level);
1045 }
1046 
1047 void
1048 acpicpu_setperf(int level)
1049 {
1050 	struct acpicpu_softc	*sc;
1051 	struct acpicpu_pss	*pss = NULL;
1052 	int			idx, len;
1053 	u_int32_t		status = 0;
1054 
1055 	sc = acpicpu_sc[cpu_number()];
1056 
1057 	dnprintf(10, "%s: acpicpu setperf level %d\n",
1058 	    sc->sc_devnode->name, level);
1059 
1060 	if (level < 0 || level > 100) {
1061 		dnprintf(10, "%s: acpicpu setperf illegal percentage\n",
1062 		    sc->sc_devnode->name);
1063 		return;
1064 	}
1065 
1066 	/*
1067 	 * XXX this should be handled more gracefully and it needs to also do
1068 	 * the duty cycle method instead of pss exclusively
1069 	 */
1070 	if (sc->sc_flags & FLAGS_NOPSS || sc->sc_flags & FLAGS_NOPCT) {
1071 		dnprintf(10, "%s: acpicpu no _PSS or _PCT\n",
1072 		    sc->sc_devnode->name);
1073 		return;
1074 	}
1075 
1076 	if (sc->sc_ppc)
1077 		len = sc->sc_ppc;
1078 	else
1079 		len = sc->sc_pss_len;
1080 	idx = (len - 1) - (level / (100 / len));
1081 	if (idx < 0)
1082 		idx = 0;
1083 
1084 	if (sc->sc_ppc)
1085 		idx += sc->sc_pss_len - sc->sc_ppc;
1086 
1087 	if (idx > sc->sc_pss_len)
1088 		idx = sc->sc_pss_len - 1;
1089 
1090 	dnprintf(10, "%s: acpicpu setperf index %d pss_len %d ppc %d\n",
1091 	    sc->sc_devnode->name, idx, sc->sc_pss_len, sc->sc_ppc);
1092 
1093 	pss = &sc->sc_pss[idx];
1094 
1095 #ifdef ACPI_DEBUG
1096 	/* keep this for now since we will need this for debug in the field */
1097 	printf("0 status: %x %llx %u %u ctrl: %x %llx %u %u\n",
1098 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1099 	    sc->sc_pct.pct_status.grd_gas.address,
1100 	    sc->sc_pct_stat_as, sc->sc_pct_stat_len,
1101 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
1102 	    sc->sc_pct.pct_ctrl.grd_gas.address,
1103 	    sc->sc_pct_ctrl_as, sc->sc_pct_ctrl_len);
1104 #endif
1105 	acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
1106 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1107 	    sc->sc_pct.pct_status.grd_gas.address, sc->sc_pct_stat_as,
1108 	    sc->sc_pct_stat_len, &status);
1109 	dnprintf(20, "1 status: %u <- %u\n", status, pss->pss_status);
1110 
1111 	/* Are we already at the requested frequency? */
1112 	if (status == pss->pss_status)
1113 		return;
1114 
1115 	acpi_gasio(sc->sc_acpi, ACPI_IOWRITE,
1116 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
1117 	    sc->sc_pct.pct_ctrl.grd_gas.address, sc->sc_pct_ctrl_as,
1118 	    sc->sc_pct_ctrl_len, &pss->pss_ctrl);
1119 	dnprintf(20, "pss_ctrl: %x\n", pss->pss_ctrl);
1120 
1121 	acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
1122 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1123 	    sc->sc_pct.pct_status.grd_gas.address, sc->sc_pct_stat_as,
1124 	    sc->sc_pct_stat_as, &status);
1125 	dnprintf(20, "2 status: %d\n", status);
1126 
1127 	/* Did the transition succeed? */
1128 	 if (status == pss->pss_status) {
1129 		cpuspeed = pss->pss_core_freq;
1130 		sc->sc_level = level;
1131 	} else
1132 		printf("%s: acpicpu setperf failed to alter frequency\n",
1133 		    sc->sc_devnode->name);
1134 }
1135 
/*
 * Idle the current CPU using the deepest acceptable ACPI C-state.
 * Called from the scheduler's idle loop with interrupts disabled;
 * every exit path re-enables interrupts (sti, or mwait/inb paths
 * that never disabled delivery).  Also maintains the exponentially
 * smoothed sc_prev_sleep estimate used to pick future states.
 */
void
acpicpu_idle(void)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc = (struct acpicpu_softc *)ci->ci_acpicpudev;
	struct acpi_cstate *best, *cx;
	unsigned long itime;

	if (sc == NULL) {
		/* re-enable interrupts before panicking so we can print */
		__asm volatile("sti");
		panic("null acpicpu");
	}

	/* possibly update the MWAIT_ONLY flag in cpu_info */
	if (sc->sc_flags & FLAGS_MWAIT_ONLY) {
		if ((ci->ci_mwait & MWAIT_ONLY) == 0)
			atomic_setbits_int(&ci->ci_mwait, MWAIT_ONLY);
	} else if (ci->ci_mwait & MWAIT_ONLY)
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_ONLY);

	/*
	 * Find the first state with a latency we'll accept, ignoring
	 * states marked skippable
	 */
	best = cx = SLIST_FIRST(&sc->sc_cstates);
	while ((cx->flags & CST_FLAG_SKIP) ||
	    cx->latency * 3 > sc->sc_prev_sleep) {
		if ((cx = SLIST_NEXT(cx, link)) == NULL)
			break;
		best = cx;
	}

	/*
	 * If the chosen state is C3 or deeper and flagged for bus-master
	 * avoidance, and the BM status bit is set, fall back to a
	 * shallower state for this pass.
	 */
	if (best->state >= 3 &&
	    (best->flags & CST_FLAG_MWAIT_BM_AVOIDANCE) &&
	    acpi_read_pmreg(acpi_softc, ACPIREG_PM1_STS, 0) & ACPI_PM1_BM_STS) {
		/* clear it and back off */
		acpi_write_pmreg(acpi_softc, ACPIREG_PM1_STS, 0,
		    ACPI_PM1_BM_STS);
		while ((cx = SLIST_NEXT(cx, link)) != NULL) {
			if (cx->flags & CST_FLAG_SKIP)
				continue;
			if (cx->state < 3 ||
			    (cx->flags & CST_FLAG_MWAIT_BM_AVOIDANCE) == 0)
				break;
		}
		best = cx;
	}


	/* per-state entry counter, exported for statistics */
	atomic_inc_long(&cst_stats[best->state]);

	/*
	 * Default idle-time estimate for methods that can't measure it
	 * (halt/io); the mwait path overwrites it with a real measurement.
	 */
	itime = tick / 2;
	switch (best->method) {
	default:
	case CST_METH_HALT:
		__asm volatile("sti; hlt");
		break;

	case CST_METH_IO_HALT:
		/* I/O read triggers the state entry, then halt */
		inb((u_short)best->address);
		__asm volatile("sti; hlt");
		break;

	case CST_METH_MWAIT:
		{
		struct timeval start, stop;
		unsigned int hints;

#ifdef __LP64__
		if ((read_rflags() & PSL_I) == 0)
			panic("idle with interrupts blocked!");
#else
		if ((read_eflags() & PSL_I) == 0)
			panic("idle with interrupts blocked!");
#endif

		/* something already queued? */
		if (!cpu_is_idle(ci))
			return;

		/*
		 * About to idle; setting the MWAIT_IN_IDLE bit tells
		 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
		 * that it doesn't need to use an IPI.  We also set the
		 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
		 * the mwait.  Once they're set, we do a final check of the
		 * queue, in case another cpu called setrunqueue() and added
		 * something to the queue and called cpu_unidle() between
		 * the check in sched_idle() and here.
		 */
		hints = (unsigned)best->address;
		microuptime(&start);
		atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING);
		if (cpu_is_idle(ci)) {
			/* intel errata AAI65: cflush before monitor */
			if (ci->ci_cflushsz != 0) {
				membar_sync();
				clflush((unsigned long)&ci->ci_mwait);
				membar_sync();
			}

			monitor(&ci->ci_mwait, 0, 0);
			if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
				mwait(0, hints);
		}

		/* measure how long we actually slept, in microseconds */
		microuptime(&stop);
		timersub(&stop, &start, &stop);
		itime = stop.tv_sec * 1000000 + stop.tv_usec;

		/* done idling; let cpu_kick() know that an IPI is required */
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
		break;
		}

	case CST_METH_GAS_IO:
		inb((u_short)best->address);
		/* something harmless to give system time to change state */
		acpi_read_pmreg(acpi_softc, ACPIREG_PM1_STS, 0);
		break;

	}

	/* exponential smoothing of the sleep-duration estimate */
	sc->sc_last_itime = itime;
	itime >>= 1;
	sc->sc_prev_sleep = (sc->sc_prev_sleep + (sc->sc_prev_sleep >> 1)
	    + itime) >> 1;
}
1264