xref: /openbsd-src/sys/dev/acpi/acpicpu.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /* $OpenBSD: acpicpu.c,v 1.84 2019/10/16 01:27:59 mlarkin Exp $ */
2 /*
3  * Copyright (c) 2005 Marco Peereboom <marco@openbsd.org>
4  * Copyright (c) 2015 Philip Guenther <guenther@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/kernel.h>		/* for tick */
21 #include <sys/signalvar.h>
22 #include <sys/sysctl.h>
23 #include <sys/systm.h>
24 #include <sys/device.h>
25 #include <sys/malloc.h>
26 #include <sys/queue.h>
27 #include <sys/atomic.h>
28 
29 #include <machine/bus.h>
30 #include <machine/cpu.h>
31 #include <machine/cpufunc.h>
32 #include <machine/specialreg.h>
33 
34 #include <dev/acpi/acpireg.h>
35 #include <dev/acpi/acpivar.h>
36 #include <dev/acpi/acpidev.h>
37 #include <dev/acpi/amltypes.h>
38 #include <dev/acpi/dsdt.h>
39 
40 #include <sys/sensors.h>
41 
42 int	acpicpu_match(struct device *, void *, void *);
43 void	acpicpu_attach(struct device *, struct device *, void *);
44 int	acpicpu_notify(struct aml_node *, int, void *);
45 void	acpicpu_setperf(int);
46 void	acpicpu_setperf_ppc_change(struct acpicpu_pss *, int);
47 
48 #define ACPI_STATE_C0		0x00
49 #define ACPI_STATE_C1		0x01
50 #define ACPI_STATE_C2		0x02
51 #define ACPI_STATE_C3		0x03
52 
53 #define ACPI_PDC_REVID		0x1
54 #define ACPI_PDC_SMP		0xa
55 #define ACPI_PDC_MSR		0x1
56 
57 /* _PDC/_OSC Intel capabilities flags */
58 #define ACPI_PDC_P_FFH		0x0001
59 #define ACPI_PDC_C_C1_HALT	0x0002
60 #define ACPI_PDC_T_FFH		0x0004
61 #define ACPI_PDC_SMP_C1PT	0x0008
62 #define ACPI_PDC_SMP_C2C3	0x0010
63 #define ACPI_PDC_SMP_P_SWCOORD	0x0020
64 #define ACPI_PDC_SMP_C_SWCOORD	0x0040
65 #define ACPI_PDC_SMP_T_SWCOORD	0x0080
66 #define ACPI_PDC_C_C1_FFH	0x0100
67 #define ACPI_PDC_C_C2C3_FFH	0x0200
68 /* reserved			0x0400 */
69 #define ACPI_PDC_P_HWCOORD	0x0800
70 #define ACPI_PDC_PPC_NOTIFY	0x1000
71 
72 #define CST_METH_HALT		0
73 #define CST_METH_IO_HALT	1
74 #define CST_METH_MWAIT		2
75 #define CST_METH_GAS_IO		3
76 
77 /* flags on Intel's FFH mwait method */
78 #define CST_FLAG_MWAIT_HW_COORD		0x1
79 #define CST_FLAG_MWAIT_BM_AVOIDANCE	0x2
80 #define CST_FLAG_FALLBACK		0x4000	/* fallback for broken _CST */
81 #define CST_FLAG_SKIP			0x8000	/* state is worse choice */
82 
83 #define FLAGS_MWAIT_ONLY	0x02
84 #define FLAGS_BMCHECK		0x04
85 #define FLAGS_NOTHROTTLE	0x08
86 #define FLAGS_NOPSS		0x10
87 #define FLAGS_NOPCT		0x20
88 
89 #define CPU_THT_EN		(1L << 4)
90 #define CPU_MAXSTATE(sc)	(1L << (sc)->sc_duty_wid)
91 #define CPU_STATE(sc,pct)	((pct * CPU_MAXSTATE(sc) / 100) << (sc)->sc_duty_off)
92 #define CPU_STATEMASK(sc)	((CPU_MAXSTATE(sc) - 1) << (sc)->sc_duty_off)
93 
94 #define ACPI_MAX_C2_LATENCY	100
95 #define ACPI_MAX_C3_LATENCY	1000
96 
97 #define CSD_COORD_SW_ALL	0xFC
98 #define CSD_COORD_SW_ANY	0xFD
99 #define CSD_COORD_HW_ALL	0xFE
100 
101 /* Make sure throttling bits are valid,a=addr,o=offset,w=width */
102 #define valid_throttle(o,w,a)	(a && w && (o+w)<=31 && (o>4 || (o+w)<=4))
103 
/* One C-state, parsed from a _CST entry or synthesized from FADT values. */
struct acpi_cstate
{
	SLIST_ENTRY(acpi_cstate) link;

	u_short		state;		/* ACPI_STATE_C* */
	short		method;		/* CST_METH_* */
	u_short		flags;		/* CST_FLAG_* */
	u_short		latency;	/* from _CST pkg[2] or FADT p_lvl*_lat */
	int		power;		/* from _CST pkg[3]; -1 when unknown */
	uint64_t	address;	/* or mwait hint */
};

/* per-C-state use counters; updated elsewhere (acpicpu_idle? not in view) */
unsigned long cst_stats[4] = { 0 };
117 
struct acpicpu_softc {
	struct device		sc_dev;
	int			sc_cpu;		/* ACPI processor id */

	/* FADT duty-cycle throttling field geometry */
	int			sc_duty_wid;
	int			sc_duty_off;
	/* processor control block (P_BLK) from the Processor() object */
	uint32_t		sc_pblk_addr;
	int			sc_pblk_len;
	int			sc_flags;	/* FLAGS_* */
	/* idle bookkeeping; consumed by acpicpu_idle (not in view) */
	unsigned long		sc_prev_sleep;
	unsigned long		sc_last_itime;

	struct cpu_info		*sc_ci;		/* matching MD cpu_info */
	SLIST_HEAD(,acpi_cstate) sc_cstates;	/* newest insertion at head */

	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;

	struct acpi_softc	*sc_acpi;
	struct aml_node		*sc_devnode;

	int			sc_pss_len;	/* XXX */
	int			sc_ppc;		/* last _PPC value (0 if none) */
	int			sc_level;	/* current perf level, percent */
	struct acpicpu_pss	*sc_pss;	/* _PSS performance state table */
	size_t			sc_pssfulllen;	/* allocated size of sc_pss */

	struct acpicpu_pct	sc_pct;
	/* save compensation for pct access for lying bios' */
	uint32_t		sc_pct_stat_as;
	uint32_t		sc_pct_ctrl_as;
	uint32_t		sc_pct_stat_len;
	uint32_t		sc_pct_ctrl_len;
	/*
	 * XXX: _PPC Change listener
	 * PPC changes can occur when for example a machine is disconnected
	 * from AC power and can no longer support the highest frequency or
	 * voltage when driven from the battery.
	 * Should probably be reimplemented as a list for now we assume only
	 * one listener
	 */
	void			(*sc_notify)(struct acpicpu_pss *, int);
};
161 
162 void	acpicpu_add_cstatepkg(struct aml_value *, void *);
163 void	acpicpu_add_cdeppkg(struct aml_value *, void *);
164 int	acpicpu_getppc(struct acpicpu_softc *);
165 int	acpicpu_getpct(struct acpicpu_softc *);
166 int	acpicpu_getpss(struct acpicpu_softc *);
167 int	acpicpu_getcst(struct acpicpu_softc *);
168 void	acpicpu_getcst_from_fadt(struct acpicpu_softc *);
169 void	acpicpu_print_one_cst(struct acpi_cstate *_cx);
170 void	acpicpu_print_cst(struct acpicpu_softc *_sc);
171 void	acpicpu_add_cstate(struct acpicpu_softc *_sc, int _state, int _method,
172 	    int _flags, int _latency, int _power, uint64_t _address);
173 void	acpicpu_set_pdc(struct acpicpu_softc *);
174 void	acpicpu_idle(void);
175 
176 #if 0
177 void    acpicpu_set_throttle(struct acpicpu_softc *, int);
178 struct acpi_cstate *acpicpu_find_cstate(struct acpicpu_softc *, int);
179 #endif
180 
181 struct cfattach acpicpu_ca = {
182 	sizeof(struct acpicpu_softc), acpicpu_match, acpicpu_attach
183 };
184 
185 struct cfdriver acpicpu_cd = {
186 	NULL, "acpicpu", DV_DULL
187 };
188 
189 extern int setperf_prio;
190 
191 struct acpicpu_softc *acpicpu_sc[MAXCPUS];
192 
#if 0
/*
 * Dead code (compiled out): duty-cycle throttling via the P_BLK
 * control register, and a simple C-state lookup helper.
 */
void
acpicpu_set_throttle(struct acpicpu_softc *sc, int level)
{
	uint32_t pbval;

	if (sc->sc_flags & FLAGS_NOTHROTTLE)
		return;

	/* Disable throttling control */
	pbval = inl(sc->sc_pblk_addr);
	outl(sc->sc_pblk_addr, pbval & ~CPU_THT_EN);
	if (level < 100) {
		/* program the new duty-cycle bits, then re-enable */
		pbval &= ~CPU_STATEMASK(sc);
		pbval |= CPU_STATE(sc, level);
		outl(sc->sc_pblk_addr, pbval & ~CPU_THT_EN);
		outl(sc->sc_pblk_addr, pbval | CPU_THT_EN);
	}
}

/* Return the entry with the given ACPI_STATE_C* number, or NULL. */
struct acpi_cstate *
acpicpu_find_cstate(struct acpicpu_softc *sc, int state)
{
	struct acpi_cstate	*cx;

	SLIST_FOREACH(cx, &sc->sc_cstates, link)
		if (cx->state == state)
			return cx;
	return (NULL);
}
#endif
224 
225 
/*
 * Advertise the OS's power-management capabilities to the firmware:
 * via _OSC (query, then set) when the processor node has one,
 * otherwise via the legacy _PDC method.
 */
void
acpicpu_set_pdc(struct acpicpu_softc *sc)
{
	struct aml_value cmd, osc_cmd[4];
	struct aml_value res;
	uint32_t cap;
	uint32_t buf[3];

	/* 4077A616-290C-47BE-9EBD-D87058713953 */
	static uint8_t cpu_oscuuid[16] = { 0x16, 0xA6, 0x77, 0x40, 0x0C, 0x29,
					   0xBE, 0x47, 0x9E, 0xBD, 0xD8, 0x70,
					   0x58, 0x71, 0x39, 0x53 };
	/* capabilities we claim to support (ACPI_PDC_* bits) */
	cap = ACPI_PDC_C_C1_HALT | ACPI_PDC_P_FFH | ACPI_PDC_C_C1_FFH
	    | ACPI_PDC_C_C2C3_FFH | ACPI_PDC_SMP_P_SWCOORD | ACPI_PDC_SMP_C2C3
	    | ACPI_PDC_SMP_C1PT;

	if (aml_searchname(sc->sc_devnode, "_OSC")) {
		/* Query _OSC */
		/* args: UUID, revision (1), DWORD count (2), caps buffer */
		memset(&osc_cmd, 0, sizeof(osc_cmd));
		osc_cmd[0].type = AML_OBJTYPE_BUFFER;
		osc_cmd[0].v_buffer = (uint8_t *)&cpu_oscuuid;
		osc_cmd[0].length = sizeof(cpu_oscuuid);

		osc_cmd[1].type = AML_OBJTYPE_INTEGER;
		osc_cmd[1].v_integer = 1;
		osc_cmd[1].length = 1;

		osc_cmd[2].type = AML_OBJTYPE_INTEGER;
		osc_cmd[2].v_integer = 2;
		osc_cmd[2].length = 1;

		/* first DWORD = 1: query-only flag (per _OSC convention) */
		buf[0] = 1;
		buf[1] = cap;
		osc_cmd[3].type = AML_OBJTYPE_BUFFER;
		osc_cmd[3].v_buffer = (int8_t *)&buf;
		osc_cmd[3].length = sizeof(buf);

		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_OSC",
		    4, osc_cmd, &res);

		/* need at least two DWORDs back: status + granted caps */
		if (res.type != AML_OBJTYPE_BUFFER || res.length < 8) {
			printf(": unable to query capabilities\n");
			aml_freevalue(&res);
			return;
		}

		/* Evaluate _OSC */
		memset(&osc_cmd, 0, sizeof(osc_cmd));
		osc_cmd[0].type = AML_OBJTYPE_BUFFER;
		osc_cmd[0].v_buffer = (uint8_t *)&cpu_oscuuid;
		osc_cmd[0].length = sizeof(cpu_oscuuid);

		osc_cmd[1].type = AML_OBJTYPE_INTEGER;
		osc_cmd[1].v_integer = 1;
		osc_cmd[1].length = 1;

		osc_cmd[2].type = AML_OBJTYPE_INTEGER;
		osc_cmd[2].v_integer = 2;
		osc_cmd[2].length = 1;

		/* set for real: caps granted by firmware, masked to ours */
		buf[0] = 0;
		buf[1] = (*(uint32_t *)&res.v_buffer[4]) & cap;
		osc_cmd[3].type = AML_OBJTYPE_BUFFER;
		osc_cmd[3].v_buffer = (int8_t *)&buf;
		osc_cmd[3].length = sizeof(buf);

		aml_freevalue(&res);

		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_OSC",
		    4, osc_cmd, NULL);
	} else {
		/* Evaluate _PDC */
		memset(&cmd, 0, sizeof(cmd));
		cmd.type = AML_OBJTYPE_BUFFER;
		cmd.v_buffer = (uint8_t *)&buf;
		cmd.length = sizeof(buf);

		/* _PDC buffer: revision, capability DWORD count, caps */
		buf[0] = ACPI_PDC_REVID;
		buf[1] = 1;
		buf[2] = cap;

		aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PDC",
		    1, &cmd, NULL);
	}
}
311 
312 /*
313  * sanity check mwait hints against what cpuid told us
314  * ...but because intel screwed up, just check whether cpuid says
315  * the given state has _any_ substates.
316  */
317 static int
318 check_mwait_hints(int state, int hints)
319 {
320 	int cstate;
321 	int num_substates;
322 
323 	if (cpu_mwait_size == 0)
324 		return (0);
325 	cstate = ((hints >> 4) & 0xf) + 1;
326 	if (cstate == 16)
327 		cstate = 0;
328 	else if (cstate > 7) {
329 		/* out of range of test against CPUID; just trust'em */
330 		return (1);
331 	}
332 	num_substates = (cpu_mwait_states >> (4 * cstate)) & 0xf;
333 	if (num_substates == 0) {
334 		printf(": C%d bad (state %d has no substates)", state, cstate);
335 		return (0);
336 	}
337 	return (1);
338 }
339 
340 void
341 acpicpu_add_cstate(struct acpicpu_softc *sc, int state, int method,
342     int flags, int latency, int power, uint64_t address)
343 {
344 	struct acpi_cstate	*cx;
345 
346 	dnprintf(10," C%d: latency:.%4x power:%.4x addr:%.16llx\n",
347 	    state, latency, power, address);
348 
349 	/* add a new state, or overwrite the fallback C1 state? */
350 	if (state != ACPI_STATE_C1 ||
351 	    (cx = SLIST_FIRST(&sc->sc_cstates)) == NULL ||
352 	    (cx->flags & CST_FLAG_FALLBACK) == 0) {
353 		cx = malloc(sizeof(*cx), M_DEVBUF, M_WAITOK);
354 		SLIST_INSERT_HEAD(&sc->sc_cstates, cx, link);
355 	}
356 
357 	cx->state = state;
358 	cx->method = method;
359 	cx->flags = flags;
360 	cx->latency = latency;
361 	cx->power = power;
362 	cx->address = address;
363 }
364 
/* Found a _CST object, add new cstate for each entry */
/*
 * Each _CST entry is a 4-element package: a buffer holding a Generic
 * Register Descriptor, the C-state number, the latency, and the power.
 * Entries that fail validation are reported on the console and dropped.
 */
void
acpicpu_add_cstatepkg(struct aml_value *val, void *arg)
{
	struct acpicpu_softc	*sc = arg;
	uint64_t addr;
	struct acpi_grd *grd;
	int state, method, flags;

#if defined(ACPI_DEBUG) && !defined(SMALL_KERNEL)
	aml_showvalue(val);
#endif
	if (val->type != AML_OBJTYPE_PACKAGE || val->length != 4)
		return;

	/* range and sanity checks */
	state = val->v_package[1]->v_integer;
	if (state < 0 || state > 4)
		return;
	if (val->v_package[0]->type != AML_OBJTYPE_BUFFER) {
		printf(": C%d (unexpected ACPI object type %d)",
		    state, val->v_package[0]->type);
		return;
	}
	/* buffer must be exactly one GRD plus an end tag */
	grd = (struct acpi_grd *)val->v_package[0]->v_buffer;
	if (val->v_package[0]->length != sizeof(*grd) + 2 ||
	    grd->grd_descriptor != LR_GENREGISTER ||
	    grd->grd_length != sizeof(grd->grd_gas) ||
	    val->v_package[0]->v_buffer[sizeof(*grd)] != SRT_ENDTAG) {
		printf(": C%d (bogo buffer)", state);
		return;
	}

	flags = 0;
	switch (grd->grd_gas.address_space_id) {
	case GAS_FUNCTIONAL_FIXED:
		/* bit width 0 means a plain halt instruction */
		if (grd->grd_gas.register_bit_width == 0) {
			method = CST_METH_HALT;
			addr = 0;
		} else {
			/*
			 * In theory we should only do this for
			 * vendor 1 == Intel but other values crop up,
			 * presumably due to the normal ACPI spec confusion.
			 */
			/* register_bit_offset selects the FFH class */
			switch (grd->grd_gas.register_bit_offset) {
			case 0x1:
				method = CST_METH_IO_HALT;
				addr = grd->grd_gas.address;

				/* i386 and amd64 I/O space is 16bits */
				if (addr > 0xffff) {
					printf(": C%d (bogo I/O addr %llx)",
					    state, addr);
					return;
				}
				break;
			case 0x2:
				/* mwait: 'address' carries the hint */
				addr = grd->grd_gas.address;
				if (!check_mwait_hints(state, addr))
					return;
				method = CST_METH_MWAIT;
				flags = grd->grd_gas.access_size;
				break;
			default:
				printf(": C%d (unknown FFH class %d)",
				    state, grd->grd_gas.register_bit_offset);
				return;
			}
		}
		break;

	case GAS_SYSTEM_IOSPACE:
		addr = grd->grd_gas.address;
		/* only simple byte-wide, unshifted I/O registers handled */
		if (grd->grd_gas.register_bit_width != 8 ||
		    grd->grd_gas.register_bit_offset != 0) {
			printf(": C%d (unhandled %s spec: %d/%d)", state,
			    "I/O", grd->grd_gas.register_bit_width,
			    grd->grd_gas.register_bit_offset);
			return;
		}
		method = CST_METH_GAS_IO;
		break;

	default:
		/* dump the GAS for analysis */
		{
			int i;
			printf(": C%d (unhandled GAS:", state);
			for (i = 0; i < sizeof(grd->grd_gas); i++)
				printf(" %#x", ((u_char *)&grd->grd_gas)[i]);
			printf(")");

		}
		return;
	}

	/* pkg[2] is latency, pkg[3] is power */
	acpicpu_add_cstate(sc, state, method, flags,
	    val->v_package[2]->v_integer, val->v_package[3]->v_integer, addr);
}
465 
466 
/* Found a _CSD object, print the dependency  */
/*
 * _CSD entries describe C-state coordination domains.  This driver
 * only reports them; nothing is acted upon unless software
 * coordination across more than one CPU would be required.
 */
void
acpicpu_add_cdeppkg(struct aml_value *val, void *arg)
{
	int64_t	num_proc, coord_type, domain, cindex;

	/*
	 * errors: unexpected object type, bad length, mismatched length,
	 * and bad CSD revision
	 */
	if (val->type != AML_OBJTYPE_PACKAGE || val->length < 6 ||
	    val->length != val->v_package[0]->v_integer ||
	    val->v_package[1]->v_integer != 0) {
		/*
		 * NOTE(review): the "1 ||" below forces the value dump
		 * even in non-debug kernels — looks like a debugging
		 * leftover; confirm before relying on it.
		 */
#if 1 || defined(ACPI_DEBUG) && !defined(SMALL_KERNEL)
		aml_showvalue(val);
#endif
		printf("bogus CSD\n");
		return;
	}

	/* coordinating 'among' one CPU is trivial, ignore */
	num_proc = val->v_package[4]->v_integer;
	if (num_proc == 1)
		return;

	/* we practically assume the hardware will coordinate, so ignore */
	coord_type = val->v_package[3]->v_integer;
	if (coord_type == CSD_COORD_HW_ALL)
		return;

	domain = val->v_package[2]->v_integer;
	cindex = val->v_package[5]->v_integer;
	printf(": CSD (c=%#llx d=%lld n=%lld i=%lli)",
	    coord_type, domain, num_proc, cindex);
}
502 
/*
 * Rebuild sc->sc_cstates from the _CST package (seeded with a
 * fallback C1-via-halt entry) and re-derive FLAGS_MWAIT_ONLY.
 * Returns 1 if _CST is absent or yielded nothing usable (callers then
 * fall back to the FADT values), 0 on success.
 */
int
acpicpu_getcst(struct acpicpu_softc *sc)
{
	struct aml_value	res;
	struct acpi_cstate	*cx, *next_cx;
	int			use_nonmwait;

	/* delete the existing list */
	while ((cx = SLIST_FIRST(&sc->sc_cstates)) != NULL) {
		SLIST_REMOVE_HEAD(&sc->sc_cstates, link);
		free(cx, M_DEVBUF, sizeof(*cx));
	}

	/* provide a fallback C1-via-halt in case _CST's C1 is bogus */
	acpicpu_add_cstate(sc, ACPI_STATE_C1, CST_METH_HALT,
	    CST_FLAG_FALLBACK, 1, -1, 0);

	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_CST", 0, NULL, &res))
		return (1);

	aml_foreachpkg(&res, 1, acpicpu_add_cstatepkg, sc);
	aml_freevalue(&res);

	/* only have fallback state?  then no _CST objects were understood */
	cx = SLIST_FIRST(&sc->sc_cstates);
	if (cx->flags & CST_FLAG_FALLBACK)
		return (1);

	/*
	 * Skip states >= C2 if the CPU's LAPIC timer stops in deep
	 * states (i.e., it doesn't have the 'ARAT' bit set).
	 * Also keep track if all the states we'll use use mwait.
	 * NOTE(review): the loop terminates before examining the final
	 * list element (the oldest insertion, normally C1), so that
	 * entry never contributes to use_nonmwait — confirm intended.
	 */
	use_nonmwait = 0;
	while ((next_cx = SLIST_NEXT(cx, link)) != NULL) {
		if (cx->state > 1 &&
		    (sc->sc_ci->ci_feature_tpmflags & TPM_ARAT) == 0)
			cx->flags |= CST_FLAG_SKIP;
		else if (cx->method != CST_METH_MWAIT)
			use_nonmwait = 1;
		cx = next_cx;
	}
	if (use_nonmwait)
		sc->sc_flags &= ~FLAGS_MWAIT_ONLY;
	else
		sc->sc_flags |= FLAGS_MWAIT_ONLY;

	if (!aml_evalname(sc->sc_acpi, sc->sc_devnode, "_CSD", 0, NULL, &res)) {
		aml_foreachpkg(&res, 1, acpicpu_add_cdeppkg, sc);
		aml_freevalue(&res);
	}

	return (0);
}
557 
558 /*
559  * old-style fixed C-state info in the FADT.
560  * Note that this has extra restrictions on values and flags.
561  */
562 void
563 acpicpu_getcst_from_fadt(struct acpicpu_softc *sc)
564 {
565 	struct acpi_fadt	*fadt = sc->sc_acpi->sc_fadt;
566 	int flags;
567 
568 	/* FADT has to set flag to do C2 and higher on MP */
569 	if ((fadt->flags & FADT_P_LVL2_UP) == 0 && ncpus > 1)
570 		return;
571 
572 	/* skip these C2 and C3 states if the CPU doesn't have ARAT */
573 	flags = (sc->sc_ci->ci_feature_tpmflags & TPM_ARAT)
574 	    ? 0 : CST_FLAG_SKIP;
575 
576 	/* Some systems don't export a full PBLK; reduce functionality */
577 	if (sc->sc_pblk_len >= 5 && fadt->p_lvl2_lat <= ACPI_MAX_C2_LATENCY) {
578 		acpicpu_add_cstate(sc, ACPI_STATE_C2, CST_METH_GAS_IO, flags,
579 		    fadt->p_lvl2_lat, -1, sc->sc_pblk_addr + 4);
580 	}
581 	if (sc->sc_pblk_len >= 6 && fadt->p_lvl3_lat <= ACPI_MAX_C3_LATENCY)
582 		acpicpu_add_cstate(sc, ACPI_STATE_C3, CST_METH_GAS_IO, flags,
583 		    fadt->p_lvl3_lat, -1, sc->sc_pblk_addr + 5);
584 }
585 
586 
587 void
588 acpicpu_print_one_cst(struct acpi_cstate *cx)
589 {
590 	const char *meth = "";
591 	int show_addr = 0;
592 
593 	switch (cx->method) {
594 	case CST_METH_IO_HALT:
595 		show_addr = 1;
596 		/* fallthrough */
597 	case CST_METH_HALT:
598 		meth = " halt";
599 		break;
600 
601 	case CST_METH_MWAIT:
602 		meth = " mwait";
603 		show_addr = cx->address != 0;
604 		break;
605 
606 	case CST_METH_GAS_IO:
607 		meth = " io";
608 		show_addr = 1;
609 		break;
610 
611 	}
612 
613 	printf(" %sC%d(", (cx->flags & CST_FLAG_SKIP ? "!" : ""), cx->state);
614 	if (cx->power != -1)
615 		printf("%d", cx->power);
616 	printf("@%d%s", cx->latency, meth);
617 	if (cx->flags & ~CST_FLAG_SKIP) {
618 		if (cx->flags & CST_FLAG_FALLBACK)
619 			printf("!");
620 		else
621 			printf(".%x", (cx->flags & ~CST_FLAG_SKIP));
622 	}
623 	if (show_addr)
624 		printf("@0x%llx", cx->address);
625 	printf(")");
626 }
627 
628 void
629 acpicpu_print_cst(struct acpicpu_softc *sc)
630 {
631 	struct acpi_cstate	*cx;
632 	int i;
633 
634 	if (!SLIST_EMPTY(&sc->sc_cstates)) {
635 		printf(":");
636 
637 		i = 0;
638 		SLIST_FOREACH(cx, &sc->sc_cstates, link) {
639 			if (i++)
640 				printf(",");
641 			acpicpu_print_one_cst(cx);
642 		}
643 	}
644 }
645 
646 
647 int
648 acpicpu_match(struct device *parent, void *match, void *aux)
649 {
650 	struct acpi_attach_args	*aa = aux;
651 	struct cfdata		*cf = match;
652 
653 	/* sanity */
654 	if (aa->aaa_name == NULL ||
655 	    strcmp(aa->aaa_name, cf->cf_driver->cd_name) != 0 ||
656 	    aa->aaa_table != NULL)
657 		return (0);
658 
659 	return (1);
660 }
661 
/*
 * Attach: bind this node to its cpu_info, negotiate capabilities,
 * collect C-states (_CST or FADT) and P-states (_PSS/_PCT/_PPC),
 * install the idle and setperf hooks, then print a summary line.
 */
void
acpicpu_attach(struct device *parent, struct device *self, void *aux)
{
	struct acpicpu_softc	*sc = (struct acpicpu_softc *)self;
	struct acpi_attach_args *aa = aux;
	struct aml_value	res;
	int			i;
	uint32_t		status = 0;
	CPU_INFO_ITERATOR	cii;
	struct cpu_info		*ci;

	sc->sc_acpi = (struct acpi_softc *)parent;
	sc->sc_devnode = aa->aaa_node;
	acpicpu_sc[sc->sc_dev.dv_unit] = sc;

	SLIST_INIT(&sc->sc_cstates);

	/* pull processor id and P_BLK location from the Processor() object */
	if (aml_evalnode(sc->sc_acpi, sc->sc_devnode, 0, NULL, &res) == 0) {
		if (res.type == AML_OBJTYPE_PROCESSOR) {
			sc->sc_cpu = res.v_processor.proc_id;
			sc->sc_pblk_addr = res.v_processor.proc_addr;
			sc->sc_pblk_len = res.v_processor.proc_len;
		}
		aml_freevalue(&res);
	}
	sc->sc_duty_off = sc->sc_acpi->sc_fadt->duty_offset;
	sc->sc_duty_wid = sc->sc_acpi->sc_fadt->duty_width;

	/* link in the matching cpu_info */
	/* relies on CPU_INFO_FOREACH leaving ci NULL when exhausted */
	CPU_INFO_FOREACH(cii, ci)
		if (ci->ci_acpi_proc_id == sc->sc_cpu) {
			ci->ci_acpicpudev = self;
			sc->sc_ci = ci;
			break;
		}
	if (ci == NULL) {
		printf(": no cpu matching ACPI ID %d\n", sc->sc_cpu);
		return;
	}

	sc->sc_prev_sleep = 1000000;

	acpicpu_set_pdc(sc);

	if (!valid_throttle(sc->sc_duty_off, sc->sc_duty_wid, sc->sc_pblk_addr))
		sc->sc_flags |= FLAGS_NOTHROTTLE;
#ifdef ACPI_DEBUG
	printf(": %s: ", sc->sc_devnode->name);
	printf("\n: hdr:%x pblk:%x,%x duty:%x,%x pstate:%x "
	       "(%ld throttling states)\n", sc->sc_acpi->sc_fadt->hdr_revision,
		sc->sc_pblk_addr, sc->sc_pblk_len, sc->sc_duty_off,
		sc->sc_duty_wid, sc->sc_acpi->sc_fadt->pstate_cnt,
		CPU_MAXSTATE(sc));
#endif

	/* Get C-States from _CST or FADT */
	if (acpicpu_getcst(sc) || SLIST_EMPTY(&sc->sc_cstates))
		acpicpu_getcst_from_fadt(sc);
	else {
		/* Notify BIOS we use _CST objects */
		if (sc->sc_acpi->sc_fadt->cst_cnt) {
			acpi_write_pmreg(sc->sc_acpi, ACPIREG_SMICMD, 0,
			    sc->sc_acpi->sc_fadt->cst_cnt);
		}
	}
	if (!SLIST_EMPTY(&sc->sc_cstates)) {
		extern uint32_t acpi_force_bm;

		cpu_idle_cycle_fcn = &acpicpu_idle;

		/*
		 * C3 (and maybe C2?) needs BM_RLD to be set to
		 * wake the system
		 */
		if (SLIST_FIRST(&sc->sc_cstates)->state > 1 && acpi_force_bm == 0) {
			uint16_t en = acpi_read_pmreg(sc->sc_acpi,
			    ACPIREG_PM1_CNT, 0);
			if ((en & ACPI_PM1_BM_RLD) == 0) {
				acpi_write_pmreg(sc->sc_acpi, ACPIREG_PM1_CNT,
				    0, en | ACPI_PM1_BM_RLD);
				acpi_force_bm = ACPI_PM1_BM_RLD;
			}
		}
	}

	if (acpicpu_getpss(sc)) {
		sc->sc_flags |= FLAGS_NOPSS;
	} else {
#ifdef ACPI_DEBUG
		for (i = 0; i < sc->sc_pss_len; i++) {
			dnprintf(20, "%d %d %d %d %d %d\n",
			    sc->sc_pss[i].pss_core_freq,
			    sc->sc_pss[i].pss_power,
			    sc->sc_pss[i].pss_trans_latency,
			    sc->sc_pss[i].pss_bus_latency,
			    sc->sc_pss[i].pss_ctrl,
			    sc->sc_pss[i].pss_status);
		}
		dnprintf(20, "\n");
#endif
		if (sc->sc_pss_len == 0) {
			/* this should never happen */
			printf("%s: invalid _PSS length\n", DEVNAME(sc));
			sc->sc_flags |= FLAGS_NOPSS;
		}

		acpicpu_getppc(sc);
		if (acpicpu_getpct(sc))
			sc->sc_flags |= FLAGS_NOPCT;
		else if (sc->sc_pss_len > 0) {
			/* Notify BIOS we are handling p-states */
			if (sc->sc_acpi->sc_fadt->pstate_cnt) {
				acpi_write_pmreg(sc->sc_acpi, ACPIREG_SMICMD,
				    0, sc->sc_acpi->sc_fadt->pstate_cnt);
			}

			aml_register_notify(sc->sc_devnode, NULL,
			    acpicpu_notify, sc, ACPIDEV_NOPOLL);

			/*
			 * read the current P-state index to seed sc_level
			 * NOTE(review): sc_pct_stat_as is passed twice here
			 * while acpicpu_setperf passes (as, len) — possible
			 * copy-paste; verify against acpi_gasio's contract.
			 */
			acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
			    sc->sc_pct.pct_status.grd_gas.address_space_id,
			    sc->sc_pct.pct_status.grd_gas.address,
			    sc->sc_pct_stat_as, sc->sc_pct_stat_as, &status);
			sc->sc_level = (100 / sc->sc_pss_len) *
			    (sc->sc_pss_len - status);
			dnprintf(20, "%s: cpu index %d, percentage %d\n",
			    DEVNAME(sc), status, sc->sc_level);
			if (setperf_prio < 30) {
				cpu_setperf = acpicpu_setperf;
				acpicpu_set_notify(acpicpu_setperf_ppc_change);
				setperf_prio = 30;
				acpi_hasprocfvs = 1;
			}
		}
	}

	/*
	 * Nicely enumerate what power management capabilities
	 * ACPI CPU provides.
	 */
	acpicpu_print_cst(sc);
	if (!(sc->sc_flags & (FLAGS_NOPSS | FLAGS_NOPCT)) ||
	    !(sc->sc_flags & FLAGS_NOPSS)) {
		printf("%c ", SLIST_EMPTY(&sc->sc_cstates) ? ':' : ',');

		/*
		 * If acpicpu is itself providing the capability to transition
		 * states, enumerate them in the fashion that est and powernow
		 * would.
		 */
		if (!(sc->sc_flags & (FLAGS_NOPSS | FLAGS_NOPCT))) {
			printf("FVS, ");
			for (i = 0; i < sc->sc_pss_len - 1; i++)
				printf("%d, ", sc->sc_pss[i].pss_core_freq);
			printf("%d MHz", sc->sc_pss[i].pss_core_freq);
		} else
			printf("PSS");
	}

	printf("\n");
}
823 
824 int
825 acpicpu_getppc(struct acpicpu_softc *sc)
826 {
827 	struct aml_value	res;
828 
829 	sc->sc_ppc = 0;
830 
831 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PPC", 0, NULL, &res)) {
832 		dnprintf(10, "%s: no _PPC\n", DEVNAME(sc));
833 		return (1);
834 	}
835 
836 	sc->sc_ppc = aml_val2int(&res);
837 	dnprintf(10, "%s: _PPC: %d\n", DEVNAME(sc), sc->sc_ppc);
838 	aml_freevalue(&res);
839 
840 	return (0);
841 }
842 
843 int
844 acpicpu_getpct(struct acpicpu_softc *sc)
845 {
846 	struct aml_value	res;
847 	int			rv = 1;
848 
849 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PCT", 0, NULL, &res)) {
850 		dnprintf(20, "%s: no _PCT\n", DEVNAME(sc));
851 		return (1);
852 	}
853 
854 	if (res.length != 2) {
855 		dnprintf(20, "%s: %s: invalid _PCT length\n", DEVNAME(sc),
856 		    sc->sc_devnode->name);
857 		return (1);
858 	}
859 
860 	memcpy(&sc->sc_pct.pct_ctrl, res.v_package[0]->v_buffer,
861 	    sizeof sc->sc_pct.pct_ctrl);
862 	if (sc->sc_pct.pct_ctrl.grd_gas.address_space_id ==
863 	    GAS_FUNCTIONAL_FIXED) {
864 		dnprintf(20, "CTRL GASIO is functional fixed hardware.\n");
865 		goto ffh;
866 	}
867 
868 	memcpy(&sc->sc_pct.pct_status, res.v_package[1]->v_buffer,
869 	    sizeof sc->sc_pct.pct_status);
870 	if (sc->sc_pct.pct_status.grd_gas.address_space_id ==
871 	    GAS_FUNCTIONAL_FIXED) {
872 		dnprintf(20, "CTRL GASIO is functional fixed hardware.\n");
873 		goto ffh;
874 	}
875 
876 	dnprintf(10, "_PCT(ctrl)  : %02x %04x %02x %02x %02x %02x %016llx\n",
877 	    sc->sc_pct.pct_ctrl.grd_descriptor,
878 	    sc->sc_pct.pct_ctrl.grd_length,
879 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
880 	    sc->sc_pct.pct_ctrl.grd_gas.register_bit_width,
881 	    sc->sc_pct.pct_ctrl.grd_gas.register_bit_offset,
882 	    sc->sc_pct.pct_ctrl.grd_gas.access_size,
883 	    sc->sc_pct.pct_ctrl.grd_gas.address);
884 
885 	dnprintf(10, "_PCT(status): %02x %04x %02x %02x %02x %02x %016llx\n",
886 	    sc->sc_pct.pct_status.grd_descriptor,
887 	    sc->sc_pct.pct_status.grd_length,
888 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
889 	    sc->sc_pct.pct_status.grd_gas.register_bit_width,
890 	    sc->sc_pct.pct_status.grd_gas.register_bit_offset,
891 	    sc->sc_pct.pct_status.grd_gas.access_size,
892 	    sc->sc_pct.pct_status.grd_gas.address);
893 
894 	/* if not set assume single 32 bit access */
895 	sc->sc_pct_stat_as = sc->sc_pct.pct_status.grd_gas.register_bit_width
896 	    / 8;
897 	if (sc->sc_pct_stat_as == 0)
898 		sc->sc_pct_stat_as = 4;
899 	sc->sc_pct_ctrl_as = sc->sc_pct.pct_ctrl.grd_gas.register_bit_width / 8;
900 	if (sc->sc_pct_ctrl_as == 0)
901 		sc->sc_pct_ctrl_as = 4;
902 	sc->sc_pct_stat_len = sc->sc_pct.pct_status.grd_gas.access_size;
903 	if (sc->sc_pct_stat_len == 0)
904 		sc->sc_pct_stat_len = sc->sc_pct_stat_as;
905 	sc->sc_pct_ctrl_len = sc->sc_pct.pct_ctrl.grd_gas.access_size;
906 	if (sc->sc_pct_ctrl_len == 0)
907 		sc->sc_pct_ctrl_len = sc->sc_pct_ctrl_as;
908 
909 	rv = 0;
910 ffh:
911 	aml_freevalue(&res);
912 	return (rv);
913 }
914 
915 int
916 acpicpu_getpss(struct acpicpu_softc *sc)
917 {
918 	struct aml_value	res;
919 	int			i, c, cf;
920 
921 	if (aml_evalname(sc->sc_acpi, sc->sc_devnode, "_PSS", 0, NULL, &res)) {
922 		dprintf("%s: no _PSS\n", DEVNAME(sc));
923 		return (1);
924 	}
925 
926 	free(sc->sc_pss, M_DEVBUF, sc->sc_pssfulllen);
927 
928 	sc->sc_pss = mallocarray(res.length, sizeof(*sc->sc_pss), M_DEVBUF,
929 	    M_WAITOK | M_ZERO);
930 	sc->sc_pssfulllen = res.length * sizeof(*sc->sc_pss);
931 
932 	c = 0;
933 	for (i = 0; i < res.length; i++) {
934 		cf = aml_val2int(res.v_package[i]->v_package[0]);
935 
936 		/* This heuristic comes from FreeBSDs
937 		 * dev/acpica/acpi_perf.c to weed out invalid PSS entries.
938 		 */
939 		if (cf == sc->sc_pss[c].pss_core_freq) {
940 			printf("%s: struck PSS entry, core frequency equals "
941 			    " last\n", sc->sc_dev.dv_xname);
942 			continue;
943 		}
944 
945 		if (cf == 0xFFFF || cf == 0x9999 || cf == 99999 || cf == 0) {
946 			printf("%s: struck PSS entry, inappropriate core "
947 			    "frequency value\n", sc->sc_dev.dv_xname);
948 			continue;
949 		}
950 
951 		sc->sc_pss[c].pss_core_freq = cf;
952 		sc->sc_pss[c].pss_power = aml_val2int(
953 		    res.v_package[i]->v_package[1]);
954 		sc->sc_pss[c].pss_trans_latency = aml_val2int(
955 		    res.v_package[i]->v_package[2]);
956 		sc->sc_pss[c].pss_bus_latency = aml_val2int(
957 		    res.v_package[i]->v_package[3]);
958 		sc->sc_pss[c].pss_ctrl = aml_val2int(
959 		    res.v_package[i]->v_package[4]);
960 		sc->sc_pss[c].pss_status = aml_val2int(
961 		    res.v_package[i]->v_package[5]);
962 		c++;
963 	}
964 	sc->sc_pss_len = c;
965 
966 	aml_freevalue(&res);
967 
968 	return (0);
969 }
970 
971 int
972 acpicpu_fetch_pss(struct acpicpu_pss **pss)
973 {
974 	struct acpicpu_softc	*sc;
975 
976 	/*
977 	 * XXX: According to the ACPI spec in an SMP system all processors
978 	 * are supposed to support the same states. For now we pray
979 	 * the bios ensures this...
980 	 */
981 
982 	sc = acpicpu_sc[0];
983 	if (!sc)
984 		return 0;
985 	*pss = sc->sc_pss;
986 
987 	return (sc->sc_pss_len);
988 }
989 
990 int
991 acpicpu_notify(struct aml_node *node, int notify_type, void *arg)
992 {
993 	struct acpicpu_softc	*sc = arg;
994 
995 	dnprintf(10, "acpicpu_notify: %.2x %s\n", notify_type,
996 	    sc->sc_devnode->name);
997 
998 	switch (notify_type) {
999 	case 0x80:	/* _PPC changed, retrieve new values */
1000 		acpicpu_getppc(sc);
1001 		acpicpu_getpss(sc);
1002 		if (sc->sc_notify)
1003 			sc->sc_notify(sc->sc_pss, sc->sc_pss_len);
1004 		break;
1005 
1006 	case 0x81:	/* _CST changed, retrieve new values */
1007 		acpicpu_getcst(sc);
1008 		printf("%s: notify", DEVNAME(sc));
1009 		acpicpu_print_cst(sc);
1010 		printf("\n");
1011 		break;
1012 
1013 	default:
1014 		printf("%s: unhandled cpu event %x\n", DEVNAME(sc),
1015 		    notify_type);
1016 		break;
1017 	}
1018 
1019 	return (0);
1020 }
1021 
1022 void
1023 acpicpu_set_notify(void (*func)(struct acpicpu_pss *, int))
1024 {
1025 	struct acpicpu_softc    *sc;
1026 
1027 	sc = acpicpu_sc[0];
1028 	if (sc != NULL)
1029 		sc->sc_notify = func;
1030 }
1031 
1032 void
1033 acpicpu_setperf_ppc_change(struct acpicpu_pss *pss, int npss)
1034 {
1035 	struct acpicpu_softc    *sc;
1036 
1037 	sc = acpicpu_sc[0];
1038 
1039 	if (sc != NULL)
1040 		cpu_setperf(sc->sc_level);
1041 }
1042 
1043 void
1044 acpicpu_setperf(int level)
1045 {
1046 	struct acpicpu_softc	*sc;
1047 	struct acpicpu_pss	*pss = NULL;
1048 	int			idx, len;
1049 	uint32_t		status = 0;
1050 
1051 	sc = acpicpu_sc[cpu_number()];
1052 
1053 	dnprintf(10, "%s: acpicpu setperf level %d\n",
1054 	    sc->sc_devnode->name, level);
1055 
1056 	if (level < 0 || level > 100) {
1057 		dnprintf(10, "%s: acpicpu setperf illegal percentage\n",
1058 		    sc->sc_devnode->name);
1059 		return;
1060 	}
1061 
1062 	/*
1063 	 * XXX this should be handled more gracefully and it needs to also do
1064 	 * the duty cycle method instead of pss exclusively
1065 	 */
1066 	if (sc->sc_flags & FLAGS_NOPSS || sc->sc_flags & FLAGS_NOPCT) {
1067 		dnprintf(10, "%s: acpicpu no _PSS or _PCT\n",
1068 		    sc->sc_devnode->name);
1069 		return;
1070 	}
1071 
1072 	if (sc->sc_ppc)
1073 		len = sc->sc_ppc;
1074 	else
1075 		len = sc->sc_pss_len;
1076 	idx = (len - 1) - (level / (100 / len));
1077 	if (idx < 0)
1078 		idx = 0;
1079 
1080 	if (sc->sc_ppc)
1081 		idx += sc->sc_pss_len - sc->sc_ppc;
1082 
1083 	if (idx > sc->sc_pss_len)
1084 		idx = sc->sc_pss_len - 1;
1085 
1086 	dnprintf(10, "%s: acpicpu setperf index %d pss_len %d ppc %d\n",
1087 	    sc->sc_devnode->name, idx, sc->sc_pss_len, sc->sc_ppc);
1088 
1089 	pss = &sc->sc_pss[idx];
1090 
1091 #ifdef ACPI_DEBUG
1092 	/* keep this for now since we will need this for debug in the field */
1093 	printf("0 status: %x %llx %u %u ctrl: %x %llx %u %u\n",
1094 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1095 	    sc->sc_pct.pct_status.grd_gas.address,
1096 	    sc->sc_pct_stat_as, sc->sc_pct_stat_len,
1097 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
1098 	    sc->sc_pct.pct_ctrl.grd_gas.address,
1099 	    sc->sc_pct_ctrl_as, sc->sc_pct_ctrl_len);
1100 #endif
1101 	acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
1102 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1103 	    sc->sc_pct.pct_status.grd_gas.address, sc->sc_pct_stat_as,
1104 	    sc->sc_pct_stat_len, &status);
1105 	dnprintf(20, "1 status: %u <- %u\n", status, pss->pss_status);
1106 
1107 	/* Are we already at the requested frequency? */
1108 	if (status == pss->pss_status)
1109 		return;
1110 
1111 	acpi_gasio(sc->sc_acpi, ACPI_IOWRITE,
1112 	    sc->sc_pct.pct_ctrl.grd_gas.address_space_id,
1113 	    sc->sc_pct.pct_ctrl.grd_gas.address, sc->sc_pct_ctrl_as,
1114 	    sc->sc_pct_ctrl_len, &pss->pss_ctrl);
1115 	dnprintf(20, "pss_ctrl: %x\n", pss->pss_ctrl);
1116 
1117 	acpi_gasio(sc->sc_acpi, ACPI_IOREAD,
1118 	    sc->sc_pct.pct_status.grd_gas.address_space_id,
1119 	    sc->sc_pct.pct_status.grd_gas.address, sc->sc_pct_stat_as,
1120 	    sc->sc_pct_stat_as, &status);
1121 	dnprintf(20, "2 status: %d\n", status);
1122 
1123 	/* Did the transition succeed? */
1124 	 if (status == pss->pss_status) {
1125 		cpuspeed = pss->pss_core_freq;
1126 		sc->sc_level = level;
1127 	} else
1128 		printf("%s: acpicpu setperf failed to alter frequency\n",
1129 		    sc->sc_devnode->name);
1130 }
1131 
/*
 * Idle the CPU using the C-states gathered from _CST.
 *
 * Picks the deepest C-state whose wakeup latency is acceptable given
 * the recent sleep-time estimate (sc_prev_sleep), enters it via the
 * state's method (HLT, I/O-then-HLT, MWAIT, or a GAS I/O read), then
 * folds the measured/estimated idle time back into sc_prev_sleep for
 * the next selection.
 */
void
acpicpu_idle(void)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc = (struct acpicpu_softc *)ci->ci_acpicpudev;
	struct acpi_cstate *best, *cx;
	unsigned long itime;	/* idle time estimate, in usec */

	if (sc == NULL) {
		/* re-enable interrupts so the panic can make progress */
		__asm volatile("sti");
		panic("null acpicpu");
	}

	/* possibly update the MWAIT_ONLY flag in cpu_info */
	if (sc->sc_flags & FLAGS_MWAIT_ONLY) {
		if ((ci->ci_mwait & MWAIT_ONLY) == 0)
			atomic_setbits_int(&ci->ci_mwait, MWAIT_ONLY);
	} else if (ci->ci_mwait & MWAIT_ONLY)
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_ONLY);

	/*
	 * Find the first state with a latency we'll accept, ignoring
	 * states marked skippable
	 */
	/*
	 * A state is acceptable when latency * 3 <= sc_prev_sleep, i.e.
	 * its wakeup cost is small relative to how long we've recently
	 * been sleeping.  NOTE(review): assumes sc_cstates is ordered
	 * deepest-first and non-empty — confirm against where the list
	 * is built from _CST.
	 */
	best = cx = SLIST_FIRST(&sc->sc_cstates);
	while ((cx->flags & CST_FLAG_SKIP) ||
	    cx->latency * 3 > sc->sc_prev_sleep) {
		if ((cx = SLIST_NEXT(cx, link)) == NULL)
			break;
		best = cx;
	}

	/*
	 * Bus-master avoidance: if we chose C3 or deeper, the state is
	 * flagged for BM avoidance, and the PM1 bus-master status bit is
	 * set, then clear the bit and fall back to a shallower state that
	 * doesn't need BM avoidance.
	 * NOTE(review): if no following state satisfies the break
	 * condition, cx (and thus best) ends up NULL and the dereference
	 * below would fault; this appears unreachable as long as the list
	 * ends with a sub-C3 state — confirm.
	 */
	if (best->state >= 3 &&
	    (best->flags & CST_FLAG_MWAIT_BM_AVOIDANCE) &&
	    acpi_read_pmreg(acpi_softc, ACPIREG_PM1_STS, 0) & ACPI_PM1_BM_STS) {
		/* clear it and back off */
		acpi_write_pmreg(acpi_softc, ACPIREG_PM1_STS, 0,
		    ACPI_PM1_BM_STS);
		while ((cx = SLIST_NEXT(cx, link)) != NULL) {
			if (cx->flags & CST_FLAG_SKIP)
				continue;
			if (cx->state < 3 ||
			    (cx->flags & CST_FLAG_MWAIT_BM_AVOIDANCE) == 0)
				break;
		}
		best = cx;
	}


	/* per-state usage counters, exported elsewhere for statistics */
	atomic_inc_long(&cst_stats[best->state]);

	/* default estimate (half a tick) for methods we can't time */
	itime = tick / 2;
	switch (best->method) {
	default:
	case CST_METH_HALT:
		__asm volatile("sti; hlt");
		break;

	case CST_METH_IO_HALT:
		/* the I/O read triggers the state entry, then halt */
		inb((u_short)best->address);
		__asm volatile("sti; hlt");
		break;

	case CST_METH_MWAIT:
		{
		struct timeval start, stop;
		unsigned int hints;

#ifdef __LP64__
		if ((read_rflags() & PSL_I) == 0)
			panic("idle with interrupts blocked!");
#else
		if ((read_eflags() & PSL_I) == 0)
			panic("idle with interrupts blocked!");
#endif

		/* something already queued? */
		if (!cpu_is_idle(ci))
			return;

		/*
		 * About to idle; setting the MWAIT_IN_IDLE bit tells
		 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
		 * that it doesn't need to use an IPI.  We also set the
		 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
		 * the mwait.  Once they're set, we do a final check of the
		 * queue, in case another cpu called setrunqueue() and added
		 * something to the queue and called cpu_unidle() between
		 * the check in sched_idle() and here.
		 */
		/* for MWAIT states _CST stores the hint in the address field */
		hints = (unsigned)best->address;
		microuptime(&start);
		atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING);
		if (cpu_is_idle(ci)) {
			/* intel errata AAI65: cflush before monitor */
			if (ci->ci_cflushsz != 0) {
				membar_sync();
				clflush((unsigned long)&ci->ci_mwait);
				membar_sync();
			}

			/* arm the monitor on ci_mwait, re-check, then wait */
			monitor(&ci->ci_mwait, 0, 0);
			if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
				mwait(0, hints);
		}

		/* measure how long we actually idled */
		microuptime(&stop);
		timersub(&stop, &start, &stop);
		itime = stop.tv_sec * 1000000 + stop.tv_usec;

		/* done idling; let cpu_kick() know that an IPI is required */
		atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
		break;
		}

	case CST_METH_GAS_IO:
		inb((u_short)best->address);
		/* something harmless to give system time to change state */
		acpi_read_pmreg(acpi_softc, ACPIREG_PM1_STS, 0);
		break;

	}

	/*
	 * Record the raw idle time and fold half of it into the weighted
	 * moving average used by the latency check on the next entry.
	 */
	sc->sc_last_itime = itime;
	itime >>= 1;
	sc->sc_prev_sleep = (sc->sc_prev_sleep + (sc->sc_prev_sleep >> 1)
	    + itime) >> 1;
}
1260