xref: /dflybsd-src/sys/dev/acpica/acpi_cpu_cstate.c (revision 24da862f13a89f853ba0298c291319a87ef4f4d5)
1 /*-
2  * Copyright (c) 2003-2005 Nate Lawson (SDG)
3  * Copyright (c) 2001 Michael Smith
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: src/sys/dev/acpica/acpi_cpu.c,v 1.72 2008/04/12 12:06:00 rpaulo Exp $
28  */
29 
30 #include "opt_acpi.h"
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/globaldata.h>
36 #include <sys/power.h>
37 #include <sys/proc.h>
38 #include <sys/sbuf.h>
39 #include <sys/thread2.h>
40 #include <sys/serialize.h>
41 #include <sys/msgport2.h>
42 #include <sys/microtime_pcpu.h>
43 
44 #include <bus/pci/pcivar.h>
45 #include <machine/atomic.h>
46 #include <machine/globaldata.h>
47 #include <machine/md_var.h>
48 #include <machine/smp.h>
49 #include <sys/rman.h>
50 
51 #include <net/netisr2.h>
52 #include <net/netmsg2.h>
53 #include <net/if_var.h>
54 
55 #include "acpi.h"
56 #include "acpivar.h"
57 #include "acpi_cpu.h"
58 
59 /*
60  * Support for ACPI Processor devices, including C[1-3+] sleep states.
61  */
62 
63 /* Hooks for the ACPI CA debugging infrastructure */
64 #define _COMPONENT	ACPI_PROCESSOR
65 ACPI_MODULE_NAME("PROCESSOR")
66 
67 struct netmsg_acpi_cst {
68 	struct netmsg_base base;
69 	struct acpi_cst_softc *sc;
70 	int		val;
71 };
72 
73 struct acpi_cst_cx {
74     uint32_t		type;		/* C1-3+. */
75     uint32_t		trans_lat;	/* Transition latency (usec). */
76     void		(*enter)(const struct acpi_cst_cx *);
77     bus_space_tag_t	btag;
78     bus_space_handle_t	bhand;
79 
80     struct resource	*p_lvlx;	/* Register to read to enter state. */
81     ACPI_GENERIC_ADDRESS gas;
82     int			rid;		/* rid of p_lvlx */
83     uint32_t		power;		/* Power consumed (mW). */
84     int			res_type;	/* Resource type for p_lvlx. */
85 };
86 #define MAX_CX_STATES	 8
87 
88 struct acpi_cst_softc {
89     device_t		cst_dev;
90     struct acpi_cpux_softc *cst_parent;
91     ACPI_HANDLE		cst_handle;
92     int			cst_cpuid;
93     uint32_t		cst_flags;	/* ACPI_CST_FLAG_ */
94     uint32_t		cst_p_blk;	/* ACPI P_BLK location */
95     uint32_t		cst_p_blk_len;	/* P_BLK length (must be 6). */
96     struct acpi_cst_cx	cst_cx_states[MAX_CX_STATES];
97     int			cst_cx_count;	/* Number of valid Cx states. */
98     int			cst_prev_sleep;	/* Last idle sleep duration. */
99     /* Runtime state. */
100     int			cst_non_c3;	/* Index of lowest non-C3 state. */
101     u_long		cst_cx_stats[MAX_CX_STATES];/* Cx usage history. */
102     /* Values for sysctl. */
103     int			cst_cx_lowest;	/* Current Cx lowest */
104     int			cst_cx_lowest_req; /* Requested Cx lowest */
105     char 		cst_cx_supported[64];
106 };
107 
108 #define ACPI_CST_FLAG_PROBING	0x1
109 
110 #define ACPI_CST_QUIRK_NO_C3	(1<<0)	/* C3-type states are not usable. */
111 #define ACPI_CST_QUIRK_NO_BM_CTRL (1<<2) /* No bus mastering control. */
112 
113 #define PCI_VENDOR_INTEL	0x8086
114 #define PCI_DEVICE_82371AB_3	0x7113	/* PIIX4 chipset for quirks. */
115 #define PCI_REVISION_A_STEP	0
116 #define PCI_REVISION_B_STEP	1
117 #define PCI_REVISION_4E		2
118 #define PCI_REVISION_4M		3
119 #define PIIX4_DEVACTB_REG	0x58
120 #define PIIX4_BRLD_EN_IRQ0	(1<<0)
121 #define PIIX4_BRLD_EN_IRQ	(1<<1)
122 #define PIIX4_BRLD_EN_IRQ8	(1<<5)
123 #define PIIX4_STOP_BREAK_MASK	(PIIX4_BRLD_EN_IRQ0 | \
124 				 PIIX4_BRLD_EN_IRQ | \
125 				 PIIX4_BRLD_EN_IRQ8)
126 #define PIIX4_PCNTRL_BST_EN	(1<<10)
127 
128 /* Platform hardware resource information. */
129 static uint32_t		 acpi_cst_smi_cmd; /* Value to write to SMI_CMD. */
130 static uint8_t		 acpi_cst_ctrl;	/* Indicate we are _CST aware. */
131 static int		 acpi_cst_quirks; /* Indicate any hardware bugs. */
132 static boolean_t	 acpi_cst_use_fadt;
133 
134 /* Runtime state. */
135 static int		 acpi_cst_disable_idle;
136 					/* Disable entry to idle function */
137 static int		 acpi_cst_cx_count; /* Number of valid Cx states */
138 
139 /* Values for sysctl. */
140 static int		 acpi_cst_cx_lowest; /* Current Cx lowest */
141 static int		 acpi_cst_cx_lowest_req; /* Requested Cx lowest */
142 
143 /* Number of C3 state requesters */
144 static int		 acpi_cst_c3_reqs;
145 
146 static device_t		*acpi_cst_devices;
147 static int		 acpi_cst_ndevices;
148 static struct acpi_cst_softc **acpi_cst_softc;
149 static struct lwkt_serialize acpi_cst_slize = LWKT_SERIALIZE_INITIALIZER;
150 
151 static int	acpi_cst_probe(device_t);
152 static int	acpi_cst_attach(device_t);
153 static int	acpi_cst_suspend(device_t);
154 static int	acpi_cst_resume(device_t);
155 static int	acpi_cst_shutdown(device_t);
156 
157 static void	acpi_cst_notify(device_t);
158 static void	acpi_cst_postattach(void *);
159 static void	acpi_cst_idle(void);
160 
161 static void	acpi_cst_cx_probe(struct acpi_cst_softc *);
162 static void	acpi_cst_cx_probe_fadt(struct acpi_cst_softc *);
163 static int	acpi_cst_cx_probe_cst(struct acpi_cst_softc *, int);
164 static int	acpi_cst_cx_reprobe_cst(struct acpi_cst_softc *);
165 
166 static void	acpi_cst_startup(struct acpi_cst_softc *);
167 static void	acpi_cst_support_list(struct acpi_cst_softc *);
168 static int	acpi_cst_set_lowest(struct acpi_cst_softc *, int);
169 static int	acpi_cst_set_lowest_oncpu(struct acpi_cst_softc *, int);
170 static void	acpi_cst_non_c3(struct acpi_cst_softc *);
171 static void	acpi_cst_global_cx_count(void);
172 static int	acpi_cst_set_quirks(void);
173 static void	acpi_cst_c3_bm_rld(struct acpi_cst_softc *);
174 static void	acpi_cst_c1_halt(void);
175 
176 static int	acpi_cst_usage_sysctl(SYSCTL_HANDLER_ARGS);
177 static int	acpi_cst_lowest_sysctl(SYSCTL_HANDLER_ARGS);
178 static int	acpi_cst_lowest_use_sysctl(SYSCTL_HANDLER_ARGS);
179 static int	acpi_cst_global_lowest_sysctl(SYSCTL_HANDLER_ARGS);
180 static int	acpi_cst_global_lowest_use_sysctl(SYSCTL_HANDLER_ARGS);
181 
182 static void	acpi_cst_c1_halt_enter(const struct acpi_cst_cx *);
183 static void	acpi_cst_cx_io_enter(const struct acpi_cst_cx *);
184 
185 static device_method_t acpi_cst_methods[] = {
186     /* Device interface */
187     DEVMETHOD(device_probe,	acpi_cst_probe),
188     DEVMETHOD(device_attach,	acpi_cst_attach),
189     DEVMETHOD(device_detach,	bus_generic_detach),
190     DEVMETHOD(device_shutdown,	acpi_cst_shutdown),
191     DEVMETHOD(device_suspend,	acpi_cst_suspend),
192     DEVMETHOD(device_resume,	acpi_cst_resume),
193 
194     /* Bus interface */
195     DEVMETHOD(bus_add_child,	bus_generic_add_child),
196     DEVMETHOD(bus_read_ivar,	bus_generic_read_ivar),
197     DEVMETHOD(bus_get_resource_list, bus_generic_get_resource_list),
198     DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
199     DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
200     DEVMETHOD(bus_alloc_resource, bus_generic_rl_alloc_resource),
201     DEVMETHOD(bus_release_resource, bus_generic_rl_release_resource),
202     DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
203     DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
204     DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
205     DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
206     DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
207     DEVMETHOD_END
208 };
209 
210 static driver_t acpi_cst_driver = {
211     "cpu_cst",
212     acpi_cst_methods,
213     sizeof(struct acpi_cst_softc),
214 };
215 
216 static devclass_t acpi_cst_devclass;
217 DRIVER_MODULE(cpu_cst, cpu, acpi_cst_driver, acpi_cst_devclass, NULL, NULL);
218 MODULE_DEPEND(cpu_cst, acpi, 1, 1, 1);
219 
220 static int
221 acpi_cst_probe(device_t dev)
222 {
223     int cpu_id;
224 
225     if (acpi_disabled("cpu_cst") || acpi_get_type(dev) != ACPI_TYPE_PROCESSOR)
226 	return (ENXIO);
227 
228     cpu_id = acpi_get_magic(dev);
229 
230     if (acpi_cst_softc == NULL)
231 	acpi_cst_softc = kmalloc(sizeof(struct acpi_cst_softc *) *
232 	    SMP_MAXCPU, M_TEMP /* XXX */, M_INTWAIT | M_ZERO);
233 
234     /*
235      * Check if we already probed this processor.  We scan the bus twice
236      * so it's possible we've already seen this one.
237      */
238     if (acpi_cst_softc[cpu_id] != NULL) {
239 	device_printf(dev, "CPU%d cstate already exists\n", cpu_id);
240 	return (ENXIO);
241     }
242 
243     /* Mark this processor as in-use and save our derived id for attach. */
244     acpi_cst_softc[cpu_id] = (void *)1;
245     device_set_desc(dev, "ACPI CPU C-State");
246 
247     return (0);
248 }
249 
250 static int
251 acpi_cst_attach(device_t dev)
252 {
253     ACPI_BUFFER		   buf;
254     ACPI_OBJECT		   *obj;
255     struct acpi_cst_softc *sc;
256     ACPI_STATUS		   status;
257 
258     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
259 
260     sc = device_get_softc(dev);
261     sc->cst_dev = dev;
262     sc->cst_parent = device_get_softc(device_get_parent(dev));
263     sc->cst_handle = acpi_get_handle(dev);
264     sc->cst_cpuid = acpi_get_magic(dev);
265     acpi_cst_softc[sc->cst_cpuid] = sc;
266     acpi_cst_smi_cmd = AcpiGbl_FADT.SmiCommand;
267     acpi_cst_ctrl = AcpiGbl_FADT.CstControl;
268 
269     buf.Pointer = NULL;
270     buf.Length = ACPI_ALLOCATE_BUFFER;
271     status = AcpiEvaluateObject(sc->cst_handle, NULL, NULL, &buf);
272     if (ACPI_FAILURE(status)) {
273 	device_printf(dev, "attach failed to get Processor obj - %s\n",
274 		      AcpiFormatException(status));
275 	return (ENXIO);
276     }
277     obj = (ACPI_OBJECT *)buf.Pointer;
278     sc->cst_p_blk = obj->Processor.PblkAddress;
279     sc->cst_p_blk_len = obj->Processor.PblkLength;
280     AcpiOsFree(obj);
281     ACPI_DEBUG_PRINT((ACPI_DB_INFO, "cpu_cst%d: P_BLK at %#x/%d\n",
282 		     device_get_unit(dev), sc->cst_p_blk, sc->cst_p_blk_len));
283 
284     /*
285      * If this is the first cpu we attach, create and initialize the generic
286      * resources that will be used by all acpi cpu devices.
287      */
288     if (device_get_unit(dev) == 0) {
289 	/* Assume we won't be using FADT for Cx states by default */
290 	acpi_cst_use_fadt = FALSE;
291 
292 	/* Queue post cpu-probing task handler */
293 	AcpiOsExecute(OSL_NOTIFY_HANDLER, acpi_cst_postattach, NULL);
294     }
295 
296     /* Probe for Cx state support. */
297     acpi_cst_cx_probe(sc);
298 
299     /* Finally, call identify and probe/attach for child devices. */
300     bus_generic_probe(dev);
301     bus_generic_attach(dev);
302 
303     return (0);
304 }
305 
306 /*
307  * Disable any entry to the idle function during suspend and re-enable it
308  * during resume.
309  */
310 static int
311 acpi_cst_suspend(device_t dev)
312 {
313     int error;
314 
315     error = bus_generic_suspend(dev);
316     if (error)
317 	return (error);
318     acpi_cst_disable_idle = TRUE;
319     return (0);
320 }
321 
322 static int
323 acpi_cst_resume(device_t dev)
324 {
325     acpi_cst_disable_idle = FALSE;
326     return (bus_generic_resume(dev));
327 }
328 
329 static int
330 acpi_cst_shutdown(device_t dev)
331 {
332     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
333 
334     /* Allow children to shutdown first. */
335     bus_generic_shutdown(dev);
336 
337     /*
338      * Disable any entry to the idle function.  There is a small race where
339      * an idle thread has passed this check but not yet gone to sleep.  This
340      * is ok since device_shutdown() does not free the softc, otherwise
341      * we'd have to be sure all threads were evicted before returning.
342      */
343     acpi_cst_disable_idle = TRUE;
344 
345     return_VALUE (0);
346 }
347 
348 static void
349 acpi_cst_cx_probe(struct acpi_cst_softc *sc)
350 {
351     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
352 
353     /* Use initial sleep value of 1 sec. to start with lowest idle state. */
354     sc->cst_prev_sleep = 1000000;
355     sc->cst_cx_lowest = 0;
356     sc->cst_cx_lowest_req = 0;
357 
358     /*
359      * Check for the ACPI 2.0 _CST sleep states object.  If we can't find
360      * any, we'll revert to the FADT/P_BLK Cx control method, which is
361      * handled by acpi_cst_postattach.  Probing Cx states from the FADT
362      * must be deferred until all CPUs in the system have been probed,
363      * since we may already have found CPUs with valid _CST packages.
364      */
365     if (!acpi_cst_use_fadt && acpi_cst_cx_probe_cst(sc, 0) != 0) {
366 	/*
367 	 * We were unable to find a _CST package for this cpu or there
368 	 * was an error parsing it. Switch back to generic mode.
369 	 */
370 	acpi_cst_use_fadt = TRUE;
371 	if (bootverbose)
372 	    device_printf(sc->cst_dev, "switching to FADT Cx mode\n");
373     }
374 
375     /*
376      * TODO: _CSD Package should be checked here.
377      */
378 }
379 
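/*
 * Probe Cx states from the FADT/P_BLK registers: C1 (HLT) is always
 * available; C2/C3 are added only if P_BLK is large enough and the
 * FADT-reported latencies are within the usable limits.
 */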
380 static void
381 acpi_cst_cx_probe_fadt(struct acpi_cst_softc *sc)
382 {
383     struct acpi_cst_cx		*cx_ptr;
384 
385     sc->cst_cx_count = 0;
386     cx_ptr = sc->cst_cx_states;
387 
388     /* Use initial sleep value of 1 sec. to start with lowest idle state. */
389     sc->cst_prev_sleep = 1000000;
390 
391     /* C1 has been required since just after ACPI 1.0 */
392     cx_ptr->gas.SpaceId = ACPI_ADR_SPACE_FIXED_HARDWARE;
393     cx_ptr->type = ACPI_STATE_C1;
394     cx_ptr->trans_lat = 0;
395     cx_ptr->enter = acpi_cst_c1_halt_enter;
396     cx_ptr++;
397     sc->cst_cx_count++;
398 
399     /* C2(+) is not usable on MP systems unless the FADT says it is supported. */
400     if (ncpus > 1 && (AcpiGbl_FADT.Flags & ACPI_FADT_C2_MP_SUPPORTED) == 0)
401 	return;
402 
403     /*
404      * The spec says P_BLK must be 6 bytes long.  However, some systems
405      * use it to indicate a fractional set of features present, so we
406      * take a length of 5 to mean C2 is available.  Some may also report
407      * a length of 7 to indicate another C3, but most use _CST for this
408      * (as required) and having "only" C1-C3 is not a hardship.
409      */
410     if (sc->cst_p_blk_len < 5)
411 	return;
412 
413     /* Validate and allocate resources for C2 (P_LVL2). */
414     if (AcpiGbl_FADT.C2Latency <= 100) {
415 	cx_ptr->gas.SpaceId = ACPI_ADR_SPACE_SYSTEM_IO;
416 	cx_ptr->gas.BitWidth = 8;
417 	cx_ptr->gas.Address = sc->cst_p_blk + 4;
418 
419 	cx_ptr->rid = sc->cst_parent->cpux_next_rid;
420 	acpi_bus_alloc_gas(sc->cst_dev, &cx_ptr->res_type, &cx_ptr->rid,
421 	    &cx_ptr->gas, &cx_ptr->p_lvlx, RF_SHAREABLE);
422 	if (cx_ptr->p_lvlx != NULL) {
423 	    sc->cst_parent->cpux_next_rid++;
424 	    cx_ptr->type = ACPI_STATE_C2;
425 	    cx_ptr->trans_lat = AcpiGbl_FADT.C2Latency;
426 	    cx_ptr->enter = acpi_cst_cx_io_enter;
427 	    cx_ptr->btag = rman_get_bustag(cx_ptr->p_lvlx);
428 	    cx_ptr->bhand = rman_get_bushandle(cx_ptr->p_lvlx);
429 	    cx_ptr++;
430 	    sc->cst_cx_count++;
431 	    sc->cst_non_c3 = 1;
432 	}
433     }
434     if (sc->cst_p_blk_len < 6)
435 	return;
436 
437     /* Validate and allocate resources for C3 (P_LVL3). */
438     if (AcpiGbl_FADT.C3Latency <= 1000 &&
439         !(acpi_cst_quirks & ACPI_CST_QUIRK_NO_C3)) {
440 	cx_ptr->gas.SpaceId = ACPI_ADR_SPACE_SYSTEM_IO;
441 	cx_ptr->gas.BitWidth = 8;
442 	cx_ptr->gas.Address = sc->cst_p_blk + 5;
443 
444 	cx_ptr->rid = sc->cst_parent->cpux_next_rid;
445 	acpi_bus_alloc_gas(sc->cst_dev, &cx_ptr->res_type, &cx_ptr->rid,
446 	    &cx_ptr->gas, &cx_ptr->p_lvlx, RF_SHAREABLE);
447 	if (cx_ptr->p_lvlx != NULL) {
448 	    sc->cst_parent->cpux_next_rid++;
449 	    cx_ptr->type = ACPI_STATE_C3;
450 	    cx_ptr->trans_lat = AcpiGbl_FADT.C3Latency;
451 	    cx_ptr->enter = acpi_cst_cx_io_enter;
452 	    cx_ptr->btag = rman_get_bustag(cx_ptr->p_lvlx);
453 	    cx_ptr->bhand = rman_get_bushandle(cx_ptr->p_lvlx);
454 	    cx_ptr++;
455 	    sc->cst_cx_count++;
456 	}
457     }
458 }
459 
460 /*
461  * Parse a _CST package and set up its Cx states.  Since the _CST object
462  * can change dynamically, our notify handler may call this function
463  * to clean up and probe the new _CST package.
464  */
465 static int
466 acpi_cst_cx_probe_cst(struct acpi_cst_softc *sc, int reprobe)
467 {
468     struct	 acpi_cst_cx *cx_ptr;
469     ACPI_STATUS	 status;
470     ACPI_BUFFER	 buf;
471     ACPI_OBJECT	*top;
472     ACPI_OBJECT	*pkg;
473     uint32_t	 count;
474     int		 i;
475 
476     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
477 
478     if (reprobe)
479 	KKASSERT(mycpuid == sc->cst_cpuid);
480 
481     buf.Pointer = NULL;
482     buf.Length = ACPI_ALLOCATE_BUFFER;
483     status = AcpiEvaluateObject(sc->cst_handle, "_CST", NULL, &buf);
484     if (ACPI_FAILURE(status))
485 	return (ENXIO);
486 
487     /* _CST is a package with a count and at least one Cx package. */
488     top = (ACPI_OBJECT *)buf.Pointer;
489     if (!ACPI_PKG_VALID(top, 2) || acpi_PkgInt32(top, 0, &count) != 0) {
490 	device_printf(sc->cst_dev, "invalid _CST package\n");
491 	AcpiOsFree(buf.Pointer);
492 	return (ENXIO);
493     }
494     if (count != top->Package.Count - 1) {
495 	device_printf(sc->cst_dev, "invalid _CST state count (%d != %d)\n",
496 	       count, top->Package.Count - 1);
497 	count = top->Package.Count - 1;
498     }
499     if (count > MAX_CX_STATES) {
500 	device_printf(sc->cst_dev, "_CST has too many states (%d)\n", count);
501 	count = MAX_CX_STATES;
502     }
503 
504     sc->cst_flags |= ACPI_CST_FLAG_PROBING;
505     cpu_sfence();
506 
507     for (i = 0; i < sc->cst_cx_count; ++i) {
508 	cx_ptr = &sc->cst_cx_states[i];
509 
510 	/* Free up any previous register. */
511 	if (cx_ptr->p_lvlx != NULL) {
512 	    bus_release_resource(sc->cst_dev, cx_ptr->res_type, cx_ptr->rid,
513 	        cx_ptr->p_lvlx);
514 	    cx_ptr->p_lvlx = NULL;
515 	}
516 	cx_ptr->enter = NULL;
517     }
518 
519     /* Set up all valid states. */
520     sc->cst_cx_count = 0;
521     cx_ptr = sc->cst_cx_states;
522     for (i = 0; i < count; i++) {
523 	pkg = &top->Package.Elements[i + 1];
524 	if (!ACPI_PKG_VALID(pkg, 4) ||
525 	    acpi_PkgInt32(pkg, 1, &cx_ptr->type) != 0 ||
526 	    acpi_PkgInt32(pkg, 2, &cx_ptr->trans_lat) != 0 ||
527 	    acpi_PkgInt32(pkg, 3, &cx_ptr->power) != 0) {
528 
529 	    device_printf(sc->cst_dev, "skipping invalid Cx state package\n");
530 	    continue;
531 	}
532 
533 	/* Validate the state to see if we should use it. */
534 	switch (cx_ptr->type) {
535 	case ACPI_STATE_C1:
536 	    sc->cst_non_c3 = i;
537 	    cx_ptr->enter = acpi_cst_c1_halt_enter;
538 	    cx_ptr++;
539 	    sc->cst_cx_count++;
540 	    continue;
541 	case ACPI_STATE_C2:
542 	    sc->cst_non_c3 = i;
543 	    break;
544 	case ACPI_STATE_C3:
545 	default:
546 	    if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_C3) != 0) {
547 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
548 				 "cpu_cst%d: C3[%d] not available.\n",
549 				 device_get_unit(sc->cst_dev), i));
550 		continue;
551 	    }
552 	    break;
553 	}
554 
555 	/*
556 	 * Allocate the control register for C2 or C3(+).
557 	 */
558 	KASSERT(cx_ptr->p_lvlx == NULL, ("still has lvlx"));
559 	acpi_PkgRawGas(pkg, 0, &cx_ptr->gas);
560 
561 	cx_ptr->rid = sc->cst_parent->cpux_next_rid;
562 	acpi_bus_alloc_gas(sc->cst_dev, &cx_ptr->res_type, &cx_ptr->rid,
563 	    &cx_ptr->gas, &cx_ptr->p_lvlx, RF_SHAREABLE);
564 	if (cx_ptr->p_lvlx != NULL) {
565 	    sc->cst_parent->cpux_next_rid++;
566 	    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
567 			     "cpu_cst%d: Got C%d - %d latency\n",
568 			     device_get_unit(sc->cst_dev), cx_ptr->type,
569 			     cx_ptr->trans_lat));
570 	    cx_ptr->enter = acpi_cst_cx_io_enter;
571 	    cx_ptr->btag = rman_get_bustag(cx_ptr->p_lvlx);
572 	    cx_ptr->bhand = rman_get_bushandle(cx_ptr->p_lvlx);
573 	    cx_ptr++;
574 	    sc->cst_cx_count++;
575 	}
576     }
577     AcpiOsFree(buf.Pointer);
578 
579     if (reprobe) {
580 	/* If there are C3(+) states, always enable bus master wakeup */
581 	if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM_CTRL) == 0) {
582 	    for (i = 0; i < sc->cst_cx_count; ++i) {
583 		struct acpi_cst_cx *cx = &sc->cst_cx_states[i];
584 
585 		if (cx->type >= ACPI_STATE_C3) {
586 		    AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 1);
587 		    break;
588 		}
589 	    }
590 	}
591 
592 	/* Fix up the lowest Cx being used */
593 	acpi_cst_set_lowest_oncpu(sc, sc->cst_cx_lowest_req);
594     }
595 
596     /*
597      * Cache the lowest non-C3 state.
598      * NOTE: this must be done after cst_cx_lowest is set.
599      */
600     acpi_cst_non_c3(sc);
601 
602     cpu_sfence();
603     sc->cst_flags &= ~ACPI_CST_FLAG_PROBING;
604 
605     return (0);
606 }
607 
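/*
 * The _CST reprobe must run on the CPU that owns the softc, so it is
 * dispatched as a message to that CPU's netisr port and executed by
 * the handler below.
 */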
608 static void
609 acpi_cst_cx_reprobe_cst_handler(netmsg_t msg)
610 {
611     struct netmsg_acpi_cst *rmsg = (struct netmsg_acpi_cst *)msg;
612     int error;
613 
614     error = acpi_cst_cx_probe_cst(rmsg->sc, 1);
615     lwkt_replymsg(&rmsg->base.lmsg, error);
616 }
617 
618 static int
619 acpi_cst_cx_reprobe_cst(struct acpi_cst_softc *sc)
620 {
621     struct netmsg_acpi_cst msg;
622 
623     netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
624 	acpi_cst_cx_reprobe_cst_handler);
625     msg.sc = sc;
626 
627     return lwkt_domsg(netisr_cpuport(sc->cst_cpuid), &msg.base.lmsg, 0);
628 }
629 
630 /*
631  * Call this *after* all CPUs Cx states have been attached.
632  */
633 static void
634 acpi_cst_postattach(void *arg)
635 {
636     struct acpi_cst_softc *sc;
637     int i;
638 
639     /* Get set of Cx state devices */
640     devclass_get_devices(acpi_cst_devclass, &acpi_cst_devices,
641 	&acpi_cst_ndevices);
642 
643     /*
644      * Set up any quirks that might be necessary now that we have probed
645      * all the CPUs' Cx states.
646      */
647     acpi_cst_set_quirks();
648 
649     if (acpi_cst_use_fadt) {
650 	/*
651 	 * We are using Cx mode from FADT, probe for available Cx states
652 	 * for all processors.
653 	 */
654 	for (i = 0; i < acpi_cst_ndevices; i++) {
655 	    sc = device_get_softc(acpi_cst_devices[i]);
656 	    acpi_cst_cx_probe_fadt(sc);
657 	}
658     } else {
659 	/*
660 	 * We are using _CST mode, remove C3 state if necessary.
661 	 *
662 	 * Since we now know for sure that we will be using _CST mode,
663 	 * install our notify handler.
664 	 */
665 	for (i = 0; i < acpi_cst_ndevices; i++) {
666 	    sc = device_get_softc(acpi_cst_devices[i]);
667 	    if (acpi_cst_quirks & ACPI_CST_QUIRK_NO_C3)
668 		sc->cst_cx_count = sc->cst_non_c3 + 1;
669 	    sc->cst_parent->cpux_cst_notify = acpi_cst_notify;
670 	}
671     }
672     acpi_cst_global_cx_count();
673 
674     /* Perform Cx final initialization. */
675     for (i = 0; i < acpi_cst_ndevices; i++) {
676 	sc = device_get_softc(acpi_cst_devices[i]);
677 	acpi_cst_startup(sc);
678 
679 	if (sc->cst_parent->glob_sysctl_tree != NULL) {
680 	    struct acpi_cpux_softc *cpux = sc->cst_parent;
681 
682 	    /* Add a sysctl handler to handle global Cx lowest setting */
683 	    SYSCTL_ADD_PROC(&cpux->glob_sysctl_ctx,
684 	    		    SYSCTL_CHILDREN(cpux->glob_sysctl_tree),
685 			    OID_AUTO, "cx_lowest",
686 			    CTLTYPE_STRING | CTLFLAG_RW, NULL, 0,
687 			    acpi_cst_global_lowest_sysctl, "A",
688 			    "Requested global lowest Cx sleep state");
689 	    SYSCTL_ADD_PROC(&cpux->glob_sysctl_ctx,
690 	    		    SYSCTL_CHILDREN(cpux->glob_sysctl_tree),
691 			    OID_AUTO, "cx_lowest_use",
692 			    CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
693 			    acpi_cst_global_lowest_use_sysctl, "A",
694 			    "Global lowest Cx sleep state to use");
695 	}
696     }
697 
698     /* Take over idling from cpu_idle_default(). */
699     acpi_cst_cx_lowest = 0;
700     acpi_cst_cx_lowest_req = 0;
701     acpi_cst_disable_idle = FALSE;
702 
703     cpu_sfence();
704     cpu_idle_hook = acpi_cst_idle;
705 }
706 
707 static void
708 acpi_cst_support_list(struct acpi_cst_softc *sc)
709 {
710     struct sbuf sb;
711     int i;
712 
713     /*
714      * Set up the list of Cx states
715      */
716     sbuf_new(&sb, sc->cst_cx_supported, sizeof(sc->cst_cx_supported),
717 	SBUF_FIXEDLEN);
718     for (i = 0; i < sc->cst_cx_count; i++)
719 	sbuf_printf(&sb, "C%d/%d ", i + 1, sc->cst_cx_states[i].trans_lat);
720     sbuf_trim(&sb);
721     sbuf_finish(&sb);
722 }
723 
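/*
 * Enable bus master reload (BM_RLD) on the CPU owning the softc so that
 * bus master requests can bring it out of C3; the register write is
 * performed on the target CPU via its netisr port.
 */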
724 static void
725 acpi_cst_c3_bm_rld_handler(netmsg_t msg)
726 {
727     struct netmsg_acpi_cst *rmsg = (struct netmsg_acpi_cst *)msg;
728 
729     AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 1);
730     lwkt_replymsg(&rmsg->base.lmsg, 0);
731 }
732 
733 static void
734 acpi_cst_c3_bm_rld(struct acpi_cst_softc *sc)
735 {
736     struct netmsg_acpi_cst msg;
737 
738     netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
739 	acpi_cst_c3_bm_rld_handler);
740     msg.sc = sc;
741 
742     lwkt_domsg(netisr_cpuport(sc->cst_cpuid), &msg.base.lmsg, 0);
743 }
744 
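/*
 * Per-CPU finalization: enable BM_RLD if any C3(+) state is present,
 * build the supported Cx state list and register the per-CPU sysctls.
 */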
745 static void
746 acpi_cst_startup(struct acpi_cst_softc *sc)
747 {
748     struct acpi_cpux_softc *cpux = sc->cst_parent;
749 
750     /* If there are C3(+) states, always enable bus master wakeup */
751     if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM_CTRL) == 0) {
752 	int i;
753 
754 	for (i = 0; i < sc->cst_cx_count; ++i) {
755 	    struct acpi_cst_cx *cx = &sc->cst_cx_states[i];
756 
757 	    if (cx->type >= ACPI_STATE_C3) {
758 		acpi_cst_c3_bm_rld(sc);
759 		break;
760 	    }
761 	}
762     }
763 
764     acpi_cst_support_list(sc);
765 
766     SYSCTL_ADD_STRING(&cpux->pcpu_sysctl_ctx,
767 		      SYSCTL_CHILDREN(cpux->pcpu_sysctl_tree),
768 		      OID_AUTO, "cx_supported", CTLFLAG_RD,
769 		      sc->cst_cx_supported, 0,
770 		      "Cx/microsecond values for supported Cx states");
771     SYSCTL_ADD_PROC(&cpux->pcpu_sysctl_ctx,
772 		    SYSCTL_CHILDREN(cpux->pcpu_sysctl_tree),
773 		    OID_AUTO, "cx_lowest", CTLTYPE_STRING | CTLFLAG_RW,
774 		    (void *)sc, 0, acpi_cst_lowest_sysctl, "A",
775 		    "requested lowest Cx sleep state");
776     SYSCTL_ADD_PROC(&cpux->pcpu_sysctl_ctx,
777 		    SYSCTL_CHILDREN(cpux->pcpu_sysctl_tree),
778 		    OID_AUTO, "cx_lowest_use", CTLTYPE_STRING | CTLFLAG_RD,
779 		    (void *)sc, 0, acpi_cst_lowest_use_sysctl, "A",
780 		    "lowest Cx sleep state to use");
781     SYSCTL_ADD_PROC(&cpux->pcpu_sysctl_ctx,
782 		    SYSCTL_CHILDREN(cpux->pcpu_sysctl_tree),
783 		    OID_AUTO, "cx_usage", CTLTYPE_STRING | CTLFLAG_RD,
784 		    (void *)sc, 0, acpi_cst_usage_sysctl, "A",
785 		    "percent usage for each Cx state");
786 
787 #ifdef notyet
788     /* Signal platform that we can handle _CST notification. */
789     if (!acpi_cst_use_fadt && acpi_cst_ctrl != 0) {
790 	ACPI_LOCK(acpi);
791 	AcpiOsWritePort(acpi_cst_smi_cmd, acpi_cst_ctrl, 8);
792 	ACPI_UNLOCK(acpi);
793     }
794 #endif
795 }
796 
797 /*
798  * Idle the CPU in the lowest state possible.  This function is called with
799  * interrupts disabled.  Note that once it re-enables interrupts, a task
800  * switch can occur so do not access shared data (i.e. the softc) after
801  * interrupts are re-enabled.
802  */
803 static void
804 acpi_cst_idle(void)
805 {
806     struct	acpi_cst_softc *sc;
807     struct	acpi_cst_cx *cx_next;
808     union microtime_pcpu start, end;
809     int		bm_active, cx_next_idx, i, tdiff;
810 
811     /* If disabled, return immediately. */
812     if (acpi_cst_disable_idle) {
813 	ACPI_ENABLE_IRQS();
814 	return;
815     }
816 
817     /*
818      * Look up our CPU id to get our softc.  If it's NULL, we'll use C1
819      * since there is no Cx state for this processor.
820      */
821     sc = acpi_cst_softc[mdcpu->mi.gd_cpuid];
822     if (sc == NULL) {
823 	acpi_cst_c1_halt();
824 	return;
825     }
826 
827     /* Still probing; use C1 */
828     if (sc->cst_flags & ACPI_CST_FLAG_PROBING) {
829 	acpi_cst_c1_halt();
830 	return;
831     }
832 
833     /* Find the lowest state with a small enough latency (the previous
834      * sleep must be at least 3x the transition latency). */
834     cx_next_idx = 0;
835     for (i = sc->cst_cx_lowest; i >= 0; i--) {
836 	if (sc->cst_cx_states[i].trans_lat * 3 <= sc->cst_prev_sleep) {
837 	    cx_next_idx = i;
838 	    break;
839 	}
840     }
841 
842     /*
843      * If C3(+) is to be entered, check for bus master activity.
844      * If there was activity, clear the bit and use the lowest
845      * non-C3 state.
846      */
847     cx_next = &sc->cst_cx_states[cx_next_idx];
848     if (cx_next->type >= ACPI_STATE_C3 &&
849         (acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM_CTRL) == 0) {
850 	AcpiReadBitRegister(ACPI_BITREG_BUS_MASTER_STATUS, &bm_active);
851 	if (bm_active != 0) {
852 	    AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_STATUS, 1);
853 	    cx_next_idx = sc->cst_non_c3;
854 	}
855     }
856 
857     /* Select the next state and update statistics. */
858     cx_next = &sc->cst_cx_states[cx_next_idx];
859     sc->cst_cx_stats[cx_next_idx]++;
860     KASSERT(cx_next->type != ACPI_STATE_C0, ("C0 sleep"));
861 
862     /*
863      * Execute HLT (or equivalent) and wait for an interrupt.  We can't
864      * calculate the time spent in C1 since we wake up in an ISR.
865      * Assume we slept half a quantum and return.
866      */
867     if (cx_next->type == ACPI_STATE_C1) {
868 	sc->cst_prev_sleep = (sc->cst_prev_sleep * 3 + 500000 / hz) / 4;
869 	cx_next->enter(cx_next);
870 	return;
871     }
872 
873     /*
874      * For C3(+), disable bus master arbitration if BM control is
875      * available, otherwise flush the CPU cache.
876      */
877     if (cx_next->type >= ACPI_STATE_C3) {
878 	if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM_CTRL) == 0)
879 	    AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 1);
880 	else
881 	    ACPI_FLUSH_CPU_CACHE();
882     }
883 
884     /*
885      * Read from P_LVLx to enter C2(+), checking time spent asleep.
886      */
887     microtime_pcpu_get(&start);
888     cpu_mfence();
889 
890     cx_next->enter(cx_next);
891 
892     cpu_mfence();
893     microtime_pcpu_get(&end);
894 
895     /* Enable bus master arbitration. */
896     if (cx_next->type >= ACPI_STATE_C3) {
897 	if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM_CTRL) == 0)
898 	    AcpiWriteBitRegister(ACPI_BITREG_ARB_DISABLE, 0);
899     }
900     ACPI_ENABLE_IRQS();
901 
902     /* Find the actual time asleep in microseconds. */
903     tdiff = microtime_pcpu_diff(&start, &end);
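    /* Keep the sleep history as a 3:1 exponentially weighted moving average. */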
904     sc->cst_prev_sleep = (sc->cst_prev_sleep * 3 + tdiff) / 4;
905 }
906 
907 /*
908  * Re-evaluate the _CST object when we are notified that it changed.
909  */
910 static void
911 acpi_cst_notify(device_t dev)
912 {
913     struct acpi_cst_softc *sc = device_get_softc(dev);
914 
915     KASSERT(curthread->td_type != TD_TYPE_NETISR,
916         ("notify in netisr%d", mycpuid));
917 
918     lwkt_serialize_enter(&acpi_cst_slize);
919 
920     /* Update the list of Cx states. */
921     acpi_cst_cx_reprobe_cst(sc);
922     acpi_cst_support_list(sc);
923 
924     /* Update the new lowest usable Cx state for all CPUs. */
925     acpi_cst_global_cx_count();
926 
927     /*
928      * Fix up the lowest Cx being used
929      */
930     if (acpi_cst_cx_lowest_req < acpi_cst_cx_count)
931 	acpi_cst_cx_lowest = acpi_cst_cx_lowest_req;
932     if (acpi_cst_cx_lowest > acpi_cst_cx_count - 1)
933 	acpi_cst_cx_lowest = acpi_cst_cx_count - 1;
934 
935     lwkt_serialize_exit(&acpi_cst_slize);
936 }
937 
938 static int
939 acpi_cst_set_quirks(void)
940 {
941     device_t acpi_dev;
942     uint32_t val;
943 
944     ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
945 
946     /*
947      * Bus mastering arbitration control is needed to keep caches coherent
948      * while sleeping in C3.  If it's not present but a working flush cache
949      * instruction is present, flush the caches before entering C3 instead.
950      * Otherwise, just disable C3 completely.
951      */
952     if (AcpiGbl_FADT.Pm2ControlBlock == 0 ||
953 	AcpiGbl_FADT.Pm2ControlLength == 0) {
954 	if ((AcpiGbl_FADT.Flags & ACPI_FADT_WBINVD) &&
955 	    (AcpiGbl_FADT.Flags & ACPI_FADT_WBINVD_FLUSH) == 0) {
956 	    acpi_cst_quirks |= ACPI_CST_QUIRK_NO_BM_CTRL;
957 	    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
958 		"cpu_cst: no BM control, using flush cache method\n"));
959 	} else {
960 	    acpi_cst_quirks |= ACPI_CST_QUIRK_NO_C3;
961 	    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
962 		"cpu_cst: no BM control, C3 not available\n"));
963 	}
964     }
965 
966     /*
967      * If we are using FADT Cx mode, C3 on multiple CPUs requires using
968      * the expensive flush cache instruction.
969      */
970     if (acpi_cst_use_fadt && ncpus > 1) {
971 	acpi_cst_quirks |= ACPI_CST_QUIRK_NO_BM_CTRL;
972 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
973 	    "cpu_cst: SMP, using flush cache mode for C3\n"));
974     }
975 
976     /* Look for various quirks of the PIIX4 part. */
977     acpi_dev = pci_find_device(PCI_VENDOR_INTEL, PCI_DEVICE_82371AB_3);
978     if (acpi_dev != NULL) {
979 	switch (pci_get_revid(acpi_dev)) {
980 	/*
981 	 * Disable C3 support for all PIIX4 chipsets.  Some of these parts
982 	 * do not report the BMIDE status to the BM status register and
983 	 * others have a livelock bug if Type-F DMA is enabled.  Linux
984 	 * works around the BMIDE bug by reading the BM status directly
985 	 * but we take the simpler approach of disabling C3 for these
986 	 * parts.
987 	 *
988 	 * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA
989 	 * Livelock") from the January 2002 PIIX4 specification update.
990 	 * Applies to all PIIX4 models.
991 	 *
992 	 * Also, make sure that all interrupts cause a "Stop Break"
993 	 * event to exit from C2 state.
994 	 * Also, BRLD_EN_BM (ACPI_BITREG_BUS_MASTER_RLD in ACPI-speak)
995 	 * should be set to zero, otherwise it causes C2 to short-sleep.
996 	 * PIIX4 doesn't properly support C3 and bus master activity
997 	 * need not break out of C2.
998 	 */
999 	case PCI_REVISION_A_STEP:
1000 	case PCI_REVISION_B_STEP:
1001 	case PCI_REVISION_4E:
1002 	case PCI_REVISION_4M:
1003 	    acpi_cst_quirks |= ACPI_CST_QUIRK_NO_C3;
1004 	    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1005 		"cpu_cst: working around PIIX4 bug, disabling C3\n"));
1006 
1007 	    val = pci_read_config(acpi_dev, PIIX4_DEVACTB_REG, 4);
1008 	    if ((val & PIIX4_STOP_BREAK_MASK) != PIIX4_STOP_BREAK_MASK) {
1009 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1010 		    "cpu_cst: PIIX4: enabling IRQs to generate Stop Break\n"));
1011 	    	val |= PIIX4_STOP_BREAK_MASK;
1012 		pci_write_config(acpi_dev, PIIX4_DEVACTB_REG, val, 4);
1013 	    }
1014 	    AcpiReadBitRegister(ACPI_BITREG_BUS_MASTER_RLD, &val);
1015 	    if (val) {
1016 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1017 		    "cpu_cst: PIIX4: reset BRLD_EN_BM\n"));
1018 		AcpiWriteBitRegister(ACPI_BITREG_BUS_MASTER_RLD, 0);
1019 	    }
1020 	    break;
1021 	default:
1022 	    break;
1023 	}
1024     }
1025 
1026     return (0);
1027 }
1028 
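/*
 * sysctl handler: report each Cx state's share of idle entries as a
 * percentage, followed by the most recent sleep duration in microseconds.
 */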
1029 static int
1030 acpi_cst_usage_sysctl(SYSCTL_HANDLER_ARGS)
1031 {
1032     struct acpi_cst_softc *sc;
1033     struct sbuf	 sb;
1034     char	 buf[128];
1035     int		 i;
1036     uintmax_t	 fract, sum, whole;
1037 
1038     sc = (struct acpi_cst_softc *) arg1;
1039     sum = 0;
1040     for (i = 0; i < sc->cst_cx_count; i++)
1041 	sum += sc->cst_cx_stats[i];
1042     sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
1043     for (i = 0; i < sc->cst_cx_count; i++) {
1044 	if (sum > 0) {
1045 	    whole = (uintmax_t)sc->cst_cx_stats[i] * 100;
1046 	    fract = (whole % sum) * 100;
1047 	    sbuf_printf(&sb, "%u.%02u%% ", (u_int)(whole / sum),
1048 		(u_int)(fract / sum));
1049 	} else
1050 	    sbuf_printf(&sb, "0.00%% ");
1051     }
1052     sbuf_printf(&sb, "last %dus", sc->cst_prev_sleep);
1053     sbuf_trim(&sb);
1054     sbuf_finish(&sb);
1055     sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
1056     sbuf_delete(&sb);
1057 
1058     return (0);
1059 }
1060 
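/*
 * Apply a new lowest Cx request on the owning CPU.  The effective lowest
 * state is clamped to the number of probed states, and the interrupt
 * cputimer is switched when the first CPU starts using C3(+) or the last
 * one stops, since a C3-capable one-shot timer is required in that case.
 */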
1061 static int
1062 acpi_cst_set_lowest_oncpu(struct acpi_cst_softc *sc, int val)
1063 {
1064     int old_lowest, error = 0, old_lowest_req;
1065     uint32_t old_type, type;
1066 
1067     KKASSERT(mycpuid == sc->cst_cpuid);
1068 
1069     old_lowest_req = sc->cst_cx_lowest_req;
1070     sc->cst_cx_lowest_req = val;
1071 
1072     if (val > sc->cst_cx_count - 1)
1073 	val = sc->cst_cx_count - 1;
1074     old_lowest = atomic_swap_int(&sc->cst_cx_lowest, val);
1075 
1076     old_type = sc->cst_cx_states[old_lowest].type;
1077     type = sc->cst_cx_states[val].type;
1078     if (old_type >= ACPI_STATE_C3 && type < ACPI_STATE_C3) {
1079 	KKASSERT(acpi_cst_c3_reqs > 0);
1080 	if (atomic_fetchadd_int(&acpi_cst_c3_reqs, -1) == 1) {
1081 	    /*
1082 	     * All of the CPUs have left C3(+) state; switch back to a
1083 	     * better one-shot timer.
1084 	     */
1085 	    error = cputimer_intr_select_caps(CPUTIMER_INTR_CAP_NONE);
1086 	    KKASSERT(!error || error == ERESTART);
1087 	    if (error == ERESTART) {
1088 		if (bootverbose)
1089 		    kprintf("disable C3(+), restart intr cputimer\n");
1090 		cputimer_intr_restart();
1091 	    }
1092     	}
1093     } else if (type >= ACPI_STATE_C3 && old_type < ACPI_STATE_C3) {
1094 	if (atomic_fetchadd_int(&acpi_cst_c3_reqs, 1) == 0) {
1095 	    /*
1096 	     * When the first CPU enters C3(+) state, switch
1097 	     * to a one-shot timer that can handle
1098 	     * C3(+) state, i.e. the timer will not hang.
1099 	     */
1100 	    error = cputimer_intr_select_caps(CPUTIMER_INTR_CAP_PS);
1101 	    if (error == ERESTART) {
1102 		if (bootverbose)
1103 		    kprintf("enable C3(+), restart intr cputimer\n");
1104 		cputimer_intr_restart();
1105 	    } else if (error) {
1106 		kprintf("no suitable intr cputimer found\n");
1107 
1108 		/* Restore */
1109 		sc->cst_cx_lowest_req = old_lowest_req;
1110 		sc->cst_cx_lowest = old_lowest;
1111 		atomic_fetchadd_int(&acpi_cst_c3_reqs, -1);
1112 	    }
1113 	}
1114     }
1115 
1116     if (error)
1117 	return error;
1118 
1119     /* Cache the new lowest non-C3 state. */
1120     acpi_cst_non_c3(sc);
1121 
1122     /* Reset the statistics counters. */
1123     bzero(sc->cst_cx_stats, sizeof(sc->cst_cx_stats));
1124     return (0);
1125 }
1126 
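/*
 * Set the lowest Cx state for a CPU by sending a message to that CPU's
 * netisr port, so acpi_cst_set_lowest_oncpu() always runs on the CPU
 * that owns the softc.
 */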
1127 static void
1128 acpi_cst_set_lowest_handler(netmsg_t msg)
1129 {
1130     struct netmsg_acpi_cst *rmsg = (struct netmsg_acpi_cst *)msg;
1131     int error;
1132 
1133     error = acpi_cst_set_lowest_oncpu(rmsg->sc, rmsg->val);
1134     lwkt_replymsg(&rmsg->base.lmsg, error);
1135 }
1136 
1137 static int
1138 acpi_cst_set_lowest(struct acpi_cst_softc *sc, int val)
1139 {
1140     struct netmsg_acpi_cst msg;
1141 
1142     netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
1143 	acpi_cst_set_lowest_handler);
1144     msg.sc = sc;
1145     msg.val = val;
1146 
1147     return lwkt_domsg(netisr_cpuport(sc->cst_cpuid), &msg.base.lmsg, 0);
1148 }
1149 
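/*
 * sysctl handler: accept a "Cx" string (e.g. "C2") and request the
 * corresponding lowest Cx state for this CPU.
 */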
1150 static int
1151 acpi_cst_lowest_sysctl(SYSCTL_HANDLER_ARGS)
1152 {
1153     struct	 acpi_cst_softc *sc;
1154     char	 state[8];
1155     int		 val, error;
1156 
1157     sc = (struct acpi_cst_softc *)arg1;
1158     ksnprintf(state, sizeof(state), "C%d", sc->cst_cx_lowest_req + 1);
1159     error = sysctl_handle_string(oidp, state, sizeof(state), req);
1160     if (error != 0 || req->newptr == NULL)
1161 	return (error);
1162     if (strlen(state) < 2 || toupper(state[0]) != 'C')
1163 	return (EINVAL);
1164     val = (int) strtol(state + 1, NULL, 10) - 1;
1165     if (val < 0)
1166 	return (EINVAL);
1167 
1168     lwkt_serialize_enter(&acpi_cst_slize);
1169     error = acpi_cst_set_lowest(sc, val);
1170     lwkt_serialize_exit(&acpi_cst_slize);
1171 
1172     return error;
1173 }
1174 
1175 static int
1176 acpi_cst_lowest_use_sysctl(SYSCTL_HANDLER_ARGS)
1177 {
1178     struct	 acpi_cst_softc *sc;
1179     char	 state[8];
1180 
1181     sc = (struct acpi_cst_softc *)arg1;
1182     ksnprintf(state, sizeof(state), "C%d", sc->cst_cx_lowest + 1);
1183     return sysctl_handle_string(oidp, state, sizeof(state), req);
1184 }
1185 
1186 static int
1187 acpi_cst_global_lowest_sysctl(SYSCTL_HANDLER_ARGS)
1188 {
1189     struct	acpi_cst_softc *sc;
1190     char	state[8];
1191     int		val, error, i;
1192 
1193     ksnprintf(state, sizeof(state), "C%d", acpi_cst_cx_lowest_req + 1);
1194     error = sysctl_handle_string(oidp, state, sizeof(state), req);
1195     if (error != 0 || req->newptr == NULL)
1196 	return (error);
1197     if (strlen(state) < 2 || toupper(state[0]) != 'C')
1198 	return (EINVAL);
1199     val = (int) strtol(state + 1, NULL, 10) - 1;
1200     if (val < 0)
1201 	return (EINVAL);
1202 
1203     lwkt_serialize_enter(&acpi_cst_slize);
1204 
1205     acpi_cst_cx_lowest_req = val;
1206     acpi_cst_cx_lowest = val;
1207     if (acpi_cst_cx_lowest > acpi_cst_cx_count - 1)
1208 	acpi_cst_cx_lowest = acpi_cst_cx_count - 1;
1209 
1210     /* Update the new lowest usable Cx state for all CPUs. */
1211     for (i = 0; i < acpi_cst_ndevices; i++) {
1212 	sc = device_get_softc(acpi_cst_devices[i]);
1213 	error = acpi_cst_set_lowest(sc, val);
1214 	if (error) {
1215 	    KKASSERT(i == 0);
1216 	    break;
1217 	}
1218     }
1219 
1220     lwkt_serialize_exit(&acpi_cst_slize);
1221 
1222     return error;
1223 }
1224 
1225 static int
1226 acpi_cst_global_lowest_use_sysctl(SYSCTL_HANDLER_ARGS)
1227 {
1228     char	state[8];
1229 
1230     ksnprintf(state, sizeof(state), "C%d", acpi_cst_cx_lowest + 1);
1231     return sysctl_handle_string(oidp, state, sizeof(state), req);
1232 }
1233 
1234 /*
1235  * Put the CPU in C1 in a machine-dependent way.
1236  * XXX: shouldn't be here!
1237  */
1238 static void
1239 acpi_cst_c1_halt(void)
1240 {
1241     splz();
1242     if ((mycpu->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0)
1243         __asm __volatile("sti; hlt");
1244     else
1245         __asm __volatile("sti; pause");
1246 }
1247 
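/*
 * Cache the deepest non-C3 state at or below the current lowest Cx; it is
 * used as the fallback state when bus master activity prevents entering
 * C3(+).
 */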
1248 static void
1249 acpi_cst_non_c3(struct acpi_cst_softc *sc)
1250 {
1251     int i;
1252 
1253     sc->cst_non_c3 = 0;
1254     for (i = sc->cst_cx_lowest; i >= 0; i--) {
1255 	if (sc->cst_cx_states[i].type < ACPI_STATE_C3) {
1256 	    sc->cst_non_c3 = i;
1257 	    break;
1258 	}
1259     }
1260     if (bootverbose)
1261 	device_printf(sc->cst_dev, "non-C3 %d\n", sc->cst_non_c3);
1262 }
1263 
1264 /*
1265  * Update acpi_cst_cx_count, the number of Cx states usable on all CPUs
1266  * (the minimum across CPUs); it is used by the global Cx sysctl handlers.
1267  */
1268 static void
1269 acpi_cst_global_cx_count(void)
1270 {
1271     struct acpi_cst_softc *sc;
1272     int i;
1273 
1274     if (acpi_cst_ndevices == 0) {
1275 	acpi_cst_cx_count = 0;
1276 	return;
1277     }
1278 
1279     sc = device_get_softc(acpi_cst_devices[0]);
1280     acpi_cst_cx_count = sc->cst_cx_count;
1281 
1282     for (i = 1; i < acpi_cst_ndevices; i++) {
1283 	struct acpi_cst_softc *sc = device_get_softc(acpi_cst_devices[i]);
1284 
1285 	if (sc->cst_cx_count < acpi_cst_cx_count)
1286 	    acpi_cst_cx_count = sc->cst_cx_count;
1287     }
1288     if (bootverbose)
1289 	kprintf("cpu_cst: global Cx count %d\n", acpi_cst_cx_count);
1290 }
1291 
1292 static void
1293 acpi_cst_c1_halt_enter(const struct acpi_cst_cx *cx __unused)
1294 {
1295     acpi_cst_c1_halt();
1296 }
1297 
1298 static void
1299 acpi_cst_cx_io_enter(const struct acpi_cst_cx *cx)
1300 {
1301     uint64_t dummy;
1302 
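    /* Reading the P_LVLx register initiates the transition into the Cx state. */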
1303     bus_space_read_1(cx->btag, cx->bhand, 0);
1304     /*
1305      * Perform a dummy I/O read.  Since it may take an arbitrary time
1306      * to enter the idle state, this read makes sure that we are frozen.
1307      */
1308     AcpiRead(&dummy, &AcpiGbl_FADT.XPmTimerBlock);
1309 }
1310