11341Sstevel /*
21341Sstevel * CDDL HEADER START
31341Sstevel *
41341Sstevel * The contents of this file are subject to the terms of the
51341Sstevel * Common Development and Distribution License (the "License").
61341Sstevel * You may not use this file except in compliance with the License.
71341Sstevel *
81341Sstevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91341Sstevel * or http://www.opensolaris.org/os/licensing.
101341Sstevel * See the License for the specific language governing permissions
111341Sstevel * and limitations under the License.
121341Sstevel *
131341Sstevel * When distributing Covered Code, include this CDDL HEADER in each
141341Sstevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151341Sstevel * If applicable, add the following below this CDDL HEADER, with the
161341Sstevel * fields enclosed by brackets "[]" replaced with your own identifying
171341Sstevel * information: Portions Copyright [yyyy] [name of copyright owner]
181341Sstevel *
191341Sstevel * CDDL HEADER END
201341Sstevel */
211341Sstevel
221341Sstevel /*
23*11752STrevor.Thompson@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
241341Sstevel * Use is subject to license terms.
251341Sstevel */
261341Sstevel
271341Sstevel /*
281341Sstevel * This workaround inhibits prom_printf after the cpus are grabbed.
291341Sstevel * This can be removed when 4154263 is corrected.
301341Sstevel */
311341Sstevel #define Bug_4154263
321341Sstevel
331341Sstevel /*
341341Sstevel * A CPR derivative specifically for sunfire
351341Sstevel */
361341Sstevel
371341Sstevel #include <sys/types.h>
381341Sstevel #include <sys/systm.h>
391341Sstevel #include <sys/machparam.h>
401341Sstevel #include <sys/machsystm.h>
411341Sstevel #include <sys/ddi.h>
421341Sstevel #define SUNDDI_IMPL
431341Sstevel #include <sys/sunddi.h>
441341Sstevel #include <sys/time.h>
451341Sstevel #include <sys/kmem.h>
461341Sstevel #include <nfs/lm.h>
471341Sstevel #include <sys/ddi_impldefs.h>
481341Sstevel #include <sys/obpdefs.h>
491341Sstevel #include <sys/cmn_err.h>
501341Sstevel #include <sys/debug.h>
511341Sstevel #include <sys/errno.h>
521341Sstevel #include <sys/callb.h>
531341Sstevel #include <sys/clock.h>
541341Sstevel #include <sys/x_call.h>
551341Sstevel #include <sys/cpuvar.h>
561341Sstevel #include <sys/epm.h>
571341Sstevel #include <sys/vfs.h>
581341Sstevel #include <sys/fhc.h>
591341Sstevel #include <sys/sysctrl.h>
601341Sstevel #include <sys/promif.h>
611341Sstevel #include <sys/conf.h>
621341Sstevel #include <sys/modctl.h>
631341Sstevel #include <sys/cyclic.h>
641341Sstevel #include <sys/sunndi.h>
651341Sstevel #include <sys/machsystm.h>
661341Sstevel
/*
 * Suspend progresses through these states in order; on a failure,
 * sysctrl_resume() switches on the state reached so it unwinds only
 * the stages that actually completed (each case falls through).
 */
static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,	/* nothing quiesced yet */
	SYSC_STATE_USER,	/* user threads stopped */
	SYSC_STATE_DAEMON,	/* kernel daemons checkpointed */
	SYSC_STATE_DRIVER,	/* drivers suspended */
	SYSC_STATE_FULL } suspend_state;	/* cpus grabbed, intrs off */

static int pstate_save;			/* intr state saved by cpu grab */
static uint_t sysctrl_gate[NCPU];	/* per-cpu check-in/release flags */
int sysctrl_quiesce_debug = FALSE;
static int sysctrl_skip_kernel_threads = TRUE;

/*
 * sysctrl_skip_user_threads is used to control if user threads should
 * be suspended. If sysctrl_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, sysctrl_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and sysctrl_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads. We allow all combinations of sysctrl_check_user_stop_result
 * and sysctrl_allow_block_threads, even though it might not make much
 * sense to not allow block threads when we don't even check stop
 * result.
 */
static int sysctrl_skip_user_threads = 0;		/* default to FALSE */
static int sysctrl_check_user_stop_result = 1;		/* default to TRUE */
static int sysctrl_allow_blocked_threads = 1;		/* default to TRUE */

/* non-zero when the hw watchdog was disabled at suspend time */
static int sysc_watchdog_suspended;

extern int sysctrl_enable_detach_suspend;
/* saved value of sysctrl_enable_detach_suspend, restored at resume */
static int sysc_lastval;

#define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
#define	errp	prom_printf

/* per-cpu time budget for checking in to the quiesce gate */
#define	SYSC_CPU_LOOP_MSEC	1000
1041341Sstevel
/*
 * Quiesce all other cpus: cross-trap each ready cpu into
 * sysctrl_freeze(), where it spins (interrupts off) until the master
 * gate opens, then disable vector interrupts on this cpu too.
 * Panics if any cpu fails to check in within SYSC_CPU_LOOP_MSEC,
 * unless the system is already panicking.
 */
static void
sysctrl_grab_cpus(void)
{
	int i;
	cpuset_t others;
	extern cpuset_t cpu_ready_set;
	extern void sysctrl_freeze(void);
	uint64_t sysc_tick_limit;	/* tick budget per cpu check-in */
	uint64_t sysc_current_tick;
	uint64_t sysc_tick_deadline;

	extern u_longlong_t gettick(void);

	/* clear every check-in gate before waking anyone */
	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	/* args: the gate array to check in to, and our gate as master gate */
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	/* convert the msec budget into %tick units */
	sysc_tick_limit = ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If in panic, we just return */
			if (panicstr)
				break;

			/* Panic the system if cpu has not responded by deadline */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				    "responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}
1571341Sstevel
/*
 * Undo sysctrl_grab_cpus(): open the master gate (our own slot in
 * sysctrl_gate[]) so the cpus spinning in sysctrl_freeze() exit,
 * then restore this cpu's interrupt state saved at grab time.
 */
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}
1671341Sstevel
/*
 * Quiesce cyclic (clock/callout) activity ahead of the final freeze.
 * Deliberately returns with cpu_lock held and preemption disabled;
 * sysctrl_enable_intr() is the matching unwind.
 */
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}
1751341Sstevel
/*
 * Undo sysctrl_stop_intr(): resume cyclics, drop spl to 0, re-enable
 * preemption and release cpu_lock (acquired by sysctrl_stop_intr()).
 */
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}
1841341Sstevel
1851341Sstevel static int
sysctrl_is_real_device(dev_info_t * dip)1861341Sstevel sysctrl_is_real_device(dev_info_t *dip)
1871341Sstevel {
1881341Sstevel struct regspec *regbuf;
1891341Sstevel int length;
1901341Sstevel int rc;
1911341Sstevel
1921341Sstevel if (ddi_get_driver(dip) == NULL)
1931341Sstevel return (FALSE);
1941341Sstevel
1951341Sstevel if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
1961341Sstevel return (TRUE);
1971341Sstevel if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
1981341Sstevel return (FALSE);
1991341Sstevel
2001341Sstevel /*
2011341Sstevel * now the general case
2021341Sstevel */
2031341Sstevel rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
204*11752STrevor.Thompson@Sun.COM (caddr_t)®buf, &length);
2051341Sstevel ASSERT(rc != DDI_PROP_NO_MEMORY);
2061341Sstevel if (rc != DDI_PROP_SUCCESS) {
2071341Sstevel return (FALSE);
2081341Sstevel } else {
2091341Sstevel kmem_free(regbuf, length);
2101341Sstevel return (TRUE);
2111341Sstevel }
2121341Sstevel }
2131341Sstevel
2141341Sstevel static dev_info_t *failed_driver;
2151341Sstevel static char device_path[MAXPATHLEN];
2161341Sstevel
/*
 * Depth-first (children before parents) suspend of every real device
 * in the sibling list rooted at dip, via devi_detach(DDI_SUSPEND).
 * On failure: the offending dip is held and remembered in
 * failed_driver (released by sysctrl_resume_devices()), its path is
 * copied into pkt->errbuf, and ENXIO is returned.  Returns
 * DDI_SUCCESS (0) when the whole subtree suspended cleanly.
 */
static int
sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
{
	int circ;

	/* caller must hold our parent busy (or we start at the root) */
	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));

	failed_driver = NULL;
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/*
		 * Hold parent busy while walking child list
		 */
		ndi_devi_enter(dip, &circ);
		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		/* skip pseudo nodes with nothing to quiesce */
		if (!sysctrl_is_real_device(dip))
			continue;

		/*
		 * Safe to call ddi_pathname() as parent is held busy
		 */
		(void) ddi_pathname(dip, device_path);
		DEBUGP(errp(" suspending device %s\n", device_path));
		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			DEBUGP(errp(" unable to suspend device %s\n",
			    device_path));

			/*
			 * NOTE(review): strncpy does not NUL-terminate
			 * when device_path >= SYSC_OUTPUT_LEN bytes --
			 * confirm errbuf consumers bound their reads.
			 */
			(void) strncpy(pkt->errbuf, device_path,
			    SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
			/* hold the failing dip; resume path releases it */
			ndi_hold_devi(dip);
			failed_driver = dip;
			return (ENXIO);
		}
	}

	return (DDI_SUCCESS);
}
2601341Sstevel
/*
 * Re-attach (devi_attach(DDI_RESUME)) suspended devices in reverse
 * device tree order: siblings are processed last-to-first, each
 * parent before its children on the way back.  Nodes suspended after
 * failed_driver (i.e. never actually suspended) are skipped; the held
 * failed_driver dip itself is just released.  A resume failure panics
 * the system (see XXX below).
 */
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int circ;
	dev_info_t *dip, *next, *last = NULL;

	/* caller must hold our parent busy (or we start at the root) */
	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		next = ddi_get_next_sibling(dip);
		/* advance to the last sibling not yet resumed */
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			/* suspend stopped here; just drop the hold */
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
				    device_path);
			}
		}
		/* resume this node's children after the node itself */
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}
3071341Sstevel
3081341Sstevel /*
3091341Sstevel * True if thread is virtually stopped. Similar to CPR_VSTOPPED
3101341Sstevel * but from DR point of view. These user threads are waiting in
3111341Sstevel * the kernel. Once they complete in the kernel, they will process
3121341Sstevel * the stop signal and stop.
3131341Sstevel */
3141341Sstevel #define SYSCTRL_VSTOPPED(t) \
3151341Sstevel ((t)->t_state == TS_SLEEP && \
3161341Sstevel (t)->t_wchan != NULL && \
3171341Sstevel (t)->t_astflag && \
3181341Sstevel ((t)->t_proc_flag & TP_CHKPT))
3191341Sstevel
/*
 * Stop all user threads, CPR style: mark every user thread with
 * TP_CHKPT, post an AST, wake sleepers so they notice, then wait for
 * them to park.  Up to SYSCTRL_UTSTOP_RETRY passes (with a growing
 * wait) are made to get past transient states such as fork.  Threads
 * that are "virtually stopped" (SYSCTRL_VSTOPPED) count as stopped
 * when sysctrl_allow_blocked_threads is set.  Returns DDI_SUCCESS,
 * or ESRCH with details of the uncooperative process in pkt->errbuf.
 */
static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
{
	int count;
	char cache_psargs[PSARGSZ];	/* argv of last unstopped process */
	kthread_id_t cache_tp;		/* its thread pointer */
	uint_t cache_t_state;		/* its thread state */
	int bailout;			/* non-zero: some thread didn't stop */
	pid_t pid;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SYSCTRL_UTSTOP_RETRY	4
#define	SYSCTRL_UTSTOP_WAIT	hz

	if (sysctrl_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
		kthread_id_t tp;

		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Drop both locks around add_one_utstop()
				 * (it may block), then re-take them in the
				 * same order.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				/* post an AST so the thread sees TP_CHKPT */
				aston(tp);

				/* kick sleepers/waiters so they stop promptly */
				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    bailout == 0 && tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			/* did this thread stop? */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sysctrl_allow_blocked_threads &&
			    SYSCTRL_VSTOPPED(tp))) {

				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
				    sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
				pid = p->p_pidp->pid_id;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/*
		 * NOTE(review): sprintf into pkt->errbuf is unbounded --
		 * presumably errbuf comfortably exceeds this message's
		 * worst case; verify against its declared size.
		 */
		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
		    " thread descriptor: %p", cache_psargs, (int)pid,
		    cache_t_state, (void *)cache_tp);

		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);

		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
4431341Sstevel
/*
 * Checkpoint CPR daemon-class kernel threads through the callback
 * table, then verify that every kernel thread (interrupt threads
 * excepted) is accounted for as stopped.  On success the callback
 * table remains locked -- it is unlocked in the resume path.
 * Returns DDI_SUCCESS, or EBUSY with the uncooperative thread's
 * name in pkt->errbuf.
 */
static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
{
	caddr_t name;
	kthread_id_t tp;

	if (sysctrl_skip_kernel_threads) {
		return (DDI_SUCCESS);
	}

	/*
	 * Note: we unlock the table in resume.
	 * We only need to lock the callback table if we are actually
	 * suspending kernel threads.
	 */
	callb_lock_table();
	/* a non-NULL name identifies a daemon that refused to checkpoint */
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {

		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
		return (EBUSY);
	}

	/*
	 * Verify that all threads are accounted for
	 */
	mutex_enter(&pidlock);
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		/* only kernel threads are of interest here */
		if (p->p_as != &kas)
			continue;

		/* interrupt threads need not be stopped */
		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
			return (EBUSY);
		}
	}

	mutex_exit(&pidlock);
	return (DDI_SUCCESS);
}
4921341Sstevel
4931341Sstevel static void
sysctrl_start_user_threads(void)4941341Sstevel sysctrl_start_user_threads(void)
4951341Sstevel {
4961341Sstevel kthread_id_t tp;
4971341Sstevel
4981341Sstevel mutex_enter(&pidlock);
4991341Sstevel
5001341Sstevel /* walk all threads and release them */
5011341Sstevel for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
5021341Sstevel proc_t *p = ttoproc(tp);
5031341Sstevel
5041341Sstevel /* skip kernel threads */
5051341Sstevel if (ttoproc(tp)->p_as == &kas)
5061341Sstevel continue;
5071341Sstevel
5081341Sstevel mutex_enter(&p->p_lock);
5091341Sstevel tp->t_proc_flag &= ~TP_CHKPT;
5101341Sstevel mutex_exit(&p->p_lock);
5111341Sstevel
5121341Sstevel thread_lock(tp);
5131341Sstevel if (CPR_ISTOPPED(tp)) {
5141341Sstevel /* back on the runq */
5151341Sstevel tp->t_schedflag |= TS_RESUME;
5161341Sstevel setrun_locked(tp);
5171341Sstevel }
5181341Sstevel thread_unlock(tp);
5191341Sstevel }
5201341Sstevel
5211341Sstevel mutex_exit(&pidlock);
5221341Sstevel }
5231341Sstevel
5241341Sstevel static void
sysctrl_signal_user(int sig)5251341Sstevel sysctrl_signal_user(int sig)
5261341Sstevel {
5271341Sstevel struct proc *p;
5281341Sstevel
5291341Sstevel mutex_enter(&pidlock);
5301341Sstevel
5311341Sstevel for (p = practive; p != NULL; p = p->p_next) {
5321341Sstevel /* only user threads */
5331341Sstevel if (p->p_exec == NULL || p->p_stat == SZOMB ||
5341341Sstevel p == proc_init || p == ttoproc(curthread))
5351341Sstevel continue;
5361341Sstevel
5371341Sstevel mutex_enter(&p->p_lock);
5381341Sstevel sigtoproc(p, NULL, sig);
5391341Sstevel mutex_exit(&p->p_lock);
5401341Sstevel }
5411341Sstevel
5421341Sstevel mutex_exit(&pidlock);
5431341Sstevel
5441341Sstevel /* add a bit of delay */
5451341Sstevel delay(hz);
5461341Sstevel }
5471341Sstevel
/*
 * Unwind a (possibly partial) suspend.  suspend_state records how far
 * sysctrl_suspend() got; the switch starts at that stage and each
 * case falls through to undo all earlier stages as well, finishing
 * with SIGTHAW to interested user processes.
 */
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
		    CB_CODE_CPR_RESUME);
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
			    CB_CODE_CPR_RESUME);
			/* drop the callback table lock taken at suspend */
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}
6491341Sstevel
/*
 * Pre-suspend setup; must be called from normal system-call context
 * before sysctrl_suspend() (see below for why).
 */
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload. The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}
6631341Sstevel
/*
 * Quiesce the system for sunfire DR.  Stages, in order: stop user
 * threads, stop kernel daemons, sync/flush filesystems, suspend all
 * drivers, checkpoint the callout machinery, disable the hardware
 * watchdog, and finally grab every cpu.  suspend_state is advanced
 * before each stage so a failure can unwind exactly what was done
 * via sysctrl_resume().  Returns DDI_SUCCESS (0) when the system is
 * fully quiesced, otherwise the stage's error code.
 */
int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable sysctrl detach/suspend function; restored at resume */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}

	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}
755