xref: /onnv-gate/usr/src/uts/sun4u/sunfire/io/sysctrl_quiesce.c (revision 2399:85c0948a37b7)
11341Sstevel /*
21341Sstevel  * CDDL HEADER START
31341Sstevel  *
41341Sstevel  * The contents of this file are subject to the terms of the
51341Sstevel  * Common Development and Distribution License (the "License").
61341Sstevel  * You may not use this file except in compliance with the License.
71341Sstevel  *
81341Sstevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91341Sstevel  * or http://www.opensolaris.org/os/licensing.
101341Sstevel  * See the License for the specific language governing permissions
111341Sstevel  * and limitations under the License.
121341Sstevel  *
131341Sstevel  * When distributing Covered Code, include this CDDL HEADER in each
141341Sstevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151341Sstevel  * If applicable, add the following below this CDDL HEADER, with the
161341Sstevel  * fields enclosed by brackets "[]" replaced with your own identifying
171341Sstevel  * information: Portions Copyright [yyyy] [name of copyright owner]
181341Sstevel  *
191341Sstevel  * CDDL HEADER END
201341Sstevel  */
211341Sstevel 
221341Sstevel /*
23*2399Scth  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
241341Sstevel  * Use is subject to license terms.
251341Sstevel  */
261341Sstevel 
271341Sstevel #pragma ident	"%Z%%M%	%I%	%E% SMI"
281341Sstevel 
291341Sstevel /*
301341Sstevel  * This workaround inhibits prom_printf after the cpus are grabbed.
311341Sstevel  * This can be removed when 4154263 is corrected.
321341Sstevel  */
331341Sstevel #define	Bug_4154263
341341Sstevel 
351341Sstevel /*
361341Sstevel  * A CPR derivative specifically for sunfire
371341Sstevel  */
381341Sstevel 
391341Sstevel #include <sys/types.h>
401341Sstevel #include <sys/systm.h>
411341Sstevel #include <sys/machparam.h>
421341Sstevel #include <sys/machsystm.h>
431341Sstevel #include <sys/ddi.h>
441341Sstevel #define	SUNDDI_IMPL
451341Sstevel #include <sys/sunddi.h>
461341Sstevel #include <sys/time.h>
471341Sstevel #include <sys/kmem.h>
481341Sstevel #include <nfs/lm.h>
491341Sstevel #include <sys/ddi_impldefs.h>
501341Sstevel #include <sys/obpdefs.h>
511341Sstevel #include <sys/cmn_err.h>
521341Sstevel #include <sys/debug.h>
531341Sstevel #include <sys/errno.h>
541341Sstevel #include <sys/callb.h>
551341Sstevel #include <sys/clock.h>
561341Sstevel #include <sys/x_call.h>
571341Sstevel #include <sys/cpuvar.h>
581341Sstevel #include <sys/epm.h>
591341Sstevel #include <sys/vfs.h>
601341Sstevel #include <sys/fhc.h>
611341Sstevel #include <sys/sysctrl.h>
621341Sstevel #include <sys/promif.h>
631341Sstevel #include <sys/conf.h>
641341Sstevel #include <sys/modctl.h>
651341Sstevel #include <sys/cyclic.h>
661341Sstevel #include <sys/sunndi.h>
671341Sstevel #include <sys/machsystm.h>
681341Sstevel 
/*
 * Suspend progresses through these states in order.  sysctrl_resume()
 * switches on the state that was reached so it unwinds only the work
 * actually performed (the switch cases deliberately fall through).
 */
static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,	/* nothing suspended yet */
	SYSC_STATE_USER,	/* user threads stopped */
	SYSC_STATE_DAEMON,	/* kernel daemons (callb) stopped */
	SYSC_STATE_DRIVER,	/* device tree suspended */
	SYSC_STATE_FULL } suspend_state;	/* cpus frozen, intrs off */

static int	pstate_save;		/* PSTATE from disable_vec_intr() */
static uint_t	sysctrl_gate[NCPU];	/* per-cpu check-in/release flags */
int	sysctrl_quiesce_debug = FALSE;	/* patchable: enable DEBUGP output */
static int	sysctrl_skip_kernel_threads = TRUE;

/*
 * sysctrl_skip_user_threads is used to control if user threads should
 * be suspended.  If sysctrl_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, sysctrl_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and sysctrl_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of sysctrl_check_user_stop_result
 * and sysctrl_allow_block_threads, even though it might not make much
 * sense to not allow block threads when we don't even check stop
 * result.
 */
static int	sysctrl_skip_user_threads = 0;		/* default to FALSE */
static int	sysctrl_check_user_stop_result = 1;	/* default to TRUE */
static int	sysctrl_allow_blocked_threads = 1;	/* default to TRUE */

/* set at suspend if we disabled the hw watchdog; resume re-enables it */
static int	sysc_watchdog_suspended;

extern int	sysctrl_enable_detach_suspend;
static int	sysc_lastval;	/* saved detach/suspend setting for resume */

#define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
#define	errp	prom_printf

/* per-cpu deadline for checking in to the freeze gate */
#define	SYSC_CPU_LOOP_MSEC	1000
1061341Sstevel 
/*
 * Freeze all other ready cpus: cross-call each into sysctrl_freeze(),
 * where it raises its gate word and then spins on this cpu's gate word
 * until sysctrl_release_cpus() opens it.  Each cpu must check in within
 * SYSC_CPU_LOOP_MSEC or we panic.  Finally disables vector interrupts
 * on this cpu (state saved in pstate_save for the release side).
 */
static void
sysctrl_grab_cpus(void)
{
	int		i;
	cpuset_t	others;
	extern cpuset_t	cpu_ready_set;
	extern void	sysctrl_freeze(void);
	uint64_t	sysc_tick_limit;
	uint64_t	sysc_current_tick;
	uint64_t	sysc_tick_deadline;

	extern u_longlong_t	gettick(void);

	/* clear all gates so stale check-ins can't satisfy the wait below */
	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
		(uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	/* convert the msec budget to %tick counts */
	sysc_tick_limit =
		((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If in panic, we just return */
			if (panicstr)
				break;

			/* Panic if the cpu has not responded by deadline */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
			    cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				"responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}
1601341Sstevel 
/*
 * Undo sysctrl_grab_cpus(): open the gate the frozen cpus are spinning
 * on (this cpu's slot in sysctrl_gate[]), then restore the interrupt
 * state saved by disable_vec_intr().
 */
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}
1701341Sstevel 
/*
 * Quiesce clock-driven activity ahead of the cpu freeze: take cpu_lock,
 * pin this thread to its cpu (no preemption) and suspend the cyclic
 * subsystem.  Deliberately leaves cpu_lock held and preemption disabled;
 * sysctrl_enable_intr() is the matching unwind.
 */
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}
1781341Sstevel 
/*
 * Counterpart of sysctrl_stop_intr(): resume cyclics, drop the spl to
 * base level, and release the preemption hold and cpu_lock taken there.
 */
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}
1871341Sstevel 
1881341Sstevel static int
1891341Sstevel sysctrl_is_real_device(dev_info_t *dip)
1901341Sstevel {
1911341Sstevel 	struct regspec *regbuf;
1921341Sstevel 	int length;
1931341Sstevel 	int rc;
1941341Sstevel 
1951341Sstevel 	if (ddi_get_driver(dip) == NULL)
1961341Sstevel 		return (FALSE);
1971341Sstevel 
1981341Sstevel 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
1991341Sstevel 		return (TRUE);
2001341Sstevel 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
2011341Sstevel 		return (FALSE);
2021341Sstevel 
2031341Sstevel 	/*
2041341Sstevel 	 * now the general case
2051341Sstevel 	 */
2061341Sstevel 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
2071341Sstevel 		(caddr_t)&regbuf, &length);
2081341Sstevel 	ASSERT(rc != DDI_PROP_NO_MEMORY);
2091341Sstevel 	if (rc != DDI_PROP_SUCCESS) {
2101341Sstevel 		return (FALSE);
2111341Sstevel 	} else {
2121341Sstevel 		kmem_free(regbuf, length);
2131341Sstevel 		return (TRUE);
2141341Sstevel 	}
2151341Sstevel }
2161341Sstevel 
/* device that failed DDI_SUSPEND; held so resume can find and release it */
static dev_info_t *failed_driver;
/* scratch for ddi_pathname(); safe because the parent is held busy */
static char device_path[MAXPATHLEN];
2191341Sstevel 
/*
 * Recursively suspend (DDI_SUSPEND) every real device in the subtree
 * rooted at the sibling list `dip', children before parents.  On the
 * first failure the offending node is recorded in failed_driver (with
 * a hold) and ENXIO is returned so the caller can unwind with
 * sysctrl_resume_devices().  Returns DDI_SUCCESS (0) on success.
 */
static int
sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
{
	int circ;

	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));

	failed_driver = NULL;
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/*
		 * Hold parent busy while walking child list
		 */
		ndi_devi_enter(dip, &circ);
		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		/* only nodes backed by real hardware need a suspend call */
		if (!sysctrl_is_real_device(dip))
			continue;

		/*
		 * Safe to call ddi_pathname() as parent is held busy
		 */
		(void) ddi_pathname(dip, device_path);
		DEBUGP(errp(" suspending device %s\n", device_path));
		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			DEBUGP(errp("  unable to suspend device %s\n",
				device_path));

			/* report the failing path back through the pkt */
			(void) strncpy(pkt->errbuf, device_path,
				SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
			ndi_hold_devi(dip);
			failed_driver = dip;
			return (ENXIO);
		}
	}

	return (DDI_SUCCESS);
}
2631341Sstevel 
/*
 * Recursively resume (DDI_RESUME) the subtree rooted at the sibling
 * list `start', in reverse device tree order (mirror image of the
 * suspend walk): each pass locates the last not-yet-visited sibling,
 * resumes it, then descends into its children.  Devices at or beyond
 * failed_driver were never suspended and are skipped; failed_driver
 * itself only has its suspend-time hold released.  A resume failure
 * panics the system (see XXX below).
 */
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int		circ;
	dev_info_t	*dip, *next, *last = NULL;

	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		/* advance to the sibling just before `last' (or failure) */
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			/* never suspended; just drop the suspend-time hold */
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
					device_path);
			}
		}
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}
3101341Sstevel 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel (non-wakeable sleep) with a checkpoint request and an
 * AST pending.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 */
#define	SYSCTRL_VSTOPPED(t)		\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
3221341Sstevel 
/*
 * Stop all user threads ahead of the quiesce.  Marks each thread with
 * TP_CHKPT and an AST, kicks wakeable sleepers onto a run queue so they
 * notice, then waits and re-scans.  Retries a few times (with growing
 * waits) to get past transient states like fork.  Honors the module
 * tunables: sysctrl_skip_user_threads short-circuits everything, and
 * sysctrl_allow_blocked_threads accepts SYSCTRL_VSTOPPED threads.
 *
 * Returns DDI_SUCCESS, or ESRCH with the unstoppable thread's details
 * formatted into pkt->errbuf.
 *
 * Lock order throughout: pidlock -> p_lock -> thread_lock.
 */
static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
{
	int		count;
	char		cache_psargs[PSARGSZ];
	kthread_id_t	cache_tp;
	uint_t		cache_t_state;
	int		bailout;
	pid_t		pid;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SYSCTRL_UTSTOP_RETRY	4
#define	SYSCTRL_UTSTOP_WAIT	hz

	if (sysctrl_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
		kthread_id_t tp;

		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Drop the locks around add_one_utstop()
				 * (it accounts for one more thread the
				 * utstop barrier must wait for).
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				/* wake interruptible sleepers to take the AST */
				if (tp->t_state == TS_SLEEP &&
				    (tp->t_flag & T_WAKEABLE)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up; wait grows with each retry */
		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    bailout == 0 && tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			/* did this thread stop? */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sysctrl_allow_blocked_threads &&
			    SYSCTRL_VSTOPPED(tp))) {

				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
					sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
				pid = p->p_pidp->pid_id;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
		    " thread descriptor: %p",
		    cache_psargs, (int)pid, cache_t_state,
			(void *)cache_tp);

		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);

		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
4481341Sstevel 
/*
 * Checkpoint the CPR daemon callback class, then verify that every
 * kernel thread (except interrupt threads) is parked in a callback.
 * Returns DDI_SUCCESS, or EBUSY with the offending thread's name in
 * pkt->errbuf.  Skipped entirely when sysctrl_skip_kernel_threads is
 * set (the default).
 */
static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
{
	caddr_t		name;
	kthread_id_t	tp;

	if (sysctrl_skip_kernel_threads) {
		return (DDI_SUCCESS);
	}

	/*
	 * Note: we unlock the table in resume.
	 * We only need to lock the callback table if we are actually
	 * suspending kernel threads.
	 */
	callb_lock_table();
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {

		/* a daemon refused to checkpoint; report which one */
		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
		return (EBUSY);
	}

	/*
	 * Verify that all threads are accounted for
	 */
	mutex_enter(&pidlock);
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t	*p = ttoproc(tp);

		/* only kernel threads are of interest here */
		if (p->p_as != &kas)
			continue;

		/* interrupt threads are quiesced by the cpu freeze instead */
		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
			return (EBUSY);
		}
	}

	mutex_exit(&pidlock);
	return (DDI_SUCCESS);
}
4971341Sstevel 
4981341Sstevel static void
4991341Sstevel sysctrl_start_user_threads(void)
5001341Sstevel {
5011341Sstevel 	kthread_id_t tp;
5021341Sstevel 
5031341Sstevel 	mutex_enter(&pidlock);
5041341Sstevel 
5051341Sstevel 	/* walk all threads and release them */
5061341Sstevel 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
5071341Sstevel 		proc_t *p = ttoproc(tp);
5081341Sstevel 
5091341Sstevel 		/* skip kernel threads */
5101341Sstevel 		if (ttoproc(tp)->p_as == &kas)
5111341Sstevel 			continue;
5121341Sstevel 
5131341Sstevel 		mutex_enter(&p->p_lock);
5141341Sstevel 		tp->t_proc_flag &= ~TP_CHKPT;
5151341Sstevel 		mutex_exit(&p->p_lock);
5161341Sstevel 
5171341Sstevel 		thread_lock(tp);
5181341Sstevel 		if (CPR_ISTOPPED(tp)) {
5191341Sstevel 			/* back on the runq */
5201341Sstevel 			tp->t_schedflag |= TS_RESUME;
5211341Sstevel 			setrun_locked(tp);
5221341Sstevel 		}
5231341Sstevel 		thread_unlock(tp);
5241341Sstevel 	}
5251341Sstevel 
5261341Sstevel 	mutex_exit(&pidlock);
5271341Sstevel }
5281341Sstevel 
5291341Sstevel static void
5301341Sstevel sysctrl_signal_user(int sig)
5311341Sstevel {
5321341Sstevel 	struct proc *p;
5331341Sstevel 
5341341Sstevel 	mutex_enter(&pidlock);
5351341Sstevel 
5361341Sstevel 	for (p = practive; p != NULL; p = p->p_next) {
5371341Sstevel 		/* only user threads */
5381341Sstevel 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
5391341Sstevel 		    p == proc_init || p == ttoproc(curthread))
5401341Sstevel 			continue;
5411341Sstevel 
5421341Sstevel 		mutex_enter(&p->p_lock);
5431341Sstevel 		sigtoproc(p, NULL, sig);
5441341Sstevel 		mutex_exit(&p->p_lock);
5451341Sstevel 	}
5461341Sstevel 
5471341Sstevel 	mutex_exit(&pidlock);
5481341Sstevel 
5491341Sstevel 	/* add a bit of delay */
5501341Sstevel 	delay(hz);
5511341Sstevel }
5521341Sstevel 
/*
 * Unwind a (possibly partial) sysctrl_suspend().  The switch cases
 * deliberately fall through: entering at the state recorded in
 * suspend_state undoes that stage and every earlier one, in reverse
 * order of the suspend sequence.  Always finishes by broadcasting
 * SIGTHAW and restoring the detach/suspend enable flag.
 */
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef	Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
				watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
			CB_CODE_CPR_RESUME);
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons and unlock the callb table
		 * locked in sysctrl_stop_kernel_threads()
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
				CB_CODE_CPR_RESUME);
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}
6541341Sstevel 
/*
 * Pre-suspend preparation, called from normal system call context
 * before sysctrl_suspend() begins quiescing the system.
 */
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload. The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}
6681341Sstevel 
/*
 * Quiesce the system for dynamic reconfiguration: stop user threads,
 * then kernel daemons, sync/suspend filesystem and lock-manager state,
 * suspend the device tree, stop callouts and the hw watchdog, and
 * finally freeze all other cpus.  suspend_state is advanced before
 * each stage so that a failure (or a later sysctrl_resume()) unwinds
 * exactly the stages completed.  Returns DDI_SUCCESS or the errno-style
 * code of the failing stage (the partial suspend is already unwound).
 */
int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable sysctrl detach/suspend function */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table: suspend cyclics (cpu_lock held and
	 * preemption disabled from here until sysctrl_enable_intr())
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}

	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef	Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}
760