xref: /onnv-gate/usr/src/uts/sun4u/sunfire/io/sysctrl_quiesce.c (revision 11752:9c475fee0b48)
11341Sstevel /*
21341Sstevel  * CDDL HEADER START
31341Sstevel  *
41341Sstevel  * The contents of this file are subject to the terms of the
51341Sstevel  * Common Development and Distribution License (the "License").
61341Sstevel  * You may not use this file except in compliance with the License.
71341Sstevel  *
81341Sstevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91341Sstevel  * or http://www.opensolaris.org/os/licensing.
101341Sstevel  * See the License for the specific language governing permissions
111341Sstevel  * and limitations under the License.
121341Sstevel  *
131341Sstevel  * When distributing Covered Code, include this CDDL HEADER in each
141341Sstevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151341Sstevel  * If applicable, add the following below this CDDL HEADER, with the
161341Sstevel  * fields enclosed by brackets "[]" replaced with your own identifying
171341Sstevel  * information: Portions Copyright [yyyy] [name of copyright owner]
181341Sstevel  *
191341Sstevel  * CDDL HEADER END
201341Sstevel  */
211341Sstevel 
221341Sstevel /*
23*11752STrevor.Thompson@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
241341Sstevel  * Use is subject to license terms.
251341Sstevel  */
261341Sstevel 
271341Sstevel /*
281341Sstevel  * This workaround inhibits prom_printf after the cpus are grabbed.
291341Sstevel  * This can be removed when 4154263 is corrected.
301341Sstevel  */
311341Sstevel #define	Bug_4154263
321341Sstevel 
331341Sstevel /*
341341Sstevel  * A CPR derivative specifically for sunfire
351341Sstevel  */
361341Sstevel 
371341Sstevel #include <sys/types.h>
381341Sstevel #include <sys/systm.h>
391341Sstevel #include <sys/machparam.h>
401341Sstevel #include <sys/machsystm.h>
411341Sstevel #include <sys/ddi.h>
421341Sstevel #define	SUNDDI_IMPL
431341Sstevel #include <sys/sunddi.h>
441341Sstevel #include <sys/time.h>
451341Sstevel #include <sys/kmem.h>
461341Sstevel #include <nfs/lm.h>
471341Sstevel #include <sys/ddi_impldefs.h>
481341Sstevel #include <sys/obpdefs.h>
491341Sstevel #include <sys/cmn_err.h>
501341Sstevel #include <sys/debug.h>
511341Sstevel #include <sys/errno.h>
521341Sstevel #include <sys/callb.h>
531341Sstevel #include <sys/clock.h>
541341Sstevel #include <sys/x_call.h>
551341Sstevel #include <sys/cpuvar.h>
561341Sstevel #include <sys/epm.h>
571341Sstevel #include <sys/vfs.h>
581341Sstevel #include <sys/fhc.h>
591341Sstevel #include <sys/sysctrl.h>
601341Sstevel #include <sys/promif.h>
611341Sstevel #include <sys/conf.h>
621341Sstevel #include <sys/modctl.h>
631341Sstevel #include <sys/cyclic.h>
641341Sstevel #include <sys/sunndi.h>
651341Sstevel #include <sys/machsystm.h>
661341Sstevel 
671341Sstevel static enum sysctrl_suspend_state {
681341Sstevel 	SYSC_STATE_BEGIN = 0,
691341Sstevel 	SYSC_STATE_USER,
701341Sstevel 	SYSC_STATE_DAEMON,
711341Sstevel 	SYSC_STATE_DRIVER,
721341Sstevel 	SYSC_STATE_FULL } suspend_state;
731341Sstevel 
741341Sstevel static int	pstate_save;
751341Sstevel static uint_t	sysctrl_gate[NCPU];
761341Sstevel int	sysctrl_quiesce_debug = FALSE;
771341Sstevel static int	sysctrl_skip_kernel_threads = TRUE;
781341Sstevel 
791341Sstevel /*
801341Sstevel  * sysctrl_skip_user_threads is used to control if user threads should
811341Sstevel  * be suspended.  If sysctrl_skip_user_threads is true, the rest of the
821341Sstevel  * flags are not used; if it is false, sysctrl_check_user_stop_result
831341Sstevel  * will be used to control whether or not we need to check suspend
841341Sstevel  * result, and sysctrl_allow_blocked_threads will be used to control
851341Sstevel  * whether or not we allow suspend to continue if there are blocked
861341Sstevel  * threads.  We allow all combinations of sysctrl_check_user_stop_result
871341Sstevel  * and sysctrl_allow_block_threads, even though it might not make much
881341Sstevel  * sense to not allow block threads when we don't even check stop
891341Sstevel  * result.
901341Sstevel  */
911341Sstevel static int	sysctrl_skip_user_threads = 0;		/* default to FALSE */
921341Sstevel static int	sysctrl_check_user_stop_result = 1;	/* default to TRUE */
931341Sstevel static int	sysctrl_allow_blocked_threads = 1;	/* default to TRUE */
941341Sstevel 
951341Sstevel static int	sysc_watchdog_suspended;
961341Sstevel 
971341Sstevel extern int	sysctrl_enable_detach_suspend;
981341Sstevel static int	sysc_lastval;
991341Sstevel 
1001341Sstevel #define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
1011341Sstevel #define	errp	prom_printf
1021341Sstevel 
1031341Sstevel #define	SYSC_CPU_LOOP_MSEC	1000
1041341Sstevel 
/*
 * Quiesce all other CPUs: cross-call each one into sysctrl_freeze(),
 * where it spins with interrupts off on its per-cpu gate word until
 * sysctrl_release_cpus() opens the gate.  After every CPU has checked
 * in, vectored interrupts are disabled on this (the only running) CPU
 * as well, leaving the system fully quiesced.
 *
 * Panics if a CPU fails to check in within SYSC_CPU_LOOP_MSEC
 * (unless a panic is already in progress).  Must be paired with a
 * later call to sysctrl_release_cpus().
 */
static void
sysctrl_grab_cpus(void)
{
	int		i;
	cpuset_t	others;
	extern cpuset_t	cpu_ready_set;
	extern void	sysctrl_freeze(void);
	uint64_t	sysc_tick_limit;
	uint64_t	sysc_current_tick;
	uint64_t	sysc_tick_deadline;

	extern u_longlong_t	gettick(void);

	/* clear every check-in gate before waking anyone */
	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	/* convert the millisecond budget into %tick counts */
	sysc_tick_limit = ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If in panic, we just return */
			if (panicstr)
				break;

			/* Panic the system if cpu not responded by deadline */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				    "responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}
1571341Sstevel 
/*
 * Undo sysctrl_grab_cpus(): open the gate the frozen CPUs are spinning
 * on (sysctrl_freeze() was passed the address of this CPU's gate word
 * as its release flag), then restore this CPU's interrupt state that
 * was saved in pstate_save at grab time.  The store must precede the
 * interrupt restore so the other CPUs are freed first.
 */
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}
1671341Sstevel 
/*
 * Stop clock/callout activity ahead of the full quiesce: suspend the
 * cyclic subsystem with kernel preemption disabled.
 *
 * NOTE: cpu_lock is acquired here and deliberately left held (and
 * preemption left disabled) until the matching sysctrl_enable_intr()
 * releases them on the resume path.
 */
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}
1751341Sstevel 
/*
 * Counterpart of sysctrl_stop_intr(): resume cyclics, drop the
 * priority level back to base, then re-enable preemption and release
 * cpu_lock that sysctrl_stop_intr() acquired.  Tear-down is in the
 * reverse order of set-up.
 */
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}
1841341Sstevel 
1851341Sstevel static int
sysctrl_is_real_device(dev_info_t * dip)1861341Sstevel sysctrl_is_real_device(dev_info_t *dip)
1871341Sstevel {
1881341Sstevel 	struct regspec *regbuf;
1891341Sstevel 	int length;
1901341Sstevel 	int rc;
1911341Sstevel 
1921341Sstevel 	if (ddi_get_driver(dip) == NULL)
1931341Sstevel 		return (FALSE);
1941341Sstevel 
1951341Sstevel 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
1961341Sstevel 		return (TRUE);
1971341Sstevel 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
1981341Sstevel 		return (FALSE);
1991341Sstevel 
2001341Sstevel 	/*
2011341Sstevel 	 * now the general case
2021341Sstevel 	 */
2031341Sstevel 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
204*11752STrevor.Thompson@Sun.COM 	    (caddr_t)&regbuf, &length);
2051341Sstevel 	ASSERT(rc != DDI_PROP_NO_MEMORY);
2061341Sstevel 	if (rc != DDI_PROP_SUCCESS) {
2071341Sstevel 		return (FALSE);
2081341Sstevel 	} else {
2091341Sstevel 		kmem_free(regbuf, length);
2101341Sstevel 		return (TRUE);
2111341Sstevel 	}
2121341Sstevel }
2131341Sstevel 
2141341Sstevel static dev_info_t *failed_driver;
2151341Sstevel static char device_path[MAXPATHLEN];
2161341Sstevel 
2171341Sstevel static int
sysctrl_suspend_devices(dev_info_t * dip,sysc_cfga_pkt_t * pkt)2181341Sstevel sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
2191341Sstevel {
2201341Sstevel 	int circ;
2211341Sstevel 
2221341Sstevel 	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
2231341Sstevel 	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));
2241341Sstevel 
2251341Sstevel 	failed_driver = NULL;
2261341Sstevel 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
2271341Sstevel 		/*
2281341Sstevel 		 * Hold parent busy while walking child list
2291341Sstevel 		 */
2301341Sstevel 		ndi_devi_enter(dip, &circ);
2311341Sstevel 		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
2321341Sstevel 			ndi_devi_exit(dip, circ);
2331341Sstevel 			return (ENXIO);
2341341Sstevel 		}
2351341Sstevel 		ndi_devi_exit(dip, circ);
2361341Sstevel 
2371341Sstevel 		if (!sysctrl_is_real_device(dip))
2381341Sstevel 			continue;
2391341Sstevel 
2401341Sstevel 		/*
2411341Sstevel 		 * Safe to call ddi_pathname() as parent is held busy
2421341Sstevel 		 */
2431341Sstevel 		(void) ddi_pathname(dip, device_path);
2441341Sstevel 		DEBUGP(errp(" suspending device %s\n", device_path));
2451341Sstevel 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
2461341Sstevel 			DEBUGP(errp("  unable to suspend device %s\n",
247*11752STrevor.Thompson@Sun.COM 			    device_path));
2481341Sstevel 
2491341Sstevel 			(void) strncpy(pkt->errbuf, device_path,
250*11752STrevor.Thompson@Sun.COM 			    SYSC_OUTPUT_LEN);
2511341Sstevel 			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
2521341Sstevel 			ndi_hold_devi(dip);
2531341Sstevel 			failed_driver = dip;
2541341Sstevel 			return (ENXIO);
2551341Sstevel 		}
2561341Sstevel 	}
2571341Sstevel 
2581341Sstevel 	return (DDI_SUCCESS);
2591341Sstevel }
2601341Sstevel 
/*
 * Walk the sibling list rooted at start in REVERSE order, resuming
 * (DDI_RESUME) each real device and recursing into its children, so
 * devices come back in the opposite order of suspension.  The sibling
 * list is singly linked, so each pass re-scans from start to find the
 * last not-yet-visited node ("last" tracks the resume frontier).
 *
 * If failed_driver is set (the device on which suspend stopped), it
 * marks the boundary: the failed dip itself is only released, and
 * nothing beyond it is resumed since it was never suspended.
 *
 * A device that refuses DDI_RESUME panics the system (see XXX below).
 */
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int		circ;
	dev_info_t	*dip, *next, *last = NULL;

	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		/* scan forward to the node just before "last" (or the
		 * failed driver, whichever comes first) */
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			/* suspend stopped here: just drop the hold taken
			 * by sysctrl_suspend_devices() */
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
				    device_path);
			}
		}
		/* recurse into children with this node held busy */
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}
3071341Sstevel 
3081341Sstevel /*
3091341Sstevel  * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
3101341Sstevel  * but from DR point of view.  These user threads are waiting in
3111341Sstevel  * the kernel.  Once they complete in the kernel, they will process
3121341Sstevel  * the stop signal and stop.
3131341Sstevel  */
3141341Sstevel #define	SYSCTRL_VSTOPPED(t)		\
3151341Sstevel 	((t)->t_state == TS_SLEEP &&	\
3161341Sstevel 	(t)->t_wchan != NULL &&		\
3171341Sstevel 	(t)->t_astflag &&		\
3181341Sstevel 	((t)->t_proc_flag & TP_CHKPT))
3191341Sstevel 
3201341Sstevel static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t * pkt)3211341Sstevel sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
3221341Sstevel {
3231341Sstevel 	int		count;
3241341Sstevel 	char		cache_psargs[PSARGSZ];
3251341Sstevel 	kthread_id_t	cache_tp;
3261341Sstevel 	uint_t		cache_t_state;
3271341Sstevel 	int		bailout;
3281341Sstevel 	pid_t		pid;
3291341Sstevel 
3301341Sstevel 	extern void add_one_utstop();
3311341Sstevel 	extern void utstop_timedwait(clock_t);
3321341Sstevel 	extern void utstop_init(void);
3331341Sstevel 
3341341Sstevel #define	SYSCTRL_UTSTOP_RETRY	4
3351341Sstevel #define	SYSCTRL_UTSTOP_WAIT	hz
3361341Sstevel 
3371341Sstevel 	if (sysctrl_skip_user_threads)
3381341Sstevel 		return (DDI_SUCCESS);
3391341Sstevel 
3401341Sstevel 	utstop_init();
3411341Sstevel 
3421341Sstevel 	/* we need to try a few times to get past fork, etc. */
3431341Sstevel 	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
3441341Sstevel 		kthread_id_t tp;
3451341Sstevel 
3461341Sstevel 		/* walk the entire threadlist */
3471341Sstevel 		mutex_enter(&pidlock);
3481341Sstevel 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
3491341Sstevel 			proc_t *p = ttoproc(tp);
3501341Sstevel 
3511341Sstevel 			/* handle kernel threads separately */
3521341Sstevel 			if (p->p_as == &kas || p->p_stat == SZOMB)
3531341Sstevel 				continue;
3541341Sstevel 
3551341Sstevel 			mutex_enter(&p->p_lock);
3561341Sstevel 			thread_lock(tp);
3571341Sstevel 
3581341Sstevel 			if (tp->t_state == TS_STOPPED) {
3591341Sstevel 				/* add another reason to stop this thread */
3601341Sstevel 				tp->t_schedflag &= ~TS_RESUME;
3611341Sstevel 			} else {
3621341Sstevel 				tp->t_proc_flag |= TP_CHKPT;
3631341Sstevel 
3641341Sstevel 				thread_unlock(tp);
3651341Sstevel 				mutex_exit(&p->p_lock);
3661341Sstevel 				add_one_utstop();
3671341Sstevel 				mutex_enter(&p->p_lock);
3681341Sstevel 				thread_lock(tp);
3691341Sstevel 
3701341Sstevel 				aston(tp);
3711341Sstevel 
3723792Sakolb 				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
3731341Sstevel 					setrun_locked(tp);
3741341Sstevel 				}
3751341Sstevel 
3761341Sstevel 			}
3771341Sstevel 
3781341Sstevel 			/* grab thread if needed */
3791341Sstevel 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
3801341Sstevel 				poke_cpu(tp->t_cpu->cpu_id);
3811341Sstevel 
3821341Sstevel 
3831341Sstevel 			thread_unlock(tp);
3841341Sstevel 			mutex_exit(&p->p_lock);
3851341Sstevel 		}
3861341Sstevel 		mutex_exit(&pidlock);
3871341Sstevel 
3881341Sstevel 
3891341Sstevel 		/* let everything catch up */
3901341Sstevel 		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);
3911341Sstevel 
3921341Sstevel 
3931341Sstevel 		/* now, walk the threadlist again to see if we are done */
3941341Sstevel 		mutex_enter(&pidlock);
3951341Sstevel 		for (tp = curthread->t_next, bailout = 0;
3961341Sstevel 		    bailout == 0 && tp != curthread; tp = tp->t_next) {
3971341Sstevel 			proc_t *p = ttoproc(tp);
3981341Sstevel 
3991341Sstevel 			/* handle kernel threads separately */
4001341Sstevel 			if (p->p_as == &kas || p->p_stat == SZOMB)
4011341Sstevel 				continue;
4021341Sstevel 
4031341Sstevel 			/*
4041341Sstevel 			 * If this thread didn't stop, and we don't allow
4051341Sstevel 			 * unstopped blocked threads, bail.
4061341Sstevel 			 */
4071341Sstevel 			/* did this thread stop? */
4081341Sstevel 			thread_lock(tp);
4091341Sstevel 			if (!CPR_ISTOPPED(tp) &&
4101341Sstevel 			    !(sysctrl_allow_blocked_threads &&
4111341Sstevel 			    SYSCTRL_VSTOPPED(tp))) {
4121341Sstevel 
4131341Sstevel 				/* nope, cache the details for later */
4141341Sstevel 				bcopy(p->p_user.u_psargs, cache_psargs,
415*11752STrevor.Thompson@Sun.COM 				    sizeof (cache_psargs));
4161341Sstevel 				cache_tp = tp;
4171341Sstevel 				cache_t_state = tp->t_state;
4181341Sstevel 				bailout = 1;
4191341Sstevel 				pid = p->p_pidp->pid_id;
4201341Sstevel 			}
4211341Sstevel 			thread_unlock(tp);
4221341Sstevel 		}
4231341Sstevel 		mutex_exit(&pidlock);
4241341Sstevel 
4251341Sstevel 		/* were all the threads stopped? */
4261341Sstevel 		if (!bailout)
4271341Sstevel 			break;
4281341Sstevel 	}
4291341Sstevel 
4301341Sstevel 	/* were we unable to stop all threads after a few tries? */
4311341Sstevel 	if (bailout) {
4321341Sstevel 		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
433*11752STrevor.Thompson@Sun.COM 		    " thread descriptor: %p", cache_psargs, (int)pid,
434*11752STrevor.Thompson@Sun.COM 		    cache_t_state, (void *)cache_tp);
4351341Sstevel 
4361341Sstevel 		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);
4371341Sstevel 
4381341Sstevel 		return (ESRCH);
4391341Sstevel 	}
4401341Sstevel 
4411341Sstevel 	return (DDI_SUCCESS);
4421341Sstevel }
4431341Sstevel 
4441341Sstevel static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t * pkt)4451341Sstevel sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
4461341Sstevel {
4471341Sstevel 	caddr_t		name;
4481341Sstevel 	kthread_id_t	tp;
4491341Sstevel 
4501341Sstevel 	if (sysctrl_skip_kernel_threads) {
4511341Sstevel 		return (DDI_SUCCESS);
4521341Sstevel 	}
4531341Sstevel 
4541341Sstevel 	/*
4551341Sstevel 	 * Note: we unlock the table in resume.
4561341Sstevel 	 * We only need to lock the callback table if we are actually
4571341Sstevel 	 * suspending kernel threads.
4581341Sstevel 	 */
4591341Sstevel 	callb_lock_table();
4601341Sstevel 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
4611341Sstevel 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
4621341Sstevel 
4631341Sstevel 		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
4641341Sstevel 		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
4651341Sstevel 		return (EBUSY);
4661341Sstevel 	}
4671341Sstevel 
4681341Sstevel 	/*
4691341Sstevel 	 * Verify that all threads are accounted for
4701341Sstevel 	 */
4711341Sstevel 	mutex_enter(&pidlock);
4721341Sstevel 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
4731341Sstevel 		proc_t	*p = ttoproc(tp);
4741341Sstevel 
4751341Sstevel 		if (p->p_as != &kas)
4761341Sstevel 			continue;
4771341Sstevel 
4781341Sstevel 		if (tp->t_flag & T_INTR_THREAD)
4791341Sstevel 			continue;
4801341Sstevel 
4811341Sstevel 		if (!callb_is_stopped(tp, &name)) {
4821341Sstevel 			mutex_exit(&pidlock);
4831341Sstevel 			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
4841341Sstevel 			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
4851341Sstevel 			return (EBUSY);
4861341Sstevel 		}
4871341Sstevel 	}
4881341Sstevel 
4891341Sstevel 	mutex_exit(&pidlock);
4901341Sstevel 	return (DDI_SUCCESS);
4911341Sstevel }
4921341Sstevel 
4931341Sstevel static void
sysctrl_start_user_threads(void)4941341Sstevel sysctrl_start_user_threads(void)
4951341Sstevel {
4961341Sstevel 	kthread_id_t tp;
4971341Sstevel 
4981341Sstevel 	mutex_enter(&pidlock);
4991341Sstevel 
5001341Sstevel 	/* walk all threads and release them */
5011341Sstevel 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
5021341Sstevel 		proc_t *p = ttoproc(tp);
5031341Sstevel 
5041341Sstevel 		/* skip kernel threads */
5051341Sstevel 		if (ttoproc(tp)->p_as == &kas)
5061341Sstevel 			continue;
5071341Sstevel 
5081341Sstevel 		mutex_enter(&p->p_lock);
5091341Sstevel 		tp->t_proc_flag &= ~TP_CHKPT;
5101341Sstevel 		mutex_exit(&p->p_lock);
5111341Sstevel 
5121341Sstevel 		thread_lock(tp);
5131341Sstevel 		if (CPR_ISTOPPED(tp)) {
5141341Sstevel 			/* back on the runq */
5151341Sstevel 			tp->t_schedflag |= TS_RESUME;
5161341Sstevel 			setrun_locked(tp);
5171341Sstevel 		}
5181341Sstevel 		thread_unlock(tp);
5191341Sstevel 	}
5201341Sstevel 
5211341Sstevel 	mutex_exit(&pidlock);
5221341Sstevel }
5231341Sstevel 
5241341Sstevel static void
sysctrl_signal_user(int sig)5251341Sstevel sysctrl_signal_user(int sig)
5261341Sstevel {
5271341Sstevel 	struct proc *p;
5281341Sstevel 
5291341Sstevel 	mutex_enter(&pidlock);
5301341Sstevel 
5311341Sstevel 	for (p = practive; p != NULL; p = p->p_next) {
5321341Sstevel 		/* only user threads */
5331341Sstevel 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
5341341Sstevel 		    p == proc_init || p == ttoproc(curthread))
5351341Sstevel 			continue;
5361341Sstevel 
5371341Sstevel 		mutex_enter(&p->p_lock);
5381341Sstevel 		sigtoproc(p, NULL, sig);
5391341Sstevel 		mutex_exit(&p->p_lock);
5401341Sstevel 	}
5411341Sstevel 
5421341Sstevel 	mutex_exit(&pidlock);
5431341Sstevel 
5441341Sstevel 	/* add a bit of delay */
5451341Sstevel 	delay(hz);
5461341Sstevel }
5471341Sstevel 
/*
 * Resume the system from a (possibly partial) sysctrl_suspend().
 *
 * suspend_state records how far suspend got; each switch case undoes
 * one suspend stage and deliberately FALLS THROUGH to the next, so
 * entering at SYSC_STATE_FULL unwinds everything in reverse order:
 * cpus -> drivers -> kernel daemons -> user threads -> SIGTHAW.
 * Called both on the normal resume path and for error recovery when
 * sysctrl_suspend() fails partway.
 */
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef	Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
		    CB_CODE_CPR_RESUME);
		/* releases cpu_lock taken by sysctrl_stop_intr() */
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
			    CB_CODE_CPR_RESUME);
			/* unlocks the table locked in stop_kernel_threads */
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}
6491341Sstevel 
/*
 * Pre-load the lock manager module while still in normal system call
 * context, so the later lm_cprsuspend() call cannot trigger a modload
 * that would deadlock once the system is partially quiesced.
 */
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload. The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}
6631341Sstevel 
/*
 * Quiesce the whole system for sunfire DR, CPR-style, in stages:
 * user threads -> kernel daemons -> filesystem sync -> drivers ->
 * callouts/watchdog -> all cpus.  suspend_state tracks the current
 * stage; on any failure sysctrl_resume() is called to unwind exactly
 * the stages completed so far, and the stage's error code is returned.
 *
 * Returns DDI_SUCCESS with the system fully quiesced, or the error
 * from the failing stage (pkt->errbuf describes the culprit).
 */
int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable sysctrl detach/suspend function */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	/* failure is tolerated unless sysctrl_check_user_stop_result set */
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		/* remember to re-arm it in sysctrl_resume() */
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}

	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef	Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}
755