xref: /onnv-gate/usr/src/uts/common/cpr/cpr_main.c (revision 5295:a21f2449e5f9)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * This module contains the guts of checkpoint-resume mechanism.
300Sstevel@tonic-gate  * All code in this module is platform independent.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <sys/types.h>
340Sstevel@tonic-gate #include <sys/errno.h>
350Sstevel@tonic-gate #include <sys/callb.h>
360Sstevel@tonic-gate #include <sys/processor.h>
370Sstevel@tonic-gate #include <sys/machsystm.h>
380Sstevel@tonic-gate #include <sys/clock.h>
390Sstevel@tonic-gate #include <sys/vfs.h>
400Sstevel@tonic-gate #include <sys/kmem.h>
410Sstevel@tonic-gate #include <nfs/lm.h>
420Sstevel@tonic-gate #include <sys/systm.h>
430Sstevel@tonic-gate #include <sys/cpr.h>
440Sstevel@tonic-gate #include <sys/bootconf.h>
450Sstevel@tonic-gate #include <sys/cyclic.h>
460Sstevel@tonic-gate #include <sys/filio.h>
470Sstevel@tonic-gate #include <sys/fs/ufs_filio.h>
480Sstevel@tonic-gate #include <sys/epm.h>
490Sstevel@tonic-gate #include <sys/modctl.h>
500Sstevel@tonic-gate #include <sys/reboot.h>
510Sstevel@tonic-gate #include <sys/kdi.h>
520Sstevel@tonic-gate #include <sys/promif.h>
53*5295Srandyf #include <sys/srn.h>
54*5295Srandyf #include <sys/cpr_impl.h>
55*5295Srandyf 
56*5295Srandyf #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
570Sstevel@tonic-gate 
580Sstevel@tonic-gate extern struct cpr_terminator cpr_term;
590Sstevel@tonic-gate 
600Sstevel@tonic-gate extern int cpr_alloc_statefile(int);
610Sstevel@tonic-gate extern void cpr_start_kernel_threads(void);
620Sstevel@tonic-gate extern void cpr_abbreviate_devpath(char *, char *);
630Sstevel@tonic-gate extern void cpr_convert_promtime(cpr_time_t *);
640Sstevel@tonic-gate extern void cpr_send_notice(void);
650Sstevel@tonic-gate extern void cpr_set_bitmap_size(void);
660Sstevel@tonic-gate extern void cpr_stat_init();
670Sstevel@tonic-gate extern void cpr_statef_close(void);
680Sstevel@tonic-gate extern void flush_windows(void);
69*5295Srandyf extern void (*srn_signal)(int, int);
70*5295Srandyf extern void init_cpu_syscall(struct cpu *);
71*5295Srandyf extern void i_cpr_pre_resume_cpus();
72*5295Srandyf extern void i_cpr_post_resume_cpus();
730Sstevel@tonic-gate 
740Sstevel@tonic-gate extern int pm_powering_down;
75*5295Srandyf extern kmutex_t srn_clone_lock;
76*5295Srandyf extern int srn_inuse;
770Sstevel@tonic-gate 
78*5295Srandyf static int cpr_suspend(int);
79*5295Srandyf static int cpr_resume(int);
80*5295Srandyf static void cpr_suspend_init(int);
81*5295Srandyf #if defined(__x86)
82*5295Srandyf static int cpr_suspend_cpus(void);
83*5295Srandyf static void cpr_resume_cpus(void);
84*5295Srandyf #endif
85*5295Srandyf static int cpr_all_online(void);
86*5295Srandyf static void cpr_restore_offline(void);
870Sstevel@tonic-gate 
880Sstevel@tonic-gate cpr_time_t wholecycle_tv;
890Sstevel@tonic-gate int cpr_suspend_succeeded;
900Sstevel@tonic-gate pfn_t curthreadpfn;
910Sstevel@tonic-gate int curthreadremapped;
920Sstevel@tonic-gate 
93*5295Srandyf extern cpuset_t cpu_ready_set;
94*5295Srandyf extern void *(*cpu_pause_func)(void *);
95*5295Srandyf 
96*5295Srandyf extern processorid_t i_cpr_bootcpuid(void);
97*5295Srandyf extern cpu_t *i_cpr_bootcpu(void);
98*5295Srandyf extern void tsc_adjust_delta(hrtime_t tdelta);
99*5295Srandyf extern void tsc_resume(void);
100*5295Srandyf extern int tsc_resume_in_cyclic;
101*5295Srandyf 
/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume they
 * resume on a UP machine to continue to work.  Should be deprecated
 * once the broken drivers are fixed.
 */
108*5295Srandyf int cpr_resume_uniproc = 0;
109*5295Srandyf 
1100Sstevel@tonic-gate /*
1110Sstevel@tonic-gate  * save or restore abort_enable;  this prevents a drop
1120Sstevel@tonic-gate  * to kadb or prom during cpr_resume_devices() when
1130Sstevel@tonic-gate  * there is no kbd present;  see abort_sequence_enter()
1140Sstevel@tonic-gate  */
1150Sstevel@tonic-gate static void
1160Sstevel@tonic-gate cpr_sae(int stash)
1170Sstevel@tonic-gate {
1180Sstevel@tonic-gate 	static int saved_ae = -1;
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate 	if (stash) {
1210Sstevel@tonic-gate 		saved_ae = abort_enable;
1220Sstevel@tonic-gate 		abort_enable = 0;
1230Sstevel@tonic-gate 	} else if (saved_ae != -1) {
1240Sstevel@tonic-gate 		abort_enable = saved_ae;
1250Sstevel@tonic-gate 		saved_ae = -1;
1260Sstevel@tonic-gate 	}
1270Sstevel@tonic-gate }
1280Sstevel@tonic-gate 
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate /*
1310Sstevel@tonic-gate  * The main switching point for cpr, this routine starts the ckpt
1320Sstevel@tonic-gate  * and state file saving routines; on resume the control is
1330Sstevel@tonic-gate  * returned back to here and it then calls the resume routine.
1340Sstevel@tonic-gate  */
/*
 * The main switching point for cpr, this routine starts the ckpt
 * and state file saving routines; on resume the control is
 * returned back to here and it then calls the resume routine.
 *
 * 'sleeptype' is CPR_TORAM (suspend-to-RAM) or CPR_TODISK
 * (suspend-to-disk via the statefile).  Returns 0 on a successful
 * suspend/resume cycle, otherwise a nonzero error code.
 */
int
cpr_main(int sleeptype)
{
	int rc, rc2;
	label_t saveq;		/* caller's longjmp buffer, CPR_TODISK only */
	klwp_t *tlwp = ttolwp(curthread);

	if (sleeptype == CPR_TODISK) {
		if ((rc = cpr_default_setup(1)) != 0)
			return (rc);
		ASSERT(tlwp);
		/* save the lwp's qsav so it can be restored after resume */
		saveq = tlwp->lwp_qsav;
	}

	if (sleeptype == CPR_TORAM) {
		rc = cpr_suspend(sleeptype);
		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
		if (rc == 0) {
			int i_cpr_power_down(int sleeptype);

			/*
			 * From this point on, we should be at a high
			 * spl, interrupts disabled, and all but one
			 * cpu's paused (effectively UP/single threaded).
			 * So this is where we want to put ASSERTS()
			 * to let us know otherwise.
			 */
			ASSERT(cpus_paused());

			/*
			 * Now do the work of actually putting this
			 * machine to sleep!
			 */
			rc = i_cpr_power_down(sleeptype);
			if (rc == 0) {
				PMD(PMD_SX, ("back from succssful suspend\n"))
			}
			/*
			 * We do care about the return value from cpr_resume
			 * at this point, as it will tell us if one of the
			 * resume functions failed (cpr_resume_devices())
			 * However, for this to return and _not_ panic, means
			 * that we must be in one of the test functions.  So
			 * check for that and return an appropriate message.
			 */
			rc2 = cpr_resume(sleeptype);
			if (rc2 != 0) {
				ASSERT(cpr_test_point > 0);
				cmn_err(CE_NOTE,
				    "cpr_resume returned non-zero: %d\n", rc2);
				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
			}
			ASSERT(!cpus_paused());
		} else {
			/* suspend failed; undo what cpr_suspend did */
			PMD(PMD_SX, ("failed suspend, resuming\n"))
			rc = cpr_resume(sleeptype);
		}
		return (rc);
	}
	/*
	 * CPR_TODISK from here on.
	 * Remember where we are for resume after reboot
	 */
	if (!setjmp(&tlwp->lwp_qsav)) {
		/*
		 * try to checkpoint the system, if failed return back
		 * to userland, otherwise power off.
		 */
		rc = cpr_suspend(sleeptype);
		if (rc || cpr_reusable_mode) {
			/*
			 * We don't really want to go down, or
			 * something went wrong in suspend, do what we can
			 * to put the system back to an operable state then
			 * return back to userland.
			 */
			PMD(PMD_SX, ("failed suspend, resuming\n"))
			(void) cpr_resume(sleeptype);
			PMD(PMD_SX, ("back from failed suspend resume\n"))
		}
	} else {
		/*
		 * This is the resumed side of longjmp, restore the previous
		 * longjmp pointer if there is one so this will be transparent
		 * to the world.
		 * This path is only for CPR_TODISK, where we reboot
		 */
		ASSERT(sleeptype == CPR_TODISK);
		tlwp->lwp_qsav = saveq;
		CPR->c_flags &= ~C_SUSPENDING;
		CPR->c_flags |= C_RESUMING;

		/*
		 * resume the system back to the original state
		 */
		rc = cpr_resume(sleeptype);
		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
		    rc))
	}

	(void) cpr_default_setup(0);

	return (rc);
}
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 
240*5295Srandyf #if defined(__sparc)
241*5295Srandyf 
2420Sstevel@tonic-gate /*
2430Sstevel@tonic-gate  * check/disable or re-enable UFS logging
2440Sstevel@tonic-gate  */
2450Sstevel@tonic-gate static void
2460Sstevel@tonic-gate cpr_log_status(int enable, int *svstat, vnode_t *vp)
2470Sstevel@tonic-gate {
2480Sstevel@tonic-gate 	int cmd, status, error;
2490Sstevel@tonic-gate 	char *str, *able;
2500Sstevel@tonic-gate 	fiolog_t fl;
2510Sstevel@tonic-gate 	refstr_t *mntpt;
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	str = "cpr_log_status";
2540Sstevel@tonic-gate 	bzero(&fl, sizeof (fl));
2550Sstevel@tonic-gate 	fl.error = FIOLOG_ENONE;
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate 	/*
2580Sstevel@tonic-gate 	 * when disabling, first get and save logging status (0 or 1)
2590Sstevel@tonic-gate 	 */
2600Sstevel@tonic-gate 	if (enable == 0) {
2610Sstevel@tonic-gate 		if (error = VOP_IOCTL(vp, _FIOISLOG,
2620Sstevel@tonic-gate 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL)) {
2630Sstevel@tonic-gate 			mntpt = vfs_getmntpoint(vp->v_vfsp);
2643446Smrj 			prom_printf("%s: \"%s\", cant get logging "
2653446Smrj 			    "status, error %d\n", str, refstr_value(mntpt),
2663446Smrj 			    error);
2670Sstevel@tonic-gate 			refstr_rele(mntpt);
2680Sstevel@tonic-gate 			return;
2690Sstevel@tonic-gate 		}
2700Sstevel@tonic-gate 		*svstat = status;
2713446Smrj 		if (cpr_debug & CPR_DEBUG5) {
2720Sstevel@tonic-gate 			mntpt = vfs_getmntpoint(vp->v_vfsp);
273*5295Srandyf 			errp("%s: \"%s\", logging status = %d\n",
2740Sstevel@tonic-gate 			    str, refstr_value(mntpt), status);
2750Sstevel@tonic-gate 			refstr_rele(mntpt);
2763446Smrj 		};
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 		able = "disable";
2790Sstevel@tonic-gate 		cmd = _FIOLOGDISABLE;
2800Sstevel@tonic-gate 	} else {
2810Sstevel@tonic-gate 		able = "enable";
2820Sstevel@tonic-gate 		cmd = _FIOLOGENABLE;
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	/*
2860Sstevel@tonic-gate 	 * disable or re-enable logging when the saved status is 1
2870Sstevel@tonic-gate 	 */
2880Sstevel@tonic-gate 	if (*svstat == 1) {
2890Sstevel@tonic-gate 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
2900Sstevel@tonic-gate 		    FKIOCTL, CRED(), NULL);
2910Sstevel@tonic-gate 		if (error) {
2920Sstevel@tonic-gate 			mntpt = vfs_getmntpoint(vp->v_vfsp);
2933446Smrj 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
2940Sstevel@tonic-gate 			    str, refstr_value(mntpt), able, error);
2950Sstevel@tonic-gate 			refstr_rele(mntpt);
2960Sstevel@tonic-gate 		} else {
2973446Smrj 			if (cpr_debug & CPR_DEBUG5) {
2980Sstevel@tonic-gate 				mntpt = vfs_getmntpoint(vp->v_vfsp);
299*5295Srandyf 				errp("%s: \"%s\", logging is now %sd\n",
3000Sstevel@tonic-gate 				    str, refstr_value(mntpt), able);
3010Sstevel@tonic-gate 				refstr_rele(mntpt);
302*5295Srandyf 			};
3030Sstevel@tonic-gate 		}
3040Sstevel@tonic-gate 	}
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	/*
3070Sstevel@tonic-gate 	 * when enabling logging, reset the saved status
3080Sstevel@tonic-gate 	 * to unknown for next time
3090Sstevel@tonic-gate 	 */
3100Sstevel@tonic-gate 	if (enable)
3110Sstevel@tonic-gate 		*svstat = -1;
3120Sstevel@tonic-gate }
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate /*
3150Sstevel@tonic-gate  * enable/disable UFS logging on filesystems containing cpr_default_path
3160Sstevel@tonic-gate  * and cpr statefile.  since the statefile can be on any fs, that fs
3170Sstevel@tonic-gate  * needs to be handled separately.  this routine and cprboot expect that
3180Sstevel@tonic-gate  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
3190Sstevel@tonic-gate  * is loaded from the device with rootfs and uses the same device to open
3200Sstevel@tonic-gate  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
3210Sstevel@tonic-gate  * file outside of rootfs would cause errors during cprboot, plus cpr and
3220Sstevel@tonic-gate  * fsck problems with the new fs if logging were enabled.
3230Sstevel@tonic-gate  */
324*5295Srandyf 
static int
cpr_ufs_logging(int enable)
{
	/* saved per-file logging status, sticky across disable/enable */
	static int def_status = -1, sf_status = -1;
	struct vfs *vfsp;
	char *fname;
	vnode_t *vp;
	int error;

	/* reusable mode never touches logging */
	if (cpr_reusable_mode)
		return (0);

	/* handle the fs holding cpr_default_path (rootfs) first */
	if (error = cpr_open_deffile(FREAD, &vp))
		return (error);
	cpr_log_status(enable, &def_status, vp);
	vfsp = vp->v_vfsp;	/* remember rootfs for the comparison below */
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
	VN_RELE(vp);

	/* now the statefile, which may live on a different fs */
	fname = cpr_build_statefile_path();
	if (fname == NULL)
		return (ENOENT);
	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
	    0600, &vp, CRCREAT, 0)) {
		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
		    "error %d\n", fname, error);
		return (error);
	}

	/*
	 * check logging status for the statefile if it resides
	 * on a different fs and the type is a regular file
	 */
	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
		cpr_log_status(enable, &sf_status, vp);
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
	VN_RELE(vp);

	return (0);
}
365*5295Srandyf #endif
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate  * Check if klmmod is loaded and call a lock manager service; if klmmod
3700Sstevel@tonic-gate  * is not loaded, the services aren't needed and a call would trigger a
3710Sstevel@tonic-gate  * modload, which would block since another thread would never run.
3720Sstevel@tonic-gate  */
3730Sstevel@tonic-gate static void
3740Sstevel@tonic-gate cpr_lock_mgr(void (*service)(void))
3750Sstevel@tonic-gate {
3760Sstevel@tonic-gate 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
3770Sstevel@tonic-gate 		(*service)();
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate 
int
cpr_suspend_cpus(void)
{
	cpu_t *bootcpu;
	int	ret = 0;
	extern void *i_cpr_save_context(void *arg);

	/*
	 * NOTE(review): the forward declaration earlier in this file
	 * (under #if defined(__x86)) declares this function 'static',
	 * but this definition omits the keyword -- confirm the intended
	 * linkage and make declaration and definition agree.
	 */
	mutex_enter(&cpu_lock);

	/*
	 * if bootcpu is offline bring it back online
	 */
	bootcpu = i_cpr_bootcpu();

	/*
	 * the machine could not have booted without a bootcpu
	 */
	ASSERT(bootcpu != NULL);

	/*
	 * bring all the offline cpus online
	 */
	if ((ret = cpr_all_online())) {
		/* cannot suspend with any cpu offline; bail out */
		mutex_exit(&cpu_lock);
		return (ret);
	}

	/*
	 * Set the affinity to be the boot processor
	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
	 */
	affinity_set(i_cpr_bootcpuid());

	ASSERT(CPU->cpu_id == 0);

	PMD(PMD_SX, ("curthread running on bootcpu\n"))

	/*
	 * pause all other running CPUs and save the CPU state at the sametime
	 */
	cpu_pause_func = i_cpr_save_context;
	pause_cpus(NULL);

	mutex_exit(&cpu_lock);

	return (0);
}
427*5295Srandyf 
4280Sstevel@tonic-gate /*
4290Sstevel@tonic-gate  * Take the system down to a checkpointable state and write
4300Sstevel@tonic-gate  * the state file, the following are sequentially executed:
4310Sstevel@tonic-gate  *
4320Sstevel@tonic-gate  *    - Request all user threads to stop themselves
4330Sstevel@tonic-gate  *    - push out and invalidate user pages
4340Sstevel@tonic-gate  *    - bring statefile inode incore to prevent a miss later
4350Sstevel@tonic-gate  *    - request all daemons to stop
4360Sstevel@tonic-gate  *    - check and make sure all threads are stopped
4370Sstevel@tonic-gate  *    - sync the file system
4380Sstevel@tonic-gate  *    - suspend all devices
4390Sstevel@tonic-gate  *    - block intrpts
4400Sstevel@tonic-gate  *    - dump system state and memory to state file
441*5295Srandyf  *    - SPARC code will not be called with CPR_TORAM, caller filters
4420Sstevel@tonic-gate  */
4430Sstevel@tonic-gate static int
444*5295Srandyf cpr_suspend(int sleeptype)
4450Sstevel@tonic-gate {
446*5295Srandyf #if defined(__sparc)
447*5295Srandyf 	int sf_realloc, nverr;
448*5295Srandyf #endif
449*5295Srandyf 	int	rc = 0;
450*5295Srandyf 	int	skt_rc = 0;
4510Sstevel@tonic-gate 
452*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
4530Sstevel@tonic-gate 	cpr_set_substate(C_ST_SUSPEND_BEGIN);
4540Sstevel@tonic-gate 
455*5295Srandyf 	cpr_suspend_init(sleeptype);
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	cpr_save_time();
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	cpr_tod_get(&wholecycle_tv);
4600Sstevel@tonic-gate 	CPR_STAT_EVENT_START("Suspend Total");
4610Sstevel@tonic-gate 
462*5295Srandyf 	i_cpr_alloc_cpus();
463*5295Srandyf 
464*5295Srandyf #if defined(__sparc)
465*5295Srandyf 	ASSERT(sleeptype == CPR_TODISK);
4660Sstevel@tonic-gate 	if (!cpr_reusable_mode) {
4670Sstevel@tonic-gate 		/*
468*5295Srandyf 		 * We need to validate default file before fs
469*5295Srandyf 		 * functionality is disabled.
4700Sstevel@tonic-gate 		 */
4710Sstevel@tonic-gate 		if (rc = cpr_validate_definfo(0))
4720Sstevel@tonic-gate 			return (rc);
4730Sstevel@tonic-gate 	}
474*5295Srandyf 	i_cpr_save_machdep_info();
475*5295Srandyf #endif
4760Sstevel@tonic-gate 
477*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
4780Sstevel@tonic-gate 	/* Stop PM scans ASAP */
4790Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
4820Sstevel@tonic-gate 	    NULL, NULL, PM_DEP_WAIT, NULL, 0);
4830Sstevel@tonic-gate 
484*5295Srandyf #if defined(__sparc)
485*5295Srandyf 	ASSERT(sleeptype == CPR_TODISK);
4860Sstevel@tonic-gate 	cpr_set_substate(C_ST_MP_OFFLINE);
4870Sstevel@tonic-gate 	if (rc = cpr_mp_offline())
4880Sstevel@tonic-gate 		return (rc);
489*5295Srandyf #endif
490*5295Srandyf 	/*
491*5295Srandyf 	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
492*5295Srandyf 	 */
493*5295Srandyf 	mutex_enter(&srn_clone_lock);
494*5295Srandyf 	if (srn_signal) {
495*5295Srandyf 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
496*5295Srandyf 		    "SRN_SUSPEND_REQ)\n"))
497*5295Srandyf 		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
498*5295Srandyf 		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
499*5295Srandyf 		srn_inuse = 0;
500*5295Srandyf 	} else {
501*5295Srandyf 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
502*5295Srandyf 	}
503*5295Srandyf 	mutex_exit(&srn_clone_lock);
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	/*
5060Sstevel@tonic-gate 	 * Ask the user threads to stop by themselves, but
5070Sstevel@tonic-gate 	 * if they don't or can't after 3 retries, we give up on CPR.
5080Sstevel@tonic-gate 	 * The 3 retry is not a random number because 2 is possible if
5090Sstevel@tonic-gate 	 * a thread has been forked before the parent thread is stopped.
5100Sstevel@tonic-gate 	 */
5113446Smrj 	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
5120Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  stop users");
5130Sstevel@tonic-gate 	cpr_set_substate(C_ST_STOP_USER_THREADS);
514*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
5150Sstevel@tonic-gate 	if (rc = cpr_stop_user_threads())
5160Sstevel@tonic-gate 		return (rc);
5170Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  stop users");
5183446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
5190Sstevel@tonic-gate 
520*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
5210Sstevel@tonic-gate 	pm_save_direct_levels();
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	/*
5240Sstevel@tonic-gate 	 * User threads are stopped.  We will start communicating with the
5250Sstevel@tonic-gate 	 * user via prom_printf (some debug output may have already happened)
5260Sstevel@tonic-gate 	 * so let anybody who cares know about this (bug 4096122)
5270Sstevel@tonic-gate 	 */
5280Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
5290Sstevel@tonic-gate 
530*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
531*5295Srandyf #ifndef DEBUG
5320Sstevel@tonic-gate 	cpr_send_notice();
5330Sstevel@tonic-gate 	if (cpr_debug)
5343446Smrj 		prom_printf("\n");
535*5295Srandyf #endif
5360Sstevel@tonic-gate 
537*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
5380Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	/*
5410Sstevel@tonic-gate 	 * Reattach any drivers which originally exported the
5420Sstevel@tonic-gate 	 * no-involuntary-power-cycles property.  We need to do this before
5430Sstevel@tonic-gate 	 * stopping kernel threads because modload is implemented using
5440Sstevel@tonic-gate 	 * a kernel thread.
5450Sstevel@tonic-gate 	 */
5460Sstevel@tonic-gate 	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
547*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
5480Sstevel@tonic-gate 	if (!pm_reattach_noinvol())
5490Sstevel@tonic-gate 		return (ENXIO);
5500Sstevel@tonic-gate 
551*5295Srandyf #if defined(__sparc)
552*5295Srandyf 	ASSERT(sleeptype == CPR_TODISK);
5530Sstevel@tonic-gate 	/*
5540Sstevel@tonic-gate 	 * if ufs logging is enabled, we need to disable before
5550Sstevel@tonic-gate 	 * stopping kernel threads so that ufs delete and roll
5560Sstevel@tonic-gate 	 * threads can do the work.
5570Sstevel@tonic-gate 	 */
5580Sstevel@tonic-gate 	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
5590Sstevel@tonic-gate 	if (rc = cpr_ufs_logging(0))
5600Sstevel@tonic-gate 		return (rc);
5610Sstevel@tonic-gate 
5620Sstevel@tonic-gate 	/*
5630Sstevel@tonic-gate 	 * Use sync_all to swap out all user pages and find out how much
5640Sstevel@tonic-gate 	 * extra space needed for user pages that don't have back store
5650Sstevel@tonic-gate 	 * space left.
5660Sstevel@tonic-gate 	 */
5670Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  swapout upages");
5680Sstevel@tonic-gate 	vfs_sync(SYNC_ALL);
5690Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  swapout upages");
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 	cpr_set_bitmap_size();
5720Sstevel@tonic-gate 
5730Sstevel@tonic-gate alloc_statefile:
5740Sstevel@tonic-gate 	/*
575*5295Srandyf 	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
576*5295Srandyf 	 * realloc the statefile, otherwise this is the first attempt.
5770Sstevel@tonic-gate 	 */
5780Sstevel@tonic-gate 	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  alloc statefile");
5810Sstevel@tonic-gate 	cpr_set_substate(C_ST_STATEF_ALLOC);
5820Sstevel@tonic-gate 	if (rc = cpr_alloc_statefile(sf_realloc)) {
5830Sstevel@tonic-gate 		if (sf_realloc)
584*5295Srandyf 			errp("realloc failed\n");
5850Sstevel@tonic-gate 		return (rc);
5860Sstevel@tonic-gate 	}
5870Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  alloc statefile");
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 	/*
5900Sstevel@tonic-gate 	 * Sync the filesystem to preserve its integrity.
5910Sstevel@tonic-gate 	 *
592*5295Srandyf 	 * This sync is also used to flush out all B_DELWRI buffers
593*5295Srandyf 	 * (fs cache) which are mapped and neither dirty nor referenced
594*5295Srandyf 	 * before cpr_invalidate_pages destroys them.
595*5295Srandyf 	 * fsflush does similar thing.
5960Sstevel@tonic-gate 	 */
5970Sstevel@tonic-gate 	sync();
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate 	/*
6000Sstevel@tonic-gate 	 * destroy all clean file mapped kernel pages
6010Sstevel@tonic-gate 	 */
6020Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  clean pages");
603*5295Srandyf 	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
6040Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
605*5295Srandyf 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
6060Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  clean pages");
607*5295Srandyf #endif
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 
6100Sstevel@tonic-gate 	/*
6110Sstevel@tonic-gate 	 * Hooks needed by lock manager prior to suspending.
6120Sstevel@tonic-gate 	 * Refer to code for more comments.
6130Sstevel@tonic-gate 	 */
614*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
6150Sstevel@tonic-gate 	cpr_lock_mgr(lm_cprsuspend);
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	/*
6180Sstevel@tonic-gate 	 * Now suspend all the devices
6190Sstevel@tonic-gate 	 */
6200Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  stop drivers");
6213446Smrj 	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
6220Sstevel@tonic-gate 	cpr_set_substate(C_ST_SUSPEND_DEVICES);
6230Sstevel@tonic-gate 	pm_powering_down = 1;
624*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
6250Sstevel@tonic-gate 	rc = cpr_suspend_devices(ddi_root_node());
6260Sstevel@tonic-gate 	pm_powering_down = 0;
6270Sstevel@tonic-gate 	if (rc)
6280Sstevel@tonic-gate 		return (rc);
6293446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
6300Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  stop drivers");
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 	/*
6330Sstevel@tonic-gate 	 * Stop all daemon activities
6340Sstevel@tonic-gate 	 */
6350Sstevel@tonic-gate 	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
636*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
6370Sstevel@tonic-gate 	if (skt_rc = cpr_stop_kernel_threads())
6380Sstevel@tonic-gate 		return (skt_rc);
6390Sstevel@tonic-gate 
640*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
6410Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
6420Sstevel@tonic-gate 
643*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
6440Sstevel@tonic-gate 	pm_reattach_noinvol_fini();
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	cpr_sae(1);
6470Sstevel@tonic-gate 
648*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
6490Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
6500Sstevel@tonic-gate 
651*5295Srandyf 	if (sleeptype == CPR_TODISK) {
652*5295Srandyf 		/*
653*5295Srandyf 		 * It's safer to do tod_get before we disable all intr.
654*5295Srandyf 		 */
655*5295Srandyf 		CPR_STAT_EVENT_START("  write statefile");
656*5295Srandyf 	}
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 	/*
6590Sstevel@tonic-gate 	 * it's time to ignore the outside world, stop the real time
6600Sstevel@tonic-gate 	 * clock and disable any further intrpt activity.
6610Sstevel@tonic-gate 	 */
662*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
6630Sstevel@tonic-gate 	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
666*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
6670Sstevel@tonic-gate 	cyclic_suspend();
6680Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
6690Sstevel@tonic-gate 
670*5295Srandyf 	/*
671*5295Srandyf 	 * Due to the different methods of resuming the system between
672*5295Srandyf 	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
673*5295Srandyf 	 * and CPR_TORAM (restart via reset into existing kernel image)
674*5295Srandyf 	 * cpus are not suspended and restored in the SPARC case, since it
675*5295Srandyf 	 * is necessary to restart the cpus and pause them before restoring
676*5295Srandyf 	 * the OBP image
677*5295Srandyf 	 */
678*5295Srandyf 
679*5295Srandyf #if defined(__x86)
6800Sstevel@tonic-gate 
681*5295Srandyf 	/* pause aux cpus */
682*5295Srandyf 	PMD(PMD_SX, ("pause aux cpus\n"))
683*5295Srandyf 
684*5295Srandyf 	cpr_set_substate(C_ST_MP_PAUSED);
685*5295Srandyf 
686*5295Srandyf 	if ((rc = cpr_suspend_cpus()) != 0)
687*5295Srandyf 		return (rc);
688*5295Srandyf #endif
689*5295Srandyf 
690*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
6910Sstevel@tonic-gate 	i_cpr_stop_intr();
6923446Smrj 	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");
6930Sstevel@tonic-gate 
6940Sstevel@tonic-gate 	/*
6950Sstevel@tonic-gate 	 * Since we will now disable the mechanism that causes prom_printfs
6960Sstevel@tonic-gate 	 * to power up (if needed) the console fb/monitor, we assert that
6970Sstevel@tonic-gate 	 * it must be up now.
6980Sstevel@tonic-gate 	 */
6990Sstevel@tonic-gate 	ASSERT(pm_cfb_is_up());
700*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
7010Sstevel@tonic-gate 	prom_suspend_prepost();
7020Sstevel@tonic-gate 
703*5295Srandyf #if defined(__sparc)
7040Sstevel@tonic-gate 	/*
7050Sstevel@tonic-gate 	 * getting ready to write ourself out, flush the register
7060Sstevel@tonic-gate 	 * windows to make sure that our stack is good when we
7070Sstevel@tonic-gate 	 * come back on the resume side.
7080Sstevel@tonic-gate 	 */
7090Sstevel@tonic-gate 	flush_windows();
710*5295Srandyf #endif
7110Sstevel@tonic-gate 
7120Sstevel@tonic-gate 	/*
713*5295Srandyf 	 * For S3, we're done
714*5295Srandyf 	 */
715*5295Srandyf 	if (sleeptype == CPR_TORAM) {
716*5295Srandyf 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
717*5295Srandyf 		cpr_set_substate(C_ST_NODUMP);
718*5295Srandyf 		return (rc);
719*5295Srandyf 	}
720*5295Srandyf #if defined(__sparc)
721*5295Srandyf 	/*
7220Sstevel@tonic-gate 	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
7230Sstevel@tonic-gate 	 *
7240Sstevel@tonic-gate 	 * The system is quiesced at this point, we are ready to either dump
7250Sstevel@tonic-gate 	 * to the state file for an extended sleep or a simple shutdown for
7260Sstevel@tonic-gate 	 * systems with non-volatile memory.
7270Sstevel@tonic-gate 	 */
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 	/*
7300Sstevel@tonic-gate 	 * special handling for reusable:
7310Sstevel@tonic-gate 	 */
7320Sstevel@tonic-gate 	if (cpr_reusable_mode) {
7330Sstevel@tonic-gate 		cpr_set_substate(C_ST_SETPROPS_1);
7340Sstevel@tonic-gate 		if (nverr = cpr_set_properties(1))
7350Sstevel@tonic-gate 			return (nverr);
7360Sstevel@tonic-gate 	}
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate 	cpr_set_substate(C_ST_DUMP);
7390Sstevel@tonic-gate 	rc = cpr_dump(C_VP);
7400Sstevel@tonic-gate 
7410Sstevel@tonic-gate 	/*
7420Sstevel@tonic-gate 	 * if any error occurred during dump, more
7430Sstevel@tonic-gate 	 * special handling for reusable:
7440Sstevel@tonic-gate 	 */
7450Sstevel@tonic-gate 	if (rc && cpr_reusable_mode) {
7460Sstevel@tonic-gate 		cpr_set_substate(C_ST_SETPROPS_0);
7470Sstevel@tonic-gate 		if (nverr = cpr_set_properties(0))
7480Sstevel@tonic-gate 			return (nverr);
7490Sstevel@tonic-gate 	}
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 	if (rc == ENOSPC) {
7520Sstevel@tonic-gate 		cpr_set_substate(C_ST_DUMP_NOSPC);
753*5295Srandyf 		(void) cpr_resume(sleeptype);
7540Sstevel@tonic-gate 		goto alloc_statefile;
7550Sstevel@tonic-gate 	} else if (rc == 0) {
7560Sstevel@tonic-gate 		if (cpr_reusable_mode) {
7570Sstevel@tonic-gate 			cpr_set_substate(C_ST_REUSABLE);
7580Sstevel@tonic-gate 			longjmp(&ttolwp(curthread)->lwp_qsav);
7590Sstevel@tonic-gate 		} else
7600Sstevel@tonic-gate 			rc = cpr_set_properties(1);
7610Sstevel@tonic-gate 	}
762*5295Srandyf #endif
763*5295Srandyf 	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
7640Sstevel@tonic-gate 	return (rc);
7650Sstevel@tonic-gate }
7660Sstevel@tonic-gate 
/*
 * cpr_resume_cpus - bring the non-boot cpus back up after a suspend
 * attempt that succeeded far enough to pause them (x86 S3 path).
 * A cut-down version of start_other_cpus(); caller must not hold
 * cpu_lock (it is acquired/released internally).
 */
767*5295Srandyf void
768*5295Srandyf cpr_resume_cpus(void)
769*5295Srandyf {
770*5295Srandyf 	/*
771*5295Srandyf 	 * this is a cut down version of start_other_cpus()
772*5295Srandyf 	 * just do the initialization to wake the other cpus
773*5295Srandyf 	 */
774*5295Srandyf 
775*5295Srandyf #if defined(__x86)
776*5295Srandyf 	/*
777*5295Srandyf 	 * Initialize our syscall handlers
778*5295Srandyf 	 */
779*5295Srandyf 	init_cpu_syscall(CPU);
780*5295Srandyf 
781*5295Srandyf #endif
782*5295Srandyf 
	/* platform hook run before the other cpus are restarted */
783*5295Srandyf 	i_cpr_pre_resume_cpus();
784*5295Srandyf 
785*5295Srandyf 	/*
786*5295Srandyf 	 * Restart the paused cpus
787*5295Srandyf 	 */
788*5295Srandyf 	mutex_enter(&cpu_lock);
789*5295Srandyf 	start_cpus();
790*5295Srandyf 	mutex_exit(&cpu_lock);
791*5295Srandyf 
792*5295Srandyf 	/*
793*5295Srandyf 	 * clear the affinity set in cpr_suspend_cpus()
794*5295Srandyf 	 */
795*5295Srandyf 	affinity_clear();
796*5295Srandyf 
	/* platform hook run after the other cpus are running again */
797*5295Srandyf 	i_cpr_post_resume_cpus();
798*5295Srandyf 
799*5295Srandyf 	mutex_enter(&cpu_lock);
800*5295Srandyf 	/*
801*5295Srandyf 	 * Restore this cpu to use the regular cpu_pause(), so that
802*5295Srandyf 	 * online and offline will work correctly
803*5295Srandyf 	 */
804*5295Srandyf 	cpu_pause_func = NULL;
805*5295Srandyf 
806*5295Srandyf 	/*
807*5295Srandyf 	 * offline all the cpus that were brought online during suspend
808*5295Srandyf 	 */
809*5295Srandyf 	cpr_restore_offline();
810*5295Srandyf 
811*5295Srandyf 	/*
812*5295Srandyf 	 * clear the affinity set in cpr_suspend_cpus()
	 *
	 * NOTE(review): affinity_clear() was already called above after
	 * start_cpus(); this second call looks redundant -- confirm it
	 * is intentional (harmless if affinity_clear() is idempotent).
813*5295Srandyf 	 */
814*5295Srandyf 	affinity_clear();
815*5295Srandyf 
816*5295Srandyf 	mutex_exit(&cpu_lock);
817*5295Srandyf }
818*5295Srandyf 
/*
 * cpr_unpause_cpus - rollback path when the suspend attempt failed
 * after the other cpus were paused (but before they were shut down):
 * unpause them and undo the bookkeeping done by cpr_suspend_cpus().
 * Caller must not hold cpu_lock (it is acquired/released internally).
 */
819*5295Srandyf void
820*5295Srandyf cpr_unpause_cpus(void)
821*5295Srandyf {
822*5295Srandyf 	/*
823*5295Srandyf 	 * Now restore the system back to what it was before we suspended
824*5295Srandyf 	 */
825*5295Srandyf 
826*5295Srandyf 	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))
827*5295Srandyf 
828*5295Srandyf 	mutex_enter(&cpu_lock);
829*5295Srandyf 
830*5295Srandyf 	/*
831*5295Srandyf 	 * Restore this cpu to use the regular cpu_pause(), so that
832*5295Srandyf 	 * online and offline will work correctly
833*5295Srandyf 	 */
834*5295Srandyf 	cpu_pause_func = NULL;
835*5295Srandyf 
836*5295Srandyf 	/*
837*5295Srandyf 	 * Restart the paused cpus
838*5295Srandyf 	 */
839*5295Srandyf 	start_cpus();
840*5295Srandyf 
841*5295Srandyf 	/*
842*5295Srandyf 	 * offline all the cpus that were brought online during suspend
843*5295Srandyf 	 */
844*5295Srandyf 	cpr_restore_offline();
845*5295Srandyf 
846*5295Srandyf 	/*
847*5295Srandyf 	 * clear the affinity set in cpr_suspend_cpus()
848*5295Srandyf 	 */
849*5295Srandyf 	affinity_clear();
850*5295Srandyf 
851*5295Srandyf 	mutex_exit(&cpu_lock);
852*5295Srandyf }
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate /*
8550Sstevel@tonic-gate  * Bring the system back up from a checkpoint, at this point
8560Sstevel@tonic-gate  * the VM has been minimally restored by boot, the following
8570Sstevel@tonic-gate  * are executed sequentially:
8580Sstevel@tonic-gate  *
8590Sstevel@tonic-gate  *    - machdep setup and enable interrupts (mp startup if it's mp)
8600Sstevel@tonic-gate  *    - resume all devices
8610Sstevel@tonic-gate  *    - restart daemons
8620Sstevel@tonic-gate  *    - put all threads back on run queue
 *
 * sleeptype is CPR_TODISK or CPR_TORAM and selects which rollback
 * steps apply (the TODISK-only steps are guarded by ASSERTs below).
 * Returns 0 on success, or the error from a failed rollback step.
 * This function doubles as the rollback path for a failed suspend:
 * CPR->c_substate records how far cpr_suspend() got, and the switch
 * below jumps to the matching rb_* label to unwind from there.
8630Sstevel@tonic-gate  */
8640Sstevel@tonic-gate static int
865*5295Srandyf cpr_resume(int sleeptype)
8660Sstevel@tonic-gate {
8670Sstevel@tonic-gate 	cpr_time_t pwron_tv, *ctp;
8680Sstevel@tonic-gate 	char *str;
8690Sstevel@tonic-gate 	int rc = 0;
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	/*
8720Sstevel@tonic-gate 	 * The following switch is used to resume the system
8730Sstevel@tonic-gate 	 * that was suspended to a different level.
8740Sstevel@tonic-gate 	 */
8753446Smrj 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
876*5295Srandyf 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
8770Sstevel@tonic-gate 
8780Sstevel@tonic-gate 	/*
8790Sstevel@tonic-gate 	 * Note:
8800Sstevel@tonic-gate 	 *
8810Sstevel@tonic-gate 	 * The rollback labels rb_xyz do not represent the cpr resume
8820Sstevel@tonic-gate 	 * state when event 'xyz' has happened. Instead they represent
8830Sstevel@tonic-gate 	 * the state during cpr suspend when event 'xyz' was being
8840Sstevel@tonic-gate 	 * entered (and where cpr suspend failed). The actual call that
8850Sstevel@tonic-gate 	 * failed may also need to be partially rolled back, since they
8860Sstevel@tonic-gate 	 * aren't atomic in most cases.  In other words, rb_xyz means
8870Sstevel@tonic-gate 	 * "roll back all cpr suspend events that happened before 'xyz',
8880Sstevel@tonic-gate 	 * and the one that caused the failure, if necessary."
8890Sstevel@tonic-gate 	 */
8900Sstevel@tonic-gate 	switch (CPR->c_substate) {
891*5295Srandyf #if defined(__sparc)
8920Sstevel@tonic-gate 	case C_ST_DUMP:
8930Sstevel@tonic-gate 		/*
8940Sstevel@tonic-gate 		 * This is most likely a full-fledged cpr_resume after
8950Sstevel@tonic-gate 		 * a complete and successful cpr suspend. Just roll back
8960Sstevel@tonic-gate 		 * everything.
8970Sstevel@tonic-gate 		 */
898*5295Srandyf 		ASSERT(sleeptype == CPR_TODISK);
8990Sstevel@tonic-gate 		break;
9000Sstevel@tonic-gate 
9010Sstevel@tonic-gate 	case C_ST_REUSABLE:
9020Sstevel@tonic-gate 	case C_ST_DUMP_NOSPC:
9030Sstevel@tonic-gate 	case C_ST_SETPROPS_0:
9040Sstevel@tonic-gate 	case C_ST_SETPROPS_1:
9050Sstevel@tonic-gate 		/*
9060Sstevel@tonic-gate 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
9070Sstevel@tonic-gate 		 * special switch cases here. The other two do not have
9080Sstevel@tonic-gate 		 * any state change during cpr_suspend() that needs to
9090Sstevel@tonic-gate 		 * be rolled back. But these are exit points from
9100Sstevel@tonic-gate 		 * cpr_suspend, so theoretically (or in the future), it
9110Sstevel@tonic-gate 		 * is possible that a need for roll back of a state
9120Sstevel@tonic-gate 		 * change arises between these exit points.
9130Sstevel@tonic-gate 		 */
914*5295Srandyf 		ASSERT(sleeptype == CPR_TODISK);
9150Sstevel@tonic-gate 		goto rb_dump;
916*5295Srandyf #endif
917*5295Srandyf 
918*5295Srandyf 	case C_ST_NODUMP:
919*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
920*5295Srandyf 		goto rb_nodump;
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	case C_ST_STOP_KERNEL_THREADS:
923*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
9240Sstevel@tonic-gate 		goto rb_stop_kernel_threads;
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	case C_ST_SUSPEND_DEVICES:
927*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
9280Sstevel@tonic-gate 		goto rb_suspend_devices;
9290Sstevel@tonic-gate 
930*5295Srandyf #if defined(__sparc)
9310Sstevel@tonic-gate 	case C_ST_STATEF_ALLOC:
932*5295Srandyf 		ASSERT(sleeptype == CPR_TODISK);
9330Sstevel@tonic-gate 		goto rb_statef_alloc;
9340Sstevel@tonic-gate 
9350Sstevel@tonic-gate 	case C_ST_DISABLE_UFS_LOGGING:
936*5295Srandyf 		ASSERT(sleeptype == CPR_TODISK);
9370Sstevel@tonic-gate 		goto rb_disable_ufs_logging;
938*5295Srandyf #endif
9390Sstevel@tonic-gate 
9400Sstevel@tonic-gate 	case C_ST_PM_REATTACH_NOINVOL:
941*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
9420Sstevel@tonic-gate 		goto rb_pm_reattach_noinvol;
9430Sstevel@tonic-gate 
9440Sstevel@tonic-gate 	case C_ST_STOP_USER_THREADS:
945*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
9460Sstevel@tonic-gate 		goto rb_stop_user_threads;
9470Sstevel@tonic-gate 
948*5295Srandyf #if defined(__sparc)
9490Sstevel@tonic-gate 	case C_ST_MP_OFFLINE:
950*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
9510Sstevel@tonic-gate 		goto rb_mp_offline;
952*5295Srandyf #endif
953*5295Srandyf 
954*5295Srandyf #if defined(__x86)
955*5295Srandyf 	case C_ST_MP_PAUSED:
956*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
957*5295Srandyf 		goto rb_mp_paused;
958*5295Srandyf #endif
959*5295Srandyf 
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate 	default:
962*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: others\n"))
9630Sstevel@tonic-gate 		goto rb_others;
9640Sstevel@tonic-gate 	}
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate rb_all:
9670Sstevel@tonic-gate 	/*
9680Sstevel@tonic-gate 	 * perform platform-dependent initialization
9690Sstevel@tonic-gate 	 */
9700Sstevel@tonic-gate 	if (cpr_suspend_succeeded)
9710Sstevel@tonic-gate 		i_cpr_machdep_setup();
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate 	/*
9740Sstevel@tonic-gate 	 * system did not really go down if we jump here
9750Sstevel@tonic-gate 	 */
9760Sstevel@tonic-gate rb_dump:
9770Sstevel@tonic-gate 	/*
9780Sstevel@tonic-gate 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
9790Sstevel@tonic-gate 	 *
9800Sstevel@tonic-gate 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
9810Sstevel@tonic-gate 	 */
982*5295Srandyf rb_nodump:
983*5295Srandyf 	/*
984*5295Srandyf 	 * If we did suspend to RAM, we didn't generate a dump
985*5295Srandyf 	 */
986*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
9870Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
988*5295Srandyf 	if (cpr_suspend_succeeded) {
989*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
9900Sstevel@tonic-gate 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
991*5295Srandyf 	}
9920Sstevel@tonic-gate 
9930Sstevel@tonic-gate 	prom_resume_prepost();
994*5295Srandyf #if !defined(__sparc)
995*5295Srandyf 	/*
996*5295Srandyf 	 * Need to sync the software clock with the hardware clock.
997*5295Srandyf 	 * On Sparc, this occurs in the sparc-specific cbe.  However
998*5295Srandyf 	 * on x86 this needs to be handled _before_ we bring other cpu's
999*5295Srandyf 	 * back online.  So we call a resume function in timestamp.c
1000*5295Srandyf 	 */
1001*5295Srandyf 	if (tsc_resume_in_cyclic == 0)
1002*5295Srandyf 		tsc_resume();
10030Sstevel@tonic-gate 
1004*5295Srandyf #endif
1005*5295Srandyf 
1006*5295Srandyf #if defined(__sparc)
	/* re-arm the kernel debugger vector if booted with -k style debug */
10070Sstevel@tonic-gate 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
10080Sstevel@tonic-gate 		kdi_dvec_cpr_restart();
1009*5295Srandyf #endif
1010*5295Srandyf 
1011*5295Srandyf 
1012*5295Srandyf #if defined(__x86)
1013*5295Srandyf rb_mp_paused:
1014*5295Srandyf 	PT(PT_RMPO);
1015*5295Srandyf 	PMD(PMD_SX, ("resume aux cpus\n"))
1016*5295Srandyf 
	/*
	 * full resume vs simple unpause, depending on whether the
	 * suspend actually completed (see cpr_resume_cpus/cpr_unpause_cpus)
	 */
1017*5295Srandyf 	if (cpr_suspend_succeeded) {
1018*5295Srandyf 		cpr_resume_cpus();
1019*5295Srandyf 	} else {
1020*5295Srandyf 		cpr_unpause_cpus();
1021*5295Srandyf 	}
1022*5295Srandyf #endif
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate 	/*
10250Sstevel@tonic-gate 	 * let the tmp callout catch up.
10260Sstevel@tonic-gate 	 */
1027*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
10280Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
10290Sstevel@tonic-gate 
10300Sstevel@tonic-gate 	i_cpr_enable_intr();
10310Sstevel@tonic-gate 
10320Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
1033*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
10340Sstevel@tonic-gate 	cyclic_resume();
10350Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
10360Sstevel@tonic-gate 
1037*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
10380Sstevel@tonic-gate 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
10390Sstevel@tonic-gate 
1040*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
10410Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
10420Sstevel@tonic-gate 
10430Sstevel@tonic-gate 	/*
10440Sstevel@tonic-gate 	 * statistics gathering
10450Sstevel@tonic-gate 	 */
10460Sstevel@tonic-gate 	if (cpr_suspend_succeeded) {
10470Sstevel@tonic-gate 		/*
10480Sstevel@tonic-gate 		 * Prevent false alarm in tod_validate() due to tod
10490Sstevel@tonic-gate 		 * value change between suspend and resume
10500Sstevel@tonic-gate 		 */
10510Sstevel@tonic-gate 		cpr_tod_fault_reset();
10520Sstevel@tonic-gate 
10530Sstevel@tonic-gate 		cpr_convert_promtime(&pwron_tv);
10540Sstevel@tonic-gate 
10550Sstevel@tonic-gate 		ctp = &cpr_term.tm_shutdown;
1056*5295Srandyf 		if (sleeptype == CPR_TODISK)
1057*5295Srandyf 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
10580Sstevel@tonic-gate 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
10610Sstevel@tonic-gate 
10620Sstevel@tonic-gate 		str = "  prom time";
10630Sstevel@tonic-gate 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
10640Sstevel@tonic-gate 		ctp = &cpr_term.tm_cprboot_start;
10650Sstevel@tonic-gate 		CPR_STAT_EVENT_END_TMZ(str, ctp);
10660Sstevel@tonic-gate 
10670Sstevel@tonic-gate 		str = "  read statefile";
10680Sstevel@tonic-gate 		CPR_STAT_EVENT_START_TMZ(str, ctp);
10690Sstevel@tonic-gate 		ctp = &cpr_term.tm_cprboot_end;
10700Sstevel@tonic-gate 		CPR_STAT_EVENT_END_TMZ(str, ctp);
10710Sstevel@tonic-gate 	}
10720Sstevel@tonic-gate 
10730Sstevel@tonic-gate rb_stop_kernel_threads:
10740Sstevel@tonic-gate 	/*
10750Sstevel@tonic-gate 	 * Put all threads back to where they belong; get the kernel
10760Sstevel@tonic-gate 	 * daemons straightened up too. Note that the callback table
10770Sstevel@tonic-gate 	 * locked during cpr_stop_kernel_threads() is released only
10780Sstevel@tonic-gate 	 * in cpr_start_kernel_threads(). Ensure modunloading is
10790Sstevel@tonic-gate 	 * disabled before starting kernel threads, we don't want
10800Sstevel@tonic-gate 	 * modunload thread to start changing device tree underneath.
10810Sstevel@tonic-gate 	 */
1082*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
10830Sstevel@tonic-gate 	modunload_disable();
1084*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
10850Sstevel@tonic-gate 	cpr_start_kernel_threads();
10860Sstevel@tonic-gate 
10870Sstevel@tonic-gate rb_suspend_devices:
10883446Smrj 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
10890Sstevel@tonic-gate 	CPR_STAT_EVENT_START("  start drivers");
10900Sstevel@tonic-gate 
1091*5295Srandyf 	PMD(PMD_SX,
1092*5295Srandyf 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1093*5295Srandyf 	    cpr_resume_uniproc))
1094*5295Srandyf 
1095*5295Srandyf #if defined(__x86)
1096*5295Srandyf 	/*
1097*5295Srandyf 	 * If cpr_resume_uniproc is set, then pause all the other cpus
1098*5295Srandyf 	 * apart from the current cpu, so that broken drivers that think
1099*5295Srandyf 	 * that they are on a uniprocessor machine will resume
1100*5295Srandyf 	 */
1101*5295Srandyf 	if (cpr_resume_uniproc) {
1102*5295Srandyf 		mutex_enter(&cpu_lock);
1103*5295Srandyf 		pause_cpus(NULL);
1104*5295Srandyf 		mutex_exit(&cpu_lock);
1105*5295Srandyf 	}
1106*5295Srandyf #endif
1107*5295Srandyf 
11080Sstevel@tonic-gate 	/*
11090Sstevel@tonic-gate 	 * The policy here is to continue resume everything we can if we did
11100Sstevel@tonic-gate 	 * not successfully finish suspend; and panic if we are coming back
11110Sstevel@tonic-gate 	 * from a fully suspended system.
11120Sstevel@tonic-gate 	 */
1113*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
11140Sstevel@tonic-gate 	rc = cpr_resume_devices(ddi_root_node(), 0);
11150Sstevel@tonic-gate 
11160Sstevel@tonic-gate 	cpr_sae(0);
11170Sstevel@tonic-gate 
11180Sstevel@tonic-gate 	str = "Failed to resume one or more devices.";
1119*5295Srandyf 
1120*5295Srandyf 	if (rc) {
1121*5295Srandyf 		if (CPR->c_substate == C_ST_DUMP ||
1122*5295Srandyf 		    (sleeptype == CPR_TORAM &&
1123*5295Srandyf 		    CPR->c_substate == C_ST_NODUMP)) {
			/* full suspend completed: device failure is fatal,
			 * except under the forced-S3 test point */
1124*5295Srandyf 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1125*5295Srandyf 				PMD(PMD_SX, ("cpr_resume: resume device "
1126*5295Srandyf 				    "warn\n"))
1127*5295Srandyf 				cpr_err(CE_WARN, str);
1128*5295Srandyf 			} else {
1129*5295Srandyf 				PMD(PMD_SX, ("cpr_resume: resume device "
1130*5295Srandyf 				    "panic\n"))
1131*5295Srandyf 				cpr_err(CE_PANIC, str);
1132*5295Srandyf 			}
1133*5295Srandyf 		} else {
1134*5295Srandyf 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1135*5295Srandyf 			cpr_err(CE_WARN, str);
1136*5295Srandyf 		}
1137*5295Srandyf 	}
1138*5295Srandyf 
11390Sstevel@tonic-gate 	CPR_STAT_EVENT_END("  start drivers");
11403446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
11410Sstevel@tonic-gate 
1142*5295Srandyf #if defined(__x86)
1143*5295Srandyf 	/*
1144*5295Srandyf 	 * If cpr_resume_uniproc is set, then unpause all the processors
1145*5295Srandyf 	 * that were paused before resuming the drivers
1146*5295Srandyf 	 */
1147*5295Srandyf 	if (cpr_resume_uniproc) {
1148*5295Srandyf 		mutex_enter(&cpu_lock);
1149*5295Srandyf 		start_cpus();
1150*5295Srandyf 		mutex_exit(&cpu_lock);
1151*5295Srandyf 	}
1152*5295Srandyf #endif
1153*5295Srandyf 
11540Sstevel@tonic-gate 	/*
11550Sstevel@tonic-gate 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
11560Sstevel@tonic-gate 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
11570Sstevel@tonic-gate 	 * modunloading now.
11580Sstevel@tonic-gate 	 */
1159*5295Srandyf 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1160*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
11610Sstevel@tonic-gate 		modunload_enable();
1162*5295Srandyf 	}
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	/*
11650Sstevel@tonic-gate 	 * Hooks needed by lock manager prior to resuming.
11660Sstevel@tonic-gate 	 * Refer to code for more comments.
11670Sstevel@tonic-gate 	 */
1168*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
11690Sstevel@tonic-gate 	cpr_lock_mgr(lm_cprresume);
11700Sstevel@tonic-gate 
1171*5295Srandyf #if defined(__sparc)
11720Sstevel@tonic-gate 	/*
11730Sstevel@tonic-gate 	 * This is a partial (half) resume during cpr suspend, we
11740Sstevel@tonic-gate 	 * haven't yet given up on the suspend. On return from here,
11750Sstevel@tonic-gate 	 * cpr_suspend() will try to reallocate and retry the suspend.
11760Sstevel@tonic-gate 	 */
11770Sstevel@tonic-gate 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
11780Sstevel@tonic-gate 		return (0);
11790Sstevel@tonic-gate 	}
11800Sstevel@tonic-gate 
1181*5295Srandyf 	if (sleeptype == CPR_TODISK) {
11820Sstevel@tonic-gate rb_statef_alloc:
1183*5295Srandyf 		cpr_statef_close();
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate rb_disable_ufs_logging:
1186*5295Srandyf 		/*
1187*5295Srandyf 		 * if ufs logging was disabled, re-enable
1188*5295Srandyf 		 */
1189*5295Srandyf 		(void) cpr_ufs_logging(1);
1190*5295Srandyf 	}
1191*5295Srandyf #endif
11920Sstevel@tonic-gate 
11930Sstevel@tonic-gate rb_pm_reattach_noinvol:
11940Sstevel@tonic-gate 	/*
11950Sstevel@tonic-gate 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
11960Sstevel@tonic-gate 	 * remain disabled until after cpr suspend passes the
11970Sstevel@tonic-gate 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
11980Sstevel@tonic-gate 	 * cpr suspend reaches this state, we'll need to enable modunload
11990Sstevel@tonic-gate 	 * thread during rollback.
12000Sstevel@tonic-gate 	 */
12010Sstevel@tonic-gate 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
12020Sstevel@tonic-gate 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
12030Sstevel@tonic-gate 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
12040Sstevel@tonic-gate 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1205*5295Srandyf 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
12060Sstevel@tonic-gate 		pm_reattach_noinvol_fini();
12070Sstevel@tonic-gate 	}
12080Sstevel@tonic-gate 
1209*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
12100Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1211*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
12120Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
12130Sstevel@tonic-gate 
1214*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
12150Sstevel@tonic-gate 	pm_restore_direct_levels();
12160Sstevel@tonic-gate 
12170Sstevel@tonic-gate rb_stop_user_threads:
12183446Smrj 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1219*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
12200Sstevel@tonic-gate 	cpr_start_user_threads();
12213446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1222*5295Srandyf 	/*
1223*5295Srandyf 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
	 *
	 * NOTE(review): the PMD labels below say "cpr_suspend" although
	 * this is cpr_resume -- looks like a copy/paste typo in the
	 * debug messages (behavior unaffected).
1224*5295Srandyf 	 */
1225*5295Srandyf 	mutex_enter(&srn_clone_lock);
1226*5295Srandyf 	if (srn_signal) {
1227*5295Srandyf 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1228*5295Srandyf 		    "SRN_NORMAL_RESUME)\n"))
1229*5295Srandyf 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1230*5295Srandyf 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1231*5295Srandyf 		srn_inuse = 0;
1232*5295Srandyf 	} else {
1233*5295Srandyf 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1234*5295Srandyf 	}
1235*5295Srandyf 	mutex_exit(&srn_clone_lock);
12360Sstevel@tonic-gate 
1237*5295Srandyf #if defined(__sparc)
12380Sstevel@tonic-gate rb_mp_offline:
12390Sstevel@tonic-gate 	if (cpr_mp_online())
12400Sstevel@tonic-gate 		cpr_err(CE_WARN, "Failed to online all the processors.");
1241*5295Srandyf #endif
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate rb_others:
1244*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1245*5295Srandyf 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1246*5295Srandyf 	    PM_DEP_WAIT, NULL, 0);
12470Sstevel@tonic-gate 
1248*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
12490Sstevel@tonic-gate 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
12500Sstevel@tonic-gate 
12510Sstevel@tonic-gate 	if (cpr_suspend_succeeded) {
12520Sstevel@tonic-gate 		cpr_stat_record_events();
12530Sstevel@tonic-gate 	}
12540Sstevel@tonic-gate 
1255*5295Srandyf #if defined(__sparc)
1256*5295Srandyf 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
12570Sstevel@tonic-gate 		cpr_clear_definfo();
1258*5295Srandyf #endif
12590Sstevel@tonic-gate 
1260*5295Srandyf 	i_cpr_free_cpus();
12613446Smrj 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1262*5295Srandyf 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
12630Sstevel@tonic-gate 	cpr_signal_user(SIGTHAW);
12643446Smrj 	CPR_DEBUG(CPR_DEBUG1, "done\n");
12650Sstevel@tonic-gate 
12660Sstevel@tonic-gate 	CPR_STAT_EVENT_END("Resume Total");
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
12690Sstevel@tonic-gate 	CPR_STAT_EVENT_END("WHOLE CYCLE");
12700Sstevel@tonic-gate 
12713446Smrj 	if (cpr_debug & CPR_DEBUG1)
12723446Smrj 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate 	CPR_STAT_EVENT_START("POST CPR DELAY");
12750Sstevel@tonic-gate 
12760Sstevel@tonic-gate #ifdef CPR_STAT
	/*
	 * NOTE(review): pwron_tv is only initialized above when
	 * cpr_suspend_succeeded -- confirm CPR_STAT builds cannot reach
	 * here with it unset after a failed suspend.
	 */
12770Sstevel@tonic-gate 	ctp = &cpr_term.tm_shutdown;
12780Sstevel@tonic-gate 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
12790Sstevel@tonic-gate 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate 	CPR_STAT_EVENT_PRINT();
12820Sstevel@tonic-gate #endif /* CPR_STAT */
12830Sstevel@tonic-gate 
1284*5295Srandyf 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
12850Sstevel@tonic-gate 	return (rc);
12860Sstevel@tonic-gate }
12870Sstevel@tonic-gate 
/*
 * cpr_suspend_init - per-attempt initialization done at the start of a
 * suspend cycle: reset suspend statistics, clear the statefile
 * terminator (so a stale terminator from a previously failed suspend
 * can't be mistaken for a valid one), and, for suspend-to-disk,
 * record/validate the pfn of the current thread structure.
 */
12880Sstevel@tonic-gate static void
1289*5295Srandyf cpr_suspend_init(int sleeptype)
12900Sstevel@tonic-gate {
12910Sstevel@tonic-gate 	cpr_time_t *ctp;
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate 	cpr_stat_init();
12940Sstevel@tonic-gate 
12950Sstevel@tonic-gate 	/*
12960Sstevel@tonic-gate 	 * If cpr_suspend() failed before cpr_dump() gets a chance
12970Sstevel@tonic-gate 	 * to reinitialize the terminator of the statefile,
12980Sstevel@tonic-gate 	 * the values of the old terminator will still linger around.
12990Sstevel@tonic-gate 	 * Since the terminator contains information that we need to
13000Sstevel@tonic-gate 	 * decide whether suspend succeeded or not, we need to
13010Sstevel@tonic-gate 	 * reinitialize it as early as possible.
13020Sstevel@tonic-gate 	 */
13030Sstevel@tonic-gate 	cpr_term.real_statef_size = 0;
13040Sstevel@tonic-gate 	ctp = &cpr_term.tm_shutdown;
13050Sstevel@tonic-gate 	bzero(ctp, sizeof (*ctp));
13060Sstevel@tonic-gate 	ctp = &cpr_term.tm_cprboot_start;
13070Sstevel@tonic-gate 	bzero(ctp, sizeof (*ctp));
13080Sstevel@tonic-gate 	ctp = &cpr_term.tm_cprboot_end;
13090Sstevel@tonic-gate 	bzero(ctp, sizeof (*ctp));
13100Sstevel@tonic-gate 
1311*5295Srandyf 	if (sleeptype == CPR_TODISK) {
1312*5295Srandyf 		/*
1313*5295Srandyf 		 * Lookup the physical address of our thread structure.
1314*5295Srandyf 		 * This should never be invalid and the entire thread structure
1315*5295Srandyf 		 * is expected to reside within the same pfn.
1316*5295Srandyf 		 */
1317*5295Srandyf 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1318*5295Srandyf 		ASSERT(curthreadpfn != PFN_INVALID);
		/* verify first and last byte of the kthread_t share one pfn */
1319*5295Srandyf 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1320*5295Srandyf 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1321*5295Srandyf 	}
13220Sstevel@tonic-gate 
	/* mark this attempt as not-yet-successful; set elsewhere on success */
13230Sstevel@tonic-gate 	cpr_suspend_succeeded = 0;
13240Sstevel@tonic-gate }
1325*5295Srandyf 
1326*5295Srandyf /*
1327*5295Srandyf  * bring all the offline cpus online
 *
 * No-op on SPARC.  On other platforms, walks the cpu_list ring and
 * onlines every inactive cpu, tagging each one it onlined with
 * CPU_CPR_ONLINE so cpr_restore_offline() can undo exactly this set.
 * Returns 0 on success or the first cpu_online() error (after rolling
 * back any cpus already onlined).  Caller must hold cpu_lock.
1328*5295Srandyf  */
1329*5295Srandyf static int
1330*5295Srandyf cpr_all_online(void)
1331*5295Srandyf {
1332*5295Srandyf 	int	rc = 0;
1333*5295Srandyf 
1334*5295Srandyf #ifdef	__sparc
1335*5295Srandyf 	/*
1336*5295Srandyf 	 * do nothing
1337*5295Srandyf 	 */
1338*5295Srandyf #else
1339*5295Srandyf 
1340*5295Srandyf 	cpu_t	*cp;
1341*5295Srandyf 
1342*5295Srandyf 	ASSERT(MUTEX_HELD(&cpu_lock));
1343*5295Srandyf 
1344*5295Srandyf 	cp = cpu_list;
1345*5295Srandyf 	do {
		/* start each cpu with a clean flag, set it only if we online */
1346*5295Srandyf 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1347*5295Srandyf 		if (!CPU_ACTIVE(cp)) {
1348*5295Srandyf 			if ((rc = cpu_online(cp)) != 0)
1349*5295Srandyf 				break;
1350*5295Srandyf 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1351*5295Srandyf 		}
1352*5295Srandyf 	} while ((cp = cp->cpu_next) != cpu_list);
1353*5295Srandyf 
1354*5295Srandyf 	if (rc) {
1355*5295Srandyf 		/*
1356*5295Srandyf 		 * an online operation failed so offline the cpus
1357*5295Srandyf 		 * that were onlined above to restore the system
1358*5295Srandyf 		 * to its original state
1359*5295Srandyf 		 */
1360*5295Srandyf 		cpr_restore_offline();
1361*5295Srandyf 	}
1362*5295Srandyf #endif
1363*5295Srandyf 	return (rc);
1364*5295Srandyf }
1365*5295Srandyf 
1366*5295Srandyf /*
1367*5295Srandyf  * offline all the cpus that were brought online by cpr_all_online()
 *
 * No-op on SPARC.  Elsewhere, walks the cpu_list ring and offlines
 * exactly the cpus tagged CPU_CPR_ONLINE, clearing the tag as it goes.
 * Caller must hold cpu_lock.
1368*5295Srandyf  */
1369*5295Srandyf static void
1370*5295Srandyf cpr_restore_offline(void)
1371*5295Srandyf {
1372*5295Srandyf 
1373*5295Srandyf #ifdef	__sparc
1374*5295Srandyf 	/*
1375*5295Srandyf 	 * do nothing
1376*5295Srandyf 	 */
1377*5295Srandyf #else
1378*5295Srandyf 
1379*5295Srandyf 	cpu_t	*cp;
	/* rc is only consumed by the ASSERT below (DEBUG builds) */
1380*5295Srandyf 	int	rc = 0;
1381*5295Srandyf 
1382*5295Srandyf 	ASSERT(MUTEX_HELD(&cpu_lock));
1383*5295Srandyf 
1384*5295Srandyf 	cp = cpu_list;
1385*5295Srandyf 	do {
1386*5295Srandyf 		if (CPU_CPR_IS_ONLINE(cp)) {
1387*5295Srandyf 			rc =  cpu_offline(cp, 0);
1388*5295Srandyf 			/*
1389*5295Srandyf 			 * this offline should work, since the cpu was
1390*5295Srandyf 			 * offline originally and was successfully onlined
1391*5295Srandyf 			 * by cpr_all_online()
1392*5295Srandyf 			 */
1393*5295Srandyf 			ASSERT(rc == 0);
1394*5295Srandyf 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1395*5295Srandyf 		}
1396*5295Srandyf 	} while ((cp = cp->cpu_next) != cpu_list);
1397*5295Srandyf 
1398*5295Srandyf #endif
1399*5295Srandyf 
1400*5295Srandyf }
1401