xref: /onnv-gate/usr/src/uts/common/cpr/cpr_misc.c (revision 4582:da3c7347dfa5)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52621Sllai1  * Common Development and Distribution License (the "License").
62621Sllai1  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/errno.h>
300Sstevel@tonic-gate #include <sys/cpuvar.h>
310Sstevel@tonic-gate #include <sys/vfs.h>
320Sstevel@tonic-gate #include <sys/vnode.h>
330Sstevel@tonic-gate #include <sys/pathname.h>
340Sstevel@tonic-gate #include <sys/callb.h>
350Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
360Sstevel@tonic-gate #include <vm/anon.h>
370Sstevel@tonic-gate #include <sys/fs/swapnode.h>	/* for swapfs_minfree */
380Sstevel@tonic-gate #include <sys/kmem.h>
390Sstevel@tonic-gate #include <sys/cpr.h>
400Sstevel@tonic-gate #include <sys/conf.h>
410Sstevel@tonic-gate 
/*
 * CPR miscellaneous support routines
 */

/*
 * cpr_open: vn_open() a kernel-space pathname with creation mode 0600
 * and the CRCREAT flag; the resulting vnode is stored through vpp.
 */
#define	cpr_open(path, mode, vpp)	\
	(vn_open(path, UIO_SYSSPACE, mode, 0600, vpp, CRCREAT, 0))

/*
 * cpr_rdwr: vn_rdwr() cnt bytes between vp (starting at offset 0) and the
 * kernel buffer basep.  No residual pointer is supplied.
 */
#define	cpr_rdwr(rw, vp, basep, cnt)	\
	(vn_rdwr(rw, vp, (caddr_t)(basep), cnt, 0LL, UIO_SYSSPACE, 0, \
	(rlim64_t)MAXOFF_T, CRED(), (ssize_t *)NULL))
500Sstevel@tonic-gate 
510Sstevel@tonic-gate extern void clkset(time_t);
520Sstevel@tonic-gate extern cpu_t *i_cpr_bootcpu(void);
530Sstevel@tonic-gate extern caddr_t i_cpr_map_setup(void);
540Sstevel@tonic-gate extern void i_cpr_free_memory_resources(void);
550Sstevel@tonic-gate 
560Sstevel@tonic-gate extern kmutex_t cpr_slock;
570Sstevel@tonic-gate extern size_t cpr_buf_size;
580Sstevel@tonic-gate extern char *cpr_buf;
590Sstevel@tonic-gate extern size_t cpr_pagedata_size;
600Sstevel@tonic-gate extern char *cpr_pagedata;
610Sstevel@tonic-gate extern int cpr_bufs_allocated;
620Sstevel@tonic-gate extern int cpr_bitmaps_allocated;
630Sstevel@tonic-gate 
640Sstevel@tonic-gate static struct cprconfig cprconfig;
650Sstevel@tonic-gate static int cprconfig_loaded = 0;
660Sstevel@tonic-gate static int cpr_statefile_ok(vnode_t *, int);
670Sstevel@tonic-gate static int cpr_p_online(cpu_t *, int);
680Sstevel@tonic-gate static void cpr_save_mp_state(void);
690Sstevel@tonic-gate int cpr_is_ufs(struct vfs *);
700Sstevel@tonic-gate 
710Sstevel@tonic-gate char cpr_default_path[] = CPR_DEFAULT;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate #define	COMPRESS_PERCENT 40	/* approx compression ratio in percent */
740Sstevel@tonic-gate #define	SIZE_RATE	115	/* increase size by 15% */
750Sstevel@tonic-gate #define	INTEGRAL	100	/* for integer math */
760Sstevel@tonic-gate 
770Sstevel@tonic-gate 
780Sstevel@tonic-gate /*
790Sstevel@tonic-gate  * cmn_err() followed by a 1/4 second delay; this gives the
800Sstevel@tonic-gate  * logging service a chance to flush messages and helps avoid
810Sstevel@tonic-gate  * intermixing output from prom_printf().
820Sstevel@tonic-gate  */
830Sstevel@tonic-gate /*PRINTFLIKE2*/
840Sstevel@tonic-gate void
850Sstevel@tonic-gate cpr_err(int ce, const char *fmt, ...)
860Sstevel@tonic-gate {
870Sstevel@tonic-gate 	va_list adx;
880Sstevel@tonic-gate 
890Sstevel@tonic-gate 	va_start(adx, fmt);
900Sstevel@tonic-gate 	vcmn_err(ce, fmt, adx);
910Sstevel@tonic-gate 	va_end(adx);
920Sstevel@tonic-gate 	drv_usecwait(MICROSEC >> 2);
930Sstevel@tonic-gate }
940Sstevel@tonic-gate 
950Sstevel@tonic-gate 
960Sstevel@tonic-gate int
970Sstevel@tonic-gate cpr_init(int fcn)
980Sstevel@tonic-gate {
990Sstevel@tonic-gate 	/*
1000Sstevel@tonic-gate 	 * Allow only one suspend/resume process.
1010Sstevel@tonic-gate 	 */
1020Sstevel@tonic-gate 	if (mutex_tryenter(&cpr_slock) == 0)
1030Sstevel@tonic-gate 		return (EBUSY);
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	CPR->c_flags = 0;
1060Sstevel@tonic-gate 	CPR->c_substate = 0;
1070Sstevel@tonic-gate 	CPR->c_cprboot_magic = 0;
1080Sstevel@tonic-gate 	CPR->c_alloc_cnt = 0;
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate 	CPR->c_fcn = fcn;
1110Sstevel@tonic-gate 	if (fcn == AD_CPR_REUSABLE)
1120Sstevel@tonic-gate 		CPR->c_flags |= C_REUSABLE;
1130Sstevel@tonic-gate 	else
1140Sstevel@tonic-gate 		CPR->c_flags |= C_SUSPENDING;
1150Sstevel@tonic-gate 	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
1160Sstevel@tonic-gate 		CPR->c_flags |= C_COMPRESSING;
1170Sstevel@tonic-gate 	/*
1180Sstevel@tonic-gate 	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
1190Sstevel@tonic-gate 	 */
1200Sstevel@tonic-gate 	CPR->c_mapping_area = i_cpr_map_setup();
1210Sstevel@tonic-gate 	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
1220Sstevel@tonic-gate 		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
1230Sstevel@tonic-gate 		mutex_exit(&cpr_slock);
1240Sstevel@tonic-gate 		return (EAGAIN);
1250Sstevel@tonic-gate 	}
1263446Smrj 	if (cpr_debug & CPR_DEBUG3)
1273446Smrj 		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
1283446Smrj 		    "kas\n", (void *)CPR->c_mapping_area);
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate 	return (0);
1310Sstevel@tonic-gate }
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate /*
1340Sstevel@tonic-gate  * This routine releases any resources used during the checkpoint.
1350Sstevel@tonic-gate  */
1360Sstevel@tonic-gate void
1370Sstevel@tonic-gate cpr_done(void)
1380Sstevel@tonic-gate {
1390Sstevel@tonic-gate 	cpr_stat_cleanup();
1400Sstevel@tonic-gate 	i_cpr_bitmap_cleanup();
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 	/*
1430Sstevel@tonic-gate 	 * Free pages used by cpr buffers.
1440Sstevel@tonic-gate 	 */
1450Sstevel@tonic-gate 	if (cpr_buf) {
1460Sstevel@tonic-gate 		kmem_free(cpr_buf, cpr_buf_size);
1470Sstevel@tonic-gate 		cpr_buf = NULL;
1480Sstevel@tonic-gate 	}
1490Sstevel@tonic-gate 	if (cpr_pagedata) {
1500Sstevel@tonic-gate 		kmem_free(cpr_pagedata, cpr_pagedata_size);
1510Sstevel@tonic-gate 		cpr_pagedata = NULL;
1520Sstevel@tonic-gate 	}
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	i_cpr_free_memory_resources();
1550Sstevel@tonic-gate 	mutex_exit(&cpr_slock);
1560Sstevel@tonic-gate 	cpr_err(CE_CONT, "System has been resumed.\n");
1570Sstevel@tonic-gate }
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate /*
1610Sstevel@tonic-gate  * reads config data into cprconfig
1620Sstevel@tonic-gate  */
1630Sstevel@tonic-gate static int
1640Sstevel@tonic-gate cpr_get_config(void)
1650Sstevel@tonic-gate {
1660Sstevel@tonic-gate 	static char config_path[] = CPR_CONFIG;
1670Sstevel@tonic-gate 	struct cprconfig *cf = &cprconfig;
1680Sstevel@tonic-gate 	struct vnode *vp;
1690Sstevel@tonic-gate 	char *fmt;
1700Sstevel@tonic-gate 	int err;
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate 	if (cprconfig_loaded)
1730Sstevel@tonic-gate 		return (0);
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	fmt = "cannot %s config file \"%s\", error %d\n";
1760Sstevel@tonic-gate 	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
1770Sstevel@tonic-gate 		cpr_err(CE_CONT, fmt, "open", config_path, err);
1780Sstevel@tonic-gate 		return (err);
1790Sstevel@tonic-gate 	}
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate 	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
1820Sstevel@tonic-gate 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
1830Sstevel@tonic-gate 	VN_RELE(vp);
1840Sstevel@tonic-gate 	if (err) {
1850Sstevel@tonic-gate 		cpr_err(CE_CONT, fmt, "read", config_path, err);
1860Sstevel@tonic-gate 		return (err);
1870Sstevel@tonic-gate 	}
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate 	if (cf->cf_magic == CPR_CONFIG_MAGIC)
1900Sstevel@tonic-gate 		cprconfig_loaded = 1;
1910Sstevel@tonic-gate 	else {
1920Sstevel@tonic-gate 		cpr_err(CE_CONT, "invalid config file \"%s\", "
1930Sstevel@tonic-gate 		    "rerun pmconfig(1M)\n", config_path);
1940Sstevel@tonic-gate 		err = EINVAL;
1950Sstevel@tonic-gate 	}
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate 	return (err);
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate /*
2020Sstevel@tonic-gate  * concat fs and path fields of the cprconfig structure;
2030Sstevel@tonic-gate  * returns pointer to the base of static data
2040Sstevel@tonic-gate  */
2050Sstevel@tonic-gate static char *
2060Sstevel@tonic-gate cpr_cprconfig_to_path(void)
2070Sstevel@tonic-gate {
2080Sstevel@tonic-gate 	static char full_path[MAXNAMELEN];
2090Sstevel@tonic-gate 	struct cprconfig *cf = &cprconfig;
2100Sstevel@tonic-gate 	char *ptr;
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 	/*
2130Sstevel@tonic-gate 	 * build /fs/path without extra '/'
2140Sstevel@tonic-gate 	 */
2150Sstevel@tonic-gate 	(void) strcpy(full_path, cf->cf_fs);
2160Sstevel@tonic-gate 	if (strcmp(cf->cf_fs, "/"))
2170Sstevel@tonic-gate 		(void) strcat(full_path, "/");
2180Sstevel@tonic-gate 	ptr = cf->cf_path;
2190Sstevel@tonic-gate 	if (*ptr == '/')
2200Sstevel@tonic-gate 		ptr++;
2210Sstevel@tonic-gate 	(void) strcat(full_path, ptr);
2220Sstevel@tonic-gate 	return (full_path);
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate /*
2270Sstevel@tonic-gate  * Verify that the information in the configuration file regarding the
2280Sstevel@tonic-gate  * location for the statefile is still valid, depending on cf_type.
2290Sstevel@tonic-gate  * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
2300Sstevel@tonic-gate  *	mounted on the same device as when pmconfig was last run,
2310Sstevel@tonic-gate  *	and the translation of that device to a node in the prom's
2320Sstevel@tonic-gate  *	device tree must be the same as when pmconfig was last run.
2330Sstevel@tonic-gate  * for CFT_SPEC, cf_path must be the path to a block special file,
2340Sstevel@tonic-gate  *	it must have no file system mounted on it,
2350Sstevel@tonic-gate  *	and the translation of that device to a node in the prom's
2360Sstevel@tonic-gate  *	device tree must be the same as when pmconfig was last run.
2370Sstevel@tonic-gate  */
2380Sstevel@tonic-gate static int
2390Sstevel@tonic-gate cpr_verify_statefile_path(void)
2400Sstevel@tonic-gate {
2410Sstevel@tonic-gate 	struct cprconfig *cf = &cprconfig;
2420Sstevel@tonic-gate 	static const char long_name[] = "Statefile pathname is too long.\n";
2430Sstevel@tonic-gate 	static const char lookup_fmt[] = "Lookup failed for "
2440Sstevel@tonic-gate 	    "cpr statefile device %s.\n";
2450Sstevel@tonic-gate 	static const char path_chg_fmt[] = "Device path for statefile "
2460Sstevel@tonic-gate 	    "has changed from %s to %s.\t%s\n";
2470Sstevel@tonic-gate 	static const char rerun[] = "Please rerun pmconfig(1m).";
2480Sstevel@tonic-gate 	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
2490Sstevel@tonic-gate 	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
2500Sstevel@tonic-gate 	ufsvfs_t *ufsvfsp_save = ufsvfsp;
2510Sstevel@tonic-gate 	int error;
2520Sstevel@tonic-gate 	struct vnode *vp;
2530Sstevel@tonic-gate 	char *slash, *tail, *longest;
2540Sstevel@tonic-gate 	char *errstr;
2550Sstevel@tonic-gate 	int found = 0;
2560Sstevel@tonic-gate 	union {
2570Sstevel@tonic-gate 		char un_devpath[OBP_MAXPATHLEN];
2580Sstevel@tonic-gate 		char un_sfpath[MAXNAMELEN];
2590Sstevel@tonic-gate 	} un;
2600Sstevel@tonic-gate #define	devpath	un.un_devpath
2610Sstevel@tonic-gate #define	sfpath	un.un_sfpath
2620Sstevel@tonic-gate 
2630Sstevel@tonic-gate 	ASSERT(cprconfig_loaded);
2640Sstevel@tonic-gate 	/*
2650Sstevel@tonic-gate 	 * We need not worry about locking or the timing of releasing
2660Sstevel@tonic-gate 	 * the vnode, since we are single-threaded now.
2670Sstevel@tonic-gate 	 */
2680Sstevel@tonic-gate 
2690Sstevel@tonic-gate 	switch (cf->cf_type) {
2700Sstevel@tonic-gate 	case CFT_SPEC:
2710Sstevel@tonic-gate 		if (strlen(cf->cf_path) > sizeof (sfpath)) {
2720Sstevel@tonic-gate 			cpr_err(CE_CONT, long_name);
2730Sstevel@tonic-gate 			return (ENAMETOOLONG);
2740Sstevel@tonic-gate 		}
2750Sstevel@tonic-gate 		if ((error = lookupname(cf->cf_devfs,
2760Sstevel@tonic-gate 		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
2770Sstevel@tonic-gate 			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
2780Sstevel@tonic-gate 			return (error);
2790Sstevel@tonic-gate 		}
2800Sstevel@tonic-gate 		if (vp->v_type != VBLK)
2810Sstevel@tonic-gate 			errstr = "statefile must be a block device";
2820Sstevel@tonic-gate 		else if (vfs_devismounted(vp->v_rdev))
2830Sstevel@tonic-gate 			errstr = "statefile device must not "
2840Sstevel@tonic-gate 			    "have a file system mounted on it";
2850Sstevel@tonic-gate 		else if (IS_SWAPVP(vp))
2860Sstevel@tonic-gate 			errstr = "statefile device must not "
2870Sstevel@tonic-gate 			    "be configured as swap file";
2880Sstevel@tonic-gate 		else
2890Sstevel@tonic-gate 			errstr = NULL;
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 		VN_RELE(vp);
2920Sstevel@tonic-gate 		if (errstr) {
2930Sstevel@tonic-gate 			cpr_err(CE_CONT, "%s.\n", errstr);
2940Sstevel@tonic-gate 			return (ENOTSUP);
2950Sstevel@tonic-gate 		}
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 		error = i_devname_to_promname(cf->cf_devfs, devpath,
2980Sstevel@tonic-gate 		    OBP_MAXPATHLEN);
2990Sstevel@tonic-gate 		if (error || strcmp(devpath, cf->cf_dev_prom)) {
3000Sstevel@tonic-gate 			cpr_err(CE_CONT, path_chg_fmt,
3010Sstevel@tonic-gate 			    cf->cf_dev_prom, devpath, rerun);
3020Sstevel@tonic-gate 		}
3030Sstevel@tonic-gate 		return (error);
3040Sstevel@tonic-gate 	case CFT_UFS:
3050Sstevel@tonic-gate 		break;		/* don't indent all the original code */
3060Sstevel@tonic-gate 	default:
3070Sstevel@tonic-gate 		cpr_err(CE_PANIC, "invalid cf_type");
3080Sstevel@tonic-gate 	}
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate 	/*
3110Sstevel@tonic-gate 	 * The original code for UFS statefile
3120Sstevel@tonic-gate 	 */
3130Sstevel@tonic-gate 	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
3140Sstevel@tonic-gate 		cpr_err(CE_CONT, long_name);
3150Sstevel@tonic-gate 		return (ENAMETOOLONG);
3160Sstevel@tonic-gate 	}
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	bzero(sfpath, sizeof (sfpath));
3190Sstevel@tonic-gate 	(void) strcpy(sfpath, cpr_cprconfig_to_path());
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 	if (*sfpath != '/') {
3220Sstevel@tonic-gate 		cpr_err(CE_CONT, "Statefile pathname %s "
3230Sstevel@tonic-gate 		    "must begin with a /\n", sfpath);
3240Sstevel@tonic-gate 		return (EINVAL);
3250Sstevel@tonic-gate 	}
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	/*
3280Sstevel@tonic-gate 	 * Find the longest prefix of the statefile pathname which
3290Sstevel@tonic-gate 	 * is the mountpoint of a filesystem.  This string must
3300Sstevel@tonic-gate 	 * match the cf_fs field we read from the config file.  Other-
3310Sstevel@tonic-gate 	 * wise the user has changed things without running pmconfig.
3320Sstevel@tonic-gate 	 */
3330Sstevel@tonic-gate 	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
3340Sstevel@tonic-gate 	while ((slash = strchr(tail, '/')) != NULL) {
3350Sstevel@tonic-gate 		*slash = '\0';	  /* temporarily terminate the string */
3360Sstevel@tonic-gate 		if ((error = lookupname(sfpath,
3370Sstevel@tonic-gate 		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
3380Sstevel@tonic-gate 			*slash = '/';
3390Sstevel@tonic-gate 			cpr_err(CE_CONT, "A directory in the "
3400Sstevel@tonic-gate 			    "statefile path %s was not found.\n", sfpath);
3410Sstevel@tonic-gate 			VN_RELE(vp);
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 			return (error);
3440Sstevel@tonic-gate 		}
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 		vfs_list_read_lock();
3470Sstevel@tonic-gate 		vfsp = rootvfs;
3480Sstevel@tonic-gate 		do {
3490Sstevel@tonic-gate 			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
3500Sstevel@tonic-gate 			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
3510Sstevel@tonic-gate 				found = 1;
3520Sstevel@tonic-gate 				break;
3530Sstevel@tonic-gate 			}
3540Sstevel@tonic-gate 			vfsp = vfsp->vfs_next;
3550Sstevel@tonic-gate 		} while (vfsp != rootvfs);
3560Sstevel@tonic-gate 		vfs_list_unlock();
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate 		/*
3590Sstevel@tonic-gate 		 * If we have found a filesystem mounted on the current
3600Sstevel@tonic-gate 		 * path prefix, remember the end of the string in
3610Sstevel@tonic-gate 		 * "longest".  If it happens to be the the exact fs
3620Sstevel@tonic-gate 		 * saved in the configuration file, save the current
3630Sstevel@tonic-gate 		 * ufsvfsp so we can make additional checks further down.
3640Sstevel@tonic-gate 		 */
3650Sstevel@tonic-gate 		if (found) {
3660Sstevel@tonic-gate 			longest = slash;
3670Sstevel@tonic-gate 			if (strcmp(cf->cf_fs, sfpath) == 0) {
3680Sstevel@tonic-gate 				ufsvfsp_save = ufsvfsp;
3690Sstevel@tonic-gate 				vfsp_save = vfsp;
3700Sstevel@tonic-gate 			}
3710Sstevel@tonic-gate 			found = 0;
3720Sstevel@tonic-gate 		}
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 		VN_RELE(vp);
3750Sstevel@tonic-gate 		*slash = '/';
3760Sstevel@tonic-gate 		tail = slash + 1;
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 	*longest = '\0';
3790Sstevel@tonic-gate 	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
3800Sstevel@tonic-gate 		cpr_err(CE_CONT, "Filesystem containing "
3810Sstevel@tonic-gate 		    "the statefile when pmconfig was run (%s) has "
3820Sstevel@tonic-gate 		    "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
3830Sstevel@tonic-gate 		return (EINVAL);
3840Sstevel@tonic-gate 	}
3850Sstevel@tonic-gate 
3860Sstevel@tonic-gate 	if ((error = lookupname(cf->cf_devfs,
3870Sstevel@tonic-gate 	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
3880Sstevel@tonic-gate 		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
3890Sstevel@tonic-gate 		return (error);
3900Sstevel@tonic-gate 	}
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate 	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
3930Sstevel@tonic-gate 		cpr_err(CE_CONT, "Filesystem containing "
3940Sstevel@tonic-gate 		    "statefile no longer mounted on device %s. "
3950Sstevel@tonic-gate 		    "See power.conf(4).", cf->cf_devfs);
3960Sstevel@tonic-gate 		VN_RELE(vp);
3970Sstevel@tonic-gate 		return (ENXIO);
3980Sstevel@tonic-gate 	}
3990Sstevel@tonic-gate 	VN_RELE(vp);
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
4020Sstevel@tonic-gate 	if (error || strcmp(devpath, cf->cf_dev_prom)) {
4030Sstevel@tonic-gate 		cpr_err(CE_CONT, path_chg_fmt,
4040Sstevel@tonic-gate 		    cf->cf_dev_prom, devpath, rerun);
4050Sstevel@tonic-gate 		return (error);
4060Sstevel@tonic-gate 	}
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	return (0);
4090Sstevel@tonic-gate }
4100Sstevel@tonic-gate 
4110Sstevel@tonic-gate /*
4120Sstevel@tonic-gate  * Make sure that the statefile can be used as a block special statefile
4130Sstevel@tonic-gate  * (meaning that is exists and has nothing mounted on it)
4140Sstevel@tonic-gate  * Returns errno if not a valid statefile.
4150Sstevel@tonic-gate  */
4160Sstevel@tonic-gate int
4170Sstevel@tonic-gate cpr_check_spec_statefile(void)
4180Sstevel@tonic-gate {
4190Sstevel@tonic-gate 	int err;
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 	if (err = cpr_get_config())
4220Sstevel@tonic-gate 		return (err);
4230Sstevel@tonic-gate 	ASSERT(cprconfig.cf_type == CFT_SPEC);
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate 	if (cprconfig.cf_devfs == NULL)
4260Sstevel@tonic-gate 		return (ENXIO);
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	return (cpr_verify_statefile_path());
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate int
4330Sstevel@tonic-gate cpr_alloc_statefile(int alloc_retry)
4340Sstevel@tonic-gate {
4350Sstevel@tonic-gate 	register int rc = 0;
4360Sstevel@tonic-gate 	char *str;
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 	/*
4390Sstevel@tonic-gate 	 * Statefile size validation. If checkpoint the first time, disk blocks
4400Sstevel@tonic-gate 	 * allocation will be done; otherwise, just do file size check.
4410Sstevel@tonic-gate 	 * if statefile allocation is being retried, C_VP will be inited
4420Sstevel@tonic-gate 	 */
4430Sstevel@tonic-gate 	if (alloc_retry) {
4440Sstevel@tonic-gate 		str = "\n-->Retrying statefile allocation...";
4453446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
4463446Smrj 			prom_printf(str);
4470Sstevel@tonic-gate 		if (C_VP->v_type != VBLK)
4480Sstevel@tonic-gate 			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL);
4490Sstevel@tonic-gate 	} else {
4500Sstevel@tonic-gate 		/*
4510Sstevel@tonic-gate 		 * Open an exiting file for writing, the state file needs to be
4520Sstevel@tonic-gate 		 * pre-allocated since we can't and don't want to do allocation
4530Sstevel@tonic-gate 		 * during checkpoint (too much of the OS is disabled).
4540Sstevel@tonic-gate 		 *    - do a preliminary size checking here, if it is too small,
4550Sstevel@tonic-gate 		 *	allocate more space internally and retry.
4560Sstevel@tonic-gate 		 *    - check the vp to make sure it's the right type.
4570Sstevel@tonic-gate 		 */
4580Sstevel@tonic-gate 		char *path = cpr_build_statefile_path();
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 		if (path == NULL)
4610Sstevel@tonic-gate 			return (ENXIO);
4620Sstevel@tonic-gate 		else if (rc = cpr_verify_statefile_path())
4630Sstevel@tonic-gate 			return (rc);
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 		if (rc = vn_open(path, UIO_SYSSPACE,
4660Sstevel@tonic-gate 		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
4670Sstevel@tonic-gate 			cpr_err(CE_WARN, "cannot open statefile %s", path);
4680Sstevel@tonic-gate 			return (rc);
4690Sstevel@tonic-gate 		}
4700Sstevel@tonic-gate 	}
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate 	/*
4730Sstevel@tonic-gate 	 * Only ufs and block special statefiles supported
4740Sstevel@tonic-gate 	 */
4750Sstevel@tonic-gate 	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
4760Sstevel@tonic-gate 		cpr_err(CE_CONT,
4770Sstevel@tonic-gate 		    "Statefile must be regular file or block special file.");
4780Sstevel@tonic-gate 		return (EACCES);
4790Sstevel@tonic-gate 	}
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
4820Sstevel@tonic-gate 		return (rc);
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	if (C_VP->v_type != VBLK) {
4850Sstevel@tonic-gate 		/*
4860Sstevel@tonic-gate 		 * sync out the fs change due to the statefile reservation.
4870Sstevel@tonic-gate 		 */
4880Sstevel@tonic-gate 		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 		/*
4910Sstevel@tonic-gate 		 * Validate disk blocks allocation for the state file.
4920Sstevel@tonic-gate 		 * Ask the file system prepare itself for the dump operation.
4930Sstevel@tonic-gate 		 */
4940Sstevel@tonic-gate 		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL)) {
4950Sstevel@tonic-gate 			cpr_err(CE_CONT, "Error allocating "
4960Sstevel@tonic-gate 			    "blocks for cpr statefile.");
4970Sstevel@tonic-gate 			return (rc);
4980Sstevel@tonic-gate 		}
4990Sstevel@tonic-gate 	}
5000Sstevel@tonic-gate 	return (0);
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate /*
505*4582Scth  * Lookup device size and return available space in bytes.
506*4582Scth  * NOTE: Since prop_op(9E) can't tell the difference between a character
507*4582Scth  * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
5080Sstevel@tonic-gate  */
5090Sstevel@tonic-gate size_t
5100Sstevel@tonic-gate cpr_get_devsize(dev_t dev)
5110Sstevel@tonic-gate {
5120Sstevel@tonic-gate 	size_t bytes = 0;
5130Sstevel@tonic-gate 
514*4582Scth 	bytes = cdev_Size(dev);
515*4582Scth 	if (bytes == 0)
516*4582Scth 		bytes = cdev_size(dev);
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate 	if (bytes > CPR_SPEC_OFFSET)
5190Sstevel@tonic-gate 		bytes -= CPR_SPEC_OFFSET;
5200Sstevel@tonic-gate 	else
5210Sstevel@tonic-gate 		bytes = 0;
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	return (bytes);
5240Sstevel@tonic-gate }
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate 
5270Sstevel@tonic-gate /*
5280Sstevel@tonic-gate  * increase statefile size
5290Sstevel@tonic-gate  */
5300Sstevel@tonic-gate static int
5310Sstevel@tonic-gate cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
5320Sstevel@tonic-gate {
5330Sstevel@tonic-gate 	extern uchar_t cpr_pagecopy[];
5340Sstevel@tonic-gate 	struct inode *ip = VTOI(vp);
5350Sstevel@tonic-gate 	u_longlong_t offset;
5360Sstevel@tonic-gate 	int error, increase;
5370Sstevel@tonic-gate 	ssize_t resid;
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_READER);
5400Sstevel@tonic-gate 	increase = (ip->i_size < newsize);
5410Sstevel@tonic-gate 	offset = ip->i_size;
5420Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 	if (increase == 0)
5450Sstevel@tonic-gate 		return (0);
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate 	/*
5480Sstevel@tonic-gate 	 * write to each logical block to reserve disk space
5490Sstevel@tonic-gate 	 */
5500Sstevel@tonic-gate 	error = 0;
5510Sstevel@tonic-gate 	cpr_pagecopy[0] = '1';
5520Sstevel@tonic-gate 	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
5530Sstevel@tonic-gate 		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
5540Sstevel@tonic-gate 		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
5550Sstevel@tonic-gate 		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
5560Sstevel@tonic-gate 			if (error == ENOSPC) {
5570Sstevel@tonic-gate 				cpr_err(CE_WARN, "error %d while reserving "
5580Sstevel@tonic-gate 				    "disk space for statefile %s\n"
5590Sstevel@tonic-gate 				    "wanted %lld bytes, file is %lld short",
5600Sstevel@tonic-gate 				    error, cpr_cprconfig_to_path(),
5610Sstevel@tonic-gate 				    newsize, newsize - offset);
5620Sstevel@tonic-gate 			}
5630Sstevel@tonic-gate 			break;
5640Sstevel@tonic-gate 		}
5650Sstevel@tonic-gate 	}
5660Sstevel@tonic-gate 	return (error);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate /*
5710Sstevel@tonic-gate  * do a simple estimate of the space needed to hold the statefile
5720Sstevel@tonic-gate  * taking compression into account, but be fairly conservative
5730Sstevel@tonic-gate  * so we have a better chance of completing; when dump fails,
5740Sstevel@tonic-gate  * the retry cost is fairly high.
5750Sstevel@tonic-gate  *
5760Sstevel@tonic-gate  * Do disk blocks allocation for the state file if no space has
5770Sstevel@tonic-gate  * been allocated yet. Since the state file will not be removed,
5780Sstevel@tonic-gate  * allocation should only be done once.
5790Sstevel@tonic-gate  */
5800Sstevel@tonic-gate static int
5810Sstevel@tonic-gate cpr_statefile_ok(vnode_t *vp, int alloc_retry)
5820Sstevel@tonic-gate {
5830Sstevel@tonic-gate 	extern size_t cpr_bitmap_size;
5840Sstevel@tonic-gate 	struct inode *ip = VTOI(vp);
5850Sstevel@tonic-gate 	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
5860Sstevel@tonic-gate 	u_longlong_t size, isize, ksize, raw_data;
5870Sstevel@tonic-gate 	char *str, *est_fmt;
5880Sstevel@tonic-gate 	size_t space;
5890Sstevel@tonic-gate 	int error;
5900Sstevel@tonic-gate 
5910Sstevel@tonic-gate 	/*
5920Sstevel@tonic-gate 	 * number of pages short for swapping.
5930Sstevel@tonic-gate 	 */
5940Sstevel@tonic-gate 	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
5950Sstevel@tonic-gate 	if (STAT->cs_nosw_pages < 0)
5960Sstevel@tonic-gate 		STAT->cs_nosw_pages = 0;
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	str = "cpr_statefile_ok:";
5990Sstevel@tonic-gate 
6003446Smrj 	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
6013446Smrj 	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
6023446Smrj 	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
6030Sstevel@tonic-gate 	    MAX(availrmem - swapfs_minfree, 0),
6043446Smrj 	    k_anoninfo.ani_mem_resv);
6053446Smrj 	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
606*4582Scth 	    CURRENT_TOTAL_AVAILABLE_SWAP);
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	/*
6090Sstevel@tonic-gate 	 * try increasing filesize by 15%
6100Sstevel@tonic-gate 	 */
6110Sstevel@tonic-gate 	if (alloc_retry) {
6120Sstevel@tonic-gate 		/*
6130Sstevel@tonic-gate 		 * block device doesn't get any bigger
6140Sstevel@tonic-gate 		 */
6150Sstevel@tonic-gate 		if (vp->v_type == VBLK) {
6163446Smrj 			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
6173446Smrj 				prom_printf(
6183446Smrj 				    "Retry statefile on special file\n");
6190Sstevel@tonic-gate 			return (ENOMEM);
6200Sstevel@tonic-gate 		} else {
6210Sstevel@tonic-gate 			rw_enter(&ip->i_contents, RW_READER);
6220Sstevel@tonic-gate 			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
6230Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
6240Sstevel@tonic-gate 		}
6253446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
6263446Smrj 			prom_printf("Retry statefile size = %lld\n", size);
6270Sstevel@tonic-gate 	} else {
6280Sstevel@tonic-gate 		u_longlong_t cpd_size;
6290Sstevel@tonic-gate 		pgcnt_t npages, nback;
6300Sstevel@tonic-gate 		int ndvram;
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 		ndvram = 0;
633931Smathue 		(void) callb_execute_class(CB_CL_CPR_FB,
634931Smathue 		    (int)(uintptr_t)&ndvram);
6353446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
6363446Smrj 			prom_printf("ndvram size = %d\n", ndvram);
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 		/*
6390Sstevel@tonic-gate 		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
6400Sstevel@tonic-gate 		 */
6410Sstevel@tonic-gate 		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
6420Sstevel@tonic-gate 		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
6430Sstevel@tonic-gate 		raw_data = cpd_size + cpr_bitmap_size;
6440Sstevel@tonic-gate 		ksize = ndvram + mmu_ptob(npages);
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 		est_fmt = "%s estimated size with "
6470Sstevel@tonic-gate 		    "%scompression %lld, ksize %lld\n";
6480Sstevel@tonic-gate 		nback = mmu_ptob(STAT->cs_nosw_pages);
6490Sstevel@tonic-gate 		if (CPR->c_flags & C_COMPRESSING) {
6500Sstevel@tonic-gate 			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
6510Sstevel@tonic-gate 			    raw_data + ((nback * 10) / UCOMP_RATE);
6523446Smrj 			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
6530Sstevel@tonic-gate 		} else {
6540Sstevel@tonic-gate 			size = ksize + raw_data + nback;
6553446Smrj 			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
6563446Smrj 			    size, ksize);
6570Sstevel@tonic-gate 		}
6580Sstevel@tonic-gate 	}
6590Sstevel@tonic-gate 
6600Sstevel@tonic-gate 	/*
6610Sstevel@tonic-gate 	 * All this is much simpler for a block device
6620Sstevel@tonic-gate 	 */
6630Sstevel@tonic-gate 	if (vp->v_type == VBLK) {
6640Sstevel@tonic-gate 		space = cpr_get_devsize(vp->v_rdev);
6653446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
6663446Smrj 			prom_printf("statefile dev size %lu\n", space);
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate 		/*
6690Sstevel@tonic-gate 		 * Export the estimated filesize info, this value will be
6700Sstevel@tonic-gate 		 * compared before dumping out the statefile in the case of
6710Sstevel@tonic-gate 		 * no compression.
6720Sstevel@tonic-gate 		 */
6730Sstevel@tonic-gate 		STAT->cs_est_statefsz = size;
6743446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
6753446Smrj 			prom_printf("%s Estimated statefile size %llu, "
6763446Smrj 			    "space %lu\n", str, size, space);
6770Sstevel@tonic-gate 		if (size > space) {
6780Sstevel@tonic-gate 			cpr_err(CE_CONT, "Statefile partition too small.");
6790Sstevel@tonic-gate 			return (ENOMEM);
6800Sstevel@tonic-gate 		}
6810Sstevel@tonic-gate 		return (0);
6820Sstevel@tonic-gate 	} else {
6830Sstevel@tonic-gate 		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
6840Sstevel@tonic-gate 			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
6850Sstevel@tonic-gate 			return (ENOMEM);
6860Sstevel@tonic-gate 		}
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 		/*
6890Sstevel@tonic-gate 		 * Estimate space needed for the state file.
6900Sstevel@tonic-gate 		 *
6910Sstevel@tonic-gate 		 * State file size in bytes:
6920Sstevel@tonic-gate 		 * 	kernel size + non-cache pte seg +
6930Sstevel@tonic-gate 		 *	bitmap size + cpr state file headers size
6940Sstevel@tonic-gate 		 * (round up to fs->fs_bsize)
6950Sstevel@tonic-gate 		 */
6960Sstevel@tonic-gate 		size = blkroundup(ip->i_fs, size);
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 		/*
6990Sstevel@tonic-gate 		 * Export the estimated filesize info, this value will be
7000Sstevel@tonic-gate 		 * compared before dumping out the statefile in the case of
7010Sstevel@tonic-gate 		 * no compression.
7020Sstevel@tonic-gate 		 */
7030Sstevel@tonic-gate 		STAT->cs_est_statefsz = size;
7040Sstevel@tonic-gate 		error = cpr_grow_statefile(vp, size);
7053446Smrj 		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
7060Sstevel@tonic-gate 			rw_enter(&ip->i_contents, RW_READER);
7070Sstevel@tonic-gate 			isize = ip->i_size;
7080Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
7093446Smrj 			prom_printf("%s Estimated statefile size %lld, "
7103446Smrj 			    "i_size %lld\n", str, size, isize);
7110Sstevel@tonic-gate 		}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 		return (error);
7140Sstevel@tonic-gate 	}
7150Sstevel@tonic-gate }
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate void
7190Sstevel@tonic-gate cpr_statef_close(void)
7200Sstevel@tonic-gate {
7210Sstevel@tonic-gate 	if (C_VP) {
7220Sstevel@tonic-gate 		if (!cpr_reusable_mode)
7230Sstevel@tonic-gate 			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL);
7240Sstevel@tonic-gate 		(void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED());
7250Sstevel@tonic-gate 		VN_RELE(C_VP);
7260Sstevel@tonic-gate 		C_VP = 0;
7270Sstevel@tonic-gate 	}
7280Sstevel@tonic-gate }
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate /*
7320Sstevel@tonic-gate  * open cpr default file and display error
7330Sstevel@tonic-gate  */
7340Sstevel@tonic-gate int
7350Sstevel@tonic-gate cpr_open_deffile(int mode, vnode_t **vpp)
7360Sstevel@tonic-gate {
7370Sstevel@tonic-gate 	int error;
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate 	if (error = cpr_open(cpr_default_path, mode, vpp))
7400Sstevel@tonic-gate 		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
7410Sstevel@tonic-gate 		    cpr_default_path, error);
7420Sstevel@tonic-gate 	return (error);
7430Sstevel@tonic-gate }
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate /*
7470Sstevel@tonic-gate  * write cdef_t to disk.  This contains the original values of prom
7480Sstevel@tonic-gate  * properties that we modify.  We fill in the magic number of the file
7490Sstevel@tonic-gate  * here as a signal to the booter code that the state file is valid.
7500Sstevel@tonic-gate  * Be sure the file gets synced, since we may be shutting down the OS.
7510Sstevel@tonic-gate  */
7520Sstevel@tonic-gate int
7530Sstevel@tonic-gate cpr_write_deffile(cdef_t *cdef)
7540Sstevel@tonic-gate {
7550Sstevel@tonic-gate 	struct vnode *vp;
7560Sstevel@tonic-gate 	char *str;
7570Sstevel@tonic-gate 	int rc;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
7600Sstevel@tonic-gate 		return (rc);
7610Sstevel@tonic-gate 
7620Sstevel@tonic-gate 	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
7630Sstevel@tonic-gate 		str = "write";
7640Sstevel@tonic-gate 	else if (rc = VOP_FSYNC(vp, FSYNC, CRED()))
7650Sstevel@tonic-gate 		str = "fsync";
7660Sstevel@tonic-gate 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
7670Sstevel@tonic-gate 	VN_RELE(vp);
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate 	if (rc) {
7700Sstevel@tonic-gate 		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
7710Sstevel@tonic-gate 		    str, rc, cpr_default_path);
7720Sstevel@tonic-gate 	}
7730Sstevel@tonic-gate 	return (rc);
7740Sstevel@tonic-gate }
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate /*
7770Sstevel@tonic-gate  * Clear the magic number in the defaults file.  This tells the booter
7780Sstevel@tonic-gate  * program that the state file is not current and thus prevents
7790Sstevel@tonic-gate  * any attempt to restore from an obsolete state file.
7800Sstevel@tonic-gate  */
7810Sstevel@tonic-gate void
7820Sstevel@tonic-gate cpr_clear_definfo(void)
7830Sstevel@tonic-gate {
7840Sstevel@tonic-gate 	struct vnode *vp;
7850Sstevel@tonic-gate 	cmini_t mini;
7860Sstevel@tonic-gate 
7870Sstevel@tonic-gate 	if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
7880Sstevel@tonic-gate 	    cpr_open_deffile(FCREAT|FWRITE, &vp))
7890Sstevel@tonic-gate 		return;
7900Sstevel@tonic-gate 	mini.magic = mini.reusable = 0;
7910Sstevel@tonic-gate 	(void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
7920Sstevel@tonic-gate 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
7930Sstevel@tonic-gate 	VN_RELE(vp);
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate /*
7970Sstevel@tonic-gate  * If the cpr default file is invalid, then we must not be in reusable mode
7980Sstevel@tonic-gate  * if it is valid, it tells us our mode
7990Sstevel@tonic-gate  */
8000Sstevel@tonic-gate int
8010Sstevel@tonic-gate cpr_get_reusable_mode(void)
8020Sstevel@tonic-gate {
8030Sstevel@tonic-gate 	struct vnode *vp;
8040Sstevel@tonic-gate 	cmini_t mini;
8050Sstevel@tonic-gate 	int rc;
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 	if (cpr_open(cpr_default_path, FREAD, &vp))
8080Sstevel@tonic-gate 		return (0);
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate 	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
8110Sstevel@tonic-gate 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
8120Sstevel@tonic-gate 	VN_RELE(vp);
8130Sstevel@tonic-gate 	if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
8140Sstevel@tonic-gate 		return (mini.reusable);
8150Sstevel@tonic-gate 
8160Sstevel@tonic-gate 	return (0);
8170Sstevel@tonic-gate }
8180Sstevel@tonic-gate 
/*
 * clock/time related routines
 */

/* seconds value recorded by cpr_save_time(), consumed by cpr_restore_time() */
static time_t cpr_time_stamp;
8230Sstevel@tonic-gate 
8240Sstevel@tonic-gate 
8250Sstevel@tonic-gate void
8260Sstevel@tonic-gate cpr_tod_get(cpr_time_t *ctp)
8270Sstevel@tonic-gate {
8280Sstevel@tonic-gate 	timestruc_t ts;
8290Sstevel@tonic-gate 
8300Sstevel@tonic-gate 	mutex_enter(&tod_lock);
8310Sstevel@tonic-gate 	ts = tod_get();
8320Sstevel@tonic-gate 	mutex_exit(&tod_lock);
8330Sstevel@tonic-gate 	ctp->tv_sec = (time32_t)ts.tv_sec;
8340Sstevel@tonic-gate 	ctp->tv_nsec = (int32_t)ts.tv_nsec;
8350Sstevel@tonic-gate }
8360Sstevel@tonic-gate 
8370Sstevel@tonic-gate void
8380Sstevel@tonic-gate cpr_tod_fault_reset(void)
8390Sstevel@tonic-gate {
8400Sstevel@tonic-gate 	mutex_enter(&tod_lock);
8410Sstevel@tonic-gate 	tod_fault_reset();
8420Sstevel@tonic-gate 	mutex_exit(&tod_lock);
8430Sstevel@tonic-gate }
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate void
8460Sstevel@tonic-gate cpr_save_time(void)
8470Sstevel@tonic-gate {
8480Sstevel@tonic-gate 	cpr_time_stamp = gethrestime_sec();
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate 
8510Sstevel@tonic-gate /*
8520Sstevel@tonic-gate  * correct time based on saved time stamp or hardware clock
8530Sstevel@tonic-gate  */
8540Sstevel@tonic-gate void
8550Sstevel@tonic-gate cpr_restore_time(void)
8560Sstevel@tonic-gate {
8570Sstevel@tonic-gate 	clkset(cpr_time_stamp);
8580Sstevel@tonic-gate }
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate /*
8610Sstevel@tonic-gate  * CPU ONLINE/OFFLINE CODE
8620Sstevel@tonic-gate  */
8630Sstevel@tonic-gate int
8640Sstevel@tonic-gate cpr_mp_offline(void)
8650Sstevel@tonic-gate {
8660Sstevel@tonic-gate 	cpu_t *cp, *bootcpu;
8670Sstevel@tonic-gate 	int rc = 0;
8680Sstevel@tonic-gate 	int brought_up_boot = 0;
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate 	/*
8710Sstevel@tonic-gate 	 * Do nothing for UP.
8720Sstevel@tonic-gate 	 */
8730Sstevel@tonic-gate 	if (ncpus == 1)
8740Sstevel@tonic-gate 		return (0);
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
8770Sstevel@tonic-gate 
8780Sstevel@tonic-gate 	cpr_save_mp_state();
8790Sstevel@tonic-gate 
8800Sstevel@tonic-gate 	bootcpu = i_cpr_bootcpu();
8810Sstevel@tonic-gate 	if (!CPU_ACTIVE(bootcpu)) {
8820Sstevel@tonic-gate 		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
8830Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
8840Sstevel@tonic-gate 			return (rc);
8850Sstevel@tonic-gate 		}
8860Sstevel@tonic-gate 		brought_up_boot = 1;
8870Sstevel@tonic-gate 	}
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate 	cp = cpu_list;
8900Sstevel@tonic-gate 	do {
8910Sstevel@tonic-gate 		if (cp == bootcpu)
8920Sstevel@tonic-gate 			continue;
8930Sstevel@tonic-gate 		if (cp->cpu_flags & CPU_OFFLINE)
8940Sstevel@tonic-gate 			continue;
8950Sstevel@tonic-gate 		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
8960Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
8970Sstevel@tonic-gate 			return (rc);
8980Sstevel@tonic-gate 		}
8990Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
9003446Smrj 	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
9013446Smrj 		prom_printf("changed cpu %p to state %d\n",
9023446Smrj 		    bootcpu, CPU_CPR_ONLINE);
9030Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	return (rc);
9060Sstevel@tonic-gate }
9070Sstevel@tonic-gate 
9080Sstevel@tonic-gate int
9090Sstevel@tonic-gate cpr_mp_online(void)
9100Sstevel@tonic-gate {
9110Sstevel@tonic-gate 	cpu_t *cp, *bootcpu = CPU;
9120Sstevel@tonic-gate 	int rc = 0;
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate 	/*
9150Sstevel@tonic-gate 	 * Do nothing for UP.
9160Sstevel@tonic-gate 	 */
9170Sstevel@tonic-gate 	if (ncpus == 1)
9180Sstevel@tonic-gate 		return (0);
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	/*
9210Sstevel@tonic-gate 	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
9220Sstevel@tonic-gate 	 * to indicate a cpu was online at the time of cpr_suspend();
9230Sstevel@tonic-gate 	 * now restart those cpus that were marked as CPU_CPR_ONLINE
9240Sstevel@tonic-gate 	 * and actually are offline.
9250Sstevel@tonic-gate 	 */
9260Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
9270Sstevel@tonic-gate 	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
9280Sstevel@tonic-gate 		/*
9290Sstevel@tonic-gate 		 * Clear the CPU_FROZEN flag in all cases.
9300Sstevel@tonic-gate 		 */
9310Sstevel@tonic-gate 		cp->cpu_flags &= ~CPU_FROZEN;
9320Sstevel@tonic-gate 
9330Sstevel@tonic-gate 		if (CPU_CPR_IS_OFFLINE(cp))
9340Sstevel@tonic-gate 			continue;
9350Sstevel@tonic-gate 		if (CPU_ACTIVE(cp))
9360Sstevel@tonic-gate 			continue;
9370Sstevel@tonic-gate 		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
9380Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
9390Sstevel@tonic-gate 			return (rc);
9400Sstevel@tonic-gate 		}
9410Sstevel@tonic-gate 	}
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate 	/*
9440Sstevel@tonic-gate 	 * turn off the boot cpu if it was offlined
9450Sstevel@tonic-gate 	 */
9460Sstevel@tonic-gate 	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
9470Sstevel@tonic-gate 		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
9480Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
9490Sstevel@tonic-gate 			return (rc);
9500Sstevel@tonic-gate 		}
9510Sstevel@tonic-gate 	}
9520Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
9530Sstevel@tonic-gate 	return (0);
9540Sstevel@tonic-gate }
9550Sstevel@tonic-gate 
9560Sstevel@tonic-gate static void
9570Sstevel@tonic-gate cpr_save_mp_state(void)
9580Sstevel@tonic-gate {
9590Sstevel@tonic-gate 	cpu_t *cp;
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	cp = cpu_list;
9640Sstevel@tonic-gate 	do {
9650Sstevel@tonic-gate 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
9660Sstevel@tonic-gate 		if (CPU_ACTIVE(cp))
9670Sstevel@tonic-gate 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
9680Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
9690Sstevel@tonic-gate }
9700Sstevel@tonic-gate 
9710Sstevel@tonic-gate /*
9720Sstevel@tonic-gate  * change cpu to online/offline
9730Sstevel@tonic-gate  */
9740Sstevel@tonic-gate static int
9750Sstevel@tonic-gate cpr_p_online(cpu_t *cp, int state)
9760Sstevel@tonic-gate {
9770Sstevel@tonic-gate 	int rc;
9780Sstevel@tonic-gate 
9790Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
9800Sstevel@tonic-gate 
9810Sstevel@tonic-gate 	switch (state) {
9820Sstevel@tonic-gate 	case CPU_CPR_ONLINE:
9830Sstevel@tonic-gate 		rc = cpu_online(cp);
9840Sstevel@tonic-gate 		break;
9850Sstevel@tonic-gate 	case CPU_CPR_OFFLINE:
9860Sstevel@tonic-gate 		rc = cpu_offline(cp, CPU_FORCED);
9870Sstevel@tonic-gate 		break;
9880Sstevel@tonic-gate 	}
9890Sstevel@tonic-gate 	if (rc) {
9900Sstevel@tonic-gate 		cpr_err(CE_WARN, "Failed to change processor %d to "
9910Sstevel@tonic-gate 		    "state %d, (errno %d)", cp->cpu_id, state, rc);
9920Sstevel@tonic-gate 	}
9930Sstevel@tonic-gate 	return (rc);
9940Sstevel@tonic-gate }
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate /*
9970Sstevel@tonic-gate  * Construct the pathname of the state file and return a pointer to
9980Sstevel@tonic-gate  * caller.  Read the config file to get the mount point of the
9990Sstevel@tonic-gate  * filesystem and the pathname within fs.
10000Sstevel@tonic-gate  */
10010Sstevel@tonic-gate char *
10020Sstevel@tonic-gate cpr_build_statefile_path(void)
10030Sstevel@tonic-gate {
10040Sstevel@tonic-gate 	struct cprconfig *cf = &cprconfig;
10050Sstevel@tonic-gate 
10060Sstevel@tonic-gate 	if (cpr_get_config())
10070Sstevel@tonic-gate 		return (NULL);
10080Sstevel@tonic-gate 
10090Sstevel@tonic-gate 	switch (cf->cf_type) {
10100Sstevel@tonic-gate 	case CFT_UFS:
10110Sstevel@tonic-gate 		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
10120Sstevel@tonic-gate 			cpr_err(CE_CONT, "Statefile path is too long.\n");
10130Sstevel@tonic-gate 			return (NULL);
10140Sstevel@tonic-gate 		}
10150Sstevel@tonic-gate 		return (cpr_cprconfig_to_path());
10160Sstevel@tonic-gate 	case CFT_SPEC:
10170Sstevel@tonic-gate 		return (cf->cf_devfs);
10180Sstevel@tonic-gate 	default:
10190Sstevel@tonic-gate 		cpr_err(CE_PANIC, "invalid statefile type");
10200Sstevel@tonic-gate 		/*NOTREACHED*/
1021931Smathue 		return (NULL);
10220Sstevel@tonic-gate 	}
10230Sstevel@tonic-gate }
10240Sstevel@tonic-gate 
10250Sstevel@tonic-gate int
10260Sstevel@tonic-gate cpr_statefile_is_spec(void)
10270Sstevel@tonic-gate {
10280Sstevel@tonic-gate 	if (cpr_get_config())
10290Sstevel@tonic-gate 		return (0);
10300Sstevel@tonic-gate 	return (cprconfig.cf_type == CFT_SPEC);
10310Sstevel@tonic-gate }
10320Sstevel@tonic-gate 
10330Sstevel@tonic-gate char *
10340Sstevel@tonic-gate cpr_get_statefile_prom_path(void)
10350Sstevel@tonic-gate {
10360Sstevel@tonic-gate 	struct cprconfig *cf = &cprconfig;
10370Sstevel@tonic-gate 
10380Sstevel@tonic-gate 	ASSERT(cprconfig_loaded);
10390Sstevel@tonic-gate 	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
10400Sstevel@tonic-gate 	ASSERT(cf->cf_type == CFT_SPEC);
10410Sstevel@tonic-gate 	return (cf->cf_dev_prom);
10420Sstevel@tonic-gate }
10430Sstevel@tonic-gate 
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate /*
10460Sstevel@tonic-gate  * XXX The following routines need to be in the vfs source code.
10470Sstevel@tonic-gate  */
10480Sstevel@tonic-gate 
10490Sstevel@tonic-gate int
10500Sstevel@tonic-gate cpr_is_ufs(struct vfs *vfsp)
10510Sstevel@tonic-gate {
10520Sstevel@tonic-gate 	char *fsname;
10530Sstevel@tonic-gate 
10540Sstevel@tonic-gate 	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
10550Sstevel@tonic-gate 	return (strcmp(fsname, "ufs") == 0);
10560Sstevel@tonic-gate }
10570Sstevel@tonic-gate 
10580Sstevel@tonic-gate /*
10590Sstevel@tonic-gate  * This is a list of file systems that are allowed to be writeable when a
10600Sstevel@tonic-gate  * reusable statefile checkpoint is taken.  They must not have any state that
10610Sstevel@tonic-gate  * cannot be restored to consistency by simply rebooting using the checkpoint.
10620Sstevel@tonic-gate  * (In contrast to ufs, cachefs and pcfs which have disk state that could get
10630Sstevel@tonic-gate  * out of sync with the in-kernel data).
10640Sstevel@tonic-gate  */
10650Sstevel@tonic-gate int
10660Sstevel@tonic-gate cpr_reusable_mount_check(void)
10670Sstevel@tonic-gate {
10680Sstevel@tonic-gate 	struct vfs *vfsp;
10690Sstevel@tonic-gate 	char *fsname;
10700Sstevel@tonic-gate 	char **cpp;
10710Sstevel@tonic-gate 	static char *cpr_writeok_fss[] = {
10720Sstevel@tonic-gate 		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
10732621Sllai1 		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
10740Sstevel@tonic-gate 	};
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate 	vfs_list_read_lock();
10770Sstevel@tonic-gate 	vfsp = rootvfs;
10780Sstevel@tonic-gate 	do {
10790Sstevel@tonic-gate 		if (vfsp->vfs_flag & VFS_RDONLY) {
10800Sstevel@tonic-gate 			vfsp = vfsp->vfs_next;
10810Sstevel@tonic-gate 			continue;
10820Sstevel@tonic-gate 		}
10830Sstevel@tonic-gate 		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
10840Sstevel@tonic-gate 		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
10850Sstevel@tonic-gate 			if (strcmp(fsname, *cpp) == 0)
10860Sstevel@tonic-gate 				break;
10870Sstevel@tonic-gate 		}
10880Sstevel@tonic-gate 		/*
10890Sstevel@tonic-gate 		 * if the inner loop reached the NULL terminator,
10900Sstevel@tonic-gate 		 * the current fs-type does not match any OK-type
10910Sstevel@tonic-gate 		 */
10920Sstevel@tonic-gate 		if (*cpp == NULL) {
10930Sstevel@tonic-gate 			cpr_err(CE_CONT, "a filesystem of type %s is "
10940Sstevel@tonic-gate 			    "mounted read/write.\nReusable statefile requires "
10950Sstevel@tonic-gate 			    "no writeable filesystem of this type be mounted\n",
10960Sstevel@tonic-gate 			    fsname);
10970Sstevel@tonic-gate 			vfs_list_unlock();
10980Sstevel@tonic-gate 			return (EINVAL);
10990Sstevel@tonic-gate 		}
11000Sstevel@tonic-gate 		vfsp = vfsp->vfs_next;
11010Sstevel@tonic-gate 	} while (vfsp != rootvfs);
11020Sstevel@tonic-gate 	vfs_list_unlock();
11030Sstevel@tonic-gate 	return (0);
11040Sstevel@tonic-gate }
11050Sstevel@tonic-gate 
11060Sstevel@tonic-gate /*
11070Sstevel@tonic-gate  * Force a fresh read of the cprinfo per uadmin 3 call
11080Sstevel@tonic-gate  */
11090Sstevel@tonic-gate void
11100Sstevel@tonic-gate cpr_forget_cprconfig(void)
11110Sstevel@tonic-gate {
11120Sstevel@tonic-gate 	cprconfig_loaded = 0;
11130Sstevel@tonic-gate }
11140Sstevel@tonic-gate 
11150Sstevel@tonic-gate 
11160Sstevel@tonic-gate /*
11170Sstevel@tonic-gate  * return statefile offset in DEV_BSIZE units
11180Sstevel@tonic-gate  */
11190Sstevel@tonic-gate int
11200Sstevel@tonic-gate cpr_statefile_offset(void)
11210Sstevel@tonic-gate {
11220Sstevel@tonic-gate 	return (cpr_statefile_is_spec() ? btod(CPR_SPEC_OFFSET) : 0);
11230Sstevel@tonic-gate }
1124