xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_lockfs.c (revision 427:81cdc815223e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/t_lock.h>
310Sstevel@tonic-gate #include <sys/param.h>
320Sstevel@tonic-gate #include <sys/time.h>
330Sstevel@tonic-gate #include <sys/systm.h>
340Sstevel@tonic-gate #include <sys/sysmacros.h>
350Sstevel@tonic-gate #include <sys/resource.h>
360Sstevel@tonic-gate #include <sys/signal.h>
370Sstevel@tonic-gate #include <sys/cred.h>
380Sstevel@tonic-gate #include <sys/user.h>
390Sstevel@tonic-gate #include <sys/buf.h>
400Sstevel@tonic-gate #include <sys/vfs.h>
410Sstevel@tonic-gate #include <sys/vnode.h>
420Sstevel@tonic-gate #include <sys/proc.h>
430Sstevel@tonic-gate #include <sys/disp.h>
440Sstevel@tonic-gate #include <sys/file.h>
450Sstevel@tonic-gate #include <sys/fcntl.h>
460Sstevel@tonic-gate #include <sys/flock.h>
47329Saguzovsk #include <sys/atomic.h>
480Sstevel@tonic-gate #include <sys/kmem.h>
490Sstevel@tonic-gate #include <sys/uio.h>
500Sstevel@tonic-gate #include <sys/conf.h>
510Sstevel@tonic-gate #include <sys/mman.h>
520Sstevel@tonic-gate #include <sys/pathname.h>
530Sstevel@tonic-gate #include <sys/debug.h>
540Sstevel@tonic-gate #include <sys/vmmeter.h>
550Sstevel@tonic-gate #include <sys/vmsystm.h>
560Sstevel@tonic-gate #include <sys/cmn_err.h>
570Sstevel@tonic-gate #include <sys/vtrace.h>
580Sstevel@tonic-gate #include <sys/acct.h>
590Sstevel@tonic-gate #include <sys/dnlc.h>
600Sstevel@tonic-gate #include <sys/swap.h>
610Sstevel@tonic-gate 
620Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
630Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
640Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
650Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
660Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
670Sstevel@tonic-gate #include <sys/fs/ufs_mount.h>
680Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
690Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
700Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
710Sstevel@tonic-gate #include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
720Sstevel@tonic-gate #include <sys/errno.h>
730Sstevel@tonic-gate #include <sys/sysinfo.h>
740Sstevel@tonic-gate 
750Sstevel@tonic-gate #include <vm/hat.h>
760Sstevel@tonic-gate #include <vm/pvn.h>
770Sstevel@tonic-gate #include <vm/as.h>
780Sstevel@tonic-gate #include <vm/seg.h>
790Sstevel@tonic-gate #include <vm/seg_map.h>
800Sstevel@tonic-gate #include <vm/seg_vn.h>
810Sstevel@tonic-gate #include <vm/rm.h>
820Sstevel@tonic-gate #include <vm/anon.h>
830Sstevel@tonic-gate #include <sys/swap.h>
840Sstevel@tonic-gate #include <sys/dnlc.h>
850Sstevel@tonic-gate 
/* Defined outside this file; used by ufs_flush() on the fs device vnode. */
extern struct vnode *common_specvp(struct vnode *vp);

/*
 * Error lock status codes (passed as "errlck", e.g. to ufs_reconcile_fs()).
 * UN_ERRLCK is the value ufs_reconcile_fs() tests for when an error lock is
 * being released.
 */
#define	NO_ERRLCK	0
#define	SET_ERRLCK	1
#define	RE_ERRLCK	2
#define	UN_ERRLCK	(-1)
930Sstevel@tonic-gate 
940Sstevel@tonic-gate /*
950Sstevel@tonic-gate  * Index to be used in TSD for storing lockfs data
960Sstevel@tonic-gate  */
970Sstevel@tonic-gate uint_t ufs_lockfs_key;
980Sstevel@tonic-gate 
/*
 * One node of the singly linked list searched by IS_REC_VOP() and
 * SEARCH_ULOCKFSP().  A node whose ulp is NULL is treated by IS_REC_VOP()
 * as a free slot.
 */
struct _ulockfs_info {
	struct _ulockfs_info	*next;	/* next node, NULL at end of list */
	struct ulockfs		*ulp;	/* lockfs state, or NULL if unused */
};
typedef struct _ulockfs_info ulockfs_info_t;
1030Sstevel@tonic-gate 
/*
 * IS_REC_VOP(found, head, want, slot)
 *
 * Walk the ulockfs_info_t list starting at "head" to check whether an entry
 * for the ulockfs pointer "want" is already recorded (i.e. a VOP is already
 * in progress on this file system according to the list kept in TSD).
 *
 *	found	(out) lvalue; set to 1 if an entry with ulp == want exists,
 *		otherwise 0.
 *	head	(in)  first list node, may be NULL.
 *	want	(in)  ulockfs pointer to look for.
 *	slot	(out) lvalue; set to the first node with a NULL ulp that was
 *		seen before the walk stopped, or NULL if there was none.
 *
 * The macro parameters are deliberately not named after the structure
 * members ("ulp", "next"): the preprocessor would otherwise rewrite the
 * member references too, and the macro would only compile when the caller's
 * argument happened to be spelled "ulp".
 *
 * The expansion is a plain compound statement, so existing call sites (with
 * or without a trailing semicolon) keep working; do not use it as the
 * unbraced body of an if that has an else.
 */
#define	IS_REC_VOP(found, head, want, slot)			\
{								\
	ulockfs_info_t *_curr;					\
								\
	for ((found) = 0, (slot) = NULL, _curr = (head);	\
	    _curr != NULL; _curr = _curr->next) {		\
		if (((slot) == NULL) &&				\
		    (_curr->ulp == NULL))			\
			(slot) = _curr;				\
		if (_curr->ulp == (want)) {			\
			(found) = 1;				\
			break;					\
		}						\
	}							\
}
1220Sstevel@tonic-gate 
/*
 * SEARCH_ULOCKFSP(head, want, info)
 *
 * Look up the list node that records the ulockfs pointer "want" in the
 * ulockfs_info_t list starting at "head", so that recursive VOPs can be
 * handled using the data kept in TSD.
 *
 *	head	(in)  first list node, may be NULL.
 *	want	(in)  ulockfs pointer to look for.
 *	info	(out) lvalue; set to the first node whose ulp == want, or
 *		NULL when no such node exists.
 *
 * The parameter is not called "ulp" on purpose: that is also the member
 * name, and the preprocessor would substitute the argument into
 * "_curr->ulp" as well.  The expansion is a plain compound statement.
 */
#define	SEARCH_ULOCKFSP(head, want, info)		\
{							\
	ulockfs_info_t *_curr;				\
							\
	for (_curr = (head); _curr != NULL;		\
	    _curr = _curr->next) {			\
		if (_curr->ulp == (want)) {		\
			break;				\
		}					\
	}						\
							\
	(info) = _curr;					\
}
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate /*
1420Sstevel@tonic-gate  * Validate lockfs request
1430Sstevel@tonic-gate  */
1440Sstevel@tonic-gate static int
1450Sstevel@tonic-gate ufs_getlfd(
1460Sstevel@tonic-gate 	struct lockfs *lockfsp,		/* new lock request */
1470Sstevel@tonic-gate 	struct lockfs *ul_lockfsp)	/* old lock state */
1480Sstevel@tonic-gate {
1490Sstevel@tonic-gate 	int	error = 0;
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 	/*
1520Sstevel@tonic-gate 	 * no input flags defined
1530Sstevel@tonic-gate 	 */
1540Sstevel@tonic-gate 	if (lockfsp->lf_flags != 0) {
1550Sstevel@tonic-gate 		error = EINVAL;
1560Sstevel@tonic-gate 		goto errout;
1570Sstevel@tonic-gate 	}
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 	/*
1600Sstevel@tonic-gate 	 * check key
1610Sstevel@tonic-gate 	 */
1620Sstevel@tonic-gate 	if (!LOCKFS_IS_ULOCK(ul_lockfsp))
1630Sstevel@tonic-gate 		if (lockfsp->lf_key != ul_lockfsp->lf_key) {
1640Sstevel@tonic-gate 			error = EINVAL;
1650Sstevel@tonic-gate 			goto errout;
1660Sstevel@tonic-gate 	}
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 	lockfsp->lf_key = ul_lockfsp->lf_key + 1;
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate errout:
1710Sstevel@tonic-gate 	return (error);
1720Sstevel@tonic-gate }
1730Sstevel@tonic-gate 
/*
 * ufs_checkaccton
 *	Check whether accounting is turned on for this fs.
 *
 *	Returns EDEADLK when acct_fs_in_use(vp) reports the file system as
 *	in use, otherwise 0.
 */
int
ufs_checkaccton(struct vnode *vp)
{
	return (acct_fs_in_use(vp) ? EDEADLK : 0);
}
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate /*
1880Sstevel@tonic-gate  * ufs_checkswapon
1890Sstevel@tonic-gate  *	check if local swapping is to file on this fs
1900Sstevel@tonic-gate  */
1910Sstevel@tonic-gate int
1920Sstevel@tonic-gate ufs_checkswapon(struct vnode *vp)
1930Sstevel@tonic-gate {
1940Sstevel@tonic-gate 	struct swapinfo	*sip;
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	mutex_enter(&swapinfo_lock);
1970Sstevel@tonic-gate 	for (sip = swapinfo; sip; sip = sip->si_next)
1980Sstevel@tonic-gate 		if (sip->si_vp->v_vfsp == vp->v_vfsp) {
1990Sstevel@tonic-gate 			mutex_exit(&swapinfo_lock);
2000Sstevel@tonic-gate 			return (EDEADLK);
2010Sstevel@tonic-gate 		}
2020Sstevel@tonic-gate 	mutex_exit(&swapinfo_lock);
2030Sstevel@tonic-gate 	return (0);
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate /*
2070Sstevel@tonic-gate  * ufs_freeze
2080Sstevel@tonic-gate  *	pend future accesses for current lock and desired lock
2090Sstevel@tonic-gate  */
2100Sstevel@tonic-gate void
2110Sstevel@tonic-gate ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
2120Sstevel@tonic-gate {
2130Sstevel@tonic-gate 	/*
2140Sstevel@tonic-gate 	 * set to new lock type
2150Sstevel@tonic-gate 	 */
2160Sstevel@tonic-gate 	ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
2170Sstevel@tonic-gate 	ulp->ul_lockfs.lf_key = lockfsp->lf_key;
2180Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
2190Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate 	ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
2220Sstevel@tonic-gate }
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate /*
225329Saguzovsk  * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
226329Saguzovsk  * starting ufs_quiesce() protocol and decrement it only when a file system no
227329Saguzovsk  * longer has to be in quiescent state. This allows ufs_pageio() to detect
228329Saguzovsk  * that another thread wants to quiesce a file system. See more comments in
229329Saguzovsk  * ufs_pageio().
230329Saguzovsk  */
231329Saguzovsk ulong_t ufs_quiesce_pend = 0;
232329Saguzovsk 
233329Saguzovsk /*
2340Sstevel@tonic-gate  * ufs_quiesce
2350Sstevel@tonic-gate  *	wait for outstanding accesses to finish
2360Sstevel@tonic-gate  */
2370Sstevel@tonic-gate int
2380Sstevel@tonic-gate ufs_quiesce(struct ulockfs *ulp)
2390Sstevel@tonic-gate {
2400Sstevel@tonic-gate 	int error = 0;
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	/*
2430Sstevel@tonic-gate 	 * Set a softlock to suspend future ufs_vnops so that
2440Sstevel@tonic-gate 	 * this lockfs request will not be starved
2450Sstevel@tonic-gate 	 */
2460Sstevel@tonic-gate 	ULOCKFS_SET_SLOCK(ulp);
247329Saguzovsk 	ASSERT(ufs_quiesce_pend);
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate 	/* check if there is any outstanding ufs vnodeops calls */
2500Sstevel@tonic-gate 	while (ulp->ul_vnops_cnt)
251329Saguzovsk 		/*
252329Saguzovsk 		 * use timed version of cv_wait_sig() to make sure we don't
253329Saguzovsk 		 * miss a wake up call from ufs_pageio() when it doesn't use
254329Saguzovsk 		 * ul_lock.
255329Saguzovsk 		 */
256329Saguzovsk 		if (!cv_timedwait_sig(&ulp->ul_cv, &ulp->ul_lock, lbolt + hz)) {
2570Sstevel@tonic-gate 			error = EINTR;
2580Sstevel@tonic-gate 			goto out;
2590Sstevel@tonic-gate 		}
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate out:
2620Sstevel@tonic-gate 	/*
2630Sstevel@tonic-gate 	 * unlock the soft lock
2640Sstevel@tonic-gate 	 */
2650Sstevel@tonic-gate 	ULOCKFS_CLR_SLOCK(ulp);
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	return (error);
2680Sstevel@tonic-gate }
2690Sstevel@tonic-gate /*
2700Sstevel@tonic-gate  * ufs_flush_inode
2710Sstevel@tonic-gate  */
2720Sstevel@tonic-gate int
2730Sstevel@tonic-gate ufs_flush_inode(struct inode *ip, void *arg)
2740Sstevel@tonic-gate {
2750Sstevel@tonic-gate 	int	error;
2760Sstevel@tonic-gate 	int	saverror	= 0;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	/*
2790Sstevel@tonic-gate 	 * wrong file system; keep looking
2800Sstevel@tonic-gate 	 */
2810Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
2820Sstevel@tonic-gate 		return (0);
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate 	/*
2850Sstevel@tonic-gate 	 * asynchronously push all the dirty pages
2860Sstevel@tonic-gate 	 */
2870Sstevel@tonic-gate 	if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
2880Sstevel@tonic-gate 	    (error != EAGAIN))
2890Sstevel@tonic-gate 		saverror = error;
2900Sstevel@tonic-gate 	/*
2910Sstevel@tonic-gate 	 * wait for io and discard all mappings
2920Sstevel@tonic-gate 	 */
2930Sstevel@tonic-gate 	if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
2940Sstevel@tonic-gate 		saverror = error;
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	if (ITOV(ip)->v_type == VDIR) {
2970Sstevel@tonic-gate 		dnlc_dir_purge(&ip->i_danchor);
2980Sstevel@tonic-gate 	}
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	return (saverror);
3010Sstevel@tonic-gate }
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate /*
3040Sstevel@tonic-gate  * ufs_flush
3050Sstevel@tonic-gate  *	Flush everything that is currently dirty; this includes invalidating
3060Sstevel@tonic-gate  *	any mappings.
3070Sstevel@tonic-gate  */
3080Sstevel@tonic-gate int
3090Sstevel@tonic-gate ufs_flush(struct vfs *vfsp)
3100Sstevel@tonic-gate {
3110Sstevel@tonic-gate 	int		error;
3120Sstevel@tonic-gate 	int		saverror = 0;
3130Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
3140Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	ASSERT(vfs_lock_held(vfsp));
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	/*
3190Sstevel@tonic-gate 	 * purge dnlc
3200Sstevel@tonic-gate 	 */
3210Sstevel@tonic-gate 	(void) dnlc_purge_vfsp(vfsp, 0);
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate 	/*
3240Sstevel@tonic-gate 	 * drain the delete and idle threads
3250Sstevel@tonic-gate 	 */
3260Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3270Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	/*
3300Sstevel@tonic-gate 	 * flush and invalidate quota records
3310Sstevel@tonic-gate 	 */
3320Sstevel@tonic-gate 	(void) qsync(ufsvfsp);
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate 	/*
3350Sstevel@tonic-gate 	 * flush w/invalidate the inodes for vfsp
3360Sstevel@tonic-gate 	 */
3370Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
3380Sstevel@tonic-gate 		saverror = error;
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	/*
3410Sstevel@tonic-gate 	 * synchronously flush superblock and summary info
3420Sstevel@tonic-gate 	 */
3430Sstevel@tonic-gate 	if (fs->fs_ronly == 0 && fs->fs_fmod) {
3440Sstevel@tonic-gate 		fs->fs_fmod = 0;
3450Sstevel@tonic-gate 		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
3460Sstevel@tonic-gate 	}
3470Sstevel@tonic-gate 	/*
3480Sstevel@tonic-gate 	 * flush w/invalidate block device pages and buf cache
3490Sstevel@tonic-gate 	 */
3500Sstevel@tonic-gate 	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
3510Sstevel@tonic-gate 	    (offset_t)0, 0, B_INVAL, CRED())) > 0)
3520Sstevel@tonic-gate 		saverror = error;
3530Sstevel@tonic-gate 
3540Sstevel@tonic-gate 	(void) bflush((dev_t)vfsp->vfs_dev);
3550Sstevel@tonic-gate 	(void) bfinval((dev_t)vfsp->vfs_dev, 0);
3560Sstevel@tonic-gate 
3570Sstevel@tonic-gate 	/*
3580Sstevel@tonic-gate 	 * drain the delete and idle threads again
3590Sstevel@tonic-gate 	 */
3600Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3610Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	/*
3640Sstevel@tonic-gate 	 * play with the clean flag
3650Sstevel@tonic-gate 	 */
3660Sstevel@tonic-gate 	if (saverror == 0)
3670Sstevel@tonic-gate 		ufs_checkclean(vfsp);
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate 	/*
370*427Sdduvall 	 * flush any outstanding transactions and roll the log
3710Sstevel@tonic-gate 	 */
3720Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp)) {
373*427Sdduvall 		curthread->t_flag |= T_DONTBLOCK;
374*427Sdduvall 		TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE,
375*427Sdduvall 		    error);
376*427Sdduvall 		if (!error) {
377*427Sdduvall 			TRANS_END_SYNC(ufsvfsp, saverror, TOP_COMMIT_FLUSH,
378*427Sdduvall 			    TOP_COMMIT_SIZE);
379*427Sdduvall 		}
380*427Sdduvall 		curthread->t_flag &= ~T_DONTBLOCK;
381406Sbatschul 
382*427Sdduvall 		logmap_roll_dev(ufsvfsp->vfs_log); /* fully roll the log */
3830Sstevel@tonic-gate 	}
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 	return (saverror);
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate /*
3890Sstevel@tonic-gate  * ufs_thaw_wlock
3900Sstevel@tonic-gate  *	special processing when thawing down to wlock
3910Sstevel@tonic-gate  */
3920Sstevel@tonic-gate static int
3930Sstevel@tonic-gate ufs_thaw_wlock(struct inode *ip, void *arg)
3940Sstevel@tonic-gate {
3950Sstevel@tonic-gate 	/*
3960Sstevel@tonic-gate 	 * wrong file system; keep looking
3970Sstevel@tonic-gate 	 */
3980Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
3990Sstevel@tonic-gate 		return (0);
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	/*
4020Sstevel@tonic-gate 	 * iupdat refuses to clear flags if the fs is read only.  The fs
4030Sstevel@tonic-gate 	 * may become read/write during the lock and we wouldn't want
4040Sstevel@tonic-gate 	 * these inodes being written to disk.  So clear the flags.
4050Sstevel@tonic-gate 	 */
4060Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4070Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4080Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 	/*
4110Sstevel@tonic-gate 	 * pages are mlocked -- fail wlock
4120Sstevel@tonic-gate 	 */
4130Sstevel@tonic-gate 	if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
4140Sstevel@tonic-gate 		return (EBUSY);
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 	return (0);
4170Sstevel@tonic-gate }
4180Sstevel@tonic-gate 
4190Sstevel@tonic-gate /*
4200Sstevel@tonic-gate  * ufs_thaw_hlock
4210Sstevel@tonic-gate  *	special processing when thawing down to hlock or elock
4220Sstevel@tonic-gate  */
4230Sstevel@tonic-gate static int
4240Sstevel@tonic-gate ufs_thaw_hlock(struct inode *ip, void *arg)
4250Sstevel@tonic-gate {
4260Sstevel@tonic-gate 	struct vnode	*vp	= ITOV(ip);
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	/*
4290Sstevel@tonic-gate 	 * wrong file system; keep looking
4300Sstevel@tonic-gate 	 */
4310Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4320Sstevel@tonic-gate 		return (0);
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	/*
4350Sstevel@tonic-gate 	 * blow away all pages - even if they are mlocked
4360Sstevel@tonic-gate 	 */
4370Sstevel@tonic-gate 	do {
4380Sstevel@tonic-gate 		(void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
4390Sstevel@tonic-gate 	} while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
4400Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4410Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4420Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 	return (0);
4450Sstevel@tonic-gate }
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate /*
4480Sstevel@tonic-gate  * ufs_thaw
4490Sstevel@tonic-gate  *	thaw file system lock down to current value
4500Sstevel@tonic-gate  */
4510Sstevel@tonic-gate int
4520Sstevel@tonic-gate ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
4530Sstevel@tonic-gate {
4540Sstevel@tonic-gate 	int		error	= 0;
4550Sstevel@tonic-gate 	int		noidel	= (int)(ulp->ul_flag & ULOCKFS_NOIDEL);
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	/*
4580Sstevel@tonic-gate 	 * if wlock or hlock or elock
4590Sstevel@tonic-gate 	 */
4600Sstevel@tonic-gate 	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
4610Sstevel@tonic-gate 	    ULOCKFS_IS_ELOCK(ulp)) {
4620Sstevel@tonic-gate 
4630Sstevel@tonic-gate 		/*
4640Sstevel@tonic-gate 		 * don't keep access times
4650Sstevel@tonic-gate 		 * don't free deleted files
4660Sstevel@tonic-gate 		 * if superblock writes are allowed, limit them to me for now
4670Sstevel@tonic-gate 		 */
4680Sstevel@tonic-gate 		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
4690Sstevel@tonic-gate 		if (ulp->ul_sbowner != (kthread_id_t)-1)
4700Sstevel@tonic-gate 			ulp->ul_sbowner = curthread;
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate 		/*
4730Sstevel@tonic-gate 		 * wait for writes for deleted files and superblock updates
4740Sstevel@tonic-gate 		 */
4750Sstevel@tonic-gate 		(void) ufs_flush(vfsp);
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate 		/*
4780Sstevel@tonic-gate 		 * now make sure the quota file is up-to-date
4790Sstevel@tonic-gate 		 *	expensive; but effective
4800Sstevel@tonic-gate 		 */
4810Sstevel@tonic-gate 		error = ufs_flush(vfsp);
4820Sstevel@tonic-gate 		/*
4830Sstevel@tonic-gate 		 * no one can write the superblock
4840Sstevel@tonic-gate 		 */
4850Sstevel@tonic-gate 		ulp->ul_sbowner = (kthread_id_t)-1;
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 		/*
4880Sstevel@tonic-gate 		 * special processing for wlock/hlock/elock
4890Sstevel@tonic-gate 		 */
4900Sstevel@tonic-gate 		if (ULOCKFS_IS_WLOCK(ulp)) {
4910Sstevel@tonic-gate 			if (error)
4920Sstevel@tonic-gate 				goto errout;
4930Sstevel@tonic-gate 			error = bfinval(ufsvfsp->vfs_dev, 0);
4940Sstevel@tonic-gate 			if (error)
4950Sstevel@tonic-gate 				goto errout;
4960Sstevel@tonic-gate 			error = ufs_scan_inodes(0, ufs_thaw_wlock,
4970Sstevel@tonic-gate 					(void *)ufsvfsp, ufsvfsp);
4980Sstevel@tonic-gate 			if (error)
4990Sstevel@tonic-gate 				goto errout;
5000Sstevel@tonic-gate 		}
5010Sstevel@tonic-gate 		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
5020Sstevel@tonic-gate 			error = 0;
5030Sstevel@tonic-gate 			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
5040Sstevel@tonic-gate 					(void *)ufsvfsp, ufsvfsp);
5050Sstevel@tonic-gate 			(void) bfinval(ufsvfsp->vfs_dev, 1);
5060Sstevel@tonic-gate 		}
5070Sstevel@tonic-gate 	} else {
5080Sstevel@tonic-gate 
5090Sstevel@tonic-gate 		/*
5100Sstevel@tonic-gate 		 * okay to keep access times
5110Sstevel@tonic-gate 		 * okay to free deleted files
5120Sstevel@tonic-gate 		 * okay to write the superblock
5130Sstevel@tonic-gate 		 */
5140Sstevel@tonic-gate 		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
5150Sstevel@tonic-gate 		ulp->ul_sbowner = NULL;
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 		/*
5180Sstevel@tonic-gate 		 * flush in case deleted files are in memory
5190Sstevel@tonic-gate 		 */
5200Sstevel@tonic-gate 		if (noidel) {
5210Sstevel@tonic-gate 			if (error = ufs_flush(vfsp))
5220Sstevel@tonic-gate 				goto errout;
5230Sstevel@tonic-gate 		}
5240Sstevel@tonic-gate 	}
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate errout:
5270Sstevel@tonic-gate 	cv_broadcast(&ulp->ul_cv);
5280Sstevel@tonic-gate 	return (error);
5290Sstevel@tonic-gate }
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate /*
5320Sstevel@tonic-gate  * ufs_reconcile_fs
5330Sstevel@tonic-gate  *	reconcile incore superblock with ondisk superblock
5340Sstevel@tonic-gate  */
5350Sstevel@tonic-gate int
5360Sstevel@tonic-gate ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
5370Sstevel@tonic-gate {
5380Sstevel@tonic-gate 	struct fs	*mfs; 	/* in-memory superblock */
5390Sstevel@tonic-gate 	struct fs	*dfs;	/* on-disk   superblock */
5400Sstevel@tonic-gate 	struct buf	*bp;	/* on-disk   superblock buf */
5410Sstevel@tonic-gate 	int		 needs_unlock;
5420Sstevel@tonic-gate 	char		 finished_fsclean;
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 	mfs = ufsvfsp->vfs_fs;
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	/*
5470Sstevel@tonic-gate 	 * get the on-disk copy of the superblock
5480Sstevel@tonic-gate 	 */
5490Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
5500Sstevel@tonic-gate 	bp->b_flags |= (B_STALE|B_AGE);
5510Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
5520Sstevel@tonic-gate 		brelse(bp);
5530Sstevel@tonic-gate 		return (EIO);
5540Sstevel@tonic-gate 	}
5550Sstevel@tonic-gate 	dfs = bp->b_un.b_fs;
5560Sstevel@tonic-gate 
5570Sstevel@tonic-gate 	/* error locks may only unlock after the fs has been made consistent */
5580Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
5590Sstevel@tonic-gate 		if (dfs->fs_clean == FSFIX) {	/* being repaired */
5600Sstevel@tonic-gate 			brelse(bp);
5610Sstevel@tonic-gate 			return (EAGAIN);
5620Sstevel@tonic-gate 		}
5630Sstevel@tonic-gate 		/* repair not yet started? */
5640Sstevel@tonic-gate 		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
5650Sstevel@tonic-gate 		if (dfs->fs_clean != finished_fsclean) {
5660Sstevel@tonic-gate 			brelse(bp);
5670Sstevel@tonic-gate 			return (EBUSY);
5680Sstevel@tonic-gate 		}
5690Sstevel@tonic-gate 	}
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 	/*
5720Sstevel@tonic-gate 	 * if superblock has changed too much, abort
5730Sstevel@tonic-gate 	 */
5740Sstevel@tonic-gate 	if ((mfs->fs_sblkno		!= dfs->fs_sblkno) ||
5750Sstevel@tonic-gate 	    (mfs->fs_cblkno		!= dfs->fs_cblkno) ||
5760Sstevel@tonic-gate 	    (mfs->fs_iblkno		!= dfs->fs_iblkno) ||
5770Sstevel@tonic-gate 	    (mfs->fs_dblkno		!= dfs->fs_dblkno) ||
5780Sstevel@tonic-gate 	    (mfs->fs_cgoffset		!= dfs->fs_cgoffset) ||
5790Sstevel@tonic-gate 	    (mfs->fs_cgmask		!= dfs->fs_cgmask) ||
5800Sstevel@tonic-gate 	    (mfs->fs_bsize		!= dfs->fs_bsize) ||
5810Sstevel@tonic-gate 	    (mfs->fs_fsize		!= dfs->fs_fsize) ||
5820Sstevel@tonic-gate 	    (mfs->fs_frag		!= dfs->fs_frag) ||
5830Sstevel@tonic-gate 	    (mfs->fs_bmask		!= dfs->fs_bmask) ||
5840Sstevel@tonic-gate 	    (mfs->fs_fmask		!= dfs->fs_fmask) ||
5850Sstevel@tonic-gate 	    (mfs->fs_bshift		!= dfs->fs_bshift) ||
5860Sstevel@tonic-gate 	    (mfs->fs_fshift		!= dfs->fs_fshift) ||
5870Sstevel@tonic-gate 	    (mfs->fs_fragshift		!= dfs->fs_fragshift) ||
5880Sstevel@tonic-gate 	    (mfs->fs_fsbtodb		!= dfs->fs_fsbtodb) ||
5890Sstevel@tonic-gate 	    (mfs->fs_sbsize		!= dfs->fs_sbsize) ||
5900Sstevel@tonic-gate 	    (mfs->fs_nindir		!= dfs->fs_nindir) ||
5910Sstevel@tonic-gate 	    (mfs->fs_nspf		!= dfs->fs_nspf) ||
5920Sstevel@tonic-gate 	    (mfs->fs_trackskew		!= dfs->fs_trackskew) ||
5930Sstevel@tonic-gate 	    (mfs->fs_cgsize		!= dfs->fs_cgsize) ||
5940Sstevel@tonic-gate 	    (mfs->fs_ntrak		!= dfs->fs_ntrak) ||
5950Sstevel@tonic-gate 	    (mfs->fs_nsect		!= dfs->fs_nsect) ||
5960Sstevel@tonic-gate 	    (mfs->fs_spc		!= dfs->fs_spc) ||
5970Sstevel@tonic-gate 	    (mfs->fs_cpg		!= dfs->fs_cpg) ||
5980Sstevel@tonic-gate 	    (mfs->fs_ipg		!= dfs->fs_ipg) ||
5990Sstevel@tonic-gate 	    (mfs->fs_fpg		!= dfs->fs_fpg) ||
6000Sstevel@tonic-gate 	    (mfs->fs_postblformat	!= dfs->fs_postblformat) ||
6010Sstevel@tonic-gate 	    (mfs->fs_magic		!= dfs->fs_magic)) {
6020Sstevel@tonic-gate 		brelse(bp);
6030Sstevel@tonic-gate 		return (EACCES);
6040Sstevel@tonic-gate 	}
6050Sstevel@tonic-gate 	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
6060Sstevel@tonic-gate 		if (mfs->fs_clean == FSLOG) {
6070Sstevel@tonic-gate 			brelse(bp);
6080Sstevel@tonic-gate 			return (EACCES);
6090Sstevel@tonic-gate 		}
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	/*
6120Sstevel@tonic-gate 	 * get new summary info
6130Sstevel@tonic-gate 	 */
6140Sstevel@tonic-gate 	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
6150Sstevel@tonic-gate 		brelse(bp);
6160Sstevel@tonic-gate 		return (EIO);
6170Sstevel@tonic-gate 	}
6180Sstevel@tonic-gate 
6190Sstevel@tonic-gate 	/*
6200Sstevel@tonic-gate 	 * release old summary info and update in-memory superblock
6210Sstevel@tonic-gate 	 */
6220Sstevel@tonic-gate 	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
6230Sstevel@tonic-gate 	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 	/*
6260Sstevel@tonic-gate 	 * update fields allowed to change
6270Sstevel@tonic-gate 	 */
6280Sstevel@tonic-gate 	mfs->fs_size		= dfs->fs_size;
6290Sstevel@tonic-gate 	mfs->fs_dsize		= dfs->fs_dsize;
6300Sstevel@tonic-gate 	mfs->fs_ncg		= dfs->fs_ncg;
6310Sstevel@tonic-gate 	mfs->fs_minfree		= dfs->fs_minfree;
6320Sstevel@tonic-gate 	mfs->fs_rotdelay	= dfs->fs_rotdelay;
6330Sstevel@tonic-gate 	mfs->fs_rps		= dfs->fs_rps;
6340Sstevel@tonic-gate 	mfs->fs_maxcontig	= dfs->fs_maxcontig;
6350Sstevel@tonic-gate 	mfs->fs_maxbpg		= dfs->fs_maxbpg;
6360Sstevel@tonic-gate 	mfs->fs_csmask		= dfs->fs_csmask;
6370Sstevel@tonic-gate 	mfs->fs_csshift		= dfs->fs_csshift;
6380Sstevel@tonic-gate 	mfs->fs_optim		= dfs->fs_optim;
6390Sstevel@tonic-gate 	mfs->fs_csaddr		= dfs->fs_csaddr;
6400Sstevel@tonic-gate 	mfs->fs_cssize		= dfs->fs_cssize;
6410Sstevel@tonic-gate 	mfs->fs_ncyl		= dfs->fs_ncyl;
6420Sstevel@tonic-gate 	mfs->fs_cstotal		= dfs->fs_cstotal;
6430Sstevel@tonic-gate 	mfs->fs_reclaim		= dfs->fs_reclaim;
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
6460Sstevel@tonic-gate 		mfs->fs_reclaim &= ~FS_RECLAIM;
6470Sstevel@tonic-gate 		mfs->fs_reclaim |=  FS_RECLAIMING;
6480Sstevel@tonic-gate 		ufs_thread_start(&ufsvfsp->vfs_reclaim,
6490Sstevel@tonic-gate 			ufs_thread_reclaim, vfsp);
6500Sstevel@tonic-gate 	}
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 	/* XXX What to do about sparecon? */
6530Sstevel@tonic-gate 
6540Sstevel@tonic-gate 	/* XXX need to copy volume label */
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	/*
6570Sstevel@tonic-gate 	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
6580Sstevel@tonic-gate 	 * or if error-locked and ondisk is now clean
6590Sstevel@tonic-gate 	 */
6600Sstevel@tonic-gate 	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
6610Sstevel@tonic-gate 	if (needs_unlock)
6620Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
6650Sstevel@tonic-gate 		if (finished_fsclean == dfs->fs_clean)
6660Sstevel@tonic-gate 			mfs->fs_clean = finished_fsclean;
6670Sstevel@tonic-gate 		else
6680Sstevel@tonic-gate 			mfs->fs_clean = FSBAD;
6690Sstevel@tonic-gate 		mfs->fs_state = FSOKAY - dfs->fs_time;
6700Sstevel@tonic-gate 	}
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
6730Sstevel@tonic-gate 	    (dfs->fs_clean == FSBAD))
6740Sstevel@tonic-gate 		mfs->fs_clean = FSBAD;
6750Sstevel@tonic-gate 
6760Sstevel@tonic-gate 	if (needs_unlock)
6770Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 	brelse(bp);
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 	return (0);
6820Sstevel@tonic-gate }
6830Sstevel@tonic-gate 
6840Sstevel@tonic-gate /*
6850Sstevel@tonic-gate  * ufs_reconcile_inode
6860Sstevel@tonic-gate  *	reconcile ondisk inode with incore inode
6870Sstevel@tonic-gate  */
6880Sstevel@tonic-gate static int
6890Sstevel@tonic-gate ufs_reconcile_inode(struct inode *ip, void *arg)
6900Sstevel@tonic-gate {
6910Sstevel@tonic-gate 	int		i;
6920Sstevel@tonic-gate 	int		ndaddr;
6930Sstevel@tonic-gate 	int		niaddr;
6940Sstevel@tonic-gate 	struct dinode	*dp;		/* ondisk inode */
6950Sstevel@tonic-gate 	struct buf	*bp	= NULL;
6960Sstevel@tonic-gate 	uid_t		d_uid;
6970Sstevel@tonic-gate 	gid_t		d_gid;
6980Sstevel@tonic-gate 	int		error = 0;
6990Sstevel@tonic-gate 	struct fs	*fs;
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	/*
7020Sstevel@tonic-gate 	 * not an inode we care about
7030Sstevel@tonic-gate 	 */
7040Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
7050Sstevel@tonic-gate 		return (0);
7060Sstevel@tonic-gate 
7070Sstevel@tonic-gate 	fs = ip->i_fs;
7080Sstevel@tonic-gate 
7090Sstevel@tonic-gate 	/*
7100Sstevel@tonic-gate 	 * Inode reconciliation fails: we made the filesystem quiescent
7110Sstevel@tonic-gate 	 * and we did a ufs_flush() before calling ufs_reconcile_inode()
7120Sstevel@tonic-gate 	 * and thus the inode should not have been changed inbetween.
7130Sstevel@tonic-gate 	 * Any discrepancies indicate a logic error and a pretty
7140Sstevel@tonic-gate 	 * significant run-state inconsistency we should complain about.
7150Sstevel@tonic-gate 	 */
7160Sstevel@tonic-gate 	if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
7170Sstevel@tonic-gate 		cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
7180Sstevel@tonic-gate 		    "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
7190Sstevel@tonic-gate 		return (EINVAL);
7200Sstevel@tonic-gate 	}
7210Sstevel@tonic-gate 
7220Sstevel@tonic-gate 	/*
7230Sstevel@tonic-gate 	 * get the dinode
7240Sstevel@tonic-gate 	 */
7250Sstevel@tonic-gate 	bp = UFS_BREAD(ip->i_ufsvfs,
7260Sstevel@tonic-gate 			ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
7270Sstevel@tonic-gate 	    (int)fs->fs_bsize);
7280Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
7290Sstevel@tonic-gate 		brelse(bp);
7300Sstevel@tonic-gate 		return (EIO);
7310Sstevel@tonic-gate 	}
7320Sstevel@tonic-gate 	dp  = bp->b_un.b_dino;
7330Sstevel@tonic-gate 	dp += itoo(fs, ip->i_number);
7340Sstevel@tonic-gate 
7350Sstevel@tonic-gate 	/*
7360Sstevel@tonic-gate 	 * handle Sun's implementation of EFT
7370Sstevel@tonic-gate 	 */
7380Sstevel@tonic-gate 	d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
7390Sstevel@tonic-gate 	d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
7400Sstevel@tonic-gate 
7410Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate 	/*
7440Sstevel@tonic-gate 	 * some fields are not allowed to change
7450Sstevel@tonic-gate 	 */
7460Sstevel@tonic-gate 	if ((ip->i_mode  != dp->di_mode) ||
7470Sstevel@tonic-gate 	    (ip->i_gen   != dp->di_gen) ||
7480Sstevel@tonic-gate 	    (ip->i_uid   != d_uid) ||
7490Sstevel@tonic-gate 	    (ip->i_gid   != d_gid)) {
7500Sstevel@tonic-gate 		error = EACCES;
7510Sstevel@tonic-gate 		goto out;
7520Sstevel@tonic-gate 	}
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate 	/*
7550Sstevel@tonic-gate 	 * and some are allowed to change
7560Sstevel@tonic-gate 	 */
7570Sstevel@tonic-gate 	ip->i_size		= dp->di_size;
7580Sstevel@tonic-gate 	ip->i_ic.ic_flags	= dp->di_ic.ic_flags;
7590Sstevel@tonic-gate 	ip->i_blocks		= dp->di_blocks;
7600Sstevel@tonic-gate 	ip->i_nlink		= dp->di_nlink;
7610Sstevel@tonic-gate 	if (ip->i_flag & IFASTSYMLNK) {
7620Sstevel@tonic-gate 		ndaddr = 1;
7630Sstevel@tonic-gate 		niaddr = 0;
7640Sstevel@tonic-gate 	} else {
7650Sstevel@tonic-gate 		ndaddr = NDADDR;
7660Sstevel@tonic-gate 		niaddr = NIADDR;
7670Sstevel@tonic-gate 	}
7680Sstevel@tonic-gate 	for (i = 0; i < ndaddr; ++i)
7690Sstevel@tonic-gate 		ip->i_db[i] = dp->di_db[i];
7700Sstevel@tonic-gate 	for (i = 0; i < niaddr; ++i)
7710Sstevel@tonic-gate 		ip->i_ib[i] = dp->di_ib[i];
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate out:
7740Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
7750Sstevel@tonic-gate 	brelse(bp);
7760Sstevel@tonic-gate 	return (error);
7770Sstevel@tonic-gate }
7780Sstevel@tonic-gate 
7790Sstevel@tonic-gate /*
7800Sstevel@tonic-gate  * ufs_reconcile
7810Sstevel@tonic-gate  *	reconcile ondisk superblock/inodes with any incore
7820Sstevel@tonic-gate  */
7830Sstevel@tonic-gate static int
7840Sstevel@tonic-gate ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
7850Sstevel@tonic-gate {
7860Sstevel@tonic-gate 	int	error = 0;
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 	/*
7890Sstevel@tonic-gate 	 * get rid of as much inmemory data as possible
7900Sstevel@tonic-gate 	 */
7910Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
7920Sstevel@tonic-gate 
7930Sstevel@tonic-gate 	/*
7940Sstevel@tonic-gate 	 * reconcile the superblock and inodes
7950Sstevel@tonic-gate 	 */
7960Sstevel@tonic-gate 	if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
7970Sstevel@tonic-gate 		return (error);
7980Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
7990Sstevel@tonic-gate 		return (error);
8000Sstevel@tonic-gate 	/*
8010Sstevel@tonic-gate 	 * allocation blocks may be incorrect; get rid of them
8020Sstevel@tonic-gate 	 */
8030Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
8040Sstevel@tonic-gate 
8050Sstevel@tonic-gate 	return (error);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate 
/*
 * File system locking
 *
 * Entry point that forwards to ufs__fiolfs() with the request marked as
 * coming from the user (from_user == 1); from_log is passed through.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	const int	from_user = 1;

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate /* kernel-internal interface, also used by fix-on-panic */
8180Sstevel@tonic-gate int
8190Sstevel@tonic-gate ufs__fiolfs(
8200Sstevel@tonic-gate 	struct vnode *vp,
8210Sstevel@tonic-gate 	struct lockfs *lockfsp,
8220Sstevel@tonic-gate 	int from_user,
8230Sstevel@tonic-gate 	int from_log)
8240Sstevel@tonic-gate {
8250Sstevel@tonic-gate 	struct ulockfs	*ulp;
8260Sstevel@tonic-gate 	struct lockfs	lfs;
8270Sstevel@tonic-gate 	int		error;
8280Sstevel@tonic-gate 	struct vfs	*vfsp;
8290Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
8300Sstevel@tonic-gate 	int		 errlck		= NO_ERRLCK;
8310Sstevel@tonic-gate 	int		 poll_events	= POLLPRI;
8320Sstevel@tonic-gate 	extern struct pollhead ufs_pollhd;
8330Sstevel@tonic-gate 
8340Sstevel@tonic-gate 	/* check valid lock type */
8350Sstevel@tonic-gate 	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
8360Sstevel@tonic-gate 		return (EINVAL);
8370Sstevel@tonic-gate 
8380Sstevel@tonic-gate 	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
8390Sstevel@tonic-gate 		return (EIO);
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate 	vfsp = vp->v_vfsp;
8420Sstevel@tonic-gate 	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
8430Sstevel@tonic-gate 	ulp = &ufsvfsp->vfs_ulockfs;
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 	/*
8460Sstevel@tonic-gate 	 * Suspend both the reclaim thread and the delete thread.
8470Sstevel@tonic-gate 	 * This must be done outside the lockfs locking protocol.
8480Sstevel@tonic-gate 	 */
8490Sstevel@tonic-gate 	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
8500Sstevel@tonic-gate 	ufs_thread_suspend(&ufsvfsp->vfs_delete);
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate 	/*
8530Sstevel@tonic-gate 	 * Acquire vfs_reflock around ul_lock to avoid deadlock with
8540Sstevel@tonic-gate 	 * umount/remount/sync.
8550Sstevel@tonic-gate 	 */
8560Sstevel@tonic-gate 	vfs_lock_wait(vfsp);
8570Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
858329Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, 1);
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate 	/*
8610Sstevel@tonic-gate 	 * Quit if there is another lockfs request in progress
8620Sstevel@tonic-gate 	 * that is waiting for existing ufs_vnops to complete.
8630Sstevel@tonic-gate 	 */
8640Sstevel@tonic-gate 	if (ULOCKFS_IS_BUSY(ulp)) {
8650Sstevel@tonic-gate 		error = EBUSY;
8660Sstevel@tonic-gate 		goto errexit;
8670Sstevel@tonic-gate 	}
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	/* cannot ulocked or downgrade a hard-lock */
8700Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(ulp)) {
8710Sstevel@tonic-gate 		error = EIO;
8720Sstevel@tonic-gate 		goto errexit;
8730Sstevel@tonic-gate 	}
8740Sstevel@tonic-gate 
8750Sstevel@tonic-gate 	/* an error lock may be unlocked or relocked, only */
8760Sstevel@tonic-gate 	if (ULOCKFS_IS_ELOCK(ulp)) {
8770Sstevel@tonic-gate 		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
8780Sstevel@tonic-gate 			error = EBUSY;
8790Sstevel@tonic-gate 			goto errexit;
8800Sstevel@tonic-gate 		}
8810Sstevel@tonic-gate 	}
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	/*
8840Sstevel@tonic-gate 	 * a read-only error lock may only be upgraded to an
8850Sstevel@tonic-gate 	 * error lock or hard lock
8860Sstevel@tonic-gate 	 */
8870Sstevel@tonic-gate 	if (ULOCKFS_IS_ROELOCK(ulp)) {
8880Sstevel@tonic-gate 		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
8890Sstevel@tonic-gate 			error = EBUSY;
8900Sstevel@tonic-gate 			goto errexit;
8910Sstevel@tonic-gate 		}
8920Sstevel@tonic-gate 	}
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate 	/*
8950Sstevel@tonic-gate 	 * until read-only error locks are fully implemented
8960Sstevel@tonic-gate 	 * just return EINVAL
8970Sstevel@tonic-gate 	 */
8980Sstevel@tonic-gate 	if (LOCKFS_IS_ROELOCK(lockfsp)) {
8990Sstevel@tonic-gate 		error = EINVAL;
9000Sstevel@tonic-gate 		goto errexit;
9010Sstevel@tonic-gate 	}
9020Sstevel@tonic-gate 
9030Sstevel@tonic-gate 	/*
9040Sstevel@tonic-gate 	 * an error lock may only be applied if the file system is
9050Sstevel@tonic-gate 	 * unlocked or already error locked.
9060Sstevel@tonic-gate 	 * (this is to prevent the case where a fs gets changed out from
9070Sstevel@tonic-gate 	 * underneath a fs that is locked for backup,
9080Sstevel@tonic-gate 	 * that is, name/delete/write-locked.)
9090Sstevel@tonic-gate 	 */
9100Sstevel@tonic-gate 	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
9110Sstevel@tonic-gate 	    !ULOCKFS_IS_ROELOCK(ulp)) &&
9120Sstevel@tonic-gate 	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
9130Sstevel@tonic-gate 		error = EBUSY;
9140Sstevel@tonic-gate 		goto errexit;
9150Sstevel@tonic-gate 	}
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate 	/* get and validate the input lockfs request */
9180Sstevel@tonic-gate 	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
9190Sstevel@tonic-gate 		goto errexit;
9200Sstevel@tonic-gate 
9210Sstevel@tonic-gate 	/*
9220Sstevel@tonic-gate 	 * save current ulockfs struct
9230Sstevel@tonic-gate 	 */
9240Sstevel@tonic-gate 	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	/*
9270Sstevel@tonic-gate 	 * Freeze the file system (pend future accesses)
9280Sstevel@tonic-gate 	 */
9290Sstevel@tonic-gate 	ufs_freeze(ulp, lockfsp);
9300Sstevel@tonic-gate 
9310Sstevel@tonic-gate 	/*
9320Sstevel@tonic-gate 	 * Set locking in progress because ufs_quiesce may free the
9330Sstevel@tonic-gate 	 * ul_lock mutex.
9340Sstevel@tonic-gate 	 */
9350Sstevel@tonic-gate 	ULOCKFS_SET_BUSY(ulp);
9360Sstevel@tonic-gate 	/* update the ioctl copy */
9370Sstevel@tonic-gate 	LOCKFS_SET_BUSY(&ulp->ul_lockfs);
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate 	/*
9400Sstevel@tonic-gate 	 * Quiesce (wait for outstanding accesses to finish)
9410Sstevel@tonic-gate 	 */
9420Sstevel@tonic-gate 	if (error = ufs_quiesce(ulp))
9430Sstevel@tonic-gate 		goto errout;
9440Sstevel@tonic-gate 
9450Sstevel@tonic-gate 	/*
9460Sstevel@tonic-gate 	 * can't wlock or (ro)elock fs with accounting or local swap file
9470Sstevel@tonic-gate 	 */
9480Sstevel@tonic-gate 	if ((ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
9490Sstevel@tonic-gate 	    ULOCKFS_IS_ROELOCK(ulp)) && !from_log) {
9500Sstevel@tonic-gate 		if (error = ufs_checkaccton(vp))
9510Sstevel@tonic-gate 			goto errout;
9520Sstevel@tonic-gate 		if (error = ufs_checkswapon(vp))
9530Sstevel@tonic-gate 			goto errout;
9540Sstevel@tonic-gate 	}
9550Sstevel@tonic-gate 
9560Sstevel@tonic-gate 	/*
9570Sstevel@tonic-gate 	 * save error lock status to pass down to reconcilation
9580Sstevel@tonic-gate 	 * routines and for later cleanup
9590Sstevel@tonic-gate 	 */
9600Sstevel@tonic-gate 	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
9610Sstevel@tonic-gate 		errlck = UN_ERRLCK;
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
9640Sstevel@tonic-gate 		int needs_unlock;
9650Sstevel@tonic-gate 		int needs_sbwrite;
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 		poll_events |= POLLERR;
9680Sstevel@tonic-gate 		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs)?
9690Sstevel@tonic-gate 							RE_ERRLCK: SET_ERRLCK;
9700Sstevel@tonic-gate 
9710Sstevel@tonic-gate 		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
9720Sstevel@tonic-gate 		if (needs_unlock)
9730Sstevel@tonic-gate 			mutex_enter(&ufsvfsp->vfs_lock);
9740Sstevel@tonic-gate 
9750Sstevel@tonic-gate 		/* disable delayed i/o */
9760Sstevel@tonic-gate 		needs_sbwrite = 0;
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate 		if (errlck == SET_ERRLCK) {
9790Sstevel@tonic-gate 			ufsvfsp->vfs_fs->fs_clean = FSBAD;
9800Sstevel@tonic-gate 			needs_sbwrite = 1;
9810Sstevel@tonic-gate 		}
9820Sstevel@tonic-gate 
9830Sstevel@tonic-gate 		needs_sbwrite |= ufsvfsp->vfs_dio;
9840Sstevel@tonic-gate 		ufsvfsp->vfs_dio = 0;
9850Sstevel@tonic-gate 
9860Sstevel@tonic-gate 		if (needs_unlock)
9870Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
9880Sstevel@tonic-gate 
9890Sstevel@tonic-gate 		if (needs_sbwrite) {
9900Sstevel@tonic-gate 			ulp->ul_sbowner = curthread;
9910Sstevel@tonic-gate 			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
9920Sstevel@tonic-gate 
9930Sstevel@tonic-gate 			if (needs_unlock)
9940Sstevel@tonic-gate 				mutex_enter(&ufsvfsp->vfs_lock);
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate 			ufsvfsp->vfs_fs->fs_fmod = 0;
9970Sstevel@tonic-gate 
9980Sstevel@tonic-gate 			if (needs_unlock)
9990Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
10000Sstevel@tonic-gate 		}
10010Sstevel@tonic-gate 	}
10020Sstevel@tonic-gate 
10030Sstevel@tonic-gate 	/*
10040Sstevel@tonic-gate 	 * reconcile superblock and inodes if was wlocked
10050Sstevel@tonic-gate 	 */
10060Sstevel@tonic-gate 	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
10070Sstevel@tonic-gate 		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
10080Sstevel@tonic-gate 			goto errout;
10090Sstevel@tonic-gate 		/*
10100Sstevel@tonic-gate 		 * in case the fs grew; reset the metadata map for logging tests
10110Sstevel@tonic-gate 		 */
10120Sstevel@tonic-gate 		TRANS_MATA_UMOUNT(ufsvfsp);
10130Sstevel@tonic-gate 		TRANS_MATA_MOUNT(ufsvfsp);
10140Sstevel@tonic-gate 		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
10150Sstevel@tonic-gate 	}
10160Sstevel@tonic-gate 
10170Sstevel@tonic-gate 	/*
10180Sstevel@tonic-gate 	 * At least everything *currently* dirty goes out.
10190Sstevel@tonic-gate 	 */
10200Sstevel@tonic-gate 
10210Sstevel@tonic-gate 	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
10220Sstevel@tonic-gate 	    !ULOCKFS_IS_ELOCK(ulp))
10230Sstevel@tonic-gate 		goto errout;
10240Sstevel@tonic-gate 
10250Sstevel@tonic-gate 	/*
10260Sstevel@tonic-gate 	 * thaw file system and wakeup pended processes
10270Sstevel@tonic-gate 	 */
10280Sstevel@tonic-gate 	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
10290Sstevel@tonic-gate 		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
10300Sstevel@tonic-gate 			goto errout;
10310Sstevel@tonic-gate 
10320Sstevel@tonic-gate 	/*
10330Sstevel@tonic-gate 	 * reset modified flag if not already write locked
10340Sstevel@tonic-gate 	 */
10350Sstevel@tonic-gate 	if (!LOCKFS_IS_WLOCK(&lfs))
10360Sstevel@tonic-gate 		ULOCKFS_CLR_MOD(ulp);
10370Sstevel@tonic-gate 
10380Sstevel@tonic-gate 	/*
10390Sstevel@tonic-gate 	 * idle the lock struct
10400Sstevel@tonic-gate 	 */
10410Sstevel@tonic-gate 	ULOCKFS_CLR_BUSY(ulp);
10420Sstevel@tonic-gate 	/* update the ioctl copy */
10430Sstevel@tonic-gate 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	/*
10460Sstevel@tonic-gate 	 * free current comment
10470Sstevel@tonic-gate 	 */
10480Sstevel@tonic-gate 	if (lfs.lf_comment && lfs.lf_comlen != 0) {
10490Sstevel@tonic-gate 		kmem_free(lfs.lf_comment, lfs.lf_comlen);
10500Sstevel@tonic-gate 		lfs.lf_comment = NULL;
10510Sstevel@tonic-gate 		lfs.lf_comlen = 0;
10520Sstevel@tonic-gate 	}
10530Sstevel@tonic-gate 
10540Sstevel@tonic-gate 	/* do error lock cleanup */
10550Sstevel@tonic-gate 	if (errlck == UN_ERRLCK)
10560Sstevel@tonic-gate 		ufsfx_unlockfs(ufsvfsp);
10570Sstevel@tonic-gate 
10580Sstevel@tonic-gate 	else if (errlck == RE_ERRLCK)
10590Sstevel@tonic-gate 		ufsfx_lockfs(ufsvfsp);
10600Sstevel@tonic-gate 
10610Sstevel@tonic-gate 	/* don't allow error lock from user to invoke panic */
10620Sstevel@tonic-gate 	else if (from_user && errlck == SET_ERRLCK &&
10630Sstevel@tonic-gate 		!(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
10640Sstevel@tonic-gate 		(void) ufs_fault(ufsvfsp->vfs_root,
10650Sstevel@tonic-gate 		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
10660Sstevel@tonic-gate 		    ulp->ul_lockfs.lf_comment: "user-applied error lock");
10670Sstevel@tonic-gate 
1068329Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, -1);
10690Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
10700Sstevel@tonic-gate 	vfs_unlock(vfsp);
10710Sstevel@tonic-gate 
10720Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
10730Sstevel@tonic-gate 		poll_events |= POLLERR;
10740Sstevel@tonic-gate 
10750Sstevel@tonic-gate 	pollwakeup(&ufs_pollhd, poll_events);
10760Sstevel@tonic-gate 
10770Sstevel@tonic-gate 	/*
10780Sstevel@tonic-gate 	 * Allow both the delete thread and the reclaim thread to
10790Sstevel@tonic-gate 	 * continue.
10800Sstevel@tonic-gate 	 */
10810Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_delete);
10820Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
10830Sstevel@tonic-gate 
10840Sstevel@tonic-gate 	return (0);
10850Sstevel@tonic-gate 
10860Sstevel@tonic-gate errout:
10870Sstevel@tonic-gate 	/*
10880Sstevel@tonic-gate 	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
10890Sstevel@tonic-gate 	 */
10900Sstevel@tonic-gate 	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
10910Sstevel@tonic-gate 		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
10920Sstevel@tonic-gate 		ulp->ul_fs_lock = (1 << lfs.lf_lock);
10930Sstevel@tonic-gate 	}
10940Sstevel@tonic-gate 	(void) ufs_thaw(vfsp, ufsvfsp, ulp);
10950Sstevel@tonic-gate 	ULOCKFS_CLR_BUSY(ulp);
10960Sstevel@tonic-gate 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
10970Sstevel@tonic-gate 
10980Sstevel@tonic-gate errexit:
1099329Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, -1);
11000Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
11010Sstevel@tonic-gate 	vfs_unlock(vfsp);
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate 	/*
11040Sstevel@tonic-gate 	 * Allow both the delete thread and the reclaim thread to
11050Sstevel@tonic-gate 	 * continue.
11060Sstevel@tonic-gate 	 */
11070Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_delete);
11080Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
11090Sstevel@tonic-gate 
11100Sstevel@tonic-gate 	return (error);
11110Sstevel@tonic-gate }
11120Sstevel@tonic-gate 
11130Sstevel@tonic-gate /*
11140Sstevel@tonic-gate  * fiolfss
11150Sstevel@tonic-gate  * 	return the current file system locking state info
11160Sstevel@tonic-gate  */
11170Sstevel@tonic-gate int
11180Sstevel@tonic-gate ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
11190Sstevel@tonic-gate {
11200Sstevel@tonic-gate 	struct ulockfs	*ulp;
11210Sstevel@tonic-gate 
11220Sstevel@tonic-gate 	if (!vp || !vp->v_vfsp || !VTOI(vp))
11230Sstevel@tonic-gate 		return (EINVAL);
11240Sstevel@tonic-gate 
11250Sstevel@tonic-gate 	/* file system has been forcibly unmounted */
11260Sstevel@tonic-gate 	if (VTOI(vp)->i_ufsvfs == NULL)
11270Sstevel@tonic-gate 		return (EIO);
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate 	ulp = VTOUL(vp);
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(ulp)) {
11320Sstevel@tonic-gate 		*lockfsp = ulp->ul_lockfs;	/* structure assignment */
11330Sstevel@tonic-gate 		return (0);
11340Sstevel@tonic-gate 	}
11350Sstevel@tonic-gate 
11360Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate 	*lockfsp = ulp->ul_lockfs;	/* structure assignment */
11390Sstevel@tonic-gate 
11400Sstevel@tonic-gate 	if (ULOCKFS_IS_MOD(ulp))
11410Sstevel@tonic-gate 		lockfsp->lf_flags |= LOCKFS_MOD;
11420Sstevel@tonic-gate 
11430Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate 	return (0);
11460Sstevel@tonic-gate }
11470Sstevel@tonic-gate 
11480Sstevel@tonic-gate /*
11490Sstevel@tonic-gate  * ufs_check_lockfs
11500Sstevel@tonic-gate  *	check whether a ufs_vnops conflicts with the file system lock
11510Sstevel@tonic-gate  */
11520Sstevel@tonic-gate int
11530Sstevel@tonic-gate ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
11540Sstevel@tonic-gate {
11550Sstevel@tonic-gate 	k_sigset_t	smask;
11560Sstevel@tonic-gate 	int		sig, slock;
11570Sstevel@tonic-gate 
11580Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ulp->ul_lock));
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate 	while (ulp->ul_fs_lock & mask) {
11610Sstevel@tonic-gate 		slock = (int)ULOCKFS_IS_SLOCK(ulp);
11620Sstevel@tonic-gate 		if ((curthread->t_flag & T_DONTPEND) && !slock) {
11630Sstevel@tonic-gate 			curthread->t_flag |= T_WOULDBLOCK;
11640Sstevel@tonic-gate 			return (EAGAIN);
11650Sstevel@tonic-gate 		}
11660Sstevel@tonic-gate 		curthread->t_flag &= ~T_WOULDBLOCK;
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate 		if (ULOCKFS_IS_HLOCK(ulp))
11690Sstevel@tonic-gate 			return (EIO);
11700Sstevel@tonic-gate 
11710Sstevel@tonic-gate 		/*
11720Sstevel@tonic-gate 		 * wait for lock status to change
11730Sstevel@tonic-gate 		 */
11740Sstevel@tonic-gate 		if (slock || ufsvfsp->vfs_nointr) {
11750Sstevel@tonic-gate 			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
11760Sstevel@tonic-gate 		} else {
11770Sstevel@tonic-gate 			sigintr(&smask, 1);
11780Sstevel@tonic-gate 			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
11790Sstevel@tonic-gate 			sigunintr(&smask);
11800Sstevel@tonic-gate 			if ((!sig && (ulp->ul_fs_lock & mask)) ||
11810Sstevel@tonic-gate 				ufsvfsp->vfs_dontblock)
11820Sstevel@tonic-gate 				return (EINTR);
11830Sstevel@tonic-gate 		}
11840Sstevel@tonic-gate 	}
1185329Saguzovsk 	atomic_add_long(&ulp->ul_vnops_cnt, 1);
11860Sstevel@tonic-gate 	return (0);
11870Sstevel@tonic-gate }
11880Sstevel@tonic-gate 
11890Sstevel@tonic-gate /*
11900Sstevel@tonic-gate  * Check whether we came across the handcrafted lockfs protocol path. We can't
11910Sstevel@tonic-gate  * simply check for T_DONTBLOCK here as one would assume since this can also
11920Sstevel@tonic-gate  * falsely catch recursive VOP's going to a different filesystem, instead we
11930Sstevel@tonic-gate  * check if we already hold the ulockfs->ul_lock mutex.
11940Sstevel@tonic-gate  */
11950Sstevel@tonic-gate static int
11960Sstevel@tonic-gate ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
11970Sstevel@tonic-gate {
11980Sstevel@tonic-gate 	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
11990Sstevel@tonic-gate }
12000Sstevel@tonic-gate 
12010Sstevel@tonic-gate /*
12020Sstevel@tonic-gate  * ufs_lockfs_begin - start the lockfs locking protocol
12030Sstevel@tonic-gate  */
12040Sstevel@tonic-gate int
12050Sstevel@tonic-gate ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
12060Sstevel@tonic-gate {
12070Sstevel@tonic-gate 	int 		error;
12080Sstevel@tonic-gate 	int		rec_vop;
12090Sstevel@tonic-gate 	struct ulockfs *ulp;
12100Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info;
12110Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_free;
12120Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_temp;
12130Sstevel@tonic-gate 
12140Sstevel@tonic-gate 	/*
12150Sstevel@tonic-gate 	 * file system has been forcibly unmounted
12160Sstevel@tonic-gate 	 */
12170Sstevel@tonic-gate 	if (ufsvfsp == NULL)
12180Sstevel@tonic-gate 		return (EIO);
12190Sstevel@tonic-gate 
12200Sstevel@tonic-gate 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
12210Sstevel@tonic-gate 
12220Sstevel@tonic-gate 	/*
12230Sstevel@tonic-gate 	 * Do lockfs protocol
12240Sstevel@tonic-gate 	 */
12250Sstevel@tonic-gate 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
12260Sstevel@tonic-gate 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
12270Sstevel@tonic-gate 
12280Sstevel@tonic-gate 	/*
12290Sstevel@tonic-gate 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
12300Sstevel@tonic-gate 	 * path and bail out in that case.
12310Sstevel@tonic-gate 	 */
12320Sstevel@tonic-gate 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
12330Sstevel@tonic-gate 		*ulpp = NULL;
12340Sstevel@tonic-gate 		return (0);
12350Sstevel@tonic-gate 	} else {
12360Sstevel@tonic-gate 		if (ulockfs_info_free == NULL) {
12370Sstevel@tonic-gate 			if ((ulockfs_info_temp = (ulockfs_info_t *)
12380Sstevel@tonic-gate 			    kmem_zalloc(sizeof (ulockfs_info_t),
12390Sstevel@tonic-gate 			    KM_NOSLEEP)) == NULL) {
12400Sstevel@tonic-gate 				*ulpp = NULL;
12410Sstevel@tonic-gate 				return (ENOMEM);
12420Sstevel@tonic-gate 			}
12430Sstevel@tonic-gate 		}
12440Sstevel@tonic-gate 	}
12450Sstevel@tonic-gate 
12460Sstevel@tonic-gate 	/*
12470Sstevel@tonic-gate 	 * First time VOP call
12480Sstevel@tonic-gate 	 */
12490Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
12500Sstevel@tonic-gate 	if (ULOCKFS_IS_JUSTULOCK(ulp))
1251329Saguzovsk 		atomic_add_long(&ulp->ul_vnops_cnt, 1);
12520Sstevel@tonic-gate 	else {
12530Sstevel@tonic-gate 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
12540Sstevel@tonic-gate 			mutex_exit(&ulp->ul_lock);
12550Sstevel@tonic-gate 			if (ulockfs_info_free == NULL)
12560Sstevel@tonic-gate 				kmem_free(ulockfs_info_temp,
12570Sstevel@tonic-gate 				    sizeof (ulockfs_info_t));
12580Sstevel@tonic-gate 			return (error);
12590Sstevel@tonic-gate 		}
12600Sstevel@tonic-gate 	}
12610Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	if (ulockfs_info_free != NULL) {
12640Sstevel@tonic-gate 		ulockfs_info_free->ulp = ulp;
12650Sstevel@tonic-gate 	} else {
12660Sstevel@tonic-gate 		ulockfs_info_temp->ulp = ulp;
12670Sstevel@tonic-gate 		ulockfs_info_temp->next = ulockfs_info;
12680Sstevel@tonic-gate 		ASSERT(ufs_lockfs_key != 0);
12690Sstevel@tonic-gate 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
12700Sstevel@tonic-gate 	}
12710Sstevel@tonic-gate 
12720Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
12730Sstevel@tonic-gate 	return (0);
12740Sstevel@tonic-gate }
12750Sstevel@tonic-gate 
12760Sstevel@tonic-gate /*
12770Sstevel@tonic-gate  * Check whether we are returning from the top level VOP.
12780Sstevel@tonic-gate  */
12790Sstevel@tonic-gate static int
12800Sstevel@tonic-gate ufs_lockfs_top_vop_return(ulockfs_info_t *head)
12810Sstevel@tonic-gate {
12820Sstevel@tonic-gate 	ulockfs_info_t *info;
12830Sstevel@tonic-gate 	int result = 1;
12840Sstevel@tonic-gate 
12850Sstevel@tonic-gate 	for (info = head; info != NULL; info = info->next) {
12860Sstevel@tonic-gate 		if (info->ulp != NULL) {
12870Sstevel@tonic-gate 			result = 0;
12880Sstevel@tonic-gate 			break;
12890Sstevel@tonic-gate 		}
12900Sstevel@tonic-gate 	}
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	return (result);
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate 
12950Sstevel@tonic-gate /*
12960Sstevel@tonic-gate  * ufs_lockfs_end - terminate the lockfs locking protocol
12970Sstevel@tonic-gate  */
12980Sstevel@tonic-gate void
12990Sstevel@tonic-gate ufs_lockfs_end(struct ulockfs *ulp)
13000Sstevel@tonic-gate {
13010Sstevel@tonic-gate 	ulockfs_info_t *info;
13020Sstevel@tonic-gate 	ulockfs_info_t *head;
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 	/*
13050Sstevel@tonic-gate 	 * end-of-VOP protocol
13060Sstevel@tonic-gate 	 */
13070Sstevel@tonic-gate 	if (ulp == NULL)
13080Sstevel@tonic-gate 		return;
13090Sstevel@tonic-gate 
13100Sstevel@tonic-gate 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
13110Sstevel@tonic-gate 	SEARCH_ULOCKFSP(head, ulp, info);
13120Sstevel@tonic-gate 
13130Sstevel@tonic-gate 	/*
13140Sstevel@tonic-gate 	 * If we're called from a first level VOP, we have to have a
13150Sstevel@tonic-gate 	 * valid ulockfs record in the TSD.
13160Sstevel@tonic-gate 	 */
13170Sstevel@tonic-gate 	ASSERT(info != NULL);
13180Sstevel@tonic-gate 
13190Sstevel@tonic-gate 	/*
13200Sstevel@tonic-gate 	 * Invalidate the ulockfs record.
13210Sstevel@tonic-gate 	 */
13220Sstevel@tonic-gate 	info->ulp = NULL;
13230Sstevel@tonic-gate 
13240Sstevel@tonic-gate 	if (ufs_lockfs_top_vop_return(head))
13250Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
13260Sstevel@tonic-gate 
13270Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
13280Sstevel@tonic-gate 
1329329Saguzovsk 	if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
13300Sstevel@tonic-gate 		cv_broadcast(&ulp->ul_cv);
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
13330Sstevel@tonic-gate }
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate /*
13360Sstevel@tonic-gate  * specialized version of ufs_lockfs_begin() called by ufs_getpage().
13370Sstevel@tonic-gate  */
13380Sstevel@tonic-gate int
13390Sstevel@tonic-gate ufs_lockfs_begin_getpage(
13400Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp,
13410Sstevel@tonic-gate 	struct ulockfs	**ulpp,
13420Sstevel@tonic-gate 	struct seg	*seg,
13430Sstevel@tonic-gate 	int		read_access,
13440Sstevel@tonic-gate 	uint_t		*protp)
13450Sstevel@tonic-gate {
13460Sstevel@tonic-gate 	ulong_t			mask;
13470Sstevel@tonic-gate 	int 			error;
13480Sstevel@tonic-gate 	int			rec_vop;
13490Sstevel@tonic-gate 	struct ulockfs		*ulp;
13500Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info;
13510Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info_free;
13520Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info_temp;
13530Sstevel@tonic-gate 
13540Sstevel@tonic-gate 	/*
13550Sstevel@tonic-gate 	 * file system has been forcibly unmounted
13560Sstevel@tonic-gate 	 */
13570Sstevel@tonic-gate 	if (ufsvfsp == NULL)
13580Sstevel@tonic-gate 		return (EIO);
13590Sstevel@tonic-gate 
13600Sstevel@tonic-gate 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
13610Sstevel@tonic-gate 
13620Sstevel@tonic-gate 	/*
13630Sstevel@tonic-gate 	 * Do lockfs protocol
13640Sstevel@tonic-gate 	 */
13650Sstevel@tonic-gate 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
13660Sstevel@tonic-gate 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
13670Sstevel@tonic-gate 
13680Sstevel@tonic-gate 	/*
13690Sstevel@tonic-gate 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
13700Sstevel@tonic-gate 	 * path and bail out in that case.
13710Sstevel@tonic-gate 	 */
13720Sstevel@tonic-gate 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
13730Sstevel@tonic-gate 		*ulpp = NULL;
13740Sstevel@tonic-gate 		return (0);
13750Sstevel@tonic-gate 	} else {
13760Sstevel@tonic-gate 		if (ulockfs_info_free == NULL) {
13770Sstevel@tonic-gate 			if ((ulockfs_info_temp = (ulockfs_info_t *)
13780Sstevel@tonic-gate 			    kmem_zalloc(sizeof (ulockfs_info_t),
13790Sstevel@tonic-gate 			    KM_NOSLEEP)) == NULL) {
13800Sstevel@tonic-gate 				*ulpp = NULL;
13810Sstevel@tonic-gate 				return (ENOMEM);
13820Sstevel@tonic-gate 			}
13830Sstevel@tonic-gate 		}
13840Sstevel@tonic-gate 	}
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	/*
13870Sstevel@tonic-gate 	 * First time VOP call
13880Sstevel@tonic-gate 	 */
13890Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
13900Sstevel@tonic-gate 	if (ULOCKFS_IS_JUSTULOCK(ulp))
13910Sstevel@tonic-gate 		/*
13920Sstevel@tonic-gate 		 * fs is not locked, simply inc the active-ops counter
13930Sstevel@tonic-gate 		 */
1394329Saguzovsk 		atomic_add_long(&ulp->ul_vnops_cnt, 1);
13950Sstevel@tonic-gate 	else {
13960Sstevel@tonic-gate 		if (seg->s_ops == &segvn_ops &&
13970Sstevel@tonic-gate 		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
13980Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
13990Sstevel@tonic-gate 		} else if (protp && read_access) {
14000Sstevel@tonic-gate 			/*
14010Sstevel@tonic-gate 			 * Restrict the mapping to readonly.
14020Sstevel@tonic-gate 			 * Writes to this mapping will cause
14030Sstevel@tonic-gate 			 * another fault which will then
14040Sstevel@tonic-gate 			 * be suspended if fs is write locked
14050Sstevel@tonic-gate 			 */
14060Sstevel@tonic-gate 			*protp &= ~PROT_WRITE;
14070Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
14080Sstevel@tonic-gate 		} else
14090Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;
14100Sstevel@tonic-gate 
14110Sstevel@tonic-gate 		/*
14120Sstevel@tonic-gate 		 * will sleep if this fs is locked against this VOP
14130Sstevel@tonic-gate 		 */
14140Sstevel@tonic-gate 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
14150Sstevel@tonic-gate 			mutex_exit(&ulp->ul_lock);
14160Sstevel@tonic-gate 			if (ulockfs_info_free == NULL)
14170Sstevel@tonic-gate 				kmem_free(ulockfs_info_temp,
14180Sstevel@tonic-gate 				    sizeof (ulockfs_info_t));
14190Sstevel@tonic-gate 			return (error);
14200Sstevel@tonic-gate 		}
14210Sstevel@tonic-gate 	}
14220Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
14230Sstevel@tonic-gate 
14240Sstevel@tonic-gate 	if (ulockfs_info_free != NULL) {
14250Sstevel@tonic-gate 		ulockfs_info_free->ulp = ulp;
14260Sstevel@tonic-gate 	} else {
14270Sstevel@tonic-gate 		ulockfs_info_temp->ulp = ulp;
14280Sstevel@tonic-gate 		ulockfs_info_temp->next = ulockfs_info;
14290Sstevel@tonic-gate 		ASSERT(ufs_lockfs_key != 0);
14300Sstevel@tonic-gate 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
14310Sstevel@tonic-gate 	}
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
14340Sstevel@tonic-gate 	return (0);
14350Sstevel@tonic-gate }
14360Sstevel@tonic-gate 
14370Sstevel@tonic-gate void
14380Sstevel@tonic-gate ufs_lockfs_tsd_destructor(void *head)
14390Sstevel@tonic-gate {
14400Sstevel@tonic-gate 	ulockfs_info_t *curr = (ulockfs_info_t *)head;
14410Sstevel@tonic-gate 	ulockfs_info_t *temp;
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 	for (; curr != NULL; ) {
14440Sstevel@tonic-gate 		/*
14450Sstevel@tonic-gate 		 * The TSD destructor is being called when the thread exits
14460Sstevel@tonic-gate 		 * (via thread_exit()). At that time it must have cleaned up
14470Sstevel@tonic-gate 		 * all VOPs via ufs_lockfs_end() and there must not be a
14480Sstevel@tonic-gate 		 * valid ulockfs record exist while a thread is exiting.
14490Sstevel@tonic-gate 		 */
14500Sstevel@tonic-gate 		temp = curr;
14510Sstevel@tonic-gate 		curr = curr->next;
14520Sstevel@tonic-gate 		ASSERT(temp->ulp == NULL);
14530Sstevel@tonic-gate 		kmem_free(temp, sizeof (ulockfs_info_t));
14540Sstevel@tonic-gate 	}
14550Sstevel@tonic-gate }
1456