xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_lockfs.c (revision 12607:2bc0f474d551)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51365Sowenr  * Common Development and Distribution License (the "License").
61365Sowenr  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*12607Sjohn.levon@sun.com  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #include <sys/types.h>
260Sstevel@tonic-gate #include <sys/t_lock.h>
270Sstevel@tonic-gate #include <sys/param.h>
280Sstevel@tonic-gate #include <sys/time.h>
290Sstevel@tonic-gate #include <sys/systm.h>
300Sstevel@tonic-gate #include <sys/sysmacros.h>
310Sstevel@tonic-gate #include <sys/resource.h>
320Sstevel@tonic-gate #include <sys/signal.h>
330Sstevel@tonic-gate #include <sys/cred.h>
340Sstevel@tonic-gate #include <sys/user.h>
350Sstevel@tonic-gate #include <sys/buf.h>
360Sstevel@tonic-gate #include <sys/vfs.h>
370Sstevel@tonic-gate #include <sys/vnode.h>
380Sstevel@tonic-gate #include <sys/proc.h>
390Sstevel@tonic-gate #include <sys/disp.h>
400Sstevel@tonic-gate #include <sys/file.h>
410Sstevel@tonic-gate #include <sys/fcntl.h>
420Sstevel@tonic-gate #include <sys/flock.h>
43329Saguzovsk #include <sys/atomic.h>
440Sstevel@tonic-gate #include <sys/kmem.h>
450Sstevel@tonic-gate #include <sys/uio.h>
460Sstevel@tonic-gate #include <sys/conf.h>
470Sstevel@tonic-gate #include <sys/mman.h>
480Sstevel@tonic-gate #include <sys/pathname.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <sys/vmsystm.h>
510Sstevel@tonic-gate #include <sys/cmn_err.h>
520Sstevel@tonic-gate #include <sys/acct.h>
530Sstevel@tonic-gate #include <sys/dnlc.h>
540Sstevel@tonic-gate #include <sys/swap.h>
550Sstevel@tonic-gate 
560Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
570Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
580Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
590Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
600Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
610Sstevel@tonic-gate #include <sys/fs/ufs_mount.h>
620Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
630Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
640Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
650Sstevel@tonic-gate #include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
660Sstevel@tonic-gate #include <sys/errno.h>
670Sstevel@tonic-gate #include <sys/sysinfo.h>
680Sstevel@tonic-gate 
690Sstevel@tonic-gate #include <vm/hat.h>
700Sstevel@tonic-gate #include <vm/pvn.h>
710Sstevel@tonic-gate #include <vm/as.h>
720Sstevel@tonic-gate #include <vm/seg.h>
730Sstevel@tonic-gate #include <vm/seg_map.h>
740Sstevel@tonic-gate #include <vm/seg_vn.h>
750Sstevel@tonic-gate #include <vm/rm.h>
760Sstevel@tonic-gate #include <vm/anon.h>
770Sstevel@tonic-gate #include <sys/swap.h>
780Sstevel@tonic-gate #include <sys/dnlc.h>
790Sstevel@tonic-gate 
800Sstevel@tonic-gate extern struct vnode *common_specvp(struct vnode *vp);
810Sstevel@tonic-gate 
820Sstevel@tonic-gate /* error lock status */
/*
 * UN_ERRLCK requests release of an error lock (see ufs_reconcile_fs());
 * SET_ERRLCK/RE_ERRLCK/NO_ERRLCK are presumably set/re-establish/no-action
 * states used by code outside this chunk -- verify against the rest of
 * the file.
 */
830Sstevel@tonic-gate #define	UN_ERRLCK	(-1)
840Sstevel@tonic-gate #define	SET_ERRLCK	1
850Sstevel@tonic-gate #define	RE_ERRLCK	2
860Sstevel@tonic-gate #define	NO_ERRLCK	0
870Sstevel@tonic-gate 
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate  * Index to be used in TSD for storing lockfs data
900Sstevel@tonic-gate  */
910Sstevel@tonic-gate uint_t ufs_lockfs_key;
920Sstevel@tonic-gate 
/*
 * Per-thread (TSD) record of the ulockfs structures this thread currently
 * has vnode operations in progress on; manipulated via IS_REC_VOP and
 * SEARCH_ULOCKFSP below.
 */
930Sstevel@tonic-gate typedef struct _ulockfs_info {
940Sstevel@tonic-gate 	struct _ulockfs_info *next;	/* next entry on this thread's list */
950Sstevel@tonic-gate 	struct ulockfs *ulp;	/* fs tracked by this entry; NULL = free slot */
96923Ssdebnath 	uint_t flags;	/* ULOCK_INFO_* flags */
970Sstevel@tonic-gate } ulockfs_info_t;
980Sstevel@tonic-gate 
99923Ssdebnath #define	ULOCK_INFO_FALLOCATE	0x00000001	/* fallocate thread */
100923Ssdebnath 
1010Sstevel@tonic-gate /*
1020Sstevel@tonic-gate  * Check in TSD that whether we are already doing any VOP on this filesystem
 *
 * On exit: `found' is 1 if an entry for `ulp' exists on the list rooted at
 * `head' (i.e. this thread is already inside a VOP on that fs), else 0;
 * `free' points to the first spare slot (entry with ulp == NULL), or is
 * NULL when no spare slot exists.
1030Sstevel@tonic-gate  */
1040Sstevel@tonic-gate #define	IS_REC_VOP(found, head, ulp, free)		\
1050Sstevel@tonic-gate {							\
1060Sstevel@tonic-gate 	ulockfs_info_t *_curr;				\
1070Sstevel@tonic-gate 							\
1080Sstevel@tonic-gate 	for (found = 0, free = NULL, _curr = head;	\
1090Sstevel@tonic-gate 	    _curr != NULL; _curr = _curr->next) {	\
1100Sstevel@tonic-gate 		if ((free == NULL) &&			\
1110Sstevel@tonic-gate 		    (_curr->ulp == NULL))		\
1120Sstevel@tonic-gate 			free = _curr;			\
1130Sstevel@tonic-gate 		if (_curr->ulp == ulp) {		\
1140Sstevel@tonic-gate 			found = 1;			\
1150Sstevel@tonic-gate 			break;				\
1160Sstevel@tonic-gate 		}					\
1170Sstevel@tonic-gate 	}						\
1180Sstevel@tonic-gate }
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate /*
1210Sstevel@tonic-gate  * Get the lockfs data from TSD so that lockfs handles the recursive VOP
1220Sstevel@tonic-gate  * properly
 *
 * On exit: `info' points at the list entry whose ulp matches, or is NULL
 * when this thread has no record of a VOP in progress on `ulp'.
1230Sstevel@tonic-gate  */
1240Sstevel@tonic-gate #define	SEARCH_ULOCKFSP(head, ulp, info)		\
1250Sstevel@tonic-gate {							\
1260Sstevel@tonic-gate 	ulockfs_info_t *_curr;				\
1270Sstevel@tonic-gate 							\
1280Sstevel@tonic-gate 	for (_curr = head; _curr != NULL;		\
1290Sstevel@tonic-gate 	    _curr = _curr->next) {			\
1300Sstevel@tonic-gate 		if (_curr->ulp == ulp) {		\
1310Sstevel@tonic-gate 			break;				\
1320Sstevel@tonic-gate 		}					\
1330Sstevel@tonic-gate 	}						\
1340Sstevel@tonic-gate 							\
1350Sstevel@tonic-gate 	info = _curr;					\
1360Sstevel@tonic-gate }
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate /*
1390Sstevel@tonic-gate  * Validate lockfs request
1400Sstevel@tonic-gate  */
1410Sstevel@tonic-gate static int
ufs_getlfd(struct lockfs * lockfsp,struct lockfs * ul_lockfsp)1420Sstevel@tonic-gate ufs_getlfd(
1430Sstevel@tonic-gate 	struct lockfs *lockfsp,		/* new lock request */
1440Sstevel@tonic-gate 	struct lockfs *ul_lockfsp)	/* old lock state */
1450Sstevel@tonic-gate {
1460Sstevel@tonic-gate 	int	error = 0;
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	/*
1490Sstevel@tonic-gate 	 * no input flags defined
1500Sstevel@tonic-gate 	 */
1510Sstevel@tonic-gate 	if (lockfsp->lf_flags != 0) {
1520Sstevel@tonic-gate 		error = EINVAL;
1530Sstevel@tonic-gate 		goto errout;
1540Sstevel@tonic-gate 	}
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	/*
1570Sstevel@tonic-gate 	 * check key
1580Sstevel@tonic-gate 	 */
1590Sstevel@tonic-gate 	if (!LOCKFS_IS_ULOCK(ul_lockfsp))
1600Sstevel@tonic-gate 		if (lockfsp->lf_key != ul_lockfsp->lf_key) {
1610Sstevel@tonic-gate 			error = EINVAL;
1620Sstevel@tonic-gate 			goto errout;
1630Sstevel@tonic-gate 	}
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	lockfsp->lf_key = ul_lockfsp->lf_key + 1;
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate errout:
1680Sstevel@tonic-gate 	return (error);
1690Sstevel@tonic-gate }
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate /*
1720Sstevel@tonic-gate  * ufs_checkaccton
1730Sstevel@tonic-gate  *	check if accounting is turned on on this fs
1740Sstevel@tonic-gate  */
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate int
ufs_checkaccton(struct vnode * vp)1770Sstevel@tonic-gate ufs_checkaccton(struct vnode *vp)
1780Sstevel@tonic-gate {
1790Sstevel@tonic-gate 	if (acct_fs_in_use(vp))
1800Sstevel@tonic-gate 		return (EDEADLK);
1810Sstevel@tonic-gate 	return (0);
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate /*
1850Sstevel@tonic-gate  * ufs_checkswapon
1860Sstevel@tonic-gate  *	check if local swapping is to file on this fs
1870Sstevel@tonic-gate  */
1880Sstevel@tonic-gate int
ufs_checkswapon(struct vnode * vp)1890Sstevel@tonic-gate ufs_checkswapon(struct vnode *vp)
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate 	struct swapinfo	*sip;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	mutex_enter(&swapinfo_lock);
1940Sstevel@tonic-gate 	for (sip = swapinfo; sip; sip = sip->si_next)
1950Sstevel@tonic-gate 		if (sip->si_vp->v_vfsp == vp->v_vfsp) {
1960Sstevel@tonic-gate 			mutex_exit(&swapinfo_lock);
1970Sstevel@tonic-gate 			return (EDEADLK);
1980Sstevel@tonic-gate 		}
1990Sstevel@tonic-gate 	mutex_exit(&swapinfo_lock);
2000Sstevel@tonic-gate 	return (0);
2010Sstevel@tonic-gate }
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate /*
2040Sstevel@tonic-gate  * ufs_freeze
2050Sstevel@tonic-gate  *	pend future accesses for current lock and desired lock
2060Sstevel@tonic-gate  */
2070Sstevel@tonic-gate void
ufs_freeze(struct ulockfs * ulp,struct lockfs * lockfsp)2080Sstevel@tonic-gate ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate 	/*
2110Sstevel@tonic-gate 	 * set to new lock type
2120Sstevel@tonic-gate 	 */
2130Sstevel@tonic-gate 	ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
2140Sstevel@tonic-gate 	ulp->ul_lockfs.lf_key = lockfsp->lf_key;
2150Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
2160Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate 	ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
2190Sstevel@tonic-gate }
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate /*
222329Saguzovsk  * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
223329Saguzovsk  * starting ufs_quiesce() protocol and decrement it only when a file system no
224329Saguzovsk  * longer has to be in quiescent state. This allows ufs_pageio() to detect
225329Saguzovsk  * that another thread wants to quiesce a file system. See more comments in
226329Saguzovsk  * ufs_pageio().
227329Saguzovsk  */
/* system-wide count of quiesce attempts in progress or still required */
228329Saguzovsk ulong_t ufs_quiesce_pend = 0;
229329Saguzovsk 
230329Saguzovsk /*
2310Sstevel@tonic-gate  * ufs_quiesce
2320Sstevel@tonic-gate  *	wait for outstanding accesses to finish
 *
 *	Expects ulp->ul_lock to be held (the cv wait below sleeps on it)
 *	and ufs_quiesce_pend to have been incremented by the caller
 *	(asserted below).  Returns 0 once the file system is quiescent --
 *	or, for a recursive fallocate thread, as soon as no other vnode
 *	operations remain -- and EINTR if the wait was interrupted by a
 *	signal.
2330Sstevel@tonic-gate  */
2340Sstevel@tonic-gate int
ufs_quiesce(struct ulockfs * ulp)2350Sstevel@tonic-gate ufs_quiesce(struct ulockfs *ulp)
2360Sstevel@tonic-gate {
2370Sstevel@tonic-gate 	int error = 0;
238923Ssdebnath 	ulockfs_info_t *head;
239923Ssdebnath 	ulockfs_info_t *info;
24010278SFrank.Batschulat@Sun.COM 	klwp_t *lwp = ttolwp(curthread);
241923Ssdebnath 
	/* non-NULL info means this thread already has a VOP open on this fs */
242923Ssdebnath 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
243923Ssdebnath 	SEARCH_ULOCKFSP(head, ulp, info);
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	/*
24610278SFrank.Batschulat@Sun.COM 	 * We have to keep /proc away from stopping us after we applied
24710278SFrank.Batschulat@Sun.COM 	 * the softlock but before we got a chance to clear it again.
24810278SFrank.Batschulat@Sun.COM 	 * prstop() may pagefault and become stuck on the softlock still
24910278SFrank.Batschulat@Sun.COM 	 * pending.
25010278SFrank.Batschulat@Sun.COM 	 */
25110278SFrank.Batschulat@Sun.COM 	if (lwp != NULL)
25210278SFrank.Batschulat@Sun.COM 		lwp->lwp_nostop++;
25310278SFrank.Batschulat@Sun.COM 
25410278SFrank.Batschulat@Sun.COM 	/*
2550Sstevel@tonic-gate 	 * Set a softlock to suspend future ufs_vnops so that
2560Sstevel@tonic-gate 	 * this lockfs request will not be starved
2570Sstevel@tonic-gate 	 */
2580Sstevel@tonic-gate 	ULOCKFS_SET_SLOCK(ulp);
259329Saguzovsk 	ASSERT(ufs_quiesce_pend);
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	/* check if there is any outstanding ufs vnodeops calls */
262923Ssdebnath 	while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) {
263329Saguzovsk 		/*
264329Saguzovsk 		 * use timed version of cv_wait_sig() to make sure we don't
265329Saguzovsk 		 * miss a wake up call from ufs_pageio() when it doesn't use
266329Saguzovsk 		 * ul_lock.
267923Ssdebnath 		 *
268923Ssdebnath 		 * when a fallocate thread comes in, the only way it returns
269923Ssdebnath 		 * from this function is if there are no other vnode operations
270923Ssdebnath 		 * going on (remember fallocate threads are tracked using
271923Ssdebnath 		 * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread
272923Ssdebnath 		 * hasn't already grabbed the fs write lock.
273329Saguzovsk 		 */
274923Ssdebnath 		if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
275923Ssdebnath 			if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp))
276923Ssdebnath 				goto out;
277923Ssdebnath 		}
		/* a zero return from the cv wait means a signal arrived */
27811066Srafael.vanoni@sun.com 		if (!cv_reltimedwait_sig(&ulp->ul_cv, &ulp->ul_lock, hz,
27911066Srafael.vanoni@sun.com 		    TR_CLOCK_TICK)) {
2800Sstevel@tonic-gate 			error = EINTR;
2810Sstevel@tonic-gate 			goto out;
2820Sstevel@tonic-gate 		}
283923Ssdebnath 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate out:
2860Sstevel@tonic-gate 	/*
2870Sstevel@tonic-gate 	 * unlock the soft lock
2880Sstevel@tonic-gate 	 */
2890Sstevel@tonic-gate 	ULOCKFS_CLR_SLOCK(ulp);
2900Sstevel@tonic-gate 
29110278SFrank.Batschulat@Sun.COM 	if (lwp != NULL)
29210278SFrank.Batschulat@Sun.COM 		lwp->lwp_nostop--;
29310278SFrank.Batschulat@Sun.COM 
2940Sstevel@tonic-gate 	return (error);
2950Sstevel@tonic-gate }
296923Ssdebnath 
2970Sstevel@tonic-gate /*
2980Sstevel@tonic-gate  * ufs_flush_inode
2990Sstevel@tonic-gate  */
3000Sstevel@tonic-gate int
ufs_flush_inode(struct inode * ip,void * arg)3010Sstevel@tonic-gate ufs_flush_inode(struct inode *ip, void *arg)
3020Sstevel@tonic-gate {
3030Sstevel@tonic-gate 	int	error;
3040Sstevel@tonic-gate 	int	saverror	= 0;
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	/*
3070Sstevel@tonic-gate 	 * wrong file system; keep looking
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
3100Sstevel@tonic-gate 		return (0);
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	/*
3130Sstevel@tonic-gate 	 * asynchronously push all the dirty pages
3140Sstevel@tonic-gate 	 */
3150Sstevel@tonic-gate 	if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
3160Sstevel@tonic-gate 	    (error != EAGAIN))
3170Sstevel@tonic-gate 		saverror = error;
3180Sstevel@tonic-gate 	/*
3190Sstevel@tonic-gate 	 * wait for io and discard all mappings
3200Sstevel@tonic-gate 	 */
3210Sstevel@tonic-gate 	if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
3220Sstevel@tonic-gate 		saverror = error;
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 	if (ITOV(ip)->v_type == VDIR) {
3250Sstevel@tonic-gate 		dnlc_dir_purge(&ip->i_danchor);
3260Sstevel@tonic-gate 	}
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	return (saverror);
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate /*
3320Sstevel@tonic-gate  * ufs_flush
3330Sstevel@tonic-gate  *	Flush everything that is currently dirty; this includes invalidating
3340Sstevel@tonic-gate  *	any mappings.
 *
 *	Best-effort: each step runs even if an earlier one failed; the
 *	first error encountered is kept in `saverror' and returned at the
 *	end.  The caller must hold the vfs lock (asserted on entry).
3350Sstevel@tonic-gate  */
3360Sstevel@tonic-gate int
ufs_flush(struct vfs * vfsp)3370Sstevel@tonic-gate ufs_flush(struct vfs *vfsp)
3380Sstevel@tonic-gate {
3390Sstevel@tonic-gate 	int		error;
3400Sstevel@tonic-gate 	int		saverror = 0;
3410Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
3420Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
3433454Smishra 	int		tdontblock = 0;
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	ASSERT(vfs_lock_held(vfsp));
3460Sstevel@tonic-gate 
3470Sstevel@tonic-gate 	/*
3480Sstevel@tonic-gate 	 * purge dnlc
3490Sstevel@tonic-gate 	 */
3500Sstevel@tonic-gate 	(void) dnlc_purge_vfsp(vfsp, 0);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	/*
3530Sstevel@tonic-gate 	 * drain the delete and idle threads
3540Sstevel@tonic-gate 	 */
3550Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3560Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate 	/*
3590Sstevel@tonic-gate 	 * flush and invalidate quota records
3600Sstevel@tonic-gate 	 */
3610Sstevel@tonic-gate 	(void) qsync(ufsvfsp);
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	/*
3640Sstevel@tonic-gate 	 * flush w/invalidate the inodes for vfsp
3650Sstevel@tonic-gate 	 */
3660Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
3670Sstevel@tonic-gate 		saverror = error;
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate 	/*
3700Sstevel@tonic-gate 	 * synchronously flush superblock and summary info
	 * NOTE(review): fs_fmod is cleared before the update -- presumably
	 * so a concurrent modification re-marks the fs dirty; confirm.
3710Sstevel@tonic-gate 	 */
3720Sstevel@tonic-gate 	if (fs->fs_ronly == 0 && fs->fs_fmod) {
3730Sstevel@tonic-gate 		fs->fs_fmod = 0;
3740Sstevel@tonic-gate 		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
3750Sstevel@tonic-gate 	}
3760Sstevel@tonic-gate 	/*
3770Sstevel@tonic-gate 	 * flush w/invalidate block device pages and buf cache
3780Sstevel@tonic-gate 	 */
3790Sstevel@tonic-gate 	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
3805331Samw 	    (offset_t)0, 0, B_INVAL, CRED(), NULL)) > 0)
3810Sstevel@tonic-gate 		saverror = error;
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 	(void) bflush((dev_t)vfsp->vfs_dev);
3840Sstevel@tonic-gate 	(void) bfinval((dev_t)vfsp->vfs_dev, 0);
3850Sstevel@tonic-gate 
3860Sstevel@tonic-gate 	/*
3870Sstevel@tonic-gate 	 * drain the delete and idle threads again
3880Sstevel@tonic-gate 	 */
3890Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3900Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate 	/*
3930Sstevel@tonic-gate 	 * play with the clean flag
3940Sstevel@tonic-gate 	 */
3950Sstevel@tonic-gate 	if (saverror == 0)
3960Sstevel@tonic-gate 		ufs_checkclean(vfsp);
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate 	/*
399921Sbatschul 	 * Flush any outstanding transactions and roll the log
400921Sbatschul 	 * only if we are supposed to do, i.e. LDL_NOROLL not set.
401921Sbatschul 	 * We can not simply check for fs_ronly here since fsck also may
402921Sbatschul 	 * use this code to roll the log on a read-only filesystem, e.g.
403921Sbatschul 	 * root during early stages of boot, if other then a sanity check is
404921Sbatschul 	 * done, it will clear LDL_NOROLL before.
405921Sbatschul 	 * In addition we assert that the deltamap does not contain any deltas
406921Sbatschul 	 * in case LDL_NOROLL is set since this is not supposed to happen.
4070Sstevel@tonic-gate 	 */
4080Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp)) {
409921Sbatschul 		ml_unit_t	*ul	= ufsvfsp->vfs_log;
410921Sbatschul 		mt_map_t	*mtm	= ul->un_deltamap;
411921Sbatschul 
412921Sbatschul 		if (ul->un_flags & LDL_NOROLL) {
413921Sbatschul 			ASSERT(mtm->mtm_nme == 0);
414921Sbatschul 		} else {
4153454Smishra 			/*
4163454Smishra 			 * Do not set T_DONTBLOCK if there is a
4173454Smishra 			 * transaction opened by caller.
4183454Smishra 			 */
4193454Smishra 			if (curthread->t_flag & T_DONTBLOCK)
4203454Smishra 				tdontblock = 1;
4213454Smishra 			else
4223454Smishra 				curthread->t_flag |= T_DONTBLOCK;
4233454Smishra 
424921Sbatschul 			TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH,
425921Sbatschul 			    TOP_COMMIT_SIZE, error);
4263454Smishra 
427921Sbatschul 			if (!error) {
428921Sbatschul 				TRANS_END_SYNC(ufsvfsp, saverror,
429921Sbatschul 				    TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE);
430921Sbatschul 			}
4313454Smishra 
			/* clear T_DONTBLOCK only if we set it ourselves */
4323454Smishra 			if (tdontblock == 0)
4333454Smishra 				curthread->t_flag &= ~T_DONTBLOCK;
4343454Smishra 
435921Sbatschul 			logmap_roll_dev(ufsvfsp->vfs_log);
436427Sdduvall 		}
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	return (saverror);
4400Sstevel@tonic-gate }
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate /*
4430Sstevel@tonic-gate  * ufs_thaw_wlock
4440Sstevel@tonic-gate  *	special processing when thawing down to wlock
4450Sstevel@tonic-gate  */
4460Sstevel@tonic-gate static int
ufs_thaw_wlock(struct inode * ip,void * arg)4470Sstevel@tonic-gate ufs_thaw_wlock(struct inode *ip, void *arg)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate 	/*
4500Sstevel@tonic-gate 	 * wrong file system; keep looking
4510Sstevel@tonic-gate 	 */
4520Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4530Sstevel@tonic-gate 		return (0);
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	/*
4560Sstevel@tonic-gate 	 * iupdat refuses to clear flags if the fs is read only.  The fs
4570Sstevel@tonic-gate 	 * may become read/write during the lock and we wouldn't want
4580Sstevel@tonic-gate 	 * these inodes being written to disk.  So clear the flags.
4590Sstevel@tonic-gate 	 */
4600Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4610Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4620Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 	/*
4650Sstevel@tonic-gate 	 * pages are mlocked -- fail wlock
4660Sstevel@tonic-gate 	 */
4670Sstevel@tonic-gate 	if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
4680Sstevel@tonic-gate 		return (EBUSY);
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	return (0);
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate /*
4740Sstevel@tonic-gate  * ufs_thaw_hlock
4750Sstevel@tonic-gate  *	special processing when thawing down to hlock or elock
4760Sstevel@tonic-gate  */
4770Sstevel@tonic-gate static int
ufs_thaw_hlock(struct inode * ip,void * arg)4780Sstevel@tonic-gate ufs_thaw_hlock(struct inode *ip, void *arg)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate 	struct vnode	*vp	= ITOV(ip);
4810Sstevel@tonic-gate 
4820Sstevel@tonic-gate 	/*
4830Sstevel@tonic-gate 	 * wrong file system; keep looking
4840Sstevel@tonic-gate 	 */
4850Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4860Sstevel@tonic-gate 		return (0);
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 	/*
4890Sstevel@tonic-gate 	 * blow away all pages - even if they are mlocked
4900Sstevel@tonic-gate 	 */
4910Sstevel@tonic-gate 	do {
4920Sstevel@tonic-gate 		(void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
4930Sstevel@tonic-gate 	} while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
4940Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4950Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4960Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4970Sstevel@tonic-gate 
4980Sstevel@tonic-gate 	return (0);
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate /*
5020Sstevel@tonic-gate  * ufs_thaw
5030Sstevel@tonic-gate  *	thaw file system lock down to current value
 *
 *	For wlock/hlock/elock targets the fs is flushed twice (only the
 *	second flush's error is kept) and inodes get lock-specific
 *	scrubbing; for any other lock value, normal operation (atimes,
 *	deletes, superblock writes) resumes.  Always broadcasts on ul_cv
 *	before returning 0 or an errno.
5040Sstevel@tonic-gate  */
5050Sstevel@tonic-gate int
ufs_thaw(struct vfs * vfsp,struct ufsvfs * ufsvfsp,struct ulockfs * ulp)5060Sstevel@tonic-gate ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
5070Sstevel@tonic-gate {
5080Sstevel@tonic-gate 	int		error	= 0;
5090Sstevel@tonic-gate 	int		noidel	= (int)(ulp->ul_flag & ULOCKFS_NOIDEL);
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate 	/*
5120Sstevel@tonic-gate 	 * if wlock or hlock or elock
5130Sstevel@tonic-gate 	 */
5140Sstevel@tonic-gate 	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
5150Sstevel@tonic-gate 	    ULOCKFS_IS_ELOCK(ulp)) {
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 		/*
5180Sstevel@tonic-gate 		 * don't keep access times
5190Sstevel@tonic-gate 		 * don't free deleted files
5200Sstevel@tonic-gate 		 * if superblock writes are allowed, limit them to me for now
5210Sstevel@tonic-gate 		 */
5220Sstevel@tonic-gate 		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
5230Sstevel@tonic-gate 		if (ulp->ul_sbowner != (kthread_id_t)-1)
5240Sstevel@tonic-gate 			ulp->ul_sbowner = curthread;
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate 		/*
5270Sstevel@tonic-gate 		 * wait for writes for deleted files and superblock updates
		 * (result deliberately dropped; the second flush below is
		 * the one whose error we report)
5280Sstevel@tonic-gate 		 */
5290Sstevel@tonic-gate 		(void) ufs_flush(vfsp);
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 		/*
5320Sstevel@tonic-gate 		 * now make sure the quota file is up-to-date
5330Sstevel@tonic-gate 		 *	expensive; but effective
5340Sstevel@tonic-gate 		 */
5350Sstevel@tonic-gate 		error = ufs_flush(vfsp);
5360Sstevel@tonic-gate 		/*
5370Sstevel@tonic-gate 		 * no one can write the superblock
5380Sstevel@tonic-gate 		 */
5390Sstevel@tonic-gate 		ulp->ul_sbowner = (kthread_id_t)-1;
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 		/*
5420Sstevel@tonic-gate 		 * special processing for wlock/hlock/elock
5430Sstevel@tonic-gate 		 */
5440Sstevel@tonic-gate 		if (ULOCKFS_IS_WLOCK(ulp)) {
5450Sstevel@tonic-gate 			if (error)
5460Sstevel@tonic-gate 				goto errout;
5470Sstevel@tonic-gate 			error = bfinval(ufsvfsp->vfs_dev, 0);
5480Sstevel@tonic-gate 			if (error)
5490Sstevel@tonic-gate 				goto errout;
5500Sstevel@tonic-gate 			error = ufs_scan_inodes(0, ufs_thaw_wlock,
5514662Sfrankho 			    (void *)ufsvfsp, ufsvfsp);
5520Sstevel@tonic-gate 			if (error)
5530Sstevel@tonic-gate 				goto errout;
5540Sstevel@tonic-gate 		}
5550Sstevel@tonic-gate 		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
			/* hlock/elock proceed regardless of flush errors */
5560Sstevel@tonic-gate 			error = 0;
5570Sstevel@tonic-gate 			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
5584662Sfrankho 			    (void *)ufsvfsp, ufsvfsp);
5590Sstevel@tonic-gate 			(void) bfinval(ufsvfsp->vfs_dev, 1);
5600Sstevel@tonic-gate 		}
5610Sstevel@tonic-gate 	} else {
5620Sstevel@tonic-gate 
5630Sstevel@tonic-gate 		/*
5640Sstevel@tonic-gate 		 * okay to keep access times
5650Sstevel@tonic-gate 		 * okay to free deleted files
5660Sstevel@tonic-gate 		 * okay to write the superblock
5670Sstevel@tonic-gate 		 */
5680Sstevel@tonic-gate 		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
5690Sstevel@tonic-gate 		ulp->ul_sbowner = NULL;
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 		/*
5720Sstevel@tonic-gate 		 * flush in case deleted files are in memory
		 * (noidel captured ULOCKFS_NOIDEL before it was cleared above)
5730Sstevel@tonic-gate 		 */
5740Sstevel@tonic-gate 		if (noidel) {
5750Sstevel@tonic-gate 			if (error = ufs_flush(vfsp))
5760Sstevel@tonic-gate 				goto errout;
5770Sstevel@tonic-gate 		}
5780Sstevel@tonic-gate 	}
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate errout:
5810Sstevel@tonic-gate 	cv_broadcast(&ulp->ul_cv);
5820Sstevel@tonic-gate 	return (error);
5830Sstevel@tonic-gate }
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate /*
5860Sstevel@tonic-gate  * ufs_reconcile_fs
5870Sstevel@tonic-gate  *	reconcile incore superblock with ondisk superblock
5880Sstevel@tonic-gate  */
5890Sstevel@tonic-gate int
ufs_reconcile_fs(struct vfs * vfsp,struct ufsvfs * ufsvfsp,int errlck)5900Sstevel@tonic-gate ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
5910Sstevel@tonic-gate {
5920Sstevel@tonic-gate 	struct fs	*mfs; 	/* in-memory superblock */
5930Sstevel@tonic-gate 	struct fs	*dfs;	/* on-disk   superblock */
5940Sstevel@tonic-gate 	struct buf	*bp;	/* on-disk   superblock buf */
5950Sstevel@tonic-gate 	int		 needs_unlock;
5960Sstevel@tonic-gate 	char		 finished_fsclean;
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	mfs = ufsvfsp->vfs_fs;
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate 	/*
6010Sstevel@tonic-gate 	 * get the on-disk copy of the superblock
6020Sstevel@tonic-gate 	 */
6030Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
6040Sstevel@tonic-gate 	bp->b_flags |= (B_STALE|B_AGE);
6050Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
6060Sstevel@tonic-gate 		brelse(bp);
6070Sstevel@tonic-gate 		return (EIO);
6080Sstevel@tonic-gate 	}
6090Sstevel@tonic-gate 	dfs = bp->b_un.b_fs;
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	/* error locks may only unlock after the fs has been made consistent */
6120Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
6130Sstevel@tonic-gate 		if (dfs->fs_clean == FSFIX) {	/* being repaired */
6140Sstevel@tonic-gate 			brelse(bp);
6150Sstevel@tonic-gate 			return (EAGAIN);
6160Sstevel@tonic-gate 		}
6170Sstevel@tonic-gate 		/* repair not yet started? */
6180Sstevel@tonic-gate 		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
6190Sstevel@tonic-gate 		if (dfs->fs_clean != finished_fsclean) {
6200Sstevel@tonic-gate 			brelse(bp);
6210Sstevel@tonic-gate 			return (EBUSY);
6220Sstevel@tonic-gate 		}
6230Sstevel@tonic-gate 	}
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 	/*
6260Sstevel@tonic-gate 	 * if superblock has changed too much, abort
6270Sstevel@tonic-gate 	 */
6280Sstevel@tonic-gate 	if ((mfs->fs_sblkno		!= dfs->fs_sblkno) ||
6290Sstevel@tonic-gate 	    (mfs->fs_cblkno		!= dfs->fs_cblkno) ||
6300Sstevel@tonic-gate 	    (mfs->fs_iblkno		!= dfs->fs_iblkno) ||
6310Sstevel@tonic-gate 	    (mfs->fs_dblkno		!= dfs->fs_dblkno) ||
6320Sstevel@tonic-gate 	    (mfs->fs_cgoffset		!= dfs->fs_cgoffset) ||
6330Sstevel@tonic-gate 	    (mfs->fs_cgmask		!= dfs->fs_cgmask) ||
6340Sstevel@tonic-gate 	    (mfs->fs_bsize		!= dfs->fs_bsize) ||
6350Sstevel@tonic-gate 	    (mfs->fs_fsize		!= dfs->fs_fsize) ||
6360Sstevel@tonic-gate 	    (mfs->fs_frag		!= dfs->fs_frag) ||
6370Sstevel@tonic-gate 	    (mfs->fs_bmask		!= dfs->fs_bmask) ||
6380Sstevel@tonic-gate 	    (mfs->fs_fmask		!= dfs->fs_fmask) ||
6390Sstevel@tonic-gate 	    (mfs->fs_bshift		!= dfs->fs_bshift) ||
6400Sstevel@tonic-gate 	    (mfs->fs_fshift		!= dfs->fs_fshift) ||
6410Sstevel@tonic-gate 	    (mfs->fs_fragshift		!= dfs->fs_fragshift) ||
6420Sstevel@tonic-gate 	    (mfs->fs_fsbtodb		!= dfs->fs_fsbtodb) ||
6430Sstevel@tonic-gate 	    (mfs->fs_sbsize		!= dfs->fs_sbsize) ||
6440Sstevel@tonic-gate 	    (mfs->fs_nindir		!= dfs->fs_nindir) ||
6450Sstevel@tonic-gate 	    (mfs->fs_nspf		!= dfs->fs_nspf) ||
6460Sstevel@tonic-gate 	    (mfs->fs_trackskew		!= dfs->fs_trackskew) ||
6470Sstevel@tonic-gate 	    (mfs->fs_cgsize		!= dfs->fs_cgsize) ||
6480Sstevel@tonic-gate 	    (mfs->fs_ntrak		!= dfs->fs_ntrak) ||
6490Sstevel@tonic-gate 	    (mfs->fs_nsect		!= dfs->fs_nsect) ||
6500Sstevel@tonic-gate 	    (mfs->fs_spc		!= dfs->fs_spc) ||
6510Sstevel@tonic-gate 	    (mfs->fs_cpg		!= dfs->fs_cpg) ||
6520Sstevel@tonic-gate 	    (mfs->fs_ipg		!= dfs->fs_ipg) ||
6530Sstevel@tonic-gate 	    (mfs->fs_fpg		!= dfs->fs_fpg) ||
6540Sstevel@tonic-gate 	    (mfs->fs_postblformat	!= dfs->fs_postblformat) ||
6550Sstevel@tonic-gate 	    (mfs->fs_magic		!= dfs->fs_magic)) {
6560Sstevel@tonic-gate 		brelse(bp);
6570Sstevel@tonic-gate 		return (EACCES);
6580Sstevel@tonic-gate 	}
6590Sstevel@tonic-gate 	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
6600Sstevel@tonic-gate 		if (mfs->fs_clean == FSLOG) {
6610Sstevel@tonic-gate 			brelse(bp);
6620Sstevel@tonic-gate 			return (EACCES);
6630Sstevel@tonic-gate 		}
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	/*
6660Sstevel@tonic-gate 	 * get new summary info
6670Sstevel@tonic-gate 	 */
6680Sstevel@tonic-gate 	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
6690Sstevel@tonic-gate 		brelse(bp);
6700Sstevel@tonic-gate 		return (EIO);
6710Sstevel@tonic-gate 	}
6720Sstevel@tonic-gate 
6730Sstevel@tonic-gate 	/*
6740Sstevel@tonic-gate 	 * release old summary info and update in-memory superblock
6750Sstevel@tonic-gate 	 */
6760Sstevel@tonic-gate 	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
6770Sstevel@tonic-gate 	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 	/*
6800Sstevel@tonic-gate 	 * update fields allowed to change
6810Sstevel@tonic-gate 	 */
6820Sstevel@tonic-gate 	mfs->fs_size		= dfs->fs_size;
6830Sstevel@tonic-gate 	mfs->fs_dsize		= dfs->fs_dsize;
6840Sstevel@tonic-gate 	mfs->fs_ncg		= dfs->fs_ncg;
6850Sstevel@tonic-gate 	mfs->fs_minfree		= dfs->fs_minfree;
6860Sstevel@tonic-gate 	mfs->fs_rotdelay	= dfs->fs_rotdelay;
6870Sstevel@tonic-gate 	mfs->fs_rps		= dfs->fs_rps;
6880Sstevel@tonic-gate 	mfs->fs_maxcontig	= dfs->fs_maxcontig;
6890Sstevel@tonic-gate 	mfs->fs_maxbpg		= dfs->fs_maxbpg;
6900Sstevel@tonic-gate 	mfs->fs_csmask		= dfs->fs_csmask;
6910Sstevel@tonic-gate 	mfs->fs_csshift		= dfs->fs_csshift;
6920Sstevel@tonic-gate 	mfs->fs_optim		= dfs->fs_optim;
6930Sstevel@tonic-gate 	mfs->fs_csaddr		= dfs->fs_csaddr;
6940Sstevel@tonic-gate 	mfs->fs_cssize		= dfs->fs_cssize;
6950Sstevel@tonic-gate 	mfs->fs_ncyl		= dfs->fs_ncyl;
6960Sstevel@tonic-gate 	mfs->fs_cstotal		= dfs->fs_cstotal;
6970Sstevel@tonic-gate 	mfs->fs_reclaim		= dfs->fs_reclaim;
6980Sstevel@tonic-gate 
6990Sstevel@tonic-gate 	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
7000Sstevel@tonic-gate 		mfs->fs_reclaim &= ~FS_RECLAIM;
7010Sstevel@tonic-gate 		mfs->fs_reclaim |=  FS_RECLAIMING;
7020Sstevel@tonic-gate 		ufs_thread_start(&ufsvfsp->vfs_reclaim,
7034662Sfrankho 		    ufs_thread_reclaim, vfsp);
7040Sstevel@tonic-gate 	}
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 	/* XXX What to do about sparecon? */
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 	/* XXX need to copy volume label */
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate 	/*
7110Sstevel@tonic-gate 	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
7120Sstevel@tonic-gate 	 * or if error-locked and ondisk is now clean
7130Sstevel@tonic-gate 	 */
7140Sstevel@tonic-gate 	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
7150Sstevel@tonic-gate 	if (needs_unlock)
7160Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
7190Sstevel@tonic-gate 		if (finished_fsclean == dfs->fs_clean)
7200Sstevel@tonic-gate 			mfs->fs_clean = finished_fsclean;
7210Sstevel@tonic-gate 		else
7220Sstevel@tonic-gate 			mfs->fs_clean = FSBAD;
7230Sstevel@tonic-gate 		mfs->fs_state = FSOKAY - dfs->fs_time;
7240Sstevel@tonic-gate 	}
7250Sstevel@tonic-gate 
7260Sstevel@tonic-gate 	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
7270Sstevel@tonic-gate 	    (dfs->fs_clean == FSBAD))
7280Sstevel@tonic-gate 		mfs->fs_clean = FSBAD;
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	if (needs_unlock)
7310Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
7320Sstevel@tonic-gate 
7330Sstevel@tonic-gate 	brelse(bp);
7340Sstevel@tonic-gate 
7350Sstevel@tonic-gate 	return (0);
7360Sstevel@tonic-gate }
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate /*
7390Sstevel@tonic-gate  * ufs_reconcile_inode
7400Sstevel@tonic-gate  *	reconcile ondisk inode with incore inode
7410Sstevel@tonic-gate  */
7420Sstevel@tonic-gate static int
ufs_reconcile_inode(struct inode * ip,void * arg)7430Sstevel@tonic-gate ufs_reconcile_inode(struct inode *ip, void *arg)
7440Sstevel@tonic-gate {
7450Sstevel@tonic-gate 	int		i;
7460Sstevel@tonic-gate 	int		ndaddr;
7470Sstevel@tonic-gate 	int		niaddr;
7480Sstevel@tonic-gate 	struct dinode	*dp;		/* ondisk inode */
7490Sstevel@tonic-gate 	struct buf	*bp	= NULL;
7500Sstevel@tonic-gate 	uid_t		d_uid;
7510Sstevel@tonic-gate 	gid_t		d_gid;
7520Sstevel@tonic-gate 	int		error = 0;
7530Sstevel@tonic-gate 	struct fs	*fs;
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate 	/*
7560Sstevel@tonic-gate 	 * not an inode we care about
7570Sstevel@tonic-gate 	 */
7580Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
7590Sstevel@tonic-gate 		return (0);
7600Sstevel@tonic-gate 
7610Sstevel@tonic-gate 	fs = ip->i_fs;
7620Sstevel@tonic-gate 
7630Sstevel@tonic-gate 	/*
7640Sstevel@tonic-gate 	 * Inode reconciliation fails: we made the filesystem quiescent
7650Sstevel@tonic-gate 	 * and we did a ufs_flush() before calling ufs_reconcile_inode()
7660Sstevel@tonic-gate 	 * and thus the inode should not have been changed inbetween.
7670Sstevel@tonic-gate 	 * Any discrepancies indicate a logic error and a pretty
7680Sstevel@tonic-gate 	 * significant run-state inconsistency we should complain about.
7690Sstevel@tonic-gate 	 */
7700Sstevel@tonic-gate 	if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
7710Sstevel@tonic-gate 		cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
7720Sstevel@tonic-gate 		    "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
7730Sstevel@tonic-gate 		return (EINVAL);
7740Sstevel@tonic-gate 	}
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	/*
7770Sstevel@tonic-gate 	 * get the dinode
7780Sstevel@tonic-gate 	 */
7790Sstevel@tonic-gate 	bp = UFS_BREAD(ip->i_ufsvfs,
7804662Sfrankho 	    ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
7810Sstevel@tonic-gate 	    (int)fs->fs_bsize);
7820Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
7830Sstevel@tonic-gate 		brelse(bp);
7840Sstevel@tonic-gate 		return (EIO);
7850Sstevel@tonic-gate 	}
7860Sstevel@tonic-gate 	dp  = bp->b_un.b_dino;
7870Sstevel@tonic-gate 	dp += itoo(fs, ip->i_number);
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 	/*
7900Sstevel@tonic-gate 	 * handle Sun's implementation of EFT
7910Sstevel@tonic-gate 	 */
7920Sstevel@tonic-gate 	d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
7930Sstevel@tonic-gate 	d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
7940Sstevel@tonic-gate 
7950Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate 	/*
7980Sstevel@tonic-gate 	 * some fields are not allowed to change
7990Sstevel@tonic-gate 	 */
8000Sstevel@tonic-gate 	if ((ip->i_mode  != dp->di_mode) ||
8010Sstevel@tonic-gate 	    (ip->i_gen   != dp->di_gen) ||
8020Sstevel@tonic-gate 	    (ip->i_uid   != d_uid) ||
8030Sstevel@tonic-gate 	    (ip->i_gid   != d_gid)) {
8040Sstevel@tonic-gate 		error = EACCES;
8050Sstevel@tonic-gate 		goto out;
8060Sstevel@tonic-gate 	}
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 	/*
8090Sstevel@tonic-gate 	 * and some are allowed to change
8100Sstevel@tonic-gate 	 */
8110Sstevel@tonic-gate 	ip->i_size		= dp->di_size;
8120Sstevel@tonic-gate 	ip->i_ic.ic_flags	= dp->di_ic.ic_flags;
8130Sstevel@tonic-gate 	ip->i_blocks		= dp->di_blocks;
8140Sstevel@tonic-gate 	ip->i_nlink		= dp->di_nlink;
8150Sstevel@tonic-gate 	if (ip->i_flag & IFASTSYMLNK) {
8160Sstevel@tonic-gate 		ndaddr = 1;
8170Sstevel@tonic-gate 		niaddr = 0;
8180Sstevel@tonic-gate 	} else {
8190Sstevel@tonic-gate 		ndaddr = NDADDR;
8200Sstevel@tonic-gate 		niaddr = NIADDR;
8210Sstevel@tonic-gate 	}
8220Sstevel@tonic-gate 	for (i = 0; i < ndaddr; ++i)
8230Sstevel@tonic-gate 		ip->i_db[i] = dp->di_db[i];
8240Sstevel@tonic-gate 	for (i = 0; i < niaddr; ++i)
8250Sstevel@tonic-gate 		ip->i_ib[i] = dp->di_ib[i];
8260Sstevel@tonic-gate 
8270Sstevel@tonic-gate out:
8280Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
8290Sstevel@tonic-gate 	brelse(bp);
8300Sstevel@tonic-gate 	return (error);
8310Sstevel@tonic-gate }
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate /*
8340Sstevel@tonic-gate  * ufs_reconcile
8350Sstevel@tonic-gate  *	reconcile ondisk superblock/inodes with any incore
8360Sstevel@tonic-gate  */
8370Sstevel@tonic-gate static int
ufs_reconcile(struct vfs * vfsp,struct ufsvfs * ufsvfsp,int errlck)8380Sstevel@tonic-gate ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
8390Sstevel@tonic-gate {
8400Sstevel@tonic-gate 	int	error = 0;
8410Sstevel@tonic-gate 
8420Sstevel@tonic-gate 	/*
8430Sstevel@tonic-gate 	 * get rid of as much inmemory data as possible
8440Sstevel@tonic-gate 	 */
8450Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate 	/*
8480Sstevel@tonic-gate 	 * reconcile the superblock and inodes
8490Sstevel@tonic-gate 	 */
8500Sstevel@tonic-gate 	if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
8510Sstevel@tonic-gate 		return (error);
8520Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
8530Sstevel@tonic-gate 		return (error);
8540Sstevel@tonic-gate 	/*
8550Sstevel@tonic-gate 	 * allocation blocks may be incorrect; get rid of them
8560Sstevel@tonic-gate 	 */
8570Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
8580Sstevel@tonic-gate 
8590Sstevel@tonic-gate 	return (error);
8600Sstevel@tonic-gate }
8610Sstevel@tonic-gate 
/*
 * File system locking: ioctl entry point.  Requests arriving here are
 * always treated as originating from userland.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	int	from_user = 1;	/* ioctl path: user-originated request */

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
8700Sstevel@tonic-gate 
/*
 * ufs__fiolfs
 *	Kernel-internal interface for applying a lockfs request to a
 *	file system; also used by fix-on-panic.  ufs_fiolfs() is the
 *	ioctl wrapper that forces from_user.
 *
 *	vp		a vnode on the target file system
 *	lockfsp		requested lock state
 *	from_user	non-zero if the request originated from userland
 *			(a user-applied error lock must not panic)
 *	from_log	non-zero when called from the logging code, which
 *			skips the accounting/swap deadlock check
 *
 *	Returns 0 on success or an errno value; on failure the previous
 *	lock state is restored unless the fs is hard-locked.
 */
int
ufs__fiolfs(
	struct vnode *vp,
	struct lockfs *lockfsp,
	int from_user,
	int from_log)
{
	struct ulockfs	*ulp;
	struct lockfs	lfs;		/* saved pre-request lock state */
	int		error;
	struct vfs	*vfsp;
	struct ufsvfs	*ufsvfsp;
	int		 errlck		= NO_ERRLCK;
	int		 poll_events	= POLLPRI;
	extern struct pollhead ufs_pollhd;
	ulockfs_info_t *head;
	ulockfs_info_t *info;
	int signal = 0;		/* set if ufs_quiesce() was interrupted */

	/* check valid lock type */
	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
		return (EINVAL);

	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
		return (EIO);

	vfsp = vp->v_vfsp;

	if (vfsp->vfs_flag & VFS_UNMOUNTED) /* has been unmounted */
		return (EIO);

	/* take the lock and check again */
	vfs_lock_wait(vfsp);
	if (vfsp->vfs_flag & VFS_UNMOUNTED) {
		vfs_unlock(vfsp);
		return (EIO);
	}

	/*
	 * Can't wlock or ro/elock fs with accounting or local swap file
	 * We need to check for this before we grab the ul_lock to avoid
	 * deadlocks with the accounting framework.
	 */
	if ((LOCKFS_IS_WLOCK(lockfsp) || LOCKFS_IS_ELOCK(lockfsp) ||
	    LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
		if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
			vfs_unlock(vfsp);
			return (EDEADLK);
		}
	}

	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
	ulp = &ufsvfsp->vfs_ulockfs;
	/* look up this thread's per-ulp lockfs info (fallocate tracking) */
	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * Suspend both the reclaim thread and the delete thread.
	 * This must be done outside the lockfs locking protocol.
	 */
	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	mutex_enter(&ulp->ul_lock);
	atomic_add_long(&ufs_quiesce_pend, 1);

	/*
	 * Quit if there is another lockfs request in progress
	 * that is waiting for existing ufs_vnops to complete.
	 */
	if (ULOCKFS_IS_BUSY(ulp)) {
		error = EBUSY;
		goto errexit;
	}

	/* cannot ulocked or downgrade a hard-lock */
	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto errexit;
	}

	/* an error lock may be unlocked or relocked, only */
	if (ULOCKFS_IS_ELOCK(ulp)) {
		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
			error = EBUSY;
			goto errexit;
		}
	}

	/*
	 * a read-only error lock may only be upgraded to an
	 * error lock or hard lock
	 */
	if (ULOCKFS_IS_ROELOCK(ulp)) {
		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
			error = EBUSY;
			goto errexit;
		}
	}

	/*
	 * until read-only error locks are fully implemented
	 * just return EINVAL
	 */
	if (LOCKFS_IS_ROELOCK(lockfsp)) {
		error = EINVAL;
		goto errexit;
	}

	/*
	 * an error lock may only be applied if the file system is
	 * unlocked or already error locked.
	 * (this is to prevent the case where a fs gets changed out from
	 * underneath a fs that is locked for backup,
	 * that is, name/delete/write-locked.)
	 */
	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
	    !ULOCKFS_IS_ROELOCK(ulp)) &&
	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
		error = EBUSY;
		goto errexit;
	}

	/* get and validate the input lockfs request */
	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
		goto errexit;

	/*
	 * save current ulockfs struct so it can be restored on failure
	 */
	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));

	/*
	 * Freeze the file system (pend future accesses)
	 */
	ufs_freeze(ulp, lockfsp);

	/*
	 * Set locking in progress because ufs_quiesce may free the
	 * ul_lock mutex.
	 */
	ULOCKFS_SET_BUSY(ulp);
	/* update the ioctl copy */
	LOCKFS_SET_BUSY(&ulp->ul_lockfs);

	/*
	 * We  need to unset FWLOCK status before we call ufs_quiesce
	 * so that the thread doesnt get suspended. We do this only if
	 * this (fallocate) thread requested an unlock operation.
	 */
	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
		if (!ULOCKFS_IS_WLOCK(ulp))
			ULOCKFS_CLR_FWLOCK(ulp);
	}

	/*
	 * Quiesce (wait for outstanding accesses to finish)
	 */
	if (error = ufs_quiesce(ulp)) {
		/*
		 * Interrupted due to signal. There could still be
		 * pending vnops.
		 */
		signal = 1;

		/*
		 * We do broadcast because lock-status
		 * could be reverted to old status.
		 */
		cv_broadcast(&ulp->ul_cv);
		goto errout;
	}

	/*
	 * If the fallocate thread requested a write fs lock operation
	 * then we set fwlock status in the ulp.
	 */
	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
		if (ULOCKFS_IS_WLOCK(ulp))
			ULOCKFS_SET_FWLOCK(ulp);
	}

	/*
	 * save error lock status to pass down to reconcilation
	 * routines and for later cleanup
	 */
	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
		errlck = UN_ERRLCK;

	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
		int needs_unlock;
		int needs_sbwrite;

		poll_events |= POLLERR;
		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs) ?
		    RE_ERRLCK : SET_ERRLCK;

		/* vfs_lock may already be held by our caller */
		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
		if (needs_unlock)
			mutex_enter(&ufsvfsp->vfs_lock);

		/* disable delayed i/o */
		needs_sbwrite = 0;

		if (errlck == SET_ERRLCK) {
			ufsvfsp->vfs_fs->fs_clean = FSBAD;
			needs_sbwrite = 1;
		}

		needs_sbwrite |= ufsvfsp->vfs_dio;
		ufsvfsp->vfs_dio = 0;

		if (needs_unlock)
			mutex_exit(&ufsvfsp->vfs_lock);

		if (needs_sbwrite) {
			/* push the (possibly FSBAD) superblock to disk */
			ulp->ul_sbowner = curthread;
			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);

			if (needs_unlock)
				mutex_enter(&ufsvfsp->vfs_lock);

			ufsvfsp->vfs_fs->fs_fmod = 0;

			if (needs_unlock)
				mutex_exit(&ufsvfsp->vfs_lock);
		}
	}

	/*
	 * reconcile superblock and inodes if was wlocked
	 */
	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
			goto errout;
		/*
		 * in case the fs grew; reset the metadata map for logging tests
		 */
		TRANS_MATA_UMOUNT(ufsvfsp);
		TRANS_MATA_MOUNT(ufsvfsp);
		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
	}

	/*
	 * At least everything *currently* dirty goes out.
	 */

	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
	    !ULOCKFS_IS_ELOCK(ulp))
		goto errout;

	/*
	 * thaw file system and wakeup pended processes
	 */
	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
			goto errout;

	/*
	 * reset modified flag if not already write locked
	 */
	if (!LOCKFS_IS_WLOCK(&lfs))
		ULOCKFS_CLR_MOD(ulp);

	/*
	 * idle the lock struct
	 */
	ULOCKFS_CLR_BUSY(ulp);
	/* update the ioctl copy */
	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

	/*
	 * free current comment
	 */
	if (lfs.lf_comment && lfs.lf_comlen != 0) {
		kmem_free(lfs.lf_comment, lfs.lf_comlen);
		lfs.lf_comment = NULL;
		lfs.lf_comlen = 0;
	}

	/* do error lock cleanup */
	if (errlck == UN_ERRLCK)
		ufsfx_unlockfs(ufsvfsp);

	else if (errlck == RE_ERRLCK)
		ufsfx_lockfs(ufsvfsp);

	/* don't allow error lock from user to invoke panic */
	else if (from_user && errlck == SET_ERRLCK &&
	    !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
		(void) ufs_fault(ufsvfsp->vfs_root,
		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
		    ulp->ul_lockfs.lf_comment: "user-applied error lock");

	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/* notify pollers of the state change */
	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
		poll_events |= POLLERR;

	pollwakeup(&ufs_pollhd, poll_events);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (0);

errout:
	/*
	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
	 */
	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
		ulp->ul_fs_lock = (1 << lfs.lf_lock);
	}

	/*
	 * Don't call ufs_thaw() when there's a signal during
	 * ufs quiesce operation as it can lead to deadlock
	 * with getpage.
	 */
	if (signal == 0)
		(void) ufs_thaw(vfsp, ufsvfsp, ulp);

	ULOCKFS_CLR_BUSY(ulp);
	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

errexit:
	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (error);
}
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate /*
12200Sstevel@tonic-gate  * fiolfss
12210Sstevel@tonic-gate  * 	return the current file system locking state info
12220Sstevel@tonic-gate  */
12230Sstevel@tonic-gate int
ufs_fiolfss(struct vnode * vp,struct lockfs * lockfsp)12240Sstevel@tonic-gate ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
12250Sstevel@tonic-gate {
12260Sstevel@tonic-gate 	struct ulockfs	*ulp;
12270Sstevel@tonic-gate 
12280Sstevel@tonic-gate 	if (!vp || !vp->v_vfsp || !VTOI(vp))
12290Sstevel@tonic-gate 		return (EINVAL);
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	/* file system has been forcibly unmounted */
12320Sstevel@tonic-gate 	if (VTOI(vp)->i_ufsvfs == NULL)
12330Sstevel@tonic-gate 		return (EIO);
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate 	ulp = VTOUL(vp);
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(ulp)) {
12380Sstevel@tonic-gate 		*lockfsp = ulp->ul_lockfs;	/* structure assignment */
12390Sstevel@tonic-gate 		return (0);
12400Sstevel@tonic-gate 	}
12410Sstevel@tonic-gate 
12420Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
12430Sstevel@tonic-gate 
12440Sstevel@tonic-gate 	*lockfsp = ulp->ul_lockfs;	/* structure assignment */
12450Sstevel@tonic-gate 
12460Sstevel@tonic-gate 	if (ULOCKFS_IS_MOD(ulp))
12470Sstevel@tonic-gate 		lockfsp->lf_flags |= LOCKFS_MOD;
12480Sstevel@tonic-gate 
12490Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
12500Sstevel@tonic-gate 
12510Sstevel@tonic-gate 	return (0);
12520Sstevel@tonic-gate }
12530Sstevel@tonic-gate 
/*
 * ufs_check_lockfs
 *	check whether a ufs_vnops conflicts with the file system lock,
 *	blocking until the conflict clears unless the caller is marked
 *	non-blocking.  On success the appropriate operation counter
 *	(ul_vnops_cnt or ul_falloc_cnt, per the mask) is incremented.
 *
 *	Returns 0 when the vnop may proceed, EAGAIN for a T_DONTPEND
 *	caller that would block, EIO on hard lock (or error lock during
 *	an onerr unmount), EINTR if the wait was interrupted.
 */
int
ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
{
	k_sigset_t	smask;
	int		sig, slock;

	ASSERT(MUTEX_HELD(&ulp->ul_lock));

	while (ulp->ul_fs_lock & mask) {
		slock = (int)ULOCKFS_IS_SLOCK(ulp);
		/*
		 * A caller that must not pend (T_DONTPEND) gets EAGAIN
		 * instead of blocking, except under a soft lock.
		 */
		if ((curthread->t_flag & T_DONTPEND) && !slock) {
			curthread->t_flag |= T_WOULDBLOCK;
			return (EAGAIN);
		}
		curthread->t_flag &= ~T_WOULDBLOCK;

		/*
		 * In the case of an onerr umount of the fs, threads could
		 * have blocked before coming into ufs_check_lockfs and
		 * need to check for the special case of ELOCK and
		 * vfs_dontblock being set which would indicate that the fs
		 * is on its way out and will not return therefore making
		 * EIO the appropriate response.
		 */
		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			return (EIO);

		/*
		 * wait for lock status to change
		 */
		if (slock || ufsvfsp->vfs_nointr) {
			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
		} else {
			/* interruptible wait; allow only "interrupt" sigs */
			sigintr(&smask, 1);
			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
			sigunintr(&smask);
			/*
			 * EINTR if woken by a signal while still locked,
			 * or if the fs decided not to block anymore.
			 */
			if ((!sig && (ulp->ul_fs_lock & mask)) ||
			    ufsvfsp->vfs_dontblock)
				return (EINTR);
		}
	}

	/* lock cleared: account this op under the matching counter */
	if (mask & ULOCKFS_FWLOCK) {
		atomic_add_long(&ulp->ul_falloc_cnt, 1);
		ULOCKFS_SET_FALLOC(ulp);
	} else {
		atomic_add_long(&ulp->ul_vnops_cnt, 1);
	}

	return (0);
}
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate /*
13120Sstevel@tonic-gate  * Check whether we came across the handcrafted lockfs protocol path. We can't
13130Sstevel@tonic-gate  * simply check for T_DONTBLOCK here as one would assume since this can also
13140Sstevel@tonic-gate  * falsely catch recursive VOP's going to a different filesystem, instead we
13150Sstevel@tonic-gate  * check if we already hold the ulockfs->ul_lock mutex.
13160Sstevel@tonic-gate  */
13170Sstevel@tonic-gate static int
ufs_lockfs_is_under_rawlockfs(struct ulockfs * ulp)13180Sstevel@tonic-gate ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
13190Sstevel@tonic-gate {
13200Sstevel@tonic-gate 	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
13210Sstevel@tonic-gate }
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate /*
13240Sstevel@tonic-gate  * ufs_lockfs_begin - start the lockfs locking protocol
13250Sstevel@tonic-gate  */
13260Sstevel@tonic-gate int
ufs_lockfs_begin(struct ufsvfs * ufsvfsp,struct ulockfs ** ulpp,ulong_t mask)13270Sstevel@tonic-gate ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
13280Sstevel@tonic-gate {
13290Sstevel@tonic-gate 	int 		error;
13300Sstevel@tonic-gate 	int		rec_vop;
13317212Svsakar 	ushort_t	op_cnt_incremented = 0;
13327212Svsakar 	ulong_t		*ctr;
13330Sstevel@tonic-gate 	struct ulockfs *ulp;
13340Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info;
13350Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_free;
13360Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_temp;
13370Sstevel@tonic-gate 
13380Sstevel@tonic-gate 	/*
13390Sstevel@tonic-gate 	 * file system has been forcibly unmounted
13400Sstevel@tonic-gate 	 */
13410Sstevel@tonic-gate 	if (ufsvfsp == NULL)
13420Sstevel@tonic-gate 		return (EIO);
13430Sstevel@tonic-gate 
13440Sstevel@tonic-gate 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
13450Sstevel@tonic-gate 
13460Sstevel@tonic-gate 	/*
13470Sstevel@tonic-gate 	 * Do lockfs protocol
13480Sstevel@tonic-gate 	 */
13490Sstevel@tonic-gate 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
13500Sstevel@tonic-gate 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
13510Sstevel@tonic-gate 
13520Sstevel@tonic-gate 	/*
13530Sstevel@tonic-gate 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
13540Sstevel@tonic-gate 	 * path and bail out in that case.
13550Sstevel@tonic-gate 	 */
13560Sstevel@tonic-gate 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
13570Sstevel@tonic-gate 		*ulpp = NULL;
13580Sstevel@tonic-gate 		return (0);
13590Sstevel@tonic-gate 	} else {
13600Sstevel@tonic-gate 		if (ulockfs_info_free == NULL) {
13610Sstevel@tonic-gate 			if ((ulockfs_info_temp = (ulockfs_info_t *)
13620Sstevel@tonic-gate 			    kmem_zalloc(sizeof (ulockfs_info_t),
13630Sstevel@tonic-gate 			    KM_NOSLEEP)) == NULL) {
13640Sstevel@tonic-gate 				*ulpp = NULL;
13650Sstevel@tonic-gate 				return (ENOMEM);
13660Sstevel@tonic-gate 			}
13670Sstevel@tonic-gate 		}
13680Sstevel@tonic-gate 	}
13690Sstevel@tonic-gate 
13700Sstevel@tonic-gate 	/*
13710Sstevel@tonic-gate 	 * First time VOP call
13727212Svsakar 	 *
13737212Svsakar 	 * Increment the ctr irrespective of the lockfs state. If the lockfs
13747212Svsakar 	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
13757212Svsakar 	 * before incrementing we need to check if there is a pending quiesce
13767212Svsakar 	 * request because if we have a continuous stream of ufs_lockfs_begin
13777212Svsakar 	 * requests pounding on a few cpu's then the ufs_quiesce thread might
13787212Svsakar 	 * never see the value of zero for ctr - a livelock kind of scenario.
13790Sstevel@tonic-gate 	 */
13807212Svsakar 	ctr = (mask & ULOCKFS_FWLOCK) ?
13817212Svsakar 	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
13827212Svsakar 	if (!ULOCKFS_IS_SLOCK(ulp)) {
13837212Svsakar 		atomic_add_long(ctr, 1);
13847212Svsakar 		op_cnt_incremented++;
13857212Svsakar 	}
13867212Svsakar 
13877212Svsakar 	/*
13887212Svsakar 	 * If the lockfs state (indicated by ul_fs_lock) is not just
13897212Svsakar 	 * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
13907212Svsakar 	 * where there is a check with an appropriate mask to selectively allow
13917212Svsakar 	 * operations permitted for that kind of lockfs state.
13927212Svsakar 	 *
13937212Svsakar 	 * Even these selective operations should not be allowed to go through
13947212Svsakar 	 * if a lockfs request is in progress because that could result in inode
13957212Svsakar 	 * modifications during a quiesce and could hence result in inode
13967212Svsakar 	 * reconciliation failures. ULOCKFS_SLOCK alone would not be sufficient,
13977212Svsakar 	 * so make use of ufs_quiesce_pend to disallow vnode operations when a
13987212Svsakar 	 * quiesce is in progress.
13997212Svsakar 	 */
14007212Svsakar 	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
14017212Svsakar 		if (op_cnt_incremented)
14027212Svsakar 			if (!atomic_add_long_nv(ctr, -1))
14037212Svsakar 				cv_broadcast(&ulp->ul_cv);
14047212Svsakar 		mutex_enter(&ulp->ul_lock);
14057212Svsakar 		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
14067212Svsakar 		mutex_exit(&ulp->ul_lock);
14077212Svsakar 		if (error) {
14080Sstevel@tonic-gate 			if (ulockfs_info_free == NULL)
14090Sstevel@tonic-gate 				kmem_free(ulockfs_info_temp,
14100Sstevel@tonic-gate 				    sizeof (ulockfs_info_t));
14110Sstevel@tonic-gate 			return (error);
14120Sstevel@tonic-gate 		}
14137212Svsakar 	} else {
14147212Svsakar 		/*
14157212Svsakar 		 * This is the common case of file system in a unlocked state.
14167212Svsakar 		 *
14177212Svsakar 		 * If a file system is unlocked, we would expect the ctr to have
14187212Svsakar 		 * been incremented by now. But this will not be true when a
14197212Svsakar 		 * quiesce is winding up - SLOCK was set when we checked before
14207212Svsakar 		 * incrementing the ctr, but by the time we checked for
14217212Svsakar 		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. It is okay
14227212Svsakar 		 * to take ul_lock and go through the slow path in this uncommon
14237212Svsakar 		 * case.
14247212Svsakar 		 */
14257212Svsakar 		if (op_cnt_incremented == 0) {
14267212Svsakar 			mutex_enter(&ulp->ul_lock);
14277212Svsakar 			error = ufs_check_lockfs(ufsvfsp, ulp, mask);
14287212Svsakar 			if (error) {
14297212Svsakar 				mutex_exit(&ulp->ul_lock);
14307212Svsakar 				if (ulockfs_info_free == NULL)
14317212Svsakar 					kmem_free(ulockfs_info_temp,
14327212Svsakar 					    sizeof (ulockfs_info_t));
14337212Svsakar 				return (error);
14347212Svsakar 			}
14357212Svsakar 			if (mask & ULOCKFS_FWLOCK)
14367212Svsakar 				ULOCKFS_SET_FALLOC(ulp);
14377212Svsakar 			mutex_exit(&ulp->ul_lock);
14387212Svsakar 		} else if (mask & ULOCKFS_FWLOCK) {
14397212Svsakar 			mutex_enter(&ulp->ul_lock);
14407212Svsakar 			ULOCKFS_SET_FALLOC(ulp);
14417212Svsakar 			mutex_exit(&ulp->ul_lock);
14427212Svsakar 		}
14430Sstevel@tonic-gate 	}
14440Sstevel@tonic-gate 
14450Sstevel@tonic-gate 	if (ulockfs_info_free != NULL) {
14460Sstevel@tonic-gate 		ulockfs_info_free->ulp = ulp;
1447923Ssdebnath 		if (mask & ULOCKFS_FWLOCK)
1448923Ssdebnath 			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
14490Sstevel@tonic-gate 	} else {
14500Sstevel@tonic-gate 		ulockfs_info_temp->ulp = ulp;
14510Sstevel@tonic-gate 		ulockfs_info_temp->next = ulockfs_info;
1452923Ssdebnath 		if (mask & ULOCKFS_FWLOCK)
1453923Ssdebnath 			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
14540Sstevel@tonic-gate 		ASSERT(ufs_lockfs_key != 0);
14550Sstevel@tonic-gate 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
14560Sstevel@tonic-gate 	}
14570Sstevel@tonic-gate 
14580Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
14590Sstevel@tonic-gate 	return (0);
14600Sstevel@tonic-gate }
14610Sstevel@tonic-gate 
14620Sstevel@tonic-gate /*
14630Sstevel@tonic-gate  * Check whether we are returning from the top level VOP.
14640Sstevel@tonic-gate  */
14650Sstevel@tonic-gate static int
ufs_lockfs_top_vop_return(ulockfs_info_t * head)14660Sstevel@tonic-gate ufs_lockfs_top_vop_return(ulockfs_info_t *head)
14670Sstevel@tonic-gate {
14680Sstevel@tonic-gate 	ulockfs_info_t *info;
14690Sstevel@tonic-gate 	int result = 1;
14700Sstevel@tonic-gate 
14710Sstevel@tonic-gate 	for (info = head; info != NULL; info = info->next) {
14720Sstevel@tonic-gate 		if (info->ulp != NULL) {
14730Sstevel@tonic-gate 			result = 0;
14740Sstevel@tonic-gate 			break;
14750Sstevel@tonic-gate 		}
14760Sstevel@tonic-gate 	}
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate 	return (result);
14790Sstevel@tonic-gate }
14800Sstevel@tonic-gate 
/*
 * ufs_lockfs_end - terminate the lockfs locking protocol
 *
 * Called at the end of a VOP to undo what ufs_lockfs_begin() (or one of
 * its variants) did: invalidate this thread's TSD record for ulp, drop
 * the appropriate in-progress counter (ul_falloc_cnt for fallocate
 * threads, ul_vnops_cnt otherwise), and wake any waiter (e.g. the
 * quiesce thread) when a counter reaches zero.
 *
 * ulp may be NULL (the begin routine bailed out for a recursive VOP or
 * raw-lockfs path), in which case there is nothing to undo.
 */
void
ufs_lockfs_end(struct ulockfs *ulp)
{
	ulockfs_info_t *info;
	ulockfs_info_t *head;

	/*
	 * end-of-VOP protocol
	 */
	if (ulp == NULL)
		return;

	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * If we're called from a first level VOP, we have to have a
	 * valid ulockfs record in the TSD.
	 */
	ASSERT(info != NULL);

	/*
	 * Invalidate the ulockfs record.
	 */
	info->ulp = NULL;

	/*
	 * Only the outermost VOP on this thread may clear T_DONTBLOCK;
	 * an inner (recursive) return must leave it set for the caller.
	 */
	if (ufs_lockfs_top_vop_return(head))
		curthread->t_flag &= ~T_DONTBLOCK;

	/* fallocate thread */
	if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
		/* Clear the thread's fallocate state */
		info->flags &= ~ULOCK_INFO_FALLOCATE;
		/*
		 * Last fallocate thread out clears the FALLOC state and
		 * wakes waiters; the flag change must happen under
		 * ul_lock so it is seen consistently with ul_fs_lock.
		 */
		if (!atomic_add_long_nv(&ulp->ul_falloc_cnt, -1)) {
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_CLR_FALLOC(ulp);
			cv_broadcast(&ulp->ul_cv);
			mutex_exit(&ulp->ul_lock);
		}
	} else  { /* normal thread */
		/* Wake waiters (e.g. ufs_quiesce) when the count hits zero. */
		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
			cv_broadcast(&ulp->ul_cv);
	}
}
15280Sstevel@tonic-gate 
/*
 * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
 * blocking.
 *
 * Non-blocking counterpart of ufs_lockfs_begin(): instead of sleeping in
 * ufs_check_lockfs() when the file system is locked against the operation,
 * it fails immediately with EAGAIN (or EIO for hard/error locks).
 *
 * On success, *ulpp points at the file system's ulockfs, the appropriate
 * in-progress counter has been incremented, a TSD record for this VOP has
 * been installed, T_DONTBLOCK is set, and 0 is returned.  On a recursive
 * VOP or raw-lockfs path, *ulpp is set to NULL and 0 is returned (caller
 * must then skip ufs_lockfs_end()).  Otherwise an errno is returned.
 */
int
ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
	int 		error = 0;
	int		rec_vop;
	ushort_t	op_cnt_incremented = 0;
	ulong_t		*ctr;
	struct ulockfs *ulp;
	ulockfs_info_t	*ulockfs_info;
	ulockfs_info_t	*ulockfs_info_free;
	ulockfs_info_t	*ulockfs_info_temp;

	/*
	 * file system has been forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * Do lockfs protocol
	 */
	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

	/*
	 * Detect recursive VOP call or handcrafted internal lockfs protocol
	 * path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		/*
		 * No reusable TSD record: pre-allocate one now (KM_NOSLEEP,
		 * since we must not block) before touching any counters.
		 */
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Increment the ctr irrespective of the lockfs state. If the lockfs
	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
	 * before incrementing we need to check if there is a pending quiesce
	 * request because if we have a continuous stream of ufs_lockfs_begin
	 * requests pounding on a few cpu's then the ufs_quiesce thread might
	 * never see the value of zero for ctr - a livelock kind of scenario.
	 */
	ctr = (mask & ULOCKFS_FWLOCK) ?
	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
	if (!ULOCKFS_IS_SLOCK(ulp)) {
		atomic_add_long(ctr, 1);
		op_cnt_incremented++;
	}

	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		/*
		 * Non-blocking version of ufs_check_lockfs() code.
		 *
		 * If the file system is not hard locked or error locked
		 * and if ulp->ul_fs_lock allows this operation, increment
		 * the appropriate counter and proceed (For eg., In case the
		 * file system is delete locked, a mmap can still go through).
		 */
		if (op_cnt_incremented)
			if (!atomic_add_long_nv(ctr, -1))
				cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			error = EIO;
		else if (ulp->ul_fs_lock & mask)
			error = EAGAIN;	/* locked against this op: don't wait */

		if (error) {
			mutex_exit(&ulp->ul_lock);
			/* Only free the record we allocated ourselves. */
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
		/* Re-increment under ul_lock now that the op is permitted. */
		atomic_add_long(ctr, 1);
		if (mask & ULOCKFS_FWLOCK)
			ULOCKFS_SET_FALLOC(ulp);
		mutex_exit(&ulp->ul_lock);
	} else {
		/*
		 * This is the common case of file system in a unlocked state.
		 *
		 * If a file system is unlocked, we would expect the ctr to have
		 * been incremented by now. But this will not be true when a
		 * quiesce is winding up - SLOCK was set when we checked before
		 * incrementing the ctr, but by the time we checked for
		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. Take
		 * ul_lock and go through the non-blocking version of
		 * ufs_check_lockfs() code.
		 */
		if (op_cnt_incremented == 0) {
			mutex_enter(&ulp->ul_lock);
			if (ULOCKFS_IS_HLOCK(ulp) ||
			    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
				error = EIO;
			else if (ulp->ul_fs_lock & mask)
				error = EAGAIN;

			if (error) {
				mutex_exit(&ulp->ul_lock);
				if (ulockfs_info_free == NULL)
					kmem_free(ulockfs_info_temp,
					    sizeof (ulockfs_info_t));
				return (error);
			}
			atomic_add_long(ctr, 1);
			if (mask & ULOCKFS_FWLOCK)
				ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		} else if (mask & ULOCKFS_FWLOCK) {
			/* Counter already bumped; just mark fallocate state. */
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		}
	}

	/*
	 * Record this VOP in the thread's TSD list so ufs_lockfs_end()
	 * and recursion detection can find it; reuse a free record when
	 * available, otherwise link in the one allocated above.
	 */
	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
		ASSERT(ufs_lockfs_key != 0);
		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
	}

	curthread->t_flag |= T_DONTBLOCK;
	return (0);
}
16785859Svsakar 
/*
 * specialized version of ufs_lockfs_begin() called by ufs_getpage().
 *
 * Differs from the generic begin in that the lockfs mask is derived here
 * from the faulting segment: private (non-MAP_SHARED) segvn mappings and
 * read faults use the GETREAD mask; for shared read faults the mapping is
 * additionally downgraded to read-only via *protp so a later write faults
 * again (and can then be suspended if the fs is write locked).  All other
 * cases use the GETWRITE mask.
 *
 * On success, *ulpp points at the ulockfs, ul_vnops_cnt is held, a TSD
 * record is installed, T_DONTBLOCK is set and 0 is returned.  *ulpp is
 * NULL (with 0 returned) on a recursive VOP / raw-lockfs path.
 */
int
ufs_lockfs_begin_getpage(
	struct ufsvfs	*ufsvfsp,
	struct ulockfs	**ulpp,
	struct seg	*seg,
	int		read_access,
	uint_t		*protp)
{
	ulong_t			mask;
	int 			error;
	int			rec_vop;
	struct ulockfs		*ulp;
	ulockfs_info_t		*ulockfs_info;
	ulockfs_info_t		*ulockfs_info_free;
	ulockfs_info_t		*ulockfs_info_temp;

	/*
	 * file system has been forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * Do lockfs protocol
	 */
	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

	/*
	 * Detect recursive VOP call or handcrafted internal lockfs protocol
	 * path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		/*
		 * No reusable TSD record on this thread: pre-allocate one
		 * (KM_NOSLEEP) before entering the counter protocol.
		 */
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 */
	atomic_add_long(&ulp->ul_vnops_cnt, 1);
	/*
	 * Slow path: fs is not plainly unlocked, or a quiesce is pending.
	 * Drop the optimistic hold (waking the quiesce thread if we were
	 * the last) and re-check under ul_lock with the proper mask.
	 */
	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
			cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (seg->s_ops == &segvn_ops &&
		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else if (protp && read_access) {
			/*
			 * Restrict the mapping to readonly.
			 * Writes to this mapping will cause
			 * another fault which will then
			 * be suspended if fs is write locked
			 */
			*protp &= ~PROT_WRITE;
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else
			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;

		/*
		 * will sleep if this fs is locked against this VOP
		 */
		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
		mutex_exit(&ulp->ul_lock);
		if (error) {
			/* Only free the record we allocated ourselves. */
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
	}

	/*
	 * Register this VOP in the thread's TSD list: reuse a free record
	 * when available, otherwise link in the one allocated above.
	 */
	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		ASSERT(ufs_lockfs_key != 0);
		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
	}

	curthread->t_flag |= T_DONTBLOCK;
	return (0);
}
17780Sstevel@tonic-gate 
17790Sstevel@tonic-gate void
ufs_lockfs_tsd_destructor(void * head)17800Sstevel@tonic-gate ufs_lockfs_tsd_destructor(void *head)
17810Sstevel@tonic-gate {
17820Sstevel@tonic-gate 	ulockfs_info_t *curr = (ulockfs_info_t *)head;
17830Sstevel@tonic-gate 	ulockfs_info_t *temp;
17840Sstevel@tonic-gate 
17850Sstevel@tonic-gate 	for (; curr != NULL; ) {
17860Sstevel@tonic-gate 		/*
17870Sstevel@tonic-gate 		 * The TSD destructor is being called when the thread exits
17880Sstevel@tonic-gate 		 * (via thread_exit()). At that time it must have cleaned up
17890Sstevel@tonic-gate 		 * all VOPs via ufs_lockfs_end() and there must not be a
17900Sstevel@tonic-gate 		 * valid ulockfs record exist while a thread is exiting.
17910Sstevel@tonic-gate 		 */
17920Sstevel@tonic-gate 		temp = curr;
17930Sstevel@tonic-gate 		curr = curr->next;
17940Sstevel@tonic-gate 		ASSERT(temp->ulp == NULL);
17950Sstevel@tonic-gate 		kmem_free(temp, sizeof (ulockfs_info_t));
17960Sstevel@tonic-gate 	}
17970Sstevel@tonic-gate }
1798