xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_subr.c (revision 5331:3047ad28a67b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
54662Sfrankho  * Common Development and Distribution License (the "License").
64662Sfrankho  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
224662Sfrankho  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
270Sstevel@tonic-gate /*	  All Rights Reserved  	*/
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
310Sstevel@tonic-gate  * The Regents of the University of California
320Sstevel@tonic-gate  * All Rights Reserved
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
350Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
360Sstevel@tonic-gate  * contributors.
370Sstevel@tonic-gate  */
380Sstevel@tonic-gate 
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
410Sstevel@tonic-gate 
420Sstevel@tonic-gate #include <sys/types.h>
430Sstevel@tonic-gate #include <sys/t_lock.h>
440Sstevel@tonic-gate #include <sys/param.h>
450Sstevel@tonic-gate #include <sys/time.h>
460Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
470Sstevel@tonic-gate #include <sys/cmn_err.h>
480Sstevel@tonic-gate 
490Sstevel@tonic-gate #ifdef _KERNEL
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <sys/systm.h>
520Sstevel@tonic-gate #include <sys/sysmacros.h>
530Sstevel@tonic-gate #include <sys/buf.h>
540Sstevel@tonic-gate #include <sys/conf.h>
550Sstevel@tonic-gate #include <sys/user.h>
560Sstevel@tonic-gate #include <sys/var.h>
570Sstevel@tonic-gate #include <sys/vfs.h>
580Sstevel@tonic-gate #include <sys/vnode.h>
590Sstevel@tonic-gate #include <sys/proc.h>
600Sstevel@tonic-gate #include <sys/debug.h>
610Sstevel@tonic-gate #include <sys/fssnap_if.h>
620Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
630Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
640Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
650Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
660Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
670Sstevel@tonic-gate #include <sys/kmem.h>
680Sstevel@tonic-gate #include <sys/policy.h>
690Sstevel@tonic-gate #include <vm/hat.h>
700Sstevel@tonic-gate #include <vm/as.h>
710Sstevel@tonic-gate #include <vm/seg.h>
720Sstevel@tonic-gate #include <vm/pvn.h>
730Sstevel@tonic-gate #include <vm/seg_map.h>
740Sstevel@tonic-gate #include <sys/swap.h>
750Sstevel@tonic-gate #include <vm/seg_kmem.h>
760Sstevel@tonic-gate 
770Sstevel@tonic-gate #else  /* _KERNEL */
780Sstevel@tonic-gate 
790Sstevel@tonic-gate #define	ASSERT(x)		/* don't use asserts for fsck et al */
800Sstevel@tonic-gate 
810Sstevel@tonic-gate #endif  /* _KERNEL */
820Sstevel@tonic-gate 
830Sstevel@tonic-gate #ifdef _KERNEL
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * Used to verify that a given entry on the ufs_instances list (see below)
870Sstevel@tonic-gate  * still refers to a mounted file system.
880Sstevel@tonic-gate  *
890Sstevel@tonic-gate  * XXX:	This is a crock that substitutes for proper locking to coordinate
900Sstevel@tonic-gate  *	updates to and uses of the entries in ufs_instances.
910Sstevel@tonic-gate  */
920Sstevel@tonic-gate struct check_node {
930Sstevel@tonic-gate 	struct vfs *vfsp;
940Sstevel@tonic-gate 	struct ufsvfs *ufsvfs;
950Sstevel@tonic-gate 	dev_t vfs_dev;
960Sstevel@tonic-gate };
970Sstevel@tonic-gate 
980Sstevel@tonic-gate static vfs_t *still_mounted(struct check_node *);
990Sstevel@tonic-gate 
1000Sstevel@tonic-gate /*
1010Sstevel@tonic-gate  * All ufs file system instances are linked together into a list starting at
1020Sstevel@tonic-gate  * ufs_instances.  The list is updated as part of mount and unmount.  It's
1030Sstevel@tonic-gate  * consulted in ufs_update, to allow syncing out all ufs file system instances
1040Sstevel@tonic-gate  * in a batch.
1050Sstevel@tonic-gate  *
1060Sstevel@tonic-gate  * ufsvfs_mutex guards access to this list and to the {,old}ufsvfslist
1070Sstevel@tonic-gate  * manipulated in ufs_funmount_cleanup.  (A given ufs instance is always on
1080Sstevel@tonic-gate  * exactly one of these lists except while it's being allocated or
1090Sstevel@tonic-gate  * deallocated.)
1100Sstevel@tonic-gate  */
1110Sstevel@tonic-gate struct ufsvfs	*ufs_instances;
1120Sstevel@tonic-gate extern kmutex_t		ufsvfs_mutex;	/* XXX: move this to ufs_inode.h? */
1130Sstevel@tonic-gate 
1140Sstevel@tonic-gate /*
1150Sstevel@tonic-gate  * ufsvfs list manipulation routines
1160Sstevel@tonic-gate  */
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate /*
1190Sstevel@tonic-gate  * Link ufsp in at the head of the list of ufs_instances.
1200Sstevel@tonic-gate  */
1210Sstevel@tonic-gate void
1220Sstevel@tonic-gate ufs_vfs_add(struct ufsvfs *ufsp)
1230Sstevel@tonic-gate {
1240Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
1250Sstevel@tonic-gate 	ufsp->vfs_next = ufs_instances;
1260Sstevel@tonic-gate 	ufs_instances = ufsp;
1270Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
1280Sstevel@tonic-gate }
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate /*
1310Sstevel@tonic-gate  * Remove ufsp from the list of ufs_instances.
1320Sstevel@tonic-gate  *
1330Sstevel@tonic-gate  * Does no error checking; ufsp is assumed to actually be on the list.
1340Sstevel@tonic-gate  */
1350Sstevel@tonic-gate void
1360Sstevel@tonic-gate ufs_vfs_remove(struct ufsvfs *ufsp)
1370Sstevel@tonic-gate {
1380Sstevel@tonic-gate 	struct ufsvfs	**delpt = &ufs_instances;
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
1410Sstevel@tonic-gate 	for (; *delpt != NULL; delpt = &((*delpt)->vfs_next)) {
1420Sstevel@tonic-gate 		if (*delpt == ufsp) {
1430Sstevel@tonic-gate 			*delpt = ufsp->vfs_next;
1440Sstevel@tonic-gate 			ufsp->vfs_next = NULL;
1450Sstevel@tonic-gate 			break;
1460Sstevel@tonic-gate 		}
1470Sstevel@tonic-gate 	}
1480Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
1490Sstevel@tonic-gate }
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate /*
1520Sstevel@tonic-gate  * Clean up state resulting from a forcible unmount that couldn't be handled
1530Sstevel@tonic-gate  * directly during the unmount.  (See commentary in the unmount code for more
1540Sstevel@tonic-gate  * info.)
1550Sstevel@tonic-gate  */
1560Sstevel@tonic-gate static void
1570Sstevel@tonic-gate ufs_funmount_cleanup()
1580Sstevel@tonic-gate {
1590Sstevel@tonic-gate 	struct ufsvfs		*ufsvfsp;
1600Sstevel@tonic-gate 	extern struct ufsvfs	*oldufsvfslist, *ufsvfslist;
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate 	/*
1630Sstevel@tonic-gate 	 * Assumption: it's now safe to blow away the entries on
1640Sstevel@tonic-gate 	 * oldufsvfslist.
1650Sstevel@tonic-gate 	 */
1660Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
1670Sstevel@tonic-gate 	while ((ufsvfsp = oldufsvfslist) != NULL) {
1680Sstevel@tonic-gate 		oldufsvfslist = ufsvfsp->vfs_next;
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 		mutex_destroy(&ufsvfsp->vfs_lock);
1710Sstevel@tonic-gate 		kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1720Sstevel@tonic-gate 	}
1730Sstevel@tonic-gate 	/*
1740Sstevel@tonic-gate 	 * Rotate more recent unmount entries into place in preparation for
1750Sstevel@tonic-gate 	 * the next time around.
1760Sstevel@tonic-gate 	 */
1770Sstevel@tonic-gate 	oldufsvfslist = ufsvfslist;
1780Sstevel@tonic-gate 	ufsvfslist = NULL;
1790Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
1800Sstevel@tonic-gate }
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate /*
1840Sstevel@tonic-gate  * ufs_update performs the ufs part of `sync'.  It goes through the disk
1850Sstevel@tonic-gate  * queues to initiate sandbagged IO; goes through the inodes to write
1860Sstevel@tonic-gate  * modified nodes; and it goes through the mount table to initiate
1870Sstevel@tonic-gate  * the writing of the modified super blocks.
1880Sstevel@tonic-gate  */
1890Sstevel@tonic-gate extern time_t	time;
1900Sstevel@tonic-gate time_t		ufs_sync_time;
1910Sstevel@tonic-gate time_t		ufs_sync_time_secs = 1;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate extern kmutex_t	ufs_scan_lock;
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate void
1960Sstevel@tonic-gate ufs_update(int flag)
1970Sstevel@tonic-gate {
1980Sstevel@tonic-gate 	struct vfs *vfsp;
1990Sstevel@tonic-gate 	struct fs *fs;
2000Sstevel@tonic-gate 	struct ufsvfs *ufsp;
2010Sstevel@tonic-gate 	struct ufsvfs *ufsnext;
2020Sstevel@tonic-gate 	struct ufsvfs *update_list = NULL;
2030Sstevel@tonic-gate 	int check_cnt = 0;
2040Sstevel@tonic-gate 	size_t check_size;
2050Sstevel@tonic-gate 	struct check_node *check_list, *ptr;
2060Sstevel@tonic-gate 	int cheap = flag & SYNC_ATTR;
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 	/*
2090Sstevel@tonic-gate 	 * This is a hack.  A design flaw in the forced unmount protocol
2100Sstevel@tonic-gate 	 * could allow a thread to attempt to use a kmem_freed ufsvfs
2110Sstevel@tonic-gate 	 * structure in ufs_lockfs_begin/ufs_check_lockfs.  This window
2120Sstevel@tonic-gate 	 * is difficult to hit, even during the lockfs stress tests.
2130Sstevel@tonic-gate 	 * So the hacky fix is to wait awhile before kmem_free'ing the
2140Sstevel@tonic-gate 	 * ufsvfs structures for forcibly unmounted file systems.  `Awhile'
2150Sstevel@tonic-gate 	 * is defined as every other call from fsflush (~60 seconds).
2160Sstevel@tonic-gate 	 */
2170Sstevel@tonic-gate 	if (cheap)
2180Sstevel@tonic-gate 		ufs_funmount_cleanup();
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 	/*
2210Sstevel@tonic-gate 	 * Examine all ufsvfs structures and add those that we can lock to the
2220Sstevel@tonic-gate 	 * update list.  This is so that we don't hold the list lock for a
2230Sstevel@tonic-gate 	 * long time.  If vfs_lock fails for a file system instance, then skip
2240Sstevel@tonic-gate 	 * it because somebody is doing a unmount on it.
2250Sstevel@tonic-gate 	 */
2260Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
2270Sstevel@tonic-gate 	for (ufsp = ufs_instances; ufsp != NULL; ufsp = ufsp->vfs_next) {
2280Sstevel@tonic-gate 		vfsp = ufsp->vfs_vfs;
2290Sstevel@tonic-gate 		if (vfs_lock(vfsp) != 0)
2300Sstevel@tonic-gate 			continue;
2310Sstevel@tonic-gate 		ufsp->vfs_wnext = update_list;
2320Sstevel@tonic-gate 		update_list = ufsp;
2330Sstevel@tonic-gate 		check_cnt++;
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	if (update_list == NULL)
2380Sstevel@tonic-gate 		return;
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	check_size = sizeof (struct check_node) * check_cnt;
2410Sstevel@tonic-gate 	check_list = ptr = kmem_alloc(check_size, KM_NOSLEEP);
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	/*
2440Sstevel@tonic-gate 	 * Write back modified superblocks.
2450Sstevel@tonic-gate 	 * Consistency check that the superblock of
2460Sstevel@tonic-gate 	 * each file system is still in the buffer cache.
2470Sstevel@tonic-gate 	 *
2480Sstevel@tonic-gate 	 * Note that the update_list traversal is done without the protection
2490Sstevel@tonic-gate 	 * of an overall list lock, so it's necessary to rely on the fact that
2500Sstevel@tonic-gate 	 * each entry of the list is vfs_locked when moving from one entry to
2510Sstevel@tonic-gate 	 * the next.  This works because a concurrent attempt to add an entry
2520Sstevel@tonic-gate 	 * to another thread's update_list won't find it, since it'll already
2530Sstevel@tonic-gate 	 * be locked.
2540Sstevel@tonic-gate 	 */
2550Sstevel@tonic-gate 	check_cnt = 0;
2560Sstevel@tonic-gate 	for (ufsp = update_list; ufsp != NULL; ufsp = ufsnext) {
2570Sstevel@tonic-gate 		/*
2580Sstevel@tonic-gate 		 * Need to grab the next ptr before we unlock this one so
2590Sstevel@tonic-gate 		 * another thread doesn't grab it and change it before we move
2600Sstevel@tonic-gate 		 * on to the next vfs.  (Once we unlock it, it's ok if another
2610Sstevel@tonic-gate 		 * thread finds it to add it to its own update_list; we don't
2620Sstevel@tonic-gate 		 * attempt to refer to it through our list any more.)
2630Sstevel@tonic-gate 		 */
2640Sstevel@tonic-gate 		ufsnext = ufsp->vfs_wnext;
2650Sstevel@tonic-gate 		vfsp = ufsp->vfs_vfs;
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 		/*
2680Sstevel@tonic-gate 		 * Seems like this can't happen, so perhaps it should become
2690Sstevel@tonic-gate 		 * an ASSERT(vfsp->vfs_data != NULL).
2700Sstevel@tonic-gate 		 */
2710Sstevel@tonic-gate 		if (!vfsp->vfs_data) {
2720Sstevel@tonic-gate 			vfs_unlock(vfsp);
2730Sstevel@tonic-gate 			continue;
2740Sstevel@tonic-gate 		}
2750Sstevel@tonic-gate 
2760Sstevel@tonic-gate 		fs = ufsp->vfs_fs;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 		/*
2790Sstevel@tonic-gate 		 * don't update a locked superblock during a panic; it
2800Sstevel@tonic-gate 		 * may be in an inconsistent state
2810Sstevel@tonic-gate 		 */
2820Sstevel@tonic-gate 		if (panicstr) {
2830Sstevel@tonic-gate 			if (!mutex_tryenter(&ufsp->vfs_lock)) {
2840Sstevel@tonic-gate 				vfs_unlock(vfsp);
2850Sstevel@tonic-gate 				continue;
2860Sstevel@tonic-gate 			}
2870Sstevel@tonic-gate 		} else
2880Sstevel@tonic-gate 			mutex_enter(&ufsp->vfs_lock);
2890Sstevel@tonic-gate 		/*
2900Sstevel@tonic-gate 		 * Build up the STABLE check list, so we can unlock the vfs
2910Sstevel@tonic-gate 		 * until we do the actual checking.
2920Sstevel@tonic-gate 		 */
2930Sstevel@tonic-gate 		if (check_list != NULL) {
2940Sstevel@tonic-gate 			if ((fs->fs_ronly == 0) &&
2950Sstevel@tonic-gate 			    (fs->fs_clean != FSBAD) &&
2960Sstevel@tonic-gate 			    (fs->fs_clean != FSSUSPEND)) {
2970Sstevel@tonic-gate 				ptr->vfsp = vfsp;
2980Sstevel@tonic-gate 				ptr->ufsvfs = ufsp;
2990Sstevel@tonic-gate 				ptr->vfs_dev = vfsp->vfs_dev;
3000Sstevel@tonic-gate 				ptr++;
3010Sstevel@tonic-gate 				check_cnt++;
3020Sstevel@tonic-gate 			}
3030Sstevel@tonic-gate 		}
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 		/*
3060Sstevel@tonic-gate 		 * superblock is not modified
3070Sstevel@tonic-gate 		 */
3080Sstevel@tonic-gate 		if (fs->fs_fmod == 0) {
3090Sstevel@tonic-gate 			mutex_exit(&ufsp->vfs_lock);
3100Sstevel@tonic-gate 			vfs_unlock(vfsp);
3110Sstevel@tonic-gate 			continue;
3120Sstevel@tonic-gate 		}
3130Sstevel@tonic-gate 		if (fs->fs_ronly != 0) {
3140Sstevel@tonic-gate 			mutex_exit(&ufsp->vfs_lock);
3150Sstevel@tonic-gate 			vfs_unlock(vfsp);
3160Sstevel@tonic-gate 			(void) ufs_fault(ufsp->vfs_root,
3174662Sfrankho 			    "fs = %s update: ro fs mod\n", fs->fs_fsmnt);
3180Sstevel@tonic-gate 			/*
3190Sstevel@tonic-gate 			 * XXX:	Why is this a return instead of a continue?
3200Sstevel@tonic-gate 			 *	This may be an attempt to replace a panic with
3210Sstevel@tonic-gate 			 *	something less drastic, but there's cleanup we
3220Sstevel@tonic-gate 			 *	should be doing that's not being done (e.g.,
3230Sstevel@tonic-gate 			 *	unlocking the remaining entries on the list).
3240Sstevel@tonic-gate 			 */
3250Sstevel@tonic-gate 			return;
3260Sstevel@tonic-gate 		}
3270Sstevel@tonic-gate 		fs->fs_fmod = 0;
3280Sstevel@tonic-gate 		mutex_exit(&ufsp->vfs_lock);
3290Sstevel@tonic-gate 		TRANS_SBUPDATE(ufsp, vfsp, TOP_SBUPDATE_UPDATE);
3300Sstevel@tonic-gate 		vfs_unlock(vfsp);
3310Sstevel@tonic-gate 	}
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 	ufs_sync_time = time;
3340Sstevel@tonic-gate 
3350Sstevel@tonic-gate 	/*
3360Sstevel@tonic-gate 	 * Avoid racing with ufs_unmount() and ufs_sync().
3370Sstevel@tonic-gate 	 */
3380Sstevel@tonic-gate 	mutex_enter(&ufs_scan_lock);
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	(void) ufs_scan_inodes(1, ufs_sync_inode, (void *)(uintptr_t)cheap,
3410Sstevel@tonic-gate 	    NULL);
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	mutex_exit(&ufs_scan_lock);
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	/*
3460Sstevel@tonic-gate 	 * Force stale buffer cache information to be flushed,
3470Sstevel@tonic-gate 	 * for all devices.  This should cause any remaining control
3480Sstevel@tonic-gate 	 * information (e.g., cg and inode info) to be flushed back.
3490Sstevel@tonic-gate 	 */
3500Sstevel@tonic-gate 	bflush((dev_t)NODEV);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	if (check_list == NULL)
3530Sstevel@tonic-gate 		return;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	/*
3560Sstevel@tonic-gate 	 * For each UFS filesystem in the STABLE check_list, update
3570Sstevel@tonic-gate 	 * the clean flag if warranted.
3580Sstevel@tonic-gate 	 */
3590Sstevel@tonic-gate 	for (ptr = check_list; check_cnt > 0; check_cnt--, ptr++) {
3600Sstevel@tonic-gate 		int	error;
3610Sstevel@tonic-gate 
3620Sstevel@tonic-gate 		/*
3630Sstevel@tonic-gate 		 * still_mounted() returns with vfsp and the vfs_reflock
3640Sstevel@tonic-gate 		 * held if ptr refers to a vfs that is still mounted.
3650Sstevel@tonic-gate 		 */
3660Sstevel@tonic-gate 		if ((vfsp = still_mounted(ptr)) == NULL)
3670Sstevel@tonic-gate 			continue;
3680Sstevel@tonic-gate 		ufs_checkclean(vfsp);
3690Sstevel@tonic-gate 		/*
3700Sstevel@tonic-gate 		 * commit any outstanding async transactions
3710Sstevel@tonic-gate 		 */
3720Sstevel@tonic-gate 		ufsp = (struct ufsvfs *)vfsp->vfs_data;
3730Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
3740Sstevel@tonic-gate 		TRANS_BEGIN_SYNC(ufsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE,
3750Sstevel@tonic-gate 		    error);
3760Sstevel@tonic-gate 		if (!error) {
3770Sstevel@tonic-gate 			TRANS_END_SYNC(ufsp, error, TOP_COMMIT_UPDATE,
3784662Sfrankho 			    TOP_COMMIT_SIZE);
3790Sstevel@tonic-gate 		}
3800Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
3810Sstevel@tonic-gate 
3820Sstevel@tonic-gate 		vfs_unlock(vfsp);
3830Sstevel@tonic-gate 	}
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 	kmem_free(check_list, check_size);
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate int
3890Sstevel@tonic-gate ufs_sync_inode(struct inode *ip, void *arg)
3900Sstevel@tonic-gate {
3910Sstevel@tonic-gate 	int cheap = (int)(uintptr_t)arg;
3920Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
3930Sstevel@tonic-gate 	uint_t flag = ip->i_flag;
3940Sstevel@tonic-gate 
3950Sstevel@tonic-gate 	if (cheap && ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) == 0))
3960Sstevel@tonic-gate 		return (0);
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate 	/*
3990Sstevel@tonic-gate 	 * if we are panic'ing; then don't update the inode if this
4000Sstevel@tonic-gate 	 * file system is FSSTABLE.  Otherwise, we would have to
4010Sstevel@tonic-gate 	 * force the superblock to FSACTIVE and the superblock
4020Sstevel@tonic-gate 	 * may not be in a good state.  Also, if the inode is
4030Sstevel@tonic-gate 	 * IREF'ed then it may be in an inconsistent state.  Don't
4040Sstevel@tonic-gate 	 * push it.  Finally, don't push the inode if the fs is
4050Sstevel@tonic-gate 	 * logging; the transaction will be discarded at boot.
4060Sstevel@tonic-gate 	 */
4070Sstevel@tonic-gate 	if (panicstr) {
4080Sstevel@tonic-gate 
4090Sstevel@tonic-gate 		if (flag & IREF)
4100Sstevel@tonic-gate 			return (0);
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 		if (ip->i_ufsvfs == NULL ||
4130Sstevel@tonic-gate 		    (ip->i_fs->fs_clean == FSSTABLE ||
4140Sstevel@tonic-gate 		    ip->i_fs->fs_clean == FSLOG))
4150Sstevel@tonic-gate 				return (0);
4160Sstevel@tonic-gate 	}
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 	/*
4210Sstevel@tonic-gate 	 * Limit access time only updates
4220Sstevel@tonic-gate 	 */
4230Sstevel@tonic-gate 	if (((flag & (IMOD|IMODACC|IUPD|ICHG|IACC)) == IMODACC) && ufsvfsp) {
4240Sstevel@tonic-gate 		/*
4250Sstevel@tonic-gate 		 * if file system has deferred access time turned on and there
4260Sstevel@tonic-gate 		 * was no IO recently, don't bother flushing it. It will be
4270Sstevel@tonic-gate 		 * flushed when I/Os start again.
4280Sstevel@tonic-gate 		 */
4290Sstevel@tonic-gate 		if (cheap && (ufsvfsp->vfs_dfritime & UFS_DFRATIME) &&
4300Sstevel@tonic-gate 		    (ufsvfsp->vfs_iotstamp + ufs_iowait < lbolt))
4310Sstevel@tonic-gate 			return (0);
4320Sstevel@tonic-gate 		/*
4330Sstevel@tonic-gate 		 * an app issueing a sync() can take forever on a trans device
4340Sstevel@tonic-gate 		 * when NetWorker or find is running because all of the
4350Sstevel@tonic-gate 		 * directorys' access times have to be updated. So, we limit
4360Sstevel@tonic-gate 		 * the time we spend updating access times per sync.
4370Sstevel@tonic-gate 		 */
4380Sstevel@tonic-gate 		if (TRANS_ISTRANS(ufsvfsp) && ((ufs_sync_time +
4390Sstevel@tonic-gate 		    ufs_sync_time_secs) < time))
4400Sstevel@tonic-gate 			return (0);
4410Sstevel@tonic-gate 	}
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate 	/*
4440Sstevel@tonic-gate 	 * if we are running on behalf of the flush thread or this is
4450Sstevel@tonic-gate 	 * a swap file, then simply do a delay update of the inode.
4460Sstevel@tonic-gate 	 * Otherwise, push the pages and then do a delayed inode update.
4470Sstevel@tonic-gate 	 */
4480Sstevel@tonic-gate 	if (cheap || IS_SWAPVP(ITOV(ip))) {
4490Sstevel@tonic-gate 		TRANS_IUPDAT(ip, 0);
4500Sstevel@tonic-gate 	} else {
4510Sstevel@tonic-gate 		(void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_SYNC);
4520Sstevel@tonic-gate 	}
4530Sstevel@tonic-gate 	return (0);
4540Sstevel@tonic-gate }
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate /*
4570Sstevel@tonic-gate  * Flush all the pages associated with an inode using the given 'flags',
4580Sstevel@tonic-gate  * then force inode information to be written back using the given 'waitfor'.
4590Sstevel@tonic-gate  */
4600Sstevel@tonic-gate int
4610Sstevel@tonic-gate ufs_syncip(struct inode *ip, int flags, int waitfor, top_t topid)
4620Sstevel@tonic-gate {
4630Sstevel@tonic-gate 	int	error;
4640Sstevel@tonic-gate 	struct vnode *vp = ITOV(ip);
4650Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
4660Sstevel@tonic-gate 	int dotrans = 0;
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 	/*
4690Sstevel@tonic-gate 	 * Return if file system has been forcibly umounted.
4700Sstevel@tonic-gate 	 */
4710Sstevel@tonic-gate 	if (ufsvfsp == NULL)
4720Sstevel@tonic-gate 		return (EIO);
4730Sstevel@tonic-gate 	/*
4740Sstevel@tonic-gate 	 * don't need to VOP_PUTPAGE if there are no pages
4750Sstevel@tonic-gate 	 */
4760Sstevel@tonic-gate 	if (!vn_has_cached_data(vp) || vp->v_type == VCHR) {
4770Sstevel@tonic-gate 		error = 0;
4780Sstevel@tonic-gate 	} else {
4790Sstevel@tonic-gate 		/*
4800Sstevel@tonic-gate 		 * if the inode we're working on is a shadow inode
4810Sstevel@tonic-gate 		 * or quota inode we need to make sure that the
4820Sstevel@tonic-gate 		 * ufs_putpage call is inside a transaction as this
4830Sstevel@tonic-gate 		 * could include meta data changes.
4840Sstevel@tonic-gate 		 */
4850Sstevel@tonic-gate 		if ((ip->i_mode & IFMT) == IFSHAD ||
4864662Sfrankho 		    ufsvfsp->vfs_qinod == ip) {
4870Sstevel@tonic-gate 			dotrans = 1;
4880Sstevel@tonic-gate 			curthread->t_flag |= T_DONTBLOCK;
4890Sstevel@tonic-gate 			TRANS_BEGIN_ASYNC(ufsvfsp, TOP_PUTPAGE,
4900Sstevel@tonic-gate 			    TOP_PUTPAGE_SIZE(ip));
4910Sstevel@tonic-gate 		}
492*5331Samw 		error = VOP_PUTPAGE(vp, (offset_t)0, (size_t)0,
493*5331Samw 		    flags, CRED(), NULL);
4940Sstevel@tonic-gate 		if (dotrans) {
4950Sstevel@tonic-gate 			TRANS_END_ASYNC(ufsvfsp, TOP_PUTPAGE,
4960Sstevel@tonic-gate 			    TOP_PUTPAGE_SIZE(ip));
4970Sstevel@tonic-gate 			curthread->t_flag &= ~T_DONTBLOCK;
4980Sstevel@tonic-gate 			dotrans = 0;
4990Sstevel@tonic-gate 		}
5000Sstevel@tonic-gate 	}
5010Sstevel@tonic-gate 	if (panicstr && TRANS_ISTRANS(ufsvfsp))
5020Sstevel@tonic-gate 		goto out;
5030Sstevel@tonic-gate 	/*
5040Sstevel@tonic-gate 	 * waitfor represents two things -
5050Sstevel@tonic-gate 	 * 1. whether data sync or file sync.
5060Sstevel@tonic-gate 	 * 2. if file sync then ufs_iupdat should 'waitfor' disk i/o or not.
5070Sstevel@tonic-gate 	 */
5080Sstevel@tonic-gate 	if (waitfor == I_DSYNC) {
5090Sstevel@tonic-gate 		/*
5100Sstevel@tonic-gate 		 * If data sync, only IATTCHG (size/block change) requires
5110Sstevel@tonic-gate 		 * inode update, fdatasync()/FDSYNC implementation.
5120Sstevel@tonic-gate 		 */
5130Sstevel@tonic-gate 		if (ip->i_flag & (IBDWRITE|IATTCHG)) {
5140Sstevel@tonic-gate 			/*
5150Sstevel@tonic-gate 			 * Enter a transaction to provide mutual exclusion
5160Sstevel@tonic-gate 			 * with deltamap_push and avoid a race where
5170Sstevel@tonic-gate 			 * the inode flush could get dropped.
5180Sstevel@tonic-gate 			 */
5190Sstevel@tonic-gate 			if ((curthread->t_flag & T_DONTBLOCK) == 0) {
5200Sstevel@tonic-gate 				dotrans = 1;
5210Sstevel@tonic-gate 				curthread->t_flag |= T_DONTBLOCK;
5220Sstevel@tonic-gate 				TRANS_BEGIN_ASYNC(ufsvfsp, topid,
5230Sstevel@tonic-gate 				    TOP_SYNCIP_SIZE);
5240Sstevel@tonic-gate 			}
5250Sstevel@tonic-gate 			rw_enter(&ip->i_contents, RW_READER);
5260Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
5270Sstevel@tonic-gate 			ip->i_flag &= ~IMODTIME;
5280Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
5290Sstevel@tonic-gate 			ufs_iupdat(ip, 1);
5300Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
5310Sstevel@tonic-gate 			if (dotrans) {
5320Sstevel@tonic-gate 				TRANS_END_ASYNC(ufsvfsp, topid,
5330Sstevel@tonic-gate 				    TOP_SYNCIP_SIZE);
5340Sstevel@tonic-gate 				curthread->t_flag &= ~T_DONTBLOCK;
5350Sstevel@tonic-gate 			}
5360Sstevel@tonic-gate 		}
5370Sstevel@tonic-gate 	} else {
5380Sstevel@tonic-gate 		/* For file sync, any inode change requires inode update */
5390Sstevel@tonic-gate 		if (ip->i_flag & (IBDWRITE|IUPD|IACC|ICHG|IMOD|IMODACC)) {
5400Sstevel@tonic-gate 			/*
5410Sstevel@tonic-gate 			 * Enter a transaction to provide mutual exclusion
5420Sstevel@tonic-gate 			 * with deltamap_push and avoid a race where
5430Sstevel@tonic-gate 			 * the inode flush could get dropped.
5440Sstevel@tonic-gate 			 */
5450Sstevel@tonic-gate 			if ((curthread->t_flag & T_DONTBLOCK) == 0) {
5460Sstevel@tonic-gate 				dotrans = 1;
5470Sstevel@tonic-gate 				curthread->t_flag |= T_DONTBLOCK;
5480Sstevel@tonic-gate 				TRANS_BEGIN_ASYNC(ufsvfsp, topid,
5490Sstevel@tonic-gate 				    TOP_SYNCIP_SIZE);
5500Sstevel@tonic-gate 			}
5510Sstevel@tonic-gate 			rw_enter(&ip->i_contents, RW_READER);
5520Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
5530Sstevel@tonic-gate 			ip->i_flag &= ~IMODTIME;
5540Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
5550Sstevel@tonic-gate 			ufs_iupdat(ip, waitfor);
5560Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
5570Sstevel@tonic-gate 			if (dotrans) {
5580Sstevel@tonic-gate 				TRANS_END_ASYNC(ufsvfsp, topid,
5590Sstevel@tonic-gate 				    TOP_SYNCIP_SIZE);
5600Sstevel@tonic-gate 				curthread->t_flag &= ~T_DONTBLOCK;
5610Sstevel@tonic-gate 			}
5620Sstevel@tonic-gate 		}
5630Sstevel@tonic-gate 	}
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate out:
5660Sstevel@tonic-gate 	return (error);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate /*
5690Sstevel@tonic-gate  * Flush all indirect blocks related to an inode.
5700Sstevel@tonic-gate  * Supports triple indirect blocks also.
5710Sstevel@tonic-gate  */
5720Sstevel@tonic-gate int
5730Sstevel@tonic-gate ufs_sync_indir(struct inode *ip)
5740Sstevel@tonic-gate {
5750Sstevel@tonic-gate 	int i;
5760Sstevel@tonic-gate 	daddr_t blkno;
5770Sstevel@tonic-gate 	daddr_t lbn;	/* logical blkno of last blk in file */
5780Sstevel@tonic-gate 	daddr_t clbn;	/* current logical blk */
5790Sstevel@tonic-gate 	daddr32_t *bap;
5800Sstevel@tonic-gate 	struct fs *fs;
5810Sstevel@tonic-gate 	struct buf *bp;
5820Sstevel@tonic-gate 	int bsize;
5830Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
5840Sstevel@tonic-gate 	int j;
5850Sstevel@tonic-gate 	daddr_t indirect_blkno;
5860Sstevel@tonic-gate 	daddr32_t *indirect_bap;
5870Sstevel@tonic-gate 	struct buf *indirect_bp;
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
5900Sstevel@tonic-gate 	/*
5910Sstevel@tonic-gate 	 * unnecessary when logging; allocation blocks are kept up-to-date
5920Sstevel@tonic-gate 	 */
5930Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp))
5940Sstevel@tonic-gate 		return (0);
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
5970Sstevel@tonic-gate 	bsize = fs->fs_bsize;
5980Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, ip->i_size - 1);
5990Sstevel@tonic-gate 	if (lbn < NDADDR)
6000Sstevel@tonic-gate 		return (0);	/* No indirect blocks used */
6010Sstevel@tonic-gate 	if (lbn < NDADDR + NINDIR(fs)) {
6020Sstevel@tonic-gate 		/* File has one indirect block. */
6030Sstevel@tonic-gate 		blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, ip->i_ib[0]));
6040Sstevel@tonic-gate 		return (0);
6050Sstevel@tonic-gate 	}
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate 	/* Write out all the first level indirect blocks */
6080Sstevel@tonic-gate 	for (i = 0; i <= NIADDR; i++) {
6090Sstevel@tonic-gate 		if ((blkno = ip->i_ib[i]) == 0)
6100Sstevel@tonic-gate 			continue;
6110Sstevel@tonic-gate 		blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
6120Sstevel@tonic-gate 	}
6130Sstevel@tonic-gate 	/* Write out second level of indirect blocks */
6140Sstevel@tonic-gate 	if ((blkno = ip->i_ib[1]) == 0)
6150Sstevel@tonic-gate 		return (0);
6160Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, blkno), bsize);
6170Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
6180Sstevel@tonic-gate 		brelse(bp);
6190Sstevel@tonic-gate 		return (EIO);
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 	bap = bp->b_un.b_daddr;
6220Sstevel@tonic-gate 	clbn = NDADDR + NINDIR(fs);
6230Sstevel@tonic-gate 	for (i = 0; i < NINDIR(fs); i++) {
6240Sstevel@tonic-gate 		if (clbn > lbn)
6250Sstevel@tonic-gate 			break;
6260Sstevel@tonic-gate 		clbn += NINDIR(fs);
6270Sstevel@tonic-gate 		if ((blkno = bap[i]) == 0)
6280Sstevel@tonic-gate 			continue;
6290Sstevel@tonic-gate 		blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
6300Sstevel@tonic-gate 	}
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 	brelse(bp);
6330Sstevel@tonic-gate 	/* write out third level indirect blocks */
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 	if ((blkno = ip->i_ib[2]) == 0)
6360Sstevel@tonic-gate 		return (0);
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, blkno), bsize);
6390Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
6400Sstevel@tonic-gate 		brelse(bp);
6410Sstevel@tonic-gate 		return (EIO);
6420Sstevel@tonic-gate 	}
6430Sstevel@tonic-gate 	bap = bp->b_un.b_daddr;
6440Sstevel@tonic-gate 	clbn = NDADDR + NINDIR(fs) + (NINDIR(fs) * NINDIR(fs));
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	for (i = 0; i < NINDIR(fs); i++) {
6470Sstevel@tonic-gate 		if (clbn > lbn)
6480Sstevel@tonic-gate 			break;
6490Sstevel@tonic-gate 		if ((indirect_blkno = bap[i]) == 0)
6500Sstevel@tonic-gate 			continue;
6510Sstevel@tonic-gate 		blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, indirect_blkno));
6520Sstevel@tonic-gate 		indirect_bp = UFS_BREAD(ufsvfsp, ip->i_dev,
6534662Sfrankho 		    (daddr_t)fsbtodb(fs, indirect_blkno), bsize);
6540Sstevel@tonic-gate 		if (indirect_bp->b_flags & B_ERROR) {
6550Sstevel@tonic-gate 			brelse(indirect_bp);
6560Sstevel@tonic-gate 			brelse(bp);
6570Sstevel@tonic-gate 			return (EIO);
6580Sstevel@tonic-gate 		}
6590Sstevel@tonic-gate 		indirect_bap = indirect_bp->b_un.b_daddr;
6600Sstevel@tonic-gate 		for (j = 0; j < NINDIR(fs); j++) {
6610Sstevel@tonic-gate 			if (clbn > lbn)
6620Sstevel@tonic-gate 				break;
6630Sstevel@tonic-gate 			clbn += NINDIR(fs);
6640Sstevel@tonic-gate 			if ((blkno = indirect_bap[j]) == 0)
6650Sstevel@tonic-gate 				continue;
6660Sstevel@tonic-gate 			blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
6670Sstevel@tonic-gate 		}
6680Sstevel@tonic-gate 		brelse(indirect_bp);
6690Sstevel@tonic-gate 	}
6700Sstevel@tonic-gate 	brelse(bp);
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	return (0);
6730Sstevel@tonic-gate }
6740Sstevel@tonic-gate 
6750Sstevel@tonic-gate /*
6760Sstevel@tonic-gate  * Flush all indirect blocks related to an offset of a file.
6770Sstevel@tonic-gate  * read/write in sync mode may have to flush indirect blocks.
6780Sstevel@tonic-gate  */
6790Sstevel@tonic-gate int
6800Sstevel@tonic-gate ufs_indirblk_sync(struct inode *ip, offset_t off)
6810Sstevel@tonic-gate {
6820Sstevel@tonic-gate 	daddr_t	lbn;
6830Sstevel@tonic-gate 	struct	fs *fs;
6840Sstevel@tonic-gate 	struct	buf *bp;
6850Sstevel@tonic-gate 	int	i, j, shft;
6860Sstevel@tonic-gate 	daddr_t	ob, nb, tbn;
6870Sstevel@tonic-gate 	daddr32_t *bap;
6880Sstevel@tonic-gate 	int	nindirshift, nindiroffset;
6890Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
6920Sstevel@tonic-gate 	/*
6930Sstevel@tonic-gate 	 * unnecessary when logging; allocation blocks are kept up-to-date
6940Sstevel@tonic-gate 	 */
6950Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp))
6960Sstevel@tonic-gate 		return (0);
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
6990Sstevel@tonic-gate 
7000Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, off);
7010Sstevel@tonic-gate 	if (lbn < 0)
7020Sstevel@tonic-gate 		return (EFBIG);
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate 	/* The first NDADDR are direct so nothing to do */
7050Sstevel@tonic-gate 	if (lbn < NDADDR)
7060Sstevel@tonic-gate 		return (0);
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
7090Sstevel@tonic-gate 	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 	/* Determine level of indirect blocks */
7120Sstevel@tonic-gate 	shft = 0;
7130Sstevel@tonic-gate 	tbn = lbn - NDADDR;
7140Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
7150Sstevel@tonic-gate 		longlong_t	sh;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 		shft += nindirshift;
7180Sstevel@tonic-gate 		sh = 1LL << shft;
7190Sstevel@tonic-gate 		if (tbn < sh)
7200Sstevel@tonic-gate 			break;
7210Sstevel@tonic-gate 		tbn -= (daddr_t)sh;
7220Sstevel@tonic-gate 	}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 	if (j == 0)
7250Sstevel@tonic-gate 		return (EFBIG);
7260Sstevel@tonic-gate 
7270Sstevel@tonic-gate 	if ((nb = ip->i_ib[NIADDR - j]) == 0)
7280Sstevel@tonic-gate 			return (0);		/* UFS Hole */
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	/* Flush first level indirect block */
7310Sstevel@tonic-gate 	blkflush(ip->i_dev, fsbtodb(fs, nb));
7320Sstevel@tonic-gate 
7330Sstevel@tonic-gate 	/* Fetch through next levels */
7340Sstevel@tonic-gate 	for (; j < NIADDR; j++) {
7350Sstevel@tonic-gate 		ob = nb;
7360Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp,
7374662Sfrankho 		    ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
7380Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
7390Sstevel@tonic-gate 			brelse(bp);
7400Sstevel@tonic-gate 			return (EIO);
7410Sstevel@tonic-gate 		}
7420Sstevel@tonic-gate 		bap = bp->b_un.b_daddr;
7430Sstevel@tonic-gate 		shft -= nindirshift;		/* sh / nindir */
7440Sstevel@tonic-gate 		i = (tbn >> shft) & nindiroffset; /* (tbn /sh) & nindir */
7450Sstevel@tonic-gate 		nb = bap[i];
7460Sstevel@tonic-gate 		brelse(bp);
7470Sstevel@tonic-gate 		if (nb == 0) {
7480Sstevel@tonic-gate 			return (0); 		/* UFS hole */
7490Sstevel@tonic-gate 		}
7500Sstevel@tonic-gate 		blkflush(ip->i_dev, fsbtodb(fs, nb));
7510Sstevel@tonic-gate 	}
7520Sstevel@tonic-gate 	return (0);
7530Sstevel@tonic-gate }
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate #ifdef DEBUG
7560Sstevel@tonic-gate 
/*
 * The bad block checking routines, ufs_indir_badblock() and ufs_badblock(),
 * are very expensive. Profiling has shown that we spend 6-7% of our time
 * in ufs_badblock(), and another 1-2% in ufs_indir_badblock(). They are
 * only called via ASSERTs (that is, from debug kernels), and in recent
 * years they have not found any failures. The checks are therefore
 * disabled by default; set the following tunable to a non-zero value to
 * enable them.
 */
7650Sstevel@tonic-gate int ufs_badblock_checks = 0;
7660Sstevel@tonic-gate 
7670Sstevel@tonic-gate /*
7680Sstevel@tonic-gate  * Check that a given indirect block contains blocks in range
7690Sstevel@tonic-gate  */
7700Sstevel@tonic-gate int
7710Sstevel@tonic-gate ufs_indir_badblock(struct inode *ip, daddr32_t *bap)
7720Sstevel@tonic-gate {
7730Sstevel@tonic-gate 	int i;
7740Sstevel@tonic-gate 	int err = 0;
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	if (ufs_badblock_checks) {
7770Sstevel@tonic-gate 		for (i = 0; i < NINDIR(ip->i_fs) - 1; i++)
7780Sstevel@tonic-gate 			if (bap[i] != 0 && (err = ufs_badblock(ip, bap[i])))
7790Sstevel@tonic-gate 				break;
7800Sstevel@tonic-gate 	}
7810Sstevel@tonic-gate 	return (err);
7820Sstevel@tonic-gate }
7830Sstevel@tonic-gate 
7840Sstevel@tonic-gate /*
7850Sstevel@tonic-gate  * Check that a specified block number is in range.
7860Sstevel@tonic-gate  */
7870Sstevel@tonic-gate int
7880Sstevel@tonic-gate ufs_badblock(struct inode *ip, daddr_t bn)
7890Sstevel@tonic-gate {
7900Sstevel@tonic-gate 	long	c;
7910Sstevel@tonic-gate 	daddr_t	sum;
7920Sstevel@tonic-gate 
7930Sstevel@tonic-gate 	if (!ufs_badblock_checks)
7940Sstevel@tonic-gate 		return (0);
7950Sstevel@tonic-gate 	ASSERT(bn);
7960Sstevel@tonic-gate 	if (bn <= 0 || bn > ip->i_fs->fs_size)
7970Sstevel@tonic-gate 		return (bn);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 	sum = 0;
8000Sstevel@tonic-gate 	c = dtog(ip->i_fs, bn);
8010Sstevel@tonic-gate 	if (c == 0) {
8020Sstevel@tonic-gate 		sum = howmany(ip->i_fs->fs_cssize, ip->i_fs->fs_fsize);
8030Sstevel@tonic-gate 	}
8040Sstevel@tonic-gate 	/*
8050Sstevel@tonic-gate 	 * if block no. is below this cylinder group,
8060Sstevel@tonic-gate 	 * within the space reserved for superblock, inodes, (summary data)
8070Sstevel@tonic-gate 	 * or if it is above this cylinder group
8080Sstevel@tonic-gate 	 * then its invalid
8090Sstevel@tonic-gate 	 * It's hard to see how we'd be outside this cyl, but let's be careful.
8100Sstevel@tonic-gate 	 */
8110Sstevel@tonic-gate 	if ((bn < cgbase(ip->i_fs, c)) ||
8120Sstevel@tonic-gate 	    (bn >= cgsblock(ip->i_fs, c) && bn < cgdmin(ip->i_fs, c)+sum) ||
8130Sstevel@tonic-gate 	    (bn >= (unsigned)cgbase(ip->i_fs, c+1)))
8140Sstevel@tonic-gate 		return (bn);
8150Sstevel@tonic-gate 
8160Sstevel@tonic-gate 	return (0);	/* not a bad block */
8170Sstevel@tonic-gate }
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate #endif /* DEBUG */
8200Sstevel@tonic-gate 
/*
 * When i_rwlock is write-locked or has a writer pending, the inode is
 * about to change in a way that will mark the filesystem as active, so
 * there is no need to let the filesystem be marked as stable now.
 * Also, to ensure filesystem consistency during directory operations,
 * the filesystem cannot be marked as stable while the i_rwlock of the
 * directory inode is write-locked.
 */
8290Sstevel@tonic-gate 
8300Sstevel@tonic-gate /*
8310Sstevel@tonic-gate  * Check for busy inodes for this filesystem.
8320Sstevel@tonic-gate  * NOTE: Needs better way to do this expensive operation in the future.
8330Sstevel@tonic-gate  */
8340Sstevel@tonic-gate static void
8350Sstevel@tonic-gate ufs_icheck(struct ufsvfs *ufsvfsp, int *isbusyp, int *isreclaimp)
8360Sstevel@tonic-gate {
8370Sstevel@tonic-gate 	union  ihead	*ih;
8380Sstevel@tonic-gate 	struct inode	*ip;
8390Sstevel@tonic-gate 	int		i;
8400Sstevel@tonic-gate 	int		isnottrans	= !TRANS_ISTRANS(ufsvfsp);
8410Sstevel@tonic-gate 	int		isbusy		= *isbusyp;
8420Sstevel@tonic-gate 	int		isreclaim	= *isreclaimp;
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate 	for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
8450Sstevel@tonic-gate 		mutex_enter(&ih_lock[i]);
8460Sstevel@tonic-gate 		for (ip = ih->ih_chain[0];
8470Sstevel@tonic-gate 		    ip != (struct inode *)ih;
8480Sstevel@tonic-gate 		    ip = ip->i_forw) {
8490Sstevel@tonic-gate 			/*
8500Sstevel@tonic-gate 			 * if inode is busy/modified/deleted, filesystem is busy
8510Sstevel@tonic-gate 			 */
8520Sstevel@tonic-gate 			if (ip->i_ufsvfs != ufsvfsp)
8530Sstevel@tonic-gate 				continue;
8540Sstevel@tonic-gate 			if ((ip->i_flag & (IMOD | IUPD | ICHG)) ||
8550Sstevel@tonic-gate 			    (RW_ISWRITER(&ip->i_rwlock)))
8560Sstevel@tonic-gate 				isbusy = 1;
8570Sstevel@tonic-gate 			if ((ip->i_nlink <= 0) && (ip->i_flag & IREF))
8580Sstevel@tonic-gate 				isreclaim = 1;
8590Sstevel@tonic-gate 			if (isbusy && (isreclaim || isnottrans))
8600Sstevel@tonic-gate 				break;
8610Sstevel@tonic-gate 		}
8620Sstevel@tonic-gate 		mutex_exit(&ih_lock[i]);
8630Sstevel@tonic-gate 		if (isbusy && (isreclaim || isnottrans))
8640Sstevel@tonic-gate 			break;
8650Sstevel@tonic-gate 	}
8660Sstevel@tonic-gate 	*isbusyp = isbusy;
8670Sstevel@tonic-gate 	*isreclaimp = isreclaim;
8680Sstevel@tonic-gate }
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate /*
8710Sstevel@tonic-gate  * As part of the ufs 'sync' operation, this routine is called to mark
8720Sstevel@tonic-gate  * the filesystem as STABLE if there is no modified metadata in memory.
8730Sstevel@tonic-gate  */
8740Sstevel@tonic-gate void
8750Sstevel@tonic-gate ufs_checkclean(struct vfs *vfsp)
8760Sstevel@tonic-gate {
8770Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
8780Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
8790Sstevel@tonic-gate 	int		isbusy;
8800Sstevel@tonic-gate 	int		isreclaim;
8810Sstevel@tonic-gate 	int		updatesb;
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	ASSERT(vfs_lock_held(vfsp));
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate 	/*
8860Sstevel@tonic-gate 	 * filesystem is stable or cleanflag processing is disabled; do nothing
8870Sstevel@tonic-gate 	 *	no transitions when panic'ing
8880Sstevel@tonic-gate 	 */
8890Sstevel@tonic-gate 	if (fs->fs_ronly ||
8900Sstevel@tonic-gate 	    fs->fs_clean == FSBAD ||
8910Sstevel@tonic-gate 	    fs->fs_clean == FSSUSPEND ||
8920Sstevel@tonic-gate 	    fs->fs_clean == FSSTABLE ||
8930Sstevel@tonic-gate 	    panicstr)
8940Sstevel@tonic-gate 		return;
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate 	/*
8970Sstevel@tonic-gate 	 * if logging and nothing to reclaim; do nothing
8980Sstevel@tonic-gate 	 */
8990Sstevel@tonic-gate 	if ((fs->fs_clean == FSLOG) &&
9000Sstevel@tonic-gate 	    (((fs->fs_reclaim & FS_RECLAIM) == 0) ||
9010Sstevel@tonic-gate 	    (fs->fs_reclaim & FS_RECLAIMING)))
9020Sstevel@tonic-gate 		return;
9030Sstevel@tonic-gate 
9040Sstevel@tonic-gate 	/*
9050Sstevel@tonic-gate 	 * FS_CHECKCLEAN is reset if the file system goes dirty
9060Sstevel@tonic-gate 	 * FS_CHECKRECLAIM is reset if a file gets deleted
9070Sstevel@tonic-gate 	 */
9080Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
9090Sstevel@tonic-gate 	fs->fs_reclaim |= (FS_CHECKCLEAN | FS_CHECKRECLAIM);
9100Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
9110Sstevel@tonic-gate 
9120Sstevel@tonic-gate 	updatesb = 0;
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate 	/*
9150Sstevel@tonic-gate 	 * if logging or buffers are busy; do nothing
9160Sstevel@tonic-gate 	 */
9170Sstevel@tonic-gate 	isbusy = isreclaim = 0;
9180Sstevel@tonic-gate 	if ((fs->fs_clean == FSLOG) ||
9190Sstevel@tonic-gate 	    (bcheck(vfsp->vfs_dev, ufsvfsp->vfs_bufp)))
9200Sstevel@tonic-gate 		isbusy = 1;
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	/*
9230Sstevel@tonic-gate 	 * isreclaim == TRUE means can't change the state of fs_reclaim
9240Sstevel@tonic-gate 	 */
9250Sstevel@tonic-gate 	isreclaim =
9264662Sfrankho 	    ((fs->fs_clean == FSLOG) &&
9274662Sfrankho 	    (((fs->fs_reclaim & FS_RECLAIM) == 0) ||
9284662Sfrankho 	    (fs->fs_reclaim & FS_RECLAIMING)));
9290Sstevel@tonic-gate 
9300Sstevel@tonic-gate 	/*
9310Sstevel@tonic-gate 	 * if fs is busy or can't change the state of fs_reclaim; do nothing
9320Sstevel@tonic-gate 	 */
9330Sstevel@tonic-gate 	if (isbusy && isreclaim)
9340Sstevel@tonic-gate 		return;
9350Sstevel@tonic-gate 
9360Sstevel@tonic-gate 	/*
9370Sstevel@tonic-gate 	 * look for busy or deleted inodes; (deleted == needs reclaim)
9380Sstevel@tonic-gate 	 */
9390Sstevel@tonic-gate 	ufs_icheck(ufsvfsp, &isbusy, &isreclaim);
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate 	/*
9440Sstevel@tonic-gate 	 * IF POSSIBLE, RESET RECLAIM
9450Sstevel@tonic-gate 	 */
9460Sstevel@tonic-gate 	/*
9470Sstevel@tonic-gate 	 * the reclaim thread is not running
9480Sstevel@tonic-gate 	 */
9490Sstevel@tonic-gate 	if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
9500Sstevel@tonic-gate 		/*
9510Sstevel@tonic-gate 		 * no files were deleted during the scan
9520Sstevel@tonic-gate 		 */
9530Sstevel@tonic-gate 		if (fs->fs_reclaim & FS_CHECKRECLAIM)
9540Sstevel@tonic-gate 			/*
9550Sstevel@tonic-gate 			 * no deleted files were found in the inode cache
9560Sstevel@tonic-gate 			 */
9570Sstevel@tonic-gate 			if ((isreclaim == 0) && (fs->fs_reclaim & FS_RECLAIM)) {
9580Sstevel@tonic-gate 				fs->fs_reclaim &= ~FS_RECLAIM;
9590Sstevel@tonic-gate 				updatesb = 1;
9600Sstevel@tonic-gate 			}
9610Sstevel@tonic-gate 	/*
9620Sstevel@tonic-gate 	 * IF POSSIBLE, SET STABLE
9630Sstevel@tonic-gate 	 */
9640Sstevel@tonic-gate 	/*
9650Sstevel@tonic-gate 	 * not logging
9660Sstevel@tonic-gate 	 */
9670Sstevel@tonic-gate 	if (fs->fs_clean != FSLOG)
9680Sstevel@tonic-gate 		/*
9690Sstevel@tonic-gate 		 * file system has not gone dirty since the scan began
9700Sstevel@tonic-gate 		 */
9710Sstevel@tonic-gate 		if (fs->fs_reclaim & FS_CHECKCLEAN)
9720Sstevel@tonic-gate 			/*
9730Sstevel@tonic-gate 			 * nothing dirty was found in the buffer or inode cache
9740Sstevel@tonic-gate 			 */
9750Sstevel@tonic-gate 			if ((isbusy == 0) && (isreclaim == 0) &&
9760Sstevel@tonic-gate 			    (fs->fs_clean != FSSTABLE)) {
9770Sstevel@tonic-gate 				fs->fs_clean = FSSTABLE;
9780Sstevel@tonic-gate 				updatesb = 1;
9790Sstevel@tonic-gate 			}
9800Sstevel@tonic-gate 
9810Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
9820Sstevel@tonic-gate 	if (updatesb) {
9830Sstevel@tonic-gate 		TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
9840Sstevel@tonic-gate 	}
9850Sstevel@tonic-gate }
9860Sstevel@tonic-gate 
9870Sstevel@tonic-gate /*
9880Sstevel@tonic-gate  * called whenever an unlink occurs
9890Sstevel@tonic-gate  */
9900Sstevel@tonic-gate void
9910Sstevel@tonic-gate ufs_setreclaim(struct inode *ip)
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
9940Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate 	if (ip->i_nlink || fs->fs_ronly || (fs->fs_clean != FSLOG))
9970Sstevel@tonic-gate 		return;
9980Sstevel@tonic-gate 
9990Sstevel@tonic-gate 	/*
10000Sstevel@tonic-gate 	 * reclaim-needed bit is already set or we need to tell
10010Sstevel@tonic-gate 	 * ufs_checkclean that a file has been deleted
10020Sstevel@tonic-gate 	 */
10030Sstevel@tonic-gate 	if ((fs->fs_reclaim & (FS_RECLAIM | FS_CHECKRECLAIM)) == FS_RECLAIM)
10040Sstevel@tonic-gate 		return;
10050Sstevel@tonic-gate 
10060Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
10070Sstevel@tonic-gate 	/*
10080Sstevel@tonic-gate 	 * inform ufs_checkclean that the file system has gone dirty
10090Sstevel@tonic-gate 	 */
10100Sstevel@tonic-gate 	fs->fs_reclaim &= ~FS_CHECKRECLAIM;
10110Sstevel@tonic-gate 
10120Sstevel@tonic-gate 	/*
10130Sstevel@tonic-gate 	 * set the reclaim-needed bit
10140Sstevel@tonic-gate 	 */
10150Sstevel@tonic-gate 	if ((fs->fs_reclaim & FS_RECLAIM) == 0) {
10160Sstevel@tonic-gate 		fs->fs_reclaim |= FS_RECLAIM;
10170Sstevel@tonic-gate 		ufs_sbwrite(ufsvfsp);
10180Sstevel@tonic-gate 	}
10190Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
10200Sstevel@tonic-gate }
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate /*
10230Sstevel@tonic-gate  * Before any modified metadata written back to the disk, this routine
10240Sstevel@tonic-gate  * is called to mark the filesystem as ACTIVE.
10250Sstevel@tonic-gate  */
10260Sstevel@tonic-gate void
10270Sstevel@tonic-gate ufs_notclean(struct ufsvfs *ufsvfsp)
10280Sstevel@tonic-gate {
10290Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
10300Sstevel@tonic-gate 
10310Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
10320Sstevel@tonic-gate 	ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 	/*
10350Sstevel@tonic-gate 	 * inform ufs_checkclean that the file system has gone dirty
10360Sstevel@tonic-gate 	 */
10370Sstevel@tonic-gate 	fs->fs_reclaim &= ~FS_CHECKCLEAN;
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 	/*
10400Sstevel@tonic-gate 	 * ignore if active or bad or suspended or readonly or logging
10410Sstevel@tonic-gate 	 */
10420Sstevel@tonic-gate 	if ((fs->fs_clean == FSACTIVE) || (fs->fs_clean == FSLOG) ||
10430Sstevel@tonic-gate 	    (fs->fs_clean == FSBAD) || (fs->fs_clean == FSSUSPEND) ||
10440Sstevel@tonic-gate 	    (fs->fs_ronly)) {
10450Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
10460Sstevel@tonic-gate 		return;
10470Sstevel@tonic-gate 	}
10480Sstevel@tonic-gate 	fs->fs_clean = FSACTIVE;
10490Sstevel@tonic-gate 	/*
10500Sstevel@tonic-gate 	 * write superblock synchronously
10510Sstevel@tonic-gate 	 */
10520Sstevel@tonic-gate 	ufs_sbwrite(ufsvfsp);
10530Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
10540Sstevel@tonic-gate }
10550Sstevel@tonic-gate 
10560Sstevel@tonic-gate /*
10570Sstevel@tonic-gate  * ufs specific fbwrite()
10580Sstevel@tonic-gate  */
10590Sstevel@tonic-gate int
10600Sstevel@tonic-gate ufs_fbwrite(struct fbuf *fbp, struct inode *ip)
10610Sstevel@tonic-gate {
10620Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
10630Sstevel@tonic-gate 
10640Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp))
10650Sstevel@tonic-gate 		return (fbwrite(fbp));
10660Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
10670Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
10680Sstevel@tonic-gate 	return ((ufsvfsp->vfs_dio) ? fbdwrite(fbp) : fbwrite(fbp));
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate 
10710Sstevel@tonic-gate /*
10720Sstevel@tonic-gate  * ufs specific fbiwrite()
10730Sstevel@tonic-gate  */
10740Sstevel@tonic-gate int
10750Sstevel@tonic-gate ufs_fbiwrite(struct fbuf *fbp, struct inode *ip, daddr_t bn, long bsize)
10760Sstevel@tonic-gate {
10770Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
10780Sstevel@tonic-gate 	o_mode_t	ifmt		= ip->i_mode & IFMT;
10790Sstevel@tonic-gate 	buf_t		*bp;
10800Sstevel@tonic-gate 	int		error;
10810Sstevel@tonic-gate 
10820Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
10830Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
10840Sstevel@tonic-gate 	if (ifmt == IFDIR || ifmt == IFSHAD || ifmt == IFATTRDIR ||
10850Sstevel@tonic-gate 	    (ip->i_ufsvfs->vfs_qinod == ip)) {
10860Sstevel@tonic-gate 		TRANS_DELTA(ufsvfsp, ldbtob(bn * (offset_t)(btod(bsize))),
10874662Sfrankho 		    fbp->fb_count, DT_FBI, 0, 0);
10880Sstevel@tonic-gate 	}
10890Sstevel@tonic-gate 	/*
10900Sstevel@tonic-gate 	 * Inlined version of fbiwrite()
10910Sstevel@tonic-gate 	 */
10920Sstevel@tonic-gate 	bp = pageio_setup((struct page *)NULL, fbp->fb_count,
10934662Sfrankho 	    ip->i_devvp, B_WRITE);
10940Sstevel@tonic-gate 	bp->b_flags &= ~B_PAGEIO;
10950Sstevel@tonic-gate 	bp->b_un.b_addr = fbp->fb_addr;
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 	bp->b_blkno = bn * btod(bsize);
10980Sstevel@tonic-gate 	bp->b_dev = cmpdev(ip->i_dev);	/* store in old dev format */
10990Sstevel@tonic-gate 	bp->b_edev = ip->i_dev;
11000Sstevel@tonic-gate 	bp->b_proc = NULL;			/* i.e. the kernel */
11010Sstevel@tonic-gate 	bp->b_file = ip->i_vnode;
11020Sstevel@tonic-gate 	bp->b_offset = -1;
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate 	if (ufsvfsp->vfs_log) {
11050Sstevel@tonic-gate 		lufs_write_strategy(ufsvfsp->vfs_log, bp);
11060Sstevel@tonic-gate 	} else if (ufsvfsp->vfs_snapshot) {
11070Sstevel@tonic-gate 		fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
11080Sstevel@tonic-gate 	} else {
11090Sstevel@tonic-gate 		ufsvfsp->vfs_iotstamp = lbolt;
11100Sstevel@tonic-gate 		ub.ub_fbiwrites.value.ul++;
11110Sstevel@tonic-gate 		(void) bdev_strategy(bp);
11120Sstevel@tonic-gate 		lwp_stat_update(LWP_STAT_OUBLK, 1);
11130Sstevel@tonic-gate 	}
11140Sstevel@tonic-gate 	error = biowait(bp);
11150Sstevel@tonic-gate 	pageio_done(bp);
11160Sstevel@tonic-gate 	fbrelse(fbp, S_OTHER);
11170Sstevel@tonic-gate 	return (error);
11180Sstevel@tonic-gate }
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate /*
11210Sstevel@tonic-gate  * Write the ufs superblock only.
11220Sstevel@tonic-gate  */
11230Sstevel@tonic-gate void
11240Sstevel@tonic-gate ufs_sbwrite(struct ufsvfs *ufsvfsp)
11250Sstevel@tonic-gate {
11260Sstevel@tonic-gate 	char sav_fs_fmod;
11270Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
11280Sstevel@tonic-gate 	struct buf *bp = ufsvfsp->vfs_bufp;
11290Sstevel@tonic-gate 
11300Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
11310Sstevel@tonic-gate 
11320Sstevel@tonic-gate 	/*
11330Sstevel@tonic-gate 	 * for ulockfs processing, limit the superblock writes
11340Sstevel@tonic-gate 	 */
11350Sstevel@tonic-gate 	if ((ufsvfsp->vfs_ulockfs.ul_sbowner) &&
11360Sstevel@tonic-gate 	    (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) {
11370Sstevel@tonic-gate 		/* try again later */
11380Sstevel@tonic-gate 		fs->fs_fmod = 1;
11390Sstevel@tonic-gate 		return;
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate 
11420Sstevel@tonic-gate 	ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
11430Sstevel@tonic-gate 	/*
11440Sstevel@tonic-gate 	 * update superblock timestamp and fs_clean checksum
11450Sstevel@tonic-gate 	 * if marked FSBAD, we always want an erroneous
11460Sstevel@tonic-gate 	 * checksum to force repair
11470Sstevel@tonic-gate 	 */
11480Sstevel@tonic-gate 	fs->fs_time = gethrestime_sec();
11494662Sfrankho 	fs->fs_state = (fs->fs_clean != FSBAD) ?
11504662Sfrankho 	    FSOKAY - fs->fs_time : -(FSOKAY - fs->fs_time);
11510Sstevel@tonic-gate 	switch (fs->fs_clean) {
11520Sstevel@tonic-gate 	case FSCLEAN:
11530Sstevel@tonic-gate 	case FSSTABLE:
11540Sstevel@tonic-gate 		fs->fs_reclaim &= ~FS_RECLAIM;
11550Sstevel@tonic-gate 		break;
11560Sstevel@tonic-gate 	case FSACTIVE:
11570Sstevel@tonic-gate 	case FSSUSPEND:
11580Sstevel@tonic-gate 	case FSBAD:
11590Sstevel@tonic-gate 	case FSLOG:
11600Sstevel@tonic-gate 		break;
11610Sstevel@tonic-gate 	default:
11620Sstevel@tonic-gate 		fs->fs_clean = FSACTIVE;
11630Sstevel@tonic-gate 		break;
11640Sstevel@tonic-gate 	}
11650Sstevel@tonic-gate 	/*
11660Sstevel@tonic-gate 	 * reset incore only bits
11670Sstevel@tonic-gate 	 */
11680Sstevel@tonic-gate 	fs->fs_reclaim &= ~(FS_CHECKCLEAN | FS_CHECKRECLAIM);
11690Sstevel@tonic-gate 
11700Sstevel@tonic-gate 	/*
11710Sstevel@tonic-gate 	 * delta the whole superblock
11720Sstevel@tonic-gate 	 */
11730Sstevel@tonic-gate 	TRANS_DELTA(ufsvfsp, ldbtob(SBLOCK), sizeof (struct fs),
11744662Sfrankho 	    DT_SB, NULL, 0);
11750Sstevel@tonic-gate 	/*
11760Sstevel@tonic-gate 	 * retain the incore state of fs_fmod; set the ondisk state to 0
11770Sstevel@tonic-gate 	 */
11780Sstevel@tonic-gate 	sav_fs_fmod = fs->fs_fmod;
11790Sstevel@tonic-gate 	fs->fs_fmod = 0;
11800Sstevel@tonic-gate 
11810Sstevel@tonic-gate 	/*
11820Sstevel@tonic-gate 	 * Don't release the buffer after written to the disk
11830Sstevel@tonic-gate 	 */
11840Sstevel@tonic-gate 	UFS_BWRITE2(ufsvfsp, bp);
11850Sstevel@tonic-gate 	fs->fs_fmod = sav_fs_fmod;	/* reset fs_fmod's incore state */
11860Sstevel@tonic-gate }
11870Sstevel@tonic-gate 
11880Sstevel@tonic-gate /*
11890Sstevel@tonic-gate  * Returns vfs pointer if vfs still being mounted. vfs lock is held.
11900Sstevel@tonic-gate  * Otherwise, returns NULL.
11910Sstevel@tonic-gate  *
11920Sstevel@tonic-gate  * For our purposes, "still mounted" means that the file system still appears
11930Sstevel@tonic-gate  * on the list of UFS file system instances.
11940Sstevel@tonic-gate  */
11950Sstevel@tonic-gate static vfs_t *
11960Sstevel@tonic-gate still_mounted(struct check_node *checkp)
11970Sstevel@tonic-gate {
11980Sstevel@tonic-gate 	struct vfs	*vfsp;
11990Sstevel@tonic-gate 	struct ufsvfs	*ufsp;
12000Sstevel@tonic-gate 
12010Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
12020Sstevel@tonic-gate 	for (ufsp = ufs_instances; ufsp != NULL; ufsp = ufsp->vfs_next) {
12030Sstevel@tonic-gate 		if (ufsp != checkp->ufsvfs)
12040Sstevel@tonic-gate 			continue;
12050Sstevel@tonic-gate 		/*
12060Sstevel@tonic-gate 		 * Tentative match:  verify it and try to lock.  (It's not at
12070Sstevel@tonic-gate 		 * all clear how the verification could fail, given that we've
12080Sstevel@tonic-gate 		 * gotten this far.  We would have had to reallocate the
12090Sstevel@tonic-gate 		 * ufsvfs struct at hand for a new incarnation; is that really
12100Sstevel@tonic-gate 		 * possible in the interval from constructing the check_node
12110Sstevel@tonic-gate 		 * to here?)
12120Sstevel@tonic-gate 		 */
12130Sstevel@tonic-gate 		vfsp = ufsp->vfs_vfs;
12140Sstevel@tonic-gate 		if (vfsp != checkp->vfsp)
12150Sstevel@tonic-gate 			continue;
12160Sstevel@tonic-gate 		if (vfsp->vfs_dev != checkp->vfs_dev)
12170Sstevel@tonic-gate 			continue;
12180Sstevel@tonic-gate 		if (vfs_lock(vfsp) != 0)
12190Sstevel@tonic-gate 			continue;
12200Sstevel@tonic-gate 
12210Sstevel@tonic-gate 		mutex_exit(&ufsvfs_mutex);
12220Sstevel@tonic-gate 		return (vfsp);
12230Sstevel@tonic-gate 	}
12240Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
12250Sstevel@tonic-gate 	return (NULL);
12260Sstevel@tonic-gate }
12270Sstevel@tonic-gate 
12280Sstevel@tonic-gate int
12290Sstevel@tonic-gate ufs_si_io_done(struct buf *bp)
12300Sstevel@tonic-gate {
12310Sstevel@tonic-gate 	sema_v(&bp->b_io);
12320Sstevel@tonic-gate 	return (0);
12330Sstevel@tonic-gate }
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate #define	SI_BUFSZ roundup(sizeof (struct cg), DEV_BSIZE)
12360Sstevel@tonic-gate #define	NSIBUF 32
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate /*
12390Sstevel@tonic-gate  * ufs_construct_si()
12400Sstevel@tonic-gate  * Read each cylinder group in turn and construct the summary information
12410Sstevel@tonic-gate  */
12420Sstevel@tonic-gate static int
12430Sstevel@tonic-gate ufs_construct_si(dev_t dev, struct fs *fs, struct ufsvfs *ufsvfsp)
12440Sstevel@tonic-gate {
12450Sstevel@tonic-gate 	buf_t *bps, *bp;
12460Sstevel@tonic-gate 	char *bufs;
12470Sstevel@tonic-gate 	struct csum *sip = fs->fs_u.fs_csp;
12480Sstevel@tonic-gate 	struct cg *cgp;
12490Sstevel@tonic-gate 	int i, ncg;
12500Sstevel@tonic-gate 	int error = 0, cg = 0;
12510Sstevel@tonic-gate 
12520Sstevel@tonic-gate 	bps = kmem_alloc(NSIBUF * sizeof (buf_t), KM_SLEEP);
12530Sstevel@tonic-gate 	bufs = kmem_alloc(NSIBUF * SI_BUFSZ, KM_SLEEP);
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate 	/*
12560Sstevel@tonic-gate 	 * Initialise the buffer headers
12570Sstevel@tonic-gate 	 */
12580Sstevel@tonic-gate 	for (bp = bps, i = 0; i < NSIBUF; i++, bp++) {
12590Sstevel@tonic-gate 		bioinit(bp);
12600Sstevel@tonic-gate 		bp->b_iodone = ufs_si_io_done;
12610Sstevel@tonic-gate 		bp->b_bufsize = bp->b_bcount = SI_BUFSZ;
12620Sstevel@tonic-gate 		bp->b_flags = B_READ;
12630Sstevel@tonic-gate 		bp->b_un.b_addr = bufs + (i * SI_BUFSZ);
12640Sstevel@tonic-gate 		bp->b_edev = dev;
12650Sstevel@tonic-gate 	}
12660Sstevel@tonic-gate 
12670Sstevel@tonic-gate 	/*
12680Sstevel@tonic-gate 	 * Repeat while there are cylinder groups left to read.
12690Sstevel@tonic-gate 	 */
12700Sstevel@tonic-gate 	do {
12710Sstevel@tonic-gate 		/*
12720Sstevel@tonic-gate 		 * Issue upto NSIBUF asynchronous reads
12730Sstevel@tonic-gate 		 */
12740Sstevel@tonic-gate 		ncg = MIN(NSIBUF, (fs->fs_ncg - cg));
12750Sstevel@tonic-gate 		for (bp = bps, i = 0; i < ncg; i++, bp++) {
12760Sstevel@tonic-gate 			bp->b_blkno = (daddr_t)fsbtodb(fs, cgtod(fs, cg + i));
12770Sstevel@tonic-gate 			if (ufsvfsp->vfs_log) {
12780Sstevel@tonic-gate 				lufs_read_strategy(ufsvfsp->vfs_log, bp);
12790Sstevel@tonic-gate 			} else {
12800Sstevel@tonic-gate 				(void) bdev_strategy(bp);
12810Sstevel@tonic-gate 			}
12820Sstevel@tonic-gate 		}
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 		/*
12850Sstevel@tonic-gate 		 * wait for each read to finish;
12860Sstevel@tonic-gate 		 * check for errors and copy the csum info
12870Sstevel@tonic-gate 		 */
12880Sstevel@tonic-gate 		for (bp = bps, i = 0; i < ncg; i++, bp++) {
12890Sstevel@tonic-gate 			sema_p(&bp->b_io);
12900Sstevel@tonic-gate 			if (!error) {
12910Sstevel@tonic-gate 				cgp = bp->b_un.b_cg;
12920Sstevel@tonic-gate 				sip[cg + i] = cgp->cg_cs;
12930Sstevel@tonic-gate 				error = geterror(bp);
12940Sstevel@tonic-gate 			}
12950Sstevel@tonic-gate 		}
12960Sstevel@tonic-gate 		if (error) {
12970Sstevel@tonic-gate 			goto err;
12980Sstevel@tonic-gate 		}
12990Sstevel@tonic-gate 		cg += ncg;
13000Sstevel@tonic-gate 	} while (cg < fs->fs_ncg);
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate err:
13030Sstevel@tonic-gate 	kmem_free(bps, NSIBUF * sizeof (buf_t));
13040Sstevel@tonic-gate 	kmem_free(bufs, NSIBUF * SI_BUFSZ);
13050Sstevel@tonic-gate 	return (error);
13060Sstevel@tonic-gate }
13070Sstevel@tonic-gate 
13080Sstevel@tonic-gate /*
13090Sstevel@tonic-gate  * ufs_getsummaryinfo
13100Sstevel@tonic-gate  */
13110Sstevel@tonic-gate int
13120Sstevel@tonic-gate ufs_getsummaryinfo(dev_t dev, struct ufsvfs *ufsvfsp, struct fs *fs)
13130Sstevel@tonic-gate {
13140Sstevel@tonic-gate 	int		i;		/* `for' loop counter */
13150Sstevel@tonic-gate 	ssize_t		size;		/* bytes of summary info to read */
13160Sstevel@tonic-gate 	daddr_t		frags;		/* frags of summary info to read */
13170Sstevel@tonic-gate 	caddr_t		sip;		/* summary info */
13180Sstevel@tonic-gate 	struct buf	*tp;		/* tmp buf */
13190Sstevel@tonic-gate 
13200Sstevel@tonic-gate 	/*
13210Sstevel@tonic-gate 	 * maintain metadata map for trans device (debug only)
13220Sstevel@tonic-gate 	 */
13230Sstevel@tonic-gate 	TRANS_MATA_SI(ufsvfsp, fs);
13240Sstevel@tonic-gate 
13250Sstevel@tonic-gate 	/*
13260Sstevel@tonic-gate 	 * Compute #frags and allocate space for summary info
13270Sstevel@tonic-gate 	 */
13280Sstevel@tonic-gate 	frags = howmany(fs->fs_cssize, fs->fs_fsize);
13290Sstevel@tonic-gate 	sip = kmem_alloc((size_t)fs->fs_cssize, KM_SLEEP);
13300Sstevel@tonic-gate 	fs->fs_u.fs_csp = (struct csum *)sip;
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 	if (fs->fs_si == FS_SI_BAD) {
13330Sstevel@tonic-gate 		/*
13340Sstevel@tonic-gate 		 * The summary information is unknown, read it in from
13350Sstevel@tonic-gate 		 * the cylinder groups.
13360Sstevel@tonic-gate 		 */
13370Sstevel@tonic-gate 		if (TRANS_ISTRANS(ufsvfsp) && !TRANS_ISERROR(ufsvfsp) &&
13380Sstevel@tonic-gate 		    ufsvfsp->vfs_log->un_logmap) {
13390Sstevel@tonic-gate 			logmap_roll_dev(ufsvfsp->vfs_log); /* flush the log */
13400Sstevel@tonic-gate 		}
13410Sstevel@tonic-gate 		bzero(sip, (size_t)fs->fs_cssize);
13420Sstevel@tonic-gate 		if (ufs_construct_si(dev, fs, ufsvfsp)) {
13430Sstevel@tonic-gate 			kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
13440Sstevel@tonic-gate 			fs->fs_u.fs_csp = NULL;
13450Sstevel@tonic-gate 			return (EIO);
13460Sstevel@tonic-gate 		}
13470Sstevel@tonic-gate 	} else {
13480Sstevel@tonic-gate 		/* Read summary info a fs block at a time */
13490Sstevel@tonic-gate 		size = fs->fs_bsize;
13500Sstevel@tonic-gate 		for (i = 0; i < frags; i += fs->fs_frag) {
13510Sstevel@tonic-gate 			if (i + fs->fs_frag > frags)
13520Sstevel@tonic-gate 				/*
13530Sstevel@tonic-gate 				 * This happens only the last iteration, so
13540Sstevel@tonic-gate 				 * don't worry about size being reset
13550Sstevel@tonic-gate 				 */
13560Sstevel@tonic-gate 				size = (frags - i) * fs->fs_fsize;
13570Sstevel@tonic-gate 			tp = UFS_BREAD(ufsvfsp, dev,
13580Sstevel@tonic-gate 			    (daddr_t)fsbtodb(fs, fs->fs_csaddr+i), size);
13590Sstevel@tonic-gate 			tp->b_flags |= B_STALE | B_AGE;
13600Sstevel@tonic-gate 			if (tp->b_flags & B_ERROR) {
13610Sstevel@tonic-gate 				kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
13620Sstevel@tonic-gate 				fs->fs_u.fs_csp = NULL;
13630Sstevel@tonic-gate 				brelse(tp);
13640Sstevel@tonic-gate 				return (EIO);
13650Sstevel@tonic-gate 			}
13660Sstevel@tonic-gate 			bcopy(tp->b_un.b_addr, sip, size);
13670Sstevel@tonic-gate 			sip += size;
13680Sstevel@tonic-gate 			brelse(tp);
13690Sstevel@tonic-gate 		}
13700Sstevel@tonic-gate 	}
13710Sstevel@tonic-gate 	bzero((caddr_t)&fs->fs_cstotal, sizeof (fs->fs_cstotal));
13720Sstevel@tonic-gate 	for (i = 0; i < fs->fs_ncg; ++i) {
13730Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir += fs->fs_cs(fs, i).cs_ndir;
13740Sstevel@tonic-gate 		fs->fs_cstotal.cs_nbfree += fs->fs_cs(fs, i).cs_nbfree;
13750Sstevel@tonic-gate 		fs->fs_cstotal.cs_nifree += fs->fs_cs(fs, i).cs_nifree;
13760Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += fs->fs_cs(fs, i).cs_nffree;
13770Sstevel@tonic-gate 	}
13780Sstevel@tonic-gate 	return (0);
13790Sstevel@tonic-gate }
13800Sstevel@tonic-gate 
13810Sstevel@tonic-gate /*
13820Sstevel@tonic-gate  * ufs_putsummaryinfo() stores all the cylinder group summary information
13830Sstevel@tonic-gate  * This is only used when logging, but the file system may not
13840Sstevel@tonic-gate  * be logging at the time, eg a read-only mount to flush the log
13850Sstevel@tonic-gate  * may push the summary info out.
13860Sstevel@tonic-gate  */
13870Sstevel@tonic-gate int
13880Sstevel@tonic-gate ufs_putsummaryinfo(dev_t dev, struct ufsvfs *ufsvfsp, struct fs *fs)
13890Sstevel@tonic-gate {
13900Sstevel@tonic-gate 	struct buf	b, *bp;		/* tmp buf */
13910Sstevel@tonic-gate 	caddr_t		sip;		/* summary info */
13920Sstevel@tonic-gate 	ssize_t		size;		/* bytes of summary info to write */
13930Sstevel@tonic-gate 	daddr_t		frags;		/* frags of summary info to write */
13940Sstevel@tonic-gate 	int		i;		/* `for' loop counter */
13950Sstevel@tonic-gate 	int		error;		/* error */
13960Sstevel@tonic-gate 
13970Sstevel@tonic-gate 	if (TRANS_ISERROR(ufsvfsp)) {
13980Sstevel@tonic-gate 		return (EIO);
13990Sstevel@tonic-gate 	}
14000Sstevel@tonic-gate 
14010Sstevel@tonic-gate 	if ((fs->fs_si != FS_SI_BAD) || !ufsvfsp->vfs_nolog_si) {
14020Sstevel@tonic-gate 		return (0);
14030Sstevel@tonic-gate 	}
14040Sstevel@tonic-gate 
14050Sstevel@tonic-gate 	bp = &b;
14060Sstevel@tonic-gate 	bioinit(bp);
14070Sstevel@tonic-gate 	bp->b_iodone = ufs_si_io_done;
14080Sstevel@tonic-gate 	bp->b_bufsize = size = fs->fs_bsize;
14090Sstevel@tonic-gate 	bp->b_flags = B_WRITE;
14100Sstevel@tonic-gate 	bp->b_un.b_addr = kmem_alloc(size, KM_SLEEP);
14110Sstevel@tonic-gate 	bp->b_edev = dev;
14120Sstevel@tonic-gate 	frags = howmany(fs->fs_cssize, fs->fs_fsize);
14130Sstevel@tonic-gate 	sip = (caddr_t)fs->fs_u.fs_csp;
14140Sstevel@tonic-gate 
14150Sstevel@tonic-gate 	/* Write summary info one fs block at a time */
14160Sstevel@tonic-gate 	for (error = 0, i = 0; (i < frags) && (error == 0); i += fs->fs_frag) {
14170Sstevel@tonic-gate 		if (i + fs->fs_frag > frags) {
14180Sstevel@tonic-gate 			/*
14190Sstevel@tonic-gate 			 * This happens only the last iteration, so
14200Sstevel@tonic-gate 			 * don't worry about size being reset
14210Sstevel@tonic-gate 			 */
14220Sstevel@tonic-gate 			size = (frags - i) * fs->fs_fsize;
14230Sstevel@tonic-gate 		}
14240Sstevel@tonic-gate 		bcopy(sip, bp->b_un.b_addr, size);
14250Sstevel@tonic-gate 		bp->b_blkno = (daddr_t)fsbtodb(fs, fs->fs_csaddr+i);
14260Sstevel@tonic-gate 		bp->b_bcount = size;
14270Sstevel@tonic-gate 		(void) bdev_strategy(bp);
14280Sstevel@tonic-gate 		sema_p(&bp->b_io); /* wait for write to complete */
14290Sstevel@tonic-gate 		error = geterror(bp);
14300Sstevel@tonic-gate 		sip += size;
14310Sstevel@tonic-gate 	}
14320Sstevel@tonic-gate 	kmem_free(bp->b_un.b_addr, fs->fs_bsize);
14330Sstevel@tonic-gate 	if (!error) {
14340Sstevel@tonic-gate 		fs->fs_si = FS_SI_OK;
14350Sstevel@tonic-gate 	}
14360Sstevel@tonic-gate 	return (error);
14370Sstevel@tonic-gate }
14380Sstevel@tonic-gate 
14390Sstevel@tonic-gate /*
14400Sstevel@tonic-gate  * Decide whether it is okay to remove within a sticky directory.
14410Sstevel@tonic-gate  * Two conditions need to be met:  write access to the directory
14420Sstevel@tonic-gate  * is needed.  In sticky directories, write access is not sufficient;
14430Sstevel@tonic-gate  * you can remove entries from a directory only if you own the directory,
14440Sstevel@tonic-gate  * if you are privileged, if you own the entry or if the entry is
14450Sstevel@tonic-gate  * a plain file and you have write access to that file.
14460Sstevel@tonic-gate  * Function returns 0 if remove access is granted.
14470Sstevel@tonic-gate  */
14480Sstevel@tonic-gate int
14490Sstevel@tonic-gate ufs_sticky_remove_access(struct inode *dp, struct inode *ip, struct cred *cr)
14500Sstevel@tonic-gate {
14510Sstevel@tonic-gate 	uid_t uid;
14520Sstevel@tonic-gate 	if ((dp->i_mode & ISVTX) &&
14530Sstevel@tonic-gate 	    (uid = crgetuid(cr)) != dp->i_uid &&
14540Sstevel@tonic-gate 	    uid != ip->i_uid &&
14550Sstevel@tonic-gate 	    ((ip->i_mode & IFMT) != IFREG ||
14560Sstevel@tonic-gate 	    ufs_iaccess(ip, IWRITE, cr) != 0))
14570Sstevel@tonic-gate 		return (secpolicy_vnode_remove(cr));
14580Sstevel@tonic-gate 
14590Sstevel@tonic-gate 	return (0);
14600Sstevel@tonic-gate }
14610Sstevel@tonic-gate #endif	/* _KERNEL */
14620Sstevel@tonic-gate 
14630Sstevel@tonic-gate extern	int around[9];
14640Sstevel@tonic-gate extern	int inside[9];
14650Sstevel@tonic-gate extern	uchar_t *fragtbl[];
14660Sstevel@tonic-gate 
14670Sstevel@tonic-gate /*
14680Sstevel@tonic-gate  * Update the frsum fields to reflect addition or deletion
14690Sstevel@tonic-gate  * of some frags.
14700Sstevel@tonic-gate  */
14710Sstevel@tonic-gate void
14720Sstevel@tonic-gate fragacct(struct fs *fs, int fragmap, int32_t *fraglist, int cnt)
14730Sstevel@tonic-gate {
14740Sstevel@tonic-gate 	int inblk;
14750Sstevel@tonic-gate 	int field, subfield;
14760Sstevel@tonic-gate 	int siz, pos;
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate 	/*
14790Sstevel@tonic-gate 	 * ufsvfsp->vfs_lock is held when calling this.
14800Sstevel@tonic-gate 	 */
14810Sstevel@tonic-gate 	inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
14820Sstevel@tonic-gate 	fragmap <<= 1;
14830Sstevel@tonic-gate 	for (siz = 1; siz < fs->fs_frag; siz++) {
14840Sstevel@tonic-gate 		if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
14850Sstevel@tonic-gate 			continue;
14860Sstevel@tonic-gate 		field = around[siz];
14870Sstevel@tonic-gate 		subfield = inside[siz];
14880Sstevel@tonic-gate 		for (pos = siz; pos <= fs->fs_frag; pos++) {
14890Sstevel@tonic-gate 			if ((fragmap & field) == subfield) {
14900Sstevel@tonic-gate 				fraglist[siz] += cnt;
14910Sstevel@tonic-gate 				ASSERT(fraglist[siz] >= 0);
14920Sstevel@tonic-gate 				pos += siz;
14930Sstevel@tonic-gate 				field <<= siz;
14940Sstevel@tonic-gate 				subfield <<= siz;
14950Sstevel@tonic-gate 			}
14960Sstevel@tonic-gate 			field <<= 1;
14970Sstevel@tonic-gate 			subfield <<= 1;
14980Sstevel@tonic-gate 		}
14990Sstevel@tonic-gate 	}
15000Sstevel@tonic-gate }
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate /*
15030Sstevel@tonic-gate  * Block operations
15040Sstevel@tonic-gate  */
15050Sstevel@tonic-gate 
15060Sstevel@tonic-gate /*
15070Sstevel@tonic-gate  * Check if a block is available
15080Sstevel@tonic-gate  */
15090Sstevel@tonic-gate int
15100Sstevel@tonic-gate isblock(struct fs *fs, uchar_t *cp, daddr_t h)
15110Sstevel@tonic-gate {
15120Sstevel@tonic-gate 	uchar_t mask;
15130Sstevel@tonic-gate 
15140Sstevel@tonic-gate 	ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
15154662Sfrankho 	    fs->fs_frag == 1);
15160Sstevel@tonic-gate 	/*
15170Sstevel@tonic-gate 	 * ufsvfsp->vfs_lock is held when calling this.
15180Sstevel@tonic-gate 	 */
15190Sstevel@tonic-gate 	switch ((int)fs->fs_frag) {
15200Sstevel@tonic-gate 	case 8:
15210Sstevel@tonic-gate 		return (cp[h] == 0xff);
15220Sstevel@tonic-gate 	case 4:
15230Sstevel@tonic-gate 		mask = 0x0f << ((h & 0x1) << 2);
15240Sstevel@tonic-gate 		return ((cp[h >> 1] & mask) == mask);
15250Sstevel@tonic-gate 	case 2:
15260Sstevel@tonic-gate 		mask = 0x03 << ((h & 0x3) << 1);
15270Sstevel@tonic-gate 		return ((cp[h >> 2] & mask) == mask);
15280Sstevel@tonic-gate 	case 1:
15290Sstevel@tonic-gate 		mask = 0x01 << (h & 0x7);
15300Sstevel@tonic-gate 		return ((cp[h >> 3] & mask) == mask);
15310Sstevel@tonic-gate 	default:
15320Sstevel@tonic-gate #ifndef _KERNEL
15330Sstevel@tonic-gate 		cmn_err(CE_PANIC, "isblock: illegal fs->fs_frag value (%d)",
15344662Sfrankho 		    fs->fs_frag);
15350Sstevel@tonic-gate #endif /* _KERNEL */
15360Sstevel@tonic-gate 		return (0);
15370Sstevel@tonic-gate 	}
15380Sstevel@tonic-gate }
15390Sstevel@tonic-gate 
15400Sstevel@tonic-gate /*
15410Sstevel@tonic-gate  * Take a block out of the map
15420Sstevel@tonic-gate  */
15430Sstevel@tonic-gate void
15440Sstevel@tonic-gate clrblock(struct fs *fs, uchar_t *cp, daddr_t h)
15450Sstevel@tonic-gate {
15460Sstevel@tonic-gate 	ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
15474662Sfrankho 	    fs->fs_frag == 1);
15480Sstevel@tonic-gate 	/*
15490Sstevel@tonic-gate 	 * ufsvfsp->vfs_lock is held when calling this.
15500Sstevel@tonic-gate 	 */
15510Sstevel@tonic-gate 	switch ((int)fs->fs_frag) {
15520Sstevel@tonic-gate 	case 8:
15530Sstevel@tonic-gate 		cp[h] = 0;
15540Sstevel@tonic-gate 		return;
15550Sstevel@tonic-gate 	case 4:
15560Sstevel@tonic-gate 		cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
15570Sstevel@tonic-gate 		return;
15580Sstevel@tonic-gate 	case 2:
15590Sstevel@tonic-gate 		cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
15600Sstevel@tonic-gate 		return;
15610Sstevel@tonic-gate 	case 1:
15620Sstevel@tonic-gate 		cp[h >> 3] &= ~(0x01 << (h & 0x7));
15630Sstevel@tonic-gate 		return;
15640Sstevel@tonic-gate 	default:
15650Sstevel@tonic-gate #ifndef _KERNEL
15660Sstevel@tonic-gate 		cmn_err(CE_PANIC, "clrblock: illegal fs->fs_frag value (%d)",
15674662Sfrankho 		    fs->fs_frag);
15680Sstevel@tonic-gate #endif /* _KERNEL */
15690Sstevel@tonic-gate 		return;
15700Sstevel@tonic-gate 	}
15710Sstevel@tonic-gate }
15720Sstevel@tonic-gate 
15730Sstevel@tonic-gate /*
15740Sstevel@tonic-gate  * Is block allocated?
15750Sstevel@tonic-gate  */
15760Sstevel@tonic-gate int
15770Sstevel@tonic-gate isclrblock(struct fs *fs, uchar_t *cp, daddr_t h)
15780Sstevel@tonic-gate {
15790Sstevel@tonic-gate 	uchar_t	mask;
15800Sstevel@tonic-gate 	int	frag;
15810Sstevel@tonic-gate 	/*
15820Sstevel@tonic-gate 	 * ufsvfsp->vfs_lock is held when calling this.
15830Sstevel@tonic-gate 	 */
15840Sstevel@tonic-gate 	frag = fs->fs_frag;
15850Sstevel@tonic-gate 	ASSERT(frag == 8 || frag == 4 || frag == 2 || frag == 1);
15860Sstevel@tonic-gate 	switch (frag) {
15870Sstevel@tonic-gate 	case 8:
15880Sstevel@tonic-gate 		return (cp[h] == 0);
15890Sstevel@tonic-gate 	case 4:
15900Sstevel@tonic-gate 		mask = ~(0x0f << ((h & 0x1) << 2));
15910Sstevel@tonic-gate 		return (cp[h >> 1] == (cp[h >> 1] & mask));
15920Sstevel@tonic-gate 	case 2:
15930Sstevel@tonic-gate 		mask =	~(0x03 << ((h & 0x3) << 1));
15940Sstevel@tonic-gate 		return (cp[h >> 2] == (cp[h >> 2] & mask));
15950Sstevel@tonic-gate 	case 1:
15960Sstevel@tonic-gate 		mask = ~(0x01 << (h & 0x7));
15970Sstevel@tonic-gate 		return (cp[h >> 3] == (cp[h >> 3] & mask));
15980Sstevel@tonic-gate 	default:
15990Sstevel@tonic-gate #ifndef _KERNEL
16000Sstevel@tonic-gate 		cmn_err(CE_PANIC, "isclrblock: illegal fs->fs_frag value (%d)",
16014662Sfrankho 		    fs->fs_frag);
16020Sstevel@tonic-gate #endif /* _KERNEL */
16030Sstevel@tonic-gate 		break;
16040Sstevel@tonic-gate 	}
16050Sstevel@tonic-gate 	return (0);
16060Sstevel@tonic-gate }
16070Sstevel@tonic-gate 
16080Sstevel@tonic-gate /*
16090Sstevel@tonic-gate  * Put a block into the map
16100Sstevel@tonic-gate  */
16110Sstevel@tonic-gate void
16120Sstevel@tonic-gate setblock(struct fs *fs, uchar_t *cp, daddr_t h)
16130Sstevel@tonic-gate {
16140Sstevel@tonic-gate 	ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
16154662Sfrankho 	    fs->fs_frag == 1);
16160Sstevel@tonic-gate 	/*
16170Sstevel@tonic-gate 	 * ufsvfsp->vfs_lock is held when calling this.
16180Sstevel@tonic-gate 	 */
16190Sstevel@tonic-gate 	switch ((int)fs->fs_frag) {
16200Sstevel@tonic-gate 	case 8:
16210Sstevel@tonic-gate 		cp[h] = 0xff;
16220Sstevel@tonic-gate 		return;
16230Sstevel@tonic-gate 	case 4:
16240Sstevel@tonic-gate 		cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
16250Sstevel@tonic-gate 		return;
16260Sstevel@tonic-gate 	case 2:
16270Sstevel@tonic-gate 		cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
16280Sstevel@tonic-gate 		return;
16290Sstevel@tonic-gate 	case 1:
16300Sstevel@tonic-gate 		cp[h >> 3] |= (0x01 << (h & 0x7));
16310Sstevel@tonic-gate 		return;
16320Sstevel@tonic-gate 	default:
16330Sstevel@tonic-gate #ifndef _KERNEL
16340Sstevel@tonic-gate 		cmn_err(CE_PANIC, "setblock: illegal fs->fs_frag value (%d)",
16354662Sfrankho 		    fs->fs_frag);
16360Sstevel@tonic-gate #endif /* _KERNEL */
16370Sstevel@tonic-gate 		return;
16380Sstevel@tonic-gate 	}
16390Sstevel@tonic-gate }
16400Sstevel@tonic-gate 
/*
 * Skip over leading bytes equal to `c' in the `len' bytes at `cp'.
 *
 * Returns the number of bytes left starting at the first byte that
 * differs from `c', or 0 when all `len' bytes equal `c' (or len is 0).
 */
int
skpc(char c, uint_t len, char *cp)
{
	uint_t remaining;

	for (remaining = len; remaining != 0; remaining--) {
		if (*cp++ != c)
			break;
	}
	return (remaining);
}
1650