xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_trans.c (revision 4662:9c48274ded8b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*4662Sfrankho  * Common Development and Distribution License (the "License").
6*4662Sfrankho  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*4662Sfrankho  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
270Sstevel@tonic-gate /* All Rights Reserved */
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley 4.3 BSD
310Sstevel@tonic-gate  * under license from the Regents of the University of California.
320Sstevel@tonic-gate  */
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
350Sstevel@tonic-gate 
360Sstevel@tonic-gate #include <sys/sysmacros.h>
370Sstevel@tonic-gate #include <sys/param.h>
380Sstevel@tonic-gate #include <sys/types.h>
390Sstevel@tonic-gate #include <sys/systm.h>
400Sstevel@tonic-gate #include <sys/t_lock.h>
410Sstevel@tonic-gate #include <sys/uio.h>
420Sstevel@tonic-gate #include <sys/kmem.h>
430Sstevel@tonic-gate #include <sys/thread.h>
440Sstevel@tonic-gate #include <sys/vfs.h>
450Sstevel@tonic-gate #include <sys/errno.h>
460Sstevel@tonic-gate #include <sys/buf.h>
470Sstevel@tonic-gate #include <sys/vnode.h>
480Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
490Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
500Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
510Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
520Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
530Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
540Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
550Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
560Sstevel@tonic-gate #include <sys/cmn_err.h>
570Sstevel@tonic-gate #include <sys/file.h>
580Sstevel@tonic-gate #include <sys/debug.h>
590Sstevel@tonic-gate 
600Sstevel@tonic-gate 
610Sstevel@tonic-gate extern kmutex_t ufsvfs_mutex;
620Sstevel@tonic-gate extern struct ufsvfs *ufs_instances;
630Sstevel@tonic-gate 
640Sstevel@tonic-gate /*
650Sstevel@tonic-gate  * hlock any file systems w/errored logs
660Sstevel@tonic-gate  */
670Sstevel@tonic-gate int
680Sstevel@tonic-gate ufs_trans_hlock()
690Sstevel@tonic-gate {
700Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
710Sstevel@tonic-gate 	struct lockfs	lockfs;
720Sstevel@tonic-gate 	int		error;
730Sstevel@tonic-gate 	int		retry	= 0;
740Sstevel@tonic-gate 
750Sstevel@tonic-gate 	/*
760Sstevel@tonic-gate 	 * find fs's that paniced or have errored logging devices
770Sstevel@tonic-gate 	 */
780Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
790Sstevel@tonic-gate 	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) {
800Sstevel@tonic-gate 		/*
810Sstevel@tonic-gate 		 * not mounted; continue
820Sstevel@tonic-gate 		 */
830Sstevel@tonic-gate 		if ((ufsvfsp->vfs_vfs == NULL) ||
840Sstevel@tonic-gate 		    (ufsvfsp->vfs_validfs == UT_UNMOUNTED))
850Sstevel@tonic-gate 			continue;
860Sstevel@tonic-gate 		/*
870Sstevel@tonic-gate 		 * disallow unmounts (hlock occurs below)
880Sstevel@tonic-gate 		 */
890Sstevel@tonic-gate 		if (TRANS_ISERROR(ufsvfsp))
900Sstevel@tonic-gate 			ufsvfsp->vfs_validfs = UT_HLOCKING;
910Sstevel@tonic-gate 	}
920Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
930Sstevel@tonic-gate 
940Sstevel@tonic-gate 	/*
950Sstevel@tonic-gate 	 * hlock the fs's that paniced or have errored logging devices
960Sstevel@tonic-gate 	 */
970Sstevel@tonic-gate again:
980Sstevel@tonic-gate 	mutex_enter(&ufsvfs_mutex);
990Sstevel@tonic-gate 	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next)
1000Sstevel@tonic-gate 		if (ufsvfsp->vfs_validfs == UT_HLOCKING)
1010Sstevel@tonic-gate 			break;
1020Sstevel@tonic-gate 	mutex_exit(&ufsvfs_mutex);
1030Sstevel@tonic-gate 	if (ufsvfsp == NULL)
1040Sstevel@tonic-gate 		return (retry);
1050Sstevel@tonic-gate 	/*
1060Sstevel@tonic-gate 	 * hlock the file system
1070Sstevel@tonic-gate 	 */
1080Sstevel@tonic-gate 	(void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs);
1090Sstevel@tonic-gate 	if (!LOCKFS_IS_ELOCK(&lockfs)) {
1100Sstevel@tonic-gate 		lockfs.lf_lock = LOCKFS_HLOCK;
1110Sstevel@tonic-gate 		lockfs.lf_flags = 0;
1120Sstevel@tonic-gate 		lockfs.lf_comlen = 0;
1130Sstevel@tonic-gate 		lockfs.lf_comment = NULL;
1140Sstevel@tonic-gate 		error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0);
1150Sstevel@tonic-gate 		/*
1160Sstevel@tonic-gate 		 * retry after awhile; another app currently doing lockfs
1170Sstevel@tonic-gate 		 */
1180Sstevel@tonic-gate 		if (error == EBUSY || error == EINVAL)
1190Sstevel@tonic-gate 			retry = 1;
1200Sstevel@tonic-gate 	} else {
1210Sstevel@tonic-gate 		if (ufsfx_get_failure_qlen() > 0) {
1220Sstevel@tonic-gate 			if (mutex_tryenter(&ufs_fix.uq_mutex)) {
1230Sstevel@tonic-gate 				ufs_fix.uq_lowat = ufs_fix.uq_ne;
1240Sstevel@tonic-gate 				cv_broadcast(&ufs_fix.uq_cv);
1250Sstevel@tonic-gate 				mutex_exit(&ufs_fix.uq_mutex);
1260Sstevel@tonic-gate 			}
1270Sstevel@tonic-gate 		}
1280Sstevel@tonic-gate 		retry = 1;
1290Sstevel@tonic-gate 	}
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate 	/*
1320Sstevel@tonic-gate 	 * allow unmounts
1330Sstevel@tonic-gate 	 */
1340Sstevel@tonic-gate 	ufsvfsp->vfs_validfs = UT_MOUNTED;
1350Sstevel@tonic-gate 	goto again;
1360Sstevel@tonic-gate }
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate /*ARGSUSED*/
1390Sstevel@tonic-gate void
1400Sstevel@tonic-gate ufs_trans_onerror()
1410Sstevel@tonic-gate {
1420Sstevel@tonic-gate 	mutex_enter(&ufs_hlock.uq_mutex);
1430Sstevel@tonic-gate 	ufs_hlock.uq_ne = ufs_hlock.uq_lowat;
1440Sstevel@tonic-gate 	cv_broadcast(&ufs_hlock.uq_cv);
1450Sstevel@tonic-gate 	mutex_exit(&ufs_hlock.uq_mutex);
1460Sstevel@tonic-gate }
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate void
1490Sstevel@tonic-gate ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid)
1500Sstevel@tonic-gate {
1510Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1520Sstevel@tonic-gate 		sbupdate(vfsp);
1530Sstevel@tonic-gate 		return;
1540Sstevel@tonic-gate 	} else {
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
1570Sstevel@tonic-gate 			return;
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
1600Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
1610Sstevel@tonic-gate 		sbupdate(vfsp);
1620Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
1630Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
1640Sstevel@tonic-gate 	}
1650Sstevel@tonic-gate }
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate void
1680Sstevel@tonic-gate ufs_trans_iupdat(struct inode *ip, int waitfor)
1690Sstevel@tonic-gate {
1700Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1730Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
1740Sstevel@tonic-gate 		ufs_iupdat(ip, waitfor);
1750Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1760Sstevel@tonic-gate 		return;
1770Sstevel@tonic-gate 	} else {
1780Sstevel@tonic-gate 		ufsvfsp = ip->i_ufsvfs;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
1810Sstevel@tonic-gate 			return;
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
1840Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
1850Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
1860Sstevel@tonic-gate 		ufs_iupdat(ip, waitfor);
1870Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1880Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
1890Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
1900Sstevel@tonic-gate 	}
1910Sstevel@tonic-gate }
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate void
1940Sstevel@tonic-gate ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid)
1950Sstevel@tonic-gate {
1960Sstevel@tonic-gate 	if (curthread->t_flag & T_DONTBLOCK) {
1970Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
1980Sstevel@tonic-gate 		ufs_sbwrite(ufsvfsp);
1990Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
2000Sstevel@tonic-gate 		return;
2010Sstevel@tonic-gate 	} else {
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 		if (panicstr && TRANS_ISTRANS(ufsvfsp))
2040Sstevel@tonic-gate 			return;
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate 		curthread->t_flag |= T_DONTBLOCK;
2070Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
2080Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
2090Sstevel@tonic-gate 		ufs_sbwrite(ufsvfsp);
2100Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
2110Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
2120Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
2130Sstevel@tonic-gate 	}
2140Sstevel@tonic-gate }
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate /*ARGSUSED*/
2170Sstevel@tonic-gate int
2180Sstevel@tonic-gate ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore)
2190Sstevel@tonic-gate {
2200Sstevel@tonic-gate 	struct fs	*fs;
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
2230Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
2240Sstevel@tonic-gate 	TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp,
225*4662Sfrankho 	    ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize,
226*4662Sfrankho 	    (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize);
2270Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
2280Sstevel@tonic-gate 	return (0);
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate /*ARGSUSED*/
2320Sstevel@tonic-gate int
2330Sstevel@tonic-gate ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno)
2340Sstevel@tonic-gate {
2350Sstevel@tonic-gate 	struct buf	*bp;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1);
2380Sstevel@tonic-gate 	if (bp == NULL)
2390Sstevel@tonic-gate 		return (ENOENT);
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 	if (bp->b_flags & B_DELWRI) {
2420Sstevel@tonic-gate 		/*
2430Sstevel@tonic-gate 		 * Do not use brwrite() here since the buffer is already
2440Sstevel@tonic-gate 		 * marked for retry or not by the code that called
2450Sstevel@tonic-gate 		 * TRANS_BUF().
2460Sstevel@tonic-gate 		 */
2470Sstevel@tonic-gate 		UFS_BWRITE(ufsvfsp, bp);
2480Sstevel@tonic-gate 		return (0);
2490Sstevel@tonic-gate 	}
2500Sstevel@tonic-gate 	/*
2510Sstevel@tonic-gate 	 * If we did not find the real buf for this block above then
2520Sstevel@tonic-gate 	 * clear the dev so the buf won't be found by mistake
2530Sstevel@tonic-gate 	 * for this block later.  We had to allocate at least a 1 byte
2540Sstevel@tonic-gate 	 * buffer to keep brelse happy.
2550Sstevel@tonic-gate 	 */
2560Sstevel@tonic-gate 	if (bp->b_bufsize == 1) {
2570Sstevel@tonic-gate 		bp->b_dev = (o_dev_t)NODEV;
2580Sstevel@tonic-gate 		bp->b_edev = NODEV;
2590Sstevel@tonic-gate 		bp->b_flags = 0;
2600Sstevel@tonic-gate 	}
2610Sstevel@tonic-gate 	brelse(bp);
2620Sstevel@tonic-gate 	return (ENOENT);
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate /*ARGSUSED*/
2660Sstevel@tonic-gate int
2670Sstevel@tonic-gate ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino)
2680Sstevel@tonic-gate {
2690Sstevel@tonic-gate 	int		error;
2700Sstevel@tonic-gate 	struct inode	*ip;
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate 	/*
2730Sstevel@tonic-gate 	 * Grab the quota lock (if the file system has not been forcibly
2740Sstevel@tonic-gate 	 * unmounted).
2750Sstevel@tonic-gate 	 */
2760Sstevel@tonic-gate 	if (ufsvfsp)
2770Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred);
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	if (ufsvfsp)
2820Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
2830Sstevel@tonic-gate 	if (error)
2840Sstevel@tonic-gate 		return (ENOENT);
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) {
2870Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
2880Sstevel@tonic-gate 		ufs_iupdat(ip, 1);
2890Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
2900Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
2910Sstevel@tonic-gate 		return (0);
2920Sstevel@tonic-gate 	}
2930Sstevel@tonic-gate 	VN_RELE(ITOV(ip));
2940Sstevel@tonic-gate 	return (ENOENT);
2950Sstevel@tonic-gate }
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate #ifdef DEBUG
2980Sstevel@tonic-gate /*
2990Sstevel@tonic-gate  *	These routines maintain the metadata map (matamap)
3000Sstevel@tonic-gate  */
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate /*
3030Sstevel@tonic-gate  * update the metadata map at mount
3040Sstevel@tonic-gate  */
3050Sstevel@tonic-gate static int
3060Sstevel@tonic-gate ufs_trans_mata_mount_scan(struct inode *ip, void *arg)
3070Sstevel@tonic-gate {
3080Sstevel@tonic-gate 	/*
3090Sstevel@tonic-gate 	 * wrong file system; keep looking
3100Sstevel@tonic-gate 	 */
3110Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
3120Sstevel@tonic-gate 		return (0);
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 	/*
3150Sstevel@tonic-gate 	 * load the metadata map
3160Sstevel@tonic-gate 	 */
3170Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
3180Sstevel@tonic-gate 	ufs_trans_mata_iget(ip);
3190Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
3200Sstevel@tonic-gate 	return (0);
3210Sstevel@tonic-gate }
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate void
3240Sstevel@tonic-gate ufs_trans_mata_mount(struct ufsvfs *ufsvfsp)
3250Sstevel@tonic-gate {
3260Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
3270Sstevel@tonic-gate 	ino_t		ino;
3280Sstevel@tonic-gate 	int		i;
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate 	/*
3310Sstevel@tonic-gate 	 * put static metadata into matamap
3320Sstevel@tonic-gate 	 *	superblock
3330Sstevel@tonic-gate 	 *	cylinder groups
3340Sstevel@tonic-gate 	 *	inode groups
3350Sstevel@tonic-gate 	 *	existing inodes
3360Sstevel@tonic-gate 	 */
3370Sstevel@tonic-gate 	TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize);
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) {
3400Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp,
3410Sstevel@tonic-gate 		    ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize);
3420Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp,
3430Sstevel@tonic-gate 		    ldbtob(fsbtodb(fs, itod(fs, ino))),
3440Sstevel@tonic-gate 		    fs->fs_ipg * sizeof (struct dinode));
3450Sstevel@tonic-gate 	}
3460Sstevel@tonic-gate 	(void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp);
3470Sstevel@tonic-gate }
3480Sstevel@tonic-gate 
/*
 * Clear the metadata map of `ufsvfsp' at umount by calling top_mataclr().
 */
void
ufs_trans_mata_umount(struct ufsvfs *ufsvfsp)
{
	top_mataclr(ufsvfsp);
}
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate /*
3590Sstevel@tonic-gate  * summary info (may be extended during growfs test)
3600Sstevel@tonic-gate  */
3610Sstevel@tonic-gate void
3620Sstevel@tonic-gate ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs)
3630Sstevel@tonic-gate {
3640Sstevel@tonic-gate 	TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)),
365*4662Sfrankho 	    fs->fs_cssize);
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate  * scan an allocation block (either inode or true block)
3700Sstevel@tonic-gate  */
3710Sstevel@tonic-gate static void
3720Sstevel@tonic-gate ufs_trans_mata_direct(
3730Sstevel@tonic-gate 	struct inode *ip,
3740Sstevel@tonic-gate 	daddr_t *fragsp,
3750Sstevel@tonic-gate 	daddr32_t *blkp,
3760Sstevel@tonic-gate 	unsigned int nblk)
3770Sstevel@tonic-gate {
3780Sstevel@tonic-gate 	int		i;
3790Sstevel@tonic-gate 	daddr_t		frag;
3800Sstevel@tonic-gate 	ulong_t		nb;
3810Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
3820Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	for (i = 0; i < nblk && *fragsp; ++i, ++blkp)
3850Sstevel@tonic-gate 		if ((frag = *blkp) != 0) {
3860Sstevel@tonic-gate 			if (*fragsp > fs->fs_frag) {
3870Sstevel@tonic-gate 				nb = fs->fs_bsize;
3880Sstevel@tonic-gate 				*fragsp -= fs->fs_frag;
3890Sstevel@tonic-gate 			} else {
3900Sstevel@tonic-gate 				nb = *fragsp * fs->fs_fsize;
3910Sstevel@tonic-gate 				*fragsp = 0;
3920Sstevel@tonic-gate 			}
3930Sstevel@tonic-gate 			TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
3940Sstevel@tonic-gate 		}
3950Sstevel@tonic-gate }
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate /*
3980Sstevel@tonic-gate  * scan an indirect allocation block (either inode or true block)
3990Sstevel@tonic-gate  */
4000Sstevel@tonic-gate static void
4010Sstevel@tonic-gate ufs_trans_mata_indir(
4020Sstevel@tonic-gate 	struct inode *ip,
4030Sstevel@tonic-gate 	daddr_t *fragsp,
4040Sstevel@tonic-gate 	daddr_t frag,
4050Sstevel@tonic-gate 	int level)
4060Sstevel@tonic-gate {
4070Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp	= ip->i_ufsvfs;
4080Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
4090Sstevel@tonic-gate 	int ne = fs->fs_bsize / (int)sizeof (daddr32_t);
4100Sstevel@tonic-gate 	int i;
4110Sstevel@tonic-gate 	struct buf *bp;
4120Sstevel@tonic-gate 	daddr32_t *blkp;
4130Sstevel@tonic-gate 	o_mode_t ifmt = ip->i_mode & IFMT;
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize);
4160Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
4170Sstevel@tonic-gate 		brelse(bp);
4180Sstevel@tonic-gate 		return;
4190Sstevel@tonic-gate 	}
4200Sstevel@tonic-gate 	blkp = bp->b_un.b_daddr;
4210Sstevel@tonic-gate 
4220Sstevel@tonic-gate 	if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) ||
4230Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))
4240Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, fragsp, blkp, ne);
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	if (level)
4270Sstevel@tonic-gate 		for (i = 0; i < ne && *fragsp; ++i, ++blkp)
4280Sstevel@tonic-gate 			ufs_trans_mata_indir(ip, fragsp, *blkp, level-1);
4290Sstevel@tonic-gate 	brelse(bp);
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate /*
4330Sstevel@tonic-gate  * put appropriate metadata into matamap for this inode
4340Sstevel@tonic-gate  */
4350Sstevel@tonic-gate void
4360Sstevel@tonic-gate ufs_trans_mata_iget(struct inode *ip)
4370Sstevel@tonic-gate {
4380Sstevel@tonic-gate 	int		i;
4390Sstevel@tonic-gate 	daddr_t		frags	= dbtofsb(ip->i_fs, ip->i_blocks);
4400Sstevel@tonic-gate 	o_mode_t	ifmt 	= ip->i_mode & IFMT;
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate 	if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
4430Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
4440Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR);
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 	if (frags)
4470Sstevel@tonic-gate 		ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR);
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate 	for (i = 0; i < NIADDR && frags; ++i)
4500Sstevel@tonic-gate 		if (ip->i_ib[i])
4510Sstevel@tonic-gate 			ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i);
4520Sstevel@tonic-gate }
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate /*
4550Sstevel@tonic-gate  * freeing possible metadata (block of user data)
4560Sstevel@tonic-gate  */
4570Sstevel@tonic-gate void
4580Sstevel@tonic-gate ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb)
4590Sstevel@tonic-gate {
4600Sstevel@tonic-gate 	top_matadel(ufsvfsp, mof, nb);
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate }
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate /*
4650Sstevel@tonic-gate  * allocating metadata
4660Sstevel@tonic-gate  */
4670Sstevel@tonic-gate void
4680Sstevel@tonic-gate ufs_trans_mata_alloc(
4690Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp,
4700Sstevel@tonic-gate 	struct inode *ip,
4710Sstevel@tonic-gate 	daddr_t frag,
4720Sstevel@tonic-gate 	ulong_t nb,
4730Sstevel@tonic-gate 	int indir)
4740Sstevel@tonic-gate {
4750Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
4760Sstevel@tonic-gate 	o_mode_t	ifmt 	= ip->i_mode & IFMT;
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 	if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
4790Sstevel@tonic-gate 	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
4800Sstevel@tonic-gate 		TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate 
4830Sstevel@tonic-gate #endif /* DEBUG */
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate  * ufs_trans_dir is used to declare a directory delta
4870Sstevel@tonic-gate  */
4880Sstevel@tonic-gate int
4890Sstevel@tonic-gate ufs_trans_dir(struct inode *ip, off_t offset)
4900Sstevel@tonic-gate {
4910Sstevel@tonic-gate 	daddr_t	bn;
4920Sstevel@tonic-gate 	int	contig = 0, error;
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate 	ASSERT(ip);
4950Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
4960Sstevel@tonic-gate 	error = bmap_read(ip, (u_offset_t)offset, &bn, &contig);
4970Sstevel@tonic-gate 	if (error || (bn == UFS_HOLE)) {
4980Sstevel@tonic-gate 		cmn_err(CE_WARN, "ufs_trans_dir - could not get block"
4990Sstevel@tonic-gate 		    " number error = %d bn = %d\n", error, (int)bn);
5000Sstevel@tonic-gate 		if (error == 0)	/* treat UFS_HOLE as an I/O error */
5010Sstevel@tonic-gate 			error = EIO;
5020Sstevel@tonic-gate 		return (error);
5030Sstevel@tonic-gate 	}
5040Sstevel@tonic-gate 	TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0);
5050Sstevel@tonic-gate 	return (error);
5060Sstevel@tonic-gate }
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate /*ARGSUSED*/
5090Sstevel@tonic-gate int
5100Sstevel@tonic-gate ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp)
5110Sstevel@tonic-gate {
5120Sstevel@tonic-gate 	/*
5130Sstevel@tonic-gate 	 * Lock the quota subsystem (ufsvfsp can be NULL
5140Sstevel@tonic-gate 	 * if the DQ_ERROR is set).
5150Sstevel@tonic-gate 	 */
5160Sstevel@tonic-gate 	if (ufsvfsp)
5170Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
5180Sstevel@tonic-gate 	mutex_enter(&dqp->dq_lock);
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate 	/*
5210Sstevel@tonic-gate 	 * If this transaction has been cancelled by closedq_scan_inode(),
5220Sstevel@tonic-gate 	 * then bail out now.  We don't call dqput() in this case because
5230Sstevel@tonic-gate 	 * it has already been done.
5240Sstevel@tonic-gate 	 */
5250Sstevel@tonic-gate 	if ((dqp->dq_flags & DQ_TRANS) == 0) {
5260Sstevel@tonic-gate 		mutex_exit(&dqp->dq_lock);
5270Sstevel@tonic-gate 		if (ufsvfsp)
5280Sstevel@tonic-gate 			rw_exit(&ufsvfsp->vfs_dqrwlock);
5290Sstevel@tonic-gate 		return (0);
5300Sstevel@tonic-gate 	}
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	if (dqp->dq_flags & DQ_ERROR) {
5330Sstevel@tonic-gate 		/*
5340Sstevel@tonic-gate 		 * Paranoia to make sure that there is at least one
5350Sstevel@tonic-gate 		 * reference to the dquot struct.  We are done with
5360Sstevel@tonic-gate 		 * the dquot (due to an error) so clear logging
5370Sstevel@tonic-gate 		 * specific markers.
5380Sstevel@tonic-gate 		 */
5390Sstevel@tonic-gate 		ASSERT(dqp->dq_cnt >= 1);
5400Sstevel@tonic-gate 		dqp->dq_flags &= ~DQ_TRANS;
5410Sstevel@tonic-gate 		dqput(dqp);
5420Sstevel@tonic-gate 		mutex_exit(&dqp->dq_lock);
5430Sstevel@tonic-gate 		if (ufsvfsp)
5440Sstevel@tonic-gate 			rw_exit(&ufsvfsp->vfs_dqrwlock);
5450Sstevel@tonic-gate 		return (1);
5460Sstevel@tonic-gate 	}
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate 	if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) {
5490Sstevel@tonic-gate 		ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0));
5500Sstevel@tonic-gate 		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb,
5510Sstevel@tonic-gate 		    dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0);
5520Sstevel@tonic-gate 		/*
5530Sstevel@tonic-gate 		 * Paranoia to make sure that there is at least one
5540Sstevel@tonic-gate 		 * reference to the dquot struct.  Clear the
5550Sstevel@tonic-gate 		 * modification flag because the operation is now in
5560Sstevel@tonic-gate 		 * the log.  Also clear the logging specific markers
5570Sstevel@tonic-gate 		 * that were set in ufs_trans_quota().
5580Sstevel@tonic-gate 		 */
5590Sstevel@tonic-gate 		ASSERT(dqp->dq_cnt >= 1);
5600Sstevel@tonic-gate 		dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS);
5610Sstevel@tonic-gate 		dqput(dqp);
5620Sstevel@tonic-gate 	}
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	/*
5650Sstevel@tonic-gate 	 * At this point, the logging specific flag should be clear,
5660Sstevel@tonic-gate 	 * but add paranoia just in case something has gone wrong.
5670Sstevel@tonic-gate 	 */
5680Sstevel@tonic-gate 	ASSERT((dqp->dq_flags & DQ_TRANS) == 0);
5690Sstevel@tonic-gate 	mutex_exit(&dqp->dq_lock);
5700Sstevel@tonic-gate 	if (ufsvfsp)
5710Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
5720Sstevel@tonic-gate 	return (0);
5730Sstevel@tonic-gate }
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate  * ufs_trans_quota take in a uid, allocates the disk space, placing the
5770Sstevel@tonic-gate  * quota record into the metamap, then declares the delta.
5780Sstevel@tonic-gate  */
5790Sstevel@tonic-gate /*ARGSUSED*/
5800Sstevel@tonic-gate void
5810Sstevel@tonic-gate ufs_trans_quota(struct dquot *dqp)
5820Sstevel@tonic-gate {
5830Sstevel@tonic-gate 
5840Sstevel@tonic-gate 	struct inode	*qip = dqp->dq_ufsvfsp->vfs_qinod;
5850Sstevel@tonic-gate 
5860Sstevel@tonic-gate 	ASSERT(qip);
5870Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&dqp->dq_lock));
5880Sstevel@tonic-gate 	ASSERT(dqp->dq_flags & DQ_MOD);
5890Sstevel@tonic-gate 	ASSERT(dqp->dq_mof != 0);
5900Sstevel@tonic-gate 	ASSERT(dqp->dq_mof != UFS_HOLE);
5910Sstevel@tonic-gate 
5920Sstevel@tonic-gate 	/*
5930Sstevel@tonic-gate 	 * Mark this dquot to indicate that we are starting a logging
5940Sstevel@tonic-gate 	 * file system operation for this dquot.  Also increment the
5950Sstevel@tonic-gate 	 * reference count so that the dquot does not get reused while
5960Sstevel@tonic-gate 	 * it is on the mapentry_t list.  DQ_TRANS is cleared and the
5970Sstevel@tonic-gate 	 * reference count is decremented by ufs_trans_push_quota.
5980Sstevel@tonic-gate 	 *
5990Sstevel@tonic-gate 	 * If the file system is force-unmounted while there is a
6000Sstevel@tonic-gate 	 * pending quota transaction, then closedq_scan_inode() will
6010Sstevel@tonic-gate 	 * clear the DQ_TRANS flag and decrement the reference count.
6020Sstevel@tonic-gate 	 *
6030Sstevel@tonic-gate 	 * Since deltamap_add() drops multiple transactions to the
6040Sstevel@tonic-gate 	 * same dq_mof and ufs_trans_push_quota() won't get called,
6050Sstevel@tonic-gate 	 * we use DQ_TRANS to prevent repeat transactions from
6060Sstevel@tonic-gate 	 * incrementing the reference count (or calling TRANS_DELTA()).
6070Sstevel@tonic-gate 	 */
6080Sstevel@tonic-gate 	if ((dqp->dq_flags & DQ_TRANS) == 0) {
6090Sstevel@tonic-gate 		dqp->dq_flags |= DQ_TRANS;
6100Sstevel@tonic-gate 		dqp->dq_cnt++;
6110Sstevel@tonic-gate 		TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk),
6120Sstevel@tonic-gate 		    DT_QR, ufs_trans_push_quota, (ulong_t)dqp);
6130Sstevel@tonic-gate 	}
6140Sstevel@tonic-gate }
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate void
6170Sstevel@tonic-gate ufs_trans_dqrele(struct dquot *dqp)
6180Sstevel@tonic-gate {
6190Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = dqp->dq_ufsvfsp;
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
6220Sstevel@tonic-gate 	TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
6230Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
6240Sstevel@tonic-gate 	dqrele(dqp);
6250Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
6260Sstevel@tonic-gate 	TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
6270Sstevel@tonic-gate 	curthread->t_flag &= ~T_DONTBLOCK;
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate int ufs_trans_max_resv = TOP_MAX_RESV;	/* will be adjusted for testing */
6310Sstevel@tonic-gate long ufs_trans_avgbfree = 0;		/* will be adjusted for testing */
6320Sstevel@tonic-gate #define	TRANS_MAX_WRITE	(1024 * 1024)
6330Sstevel@tonic-gate size_t ufs_trans_max_resid = TRANS_MAX_WRITE;
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate /*
6360Sstevel@tonic-gate  * Calculate the log reservation for the given write or truncate
6370Sstevel@tonic-gate  */
6380Sstevel@tonic-gate static ulong_t
6390Sstevel@tonic-gate ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc)
6400Sstevel@tonic-gate {
6410Sstevel@tonic-gate 	long		ncg, last2blk;
6420Sstevel@tonic-gate 	long		niblk		= 0;
6430Sstevel@tonic-gate 	u_offset_t	writeend, offblk;
6440Sstevel@tonic-gate 	int		resv;
6450Sstevel@tonic-gate 	daddr_t		nblk, maxfblk;
6460Sstevel@tonic-gate 	long		avgbfree;
6470Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
6480Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
6490Sstevel@tonic-gate 	long		fni		= NINDIR(fs);
6500Sstevel@tonic-gate 	int		bsize		= fs->fs_bsize;
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 	/*
6530Sstevel@tonic-gate 	 * Assume that the request will fit in 1 or 2 cg's,
6540Sstevel@tonic-gate 	 * resv is the amount of log space to reserve (in bytes).
6550Sstevel@tonic-gate 	 */
6560Sstevel@tonic-gate 	resv = SIZECG(ip) * 2 + INODESIZE + 1024;
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 	/*
6590Sstevel@tonic-gate 	 * get max position of write in fs blocks
6600Sstevel@tonic-gate 	 */
6610Sstevel@tonic-gate 	writeend = offset + resid;
6620Sstevel@tonic-gate 	maxfblk = lblkno(fs, writeend);
6630Sstevel@tonic-gate 	offblk = lblkno(fs, offset);
6640Sstevel@tonic-gate 	/*
6650Sstevel@tonic-gate 	 * request size in fs blocks
6660Sstevel@tonic-gate 	 */
6670Sstevel@tonic-gate 	nblk = lblkno(fs, blkroundup(fs, resid));
6680Sstevel@tonic-gate 	/*
6690Sstevel@tonic-gate 	 * Adjust for sparse files
6700Sstevel@tonic-gate 	 */
6710Sstevel@tonic-gate 	if (trunc)
6720Sstevel@tonic-gate 		nblk = MIN(nblk, ip->i_blocks);
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 	/*
6750Sstevel@tonic-gate 	 * Adjust avgbfree (for testing)
6760Sstevel@tonic-gate 	 */
6770Sstevel@tonic-gate 	avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1;
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 	/*
6800Sstevel@tonic-gate 	 * Calculate maximum number of blocks of triple indirect
6810Sstevel@tonic-gate 	 * pointers to write.
6820Sstevel@tonic-gate 	 */
6830Sstevel@tonic-gate 	last2blk = NDADDR + fni + fni * fni;
6840Sstevel@tonic-gate 	if (maxfblk > last2blk) {
6850Sstevel@tonic-gate 		long nl2ptr;
6860Sstevel@tonic-gate 		long n3blk;
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 		if (offblk > last2blk)
6890Sstevel@tonic-gate 			n3blk = maxfblk - offblk;
6900Sstevel@tonic-gate 		else
6910Sstevel@tonic-gate 			n3blk = maxfblk - last2blk;
6920Sstevel@tonic-gate 		niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1;
6930Sstevel@tonic-gate 		nl2ptr = roundup(niblk, fni) / fni + 1;
6940Sstevel@tonic-gate 		niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2;
6950Sstevel@tonic-gate 		maxfblk -= n3blk;
6960Sstevel@tonic-gate 	}
6970Sstevel@tonic-gate 	/*
6980Sstevel@tonic-gate 	 * calculate maximum number of blocks of double indirect
6990Sstevel@tonic-gate 	 * pointers to write.
7000Sstevel@tonic-gate 	 */
7010Sstevel@tonic-gate 	if (maxfblk > NDADDR + fni) {
7020Sstevel@tonic-gate 		long n2blk;
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate 		if (offblk > NDADDR + fni)
7050Sstevel@tonic-gate 			n2blk = maxfblk - offblk;
7060Sstevel@tonic-gate 		else
7070Sstevel@tonic-gate 			n2blk = maxfblk - NDADDR + fni;
7080Sstevel@tonic-gate 		niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2;
7090Sstevel@tonic-gate 		maxfblk -= n2blk;
7100Sstevel@tonic-gate 	}
7110Sstevel@tonic-gate 	/*
7120Sstevel@tonic-gate 	 * Add in indirect pointer block write
7130Sstevel@tonic-gate 	 */
7140Sstevel@tonic-gate 	if (maxfblk > NDADDR) {
7150Sstevel@tonic-gate 		niblk += 1;
7160Sstevel@tonic-gate 	}
7170Sstevel@tonic-gate 	/*
7180Sstevel@tonic-gate 	 * Calculate deltas for indirect pointer writes
7190Sstevel@tonic-gate 	 */
7200Sstevel@tonic-gate 	resv += niblk * (fs->fs_bsize + sizeof (struct delta));
7210Sstevel@tonic-gate 	/*
7220Sstevel@tonic-gate 	 * maximum number of cg's needed for request
7230Sstevel@tonic-gate 	 */
7240Sstevel@tonic-gate 	ncg = nblk / avgbfree;
7250Sstevel@tonic-gate 	if (ncg > fs->fs_ncg)
7260Sstevel@tonic-gate 		ncg = fs->fs_ncg;
7270Sstevel@tonic-gate 
7280Sstevel@tonic-gate 	/*
7290Sstevel@tonic-gate 	 * maximum amount of log space needed for request
7300Sstevel@tonic-gate 	 */
7310Sstevel@tonic-gate 	if (ncg > 2)
7320Sstevel@tonic-gate 		resv += (ncg - 2) * SIZECG(ip);
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate 	return (resv);
7350Sstevel@tonic-gate }
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate /*
7380Sstevel@tonic-gate  * Calculate the amount of log space that needs to be reserved for this
7390Sstevel@tonic-gate  * trunc request.  If the amount of log space is too large, then
7400Sstevel@tonic-gate  * calculate the the size that the requests needs to be split into.
7410Sstevel@tonic-gate  */
742923Ssdebnath void
7430Sstevel@tonic-gate ufs_trans_trunc_resv(
7440Sstevel@tonic-gate 	struct inode *ip,
7450Sstevel@tonic-gate 	u_offset_t length,
7460Sstevel@tonic-gate 	int *resvp,
7470Sstevel@tonic-gate 	u_offset_t *residp)
7480Sstevel@tonic-gate {
7490Sstevel@tonic-gate 	ulong_t		resv;
7500Sstevel@tonic-gate 	u_offset_t	size, offset, resid;
7510Sstevel@tonic-gate 	int		nchunks;
7520Sstevel@tonic-gate 
7530Sstevel@tonic-gate 	/*
7540Sstevel@tonic-gate 	 *    *resvp is the amount of log space to reserve (in bytes).
7550Sstevel@tonic-gate 	 *    when nonzero, *residp is the number of bytes to truncate.
7560Sstevel@tonic-gate 	 */
7570Sstevel@tonic-gate 	*residp = 0;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	if (length < ip->i_size) {
7600Sstevel@tonic-gate 		size = ip->i_size - length;
7610Sstevel@tonic-gate 	} else {
7620Sstevel@tonic-gate 		resv = SIZECG(ip) * 2 + INODESIZE + 1024;
7630Sstevel@tonic-gate 		/*
7640Sstevel@tonic-gate 		 * truncate up, doesn't really use much space,
7650Sstevel@tonic-gate 		 * the default above should be sufficient.
7660Sstevel@tonic-gate 		 */
7670Sstevel@tonic-gate 		goto done;
7680Sstevel@tonic-gate 	}
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 	offset = length;
7710Sstevel@tonic-gate 	resid = size;
7720Sstevel@tonic-gate 	nchunks = 1;
7730Sstevel@tonic-gate 	for (; (resv = ufs_log_amt(ip, offset, resid, 1)) > ufs_trans_max_resv;
774*4662Sfrankho 	    offset = length + (nchunks - 1) * resid) {
7750Sstevel@tonic-gate 		nchunks++;
7760Sstevel@tonic-gate 		resid = size / nchunks;
7770Sstevel@tonic-gate 	}
7780Sstevel@tonic-gate 	/*
7790Sstevel@tonic-gate 	 * If this request takes too much log space, it will be split
7800Sstevel@tonic-gate 	 */
7810Sstevel@tonic-gate 	if (nchunks > 1) {
7820Sstevel@tonic-gate 		*residp = resid;
7830Sstevel@tonic-gate 	}
7840Sstevel@tonic-gate done:
7850Sstevel@tonic-gate 	*resvp = resv;
7860Sstevel@tonic-gate }
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate int
7890Sstevel@tonic-gate ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr)
7900Sstevel@tonic-gate {
7910Sstevel@tonic-gate 	int 		err, issync, resv;
7920Sstevel@tonic-gate 	u_offset_t	resid;
7930Sstevel@tonic-gate 	int		do_block	= 0;
7940Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
7950Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate 	/*
7980Sstevel@tonic-gate 	 * Not logging; just do the trunc
7990Sstevel@tonic-gate 	 */
8000Sstevel@tonic-gate 	if (!TRANS_ISTRANS(ufsvfsp)) {
8010Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
8020Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_WRITER);
8030Sstevel@tonic-gate 		err = ufs_itrunc(ip, length, flags, cr);
8040Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
8050Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
8060Sstevel@tonic-gate 		return (err);
8070Sstevel@tonic-gate 	}
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 	/*
8100Sstevel@tonic-gate 	 * within the lockfs protocol but *not* part of a transaction
8110Sstevel@tonic-gate 	 */
8120Sstevel@tonic-gate 	do_block = curthread->t_flag & T_DONTBLOCK;
8130Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 	/*
8160Sstevel@tonic-gate 	 * Trunc the file (in pieces, if necessary)
8170Sstevel@tonic-gate 	 */
8180Sstevel@tonic-gate again:
8190Sstevel@tonic-gate 	ufs_trans_trunc_resv(ip, length, &resv, &resid);
8200Sstevel@tonic-gate 	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv);
8210Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
8220Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
8230Sstevel@tonic-gate 	if (resid) {
8240Sstevel@tonic-gate 		/*
8250Sstevel@tonic-gate 		 * resid is only set if we have to truncate in chunks
8260Sstevel@tonic-gate 		 */
8270Sstevel@tonic-gate 		ASSERT(length + resid < ip->i_size);
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 		/*
8300Sstevel@tonic-gate 		 * Partially trunc file down to desired size (length).
8310Sstevel@tonic-gate 		 * Only retain I_FREE on the last partial trunc.
8320Sstevel@tonic-gate 		 * Round up size to a block boundary, to ensure the truncate
8330Sstevel@tonic-gate 		 * doesn't have to allocate blocks. This is done both for
8340Sstevel@tonic-gate 		 * performance and to fix a bug where if the block can't be
8350Sstevel@tonic-gate 		 * allocated then the inode delete fails, but the inode
8360Sstevel@tonic-gate 		 * is still freed with attached blocks and non-zero size
8370Sstevel@tonic-gate 		 * (bug 4348738).
8380Sstevel@tonic-gate 		 */
8390Sstevel@tonic-gate 		err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)),
8400Sstevel@tonic-gate 		    flags & ~I_FREE, cr);
8410Sstevel@tonic-gate 		ASSERT(ip->i_size != length);
8420Sstevel@tonic-gate 	} else
8430Sstevel@tonic-gate 		err = ufs_itrunc(ip, length, flags, cr);
8440Sstevel@tonic-gate 	if (!do_block)
8450Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
8460Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
8470Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
8480Sstevel@tonic-gate 	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv);
8490Sstevel@tonic-gate 
8500Sstevel@tonic-gate 	if ((err == 0) && resid) {
8510Sstevel@tonic-gate 		ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
8520Sstevel@tonic-gate 		goto again;
8530Sstevel@tonic-gate 	}
8540Sstevel@tonic-gate 	return (err);
8550Sstevel@tonic-gate }
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate /*
8580Sstevel@tonic-gate  * Fault in the pages of the first n bytes specified by the uio structure.
8590Sstevel@tonic-gate  * 1 byte in each page is touched and the uio struct is unmodified.
8600Sstevel@tonic-gate  * Any error will terminate the process as this is only a best
8610Sstevel@tonic-gate  * attempt to get the pages resident.
8620Sstevel@tonic-gate  */
8630Sstevel@tonic-gate static void
8640Sstevel@tonic-gate ufs_trans_touch(ssize_t n, struct uio *uio)
8650Sstevel@tonic-gate {
8660Sstevel@tonic-gate 	struct iovec *iov;
8670Sstevel@tonic-gate 	ulong_t cnt, incr;
8680Sstevel@tonic-gate 	caddr_t p;
8690Sstevel@tonic-gate 	uint8_t tmp;
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	iov = uio->uio_iov;
8720Sstevel@tonic-gate 
8730Sstevel@tonic-gate 	while (n) {
8740Sstevel@tonic-gate 		cnt = MIN(iov->iov_len, n);
8750Sstevel@tonic-gate 		if (cnt == 0) {
8760Sstevel@tonic-gate 			/* empty iov entry */
8770Sstevel@tonic-gate 			iov++;
8780Sstevel@tonic-gate 			continue;
8790Sstevel@tonic-gate 		}
8800Sstevel@tonic-gate 		n -= cnt;
8810Sstevel@tonic-gate 		/*
8820Sstevel@tonic-gate 		 * touch each page in this segment.
8830Sstevel@tonic-gate 		 */
8840Sstevel@tonic-gate 		p = iov->iov_base;
8850Sstevel@tonic-gate 		while (cnt) {
8860Sstevel@tonic-gate 			switch (uio->uio_segflg) {
8870Sstevel@tonic-gate 			case UIO_USERSPACE:
8880Sstevel@tonic-gate 			case UIO_USERISPACE:
8890Sstevel@tonic-gate 				if (fuword8(p, &tmp))
8900Sstevel@tonic-gate 					return;
8910Sstevel@tonic-gate 				break;
8920Sstevel@tonic-gate 			case UIO_SYSSPACE:
8930Sstevel@tonic-gate 				if (kcopy(p, &tmp, 1))
8940Sstevel@tonic-gate 					return;
8950Sstevel@tonic-gate 				break;
8960Sstevel@tonic-gate 			}
8970Sstevel@tonic-gate 			incr = MIN(cnt, PAGESIZE);
8980Sstevel@tonic-gate 			p += incr;
8990Sstevel@tonic-gate 			cnt -= incr;
9000Sstevel@tonic-gate 		}
9010Sstevel@tonic-gate 		/*
9020Sstevel@tonic-gate 		 * touch the last byte in case it straddles a page.
9030Sstevel@tonic-gate 		 */
9040Sstevel@tonic-gate 		p--;
9050Sstevel@tonic-gate 		switch (uio->uio_segflg) {
9060Sstevel@tonic-gate 		case UIO_USERSPACE:
9070Sstevel@tonic-gate 		case UIO_USERISPACE:
9080Sstevel@tonic-gate 			if (fuword8(p, &tmp))
9090Sstevel@tonic-gate 				return;
9100Sstevel@tonic-gate 			break;
9110Sstevel@tonic-gate 		case UIO_SYSSPACE:
9120Sstevel@tonic-gate 			if (kcopy(p, &tmp, 1))
9130Sstevel@tonic-gate 				return;
9140Sstevel@tonic-gate 			break;
9150Sstevel@tonic-gate 		}
9160Sstevel@tonic-gate 		iov++;
9170Sstevel@tonic-gate 	}
9180Sstevel@tonic-gate }
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate /*
9210Sstevel@tonic-gate  * Calculate the amount of log space that needs to be reserved for this
9220Sstevel@tonic-gate  * write request.  If the amount of log space is too large, then
9230Sstevel@tonic-gate  * calculate the size that the requests needs to be split into.
9240Sstevel@tonic-gate  * First try fixed chunks of size ufs_trans_max_resid. If that
9250Sstevel@tonic-gate  * is too big, iterate down to the largest size that will fit.
9260Sstevel@tonic-gate  * Pagein the pages in the first chunk here, so that the pagein is
9270Sstevel@tonic-gate  * avoided later when the transaction is open.
9280Sstevel@tonic-gate  */
9290Sstevel@tonic-gate void
9300Sstevel@tonic-gate ufs_trans_write_resv(
9310Sstevel@tonic-gate 	struct inode *ip,
9320Sstevel@tonic-gate 	struct uio *uio,
9330Sstevel@tonic-gate 	int *resvp,
9340Sstevel@tonic-gate 	int *residp)
9350Sstevel@tonic-gate {
9360Sstevel@tonic-gate 	ulong_t		resv;
9370Sstevel@tonic-gate 	offset_t	offset;
9380Sstevel@tonic-gate 	ssize_t		resid;
9390Sstevel@tonic-gate 	int		nchunks;
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate 	*residp = 0;
9420Sstevel@tonic-gate 	offset = uio->uio_offset;
9430Sstevel@tonic-gate 	resid = MIN(uio->uio_resid, ufs_trans_max_resid);
9440Sstevel@tonic-gate 	resv = ufs_log_amt(ip, offset, resid, 0);
9450Sstevel@tonic-gate 	if (resv <= ufs_trans_max_resv) {
9460Sstevel@tonic-gate 		ufs_trans_touch(resid, uio);
9470Sstevel@tonic-gate 		if (resid != uio->uio_resid)
9480Sstevel@tonic-gate 			*residp = resid;
9490Sstevel@tonic-gate 		*resvp = resv;
9500Sstevel@tonic-gate 		return;
9510Sstevel@tonic-gate 	}
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 	resid = uio->uio_resid;
9540Sstevel@tonic-gate 	nchunks = 1;
9550Sstevel@tonic-gate 	for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv;
956*4662Sfrankho 	    offset = uio->uio_offset + (nchunks - 1) * resid) {
9570Sstevel@tonic-gate 		nchunks++;
9580Sstevel@tonic-gate 		resid = uio->uio_resid / nchunks;
9590Sstevel@tonic-gate 	}
9600Sstevel@tonic-gate 	ufs_trans_touch(resid, uio);
9610Sstevel@tonic-gate 	/*
9620Sstevel@tonic-gate 	 * If this request takes too much log space, it will be split
9630Sstevel@tonic-gate 	 */
9640Sstevel@tonic-gate 	if (nchunks > 1)
9650Sstevel@tonic-gate 		*residp = resid;
9660Sstevel@tonic-gate 	*resvp = resv;
9670Sstevel@tonic-gate }
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate /*
9700Sstevel@tonic-gate  * Issue write request.
9710Sstevel@tonic-gate  *
9720Sstevel@tonic-gate  * Split a large request into smaller chunks.
9730Sstevel@tonic-gate  */
9740Sstevel@tonic-gate int
9750Sstevel@tonic-gate ufs_trans_write(
9760Sstevel@tonic-gate 	struct inode *ip,
9770Sstevel@tonic-gate 	struct uio *uio,
9780Sstevel@tonic-gate 	int ioflag,
9790Sstevel@tonic-gate 	cred_t *cr,
9800Sstevel@tonic-gate 	int resv,
9810Sstevel@tonic-gate 	long resid)
9820Sstevel@tonic-gate {
9830Sstevel@tonic-gate 	long		realresid;
9840Sstevel@tonic-gate 	int		err;
9850Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
9860Sstevel@tonic-gate 
9870Sstevel@tonic-gate 	/*
9880Sstevel@tonic-gate 	 * since the write is too big and would "HOG THE LOG" it needs to
9890Sstevel@tonic-gate 	 * be broken up and done in pieces.  NOTE, the caller will
9900Sstevel@tonic-gate 	 * issue the EOT after the request has been completed
9910Sstevel@tonic-gate 	 */
9920Sstevel@tonic-gate 	realresid = uio->uio_resid;
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate again:
9950Sstevel@tonic-gate 	/*
9960Sstevel@tonic-gate 	 * Perform partial request (uiomove will update uio for us)
9970Sstevel@tonic-gate 	 *	Request is split up into "resid" size chunks until
9980Sstevel@tonic-gate 	 *	"realresid" bytes have been transferred.
9990Sstevel@tonic-gate 	 */
10000Sstevel@tonic-gate 	uio->uio_resid = MIN(resid, realresid);
10010Sstevel@tonic-gate 	realresid -= uio->uio_resid;
10020Sstevel@tonic-gate 	err = wrip(ip, uio, ioflag, cr);
10030Sstevel@tonic-gate 
10040Sstevel@tonic-gate 	/*
10050Sstevel@tonic-gate 	 * Error or request is done; caller issues final EOT
10060Sstevel@tonic-gate 	 */
10070Sstevel@tonic-gate 	if (err || uio->uio_resid || (realresid == 0)) {
10080Sstevel@tonic-gate 		uio->uio_resid += realresid;
10090Sstevel@tonic-gate 		return (err);
10100Sstevel@tonic-gate 	}
10110Sstevel@tonic-gate 
10120Sstevel@tonic-gate 	/*
10130Sstevel@tonic-gate 	 * Generate EOT for this part of the request
10140Sstevel@tonic-gate 	 */
10150Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
10160Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
10170Sstevel@tonic-gate 	if (ioflag & (FSYNC|FDSYNC)) {
10180Sstevel@tonic-gate 		TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv);
10190Sstevel@tonic-gate 	} else {
10200Sstevel@tonic-gate 		TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv);
10210Sstevel@tonic-gate 	}
10220Sstevel@tonic-gate 
10230Sstevel@tonic-gate 	/*
10240Sstevel@tonic-gate 	 * Make sure the input buffer is resident before starting
10250Sstevel@tonic-gate 	 * the next transaction.
10260Sstevel@tonic-gate 	 */
10270Sstevel@tonic-gate 	ufs_trans_touch(MIN(resid, realresid), uio);
10280Sstevel@tonic-gate 
10290Sstevel@tonic-gate 	/*
10300Sstevel@tonic-gate 	 * Generate BOT for next part of the request
10310Sstevel@tonic-gate 	 */
10320Sstevel@tonic-gate 	if (ioflag & (FSYNC|FDSYNC)) {
10330Sstevel@tonic-gate 		int error;
10340Sstevel@tonic-gate 		TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error);
10350Sstevel@tonic-gate 		ASSERT(!error);
10360Sstevel@tonic-gate 	} else {
10370Sstevel@tonic-gate 		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv);
10380Sstevel@tonic-gate 	}
10390Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
10400Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
10410Sstevel@tonic-gate 	/*
10420Sstevel@tonic-gate 	 * Error during EOT (probably device error while writing commit rec)
10430Sstevel@tonic-gate 	 */
10440Sstevel@tonic-gate 	if (err)
10450Sstevel@tonic-gate 		return (err);
10460Sstevel@tonic-gate 	goto again;
10470Sstevel@tonic-gate }
1048