/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */


#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_bio.h>
#include <vm/seg.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>

/*
 * This structure is used to track blocks as we allocate them, so that
 * we can free them if we encounter an error during allocation.  We
 * keep track of five pieces of information for each allocated block:
 *   - The number of the newly allocated block
 *   - The size of the block (lets us deal with fragments if we want)
 *   - The number of the block containing a pointer to it; or whether
 *     the pointer is in the inode
 *   - The offset within the block (or inode) containing a pointer to it.
 *   - A flag indicating the usage of the block.  (Logging needs to know
 *     this to avoid overwriting a data block if it was previously used
 *     for metadata.)
 */

enum ufs_owner_type {
	ufs_no_owner,		/* Owner has not yet been updated */
	ufs_inode_direct,	/* Listed in inode's direct block table */
	ufs_inode_indirect,	/* Listed in inode's indirect block table */
	ufs_indirect_block	/* Listed in an indirect block */
};

struct ufs_allocated_block {
	daddr_t this_block;	    /* Number of this block */
	off_t block_size;	    /* Size of this block, in bytes */
	enum ufs_owner_type owner;  /* Who points to this block? */
	daddr_t owner_block;	    /* Number of the owning block */
	uint_t owner_offset;	    /* Offset within that block or inode */
	int usage_flags;	    /* Usage flags, as expected by free() */
};
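
/*
 * Illustrative sketch of how an entry is recorded (this mirrors what
 * bmap_write() below actually does when it allocates an indirect block):
 * the block is first entered with no owner, and the owner fields are
 * filled in once the pointer to it has been stored.
 *
 *	undo_table[alloced_blocks].this_block = nb;
 *	undo_table[alloced_blocks].block_size = bsize;
 *	undo_table[alloced_blocks].owner = ufs_no_owner;
 *	undo_table[alloced_blocks].usage_flags = metaflag | I_IBLK;
 *	alloced_blocks++;
 *	...
 *	undo_table[alloced_blocks - 1].owner = ufs_inode_indirect;
 *	undo_table[alloced_blocks - 1].owner_offset = NIADDR - j;
 */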


static int findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp,
		int maxtrans);

static void ufs_undo_allocation(inode_t *ip, int block_count,
	struct ufs_allocated_block table[], int inode_sector_adjust);

/*
 * Find the extent and the matching block number.
 *
 * bsize > PAGESIZE
 *	boff indicates that we want a page in the middle
 *	min expression is supposed to make sure no extra page[s] after EOF
 * PAGESIZE >= bsize
 *	we assume that a page is a multiple of bsize, i.e.,
 *	boff always == 0
 *
 * We always return a length that is suitable for a disk transfer.
 */
#define	DOEXTENT(fs, lbn, boff, bnp, lenp, size, tblp, n, chkfrag, maxtrans) {\
	register daddr32_t *dp = (tblp);				\
	register int _chkfrag = chkfrag; /* for lint. sigh */		\
									\
	if (*dp == 0) {							\
		*(bnp) = UFS_HOLE;					\
	} else {							\
		register int len;					\
									\
		len = findextent(fs, dp, (int)(n), lenp, maxtrans) <<	\
			(fs)->fs_bshift;				\
		if (_chkfrag) {						\
			register u_offset_t tmp;			\
									\
			tmp = fragroundup((fs), size) -			\
			    (((u_offset_t)lbn) << fs->fs_bshift);	\
			len = (int)MIN(tmp, len);			\
		}							\
		len -= (boff);						\
		if (len <= 0) {						\
			*(bnp) = UFS_HOLE;				\
		} else {						\
			*(bnp) = fsbtodb(fs, *dp) + btodb(boff);	\
			*(lenp) = len;					\
		}							\
	}								\
}
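
/*
 * Worked example (numbers assumed for illustration): with an 8K block
 * size (fs_bshift == 13), PAGESIZE == 4K and boff == 4096 (the caller
 * wants the second page of the block), suppose findextent() finds two
 * contiguous blocks starting at *dp.  Then len = 2 << 13 = 16384; with
 * chkfrag set and the file extending well past this block, the MIN()
 * leaves len unchanged; subtracting boff gives *lenp = 12288, and
 * *bnp = fsbtodb(fs, *dp) + btodb(4096), i.e. the disk address of the
 * block plus 8 sectors.
 */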

/*
 * The maximum supported file size is actually somewhat less than 1
 * terabyte.  This is because the total number of blocks used for the
 * file and its metadata must fit into the ic_blocks field of the
 * inode, which is a signed 32-bit quantity.  The metadata allocated
 * for a file (that is, the single, double, and triple indirect blocks
 * used to reference the file blocks) is actually quite small,
 * but just to make sure, we check for overflow in the ic_blocks
 * field for all files whose total block count is
 * within 1 GB of a terabyte.  VERYLARGEFILESIZE below is the number of
 * 512-byte blocks in a terabyte (2^31), minus the number of 512-byte blocks
 * in a gigabyte (2^21).  We only check for overflow in the ic_blocks
 * field if the number of blocks currently allocated to the file is
 * greater than VERYLARGEFILESIZE.
 *
 * Note that file "size" is not the same as file "length".  A
 * file's "size" is the number of blocks allocated to it.  A file's
 * "length" is the maximum offset in the file.  A UFS file can have a
 * length of a terabyte, but the size is limited to somewhat less than
 * a terabyte, as described above.
 */
#define	VERYLARGEFILESIZE	0x7FE00000
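/*
 * For reference, the arithmetic behind the constant:
 *	1 TB / DEV_BSIZE = 2^40 / 2^9 = 2^31 = 0x80000000 512-byte blocks
 *	1 GB / DEV_BSIZE = 2^30 / 2^9 = 2^21 = 0x00200000 512-byte blocks
 *	0x80000000 - 0x00200000 = 0x7FE00000 = VERYLARGEFILESIZE
 */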

/*
 * bmap{read,write} define the structure of file system storage by mapping
 * a logical offset in a file to a physical block number on the device.
 * It should be called with a locked inode when allocation is to be
 * done (bmap_write).  Note this strangeness: bmap_write is always called from
 * getpage(), not putpage(), since getpage() is where all the allocation
 * is done.
 *
 * S_READ, S_OTHER -> bmap_read; S_WRITE -> bmap_write.
 *
 * NOTICE: the block number returned is the disk block number, not the
 * file system block number.  All the worries about block offsets and
 * page/block sizes are hidden inside of bmap.  Well, not quite,
 * unfortunately.  It's impossible to find one place to hide all this
 * mess.  There are 3 cases:
 *
 * PAGESIZE < bsize
 *	In this case, the {get,put}page routines will attempt to align to
 *	a file system block boundary (XXX - maybe this is a mistake?).  Since
 *	the kluster routines may be out of memory, we don't always get all
 *	the pages we wanted.  If we called bmap first, to find out how much
 *	to kluster, we handed in the block aligned offset.  If we didn't get
 *	all the pages, we have to chop off the amount we didn't get from the
 *	amount handed back by bmap.
 *
 * PAGESIZE == bsize
 *	Life is quite pleasant here, no extra work needed, mainly because we
 *	(probably?) won't kluster backwards, just forwards.
 *
 * PAGESIZE > bsize
 *	This one has a different set of problems, specifically, we may have to
 *	do N reads to fill one page.  Let us hope that Sun will stay with small
 *	pages.
 *
 * Returns 0 on success, or a non-zero errno if an error occurs.
 *
 * TODO
 *	LMXXX - add a bmap cache.  This could be a couple of extents in the
 *	inode.  Two is nice for PAGESIZE > bsize.
 */

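/*
 * Illustrative caller sketch (the names other than bmap_read() are
 * assumed for the example): a read-side pager maps a file offset to a
 * disk address and a contiguous length roughly as follows.  Passing a
 * zero length lets bmap_read() cap the extent by maxcontig/iotransz.
 *
 *	daddr_t bn;
 *	int contig = 0;
 *	int err;
 *
 *	err = bmap_read(ip, uoff, &bn, &contig);
 *	if (err == 0 && bn != UFS_HOLE)
 *		... issue I/O of up to "contig" bytes at disk block "bn" ...
 */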
int
bmap_read(struct inode *ip, u_offset_t off, daddr_t *bnp, int *lenp)
{
	daddr_t lbn;
	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
	struct	fs *fs = ufsvfsp->vfs_fs;
	struct	buf *bp;
	int	i, j, boff;
	int	shft;			/* we maintain sh = 1 << shft */
	daddr_t	ob, nb, tbn;
	daddr32_t *bap;
	int	nindirshift, nindiroffset;

	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbn = (daddr_t)lblkno(fs, off);
	boff = (int)blkoff(fs, off);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		DOEXTENT(fs, lbn, boff, bnp, lenp,
		    ip->i_size, &ip->i_db[lbn], NDADDR - lbn, 1,
		    ufsvfsp->vfs_iotransz);
		return (0);
	}

	nindirshift = ufsvfsp->vfs_nindirshift;
	nindiroffset = ufsvfsp->vfs_nindiroffset;
	/*
	 * Determine how many levels of indirection.
	 */
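	/*
	 * Example (illustrative numbers): with an 8K block size,
	 * NINDIR(fs) = 8192 / sizeof (daddr32_t) = 2048, so
	 * nindirshift = 11 and nindiroffset = 2047.  A tbn below 2048
	 * resolves through one indirect level, below 2048 + 2048*2048
	 * through two, and anything larger through three.
	 */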
	shft = 0;				/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;		/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}
	if (j == 0)
		return (EFBIG);

	/*
	 * Fetch the first indirect block.
	 */
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		*bnp = UFS_HOLE;
		return (0);
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; j <= NIADDR; j++) {
		ob = nb;
		bp = UFS_BREAD(ufsvfsp,
		    ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			return (EIO);
		}
		bap = bp->b_un.b_daddr;

		ASSERT(!ufs_indir_badblock(ip, bap));

		shft -= nindirshift;		/* sh /= nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
		nb = bap[i];
		if (nb == 0) {
			*bnp = UFS_HOLE;
			brelse(bp);
			return (0);
		}
		if (j != NIADDR)
			brelse(bp);
	}
	DOEXTENT(fs, lbn, boff, bnp, lenp, ip->i_size, &bap[i],
	    MIN(NINDIR(fs) - i, (daddr_t)lblkno(fs, ip->i_size - 1) - lbn + 1),
	    0, ufsvfsp->vfs_iotransz);
	brelse(bp);
	return (0);
}

/*
 * See bmap_read for general notes.
 *
 * The block must be at least size bytes and will be extended or
 * allocated as needed.  If alloc_type is BI_ALLOC_ONLY, then bmap
 * will not create any in-core pages that correspond to the new disk
 * allocation.  If alloc_type is BI_FALLOCATE, blocks will be stored as
 * (-1) * block addr, and security is maintained because upon reading a
 * negative block number the pages are zeroed.  For all other allocation
 * types (BI_NORMAL) the in-core pages will be created and initialized
 * as needed.
 *
 * Returns 0 on success, or a non-zero errno if an error occurs.
 */
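/*
 * Illustrative caller sketch (the surrounding names are assumed for the
 * example): a write fault that is about to copy data into the page
 * cache would typically do something like
 *
 *	err = bmap_write(ip, uoff, (int)(pgoff + len), BI_NORMAL, NULL, cr);
 *
 * while a space-preallocation path that must not fault in pages would
 * pass BI_FALLOCATE (or BI_ALLOC_ONLY) and collect the newly allocated
 * block through the allocblk argument.
 */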
int
bmap_write(struct inode *ip, u_offset_t off, int size,
    enum bi_type alloc_type, daddr_t *allocblk, struct cred *cr)
{
	struct	fs *fs;
	struct	buf *bp;
	int	i;
	struct	buf *nbp;
	int	j;
	int	shft;				/* we maintain sh = 1 << shft */
	daddr_t	ob, nb, pref, lbn, llbn, tbn;
	daddr32_t *bap;
	struct	vnode *vp = ITOV(ip);
	long	bsize = VBSIZE(vp);
	long	osize, nsize;
	int	issync, metaflag, isdirquota;
	int	err;
	dev_t	dev;
	struct	fbuf *fbp;
	int	nindirshift;
	int	nindiroffset;
	struct	ufsvfs	*ufsvfsp;
	int	added_sectors;		/* sectors added to this inode */
	int	alloced_blocks;		/* fs blocks newly allocated */
	struct  ufs_allocated_block undo_table[NIADDR+1];
	int	verylargefile = 0;

	ASSERT(RW_WRITE_HELD(&ip->i_contents));

	if (allocblk)
		*allocblk = 0;

	ufsvfsp = ip->i_ufsvfs;
	fs = ufsvfsp->vfs_bufp->b_un.b_fs;
	lbn = (daddr_t)lblkno(fs, off);
	if (lbn < 0)
		return (EFBIG);
	if (ip->i_blocks >= VERYLARGEFILESIZE)
		verylargefile = 1;
	llbn = (daddr_t)((ip->i_size) ? lblkno(fs, ip->i_size - 1) : 0);
	metaflag = isdirquota = 0;
	if (((ip->i_mode & IFMT) == IFDIR) ||
	    ((ip->i_mode & IFMT) == IFATTRDIR))
		isdirquota = metaflag = I_DIR;
	else if ((ip->i_mode & IFMT) == IFSHAD)
		metaflag = I_SHAD;
	else if (ip->i_ufsvfs->vfs_qinod == ip)
		isdirquota = metaflag = I_QUOTA;

	issync = ((ip->i_flag & ISYNC) != 0);

	if (isdirquota || issync) {
		alloc_type = BI_NORMAL;	/* make sure */
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	if (llbn < NDADDR && llbn < lbn && (ob = ip->i_db[llbn]) != 0) {
		osize = blksize(fs, ip, llbn);
		if (osize < bsize && osize > 0) {
			/*
			 * Check to see if doing this will make the file too
			 * big.  Only check if we are dealing with a very
			 * large file.
			 */
			if (verylargefile == 1) {
				if (((unsigned)ip->i_blocks +
				    btodb(bsize - osize)) > INT_MAX) {
					return (EFBIG);
				}
			}
			/*
			 * Make sure we have all needed pages setup correctly.
			 *
			 * We pass S_OTHER to fbread here because we want
			 * an exclusive lock on the page in question
			 * (see ufs_getpage). I/O to the old block location
			 * may still be in progress and we are about to free
			 * the old block. We don't want anyone else to get
			 * a hold of the old block once we free it until
			 * the I/O is complete.
			 */
			err =
			    fbread(ITOV(ip), ((offset_t)llbn << fs->fs_bshift),
			    (uint_t)bsize, S_OTHER, &fbp);
			if (err)
				return (err);
			pref = blkpref(ip, llbn, (int)llbn, &ip->i_db[0]);
			err = realloccg(ip, ob, pref, (int)osize, (int)bsize,
			    &nb, cr);
			if (err) {
				if (fbp)
					fbrelse(fbp, S_OTHER);
				return (err);
			}
			ASSERT(!ufs_badblock(ip, nb));

			/*
			 * Update the inode before releasing the
			 * lock on the page. If we released the page
			 * lock first, the data could be written to its
			 * old address and then destroyed.
			 */
			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
			ip->i_db[llbn] = nb;
			UFS_SET_ISIZE(((u_offset_t)(llbn + 1)) << fs->fs_bshift,
			    ip);
			ip->i_blocks += btodb(bsize - osize);
			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
			TRANS_INODE(ufsvfsp, ip);
			ip->i_flag |= IUPD | ICHG | IATTCHG;

			/* Caller is responsible for updating i_seq */
			/*
			 * Don't check metaflag here, directories won't do this
			 *
			 */
			if (issync) {
				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
			} else {
				ASSERT(fbp);
				fbrelse(fbp, S_WRITE);
			}

			if (nb != ob) {
				(void) free(ip, ob, (off_t)osize, metaflag);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb == 0 ||
		    ip->i_size < ((u_offset_t)(lbn + 1)) << fs->fs_bshift) {
			if (nb != 0) {
				/* consider need to reallocate a frag */
				osize = fragroundup(fs, blkoff(fs, ip->i_size));
				nsize = fragroundup(fs, size);
				if (nsize <= osize)
					goto gotit;
				/*
				 * Check to see if doing this will make the
				 * file too big.  Only check if we are dealing
				 * with a very large file.
				 */
				if (verylargefile == 1) {
					if (((unsigned)ip->i_blocks +
					    btodb(nsize - osize)) > INT_MAX) {
						return (EFBIG);
					}
				}
				/*
				 * need to re-allocate a block or frag
				 */
				ob = nb;
				pref = blkpref(ip, lbn, (int)lbn,
				    &ip->i_db[0]);
				err = realloccg(ip, ob, pref, (int)osize,
				    (int)nsize, &nb, cr);
				if (err)
					return (err);
				if (allocblk)
					*allocblk = nb;
				ASSERT(!ufs_badblock(ip, nb));

			} else {
				/*
				 * need to allocate a block or frag
				 */
				osize = 0;
				if (ip->i_size <
				    ((u_offset_t)(lbn + 1)) << fs->fs_bshift)
					nsize = fragroundup(fs, size);
				else
					nsize = bsize;
				/*
				 * Check to see if doing this will make the
				 * file too big.  Only check if we are dealing
				 * with a very large file.
				 */
				if (verylargefile == 1) {
					if (((unsigned)ip->i_blocks +
					    btodb(nsize - osize)) > INT_MAX) {
						return (EFBIG);
					}
				}
				pref = blkpref(ip, lbn, (int)lbn, &ip->i_db[0]);
				err = alloc(ip, pref, (int)nsize, &nb, cr);
				if (err)
					return (err);
				if (allocblk)
					*allocblk = nb;
				ASSERT(!ufs_badblock(ip, nb));
				ob = nb;
			}

			/*
			 * Read old/create new zero pages
			 */
			fbp = NULL;
			if (osize == 0) {
				/*
				 * mmap S_WRITE faults always enter here
				 */
				/*
				 * We zero it if it's also BI_FALLOCATE, but
				 * only for direct blocks!
				 */
				if (alloc_type == BI_NORMAL ||
				    alloc_type == BI_FALLOCATE ||
				    P2ROUNDUP_TYPED(size,
				    PAGESIZE, u_offset_t) < nsize) {
					/* fbzero doesn't cause a pagefault */
					fbzero(ITOV(ip),
					    ((offset_t)lbn << fs->fs_bshift),
					    (uint_t)nsize, &fbp);
				}
			} else {
				err = fbread(vp,
				    ((offset_t)lbn << fs->fs_bshift),
				    (uint_t)nsize, S_OTHER, &fbp);
				if (err) {
					if (nb != ob) {
						(void) free(ip, nb,
						    (off_t)nsize, metaflag);
					} else {
						(void) free(ip,
						    ob + numfrags(fs, osize),
						    (off_t)(nsize - osize),
						    metaflag);
					}
					ASSERT(nsize >= osize);
					(void) chkdq(ip,
					    -(long)btodb(nsize - osize),
					    0, cr, (char **)NULL,
					    (size_t *)NULL);
					return (err);
				}
			}
			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, nsize, 0);
			ip->i_db[lbn] = nb;
			ip->i_blocks += btodb(nsize - osize);
			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
			TRANS_INODE(ufsvfsp, ip);
			ip->i_flag |= IUPD | ICHG | IATTCHG;

			/* Caller is responsible for updating i_seq */

			/*
			 * Write directory and shadow blocks synchronously so
			 * that they never appear with garbage in them on the
			 * disk.
			 *
			 */
			if (isdirquota && (ip->i_size ||
			    TRANS_ISTRANS(ufsvfsp))) {
			/*
			 * XXX may not be necessary with harpy trans
			 * bug id 1130055
			 */
				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
			} else if (fbp) {
				fbrelse(fbp, S_WRITE);
			}

			if (nb != ob)
				(void) free(ip, ob, (off_t)osize, metaflag);
		}
gotit:
		return (0);
	}

	added_sectors = alloced_blocks = 0;	/* No blocks alloced yet */

	/*
	 * Determine how many levels of indirection.
	 */
	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
	pref = 0;
	shft = 0;				/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;		/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}

	if (j == 0)
		return (EFBIG);

	/*
	 * Fetch the first indirect block.
	 */
	dev = ip->i_dev;
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		/*
		 * Check to see if doing this will make the
		 * file too big.  Only check if we are dealing
		 * with a very large file.
		 */
		if (verylargefile == 1) {
			if (((unsigned)ip->i_blocks + btodb(bsize))
			    > INT_MAX) {
				return (EFBIG);
			}
		}
		/*
		 * Need to allocate an indirect block.
		 */
		pref = blkpref(ip, lbn, 0, (daddr32_t *)0);
		err = alloc(ip, pref, (int)bsize, &nb, cr);
		if (err)
			return (err);
		TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
		ASSERT(!ufs_badblock(ip, nb));

		/*
		 * Keep track of this allocation so we can undo it if we
		 * get an error later.
		 */

		ASSERT(alloced_blocks <= NIADDR);

		undo_table[alloced_blocks].this_block = nb;
		undo_table[alloced_blocks].block_size = bsize;
		undo_table[alloced_blocks].owner = ufs_no_owner;
		undo_table[alloced_blocks].usage_flags = metaflag | I_IBLK;

		alloced_blocks++;

		/*
		 * Write zero block synchronously so that
		 * indirect blocks never point at garbage.
		 */
		bp = UFS_GETBLK(ufsvfsp, dev, fsbtodb(fs, nb), bsize);

		clrbuf(bp);
		/* XXX Maybe special-case this? */
		TRANS_BUF(ufsvfsp, 0, bsize, bp, DT_ABZERO);
		UFS_BWRITE2(ufsvfsp, bp);
		if (bp->b_flags & B_ERROR) {
			err = geterror(bp);
			brelse(bp);
			ufs_undo_allocation(ip, alloced_blocks,
			    undo_table, added_sectors);
			return (err);
		}
		brelse(bp);

		ip->i_ib[NIADDR - j] = nb;
		added_sectors += btodb(bsize);
		ip->i_blocks += btodb(bsize);
		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
		TRANS_INODE(ufsvfsp, ip);
		ip->i_flag |= IUPD | ICHG | IATTCHG;
		/* Caller is responsible for updating i_seq */

		/*
		 * Update the 'undo table' now that we've linked this block
		 * to an inode.
		 */

		undo_table[alloced_blocks-1].owner = ufs_inode_indirect;
		undo_table[alloced_blocks-1].owner_offset = NIADDR - j;

		/*
		 * In the ISYNC case, wrip will notice that the block
		 * count on the inode has changed and will be sure to
		 * ufs_iupdat the inode at the end of wrip.
		 */
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; j <= NIADDR; j++) {
		ob = nb;
		bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, ob), bsize);

		if (bp->b_flags & B_ERROR) {
			err = geterror(bp);
			brelse(bp);
			/*
			 * Return any partial allocations.
			 *
			 * It is possible that we have not yet made any
			 * allocations at this point (if this is the first
			 * pass through the loop and we didn't have to
			 * allocate the first indirect block, above).
			 * In this case, alloced_blocks and added_sectors will
			 * be zero, and ufs_undo_allocation will do nothing.
			 */
			ufs_undo_allocation(ip, alloced_blocks,
			    undo_table, added_sectors);
			return (err);
		}
		bap = bp->b_un.b_daddr;
		shft -= nindirshift;		/* sh /= nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
		nb = bap[i];

		if (nb == 0) {
			/*
			 * Check to see if doing this will make the
			 * file too big.  Only check if we are dealing
			 * with a very large file.
			 */
			if (verylargefile == 1) {
				if (((unsigned)ip->i_blocks + btodb(bsize))
				    > INT_MAX) {
					brelse(bp);
					ufs_undo_allocation(ip, alloced_blocks,
					    undo_table, added_sectors);
					return (EFBIG);
				}
			}
			if (pref == 0) {
				if (j < NIADDR) {
					/* Indirect block */
					pref = blkpref(ip, lbn, 0,
					    (daddr32_t *)0);
				} else {
					/* Data block */
					pref = blkpref(ip, lbn, i, &bap[0]);
				}
			}

			/*
			 * release "bp" buf to avoid deadlock (re-bread later)
			 */
			brelse(bp);

			err = alloc(ip, pref, (int)bsize, &nb, cr);
			if (err) {
				/*
				 * Return any partial allocations.
				 */
				ufs_undo_allocation(ip, alloced_blocks,
				    undo_table, added_sectors);
				return (err);
			}

			ASSERT(!ufs_badblock(ip, nb));
			ASSERT(alloced_blocks <= NIADDR);

			if (allocblk)
				*allocblk = nb;

			undo_table[alloced_blocks].this_block = nb;
			undo_table[alloced_blocks].block_size = bsize;
			undo_table[alloced_blocks].owner = ufs_no_owner;
			undo_table[alloced_blocks].usage_flags = metaflag |
			    ((j < NIADDR) ? I_IBLK : 0);

			alloced_blocks++;

			if (j < NIADDR) {
				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
				/*
				 * Write synchronously so indirect
				 * blocks never point at garbage.
				 */
				nbp = UFS_GETBLK(
				    ufsvfsp, dev, fsbtodb(fs, nb), bsize);

				clrbuf(nbp);
				/* XXX Maybe special-case this? */
				TRANS_BUF(ufsvfsp, 0, bsize, nbp, DT_ABZERO);
				UFS_BWRITE2(ufsvfsp, nbp);
				if (nbp->b_flags & B_ERROR) {
					err = geterror(nbp);
					brelse(nbp);
					/*
					 * Return any partial
					 * allocations.
					 */
					ufs_undo_allocation(ip,
					    alloced_blocks,
					    undo_table, added_sectors);
					return (err);
				}
				brelse(nbp);
			} else if (alloc_type == BI_NORMAL ||
			    P2ROUNDUP_TYPED(size,
			    PAGESIZE, u_offset_t) < bsize) {
				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
				fbzero(ITOV(ip),
				    ((offset_t)lbn << fs->fs_bshift),
				    (uint_t)bsize, &fbp);

				/*
				 * Cases in which we need to do a synchronous
				 * write of the zeroed data pages:
				 *
				 * 1) If we are writing a directory then we
				 * want to write synchronously so blocks in
				 * directories never contain garbage.
				 *
				 * 2) If we are filling in a hole and the
				 * indirect block is going to be synchronously
				 * written back below we need to make sure
				 * that the zeroes are written here before
				 * the indirect block is updated so that if
				 * we crash before the real data is pushed
				 * we will not end up with random data in
				 * the middle of the file.
				 *
				 * 3) If the size of the request rounded up
				 * to the system page size is smaller than
				 * the file system block size, we want to
				 * write out all the pages now so that
				 * they are not aborted before they actually
				 * make it to ufs_putpage since the length
				 * of the inode will not include the pages.
				 */

				if (isdirquota || (issync &&
				    lbn < llbn))
					(void) ufs_fbiwrite(fbp, ip, nb,
					    fs->fs_fsize);
				else
					fbrelse(fbp, S_WRITE);
			}

			/*
			 * re-acquire "bp" buf
			 */
			bp = UFS_BREAD(ufsvfsp,
			    ip->i_dev, fsbtodb(fs, ob), bsize);
			if (bp->b_flags & B_ERROR) {
				err = geterror(bp);
				brelse(bp);
				/*
				 * Return any partial allocations.
				 */
				ufs_undo_allocation(ip,
				    alloced_blocks,
				    undo_table, added_sectors);
				return (err);
			}
			bap = bp->b_un.b_daddr;
			bap[i] = nb;

			/*
			 * The magic explained: j will be equal to NIADDR
			 * when we are at the lowest level, this is where the
			 * array entries point directly to data blocks. Since
			 * we will be 'fallocate'ing we will go ahead and negate
			 * the addresses.
			 */
			if (alloc_type == BI_FALLOCATE && j == NIADDR)
				bap[i] = -bap[i];
871923Ssdebnath 
8720Sstevel@tonic-gate 			TRANS_BUF_ITEM_128(ufsvfsp, bap[i], bap, bp, DT_AB);
8730Sstevel@tonic-gate 			added_sectors += btodb(bsize);
8740Sstevel@tonic-gate 			ip->i_blocks += btodb(bsize);
8750Sstevel@tonic-gate 			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
8760Sstevel@tonic-gate 			TRANS_INODE(ufsvfsp, ip);
8770Sstevel@tonic-gate 			ip->i_flag |= IUPD | ICHG | IATTCHG;
878923Ssdebnath 
8790Sstevel@tonic-gate 			/* Caller is responsible for updating i_seq */
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner =
8820Sstevel@tonic-gate 			    ufs_indirect_block;
8830Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner_block = ob;
8840Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner_offset = i;
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 			if (issync) {
8870Sstevel@tonic-gate 				UFS_BWRITE2(ufsvfsp, bp);
8880Sstevel@tonic-gate 				if (bp->b_flags & B_ERROR) {
8890Sstevel@tonic-gate 					err = geterror(bp);
8900Sstevel@tonic-gate 					brelse(bp);
8910Sstevel@tonic-gate 					/*
8920Sstevel@tonic-gate 					 * Return any partial
8930Sstevel@tonic-gate 					 * allocations.
8940Sstevel@tonic-gate 					 */
8950Sstevel@tonic-gate 					ufs_undo_allocation(ip,
8960Sstevel@tonic-gate 					    alloced_blocks,
8970Sstevel@tonic-gate 					    undo_table, added_sectors);
8980Sstevel@tonic-gate 					return (err);
8990Sstevel@tonic-gate 				}
9000Sstevel@tonic-gate 				brelse(bp);
9010Sstevel@tonic-gate 			} else {
9020Sstevel@tonic-gate 				bdrwrite(bp);
9030Sstevel@tonic-gate 			}
9040Sstevel@tonic-gate 		} else {
9050Sstevel@tonic-gate 			brelse(bp);
9060Sstevel@tonic-gate 		}
9070Sstevel@tonic-gate 	}
9080Sstevel@tonic-gate 	return (0);
9090Sstevel@tonic-gate }

/*
 * Return 1 if inode has unmapped blocks (UFS holes) or if another thread
 * is in the critical region of wrip().
 */
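/*
 * Worked example (direct blocks only, numbers assumed): a file whose
 * i_size covers five file system blocks but whose i_blocks accounts for
 * only four blocks of storage yields dblks = 5 and mblks = 4 below, so
 * mblks < dblks and the file has a hole.  For larger files the expected
 * count also includes the single/double/triple indirect blocks, which
 * is what the "cnt" computation at the bottom works out.
 */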
int
bmap_has_holes(struct inode *ip)
{
	struct fs *fs = ip->i_fs;
	uint_t	dblks;			/* # of data blocks */
	uint_t	mblks;			/* # of data + metadata blocks */
	int	nindirshift;
	int	nindiroffset;
	uint_t	cnt;
	int	n, j, shft;
	uint_t nindirblks;

	int	fsbshift = fs->fs_bshift;
	int	fsboffset = (1 << fsbshift) - 1;

	/*
	 * Check for a writer in the critical region; if one is found we
	 * cannot trust the values of i_size and i_blocks, so simply
	 * return true.
	 */
	if (ip->i_writer != NULL && ip->i_writer != curthread) {
		return (1);
	}

	dblks = (ip->i_size + fsboffset) >> fsbshift;
	mblks = (ldbtob((u_offset_t)ip->i_blocks) + fsboffset) >> fsbshift;

	/*
	 * File has only direct blocks.
	 */
	if (dblks <= NDADDR)
		return (mblks < dblks);

	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
	nindirblks = nindiroffset + 1;

	dblks -= NDADDR;
	shft = 0;
	/*
	 * Determine how many levels of indirection.
	 */
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;	/* sh *= nindir */
		sh = 1LL << shft;
		if (dblks <= sh)
			break;
		dblks -= sh;
	}
	/* LINTED: warning: logical expression always true: op "||" */
	ASSERT(NIADDR <= 3);
	ASSERT(j <= NIADDR);
	if (j == NIADDR)	/* single level indirection */
		cnt = NDADDR + 1 + dblks;
	else if (j == NIADDR-1) /* double indirection */
		cnt = NDADDR + 1 + nindirblks +
		    1 + (dblks + nindiroffset)/nindirblks + dblks;
	else if (j == NIADDR-2) { /* triple indirection */
		n = (dblks + nindiroffset)/nindirblks;
		cnt = NDADDR + 1 + nindirblks +
		    1 + nindirblks + nindirblks*nindirblks +
		    1 + (n + nindiroffset)/nindirblks + n + dblks;
	}

	return (mblks < cnt);
}

/*
 * find some contig blocks starting at *sbp and going for min(n, max_contig)
 * return the number of blocks (not frags) found.
 * The array passed in must be at least [0..n-1].
 */
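/*
 * Illustrative example (numbers assumed): with fs_frag = 8, a block
 * address array { 800, 808, 816, 900, ... } and *lenp == 0, the walk
 * below advances while each entry equals the previous one plus fs_frag,
 * so it stops at 900 and returns 3 (three contiguous file system
 * blocks).  A non-zero *lenp, a small fs_maxcontig, or the maxtransfer
 * argument caps how far it will look.
 */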
static int
findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp, int maxtransfer)
{
	register daddr_t bn, nextbn;
	register daddr32_t *bp;
	register int diff;
	int maxtransblk;

	if (n <= 0)
		return (0);
	bn = *sbp;
	if (bn == 0)
		return (0);

	diff = fs->fs_frag;
	if (*lenp) {
		n = MIN(n, lblkno(fs, *lenp));
	} else {
		/*
		 * If the user has set the value for maxcontig lower than
		 * the drive transfer size, then assume they want this
		 * to be the maximum value for the size of the data transfer.
		 */
		maxtransblk = maxtransfer >> DEV_BSHIFT;
		if (fs->fs_maxcontig < maxtransblk) {
			n = MIN(n, fs->fs_maxcontig);
		} else {
			n = MIN(n, maxtransblk);
		}
	}
	bp = sbp;
	while (--n > 0) {
		nextbn = *(bp + 1);
		if (nextbn == 0 || bn + diff != nextbn)
			break;
		bn = nextbn;
		bp++;
	}
	return ((int)(bp - sbp) + 1);
}

/*
 * Free any blocks which had been successfully allocated.  Always called
 * as a result of an error, so we don't bother returning an error code
 * from here.
 *
 * If block_count and inode_sector_adjust are both zero, we'll do nothing.
 * Thus it is safe to call this as part of error handling, whether or not
 * any blocks have been allocated.
 *
 * The ufs_inode_direct case is currently unused.
 */

static void
ufs_undo_allocation(
	inode_t *ip,
	int block_count,
	struct ufs_allocated_block table[],
	int inode_sector_adjust)
{
	int i;
	int inode_changed;
	int error_updating_pointers;
	struct ufsvfs *ufsvfsp;

	inode_changed = 0;
	error_updating_pointers = 0;

	ufsvfsp = ip->i_ufsvfs;

	/*
	 * Update pointers on disk before freeing blocks.  If we fail,
	 * some blocks may remain busy; but they will be reclaimed by
	 * an fsck.  (This is better than letting a block wind up with
	 * two owners if we successfully freed it but could not remove
	 * the pointer to it.)
	 */

	for (i = 0; i < block_count; i++) {
		switch (table[i].owner) {
		case ufs_no_owner:
			/* Nothing to do here, nobody points to us */
			break;
		case ufs_inode_direct:
			ASSERT(table[i].owner_offset < NDADDR);
			ip->i_db[table[i].owner_offset] = 0;
			inode_changed = 1;
			break;
		case ufs_inode_indirect:
			ASSERT(table[i].owner_offset < NIADDR);
			ip->i_ib[table[i].owner_offset] = 0;
			inode_changed = 1;
			break;
		case ufs_indirect_block: {
			buf_t *bp;
			daddr32_t *block_data;

			/* Read/modify/log/write. */

			ASSERT(table[i].owner_offset <
			    (VBSIZE(ITOV(ip)) / sizeof (daddr32_t)));

			bp = UFS_BREAD(ufsvfsp, ip->i_dev,
			    fsbtodb(ufsvfsp->vfs_fs, table[i].owner_block),
			    VBSIZE(ITOV(ip)));

			if (bp->b_flags & B_ERROR) {
				/* Couldn't read this block; give up. */
				error_updating_pointers = 1;
				brelse(bp);
				break;		/* out of SWITCH */
			}
11010Sstevel@tonic-gate 
11020Sstevel@tonic-gate 			block_data = bp->b_un.b_daddr;
11030Sstevel@tonic-gate 			block_data[table[i].owner_offset] = 0;
11040Sstevel@tonic-gate 
11050Sstevel@tonic-gate 			/* Write a log entry which includes the zero. */
11060Sstevel@tonic-gate 			/* It might be possible to optimize this by using */
11070Sstevel@tonic-gate 			/* TRANS_BUF directly and zeroing only the four */
11080Sstevel@tonic-gate 			/* bytes involved, but an attempt to do that led */
11090Sstevel@tonic-gate 			/* to panics in the logging code.  The attempt was */
11100Sstevel@tonic-gate 			/* TRANS_BUF(ufsvfsp,				  */
11110Sstevel@tonic-gate 			/*    table[i].owner_offset * sizeof (daddr32_t), */
11120Sstevel@tonic-gate 			/*    sizeof (daddr32_t),			  */
11130Sstevel@tonic-gate 			/*    bp,					  */
11140Sstevel@tonic-gate 			/*    DT_ABZERO);				  */
11150Sstevel@tonic-gate 
11160Sstevel@tonic-gate 			TRANS_BUF_ITEM_128(ufsvfsp,
11170Sstevel@tonic-gate 			    block_data[table[i].owner_offset],
11180Sstevel@tonic-gate 			    block_data, bp, DT_AB);
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate 			/* Now we can write the buffer itself. */
11210Sstevel@tonic-gate 
11220Sstevel@tonic-gate 			UFS_BWRITE2(ufsvfsp, bp);
11230Sstevel@tonic-gate 
11240Sstevel@tonic-gate 			if (bp->b_flags & B_ERROR) {
11250Sstevel@tonic-gate 				error_updating_pointers = 1;
11260Sstevel@tonic-gate 			}
11270Sstevel@tonic-gate 
11280Sstevel@tonic-gate 			brelse(bp);
11290Sstevel@tonic-gate 			break;
11300Sstevel@tonic-gate 		}
11310Sstevel@tonic-gate 		default:
11320Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip),
11330Sstevel@tonic-gate 			    "ufs_undo_allocation failure\n");
11340Sstevel@tonic-gate 			break;
11350Sstevel@tonic-gate 		}
11360Sstevel@tonic-gate 	}
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate 	/*
11390Sstevel@tonic-gate 	 * If the inode changed, or if we need to update its block count,
11400Sstevel@tonic-gate 	 * then do that now.  We update the inode synchronously on disk
11410Sstevel@tonic-gate 	 * to ensure that it won't transiently point at a block we've
11420Sstevel@tonic-gate 	 * freed (only necessary if we're not logging).
11430Sstevel@tonic-gate 	 *
11440Sstevel@tonic-gate 	 * NOTE: Currently ufs_iupdat() does not check for errors.  When
11450Sstevel@tonic-gate 	 * it is fixed, we should verify that we successfully updated the
11460Sstevel@tonic-gate 	 * inode before freeing blocks below.
11470Sstevel@tonic-gate 	 */
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate 	if (inode_changed || (inode_sector_adjust != 0)) {
11500Sstevel@tonic-gate 		ip->i_blocks -= inode_sector_adjust;
11510Sstevel@tonic-gate 		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
11520Sstevel@tonic-gate 		TRANS_INODE(ufsvfsp, ip);
11530Sstevel@tonic-gate 		ip->i_flag |= IUPD | ICHG | IATTCHG;
11540Sstevel@tonic-gate 		ip->i_seq++;
11550Sstevel@tonic-gate 		if (!TRANS_ISTRANS(ufsvfsp))
11560Sstevel@tonic-gate 			ufs_iupdat(ip, I_SYNC);
11570Sstevel@tonic-gate 	}
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate 	/*
11600Sstevel@tonic-gate 	 * Now we go through and actually free the blocks, but only if we
11610Sstevel@tonic-gate 	 * successfully removed the pointers to them.
11620Sstevel@tonic-gate 	 */
11630Sstevel@tonic-gate 
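	/*
	 * Note that free() here is UFS's disk-block release routine,
	 * not a memory deallocator.
	 */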
11640Sstevel@tonic-gate 	if (!error_updating_pointers) {
11650Sstevel@tonic-gate 		for (i = 0; i < block_count; i++) {
11660Sstevel@tonic-gate 			free(ip, table[i].this_block, table[i].block_size,
11670Sstevel@tonic-gate 			    table[i].usage_flags);
11680Sstevel@tonic-gate 		}
11690Sstevel@tonic-gate 	}
11700Sstevel@tonic-gate }
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate /*
11730Sstevel@tonic-gate  * Find the next hole or data block in the file, starting at *off.
1174272Sperrin  * The offset found is returned in *off; it can be less than the
1175272Sperrin  * starting offset if the starting offset was not block aligned.
11760Sstevel@tonic-gate  * This code is based on bmap_read().
11770Sstevel@tonic-gate  * Errors: ENXIO for end of file
11780Sstevel@tonic-gate  *         EIO for block read error.
11790Sstevel@tonic-gate  */
11800Sstevel@tonic-gate int
11810Sstevel@tonic-gate bmap_find(struct inode *ip, boolean_t hole, u_offset_t *off)
11820Sstevel@tonic-gate {
11830Sstevel@tonic-gate 	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
11840Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
11850Sstevel@tonic-gate 	buf_t *bp[NIADDR];
11860Sstevel@tonic-gate 	int i, j;
11870Sstevel@tonic-gate 	int shft;			/* we maintain sh = 1 << shft */
11880Sstevel@tonic-gate 	int nindirshift, nindiroffset;
11890Sstevel@tonic-gate 	daddr_t	ob, nb, tbn, lbn, skip;
11900Sstevel@tonic-gate 	daddr32_t *bap;
11910Sstevel@tonic-gate 	u_offset_t isz = (offset_t)ip->i_size;
11920Sstevel@tonic-gate 	int32_t bs = fs->fs_bsize; /* file system block size */
11930Sstevel@tonic-gate 	int32_t nindir = fs->fs_nindir;
11940Sstevel@tonic-gate 	dev_t dev;
11950Sstevel@tonic-gate 	int error = 0;
11960Sstevel@tonic-gate 	daddr_t limits[NIADDR];
11970Sstevel@tonic-gate 
11980Sstevel@tonic-gate 	ASSERT(*off < isz);
11990Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
12000Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, *off);
12010Sstevel@tonic-gate 	ASSERT(lbn >= 0);
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate 	for (i = 0; i < NIADDR; i++)
12040Sstevel@tonic-gate 		bp[i] = NULL;
12050Sstevel@tonic-gate 
12060Sstevel@tonic-gate 	/*
12070Sstevel@tonic-gate 	 * The first NDADDR blocks are direct blocks.
12080Sstevel@tonic-gate 	 */
12090Sstevel@tonic-gate 	if (lbn < NDADDR) {
12100Sstevel@tonic-gate 		for (; lbn < NDADDR; lbn++) {
12110Sstevel@tonic-gate 			if ((hole && (ip->i_db[lbn] == 0)) ||
12120Sstevel@tonic-gate 			    (!hole && (ip->i_db[lbn] != 0))) {
12130Sstevel@tonic-gate 				goto out;
12140Sstevel@tonic-gate 			}
12150Sstevel@tonic-gate 		}
12160Sstevel@tonic-gate 		if ((u_offset_t)lbn << fs->fs_bshift >= isz)
12170Sstevel@tonic-gate 			goto out;
12180Sstevel@tonic-gate 	}
12190Sstevel@tonic-gate 
12200Sstevel@tonic-gate 	nindir = fs->fs_nindir;
12210Sstevel@tonic-gate 	nindirshift = ufsvfsp->vfs_nindirshift;
12220Sstevel@tonic-gate 	nindiroffset = ufsvfsp->vfs_nindiroffset;
12230Sstevel@tonic-gate 	dev = ip->i_dev;
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	/* Set up limits array */
12260Sstevel@tonic-gate 	for (limits[0] = NDADDR, j = 1; j < NIADDR; j++)
12270Sstevel@tonic-gate 		limits[j] = limits[j-1] + (1ULL << (nindirshift * j));
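	/*
	 * limits[k] is now the first logical block number mapped through
	 * i_ib[k].  For example (assuming an 8 KB block file system, so
	 * nindir == 2048 and NDADDR == 12) limits[] would hold
	 * { 12, 12 + 2048, 12 + 2048 + 2048*2048 }.
	 */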
12280Sstevel@tonic-gate 
12290Sstevel@tonic-gate loop:
12300Sstevel@tonic-gate 	/*
12310Sstevel@tonic-gate 	 * Determine how many levels of indirection.
12320Sstevel@tonic-gate 	 */
12330Sstevel@tonic-gate 	shft = 0;				/* sh = 1 */
12340Sstevel@tonic-gate 	tbn = lbn - NDADDR;
12350Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
12360Sstevel@tonic-gate 		longlong_t sh;
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate 		shft += nindirshift;		/* sh *= nindir */
12390Sstevel@tonic-gate 		sh = 1LL << shft;
12400Sstevel@tonic-gate 		if (tbn < sh)
12410Sstevel@tonic-gate 			break;
12420Sstevel@tonic-gate 		tbn -= sh;
12430Sstevel@tonic-gate 	}
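	/*
	 * Worked example (assuming 8 KB blocks, nindir == 2048, NDADDR
	 * == 12): for lbn == 5000, tbn starts at 4988; the single
	 * indirect range (2048) is subtracted leaving 2940, which is
	 * less than 2048*2048, so the loop exits with j == NIADDR - 1
	 * and the block is reached through the double indirect tree
	 * i_ib[1].
	 */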
12440Sstevel@tonic-gate 	if (j == 0) {
12450Sstevel@tonic-gate 		/* must have passed end of file */
12460Sstevel@tonic-gate 		ASSERT(((u_offset_t)lbn << fs->fs_bshift) >= isz);
12470Sstevel@tonic-gate 		goto out;
12480Sstevel@tonic-gate 	}
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate 	/*
12510Sstevel@tonic-gate 	 * Fetch the first indirect block.
12520Sstevel@tonic-gate 	 */
12530Sstevel@tonic-gate 	nb = ip->i_ib[NIADDR - j];
12540Sstevel@tonic-gate 	if (nb == 0) {
12550Sstevel@tonic-gate 		if (hole) {
12560Sstevel@tonic-gate 			lbn = limits[NIADDR - j];
12570Sstevel@tonic-gate 			goto out;
12580Sstevel@tonic-gate 		} else {
			if (j == 1) {
				/*
				 * i_ib[NIADDR - 1] is missing, so there is
				 * no data at or beyond this offset (and
				 * limits[NIADDR - j + 1] would index past
				 * the end of limits[]).
				 */
				error = ENXIO;
				goto out;
			}
12590Sstevel@tonic-gate 			lbn = limits[NIADDR - j + 1];
12600Sstevel@tonic-gate 			if ((u_offset_t)lbn << fs->fs_bshift >= isz)
12610Sstevel@tonic-gate 				goto out;
12620Sstevel@tonic-gate 			goto loop;
12630Sstevel@tonic-gate 		}
12640Sstevel@tonic-gate 	}
12650Sstevel@tonic-gate 
12660Sstevel@tonic-gate 	/*
12670Sstevel@tonic-gate 	 * Fetch through the indirect blocks.
12680Sstevel@tonic-gate 	 */
12690Sstevel@tonic-gate 	for (; ((j <= NIADDR) && (nb != 0)); j++) {
12700Sstevel@tonic-gate 		ob = nb;
12710Sstevel@tonic-gate 		/*
12720Sstevel@tonic-gate 		 * If there's a different block at this level, release
12730Sstevel@tonic-gate 		 * the old one and read in the new one.
12740Sstevel@tonic-gate 		 */
12750Sstevel@tonic-gate 		if ((bp[j-1] == NULL) || bp[j-1]->b_blkno != fsbtodb(fs, ob)) {
12760Sstevel@tonic-gate 			if (bp[j-1] != NULL)
12770Sstevel@tonic-gate 				brelse(bp[j-1]);
12780Sstevel@tonic-gate 			bp[j-1] = UFS_BREAD(ufsvfsp, dev, fsbtodb(fs, ob), bs);
12790Sstevel@tonic-gate 			if (bp[j-1]->b_flags & B_ERROR) {
12800Sstevel@tonic-gate 				error = EIO;
12810Sstevel@tonic-gate 				goto out;
12820Sstevel@tonic-gate 			}
12830Sstevel@tonic-gate 		}
12840Sstevel@tonic-gate 		bap = bp[j-1]->b_un.b_daddr;
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 		shft -= nindirshift;		/* sh / nindir */
12870Sstevel@tonic-gate 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
12880Sstevel@tonic-gate 		nb = bap[i];
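		/*
		 * Each entry at this level spans "skip" logical blocks:
		 * 1 at the lowest level, nindir one level up, and so on.
		 */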
12890Sstevel@tonic-gate 		skip = 1LL << (nindirshift * (NIADDR - j));
12900Sstevel@tonic-gate 	}
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	/*
12930Sstevel@tonic-gate 	 * Scan through the blocks in this array.
12940Sstevel@tonic-gate 	 */
12950Sstevel@tonic-gate 	for (; i < nindir; i++, lbn += skip) {
12960Sstevel@tonic-gate 		if (hole && (bap[i] == 0))
12970Sstevel@tonic-gate 			goto out;
12980Sstevel@tonic-gate 		if (!hole && (bap[i] != 0)) {
12990Sstevel@tonic-gate 			if (skip == 1) {
13000Sstevel@tonic-gate 				/* we're at the lowest level */
13010Sstevel@tonic-gate 				goto out;
13020Sstevel@tonic-gate 			} else {
13030Sstevel@tonic-gate 				goto loop;
13040Sstevel@tonic-gate 			}
13050Sstevel@tonic-gate 		}
13060Sstevel@tonic-gate 	}
13070Sstevel@tonic-gate 	if (((u_offset_t)lbn << fs->fs_bshift) < isz)
13080Sstevel@tonic-gate 		goto loop;
13090Sstevel@tonic-gate out:
13100Sstevel@tonic-gate 	for (i = 0; i < NIADDR; i++) {
13110Sstevel@tonic-gate 		if (bp[i])
13120Sstevel@tonic-gate 			brelse(bp[i]);
13130Sstevel@tonic-gate 	}
13140Sstevel@tonic-gate 	if (error == 0) {
13150Sstevel@tonic-gate 		if (((u_offset_t)lbn << fs->fs_bshift) >= isz) {
13160Sstevel@tonic-gate 			error = ENXIO;
13170Sstevel@tonic-gate 		} else {
13180Sstevel@tonic-gate 			/* success */
13190Sstevel@tonic-gate 			*off = (u_offset_t)lbn << fs->fs_bshift;
13200Sstevel@tonic-gate 		}
13210Sstevel@tonic-gate 	}
13220Sstevel@tonic-gate 	return (error);
13230Sstevel@tonic-gate }
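
/*
 * Illustrative sketch only (not part of the original source): a
 * SEEK_HOLE/SEEK_DATA style lookup could be layered on bmap_find()
 * roughly as follows, where "start" and "hole" are hypothetical
 * caller-supplied values:
 *
 *	u_offset_t off = start;
 *	int err;
 *
 *	rw_enter(&ip->i_contents, RW_READER);
 *	if (off >= ip->i_size)
 *		err = ENXIO;
 *	else
 *		err = bmap_find(ip, hole, &off);
 *	rw_exit(&ip->i_contents);
 *
 * On success, off holds the block-aligned offset of the next hole
 * (hole == B_TRUE) or data block (hole == B_FALSE); it can be below
 * start if start was not block aligned.
 */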
1324923Ssdebnath 
1325923Ssdebnath /*
1326923Ssdebnath  * Set the block pointer for a particular file offset in the inode's
1327923Ssdebnath  * block list to the given block number; the caller must do the TRANS* calls.
1328923Ssdebnath  */
1329923Ssdebnath int
bmap_set_bn(struct vnode * vp,u_offset_t off,daddr32_t bn)1330923Ssdebnath bmap_set_bn(struct vnode *vp, u_offset_t off, daddr32_t bn)
1331923Ssdebnath {
1332923Ssdebnath 	daddr_t lbn;
1333923Ssdebnath 	struct inode *ip;
1334923Ssdebnath 	ufsvfs_t *ufsvfsp;
1335923Ssdebnath 	struct	fs *fs;
1336923Ssdebnath 	struct	buf *bp;
1337923Ssdebnath 	int	i, j;
1338923Ssdebnath 	int	shft;			/* we maintain sh = 1 << shft */
1339923Ssdebnath 	int err;
1340923Ssdebnath 	daddr_t	ob, nb, tbn;
1341923Ssdebnath 	daddr32_t *bap;
1342923Ssdebnath 	int	nindirshift, nindiroffset;
1343923Ssdebnath 
1344923Ssdebnath 	ip = VTOI(vp);
1345923Ssdebnath 	ufsvfsp = ip->i_ufsvfs;
1346923Ssdebnath 	fs = ufsvfsp->vfs_fs;
1347923Ssdebnath 	lbn = (daddr_t)lblkno(fs, off);
1348923Ssdebnath 
1349923Ssdebnath 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
1350923Ssdebnath 
1351923Ssdebnath 	if (lbn < 0)
1352923Ssdebnath 		return (EFBIG);
1353923Ssdebnath 
1354923Ssdebnath 	/*
1355923Ssdebnath 	 * Take care of direct block assignment
1356923Ssdebnath 	 */
1357923Ssdebnath 	if (lbn < NDADDR) {
1358923Ssdebnath 		ip->i_db[lbn] = bn;
1359923Ssdebnath 		return (0);
1360923Ssdebnath 	}
1361923Ssdebnath 
1362923Ssdebnath 	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
1363923Ssdebnath 	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
1364923Ssdebnath 	/*
1365923Ssdebnath 	 * Determine how many levels of indirection.
1366923Ssdebnath 	 */
1367923Ssdebnath 	shft = 0;				/* sh = 1 */
1368923Ssdebnath 	tbn = lbn - NDADDR;
1369923Ssdebnath 	for (j = NIADDR; j > 0; j--) {
1370923Ssdebnath 		longlong_t	sh;
1371923Ssdebnath 
1372923Ssdebnath 		shft += nindirshift;		/* sh *= nindir */
1373923Ssdebnath 		sh = 1LL << shft;
1374923Ssdebnath 		if (tbn < sh)
1375923Ssdebnath 			break;
1376923Ssdebnath 		tbn -= sh;
1377923Ssdebnath 	}
1378923Ssdebnath 	if (j == 0)
1379923Ssdebnath 		return (EFBIG);
1380923Ssdebnath 
1381923Ssdebnath 	/*
1382923Ssdebnath 	 * Fetch the first indirect block.
1383923Ssdebnath 	 */
1384923Ssdebnath 	nb = ip->i_ib[NIADDR - j];
13854454Smishra 	if (nb == 0) {
1386923Ssdebnath 		err = ufs_fault(ITOV(ip), "bmap_set_bn: nb == UFS_HOLE");
13874454Smishra 		return (err);
13884454Smishra 	}
1389923Ssdebnath 
1390923Ssdebnath 	/*
1391923Ssdebnath 	 * Fetch through the indirect blocks.
1392923Ssdebnath 	 */
1393923Ssdebnath 	for (; j <= NIADDR; j++) {
1394923Ssdebnath 		ob = nb;
1395923Ssdebnath 		bp = UFS_BREAD(ufsvfsp,
13964662Sfrankho 		    ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
1397923Ssdebnath 		if (bp->b_flags & B_ERROR) {
1398923Ssdebnath 			err = geterror(bp);
1399923Ssdebnath 			brelse(bp);
1400923Ssdebnath 			return (err);
1401923Ssdebnath 		}
1402923Ssdebnath 		bap = bp->b_un.b_daddr;
1403923Ssdebnath 
1404923Ssdebnath 		ASSERT(!ufs_indir_badblock(ip, bap));
1405923Ssdebnath 
1406923Ssdebnath 		shft -= nindirshift;		/* sh / nindir */
1407923Ssdebnath 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
1408923Ssdebnath 
14094454Smishra 		nb = bap[i];
14104454Smishra 		if (nb == 0) {
14114454Smishra 			err = ufs_fault(ITOV(ip), "bmap_set_bn: nb == UFS_HOLE");
14124454Smishra 			return (err);
14134454Smishra 		}
14144454Smishra 
1415923Ssdebnath 		if (j == NIADDR) {
1416923Ssdebnath 			bap[i] = bn;
1417923Ssdebnath 			bdrwrite(bp);
1418923Ssdebnath 			return (0);
1419923Ssdebnath 		}
14204454Smishra 
1421923Ssdebnath 		brelse(bp);
1422923Ssdebnath 	}
1423923Ssdebnath 	return (0);
1424923Ssdebnath }
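
/*
 * Illustrative sketch only (not part of the original source): since
 * bmap_set_bn() neither logs nor dirties the inode itself, a
 * hypothetical caller that rewrites a block pointer might follow up
 * along these lines:
 *
 *	rw_enter(&ip->i_contents, RW_WRITER);
 *	err = bmap_set_bn(vp, off, newblk);
 *	if (err == 0) {
 *		TRANS_INODE(ip->i_ufsvfs, ip);
 *		ip->i_flag |= IUPD | ICHG;
 *		ip->i_seq++;
 *	}
 *	rw_exit(&ip->i_contents);
 *
 * where "newblk" is a previously allocated disk block (daddr32_t).
 */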
1425