xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_alloc.c (revision 11066:cebb50cbe4f9)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
54454Smishra  * Common Development and Distribution License (the "License").
64454Smishra  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
229915SOwen.Roberts@Sun.Com  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
270Sstevel@tonic-gate /*	  All Rights Reserved  	*/
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
310Sstevel@tonic-gate  * The Regents of the University of California
320Sstevel@tonic-gate  * All Rights Reserved
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
350Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
360Sstevel@tonic-gate  * contributors.
370Sstevel@tonic-gate  */
380Sstevel@tonic-gate 
39923Ssdebnath #include <sys/condvar_impl.h>
400Sstevel@tonic-gate #include <sys/types.h>
410Sstevel@tonic-gate #include <sys/t_lock.h>
420Sstevel@tonic-gate #include <sys/debug.h>
430Sstevel@tonic-gate #include <sys/param.h>
440Sstevel@tonic-gate #include <sys/systm.h>
450Sstevel@tonic-gate #include <sys/signal.h>
460Sstevel@tonic-gate #include <sys/cred.h>
470Sstevel@tonic-gate #include <sys/proc.h>
480Sstevel@tonic-gate #include <sys/disp.h>
490Sstevel@tonic-gate #include <sys/user.h>
500Sstevel@tonic-gate #include <sys/buf.h>
510Sstevel@tonic-gate #include <sys/vfs.h>
520Sstevel@tonic-gate #include <sys/vnode.h>
530Sstevel@tonic-gate #include <sys/acl.h>
540Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
550Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
560Sstevel@tonic-gate #include <sys/fs/ufs_acl.h>
570Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
580Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
590Sstevel@tonic-gate #include <sys/kmem.h>
600Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
610Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
620Sstevel@tonic-gate #include <sys/errno.h>
630Sstevel@tonic-gate #include <sys/time.h>
640Sstevel@tonic-gate #include <sys/sysmacros.h>
650Sstevel@tonic-gate #include <sys/file.h>
660Sstevel@tonic-gate #include <sys/fcntl.h>
670Sstevel@tonic-gate #include <sys/flock.h>
680Sstevel@tonic-gate #include <fs/fs_subr.h>
690Sstevel@tonic-gate #include <sys/cmn_err.h>
700Sstevel@tonic-gate #include <sys/policy.h>
719915SOwen.Roberts@Sun.Com #include <sys/fs/ufs_log.h>
720Sstevel@tonic-gate 
730Sstevel@tonic-gate static ino_t	hashalloc();
740Sstevel@tonic-gate static daddr_t	fragextend();
750Sstevel@tonic-gate static daddr_t	alloccg();
760Sstevel@tonic-gate static daddr_t	alloccgblk();
770Sstevel@tonic-gate static ino_t	ialloccg();
780Sstevel@tonic-gate static daddr_t	mapsearch();
799915SOwen.Roberts@Sun.Com static int	findlogstartcg();
800Sstevel@tonic-gate 
810Sstevel@tonic-gate extern int	inside[], around[];
820Sstevel@tonic-gate extern uchar_t	*fragtbl[];
830Sstevel@tonic-gate void delay();
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * Allocate a block in the file system.
870Sstevel@tonic-gate  *
880Sstevel@tonic-gate  * The size of the requested block is given, which must be some
890Sstevel@tonic-gate  * multiple of fs_fsize and <= fs_bsize.
900Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
910Sstevel@tonic-gate  * the following hierarchy is used to allocate a block:
920Sstevel@tonic-gate  *   1) allocate the requested block.
930Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
940Sstevel@tonic-gate  *   3) allocate a block in the same cylinder group.
950Sstevel@tonic-gate  *   4) quadratically rehash into other cylinder groups, until an
960Sstevel@tonic-gate  *	available block is located.
970Sstevel@tonic-gate  * If no block preference is given the following hierarchy is used
980Sstevel@tonic-gate  * to allocate a block:
990Sstevel@tonic-gate  *   1) allocate a block in the cylinder group that contains the
1000Sstevel@tonic-gate  *	inode for the file.
1010Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
1020Sstevel@tonic-gate  *	available block is located.
1030Sstevel@tonic-gate  */
1040Sstevel@tonic-gate int
alloc(struct inode * ip,daddr_t bpref,int size,daddr_t * bnp,cred_t * cr)1050Sstevel@tonic-gate alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr)
1060Sstevel@tonic-gate {
1070Sstevel@tonic-gate 	struct fs *fs;
1080Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
1090Sstevel@tonic-gate 	daddr_t bno;
1100Sstevel@tonic-gate 	int cg;
1110Sstevel@tonic-gate 	int err;
1120Sstevel@tonic-gate 	char *errmsg = NULL;
1130Sstevel@tonic-gate 	size_t len;
114*11066Srafael.vanoni@sun.com 	clock_t	now;
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
1170Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
1180Sstevel@tonic-gate 	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
119923Ssdebnath 		err = ufs_fault(ITOV(ip), "alloc: bad size, dev = 0x%lx,"
120923Ssdebnath 		    " bsize = %d, size = %d, fs = %s\n",
121923Ssdebnath 		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1220Sstevel@tonic-gate 		return (err);
1230Sstevel@tonic-gate 	}
1240Sstevel@tonic-gate 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
1250Sstevel@tonic-gate 		goto nospace;
1260Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
1270Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
1280Sstevel@tonic-gate 		goto nospace;
1290Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len);
1300Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
1310Sstevel@tonic-gate 	if (errmsg != NULL) {
1320Sstevel@tonic-gate 		uprintf(errmsg);
1330Sstevel@tonic-gate 		kmem_free(errmsg, len);
1340Sstevel@tonic-gate 		errmsg = NULL;
1350Sstevel@tonic-gate 	}
1360Sstevel@tonic-gate 	if (err)
1370Sstevel@tonic-gate 		return (err);
1380Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
1390Sstevel@tonic-gate 		bpref = 0;
1400Sstevel@tonic-gate 	if (bpref == 0)
1410Sstevel@tonic-gate 		cg = (int)itog(fs, ip->i_number);
1420Sstevel@tonic-gate 	else
1430Sstevel@tonic-gate 		cg = dtog(fs, bpref);
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
1460Sstevel@tonic-gate 	    (ulong_t (*)())alloccg);
1470Sstevel@tonic-gate 	if (bno > 0) {
1480Sstevel@tonic-gate 		*bnp = bno;
1490Sstevel@tonic-gate 		return (0);
1500Sstevel@tonic-gate 	}
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate 	/*
1530Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
1540Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
1550Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail and
1560Sstevel@tonic-gate 	 * size is guaranteed to be >= zero by our caller.
1570Sstevel@tonic-gate 	 */
1580Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL,
1594662Sfrankho 	    (size_t *)NULL);
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate nospace:
162*11066Srafael.vanoni@sun.com 	now = ddi_get_lbolt();
1630Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
164*11066Srafael.vanoni@sun.com 	if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
1654662Sfrankho 	    (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
166*11066Srafael.vanoni@sun.com 		ufsvfsp->vfs_lastwhinetime = now;
1670Sstevel@tonic-gate 		cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt);
1680Sstevel@tonic-gate 	}
1690Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
1700Sstevel@tonic-gate 	return (ENOSPC);
1710Sstevel@tonic-gate }
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate /*
1740Sstevel@tonic-gate  * Reallocate a fragment to a bigger size
1750Sstevel@tonic-gate  *
1760Sstevel@tonic-gate  * The number and size of the old block is given, and a preference
1770Sstevel@tonic-gate  * and new size is also specified.  The allocator attempts to extend
1780Sstevel@tonic-gate  * the original block.  Failing that, the regular block allocator is
1790Sstevel@tonic-gate  * invoked to get an appropriate block.
1800Sstevel@tonic-gate  */
1810Sstevel@tonic-gate int
realloccg(struct inode * ip,daddr_t bprev,daddr_t bpref,int osize,int nsize,daddr_t * bnp,cred_t * cr)1820Sstevel@tonic-gate realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize,
1830Sstevel@tonic-gate     int nsize, daddr_t *bnp, cred_t *cr)
1840Sstevel@tonic-gate {
1850Sstevel@tonic-gate 	daddr_t bno;
1860Sstevel@tonic-gate 	struct fs *fs;
1870Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
1880Sstevel@tonic-gate 	int cg, request;
1890Sstevel@tonic-gate 	int err;
1900Sstevel@tonic-gate 	char *errmsg = NULL;
1910Sstevel@tonic-gate 	size_t len;
192*11066Srafael.vanoni@sun.com 	clock_t	now;
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
1950Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
1960Sstevel@tonic-gate 	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
1970Sstevel@tonic-gate 	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
1980Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
199923Ssdebnath 		    "realloccg: bad size, dev=0x%lx, bsize=%d, "
200923Ssdebnath 		    "osize=%d, nsize=%d, fs=%s\n",
201923Ssdebnath 		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
2020Sstevel@tonic-gate 		return (err);
2030Sstevel@tonic-gate 	}
2040Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
2050Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
2060Sstevel@tonic-gate 		goto nospace;
2070Sstevel@tonic-gate 	if (bprev == 0) {
2080Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
209923Ssdebnath 		    "realloccg: bad bprev, dev = 0x%lx, bsize = %d,"
210923Ssdebnath 		    " bprev = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev,
2110Sstevel@tonic-gate 		    fs->fs_fsmnt);
2120Sstevel@tonic-gate 		return (err);
2130Sstevel@tonic-gate 	}
2140Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len);
2150Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
2160Sstevel@tonic-gate 	if (errmsg != NULL) {
2170Sstevel@tonic-gate 		uprintf(errmsg);
2180Sstevel@tonic-gate 		kmem_free(errmsg, len);
2190Sstevel@tonic-gate 		errmsg = NULL;
2200Sstevel@tonic-gate 	}
2210Sstevel@tonic-gate 	if (err)
2220Sstevel@tonic-gate 		return (err);
2230Sstevel@tonic-gate 	cg = dtog(fs, bprev);
2240Sstevel@tonic-gate 	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
2250Sstevel@tonic-gate 	if (bno != 0) {
2260Sstevel@tonic-gate 		*bnp = bno;
2270Sstevel@tonic-gate 		return (0);
2280Sstevel@tonic-gate 	}
2290Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
2300Sstevel@tonic-gate 		bpref = 0;
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	/*
2330Sstevel@tonic-gate 	 * When optimizing for time we allocate a full block and
2340Sstevel@tonic-gate 	 * then only use the upper portion for this request. When
2350Sstevel@tonic-gate 	 * this file grows again it will grow into the unused portion
2360Sstevel@tonic-gate 	 * of the block (See fragextend() above).  This saves time
2370Sstevel@tonic-gate 	 * because an extra disk write would be needed if the frags
2380Sstevel@tonic-gate 	 * following the current allocation were not free. The extra
2390Sstevel@tonic-gate 	 * disk write is needed to move the data from its current
2400Sstevel@tonic-gate 	 * location into the newly allocated position.
2410Sstevel@tonic-gate 	 *
2420Sstevel@tonic-gate 	 * When optimizing for space we allocate a run of frags
2430Sstevel@tonic-gate 	 * that is just the right size for this request.
2440Sstevel@tonic-gate 	 */
2450Sstevel@tonic-gate 	request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize;
2460Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
2474662Sfrankho 	    (ulong_t (*)())alloccg);
2480Sstevel@tonic-gate 	if (bno > 0) {
2490Sstevel@tonic-gate 		*bnp = bno;
2500Sstevel@tonic-gate 		if (nsize < request)
2510Sstevel@tonic-gate 			(void) free(ip, bno + numfrags(fs, nsize),
2520Sstevel@tonic-gate 			    (off_t)(request - nsize), I_NOCANCEL);
2530Sstevel@tonic-gate 		return (0);
2540Sstevel@tonic-gate 	}
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate 	/*
2570Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
2580Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
2590Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail, and
2600Sstevel@tonic-gate 	 * our caller guarantees nsize >= osize.
2610Sstevel@tonic-gate 	 */
2620Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL,
2634662Sfrankho 	    (size_t *)NULL);
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate nospace:
266*11066Srafael.vanoni@sun.com 	now = ddi_get_lbolt();
2670Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
268*11066Srafael.vanoni@sun.com 	if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
2694662Sfrankho 	    (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
270*11066Srafael.vanoni@sun.com 		ufsvfsp->vfs_lastwhinetime = now;
2710Sstevel@tonic-gate 		cmn_err(CE_NOTE,
2724662Sfrankho 		    "realloccg %s: file system full", fs->fs_fsmnt);
2730Sstevel@tonic-gate 	}
2740Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
2750Sstevel@tonic-gate 	return (ENOSPC);
2760Sstevel@tonic-gate }
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate /*
2790Sstevel@tonic-gate  * Allocate an inode in the file system.
2800Sstevel@tonic-gate  *
2810Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
2820Sstevel@tonic-gate  * the following hierarchy is used to allocate an inode:
2830Sstevel@tonic-gate  *   1) allocate the requested inode.
2840Sstevel@tonic-gate  *   2) allocate an inode in the same cylinder group.
2850Sstevel@tonic-gate  *   3) quadratically rehash into other cylinder groups, until an
2860Sstevel@tonic-gate  *	available inode is located.
2870Sstevel@tonic-gate  * If no inode preference is given the following hierarchy is used
2880Sstevel@tonic-gate  * to allocate an inode:
2890Sstevel@tonic-gate  *   1) allocate an inode in cylinder group 0.
2900Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
2910Sstevel@tonic-gate  *	available inode is located.
2920Sstevel@tonic-gate  */
2930Sstevel@tonic-gate int
ufs_ialloc(struct inode * pip,ino_t ipref,mode_t mode,struct inode ** ipp,cred_t * cr)2940Sstevel@tonic-gate ufs_ialloc(struct inode *pip,
2950Sstevel@tonic-gate     ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr)
2960Sstevel@tonic-gate {
2970Sstevel@tonic-gate 	struct inode *ip;
2980Sstevel@tonic-gate 	struct fs *fs;
2990Sstevel@tonic-gate 	int cg;
3000Sstevel@tonic-gate 	ino_t ino;
3010Sstevel@tonic-gate 	int err;
3020Sstevel@tonic-gate 	int nifree;
3030Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = pip->i_ufsvfs;
3040Sstevel@tonic-gate 	char *errmsg = NULL;
3050Sstevel@tonic-gate 	size_t len;
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&pip->i_rwlock));
3080Sstevel@tonic-gate 	fs = pip->i_fs;
3090Sstevel@tonic-gate loop:
3100Sstevel@tonic-gate 	nifree = fs->fs_cstotal.cs_nifree;
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	if (nifree == 0)
3130Sstevel@tonic-gate 		goto noinodes;
3140Sstevel@tonic-gate 	/*
3150Sstevel@tonic-gate 	 * Shadow inodes don't count against a user's inode allocation.
3160Sstevel@tonic-gate 	 * They are an implementation method and not a resource.
3170Sstevel@tonic-gate 	 */
3180Sstevel@tonic-gate 	if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3190Sstevel@tonic-gate 		err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data,
3204662Sfrankho 		    /* change */ 1, (struct inode *)NULL, crgetuid(cr), 0,
3214662Sfrankho 		    cr, &errmsg, &len);
3220Sstevel@tonic-gate 		/*
3230Sstevel@tonic-gate 		 * As we haven't acquired any locks yet, dump the message
3240Sstevel@tonic-gate 		 * now.
3250Sstevel@tonic-gate 		 */
3260Sstevel@tonic-gate 		if (errmsg != NULL) {
3270Sstevel@tonic-gate 			uprintf(errmsg);
3280Sstevel@tonic-gate 			kmem_free(errmsg, len);
3290Sstevel@tonic-gate 			errmsg = NULL;
3300Sstevel@tonic-gate 		}
3310Sstevel@tonic-gate 		if (err)
3320Sstevel@tonic-gate 			return (err);
3330Sstevel@tonic-gate 	}
3340Sstevel@tonic-gate 
3350Sstevel@tonic-gate 	if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg))
3360Sstevel@tonic-gate 		ipref = 0;
3370Sstevel@tonic-gate 	cg = (int)itog(fs, ipref);
3380Sstevel@tonic-gate 	ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode,
3390Sstevel@tonic-gate 	    (ulong_t (*)())ialloccg);
3400Sstevel@tonic-gate 	if (ino == 0) {
3410Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3420Sstevel@tonic-gate 			/*
3430Sstevel@tonic-gate 			 * We can safely ignore the return from chkiq()
3440Sstevel@tonic-gate 			 * because deallocations can only fail if we
3450Sstevel@tonic-gate 			 * can't get the user's quota info record off
3460Sstevel@tonic-gate 			 * the disk due to an I/O error.  In that case,
3470Sstevel@tonic-gate 			 * the quota subsystem is already messed up.
3480Sstevel@tonic-gate 			 */
3490Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
3504662Sfrankho 			    (struct inode *)NULL, crgetuid(cr), 0, cr,
3514662Sfrankho 			    (char **)NULL, (size_t *)NULL);
3520Sstevel@tonic-gate 		}
3530Sstevel@tonic-gate 		goto noinodes;
3540Sstevel@tonic-gate 	}
3550Sstevel@tonic-gate 	err = ufs_iget(pip->i_vfs, ino, ipp, cr);
3560Sstevel@tonic-gate 	if (err) {
3570Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3580Sstevel@tonic-gate 			/*
3590Sstevel@tonic-gate 			 * See above comment about why it is safe to ignore an
3600Sstevel@tonic-gate 			 * error return here.
3610Sstevel@tonic-gate 			 */
3620Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
3634662Sfrankho 			    (struct inode *)NULL, crgetuid(cr), 0, cr,
3644662Sfrankho 			    (char **)NULL, (size_t *)NULL);
3650Sstevel@tonic-gate 		}
3660Sstevel@tonic-gate 		ufs_ifree(pip, ino, 0);
3670Sstevel@tonic-gate 		return (err);
3680Sstevel@tonic-gate 	}
3690Sstevel@tonic-gate 	ip = *ipp;
3700Sstevel@tonic-gate 	ASSERT(!ip->i_ufs_acl);
3710Sstevel@tonic-gate 	ASSERT(!ip->i_dquot);
3720Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 	/*
3750Sstevel@tonic-gate 	 * Check if we really got a free inode, if not then complain
3760Sstevel@tonic-gate 	 * and mark the inode ISTALE so that it will be freed by the
3770Sstevel@tonic-gate 	 * ufs idle thread eventually and will not be sent to ufs_delete().
3780Sstevel@tonic-gate 	 */
3790Sstevel@tonic-gate 	if (ip->i_mode || (ip->i_nlink > 0)) {
3800Sstevel@tonic-gate 		ip->i_flag |= ISTALE;
3810Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
3820Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
3830Sstevel@tonic-gate 		cmn_err(CE_WARN,
3844662Sfrankho 		    "%s: unexpected allocated inode %d, run fsck(1M)%s",
3854662Sfrankho 		    fs->fs_fsmnt, (int)ino,
3864662Sfrankho 		    (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
3870Sstevel@tonic-gate 		goto loop;
3880Sstevel@tonic-gate 	}
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate 	/*
3910Sstevel@tonic-gate 	 * Check the inode has no size or data blocks.
3920Sstevel@tonic-gate 	 * This could have happened if the truncation failed when
3930Sstevel@tonic-gate 	 * deleting the inode. It used to be possible for this to occur
3940Sstevel@tonic-gate 	 * if a block allocation failed when iteratively truncating a
3950Sstevel@tonic-gate 	 * large file using logging and with a full file system.
3960Sstevel@tonic-gate 	 * This was fixed with bug fix 4348738. However, truncation may
3970Sstevel@tonic-gate 	 * still fail on an IO error. So in all cases for safety and
3980Sstevel@tonic-gate 	 * security we clear out the size; the blocks allocated; and
3990Sstevel@tonic-gate 	 * pointers to the blocks. This will ultimately cause a fsck
4000Sstevel@tonic-gate 	 * error of un-accounted for blocks, but its a fairly benign error,
4010Sstevel@tonic-gate 	 * and possibly the correct thing to do anyway as accesssing those
4020Sstevel@tonic-gate 	 * blocks agains may lead to more IO errors.
4030Sstevel@tonic-gate 	 */
4040Sstevel@tonic-gate 	if (ip->i_size || ip->i_blocks) {
4050Sstevel@tonic-gate 		int i;
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 		if (ip->i_size) {
4080Sstevel@tonic-gate 			cmn_err(CE_WARN,
409923Ssdebnath 			    "%s: free inode %d had size 0x%llx, run fsck(1M)%s",
410923Ssdebnath 			    fs->fs_fsmnt, (int)ino, ip->i_size,
411923Ssdebnath 			    (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
4120Sstevel@tonic-gate 		}
4130Sstevel@tonic-gate 		/*
4140Sstevel@tonic-gate 		 * Clear any garbage left behind.
4150Sstevel@tonic-gate 		 */
4160Sstevel@tonic-gate 		ip->i_size = (u_offset_t)0;
4170Sstevel@tonic-gate 		ip->i_blocks = 0;
4180Sstevel@tonic-gate 		for (i = 0; i < NDADDR; i++)
4190Sstevel@tonic-gate 			ip->i_db[i] = 0;
4200Sstevel@tonic-gate 		for (i = 0; i < NIADDR; i++)
4210Sstevel@tonic-gate 			ip->i_ib[i] = 0;
4220Sstevel@tonic-gate 	}
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate 	/*
4250Sstevel@tonic-gate 	 * Initialize the link count
4260Sstevel@tonic-gate 	 */
4270Sstevel@tonic-gate 	ip->i_nlink = 0;
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 	/*
4300Sstevel@tonic-gate 	 * Clear the old flags
4310Sstevel@tonic-gate 	 */
4320Sstevel@tonic-gate 	ip->i_flag &= IREF;
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	/*
4350Sstevel@tonic-gate 	 * Access times are not really defined if the fs is mounted
4360Sstevel@tonic-gate 	 * with 'noatime'. But it can cause nfs clients to fail
4370Sstevel@tonic-gate 	 * open() if the atime is not a legal value. Set a legal value
4380Sstevel@tonic-gate 	 * here when the inode is allocated.
4390Sstevel@tonic-gate 	 */
4400Sstevel@tonic-gate 	if (ufsvfsp->vfs_noatime) {
4410Sstevel@tonic-gate 		mutex_enter(&ufs_iuniqtime_lock);
4420Sstevel@tonic-gate 		ip->i_atime = iuniqtime;
4430Sstevel@tonic-gate 		mutex_exit(&ufs_iuniqtime_lock);
4440Sstevel@tonic-gate 	}
4450Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4460Sstevel@tonic-gate 	return (0);
4470Sstevel@tonic-gate noinodes:
4480Sstevel@tonic-gate 	if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET))
4490Sstevel@tonic-gate 		cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt);
4500Sstevel@tonic-gate 	return (ENOSPC);
4510Sstevel@tonic-gate }
4520Sstevel@tonic-gate 
4530Sstevel@tonic-gate /*
4540Sstevel@tonic-gate  * Find a cylinder group to place a directory.
4550Sstevel@tonic-gate  * Returns an inumber within the selected cylinder group.
4560Sstevel@tonic-gate  * Note, the vfs_lock is not needed as we don't require exact cg summary info.
4570Sstevel@tonic-gate  *
4580Sstevel@tonic-gate  * If the switch ufs_close_dirs is set, then the policy is to use
4590Sstevel@tonic-gate  * the current cg if it has more than 25% free inodes and more
4600Sstevel@tonic-gate  * than 25% free blocks. Otherwise the cgs are searched from
4610Sstevel@tonic-gate  * the beginning and the first cg with the same criteria is
4620Sstevel@tonic-gate  * used. If that is also null then we revert to the old algorithm.
4630Sstevel@tonic-gate  * This tends to cluster files at the beginning of the disk
4640Sstevel@tonic-gate  * until the disk gets full.
4650Sstevel@tonic-gate  *
4660Sstevel@tonic-gate  * Otherwise if ufs_close_dirs is not set then the original policy is
4670Sstevel@tonic-gate  * used which is to select from among those cylinder groups with
4680Sstevel@tonic-gate  * above the average number of free inodes, the one with the smallest
4690Sstevel@tonic-gate  * number of directories.
4700Sstevel@tonic-gate  */
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate int ufs_close_dirs = 1;	/* allocate directories close as possible */
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate ino_t
dirpref(inode_t * dp)4750Sstevel@tonic-gate dirpref(inode_t *dp)
4760Sstevel@tonic-gate {
4770Sstevel@tonic-gate 	int cg, minndir, mincg, avgifree, mininode, minbpg, ifree;
4780Sstevel@tonic-gate 	struct fs *fs = dp->i_fs;
4790Sstevel@tonic-gate 
4800Sstevel@tonic-gate 	cg = itog(fs, dp->i_number);
4810Sstevel@tonic-gate 	mininode = fs->fs_ipg >> 2;
4820Sstevel@tonic-gate 	minbpg = fs->fs_maxbpg >> 2;
4830Sstevel@tonic-gate 	if (ufs_close_dirs &&
4840Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nifree > mininode) &&
4850Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
4860Sstevel@tonic-gate 		return (dp->i_number);
4870Sstevel@tonic-gate 	}
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
4900Sstevel@tonic-gate 	minndir = fs->fs_ipg;
4910Sstevel@tonic-gate 	mincg = 0;
4920Sstevel@tonic-gate 	for (cg = 0; cg < fs->fs_ncg; cg++) {
4930Sstevel@tonic-gate 		ifree = fs->fs_cs(fs, cg).cs_nifree;
4940Sstevel@tonic-gate 		if (ufs_close_dirs &&
4950Sstevel@tonic-gate 		    (ifree > mininode) &&
4960Sstevel@tonic-gate 		    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
4970Sstevel@tonic-gate 			return ((ino_t)(fs->fs_ipg * cg));
4980Sstevel@tonic-gate 		}
4990Sstevel@tonic-gate 		if ((fs->fs_cs(fs, cg).cs_ndir < minndir) &&
5000Sstevel@tonic-gate 		    (ifree >= avgifree)) {
5010Sstevel@tonic-gate 			mincg = cg;
5020Sstevel@tonic-gate 			minndir = fs->fs_cs(fs, cg).cs_ndir;
5030Sstevel@tonic-gate 		}
5040Sstevel@tonic-gate 	}
5050Sstevel@tonic-gate 	return ((ino_t)(fs->fs_ipg * mincg));
5060Sstevel@tonic-gate }
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate /*
5090Sstevel@tonic-gate  * Select the desired position for the next block in a file.  The file is
5100Sstevel@tonic-gate  * logically divided into sections. The first section is composed of the
5110Sstevel@tonic-gate  * direct blocks. Each additional section contains fs_maxbpg blocks.
5120Sstevel@tonic-gate  *
5130Sstevel@tonic-gate  * If no blocks have been allocated in the first section, the policy is to
5140Sstevel@tonic-gate  * request a block in the same cylinder group as the inode that describes
5150Sstevel@tonic-gate  * the file. If no blocks have been allocated in any other section, the
5160Sstevel@tonic-gate  * policy is to place the section in a cylinder group with a greater than
5170Sstevel@tonic-gate  * average number of free blocks.  An appropriate cylinder group is found
5180Sstevel@tonic-gate  * by using a rotor that sweeps the cylinder groups. When a new group of
5190Sstevel@tonic-gate  * blocks is needed, the sweep begins in the cylinder group following the
5200Sstevel@tonic-gate  * cylinder group from which the previous allocation was made. The sweep
5210Sstevel@tonic-gate  * continues until a cylinder group with greater than the average number
5220Sstevel@tonic-gate  * of free blocks is found. If the allocation is for the first block in an
5230Sstevel@tonic-gate  * indirect block, the information on the previous allocation is unavailable;
5240Sstevel@tonic-gate  * here a best guess is made based upon the logical block number being
5250Sstevel@tonic-gate  * allocated.
5260Sstevel@tonic-gate  *
5270Sstevel@tonic-gate  * If a section is already partially allocated, the policy is to
5280Sstevel@tonic-gate  * contiguously allocate fs_maxcontig blocks.  The end of one of these
5290Sstevel@tonic-gate  * contiguous blocks and the beginning of the next is physically separated
5300Sstevel@tonic-gate  * so that the disk head will be in transit between them for at least
5310Sstevel@tonic-gate  * fs_rotdelay milliseconds.  This is to allow time for the processor to
5320Sstevel@tonic-gate  * schedule another I/O transfer.
5330Sstevel@tonic-gate  */
5340Sstevel@tonic-gate daddr_t
blkpref(struct inode * ip,daddr_t lbn,int indx,daddr32_t * bap)5350Sstevel@tonic-gate blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap)
5360Sstevel@tonic-gate {
5370Sstevel@tonic-gate 	struct fs *fs;
5380Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
5390Sstevel@tonic-gate 	int cg;
5400Sstevel@tonic-gate 	int avgbfree, startcg;
5410Sstevel@tonic-gate 	daddr_t nextblk;
5420Sstevel@tonic-gate 
5430Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
5440Sstevel@tonic-gate 	fs = ip->i_fs;
5450Sstevel@tonic-gate 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
5460Sstevel@tonic-gate 		if (lbn < NDADDR) {
5470Sstevel@tonic-gate 			cg = itog(fs, ip->i_number);
5480Sstevel@tonic-gate 			return (fs->fs_fpg * cg + fs->fs_frag);
5490Sstevel@tonic-gate 		}
5500Sstevel@tonic-gate 		/*
5510Sstevel@tonic-gate 		 * Find a cylinder with greater than average
5520Sstevel@tonic-gate 		 * number of unused data blocks.
5530Sstevel@tonic-gate 		 */
5540Sstevel@tonic-gate 		if (indx == 0 || bap[indx - 1] == 0)
5550Sstevel@tonic-gate 			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
5560Sstevel@tonic-gate 		else
5570Sstevel@tonic-gate 			startcg = dtog(fs, bap[indx - 1]) + 1;
5580Sstevel@tonic-gate 		startcg %= fs->fs_ncg;
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
5610Sstevel@tonic-gate 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
5620Sstevel@tonic-gate 		/*
5630Sstevel@tonic-gate 		 * used for computing log space for writes/truncs
5640Sstevel@tonic-gate 		 */
5650Sstevel@tonic-gate 		ufsvfsp->vfs_avgbfree = avgbfree;
5660Sstevel@tonic-gate 		for (cg = startcg; cg < fs->fs_ncg; cg++)
5670Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
5680Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
5690Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
5700Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
5710Sstevel@tonic-gate 			}
5720Sstevel@tonic-gate 		for (cg = 0; cg <= startcg; cg++)
5730Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
5740Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
5750Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
5760Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
5770Sstevel@tonic-gate 			}
5780Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
5790Sstevel@tonic-gate 		return (NULL);
5800Sstevel@tonic-gate 	}
5810Sstevel@tonic-gate 	/*
5820Sstevel@tonic-gate 	 * One or more previous blocks have been laid out. If less
5830Sstevel@tonic-gate 	 * than fs_maxcontig previous blocks are contiguous, the
5840Sstevel@tonic-gate 	 * next block is requested contiguously, otherwise it is
5850Sstevel@tonic-gate 	 * requested rotationally delayed by fs_rotdelay milliseconds.
5860Sstevel@tonic-gate 	 */
587923Ssdebnath 
588923Ssdebnath 	nextblk = bap[indx - 1];
589923Ssdebnath 	/*
590923Ssdebnath 	 * Provision for fallocate to return positive
591923Ssdebnath 	 * blk preference based on last allocation
592923Ssdebnath 	 */
593923Ssdebnath 	if (nextblk < 0 && nextblk != UFS_HOLE) {
594923Ssdebnath 		nextblk = (-bap[indx - 1]) + fs->fs_frag;
595923Ssdebnath 	} else {
596923Ssdebnath 		nextblk = bap[indx - 1] + fs->fs_frag;
597923Ssdebnath 	}
598923Ssdebnath 
599923Ssdebnath 	if (indx > fs->fs_maxcontig && bap[indx - fs->fs_maxcontig] +
600923Ssdebnath 	    blkstofrags(fs, fs->fs_maxcontig) != nextblk) {
6010Sstevel@tonic-gate 		return (nextblk);
602923Ssdebnath 	}
6030Sstevel@tonic-gate 	if (fs->fs_rotdelay != 0)
6040Sstevel@tonic-gate 		/*
6050Sstevel@tonic-gate 		 * Here we convert ms of delay to frags as:
6060Sstevel@tonic-gate 		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
607923Ssdebnath 		 * 	((sect/frag) * (ms/sec))
6080Sstevel@tonic-gate 		 * then round up to the next block.
6090Sstevel@tonic-gate 		 */
6100Sstevel@tonic-gate 		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
6110Sstevel@tonic-gate 		    (NSPF(fs) * 1000), fs->fs_frag);
6120Sstevel@tonic-gate 	return (nextblk);
6130Sstevel@tonic-gate }
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate /*
6160Sstevel@tonic-gate  * Free a block or fragment.
6170Sstevel@tonic-gate  *
6180Sstevel@tonic-gate  * The specified block or fragment is placed back in the
6190Sstevel@tonic-gate  * free map. If a fragment is deallocated, a possible
6200Sstevel@tonic-gate  * block reassembly is checked.
6210Sstevel@tonic-gate  */
6220Sstevel@tonic-gate void
free(struct inode * ip,daddr_t bno,off_t size,int flags)6230Sstevel@tonic-gate free(struct inode *ip, daddr_t bno, off_t size, int flags)
6240Sstevel@tonic-gate {
6250Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
6260Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
627512Sjkennedy 	struct ufs_q *delq = &ufsvfsp->vfs_delete;
628512Sjkennedy 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
6290Sstevel@tonic-gate 	struct cg *cgp;
6300Sstevel@tonic-gate 	struct buf *bp;
6310Sstevel@tonic-gate 	int cg, bmap, bbase;
6320Sstevel@tonic-gate 	int i;
6330Sstevel@tonic-gate 	uchar_t *blksfree;
6340Sstevel@tonic-gate 	int *blktot;
6350Sstevel@tonic-gate 	short *blks;
6360Sstevel@tonic-gate 	daddr_t blkno, cylno, rpos;
6370Sstevel@tonic-gate 
638923Ssdebnath 	/*
639923Ssdebnath 	 * fallocate'd files will have negative block address.
640923Ssdebnath 	 * So negate it again to get original block address.
641923Ssdebnath 	 */
6424454Smishra 	if (bno < 0 && (bno % fs->fs_frag == 0) && bno != UFS_HOLE) {
643923Ssdebnath 		bno = -bno;
644923Ssdebnath 	}
645923Ssdebnath 
6460Sstevel@tonic-gate 	if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) {
6470Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
648923Ssdebnath 		    "free: bad size, dev = 0x%lx, bsize = %d, size = %d, "
649923Ssdebnath 		    "fs = %s\n", ip->i_dev, fs->fs_bsize,
650923Ssdebnath 		    (int)size, fs->fs_fsmnt);
6510Sstevel@tonic-gate 		return;
6520Sstevel@tonic-gate 	}
6530Sstevel@tonic-gate 	cg = dtog(fs, bno);
6540Sstevel@tonic-gate 	ASSERT(!ufs_badblock(ip, bno));
6550Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
656923Ssdebnath 	    (int)fs->fs_cgsize);
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
6590Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
6600Sstevel@tonic-gate 		brelse(bp);
6610Sstevel@tonic-gate 		return;
6620Sstevel@tonic-gate 	}
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 	if (!(flags & I_NOCANCEL))
6650Sstevel@tonic-gate 		TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags);
6660Sstevel@tonic-gate 	if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) {
6670Sstevel@tonic-gate 		TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size);
6680Sstevel@tonic-gate 	}
6690Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
6700Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
6710Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
6720Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
6730Sstevel@tonic-gate 	bno = dtogd(fs, bno);
6740Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
6750Sstevel@tonic-gate 		blkno = fragstoblks(fs, bno);
6760Sstevel@tonic-gate 		cylno = cbtocylno(fs, bno);
6770Sstevel@tonic-gate 		rpos = cbtorpos(ufsvfsp, bno);
6780Sstevel@tonic-gate 		blks = cg_blks(ufsvfsp, cgp, cylno);
6790Sstevel@tonic-gate 		if (!isclrblock(fs, blksfree, blkno)) {
6800Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
6810Sstevel@tonic-gate 			brelse(bp);
6820Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip), "free: freeing free block, "
6830Sstevel@tonic-gate 			    "dev:0x%lx, block:%ld, ino:%lu, fs:%s",
6840Sstevel@tonic-gate 			    ip->i_dev, bno, ip->i_number, fs->fs_fsmnt);
6850Sstevel@tonic-gate 			return;
6860Sstevel@tonic-gate 		}
6870Sstevel@tonic-gate 		setblock(fs, blksfree, blkno);
6880Sstevel@tonic-gate 		blks[rpos]++;
6890Sstevel@tonic-gate 		blktot[cylno]++;
6900Sstevel@tonic-gate 		cgp->cg_cs.cs_nbfree++;		/* Log below */
6910Sstevel@tonic-gate 		fs->fs_cstotal.cs_nbfree++;
6920Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nbfree++;
693512Sjkennedy 		if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
694512Sjkennedy 			mutex_enter(&delq->uq_mutex);
695512Sjkennedy 			delq_info->delq_unreclaimed_blocks -=
696512Sjkennedy 			    btodb(fs->fs_bsize);
697512Sjkennedy 			mutex_exit(&delq->uq_mutex);
698512Sjkennedy 		}
6990Sstevel@tonic-gate 	} else {
7000Sstevel@tonic-gate 		bbase = bno - fragnum(fs, bno);
7010Sstevel@tonic-gate 		/*
7020Sstevel@tonic-gate 		 * Decrement the counts associated with the old frags
7030Sstevel@tonic-gate 		 */
7040Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
7050Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, -1);
7060Sstevel@tonic-gate 		/*
7070Sstevel@tonic-gate 		 * Deallocate the fragment
7080Sstevel@tonic-gate 		 */
7090Sstevel@tonic-gate 		for (i = 0; i < numfrags(fs, size); i++) {
7100Sstevel@tonic-gate 			if (isset(blksfree, bno + i)) {
7110Sstevel@tonic-gate 				brelse(bp);
7120Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
7130Sstevel@tonic-gate 				(void) ufs_fault(ITOV(ip),
7140Sstevel@tonic-gate 				    "free: freeing free frag, "
7150Sstevel@tonic-gate 				    "dev:0x%lx, blk:%ld, cg:%d, "
7160Sstevel@tonic-gate 				    "ino:%lu, fs:%s",
7170Sstevel@tonic-gate 				    ip->i_dev,
7180Sstevel@tonic-gate 				    bno + i,
7190Sstevel@tonic-gate 				    cgp->cg_cgx,
7200Sstevel@tonic-gate 				    ip->i_number,
7210Sstevel@tonic-gate 				    fs->fs_fsmnt);
7220Sstevel@tonic-gate 				return;
7230Sstevel@tonic-gate 			}
7240Sstevel@tonic-gate 			setbit(blksfree, bno + i);
7250Sstevel@tonic-gate 		}
7260Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
7270Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
7280Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
729512Sjkennedy 		if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
730512Sjkennedy 			mutex_enter(&delq->uq_mutex);
731512Sjkennedy 			delq_info->delq_unreclaimed_blocks -=
732512Sjkennedy 			    btodb(i * fs->fs_fsize);
733512Sjkennedy 			mutex_exit(&delq->uq_mutex);
734512Sjkennedy 		}
7350Sstevel@tonic-gate 		/*
7360Sstevel@tonic-gate 		 * Add back in counts associated with the new frags
7370Sstevel@tonic-gate 		 */
7380Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
7390Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, 1);
7400Sstevel@tonic-gate 		/*
7410Sstevel@tonic-gate 		 * If a complete block has been reassembled, account for it
7420Sstevel@tonic-gate 		 */
7430Sstevel@tonic-gate 		blkno = fragstoblks(fs, bbase);
7440Sstevel@tonic-gate 		if (isblock(fs, blksfree, blkno)) {
7450Sstevel@tonic-gate 			cylno = cbtocylno(fs, bbase);
7460Sstevel@tonic-gate 			rpos = cbtorpos(ufsvfsp, bbase);
7470Sstevel@tonic-gate 			blks = cg_blks(ufsvfsp, cgp, cylno);
7480Sstevel@tonic-gate 			blks[rpos]++;
7490Sstevel@tonic-gate 			blktot[cylno]++;
7500Sstevel@tonic-gate 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
7510Sstevel@tonic-gate 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
7520Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
7530Sstevel@tonic-gate 			cgp->cg_cs.cs_nbfree++;
7540Sstevel@tonic-gate 			fs->fs_cstotal.cs_nbfree++;
7550Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nbfree++;
7560Sstevel@tonic-gate 		}
7570Sstevel@tonic-gate 	}
7580Sstevel@tonic-gate 	fs->fs_fmod = 1;
7590Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
7600Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
7610Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
7620Sstevel@tonic-gate 	bdrwrite(bp);
7630Sstevel@tonic-gate }
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate /*
7660Sstevel@tonic-gate  * Free an inode.
7670Sstevel@tonic-gate  *
7680Sstevel@tonic-gate  * The specified inode is placed back in the free map.
7690Sstevel@tonic-gate  */
7700Sstevel@tonic-gate void
ufs_ifree(struct inode * ip,ino_t ino,mode_t mode)7710Sstevel@tonic-gate ufs_ifree(struct inode *ip, ino_t ino, mode_t mode)
7720Sstevel@tonic-gate {
7730Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
7740Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
7750Sstevel@tonic-gate 	struct cg *cgp;
7760Sstevel@tonic-gate 	struct buf *bp;
7770Sstevel@tonic-gate 	unsigned int inot;
7780Sstevel@tonic-gate 	int cg;
7790Sstevel@tonic-gate 	char *iused;
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate 	if (ip->i_number == ino && ip->i_mode != 0) {
7820Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
7830Sstevel@tonic-gate 		    "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, "
7840Sstevel@tonic-gate 		    "fs = %s\n",
7850Sstevel@tonic-gate 		    ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt);
7860Sstevel@tonic-gate 		return;
7870Sstevel@tonic-gate 	}
7880Sstevel@tonic-gate 	if (ino >= fs->fs_ipg * fs->fs_ncg) {
7890Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
7900Sstevel@tonic-gate 		    "ifree: range, dev = 0x%x, ino = %d, fs = %s\n",
7910Sstevel@tonic-gate 		    (int)ip->i_dev, (int)ino, fs->fs_fsmnt);
7920Sstevel@tonic-gate 		return;
7930Sstevel@tonic-gate 	}
7940Sstevel@tonic-gate 	cg = (int)itog(fs, ino);
7950Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
796923Ssdebnath 	    (int)fs->fs_cgsize);
7970Sstevel@tonic-gate 
7980Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
7990Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
8000Sstevel@tonic-gate 		brelse(bp);
8010Sstevel@tonic-gate 		return;
8020Sstevel@tonic-gate 	}
8030Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
8040Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
8050Sstevel@tonic-gate 	iused = cg_inosused(cgp);
8060Sstevel@tonic-gate 	inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg);
8070Sstevel@tonic-gate 	if (isclr(iused, inot)) {
8080Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
8090Sstevel@tonic-gate 		brelse(bp);
8100Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, "
811923Ssdebnath 		    "mode: (imode) %o, (omode) %o, ino:%d, "
812923Ssdebnath 		    "fs:%s",
813923Ssdebnath 		    ip->i_mode, mode, (int)ino, fs->fs_fsmnt);
8140Sstevel@tonic-gate 		return;
8150Sstevel@tonic-gate 	}
8160Sstevel@tonic-gate 	clrbit(iused, inot);
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 	if (inot < (ulong_t)cgp->cg_irotor)
8190Sstevel@tonic-gate 		cgp->cg_irotor = inot;
8200Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree++;
8210Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree++;
8220Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree++;
8230Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
8240Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir--;
8250Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir--;
8260Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir--;
8270Sstevel@tonic-gate 	}
8280Sstevel@tonic-gate 	fs->fs_fmod = 1;
8290Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
8300Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
8310Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
8320Sstevel@tonic-gate 	bdrwrite(bp);
8330Sstevel@tonic-gate }
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate /*
8360Sstevel@tonic-gate  * Implement the cylinder overflow algorithm.
8370Sstevel@tonic-gate  *
8380Sstevel@tonic-gate  * The policy implemented by this algorithm is:
8390Sstevel@tonic-gate  *   1) allocate the block in its requested cylinder group.
8400Sstevel@tonic-gate  *   2) quadratically rehash on the cylinder group number.
8410Sstevel@tonic-gate  *   3) brute force search for a free block.
8420Sstevel@tonic-gate  * The size parameter means size for data blocks, mode for inodes.
8430Sstevel@tonic-gate  */
8440Sstevel@tonic-gate static ino_t
hashalloc(struct inode * ip,int cg,long pref,int size,ulong_t (* allocator)())8450Sstevel@tonic-gate hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)())
8460Sstevel@tonic-gate {
8470Sstevel@tonic-gate 	struct fs *fs;
8480Sstevel@tonic-gate 	int i;
8490Sstevel@tonic-gate 	long result;
8500Sstevel@tonic-gate 	int icg = cg;
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate 	fs = ip->i_fs;
8530Sstevel@tonic-gate 	/*
8540Sstevel@tonic-gate 	 * 1: preferred cylinder group
8550Sstevel@tonic-gate 	 */
8560Sstevel@tonic-gate 	result = (*allocator)(ip, cg, pref, size);
8570Sstevel@tonic-gate 	if (result)
8580Sstevel@tonic-gate 		return (result);
8590Sstevel@tonic-gate 	/*
8600Sstevel@tonic-gate 	 * 2: quadratic rehash
8610Sstevel@tonic-gate 	 */
8620Sstevel@tonic-gate 	for (i = 1; i < fs->fs_ncg; i *= 2) {
8630Sstevel@tonic-gate 		cg += i;
8640Sstevel@tonic-gate 		if (cg >= fs->fs_ncg)
8650Sstevel@tonic-gate 			cg -= fs->fs_ncg;
8660Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
8670Sstevel@tonic-gate 		if (result)
8680Sstevel@tonic-gate 			return (result);
8690Sstevel@tonic-gate 	}
8700Sstevel@tonic-gate 	/*
8710Sstevel@tonic-gate 	 * 3: brute force search
8720Sstevel@tonic-gate 	 * Note that we start at i == 2, since 0 was checked initially,
8730Sstevel@tonic-gate 	 * and 1 is always checked in the quadratic rehash.
8740Sstevel@tonic-gate 	 */
8750Sstevel@tonic-gate 	cg = (icg + 2) % fs->fs_ncg;
8760Sstevel@tonic-gate 	for (i = 2; i < fs->fs_ncg; i++) {
8770Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
8780Sstevel@tonic-gate 		if (result)
8790Sstevel@tonic-gate 			return (result);
8800Sstevel@tonic-gate 		cg++;
8810Sstevel@tonic-gate 		if (cg == fs->fs_ncg)
8820Sstevel@tonic-gate 			cg = 0;
8830Sstevel@tonic-gate 	}
8840Sstevel@tonic-gate 	return (NULL);
8850Sstevel@tonic-gate }
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate /*
8880Sstevel@tonic-gate  * Determine whether a fragment can be extended.
8890Sstevel@tonic-gate  *
8900Sstevel@tonic-gate  * Check to see if the necessary fragments are available, and
8910Sstevel@tonic-gate  * if they are, allocate them.
8920Sstevel@tonic-gate  */
8930Sstevel@tonic-gate static daddr_t
fragextend(struct inode * ip,int cg,long bprev,int osize,int nsize)8940Sstevel@tonic-gate fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
8950Sstevel@tonic-gate {
8960Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
8970Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
8980Sstevel@tonic-gate 	struct buf *bp;
8990Sstevel@tonic-gate 	struct cg *cgp;
9000Sstevel@tonic-gate 	uchar_t *blksfree;
9010Sstevel@tonic-gate 	long bno;
9020Sstevel@tonic-gate 	int frags, bbase;
9030Sstevel@tonic-gate 	int i, j;
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
9060Sstevel@tonic-gate 		return (NULL);
9070Sstevel@tonic-gate 	frags = numfrags(fs, nsize);
9080Sstevel@tonic-gate 	bbase = (int)fragnum(fs, bprev);
9090Sstevel@tonic-gate 	if (bbase > fragnum(fs, (bprev + frags - 1))) {
9100Sstevel@tonic-gate 		/* cannot extend across a block boundary */
9110Sstevel@tonic-gate 		return (NULL);
9120Sstevel@tonic-gate 	}
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
915923Ssdebnath 	    (int)fs->fs_cgsize);
9160Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
9170Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
9180Sstevel@tonic-gate 		brelse(bp);
9190Sstevel@tonic-gate 		return (NULL);
9200Sstevel@tonic-gate 	}
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
9230Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
9240Sstevel@tonic-gate 	bno = dtogd(fs, bprev);
9250Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
9260Sstevel@tonic-gate 		if (isclr(blksfree, bno + i)) {
9270Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
9280Sstevel@tonic-gate 			brelse(bp);
9290Sstevel@tonic-gate 			return (NULL);
9300Sstevel@tonic-gate 		}
9310Sstevel@tonic-gate 		if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)),
9324662Sfrankho 		    fs->fs_fsize))) {
9330Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
9340Sstevel@tonic-gate 			brelse(bp);
9350Sstevel@tonic-gate 			return (NULL);
9360Sstevel@tonic-gate 		}
9370Sstevel@tonic-gate 	}
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
9400Sstevel@tonic-gate 	/*
9410Sstevel@tonic-gate 	 * The current fragment can be extended,
9420Sstevel@tonic-gate 	 * deduct the count on fragment being extended into
9430Sstevel@tonic-gate 	 * increase the count on the remaining fragment (if any)
9440Sstevel@tonic-gate 	 * allocate the extended piece.
9450Sstevel@tonic-gate 	 */
9460Sstevel@tonic-gate 	for (i = frags; i < fs->fs_frag - bbase; i++)
9470Sstevel@tonic-gate 		if (isclr(blksfree, bno + i))
9480Sstevel@tonic-gate 			break;
9490Sstevel@tonic-gate 	j = i - numfrags(fs, osize);
9500Sstevel@tonic-gate 	cgp->cg_frsum[j]--;
9510Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[j] >= 0);
9520Sstevel@tonic-gate 	if (i != frags)
9530Sstevel@tonic-gate 		cgp->cg_frsum[i - frags]++;
9540Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
9550Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
9560Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree--;
9570Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree--;
9580Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree--;
9590Sstevel@tonic-gate 	}
9600Sstevel@tonic-gate 	fs->fs_fmod = 1;
9610Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
9620Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
9630Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
9640Sstevel@tonic-gate 	bdrwrite(bp);
9650Sstevel@tonic-gate 	return ((daddr_t)bprev);
9660Sstevel@tonic-gate }
9670Sstevel@tonic-gate 
9680Sstevel@tonic-gate /*
9690Sstevel@tonic-gate  * Determine whether a block can be allocated.
9700Sstevel@tonic-gate  *
9710Sstevel@tonic-gate  * Check to see if a block of the apprpriate size
9720Sstevel@tonic-gate  * is available, and if it is, allocate it.
9730Sstevel@tonic-gate  */
9740Sstevel@tonic-gate static daddr_t
alloccg(struct inode * ip,int cg,daddr_t bpref,int size)9750Sstevel@tonic-gate alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
9760Sstevel@tonic-gate {
9770Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
9780Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
9790Sstevel@tonic-gate 	struct buf *bp;
9800Sstevel@tonic-gate 	struct cg *cgp;
9810Sstevel@tonic-gate 	uchar_t *blksfree;
9820Sstevel@tonic-gate 	int bno, frags;
9830Sstevel@tonic-gate 	int allocsiz;
9840Sstevel@tonic-gate 	int i;
9850Sstevel@tonic-gate 
9864479Smishra 	/*
9874479Smishra 	 * Searching for space could be time expensive so do some
9884479Smishra 	 * up front checking to verify that there is actually space
9894479Smishra 	 * available (free blocks or free frags).
9904479Smishra 	 */
9914479Smishra 	if (fs->fs_cs(fs, cg).cs_nbfree == 0) {
9924479Smishra 		if (size == fs->fs_bsize)
9934479Smishra 			return (0);
9944479Smishra 
9954479Smishra 		/*
9964479Smishra 		 * If there are not enough free frags then return.
9974479Smishra 		 */
9984479Smishra 		if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, size))
9994479Smishra 			return (0);
10004479Smishra 	}
10014479Smishra 
10020Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1003923Ssdebnath 	    (int)fs->fs_cgsize);
10040Sstevel@tonic-gate 
10050Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
10060Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
10070Sstevel@tonic-gate 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
10080Sstevel@tonic-gate 		brelse(bp);
10090Sstevel@tonic-gate 		return (0);
10100Sstevel@tonic-gate 	}
10110Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
10120Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
10130Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
10140Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
10150Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
10160Sstevel@tonic-gate 			goto errout;
10170Sstevel@tonic-gate 		fs->fs_fmod = 1;
10180Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
10190Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
10200Sstevel@tonic-gate 		bdrwrite(bp);
10210Sstevel@tonic-gate 		return (bno);
10220Sstevel@tonic-gate 	}
10230Sstevel@tonic-gate 	/*
10248186SViswanathan.Kannappan@Sun.COM 	 * Check fragment bitmap to see if any fragments are already available.
10258186SViswanathan.Kannappan@Sun.COM 	 * mapsearch() may fail because the fragment that fits this request
10268186SViswanathan.Kannappan@Sun.COM 	 * might still be on the cancel list and not available for re-use yet.
10278186SViswanathan.Kannappan@Sun.COM 	 * Look for a bigger sized fragment to allocate first before we have
10288186SViswanathan.Kannappan@Sun.COM 	 * to give up and fragment a whole new block eventually.
10290Sstevel@tonic-gate 	 */
10300Sstevel@tonic-gate 	frags = numfrags(fs, size);
10318186SViswanathan.Kannappan@Sun.COM 	allocsiz = frags;
10328186SViswanathan.Kannappan@Sun.COM next_size:
10338186SViswanathan.Kannappan@Sun.COM 	for (; allocsiz < fs->fs_frag; allocsiz++)
10340Sstevel@tonic-gate 		if (cgp->cg_frsum[allocsiz] != 0)
10350Sstevel@tonic-gate 			break;
10360Sstevel@tonic-gate 
10378186SViswanathan.Kannappan@Sun.COM 	if (allocsiz != fs->fs_frag) {
10380Sstevel@tonic-gate 		bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz);
10398186SViswanathan.Kannappan@Sun.COM 		if (bno < 0 && allocsiz < (fs->fs_frag - 1)) {
10408186SViswanathan.Kannappan@Sun.COM 			allocsiz++;
10418186SViswanathan.Kannappan@Sun.COM 			goto next_size;
10428186SViswanathan.Kannappan@Sun.COM 		}
10438186SViswanathan.Kannappan@Sun.COM 	}
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	if (allocsiz == fs->fs_frag || bno < 0) {
10460Sstevel@tonic-gate 		/*
10470Sstevel@tonic-gate 		 * No fragments were available, so a block
10480Sstevel@tonic-gate 		 * will be allocated and hacked up.
10490Sstevel@tonic-gate 		 */
10500Sstevel@tonic-gate 		if (cgp->cg_cs.cs_nbfree == 0)
10510Sstevel@tonic-gate 			goto errout;
10520Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
10530Sstevel@tonic-gate 			goto errout;
10540Sstevel@tonic-gate 		bpref = dtogd(fs, bno);
10550Sstevel@tonic-gate 		for (i = frags; i < fs->fs_frag; i++)
10560Sstevel@tonic-gate 			setbit(blksfree, bpref + i);
10570Sstevel@tonic-gate 		i = fs->fs_frag - frags;
10580Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
10590Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
10600Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
10610Sstevel@tonic-gate 		cgp->cg_frsum[i]++;
10620Sstevel@tonic-gate 		fs->fs_fmod = 1;
10630Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
10640Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
10650Sstevel@tonic-gate 		bdrwrite(bp);
10660Sstevel@tonic-gate 		return (bno);
10670Sstevel@tonic-gate 	}
10680Sstevel@tonic-gate 
10690Sstevel@tonic-gate 	for (i = 0; i < frags; i++)
10700Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
10710Sstevel@tonic-gate 	cgp->cg_cs.cs_nffree -= frags;
10720Sstevel@tonic-gate 	fs->fs_cstotal.cs_nffree -= frags;
10730Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nffree -= frags;
10740Sstevel@tonic-gate 	cgp->cg_frsum[allocsiz]--;
10750Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[allocsiz] >= 0);
10760Sstevel@tonic-gate 	if (frags != allocsiz) {
10770Sstevel@tonic-gate 		cgp->cg_frsum[allocsiz - frags]++;
10780Sstevel@tonic-gate 	}
10790Sstevel@tonic-gate 	fs->fs_fmod = 1;
10800Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
10810Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
10820Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
10830Sstevel@tonic-gate 	bdrwrite(bp);
10840Sstevel@tonic-gate 	return (cg * fs->fs_fpg + bno);
10850Sstevel@tonic-gate errout:
10860Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
10870Sstevel@tonic-gate 	brelse(bp);
10880Sstevel@tonic-gate 	return (0);
10890Sstevel@tonic-gate }
10900Sstevel@tonic-gate 
10910Sstevel@tonic-gate /*
10920Sstevel@tonic-gate  * Allocate a block in a cylinder group.
10930Sstevel@tonic-gate  *
10940Sstevel@tonic-gate  * This algorithm implements the following policy:
10950Sstevel@tonic-gate  *   1) allocate the requested block.
10960Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
10970Sstevel@tonic-gate  *   3) allocate the next available block on the block rotor for the
10980Sstevel@tonic-gate  *	specified cylinder group.
10990Sstevel@tonic-gate  * Note that this routine only allocates fs_bsize blocks; these
11000Sstevel@tonic-gate  * blocks may be fragmented by the routine that allocates them.
11010Sstevel@tonic-gate  */
11020Sstevel@tonic-gate static daddr_t
alloccgblk(struct ufsvfs * ufsvfsp,struct cg * cgp,daddr_t bpref,struct buf * bp)11030Sstevel@tonic-gate alloccgblk(
11040Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp,
11050Sstevel@tonic-gate 	struct cg *cgp,
11060Sstevel@tonic-gate 	daddr_t bpref,
11070Sstevel@tonic-gate 	struct buf *bp)
11080Sstevel@tonic-gate {
11090Sstevel@tonic-gate 	daddr_t bno;
11100Sstevel@tonic-gate 	int cylno, pos, delta, rotbl_size;
11110Sstevel@tonic-gate 	short *cylbp;
11120Sstevel@tonic-gate 	int i;
11130Sstevel@tonic-gate 	struct fs *fs;
11140Sstevel@tonic-gate 	uchar_t *blksfree;
11150Sstevel@tonic-gate 	daddr_t blkno, rpos, frag;
11160Sstevel@tonic-gate 	short *blks;
11170Sstevel@tonic-gate 	int32_t *blktot;
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
11200Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
11210Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
11220Sstevel@tonic-gate 	if (bpref == 0) {
11230Sstevel@tonic-gate 		bpref = cgp->cg_rotor;
11240Sstevel@tonic-gate 		goto norot;
11250Sstevel@tonic-gate 	}
11260Sstevel@tonic-gate 	bpref = blknum(fs, bpref);
11270Sstevel@tonic-gate 	bpref = dtogd(fs, bpref);
11280Sstevel@tonic-gate 	/*
11290Sstevel@tonic-gate 	 * If the requested block is available, use it.
11300Sstevel@tonic-gate 	 */
11310Sstevel@tonic-gate 	if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) {
11320Sstevel@tonic-gate 		bno = bpref;
11330Sstevel@tonic-gate 		goto gotit;
11340Sstevel@tonic-gate 	}
11350Sstevel@tonic-gate 	/*
11360Sstevel@tonic-gate 	 * Check for a block available on the same cylinder.
11370Sstevel@tonic-gate 	 */
11380Sstevel@tonic-gate 	cylno = cbtocylno(fs, bpref);
11390Sstevel@tonic-gate 	if (cg_blktot(cgp)[cylno] == 0)
11400Sstevel@tonic-gate 		goto norot;
11410Sstevel@tonic-gate 	if (fs->fs_cpc == 0) {
11420Sstevel@tonic-gate 		/*
11430Sstevel@tonic-gate 		 * Block layout info is not available, so just
11440Sstevel@tonic-gate 		 * have to take any block in this cylinder.
11450Sstevel@tonic-gate 		 */
11460Sstevel@tonic-gate 		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
11470Sstevel@tonic-gate 		goto norot;
11480Sstevel@tonic-gate 	}
11490Sstevel@tonic-gate 	/*
11500Sstevel@tonic-gate 	 * Check the summary information to see if a block is
11510Sstevel@tonic-gate 	 * available in the requested cylinder starting at the
11520Sstevel@tonic-gate 	 * requested rotational position and proceeding around.
11530Sstevel@tonic-gate 	 */
11540Sstevel@tonic-gate 	cylbp = cg_blks(ufsvfsp, cgp, cylno);
11550Sstevel@tonic-gate 	pos = cbtorpos(ufsvfsp, bpref);
11560Sstevel@tonic-gate 	for (i = pos; i < ufsvfsp->vfs_nrpos; i++)
11570Sstevel@tonic-gate 		if (cylbp[i] > 0)
11580Sstevel@tonic-gate 			break;
11590Sstevel@tonic-gate 	if (i == ufsvfsp->vfs_nrpos)
11600Sstevel@tonic-gate 		for (i = 0; i < pos; i++)
11610Sstevel@tonic-gate 			if (cylbp[i] > 0)
11620Sstevel@tonic-gate 				break;
11630Sstevel@tonic-gate 	if (cylbp[i] > 0) {
11640Sstevel@tonic-gate 		/*
11650Sstevel@tonic-gate 		 * Found a rotational position, now find the actual
11660Sstevel@tonic-gate 		 * block.  A "panic" if none is actually there.
11670Sstevel@tonic-gate 		 */
11680Sstevel@tonic-gate 
11690Sstevel@tonic-gate 		/*
11700Sstevel@tonic-gate 		 * Up to this point, "pos" has referred to the rotational
11710Sstevel@tonic-gate 		 * position of the desired block.  From now on, it holds
11720Sstevel@tonic-gate 		 * the offset of the current cylinder within a cylinder
11730Sstevel@tonic-gate 		 * cycle.  (A cylinder cycle refers to a set of cylinders
11740Sstevel@tonic-gate 		 * which are described by a single rotational table; the
11750Sstevel@tonic-gate 		 * size of the cycle is fs_cpc.)
11760Sstevel@tonic-gate 		 *
11770Sstevel@tonic-gate 		 * bno is set to the block number of the first block within
11780Sstevel@tonic-gate 		 * the current cylinder cycle.
11790Sstevel@tonic-gate 		 */
11800Sstevel@tonic-gate 
11810Sstevel@tonic-gate 		pos = cylno % fs->fs_cpc;
11820Sstevel@tonic-gate 		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
11830Sstevel@tonic-gate 
11840Sstevel@tonic-gate 		/*
11850Sstevel@tonic-gate 		 * The blocks within a cylinder are grouped into equivalence
11860Sstevel@tonic-gate 		 * classes according to their "rotational position."  There
11870Sstevel@tonic-gate 		 * are two tables used to determine these classes.
11880Sstevel@tonic-gate 		 *
11890Sstevel@tonic-gate 		 * The positional offset table (fs_postbl) has an entry for
11900Sstevel@tonic-gate 		 * each rotational position of each cylinder in a cylinder
11910Sstevel@tonic-gate 		 * cycle.  This entry contains the relative block number
11920Sstevel@tonic-gate 		 * (counting from the start of the cylinder cycle) of the
11930Sstevel@tonic-gate 		 * first block in the equivalence class for that position
11940Sstevel@tonic-gate 		 * and that cylinder.  Positions for which no blocks exist
11950Sstevel@tonic-gate 		 * are indicated by a -1.
11960Sstevel@tonic-gate 		 *
11970Sstevel@tonic-gate 		 * The rotational delta table (fs_rotbl) has an entry for
11980Sstevel@tonic-gate 		 * each block in a cylinder cycle.  This entry contains
11990Sstevel@tonic-gate 		 * the offset from that block to the next block in the
12000Sstevel@tonic-gate 		 * same equivalence class.  The last block in the class
12010Sstevel@tonic-gate 		 * is indicated by a zero in the table.
12020Sstevel@tonic-gate 		 *
12030Sstevel@tonic-gate 		 * The following code, then, walks through all of the blocks
12040Sstevel@tonic-gate 		 * in the cylinder (cylno) which we're allocating within
12050Sstevel@tonic-gate 		 * which are in the equivalence class for the rotational
12060Sstevel@tonic-gate 		 * position (i) which we're allocating within.
12070Sstevel@tonic-gate 		 */
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate 		if (fs_postbl(ufsvfsp, pos)[i] == -1) {
12100Sstevel@tonic-gate 			(void) ufs_fault(ufsvfsp->vfs_root,
1211923Ssdebnath 			    "alloccgblk: cyl groups corrupted, pos = %d, "
1212923Ssdebnath 			    "i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
12130Sstevel@tonic-gate 			return (0);
12140Sstevel@tonic-gate 		}
12150Sstevel@tonic-gate 
12160Sstevel@tonic-gate 		/*
12170Sstevel@tonic-gate 		 * There is one entry in the rotational table for each block
12180Sstevel@tonic-gate 		 * in the cylinder cycle.  These are whole blocks, not frags.
12190Sstevel@tonic-gate 		 */
12200Sstevel@tonic-gate 
12210Sstevel@tonic-gate 		rotbl_size = (fs->fs_cpc * fs->fs_spc) >>
12220Sstevel@tonic-gate 		    (fs->fs_fragshift + fs->fs_fsbtodb);
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate 		/*
12250Sstevel@tonic-gate 		 * As we start, "i" is the rotational position within which
12260Sstevel@tonic-gate 		 * we're searching.  After the next line, it will be a block
12270Sstevel@tonic-gate 		 * number (relative to the start of the cylinder cycle)
12280Sstevel@tonic-gate 		 * within the equivalence class of that rotational position.
12290Sstevel@tonic-gate 		 */
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 		i = fs_postbl(ufsvfsp, pos)[i];
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate 		for (;;) {
12340Sstevel@tonic-gate 			if (isblock(fs, blksfree, (daddr_t)(bno + i))) {
12350Sstevel@tonic-gate 				bno = blkstofrags(fs, (bno + i));
12360Sstevel@tonic-gate 				goto gotit;
12370Sstevel@tonic-gate 			}
12380Sstevel@tonic-gate 			delta = fs_rotbl(fs)[i];
12390Sstevel@tonic-gate 			if (delta <= 0 ||		/* End of chain, or */
12400Sstevel@tonic-gate 			    delta + i > rotbl_size)	/* end of table? */
12410Sstevel@tonic-gate 				break;			/* If so, panic. */
12420Sstevel@tonic-gate 			i += delta;
12430Sstevel@tonic-gate 		}
12440Sstevel@tonic-gate 		(void) ufs_fault(ufsvfsp->vfs_root,
1245923Ssdebnath 		    "alloccgblk: can't find blk in cyl, pos:%d, i:%d, "
1246923Ssdebnath 		    "fs:%s bno: %x\n", pos, i, fs->fs_fsmnt, (int)bno);
12470Sstevel@tonic-gate 		return (0);
12480Sstevel@tonic-gate 	}
12490Sstevel@tonic-gate norot:
12500Sstevel@tonic-gate 	/*
12510Sstevel@tonic-gate 	 * No blocks in the requested cylinder, so take
12520Sstevel@tonic-gate 	 * next available one in this cylinder group.
12530Sstevel@tonic-gate 	 */
12540Sstevel@tonic-gate 	bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag);
12550Sstevel@tonic-gate 	if (bno < 0)
12560Sstevel@tonic-gate 		return (0);
12570Sstevel@tonic-gate 	cgp->cg_rotor = bno;
12580Sstevel@tonic-gate gotit:
12590Sstevel@tonic-gate 	blkno = fragstoblks(fs, bno);
12600Sstevel@tonic-gate 	frag = (cgp->cg_cgx * fs->fs_fpg) + bno;
12610Sstevel@tonic-gate 	if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize))
12620Sstevel@tonic-gate 		goto norot;
12630Sstevel@tonic-gate 	clrblock(fs, blksfree, (long)blkno);
12640Sstevel@tonic-gate 	/*
12650Sstevel@tonic-gate 	 * the other cg/sb/si fields are TRANS'ed by the caller
12660Sstevel@tonic-gate 	 */
12670Sstevel@tonic-gate 	cgp->cg_cs.cs_nbfree--;
12680Sstevel@tonic-gate 	fs->fs_cstotal.cs_nbfree--;
12690Sstevel@tonic-gate 	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
12700Sstevel@tonic-gate 	cylno = cbtocylno(fs, bno);
12710Sstevel@tonic-gate 	blks = cg_blks(ufsvfsp, cgp, cylno);
12720Sstevel@tonic-gate 	rpos = cbtorpos(ufsvfsp, bno);
12730Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
12740Sstevel@tonic-gate 	blks[rpos]--;
12750Sstevel@tonic-gate 	blktot[cylno]--;
12760Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
12770Sstevel@tonic-gate 	fs->fs_fmod = 1;
12780Sstevel@tonic-gate 	return (frag);
12790Sstevel@tonic-gate }
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate /*
12820Sstevel@tonic-gate  * Determine whether an inode can be allocated.
12830Sstevel@tonic-gate  *
12840Sstevel@tonic-gate  * Check to see if an inode is available, and if it is,
12850Sstevel@tonic-gate  * allocate it using the following policy:
12860Sstevel@tonic-gate  *   1) allocate the requested inode.
12870Sstevel@tonic-gate  *   2) allocate the next available inode after the requested
12880Sstevel@tonic-gate  *	inode in the specified cylinder group.
12890Sstevel@tonic-gate  */
12900Sstevel@tonic-gate static ino_t
ialloccg(struct inode * ip,int cg,daddr_t ipref,int mode)12910Sstevel@tonic-gate ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
12920Sstevel@tonic-gate {
12930Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
12940Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
12950Sstevel@tonic-gate 	struct cg *cgp;
12960Sstevel@tonic-gate 	struct buf *bp;
12970Sstevel@tonic-gate 	int start, len, loc, map, i;
12980Sstevel@tonic-gate 	char *iused;
12990Sstevel@tonic-gate 
13000Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
13010Sstevel@tonic-gate 		return (0);
13020Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
13034662Sfrankho 	    (int)fs->fs_cgsize);
13040Sstevel@tonic-gate 
13050Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
13060Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
13070Sstevel@tonic-gate 	    cgp->cg_cs.cs_nifree == 0) {
13080Sstevel@tonic-gate 		brelse(bp);
13090Sstevel@tonic-gate 		return (0);
13100Sstevel@tonic-gate 	}
13110Sstevel@tonic-gate 	iused = cg_inosused(cgp);
13120Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
13130Sstevel@tonic-gate 	/*
13140Sstevel@tonic-gate 	 * While we are waiting for the mutex, someone may have taken
13150Sstevel@tonic-gate 	 * the last available inode.  Need to recheck.
13160Sstevel@tonic-gate 	 */
13170Sstevel@tonic-gate 	if (cgp->cg_cs.cs_nifree == 0) {
13180Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
13190Sstevel@tonic-gate 		brelse(bp);
13200Sstevel@tonic-gate 		return (0);
13210Sstevel@tonic-gate 	}
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
13240Sstevel@tonic-gate 	if (ipref) {
13250Sstevel@tonic-gate 		ipref %= fs->fs_ipg;
13260Sstevel@tonic-gate 		if (isclr(iused, ipref))
13270Sstevel@tonic-gate 			goto gotit;
13280Sstevel@tonic-gate 	}
13290Sstevel@tonic-gate 	start = cgp->cg_irotor / NBBY;
13300Sstevel@tonic-gate 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
13310Sstevel@tonic-gate 	loc = skpc(0xff, (uint_t)len, &iused[start]);
13320Sstevel@tonic-gate 	if (loc == 0) {
13330Sstevel@tonic-gate 		len = start + 1;
13340Sstevel@tonic-gate 		start = 0;
13350Sstevel@tonic-gate 		loc = skpc(0xff, (uint_t)len, &iused[0]);
13360Sstevel@tonic-gate 		if (loc == 0) {
13370Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
13380Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip),
1339923Ssdebnath 			    "ialloccg: map corrupted, cg = %d, irotor = %d, "
1340923Ssdebnath 			    "fs = %s\n", cg, (int)cgp->cg_irotor, fs->fs_fsmnt);
13410Sstevel@tonic-gate 			return (0);
13420Sstevel@tonic-gate 		}
13430Sstevel@tonic-gate 	}
13440Sstevel@tonic-gate 	i = start + len - loc;
13450Sstevel@tonic-gate 	map = iused[i];
13460Sstevel@tonic-gate 	ipref = i * NBBY;
13470Sstevel@tonic-gate 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
13480Sstevel@tonic-gate 		if ((map & i) == 0) {
13490Sstevel@tonic-gate 			cgp->cg_irotor = ipref;
13500Sstevel@tonic-gate 			goto gotit;
13510Sstevel@tonic-gate 		}
13520Sstevel@tonic-gate 	}
13530Sstevel@tonic-gate 
13540Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
13550Sstevel@tonic-gate 	(void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s",
13564662Sfrankho 	    fs->fs_fsmnt);
13570Sstevel@tonic-gate 	return (0);
13580Sstevel@tonic-gate gotit:
13590Sstevel@tonic-gate 	setbit(iused, ipref);
13600Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree--;
13610Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree--;
13620Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree--;
13630Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
13640Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir++;
13650Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir++;
13660Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir++;
13670Sstevel@tonic-gate 	}
13680Sstevel@tonic-gate 	fs->fs_fmod = 1;
13690Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
13700Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
13710Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
13720Sstevel@tonic-gate 	bdrwrite(bp);
13730Sstevel@tonic-gate 	return (cg * fs->fs_ipg + ipref);
13740Sstevel@tonic-gate }
13750Sstevel@tonic-gate 
13760Sstevel@tonic-gate /*
13770Sstevel@tonic-gate  * Find a block of the specified size in the specified cylinder group.
13780Sstevel@tonic-gate  *
13790Sstevel@tonic-gate  * It is a panic if a request is made to find a block if none are
13800Sstevel@tonic-gate  * available.
13810Sstevel@tonic-gate  */
13820Sstevel@tonic-gate static daddr_t
mapsearch(struct ufsvfs * ufsvfsp,struct cg * cgp,daddr_t bpref,int allocsiz)13830Sstevel@tonic-gate mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref,
13840Sstevel@tonic-gate 	int allocsiz)
13850Sstevel@tonic-gate {
13860Sstevel@tonic-gate 	struct fs *fs	= ufsvfsp->vfs_fs;
13870Sstevel@tonic-gate 	daddr_t bno, cfrag;
13880Sstevel@tonic-gate 	int start, len, loc, i, last, first, secondtime;
13890Sstevel@tonic-gate 	int blk, field, subfield, pos;
13900Sstevel@tonic-gate 	int gotit;
13910Sstevel@tonic-gate 
13920Sstevel@tonic-gate 	/*
13930Sstevel@tonic-gate 	 * ufsvfs->vfs_lock is held when calling this.
13940Sstevel@tonic-gate 	 */
13950Sstevel@tonic-gate 	/*
13960Sstevel@tonic-gate 	 * Find the fragment by searching through the
13970Sstevel@tonic-gate 	 * free block map for an appropriate bit pattern.
13980Sstevel@tonic-gate 	 */
13990Sstevel@tonic-gate 	if (bpref)
14000Sstevel@tonic-gate 		start = dtogd(fs, bpref) / NBBY;
14010Sstevel@tonic-gate 	else
14020Sstevel@tonic-gate 		start = cgp->cg_frotor / NBBY;
14030Sstevel@tonic-gate 	/*
14040Sstevel@tonic-gate 	 * the following loop performs two scans -- the first scan
14050Sstevel@tonic-gate 	 * searches the bottom half of the array for a match and the
14060Sstevel@tonic-gate 	 * second scan searches the top half of the array.  The loops
14070Sstevel@tonic-gate 	 * have been merged just to make things difficult.
14080Sstevel@tonic-gate 	 */
14090Sstevel@tonic-gate 	first = start;
14100Sstevel@tonic-gate 	last = howmany(fs->fs_fpg, NBBY);
14110Sstevel@tonic-gate 	secondtime = 0;
14120Sstevel@tonic-gate 	cfrag = cgp->cg_cgx * fs->fs_fpg;
14130Sstevel@tonic-gate 	while (first < last) {
14140Sstevel@tonic-gate 		len = last - first;
14150Sstevel@tonic-gate 		/*
14160Sstevel@tonic-gate 		 * search the array for a match
14170Sstevel@tonic-gate 		 */
14180Sstevel@tonic-gate 		loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first],
14194662Sfrankho 		    (uchar_t *)fragtbl[fs->fs_frag],
14204662Sfrankho 		    (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
14210Sstevel@tonic-gate 		/*
14220Sstevel@tonic-gate 		 * match found
14230Sstevel@tonic-gate 		 */
14240Sstevel@tonic-gate 		if (loc) {
14250Sstevel@tonic-gate 			bno = (last - loc) * NBBY;
14260Sstevel@tonic-gate 
14270Sstevel@tonic-gate 			/*
14280Sstevel@tonic-gate 			 * Found the byte in the map, sift
14290Sstevel@tonic-gate 			 * through the bits to find the selected frag
14300Sstevel@tonic-gate 			 */
14310Sstevel@tonic-gate 			cgp->cg_frotor = bno;
14320Sstevel@tonic-gate 			gotit = 0;
14330Sstevel@tonic-gate 			for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
14340Sstevel@tonic-gate 				blk = blkmap(fs, cg_blksfree(cgp), bno);
14350Sstevel@tonic-gate 				blk <<= 1;
14360Sstevel@tonic-gate 				field = around[allocsiz];
14370Sstevel@tonic-gate 				subfield = inside[allocsiz];
14380Sstevel@tonic-gate 				for (pos = 0;
14390Sstevel@tonic-gate 				    pos <= fs->fs_frag - allocsiz;
14400Sstevel@tonic-gate 				    pos++) {
14410Sstevel@tonic-gate 					if ((blk & field) == subfield) {
14420Sstevel@tonic-gate 						gotit++;
14430Sstevel@tonic-gate 						break;
14440Sstevel@tonic-gate 					}
14450Sstevel@tonic-gate 					field <<= 1;
14460Sstevel@tonic-gate 					subfield <<= 1;
14470Sstevel@tonic-gate 				}
14480Sstevel@tonic-gate 				if (gotit)
14490Sstevel@tonic-gate 					break;
14500Sstevel@tonic-gate 			}
14510Sstevel@tonic-gate 			bno += pos;
14520Sstevel@tonic-gate 
14530Sstevel@tonic-gate 			/*
14540Sstevel@tonic-gate 			 * success if block is *not* being converted from
14550Sstevel@tonic-gate 			 * metadata into userdata (harpy).  If so, ignore.
14560Sstevel@tonic-gate 			 */
14570Sstevel@tonic-gate 			if (!TRANS_ISCANCEL(ufsvfsp,
1458923Ssdebnath 			    ldbtob(fsbtodb(fs, (cfrag+bno))),
1459923Ssdebnath 			    allocsiz * fs->fs_fsize))
14600Sstevel@tonic-gate 				return (bno);
1461923Ssdebnath 
14620Sstevel@tonic-gate 			/*
14630Sstevel@tonic-gate 			 * keep looking -- this block is being converted
14640Sstevel@tonic-gate 			 */
14650Sstevel@tonic-gate 			first = (last - loc) + 1;
14660Sstevel@tonic-gate 			loc = 0;
14670Sstevel@tonic-gate 			if (first < last)
14680Sstevel@tonic-gate 				continue;
14690Sstevel@tonic-gate 		}
14700Sstevel@tonic-gate 		/*
14710Sstevel@tonic-gate 		 * no usable matches in bottom half -- now search the top half
14720Sstevel@tonic-gate 		 */
14730Sstevel@tonic-gate 		if (secondtime)
14740Sstevel@tonic-gate 			/*
14750Sstevel@tonic-gate 			 * no usable matches in top half -- all done
14760Sstevel@tonic-gate 			 */
14770Sstevel@tonic-gate 			break;
14780Sstevel@tonic-gate 		secondtime = 1;
14790Sstevel@tonic-gate 		last = start + 1;
14800Sstevel@tonic-gate 		first = 0;
14810Sstevel@tonic-gate 	}
14820Sstevel@tonic-gate 	/*
14830Sstevel@tonic-gate 	 * no usable matches
14840Sstevel@tonic-gate 	 */
14850Sstevel@tonic-gate 	return ((daddr_t)-1);
14860Sstevel@tonic-gate }
14870Sstevel@tonic-gate 
/*
 * Helpers for addressing the block pointers of an inode: the NDADDR
 * direct pointers come first, followed by the NIADDR indirect pointers.
 */
#define	UFSNADDR (NDADDR + NIADDR)	/* NADDR applies to (obsolete) S5FS */
#define	IB(i)	(NDADDR + (i))	/* slot of the i'th indirect block pointer */
#define	SINGLE	0		/* level: single indirect pointer */
#define	DOUBLE	1		/* level: double indirect pointer */
#define	TRIPLE	2		/* level: triple indirect pointer */
14930Sstevel@tonic-gate 
14940Sstevel@tonic-gate /*
1495923Ssdebnath  * Acquire a write lock, and keep trying till we get it
1496923Ssdebnath  */
1497923Ssdebnath static int
allocsp_wlockfs(struct vnode * vp,struct lockfs * lf)1498923Ssdebnath allocsp_wlockfs(struct vnode *vp, struct lockfs *lf)
1499923Ssdebnath {
1500923Ssdebnath 	int err = 0;
1501923Ssdebnath 
1502923Ssdebnath lockagain:
1503923Ssdebnath 	do {
1504923Ssdebnath 		err = ufs_fiolfss(vp, lf);
1505923Ssdebnath 		if (err)
1506923Ssdebnath 			return (err);
1507923Ssdebnath 	} while (!LOCKFS_IS_ULOCK(lf));
1508923Ssdebnath 
1509923Ssdebnath 	lf->lf_lock = LOCKFS_WLOCK;
1510923Ssdebnath 	lf->lf_flags = 0;
1511923Ssdebnath 	lf->lf_comment = NULL;
1512923Ssdebnath 	err = ufs__fiolfs(vp, lf, 1, 0);
1513923Ssdebnath 
1514923Ssdebnath 	if (err == EBUSY || err == EINVAL)
1515923Ssdebnath 		goto lockagain;
1516923Ssdebnath 
1517923Ssdebnath 	return (err);
1518923Ssdebnath }
1519923Ssdebnath 
1520923Ssdebnath /*
1521923Ssdebnath  * Release the write lock
1522923Ssdebnath  */
1523923Ssdebnath static int
allocsp_unlockfs(struct vnode * vp,struct lockfs * lf)1524923Ssdebnath allocsp_unlockfs(struct vnode *vp, struct lockfs *lf)
1525923Ssdebnath {
1526923Ssdebnath 	int err = 0;
1527923Ssdebnath 
1528923Ssdebnath 	lf->lf_lock = LOCKFS_ULOCK;
1529923Ssdebnath 	lf->lf_flags = 0;
1530923Ssdebnath 	err = ufs__fiolfs(vp, lf, 1, 0);
1531923Ssdebnath 	return (err);
1532923Ssdebnath }
1533923Ssdebnath 
1534923Ssdebnath struct allocsp_undo {
1535923Ssdebnath 	daddr_t offset;
1536923Ssdebnath 	daddr_t blk;
1537923Ssdebnath 	struct allocsp_undo *next;
1538923Ssdebnath };
1539923Ssdebnath 
1540923Ssdebnath /*
1541923Ssdebnath  * ufs_allocsp() can be used to pre-allocate blocks for a file on a given
15426717Sjr26306  * file system. For direct blocks, the blocks are allocated from the offset
15436717Sjr26306  * requested to the block boundary, then any full blocks are allocated,
15446717Sjr26306  * and finally any remainder.
15456717Sjr26306  * For indirect blocks the blocks are not initialized and are
15466717Sjr26306  * only marked as allocated. These addresses are then stored as negative
15476717Sjr26306  * block numbers in the inode to imply special handling. UFS has been modified
15486717Sjr26306  * where necessary to understand this new notion.
15496717Sjr26306  * Successfully fallocated files will have IFALLOCATE cflag set in the inode.
1550923Ssdebnath  */
1551923Ssdebnath int
ufs_allocsp(struct vnode * vp,struct flock64 * lp,cred_t * cr)1552923Ssdebnath ufs_allocsp(struct vnode *vp, struct flock64 *lp, cred_t *cr)
1553923Ssdebnath {
1554923Ssdebnath 	struct lockfs lf;
1555923Ssdebnath 	int berr, err, resv, issync;
15566717Sjr26306 	off_t istart, len; /* istart, special for idb */
1557923Ssdebnath 	struct inode *ip;
1558923Ssdebnath 	struct fs *fs;
1559923Ssdebnath 	struct ufsvfs *ufsvfsp;
15606717Sjr26306 	u_offset_t resid, i, uoff;
1561923Ssdebnath 	daddr32_t db_undo[NDADDR];	/* old direct blocks */
1562923Ssdebnath 	struct allocsp_undo *ib_undo = NULL;	/* ib undo */
1563923Ssdebnath 	struct allocsp_undo *undo = NULL;
1564923Ssdebnath 	u_offset_t osz;			/* old file size */
1565923Ssdebnath 	int chunkblks = 0;		/* # of blocks in 1 allocation */
1566923Ssdebnath 	int cnt = 0;
1567923Ssdebnath 	daddr_t allocblk;
1568923Ssdebnath 	daddr_t totblks = 0;
1569923Ssdebnath 	struct ulockfs	*ulp;
15706717Sjr26306 	size_t done_len;
15716717Sjr26306 	int nbytes, offsetn;
15726717Sjr26306 
1573923Ssdebnath 
1574923Ssdebnath 	ASSERT(vp->v_type == VREG);
1575923Ssdebnath 
1576923Ssdebnath 	ip = VTOI(vp);
1577923Ssdebnath 	fs = ip->i_fs;
1578923Ssdebnath 	if ((ufsvfsp = ip->i_ufsvfs) == NULL) {
1579923Ssdebnath 		err = EIO;
1580923Ssdebnath 		goto out_allocsp;
1581923Ssdebnath 	}
1582923Ssdebnath 
15836717Sjr26306 	istart = blkroundup(fs, (lp->l_start));
1584923Ssdebnath 	len = blkroundup(fs, (lp->l_len));
1585923Ssdebnath 	chunkblks = blkroundup(fs, ufsvfsp->vfs_iotransz) / fs->fs_bsize;
1586923Ssdebnath 	ulp = &ufsvfsp->vfs_ulockfs;
1587923Ssdebnath 
1588923Ssdebnath 	if (lp->l_start < 0 || lp->l_len <= 0)
1589923Ssdebnath 		return (EINVAL);
1590923Ssdebnath 
1591923Ssdebnath 	/* Quickly check to make sure we have space before we proceed */
1592923Ssdebnath 	if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) {
1593923Ssdebnath 		if (TRANS_ISTRANS(ufsvfsp)) {
1594923Ssdebnath 			ufs_delete_drain_wait(ufsvfsp, 1);
1595923Ssdebnath 			if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree)
1596923Ssdebnath 				return (ENOSPC);
1597923Ssdebnath 		} else
1598923Ssdebnath 			return (ENOSPC);
1599923Ssdebnath 	}
1600923Ssdebnath 
1601923Ssdebnath 	/*
1602923Ssdebnath 	 * We will keep i_rwlock locked as WRITER through out the function
1603923Ssdebnath 	 * since we don't want anyone else reading or writing to the inode
1604923Ssdebnath 	 * while we are in the middle of fallocating the file.
1605923Ssdebnath 	 */
1606923Ssdebnath 	rw_enter(&ip->i_rwlock, RW_WRITER);
1607923Ssdebnath 
1608923Ssdebnath 	/* Back up the direct block list, used for undo later if necessary */
1609923Ssdebnath 	rw_enter(&ip->i_contents, RW_READER);
1610923Ssdebnath 	for (i = 0; i < NDADDR; i++)
1611923Ssdebnath 		db_undo[i] = ip->i_db[i];
1612923Ssdebnath 	osz = ip->i_size;
1613923Ssdebnath 	rw_exit(&ip->i_contents);
1614923Ssdebnath 
16156717Sjr26306 	/* Write lock the file system */
16166717Sjr26306 	if (err = allocsp_wlockfs(vp, &lf))
16176717Sjr26306 		goto exit;
16186717Sjr26306 
16196717Sjr26306 	/*
16206717Sjr26306 	 * Allocate any direct blocks now.
16216717Sjr26306 	 * Blocks are allocated from the offset requested to the block
16226717Sjr26306 	 * boundary, then any full blocks are allocated, and finally any
16236717Sjr26306 	 * remainder.
16246717Sjr26306 	 */
16256717Sjr26306 	if (lblkno(fs, lp->l_start) < NDADDR) {
1626923Ssdebnath 		ufs_trans_trunc_resv(ip, ip->i_size + (NDADDR * fs->fs_bsize),
1627923Ssdebnath 		    &resv, &resid);
1628923Ssdebnath 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1629923Ssdebnath 
1630923Ssdebnath 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1631923Ssdebnath 		rw_enter(&ip->i_contents, RW_WRITER);
1632923Ssdebnath 
16336717Sjr26306 		done_len = 0;
16346717Sjr26306 		while ((done_len < lp->l_len) &&
16356717Sjr26306 		    (lblkno(fs, lp->l_start + done_len) < NDADDR)) {
16366717Sjr26306 			uoff = (offset_t)(lp->l_start + done_len);
16376717Sjr26306 			offsetn = (int)blkoff(fs, uoff);
16386717Sjr26306 			nbytes = (int)MIN(fs->fs_bsize - offsetn,
16396717Sjr26306 			    lp->l_len - done_len);
16406717Sjr26306 
16416717Sjr26306 			berr = bmap_write(ip, uoff, offsetn + nbytes,
16426717Sjr26306 			    BI_FALLOCATE, &allocblk, cr);
1643923Ssdebnath 			/* Yikes error, quit */
1644923Ssdebnath 			if (berr) {
1645923Ssdebnath 				TRANS_INODE(ufsvfsp, ip);
1646923Ssdebnath 				rw_exit(&ip->i_contents);
1647923Ssdebnath 				rw_exit(&ufsvfsp->vfs_dqrwlock);
1648923Ssdebnath 				TRANS_END_CSYNC(ufsvfsp, err, issync,
1649923Ssdebnath 				    TOP_ALLOCSP, resv);
16506717Sjr26306 				err = allocsp_unlockfs(vp, &lf);
1651923Ssdebnath 				goto exit;
1652923Ssdebnath 			}
1653923Ssdebnath 
1654923Ssdebnath 			if (allocblk) {
1655923Ssdebnath 				totblks++;
16566717Sjr26306 				if ((uoff + nbytes) > ip->i_size)
16576717Sjr26306 					ip->i_size = (uoff + nbytes);
1658923Ssdebnath 			}
16596717Sjr26306 			done_len += nbytes;
1660923Ssdebnath 		}
1661923Ssdebnath 
1662923Ssdebnath 		TRANS_INODE(ufsvfsp, ip);
1663923Ssdebnath 		rw_exit(&ip->i_contents);
1664923Ssdebnath 		rw_exit(&ufsvfsp->vfs_dqrwlock);
1665923Ssdebnath 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1666923Ssdebnath 
16676717Sjr26306 		/* start offset for indirect allocation */
16686717Sjr26306 		istart =  (uoff + nbytes);
1669923Ssdebnath 	}
1670923Ssdebnath 
1671923Ssdebnath 	/* Break the transactions into vfs_iotransz units */
1672923Ssdebnath 	ufs_trans_trunc_resv(ip, ip->i_size +
1673923Ssdebnath 	    blkroundup(fs, ufsvfsp->vfs_iotransz), &resv, &resid);
1674923Ssdebnath 	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1675923Ssdebnath 
1676923Ssdebnath 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1677923Ssdebnath 	rw_enter(&ip->i_contents, RW_WRITER);
1678923Ssdebnath 
1679923Ssdebnath 	/* Now go about fallocating necessary indirect blocks */
16806717Sjr26306 	for (i = istart; i < (lp->l_start + lp->l_len); i += fs->fs_bsize) {
1681923Ssdebnath 		berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE,
1682923Ssdebnath 		    &allocblk, cr);
1683923Ssdebnath 		if (berr) {
1684923Ssdebnath 			TRANS_INODE(ufsvfsp, ip);
1685923Ssdebnath 			rw_exit(&ip->i_contents);
1686923Ssdebnath 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1687923Ssdebnath 			TRANS_END_CSYNC(ufsvfsp, err, issync,
1688923Ssdebnath 			    TOP_ALLOCSP, resv);
1689923Ssdebnath 			err = allocsp_unlockfs(vp, &lf);
1690923Ssdebnath 			goto exit;
1691923Ssdebnath 		}
1692923Ssdebnath 
1693923Ssdebnath 		/* Update the blk counter only if new block was added */
1694923Ssdebnath 		if (allocblk) {
1695923Ssdebnath 			/* Save undo information */
1696923Ssdebnath 			undo = kmem_alloc(sizeof (struct allocsp_undo),
1697923Ssdebnath 			    KM_SLEEP);
1698923Ssdebnath 			undo->offset = i;
1699923Ssdebnath 			undo->blk = allocblk;
1700923Ssdebnath 			undo->next = ib_undo;
1701923Ssdebnath 			ib_undo = undo;
1702923Ssdebnath 			totblks++;
17034454Smishra 
17044454Smishra 			if (i >= ip->i_size)
17054454Smishra 				ip->i_size += fs->fs_bsize;
1706923Ssdebnath 		}
1707923Ssdebnath 		cnt++;
1708923Ssdebnath 
1709923Ssdebnath 		/* Being a good UFS citizen, let others get a share */
1710923Ssdebnath 		if (cnt == chunkblks) {
1711923Ssdebnath 			/*
1712923Ssdebnath 			 * If there are waiters or the fs is hard locked,
1713923Ssdebnath 			 * error locked, or read-only error locked,
1714923Ssdebnath 			 * quit with EIO
1715923Ssdebnath 			 */
1716923Ssdebnath 			if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
1717923Ssdebnath 			    ULOCKFS_IS_ROELOCK(ulp)) {
1718923Ssdebnath 				ip->i_cflags |= IFALLOCATE;
1719923Ssdebnath 				TRANS_INODE(ufsvfsp, ip);
1720923Ssdebnath 				rw_exit(&ip->i_contents);
1721923Ssdebnath 				rw_exit(&ufsvfsp->vfs_dqrwlock);
1722923Ssdebnath 
1723923Ssdebnath 				TRANS_END_CSYNC(ufsvfsp, err, issync,
1724923Ssdebnath 				    TOP_ALLOCSP, resv);
1725923Ssdebnath 				rw_exit(&ip->i_rwlock);
17266717Sjr26306 				(void) allocsp_unlockfs(vp, &lf);
1727923Ssdebnath 				return (EIO);
1728923Ssdebnath 			}
1729923Ssdebnath 
1730923Ssdebnath 			TRANS_INODE(ufsvfsp, ip);
1731923Ssdebnath 			rw_exit(&ip->i_contents);
1732923Ssdebnath 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1733923Ssdebnath 
1734923Ssdebnath 			/* End the current transaction */
1735923Ssdebnath 			TRANS_END_CSYNC(ufsvfsp, err, issync,
1736923Ssdebnath 			    TOP_ALLOCSP, resv);
1737923Ssdebnath 
1738923Ssdebnath 			if (CV_HAS_WAITERS(&ulp->ul_cv)) {
1739923Ssdebnath 				/* Release the write lock */
1740923Ssdebnath 				if (err = allocsp_unlockfs(vp, &lf))
1741923Ssdebnath 					goto exit;
1742923Ssdebnath 
1743923Ssdebnath 				/* Wake up others waiting to do operations */
1744923Ssdebnath 				mutex_enter(&ulp->ul_lock);
1745923Ssdebnath 				cv_broadcast(&ulp->ul_cv);
1746923Ssdebnath 				mutex_exit(&ulp->ul_lock);
1747923Ssdebnath 
1748923Ssdebnath 				/* Grab the write lock again */
1749923Ssdebnath 				if (err = allocsp_wlockfs(vp, &lf))
1750923Ssdebnath 					goto exit;
1751923Ssdebnath 			} /* end of CV_HAS_WAITERS(&ulp->ul_cv) */
1752923Ssdebnath 
1753923Ssdebnath 			/* Reserve more space in log for this file */
1754923Ssdebnath 			ufs_trans_trunc_resv(ip,
1755923Ssdebnath 			    ip->i_size + blkroundup(fs, ufsvfsp->vfs_iotransz),
1756923Ssdebnath 			    &resv, &resid);
1757923Ssdebnath 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1758923Ssdebnath 
1759923Ssdebnath 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1760923Ssdebnath 			rw_enter(&ip->i_contents, RW_WRITER);
1761923Ssdebnath 
1762923Ssdebnath 			cnt = 0;	/* reset cnt b/c of new transaction */
1763923Ssdebnath 		}
1764923Ssdebnath 	}
1765923Ssdebnath 
1766923Ssdebnath 	if (!err && !berr)
1767923Ssdebnath 		ip->i_cflags |= IFALLOCATE;
1768923Ssdebnath 
17696717Sjr26306 	/* If the file has grown then correct the file size */
17706717Sjr26306 	if (osz < (lp->l_start + lp->l_len))
17716717Sjr26306 		ip->i_size = (lp->l_start + lp->l_len);
17726717Sjr26306 
1773923Ssdebnath 	/* Release locks, end log transaction and unlock fs */
1774923Ssdebnath 	TRANS_INODE(ufsvfsp, ip);
1775923Ssdebnath 	rw_exit(&ip->i_contents);
1776923Ssdebnath 	rw_exit(&ufsvfsp->vfs_dqrwlock);
1777923Ssdebnath 
1778923Ssdebnath 	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1779923Ssdebnath 	err = allocsp_unlockfs(vp, &lf);
1780923Ssdebnath 
1781923Ssdebnath 	/*
1782923Ssdebnath 	 * @ exit label, we should no longer be holding the fs write lock, and
1783923Ssdebnath 	 * all logging transactions should have been ended. We still hold
1784923Ssdebnath 	 * ip->i_rwlock.
1785923Ssdebnath 	 */
1786923Ssdebnath exit:
1787923Ssdebnath 	/*
1788923Ssdebnath 	 * File has grown larger than 2GB. Set flag
1789923Ssdebnath 	 * in superblock to indicate this, if it
1790923Ssdebnath 	 * is not already set.
1791923Ssdebnath 	 */
1792923Ssdebnath 	if ((ip->i_size > MAXOFF32_T) &&
17934662Sfrankho 	    !(fs->fs_flags & FSLARGEFILES)) {
1794923Ssdebnath 		ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
1795923Ssdebnath 		mutex_enter(&ufsvfsp->vfs_lock);
1796923Ssdebnath 		fs->fs_flags |= FSLARGEFILES;
1797923Ssdebnath 		ufs_sbwrite(ufsvfsp);
1798923Ssdebnath 		mutex_exit(&ufsvfsp->vfs_lock);
1799923Ssdebnath 	}
1800923Ssdebnath 
1801923Ssdebnath 	/*
1802923Ssdebnath 	 * Since we couldn't allocate completely, we will undo the allocations.
1803923Ssdebnath 	 */
1804923Ssdebnath 	if (berr) {
1805923Ssdebnath 		ufs_trans_trunc_resv(ip, totblks * fs->fs_bsize, &resv, &resid);
1806923Ssdebnath 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1807923Ssdebnath 
1808923Ssdebnath 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1809923Ssdebnath 		rw_enter(&ip->i_contents, RW_WRITER);
1810923Ssdebnath 
1811923Ssdebnath 		/* Direct blocks */
1812923Ssdebnath 		for (i = 0; i < NDADDR; i++) {
1813923Ssdebnath 			/*
1814923Ssdebnath 			 * Only free the block if they are not same, and
1815923Ssdebnath 			 * the old one isn't zero (the fragment was
1816923Ssdebnath 			 * re-allocated).
1817923Ssdebnath 			 */
1818923Ssdebnath 			if (db_undo[i] != ip->i_db[i] && db_undo[i] == 0) {
1819923Ssdebnath 				free(ip, ip->i_db[i], fs->fs_bsize, 0);
1820923Ssdebnath 				ip->i_db[i] = 0;
1821923Ssdebnath 			}
1822923Ssdebnath 		}
1823923Ssdebnath 
1824923Ssdebnath 		/* Undo the indirect blocks */
1825923Ssdebnath 		while (ib_undo != NULL) {
1826923Ssdebnath 			undo = ib_undo;
1827923Ssdebnath 			err = bmap_set_bn(vp, undo->offset, 0);
1828923Ssdebnath 			if (err)
1829923Ssdebnath 				cmn_err(CE_PANIC, "ufs_allocsp(): failed to "
1830923Ssdebnath 				    "undo allocation of block %ld",
1831923Ssdebnath 				    undo->offset);
1832923Ssdebnath 			free(ip, undo->blk, fs->fs_bsize, I_IBLK);
1833923Ssdebnath 			ib_undo = undo->next;
1834923Ssdebnath 			kmem_free(undo, sizeof (struct allocsp_undo));
1835923Ssdebnath 		}
1836923Ssdebnath 
1837923Ssdebnath 		ip->i_size = osz;
1838923Ssdebnath 		TRANS_INODE(ufsvfsp, ip);
1839923Ssdebnath 
1840923Ssdebnath 		rw_exit(&ip->i_contents);
1841923Ssdebnath 		rw_exit(&ufsvfsp->vfs_dqrwlock);
1842923Ssdebnath 
1843923Ssdebnath 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1844923Ssdebnath 
1845923Ssdebnath 		rw_exit(&ip->i_rwlock);
1846923Ssdebnath 		return (berr);
1847923Ssdebnath 	}
1848923Ssdebnath 
1849923Ssdebnath 	/*
1850923Ssdebnath 	 * Don't forget to free the undo chain :)
1851923Ssdebnath 	 */
1852923Ssdebnath 	while (ib_undo != NULL) {
1853923Ssdebnath 		undo = ib_undo;
1854923Ssdebnath 		ib_undo = undo->next;
1855923Ssdebnath 		kmem_free(undo, sizeof (struct allocsp_undo));
1856923Ssdebnath 	}
1857923Ssdebnath 
1858923Ssdebnath 	rw_exit(&ip->i_rwlock);
1859923Ssdebnath 
1860923Ssdebnath out_allocsp:
1861923Ssdebnath 	return (err);
1862923Ssdebnath }
1863923Ssdebnath 
1864923Ssdebnath /*
18650Sstevel@tonic-gate  * Free storage space associated with the specified inode.  The portion
18660Sstevel@tonic-gate  * to be freed is specified by lp->l_start and lp->l_len (already
18670Sstevel@tonic-gate  * normalized to a "whence" of 0).
18680Sstevel@tonic-gate  *
18690Sstevel@tonic-gate  * This is an experimental facility whose continued existence is not
18700Sstevel@tonic-gate  * guaranteed.  Currently, we only support the special case
18710Sstevel@tonic-gate  * of l_len == 0, meaning free to end of file.
18720Sstevel@tonic-gate  *
18730Sstevel@tonic-gate  * Blocks are freed in reverse order.  This FILO algorithm will tend to
18740Sstevel@tonic-gate  * maintain a contiguous free list much longer than FIFO.
18750Sstevel@tonic-gate  * See also ufs_itrunc() in ufs_inode.c.
18760Sstevel@tonic-gate  *
18770Sstevel@tonic-gate  * Bug: unused bytes in the last retained block are not cleared.
18780Sstevel@tonic-gate  * This may result in a "hole" in the file that does not read as zeroes.
18790Sstevel@tonic-gate  */
18800Sstevel@tonic-gate /* ARGSUSED */
18810Sstevel@tonic-gate int
ufs_freesp(struct vnode * vp,struct flock64 * lp,int flag,cred_t * cr)18820Sstevel@tonic-gate ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr)
18830Sstevel@tonic-gate {
18840Sstevel@tonic-gate 	int i;
18850Sstevel@tonic-gate 	struct inode *ip = VTOI(vp);
18860Sstevel@tonic-gate 	int error;
18870Sstevel@tonic-gate 
18880Sstevel@tonic-gate 	ASSERT(vp->v_type == VREG);
18890Sstevel@tonic-gate 	ASSERT(lp->l_start >= 0);	/* checked by convoff */
18900Sstevel@tonic-gate 
18910Sstevel@tonic-gate 	if (lp->l_len != 0)
18920Sstevel@tonic-gate 		return (EINVAL);
18930Sstevel@tonic-gate 
18940Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_READER);
18950Sstevel@tonic-gate 	if (ip->i_size == (u_offset_t)lp->l_start) {
18960Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
18970Sstevel@tonic-gate 		return (0);
18980Sstevel@tonic-gate 	}
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 	/*
19010Sstevel@tonic-gate 	 * Check if there is any active mandatory lock on the
19020Sstevel@tonic-gate 	 * range that will be truncated/expanded.
19030Sstevel@tonic-gate 	 */
19040Sstevel@tonic-gate 	if (MANDLOCK(vp, ip->i_mode)) {
19050Sstevel@tonic-gate 		offset_t save_start;
19060Sstevel@tonic-gate 
19070Sstevel@tonic-gate 		save_start = lp->l_start;
19080Sstevel@tonic-gate 
19090Sstevel@tonic-gate 		if (ip->i_size < lp->l_start) {
19100Sstevel@tonic-gate 			/*
19110Sstevel@tonic-gate 			 * "Truncate up" case: need to make sure there
19120Sstevel@tonic-gate 			 * is no lock beyond current end-of-file. To
19130Sstevel@tonic-gate 			 * do so, we need to set l_start to the size
19140Sstevel@tonic-gate 			 * of the file temporarily.
19150Sstevel@tonic-gate 			 */
19160Sstevel@tonic-gate 			lp->l_start = ip->i_size;
19170Sstevel@tonic-gate 		}
19180Sstevel@tonic-gate 		lp->l_type = F_WRLCK;
19190Sstevel@tonic-gate 		lp->l_sysid = 0;
19200Sstevel@tonic-gate 		lp->l_pid = ttoproc(curthread)->p_pid;
19210Sstevel@tonic-gate 		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
19220Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
19230Sstevel@tonic-gate 		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
19240Sstevel@tonic-gate 		    lp->l_type != F_UNLCK) {
19250Sstevel@tonic-gate 			return (i ? i : EAGAIN);
19260Sstevel@tonic-gate 		}
19270Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
19280Sstevel@tonic-gate 
19290Sstevel@tonic-gate 		lp->l_start = save_start;
19300Sstevel@tonic-gate 	}
19310Sstevel@tonic-gate 
19320Sstevel@tonic-gate 	/*
19330Sstevel@tonic-gate 	 * Make sure a write isn't in progress (allocating blocks)
19340Sstevel@tonic-gate 	 * by acquiring i_rwlock (we promised ufs_bmap we wouldn't
19350Sstevel@tonic-gate 	 * truncate while it was allocating blocks).
19360Sstevel@tonic-gate 	 * Grab the locks in the right order.
19370Sstevel@tonic-gate 	 */
19380Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
19390Sstevel@tonic-gate 	rw_enter(&ip->i_rwlock, RW_WRITER);
19400Sstevel@tonic-gate 	error = TRANS_ITRUNC(ip, (u_offset_t)lp->l_start, 0, cr);
19410Sstevel@tonic-gate 	rw_exit(&ip->i_rwlock);
19420Sstevel@tonic-gate 	return (error);
19430Sstevel@tonic-gate }
19440Sstevel@tonic-gate 
19450Sstevel@tonic-gate /*
19460Sstevel@tonic-gate  * Find a cg with as close to nb contiguous bytes as possible
19470Sstevel@tonic-gate  *	THIS MAY TAKE MANY DISK READS!
19480Sstevel@tonic-gate  *
19490Sstevel@tonic-gate  * Implemented in an attempt to allocate contiguous blocks for
19500Sstevel@tonic-gate  * writing the ufs log file to, minimizing future disk head seeking
19510Sstevel@tonic-gate  */
19520Sstevel@tonic-gate daddr_t
contigpref(ufsvfs_t * ufsvfsp,size_t nb,size_t minb)19539915SOwen.Roberts@Sun.Com contigpref(ufsvfs_t *ufsvfsp, size_t nb, size_t minb)
19540Sstevel@tonic-gate {
19550Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
19560Sstevel@tonic-gate 	daddr_t		nblk	= lblkno(fs, blkroundup(fs, nb));
19579915SOwen.Roberts@Sun.Com 	daddr_t		minblk	= lblkno(fs, blkroundup(fs, minb));
19580Sstevel@tonic-gate 	daddr_t		savebno, curbno, cgbno;
19599915SOwen.Roberts@Sun.Com 	int		cg, cgblks, savecg, savenblk, curnblk, startcg;
19600Sstevel@tonic-gate 	uchar_t		*blksfree;
19610Sstevel@tonic-gate 	buf_t		*bp;
19620Sstevel@tonic-gate 	struct cg	*cgp;
19630Sstevel@tonic-gate 
19640Sstevel@tonic-gate 	savenblk = 0;
19650Sstevel@tonic-gate 	savecg = 0;
19660Sstevel@tonic-gate 	savebno = 0;
19670Sstevel@tonic-gate 
19689915SOwen.Roberts@Sun.Com 	if ((startcg = findlogstartcg(fs, nblk, minblk)) == -1)
19699915SOwen.Roberts@Sun.Com 		cg = 0;	/* Nothing suitable found */
19709915SOwen.Roberts@Sun.Com 	else
19719915SOwen.Roberts@Sun.Com 		cg = startcg;
19720Sstevel@tonic-gate 
19739915SOwen.Roberts@Sun.Com 	for (; cg < fs->fs_ncg; ++cg) {
19740Sstevel@tonic-gate 		/*
19750Sstevel@tonic-gate 		 * find the largest contiguous range in this cg
19760Sstevel@tonic-gate 		 */
19770Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev,
1978923Ssdebnath 		    (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1979923Ssdebnath 		    (int)fs->fs_cgsize);
19800Sstevel@tonic-gate 		cgp = bp->b_un.b_cg;
19810Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
19820Sstevel@tonic-gate 			brelse(bp);
19830Sstevel@tonic-gate 			continue;
19840Sstevel@tonic-gate 		}
19850Sstevel@tonic-gate 		blksfree = cg_blksfree(cgp);	    /* free array */
19860Sstevel@tonic-gate 		cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */
19870Sstevel@tonic-gate 		cgbno = 0;
19880Sstevel@tonic-gate 		while (cgbno < cgblks && savenblk < nblk) {
19890Sstevel@tonic-gate 			/* find a free block */
19909915SOwen.Roberts@Sun.Com 			for (; cgbno < cgblks; ++cgbno) {
19919915SOwen.Roberts@Sun.Com 				if (isblock(fs, blksfree, cgbno)) {
199211036SJim.Rice@Sun.COM 					if (startcg != -1) {
199311036SJim.Rice@Sun.COM 						brelse(bp);
199411036SJim.Rice@Sun.COM 						savecg = startcg;
199511036SJim.Rice@Sun.COM 						savebno = cgbno;
19969915SOwen.Roberts@Sun.Com 						goto done;
199711036SJim.Rice@Sun.COM 					} else
19989915SOwen.Roberts@Sun.Com 						break;
19999915SOwen.Roberts@Sun.Com 				}
20009915SOwen.Roberts@Sun.Com 			}
20010Sstevel@tonic-gate 			curbno = cgbno;
20020Sstevel@tonic-gate 			/* count the number of free blocks */
20030Sstevel@tonic-gate 			for (curnblk = 0; cgbno < cgblks; ++cgbno) {
20040Sstevel@tonic-gate 				if (!isblock(fs, blksfree, cgbno))
20050Sstevel@tonic-gate 					break;
20060Sstevel@tonic-gate 				if (++curnblk >= nblk)
20070Sstevel@tonic-gate 					break;
20080Sstevel@tonic-gate 			}
20090Sstevel@tonic-gate 			if (curnblk > savenblk) {
20100Sstevel@tonic-gate 				savecg = cg;
20110Sstevel@tonic-gate 				savenblk = curnblk;
20120Sstevel@tonic-gate 				savebno = curbno;
20130Sstevel@tonic-gate 			}
20140Sstevel@tonic-gate 		}
20150Sstevel@tonic-gate 		brelse(bp);
20160Sstevel@tonic-gate 		if (savenblk >= nblk)
20170Sstevel@tonic-gate 			break;
20180Sstevel@tonic-gate 	}
20190Sstevel@tonic-gate 
20209915SOwen.Roberts@Sun.Com done:
20219915SOwen.Roberts@Sun.Com 
20220Sstevel@tonic-gate 	/* convert block offset in cg to frag offset in cg */
20230Sstevel@tonic-gate 	savebno = blkstofrags(fs, savebno);
20240Sstevel@tonic-gate 
20250Sstevel@tonic-gate 	/* convert frag offset in cg to frag offset in fs */
20260Sstevel@tonic-gate 	savebno += (savecg * fs->fs_fpg);
20270Sstevel@tonic-gate 
20280Sstevel@tonic-gate 	return (savebno);
20290Sstevel@tonic-gate }
20309915SOwen.Roberts@Sun.Com 
20319915SOwen.Roberts@Sun.Com /*
20329915SOwen.Roberts@Sun.Com  * The object of this routine is to find a start point for the UFS log.
20339915SOwen.Roberts@Sun.Com  * Ideally the space should be allocated from the smallest possible number
20349915SOwen.Roberts@Sun.Com  * of contiguous cylinder groups. This is found by using a sliding window
20359915SOwen.Roberts@Sun.Com  * technique. The smallest window of contiguous cylinder groups, which is
20369915SOwen.Roberts@Sun.Com  * still able to accommodate the target, is found by moving the window
20379915SOwen.Roberts@Sun.Com  * through the cylinder groups in a single pass. The end of the window is
20389915SOwen.Roberts@Sun.Com  * advanced until the space is accommodated, then the start is advanced until
20399915SOwen.Roberts@Sun.Com  * it no longer fits, the end is then advanced again and so on until the
20409915SOwen.Roberts@Sun.Com  * final cylinder group is reached. The first suitable instance is recorded
20419915SOwen.Roberts@Sun.Com  * and its starting cg number is returned.
20429915SOwen.Roberts@Sun.Com  *
20439915SOwen.Roberts@Sun.Com  * If we are not able to find a minimum amount of space, represented by
20449915SOwen.Roberts@Sun.Com  * minblk, or to do so uses more than the available extents, then return -1.
20459915SOwen.Roberts@Sun.Com  */
20469915SOwen.Roberts@Sun.Com 
20479915SOwen.Roberts@Sun.Com int
findlogstartcg(struct fs * fs,daddr_t requested,daddr_t minblk)20489915SOwen.Roberts@Sun.Com findlogstartcg(struct fs *fs, daddr_t requested, daddr_t minblk)
20499915SOwen.Roberts@Sun.Com {
20509915SOwen.Roberts@Sun.Com 	int	 ncgs;		 /* number of cylinder groups */
20519915SOwen.Roberts@Sun.Com 	daddr_t target;		 /* amount of space sought */
20529915SOwen.Roberts@Sun.Com 	int	 cwidth, ctotal; /* current window width and total */
20539915SOwen.Roberts@Sun.Com 	int	 bwidth, btotal; /* best window width and total so far */
20549915SOwen.Roberts@Sun.Com 	int	 s;	/* index of the first element in the current window */
20559915SOwen.Roberts@Sun.Com 	int	 e;	/* index of the first element + the width */
20569915SOwen.Roberts@Sun.Com 			/*  (i.e. 1 + index of last element) */
20579915SOwen.Roberts@Sun.Com 	int	 bs; /* index of the first element in the best window so far */
20589915SOwen.Roberts@Sun.Com 	int	 header, max_extents;
20599915SOwen.Roberts@Sun.Com 
20609915SOwen.Roberts@Sun.Com 	target = requested;
20619915SOwen.Roberts@Sun.Com 	ncgs = fs->fs_ncg;
20629915SOwen.Roberts@Sun.Com 
20639915SOwen.Roberts@Sun.Com 	header = sizeof (extent_block_t) - sizeof (extent_t);
20649915SOwen.Roberts@Sun.Com 	max_extents = ((fs->fs_bsize)-header) / sizeof (extent_t);
20659915SOwen.Roberts@Sun.Com 	cwidth = ctotal = 0;
20669915SOwen.Roberts@Sun.Com 	btotal = -1;
20679915SOwen.Roberts@Sun.Com 	bwidth = ncgs;
20689915SOwen.Roberts@Sun.Com 	s = e = 0;
20699915SOwen.Roberts@Sun.Com 	while (e < ncgs) {
20709915SOwen.Roberts@Sun.Com 	/* Advance the end of the window until it accommodates the target. */
20719915SOwen.Roberts@Sun.Com 		while (ctotal < target && e < ncgs) {
20729915SOwen.Roberts@Sun.Com 			ctotal += fs->fs_cs(fs, e).cs_nbfree;
20739915SOwen.Roberts@Sun.Com 			e++;
20749915SOwen.Roberts@Sun.Com 		}
20759915SOwen.Roberts@Sun.Com 
20769915SOwen.Roberts@Sun.Com 		/*
20779915SOwen.Roberts@Sun.Com 		 * Advance the start of the window until it no longer
20789915SOwen.Roberts@Sun.Com 		 * accommodates the target.
20799915SOwen.Roberts@Sun.Com 		 */
20809915SOwen.Roberts@Sun.Com 		while (ctotal >= target && s < e) {
20819915SOwen.Roberts@Sun.Com 			/* See if this is the smallest window so far. */
20829915SOwen.Roberts@Sun.Com 			cwidth = e - s;
20839915SOwen.Roberts@Sun.Com 			if (cwidth <= bwidth) {
20849915SOwen.Roberts@Sun.Com 				if (cwidth == bwidth && ctotal <= btotal)
20859915SOwen.Roberts@Sun.Com 					goto more;
20869915SOwen.Roberts@Sun.Com 				bwidth = cwidth;
20879915SOwen.Roberts@Sun.Com 				btotal = ctotal;
20889915SOwen.Roberts@Sun.Com 				bs = s;
20899915SOwen.Roberts@Sun.Com 			}
20909915SOwen.Roberts@Sun.Com more:
20919915SOwen.Roberts@Sun.Com 			ctotal -= fs->fs_cs(fs, s).cs_nbfree;
20929915SOwen.Roberts@Sun.Com 			s++;
20939915SOwen.Roberts@Sun.Com 		}
20949915SOwen.Roberts@Sun.Com 	}
20959915SOwen.Roberts@Sun.Com 
20969915SOwen.Roberts@Sun.Com 	/*
20979915SOwen.Roberts@Sun.Com 	 * If we cannot allocate the minimum required or we use too many
20989915SOwen.Roberts@Sun.Com 	 * extents to do so, return -1.
20999915SOwen.Roberts@Sun.Com 	 */
21009915SOwen.Roberts@Sun.Com 	if (btotal < minblk || bwidth > max_extents)
21019915SOwen.Roberts@Sun.Com 		bs = -1;
21029915SOwen.Roberts@Sun.Com 
21039915SOwen.Roberts@Sun.Com 	return (bs);
21049915SOwen.Roberts@Sun.Com }
2105