10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 230Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* 310Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 320Sstevel@tonic-gate * The Regents of the University of California 330Sstevel@tonic-gate * All Rights Reserved 340Sstevel@tonic-gate * 350Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 360Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 370Sstevel@tonic-gate * contributors. 380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate 410Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 420Sstevel@tonic-gate 43*923Ssdebnath #include <sys/condvar_impl.h> 440Sstevel@tonic-gate #include <sys/types.h> 450Sstevel@tonic-gate #include <sys/t_lock.h> 460Sstevel@tonic-gate #include <sys/debug.h> 470Sstevel@tonic-gate #include <sys/param.h> 480Sstevel@tonic-gate #include <sys/systm.h> 490Sstevel@tonic-gate #include <sys/signal.h> 500Sstevel@tonic-gate #include <sys/cred.h> 510Sstevel@tonic-gate #include <sys/proc.h> 520Sstevel@tonic-gate #include <sys/disp.h> 530Sstevel@tonic-gate #include <sys/user.h> 540Sstevel@tonic-gate #include <sys/buf.h> 550Sstevel@tonic-gate #include <sys/vfs.h> 560Sstevel@tonic-gate #include <sys/vnode.h> 570Sstevel@tonic-gate #include <sys/acl.h> 580Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 590Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 600Sstevel@tonic-gate #include <sys/fs/ufs_acl.h> 610Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 620Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 630Sstevel@tonic-gate #include <sys/kmem.h> 640Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 650Sstevel@tonic-gate #include <sys/fs/ufs_panic.h> 660Sstevel@tonic-gate #include <sys/errno.h> 670Sstevel@tonic-gate #include <sys/time.h> 680Sstevel@tonic-gate #include <sys/sysmacros.h> 690Sstevel@tonic-gate #include <sys/file.h> 700Sstevel@tonic-gate #include <sys/fcntl.h> 710Sstevel@tonic-gate #include <sys/flock.h> 720Sstevel@tonic-gate #include <fs/fs_subr.h> 730Sstevel@tonic-gate #include <sys/cmn_err.h> 740Sstevel@tonic-gate #include <sys/policy.h> 750Sstevel@tonic-gate 760Sstevel@tonic-gate static ino_t hashalloc(); 770Sstevel@tonic-gate static daddr_t fragextend(); 780Sstevel@tonic-gate static daddr_t alloccg(); 790Sstevel@tonic-gate static daddr_t alloccgblk(); 800Sstevel@tonic-gate static ino_t ialloccg(); 810Sstevel@tonic-gate static daddr_t mapsearch(); 820Sstevel@tonic-gate 830Sstevel@tonic-gate extern int inside[], around[]; 840Sstevel@tonic-gate extern uchar_t *fragtbl[]; 850Sstevel@tonic-gate void delay(); 860Sstevel@tonic-gate 870Sstevel@tonic-gate /* 880Sstevel@tonic-gate * Allocate a block in the file system. 890Sstevel@tonic-gate * 900Sstevel@tonic-gate * The size of the requested block is given, which must be some 910Sstevel@tonic-gate * multiple of fs_fsize and <= fs_bsize. 920Sstevel@tonic-gate * A preference may be optionally specified. If a preference is given 930Sstevel@tonic-gate * the following hierarchy is used to allocate a block: 940Sstevel@tonic-gate * 1) allocate the requested block. 950Sstevel@tonic-gate * 2) allocate a rotationally optimal block in the same cylinder. 960Sstevel@tonic-gate * 3) allocate a block in the same cylinder group. 970Sstevel@tonic-gate * 4) quadratically rehash into other cylinder groups, until an 980Sstevel@tonic-gate * available block is located. 990Sstevel@tonic-gate * If no block preference is given the following hierarchy is used 1000Sstevel@tonic-gate * to allocate a block: 1010Sstevel@tonic-gate * 1) allocate a block in the cylinder group that contains the 1020Sstevel@tonic-gate * inode for the file. 1030Sstevel@tonic-gate * 2) quadratically rehash into other cylinder groups, until an 1040Sstevel@tonic-gate * available block is located. 1050Sstevel@tonic-gate */ 1060Sstevel@tonic-gate int 1070Sstevel@tonic-gate alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr) 1080Sstevel@tonic-gate { 1090Sstevel@tonic-gate struct fs *fs; 1100Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1110Sstevel@tonic-gate daddr_t bno; 1120Sstevel@tonic-gate int cg; 1130Sstevel@tonic-gate int err; 1140Sstevel@tonic-gate char *errmsg = NULL; 1150Sstevel@tonic-gate size_t len; 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1180Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1190Sstevel@tonic-gate if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) { 120*923Ssdebnath err = ufs_fault(ITOV(ip), "alloc: bad size, dev = 0x%lx," 121*923Ssdebnath " bsize = %d, size = %d, fs = %s\n", 122*923Ssdebnath ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); 1230Sstevel@tonic-gate return (err); 1240Sstevel@tonic-gate } 1250Sstevel@tonic-gate if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 1260Sstevel@tonic-gate goto nospace; 1270Sstevel@tonic-gate if (freespace(fs, ufsvfsp) <= 0 && 1280Sstevel@tonic-gate secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0) 1290Sstevel@tonic-gate goto nospace; 1300Sstevel@tonic-gate err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len); 1310Sstevel@tonic-gate /* Note that may not have err, but may have errmsg */ 1320Sstevel@tonic-gate if (errmsg != NULL) { 1330Sstevel@tonic-gate uprintf(errmsg); 1340Sstevel@tonic-gate kmem_free(errmsg, len); 1350Sstevel@tonic-gate errmsg = NULL; 1360Sstevel@tonic-gate } 1370Sstevel@tonic-gate if (err) 1380Sstevel@tonic-gate return (err); 1390Sstevel@tonic-gate if (bpref >= fs->fs_size) 1400Sstevel@tonic-gate bpref = 0; 1410Sstevel@tonic-gate if (bpref == 0) 1420Sstevel@tonic-gate cg = (int)itog(fs, ip->i_number); 1430Sstevel@tonic-gate else 1440Sstevel@tonic-gate cg = dtog(fs, bpref); 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, 1470Sstevel@tonic-gate (ulong_t (*)())alloccg); 1480Sstevel@tonic-gate if (bno > 0) { 1490Sstevel@tonic-gate *bnp = bno; 1500Sstevel@tonic-gate return (0); 1510Sstevel@tonic-gate } 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate /* 1540Sstevel@tonic-gate * hashalloc() failed because some other thread grabbed 1550Sstevel@tonic-gate * the last block so unwind the quota operation. We can 1560Sstevel@tonic-gate * ignore the return because subtractions don't fail and 1570Sstevel@tonic-gate * size is guaranteed to be >= zero by our caller. 1580Sstevel@tonic-gate */ 1590Sstevel@tonic-gate (void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL, 1600Sstevel@tonic-gate (size_t *)NULL); 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate nospace: 1630Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 1640Sstevel@tonic-gate if ((lbolt - ufsvfsp->vfs_lastwhinetime) > (hz << 2) && 1650Sstevel@tonic-gate (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) { 1660Sstevel@tonic-gate ufsvfsp->vfs_lastwhinetime = lbolt; 1670Sstevel@tonic-gate cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt); 1680Sstevel@tonic-gate } 1690Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 1700Sstevel@tonic-gate return (ENOSPC); 1710Sstevel@tonic-gate } 1720Sstevel@tonic-gate 1730Sstevel@tonic-gate /* 1740Sstevel@tonic-gate * Reallocate a fragment to a bigger size 1750Sstevel@tonic-gate * 1760Sstevel@tonic-gate * The number and size of the old block is given, and a preference 1770Sstevel@tonic-gate * and new size is also specified. The allocator attempts to extend 1780Sstevel@tonic-gate * the original block. Failing that, the regular block allocator is 1790Sstevel@tonic-gate * invoked to get an appropriate block. 1800Sstevel@tonic-gate */ 1810Sstevel@tonic-gate int 1820Sstevel@tonic-gate realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize, 1830Sstevel@tonic-gate int nsize, daddr_t *bnp, cred_t *cr) 1840Sstevel@tonic-gate { 1850Sstevel@tonic-gate daddr_t bno; 1860Sstevel@tonic-gate struct fs *fs; 1870Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1880Sstevel@tonic-gate int cg, request; 1890Sstevel@tonic-gate int err; 1900Sstevel@tonic-gate char *errmsg = NULL; 1910Sstevel@tonic-gate size_t len; 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1940Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1950Sstevel@tonic-gate if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 1960Sstevel@tonic-gate (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 1970Sstevel@tonic-gate err = ufs_fault(ITOV(ip), 198*923Ssdebnath "realloccg: bad size, dev=0x%lx, bsize=%d, " 199*923Ssdebnath "osize=%d, nsize=%d, fs=%s\n", 200*923Ssdebnath ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); 2010Sstevel@tonic-gate return (err); 2020Sstevel@tonic-gate } 2030Sstevel@tonic-gate if (freespace(fs, ufsvfsp) <= 0 && 2040Sstevel@tonic-gate secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0) 2050Sstevel@tonic-gate goto nospace; 2060Sstevel@tonic-gate if (bprev == 0) { 2070Sstevel@tonic-gate err = ufs_fault(ITOV(ip), 208*923Ssdebnath "realloccg: bad bprev, dev = 0x%lx, bsize = %d," 209*923Ssdebnath " bprev = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev, 2100Sstevel@tonic-gate fs->fs_fsmnt); 2110Sstevel@tonic-gate return (err); 2120Sstevel@tonic-gate } 2130Sstevel@tonic-gate err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len); 2140Sstevel@tonic-gate /* Note that may not have err, but may have errmsg */ 2150Sstevel@tonic-gate if (errmsg != NULL) { 2160Sstevel@tonic-gate uprintf(errmsg); 2170Sstevel@tonic-gate kmem_free(errmsg, len); 2180Sstevel@tonic-gate errmsg = NULL; 2190Sstevel@tonic-gate } 2200Sstevel@tonic-gate if (err) 2210Sstevel@tonic-gate return (err); 2220Sstevel@tonic-gate cg = dtog(fs, bprev); 2230Sstevel@tonic-gate bno = fragextend(ip, cg, (long)bprev, osize, nsize); 2240Sstevel@tonic-gate if (bno != 0) { 2250Sstevel@tonic-gate *bnp = bno; 2260Sstevel@tonic-gate return (0); 2270Sstevel@tonic-gate } 2280Sstevel@tonic-gate if (bpref >= fs->fs_size) 2290Sstevel@tonic-gate bpref = 0; 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate /* 2320Sstevel@tonic-gate * When optimizing for time we allocate a full block and 2330Sstevel@tonic-gate * then only use the upper portion for this request. When 2340Sstevel@tonic-gate * this file grows again it will grow into the unused portion 2350Sstevel@tonic-gate * of the block (See fragextend() above). This saves time 2360Sstevel@tonic-gate * because an extra disk write would be needed if the frags 2370Sstevel@tonic-gate * following the current allocation were not free. The extra 2380Sstevel@tonic-gate * disk write is needed to move the data from its current 2390Sstevel@tonic-gate * location into the newly allocated position. 2400Sstevel@tonic-gate * 2410Sstevel@tonic-gate * When optimizing for space we allocate a run of frags 2420Sstevel@tonic-gate * that is just the right size for this request. 2430Sstevel@tonic-gate */ 2440Sstevel@tonic-gate request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize; 2450Sstevel@tonic-gate bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request, 2460Sstevel@tonic-gate (ulong_t (*)())alloccg); 2470Sstevel@tonic-gate if (bno > 0) { 2480Sstevel@tonic-gate *bnp = bno; 2490Sstevel@tonic-gate if (nsize < request) 2500Sstevel@tonic-gate (void) free(ip, bno + numfrags(fs, nsize), 2510Sstevel@tonic-gate (off_t)(request - nsize), I_NOCANCEL); 2520Sstevel@tonic-gate return (0); 2530Sstevel@tonic-gate } 2540Sstevel@tonic-gate 2550Sstevel@tonic-gate /* 2560Sstevel@tonic-gate * hashalloc() failed because some other thread grabbed 2570Sstevel@tonic-gate * the last block so unwind the quota operation. We can 2580Sstevel@tonic-gate * ignore the return because subtractions don't fail, and 2590Sstevel@tonic-gate * our caller guarantees nsize >= osize. 2600Sstevel@tonic-gate */ 2610Sstevel@tonic-gate (void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL, 2620Sstevel@tonic-gate (size_t *)NULL); 2630Sstevel@tonic-gate 2640Sstevel@tonic-gate nospace: 2650Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 2660Sstevel@tonic-gate if ((lbolt - ufsvfsp->vfs_lastwhinetime) > (hz << 2) && 2670Sstevel@tonic-gate (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) { 2680Sstevel@tonic-gate ufsvfsp->vfs_lastwhinetime = lbolt; 2690Sstevel@tonic-gate cmn_err(CE_NOTE, 2700Sstevel@tonic-gate "realloccg %s: file system full", fs->fs_fsmnt); 2710Sstevel@tonic-gate } 2720Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2730Sstevel@tonic-gate return (ENOSPC); 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate 2760Sstevel@tonic-gate /* 2770Sstevel@tonic-gate * Allocate an inode in the file system. 2780Sstevel@tonic-gate * 2790Sstevel@tonic-gate * A preference may be optionally specified. If a preference is given 2800Sstevel@tonic-gate * the following hierarchy is used to allocate an inode: 2810Sstevel@tonic-gate * 1) allocate the requested inode. 2820Sstevel@tonic-gate * 2) allocate an inode in the same cylinder group. 2830Sstevel@tonic-gate * 3) quadratically rehash into other cylinder groups, until an 2840Sstevel@tonic-gate * available inode is located. 2850Sstevel@tonic-gate * If no inode preference is given the following hierarchy is used 2860Sstevel@tonic-gate * to allocate an inode: 2870Sstevel@tonic-gate * 1) allocate an inode in cylinder group 0. 2880Sstevel@tonic-gate * 2) quadratically rehash into other cylinder groups, until an 2890Sstevel@tonic-gate * available inode is located. 2900Sstevel@tonic-gate */ 2910Sstevel@tonic-gate int 2920Sstevel@tonic-gate ufs_ialloc(struct inode *pip, 2930Sstevel@tonic-gate ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr) 2940Sstevel@tonic-gate { 2950Sstevel@tonic-gate struct inode *ip; 2960Sstevel@tonic-gate struct fs *fs; 2970Sstevel@tonic-gate int cg; 2980Sstevel@tonic-gate ino_t ino; 2990Sstevel@tonic-gate int err; 3000Sstevel@tonic-gate int nifree; 3010Sstevel@tonic-gate struct ufsvfs *ufsvfsp = pip->i_ufsvfs; 3020Sstevel@tonic-gate char *errmsg = NULL; 3030Sstevel@tonic-gate size_t len; 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&pip->i_rwlock)); 3060Sstevel@tonic-gate fs = pip->i_fs; 3070Sstevel@tonic-gate loop: 3080Sstevel@tonic-gate nifree = fs->fs_cstotal.cs_nifree; 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate if (nifree == 0) 3110Sstevel@tonic-gate goto noinodes; 3120Sstevel@tonic-gate /* 3130Sstevel@tonic-gate * Shadow inodes don't count against a user's inode allocation. 3140Sstevel@tonic-gate * They are an implementation method and not a resource. 3150Sstevel@tonic-gate */ 3160Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3170Sstevel@tonic-gate err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data, 3180Sstevel@tonic-gate /* change */ 1, (struct inode *)NULL, crgetuid(cr), 0, 3190Sstevel@tonic-gate cr, &errmsg, &len); 3200Sstevel@tonic-gate /* 3210Sstevel@tonic-gate * As we haven't acquired any locks yet, dump the message 3220Sstevel@tonic-gate * now. 3230Sstevel@tonic-gate */ 3240Sstevel@tonic-gate if (errmsg != NULL) { 3250Sstevel@tonic-gate uprintf(errmsg); 3260Sstevel@tonic-gate kmem_free(errmsg, len); 3270Sstevel@tonic-gate errmsg = NULL; 3280Sstevel@tonic-gate } 3290Sstevel@tonic-gate if (err) 3300Sstevel@tonic-gate return (err); 3310Sstevel@tonic-gate } 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg)) 3340Sstevel@tonic-gate ipref = 0; 3350Sstevel@tonic-gate cg = (int)itog(fs, ipref); 3360Sstevel@tonic-gate ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode, 3370Sstevel@tonic-gate (ulong_t (*)())ialloccg); 3380Sstevel@tonic-gate if (ino == 0) { 3390Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3400Sstevel@tonic-gate /* 3410Sstevel@tonic-gate * We can safely ignore the return from chkiq() 3420Sstevel@tonic-gate * because deallocations can only fail if we 3430Sstevel@tonic-gate * can't get the user's quota info record off 3440Sstevel@tonic-gate * the disk due to an I/O error. In that case, 3450Sstevel@tonic-gate * the quota subsystem is already messed up. 3460Sstevel@tonic-gate */ 3470Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ -1, 3480Sstevel@tonic-gate (struct inode *)NULL, crgetuid(cr), 0, cr, 3490Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 3500Sstevel@tonic-gate } 3510Sstevel@tonic-gate goto noinodes; 3520Sstevel@tonic-gate } 3530Sstevel@tonic-gate err = ufs_iget(pip->i_vfs, ino, ipp, cr); 3540Sstevel@tonic-gate if (err) { 3550Sstevel@tonic-gate if ((mode != IFSHAD) && (mode != IFATTRDIR)) { 3560Sstevel@tonic-gate /* 3570Sstevel@tonic-gate * See above comment about why it is safe to ignore an 3580Sstevel@tonic-gate * error return here. 3590Sstevel@tonic-gate */ 3600Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ -1, 3610Sstevel@tonic-gate (struct inode *)NULL, crgetuid(cr), 0, cr, 3620Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 3630Sstevel@tonic-gate } 3640Sstevel@tonic-gate ufs_ifree(pip, ino, 0); 3650Sstevel@tonic-gate return (err); 3660Sstevel@tonic-gate } 3670Sstevel@tonic-gate ip = *ipp; 3680Sstevel@tonic-gate ASSERT(!ip->i_ufs_acl); 3690Sstevel@tonic-gate ASSERT(!ip->i_dquot); 3700Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3710Sstevel@tonic-gate 3720Sstevel@tonic-gate /* 3730Sstevel@tonic-gate * Check if we really got a free inode, if not then complain 3740Sstevel@tonic-gate * and mark the inode ISTALE so that it will be freed by the 3750Sstevel@tonic-gate * ufs idle thread eventually and will not be sent to ufs_delete(). 3760Sstevel@tonic-gate */ 3770Sstevel@tonic-gate if (ip->i_mode || (ip->i_nlink > 0)) { 3780Sstevel@tonic-gate ip->i_flag |= ISTALE; 3790Sstevel@tonic-gate rw_exit(&ip->i_contents); 3800Sstevel@tonic-gate VN_RELE(ITOV(ip)); 3810Sstevel@tonic-gate cmn_err(CE_WARN, 3820Sstevel@tonic-gate "%s: unexpected allocated inode %d, run fsck(1M)%s", 3830Sstevel@tonic-gate fs->fs_fsmnt, (int)ino, 3840Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 3850Sstevel@tonic-gate goto loop; 3860Sstevel@tonic-gate } 3870Sstevel@tonic-gate 3880Sstevel@tonic-gate /* 3890Sstevel@tonic-gate * Check the inode has no size or data blocks. 3900Sstevel@tonic-gate * This could have happened if the truncation failed when 3910Sstevel@tonic-gate * deleting the inode. It used to be possible for this to occur 3920Sstevel@tonic-gate * if a block allocation failed when iteratively truncating a 3930Sstevel@tonic-gate * large file using logging and with a full file system. 3940Sstevel@tonic-gate * This was fixed with bug fix 4348738. However, truncation may 3950Sstevel@tonic-gate * still fail on an IO error. So in all cases for safety and 3960Sstevel@tonic-gate * security we clear out the size; the blocks allocated; and 3970Sstevel@tonic-gate * pointers to the blocks. This will ultimately cause a fsck 3980Sstevel@tonic-gate * error of un-accounted for blocks, but its a fairly benign error, 3990Sstevel@tonic-gate * and possibly the correct thing to do anyway as accesssing those 4000Sstevel@tonic-gate * blocks agains may lead to more IO errors. 4010Sstevel@tonic-gate */ 4020Sstevel@tonic-gate if (ip->i_size || ip->i_blocks) { 4030Sstevel@tonic-gate int i; 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate if (ip->i_size) { 4060Sstevel@tonic-gate cmn_err(CE_WARN, 407*923Ssdebnath "%s: free inode %d had size 0x%llx, run fsck(1M)%s", 408*923Ssdebnath fs->fs_fsmnt, (int)ino, ip->i_size, 409*923Ssdebnath (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 4100Sstevel@tonic-gate } 4110Sstevel@tonic-gate /* 4120Sstevel@tonic-gate * Clear any garbage left behind. 4130Sstevel@tonic-gate */ 4140Sstevel@tonic-gate ip->i_size = (u_offset_t)0; 4150Sstevel@tonic-gate ip->i_blocks = 0; 4160Sstevel@tonic-gate for (i = 0; i < NDADDR; i++) 4170Sstevel@tonic-gate ip->i_db[i] = 0; 4180Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 4190Sstevel@tonic-gate ip->i_ib[i] = 0; 4200Sstevel@tonic-gate } 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate /* 4230Sstevel@tonic-gate * Initialize the link count 4240Sstevel@tonic-gate */ 4250Sstevel@tonic-gate ip->i_nlink = 0; 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate /* 4280Sstevel@tonic-gate * Clear the old flags 4290Sstevel@tonic-gate */ 4300Sstevel@tonic-gate ip->i_flag &= IREF; 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate /* 4330Sstevel@tonic-gate * Access times are not really defined if the fs is mounted 4340Sstevel@tonic-gate * with 'noatime'. But it can cause nfs clients to fail 4350Sstevel@tonic-gate * open() if the atime is not a legal value. Set a legal value 4360Sstevel@tonic-gate * here when the inode is allocated. 4370Sstevel@tonic-gate */ 4380Sstevel@tonic-gate if (ufsvfsp->vfs_noatime) { 4390Sstevel@tonic-gate mutex_enter(&ufs_iuniqtime_lock); 4400Sstevel@tonic-gate ip->i_atime = iuniqtime; 4410Sstevel@tonic-gate mutex_exit(&ufs_iuniqtime_lock); 4420Sstevel@tonic-gate } 4430Sstevel@tonic-gate rw_exit(&ip->i_contents); 4440Sstevel@tonic-gate return (0); 4450Sstevel@tonic-gate noinodes: 4460Sstevel@tonic-gate if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET)) 4470Sstevel@tonic-gate cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt); 4480Sstevel@tonic-gate return (ENOSPC); 4490Sstevel@tonic-gate } 4500Sstevel@tonic-gate 4510Sstevel@tonic-gate /* 4520Sstevel@tonic-gate * Find a cylinder group to place a directory. 4530Sstevel@tonic-gate * Returns an inumber within the selected cylinder group. 4540Sstevel@tonic-gate * Note, the vfs_lock is not needed as we don't require exact cg summary info. 4550Sstevel@tonic-gate * 4560Sstevel@tonic-gate * If the switch ufs_close_dirs is set, then the policy is to use 4570Sstevel@tonic-gate * the current cg if it has more than 25% free inodes and more 4580Sstevel@tonic-gate * than 25% free blocks. Otherwise the cgs are searched from 4590Sstevel@tonic-gate * the beginning and the first cg with the same criteria is 4600Sstevel@tonic-gate * used. If that is also null then we revert to the old algorithm. 4610Sstevel@tonic-gate * This tends to cluster files at the beginning of the disk 4620Sstevel@tonic-gate * until the disk gets full. 4630Sstevel@tonic-gate * 4640Sstevel@tonic-gate * Otherwise if ufs_close_dirs is not set then the original policy is 4650Sstevel@tonic-gate * used which is to select from among those cylinder groups with 4660Sstevel@tonic-gate * above the average number of free inodes, the one with the smallest 4670Sstevel@tonic-gate * number of directories. 4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate int ufs_close_dirs = 1; /* allocate directories close as possible */ 4710Sstevel@tonic-gate 4720Sstevel@tonic-gate ino_t 4730Sstevel@tonic-gate dirpref(inode_t *dp) 4740Sstevel@tonic-gate { 4750Sstevel@tonic-gate int cg, minndir, mincg, avgifree, mininode, minbpg, ifree; 4760Sstevel@tonic-gate struct fs *fs = dp->i_fs; 4770Sstevel@tonic-gate 4780Sstevel@tonic-gate cg = itog(fs, dp->i_number); 4790Sstevel@tonic-gate mininode = fs->fs_ipg >> 2; 4800Sstevel@tonic-gate minbpg = fs->fs_maxbpg >> 2; 4810Sstevel@tonic-gate if (ufs_close_dirs && 4820Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nifree > mininode) && 4830Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) { 4840Sstevel@tonic-gate return (dp->i_number); 4850Sstevel@tonic-gate } 4860Sstevel@tonic-gate 4870Sstevel@tonic-gate avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 4880Sstevel@tonic-gate minndir = fs->fs_ipg; 4890Sstevel@tonic-gate mincg = 0; 4900Sstevel@tonic-gate for (cg = 0; cg < fs->fs_ncg; cg++) { 4910Sstevel@tonic-gate ifree = fs->fs_cs(fs, cg).cs_nifree; 4920Sstevel@tonic-gate if (ufs_close_dirs && 4930Sstevel@tonic-gate (ifree > mininode) && 4940Sstevel@tonic-gate (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) { 4950Sstevel@tonic-gate return ((ino_t)(fs->fs_ipg * cg)); 4960Sstevel@tonic-gate } 4970Sstevel@tonic-gate if ((fs->fs_cs(fs, cg).cs_ndir < minndir) && 4980Sstevel@tonic-gate (ifree >= avgifree)) { 4990Sstevel@tonic-gate mincg = cg; 5000Sstevel@tonic-gate minndir = fs->fs_cs(fs, cg).cs_ndir; 5010Sstevel@tonic-gate } 5020Sstevel@tonic-gate } 5030Sstevel@tonic-gate return ((ino_t)(fs->fs_ipg * mincg)); 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate /* 5070Sstevel@tonic-gate * Select the desired position for the next block in a file. The file is 5080Sstevel@tonic-gate * logically divided into sections. The first section is composed of the 5090Sstevel@tonic-gate * direct blocks. Each additional section contains fs_maxbpg blocks. 5100Sstevel@tonic-gate * 5110Sstevel@tonic-gate * If no blocks have been allocated in the first section, the policy is to 5120Sstevel@tonic-gate * request a block in the same cylinder group as the inode that describes 5130Sstevel@tonic-gate * the file. If no blocks have been allocated in any other section, the 5140Sstevel@tonic-gate * policy is to place the section in a cylinder group with a greater than 5150Sstevel@tonic-gate * average number of free blocks. An appropriate cylinder group is found 5160Sstevel@tonic-gate * by using a rotor that sweeps the cylinder groups. When a new group of 5170Sstevel@tonic-gate * blocks is needed, the sweep begins in the cylinder group following the 5180Sstevel@tonic-gate * cylinder group from which the previous allocation was made. The sweep 5190Sstevel@tonic-gate * continues until a cylinder group with greater than the average number 5200Sstevel@tonic-gate * of free blocks is found. If the allocation is for the first block in an 5210Sstevel@tonic-gate * indirect block, the information on the previous allocation is unavailable; 5220Sstevel@tonic-gate * here a best guess is made based upon the logical block number being 5230Sstevel@tonic-gate * allocated. 5240Sstevel@tonic-gate * 5250Sstevel@tonic-gate * If a section is already partially allocated, the policy is to 5260Sstevel@tonic-gate * contiguously allocate fs_maxcontig blocks. The end of one of these 5270Sstevel@tonic-gate * contiguous blocks and the beginning of the next is physically separated 5280Sstevel@tonic-gate * so that the disk head will be in transit between them for at least 5290Sstevel@tonic-gate * fs_rotdelay milliseconds. This is to allow time for the processor to 5300Sstevel@tonic-gate * schedule another I/O transfer. 5310Sstevel@tonic-gate */ 5320Sstevel@tonic-gate daddr_t 5330Sstevel@tonic-gate blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap) 5340Sstevel@tonic-gate { 5350Sstevel@tonic-gate struct fs *fs; 5360Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 5370Sstevel@tonic-gate int cg; 5380Sstevel@tonic-gate int avgbfree, startcg; 5390Sstevel@tonic-gate daddr_t nextblk; 5400Sstevel@tonic-gate 5410Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 5420Sstevel@tonic-gate fs = ip->i_fs; 5430Sstevel@tonic-gate if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 5440Sstevel@tonic-gate if (lbn < NDADDR) { 5450Sstevel@tonic-gate cg = itog(fs, ip->i_number); 5460Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5470Sstevel@tonic-gate } 5480Sstevel@tonic-gate /* 5490Sstevel@tonic-gate * Find a cylinder with greater than average 5500Sstevel@tonic-gate * number of unused data blocks. 5510Sstevel@tonic-gate */ 5520Sstevel@tonic-gate if (indx == 0 || bap[indx - 1] == 0) 5530Sstevel@tonic-gate startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg; 5540Sstevel@tonic-gate else 5550Sstevel@tonic-gate startcg = dtog(fs, bap[indx - 1]) + 1; 5560Sstevel@tonic-gate startcg %= fs->fs_ncg; 5570Sstevel@tonic-gate 5580Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 5590Sstevel@tonic-gate avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 5600Sstevel@tonic-gate /* 5610Sstevel@tonic-gate * used for computing log space for writes/truncs 5620Sstevel@tonic-gate */ 5630Sstevel@tonic-gate ufsvfsp->vfs_avgbfree = avgbfree; 5640Sstevel@tonic-gate for (cg = startcg; cg < fs->fs_ncg; cg++) 5650Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 5660Sstevel@tonic-gate fs->fs_cgrotor = cg; 5670Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5680Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5690Sstevel@tonic-gate } 5700Sstevel@tonic-gate for (cg = 0; cg <= startcg; cg++) 5710Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 5720Sstevel@tonic-gate fs->fs_cgrotor = cg; 5730Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5740Sstevel@tonic-gate return (fs->fs_fpg * cg + fs->fs_frag); 5750Sstevel@tonic-gate } 5760Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5770Sstevel@tonic-gate return (NULL); 5780Sstevel@tonic-gate } 5790Sstevel@tonic-gate /* 5800Sstevel@tonic-gate * One or more previous blocks have been laid out. If less 5810Sstevel@tonic-gate * than fs_maxcontig previous blocks are contiguous, the 5820Sstevel@tonic-gate * next block is requested contiguously, otherwise it is 5830Sstevel@tonic-gate * requested rotationally delayed by fs_rotdelay milliseconds. 5840Sstevel@tonic-gate */ 585*923Ssdebnath 586*923Ssdebnath nextblk = bap[indx - 1]; 587*923Ssdebnath /* 588*923Ssdebnath * Provision for fallocate to return positive 589*923Ssdebnath * blk preference based on last allocation 590*923Ssdebnath */ 591*923Ssdebnath if (nextblk < 0 && nextblk != UFS_HOLE) { 592*923Ssdebnath nextblk = (-bap[indx - 1]) + fs->fs_frag; 593*923Ssdebnath } else { 594*923Ssdebnath nextblk = bap[indx - 1] + fs->fs_frag; 595*923Ssdebnath } 596*923Ssdebnath 597*923Ssdebnath if (indx > fs->fs_maxcontig && bap[indx - fs->fs_maxcontig] + 598*923Ssdebnath blkstofrags(fs, fs->fs_maxcontig) != nextblk) { 5990Sstevel@tonic-gate return (nextblk); 600*923Ssdebnath } 6010Sstevel@tonic-gate if (fs->fs_rotdelay != 0) 6020Sstevel@tonic-gate /* 6030Sstevel@tonic-gate * Here we convert ms of delay to frags as: 6040Sstevel@tonic-gate * (frags) = (ms) * (rev/sec) * (sect/rev) / 605*923Ssdebnath * ((sect/frag) * (ms/sec)) 6060Sstevel@tonic-gate * then round up to the next block. 6070Sstevel@tonic-gate */ 6080Sstevel@tonic-gate nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 6090Sstevel@tonic-gate (NSPF(fs) * 1000), fs->fs_frag); 6100Sstevel@tonic-gate return (nextblk); 6110Sstevel@tonic-gate } 6120Sstevel@tonic-gate 6130Sstevel@tonic-gate /* 6140Sstevel@tonic-gate * Free a block or fragment. 6150Sstevel@tonic-gate * 6160Sstevel@tonic-gate * The specified block or fragment is placed back in the 6170Sstevel@tonic-gate * free map. If a fragment is deallocated, a possible 6180Sstevel@tonic-gate * block reassembly is checked. 6190Sstevel@tonic-gate */ 6200Sstevel@tonic-gate void 6210Sstevel@tonic-gate free(struct inode *ip, daddr_t bno, off_t size, int flags) 6220Sstevel@tonic-gate { 6230Sstevel@tonic-gate struct fs *fs = ip->i_fs; 6240Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 625512Sjkennedy struct ufs_q *delq = &ufsvfsp->vfs_delete; 626512Sjkennedy struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info; 6270Sstevel@tonic-gate struct cg *cgp; 6280Sstevel@tonic-gate struct buf *bp; 6290Sstevel@tonic-gate int cg, bmap, bbase; 6300Sstevel@tonic-gate int i; 6310Sstevel@tonic-gate uchar_t *blksfree; 6320Sstevel@tonic-gate int *blktot; 6330Sstevel@tonic-gate short *blks; 6340Sstevel@tonic-gate daddr_t blkno, cylno, rpos; 6350Sstevel@tonic-gate 636*923Ssdebnath /* 637*923Ssdebnath * fallocate'd files will have negative block address. 638*923Ssdebnath * So negate it again to get original block address. 639*923Ssdebnath */ 640*923Ssdebnath if (bno < 0 && bno % fs->fs_bsize == 0 && bno != UFS_HOLE) { 641*923Ssdebnath bno = -bno; 642*923Ssdebnath } 643*923Ssdebnath 6440Sstevel@tonic-gate if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) { 6450Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 646*923Ssdebnath "free: bad size, dev = 0x%lx, bsize = %d, size = %d, " 647*923Ssdebnath "fs = %s\n", ip->i_dev, fs->fs_bsize, 648*923Ssdebnath (int)size, fs->fs_fsmnt); 6490Sstevel@tonic-gate return; 6500Sstevel@tonic-gate } 6510Sstevel@tonic-gate cg = dtog(fs, bno); 6520Sstevel@tonic-gate ASSERT(!ufs_badblock(ip, bno)); 6530Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 654*923Ssdebnath (int)fs->fs_cgsize); 6550Sstevel@tonic-gate 6560Sstevel@tonic-gate cgp = bp->b_un.b_cg; 6570Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 6580Sstevel@tonic-gate brelse(bp); 6590Sstevel@tonic-gate return; 6600Sstevel@tonic-gate } 6610Sstevel@tonic-gate 6620Sstevel@tonic-gate if (!(flags & I_NOCANCEL)) 6630Sstevel@tonic-gate TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags); 6640Sstevel@tonic-gate if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) { 6650Sstevel@tonic-gate TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size); 6660Sstevel@tonic-gate } 6670Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 6680Sstevel@tonic-gate blktot = cg_blktot(cgp); 6690Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 6700Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 6710Sstevel@tonic-gate bno = dtogd(fs, bno); 6720Sstevel@tonic-gate if (size == fs->fs_bsize) { 6730Sstevel@tonic-gate blkno = fragstoblks(fs, bno); 6740Sstevel@tonic-gate cylno = cbtocylno(fs, bno); 6750Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bno); 6760Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 6770Sstevel@tonic-gate if (!isclrblock(fs, blksfree, blkno)) { 6780Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 6790Sstevel@tonic-gate brelse(bp); 6800Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "free: freeing free block, " 6810Sstevel@tonic-gate "dev:0x%lx, block:%ld, ino:%lu, fs:%s", 6820Sstevel@tonic-gate ip->i_dev, bno, ip->i_number, fs->fs_fsmnt); 6830Sstevel@tonic-gate return; 6840Sstevel@tonic-gate } 6850Sstevel@tonic-gate setblock(fs, blksfree, blkno); 6860Sstevel@tonic-gate blks[rpos]++; 6870Sstevel@tonic-gate blktot[cylno]++; 6880Sstevel@tonic-gate cgp->cg_cs.cs_nbfree++; /* Log below */ 6890Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree++; 6900Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nbfree++; 691512Sjkennedy if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) { 692512Sjkennedy mutex_enter(&delq->uq_mutex); 693512Sjkennedy delq_info->delq_unreclaimed_blocks -= 694512Sjkennedy btodb(fs->fs_bsize); 695512Sjkennedy mutex_exit(&delq->uq_mutex); 696512Sjkennedy } 6970Sstevel@tonic-gate } else { 6980Sstevel@tonic-gate bbase = bno - fragnum(fs, bno); 6990Sstevel@tonic-gate /* 7000Sstevel@tonic-gate * Decrement the counts associated with the old frags 7010Sstevel@tonic-gate */ 7020Sstevel@tonic-gate bmap = blkmap(fs, blksfree, bbase); 7030Sstevel@tonic-gate fragacct(fs, bmap, cgp->cg_frsum, -1); 7040Sstevel@tonic-gate /* 7050Sstevel@tonic-gate * Deallocate the fragment 7060Sstevel@tonic-gate */ 7070Sstevel@tonic-gate for (i = 0; i < numfrags(fs, size); i++) { 7080Sstevel@tonic-gate if (isset(blksfree, bno + i)) { 7090Sstevel@tonic-gate brelse(bp); 7100Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 7110Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7120Sstevel@tonic-gate "free: freeing free frag, " 7130Sstevel@tonic-gate "dev:0x%lx, blk:%ld, cg:%d, " 7140Sstevel@tonic-gate "ino:%lu, fs:%s", 7150Sstevel@tonic-gate ip->i_dev, 7160Sstevel@tonic-gate bno + i, 7170Sstevel@tonic-gate cgp->cg_cgx, 7180Sstevel@tonic-gate ip->i_number, 7190Sstevel@tonic-gate fs->fs_fsmnt); 7200Sstevel@tonic-gate return; 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate setbit(blksfree, bno + i); 7230Sstevel@tonic-gate } 7240Sstevel@tonic-gate cgp->cg_cs.cs_nffree += i; 7250Sstevel@tonic-gate fs->fs_cstotal.cs_nffree += i; 7260Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree += i; 727512Sjkennedy if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) { 728512Sjkennedy mutex_enter(&delq->uq_mutex); 729512Sjkennedy delq_info->delq_unreclaimed_blocks -= 730512Sjkennedy btodb(i * fs->fs_fsize); 731512Sjkennedy mutex_exit(&delq->uq_mutex); 732512Sjkennedy } 7330Sstevel@tonic-gate /* 7340Sstevel@tonic-gate * Add back in counts associated with the new frags 7350Sstevel@tonic-gate */ 7360Sstevel@tonic-gate bmap = blkmap(fs, blksfree, bbase); 7370Sstevel@tonic-gate fragacct(fs, bmap, cgp->cg_frsum, 1); 7380Sstevel@tonic-gate /* 7390Sstevel@tonic-gate * If a complete block has been reassembled, account for it 7400Sstevel@tonic-gate */ 7410Sstevel@tonic-gate blkno = fragstoblks(fs, bbase); 7420Sstevel@tonic-gate if (isblock(fs, blksfree, blkno)) { 7430Sstevel@tonic-gate cylno = cbtocylno(fs, bbase); 7440Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bbase); 7450Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 7460Sstevel@tonic-gate blks[rpos]++; 7470Sstevel@tonic-gate blktot[cylno]++; 7480Sstevel@tonic-gate cgp->cg_cs.cs_nffree -= fs->fs_frag; 7490Sstevel@tonic-gate fs->fs_cstotal.cs_nffree -= fs->fs_frag; 7500Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 7510Sstevel@tonic-gate cgp->cg_cs.cs_nbfree++; 7520Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree++; 7530Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nbfree++; 7540Sstevel@tonic-gate } 7550Sstevel@tonic-gate } 7560Sstevel@tonic-gate fs->fs_fmod = 1; 7570Sstevel@tonic-gate ufs_notclean(ufsvfsp); 7580Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 7590Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 7600Sstevel@tonic-gate bdrwrite(bp); 7610Sstevel@tonic-gate } 7620Sstevel@tonic-gate 7630Sstevel@tonic-gate /* 7640Sstevel@tonic-gate * Free an inode. 7650Sstevel@tonic-gate * 7660Sstevel@tonic-gate * The specified inode is placed back in the free map. 7670Sstevel@tonic-gate */ 7680Sstevel@tonic-gate void 7690Sstevel@tonic-gate ufs_ifree(struct inode *ip, ino_t ino, mode_t mode) 7700Sstevel@tonic-gate { 7710Sstevel@tonic-gate struct fs *fs = ip->i_fs; 7720Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 7730Sstevel@tonic-gate struct cg *cgp; 7740Sstevel@tonic-gate struct buf *bp; 7750Sstevel@tonic-gate unsigned int inot; 7760Sstevel@tonic-gate int cg; 7770Sstevel@tonic-gate char *iused; 7780Sstevel@tonic-gate 7790Sstevel@tonic-gate if (ip->i_number == ino && ip->i_mode != 0) { 7800Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7810Sstevel@tonic-gate "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, " 7820Sstevel@tonic-gate "fs = %s\n", 7830Sstevel@tonic-gate ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt); 7840Sstevel@tonic-gate return; 7850Sstevel@tonic-gate } 7860Sstevel@tonic-gate if (ino >= fs->fs_ipg * fs->fs_ncg) { 7870Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 7880Sstevel@tonic-gate "ifree: range, dev = 0x%x, ino = %d, fs = %s\n", 7890Sstevel@tonic-gate (int)ip->i_dev, (int)ino, fs->fs_fsmnt); 7900Sstevel@tonic-gate return; 7910Sstevel@tonic-gate } 7920Sstevel@tonic-gate cg = (int)itog(fs, ino); 7930Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 794*923Ssdebnath (int)fs->fs_cgsize); 7950Sstevel@tonic-gate 7960Sstevel@tonic-gate cgp = bp->b_un.b_cg; 7970Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 7980Sstevel@tonic-gate brelse(bp); 7990Sstevel@tonic-gate return; 8000Sstevel@tonic-gate } 8010Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 8020Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 8030Sstevel@tonic-gate iused = cg_inosused(cgp); 8040Sstevel@tonic-gate inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg); 8050Sstevel@tonic-gate if (isclr(iused, inot)) { 8060Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 8070Sstevel@tonic-gate brelse(bp); 8080Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, " 809*923Ssdebnath "mode: (imode) %o, (omode) %o, ino:%d, " 810*923Ssdebnath "fs:%s", 811*923Ssdebnath ip->i_mode, mode, (int)ino, fs->fs_fsmnt); 8120Sstevel@tonic-gate return; 8130Sstevel@tonic-gate } 8140Sstevel@tonic-gate clrbit(iused, inot); 8150Sstevel@tonic-gate 8160Sstevel@tonic-gate if (inot < (ulong_t)cgp->cg_irotor) 8170Sstevel@tonic-gate cgp->cg_irotor = inot; 8180Sstevel@tonic-gate cgp->cg_cs.cs_nifree++; 8190Sstevel@tonic-gate fs->fs_cstotal.cs_nifree++; 8200Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nifree++; 8210Sstevel@tonic-gate if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) { 8220Sstevel@tonic-gate cgp->cg_cs.cs_ndir--; 8230Sstevel@tonic-gate fs->fs_cstotal.cs_ndir--; 8240Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_ndir--; 8250Sstevel@tonic-gate } 8260Sstevel@tonic-gate fs->fs_fmod = 1; 8270Sstevel@tonic-gate ufs_notclean(ufsvfsp); 8280Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 8290Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 8300Sstevel@tonic-gate bdrwrite(bp); 8310Sstevel@tonic-gate } 8320Sstevel@tonic-gate 8330Sstevel@tonic-gate /* 8340Sstevel@tonic-gate * Implement the cylinder overflow algorithm. 8350Sstevel@tonic-gate * 8360Sstevel@tonic-gate * The policy implemented by this algorithm is: 8370Sstevel@tonic-gate * 1) allocate the block in its requested cylinder group. 8380Sstevel@tonic-gate * 2) quadratically rehash on the cylinder group number. 8390Sstevel@tonic-gate * 3) brute force search for a free block. 8400Sstevel@tonic-gate * The size parameter means size for data blocks, mode for inodes. 8410Sstevel@tonic-gate */ 8420Sstevel@tonic-gate static ino_t 8430Sstevel@tonic-gate hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)()) 8440Sstevel@tonic-gate { 8450Sstevel@tonic-gate struct fs *fs; 8460Sstevel@tonic-gate int i; 8470Sstevel@tonic-gate long result; 8480Sstevel@tonic-gate int icg = cg; 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate fs = ip->i_fs; 8510Sstevel@tonic-gate /* 8520Sstevel@tonic-gate * 1: preferred cylinder group 8530Sstevel@tonic-gate */ 8540Sstevel@tonic-gate result = (*allocator)(ip, cg, pref, size); 8550Sstevel@tonic-gate if (result) 8560Sstevel@tonic-gate return (result); 8570Sstevel@tonic-gate /* 8580Sstevel@tonic-gate * 2: quadratic rehash 8590Sstevel@tonic-gate */ 8600Sstevel@tonic-gate for (i = 1; i < fs->fs_ncg; i *= 2) { 8610Sstevel@tonic-gate cg += i; 8620Sstevel@tonic-gate if (cg >= fs->fs_ncg) 8630Sstevel@tonic-gate cg -= fs->fs_ncg; 8640Sstevel@tonic-gate result = (*allocator)(ip, cg, 0, size); 8650Sstevel@tonic-gate if (result) 8660Sstevel@tonic-gate return (result); 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate /* 8690Sstevel@tonic-gate * 3: brute force search 8700Sstevel@tonic-gate * Note that we start at i == 2, since 0 was checked initially, 8710Sstevel@tonic-gate * and 1 is always checked in the quadratic rehash. 8720Sstevel@tonic-gate */ 8730Sstevel@tonic-gate cg = (icg + 2) % fs->fs_ncg; 8740Sstevel@tonic-gate for (i = 2; i < fs->fs_ncg; i++) { 8750Sstevel@tonic-gate result = (*allocator)(ip, cg, 0, size); 8760Sstevel@tonic-gate if (result) 8770Sstevel@tonic-gate return (result); 8780Sstevel@tonic-gate cg++; 8790Sstevel@tonic-gate if (cg == fs->fs_ncg) 8800Sstevel@tonic-gate cg = 0; 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate return (NULL); 8830Sstevel@tonic-gate } 8840Sstevel@tonic-gate 8850Sstevel@tonic-gate /* 8860Sstevel@tonic-gate * Determine whether a fragment can be extended. 8870Sstevel@tonic-gate * 8880Sstevel@tonic-gate * Check to see if the necessary fragments are available, and 8890Sstevel@tonic-gate * if they are, allocate them. 8900Sstevel@tonic-gate */ 8910Sstevel@tonic-gate static daddr_t 8920Sstevel@tonic-gate fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize) 8930Sstevel@tonic-gate { 8940Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 8950Sstevel@tonic-gate struct fs *fs = ip->i_fs; 8960Sstevel@tonic-gate struct buf *bp; 8970Sstevel@tonic-gate struct cg *cgp; 8980Sstevel@tonic-gate uchar_t *blksfree; 8990Sstevel@tonic-gate long bno; 9000Sstevel@tonic-gate int frags, bbase; 9010Sstevel@tonic-gate int i, j; 9020Sstevel@tonic-gate 9030Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 9040Sstevel@tonic-gate return (NULL); 9050Sstevel@tonic-gate frags = numfrags(fs, nsize); 9060Sstevel@tonic-gate bbase = (int)fragnum(fs, bprev); 9070Sstevel@tonic-gate if (bbase > fragnum(fs, (bprev + frags - 1))) { 9080Sstevel@tonic-gate /* cannot extend across a block boundary */ 9090Sstevel@tonic-gate return (NULL); 9100Sstevel@tonic-gate } 9110Sstevel@tonic-gate 9120Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 913*923Ssdebnath (int)fs->fs_cgsize); 9140Sstevel@tonic-gate cgp = bp->b_un.b_cg; 9150Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 9160Sstevel@tonic-gate brelse(bp); 9170Sstevel@tonic-gate return (NULL); 9180Sstevel@tonic-gate } 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 9210Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 9220Sstevel@tonic-gate bno = dtogd(fs, bprev); 9230Sstevel@tonic-gate for (i = numfrags(fs, osize); i < frags; i++) { 9240Sstevel@tonic-gate if (isclr(blksfree, bno + i)) { 9250Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 9260Sstevel@tonic-gate brelse(bp); 9270Sstevel@tonic-gate return (NULL); 9280Sstevel@tonic-gate } 9290Sstevel@tonic-gate if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)), 9300Sstevel@tonic-gate fs->fs_fsize))) { 9310Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 9320Sstevel@tonic-gate brelse(bp); 9330Sstevel@tonic-gate return (NULL); 9340Sstevel@tonic-gate } 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate 9370Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 9380Sstevel@tonic-gate /* 9390Sstevel@tonic-gate * The current fragment can be extended, 9400Sstevel@tonic-gate * deduct the count on fragment being extended into 9410Sstevel@tonic-gate * increase the count on the remaining fragment (if any) 9420Sstevel@tonic-gate * allocate the extended piece. 9430Sstevel@tonic-gate */ 9440Sstevel@tonic-gate for (i = frags; i < fs->fs_frag - bbase; i++) 9450Sstevel@tonic-gate if (isclr(blksfree, bno + i)) 9460Sstevel@tonic-gate break; 9470Sstevel@tonic-gate j = i - numfrags(fs, osize); 9480Sstevel@tonic-gate cgp->cg_frsum[j]--; 9490Sstevel@tonic-gate ASSERT(cgp->cg_frsum[j] >= 0); 9500Sstevel@tonic-gate if (i != frags) 9510Sstevel@tonic-gate cgp->cg_frsum[i - frags]++; 9520Sstevel@tonic-gate for (i = numfrags(fs, osize); i < frags; i++) { 9530Sstevel@tonic-gate clrbit(blksfree, bno + i); 9540Sstevel@tonic-gate cgp->cg_cs.cs_nffree--; 9550Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree--; 9560Sstevel@tonic-gate fs->fs_cstotal.cs_nffree--; 9570Sstevel@tonic-gate } 9580Sstevel@tonic-gate fs->fs_fmod = 1; 9590Sstevel@tonic-gate ufs_notclean(ufsvfsp); 9600Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 9610Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 9620Sstevel@tonic-gate bdrwrite(bp); 9630Sstevel@tonic-gate return ((daddr_t)bprev); 9640Sstevel@tonic-gate } 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate /* 9670Sstevel@tonic-gate * Determine whether a block can be allocated. 9680Sstevel@tonic-gate * 9690Sstevel@tonic-gate * Check to see if a block of the apprpriate size 9700Sstevel@tonic-gate * is available, and if it is, allocate it. 9710Sstevel@tonic-gate */ 9720Sstevel@tonic-gate static daddr_t 9730Sstevel@tonic-gate alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 9740Sstevel@tonic-gate { 9750Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 9760Sstevel@tonic-gate struct fs *fs = ip->i_fs; 9770Sstevel@tonic-gate struct buf *bp; 9780Sstevel@tonic-gate struct cg *cgp; 9790Sstevel@tonic-gate uchar_t *blksfree; 9800Sstevel@tonic-gate int bno, frags; 9810Sstevel@tonic-gate int allocsiz; 9820Sstevel@tonic-gate int i; 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 9850Sstevel@tonic-gate return (0); 9860Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 987*923Ssdebnath (int)fs->fs_cgsize); 9880Sstevel@tonic-gate 9890Sstevel@tonic-gate cgp = bp->b_un.b_cg; 9900Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) || 9910Sstevel@tonic-gate (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 9920Sstevel@tonic-gate brelse(bp); 9930Sstevel@tonic-gate return (0); 9940Sstevel@tonic-gate } 9950Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 9960Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 9970Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 9980Sstevel@tonic-gate if (size == fs->fs_bsize) { 9990Sstevel@tonic-gate if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0) 10000Sstevel@tonic-gate goto errout; 10010Sstevel@tonic-gate fs->fs_fmod = 1; 10020Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10030Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10040Sstevel@tonic-gate bdrwrite(bp); 10050Sstevel@tonic-gate return (bno); 10060Sstevel@tonic-gate } 10070Sstevel@tonic-gate /* 10080Sstevel@tonic-gate * Check to see if any fragments are already available 10090Sstevel@tonic-gate * allocsiz is the size which will be allocated, hacking 10100Sstevel@tonic-gate * it down to a smaller size if necessary. 10110Sstevel@tonic-gate */ 10120Sstevel@tonic-gate frags = numfrags(fs, size); 10130Sstevel@tonic-gate for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 10140Sstevel@tonic-gate if (cgp->cg_frsum[allocsiz] != 0) 10150Sstevel@tonic-gate break; 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate if (allocsiz != fs->fs_frag) 10180Sstevel@tonic-gate bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz); 10190Sstevel@tonic-gate 10200Sstevel@tonic-gate if (allocsiz == fs->fs_frag || bno < 0) { 10210Sstevel@tonic-gate /* 10220Sstevel@tonic-gate * No fragments were available, so a block 10230Sstevel@tonic-gate * will be allocated and hacked up. 10240Sstevel@tonic-gate */ 10250Sstevel@tonic-gate if (cgp->cg_cs.cs_nbfree == 0) 10260Sstevel@tonic-gate goto errout; 10270Sstevel@tonic-gate if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0) 10280Sstevel@tonic-gate goto errout; 10290Sstevel@tonic-gate bpref = dtogd(fs, bno); 10300Sstevel@tonic-gate for (i = frags; i < fs->fs_frag; i++) 10310Sstevel@tonic-gate setbit(blksfree, bpref + i); 10320Sstevel@tonic-gate i = fs->fs_frag - frags; 10330Sstevel@tonic-gate cgp->cg_cs.cs_nffree += i; 10340Sstevel@tonic-gate fs->fs_cstotal.cs_nffree += i; 10350Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree += i; 10360Sstevel@tonic-gate cgp->cg_frsum[i]++; 10370Sstevel@tonic-gate fs->fs_fmod = 1; 10380Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10390Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10400Sstevel@tonic-gate bdrwrite(bp); 10410Sstevel@tonic-gate return (bno); 10420Sstevel@tonic-gate } 10430Sstevel@tonic-gate 10440Sstevel@tonic-gate for (i = 0; i < frags; i++) 10450Sstevel@tonic-gate clrbit(blksfree, bno + i); 10460Sstevel@tonic-gate cgp->cg_cs.cs_nffree -= frags; 10470Sstevel@tonic-gate fs->fs_cstotal.cs_nffree -= frags; 10480Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nffree -= frags; 10490Sstevel@tonic-gate cgp->cg_frsum[allocsiz]--; 10500Sstevel@tonic-gate ASSERT(cgp->cg_frsum[allocsiz] >= 0); 10510Sstevel@tonic-gate if (frags != allocsiz) { 10520Sstevel@tonic-gate cgp->cg_frsum[allocsiz - frags]++; 10530Sstevel@tonic-gate } 10540Sstevel@tonic-gate fs->fs_fmod = 1; 10550Sstevel@tonic-gate ufs_notclean(ufsvfsp); 10560Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 10570Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 10580Sstevel@tonic-gate bdrwrite(bp); 10590Sstevel@tonic-gate return (cg * fs->fs_fpg + bno); 10600Sstevel@tonic-gate errout: 10610Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 10620Sstevel@tonic-gate brelse(bp); 10630Sstevel@tonic-gate return (0); 10640Sstevel@tonic-gate } 10650Sstevel@tonic-gate 10660Sstevel@tonic-gate /* 10670Sstevel@tonic-gate * Allocate a block in a cylinder group. 10680Sstevel@tonic-gate * 10690Sstevel@tonic-gate * This algorithm implements the following policy: 10700Sstevel@tonic-gate * 1) allocate the requested block. 10710Sstevel@tonic-gate * 2) allocate a rotationally optimal block in the same cylinder. 10720Sstevel@tonic-gate * 3) allocate the next available block on the block rotor for the 10730Sstevel@tonic-gate * specified cylinder group. 10740Sstevel@tonic-gate * Note that this routine only allocates fs_bsize blocks; these 10750Sstevel@tonic-gate * blocks may be fragmented by the routine that allocates them. 10760Sstevel@tonic-gate */ 10770Sstevel@tonic-gate static daddr_t 10780Sstevel@tonic-gate alloccgblk( 10790Sstevel@tonic-gate struct ufsvfs *ufsvfsp, 10800Sstevel@tonic-gate struct cg *cgp, 10810Sstevel@tonic-gate daddr_t bpref, 10820Sstevel@tonic-gate struct buf *bp) 10830Sstevel@tonic-gate { 10840Sstevel@tonic-gate daddr_t bno; 10850Sstevel@tonic-gate int cylno, pos, delta, rotbl_size; 10860Sstevel@tonic-gate short *cylbp; 10870Sstevel@tonic-gate int i; 10880Sstevel@tonic-gate struct fs *fs; 10890Sstevel@tonic-gate uchar_t *blksfree; 10900Sstevel@tonic-gate daddr_t blkno, rpos, frag; 10910Sstevel@tonic-gate short *blks; 10920Sstevel@tonic-gate int32_t *blktot; 10930Sstevel@tonic-gate 10940Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock)); 10950Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 10960Sstevel@tonic-gate blksfree = cg_blksfree(cgp); 10970Sstevel@tonic-gate if (bpref == 0) { 10980Sstevel@tonic-gate bpref = cgp->cg_rotor; 10990Sstevel@tonic-gate goto norot; 11000Sstevel@tonic-gate } 11010Sstevel@tonic-gate bpref = blknum(fs, bpref); 11020Sstevel@tonic-gate bpref = dtogd(fs, bpref); 11030Sstevel@tonic-gate /* 11040Sstevel@tonic-gate * If the requested block is available, use it. 11050Sstevel@tonic-gate */ 11060Sstevel@tonic-gate if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) { 11070Sstevel@tonic-gate bno = bpref; 11080Sstevel@tonic-gate goto gotit; 11090Sstevel@tonic-gate } 11100Sstevel@tonic-gate /* 11110Sstevel@tonic-gate * Check for a block available on the same cylinder. 11120Sstevel@tonic-gate */ 11130Sstevel@tonic-gate cylno = cbtocylno(fs, bpref); 11140Sstevel@tonic-gate if (cg_blktot(cgp)[cylno] == 0) 11150Sstevel@tonic-gate goto norot; 11160Sstevel@tonic-gate if (fs->fs_cpc == 0) { 11170Sstevel@tonic-gate /* 11180Sstevel@tonic-gate * Block layout info is not available, so just 11190Sstevel@tonic-gate * have to take any block in this cylinder. 11200Sstevel@tonic-gate */ 11210Sstevel@tonic-gate bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); 11220Sstevel@tonic-gate goto norot; 11230Sstevel@tonic-gate } 11240Sstevel@tonic-gate /* 11250Sstevel@tonic-gate * Check the summary information to see if a block is 11260Sstevel@tonic-gate * available in the requested cylinder starting at the 11270Sstevel@tonic-gate * requested rotational position and proceeding around. 11280Sstevel@tonic-gate */ 11290Sstevel@tonic-gate cylbp = cg_blks(ufsvfsp, cgp, cylno); 11300Sstevel@tonic-gate pos = cbtorpos(ufsvfsp, bpref); 11310Sstevel@tonic-gate for (i = pos; i < ufsvfsp->vfs_nrpos; i++) 11320Sstevel@tonic-gate if (cylbp[i] > 0) 11330Sstevel@tonic-gate break; 11340Sstevel@tonic-gate if (i == ufsvfsp->vfs_nrpos) 11350Sstevel@tonic-gate for (i = 0; i < pos; i++) 11360Sstevel@tonic-gate if (cylbp[i] > 0) 11370Sstevel@tonic-gate break; 11380Sstevel@tonic-gate if (cylbp[i] > 0) { 11390Sstevel@tonic-gate /* 11400Sstevel@tonic-gate * Found a rotational position, now find the actual 11410Sstevel@tonic-gate * block. A "panic" if none is actually there. 11420Sstevel@tonic-gate */ 11430Sstevel@tonic-gate 11440Sstevel@tonic-gate /* 11450Sstevel@tonic-gate * Up to this point, "pos" has referred to the rotational 11460Sstevel@tonic-gate * position of the desired block. From now on, it holds 11470Sstevel@tonic-gate * the offset of the current cylinder within a cylinder 11480Sstevel@tonic-gate * cycle. (A cylinder cycle refers to a set of cylinders 11490Sstevel@tonic-gate * which are described by a single rotational table; the 11500Sstevel@tonic-gate * size of the cycle is fs_cpc.) 11510Sstevel@tonic-gate * 11520Sstevel@tonic-gate * bno is set to the block number of the first block within 11530Sstevel@tonic-gate * the current cylinder cycle. 11540Sstevel@tonic-gate */ 11550Sstevel@tonic-gate 11560Sstevel@tonic-gate pos = cylno % fs->fs_cpc; 11570Sstevel@tonic-gate bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 11580Sstevel@tonic-gate 11590Sstevel@tonic-gate /* 11600Sstevel@tonic-gate * The blocks within a cylinder are grouped into equivalence 11610Sstevel@tonic-gate * classes according to their "rotational position." There 11620Sstevel@tonic-gate * are two tables used to determine these classes. 11630Sstevel@tonic-gate * 11640Sstevel@tonic-gate * The positional offset table (fs_postbl) has an entry for 11650Sstevel@tonic-gate * each rotational position of each cylinder in a cylinder 11660Sstevel@tonic-gate * cycle. This entry contains the relative block number 11670Sstevel@tonic-gate * (counting from the start of the cylinder cycle) of the 11680Sstevel@tonic-gate * first block in the equivalence class for that position 11690Sstevel@tonic-gate * and that cylinder. Positions for which no blocks exist 11700Sstevel@tonic-gate * are indicated by a -1. 11710Sstevel@tonic-gate * 11720Sstevel@tonic-gate * The rotational delta table (fs_rotbl) has an entry for 11730Sstevel@tonic-gate * each block in a cylinder cycle. This entry contains 11740Sstevel@tonic-gate * the offset from that block to the next block in the 11750Sstevel@tonic-gate * same equivalence class. The last block in the class 11760Sstevel@tonic-gate * is indicated by a zero in the table. 11770Sstevel@tonic-gate * 11780Sstevel@tonic-gate * The following code, then, walks through all of the blocks 11790Sstevel@tonic-gate * in the cylinder (cylno) which we're allocating within 11800Sstevel@tonic-gate * which are in the equivalence class for the rotational 11810Sstevel@tonic-gate * position (i) which we're allocating within. 11820Sstevel@tonic-gate */ 11830Sstevel@tonic-gate 11840Sstevel@tonic-gate if (fs_postbl(ufsvfsp, pos)[i] == -1) { 11850Sstevel@tonic-gate (void) ufs_fault(ufsvfsp->vfs_root, 1186*923Ssdebnath "alloccgblk: cyl groups corrupted, pos = %d, " 1187*923Ssdebnath "i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 11880Sstevel@tonic-gate return (0); 11890Sstevel@tonic-gate } 11900Sstevel@tonic-gate 11910Sstevel@tonic-gate /* 11920Sstevel@tonic-gate * There is one entry in the rotational table for each block 11930Sstevel@tonic-gate * in the cylinder cycle. These are whole blocks, not frags. 11940Sstevel@tonic-gate */ 11950Sstevel@tonic-gate 11960Sstevel@tonic-gate rotbl_size = (fs->fs_cpc * fs->fs_spc) >> 11970Sstevel@tonic-gate (fs->fs_fragshift + fs->fs_fsbtodb); 11980Sstevel@tonic-gate 11990Sstevel@tonic-gate /* 12000Sstevel@tonic-gate * As we start, "i" is the rotational position within which 12010Sstevel@tonic-gate * we're searching. After the next line, it will be a block 12020Sstevel@tonic-gate * number (relative to the start of the cylinder cycle) 12030Sstevel@tonic-gate * within the equivalence class of that rotational position. 12040Sstevel@tonic-gate */ 12050Sstevel@tonic-gate 12060Sstevel@tonic-gate i = fs_postbl(ufsvfsp, pos)[i]; 12070Sstevel@tonic-gate 12080Sstevel@tonic-gate for (;;) { 12090Sstevel@tonic-gate if (isblock(fs, blksfree, (daddr_t)(bno + i))) { 12100Sstevel@tonic-gate bno = blkstofrags(fs, (bno + i)); 12110Sstevel@tonic-gate goto gotit; 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate delta = fs_rotbl(fs)[i]; 12140Sstevel@tonic-gate if (delta <= 0 || /* End of chain, or */ 12150Sstevel@tonic-gate delta + i > rotbl_size) /* end of table? */ 12160Sstevel@tonic-gate break; /* If so, panic. */ 12170Sstevel@tonic-gate i += delta; 12180Sstevel@tonic-gate } 12190Sstevel@tonic-gate (void) ufs_fault(ufsvfsp->vfs_root, 1220*923Ssdebnath "alloccgblk: can't find blk in cyl, pos:%d, i:%d, " 1221*923Ssdebnath "fs:%s bno: %x\n", pos, i, fs->fs_fsmnt, (int)bno); 12220Sstevel@tonic-gate return (0); 12230Sstevel@tonic-gate } 12240Sstevel@tonic-gate norot: 12250Sstevel@tonic-gate /* 12260Sstevel@tonic-gate * No blocks in the requested cylinder, so take 12270Sstevel@tonic-gate * next available one in this cylinder group. 12280Sstevel@tonic-gate */ 12290Sstevel@tonic-gate bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag); 12300Sstevel@tonic-gate if (bno < 0) 12310Sstevel@tonic-gate return (0); 12320Sstevel@tonic-gate cgp->cg_rotor = bno; 12330Sstevel@tonic-gate gotit: 12340Sstevel@tonic-gate blkno = fragstoblks(fs, bno); 12350Sstevel@tonic-gate frag = (cgp->cg_cgx * fs->fs_fpg) + bno; 12360Sstevel@tonic-gate if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize)) 12370Sstevel@tonic-gate goto norot; 12380Sstevel@tonic-gate clrblock(fs, blksfree, (long)blkno); 12390Sstevel@tonic-gate /* 12400Sstevel@tonic-gate * the other cg/sb/si fields are TRANS'ed by the caller 12410Sstevel@tonic-gate */ 12420Sstevel@tonic-gate cgp->cg_cs.cs_nbfree--; 12430Sstevel@tonic-gate fs->fs_cstotal.cs_nbfree--; 12440Sstevel@tonic-gate fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 12450Sstevel@tonic-gate cylno = cbtocylno(fs, bno); 12460Sstevel@tonic-gate blks = cg_blks(ufsvfsp, cgp, cylno); 12470Sstevel@tonic-gate rpos = cbtorpos(ufsvfsp, bno); 12480Sstevel@tonic-gate blktot = cg_blktot(cgp); 12490Sstevel@tonic-gate blks[rpos]--; 12500Sstevel@tonic-gate blktot[cylno]--; 12510Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 12520Sstevel@tonic-gate fs->fs_fmod = 1; 12530Sstevel@tonic-gate return (frag); 12540Sstevel@tonic-gate } 12550Sstevel@tonic-gate 12560Sstevel@tonic-gate /* 12570Sstevel@tonic-gate * Determine whether an inode can be allocated. 12580Sstevel@tonic-gate * 12590Sstevel@tonic-gate * Check to see if an inode is available, and if it is, 12600Sstevel@tonic-gate * allocate it using the following policy: 12610Sstevel@tonic-gate * 1) allocate the requested inode. 12620Sstevel@tonic-gate * 2) allocate the next available inode after the requested 12630Sstevel@tonic-gate * inode in the specified cylinder group. 12640Sstevel@tonic-gate */ 12650Sstevel@tonic-gate static ino_t 12660Sstevel@tonic-gate ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 12670Sstevel@tonic-gate { 12680Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 12690Sstevel@tonic-gate struct fs *fs = ip->i_fs; 12700Sstevel@tonic-gate struct cg *cgp; 12710Sstevel@tonic-gate struct buf *bp; 12720Sstevel@tonic-gate int start, len, loc, map, i; 12730Sstevel@tonic-gate char *iused; 12740Sstevel@tonic-gate 12750Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nifree == 0) 12760Sstevel@tonic-gate return (0); 12770Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 12780Sstevel@tonic-gate (int)fs->fs_cgsize); 12790Sstevel@tonic-gate 12800Sstevel@tonic-gate cgp = bp->b_un.b_cg; 12810Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) || 12820Sstevel@tonic-gate cgp->cg_cs.cs_nifree == 0) { 12830Sstevel@tonic-gate brelse(bp); 12840Sstevel@tonic-gate return (0); 12850Sstevel@tonic-gate } 12860Sstevel@tonic-gate iused = cg_inosused(cgp); 12870Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 12880Sstevel@tonic-gate /* 12890Sstevel@tonic-gate * While we are waiting for the mutex, someone may have taken 12900Sstevel@tonic-gate * the last available inode. Need to recheck. 12910Sstevel@tonic-gate */ 12920Sstevel@tonic-gate if (cgp->cg_cs.cs_nifree == 0) { 12930Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 12940Sstevel@tonic-gate brelse(bp); 12950Sstevel@tonic-gate return (0); 12960Sstevel@tonic-gate } 12970Sstevel@tonic-gate 12980Sstevel@tonic-gate cgp->cg_time = gethrestime_sec(); 12990Sstevel@tonic-gate if (ipref) { 13000Sstevel@tonic-gate ipref %= fs->fs_ipg; 13010Sstevel@tonic-gate if (isclr(iused, ipref)) 13020Sstevel@tonic-gate goto gotit; 13030Sstevel@tonic-gate } 13040Sstevel@tonic-gate start = cgp->cg_irotor / NBBY; 13050Sstevel@tonic-gate len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 13060Sstevel@tonic-gate loc = skpc(0xff, (uint_t)len, &iused[start]); 13070Sstevel@tonic-gate if (loc == 0) { 13080Sstevel@tonic-gate len = start + 1; 13090Sstevel@tonic-gate start = 0; 13100Sstevel@tonic-gate loc = skpc(0xff, (uint_t)len, &iused[0]); 13110Sstevel@tonic-gate if (loc == 0) { 13120Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 13130Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), 1314*923Ssdebnath "ialloccg: map corrupted, cg = %d, irotor = %d, " 1315*923Ssdebnath "fs = %s\n", cg, (int)cgp->cg_irotor, fs->fs_fsmnt); 13160Sstevel@tonic-gate return (0); 13170Sstevel@tonic-gate } 13180Sstevel@tonic-gate } 13190Sstevel@tonic-gate i = start + len - loc; 13200Sstevel@tonic-gate map = iused[i]; 13210Sstevel@tonic-gate ipref = i * NBBY; 13220Sstevel@tonic-gate for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 13230Sstevel@tonic-gate if ((map & i) == 0) { 13240Sstevel@tonic-gate cgp->cg_irotor = ipref; 13250Sstevel@tonic-gate goto gotit; 13260Sstevel@tonic-gate } 13270Sstevel@tonic-gate } 13280Sstevel@tonic-gate 13290Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 13300Sstevel@tonic-gate (void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s", 13310Sstevel@tonic-gate fs->fs_fsmnt); 13320Sstevel@tonic-gate return (0); 13330Sstevel@tonic-gate gotit: 13340Sstevel@tonic-gate setbit(iused, ipref); 13350Sstevel@tonic-gate cgp->cg_cs.cs_nifree--; 13360Sstevel@tonic-gate fs->fs_cstotal.cs_nifree--; 13370Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_nifree--; 13380Sstevel@tonic-gate if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) { 13390Sstevel@tonic-gate cgp->cg_cs.cs_ndir++; 13400Sstevel@tonic-gate fs->fs_cstotal.cs_ndir++; 13410Sstevel@tonic-gate fs->fs_cs(fs, cg).cs_ndir++; 13420Sstevel@tonic-gate } 13430Sstevel@tonic-gate fs->fs_fmod = 1; 13440Sstevel@tonic-gate ufs_notclean(ufsvfsp); 13450Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG); 13460Sstevel@tonic-gate TRANS_SI(ufsvfsp, fs, cg); 13470Sstevel@tonic-gate bdrwrite(bp); 13480Sstevel@tonic-gate return (cg * fs->fs_ipg + ipref); 13490Sstevel@tonic-gate } 13500Sstevel@tonic-gate 13510Sstevel@tonic-gate /* 13520Sstevel@tonic-gate * Find a block of the specified size in the specified cylinder group. 13530Sstevel@tonic-gate * 13540Sstevel@tonic-gate * It is a panic if a request is made to find a block if none are 13550Sstevel@tonic-gate * available. 13560Sstevel@tonic-gate */ 13570Sstevel@tonic-gate static daddr_t 13580Sstevel@tonic-gate mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref, 13590Sstevel@tonic-gate int allocsiz) 13600Sstevel@tonic-gate { 13610Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 13620Sstevel@tonic-gate daddr_t bno, cfrag; 13630Sstevel@tonic-gate int start, len, loc, i, last, first, secondtime; 13640Sstevel@tonic-gate int blk, field, subfield, pos; 13650Sstevel@tonic-gate int gotit; 13660Sstevel@tonic-gate 13670Sstevel@tonic-gate /* 13680Sstevel@tonic-gate * ufsvfs->vfs_lock is held when calling this. 13690Sstevel@tonic-gate */ 13700Sstevel@tonic-gate /* 13710Sstevel@tonic-gate * Find the fragment by searching through the 13720Sstevel@tonic-gate * free block map for an appropriate bit pattern. 13730Sstevel@tonic-gate */ 13740Sstevel@tonic-gate if (bpref) 13750Sstevel@tonic-gate start = dtogd(fs, bpref) / NBBY; 13760Sstevel@tonic-gate else 13770Sstevel@tonic-gate start = cgp->cg_frotor / NBBY; 13780Sstevel@tonic-gate /* 13790Sstevel@tonic-gate * the following loop performs two scans -- the first scan 13800Sstevel@tonic-gate * searches the bottom half of the array for a match and the 13810Sstevel@tonic-gate * second scan searches the top half of the array. The loops 13820Sstevel@tonic-gate * have been merged just to make things difficult. 13830Sstevel@tonic-gate */ 13840Sstevel@tonic-gate first = start; 13850Sstevel@tonic-gate last = howmany(fs->fs_fpg, NBBY); 13860Sstevel@tonic-gate secondtime = 0; 13870Sstevel@tonic-gate cfrag = cgp->cg_cgx * fs->fs_fpg; 13880Sstevel@tonic-gate while (first < last) { 13890Sstevel@tonic-gate len = last - first; 13900Sstevel@tonic-gate /* 13910Sstevel@tonic-gate * search the array for a match 13920Sstevel@tonic-gate */ 13930Sstevel@tonic-gate loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first], 13940Sstevel@tonic-gate (uchar_t *)fragtbl[fs->fs_frag], 13950Sstevel@tonic-gate (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 13960Sstevel@tonic-gate /* 13970Sstevel@tonic-gate * match found 13980Sstevel@tonic-gate */ 13990Sstevel@tonic-gate if (loc) { 14000Sstevel@tonic-gate bno = (last - loc) * NBBY; 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate /* 14030Sstevel@tonic-gate * Found the byte in the map, sift 14040Sstevel@tonic-gate * through the bits to find the selected frag 14050Sstevel@tonic-gate */ 14060Sstevel@tonic-gate cgp->cg_frotor = bno; 14070Sstevel@tonic-gate gotit = 0; 14080Sstevel@tonic-gate for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 14090Sstevel@tonic-gate blk = blkmap(fs, cg_blksfree(cgp), bno); 14100Sstevel@tonic-gate blk <<= 1; 14110Sstevel@tonic-gate field = around[allocsiz]; 14120Sstevel@tonic-gate subfield = inside[allocsiz]; 14130Sstevel@tonic-gate for (pos = 0; 14140Sstevel@tonic-gate pos <= fs->fs_frag - allocsiz; 14150Sstevel@tonic-gate pos++) { 14160Sstevel@tonic-gate if ((blk & field) == subfield) { 14170Sstevel@tonic-gate gotit++; 14180Sstevel@tonic-gate break; 14190Sstevel@tonic-gate } 14200Sstevel@tonic-gate field <<= 1; 14210Sstevel@tonic-gate subfield <<= 1; 14220Sstevel@tonic-gate } 14230Sstevel@tonic-gate if (gotit) 14240Sstevel@tonic-gate break; 14250Sstevel@tonic-gate } 14260Sstevel@tonic-gate bno += pos; 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate /* 14290Sstevel@tonic-gate * success if block is *not* being converted from 14300Sstevel@tonic-gate * metadata into userdata (harpy). If so, ignore. 14310Sstevel@tonic-gate */ 14320Sstevel@tonic-gate if (!TRANS_ISCANCEL(ufsvfsp, 1433*923Ssdebnath ldbtob(fsbtodb(fs, (cfrag+bno))), 1434*923Ssdebnath allocsiz * fs->fs_fsize)) 14350Sstevel@tonic-gate return (bno); 1436*923Ssdebnath 14370Sstevel@tonic-gate /* 14380Sstevel@tonic-gate * keep looking -- this block is being converted 14390Sstevel@tonic-gate */ 14400Sstevel@tonic-gate first = (last - loc) + 1; 14410Sstevel@tonic-gate loc = 0; 14420Sstevel@tonic-gate if (first < last) 14430Sstevel@tonic-gate continue; 14440Sstevel@tonic-gate } 14450Sstevel@tonic-gate /* 14460Sstevel@tonic-gate * no usable matches in bottom half -- now search the top half 14470Sstevel@tonic-gate */ 14480Sstevel@tonic-gate if (secondtime) 14490Sstevel@tonic-gate /* 14500Sstevel@tonic-gate * no usable matches in top half -- all done 14510Sstevel@tonic-gate */ 14520Sstevel@tonic-gate break; 14530Sstevel@tonic-gate secondtime = 1; 14540Sstevel@tonic-gate last = start + 1; 14550Sstevel@tonic-gate first = 0; 14560Sstevel@tonic-gate } 14570Sstevel@tonic-gate /* 14580Sstevel@tonic-gate * no usable matches 14590Sstevel@tonic-gate */ 14600Sstevel@tonic-gate return ((daddr_t)-1); 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate 14630Sstevel@tonic-gate #define UFSNADDR (NDADDR + NIADDR) /* NADDR applies to (obsolete) S5FS */ 14640Sstevel@tonic-gate #define IB(i) (NDADDR + (i)) /* index of i'th indirect block ptr */ 14650Sstevel@tonic-gate #define SINGLE 0 /* single indirect block ptr */ 14660Sstevel@tonic-gate #define DOUBLE 1 /* double indirect block ptr */ 14670Sstevel@tonic-gate #define TRIPLE 2 /* triple indirect block ptr */ 14680Sstevel@tonic-gate 14690Sstevel@tonic-gate /* 1470*923Ssdebnath * Acquire a write lock, and keep trying till we get it 1471*923Ssdebnath */ 1472*923Ssdebnath static int 1473*923Ssdebnath allocsp_wlockfs(struct vnode *vp, struct lockfs *lf) 1474*923Ssdebnath { 1475*923Ssdebnath int err = 0; 1476*923Ssdebnath 1477*923Ssdebnath lockagain: 1478*923Ssdebnath do { 1479*923Ssdebnath err = ufs_fiolfss(vp, lf); 1480*923Ssdebnath if (err) 1481*923Ssdebnath return (err); 1482*923Ssdebnath } while (!LOCKFS_IS_ULOCK(lf)); 1483*923Ssdebnath 1484*923Ssdebnath lf->lf_lock = LOCKFS_WLOCK; 1485*923Ssdebnath lf->lf_flags = 0; 1486*923Ssdebnath lf->lf_comment = NULL; 1487*923Ssdebnath err = ufs__fiolfs(vp, lf, 1, 0); 1488*923Ssdebnath 1489*923Ssdebnath if (err == EBUSY || err == EINVAL) 1490*923Ssdebnath goto lockagain; 1491*923Ssdebnath 1492*923Ssdebnath return (err); 1493*923Ssdebnath } 1494*923Ssdebnath 1495*923Ssdebnath /* 1496*923Ssdebnath * Release the write lock 1497*923Ssdebnath */ 1498*923Ssdebnath static int 1499*923Ssdebnath allocsp_unlockfs(struct vnode *vp, struct lockfs *lf) 1500*923Ssdebnath { 1501*923Ssdebnath int err = 0; 1502*923Ssdebnath 1503*923Ssdebnath lf->lf_lock = LOCKFS_ULOCK; 1504*923Ssdebnath lf->lf_flags = 0; 1505*923Ssdebnath err = ufs__fiolfs(vp, lf, 1, 0); 1506*923Ssdebnath return (err); 1507*923Ssdebnath } 1508*923Ssdebnath 1509*923Ssdebnath struct allocsp_undo { 1510*923Ssdebnath daddr_t offset; 1511*923Ssdebnath daddr_t blk; 1512*923Ssdebnath struct allocsp_undo *next; 1513*923Ssdebnath }; 1514*923Ssdebnath 1515*923Ssdebnath /* 1516*923Ssdebnath * ufs_allocsp() can be used to pre-allocate blocks for a file on a given 1517*923Ssdebnath * file system. The blocks are not initialized and are only marked as allocated. 1518*923Ssdebnath * These addresses are then stored as negative block numbers in the inode to 1519*923Ssdebnath * imply special handling. UFS has been modified where necessary to understand 1520*923Ssdebnath * this new notion. Successfully fallocated files will have IFALLOCATE cflag 1521*923Ssdebnath * set in the inode. 1522*923Ssdebnath */ 1523*923Ssdebnath int 1524*923Ssdebnath ufs_allocsp(struct vnode *vp, struct flock64 *lp, cred_t *cr) 1525*923Ssdebnath { 1526*923Ssdebnath struct lockfs lf; 1527*923Ssdebnath int berr, err, resv, issync; 1528*923Ssdebnath off_t start, istart, len; /* istart, special for idb */ 1529*923Ssdebnath struct inode *ip; 1530*923Ssdebnath struct fs *fs; 1531*923Ssdebnath struct ufsvfs *ufsvfsp; 1532*923Ssdebnath u_offset_t resid, i; 1533*923Ssdebnath daddr32_t db_undo[NDADDR]; /* old direct blocks */ 1534*923Ssdebnath struct allocsp_undo *ib_undo = NULL; /* ib undo */ 1535*923Ssdebnath struct allocsp_undo *undo = NULL; 1536*923Ssdebnath u_offset_t osz; /* old file size */ 1537*923Ssdebnath int chunkblks = 0; /* # of blocks in 1 allocation */ 1538*923Ssdebnath int cnt = 0; 1539*923Ssdebnath daddr_t allocblk; 1540*923Ssdebnath daddr_t totblks = 0; 1541*923Ssdebnath struct ulockfs *ulp; 1542*923Ssdebnath 1543*923Ssdebnath ASSERT(vp->v_type == VREG); 1544*923Ssdebnath 1545*923Ssdebnath ip = VTOI(vp); 1546*923Ssdebnath fs = ip->i_fs; 1547*923Ssdebnath if ((ufsvfsp = ip->i_ufsvfs) == NULL) { 1548*923Ssdebnath err = EIO; 1549*923Ssdebnath goto out_allocsp; 1550*923Ssdebnath } 1551*923Ssdebnath 1552*923Ssdebnath istart = start = blkroundup(fs, (lp->l_start)); 1553*923Ssdebnath len = blkroundup(fs, (lp->l_len)); 1554*923Ssdebnath chunkblks = blkroundup(fs, ufsvfsp->vfs_iotransz) / fs->fs_bsize; 1555*923Ssdebnath ulp = &ufsvfsp->vfs_ulockfs; 1556*923Ssdebnath 1557*923Ssdebnath if (lp->l_start < 0 || lp->l_len <= 0) 1558*923Ssdebnath return (EINVAL); 1559*923Ssdebnath 1560*923Ssdebnath /* Quickly check to make sure we have space before we proceed */ 1561*923Ssdebnath if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) { 1562*923Ssdebnath if (TRANS_ISTRANS(ufsvfsp)) { 1563*923Ssdebnath ufs_delete_drain_wait(ufsvfsp, 1); 1564*923Ssdebnath if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) 1565*923Ssdebnath return (ENOSPC); 1566*923Ssdebnath } else 1567*923Ssdebnath return (ENOSPC); 1568*923Ssdebnath } 1569*923Ssdebnath 1570*923Ssdebnath /* 1571*923Ssdebnath * We will keep i_rwlock locked as WRITER through out the function 1572*923Ssdebnath * since we don't want anyone else reading or writing to the inode 1573*923Ssdebnath * while we are in the middle of fallocating the file. 1574*923Ssdebnath */ 1575*923Ssdebnath rw_enter(&ip->i_rwlock, RW_WRITER); 1576*923Ssdebnath 1577*923Ssdebnath /* Back up the direct block list, used for undo later if necessary */ 1578*923Ssdebnath rw_enter(&ip->i_contents, RW_READER); 1579*923Ssdebnath for (i = 0; i < NDADDR; i++) 1580*923Ssdebnath db_undo[i] = ip->i_db[i]; 1581*923Ssdebnath osz = ip->i_size; 1582*923Ssdebnath rw_exit(&ip->i_contents); 1583*923Ssdebnath 1584*923Ssdebnath /* Allocate any direct blocks now before we write lock the fs */ 1585*923Ssdebnath if (lblkno(fs, start) < NDADDR) { 1586*923Ssdebnath ufs_trans_trunc_resv(ip, ip->i_size + (NDADDR * fs->fs_bsize), 1587*923Ssdebnath &resv, &resid); 1588*923Ssdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1589*923Ssdebnath 1590*923Ssdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1591*923Ssdebnath rw_enter(&ip->i_contents, RW_WRITER); 1592*923Ssdebnath 1593*923Ssdebnath for (i = start; (i < len) && (lblkno(fs, i) < NDADDR); 1594*923Ssdebnath i += fs->fs_bsize) { 1595*923Ssdebnath berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE, 1596*923Ssdebnath &allocblk, cr); 1597*923Ssdebnath /* Yikes error, quit */ 1598*923Ssdebnath if (berr) { 1599*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1600*923Ssdebnath rw_exit(&ip->i_contents); 1601*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1602*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1603*923Ssdebnath TOP_ALLOCSP, resv); 1604*923Ssdebnath goto exit; 1605*923Ssdebnath } 1606*923Ssdebnath 1607*923Ssdebnath if (allocblk) { 1608*923Ssdebnath totblks++; 1609*923Ssdebnath ip->i_size += fs->fs_bsize; 1610*923Ssdebnath } 1611*923Ssdebnath } 1612*923Ssdebnath 1613*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1614*923Ssdebnath rw_exit(&ip->i_contents); 1615*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1616*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1617*923Ssdebnath 1618*923Ssdebnath istart = i; /* start offset for indirect allocation */ 1619*923Ssdebnath } 1620*923Ssdebnath 1621*923Ssdebnath /* Write lock the file system */ 1622*923Ssdebnath if (err = allocsp_wlockfs(vp, &lf)) 1623*923Ssdebnath goto exit; 1624*923Ssdebnath 1625*923Ssdebnath /* Break the transactions into vfs_iotransz units */ 1626*923Ssdebnath ufs_trans_trunc_resv(ip, ip->i_size + 1627*923Ssdebnath blkroundup(fs, ufsvfsp->vfs_iotransz), &resv, &resid); 1628*923Ssdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1629*923Ssdebnath 1630*923Ssdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1631*923Ssdebnath rw_enter(&ip->i_contents, RW_WRITER); 1632*923Ssdebnath 1633*923Ssdebnath /* Now go about fallocating necessary indirect blocks */ 1634*923Ssdebnath for (i = istart; i < len; i += fs->fs_bsize) { 1635*923Ssdebnath berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE, 1636*923Ssdebnath &allocblk, cr); 1637*923Ssdebnath if (berr) { 1638*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1639*923Ssdebnath rw_exit(&ip->i_contents); 1640*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1641*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1642*923Ssdebnath TOP_ALLOCSP, resv); 1643*923Ssdebnath err = allocsp_unlockfs(vp, &lf); 1644*923Ssdebnath goto exit; 1645*923Ssdebnath } 1646*923Ssdebnath 1647*923Ssdebnath /* Update the blk counter only if new block was added */ 1648*923Ssdebnath if (allocblk) { 1649*923Ssdebnath /* Save undo information */ 1650*923Ssdebnath undo = kmem_alloc(sizeof (struct allocsp_undo), 1651*923Ssdebnath KM_SLEEP); 1652*923Ssdebnath undo->offset = i; 1653*923Ssdebnath undo->blk = allocblk; 1654*923Ssdebnath undo->next = ib_undo; 1655*923Ssdebnath ib_undo = undo; 1656*923Ssdebnath totblks++; 1657*923Ssdebnath ip->i_size += fs->fs_bsize; 1658*923Ssdebnath } 1659*923Ssdebnath cnt++; 1660*923Ssdebnath 1661*923Ssdebnath /* Being a good UFS citizen, let others get a share */ 1662*923Ssdebnath if (cnt == chunkblks) { 1663*923Ssdebnath /* 1664*923Ssdebnath * If there are waiters or the fs is hard locked, 1665*923Ssdebnath * error locked, or read-only error locked, 1666*923Ssdebnath * quit with EIO 1667*923Ssdebnath */ 1668*923Ssdebnath if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) || 1669*923Ssdebnath ULOCKFS_IS_ROELOCK(ulp)) { 1670*923Ssdebnath ip->i_cflags |= IFALLOCATE; 1671*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1672*923Ssdebnath rw_exit(&ip->i_contents); 1673*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1674*923Ssdebnath 1675*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1676*923Ssdebnath TOP_ALLOCSP, resv); 1677*923Ssdebnath rw_exit(&ip->i_rwlock); 1678*923Ssdebnath return (EIO); 1679*923Ssdebnath } 1680*923Ssdebnath 1681*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1682*923Ssdebnath rw_exit(&ip->i_contents); 1683*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1684*923Ssdebnath 1685*923Ssdebnath /* End the current transaction */ 1686*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, 1687*923Ssdebnath TOP_ALLOCSP, resv); 1688*923Ssdebnath 1689*923Ssdebnath if (CV_HAS_WAITERS(&ulp->ul_cv)) { 1690*923Ssdebnath /* Release the write lock */ 1691*923Ssdebnath if (err = allocsp_unlockfs(vp, &lf)) 1692*923Ssdebnath goto exit; 1693*923Ssdebnath 1694*923Ssdebnath /* Wake up others waiting to do operations */ 1695*923Ssdebnath mutex_enter(&ulp->ul_lock); 1696*923Ssdebnath cv_broadcast(&ulp->ul_cv); 1697*923Ssdebnath mutex_exit(&ulp->ul_lock); 1698*923Ssdebnath 1699*923Ssdebnath /* Grab the write lock again */ 1700*923Ssdebnath if (err = allocsp_wlockfs(vp, &lf)) 1701*923Ssdebnath goto exit; 1702*923Ssdebnath } /* end of CV_HAS_WAITERS(&ulp->ul_cv) */ 1703*923Ssdebnath 1704*923Ssdebnath /* Reserve more space in log for this file */ 1705*923Ssdebnath ufs_trans_trunc_resv(ip, 1706*923Ssdebnath ip->i_size + blkroundup(fs, ufsvfsp->vfs_iotransz), 1707*923Ssdebnath &resv, &resid); 1708*923Ssdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1709*923Ssdebnath 1710*923Ssdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1711*923Ssdebnath rw_enter(&ip->i_contents, RW_WRITER); 1712*923Ssdebnath 1713*923Ssdebnath cnt = 0; /* reset cnt b/c of new transaction */ 1714*923Ssdebnath } 1715*923Ssdebnath } 1716*923Ssdebnath 1717*923Ssdebnath if (!err && !berr) 1718*923Ssdebnath ip->i_cflags |= IFALLOCATE; 1719*923Ssdebnath 1720*923Ssdebnath /* Release locks, end log transaction and unlock fs */ 1721*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1722*923Ssdebnath rw_exit(&ip->i_contents); 1723*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1724*923Ssdebnath 1725*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1726*923Ssdebnath err = allocsp_unlockfs(vp, &lf); 1727*923Ssdebnath 1728*923Ssdebnath /* 1729*923Ssdebnath * @ exit label, we should no longer be holding the fs write lock, and 1730*923Ssdebnath * all logging transactions should have been ended. We still hold 1731*923Ssdebnath * ip->i_rwlock. 1732*923Ssdebnath */ 1733*923Ssdebnath exit: 1734*923Ssdebnath /* 1735*923Ssdebnath * File has grown larger than 2GB. Set flag 1736*923Ssdebnath * in superblock to indicate this, if it 1737*923Ssdebnath * is not already set. 1738*923Ssdebnath */ 1739*923Ssdebnath if ((ip->i_size > MAXOFF32_T) && 1740*923Ssdebnath !(fs->fs_flags & FSLARGEFILES)) { 1741*923Ssdebnath ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 1742*923Ssdebnath mutex_enter(&ufsvfsp->vfs_lock); 1743*923Ssdebnath fs->fs_flags |= FSLARGEFILES; 1744*923Ssdebnath ufs_sbwrite(ufsvfsp); 1745*923Ssdebnath mutex_exit(&ufsvfsp->vfs_lock); 1746*923Ssdebnath } 1747*923Ssdebnath 1748*923Ssdebnath /* 1749*923Ssdebnath * Since we couldn't allocate completely, we will undo the allocations. 1750*923Ssdebnath */ 1751*923Ssdebnath if (berr) { 1752*923Ssdebnath ufs_trans_trunc_resv(ip, totblks * fs->fs_bsize, &resv, &resid); 1753*923Ssdebnath TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv); 1754*923Ssdebnath 1755*923Ssdebnath rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1756*923Ssdebnath rw_enter(&ip->i_contents, RW_WRITER); 1757*923Ssdebnath 1758*923Ssdebnath /* Direct blocks */ 1759*923Ssdebnath for (i = 0; i < NDADDR; i++) { 1760*923Ssdebnath /* 1761*923Ssdebnath * Only free the block if they are not same, and 1762*923Ssdebnath * the old one isn't zero (the fragment was 1763*923Ssdebnath * re-allocated). 1764*923Ssdebnath */ 1765*923Ssdebnath if (db_undo[i] != ip->i_db[i] && db_undo[i] == 0) { 1766*923Ssdebnath free(ip, ip->i_db[i], fs->fs_bsize, 0); 1767*923Ssdebnath ip->i_db[i] = 0; 1768*923Ssdebnath } 1769*923Ssdebnath } 1770*923Ssdebnath 1771*923Ssdebnath /* Undo the indirect blocks */ 1772*923Ssdebnath while (ib_undo != NULL) { 1773*923Ssdebnath undo = ib_undo; 1774*923Ssdebnath err = bmap_set_bn(vp, undo->offset, 0); 1775*923Ssdebnath if (err) 1776*923Ssdebnath cmn_err(CE_PANIC, "ufs_allocsp(): failed to " 1777*923Ssdebnath "undo allocation of block %ld", 1778*923Ssdebnath undo->offset); 1779*923Ssdebnath free(ip, undo->blk, fs->fs_bsize, I_IBLK); 1780*923Ssdebnath ib_undo = undo->next; 1781*923Ssdebnath kmem_free(undo, sizeof (struct allocsp_undo)); 1782*923Ssdebnath } 1783*923Ssdebnath 1784*923Ssdebnath ip->i_size = osz; 1785*923Ssdebnath TRANS_INODE(ufsvfsp, ip); 1786*923Ssdebnath 1787*923Ssdebnath rw_exit(&ip->i_contents); 1788*923Ssdebnath rw_exit(&ufsvfsp->vfs_dqrwlock); 1789*923Ssdebnath 1790*923Ssdebnath TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv); 1791*923Ssdebnath 1792*923Ssdebnath rw_exit(&ip->i_rwlock); 1793*923Ssdebnath return (berr); 1794*923Ssdebnath } 1795*923Ssdebnath 1796*923Ssdebnath /* 1797*923Ssdebnath * Don't forget to free the undo chain :) 1798*923Ssdebnath */ 1799*923Ssdebnath while (ib_undo != NULL) { 1800*923Ssdebnath undo = ib_undo; 1801*923Ssdebnath ib_undo = undo->next; 1802*923Ssdebnath kmem_free(undo, sizeof (struct allocsp_undo)); 1803*923Ssdebnath } 1804*923Ssdebnath 1805*923Ssdebnath rw_exit(&ip->i_rwlock); 1806*923Ssdebnath 1807*923Ssdebnath out_allocsp: 1808*923Ssdebnath return (err); 1809*923Ssdebnath } 1810*923Ssdebnath 1811*923Ssdebnath /* 18120Sstevel@tonic-gate * Free storage space associated with the specified inode. The portion 18130Sstevel@tonic-gate * to be freed is specified by lp->l_start and lp->l_len (already 18140Sstevel@tonic-gate * normalized to a "whence" of 0). 18150Sstevel@tonic-gate * 18160Sstevel@tonic-gate * This is an experimental facility whose continued existence is not 18170Sstevel@tonic-gate * guaranteed. Currently, we only support the special case 18180Sstevel@tonic-gate * of l_len == 0, meaning free to end of file. 18190Sstevel@tonic-gate * 18200Sstevel@tonic-gate * Blocks are freed in reverse order. This FILO algorithm will tend to 18210Sstevel@tonic-gate * maintain a contiguous free list much longer than FIFO. 18220Sstevel@tonic-gate * See also ufs_itrunc() in ufs_inode.c. 18230Sstevel@tonic-gate * 18240Sstevel@tonic-gate * Bug: unused bytes in the last retained block are not cleared. 18250Sstevel@tonic-gate * This may result in a "hole" in the file that does not read as zeroes. 18260Sstevel@tonic-gate */ 18270Sstevel@tonic-gate /* ARGSUSED */ 18280Sstevel@tonic-gate int 18290Sstevel@tonic-gate ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr) 18300Sstevel@tonic-gate { 18310Sstevel@tonic-gate int i; 18320Sstevel@tonic-gate struct inode *ip = VTOI(vp); 18330Sstevel@tonic-gate int error; 18340Sstevel@tonic-gate 18350Sstevel@tonic-gate ASSERT(vp->v_type == VREG); 18360Sstevel@tonic-gate ASSERT(lp->l_start >= 0); /* checked by convoff */ 18370Sstevel@tonic-gate 18380Sstevel@tonic-gate if (lp->l_len != 0) 18390Sstevel@tonic-gate return (EINVAL); 18400Sstevel@tonic-gate 18410Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 18420Sstevel@tonic-gate if (ip->i_size == (u_offset_t)lp->l_start) { 18430Sstevel@tonic-gate rw_exit(&ip->i_contents); 18440Sstevel@tonic-gate return (0); 18450Sstevel@tonic-gate } 18460Sstevel@tonic-gate 18470Sstevel@tonic-gate /* 18480Sstevel@tonic-gate * Check if there is any active mandatory lock on the 18490Sstevel@tonic-gate * range that will be truncated/expanded. 18500Sstevel@tonic-gate */ 18510Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 18520Sstevel@tonic-gate offset_t save_start; 18530Sstevel@tonic-gate 18540Sstevel@tonic-gate save_start = lp->l_start; 18550Sstevel@tonic-gate 18560Sstevel@tonic-gate if (ip->i_size < lp->l_start) { 18570Sstevel@tonic-gate /* 18580Sstevel@tonic-gate * "Truncate up" case: need to make sure there 18590Sstevel@tonic-gate * is no lock beyond current end-of-file. To 18600Sstevel@tonic-gate * do so, we need to set l_start to the size 18610Sstevel@tonic-gate * of the file temporarily. 18620Sstevel@tonic-gate */ 18630Sstevel@tonic-gate lp->l_start = ip->i_size; 18640Sstevel@tonic-gate } 18650Sstevel@tonic-gate lp->l_type = F_WRLCK; 18660Sstevel@tonic-gate lp->l_sysid = 0; 18670Sstevel@tonic-gate lp->l_pid = ttoproc(curthread)->p_pid; 18680Sstevel@tonic-gate i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; 18690Sstevel@tonic-gate rw_exit(&ip->i_contents); 18700Sstevel@tonic-gate if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || 18710Sstevel@tonic-gate lp->l_type != F_UNLCK) { 18720Sstevel@tonic-gate return (i ? i : EAGAIN); 18730Sstevel@tonic-gate } 18740Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 18750Sstevel@tonic-gate 18760Sstevel@tonic-gate lp->l_start = save_start; 18770Sstevel@tonic-gate } 18780Sstevel@tonic-gate 18790Sstevel@tonic-gate /* 18800Sstevel@tonic-gate * Make sure a write isn't in progress (allocating blocks) 18810Sstevel@tonic-gate * by acquiring i_rwlock (we promised ufs_bmap we wouldn't 18820Sstevel@tonic-gate * truncate while it was allocating blocks). 18830Sstevel@tonic-gate * Grab the locks in the right order. 18840Sstevel@tonic-gate */ 18850Sstevel@tonic-gate rw_exit(&ip->i_contents); 18860Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 18870Sstevel@tonic-gate error = TRANS_ITRUNC(ip, (u_offset_t)lp->l_start, 0, cr); 18880Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 18890Sstevel@tonic-gate return (error); 18900Sstevel@tonic-gate } 18910Sstevel@tonic-gate 18920Sstevel@tonic-gate /* 18930Sstevel@tonic-gate * Find a cg with as close to nb contiguous bytes as possible 18940Sstevel@tonic-gate * THIS MAY TAKE MANY DISK READS! 18950Sstevel@tonic-gate * 18960Sstevel@tonic-gate * Implemented in an attempt to allocate contiguous blocks for 18970Sstevel@tonic-gate * writing the ufs log file to, minimizing future disk head seeking 18980Sstevel@tonic-gate */ 18990Sstevel@tonic-gate daddr_t 19000Sstevel@tonic-gate contigpref(ufsvfs_t *ufsvfsp, size_t nb) 19010Sstevel@tonic-gate { 19020Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 19030Sstevel@tonic-gate daddr_t nblk = lblkno(fs, blkroundup(fs, nb)); 19040Sstevel@tonic-gate daddr_t savebno, curbno, cgbno; 19050Sstevel@tonic-gate int cg, cgblks, savecg, savenblk, curnblk; 19060Sstevel@tonic-gate uchar_t *blksfree; 19070Sstevel@tonic-gate buf_t *bp; 19080Sstevel@tonic-gate struct cg *cgp; 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate savenblk = 0; 19110Sstevel@tonic-gate savecg = 0; 19120Sstevel@tonic-gate savebno = 0; 19130Sstevel@tonic-gate for (cg = 0; cg < fs->fs_ncg; ++cg) { 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate /* not enough free blks for a contig check */ 19160Sstevel@tonic-gate if (fs->fs_cs(fs, cg).cs_nbfree < nblk) 19170Sstevel@tonic-gate continue; 19180Sstevel@tonic-gate 19190Sstevel@tonic-gate /* 19200Sstevel@tonic-gate * find the largest contiguous range in this cg 19210Sstevel@tonic-gate */ 19220Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, 1923*923Ssdebnath (daddr_t)fsbtodb(fs, cgtod(fs, cg)), 1924*923Ssdebnath (int)fs->fs_cgsize); 19250Sstevel@tonic-gate cgp = bp->b_un.b_cg; 19260Sstevel@tonic-gate if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) { 19270Sstevel@tonic-gate brelse(bp); 19280Sstevel@tonic-gate continue; 19290Sstevel@tonic-gate } 19300Sstevel@tonic-gate blksfree = cg_blksfree(cgp); /* free array */ 19310Sstevel@tonic-gate cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */ 19320Sstevel@tonic-gate cgbno = 0; 19330Sstevel@tonic-gate while (cgbno < cgblks && savenblk < nblk) { 19340Sstevel@tonic-gate /* find a free block */ 19350Sstevel@tonic-gate for (; cgbno < cgblks; ++cgbno) 19360Sstevel@tonic-gate if (isblock(fs, blksfree, cgbno)) 19370Sstevel@tonic-gate break; 19380Sstevel@tonic-gate curbno = cgbno; 19390Sstevel@tonic-gate /* count the number of free blocks */ 19400Sstevel@tonic-gate for (curnblk = 0; cgbno < cgblks; ++cgbno) { 19410Sstevel@tonic-gate if (!isblock(fs, blksfree, cgbno)) 19420Sstevel@tonic-gate break; 19430Sstevel@tonic-gate if (++curnblk >= nblk) 19440Sstevel@tonic-gate break; 19450Sstevel@tonic-gate } 19460Sstevel@tonic-gate if (curnblk > savenblk) { 19470Sstevel@tonic-gate savecg = cg; 19480Sstevel@tonic-gate savenblk = curnblk; 19490Sstevel@tonic-gate savebno = curbno; 19500Sstevel@tonic-gate } 19510Sstevel@tonic-gate } 19520Sstevel@tonic-gate brelse(bp); 19530Sstevel@tonic-gate if (savenblk >= nblk) 19540Sstevel@tonic-gate break; 19550Sstevel@tonic-gate } 19560Sstevel@tonic-gate 19570Sstevel@tonic-gate /* convert block offset in cg to frag offset in cg */ 19580Sstevel@tonic-gate savebno = blkstofrags(fs, savebno); 19590Sstevel@tonic-gate 19600Sstevel@tonic-gate /* convert frag offset in cg to frag offset in fs */ 19610Sstevel@tonic-gate savebno += (savecg * fs->fs_fpg); 19620Sstevel@tonic-gate 19630Sstevel@tonic-gate return (savebno); 19640Sstevel@tonic-gate } 1965