10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 54662Sfrankho * Common Development and Distribution License (the "License"). 64662Sfrankho * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 225794Svsakar * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 310Sstevel@tonic-gate * under license from the Regents of the University of California. 320Sstevel@tonic-gate */ 330Sstevel@tonic-gate 340Sstevel@tonic-gate #include <sys/sysmacros.h> 350Sstevel@tonic-gate #include <sys/param.h> 360Sstevel@tonic-gate #include <sys/types.h> 370Sstevel@tonic-gate #include <sys/systm.h> 380Sstevel@tonic-gate #include <sys/t_lock.h> 390Sstevel@tonic-gate #include <sys/uio.h> 400Sstevel@tonic-gate #include <sys/kmem.h> 410Sstevel@tonic-gate #include <sys/thread.h> 420Sstevel@tonic-gate #include <sys/vfs.h> 430Sstevel@tonic-gate #include <sys/errno.h> 440Sstevel@tonic-gate #include <sys/buf.h> 450Sstevel@tonic-gate #include <sys/vnode.h> 460Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 470Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 480Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 490Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h> 500Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 510Sstevel@tonic-gate #include <sys/fs/ufs_panic.h> 520Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 530Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 540Sstevel@tonic-gate #include <sys/cmn_err.h> 550Sstevel@tonic-gate #include <sys/file.h> 560Sstevel@tonic-gate #include <sys/debug.h> 570Sstevel@tonic-gate 580Sstevel@tonic-gate 590Sstevel@tonic-gate extern kmutex_t ufsvfs_mutex; 600Sstevel@tonic-gate extern struct ufsvfs *ufs_instances; 610Sstevel@tonic-gate 620Sstevel@tonic-gate /* 630Sstevel@tonic-gate * hlock any file systems w/errored logs 640Sstevel@tonic-gate */ 650Sstevel@tonic-gate int 660Sstevel@tonic-gate ufs_trans_hlock() 670Sstevel@tonic-gate { 680Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 690Sstevel@tonic-gate struct lockfs lockfs; 700Sstevel@tonic-gate int error; 710Sstevel@tonic-gate int retry = 0; 720Sstevel@tonic-gate 730Sstevel@tonic-gate /* 740Sstevel@tonic-gate * find fs's that paniced or have errored logging devices 750Sstevel@tonic-gate */ 760Sstevel@tonic-gate mutex_enter(&ufsvfs_mutex); 770Sstevel@tonic-gate for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) { 780Sstevel@tonic-gate /* 790Sstevel@tonic-gate * not mounted; continue 800Sstevel@tonic-gate */ 810Sstevel@tonic-gate if ((ufsvfsp->vfs_vfs == NULL) || 820Sstevel@tonic-gate (ufsvfsp->vfs_validfs == UT_UNMOUNTED)) 830Sstevel@tonic-gate continue; 840Sstevel@tonic-gate /* 850Sstevel@tonic-gate * disallow unmounts (hlock occurs below) 860Sstevel@tonic-gate */ 870Sstevel@tonic-gate if (TRANS_ISERROR(ufsvfsp)) 880Sstevel@tonic-gate ufsvfsp->vfs_validfs = UT_HLOCKING; 890Sstevel@tonic-gate } 900Sstevel@tonic-gate mutex_exit(&ufsvfs_mutex); 910Sstevel@tonic-gate 920Sstevel@tonic-gate /* 930Sstevel@tonic-gate * hlock the fs's that paniced or have errored logging devices 940Sstevel@tonic-gate */ 950Sstevel@tonic-gate again: 960Sstevel@tonic-gate mutex_enter(&ufsvfs_mutex); 970Sstevel@tonic-gate for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) 980Sstevel@tonic-gate if (ufsvfsp->vfs_validfs == UT_HLOCKING) 990Sstevel@tonic-gate break; 1000Sstevel@tonic-gate mutex_exit(&ufsvfs_mutex); 1010Sstevel@tonic-gate if (ufsvfsp == NULL) 1020Sstevel@tonic-gate return (retry); 1030Sstevel@tonic-gate /* 1040Sstevel@tonic-gate * hlock the file system 1050Sstevel@tonic-gate */ 1060Sstevel@tonic-gate (void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs); 1070Sstevel@tonic-gate if (!LOCKFS_IS_ELOCK(&lockfs)) { 1080Sstevel@tonic-gate lockfs.lf_lock = LOCKFS_HLOCK; 1090Sstevel@tonic-gate lockfs.lf_flags = 0; 1100Sstevel@tonic-gate lockfs.lf_comlen = 0; 1110Sstevel@tonic-gate lockfs.lf_comment = NULL; 1120Sstevel@tonic-gate error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0); 1130Sstevel@tonic-gate /* 1140Sstevel@tonic-gate * retry after awhile; another app currently doing lockfs 1150Sstevel@tonic-gate */ 1160Sstevel@tonic-gate if (error == EBUSY || error == EINVAL) 1170Sstevel@tonic-gate retry = 1; 1180Sstevel@tonic-gate } else { 1190Sstevel@tonic-gate if (ufsfx_get_failure_qlen() > 0) { 1200Sstevel@tonic-gate if (mutex_tryenter(&ufs_fix.uq_mutex)) { 1210Sstevel@tonic-gate ufs_fix.uq_lowat = ufs_fix.uq_ne; 1220Sstevel@tonic-gate cv_broadcast(&ufs_fix.uq_cv); 1230Sstevel@tonic-gate mutex_exit(&ufs_fix.uq_mutex); 1240Sstevel@tonic-gate } 1250Sstevel@tonic-gate } 1260Sstevel@tonic-gate retry = 1; 1270Sstevel@tonic-gate } 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* 1300Sstevel@tonic-gate * allow unmounts 1310Sstevel@tonic-gate */ 1320Sstevel@tonic-gate ufsvfsp->vfs_validfs = UT_MOUNTED; 1330Sstevel@tonic-gate goto again; 1340Sstevel@tonic-gate } 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate /*ARGSUSED*/ 1370Sstevel@tonic-gate void 1380Sstevel@tonic-gate ufs_trans_onerror() 1390Sstevel@tonic-gate { 1400Sstevel@tonic-gate mutex_enter(&ufs_hlock.uq_mutex); 1410Sstevel@tonic-gate ufs_hlock.uq_ne = ufs_hlock.uq_lowat; 1420Sstevel@tonic-gate cv_broadcast(&ufs_hlock.uq_cv); 1430Sstevel@tonic-gate mutex_exit(&ufs_hlock.uq_mutex); 1440Sstevel@tonic-gate } 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate void 1470Sstevel@tonic-gate ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid) 1480Sstevel@tonic-gate { 1490Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1500Sstevel@tonic-gate sbupdate(vfsp); 1510Sstevel@tonic-gate return; 1520Sstevel@tonic-gate } else { 1530Sstevel@tonic-gate 1540Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 1550Sstevel@tonic-gate return; 1560Sstevel@tonic-gate 1570Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 1580Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE); 1590Sstevel@tonic-gate sbupdate(vfsp); 1600Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE); 1610Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 1620Sstevel@tonic-gate } 1630Sstevel@tonic-gate } 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate void 1660Sstevel@tonic-gate ufs_trans_iupdat(struct inode *ip, int waitfor) 1670Sstevel@tonic-gate { 1680Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1710Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 1720Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 1730Sstevel@tonic-gate rw_exit(&ip->i_contents); 1740Sstevel@tonic-gate return; 1750Sstevel@tonic-gate } else { 1760Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 1790Sstevel@tonic-gate return; 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 1820Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 1830Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 1840Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 1850Sstevel@tonic-gate rw_exit(&ip->i_contents); 1860Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 1870Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 1880Sstevel@tonic-gate } 1890Sstevel@tonic-gate } 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate void 1920Sstevel@tonic-gate ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid) 1930Sstevel@tonic-gate { 1940Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1950Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 1960Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 1970Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 1980Sstevel@tonic-gate return; 1990Sstevel@tonic-gate } else { 2000Sstevel@tonic-gate 2010Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 2020Sstevel@tonic-gate return; 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 2050Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 2060Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 2070Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 2080Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2090Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 2100Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 2110Sstevel@tonic-gate } 2120Sstevel@tonic-gate } 2130Sstevel@tonic-gate 2140Sstevel@tonic-gate /*ARGSUSED*/ 2150Sstevel@tonic-gate int 2160Sstevel@tonic-gate ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore) 2170Sstevel@tonic-gate { 2180Sstevel@tonic-gate struct fs *fs; 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 2210Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 2220Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp, 2234662Sfrankho ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize, 2244662Sfrankho (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize); 2250Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2260Sstevel@tonic-gate return (0); 2270Sstevel@tonic-gate } 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate /*ARGSUSED*/ 2300Sstevel@tonic-gate int 2310Sstevel@tonic-gate ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno) 2320Sstevel@tonic-gate { 2330Sstevel@tonic-gate struct buf *bp; 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1); 2360Sstevel@tonic-gate if (bp == NULL) 2370Sstevel@tonic-gate return (ENOENT); 2380Sstevel@tonic-gate 2390Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) { 2400Sstevel@tonic-gate /* 2410Sstevel@tonic-gate * Do not use brwrite() here since the buffer is already 2420Sstevel@tonic-gate * marked for retry or not by the code that called 2430Sstevel@tonic-gate * TRANS_BUF(). 2440Sstevel@tonic-gate */ 2450Sstevel@tonic-gate UFS_BWRITE(ufsvfsp, bp); 2460Sstevel@tonic-gate return (0); 2470Sstevel@tonic-gate } 2480Sstevel@tonic-gate /* 2490Sstevel@tonic-gate * If we did not find the real buf for this block above then 2500Sstevel@tonic-gate * clear the dev so the buf won't be found by mistake 2510Sstevel@tonic-gate * for this block later. We had to allocate at least a 1 byte 2520Sstevel@tonic-gate * buffer to keep brelse happy. 2530Sstevel@tonic-gate */ 2540Sstevel@tonic-gate if (bp->b_bufsize == 1) { 2550Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV; 2560Sstevel@tonic-gate bp->b_edev = NODEV; 2570Sstevel@tonic-gate bp->b_flags = 0; 2580Sstevel@tonic-gate } 2590Sstevel@tonic-gate brelse(bp); 2600Sstevel@tonic-gate return (ENOENT); 2610Sstevel@tonic-gate } 2620Sstevel@tonic-gate 2630Sstevel@tonic-gate /*ARGSUSED*/ 2640Sstevel@tonic-gate int 2650Sstevel@tonic-gate ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino) 2660Sstevel@tonic-gate { 2670Sstevel@tonic-gate int error; 2680Sstevel@tonic-gate struct inode *ip; 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate /* 2710Sstevel@tonic-gate * Grab the quota lock (if the file system has not been forcibly 2720Sstevel@tonic-gate * unmounted). 2730Sstevel@tonic-gate */ 2740Sstevel@tonic-gate if (ufsvfsp) 2750Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2760Sstevel@tonic-gate 2770Sstevel@tonic-gate error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred); 2780Sstevel@tonic-gate 2790Sstevel@tonic-gate if (ufsvfsp) 2800Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2810Sstevel@tonic-gate if (error) 2820Sstevel@tonic-gate return (ENOENT); 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) { 2850Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 2860Sstevel@tonic-gate ufs_iupdat(ip, 1); 2870Sstevel@tonic-gate rw_exit(&ip->i_contents); 2880Sstevel@tonic-gate VN_RELE(ITOV(ip)); 2890Sstevel@tonic-gate return (0); 2900Sstevel@tonic-gate } 2910Sstevel@tonic-gate VN_RELE(ITOV(ip)); 2920Sstevel@tonic-gate return (ENOENT); 2930Sstevel@tonic-gate } 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate #ifdef DEBUG 2960Sstevel@tonic-gate /* 2970Sstevel@tonic-gate * These routines maintain the metadata map (matamap) 2980Sstevel@tonic-gate */ 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate /* 3010Sstevel@tonic-gate * update the metadata map at mount 3020Sstevel@tonic-gate */ 3030Sstevel@tonic-gate static int 3040Sstevel@tonic-gate ufs_trans_mata_mount_scan(struct inode *ip, void *arg) 3050Sstevel@tonic-gate { 3060Sstevel@tonic-gate /* 3070Sstevel@tonic-gate * wrong file system; keep looking 3080Sstevel@tonic-gate */ 3090Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg) 3100Sstevel@tonic-gate return (0); 3110Sstevel@tonic-gate 3120Sstevel@tonic-gate /* 3130Sstevel@tonic-gate * load the metadata map 3140Sstevel@tonic-gate */ 3150Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3160Sstevel@tonic-gate ufs_trans_mata_iget(ip); 3170Sstevel@tonic-gate rw_exit(&ip->i_contents); 3180Sstevel@tonic-gate return (0); 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate 3210Sstevel@tonic-gate void 3220Sstevel@tonic-gate ufs_trans_mata_mount(struct ufsvfs *ufsvfsp) 3230Sstevel@tonic-gate { 3240Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 3250Sstevel@tonic-gate ino_t ino; 3260Sstevel@tonic-gate int i; 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate /* 3290Sstevel@tonic-gate * put static metadata into matamap 3300Sstevel@tonic-gate * superblock 3310Sstevel@tonic-gate * cylinder groups 3320Sstevel@tonic-gate * inode groups 3330Sstevel@tonic-gate * existing inodes 3340Sstevel@tonic-gate */ 3350Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize); 3360Sstevel@tonic-gate 3370Sstevel@tonic-gate for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) { 3380Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, 3390Sstevel@tonic-gate ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize); 3400Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, 3410Sstevel@tonic-gate ldbtob(fsbtodb(fs, itod(fs, ino))), 3420Sstevel@tonic-gate fs->fs_ipg * sizeof (struct dinode)); 3430Sstevel@tonic-gate } 3440Sstevel@tonic-gate (void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp); 3450Sstevel@tonic-gate } 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate /* 3480Sstevel@tonic-gate * clear the metadata map at umount 3490Sstevel@tonic-gate */ 3500Sstevel@tonic-gate void 3510Sstevel@tonic-gate ufs_trans_mata_umount(struct ufsvfs *ufsvfsp) 3520Sstevel@tonic-gate { 3530Sstevel@tonic-gate top_mataclr(ufsvfsp); 3540Sstevel@tonic-gate } 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate /* 3570Sstevel@tonic-gate * summary info (may be extended during growfs test) 3580Sstevel@tonic-gate */ 3590Sstevel@tonic-gate void 3600Sstevel@tonic-gate ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs) 3610Sstevel@tonic-gate { 3620Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)), 3634662Sfrankho fs->fs_cssize); 3640Sstevel@tonic-gate } 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate /* 3670Sstevel@tonic-gate * scan an allocation block (either inode or true block) 3680Sstevel@tonic-gate */ 3690Sstevel@tonic-gate static void 3700Sstevel@tonic-gate ufs_trans_mata_direct( 3710Sstevel@tonic-gate struct inode *ip, 3720Sstevel@tonic-gate daddr_t *fragsp, 3730Sstevel@tonic-gate daddr32_t *blkp, 3740Sstevel@tonic-gate unsigned int nblk) 3750Sstevel@tonic-gate { 3760Sstevel@tonic-gate int i; 3770Sstevel@tonic-gate daddr_t frag; 3780Sstevel@tonic-gate ulong_t nb; 3790Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 3800Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 3810Sstevel@tonic-gate 3820Sstevel@tonic-gate for (i = 0; i < nblk && *fragsp; ++i, ++blkp) 3830Sstevel@tonic-gate if ((frag = *blkp) != 0) { 3840Sstevel@tonic-gate if (*fragsp > fs->fs_frag) { 3850Sstevel@tonic-gate nb = fs->fs_bsize; 3860Sstevel@tonic-gate *fragsp -= fs->fs_frag; 3870Sstevel@tonic-gate } else { 3880Sstevel@tonic-gate nb = *fragsp * fs->fs_fsize; 3890Sstevel@tonic-gate *fragsp = 0; 3900Sstevel@tonic-gate } 3910Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb); 3920Sstevel@tonic-gate } 3930Sstevel@tonic-gate } 3940Sstevel@tonic-gate 3950Sstevel@tonic-gate /* 3960Sstevel@tonic-gate * scan an indirect allocation block (either inode or true block) 3970Sstevel@tonic-gate */ 3980Sstevel@tonic-gate static void 3990Sstevel@tonic-gate ufs_trans_mata_indir( 4000Sstevel@tonic-gate struct inode *ip, 4010Sstevel@tonic-gate daddr_t *fragsp, 4020Sstevel@tonic-gate daddr_t frag, 4030Sstevel@tonic-gate int level) 4040Sstevel@tonic-gate { 4050Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 4060Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 4070Sstevel@tonic-gate int ne = fs->fs_bsize / (int)sizeof (daddr32_t); 4080Sstevel@tonic-gate int i; 4090Sstevel@tonic-gate struct buf *bp; 4100Sstevel@tonic-gate daddr32_t *blkp; 4110Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize); 4140Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 4150Sstevel@tonic-gate brelse(bp); 4160Sstevel@tonic-gate return; 4170Sstevel@tonic-gate } 4180Sstevel@tonic-gate blkp = bp->b_un.b_daddr; 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) || 4210Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)) 4220Sstevel@tonic-gate ufs_trans_mata_direct(ip, fragsp, blkp, ne); 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate if (level) 4250Sstevel@tonic-gate for (i = 0; i < ne && *fragsp; ++i, ++blkp) 4260Sstevel@tonic-gate ufs_trans_mata_indir(ip, fragsp, *blkp, level-1); 4270Sstevel@tonic-gate brelse(bp); 4280Sstevel@tonic-gate } 4290Sstevel@tonic-gate 4300Sstevel@tonic-gate /* 4310Sstevel@tonic-gate * put appropriate metadata into matamap for this inode 4320Sstevel@tonic-gate */ 4330Sstevel@tonic-gate void 4340Sstevel@tonic-gate ufs_trans_mata_iget(struct inode *ip) 4350Sstevel@tonic-gate { 4360Sstevel@tonic-gate int i; 4370Sstevel@tonic-gate daddr_t frags = dbtofsb(ip->i_fs, ip->i_blocks); 4380Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4390Sstevel@tonic-gate 4400Sstevel@tonic-gate if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) || 4410Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))) 4420Sstevel@tonic-gate ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR); 4430Sstevel@tonic-gate 4440Sstevel@tonic-gate if (frags) 4450Sstevel@tonic-gate ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR); 4460Sstevel@tonic-gate 4470Sstevel@tonic-gate for (i = 0; i < NIADDR && frags; ++i) 4480Sstevel@tonic-gate if (ip->i_ib[i]) 4490Sstevel@tonic-gate ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate /* 4530Sstevel@tonic-gate * freeing possible metadata (block of user data) 4540Sstevel@tonic-gate */ 4550Sstevel@tonic-gate void 4560Sstevel@tonic-gate ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb) 4570Sstevel@tonic-gate { 4580Sstevel@tonic-gate top_matadel(ufsvfsp, mof, nb); 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate } 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate /* 4630Sstevel@tonic-gate * allocating metadata 4640Sstevel@tonic-gate */ 4650Sstevel@tonic-gate void 4660Sstevel@tonic-gate ufs_trans_mata_alloc( 4670Sstevel@tonic-gate struct ufsvfs *ufsvfsp, 4680Sstevel@tonic-gate struct inode *ip, 4690Sstevel@tonic-gate daddr_t frag, 4700Sstevel@tonic-gate ulong_t nb, 4710Sstevel@tonic-gate int indir) 4720Sstevel@tonic-gate { 4730Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 4740Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) || 4770Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))) 4780Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb); 4790Sstevel@tonic-gate } 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate #endif /* DEBUG */ 4820Sstevel@tonic-gate 4830Sstevel@tonic-gate /* 4840Sstevel@tonic-gate * ufs_trans_dir is used to declare a directory delta 4850Sstevel@tonic-gate */ 4860Sstevel@tonic-gate int 4870Sstevel@tonic-gate ufs_trans_dir(struct inode *ip, off_t offset) 4880Sstevel@tonic-gate { 4890Sstevel@tonic-gate daddr_t bn; 4900Sstevel@tonic-gate int contig = 0, error; 4910Sstevel@tonic-gate 4920Sstevel@tonic-gate ASSERT(ip); 4930Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents)); 4940Sstevel@tonic-gate error = bmap_read(ip, (u_offset_t)offset, &bn, &contig); 4950Sstevel@tonic-gate if (error || (bn == UFS_HOLE)) { 4960Sstevel@tonic-gate cmn_err(CE_WARN, "ufs_trans_dir - could not get block" 4970Sstevel@tonic-gate " number error = %d bn = %d\n", error, (int)bn); 4980Sstevel@tonic-gate if (error == 0) /* treat UFS_HOLE as an I/O error */ 4990Sstevel@tonic-gate error = EIO; 5000Sstevel@tonic-gate return (error); 5010Sstevel@tonic-gate } 5020Sstevel@tonic-gate TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0); 5030Sstevel@tonic-gate return (error); 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate /*ARGSUSED*/ 5070Sstevel@tonic-gate int 5080Sstevel@tonic-gate ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp) 5090Sstevel@tonic-gate { 5100Sstevel@tonic-gate /* 5110Sstevel@tonic-gate * Lock the quota subsystem (ufsvfsp can be NULL 5120Sstevel@tonic-gate * if the DQ_ERROR is set). 5130Sstevel@tonic-gate */ 5140Sstevel@tonic-gate if (ufsvfsp) 5150Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 5160Sstevel@tonic-gate mutex_enter(&dqp->dq_lock); 5170Sstevel@tonic-gate 5180Sstevel@tonic-gate /* 5190Sstevel@tonic-gate * If this transaction has been cancelled by closedq_scan_inode(), 5200Sstevel@tonic-gate * then bail out now. We don't call dqput() in this case because 5210Sstevel@tonic-gate * it has already been done. 5220Sstevel@tonic-gate */ 5230Sstevel@tonic-gate if ((dqp->dq_flags & DQ_TRANS) == 0) { 5240Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5250Sstevel@tonic-gate if (ufsvfsp) 5260Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5270Sstevel@tonic-gate return (0); 5280Sstevel@tonic-gate } 5290Sstevel@tonic-gate 5300Sstevel@tonic-gate if (dqp->dq_flags & DQ_ERROR) { 5310Sstevel@tonic-gate /* 5320Sstevel@tonic-gate * Paranoia to make sure that there is at least one 5330Sstevel@tonic-gate * reference to the dquot struct. We are done with 5340Sstevel@tonic-gate * the dquot (due to an error) so clear logging 5350Sstevel@tonic-gate * specific markers. 5360Sstevel@tonic-gate */ 5370Sstevel@tonic-gate ASSERT(dqp->dq_cnt >= 1); 5380Sstevel@tonic-gate dqp->dq_flags &= ~DQ_TRANS; 5390Sstevel@tonic-gate dqput(dqp); 5400Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5410Sstevel@tonic-gate if (ufsvfsp) 5420Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5430Sstevel@tonic-gate return (1); 5440Sstevel@tonic-gate } 5450Sstevel@tonic-gate 5460Sstevel@tonic-gate if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) { 5470Sstevel@tonic-gate ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0)); 5480Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, 5490Sstevel@tonic-gate dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0); 5500Sstevel@tonic-gate /* 5510Sstevel@tonic-gate * Paranoia to make sure that there is at least one 5520Sstevel@tonic-gate * reference to the dquot struct. Clear the 5530Sstevel@tonic-gate * modification flag because the operation is now in 5540Sstevel@tonic-gate * the log. Also clear the logging specific markers 5550Sstevel@tonic-gate * that were set in ufs_trans_quota(). 5560Sstevel@tonic-gate */ 5570Sstevel@tonic-gate ASSERT(dqp->dq_cnt >= 1); 5580Sstevel@tonic-gate dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS); 5590Sstevel@tonic-gate dqput(dqp); 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate /* 5630Sstevel@tonic-gate * At this point, the logging specific flag should be clear, 5640Sstevel@tonic-gate * but add paranoia just in case something has gone wrong. 5650Sstevel@tonic-gate */ 5660Sstevel@tonic-gate ASSERT((dqp->dq_flags & DQ_TRANS) == 0); 5670Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5680Sstevel@tonic-gate if (ufsvfsp) 5690Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5700Sstevel@tonic-gate return (0); 5710Sstevel@tonic-gate } 5720Sstevel@tonic-gate 5730Sstevel@tonic-gate /* 5740Sstevel@tonic-gate * ufs_trans_quota take in a uid, allocates the disk space, placing the 5750Sstevel@tonic-gate * quota record into the metamap, then declares the delta. 5760Sstevel@tonic-gate */ 5770Sstevel@tonic-gate /*ARGSUSED*/ 5780Sstevel@tonic-gate void 5790Sstevel@tonic-gate ufs_trans_quota(struct dquot *dqp) 5800Sstevel@tonic-gate { 5810Sstevel@tonic-gate 5820Sstevel@tonic-gate struct inode *qip = dqp->dq_ufsvfsp->vfs_qinod; 5830Sstevel@tonic-gate 5840Sstevel@tonic-gate ASSERT(qip); 5850Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dqp->dq_lock)); 5860Sstevel@tonic-gate ASSERT(dqp->dq_flags & DQ_MOD); 5870Sstevel@tonic-gate ASSERT(dqp->dq_mof != 0); 5880Sstevel@tonic-gate ASSERT(dqp->dq_mof != UFS_HOLE); 5890Sstevel@tonic-gate 5900Sstevel@tonic-gate /* 5910Sstevel@tonic-gate * Mark this dquot to indicate that we are starting a logging 5920Sstevel@tonic-gate * file system operation for this dquot. Also increment the 5930Sstevel@tonic-gate * reference count so that the dquot does not get reused while 5940Sstevel@tonic-gate * it is on the mapentry_t list. DQ_TRANS is cleared and the 5950Sstevel@tonic-gate * reference count is decremented by ufs_trans_push_quota. 5960Sstevel@tonic-gate * 5970Sstevel@tonic-gate * If the file system is force-unmounted while there is a 5980Sstevel@tonic-gate * pending quota transaction, then closedq_scan_inode() will 5990Sstevel@tonic-gate * clear the DQ_TRANS flag and decrement the reference count. 6000Sstevel@tonic-gate * 6010Sstevel@tonic-gate * Since deltamap_add() drops multiple transactions to the 6020Sstevel@tonic-gate * same dq_mof and ufs_trans_push_quota() won't get called, 6030Sstevel@tonic-gate * we use DQ_TRANS to prevent repeat transactions from 6040Sstevel@tonic-gate * incrementing the reference count (or calling TRANS_DELTA()). 6050Sstevel@tonic-gate */ 6060Sstevel@tonic-gate if ((dqp->dq_flags & DQ_TRANS) == 0) { 6070Sstevel@tonic-gate dqp->dq_flags |= DQ_TRANS; 6080Sstevel@tonic-gate dqp->dq_cnt++; 6090Sstevel@tonic-gate TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk), 6100Sstevel@tonic-gate DT_QR, ufs_trans_push_quota, (ulong_t)dqp); 6110Sstevel@tonic-gate } 6120Sstevel@tonic-gate } 6130Sstevel@tonic-gate 6140Sstevel@tonic-gate void 6150Sstevel@tonic-gate ufs_trans_dqrele(struct dquot *dqp) 6160Sstevel@tonic-gate { 6170Sstevel@tonic-gate struct ufsvfs *ufsvfsp = dqp->dq_ufsvfsp; 6180Sstevel@tonic-gate 6190Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 6200Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE); 6210Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 6220Sstevel@tonic-gate dqrele(dqp); 6230Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 6240Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE); 6250Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 6260Sstevel@tonic-gate } 6270Sstevel@tonic-gate 6280Sstevel@tonic-gate int ufs_trans_max_resv = TOP_MAX_RESV; /* will be adjusted for testing */ 6290Sstevel@tonic-gate long ufs_trans_avgbfree = 0; /* will be adjusted for testing */ 6300Sstevel@tonic-gate #define TRANS_MAX_WRITE (1024 * 1024) 6310Sstevel@tonic-gate size_t ufs_trans_max_resid = TRANS_MAX_WRITE; 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate /* 6340Sstevel@tonic-gate * Calculate the log reservation for the given write or truncate 6350Sstevel@tonic-gate */ 6360Sstevel@tonic-gate static ulong_t 6370Sstevel@tonic-gate ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc) 6380Sstevel@tonic-gate { 6390Sstevel@tonic-gate long ncg, last2blk; 6400Sstevel@tonic-gate long niblk = 0; 6410Sstevel@tonic-gate u_offset_t writeend, offblk; 6420Sstevel@tonic-gate int resv; 6430Sstevel@tonic-gate daddr_t nblk, maxfblk; 6440Sstevel@tonic-gate long avgbfree; 6450Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 6460Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 6470Sstevel@tonic-gate long fni = NINDIR(fs); 6480Sstevel@tonic-gate int bsize = fs->fs_bsize; 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate /* 6510Sstevel@tonic-gate * Assume that the request will fit in 1 or 2 cg's, 6520Sstevel@tonic-gate * resv is the amount of log space to reserve (in bytes). 6530Sstevel@tonic-gate */ 6540Sstevel@tonic-gate resv = SIZECG(ip) * 2 + INODESIZE + 1024; 6550Sstevel@tonic-gate 6560Sstevel@tonic-gate /* 6570Sstevel@tonic-gate * get max position of write in fs blocks 6580Sstevel@tonic-gate */ 6590Sstevel@tonic-gate writeend = offset + resid; 6600Sstevel@tonic-gate maxfblk = lblkno(fs, writeend); 6610Sstevel@tonic-gate offblk = lblkno(fs, offset); 6620Sstevel@tonic-gate /* 6630Sstevel@tonic-gate * request size in fs blocks 6640Sstevel@tonic-gate */ 6650Sstevel@tonic-gate nblk = lblkno(fs, blkroundup(fs, resid)); 6660Sstevel@tonic-gate /* 6670Sstevel@tonic-gate * Adjust for sparse files 6680Sstevel@tonic-gate */ 6690Sstevel@tonic-gate if (trunc) 6700Sstevel@tonic-gate nblk = MIN(nblk, ip->i_blocks); 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate /* 6730Sstevel@tonic-gate * Adjust avgbfree (for testing) 6740Sstevel@tonic-gate */ 6750Sstevel@tonic-gate avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1; 6760Sstevel@tonic-gate 6770Sstevel@tonic-gate /* 6780Sstevel@tonic-gate * Calculate maximum number of blocks of triple indirect 6790Sstevel@tonic-gate * pointers to write. 6800Sstevel@tonic-gate */ 6810Sstevel@tonic-gate last2blk = NDADDR + fni + fni * fni; 6820Sstevel@tonic-gate if (maxfblk > last2blk) { 6830Sstevel@tonic-gate long nl2ptr; 6840Sstevel@tonic-gate long n3blk; 6850Sstevel@tonic-gate 6860Sstevel@tonic-gate if (offblk > last2blk) 6870Sstevel@tonic-gate n3blk = maxfblk - offblk; 6880Sstevel@tonic-gate else 6890Sstevel@tonic-gate n3blk = maxfblk - last2blk; 6900Sstevel@tonic-gate niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1; 6910Sstevel@tonic-gate nl2ptr = roundup(niblk, fni) / fni + 1; 6920Sstevel@tonic-gate niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2; 6930Sstevel@tonic-gate maxfblk -= n3blk; 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate /* 6960Sstevel@tonic-gate * calculate maximum number of blocks of double indirect 6970Sstevel@tonic-gate * pointers to write. 6980Sstevel@tonic-gate */ 6990Sstevel@tonic-gate if (maxfblk > NDADDR + fni) { 7000Sstevel@tonic-gate long n2blk; 7010Sstevel@tonic-gate 7020Sstevel@tonic-gate if (offblk > NDADDR + fni) 7030Sstevel@tonic-gate n2blk = maxfblk - offblk; 7040Sstevel@tonic-gate else 7050Sstevel@tonic-gate n2blk = maxfblk - NDADDR + fni; 7060Sstevel@tonic-gate niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2; 7070Sstevel@tonic-gate maxfblk -= n2blk; 7080Sstevel@tonic-gate } 7090Sstevel@tonic-gate /* 7100Sstevel@tonic-gate * Add in indirect pointer block write 7110Sstevel@tonic-gate */ 7120Sstevel@tonic-gate if (maxfblk > NDADDR) { 7130Sstevel@tonic-gate niblk += 1; 7140Sstevel@tonic-gate } 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * Calculate deltas for indirect pointer writes 7170Sstevel@tonic-gate */ 7180Sstevel@tonic-gate resv += niblk * (fs->fs_bsize + sizeof (struct delta)); 7190Sstevel@tonic-gate /* 7200Sstevel@tonic-gate * maximum number of cg's needed for request 7210Sstevel@tonic-gate */ 7220Sstevel@tonic-gate ncg = nblk / avgbfree; 7230Sstevel@tonic-gate if (ncg > fs->fs_ncg) 7240Sstevel@tonic-gate ncg = fs->fs_ncg; 7250Sstevel@tonic-gate 7260Sstevel@tonic-gate /* 7270Sstevel@tonic-gate * maximum amount of log space needed for request 7280Sstevel@tonic-gate */ 7290Sstevel@tonic-gate if (ncg > 2) 7300Sstevel@tonic-gate resv += (ncg - 2) * SIZECG(ip); 7310Sstevel@tonic-gate 7320Sstevel@tonic-gate return (resv); 7330Sstevel@tonic-gate } 7340Sstevel@tonic-gate 7350Sstevel@tonic-gate /* 7360Sstevel@tonic-gate * Calculate the amount of log space that needs to be reserved for this 7370Sstevel@tonic-gate * trunc request. If the amount of log space is too large, then 7380Sstevel@tonic-gate * calculate the the size that the requests needs to be split into. 7390Sstevel@tonic-gate */ 740923Ssdebnath void 7410Sstevel@tonic-gate ufs_trans_trunc_resv( 7420Sstevel@tonic-gate struct inode *ip, 7430Sstevel@tonic-gate u_offset_t length, 7440Sstevel@tonic-gate int *resvp, 7450Sstevel@tonic-gate u_offset_t *residp) 7460Sstevel@tonic-gate { 7470Sstevel@tonic-gate ulong_t resv; 7480Sstevel@tonic-gate u_offset_t size, offset, resid; 7495794Svsakar int nchunks, incr; 7505794Svsakar int is_sparse = 0; 7510Sstevel@tonic-gate 7520Sstevel@tonic-gate /* 7530Sstevel@tonic-gate * *resvp is the amount of log space to reserve (in bytes). 7540Sstevel@tonic-gate * when nonzero, *residp is the number of bytes to truncate. 7550Sstevel@tonic-gate */ 7560Sstevel@tonic-gate *residp = 0; 7570Sstevel@tonic-gate 7580Sstevel@tonic-gate if (length < ip->i_size) { 7590Sstevel@tonic-gate size = ip->i_size - length; 7600Sstevel@tonic-gate } else { 7610Sstevel@tonic-gate resv = SIZECG(ip) * 2 + INODESIZE + 1024; 7620Sstevel@tonic-gate /* 7630Sstevel@tonic-gate * truncate up, doesn't really use much space, 7640Sstevel@tonic-gate * the default above should be sufficient. 7650Sstevel@tonic-gate */ 7660Sstevel@tonic-gate goto done; 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate 7695794Svsakar /* 7705794Svsakar * There is no need to split sparse file truncation into 7715794Svsakar * as many chunks as that of regular files. 7725794Svsakar */ 7735794Svsakar is_sparse = bmap_has_holes(ip); 7745794Svsakar 7750Sstevel@tonic-gate offset = length; 7760Sstevel@tonic-gate resid = size; 7770Sstevel@tonic-gate nchunks = 1; 7785794Svsakar incr = 0; 7795794Svsakar 7805794Svsakar do { 7815794Svsakar resv = ufs_log_amt(ip, offset, resid, 1); 7825794Svsakar /* 7835794Svsakar * If this is the first iteration, set "incr". 7845794Svsakar */ 7855794Svsakar if (!incr) { 7865794Svsakar /* 7875794Svsakar * If this request takes too much log space, 7885794Svsakar * it will be split into "nchunks". If this split 7895794Svsakar * is not enough, linearly increment the nchunks in 7905794Svsakar * the next iteration. 7915794Svsakar */ 7925794Svsakar if (resv > ufs_trans_max_resv && !is_sparse) { 7935794Svsakar nchunks = MAX(size/ufs_trans_max_resv, 1); 7945794Svsakar incr = nchunks; 7955794Svsakar } else { 7965794Svsakar incr = 1; 7975794Svsakar } 7985794Svsakar } else 7995794Svsakar nchunks += incr; 8000Sstevel@tonic-gate resid = size / nchunks; 8015794Svsakar offset = length + (nchunks - 1) * resid; 8025794Svsakar } while (resv > ufs_trans_max_resv); 8035794Svsakar 8040Sstevel@tonic-gate if (nchunks > 1) { 8050Sstevel@tonic-gate *residp = resid; 8060Sstevel@tonic-gate } 8070Sstevel@tonic-gate done: 8080Sstevel@tonic-gate *resvp = resv; 8090Sstevel@tonic-gate } 8100Sstevel@tonic-gate 8110Sstevel@tonic-gate int 8120Sstevel@tonic-gate ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr) 8130Sstevel@tonic-gate { 8140Sstevel@tonic-gate int err, issync, resv; 8150Sstevel@tonic-gate u_offset_t resid; 8160Sstevel@tonic-gate int do_block = 0; 8170Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 8180Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 8190Sstevel@tonic-gate 8200Sstevel@tonic-gate /* 8210Sstevel@tonic-gate * Not logging; just do the trunc 8220Sstevel@tonic-gate */ 8230Sstevel@tonic-gate if (!TRANS_ISTRANS(ufsvfsp)) { 8240Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 8250Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 8260Sstevel@tonic-gate err = ufs_itrunc(ip, length, flags, cr); 8270Sstevel@tonic-gate rw_exit(&ip->i_contents); 8280Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 8290Sstevel@tonic-gate return (err); 8300Sstevel@tonic-gate } 8310Sstevel@tonic-gate 8320Sstevel@tonic-gate /* 8330Sstevel@tonic-gate * within the lockfs protocol but *not* part of a transaction 8340Sstevel@tonic-gate */ 8350Sstevel@tonic-gate do_block = curthread->t_flag & T_DONTBLOCK; 8360Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 8370Sstevel@tonic-gate 8380Sstevel@tonic-gate /* 8390Sstevel@tonic-gate * Trunc the file (in pieces, if necessary) 8400Sstevel@tonic-gate */ 8410Sstevel@tonic-gate again: 8420Sstevel@tonic-gate ufs_trans_trunc_resv(ip, length, &resv, &resid); 8430Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv); 8440Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 8450Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 8460Sstevel@tonic-gate if (resid) { 8470Sstevel@tonic-gate /* 8480Sstevel@tonic-gate * resid is only set if we have to truncate in chunks 8490Sstevel@tonic-gate */ 8500Sstevel@tonic-gate ASSERT(length + resid < ip->i_size); 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate /* 8530Sstevel@tonic-gate * Partially trunc file down to desired size (length). 8540Sstevel@tonic-gate * Only retain I_FREE on the last partial trunc. 8550Sstevel@tonic-gate * Round up size to a block boundary, to ensure the truncate 8560Sstevel@tonic-gate * doesn't have to allocate blocks. This is done both for 8570Sstevel@tonic-gate * performance and to fix a bug where if the block can't be 8580Sstevel@tonic-gate * allocated then the inode delete fails, but the inode 8590Sstevel@tonic-gate * is still freed with attached blocks and non-zero size 8600Sstevel@tonic-gate * (bug 4348738). 8610Sstevel@tonic-gate */ 8620Sstevel@tonic-gate err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)), 8630Sstevel@tonic-gate flags & ~I_FREE, cr); 8640Sstevel@tonic-gate ASSERT(ip->i_size != length); 8650Sstevel@tonic-gate } else 8660Sstevel@tonic-gate err = ufs_itrunc(ip, length, flags, cr); 8670Sstevel@tonic-gate if (!do_block) 8680Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 8690Sstevel@tonic-gate rw_exit(&ip->i_contents); 8700Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 8710Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv); 8720Sstevel@tonic-gate 8730Sstevel@tonic-gate if ((err == 0) && resid) { 8740Sstevel@tonic-gate ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 8750Sstevel@tonic-gate goto again; 8760Sstevel@tonic-gate } 8770Sstevel@tonic-gate return (err); 8780Sstevel@tonic-gate } 8790Sstevel@tonic-gate 8800Sstevel@tonic-gate /* 8810Sstevel@tonic-gate * Calculate the amount of log space that needs to be reserved for this 8820Sstevel@tonic-gate * write request. If the amount of log space is too large, then 8830Sstevel@tonic-gate * calculate the size that the requests needs to be split into. 8840Sstevel@tonic-gate * First try fixed chunks of size ufs_trans_max_resid. If that 8850Sstevel@tonic-gate * is too big, iterate down to the largest size that will fit. 8860Sstevel@tonic-gate * Pagein the pages in the first chunk here, so that the pagein is 8870Sstevel@tonic-gate * avoided later when the transaction is open. 8880Sstevel@tonic-gate */ 8890Sstevel@tonic-gate void 8900Sstevel@tonic-gate ufs_trans_write_resv( 8910Sstevel@tonic-gate struct inode *ip, 8920Sstevel@tonic-gate struct uio *uio, 8930Sstevel@tonic-gate int *resvp, 8940Sstevel@tonic-gate int *residp) 8950Sstevel@tonic-gate { 8960Sstevel@tonic-gate ulong_t resv; 8970Sstevel@tonic-gate offset_t offset; 8980Sstevel@tonic-gate ssize_t resid; 8990Sstevel@tonic-gate int nchunks; 9000Sstevel@tonic-gate 9010Sstevel@tonic-gate *residp = 0; 9020Sstevel@tonic-gate offset = uio->uio_offset; 9030Sstevel@tonic-gate resid = MIN(uio->uio_resid, ufs_trans_max_resid); 9040Sstevel@tonic-gate resv = ufs_log_amt(ip, offset, resid, 0); 9050Sstevel@tonic-gate if (resv <= ufs_trans_max_resv) { 906*8059SDonghai.Qiao@Sun.COM uio_prefaultpages(resid, uio); 9070Sstevel@tonic-gate if (resid != uio->uio_resid) 9080Sstevel@tonic-gate *residp = resid; 9090Sstevel@tonic-gate *resvp = resv; 9100Sstevel@tonic-gate return; 9110Sstevel@tonic-gate } 9120Sstevel@tonic-gate 9130Sstevel@tonic-gate resid = uio->uio_resid; 9140Sstevel@tonic-gate nchunks = 1; 9150Sstevel@tonic-gate for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv; 9164662Sfrankho offset = uio->uio_offset + (nchunks - 1) * resid) { 9170Sstevel@tonic-gate nchunks++; 9180Sstevel@tonic-gate resid = uio->uio_resid / nchunks; 9190Sstevel@tonic-gate } 920*8059SDonghai.Qiao@Sun.COM uio_prefaultpages(resid, uio); 9210Sstevel@tonic-gate /* 9220Sstevel@tonic-gate * If this request takes too much log space, it will be split 9230Sstevel@tonic-gate */ 9240Sstevel@tonic-gate if (nchunks > 1) 9250Sstevel@tonic-gate *residp = resid; 9260Sstevel@tonic-gate *resvp = resv; 9270Sstevel@tonic-gate } 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate /* 9300Sstevel@tonic-gate * Issue write request. 9310Sstevel@tonic-gate * 9320Sstevel@tonic-gate * Split a large request into smaller chunks. 9330Sstevel@tonic-gate */ 9340Sstevel@tonic-gate int 9350Sstevel@tonic-gate ufs_trans_write( 9360Sstevel@tonic-gate struct inode *ip, 9370Sstevel@tonic-gate struct uio *uio, 9380Sstevel@tonic-gate int ioflag, 9390Sstevel@tonic-gate cred_t *cr, 9400Sstevel@tonic-gate int resv, 9410Sstevel@tonic-gate long resid) 9420Sstevel@tonic-gate { 9430Sstevel@tonic-gate long realresid; 9440Sstevel@tonic-gate int err; 9450Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 9460Sstevel@tonic-gate 9470Sstevel@tonic-gate /* 9480Sstevel@tonic-gate * since the write is too big and would "HOG THE LOG" it needs to 9490Sstevel@tonic-gate * be broken up and done in pieces. NOTE, the caller will 9500Sstevel@tonic-gate * issue the EOT after the request has been completed 9510Sstevel@tonic-gate */ 9520Sstevel@tonic-gate realresid = uio->uio_resid; 9530Sstevel@tonic-gate 9540Sstevel@tonic-gate again: 9550Sstevel@tonic-gate /* 9560Sstevel@tonic-gate * Perform partial request (uiomove will update uio for us) 9570Sstevel@tonic-gate * Request is split up into "resid" size chunks until 9580Sstevel@tonic-gate * "realresid" bytes have been transferred. 9590Sstevel@tonic-gate */ 9600Sstevel@tonic-gate uio->uio_resid = MIN(resid, realresid); 9610Sstevel@tonic-gate realresid -= uio->uio_resid; 9620Sstevel@tonic-gate err = wrip(ip, uio, ioflag, cr); 9630Sstevel@tonic-gate 9640Sstevel@tonic-gate /* 9650Sstevel@tonic-gate * Error or request is done; caller issues final EOT 9660Sstevel@tonic-gate */ 9670Sstevel@tonic-gate if (err || uio->uio_resid || (realresid == 0)) { 9680Sstevel@tonic-gate uio->uio_resid += realresid; 9690Sstevel@tonic-gate return (err); 9700Sstevel@tonic-gate } 9710Sstevel@tonic-gate 9720Sstevel@tonic-gate /* 9730Sstevel@tonic-gate * Generate EOT for this part of the request 9740Sstevel@tonic-gate */ 9750Sstevel@tonic-gate rw_exit(&ip->i_contents); 9760Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 9770Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 9780Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv); 9790Sstevel@tonic-gate } else { 9800Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv); 9810Sstevel@tonic-gate } 9820Sstevel@tonic-gate 9830Sstevel@tonic-gate /* 9840Sstevel@tonic-gate * Make sure the input buffer is resident before starting 9850Sstevel@tonic-gate * the next transaction. 9860Sstevel@tonic-gate */ 987*8059SDonghai.Qiao@Sun.COM uio_prefaultpages(MIN(resid, realresid), uio); 9880Sstevel@tonic-gate 9890Sstevel@tonic-gate /* 9900Sstevel@tonic-gate * Generate BOT for next part of the request 9910Sstevel@tonic-gate */ 9920Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 9930Sstevel@tonic-gate int error; 9940Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error); 9950Sstevel@tonic-gate ASSERT(!error); 9960Sstevel@tonic-gate } else { 9970Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv); 9980Sstevel@tonic-gate } 9990Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 10000Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 10010Sstevel@tonic-gate /* 10020Sstevel@tonic-gate * Error during EOT (probably device error while writing commit rec) 10030Sstevel@tonic-gate */ 10040Sstevel@tonic-gate if (err) 10050Sstevel@tonic-gate return (err); 10060Sstevel@tonic-gate goto again; 10070Sstevel@tonic-gate } 1008