10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*4662Sfrankho * Common Development and Distribution License (the "License"). 6*4662Sfrankho * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*4662Sfrankho * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 310Sstevel@tonic-gate * under license from the Regents of the University of California. 320Sstevel@tonic-gate */ 330Sstevel@tonic-gate 340Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 350Sstevel@tonic-gate 360Sstevel@tonic-gate #include <sys/sysmacros.h> 370Sstevel@tonic-gate #include <sys/param.h> 380Sstevel@tonic-gate #include <sys/types.h> 390Sstevel@tonic-gate #include <sys/systm.h> 400Sstevel@tonic-gate #include <sys/t_lock.h> 410Sstevel@tonic-gate #include <sys/uio.h> 420Sstevel@tonic-gate #include <sys/kmem.h> 430Sstevel@tonic-gate #include <sys/thread.h> 440Sstevel@tonic-gate #include <sys/vfs.h> 450Sstevel@tonic-gate #include <sys/errno.h> 460Sstevel@tonic-gate #include <sys/buf.h> 470Sstevel@tonic-gate #include <sys/vnode.h> 480Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 490Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 500Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 510Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h> 520Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 530Sstevel@tonic-gate #include <sys/fs/ufs_panic.h> 540Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 550Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 560Sstevel@tonic-gate #include <sys/cmn_err.h> 570Sstevel@tonic-gate #include <sys/file.h> 580Sstevel@tonic-gate #include <sys/debug.h> 590Sstevel@tonic-gate 600Sstevel@tonic-gate 610Sstevel@tonic-gate extern kmutex_t ufsvfs_mutex; 620Sstevel@tonic-gate extern struct ufsvfs *ufs_instances; 630Sstevel@tonic-gate 640Sstevel@tonic-gate /* 650Sstevel@tonic-gate * hlock any file systems w/errored logs 660Sstevel@tonic-gate */ 670Sstevel@tonic-gate int 680Sstevel@tonic-gate ufs_trans_hlock() 690Sstevel@tonic-gate { 700Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 710Sstevel@tonic-gate struct lockfs lockfs; 720Sstevel@tonic-gate int error; 730Sstevel@tonic-gate int retry = 0; 740Sstevel@tonic-gate 750Sstevel@tonic-gate /* 760Sstevel@tonic-gate * find fs's that paniced or have errored logging devices 770Sstevel@tonic-gate */ 780Sstevel@tonic-gate mutex_enter(&ufsvfs_mutex); 790Sstevel@tonic-gate for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) { 800Sstevel@tonic-gate /* 810Sstevel@tonic-gate * not mounted; continue 820Sstevel@tonic-gate */ 830Sstevel@tonic-gate if ((ufsvfsp->vfs_vfs == NULL) || 840Sstevel@tonic-gate (ufsvfsp->vfs_validfs == UT_UNMOUNTED)) 850Sstevel@tonic-gate continue; 860Sstevel@tonic-gate /* 870Sstevel@tonic-gate * disallow unmounts (hlock occurs below) 880Sstevel@tonic-gate */ 890Sstevel@tonic-gate if (TRANS_ISERROR(ufsvfsp)) 900Sstevel@tonic-gate ufsvfsp->vfs_validfs = UT_HLOCKING; 910Sstevel@tonic-gate } 920Sstevel@tonic-gate mutex_exit(&ufsvfs_mutex); 930Sstevel@tonic-gate 940Sstevel@tonic-gate /* 950Sstevel@tonic-gate * hlock the fs's that paniced or have errored logging devices 960Sstevel@tonic-gate */ 970Sstevel@tonic-gate again: 980Sstevel@tonic-gate mutex_enter(&ufsvfs_mutex); 990Sstevel@tonic-gate for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) 1000Sstevel@tonic-gate if (ufsvfsp->vfs_validfs == UT_HLOCKING) 1010Sstevel@tonic-gate break; 1020Sstevel@tonic-gate mutex_exit(&ufsvfs_mutex); 1030Sstevel@tonic-gate if (ufsvfsp == NULL) 1040Sstevel@tonic-gate return (retry); 1050Sstevel@tonic-gate /* 1060Sstevel@tonic-gate * hlock the file system 1070Sstevel@tonic-gate */ 1080Sstevel@tonic-gate (void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs); 1090Sstevel@tonic-gate if (!LOCKFS_IS_ELOCK(&lockfs)) { 1100Sstevel@tonic-gate lockfs.lf_lock = LOCKFS_HLOCK; 1110Sstevel@tonic-gate lockfs.lf_flags = 0; 1120Sstevel@tonic-gate lockfs.lf_comlen = 0; 1130Sstevel@tonic-gate lockfs.lf_comment = NULL; 1140Sstevel@tonic-gate error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0); 1150Sstevel@tonic-gate /* 1160Sstevel@tonic-gate * retry after awhile; another app currently doing lockfs 1170Sstevel@tonic-gate */ 1180Sstevel@tonic-gate if (error == EBUSY || error == EINVAL) 1190Sstevel@tonic-gate retry = 1; 1200Sstevel@tonic-gate } else { 1210Sstevel@tonic-gate if (ufsfx_get_failure_qlen() > 0) { 1220Sstevel@tonic-gate if (mutex_tryenter(&ufs_fix.uq_mutex)) { 1230Sstevel@tonic-gate ufs_fix.uq_lowat = ufs_fix.uq_ne; 1240Sstevel@tonic-gate cv_broadcast(&ufs_fix.uq_cv); 1250Sstevel@tonic-gate mutex_exit(&ufs_fix.uq_mutex); 1260Sstevel@tonic-gate } 1270Sstevel@tonic-gate } 1280Sstevel@tonic-gate retry = 1; 1290Sstevel@tonic-gate } 1300Sstevel@tonic-gate 1310Sstevel@tonic-gate /* 1320Sstevel@tonic-gate * allow unmounts 1330Sstevel@tonic-gate */ 1340Sstevel@tonic-gate ufsvfsp->vfs_validfs = UT_MOUNTED; 1350Sstevel@tonic-gate goto again; 1360Sstevel@tonic-gate } 1370Sstevel@tonic-gate 1380Sstevel@tonic-gate /*ARGSUSED*/ 1390Sstevel@tonic-gate void 1400Sstevel@tonic-gate ufs_trans_onerror() 1410Sstevel@tonic-gate { 1420Sstevel@tonic-gate mutex_enter(&ufs_hlock.uq_mutex); 1430Sstevel@tonic-gate ufs_hlock.uq_ne = ufs_hlock.uq_lowat; 1440Sstevel@tonic-gate cv_broadcast(&ufs_hlock.uq_cv); 1450Sstevel@tonic-gate mutex_exit(&ufs_hlock.uq_mutex); 1460Sstevel@tonic-gate } 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate void 1490Sstevel@tonic-gate ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid) 1500Sstevel@tonic-gate { 1510Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1520Sstevel@tonic-gate sbupdate(vfsp); 1530Sstevel@tonic-gate return; 1540Sstevel@tonic-gate } else { 1550Sstevel@tonic-gate 1560Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 1570Sstevel@tonic-gate return; 1580Sstevel@tonic-gate 1590Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 1600Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE); 1610Sstevel@tonic-gate sbupdate(vfsp); 1620Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE); 1630Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 1640Sstevel@tonic-gate } 1650Sstevel@tonic-gate } 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate void 1680Sstevel@tonic-gate ufs_trans_iupdat(struct inode *ip, int waitfor) 1690Sstevel@tonic-gate { 1700Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1730Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 1740Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 1750Sstevel@tonic-gate rw_exit(&ip->i_contents); 1760Sstevel@tonic-gate return; 1770Sstevel@tonic-gate } else { 1780Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1790Sstevel@tonic-gate 1800Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 1810Sstevel@tonic-gate return; 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 1840Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 1850Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 1860Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 1870Sstevel@tonic-gate rw_exit(&ip->i_contents); 1880Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 1890Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 1900Sstevel@tonic-gate } 1910Sstevel@tonic-gate } 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate void 1940Sstevel@tonic-gate ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid) 1950Sstevel@tonic-gate { 1960Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 1970Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 1980Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 1990Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2000Sstevel@tonic-gate return; 2010Sstevel@tonic-gate } else { 2020Sstevel@tonic-gate 2030Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 2040Sstevel@tonic-gate return; 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 2070Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 2080Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 2090Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 2100Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2110Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 2120Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 2130Sstevel@tonic-gate } 2140Sstevel@tonic-gate } 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate /*ARGSUSED*/ 2170Sstevel@tonic-gate int 2180Sstevel@tonic-gate ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore) 2190Sstevel@tonic-gate { 2200Sstevel@tonic-gate struct fs *fs; 2210Sstevel@tonic-gate 2220Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 2230Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 2240Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp, 225*4662Sfrankho ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize, 226*4662Sfrankho (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize); 2270Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 2280Sstevel@tonic-gate return (0); 2290Sstevel@tonic-gate } 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate /*ARGSUSED*/ 2320Sstevel@tonic-gate int 2330Sstevel@tonic-gate ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno) 2340Sstevel@tonic-gate { 2350Sstevel@tonic-gate struct buf *bp; 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1); 2380Sstevel@tonic-gate if (bp == NULL) 2390Sstevel@tonic-gate return (ENOENT); 2400Sstevel@tonic-gate 2410Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) { 2420Sstevel@tonic-gate /* 2430Sstevel@tonic-gate * Do not use brwrite() here since the buffer is already 2440Sstevel@tonic-gate * marked for retry or not by the code that called 2450Sstevel@tonic-gate * TRANS_BUF(). 2460Sstevel@tonic-gate */ 2470Sstevel@tonic-gate UFS_BWRITE(ufsvfsp, bp); 2480Sstevel@tonic-gate return (0); 2490Sstevel@tonic-gate } 2500Sstevel@tonic-gate /* 2510Sstevel@tonic-gate * If we did not find the real buf for this block above then 2520Sstevel@tonic-gate * clear the dev so the buf won't be found by mistake 2530Sstevel@tonic-gate * for this block later. We had to allocate at least a 1 byte 2540Sstevel@tonic-gate * buffer to keep brelse happy. 2550Sstevel@tonic-gate */ 2560Sstevel@tonic-gate if (bp->b_bufsize == 1) { 2570Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV; 2580Sstevel@tonic-gate bp->b_edev = NODEV; 2590Sstevel@tonic-gate bp->b_flags = 0; 2600Sstevel@tonic-gate } 2610Sstevel@tonic-gate brelse(bp); 2620Sstevel@tonic-gate return (ENOENT); 2630Sstevel@tonic-gate } 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate /*ARGSUSED*/ 2660Sstevel@tonic-gate int 2670Sstevel@tonic-gate ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino) 2680Sstevel@tonic-gate { 2690Sstevel@tonic-gate int error; 2700Sstevel@tonic-gate struct inode *ip; 2710Sstevel@tonic-gate 2720Sstevel@tonic-gate /* 2730Sstevel@tonic-gate * Grab the quota lock (if the file system has not been forcibly 2740Sstevel@tonic-gate * unmounted). 2750Sstevel@tonic-gate */ 2760Sstevel@tonic-gate if (ufsvfsp) 2770Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2780Sstevel@tonic-gate 2790Sstevel@tonic-gate error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred); 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate if (ufsvfsp) 2820Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2830Sstevel@tonic-gate if (error) 2840Sstevel@tonic-gate return (ENOENT); 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) { 2870Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 2880Sstevel@tonic-gate ufs_iupdat(ip, 1); 2890Sstevel@tonic-gate rw_exit(&ip->i_contents); 2900Sstevel@tonic-gate VN_RELE(ITOV(ip)); 2910Sstevel@tonic-gate return (0); 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate VN_RELE(ITOV(ip)); 2940Sstevel@tonic-gate return (ENOENT); 2950Sstevel@tonic-gate } 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate #ifdef DEBUG 2980Sstevel@tonic-gate /* 2990Sstevel@tonic-gate * These routines maintain the metadata map (matamap) 3000Sstevel@tonic-gate */ 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate /* 3030Sstevel@tonic-gate * update the metadata map at mount 3040Sstevel@tonic-gate */ 3050Sstevel@tonic-gate static int 3060Sstevel@tonic-gate ufs_trans_mata_mount_scan(struct inode *ip, void *arg) 3070Sstevel@tonic-gate { 3080Sstevel@tonic-gate /* 3090Sstevel@tonic-gate * wrong file system; keep looking 3100Sstevel@tonic-gate */ 3110Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg) 3120Sstevel@tonic-gate return (0); 3130Sstevel@tonic-gate 3140Sstevel@tonic-gate /* 3150Sstevel@tonic-gate * load the metadata map 3160Sstevel@tonic-gate */ 3170Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3180Sstevel@tonic-gate ufs_trans_mata_iget(ip); 3190Sstevel@tonic-gate rw_exit(&ip->i_contents); 3200Sstevel@tonic-gate return (0); 3210Sstevel@tonic-gate } 3220Sstevel@tonic-gate 3230Sstevel@tonic-gate void 3240Sstevel@tonic-gate ufs_trans_mata_mount(struct ufsvfs *ufsvfsp) 3250Sstevel@tonic-gate { 3260Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 3270Sstevel@tonic-gate ino_t ino; 3280Sstevel@tonic-gate int i; 3290Sstevel@tonic-gate 3300Sstevel@tonic-gate /* 3310Sstevel@tonic-gate * put static metadata into matamap 3320Sstevel@tonic-gate * superblock 3330Sstevel@tonic-gate * cylinder groups 3340Sstevel@tonic-gate * inode groups 3350Sstevel@tonic-gate * existing inodes 3360Sstevel@tonic-gate */ 3370Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize); 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) { 3400Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, 3410Sstevel@tonic-gate ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize); 3420Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, 3430Sstevel@tonic-gate ldbtob(fsbtodb(fs, itod(fs, ino))), 3440Sstevel@tonic-gate fs->fs_ipg * sizeof (struct dinode)); 3450Sstevel@tonic-gate } 3460Sstevel@tonic-gate (void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp); 3470Sstevel@tonic-gate } 3480Sstevel@tonic-gate 3490Sstevel@tonic-gate /* 3500Sstevel@tonic-gate * clear the metadata map at umount 3510Sstevel@tonic-gate */ 3520Sstevel@tonic-gate void 3530Sstevel@tonic-gate ufs_trans_mata_umount(struct ufsvfs *ufsvfsp) 3540Sstevel@tonic-gate { 3550Sstevel@tonic-gate top_mataclr(ufsvfsp); 3560Sstevel@tonic-gate } 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate /* 3590Sstevel@tonic-gate * summary info (may be extended during growfs test) 3600Sstevel@tonic-gate */ 3610Sstevel@tonic-gate void 3620Sstevel@tonic-gate ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs) 3630Sstevel@tonic-gate { 3640Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)), 365*4662Sfrankho fs->fs_cssize); 3660Sstevel@tonic-gate } 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate /* 3690Sstevel@tonic-gate * scan an allocation block (either inode or true block) 3700Sstevel@tonic-gate */ 3710Sstevel@tonic-gate static void 3720Sstevel@tonic-gate ufs_trans_mata_direct( 3730Sstevel@tonic-gate struct inode *ip, 3740Sstevel@tonic-gate daddr_t *fragsp, 3750Sstevel@tonic-gate daddr32_t *blkp, 3760Sstevel@tonic-gate unsigned int nblk) 3770Sstevel@tonic-gate { 3780Sstevel@tonic-gate int i; 3790Sstevel@tonic-gate daddr_t frag; 3800Sstevel@tonic-gate ulong_t nb; 3810Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 3820Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate for (i = 0; i < nblk && *fragsp; ++i, ++blkp) 3850Sstevel@tonic-gate if ((frag = *blkp) != 0) { 3860Sstevel@tonic-gate if (*fragsp > fs->fs_frag) { 3870Sstevel@tonic-gate nb = fs->fs_bsize; 3880Sstevel@tonic-gate *fragsp -= fs->fs_frag; 3890Sstevel@tonic-gate } else { 3900Sstevel@tonic-gate nb = *fragsp * fs->fs_fsize; 3910Sstevel@tonic-gate *fragsp = 0; 3920Sstevel@tonic-gate } 3930Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb); 3940Sstevel@tonic-gate } 3950Sstevel@tonic-gate } 3960Sstevel@tonic-gate 3970Sstevel@tonic-gate /* 3980Sstevel@tonic-gate * scan an indirect allocation block (either inode or true block) 3990Sstevel@tonic-gate */ 4000Sstevel@tonic-gate static void 4010Sstevel@tonic-gate ufs_trans_mata_indir( 4020Sstevel@tonic-gate struct inode *ip, 4030Sstevel@tonic-gate daddr_t *fragsp, 4040Sstevel@tonic-gate daddr_t frag, 4050Sstevel@tonic-gate int level) 4060Sstevel@tonic-gate { 4070Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 4080Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 4090Sstevel@tonic-gate int ne = fs->fs_bsize / (int)sizeof (daddr32_t); 4100Sstevel@tonic-gate int i; 4110Sstevel@tonic-gate struct buf *bp; 4120Sstevel@tonic-gate daddr32_t *blkp; 4130Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4140Sstevel@tonic-gate 4150Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize); 4160Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 4170Sstevel@tonic-gate brelse(bp); 4180Sstevel@tonic-gate return; 4190Sstevel@tonic-gate } 4200Sstevel@tonic-gate blkp = bp->b_un.b_daddr; 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) || 4230Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)) 4240Sstevel@tonic-gate ufs_trans_mata_direct(ip, fragsp, blkp, ne); 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate if (level) 4270Sstevel@tonic-gate for (i = 0; i < ne && *fragsp; ++i, ++blkp) 4280Sstevel@tonic-gate ufs_trans_mata_indir(ip, fragsp, *blkp, level-1); 4290Sstevel@tonic-gate brelse(bp); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate /* 4330Sstevel@tonic-gate * put appropriate metadata into matamap for this inode 4340Sstevel@tonic-gate */ 4350Sstevel@tonic-gate void 4360Sstevel@tonic-gate ufs_trans_mata_iget(struct inode *ip) 4370Sstevel@tonic-gate { 4380Sstevel@tonic-gate int i; 4390Sstevel@tonic-gate daddr_t frags = dbtofsb(ip->i_fs, ip->i_blocks); 4400Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) || 4430Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))) 4440Sstevel@tonic-gate ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR); 4450Sstevel@tonic-gate 4460Sstevel@tonic-gate if (frags) 4470Sstevel@tonic-gate ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR); 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate for (i = 0; i < NIADDR && frags; ++i) 4500Sstevel@tonic-gate if (ip->i_ib[i]) 4510Sstevel@tonic-gate ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i); 4520Sstevel@tonic-gate } 4530Sstevel@tonic-gate 4540Sstevel@tonic-gate /* 4550Sstevel@tonic-gate * freeing possible metadata (block of user data) 4560Sstevel@tonic-gate */ 4570Sstevel@tonic-gate void 4580Sstevel@tonic-gate ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb) 4590Sstevel@tonic-gate { 4600Sstevel@tonic-gate top_matadel(ufsvfsp, mof, nb); 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate } 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate /* 4650Sstevel@tonic-gate * allocating metadata 4660Sstevel@tonic-gate */ 4670Sstevel@tonic-gate void 4680Sstevel@tonic-gate ufs_trans_mata_alloc( 4690Sstevel@tonic-gate struct ufsvfs *ufsvfsp, 4700Sstevel@tonic-gate struct inode *ip, 4710Sstevel@tonic-gate daddr_t frag, 4720Sstevel@tonic-gate ulong_t nb, 4730Sstevel@tonic-gate int indir) 4740Sstevel@tonic-gate { 4750Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 4760Sstevel@tonic-gate o_mode_t ifmt = ip->i_mode & IFMT; 4770Sstevel@tonic-gate 4780Sstevel@tonic-gate if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) || 4790Sstevel@tonic-gate (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))) 4800Sstevel@tonic-gate TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb); 4810Sstevel@tonic-gate } 4820Sstevel@tonic-gate 4830Sstevel@tonic-gate #endif /* DEBUG */ 4840Sstevel@tonic-gate 4850Sstevel@tonic-gate /* 4860Sstevel@tonic-gate * ufs_trans_dir is used to declare a directory delta 4870Sstevel@tonic-gate */ 4880Sstevel@tonic-gate int 4890Sstevel@tonic-gate ufs_trans_dir(struct inode *ip, off_t offset) 4900Sstevel@tonic-gate { 4910Sstevel@tonic-gate daddr_t bn; 4920Sstevel@tonic-gate int contig = 0, error; 4930Sstevel@tonic-gate 4940Sstevel@tonic-gate ASSERT(ip); 4950Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents)); 4960Sstevel@tonic-gate error = bmap_read(ip, (u_offset_t)offset, &bn, &contig); 4970Sstevel@tonic-gate if (error || (bn == UFS_HOLE)) { 4980Sstevel@tonic-gate cmn_err(CE_WARN, "ufs_trans_dir - could not get block" 4990Sstevel@tonic-gate " number error = %d bn = %d\n", error, (int)bn); 5000Sstevel@tonic-gate if (error == 0) /* treat UFS_HOLE as an I/O error */ 5010Sstevel@tonic-gate error = EIO; 5020Sstevel@tonic-gate return (error); 5030Sstevel@tonic-gate } 5040Sstevel@tonic-gate TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0); 5050Sstevel@tonic-gate return (error); 5060Sstevel@tonic-gate } 5070Sstevel@tonic-gate 5080Sstevel@tonic-gate /*ARGSUSED*/ 5090Sstevel@tonic-gate int 5100Sstevel@tonic-gate ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp) 5110Sstevel@tonic-gate { 5120Sstevel@tonic-gate /* 5130Sstevel@tonic-gate * Lock the quota subsystem (ufsvfsp can be NULL 5140Sstevel@tonic-gate * if the DQ_ERROR is set). 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate if (ufsvfsp) 5170Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 5180Sstevel@tonic-gate mutex_enter(&dqp->dq_lock); 5190Sstevel@tonic-gate 5200Sstevel@tonic-gate /* 5210Sstevel@tonic-gate * If this transaction has been cancelled by closedq_scan_inode(), 5220Sstevel@tonic-gate * then bail out now. We don't call dqput() in this case because 5230Sstevel@tonic-gate * it has already been done. 5240Sstevel@tonic-gate */ 5250Sstevel@tonic-gate if ((dqp->dq_flags & DQ_TRANS) == 0) { 5260Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5270Sstevel@tonic-gate if (ufsvfsp) 5280Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5290Sstevel@tonic-gate return (0); 5300Sstevel@tonic-gate } 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate if (dqp->dq_flags & DQ_ERROR) { 5330Sstevel@tonic-gate /* 5340Sstevel@tonic-gate * Paranoia to make sure that there is at least one 5350Sstevel@tonic-gate * reference to the dquot struct. We are done with 5360Sstevel@tonic-gate * the dquot (due to an error) so clear logging 5370Sstevel@tonic-gate * specific markers. 5380Sstevel@tonic-gate */ 5390Sstevel@tonic-gate ASSERT(dqp->dq_cnt >= 1); 5400Sstevel@tonic-gate dqp->dq_flags &= ~DQ_TRANS; 5410Sstevel@tonic-gate dqput(dqp); 5420Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5430Sstevel@tonic-gate if (ufsvfsp) 5440Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5450Sstevel@tonic-gate return (1); 5460Sstevel@tonic-gate } 5470Sstevel@tonic-gate 5480Sstevel@tonic-gate if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) { 5490Sstevel@tonic-gate ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0)); 5500Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb, 5510Sstevel@tonic-gate dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0); 5520Sstevel@tonic-gate /* 5530Sstevel@tonic-gate * Paranoia to make sure that there is at least one 5540Sstevel@tonic-gate * reference to the dquot struct. Clear the 5550Sstevel@tonic-gate * modification flag because the operation is now in 5560Sstevel@tonic-gate * the log. Also clear the logging specific markers 5570Sstevel@tonic-gate * that were set in ufs_trans_quota(). 5580Sstevel@tonic-gate */ 5590Sstevel@tonic-gate ASSERT(dqp->dq_cnt >= 1); 5600Sstevel@tonic-gate dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS); 5610Sstevel@tonic-gate dqput(dqp); 5620Sstevel@tonic-gate } 5630Sstevel@tonic-gate 5640Sstevel@tonic-gate /* 5650Sstevel@tonic-gate * At this point, the logging specific flag should be clear, 5660Sstevel@tonic-gate * but add paranoia just in case something has gone wrong. 5670Sstevel@tonic-gate */ 5680Sstevel@tonic-gate ASSERT((dqp->dq_flags & DQ_TRANS) == 0); 5690Sstevel@tonic-gate mutex_exit(&dqp->dq_lock); 5700Sstevel@tonic-gate if (ufsvfsp) 5710Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 5720Sstevel@tonic-gate return (0); 5730Sstevel@tonic-gate } 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate /* 5760Sstevel@tonic-gate * ufs_trans_quota take in a uid, allocates the disk space, placing the 5770Sstevel@tonic-gate * quota record into the metamap, then declares the delta. 5780Sstevel@tonic-gate */ 5790Sstevel@tonic-gate /*ARGSUSED*/ 5800Sstevel@tonic-gate void 5810Sstevel@tonic-gate ufs_trans_quota(struct dquot *dqp) 5820Sstevel@tonic-gate { 5830Sstevel@tonic-gate 5840Sstevel@tonic-gate struct inode *qip = dqp->dq_ufsvfsp->vfs_qinod; 5850Sstevel@tonic-gate 5860Sstevel@tonic-gate ASSERT(qip); 5870Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dqp->dq_lock)); 5880Sstevel@tonic-gate ASSERT(dqp->dq_flags & DQ_MOD); 5890Sstevel@tonic-gate ASSERT(dqp->dq_mof != 0); 5900Sstevel@tonic-gate ASSERT(dqp->dq_mof != UFS_HOLE); 5910Sstevel@tonic-gate 5920Sstevel@tonic-gate /* 5930Sstevel@tonic-gate * Mark this dquot to indicate that we are starting a logging 5940Sstevel@tonic-gate * file system operation for this dquot. Also increment the 5950Sstevel@tonic-gate * reference count so that the dquot does not get reused while 5960Sstevel@tonic-gate * it is on the mapentry_t list. DQ_TRANS is cleared and the 5970Sstevel@tonic-gate * reference count is decremented by ufs_trans_push_quota. 5980Sstevel@tonic-gate * 5990Sstevel@tonic-gate * If the file system is force-unmounted while there is a 6000Sstevel@tonic-gate * pending quota transaction, then closedq_scan_inode() will 6010Sstevel@tonic-gate * clear the DQ_TRANS flag and decrement the reference count. 6020Sstevel@tonic-gate * 6030Sstevel@tonic-gate * Since deltamap_add() drops multiple transactions to the 6040Sstevel@tonic-gate * same dq_mof and ufs_trans_push_quota() won't get called, 6050Sstevel@tonic-gate * we use DQ_TRANS to prevent repeat transactions from 6060Sstevel@tonic-gate * incrementing the reference count (or calling TRANS_DELTA()). 6070Sstevel@tonic-gate */ 6080Sstevel@tonic-gate if ((dqp->dq_flags & DQ_TRANS) == 0) { 6090Sstevel@tonic-gate dqp->dq_flags |= DQ_TRANS; 6100Sstevel@tonic-gate dqp->dq_cnt++; 6110Sstevel@tonic-gate TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk), 6120Sstevel@tonic-gate DT_QR, ufs_trans_push_quota, (ulong_t)dqp); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate } 6150Sstevel@tonic-gate 6160Sstevel@tonic-gate void 6170Sstevel@tonic-gate ufs_trans_dqrele(struct dquot *dqp) 6180Sstevel@tonic-gate { 6190Sstevel@tonic-gate struct ufsvfs *ufsvfsp = dqp->dq_ufsvfsp; 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 6220Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE); 6230Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 6240Sstevel@tonic-gate dqrele(dqp); 6250Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 6260Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE); 6270Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 6280Sstevel@tonic-gate } 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate int ufs_trans_max_resv = TOP_MAX_RESV; /* will be adjusted for testing */ 6310Sstevel@tonic-gate long ufs_trans_avgbfree = 0; /* will be adjusted for testing */ 6320Sstevel@tonic-gate #define TRANS_MAX_WRITE (1024 * 1024) 6330Sstevel@tonic-gate size_t ufs_trans_max_resid = TRANS_MAX_WRITE; 6340Sstevel@tonic-gate 6350Sstevel@tonic-gate /* 6360Sstevel@tonic-gate * Calculate the log reservation for the given write or truncate 6370Sstevel@tonic-gate */ 6380Sstevel@tonic-gate static ulong_t 6390Sstevel@tonic-gate ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc) 6400Sstevel@tonic-gate { 6410Sstevel@tonic-gate long ncg, last2blk; 6420Sstevel@tonic-gate long niblk = 0; 6430Sstevel@tonic-gate u_offset_t writeend, offblk; 6440Sstevel@tonic-gate int resv; 6450Sstevel@tonic-gate daddr_t nblk, maxfblk; 6460Sstevel@tonic-gate long avgbfree; 6470Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 6480Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 6490Sstevel@tonic-gate long fni = NINDIR(fs); 6500Sstevel@tonic-gate int bsize = fs->fs_bsize; 6510Sstevel@tonic-gate 6520Sstevel@tonic-gate /* 6530Sstevel@tonic-gate * Assume that the request will fit in 1 or 2 cg's, 6540Sstevel@tonic-gate * resv is the amount of log space to reserve (in bytes). 6550Sstevel@tonic-gate */ 6560Sstevel@tonic-gate resv = SIZECG(ip) * 2 + INODESIZE + 1024; 6570Sstevel@tonic-gate 6580Sstevel@tonic-gate /* 6590Sstevel@tonic-gate * get max position of write in fs blocks 6600Sstevel@tonic-gate */ 6610Sstevel@tonic-gate writeend = offset + resid; 6620Sstevel@tonic-gate maxfblk = lblkno(fs, writeend); 6630Sstevel@tonic-gate offblk = lblkno(fs, offset); 6640Sstevel@tonic-gate /* 6650Sstevel@tonic-gate * request size in fs blocks 6660Sstevel@tonic-gate */ 6670Sstevel@tonic-gate nblk = lblkno(fs, blkroundup(fs, resid)); 6680Sstevel@tonic-gate /* 6690Sstevel@tonic-gate * Adjust for sparse files 6700Sstevel@tonic-gate */ 6710Sstevel@tonic-gate if (trunc) 6720Sstevel@tonic-gate nblk = MIN(nblk, ip->i_blocks); 6730Sstevel@tonic-gate 6740Sstevel@tonic-gate /* 6750Sstevel@tonic-gate * Adjust avgbfree (for testing) 6760Sstevel@tonic-gate */ 6770Sstevel@tonic-gate avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1; 6780Sstevel@tonic-gate 6790Sstevel@tonic-gate /* 6800Sstevel@tonic-gate * Calculate maximum number of blocks of triple indirect 6810Sstevel@tonic-gate * pointers to write. 6820Sstevel@tonic-gate */ 6830Sstevel@tonic-gate last2blk = NDADDR + fni + fni * fni; 6840Sstevel@tonic-gate if (maxfblk > last2blk) { 6850Sstevel@tonic-gate long nl2ptr; 6860Sstevel@tonic-gate long n3blk; 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate if (offblk > last2blk) 6890Sstevel@tonic-gate n3blk = maxfblk - offblk; 6900Sstevel@tonic-gate else 6910Sstevel@tonic-gate n3blk = maxfblk - last2blk; 6920Sstevel@tonic-gate niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1; 6930Sstevel@tonic-gate nl2ptr = roundup(niblk, fni) / fni + 1; 6940Sstevel@tonic-gate niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2; 6950Sstevel@tonic-gate maxfblk -= n3blk; 6960Sstevel@tonic-gate } 6970Sstevel@tonic-gate /* 6980Sstevel@tonic-gate * calculate maximum number of blocks of double indirect 6990Sstevel@tonic-gate * pointers to write. 7000Sstevel@tonic-gate */ 7010Sstevel@tonic-gate if (maxfblk > NDADDR + fni) { 7020Sstevel@tonic-gate long n2blk; 7030Sstevel@tonic-gate 7040Sstevel@tonic-gate if (offblk > NDADDR + fni) 7050Sstevel@tonic-gate n2blk = maxfblk - offblk; 7060Sstevel@tonic-gate else 7070Sstevel@tonic-gate n2blk = maxfblk - NDADDR + fni; 7080Sstevel@tonic-gate niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2; 7090Sstevel@tonic-gate maxfblk -= n2blk; 7100Sstevel@tonic-gate } 7110Sstevel@tonic-gate /* 7120Sstevel@tonic-gate * Add in indirect pointer block write 7130Sstevel@tonic-gate */ 7140Sstevel@tonic-gate if (maxfblk > NDADDR) { 7150Sstevel@tonic-gate niblk += 1; 7160Sstevel@tonic-gate } 7170Sstevel@tonic-gate /* 7180Sstevel@tonic-gate * Calculate deltas for indirect pointer writes 7190Sstevel@tonic-gate */ 7200Sstevel@tonic-gate resv += niblk * (fs->fs_bsize + sizeof (struct delta)); 7210Sstevel@tonic-gate /* 7220Sstevel@tonic-gate * maximum number of cg's needed for request 7230Sstevel@tonic-gate */ 7240Sstevel@tonic-gate ncg = nblk / avgbfree; 7250Sstevel@tonic-gate if (ncg > fs->fs_ncg) 7260Sstevel@tonic-gate ncg = fs->fs_ncg; 7270Sstevel@tonic-gate 7280Sstevel@tonic-gate /* 7290Sstevel@tonic-gate * maximum amount of log space needed for request 7300Sstevel@tonic-gate */ 7310Sstevel@tonic-gate if (ncg > 2) 7320Sstevel@tonic-gate resv += (ncg - 2) * SIZECG(ip); 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate return (resv); 7350Sstevel@tonic-gate } 7360Sstevel@tonic-gate 7370Sstevel@tonic-gate /* 7380Sstevel@tonic-gate * Calculate the amount of log space that needs to be reserved for this 7390Sstevel@tonic-gate * trunc request. If the amount of log space is too large, then 7400Sstevel@tonic-gate * calculate the the size that the requests needs to be split into. 7410Sstevel@tonic-gate */ 742923Ssdebnath void 7430Sstevel@tonic-gate ufs_trans_trunc_resv( 7440Sstevel@tonic-gate struct inode *ip, 7450Sstevel@tonic-gate u_offset_t length, 7460Sstevel@tonic-gate int *resvp, 7470Sstevel@tonic-gate u_offset_t *residp) 7480Sstevel@tonic-gate { 7490Sstevel@tonic-gate ulong_t resv; 7500Sstevel@tonic-gate u_offset_t size, offset, resid; 7510Sstevel@tonic-gate int nchunks; 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate /* 7540Sstevel@tonic-gate * *resvp is the amount of log space to reserve (in bytes). 7550Sstevel@tonic-gate * when nonzero, *residp is the number of bytes to truncate. 7560Sstevel@tonic-gate */ 7570Sstevel@tonic-gate *residp = 0; 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate if (length < ip->i_size) { 7600Sstevel@tonic-gate size = ip->i_size - length; 7610Sstevel@tonic-gate } else { 7620Sstevel@tonic-gate resv = SIZECG(ip) * 2 + INODESIZE + 1024; 7630Sstevel@tonic-gate /* 7640Sstevel@tonic-gate * truncate up, doesn't really use much space, 7650Sstevel@tonic-gate * the default above should be sufficient. 7660Sstevel@tonic-gate */ 7670Sstevel@tonic-gate goto done; 7680Sstevel@tonic-gate } 7690Sstevel@tonic-gate 7700Sstevel@tonic-gate offset = length; 7710Sstevel@tonic-gate resid = size; 7720Sstevel@tonic-gate nchunks = 1; 7730Sstevel@tonic-gate for (; (resv = ufs_log_amt(ip, offset, resid, 1)) > ufs_trans_max_resv; 774*4662Sfrankho offset = length + (nchunks - 1) * resid) { 7750Sstevel@tonic-gate nchunks++; 7760Sstevel@tonic-gate resid = size / nchunks; 7770Sstevel@tonic-gate } 7780Sstevel@tonic-gate /* 7790Sstevel@tonic-gate * If this request takes too much log space, it will be split 7800Sstevel@tonic-gate */ 7810Sstevel@tonic-gate if (nchunks > 1) { 7820Sstevel@tonic-gate *residp = resid; 7830Sstevel@tonic-gate } 7840Sstevel@tonic-gate done: 7850Sstevel@tonic-gate *resvp = resv; 7860Sstevel@tonic-gate } 7870Sstevel@tonic-gate 7880Sstevel@tonic-gate int 7890Sstevel@tonic-gate ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr) 7900Sstevel@tonic-gate { 7910Sstevel@tonic-gate int err, issync, resv; 7920Sstevel@tonic-gate u_offset_t resid; 7930Sstevel@tonic-gate int do_block = 0; 7940Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 7950Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 7960Sstevel@tonic-gate 7970Sstevel@tonic-gate /* 7980Sstevel@tonic-gate * Not logging; just do the trunc 7990Sstevel@tonic-gate */ 8000Sstevel@tonic-gate if (!TRANS_ISTRANS(ufsvfsp)) { 8010Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 8020Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 8030Sstevel@tonic-gate err = ufs_itrunc(ip, length, flags, cr); 8040Sstevel@tonic-gate rw_exit(&ip->i_contents); 8050Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 8060Sstevel@tonic-gate return (err); 8070Sstevel@tonic-gate } 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate /* 8100Sstevel@tonic-gate * within the lockfs protocol but *not* part of a transaction 8110Sstevel@tonic-gate */ 8120Sstevel@tonic-gate do_block = curthread->t_flag & T_DONTBLOCK; 8130Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 8140Sstevel@tonic-gate 8150Sstevel@tonic-gate /* 8160Sstevel@tonic-gate * Trunc the file (in pieces, if necessary) 8170Sstevel@tonic-gate */ 8180Sstevel@tonic-gate again: 8190Sstevel@tonic-gate ufs_trans_trunc_resv(ip, length, &resv, &resid); 8200Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv); 8210Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 8220Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 8230Sstevel@tonic-gate if (resid) { 8240Sstevel@tonic-gate /* 8250Sstevel@tonic-gate * resid is only set if we have to truncate in chunks 8260Sstevel@tonic-gate */ 8270Sstevel@tonic-gate ASSERT(length + resid < ip->i_size); 8280Sstevel@tonic-gate 8290Sstevel@tonic-gate /* 8300Sstevel@tonic-gate * Partially trunc file down to desired size (length). 8310Sstevel@tonic-gate * Only retain I_FREE on the last partial trunc. 8320Sstevel@tonic-gate * Round up size to a block boundary, to ensure the truncate 8330Sstevel@tonic-gate * doesn't have to allocate blocks. This is done both for 8340Sstevel@tonic-gate * performance and to fix a bug where if the block can't be 8350Sstevel@tonic-gate * allocated then the inode delete fails, but the inode 8360Sstevel@tonic-gate * is still freed with attached blocks and non-zero size 8370Sstevel@tonic-gate * (bug 4348738). 8380Sstevel@tonic-gate */ 8390Sstevel@tonic-gate err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)), 8400Sstevel@tonic-gate flags & ~I_FREE, cr); 8410Sstevel@tonic-gate ASSERT(ip->i_size != length); 8420Sstevel@tonic-gate } else 8430Sstevel@tonic-gate err = ufs_itrunc(ip, length, flags, cr); 8440Sstevel@tonic-gate if (!do_block) 8450Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 8460Sstevel@tonic-gate rw_exit(&ip->i_contents); 8470Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 8480Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv); 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate if ((err == 0) && resid) { 8510Sstevel@tonic-gate ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 8520Sstevel@tonic-gate goto again; 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate return (err); 8550Sstevel@tonic-gate } 8560Sstevel@tonic-gate 8570Sstevel@tonic-gate /* 8580Sstevel@tonic-gate * Fault in the pages of the first n bytes specified by the uio structure. 8590Sstevel@tonic-gate * 1 byte in each page is touched and the uio struct is unmodified. 8600Sstevel@tonic-gate * Any error will terminate the process as this is only a best 8610Sstevel@tonic-gate * attempt to get the pages resident. 8620Sstevel@tonic-gate */ 8630Sstevel@tonic-gate static void 8640Sstevel@tonic-gate ufs_trans_touch(ssize_t n, struct uio *uio) 8650Sstevel@tonic-gate { 8660Sstevel@tonic-gate struct iovec *iov; 8670Sstevel@tonic-gate ulong_t cnt, incr; 8680Sstevel@tonic-gate caddr_t p; 8690Sstevel@tonic-gate uint8_t tmp; 8700Sstevel@tonic-gate 8710Sstevel@tonic-gate iov = uio->uio_iov; 8720Sstevel@tonic-gate 8730Sstevel@tonic-gate while (n) { 8740Sstevel@tonic-gate cnt = MIN(iov->iov_len, n); 8750Sstevel@tonic-gate if (cnt == 0) { 8760Sstevel@tonic-gate /* empty iov entry */ 8770Sstevel@tonic-gate iov++; 8780Sstevel@tonic-gate continue; 8790Sstevel@tonic-gate } 8800Sstevel@tonic-gate n -= cnt; 8810Sstevel@tonic-gate /* 8820Sstevel@tonic-gate * touch each page in this segment. 8830Sstevel@tonic-gate */ 8840Sstevel@tonic-gate p = iov->iov_base; 8850Sstevel@tonic-gate while (cnt) { 8860Sstevel@tonic-gate switch (uio->uio_segflg) { 8870Sstevel@tonic-gate case UIO_USERSPACE: 8880Sstevel@tonic-gate case UIO_USERISPACE: 8890Sstevel@tonic-gate if (fuword8(p, &tmp)) 8900Sstevel@tonic-gate return; 8910Sstevel@tonic-gate break; 8920Sstevel@tonic-gate case UIO_SYSSPACE: 8930Sstevel@tonic-gate if (kcopy(p, &tmp, 1)) 8940Sstevel@tonic-gate return; 8950Sstevel@tonic-gate break; 8960Sstevel@tonic-gate } 8970Sstevel@tonic-gate incr = MIN(cnt, PAGESIZE); 8980Sstevel@tonic-gate p += incr; 8990Sstevel@tonic-gate cnt -= incr; 9000Sstevel@tonic-gate } 9010Sstevel@tonic-gate /* 9020Sstevel@tonic-gate * touch the last byte in case it straddles a page. 9030Sstevel@tonic-gate */ 9040Sstevel@tonic-gate p--; 9050Sstevel@tonic-gate switch (uio->uio_segflg) { 9060Sstevel@tonic-gate case UIO_USERSPACE: 9070Sstevel@tonic-gate case UIO_USERISPACE: 9080Sstevel@tonic-gate if (fuword8(p, &tmp)) 9090Sstevel@tonic-gate return; 9100Sstevel@tonic-gate break; 9110Sstevel@tonic-gate case UIO_SYSSPACE: 9120Sstevel@tonic-gate if (kcopy(p, &tmp, 1)) 9130Sstevel@tonic-gate return; 9140Sstevel@tonic-gate break; 9150Sstevel@tonic-gate } 9160Sstevel@tonic-gate iov++; 9170Sstevel@tonic-gate } 9180Sstevel@tonic-gate } 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate /* 9210Sstevel@tonic-gate * Calculate the amount of log space that needs to be reserved for this 9220Sstevel@tonic-gate * write request. If the amount of log space is too large, then 9230Sstevel@tonic-gate * calculate the size that the requests needs to be split into. 9240Sstevel@tonic-gate * First try fixed chunks of size ufs_trans_max_resid. If that 9250Sstevel@tonic-gate * is too big, iterate down to the largest size that will fit. 9260Sstevel@tonic-gate * Pagein the pages in the first chunk here, so that the pagein is 9270Sstevel@tonic-gate * avoided later when the transaction is open. 9280Sstevel@tonic-gate */ 9290Sstevel@tonic-gate void 9300Sstevel@tonic-gate ufs_trans_write_resv( 9310Sstevel@tonic-gate struct inode *ip, 9320Sstevel@tonic-gate struct uio *uio, 9330Sstevel@tonic-gate int *resvp, 9340Sstevel@tonic-gate int *residp) 9350Sstevel@tonic-gate { 9360Sstevel@tonic-gate ulong_t resv; 9370Sstevel@tonic-gate offset_t offset; 9380Sstevel@tonic-gate ssize_t resid; 9390Sstevel@tonic-gate int nchunks; 9400Sstevel@tonic-gate 9410Sstevel@tonic-gate *residp = 0; 9420Sstevel@tonic-gate offset = uio->uio_offset; 9430Sstevel@tonic-gate resid = MIN(uio->uio_resid, ufs_trans_max_resid); 9440Sstevel@tonic-gate resv = ufs_log_amt(ip, offset, resid, 0); 9450Sstevel@tonic-gate if (resv <= ufs_trans_max_resv) { 9460Sstevel@tonic-gate ufs_trans_touch(resid, uio); 9470Sstevel@tonic-gate if (resid != uio->uio_resid) 9480Sstevel@tonic-gate *residp = resid; 9490Sstevel@tonic-gate *resvp = resv; 9500Sstevel@tonic-gate return; 9510Sstevel@tonic-gate } 9520Sstevel@tonic-gate 9530Sstevel@tonic-gate resid = uio->uio_resid; 9540Sstevel@tonic-gate nchunks = 1; 9550Sstevel@tonic-gate for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv; 956*4662Sfrankho offset = uio->uio_offset + (nchunks - 1) * resid) { 9570Sstevel@tonic-gate nchunks++; 9580Sstevel@tonic-gate resid = uio->uio_resid / nchunks; 9590Sstevel@tonic-gate } 9600Sstevel@tonic-gate ufs_trans_touch(resid, uio); 9610Sstevel@tonic-gate /* 9620Sstevel@tonic-gate * If this request takes too much log space, it will be split 9630Sstevel@tonic-gate */ 9640Sstevel@tonic-gate if (nchunks > 1) 9650Sstevel@tonic-gate *residp = resid; 9660Sstevel@tonic-gate *resvp = resv; 9670Sstevel@tonic-gate } 9680Sstevel@tonic-gate 9690Sstevel@tonic-gate /* 9700Sstevel@tonic-gate * Issue write request. 9710Sstevel@tonic-gate * 9720Sstevel@tonic-gate * Split a large request into smaller chunks. 9730Sstevel@tonic-gate */ 9740Sstevel@tonic-gate int 9750Sstevel@tonic-gate ufs_trans_write( 9760Sstevel@tonic-gate struct inode *ip, 9770Sstevel@tonic-gate struct uio *uio, 9780Sstevel@tonic-gate int ioflag, 9790Sstevel@tonic-gate cred_t *cr, 9800Sstevel@tonic-gate int resv, 9810Sstevel@tonic-gate long resid) 9820Sstevel@tonic-gate { 9830Sstevel@tonic-gate long realresid; 9840Sstevel@tonic-gate int err; 9850Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 9860Sstevel@tonic-gate 9870Sstevel@tonic-gate /* 9880Sstevel@tonic-gate * since the write is too big and would "HOG THE LOG" it needs to 9890Sstevel@tonic-gate * be broken up and done in pieces. NOTE, the caller will 9900Sstevel@tonic-gate * issue the EOT after the request has been completed 9910Sstevel@tonic-gate */ 9920Sstevel@tonic-gate realresid = uio->uio_resid; 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate again: 9950Sstevel@tonic-gate /* 9960Sstevel@tonic-gate * Perform partial request (uiomove will update uio for us) 9970Sstevel@tonic-gate * Request is split up into "resid" size chunks until 9980Sstevel@tonic-gate * "realresid" bytes have been transferred. 9990Sstevel@tonic-gate */ 10000Sstevel@tonic-gate uio->uio_resid = MIN(resid, realresid); 10010Sstevel@tonic-gate realresid -= uio->uio_resid; 10020Sstevel@tonic-gate err = wrip(ip, uio, ioflag, cr); 10030Sstevel@tonic-gate 10040Sstevel@tonic-gate /* 10050Sstevel@tonic-gate * Error or request is done; caller issues final EOT 10060Sstevel@tonic-gate */ 10070Sstevel@tonic-gate if (err || uio->uio_resid || (realresid == 0)) { 10080Sstevel@tonic-gate uio->uio_resid += realresid; 10090Sstevel@tonic-gate return (err); 10100Sstevel@tonic-gate } 10110Sstevel@tonic-gate 10120Sstevel@tonic-gate /* 10130Sstevel@tonic-gate * Generate EOT for this part of the request 10140Sstevel@tonic-gate */ 10150Sstevel@tonic-gate rw_exit(&ip->i_contents); 10160Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 10170Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 10180Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv); 10190Sstevel@tonic-gate } else { 10200Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv); 10210Sstevel@tonic-gate } 10220Sstevel@tonic-gate 10230Sstevel@tonic-gate /* 10240Sstevel@tonic-gate * Make sure the input buffer is resident before starting 10250Sstevel@tonic-gate * the next transaction. 10260Sstevel@tonic-gate */ 10270Sstevel@tonic-gate ufs_trans_touch(MIN(resid, realresid), uio); 10280Sstevel@tonic-gate 10290Sstevel@tonic-gate /* 10300Sstevel@tonic-gate * Generate BOT for next part of the request 10310Sstevel@tonic-gate */ 10320Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 10330Sstevel@tonic-gate int error; 10340Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error); 10350Sstevel@tonic-gate ASSERT(!error); 10360Sstevel@tonic-gate } else { 10370Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv); 10380Sstevel@tonic-gate } 10390Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 10400Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 10410Sstevel@tonic-gate /* 10420Sstevel@tonic-gate * Error during EOT (probably device error while writing commit rec) 10430Sstevel@tonic-gate */ 10440Sstevel@tonic-gate if (err) 10450Sstevel@tonic-gate return (err); 10460Sstevel@tonic-gate goto again; 10470Sstevel@tonic-gate } 1048