/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/vfs.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_panic.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/debug.h>


/*
 * Global list of mounted ufs instances (defined in the ufs vfs code);
 * the list is walked here under ufsvfs_mutex.
 */
extern kmutex_t		ufsvfs_mutex;
extern struct ufsvfs	*ufs_instances;

/*
 * hlock any file systems w/errored logs
 *
 * Pass 1 marks every mounted instance whose log is in the error state
 * with UT_HLOCKING (which disallows unmounts while we work).  Pass 2
 * (the "again" loop) picks each marked instance, hard-locks it through
 * the lockfs protocol, and restores UT_MOUNTED so unmounts are allowed
 * again.  Returns nonzero when the caller should retry later (a lockfs
 * operation was busy, or the fs was already error-locked).
 */
int
ufs_trans_hlock()
{
	struct ufsvfs	*ufsvfsp;
	struct lockfs	lockfs;
	int		error;
	int		retry	= 0;

	/*
	 * find fs's that paniced or have errored logging devices
	 */
	mutex_enter(&ufsvfs_mutex);
	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) {
		/*
		 * not mounted; continue
		 */
		if ((ufsvfsp->vfs_vfs == NULL) ||
		    (ufsvfsp->vfs_validfs == UT_UNMOUNTED))
			continue;
		/*
		 * disallow unmounts (hlock occurs below)
		 */
		if (TRANS_ISERROR(ufsvfsp))
			ufsvfsp->vfs_validfs = UT_HLOCKING;
	}
	mutex_exit(&ufsvfs_mutex);

	/*
	 * hlock the fs's that paniced or have errored logging devices
	 */
again:
	mutex_enter(&ufsvfs_mutex);
	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next)
		if (ufsvfsp->vfs_validfs == UT_HLOCKING)
			break;
	mutex_exit(&ufsvfs_mutex);
	/* no marked instances left; done */
	if (ufsvfsp == NULL)
		return (retry);
	/*
	 * hlock the file system
	 */
	(void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs);
	if (!LOCKFS_IS_ELOCK(&lockfs)) {
		lockfs.lf_lock = LOCKFS_HLOCK;
		lockfs.lf_flags = 0;
		lockfs.lf_comlen = 0;
		lockfs.lf_comment = NULL;
		error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0);
		/*
		 * retry after awhile; another app currently doing lockfs
		 */
		if (error == EBUSY || error == EINVAL)
			retry = 1;
	} else {
		/*
		 * Already error-locked: if the failure queue has work,
		 * wake waiters on the ufs_fix queue (presumably the
		 * fix-on-error worker thread -- confirm) by raising
		 * uq_lowat to the current element count, then retry.
		 */
		if (ufsfx_get_failure_qlen() > 0) {
			if (mutex_tryenter(&ufs_fix.uq_mutex)) {
				ufs_fix.uq_lowat = ufs_fix.uq_ne;
				cv_broadcast(&ufs_fix.uq_cv);
				mutex_exit(&ufs_fix.uq_mutex);
			}
		}
		retry = 1;
	}

	/*
	 * allow unmounts
	 */
	ufsvfsp->vfs_validfs = UT_MOUNTED;
	goto again;
}

/*
 * Called on a log error: wake every waiter on the ufs_hlock queue by
 * raising uq_ne to the low-water mark and broadcasting on uq_cv
 * (the hlock thread then runs ufs_trans_hlock() -- presumably; confirm
 * against the queue consumer).
 */
/*ARGSUSED*/
void
ufs_trans_onerror()
{
	mutex_enter(&ufs_hlock.uq_mutex);
	ufs_hlock.uq_ne = ufs_hlock.uq_lowat;
	cv_broadcast(&ufs_hlock.uq_cv);
	mutex_exit(&ufs_hlock.uq_mutex);
}

/*
 * Write the superblock, wrapping the update in an async transaction.
 * If T_DONTBLOCK is already set, the caller is already inside a
 * transaction, so just do the write.  During panic on a logging fs,
 * skip the update entirely.
 */
void
ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid)
{
	if (curthread->t_flag & T_DONTBLOCK) {
		sbupdate(vfsp);
		return;
	} else {

		if (panicstr && TRANS_ISTRANS(ufsvfsp))
			return;

		curthread->t_flag |= T_DONTBLOCK;
		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
		sbupdate(vfsp);
		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
		curthread->t_flag &= ~T_DONTBLOCK;
	}
}

void
ufs_trans_iupdat(struct inode *ip, int waitfor) 170*0Sstevel@tonic-gate { 171*0Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 172*0Sstevel@tonic-gate 173*0Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 174*0Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 175*0Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 176*0Sstevel@tonic-gate rw_exit(&ip->i_contents); 177*0Sstevel@tonic-gate return; 178*0Sstevel@tonic-gate } else { 179*0Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 182*0Sstevel@tonic-gate return; 183*0Sstevel@tonic-gate 184*0Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 185*0Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 186*0Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 187*0Sstevel@tonic-gate ufs_iupdat(ip, waitfor); 188*0Sstevel@tonic-gate rw_exit(&ip->i_contents); 189*0Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip)); 190*0Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 191*0Sstevel@tonic-gate } 192*0Sstevel@tonic-gate } 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate void 195*0Sstevel@tonic-gate ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid) 196*0Sstevel@tonic-gate { 197*0Sstevel@tonic-gate if (curthread->t_flag & T_DONTBLOCK) { 198*0Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 199*0Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 200*0Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 201*0Sstevel@tonic-gate return; 202*0Sstevel@tonic-gate } else { 203*0Sstevel@tonic-gate 204*0Sstevel@tonic-gate if (panicstr && TRANS_ISTRANS(ufsvfsp)) 205*0Sstevel@tonic-gate return; 206*0Sstevel@tonic-gate 207*0Sstevel@tonic-gate curthread->t_flag |= T_DONTBLOCK; 208*0Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 209*0Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 210*0Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 211*0Sstevel@tonic-gate 
mutex_exit(&ufsvfsp->vfs_lock); 212*0Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE); 213*0Sstevel@tonic-gate curthread->t_flag &= ~T_DONTBLOCK; 214*0Sstevel@tonic-gate } 215*0Sstevel@tonic-gate } 216*0Sstevel@tonic-gate 217*0Sstevel@tonic-gate /*ARGSUSED*/ 218*0Sstevel@tonic-gate int 219*0Sstevel@tonic-gate ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore) 220*0Sstevel@tonic-gate { 221*0Sstevel@tonic-gate struct fs *fs; 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 224*0Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 225*0Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp, 226*0Sstevel@tonic-gate ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize, 227*0Sstevel@tonic-gate (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize); 228*0Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 229*0Sstevel@tonic-gate return (0); 230*0Sstevel@tonic-gate } 231*0Sstevel@tonic-gate 232*0Sstevel@tonic-gate /*ARGSUSED*/ 233*0Sstevel@tonic-gate int 234*0Sstevel@tonic-gate ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno) 235*0Sstevel@tonic-gate { 236*0Sstevel@tonic-gate struct buf *bp; 237*0Sstevel@tonic-gate 238*0Sstevel@tonic-gate bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1); 239*0Sstevel@tonic-gate if (bp == NULL) 240*0Sstevel@tonic-gate return (ENOENT); 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) { 243*0Sstevel@tonic-gate /* 244*0Sstevel@tonic-gate * Do not use brwrite() here since the buffer is already 245*0Sstevel@tonic-gate * marked for retry or not by the code that called 246*0Sstevel@tonic-gate * TRANS_BUF(). 
247*0Sstevel@tonic-gate */ 248*0Sstevel@tonic-gate UFS_BWRITE(ufsvfsp, bp); 249*0Sstevel@tonic-gate return (0); 250*0Sstevel@tonic-gate } 251*0Sstevel@tonic-gate /* 252*0Sstevel@tonic-gate * If we did not find the real buf for this block above then 253*0Sstevel@tonic-gate * clear the dev so the buf won't be found by mistake 254*0Sstevel@tonic-gate * for this block later. We had to allocate at least a 1 byte 255*0Sstevel@tonic-gate * buffer to keep brelse happy. 256*0Sstevel@tonic-gate */ 257*0Sstevel@tonic-gate if (bp->b_bufsize == 1) { 258*0Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV; 259*0Sstevel@tonic-gate bp->b_edev = NODEV; 260*0Sstevel@tonic-gate bp->b_flags = 0; 261*0Sstevel@tonic-gate } 262*0Sstevel@tonic-gate brelse(bp); 263*0Sstevel@tonic-gate return (ENOENT); 264*0Sstevel@tonic-gate } 265*0Sstevel@tonic-gate 266*0Sstevel@tonic-gate /*ARGSUSED*/ 267*0Sstevel@tonic-gate int 268*0Sstevel@tonic-gate ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino) 269*0Sstevel@tonic-gate { 270*0Sstevel@tonic-gate int error; 271*0Sstevel@tonic-gate struct inode *ip; 272*0Sstevel@tonic-gate 273*0Sstevel@tonic-gate /* 274*0Sstevel@tonic-gate * Grab the quota lock (if the file system has not been forcibly 275*0Sstevel@tonic-gate * unmounted). 
276*0Sstevel@tonic-gate */ 277*0Sstevel@tonic-gate if (ufsvfsp) 278*0Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred); 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate if (ufsvfsp) 283*0Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 284*0Sstevel@tonic-gate if (error) 285*0Sstevel@tonic-gate return (ENOENT); 286*0Sstevel@tonic-gate 287*0Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) { 288*0Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 289*0Sstevel@tonic-gate ufs_iupdat(ip, 1); 290*0Sstevel@tonic-gate rw_exit(&ip->i_contents); 291*0Sstevel@tonic-gate VN_RELE(ITOV(ip)); 292*0Sstevel@tonic-gate return (0); 293*0Sstevel@tonic-gate } 294*0Sstevel@tonic-gate VN_RELE(ITOV(ip)); 295*0Sstevel@tonic-gate return (ENOENT); 296*0Sstevel@tonic-gate } 297*0Sstevel@tonic-gate 298*0Sstevel@tonic-gate #ifdef DEBUG 299*0Sstevel@tonic-gate /* 300*0Sstevel@tonic-gate * These routines maintain the metadata map (matamap) 301*0Sstevel@tonic-gate */ 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate /* 304*0Sstevel@tonic-gate * update the metadata map at mount 305*0Sstevel@tonic-gate */ 306*0Sstevel@tonic-gate static int 307*0Sstevel@tonic-gate ufs_trans_mata_mount_scan(struct inode *ip, void *arg) 308*0Sstevel@tonic-gate { 309*0Sstevel@tonic-gate /* 310*0Sstevel@tonic-gate * wrong file system; keep looking 311*0Sstevel@tonic-gate */ 312*0Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg) 313*0Sstevel@tonic-gate return (0); 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate /* 316*0Sstevel@tonic-gate * load the metadata map 317*0Sstevel@tonic-gate */ 318*0Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 319*0Sstevel@tonic-gate ufs_trans_mata_iget(ip); 320*0Sstevel@tonic-gate rw_exit(&ip->i_contents); 321*0Sstevel@tonic-gate return (0); 322*0Sstevel@tonic-gate } 323*0Sstevel@tonic-gate 324*0Sstevel@tonic-gate void 
ufs_trans_mata_mount(struct ufsvfs *ufsvfsp)
{
	struct fs	*fs	= ufsvfsp->vfs_fs;
	ino_t		ino;
	int		i;

	/*
	 * put static metadata into matamap
	 *	superblock
	 *	cylinder groups
	 *	inode groups
	 *	existing inodes
	 */
	TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize);

	/* one cg header + one inode group per cylinder group */
	for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) {
		TRANS_MATAADD(ufsvfsp,
		    ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize);
		TRANS_MATAADD(ufsvfsp,
		    ldbtob(fsbtodb(fs, itod(fs, ino))),
		    fs->fs_ipg * sizeof (struct dinode));
	}
	/* walk every in-core inode of this fs and map its metadata */
	(void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp);
}

/*
 * clear the metadata map at umount
 */
void
ufs_trans_mata_umount(struct ufsvfs *ufsvfsp)
{
	top_mataclr(ufsvfsp);
}

/*
 * summary info (may be extended during growfs test)
 */
void
ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs)
{
	TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)),
	    fs->fs_cssize);
}

/*
 * scan an allocation block (either inode or true block)
 *
 * Walks up to `nblk' direct block pointers starting at `blkp',
 * adding each allocated fragment run to the matamap and decrementing
 * *fragsp (the count of fragments still unaccounted for) as it goes.
 */
static void
ufs_trans_mata_direct(
	struct inode *ip,
	daddr_t *fragsp,
	daddr32_t *blkp,
	unsigned int nblk)
{
	int		i;
	daddr_t		frag;
	ulong_t		nb;
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;

	for (i = 0; i < nblk && *fragsp; ++i, ++blkp)
		if ((frag = *blkp) != 0) {
			/* full block vs. trailing partial block */
			if (*fragsp > fs->fs_frag) {
				nb = fs->fs_bsize;
				*fragsp -= fs->fs_frag;
			} else {
				nb = *fragsp * fs->fs_fsize;
				*fragsp = 0;
			}
			TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
		}
}

/*
 * scan an indirect allocation block (either inode or true block)
 *
 * Reads the indirect block at `frag' and recurses down `level'
 * levels; at each level the pointer block itself is mapped (for
 * directories, shadows, attrdirs and the quota inode) before
 * descending.
 */
static void
ufs_trans_mata_indir(
	struct inode *ip,
	daddr_t *fragsp,
	daddr_t frag,
	int level)
{
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	struct fs	*fs = ufsvfsp->vfs_fs;
	int		ne = fs->fs_bsize / (int)sizeof (daddr32_t);
	int		i;
	struct buf	*bp;
	daddr32_t	*blkp;
	o_mode_t	ifmt = ip->i_mode & IFMT;

	bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize);
	if (bp->b_flags & B_ERROR) {
		brelse(bp);
		return;
	}
	blkp = bp->b_un.b_daddr;

	if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))
		ufs_trans_mata_direct(ip, fragsp, blkp, ne);

	if (level)
		for (i = 0; i < ne && *fragsp; ++i, ++blkp)
			ufs_trans_mata_indir(ip, fragsp, *blkp, level-1);
	brelse(bp);
}

/*
 * put appropriate metadata into matamap for this inode
 *
 * Maps direct blocks (for metadata-bearing inode types), then the
 * indirect pointer blocks, stopping once all of the inode's fragments
 * (i_blocks) have been accounted for.
 */
void
ufs_trans_mata_iget(struct inode *ip)
{
	int	i;
	daddr_t	frags	= dbtofsb(ip->i_fs, ip->i_blocks);
	o_mode_t ifmt	= ip->i_mode & IFMT;

	if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
		ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR);

	if (frags)
		ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR);

	for (i = 0; i < NIADDR && frags; ++i)
		if (ip->i_ib[i])
			ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i);
}

/*
 * freeing possible metadata (block of user data)
 */
void
ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb)
{
	top_matadel(ufsvfsp, mof, nb);

}

/*
 * allocating metadata
 *
 * Only indirect blocks, and the data blocks of metadata-bearing inode
 * types (directories, shadows, attrdirs, the quota inode), are added
 * to the matamap.
 */
void
ufs_trans_mata_alloc(
	struct ufsvfs *ufsvfsp,
	struct inode *ip,
	daddr_t frag,
	ulong_t nb,
	int indir)
{
	struct fs	*fs	= ufsvfsp->vfs_fs;
	o_mode_t	ifmt	= ip->i_mode & IFMT;

	if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
		TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
}

#endif /* DEBUG */

/*
 * ufs_trans_dir is used to declare a directory delta
 *
 * Maps `offset' within directory inode `ip' to its disk block and
 * declares a DT_DIR delta of DIRBLKSIZ at that location.  Caller must
 * hold i_contents as writer.  A hole at `offset' is treated as EIO.
 */
int
ufs_trans_dir(struct inode *ip, off_t offset)
{
	daddr_t	bn;
	int	contig = 0, error;

	ASSERT(ip);
	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	error = bmap_read(ip, (u_offset_t)offset, &bn, &contig);
	if (error || (bn == UFS_HOLE)) {
		cmn_err(CE_WARN, "ufs_trans_dir - could not get block"
		    " number error = %d bn = %d\n", error, (int)bn);
		if (error == 0)	/* treat UFS_HOLE as an I/O error */
			error = EIO;
		return (error);
	}
	TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0);
	return (error);
}

/*
 * Push callback for a quota delta declared by ufs_trans_quota():
 * logs the in-core dqblk and clears the DQ_TRANS marker / extra
 * reference taken when the delta was declared.  Returns 1 to indicate
 * "retry" only in the DQ_ERROR case; 0 otherwise.
 */
/*ARGSUSED*/
int
ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp)
{
	/*
	 * Lock the quota subsystem (ufsvfsp can be NULL
	 * if the DQ_ERROR is set).
	 */
	if (ufsvfsp)
		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	mutex_enter(&dqp->dq_lock);

	/*
	 * If this transaction has been cancelled by closedq_scan_inode(),
	 * then bail out now.  We don't call dqput() in this case because
	 * it has already been done.
	 */
	if ((dqp->dq_flags & DQ_TRANS) == 0) {
		mutex_exit(&dqp->dq_lock);
		if (ufsvfsp)
			rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (0);
	}

	if (dqp->dq_flags & DQ_ERROR) {
		/*
		 * Paranoia to make sure that there is at least one
		 * reference to the dquot struct.  We are done with
		 * the dquot (due to an error) so clear logging
		 * specific markers.
		 */
		ASSERT(dqp->dq_cnt >= 1);
		dqp->dq_flags &= ~DQ_TRANS;
		dqput(dqp);
		mutex_exit(&dqp->dq_lock);
		if (ufsvfsp)
			rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (1);
	}

	if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) {
		ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0));
		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb,
		    dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0);
		/*
		 * Paranoia to make sure that there is at least one
		 * reference to the dquot struct.  Clear the
		 * modification flag because the operation is now in
		 * the log.  Also clear the logging specific markers
		 * that were set in ufs_trans_quota().
		 */
		ASSERT(dqp->dq_cnt >= 1);
		dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS);
		dqput(dqp);
	}

	/*
	 * At this point, the logging specific flag should be clear,
	 * but add paranoia just in case something has gone wrong.
	 */
	ASSERT((dqp->dq_flags & DQ_TRANS) == 0);
	mutex_exit(&dqp->dq_lock);
	if (ufsvfsp)
		rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (0);
}

/*
 * ufs_trans_quota take in a uid, allocates the disk space, placing the
 * quota record into the metamap, then declares the delta.
 */
/*ARGSUSED*/
void
ufs_trans_quota(struct dquot *dqp)
{

	struct inode	*qip = dqp->dq_ufsvfsp->vfs_qinod;

	ASSERT(qip);
	ASSERT(MUTEX_HELD(&dqp->dq_lock));
	ASSERT(dqp->dq_flags & DQ_MOD);
	ASSERT(dqp->dq_mof != 0);
	ASSERT(dqp->dq_mof != UFS_HOLE);

	/*
	 * Mark this dquot to indicate that we are starting a logging
	 * file system operation for this dquot.  Also increment the
	 * reference count so that the dquot does not get reused while
	 * it is on the mapentry_t list.  DQ_TRANS is cleared and the
	 * reference count is decremented by ufs_trans_push_quota.
	 *
	 * If the file system is force-unmounted while there is a
	 * pending quota transaction, then closedq_scan_inode() will
	 * clear the DQ_TRANS flag and decrement the reference count.
	 *
	 * Since deltamap_add() drops multiple transactions to the
	 * same dq_mof and ufs_trans_push_quota() won't get called,
	 * we use DQ_TRANS to prevent repeat transactions from
	 * incrementing the reference count (or calling TRANS_DELTA()).
	 */
	if ((dqp->dq_flags & DQ_TRANS) == 0) {
		dqp->dq_flags |= DQ_TRANS;
		dqp->dq_cnt++;
		TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk),
		    DT_QR, ufs_trans_push_quota, (ulong_t)dqp);
	}
}

/*
 * Release a dquot inside its own async transaction, with the quota
 * subsystem read-locked for the duration of the dqrele().
 */
void
ufs_trans_dqrele(struct dquot *dqp)
{
	struct ufsvfs	*ufsvfsp = dqp->dq_ufsvfsp;

	curthread->t_flag |= T_DONTBLOCK;
	TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	dqrele(dqp);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
	curthread->t_flag &= ~T_DONTBLOCK;
}

int ufs_trans_max_resv = TOP_MAX_RESV;	/* will be adjusted for testing */
long ufs_trans_avgbfree = 0;		/* will be adjusted for testing */
#define	TRANS_MAX_WRITE	(1024 * 1024)
size_t ufs_trans_max_resid = TRANS_MAX_WRITE;

/*
 * Calculate the log reservation for the given write or truncate
 */
static ulong_t
640*0Sstevel@tonic-gate ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc) 641*0Sstevel@tonic-gate { 642*0Sstevel@tonic-gate long ncg, last2blk; 643*0Sstevel@tonic-gate long niblk = 0; 644*0Sstevel@tonic-gate u_offset_t writeend, offblk; 645*0Sstevel@tonic-gate int resv; 646*0Sstevel@tonic-gate daddr_t nblk, maxfblk; 647*0Sstevel@tonic-gate long avgbfree; 648*0Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 649*0Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 650*0Sstevel@tonic-gate long fni = NINDIR(fs); 651*0Sstevel@tonic-gate int bsize = fs->fs_bsize; 652*0Sstevel@tonic-gate 653*0Sstevel@tonic-gate /* 654*0Sstevel@tonic-gate * Assume that the request will fit in 1 or 2 cg's, 655*0Sstevel@tonic-gate * resv is the amount of log space to reserve (in bytes). 656*0Sstevel@tonic-gate */ 657*0Sstevel@tonic-gate resv = SIZECG(ip) * 2 + INODESIZE + 1024; 658*0Sstevel@tonic-gate 659*0Sstevel@tonic-gate /* 660*0Sstevel@tonic-gate * get max position of write in fs blocks 661*0Sstevel@tonic-gate */ 662*0Sstevel@tonic-gate writeend = offset + resid; 663*0Sstevel@tonic-gate maxfblk = lblkno(fs, writeend); 664*0Sstevel@tonic-gate offblk = lblkno(fs, offset); 665*0Sstevel@tonic-gate /* 666*0Sstevel@tonic-gate * request size in fs blocks 667*0Sstevel@tonic-gate */ 668*0Sstevel@tonic-gate nblk = lblkno(fs, blkroundup(fs, resid)); 669*0Sstevel@tonic-gate /* 670*0Sstevel@tonic-gate * Adjust for sparse files 671*0Sstevel@tonic-gate */ 672*0Sstevel@tonic-gate if (trunc) 673*0Sstevel@tonic-gate nblk = MIN(nblk, ip->i_blocks); 674*0Sstevel@tonic-gate 675*0Sstevel@tonic-gate /* 676*0Sstevel@tonic-gate * Adjust avgbfree (for testing) 677*0Sstevel@tonic-gate */ 678*0Sstevel@tonic-gate avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1; 679*0Sstevel@tonic-gate 680*0Sstevel@tonic-gate /* 681*0Sstevel@tonic-gate * Calculate maximum number of blocks of triple indirect 682*0Sstevel@tonic-gate * pointers to write. 
683*0Sstevel@tonic-gate */ 684*0Sstevel@tonic-gate last2blk = NDADDR + fni + fni * fni; 685*0Sstevel@tonic-gate if (maxfblk > last2blk) { 686*0Sstevel@tonic-gate long nl2ptr; 687*0Sstevel@tonic-gate long n3blk; 688*0Sstevel@tonic-gate 689*0Sstevel@tonic-gate if (offblk > last2blk) 690*0Sstevel@tonic-gate n3blk = maxfblk - offblk; 691*0Sstevel@tonic-gate else 692*0Sstevel@tonic-gate n3blk = maxfblk - last2blk; 693*0Sstevel@tonic-gate niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1; 694*0Sstevel@tonic-gate nl2ptr = roundup(niblk, fni) / fni + 1; 695*0Sstevel@tonic-gate niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2; 696*0Sstevel@tonic-gate maxfblk -= n3blk; 697*0Sstevel@tonic-gate } 698*0Sstevel@tonic-gate /* 699*0Sstevel@tonic-gate * calculate maximum number of blocks of double indirect 700*0Sstevel@tonic-gate * pointers to write. 701*0Sstevel@tonic-gate */ 702*0Sstevel@tonic-gate if (maxfblk > NDADDR + fni) { 703*0Sstevel@tonic-gate long n2blk; 704*0Sstevel@tonic-gate 705*0Sstevel@tonic-gate if (offblk > NDADDR + fni) 706*0Sstevel@tonic-gate n2blk = maxfblk - offblk; 707*0Sstevel@tonic-gate else 708*0Sstevel@tonic-gate n2blk = maxfblk - NDADDR + fni; 709*0Sstevel@tonic-gate niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2; 710*0Sstevel@tonic-gate maxfblk -= n2blk; 711*0Sstevel@tonic-gate } 712*0Sstevel@tonic-gate /* 713*0Sstevel@tonic-gate * Add in indirect pointer block write 714*0Sstevel@tonic-gate */ 715*0Sstevel@tonic-gate if (maxfblk > NDADDR) { 716*0Sstevel@tonic-gate niblk += 1; 717*0Sstevel@tonic-gate } 718*0Sstevel@tonic-gate /* 719*0Sstevel@tonic-gate * Calculate deltas for indirect pointer writes 720*0Sstevel@tonic-gate */ 721*0Sstevel@tonic-gate resv += niblk * (fs->fs_bsize + sizeof (struct delta)); 722*0Sstevel@tonic-gate /* 723*0Sstevel@tonic-gate * maximum number of cg's needed for request 724*0Sstevel@tonic-gate */ 725*0Sstevel@tonic-gate ncg = nblk / avgbfree; 726*0Sstevel@tonic-gate if (ncg > fs->fs_ncg) 
		ncg = fs->fs_ncg;	/* can never need more cg's than exist */

	/*
	 * maximum amount of log space needed for request
	 */
	if (ncg > 2)
		resv += (ncg - 2) * SIZECG(ip);

	return (resv);
}

/*
 * Calculate the amount of log space that needs to be reserved for this
 * trunc request.  If the amount of log space is too large, then
 * calculate the size that the request needs to be split into.
 *
 * On return, *resvp is the per-transaction log reservation (bytes);
 * *residp is nonzero only when the truncate must be done in chunks,
 * in which case it is the chunk size (bytes) for each partial trunc.
 */
static void
ufs_trans_trunc_resv(
	struct inode *ip,
	u_offset_t length,
	int *resvp,
	u_offset_t *residp)
{
	ulong_t		resv;
	u_offset_t	size, offset, resid;
	int		nchunks;

	/*
	 * *resvp is the amount of log space to reserve (in bytes).
	 * when nonzero, *residp is the number of bytes to truncate.
	 */
	*residp = 0;

	if (length < ip->i_size) {
		size = ip->i_size - length;
	} else {
		resv = SIZECG(ip) * 2 + INODESIZE + 1024;
		/*
		 * truncate up, doesn't really use much space,
		 * the default above should be sufficient.
		 */
		goto done;
	}

	/*
	 * Iteratively halve (third, quarter, ...) the range until a
	 * single chunk's reservation fits within ufs_trans_max_resv.
	 * The loop's update clause re-aims "offset" at the start of the
	 * last chunk, which is the costliest one to log (deepest
	 * indirect blocks), so the estimate is conservative.
	 */
	offset = length;
	resid = size;
	nchunks = 1;
	for (; (resv = ufs_log_amt(ip, offset, resid, 1)) > ufs_trans_max_resv;
	    offset = length + (nchunks - 1) * resid) {
		nchunks++;
		resid = size / nchunks;
	}
	/*
	 * If this request takes too much log space, it will be split
	 */
	if (nchunks > 1) {
		*residp = resid;
	}
done:
	*resvp = resv;
}

/*
 * Transaction-aware truncate: wrap ufs_itrunc() in a TOP_ITRUNC
 * transaction, splitting a large truncate into multiple partial
 * truncates (one transaction each) so a single transaction never
 * exceeds the log reservation limit.
 */
int
ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr)
{
	int		err, issync, resv;
	u_offset_t	resid;
	int		do_block = 0;
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;

	/*
	 * Not logging; just do the trunc
	 */
	if (!TRANS_ISTRANS(ufsvfsp)) {
		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
		rw_enter(&ip->i_contents, RW_WRITER);
		err = ufs_itrunc(ip, length, flags, cr);
		rw_exit(&ip->i_contents);
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (err);
	}

	/*
	 * within the lockfs protocol but *not* part of a transaction.
	 * Remember whether T_DONTBLOCK was already set so we only clear
	 * it below if we are the ones who set it.
	 */
	do_block = curthread->t_flag & T_DONTBLOCK;
	curthread->t_flag |= T_DONTBLOCK;

	/*
	 * Trunc the file (in pieces, if necessary)
	 */
again:
	ufs_trans_trunc_resv(ip, length, &resv, &resid);
	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv);
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);
	if (resid) {
		/*
		 * resid is only set if we have to truncate in chunks
		 */
		ASSERT(length + resid < ip->i_size);

		/*
		 * Partially trunc file down to desired size (length).
		 * Only retain I_FREE on the last partial trunc.
		 * Round up size to a block boundary, to ensure the truncate
		 * doesn't have to allocate blocks. This is done both for
		 * performance and to fix a bug where if the block can't be
		 * allocated then the inode delete fails, but the inode
		 * is still freed with attached blocks and non-zero size
		 * (bug 4348738).
		 */
		err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)),
		    flags & ~I_FREE, cr);
		ASSERT(ip->i_size != length);
	} else
		err = ufs_itrunc(ip, length, flags, cr);
	if (!do_block)
		curthread->t_flag &= ~T_DONTBLOCK;
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv);

	/*
	 * More to do: refresh the cached average-free-blocks figure
	 * (ufs_log_amt depends on it) and truncate the next chunk.
	 */
	if ((err == 0) && resid) {
		ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		goto again;
	}
	return (err);
}

/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.
 * Any error will terminate the process as this is only a best
 * attempt to get the pages resident.
863*0Sstevel@tonic-gate */ 864*0Sstevel@tonic-gate static void 865*0Sstevel@tonic-gate ufs_trans_touch(ssize_t n, struct uio *uio) 866*0Sstevel@tonic-gate { 867*0Sstevel@tonic-gate struct iovec *iov; 868*0Sstevel@tonic-gate ulong_t cnt, incr; 869*0Sstevel@tonic-gate caddr_t p; 870*0Sstevel@tonic-gate uint8_t tmp; 871*0Sstevel@tonic-gate 872*0Sstevel@tonic-gate iov = uio->uio_iov; 873*0Sstevel@tonic-gate 874*0Sstevel@tonic-gate while (n) { 875*0Sstevel@tonic-gate cnt = MIN(iov->iov_len, n); 876*0Sstevel@tonic-gate if (cnt == 0) { 877*0Sstevel@tonic-gate /* empty iov entry */ 878*0Sstevel@tonic-gate iov++; 879*0Sstevel@tonic-gate continue; 880*0Sstevel@tonic-gate } 881*0Sstevel@tonic-gate n -= cnt; 882*0Sstevel@tonic-gate /* 883*0Sstevel@tonic-gate * touch each page in this segment. 884*0Sstevel@tonic-gate */ 885*0Sstevel@tonic-gate p = iov->iov_base; 886*0Sstevel@tonic-gate while (cnt) { 887*0Sstevel@tonic-gate switch (uio->uio_segflg) { 888*0Sstevel@tonic-gate case UIO_USERSPACE: 889*0Sstevel@tonic-gate case UIO_USERISPACE: 890*0Sstevel@tonic-gate if (fuword8(p, &tmp)) 891*0Sstevel@tonic-gate return; 892*0Sstevel@tonic-gate break; 893*0Sstevel@tonic-gate case UIO_SYSSPACE: 894*0Sstevel@tonic-gate if (kcopy(p, &tmp, 1)) 895*0Sstevel@tonic-gate return; 896*0Sstevel@tonic-gate break; 897*0Sstevel@tonic-gate } 898*0Sstevel@tonic-gate incr = MIN(cnt, PAGESIZE); 899*0Sstevel@tonic-gate p += incr; 900*0Sstevel@tonic-gate cnt -= incr; 901*0Sstevel@tonic-gate } 902*0Sstevel@tonic-gate /* 903*0Sstevel@tonic-gate * touch the last byte in case it straddles a page. 
904*0Sstevel@tonic-gate */ 905*0Sstevel@tonic-gate p--; 906*0Sstevel@tonic-gate switch (uio->uio_segflg) { 907*0Sstevel@tonic-gate case UIO_USERSPACE: 908*0Sstevel@tonic-gate case UIO_USERISPACE: 909*0Sstevel@tonic-gate if (fuword8(p, &tmp)) 910*0Sstevel@tonic-gate return; 911*0Sstevel@tonic-gate break; 912*0Sstevel@tonic-gate case UIO_SYSSPACE: 913*0Sstevel@tonic-gate if (kcopy(p, &tmp, 1)) 914*0Sstevel@tonic-gate return; 915*0Sstevel@tonic-gate break; 916*0Sstevel@tonic-gate } 917*0Sstevel@tonic-gate iov++; 918*0Sstevel@tonic-gate } 919*0Sstevel@tonic-gate } 920*0Sstevel@tonic-gate 921*0Sstevel@tonic-gate /* 922*0Sstevel@tonic-gate * Calculate the amount of log space that needs to be reserved for this 923*0Sstevel@tonic-gate * write request. If the amount of log space is too large, then 924*0Sstevel@tonic-gate * calculate the size that the requests needs to be split into. 925*0Sstevel@tonic-gate * First try fixed chunks of size ufs_trans_max_resid. If that 926*0Sstevel@tonic-gate * is too big, iterate down to the largest size that will fit. 927*0Sstevel@tonic-gate * Pagein the pages in the first chunk here, so that the pagein is 928*0Sstevel@tonic-gate * avoided later when the transaction is open. 
929*0Sstevel@tonic-gate */ 930*0Sstevel@tonic-gate void 931*0Sstevel@tonic-gate ufs_trans_write_resv( 932*0Sstevel@tonic-gate struct inode *ip, 933*0Sstevel@tonic-gate struct uio *uio, 934*0Sstevel@tonic-gate int *resvp, 935*0Sstevel@tonic-gate int *residp) 936*0Sstevel@tonic-gate { 937*0Sstevel@tonic-gate ulong_t resv; 938*0Sstevel@tonic-gate offset_t offset; 939*0Sstevel@tonic-gate ssize_t resid; 940*0Sstevel@tonic-gate int nchunks; 941*0Sstevel@tonic-gate 942*0Sstevel@tonic-gate *residp = 0; 943*0Sstevel@tonic-gate offset = uio->uio_offset; 944*0Sstevel@tonic-gate resid = MIN(uio->uio_resid, ufs_trans_max_resid); 945*0Sstevel@tonic-gate resv = ufs_log_amt(ip, offset, resid, 0); 946*0Sstevel@tonic-gate if (resv <= ufs_trans_max_resv) { 947*0Sstevel@tonic-gate ufs_trans_touch(resid, uio); 948*0Sstevel@tonic-gate if (resid != uio->uio_resid) 949*0Sstevel@tonic-gate *residp = resid; 950*0Sstevel@tonic-gate *resvp = resv; 951*0Sstevel@tonic-gate return; 952*0Sstevel@tonic-gate } 953*0Sstevel@tonic-gate 954*0Sstevel@tonic-gate resid = uio->uio_resid; 955*0Sstevel@tonic-gate nchunks = 1; 956*0Sstevel@tonic-gate for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv; 957*0Sstevel@tonic-gate offset = uio->uio_offset + (nchunks - 1) * resid) { 958*0Sstevel@tonic-gate nchunks++; 959*0Sstevel@tonic-gate resid = uio->uio_resid / nchunks; 960*0Sstevel@tonic-gate } 961*0Sstevel@tonic-gate ufs_trans_touch(resid, uio); 962*0Sstevel@tonic-gate /* 963*0Sstevel@tonic-gate * If this request takes too much log space, it will be split 964*0Sstevel@tonic-gate */ 965*0Sstevel@tonic-gate if (nchunks > 1) 966*0Sstevel@tonic-gate *residp = resid; 967*0Sstevel@tonic-gate *resvp = resv; 968*0Sstevel@tonic-gate } 969*0Sstevel@tonic-gate 970*0Sstevel@tonic-gate /* 971*0Sstevel@tonic-gate * Issue write request. 972*0Sstevel@tonic-gate * 973*0Sstevel@tonic-gate * Split a large request into smaller chunks. 
 */
/*
 * Entry contract (as evidenced by the lock/transaction handling below):
 * the caller holds vfs_dqrwlock (reader), ip->i_contents (writer), and
 * an open TOP_WRITE[_SYNC] transaction of size "resv".  This routine
 * writes in "resid"-byte pieces, closing and re-opening the transaction
 * and locks between pieces, and returns with locks and an open
 * transaction held so the caller can issue the final EOT.
 */
int
ufs_trans_write(
	struct inode *ip,
	struct uio *uio,
	int ioflag,
	cred_t *cr,
	int resv,
	long resid)
{
	long		realresid;
	int		err;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;

	/*
	 * since the write is too big and would "HOG THE LOG" it needs to
	 * be broken up and done in pieces.  NOTE, the caller will
	 * issue the EOT after the request has been completed
	 */
	realresid = uio->uio_resid;

again:
	/*
	 * Perform partial request (uiomove will update uio for us)
	 * Request is split up into "resid" size chunks until
	 * "realresid" bytes have been transferred.
	 */
	uio->uio_resid = MIN(resid, realresid);
	realresid -= uio->uio_resid;
	err = wrip(ip, uio, ioflag, cr);

	/*
	 * Error or request is done; caller issues final EOT.
	 * Restore uio_resid so it reflects the bytes left untransferred
	 * for the whole original request, not just this chunk.
	 */
	if (err || uio->uio_resid || (realresid == 0)) {
		uio->uio_resid += realresid;
		return (err);
	}

	/*
	 * Generate EOT for this part of the request
	 */
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	if (ioflag & (FSYNC|FDSYNC)) {
		TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv);
	} else {
		TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv);
	}

	/*
	 * Make sure the input buffer is resident before starting
	 * the next transaction.
	 */
	ufs_trans_touch(MIN(resid, realresid), uio);

	/*
	 * Generate BOT for next part of the request
	 */
	if (ioflag & (FSYNC|FDSYNC)) {
		int error;
		TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error);
		ASSERT(!error);
	} else {
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv);
	}
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);
	/*
	 * Error during EOT (probably device error while writing commit rec).
	 * err can only be set here by TRANS_END_SYNC above; return with
	 * locks and a fresh transaction held, as the caller expects.
	 */
	if (err)
		return (err);
	goto again;
}