1 /* $NetBSD: lfs_inode.c,v 1.160 2020/04/23 21:47:09 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.160 2020/04/23 21:47:09 ad Exp $"); 64 65 #if defined(_KERNEL_OPT) 66 #include "opt_quota.h" 67 #endif 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/mount.h> 72 #include <sys/malloc.h> 73 #include <sys/proc.h> 74 #include <sys/file.h> 75 #include <sys/buf.h> 76 #include <sys/vnode.h> 77 #include <sys/kernel.h> 78 #include <sys/trace.h> 79 #include <sys/resourcevar.h> 80 #include <sys/kauth.h> 81 82 #include <ufs/lfs/ulfs_quotacommon.h> 83 #include <ufs/lfs/ulfs_inode.h> 84 #include <ufs/lfs/ulfsmount.h> 85 #include <ufs/lfs/ulfs_extern.h> 86 87 #include <ufs/lfs/lfs.h> 88 #include <ufs/lfs/lfs_accessors.h> 89 #include <ufs/lfs/lfs_extern.h> 90 #include <ufs/lfs/lfs_kernel.h> 91 92 static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t); 93 static int lfs_indirtrunc(struct inode *, daddr_t, daddr_t, 94 daddr_t, int, daddr_t *, daddr_t *, 95 long *, size_t *); 96 static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *); 97 static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int); 98 99 /* Search a block for a specific dinode. */ 100 union lfs_dinode * 101 lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp) 102 { 103 union lfs_dinode *ldip; 104 unsigned num, i; 105 106 ASSERT_NO_SEGLOCK(fs); 107 /* 108 * Read the inode block backwards, since later versions of the 109 * inode will supercede earlier ones. Though it is unlikely, it is 110 * possible that the same inode will appear in the same inode block. 111 */ 112 num = LFS_INOPB(fs); 113 for (i = num; i-- > 0; ) { 114 ldip = DINO_IN_BLOCK(fs, bp->b_data, i); 115 if (lfs_dino_getinumber(fs, ldip) == ino) 116 return (ldip); 117 } 118 119 printf("searched %u entries for %ju\n", num, (uintmax_t)ino); 120 printf("offset is 0x%jx (seg %d)\n", (uintmax_t)lfs_sb_getoffset(fs), 121 lfs_dtosn(fs, lfs_sb_getoffset(fs))); 122 printf("block is 0x%jx (seg %d)\n", 123 (uintmax_t)LFS_DBTOFSB(fs, bp->b_blkno), 124 lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno))); 125 126 return NULL; 127 } 128 129 int 130 lfs_update(struct vnode *vp, const struct timespec *acc, 131 const struct timespec *mod, int updflags) 132 { 133 struct inode *ip; 134 struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs; 135 int flags; 136 int error; 137 138 ASSERT_NO_SEGLOCK(fs); 139 if (vp->v_mount->mnt_flag & MNT_RDONLY) 140 return (0); 141 ip = VTOI(vp); 142 143 /* 144 * If we are called from vinvalbuf, and the file's blocks have 145 * already been scheduled for writing, but the writes have not 146 * yet completed, lfs_vflush will not be called, and vinvalbuf 147 * will cause a panic. So, we must wait until any pending write 148 * for our inode completes, if we are called with UPDATE_WAIT set. 149 */ 150 mutex_enter(vp->v_interlock); 151 while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && 152 WRITEINPROG(vp)) { 153 DLOG((DLOG_SEG, "lfs_update: sleeping on ino %llu" 154 " (in progress)\n", (unsigned long long) ip->i_number)); 155 cv_wait(&vp->v_cv, vp->v_interlock); 156 } 157 mutex_exit(vp->v_interlock); 158 LFS_ITIMES(ip, acc, mod, NULL); 159 if (updflags & UPDATE_CLOSE) 160 flags = ip->i_state & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); 161 else 162 flags = ip->i_state & (IN_MODIFIED | IN_CLEANING); 163 if (flags == 0) 164 return (0); 165 166 /* If sync, push back the vnode and any dirty blocks it may have. */ 167 if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { 168 /* Avoid flushing VU_DIROP. */ 169 mutex_enter(&lfs_lock); 170 ++fs->lfs_diropwait; 171 while (vp->v_uflag & VU_DIROP) { 172 DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %llu " 173 "(dirops)\n", (unsigned long long) ip->i_number)); 174 DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, i_state" 175 " 0x%x\n", 176 vp->v_iflag | vp->v_vflag | vp->v_uflag, 177 ip->i_state)); 178 if (fs->lfs_dirops == 0) 179 break; 180 else 181 mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 182 0, &lfs_lock); 183 /* XXX KS - by falling out here, are we writing the vn 184 twice? */ 185 } 186 --fs->lfs_diropwait; 187 fs->lfs_writer++; 188 if (vp->v_uflag & VU_DIROP) { 189 KASSERT(fs->lfs_dirops == 0); 190 lfs_flush_fs(fs, SEGM_SYNC); 191 } 192 mutex_exit(&lfs_lock); 193 error = lfs_vflush(vp); 194 mutex_enter(&lfs_lock); 195 if (--fs->lfs_writer == 0) 196 cv_broadcast(&fs->lfs_diropscv); 197 mutex_exit(&lfs_lock); 198 return error; 199 } 200 return 0; 201 } 202 203 #define SINGLE 0 /* index of single indirect block */ 204 #define DOUBLE 1 /* index of double indirect block */ 205 #define TRIPLE 2 /* index of triple indirect block */ 206 /* 207 * Truncate the inode oip to at most length size, freeing the 208 * disk blocks. 209 */ 210 /* VOP_BWRITE 1 + ULFS_NIADDR + lfs_balloc == 2 + 2*ULFS_NIADDR times */ 211 212 int 213 lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) 214 { 215 daddr_t lastblock; 216 struct inode *oip = VTOI(ovp); 217 daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR]; 218 /* note: newblks is set but only actually used if DIAGNOSTIC */ 219 daddr_t newblks[ULFS_NDADDR + ULFS_NIADDR] __diagused; 220 struct lfs *fs; 221 struct buf *bp; 222 int offset, size, level; 223 daddr_t count, rcount; 224 daddr_t blocksreleased = 0, real_released = 0; 225 int i, nblocks; 226 int aflags, error, allerror = 0; 227 off_t osize; 228 long lastseg; 229 size_t bc; 230 int obufsize, odb; 231 int usepc; 232 233 if (ovp->v_type == VCHR || ovp->v_type == VBLK || 234 ovp->v_type == VFIFO || ovp->v_type == VSOCK) { 235 KASSERT(oip->i_size == 0); 236 return 0; 237 } 238 239 if (length < 0) 240 return (EINVAL); 241 242 fs = oip->i_lfs; 243 244 if (ovp->v_type == VLNK && 245 (oip->i_size < fs->um_maxsymlinklen || 246 (fs->um_maxsymlinklen == 0 && 247 lfs_dino_getblocks(fs, oip->i_din) == 0))) { 248 KASSERTMSG((length == 0), 249 "partial truncate of symlink: %jd", (intmax_t)length); 250 memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size); 251 oip->i_size = 0; 252 lfs_dino_setsize(fs, oip->i_din, 0); 253 oip->i_state |= IN_CHANGE | IN_UPDATE; 254 return (lfs_update(ovp, NULL, NULL, 0)); 255 } 256 if (oip->i_size == length) { 257 /* still do a uvm_vnp_setsize() as writesize may be larger */ 258 uvm_vnp_setsize(ovp, length); 259 oip->i_state |= IN_CHANGE | IN_UPDATE; 260 return (lfs_update(ovp, NULL, NULL, 0)); 261 } 262 lfs_imtime(fs); 263 osize = oip->i_size; 264 usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode); 265 266 ASSERT_NO_SEGLOCK(fs); 267 /* 268 * Lengthen the size of the file. We must ensure that the 269 * last byte of the file is allocated. Since the smallest 270 * value of osize is 0, length will be at least 1. 271 */ 272 if (osize < length) { 273 if (length > fs->um_maxfilesize) 274 return (EFBIG); 275 aflags = B_CLRBUF; 276 if (ioflag & IO_SYNC) 277 aflags |= B_SYNC; 278 if (usepc) { 279 if (lfs_lblkno(fs, osize) < ULFS_NDADDR && 280 lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) && 281 lfs_blkroundup(fs, osize) != osize) { 282 off_t eob; 283 284 eob = lfs_blkroundup(fs, osize); 285 uvm_vnp_setwritesize(ovp, eob); 286 error = ulfs_balloc_range(ovp, osize, 287 eob - osize, cred, aflags); 288 if (error) { 289 (void) lfs_truncate(ovp, osize, 290 ioflag & IO_SYNC, cred); 291 return error; 292 } 293 if (ioflag & IO_SYNC) { 294 rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER); 295 VOP_PUTPAGES(ovp, 296 trunc_page(osize & lfs_sb_getbmask(fs)), 297 round_page(eob), 298 PGO_CLEANIT | PGO_SYNCIO); 299 } 300 } 301 uvm_vnp_setwritesize(ovp, length); 302 error = ulfs_balloc_range(ovp, length - 1, 1, cred, 303 aflags); 304 if (error) { 305 (void) lfs_truncate(ovp, osize, 306 ioflag & IO_SYNC, cred); 307 return error; 308 } 309 uvm_vnp_setsize(ovp, length); 310 oip->i_state |= IN_CHANGE | IN_UPDATE; 311 KASSERT(ovp->v_size == oip->i_size); 312 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 313 return (lfs_update(ovp, NULL, NULL, 0)); 314 } else { 315 error = lfs_reserve(fs, ovp, NULL, 316 lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); 317 if (error) 318 return (error); 319 error = lfs_balloc(ovp, length - 1, 1, cred, 320 aflags, &bp); 321 lfs_reserve(fs, ovp, NULL, 322 -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); 323 if (error) 324 return (error); 325 oip->i_size = length; 326 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 327 uvm_vnp_setsize(ovp, length); 328 (void) VOP_BWRITE(bp->b_vp, bp); 329 oip->i_state |= IN_CHANGE | IN_UPDATE; 330 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 331 return (lfs_update(ovp, NULL, NULL, 0)); 332 } 333 } 334 335 if ((error = lfs_reserve(fs, ovp, NULL, 336 lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0) 337 return (error); 338 339 /* 340 * Shorten the size of the file. If the file is not being 341 * truncated to a block boundary, the contents of the 342 * partial block following the end of the file must be 343 * zero'ed in case it ever becomes accessible again because 344 * of subsequent file growth. Directories however are not 345 * zero'ed as they should grow back initialized to empty. 346 */ 347 offset = lfs_blkoff(fs, length); 348 lastseg = -1; 349 bc = 0; 350 351 if (ovp != fs->lfs_ivnode) 352 lfs_seglock(fs, SEGM_PROT); 353 if (offset == 0) { 354 oip->i_size = length; 355 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 356 } else if (!usepc) { 357 lbn = lfs_lblkno(fs, length); 358 aflags = B_CLRBUF; 359 if (ioflag & IO_SYNC) 360 aflags |= B_SYNC; 361 error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp); 362 if (error) { 363 lfs_reserve(fs, ovp, NULL, 364 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 365 goto errout; 366 } 367 obufsize = bp->b_bufsize; 368 odb = lfs_btofsb(fs, bp->b_bcount); 369 oip->i_size = length; 370 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 371 size = lfs_blksize(fs, oip, lbn); 372 if (ovp->v_type != VDIR) 373 memset((char *)bp->b_data + offset, 0, 374 (u_int)(size - offset)); 375 allocbuf(bp, size, 1); 376 if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) { 377 mutex_enter(&lfs_lock); 378 locked_queue_bytes -= obufsize - bp->b_bufsize; 379 mutex_exit(&lfs_lock); 380 } 381 if (bp->b_oflags & BO_DELWRI) { 382 lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size)); 383 /* XXX shouldn't this wake up on lfs_availsleep? */ 384 } 385 (void) VOP_BWRITE(bp->b_vp, bp); 386 } else { /* vp->v_type == VREG && length < osize && offset != 0 */ 387 /* 388 * When truncating a regular file down to a non-block-aligned 389 * size, we must zero the part of last block which is past 390 * the new EOF. We must synchronously flush the zeroed pages 391 * to disk since the new pages will be invalidated as soon 392 * as we inform the VM system of the new, smaller size. 393 * We must do this before acquiring the GLOCK, since fetching 394 * the pages will acquire the GLOCK internally. 395 * So there is a window where another thread could see a whole 396 * zeroed page past EOF, but that's life. 397 */ 398 daddr_t xlbn; 399 voff_t eoz; 400 401 aflags = ioflag & IO_SYNC ? B_SYNC : 0; 402 error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags); 403 if (error) { 404 lfs_reserve(fs, ovp, NULL, 405 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 406 goto errout; 407 } 408 xlbn = lfs_lblkno(fs, length); 409 size = lfs_blksize(fs, oip, xlbn); 410 eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize); 411 ubc_zerorange(&ovp->v_uobj, length, eoz - length, 412 UBC_VNODE_FLAGS(ovp)); 413 if (round_page(eoz) > round_page(length)) { 414 rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER); 415 error = VOP_PUTPAGES(ovp, round_page(length), 416 round_page(eoz), 417 PGO_CLEANIT | PGO_DEACTIVATE | 418 ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0)); 419 if (error) { 420 lfs_reserve(fs, ovp, NULL, 421 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 422 goto errout; 423 } 424 } 425 } 426 427 genfs_node_wrlock(ovp); 428 429 oip->i_size = length; 430 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 431 uvm_vnp_setsize(ovp, length); 432 433 /* 434 * Calculate index into inode's block list of 435 * last direct and indirect blocks (if any) 436 * which we want to keep. Lastblock is -1 when 437 * the file is truncated to 0. 438 */ 439 /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */ 440 if (length > QUAD_MAX - lfs_sb_getbsize(fs)) 441 lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs)); 442 else 443 lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1; 444 lastiblock[SINGLE] = lastblock - ULFS_NDADDR; 445 lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs); 446 lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs); 447 nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); 448 /* 449 * Record changed file and block pointers before we start 450 * freeing blocks. lastiblock values are also normalized to -1 451 * for calls to lfs_indirtrunc below. 452 */ 453 for (i=0; i<ULFS_NDADDR; i++) { 454 newblks[i] = lfs_dino_getdb(fs, oip->i_din, i); 455 } 456 for (i=0; i<ULFS_NIADDR; i++) { 457 newblks[ULFS_NDADDR + i] = lfs_dino_getib(fs, oip->i_din, i); 458 } 459 for (level = TRIPLE; level >= SINGLE; level--) 460 if (lastiblock[level] < 0) { 461 newblks[ULFS_NDADDR+level] = 0; 462 lastiblock[level] = -1; 463 } 464 for (i = ULFS_NDADDR - 1; i > lastblock; i--) 465 newblks[i] = 0; 466 467 oip->i_size = osize; 468 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 469 error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0); 470 if (error && !allerror) 471 allerror = error; 472 473 /* 474 * Indirect blocks first. 475 */ 476 indir_lbn[SINGLE] = -ULFS_NDADDR; 477 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1; 478 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1; 479 for (level = TRIPLE; level >= SINGLE; level--) { 480 bn = lfs_dino_getib(fs, oip->i_din, level); 481 if (bn != 0) { 482 error = lfs_indirtrunc(oip, indir_lbn[level], 483 bn, lastiblock[level], 484 level, &count, &rcount, 485 &lastseg, &bc); 486 if (error) 487 allerror = error; 488 real_released += rcount; 489 blocksreleased += count; 490 if (lastiblock[level] < 0) { 491 if (lfs_dino_getib(fs, oip->i_din, level) > 0) 492 real_released += nblocks; 493 blocksreleased += nblocks; 494 lfs_dino_setib(fs, oip->i_din, level, 0); 495 lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs), 496 &lastseg, &bc); 497 lfs_deregister_block(ovp, bn); 498 } 499 } 500 if (lastiblock[level] >= 0) 501 goto done; 502 } 503 504 /* 505 * All whole direct blocks or frags. 506 */ 507 for (i = ULFS_NDADDR - 1; i > lastblock; i--) { 508 long bsize, obsize; 509 510 bn = lfs_dino_getdb(fs, oip->i_din, i); 511 if (bn == 0) 512 continue; 513 bsize = lfs_blksize(fs, oip, i); 514 if (lfs_dino_getdb(fs, oip->i_din, i) > 0) { 515 /* Check for fragment size changes */ 516 obsize = oip->i_lfs_fragsize[i]; 517 real_released += lfs_btofsb(fs, obsize); 518 oip->i_lfs_fragsize[i] = 0; 519 } else 520 obsize = 0; 521 blocksreleased += lfs_btofsb(fs, bsize); 522 lfs_dino_setdb(fs, oip->i_din, i, 0); 523 lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc); 524 lfs_deregister_block(ovp, bn); 525 } 526 if (lastblock < 0) 527 goto done; 528 529 /* 530 * Finally, look for a change in size of the 531 * last direct block; release any frags. 532 */ 533 bn = lfs_dino_getdb(fs, oip->i_din, lastblock); 534 if (bn != 0) { 535 long oldspace, newspace; 536 #if 0 537 long olddspace; 538 #endif 539 540 /* 541 * Calculate amount of space we're giving 542 * back as old block size minus new block size. 543 */ 544 oldspace = lfs_blksize(fs, oip, lastblock); 545 #if 0 546 olddspace = oip->i_lfs_fragsize[lastblock]; 547 #endif 548 549 oip->i_size = length; 550 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 551 newspace = lfs_blksize(fs, oip, lastblock); 552 if (newspace == 0) 553 panic("itrunc: newspace"); 554 if (oldspace - newspace > 0) { 555 blocksreleased += lfs_btofsb(fs, oldspace - newspace); 556 } 557 #if 0 558 if (bn > 0 && olddspace - newspace > 0) { 559 /* No segment accounting here, just vnode */ 560 real_released += lfs_btofsb(fs, olddspace - newspace); 561 } 562 #endif 563 } 564 565 done: 566 /* Finish segment accounting corrections */ 567 lfs_update_seguse(fs, oip, lastseg, bc); 568 for (level = SINGLE; level <= TRIPLE; level++) 569 KASSERTMSG(((newblks[ULFS_NDADDR + level] == 0) == 570 (lfs_dino_getib(fs, oip->i_din, level) == 0)), 571 "lfs itrunc1"); 572 for (i = 0; i < ULFS_NDADDR; i++) 573 KASSERTMSG(((newblks[i] == 0) == 574 (lfs_dino_getdb(fs, oip->i_din, i) == 0)), 575 "lfs itrunc2"); 576 KASSERTMSG((length != 0 || LIST_EMPTY(&ovp->v_cleanblkhd)), 577 "lfs itrunc3a"); 578 KASSERTMSG((length != 0 || LIST_EMPTY(&ovp->v_dirtyblkhd)), 579 "lfs itrunc3b"); 580 581 /* 582 * Put back the real size. 583 */ 584 oip->i_size = length; 585 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 586 oip->i_lfs_effnblks -= blocksreleased; 587 588 mutex_enter(&lfs_lock); 589 lfs_dino_setblocks(fs, oip->i_din, 590 lfs_dino_getblocks(fs, oip->i_din) - real_released); 591 lfs_sb_addbfree(fs, blocksreleased); 592 593 KASSERTMSG((oip->i_size != 0 || 594 lfs_dino_getblocks(fs, oip->i_din) == 0), 595 "ino %llu truncate to 0 but %jd blks/%jd effblks", 596 (unsigned long long) oip->i_number, 597 lfs_dino_getblocks(fs, oip->i_din), oip->i_lfs_effnblks); 598 KASSERTMSG((oip->i_size != 0 || oip->i_lfs_effnblks == 0), 599 "ino %llu truncate to 0 but %jd blks/%jd effblks", 600 (unsigned long long) oip->i_number, 601 lfs_dino_getblocks(fs, oip->i_din), oip->i_lfs_effnblks); 602 603 /* 604 * If we truncated to zero, take us off the paging queue. 605 */ 606 if (oip->i_size == 0 && oip->i_state & IN_PAGING) { 607 oip->i_state &= ~IN_PAGING; 608 TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); 609 } 610 mutex_exit(&lfs_lock); 611 612 oip->i_state |= IN_CHANGE; 613 #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) 614 (void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0); 615 #endif 616 lfs_reserve(fs, ovp, NULL, 617 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 618 genfs_node_unlock(ovp); 619 errout: 620 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 621 if (ovp != fs->lfs_ivnode) 622 lfs_segunlock(fs); 623 return (allerror ? allerror : error); 624 } 625 626 /* Update segment and avail usage information when removing a block. */ 627 static int 628 lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr, 629 size_t bsize, long *lastseg, size_t *num) 630 { 631 long seg; 632 int error = 0; 633 634 ASSERT_SEGLOCK(fs); 635 bsize = lfs_fragroundup(fs, bsize); 636 if (daddr > 0) { 637 if (*lastseg != (seg = lfs_dtosn(fs, daddr))) { 638 error = lfs_update_seguse(fs, ip, *lastseg, *num); 639 *num = bsize; 640 *lastseg = seg; 641 } else 642 *num += bsize; 643 } 644 645 return error; 646 } 647 648 /* Finish the accounting updates for a segment. */ 649 static int 650 lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num) 651 { 652 struct segdelta *sd; 653 654 ASSERT_SEGLOCK(fs); 655 if (lastseg < 0 || num == 0) 656 return 0; 657 658 LIST_FOREACH(sd, &ip->i_lfs_segdhd, list) 659 if (sd->segnum == lastseg) 660 break; 661 if (sd == NULL) { 662 sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK); 663 sd->segnum = lastseg; 664 sd->num = 0; 665 LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list); 666 } 667 sd->num += num; 668 669 return 0; 670 } 671 672 static void 673 lfs_finalize_seguse(struct lfs *fs, void *v) 674 { 675 SEGUSE *sup; 676 struct buf *bp; 677 struct segdelta *sd; 678 LIST_HEAD(, segdelta) *hd = v; 679 680 ASSERT_SEGLOCK(fs); 681 while((sd = LIST_FIRST(hd)) != NULL) { 682 LIST_REMOVE(sd, list); 683 LFS_SEGENTRY(sup, fs, sd->segnum, bp); 684 if (sd->num > sup->su_nbytes) { 685 printf("lfs_finalize_seguse: segment %ld short by %ld\n", 686 sd->segnum, (long)(sd->num - sup->su_nbytes)); 687 panic("lfs_finalize_seguse: negative bytes"); 688 sup->su_nbytes = sd->num; 689 } 690 sup->su_nbytes -= sd->num; 691 LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp); 692 free(sd, M_SEGMENT); 693 } 694 } 695 696 /* Finish the accounting updates for a segment. */ 697 void 698 lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip) 699 { 700 ASSERT_SEGLOCK(fs); 701 lfs_finalize_seguse(fs, &ip->i_lfs_segdhd); 702 } 703 704 /* Finish the accounting updates for a segment. */ 705 void 706 lfs_finalize_fs_seguse(struct lfs *fs) 707 { 708 ASSERT_SEGLOCK(fs); 709 lfs_finalize_seguse(fs, &fs->lfs_segdhd); 710 } 711 712 /* 713 * Release blocks associated with the inode ip and stored in the indirect 714 * block bn. Blocks are free'd in LIFO order up to (but not including) 715 * lastbn. If level is greater than SINGLE, the block is an indirect block 716 * and recursive calls to indirtrunc must be used to cleanse other indirect 717 * blocks. 718 * 719 * NB: triple indirect blocks are untested. 720 */ 721 static int 722 lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 723 daddr_t lastbn, int level, daddr_t *countp, 724 daddr_t *rcountp, long *lastsegp, size_t *bcp) 725 { 726 int i; 727 struct buf *bp; 728 struct lfs *fs = ip->i_lfs; 729 void *bap; 730 bool bap_needs_free; 731 struct vnode *vp; 732 daddr_t nb, nlbn, last; 733 daddr_t blkcount, rblkcount, factor; 734 int nblocks; 735 daddr_t blocksreleased = 0, real_released = 0; 736 int error = 0, allerror = 0; 737 738 ASSERT_SEGLOCK(fs); 739 /* 740 * Calculate index in current block of last 741 * block to be kept. -1 indicates the entire 742 * block so we need not calculate the index. 743 */ 744 factor = 1; 745 for (i = SINGLE; i < level; i++) 746 factor *= LFS_NINDIR(fs); 747 last = lastbn; 748 if (lastbn > 0) 749 last /= factor; 750 nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); 751 /* 752 * Get buffer of block pointers, zero those entries corresponding 753 * to blocks to be free'd, and update on disk copy first. Since 754 * double(triple) indirect before single(double) indirect, calls 755 * to bmap on these blocks will fail. However, we already have 756 * the on disk address, so we have to set the b_blkno field 757 * explicitly instead of letting bread do everything for us. 758 */ 759 vp = ITOV(ip); 760 bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0); 761 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 762 /* Braces must be here in case trace evaluates to nothing. */ 763 trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn); 764 } else { 765 trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn); 766 curlwp->l_ru.ru_inblock++; /* pay for read */ 767 bp->b_flags |= B_READ; 768 if (bp->b_bcount > bp->b_bufsize) 769 panic("lfs_indirtrunc: bad buffer size"); 770 bp->b_blkno = LFS_FSBTODB(fs, dbn); 771 VOP_STRATEGY(vp, bp); 772 error = biowait(bp); 773 } 774 if (error) { 775 brelse(bp, 0); 776 *countp = *rcountp = 0; 777 return (error); 778 } 779 780 if (lastbn >= 0) { 781 /* 782 * We still need this block, so copy the data for 783 * subsequent processing; then in the original block, 784 * zero out the dying block pointers and send it off. 785 */ 786 bap = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK); 787 memcpy(bap, bp->b_data, lfs_sb_getbsize(fs)); 788 bap_needs_free = true; 789 790 for (i = last + 1; i < LFS_NINDIR(fs); i++) { 791 lfs_iblock_set(fs, bp->b_data, i, 0); 792 } 793 error = VOP_BWRITE(bp->b_vp, bp); 794 if (error) 795 allerror = error; 796 } else { 797 bap = bp->b_data; 798 bap_needs_free = false; 799 } 800 801 /* 802 * Recursively free totally unused blocks. 803 */ 804 for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 805 i--, nlbn += factor) { 806 nb = lfs_iblock_get(fs, bap, i); 807 if (nb == 0) 808 continue; 809 if (level > SINGLE) { 810 error = lfs_indirtrunc(ip, nlbn, nb, 811 (daddr_t)-1, level - 1, 812 &blkcount, &rblkcount, 813 lastsegp, bcp); 814 if (error) 815 allerror = error; 816 blocksreleased += blkcount; 817 real_released += rblkcount; 818 } 819 lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp); 820 if (lfs_iblock_get(fs, bap, i) > 0) 821 real_released += nblocks; 822 blocksreleased += nblocks; 823 } 824 825 /* 826 * Recursively free last partial block. 827 */ 828 if (level > SINGLE && lastbn >= 0) { 829 last = lastbn % factor; 830 nb = lfs_iblock_get(fs, bap, i); 831 if (nb != 0) { 832 error = lfs_indirtrunc(ip, nlbn, nb, 833 last, level - 1, &blkcount, 834 &rblkcount, lastsegp, bcp); 835 if (error) 836 allerror = error; 837 real_released += rblkcount; 838 blocksreleased += blkcount; 839 } 840 } 841 842 if (bap_needs_free) { 843 lfs_free(fs, bap, LFS_NB_IBLOCK); 844 } else { 845 mutex_enter(&bufcache_lock); 846 if (bp->b_oflags & BO_DELWRI) { 847 LFS_UNLOCK_BUF(bp); 848 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 849 wakeup(&fs->lfs_availsleep); 850 } 851 brelsel(bp, BC_INVAL); 852 mutex_exit(&bufcache_lock); 853 } 854 855 *countp = blocksreleased; 856 *rcountp = real_released; 857 return (allerror); 858 } 859 860 /* 861 * Destroy any in core blocks past the truncation length. 862 * Inlined from vtruncbuf, so that lfs_avail could be updated. 863 * We take the seglock to prevent cleaning from occurring while we are 864 * invalidating blocks. 865 */ 866 static int 867 lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) 868 { 869 struct buf *bp, *nbp; 870 int error = 0; 871 struct lfs *fs; 872 voff_t off; 873 874 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 875 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 876 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 877 if (error) 878 return error; 879 880 fs = VTOI(vp)->i_lfs; 881 882 ASSERT_SEGLOCK(fs); 883 884 mutex_enter(&bufcache_lock); 885 restart: 886 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 887 nbp = LIST_NEXT(bp, b_vnbufs); 888 if (bp->b_lblkno < lbn) 889 continue; 890 error = bbusy(bp, catch, slptimeo, NULL); 891 if (error == EPASSTHROUGH) 892 goto restart; 893 if (error) 894 goto exit; 895 896 mutex_enter(bp->b_objlock); 897 if (bp->b_oflags & BO_DELWRI) { 898 bp->b_oflags &= ~BO_DELWRI; 899 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 900 wakeup(&fs->lfs_availsleep); 901 } 902 mutex_exit(bp->b_objlock); 903 LFS_UNLOCK_BUF(bp); 904 brelsel(bp, BC_INVAL | BC_VFLUSH); 905 } 906 907 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 908 nbp = LIST_NEXT(bp, b_vnbufs); 909 if (bp->b_lblkno < lbn) 910 continue; 911 error = bbusy(bp, catch, slptimeo, NULL); 912 if (error == EPASSTHROUGH) 913 goto restart; 914 if (error) 915 goto exit; 916 917 mutex_enter(bp->b_objlock); 918 if (bp->b_oflags & BO_DELWRI) { 919 bp->b_oflags &= ~BO_DELWRI; 920 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 921 wakeup(&fs->lfs_availsleep); 922 } 923 mutex_exit(bp->b_objlock); 924 LFS_UNLOCK_BUF(bp); 925 brelsel(bp, BC_INVAL | BC_VFLUSH); 926 } 927 exit: 928 mutex_exit(&bufcache_lock); 929 930 return error; 931 } 932 933