1 /* $NetBSD: lfs_inode.c,v 1.147 2015/09/01 06:13:09 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.147 2015/09/01 06:13:09 dholland Exp $"); 64 65 #if defined(_KERNEL_OPT) 66 #include "opt_quota.h" 67 #endif 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/mount.h> 72 #include <sys/malloc.h> 73 #include <sys/proc.h> 74 #include <sys/file.h> 75 #include <sys/buf.h> 76 #include <sys/vnode.h> 77 #include <sys/kernel.h> 78 #include <sys/trace.h> 79 #include <sys/resourcevar.h> 80 #include <sys/kauth.h> 81 82 #include <ufs/lfs/ulfs_quotacommon.h> 83 #include <ufs/lfs/ulfs_inode.h> 84 #include <ufs/lfs/ulfsmount.h> 85 #include <ufs/lfs/ulfs_extern.h> 86 87 #include <ufs/lfs/lfs.h> 88 #include <ufs/lfs/lfs_accessors.h> 89 #include <ufs/lfs/lfs_extern.h> 90 #include <ufs/lfs/lfs_kernel.h> 91 92 static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t); 93 static int lfs_indirtrunc(struct inode *, daddr_t, daddr_t, 94 daddr_t, int, daddr_t *, daddr_t *, 95 long *, size_t *); 96 static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *); 97 static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int); 98 99 /* Search a block for a specific dinode. */ 100 union lfs_dinode * 101 lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp) 102 { 103 union lfs_dinode *ldip; 104 unsigned num, i; 105 106 ASSERT_NO_SEGLOCK(fs); 107 /* 108 * Read the inode block backwards, since later versions of the 109 * inode will supercede earlier ones. Though it is unlikely, it is 110 * possible that the same inode will appear in the same inode block. 111 */ 112 num = LFS_INOPB(fs); 113 for (i = num; i-- > 0; ) { 114 ldip = DINO_IN_BLOCK(fs, bp->b_data, i); 115 if (lfs_dino_getinumber(fs, ldip) == ino) 116 return (ldip); 117 } 118 119 printf("searched %u entries for %ju\n", num, (uintmax_t)ino); 120 printf("offset is 0x%jx (seg %d)\n", (uintmax_t)lfs_sb_getoffset(fs), 121 lfs_dtosn(fs, lfs_sb_getoffset(fs))); 122 printf("block is 0x%jx (seg %d)\n", 123 (uintmax_t)LFS_DBTOFSB(fs, bp->b_blkno), 124 lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno))); 125 126 return NULL; 127 } 128 129 int 130 lfs_update(struct vnode *vp, const struct timespec *acc, 131 const struct timespec *mod, int updflags) 132 { 133 struct inode *ip; 134 struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs; 135 int flags; 136 137 ASSERT_NO_SEGLOCK(fs); 138 if (vp->v_mount->mnt_flag & MNT_RDONLY) 139 return (0); 140 ip = VTOI(vp); 141 142 /* 143 * If we are called from vinvalbuf, and the file's blocks have 144 * already been scheduled for writing, but the writes have not 145 * yet completed, lfs_vflush will not be called, and vinvalbuf 146 * will cause a panic. So, we must wait until any pending write 147 * for our inode completes, if we are called with UPDATE_WAIT set. 148 */ 149 mutex_enter(vp->v_interlock); 150 while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && 151 WRITEINPROG(vp)) { 152 DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d" 153 " (in progress)\n", ip->i_number)); 154 cv_wait(&vp->v_cv, vp->v_interlock); 155 } 156 mutex_exit(vp->v_interlock); 157 LFS_ITIMES(ip, acc, mod, NULL); 158 if (updflags & UPDATE_CLOSE) 159 flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); 160 else 161 flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING); 162 if (flags == 0) 163 return (0); 164 165 /* If sync, push back the vnode and any dirty blocks it may have. */ 166 if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { 167 /* Avoid flushing VU_DIROP. */ 168 mutex_enter(&lfs_lock); 169 ++fs->lfs_diropwait; 170 while (vp->v_uflag & VU_DIROP) { 171 DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d" 172 " (dirops)\n", ip->i_number)); 173 DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags" 174 " 0x%x\n", 175 vp->v_iflag | vp->v_vflag | vp->v_uflag, 176 ip->i_flag)); 177 if (fs->lfs_dirops == 0) 178 lfs_flush_fs(fs, SEGM_SYNC); 179 else 180 mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 181 0, &lfs_lock); 182 /* XXX KS - by falling out here, are we writing the vn 183 twice? */ 184 } 185 --fs->lfs_diropwait; 186 mutex_exit(&lfs_lock); 187 return lfs_vflush(vp); 188 } 189 return 0; 190 } 191 192 #define SINGLE 0 /* index of single indirect block */ 193 #define DOUBLE 1 /* index of double indirect block */ 194 #define TRIPLE 2 /* index of triple indirect block */ 195 /* 196 * Truncate the inode oip to at most length size, freeing the 197 * disk blocks. 198 */ 199 /* VOP_BWRITE 1 + ULFS_NIADDR + lfs_balloc == 2 + 2*ULFS_NIADDR times */ 200 201 int 202 lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) 203 { 204 daddr_t lastblock; 205 struct inode *oip = VTOI(ovp); 206 daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR]; 207 /* note: newblks is set but only actually used if DIAGNOSTIC */ 208 daddr_t newblks[ULFS_NDADDR + ULFS_NIADDR] __diagused; 209 struct lfs *fs; 210 struct buf *bp; 211 int offset, size, level; 212 daddr_t count, rcount; 213 daddr_t blocksreleased = 0, real_released = 0; 214 int i, nblocks; 215 int aflags, error, allerror = 0; 216 off_t osize; 217 long lastseg; 218 size_t bc; 219 int obufsize, odb; 220 int usepc; 221 222 if (ovp->v_type == VCHR || ovp->v_type == VBLK || 223 ovp->v_type == VFIFO || ovp->v_type == VSOCK) { 224 KASSERT(oip->i_size == 0); 225 return 0; 226 } 227 228 if (length < 0) 229 return (EINVAL); 230 231 /* 232 * Just return and not update modification times. 233 */ 234 if (oip->i_size == length) { 235 /* still do a uvm_vnp_setsize() as writesize may be larger */ 236 uvm_vnp_setsize(ovp, length); 237 return (0); 238 } 239 240 fs = oip->i_lfs; 241 242 if (ovp->v_type == VLNK && 243 (oip->i_size < fs->um_maxsymlinklen || 244 (fs->um_maxsymlinklen == 0 && 245 lfs_dino_getblocks(fs, oip->i_din) == 0))) { 246 #ifdef DIAGNOSTIC 247 if (length != 0) 248 panic("lfs_truncate: partial truncate of symlink"); 249 #endif 250 memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size); 251 oip->i_size = 0; 252 lfs_dino_setsize(fs, oip->i_din, 0); 253 oip->i_flag |= IN_CHANGE | IN_UPDATE; 254 return (lfs_update(ovp, NULL, NULL, 0)); 255 } 256 if (oip->i_size == length) { 257 oip->i_flag |= IN_CHANGE | IN_UPDATE; 258 return (lfs_update(ovp, NULL, NULL, 0)); 259 } 260 lfs_imtime(fs); 261 osize = oip->i_size; 262 usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode); 263 264 ASSERT_NO_SEGLOCK(fs); 265 /* 266 * Lengthen the size of the file. We must ensure that the 267 * last byte of the file is allocated. Since the smallest 268 * value of osize is 0, length will be at least 1. 269 */ 270 if (osize < length) { 271 if (length > fs->um_maxfilesize) 272 return (EFBIG); 273 aflags = B_CLRBUF; 274 if (ioflag & IO_SYNC) 275 aflags |= B_SYNC; 276 if (usepc) { 277 if (lfs_lblkno(fs, osize) < ULFS_NDADDR && 278 lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) && 279 lfs_blkroundup(fs, osize) != osize) { 280 off_t eob; 281 282 eob = lfs_blkroundup(fs, osize); 283 uvm_vnp_setwritesize(ovp, eob); 284 error = ulfs_balloc_range(ovp, osize, 285 eob - osize, cred, aflags); 286 if (error) { 287 (void) lfs_truncate(ovp, osize, 288 ioflag & IO_SYNC, cred); 289 return error; 290 } 291 if (ioflag & IO_SYNC) { 292 mutex_enter(ovp->v_interlock); 293 VOP_PUTPAGES(ovp, 294 trunc_page(osize & lfs_sb_getbmask(fs)), 295 round_page(eob), 296 PGO_CLEANIT | PGO_SYNCIO); 297 } 298 } 299 uvm_vnp_setwritesize(ovp, length); 300 error = ulfs_balloc_range(ovp, length - 1, 1, cred, 301 aflags); 302 if (error) { 303 (void) lfs_truncate(ovp, osize, 304 ioflag & IO_SYNC, cred); 305 return error; 306 } 307 uvm_vnp_setsize(ovp, length); 308 oip->i_flag |= IN_CHANGE | IN_UPDATE; 309 KASSERT(ovp->v_size == oip->i_size); 310 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 311 return (lfs_update(ovp, NULL, NULL, 0)); 312 } else { 313 error = lfs_reserve(fs, ovp, NULL, 314 lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); 315 if (error) 316 return (error); 317 error = lfs_balloc(ovp, length - 1, 1, cred, 318 aflags, &bp); 319 lfs_reserve(fs, ovp, NULL, 320 -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); 321 if (error) 322 return (error); 323 oip->i_size = length; 324 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 325 uvm_vnp_setsize(ovp, length); 326 (void) VOP_BWRITE(bp->b_vp, bp); 327 oip->i_flag |= IN_CHANGE | IN_UPDATE; 328 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 329 return (lfs_update(ovp, NULL, NULL, 0)); 330 } 331 } 332 333 if ((error = lfs_reserve(fs, ovp, NULL, 334 lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0) 335 return (error); 336 337 /* 338 * Shorten the size of the file. If the file is not being 339 * truncated to a block boundary, the contents of the 340 * partial block following the end of the file must be 341 * zero'ed in case it ever becomes accessible again because 342 * of subsequent file growth. Directories however are not 343 * zero'ed as they should grow back initialized to empty. 344 */ 345 offset = lfs_blkoff(fs, length); 346 lastseg = -1; 347 bc = 0; 348 349 if (ovp != fs->lfs_ivnode) 350 lfs_seglock(fs, SEGM_PROT); 351 if (offset == 0) { 352 oip->i_size = length; 353 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 354 } else if (!usepc) { 355 lbn = lfs_lblkno(fs, length); 356 aflags = B_CLRBUF; 357 if (ioflag & IO_SYNC) 358 aflags |= B_SYNC; 359 error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp); 360 if (error) { 361 lfs_reserve(fs, ovp, NULL, 362 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 363 goto errout; 364 } 365 obufsize = bp->b_bufsize; 366 odb = lfs_btofsb(fs, bp->b_bcount); 367 oip->i_size = length; 368 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 369 size = lfs_blksize(fs, oip, lbn); 370 if (ovp->v_type != VDIR) 371 memset((char *)bp->b_data + offset, 0, 372 (u_int)(size - offset)); 373 allocbuf(bp, size, 1); 374 if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) { 375 mutex_enter(&lfs_lock); 376 locked_queue_bytes -= obufsize - bp->b_bufsize; 377 mutex_exit(&lfs_lock); 378 } 379 if (bp->b_oflags & BO_DELWRI) { 380 lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size)); 381 /* XXX shouldn't this wake up on lfs_availsleep? */ 382 } 383 (void) VOP_BWRITE(bp->b_vp, bp); 384 } else { /* vp->v_type == VREG && length < osize && offset != 0 */ 385 /* 386 * When truncating a regular file down to a non-block-aligned 387 * size, we must zero the part of last block which is past 388 * the new EOF. We must synchronously flush the zeroed pages 389 * to disk since the new pages will be invalidated as soon 390 * as we inform the VM system of the new, smaller size. 391 * We must do this before acquiring the GLOCK, since fetching 392 * the pages will acquire the GLOCK internally. 393 * So there is a window where another thread could see a whole 394 * zeroed page past EOF, but that's life. 395 */ 396 daddr_t xlbn; 397 voff_t eoz; 398 399 aflags = ioflag & IO_SYNC ? B_SYNC : 0; 400 error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags); 401 if (error) { 402 lfs_reserve(fs, ovp, NULL, 403 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 404 goto errout; 405 } 406 xlbn = lfs_lblkno(fs, length); 407 size = lfs_blksize(fs, oip, xlbn); 408 eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize); 409 ubc_zerorange(&ovp->v_uobj, length, eoz - length, 410 UBC_UNMAP_FLAG(ovp)); 411 if (round_page(eoz) > round_page(length)) { 412 mutex_enter(ovp->v_interlock); 413 error = VOP_PUTPAGES(ovp, round_page(length), 414 round_page(eoz), 415 PGO_CLEANIT | PGO_DEACTIVATE | 416 ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0)); 417 if (error) { 418 lfs_reserve(fs, ovp, NULL, 419 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 420 goto errout; 421 } 422 } 423 } 424 425 genfs_node_wrlock(ovp); 426 427 oip->i_size = length; 428 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 429 uvm_vnp_setsize(ovp, length); 430 431 /* 432 * Calculate index into inode's block list of 433 * last direct and indirect blocks (if any) 434 * which we want to keep. Lastblock is -1 when 435 * the file is truncated to 0. 436 */ 437 /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */ 438 if (length > QUAD_MAX - lfs_sb_getbsize(fs)) 439 lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs)); 440 else 441 lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1; 442 lastiblock[SINGLE] = lastblock - ULFS_NDADDR; 443 lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs); 444 lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs); 445 nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); 446 /* 447 * Record changed file and block pointers before we start 448 * freeing blocks. lastiblock values are also normalized to -1 449 * for calls to lfs_indirtrunc below. 450 */ 451 for (i=0; i<ULFS_NDADDR; i++) { 452 newblks[i] = lfs_dino_getdb(fs, oip->i_din, i); 453 } 454 for (i=0; i<ULFS_NIADDR; i++) { 455 newblks[ULFS_NDADDR + i] = lfs_dino_getib(fs, oip->i_din, i); 456 } 457 for (level = TRIPLE; level >= SINGLE; level--) 458 if (lastiblock[level] < 0) { 459 newblks[ULFS_NDADDR+level] = 0; 460 lastiblock[level] = -1; 461 } 462 for (i = ULFS_NDADDR - 1; i > lastblock; i--) 463 newblks[i] = 0; 464 465 oip->i_size = osize; 466 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 467 error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0); 468 if (error && !allerror) 469 allerror = error; 470 471 /* 472 * Indirect blocks first. 473 */ 474 indir_lbn[SINGLE] = -ULFS_NDADDR; 475 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1; 476 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1; 477 for (level = TRIPLE; level >= SINGLE; level--) { 478 bn = lfs_dino_getib(fs, oip->i_din, level); 479 if (bn != 0) { 480 error = lfs_indirtrunc(oip, indir_lbn[level], 481 bn, lastiblock[level], 482 level, &count, &rcount, 483 &lastseg, &bc); 484 if (error) 485 allerror = error; 486 real_released += rcount; 487 blocksreleased += count; 488 if (lastiblock[level] < 0) { 489 if (lfs_dino_getib(fs, oip->i_din, level) > 0) 490 real_released += nblocks; 491 blocksreleased += nblocks; 492 lfs_dino_setib(fs, oip->i_din, level, 0); 493 lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs), 494 &lastseg, &bc); 495 lfs_deregister_block(ovp, bn); 496 } 497 } 498 if (lastiblock[level] >= 0) 499 goto done; 500 } 501 502 /* 503 * All whole direct blocks or frags. 504 */ 505 for (i = ULFS_NDADDR - 1; i > lastblock; i--) { 506 long bsize, obsize; 507 508 bn = lfs_dino_getdb(fs, oip->i_din, i); 509 if (bn == 0) 510 continue; 511 bsize = lfs_blksize(fs, oip, i); 512 if (lfs_dino_getdb(fs, oip->i_din, i) > 0) { 513 /* Check for fragment size changes */ 514 obsize = oip->i_lfs_fragsize[i]; 515 real_released += lfs_btofsb(fs, obsize); 516 oip->i_lfs_fragsize[i] = 0; 517 } else 518 obsize = 0; 519 blocksreleased += lfs_btofsb(fs, bsize); 520 lfs_dino_setdb(fs, oip->i_din, i, 0); 521 lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc); 522 lfs_deregister_block(ovp, bn); 523 } 524 if (lastblock < 0) 525 goto done; 526 527 /* 528 * Finally, look for a change in size of the 529 * last direct block; release any frags. 530 */ 531 bn = lfs_dino_getdb(fs, oip->i_din, lastblock); 532 if (bn != 0) { 533 long oldspace, newspace; 534 #if 0 535 long olddspace; 536 #endif 537 538 /* 539 * Calculate amount of space we're giving 540 * back as old block size minus new block size. 541 */ 542 oldspace = lfs_blksize(fs, oip, lastblock); 543 #if 0 544 olddspace = oip->i_lfs_fragsize[lastblock]; 545 #endif 546 547 oip->i_size = length; 548 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 549 newspace = lfs_blksize(fs, oip, lastblock); 550 if (newspace == 0) 551 panic("itrunc: newspace"); 552 if (oldspace - newspace > 0) { 553 blocksreleased += lfs_btofsb(fs, oldspace - newspace); 554 } 555 #if 0 556 if (bn > 0 && olddspace - newspace > 0) { 557 /* No segment accounting here, just vnode */ 558 real_released += lfs_btofsb(fs, olddspace - newspace); 559 } 560 #endif 561 } 562 563 done: 564 /* Finish segment accounting corrections */ 565 lfs_update_seguse(fs, oip, lastseg, bc); 566 #ifdef DIAGNOSTIC 567 for (level = SINGLE; level <= TRIPLE; level++) 568 if ((newblks[ULFS_NDADDR + level] == 0) != 569 (lfs_dino_getib(fs, oip->i_din, level) == 0)) { 570 panic("lfs itrunc1"); 571 } 572 for (i = 0; i < ULFS_NDADDR; i++) 573 if ((newblks[i] == 0) != 574 (lfs_dino_getdb(fs, oip->i_din, i) == 0)) { 575 panic("lfs itrunc2"); 576 } 577 if (length == 0 && 578 (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd))) 579 panic("lfs itrunc3"); 580 #endif /* DIAGNOSTIC */ 581 /* 582 * Put back the real size. 583 */ 584 oip->i_size = length; 585 lfs_dino_setsize(fs, oip->i_din, oip->i_size); 586 oip->i_lfs_effnblks -= blocksreleased; 587 lfs_dino_setblocks(fs, oip->i_din, 588 lfs_dino_getblocks(fs, oip->i_din) - real_released); 589 mutex_enter(&lfs_lock); 590 lfs_sb_addbfree(fs, blocksreleased); 591 mutex_exit(&lfs_lock); 592 #ifdef DIAGNOSTIC 593 if (oip->i_size == 0 && 594 (lfs_dino_getblocks(fs, oip->i_din) != 0 || oip->i_lfs_effnblks != 0)) { 595 printf("lfs_truncate: truncate to 0 but %jd blks/%jd effblks\n", 596 (intmax_t)lfs_dino_getblocks(fs, oip->i_din), 597 (intmax_t)oip->i_lfs_effnblks); 598 panic("lfs_truncate: persistent blocks"); 599 } 600 #endif 601 602 /* 603 * If we truncated to zero, take us off the paging queue. 604 */ 605 mutex_enter(&lfs_lock); 606 if (oip->i_size == 0 && oip->i_flags & IN_PAGING) { 607 oip->i_flags &= ~IN_PAGING; 608 TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); 609 } 610 mutex_exit(&lfs_lock); 611 612 oip->i_flag |= IN_CHANGE; 613 #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) 614 (void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0); 615 #endif 616 lfs_reserve(fs, ovp, NULL, 617 -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); 618 genfs_node_unlock(ovp); 619 errout: 620 oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 621 if (ovp != fs->lfs_ivnode) 622 lfs_segunlock(fs); 623 return (allerror ? allerror : error); 624 } 625 626 /* Update segment and avail usage information when removing a block. */ 627 static int 628 lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr, 629 size_t bsize, long *lastseg, size_t *num) 630 { 631 long seg; 632 int error = 0; 633 634 ASSERT_SEGLOCK(fs); 635 bsize = lfs_fragroundup(fs, bsize); 636 if (daddr > 0) { 637 if (*lastseg != (seg = lfs_dtosn(fs, daddr))) { 638 error = lfs_update_seguse(fs, ip, *lastseg, *num); 639 *num = bsize; 640 *lastseg = seg; 641 } else 642 *num += bsize; 643 } 644 645 return error; 646 } 647 648 /* Finish the accounting updates for a segment. */ 649 static int 650 lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num) 651 { 652 struct segdelta *sd; 653 654 ASSERT_SEGLOCK(fs); 655 if (lastseg < 0 || num == 0) 656 return 0; 657 658 LIST_FOREACH(sd, &ip->i_lfs_segdhd, list) 659 if (sd->segnum == lastseg) 660 break; 661 if (sd == NULL) { 662 sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK); 663 sd->segnum = lastseg; 664 sd->num = 0; 665 LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list); 666 } 667 sd->num += num; 668 669 return 0; 670 } 671 672 static void 673 lfs_finalize_seguse(struct lfs *fs, void *v) 674 { 675 SEGUSE *sup; 676 struct buf *bp; 677 struct segdelta *sd; 678 LIST_HEAD(, segdelta) *hd = v; 679 680 ASSERT_SEGLOCK(fs); 681 while((sd = LIST_FIRST(hd)) != NULL) { 682 LIST_REMOVE(sd, list); 683 LFS_SEGENTRY(sup, fs, sd->segnum, bp); 684 if (sd->num > sup->su_nbytes) { 685 printf("lfs_finalize_seguse: segment %ld short by %ld\n", 686 sd->segnum, (long)(sd->num - sup->su_nbytes)); 687 panic("lfs_finalize_seguse: negative bytes"); 688 sup->su_nbytes = sd->num; 689 } 690 sup->su_nbytes -= sd->num; 691 LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp); 692 free(sd, M_SEGMENT); 693 } 694 } 695 696 /* Finish the accounting updates for a segment. */ 697 void 698 lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip) 699 { 700 ASSERT_SEGLOCK(fs); 701 lfs_finalize_seguse(fs, &ip->i_lfs_segdhd); 702 } 703 704 /* Finish the accounting updates for a segment. */ 705 void 706 lfs_finalize_fs_seguse(struct lfs *fs) 707 { 708 ASSERT_SEGLOCK(fs); 709 lfs_finalize_seguse(fs, &fs->lfs_segdhd); 710 } 711 712 /* 713 * Release blocks associated with the inode ip and stored in the indirect 714 * block bn. Blocks are free'd in LIFO order up to (but not including) 715 * lastbn. If level is greater than SINGLE, the block is an indirect block 716 * and recursive calls to indirtrunc must be used to cleanse other indirect 717 * blocks. 718 * 719 * NB: triple indirect blocks are untested. 720 */ 721 static int 722 lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 723 daddr_t lastbn, int level, daddr_t *countp, 724 daddr_t *rcountp, long *lastsegp, size_t *bcp) 725 { 726 int i; 727 struct buf *bp; 728 struct lfs *fs = ip->i_lfs; 729 void *bap; 730 bool bap_needs_free; 731 struct vnode *vp; 732 daddr_t nb, nlbn, last; 733 daddr_t blkcount, rblkcount, factor; 734 int nblocks; 735 daddr_t blocksreleased = 0, real_released = 0; 736 int error = 0, allerror = 0; 737 738 ASSERT_SEGLOCK(fs); 739 /* 740 * Calculate index in current block of last 741 * block to be kept. -1 indicates the entire 742 * block so we need not calculate the index. 743 */ 744 factor = 1; 745 for (i = SINGLE; i < level; i++) 746 factor *= LFS_NINDIR(fs); 747 last = lastbn; 748 if (lastbn > 0) 749 last /= factor; 750 nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); 751 /* 752 * Get buffer of block pointers, zero those entries corresponding 753 * to blocks to be free'd, and update on disk copy first. Since 754 * double(triple) indirect before single(double) indirect, calls 755 * to bmap on these blocks will fail. However, we already have 756 * the on disk address, so we have to set the b_blkno field 757 * explicitly instead of letting bread do everything for us. 758 */ 759 vp = ITOV(ip); 760 bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0); 761 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 762 /* Braces must be here in case trace evaluates to nothing. */ 763 trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn); 764 } else { 765 trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn); 766 curlwp->l_ru.ru_inblock++; /* pay for read */ 767 bp->b_flags |= B_READ; 768 if (bp->b_bcount > bp->b_bufsize) 769 panic("lfs_indirtrunc: bad buffer size"); 770 bp->b_blkno = LFS_FSBTODB(fs, dbn); 771 VOP_STRATEGY(vp, bp); 772 error = biowait(bp); 773 } 774 if (error) { 775 brelse(bp, 0); 776 *countp = *rcountp = 0; 777 return (error); 778 } 779 780 if (lastbn >= 0) { 781 /* 782 * We still need this block, so copy the data for 783 * subsequent processing; then in the original block, 784 * zero out the dying block pointers and send it off. 785 */ 786 bap = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK); 787 memcpy(bap, bp->b_data, lfs_sb_getbsize(fs)); 788 bap_needs_free = true; 789 790 for (i = last + 1; i < LFS_NINDIR(fs); i++) { 791 lfs_iblock_set(fs, bp->b_data, i, 0); 792 } 793 error = VOP_BWRITE(bp->b_vp, bp); 794 if (error) 795 allerror = error; 796 } else { 797 bap = bp->b_data; 798 bap_needs_free = false; 799 } 800 801 /* 802 * Recursively free totally unused blocks. 803 */ 804 for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 805 i--, nlbn += factor) { 806 nb = lfs_iblock_get(fs, bap, i); 807 if (nb == 0) 808 continue; 809 if (level > SINGLE) { 810 error = lfs_indirtrunc(ip, nlbn, nb, 811 (daddr_t)-1, level - 1, 812 &blkcount, &rblkcount, 813 lastsegp, bcp); 814 if (error) 815 allerror = error; 816 blocksreleased += blkcount; 817 real_released += rblkcount; 818 } 819 lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp); 820 if (lfs_iblock_get(fs, bap, i) > 0) 821 real_released += nblocks; 822 blocksreleased += nblocks; 823 } 824 825 /* 826 * Recursively free last partial block. 827 */ 828 if (level > SINGLE && lastbn >= 0) { 829 last = lastbn % factor; 830 nb = lfs_iblock_get(fs, bap, i); 831 if (nb != 0) { 832 error = lfs_indirtrunc(ip, nlbn, nb, 833 last, level - 1, &blkcount, 834 &rblkcount, lastsegp, bcp); 835 if (error) 836 allerror = error; 837 real_released += rblkcount; 838 blocksreleased += blkcount; 839 } 840 } 841 842 if (bap_needs_free) { 843 lfs_free(fs, bap, LFS_NB_IBLOCK); 844 } else { 845 mutex_enter(&bufcache_lock); 846 if (bp->b_oflags & BO_DELWRI) { 847 LFS_UNLOCK_BUF(bp); 848 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 849 wakeup(&fs->lfs_availsleep); 850 } 851 brelsel(bp, BC_INVAL); 852 mutex_exit(&bufcache_lock); 853 } 854 855 *countp = blocksreleased; 856 *rcountp = real_released; 857 return (allerror); 858 } 859 860 /* 861 * Destroy any in core blocks past the truncation length. 862 * Inlined from vtruncbuf, so that lfs_avail could be updated. 863 * We take the seglock to prevent cleaning from occurring while we are 864 * invalidating blocks. 865 */ 866 static int 867 lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) 868 { 869 struct buf *bp, *nbp; 870 int error; 871 struct lfs *fs; 872 voff_t off; 873 874 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 875 mutex_enter(vp->v_interlock); 876 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 877 if (error) 878 return error; 879 880 fs = VTOI(vp)->i_lfs; 881 882 ASSERT_SEGLOCK(fs); 883 884 mutex_enter(&bufcache_lock); 885 restart: 886 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 887 nbp = LIST_NEXT(bp, b_vnbufs); 888 if (bp->b_lblkno < lbn) 889 continue; 890 error = bbusy(bp, catch, slptimeo, NULL); 891 if (error == EPASSTHROUGH) 892 goto restart; 893 if (error != 0) { 894 mutex_exit(&bufcache_lock); 895 return (error); 896 } 897 mutex_enter(bp->b_objlock); 898 if (bp->b_oflags & BO_DELWRI) { 899 bp->b_oflags &= ~BO_DELWRI; 900 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 901 wakeup(&fs->lfs_availsleep); 902 } 903 mutex_exit(bp->b_objlock); 904 LFS_UNLOCK_BUF(bp); 905 brelsel(bp, BC_INVAL | BC_VFLUSH); 906 } 907 908 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 909 nbp = LIST_NEXT(bp, b_vnbufs); 910 if (bp->b_lblkno < lbn) 911 continue; 912 error = bbusy(bp, catch, slptimeo, NULL); 913 if (error == EPASSTHROUGH) 914 goto restart; 915 if (error != 0) { 916 mutex_exit(&bufcache_lock); 917 return (error); 918 } 919 mutex_enter(bp->b_objlock); 920 if (bp->b_oflags & BO_DELWRI) { 921 bp->b_oflags &= ~BO_DELWRI; 922 lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 923 wakeup(&fs->lfs_availsleep); 924 } 925 mutex_exit(bp->b_objlock); 926 LFS_UNLOCK_BUF(bp); 927 brelsel(bp, BC_INVAL | BC_VFLUSH); 928 } 929 mutex_exit(&bufcache_lock); 930 931 return (0); 932 } 933 934