1 /* $NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /* 39 * Copyright (c) 1986, 1989, 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $"); 71 72 #if defined(_KERNEL_OPT) 73 #include "opt_quota.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/mount.h> 79 #include <sys/malloc.h> 80 #include <sys/proc.h> 81 #include <sys/file.h> 82 #include <sys/buf.h> 83 #include <sys/vnode.h> 84 #include <sys/kernel.h> 85 #include <sys/trace.h> 86 #include <sys/resourcevar.h> 87 #include <sys/kauth.h> 88 89 #include <ufs/ufs/quota.h> 90 #include <ufs/ufs/inode.h> 91 #include <ufs/ufs/ufsmount.h> 92 #include <ufs/ufs/ufs_extern.h> 93 94 #include <ufs/lfs/lfs.h> 95 #include <ufs/lfs/lfs_extern.h> 96 97 static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t); 98 static int lfs_indirtrunc (struct inode *, daddr_t, daddr_t, 99 daddr_t, int, long *, long *, long *, size_t *); 100 static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *); 101 static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int); 102 103 /* Search a block for a specific dinode. */ 104 struct ufs1_dinode * 105 lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp) 106 { 107 struct ufs1_dinode *dip = (struct ufs1_dinode *)bp->b_data; 108 struct ufs1_dinode *ldip, *fin; 109 110 ASSERT_NO_SEGLOCK(fs); 111 /* 112 * Read the inode block backwards, since later versions of the 113 * inode will supercede earlier ones. Though it is unlikely, it is 114 * possible that the same inode will appear in the same inode block. 115 */ 116 fin = dip + INOPB(fs); 117 for (ldip = fin - 1; ldip >= dip; --ldip) 118 if (ldip->di_inumber == ino) 119 return (ldip); 120 121 printf("searched %d entries\n", (int)(fin - dip)); 122 printf("offset is 0x%x (seg %d)\n", fs->lfs_offset, 123 dtosn(fs, fs->lfs_offset)); 124 printf("block is 0x%llx (seg %lld)\n", 125 (unsigned long long)dbtofsb(fs, bp->b_blkno), 126 (long long)dtosn(fs, dbtofsb(fs, bp->b_blkno))); 127 128 return NULL; 129 } 130 131 int 132 lfs_update(struct vnode *vp, const struct timespec *acc, 133 const struct timespec *mod, int updflags) 134 { 135 struct inode *ip; 136 struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs; 137 int s; 138 int flags; 139 140 ASSERT_NO_SEGLOCK(fs); 141 if (vp->v_mount->mnt_flag & MNT_RDONLY) 142 return (0); 143 ip = VTOI(vp); 144 145 /* 146 * If we are called from vinvalbuf, and the file's blocks have 147 * already been scheduled for writing, but the writes have not 148 * yet completed, lfs_vflush will not be called, and vinvalbuf 149 * will cause a panic. So, we must wait until any pending write 150 * for our inode completes, if we are called with UPDATE_WAIT set. 151 */ 152 s = splbio(); 153 simple_lock(&vp->v_interlock); 154 while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && 155 WRITEINPROG(vp)) { 156 DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d" 157 " (in progress)\n", ip->i_number)); 158 ltsleep(vp, (PRIBIO+1), "lfs_update", 0, &vp->v_interlock); 159 } 160 simple_unlock(&vp->v_interlock); 161 splx(s); 162 LFS_ITIMES(ip, acc, mod, NULL); 163 if (updflags & UPDATE_CLOSE) 164 flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); 165 else 166 flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING); 167 if (flags == 0) 168 return (0); 169 170 /* If sync, push back the vnode and any dirty blocks it may have. */ 171 if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { 172 /* Avoid flushing VU_DIROP. */ 173 simple_lock(&fs->lfs_interlock); 174 ++fs->lfs_diropwait; 175 while (vp->v_uflag & VU_DIROP) { 176 DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d" 177 " (dirops)\n", ip->i_number)); 178 DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags" 179 " 0x%x\n", vp->v_uflag|vp->v_iflag|vp->v_vflag, 180 ip->i_flag)); 181 if (fs->lfs_dirops == 0) 182 lfs_flush_fs(fs, SEGM_SYNC); 183 else 184 ltsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 185 0, &fs->lfs_interlock); 186 /* XXX KS - by falling out here, are we writing the vn 187 twice? */ 188 } 189 --fs->lfs_diropwait; 190 simple_unlock(&fs->lfs_interlock); 191 return lfs_vflush(vp); 192 } 193 return 0; 194 } 195 196 #define SINGLE 0 /* index of single indirect block */ 197 #define DOUBLE 1 /* index of double indirect block */ 198 #define TRIPLE 2 /* index of triple indirect block */ 199 /* 200 * Truncate the inode oip to at most length size, freeing the 201 * disk blocks. 202 */ 203 /* VOP_BWRITE 1 + NIADDR + lfs_balloc == 2 + 2*NIADDR times */ 204 205 int 206 lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) 207 { 208 daddr_t lastblock; 209 struct inode *oip = VTOI(ovp); 210 daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 211 /* XXX ondisk32 */ 212 int32_t newblks[NDADDR + NIADDR]; 213 struct lfs *fs; 214 struct buf *bp; 215 int offset, size, level; 216 long count, rcount, blocksreleased = 0, real_released = 0; 217 int i, nblocks; 218 int aflags, error, allerror = 0; 219 off_t osize; 220 long lastseg; 221 size_t bc; 222 int obufsize, odb; 223 int usepc; 224 struct ufsmount *ump = oip->i_ump; 225 226 if (ovp->v_type == VCHR || ovp->v_type == VBLK || 227 ovp->v_type == VFIFO || ovp->v_type == VSOCK) { 228 KASSERT(oip->i_size == 0); 229 return 0; 230 } 231 232 if (length < 0) 233 return (EINVAL); 234 235 /* 236 * Just return and not update modification times. 237 */ 238 if (oip->i_size == length) 239 return (0); 240 241 if (ovp->v_type == VLNK && 242 (oip->i_size < ump->um_maxsymlinklen || 243 (ump->um_maxsymlinklen == 0 && 244 oip->i_ffs1_blocks == 0))) { 245 #ifdef DIAGNOSTIC 246 if (length != 0) 247 panic("lfs_truncate: partial truncate of symlink"); 248 #endif 249 memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size); 250 oip->i_size = oip->i_ffs1_size = 0; 251 oip->i_flag |= IN_CHANGE | IN_UPDATE; 252 return (lfs_update(ovp, NULL, NULL, 0)); 253 } 254 if (oip->i_size == length) { 255 oip->i_flag |= IN_CHANGE | IN_UPDATE; 256 return (lfs_update(ovp, NULL, NULL, 0)); 257 } 258 fs = oip->i_lfs; 259 lfs_imtime(fs); 260 osize = oip->i_size; 261 usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode); 262 263 ASSERT_NO_SEGLOCK(fs); 264 /* 265 * Lengthen the size of the file. We must ensure that the 266 * last byte of the file is allocated. Since the smallest 267 * value of osize is 0, length will be at least 1. 268 */ 269 if (osize < length) { 270 if (length > ump->um_maxfilesize) 271 return (EFBIG); 272 aflags = B_CLRBUF; 273 if (ioflag & IO_SYNC) 274 aflags |= B_SYNC; 275 if (usepc) { 276 if (lblkno(fs, osize) < NDADDR && 277 lblkno(fs, osize) != lblkno(fs, length) && 278 blkroundup(fs, osize) != osize) { 279 off_t eob; 280 281 eob = blkroundup(fs, osize); 282 uvm_vnp_setwritesize(ovp, eob); 283 error = ufs_balloc_range(ovp, osize, 284 eob - osize, cred, aflags); 285 if (error) 286 return error; 287 if (ioflag & IO_SYNC) { 288 simple_lock(&ovp->v_interlock); 289 VOP_PUTPAGES(ovp, 290 trunc_page(osize & fs->lfs_bmask), 291 round_page(eob), 292 PGO_CLEANIT | PGO_SYNCIO); 293 } 294 } 295 uvm_vnp_setwritesize(ovp, length); 296 error = ufs_balloc_range(ovp, length - 1, 1, cred, 297 aflags); 298 if (error) { 299 (void) lfs_truncate(ovp, osize, 300 ioflag & IO_SYNC, cred); 301 return error; 302 } 303 uvm_vnp_setsize(ovp, length); 304 oip->i_flag |= IN_CHANGE | IN_UPDATE; 305 KASSERT(ovp->v_size == oip->i_size); 306 oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1; 307 return (lfs_update(ovp, NULL, NULL, 0)); 308 } else { 309 error = lfs_reserve(fs, ovp, NULL, 310 btofsb(fs, (NIADDR + 2) << fs->lfs_bshift)); 311 if (error) 312 return (error); 313 error = lfs_balloc(ovp, length - 1, 1, cred, 314 aflags, &bp); 315 lfs_reserve(fs, ovp, NULL, 316 -btofsb(fs, (NIADDR + 2) << fs->lfs_bshift)); 317 if (error) 318 return (error); 319 oip->i_ffs1_size = oip->i_size = length; 320 uvm_vnp_setsize(ovp, length); 321 (void) VOP_BWRITE(bp); 322 oip->i_flag |= IN_CHANGE | IN_UPDATE; 323 oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1; 324 return (lfs_update(ovp, NULL, NULL, 0)); 325 } 326 } 327 328 if ((error = lfs_reserve(fs, ovp, NULL, 329 btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift))) != 0) 330 return (error); 331 332 /* 333 * Shorten the size of the file. If the file is not being 334 * truncated to a block boundary, the contents of the 335 * partial block following the end of the file must be 336 * zero'ed in case it ever becomes accessible again because 337 * of subsequent file growth. Directories however are not 338 * zero'ed as they should grow back initialized to empty. 339 */ 340 offset = blkoff(fs, length); 341 lastseg = -1; 342 bc = 0; 343 344 if (ovp != fs->lfs_ivnode) 345 lfs_seglock(fs, SEGM_PROT); 346 if (offset == 0) { 347 oip->i_size = oip->i_ffs1_size = length; 348 } else if (!usepc) { 349 lbn = lblkno(fs, length); 350 aflags = B_CLRBUF; 351 if (ioflag & IO_SYNC) 352 aflags |= B_SYNC; 353 error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp); 354 if (error) { 355 lfs_reserve(fs, ovp, NULL, 356 -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); 357 goto errout; 358 } 359 obufsize = bp->b_bufsize; 360 odb = btofsb(fs, bp->b_bcount); 361 oip->i_size = oip->i_ffs1_size = length; 362 size = blksize(fs, oip, lbn); 363 if (ovp->v_type != VDIR) 364 memset((char *)bp->b_data + offset, 0, 365 (u_int)(size - offset)); 366 allocbuf(bp, size, 1); 367 if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { 368 simple_lock(&lfs_subsys_lock); 369 locked_queue_bytes -= obufsize - bp->b_bufsize; 370 simple_unlock(&lfs_subsys_lock); 371 } 372 if (bp->b_flags & B_DELWRI) 373 fs->lfs_avail += odb - btofsb(fs, size); 374 (void) VOP_BWRITE(bp); 375 } else { /* vp->v_type == VREG && length < osize && offset != 0 */ 376 /* 377 * When truncating a regular file down to a non-block-aligned 378 * size, we must zero the part of last block which is past 379 * the new EOF. We must synchronously flush the zeroed pages 380 * to disk since the new pages will be invalidated as soon 381 * as we inform the VM system of the new, smaller size. 382 * We must do this before acquiring the GLOCK, since fetching 383 * the pages will acquire the GLOCK internally. 384 * So there is a window where another thread could see a whole 385 * zeroed page past EOF, but that's life. 386 */ 387 daddr_t xlbn; 388 voff_t eoz; 389 390 aflags = ioflag & IO_SYNC ? B_SYNC : 0; 391 error = ufs_balloc_range(ovp, length - 1, 1, cred, aflags); 392 if (error) { 393 lfs_reserve(fs, ovp, NULL, 394 -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); 395 goto errout; 396 } 397 xlbn = lblkno(fs, length); 398 size = blksize(fs, oip, xlbn); 399 eoz = MIN(lblktosize(fs, xlbn) + size, osize); 400 uvm_vnp_zerorange(ovp, length, eoz - length); 401 if (round_page(eoz) > round_page(length)) { 402 simple_lock(&ovp->v_interlock); 403 error = VOP_PUTPAGES(ovp, round_page(length), 404 round_page(eoz), 405 PGO_CLEANIT | PGO_DEACTIVATE | 406 ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0)); 407 if (error) { 408 lfs_reserve(fs, ovp, NULL, 409 -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); 410 goto errout; 411 } 412 } 413 } 414 415 genfs_node_wrlock(ovp); 416 417 oip->i_size = oip->i_ffs1_size = length; 418 uvm_vnp_setsize(ovp, length); 419 /* 420 * Calculate index into inode's block list of 421 * last direct and indirect blocks (if any) 422 * which we want to keep. Lastblock is -1 when 423 * the file is truncated to 0. 424 */ 425 /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */ 426 if (length > QUAD_MAX - fs->lfs_bsize) 427 lastblock = lblkno(fs, QUAD_MAX - fs->lfs_bsize); 428 else 429 lastblock = lblkno(fs, length + fs->lfs_bsize - 1) - 1; 430 lastiblock[SINGLE] = lastblock - NDADDR; 431 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 432 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 433 nblocks = btofsb(fs, fs->lfs_bsize); 434 /* 435 * Record changed file and block pointers before we start 436 * freeing blocks. lastiblock values are also normalized to -1 437 * for calls to lfs_indirtrunc below. 438 */ 439 memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks); 440 for (level = TRIPLE; level >= SINGLE; level--) 441 if (lastiblock[level] < 0) { 442 newblks[NDADDR+level] = 0; 443 lastiblock[level] = -1; 444 } 445 for (i = NDADDR - 1; i > lastblock; i--) 446 newblks[i] = 0; 447 448 oip->i_size = oip->i_ffs1_size = osize; 449 error = lfs_vtruncbuf(ovp, lastblock + 1, 0, 0); 450 if (error && !allerror) 451 allerror = error; 452 453 /* 454 * Indirect blocks first. 455 */ 456 indir_lbn[SINGLE] = -NDADDR; 457 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 458 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 459 for (level = TRIPLE; level >= SINGLE; level--) { 460 bn = oip->i_ffs1_ib[level]; 461 if (bn != 0) { 462 error = lfs_indirtrunc(oip, indir_lbn[level], 463 bn, lastiblock[level], 464 level, &count, &rcount, 465 &lastseg, &bc); 466 if (error) 467 allerror = error; 468 real_released += rcount; 469 blocksreleased += count; 470 if (lastiblock[level] < 0) { 471 if (oip->i_ffs1_ib[level] > 0) 472 real_released += nblocks; 473 blocksreleased += nblocks; 474 oip->i_ffs1_ib[level] = 0; 475 lfs_blkfree(fs, oip, bn, fs->lfs_bsize, 476 &lastseg, &bc); 477 lfs_deregister_block(ovp, bn); 478 } 479 } 480 if (lastiblock[level] >= 0) 481 goto done; 482 } 483 484 /* 485 * All whole direct blocks or frags. 486 */ 487 for (i = NDADDR - 1; i > lastblock; i--) { 488 long bsize, obsize; 489 490 bn = oip->i_ffs1_db[i]; 491 if (bn == 0) 492 continue; 493 bsize = blksize(fs, oip, i); 494 if (oip->i_ffs1_db[i] > 0) { 495 /* Check for fragment size changes */ 496 obsize = oip->i_lfs_fragsize[i]; 497 real_released += btofsb(fs, obsize); 498 oip->i_lfs_fragsize[i] = 0; 499 } else 500 obsize = 0; 501 blocksreleased += btofsb(fs, bsize); 502 oip->i_ffs1_db[i] = 0; 503 lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc); 504 lfs_deregister_block(ovp, bn); 505 } 506 if (lastblock < 0) 507 goto done; 508 509 /* 510 * Finally, look for a change in size of the 511 * last direct block; release any frags. 512 */ 513 bn = oip->i_ffs1_db[lastblock]; 514 if (bn != 0) { 515 long oldspace, newspace; 516 #if 0 517 long olddspace; 518 #endif 519 520 /* 521 * Calculate amount of space we're giving 522 * back as old block size minus new block size. 523 */ 524 oldspace = blksize(fs, oip, lastblock); 525 #if 0 526 olddspace = oip->i_lfs_fragsize[lastblock]; 527 #endif 528 529 oip->i_size = oip->i_ffs1_size = length; 530 newspace = blksize(fs, oip, lastblock); 531 if (newspace == 0) 532 panic("itrunc: newspace"); 533 if (oldspace - newspace > 0) { 534 blocksreleased += btofsb(fs, oldspace - newspace); 535 } 536 #if 0 537 if (bn > 0 && olddspace - newspace > 0) { 538 /* No segment accounting here, just vnode */ 539 real_released += btofsb(fs, olddspace - newspace); 540 } 541 #endif 542 } 543 544 done: 545 /* Finish segment accounting corrections */ 546 lfs_update_seguse(fs, oip, lastseg, bc); 547 #ifdef DIAGNOSTIC 548 for (level = SINGLE; level <= TRIPLE; level++) 549 if ((newblks[NDADDR + level] == 0) != 550 ((oip->i_ffs1_ib[level]) == 0)) { 551 panic("lfs itrunc1"); 552 } 553 for (i = 0; i < NDADDR; i++) 554 if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) { 555 panic("lfs itrunc2"); 556 } 557 if (length == 0 && 558 (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd))) 559 panic("lfs itrunc3"); 560 #endif /* DIAGNOSTIC */ 561 /* 562 * Put back the real size. 563 */ 564 oip->i_size = oip->i_ffs1_size = length; 565 oip->i_lfs_effnblks -= blocksreleased; 566 oip->i_ffs1_blocks -= real_released; 567 simple_lock(&fs->lfs_interlock); 568 fs->lfs_bfree += blocksreleased; 569 simple_unlock(&fs->lfs_interlock); 570 #ifdef DIAGNOSTIC 571 if (oip->i_size == 0 && 572 (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) { 573 printf("lfs_truncate: truncate to 0 but %d blks/%d effblks\n", 574 oip->i_ffs1_blocks, oip->i_lfs_effnblks); 575 panic("lfs_truncate: persistent blocks"); 576 } 577 #endif 578 579 /* 580 * If we truncated to zero, take us off the paging queue. 581 */ 582 simple_lock(&fs->lfs_interlock); 583 if (oip->i_size == 0 && oip->i_flags & IN_PAGING) { 584 oip->i_flags &= ~IN_PAGING; 585 TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); 586 } 587 simple_unlock(&fs->lfs_interlock); 588 589 oip->i_flag |= IN_CHANGE; 590 #ifdef QUOTA 591 (void) chkdq(oip, -blocksreleased, NOCRED, 0); 592 #endif 593 lfs_reserve(fs, ovp, NULL, 594 -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); 595 genfs_node_unlock(ovp); 596 errout: 597 oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1; 598 if (ovp != fs->lfs_ivnode) 599 lfs_segunlock(fs); 600 return (allerror ? allerror : error); 601 } 602 603 /* Update segment and avail usage information when removing a block. */ 604 static int 605 lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr, 606 size_t bsize, long *lastseg, size_t *num) 607 { 608 long seg; 609 int error = 0; 610 611 ASSERT_SEGLOCK(fs); 612 bsize = fragroundup(fs, bsize); 613 if (daddr > 0) { 614 if (*lastseg != (seg = dtosn(fs, daddr))) { 615 error = lfs_update_seguse(fs, ip, *lastseg, *num); 616 *num = bsize; 617 *lastseg = seg; 618 } else 619 *num += bsize; 620 } 621 622 return error; 623 } 624 625 /* Finish the accounting updates for a segment. */ 626 static int 627 lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num) 628 { 629 struct segdelta *sd; 630 struct vnode *vp; 631 632 ASSERT_SEGLOCK(fs); 633 if (lastseg < 0 || num == 0) 634 return 0; 635 636 vp = ITOV(ip); 637 LIST_FOREACH(sd, &ip->i_lfs_segdhd, list) 638 if (sd->segnum == lastseg) 639 break; 640 if (sd == NULL) { 641 sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK); 642 sd->segnum = lastseg; 643 sd->num = 0; 644 LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list); 645 } 646 sd->num += num; 647 648 return 0; 649 } 650 651 static void 652 lfs_finalize_seguse(struct lfs *fs, void *v) 653 { 654 SEGUSE *sup; 655 struct buf *bp; 656 struct segdelta *sd; 657 LIST_HEAD(, segdelta) *hd = v; 658 659 ASSERT_SEGLOCK(fs); 660 while((sd = LIST_FIRST(hd)) != NULL) { 661 LIST_REMOVE(sd, list); 662 LFS_SEGENTRY(sup, fs, sd->segnum, bp); 663 if (sd->num > sup->su_nbytes) { 664 printf("lfs_finalize_seguse: segment %ld short by %ld\n", 665 sd->segnum, (long)(sd->num - sup->su_nbytes)); 666 panic("lfs_finalize_seguse: negative bytes"); 667 sup->su_nbytes = sd->num; 668 } 669 sup->su_nbytes -= sd->num; 670 LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp); 671 free(sd, M_SEGMENT); 672 } 673 } 674 675 /* Finish the accounting updates for a segment. */ 676 void 677 lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip) 678 { 679 ASSERT_SEGLOCK(fs); 680 lfs_finalize_seguse(fs, &ip->i_lfs_segdhd); 681 } 682 683 /* Finish the accounting updates for a segment. */ 684 void 685 lfs_finalize_fs_seguse(struct lfs *fs) 686 { 687 ASSERT_SEGLOCK(fs); 688 lfs_finalize_seguse(fs, &fs->lfs_segdhd); 689 } 690 691 /* 692 * Release blocks associated with the inode ip and stored in the indirect 693 * block bn. Blocks are free'd in LIFO order up to (but not including) 694 * lastbn. If level is greater than SINGLE, the block is an indirect block 695 * and recursive calls to indirtrunc must be used to cleanse other indirect 696 * blocks. 697 * 698 * NB: triple indirect blocks are untested. 699 */ 700 static int 701 lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 702 daddr_t lastbn, int level, long *countp, 703 long *rcountp, long *lastsegp, size_t *bcp) 704 { 705 int i; 706 struct buf *bp; 707 struct lfs *fs = ip->i_lfs; 708 int32_t *bap; /* XXX ondisk32 */ 709 struct vnode *vp; 710 daddr_t nb, nlbn, last; 711 int32_t *copy = NULL; /* XXX ondisk32 */ 712 long blkcount, rblkcount, factor; 713 int nblocks, blocksreleased = 0, real_released = 0; 714 int error = 0, allerror = 0; 715 716 ASSERT_SEGLOCK(fs); 717 /* 718 * Calculate index in current block of last 719 * block to be kept. -1 indicates the entire 720 * block so we need not calculate the index. 721 */ 722 factor = 1; 723 for (i = SINGLE; i < level; i++) 724 factor *= NINDIR(fs); 725 last = lastbn; 726 if (lastbn > 0) 727 last /= factor; 728 nblocks = btofsb(fs, fs->lfs_bsize); 729 /* 730 * Get buffer of block pointers, zero those entries corresponding 731 * to blocks to be free'd, and update on disk copy first. Since 732 * double(triple) indirect before single(double) indirect, calls 733 * to bmap on these blocks will fail. However, we already have 734 * the on disk address, so we have to set the b_blkno field 735 * explicitly instead of letting bread do everything for us. 736 */ 737 vp = ITOV(ip); 738 bp = getblk(vp, lbn, (int)fs->lfs_bsize, 0, 0); 739 if (bp->b_flags & (B_DONE | B_DELWRI)) { 740 /* Braces must be here in case trace evaluates to nothing. */ 741 trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn); 742 } else { 743 trace(TR_BREADMISS, pack(vp, fs->lfs_bsize), lbn); 744 curlwp->l_proc->p_stats->p_ru.ru_inblock++; /* pay for read */ 745 bp->b_flags |= B_READ; 746 if (bp->b_bcount > bp->b_bufsize) 747 panic("lfs_indirtrunc: bad buffer size"); 748 bp->b_blkno = fsbtodb(fs, dbn); 749 VOP_STRATEGY(vp, bp); 750 error = biowait(bp); 751 } 752 if (error) { 753 brelse(bp, 0); 754 *countp = *rcountp = 0; 755 return (error); 756 } 757 758 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ 759 if (lastbn >= 0) { 760 copy = (int32_t *)lfs_malloc(fs, fs->lfs_bsize, LFS_NB_IBLOCK); 761 memcpy((void *)copy, (void *)bap, (u_int)fs->lfs_bsize); 762 memset((void *)&bap[last + 1], 0, 763 /* XXX ondisk32 */ 764 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (int32_t)); 765 error = VOP_BWRITE(bp); 766 if (error) 767 allerror = error; 768 bap = copy; 769 } 770 771 /* 772 * Recursively free totally unused blocks. 773 */ 774 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 775 i--, nlbn += factor) { 776 nb = bap[i]; 777 if (nb == 0) 778 continue; 779 if (level > SINGLE) { 780 error = lfs_indirtrunc(ip, nlbn, nb, 781 (daddr_t)-1, level - 1, 782 &blkcount, &rblkcount, 783 lastsegp, bcp); 784 if (error) 785 allerror = error; 786 blocksreleased += blkcount; 787 real_released += rblkcount; 788 } 789 lfs_blkfree(fs, ip, nb, fs->lfs_bsize, lastsegp, bcp); 790 if (bap[i] > 0) 791 real_released += nblocks; 792 blocksreleased += nblocks; 793 } 794 795 /* 796 * Recursively free last partial block. 797 */ 798 if (level > SINGLE && lastbn >= 0) { 799 last = lastbn % factor; 800 nb = bap[i]; 801 if (nb != 0) { 802 error = lfs_indirtrunc(ip, nlbn, nb, 803 last, level - 1, &blkcount, 804 &rblkcount, lastsegp, bcp); 805 if (error) 806 allerror = error; 807 real_released += rblkcount; 808 blocksreleased += blkcount; 809 } 810 } 811 812 if (copy != NULL) { 813 lfs_free(fs, copy, LFS_NB_IBLOCK); 814 } else { 815 if (bp->b_flags & B_DELWRI) { 816 LFS_UNLOCK_BUF(bp); 817 fs->lfs_avail += btofsb(fs, bp->b_bcount); 818 wakeup(&fs->lfs_avail); 819 } 820 brelse(bp, BC_INVAL); 821 } 822 823 *countp = blocksreleased; 824 *rcountp = real_released; 825 return (allerror); 826 } 827 828 /* 829 * Destroy any in core blocks past the truncation length. 830 * Inlined from vtruncbuf, so that lfs_avail could be updated. 831 * We take the seglock to prevent cleaning from occurring while we are 832 * invalidating blocks. 833 */ 834 static int 835 lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) 836 { 837 struct buf *bp, *nbp; 838 int s, error; 839 struct lfs *fs; 840 voff_t off; 841 842 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 843 simple_lock(&vp->v_interlock); 844 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 845 if (error) 846 return error; 847 848 fs = VTOI(vp)->i_lfs; 849 s = splbio(); 850 851 ASSERT_SEGLOCK(fs); 852 restart: 853 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 854 nbp = LIST_NEXT(bp, b_vnbufs); 855 if (bp->b_lblkno < lbn) 856 continue; 857 simple_lock(&bp->b_interlock); 858 if (bp->b_flags & B_BUSY) { 859 bp->b_flags |= B_WANTED; 860 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 861 "lfs_vtruncbuf", slptimeo, &bp->b_interlock); 862 if (error) { 863 splx(s); 864 return (error); 865 } 866 goto restart; 867 } 868 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 869 if (bp->b_flags & B_DELWRI) { 870 bp->b_flags &= ~B_DELWRI; 871 fs->lfs_avail += btofsb(fs, bp->b_bcount); 872 wakeup(&fs->lfs_avail); 873 } 874 LFS_UNLOCK_BUF(bp); 875 simple_unlock(&bp->b_interlock); 876 brelse(bp, 0); 877 } 878 879 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 880 nbp = LIST_NEXT(bp, b_vnbufs); 881 if (bp->b_lblkno < lbn) 882 continue; 883 simple_lock(&bp->b_interlock); 884 if (bp->b_flags & B_BUSY) { 885 bp->b_flags |= B_WANTED; 886 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 887 "lfs_vtruncbuf", slptimeo, &bp->b_interlock); 888 if (error) { 889 splx(s); 890 return (error); 891 } 892 goto restart; 893 } 894 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 895 if (bp->b_flags & B_DELWRI) { 896 bp->b_flags &= ~B_DELWRI; 897 fs->lfs_avail += btofsb(fs, bp->b_bcount); 898 wakeup(&fs->lfs_avail); 899 } 900 LFS_UNLOCK_BUF(bp); 901 simple_unlock(&bp->b_interlock); 902 brelse(bp, 0); 903 } 904 905 splx(s); 906 907 return (0); 908 } 909 910