/*	$NetBSD: lfs_inode.c,v 1.88 2004/08/15 19:01:16 mycroft Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_inode.c	8.9 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.88 2004/08/15 19:01:16 mycroft Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

static int lfs_update_seguse(struct lfs *, long, size_t);
static int lfs_indirtrunc (struct inode *, daddr_t, daddr_t,
			   daddr_t, int, long *, long *, long *, size_t *,
			   struct proc *);
static int lfs_blkfree (struct lfs *, daddr_t, size_t, long *, size_t *);
static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int);

/* Search a block for a specific dinode. */
struct ufs1_dinode *
lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp)
{
	struct ufs1_dinode *dip = (struct ufs1_dinode *)bp->b_data;
	struct ufs1_dinode *ldip, *fin;

#ifdef LFS_IFILE_FRAG_ADDRESSING
	if (fs->lfs_version == 1)
		fin = dip + INOPB(fs);
	else
		fin = dip + INOPF(fs);
#else
	fin = dip + INOPB(fs);
#endif

	/*
	 * Read the inode block backwards, since later versions of the
	 * inode will supersede earlier ones.  Though it is unlikely, it is
	 * possible that the same inode will appear in the same inode block.
	 */
	for (ldip = fin - 1; ldip >= dip; --ldip)
		if (ldip->di_inumber == ino)
			return (ldip);

	printf("searched %d entries\n", (int)(fin - dip));
	printf("offset is 0x%x (seg %d)\n", fs->lfs_offset,
	       dtosn(fs, fs->lfs_offset));
	printf("block is 0x%llx (seg %lld)\n",
	       (unsigned long long)dbtofsb(fs, bp->b_blkno),
	       (long long)dtosn(fs, dbtofsb(fs, bp->b_blkno)));

	return NULL;
}

int
lfs_update(void *v)
{
	struct vop_update_args /* {
		struct vnode *a_vp;
		struct timespec *a_access;
		struct timespec *a_modify;
		int a_flags;
	} */ *ap = v;
	struct inode *ip;
	struct vnode *vp = ap->a_vp;
	struct timespec ts;
	struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;
	int s;
	int flags;

	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);

	/*
	 * If we are called from vinvalbuf, and the file's blocks have
	 * already been scheduled for writing, but the writes have not
	 * yet completed, lfs_vflush will not be called, and vinvalbuf
	 * will cause a panic.  So, we must wait until any pending write
	 * for our inode completes, if we are called with UPDATE_WAIT set.
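	 * (That is what the loop below does: it sleeps on the vnode until
	 * WRITEINPROG() no longer reports an outstanding write.)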
	 */
	s = splbio();
	while ((ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT &&
	    WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
		printf("lfs_update: sleeping on inode %d (in-progress)\n",
		       ip->i_number);
#endif
		tsleep(vp, (PRIBIO+1), "lfs_update", 0);
	}
	splx(s);
	TIMEVAL_TO_TIMESPEC(&time, &ts);
	LFS_ITIMES(ip,
		   ap->a_access ? ap->a_access : &ts,
		   ap->a_modify ? ap->a_modify : &ts, &ts);
	if (ap->a_flags & UPDATE_CLOSE)
		flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING);
	else
		flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING);
	if (flags == 0)
		return (0);

	/* If sync, push back the vnode and any dirty blocks it may have. */
	if ((ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) {
		/* Avoid flushing VDIROP. */
		++fs->lfs_diropwait;
		while (vp->v_flag & VDIROP) {
#ifdef DEBUG_LFS
			printf("lfs_update: sleeping on inode %d (dirops)\n",
			       ip->i_number);
			printf("lfs_update: vflags 0x%x, iflags 0x%x\n",
			       vp->v_flag, ip->i_flag);
#endif
			if (fs->lfs_dirops == 0)
				lfs_flush_fs(fs, SEGM_SYNC);
			else
				tsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync",
				       0);
			/* XXX KS - by falling out here, are we writing the vn
			   twice? */
		}
		--fs->lfs_diropwait;
		return lfs_vflush(vp);
	}
	return 0;
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
/* VOP_BWRITE 1 + NIADDR + VOP_BALLOC == 2 + 2*NIADDR times */

int
lfs_truncate(void *v)
{
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *ovp = ap->a_vp;
	struct genfs_node *gp = VTOG(ovp);
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	/* XXX ondisk32 */
	int32_t newblks[NDADDR + NIADDR];
	off_t length = ap->a_length;
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, rcount, blocksreleased = 0, real_released = 0;
	int i, ioflag, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;
	struct ufsmount *ump = oip->i_ump;

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return and do not update modification times.
	 */
	if (oip->i_size == length)
		return (0);

	if (ovp->v_type == VLNK &&
	    (oip->i_size < ump->um_maxsymlinklen ||
	     (ump->um_maxsymlinklen == 0 &&
	      oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = oip->i_ffs1_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(ovp, NULL, NULL, 0));
	}
	if (oip->i_size == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(ovp, NULL, NULL, 0));
	}
#ifdef QUOTA
	if ((error = getinoquota(oip)) != 0)
		return (error);
#endif
	fs = oip->i_lfs;
	lfs_imtime(fs);
	osize = oip->i_size;
	ioflag = ap->a_flags;
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	/*
	 * Lengthen the size of the file.
	 * We must ensure that the last byte of the file is allocated.
	 * Since the smallest value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > ump->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			if (lblkno(fs, osize) < NDADDR &&
			    lblkno(fs, osize) != lblkno(fs, length) &&
			    blkroundup(fs, osize) != osize) {
				off_t eob;

				eob = blkroundup(fs, osize);
				error = ufs_balloc_range(ovp, osize,
				    eob - osize, ap->a_cred, aflags);
				if (error)
					return error;
				if (ioflag & IO_SYNC) {
					ovp->v_size = eob;
					simple_lock(&ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & fs->lfs_bmask),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			error = ufs_balloc_range(ovp, length - 1, 1, ap->a_cred,
						 aflags);
			if (error) {
				(void) VOP_TRUNCATE(ovp, osize,
						    ioflag & IO_SYNC,
						    ap->a_cred, ap->a_p);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			return (VOP_UPDATE(ovp, NULL, NULL, 0));
		} else {
			error = lfs_reserve(fs, ovp, NULL,
			    btofsb(fs, (NIADDR + 2) << fs->lfs_bshift));
			if (error)
				return (error);
			error = VOP_BALLOC(ovp, length - 1, 1, ap->a_cred,
					   aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (NIADDR + 2) << fs->lfs_bshift));
			if (error)
				return (error);
			oip->i_ffs1_size = oip->i_size = length;
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (VOP_UPDATE(ovp, NULL, NULL, 0));
		}
	}

	if ((error = lfs_reserve(fs, ovp, NULL,
	    btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift))) != 0)
		return (error);

	/*
	 * Shorten the size of the file.  If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.  Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		oip->i_size = oip->i_ffs1_size = length;
	} else if (!usepc) {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = VOP_BALLOC(ovp, length - 1, 1, ap->a_cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
			goto errout;
		}
		obufsize = bp->b_bufsize;
		odb = btofsb(fs, bp->b_bcount);
		oip->i_size = oip->i_ffs1_size = length;
		size = blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
			locked_queue_bytes -= obufsize - bp->b_bufsize;
		if (bp->b_flags & B_DELWRI)
			fs->lfs_avail += odb - btofsb(fs, size);
		(void) VOP_BWRITE(bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of the last block which is past
		 * the new EOF.
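		 * (uvm_vnp_zerorange() below zeroes that tail of the last
		 * block directly in the page cache.)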
		 * We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t lbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ufs_balloc_range(ovp, length - 1, 1, ap->a_cred,
		    aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
			goto errout;
		}
		lbn = lblkno(fs, length);
		size = blksize(fs, oip, lbn);
		eoz = MIN(lblktosize(fs, lbn) + size, osize);
		uvm_vnp_zerorange(ovp, length, eoz - length);
		if (round_page(eoz) > round_page(length)) {
			simple_lock(&ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
				    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
				goto errout;
			}
		}
	}

	lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);

	oip->i_size = oip->i_ffs1_size = length;
	uvm_vnp_setsize(ovp, length);
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->lfs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btofsb(fs, fs->lfs_bsize);
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	memcpy((caddr_t)newblks, (caddr_t)&oip->i_ffs1_db[0], sizeof newblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[NDADDR+level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	oip->i_size = oip->i_ffs1_size = osize;
	error = lfs_vtruncbuf(ovp, lastblock + 1, 0, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ffs1_ib[level];
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc, ap->a_p);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				if (oip->i_ffs1_ib[level] > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				oip->i_ffs1_ib[level] = 0;
				lfs_blkfree(fs, bn, fs->lfs_bsize, &lastseg, &bc);
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
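	 * Walk the direct block pointers from the end of the file down to
	 * the new last block, releasing each block and batching the freed
	 * bytes per segment via lfs_blkfree().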
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = oip->i_ffs1_db[i];
		if (bn == 0)
			continue;
		bsize = blksize(fs, oip, i);
		if (oip->i_ffs1_db[i] > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += btofsb(fs, bsize);
		oip->i_ffs1_db[i] = 0;
		lfs_blkfree(fs, bn, obsize, &lastseg, &bc);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_ffs1_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;
#if 0
		long olddspace;
#endif

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
#if 0
		olddspace = oip->i_lfs_fragsize[lastblock];
#endif

		oip->i_size = oip->i_ffs1_size = length;
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += btofsb(fs, oldspace - newspace);
		}
#if 0
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += btofsb(fs, olddspace - newspace);
		}
#endif
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, lastseg, bc);
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[NDADDR + level] == 0) !=
		    (oip->i_ffs1_ib[level] == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < NDADDR; i++)
		if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = oip->i_ffs1_size = length;
	oip->i_lfs_effnblks -= blocksreleased;
	oip->i_ffs1_blocks -= real_released;
	fs->lfs_bfree += blocksreleased;
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %d blks/%d effblks\n",
		       oip->i_ffs1_blocks, oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif
	oip->i_flag |= IN_CHANGE;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
errout:
	lfs_segunlock(fs);
	return (allerror ? allerror : error);
}

/* Update segment usage information when removing a block. */
static int
lfs_blkfree(struct lfs *fs, daddr_t daddr, size_t bsize, long *lastseg,
	    size_t *num)
{
	long seg;
	int error = 0;

	bsize = fragroundup(fs, bsize);
	if (daddr > 0) {
		if (*lastseg != (seg = dtosn(fs, daddr))) {
			error = lfs_update_seguse(fs, *lastseg, *num);
			*num = bsize;
			*lastseg = seg;
		} else
			*num += bsize;
	}
	return error;
}

/* Finish the accounting updates for a segment. */
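/*
 * lfs_blkfree() accumulates the bytes freed from a single segment; this
 * routine charges that total against the segment's SEGUSE entry once the
 * batch is flushed (when the segment changes, or at the end of the
 * truncate).
 */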
static int
lfs_update_seguse(struct lfs *fs, long lastseg, size_t num)
{
	SEGUSE *sup;
	struct buf *bp;

	if (lastseg < 0 || num == 0)
		return 0;

	LFS_SEGENTRY(sup, fs, lastseg, bp);
	if (num > sup->su_nbytes) {
		printf("lfs_truncate: segment %ld short by %ld\n",
		       lastseg, (long)num - sup->su_nbytes);
		panic("lfs_truncate: negative bytes");
		sup->su_nbytes = num;
	}
	sup->su_nbytes -= num;
	LFS_WRITESEGENTRY(sup, fs, lastseg, bp);

	return 0;
}

/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
	       daddr_t lastbn, int level, long *countp,
	       long *rcountp, long *lastsegp, size_t *bcp, struct proc *p)
{
	int i;
	struct buf *bp;
	struct lfs *fs = ip->i_lfs;
	int32_t *bap;	/* XXX ondisk32 */
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	int32_t *copy = NULL;	/* XXX ondisk32 */
	long blkcount, rblkcount, factor;
	int nblocks, blocksreleased = 0, real_released = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btofsb(fs, fs->lfs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double (triple) indirect blocks are freed before single (double)
	 * indirect blocks, calls to bmap on these blocks will fail.
	 * However, we already have the on disk address, so we have to set
	 * the b_blkno field explicitly instead of letting bread do
	 * everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, (int)fs->lfs_bsize, 0, 0);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, fs->lfs_bsize), lbn);
		p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("lfs_indirtrunc: bad buffer size");
		bp->b_blkno = fsbtodb(fs, dbn);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp);
		*countp = *rcountp = 0;
		return (error);
	}

	bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
	if (lastbn >= 0) {
		MALLOC(copy, int32_t *, fs->lfs_bsize, M_TEMP, M_WAITOK);
		memcpy((caddr_t)copy, (caddr_t)bap, (u_int)fs->lfs_bsize);
		memset((caddr_t)&bap[last + 1], 0,
		/* XXX ondisk32 */
		       (u_int)(NINDIR(fs) - (last + 1)) * sizeof (int32_t));
		error = VOP_BWRITE(bp);
		if (error)
			allerror = error;
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
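	 * Entries after `last' point to blocks that lie entirely beyond the
	 * new end of file; indirect blocks among them are descended with
	 * lastbn == -1 so that their whole subtree is released.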
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	     i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       (daddr_t)-1, level - 1,
					       &blkcount, &rblkcount,
					       lastsegp, bcp, p);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
			real_released += rblkcount;
		}
		lfs_blkfree(fs, nb, fs->lfs_bsize, lastsegp, bcp);
		if (bap[i] > 0)
			real_released += nblocks;
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       last, level - 1, &blkcount,
					       &rblkcount, lastsegp, bcp, p);
			if (error)
				allerror = error;
			real_released += rblkcount;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		FREE(copy, M_TEMP);
	} else {
		if (bp->b_flags & B_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		bp->b_flags |= B_INVAL;
		brelse(bp);
	}

	*countp = blocksreleased;
	*rcountp = real_released;
	return (allerror);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Inlined from vtruncbuf, so that lfs_avail could be updated.
 * We take the seglock to prevent cleaning from occurring while we are
 * invalidating blocks.
 */
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	struct lfs *fs;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error)
		return error;

	fs = VTOI(vp)->i_lfs;
	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
					"lfs_vtruncbuf", slptimeo,
					&bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		if (bp->b_flags & B_DELWRI) {
			bp->b_flags &= ~B_DELWRI;
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		LFS_UNLOCK_BUF(bp);
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
					"lfs_vtruncbuf", slptimeo,
					&bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		if (bp->b_flags & B_DELWRI) {
			bp->b_flags &= ~B_DELWRI;
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		LFS_UNLOCK_BUF(bp);
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}