1 /* $OpenBSD: ffs_inode.c,v 1.76 2016/02/27 18:50:38 natano Exp $ */ 2 /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mount.h> 38 #include <sys/proc.h> 39 #include <sys/buf.h> 40 #include <sys/vnode.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/resourcevar.h> 44 45 #include <ufs/ufs/quota.h> 46 #include <ufs/ufs/inode.h> 47 #include <ufs/ufs/ufsmount.h> 48 #include <ufs/ufs/ufs_extern.h> 49 50 #include <ufs/ffs/fs.h> 51 #include <ufs/ffs/ffs_extern.h> 52 53 int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, long *); 54 55 /* 56 * Update the access, modified, and inode change times as specified by the 57 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED 58 * flag is used to specify that the inode needs to be updated but that the 59 * times have already been set. The IN_LAZYMOD flag is used to specify 60 * that the inode needs to be updated at some point, by reclaim if not 61 * in the course of other changes; this is used to defer writes just to 62 * update device timestamps. If waitfor is set, then wait for the disk 63 * write of the inode to complete. 64 */ 65 int 66 ffs_update(struct inode *ip, int waitfor) 67 { 68 struct vnode *vp; 69 struct fs *fs; 70 struct buf *bp; 71 int error; 72 73 vp = ITOV(ip); 74 ufs_itimes(vp); 75 76 if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) 77 return (0); 78 79 ip->i_flag &= ~(IN_MODIFIED | IN_LAZYMOD); 80 fs = ip->i_fs; 81 82 /* 83 * Ensure that uid and gid are correct. This is a temporary 84 * fix until fsck has been changed to do the update. 85 */ 86 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { 87 ip->i_din1->di_ouid = ip->i_ffs1_uid; 88 ip->i_din1->di_ogid = ip->i_ffs1_gid; 89 } 90 91 error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 92 (int)fs->fs_bsize, &bp); 93 if (error) { 94 brelse(bp); 95 return (error); 96 } 97 98 if (DOINGSOFTDEP(vp)) 99 softdep_update_inodeblock(ip, bp, waitfor); 100 else if (ip->i_effnlink != DIP(ip, nlink)) 101 panic("ffs_update: bad link cnt"); 102 103 #ifdef FFS2 104 if (ip->i_ump->um_fstype == UM_UFS2) 105 *((struct ufs2_dinode *)bp->b_data + 106 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; 107 else 108 #endif 109 *((struct ufs1_dinode *)bp->b_data + 110 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; 111 112 if (waitfor && !DOINGASYNC(vp)) { 113 return (bwrite(bp)); 114 } else { 115 bdwrite(bp); 116 return (0); 117 } 118 } 119 120 #define SINGLE 0 /* index of single indirect block */ 121 #define DOUBLE 1 /* index of double indirect block */ 122 #define TRIPLE 2 /* index of triple indirect block */ 123 124 /* 125 * Truncate the inode oip to at most length size, freeing the 126 * disk blocks. 127 */ 128 int 129 ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) 130 { 131 struct vnode *ovp; 132 daddr_t lastblock; 133 daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 134 daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 135 struct fs *fs; 136 struct buf *bp; 137 int offset, size, level; 138 long count, nblocks, vflags, blocksreleased = 0; 139 int i, aflags, error, allerror; 140 off_t osize; 141 142 if (length < 0) 143 return (EINVAL); 144 ovp = ITOV(oip); 145 146 if (ovp->v_type != VREG && 147 ovp->v_type != VDIR && 148 ovp->v_type != VLNK) 149 return (0); 150 151 if (DIP(oip, size) == length) 152 return (0); 153 154 if (ovp->v_type == VLNK && 155 (DIP(oip, size) < oip->i_ump->um_maxsymlinklen || 156 (oip->i_ump->um_maxsymlinklen == 0 && 157 oip->i_din1->di_blocks == 0))) { 158 #ifdef DIAGNOSTIC 159 if (length != 0) 160 panic("ffs_truncate: partial truncate of symlink"); 161 #endif 162 memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); 163 DIP_ASSIGN(oip, size, 0); 164 oip->i_flag |= IN_CHANGE | IN_UPDATE; 165 return (UFS_UPDATE(oip, 1)); 166 } 167 168 if ((error = getinoquota(oip)) != 0) 169 return (error); 170 171 uvm_vnp_setsize(ovp, length); 172 oip->i_ci.ci_lasta = oip->i_ci.ci_clen 173 = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; 174 175 if (DOINGSOFTDEP(ovp)) { 176 if (length > 0 || softdep_slowdown(ovp)) { 177 /* 178 * If a file is only partially truncated, then 179 * we have to clean up the data structures 180 * describing the allocation past the truncation 181 * point. Finding and deallocating those structures 182 * is a lot of work. Since partial truncation occurs 183 * rarely, we solve the problem by syncing the file 184 * so that it will have no data structures left. 185 */ 186 if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, 187 curproc)) != 0) 188 return (error); 189 } else { 190 (void)ufs_quota_free_blocks(oip, DIP(oip, blocks), 191 NOCRED); 192 softdep_setup_freeblocks(oip, length); 193 (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); 194 oip->i_flag |= IN_CHANGE | IN_UPDATE; 195 return (UFS_UPDATE(oip, 0)); 196 } 197 } 198 199 fs = oip->i_fs; 200 osize = DIP(oip, size); 201 /* 202 * Lengthen the size of the file. We must ensure that the 203 * last byte of the file is allocated. Since the smallest 204 * value of osize is 0, length will be at least 1. 205 */ 206 if (osize < length) { 207 if (length > fs->fs_maxfilesize) 208 return (EFBIG); 209 aflags = B_CLRBUF; 210 if (flags & IO_SYNC) 211 aflags |= B_SYNC; 212 error = UFS_BUF_ALLOC(oip, length - 1, 1, 213 cred, aflags, &bp); 214 if (error) 215 return (error); 216 DIP_ASSIGN(oip, size, length); 217 uvm_vnp_setsize(ovp, length); 218 (void) uvm_vnp_uncache(ovp); 219 if (aflags & B_SYNC) 220 bwrite(bp); 221 else 222 bawrite(bp); 223 oip->i_flag |= IN_CHANGE | IN_UPDATE; 224 return (UFS_UPDATE(oip, 1)); 225 } 226 uvm_vnp_setsize(ovp, length); 227 228 /* 229 * Shorten the size of the file. If the file is not being 230 * truncated to a block boundary, the contents of the 231 * partial block following the end of the file must be 232 * zero'ed in case it ever becomes accessible again because 233 * of subsequent file growth. Directories however are not 234 * zero'ed as they should grow back initialized to empty. 235 */ 236 offset = blkoff(fs, length); 237 if (offset == 0) { 238 DIP_ASSIGN(oip, size, length); 239 } else { 240 lbn = lblkno(fs, length); 241 aflags = B_CLRBUF; 242 if (flags & IO_SYNC) 243 aflags |= B_SYNC; 244 error = UFS_BUF_ALLOC(oip, length - 1, 1, 245 cred, aflags, &bp); 246 if (error) 247 return (error); 248 /* 249 * When we are doing soft updates and the UFS_BALLOC 250 * above fills in a direct block hole with a full sized 251 * block that will be truncated down to a fragment below, 252 * we must flush out the block dependency with an FSYNC 253 * so that we do not get a soft updates inconsistency 254 * when we create the fragment below. 255 */ 256 if (DOINGSOFTDEP(ovp) && lbn < NDADDR && 257 fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && 258 (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) 259 return (error); 260 DIP_ASSIGN(oip, size, length); 261 size = blksize(fs, oip, lbn); 262 (void) uvm_vnp_uncache(ovp); 263 if (ovp->v_type != VDIR) 264 memset(bp->b_data + offset, 0, size - offset); 265 buf_adjcnt(bp, size); 266 if (aflags & B_SYNC) 267 bwrite(bp); 268 else 269 bawrite(bp); 270 } 271 /* 272 * Calculate index into inode's block list of 273 * last direct and indirect blocks (if any) 274 * which we want to keep. Lastblock is -1 when 275 * the file is truncated to 0. 276 */ 277 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 278 lastiblock[SINGLE] = lastblock - NDADDR; 279 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 280 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 281 nblocks = btodb(fs->fs_bsize); 282 283 /* 284 * Update file and block pointers on disk before we start freeing 285 * blocks. If we crash before free'ing blocks below, the blocks 286 * will be returned to the free list. lastiblock values are also 287 * normalized to -1 for calls to ffs_indirtrunc below. 288 */ 289 for (level = TRIPLE; level >= SINGLE; level--) { 290 oldblks[NDADDR + level] = DIP(oip, ib[level]); 291 if (lastiblock[level] < 0) { 292 DIP_ASSIGN(oip, ib[level], 0); 293 lastiblock[level] = -1; 294 } 295 } 296 297 for (i = 0; i < NDADDR; i++) { 298 oldblks[i] = DIP(oip, db[i]); 299 if (i > lastblock) 300 DIP_ASSIGN(oip, db[i], 0); 301 } 302 303 oip->i_flag |= IN_CHANGE | IN_UPDATE; 304 if ((error = UFS_UPDATE(oip, 1)) != 0) 305 allerror = error; 306 307 /* 308 * Having written the new inode to disk, save its new configuration 309 * and put back the old block pointers long enough to process them. 310 * Note that we save the new block configuration so we can check it 311 * when we are done. 312 */ 313 for (i = 0; i < NDADDR; i++) { 314 newblks[i] = DIP(oip, db[i]); 315 DIP_ASSIGN(oip, db[i], oldblks[i]); 316 } 317 318 for (i = 0; i < NIADDR; i++) { 319 newblks[NDADDR + i] = DIP(oip, ib[i]); 320 DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); 321 } 322 323 DIP_ASSIGN(oip, size, osize); 324 vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; 325 allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0); 326 327 /* 328 * Indirect blocks first. 329 */ 330 indir_lbn[SINGLE] = -NDADDR; 331 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 332 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 333 for (level = TRIPLE; level >= SINGLE; level--) { 334 bn = DIP(oip, ib[level]); 335 if (bn != 0) { 336 error = ffs_indirtrunc(oip, indir_lbn[level], 337 fsbtodb(fs, bn), lastiblock[level], level, &count); 338 if (error) 339 allerror = error; 340 blocksreleased += count; 341 if (lastiblock[level] < 0) { 342 DIP_ASSIGN(oip, ib[level], 0); 343 ffs_blkfree(oip, bn, fs->fs_bsize); 344 blocksreleased += nblocks; 345 } 346 } 347 if (lastiblock[level] >= 0) 348 goto done; 349 } 350 351 /* 352 * All whole direct blocks or frags. 353 */ 354 for (i = NDADDR - 1; i > lastblock; i--) { 355 long bsize; 356 357 bn = DIP(oip, db[i]); 358 if (bn == 0) 359 continue; 360 361 DIP_ASSIGN(oip, db[i], 0); 362 bsize = blksize(fs, oip, i); 363 ffs_blkfree(oip, bn, bsize); 364 blocksreleased += btodb(bsize); 365 } 366 if (lastblock < 0) 367 goto done; 368 369 /* 370 * Finally, look for a change in size of the 371 * last direct block; release any frags. 372 */ 373 bn = DIP(oip, db[lastblock]); 374 if (bn != 0) { 375 long oldspace, newspace; 376 377 /* 378 * Calculate amount of space we're giving 379 * back as old block size minus new block size. 380 */ 381 oldspace = blksize(fs, oip, lastblock); 382 DIP_ASSIGN(oip, size, length); 383 newspace = blksize(fs, oip, lastblock); 384 if (newspace == 0) 385 panic("ffs_truncate: newspace"); 386 if (oldspace - newspace > 0) { 387 /* 388 * Block number of space to be free'd is 389 * the old block # plus the number of frags 390 * required for the storage we're keeping. 391 */ 392 bn += numfrags(fs, newspace); 393 ffs_blkfree(oip, bn, oldspace - newspace); 394 blocksreleased += btodb(oldspace - newspace); 395 } 396 } 397 done: 398 #ifdef DIAGNOSTIC 399 for (level = SINGLE; level <= TRIPLE; level++) 400 if (newblks[NDADDR + level] != DIP(oip, ib[level])) 401 panic("ffs_truncate1"); 402 for (i = 0; i < NDADDR; i++) 403 if (newblks[i] != DIP(oip, db[i])) 404 panic("ffs_truncate2"); 405 #endif /* DIAGNOSTIC */ 406 /* 407 * Put back the real size. 408 */ 409 DIP_ASSIGN(oip, size, length); 410 if (DIP(oip, blocks) >= blocksreleased) 411 DIP_ADD(oip, blocks, -blocksreleased); 412 else /* sanity */ 413 DIP_ASSIGN(oip, blocks, 0); 414 oip->i_flag |= IN_CHANGE; 415 (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); 416 return (allerror); 417 } 418 419 #ifdef FFS2 420 #define BAP(ip, i) (((ip)->i_ump->um_fstype == UM_UFS2) ? bap2[i] : bap1[i]) 421 #define BAP_ASSIGN(ip, i, value) \ 422 do { \ 423 if ((ip)->i_ump->um_fstype == UM_UFS2) \ 424 bap2[i] = (value); \ 425 else \ 426 bap1[i] = (value); \ 427 } while (0) 428 #else 429 #define BAP(ip, i) bap1[i] 430 #define BAP_ASSIGN(ip, i, value) do { bap1[i] = (value); } while (0) 431 #endif /* FFS2 */ 432 433 /* 434 * Release blocks associated with the inode ip and stored in the indirect 435 * block bn. Blocks are free'd in LIFO order up to (but not including) 436 * lastbn. If level is greater than SINGLE, the block is an indirect block 437 * and recursive calls to indirtrunc must be used to cleanse other indirect 438 * blocks. 439 * 440 * NB: triple indirect blocks are untested. 441 */ 442 int 443 ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 444 daddr_t lastbn, int level, long *countp) 445 { 446 int i; 447 struct buf *bp; 448 struct fs *fs = ip->i_fs; 449 struct vnode *vp; 450 void *copy = NULL; 451 daddr_t nb, nlbn, last; 452 long blkcount, factor; 453 int nblocks, blocksreleased = 0; 454 int error = 0, allerror = 0; 455 int32_t *bap1 = NULL; 456 #ifdef FFS2 457 int64_t *bap2 = NULL; 458 #endif 459 460 /* 461 * Calculate index in current block of last 462 * block to be kept. -1 indicates the entire 463 * block so we need not calculate the index. 464 */ 465 factor = 1; 466 for (i = SINGLE; i < level; i++) 467 factor *= NINDIR(fs); 468 last = lastbn; 469 if (lastbn > 0) 470 last /= factor; 471 nblocks = btodb(fs->fs_bsize); 472 /* 473 * Get buffer of block pointers, zero those entries corresponding 474 * to blocks to be free'd, and update on disk copy first. Since 475 * double(triple) indirect before single(double) indirect, calls 476 * to bmap on these blocks will fail. However, we already have 477 * the on disk address, so we have to set the b_blkno field 478 * explicitly instead of letting bread do everything for us. 479 */ 480 vp = ITOV(ip); 481 bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); 482 if (!(bp->b_flags & (B_DONE | B_DELWRI))) { 483 curproc->p_ru.ru_inblock++; /* pay for read */ 484 bcstats.pendingreads++; 485 bcstats.numreads++; 486 bp->b_flags |= B_READ; 487 if (bp->b_bcount > bp->b_bufsize) 488 panic("ffs_indirtrunc: bad buffer size"); 489 bp->b_blkno = dbn; 490 VOP_STRATEGY(bp); 491 error = biowait(bp); 492 } 493 if (error) { 494 brelse(bp); 495 *countp = 0; 496 return (error); 497 } 498 499 #ifdef FFS2 500 if (ip->i_ump->um_fstype == UM_UFS2) 501 bap2 = (int64_t *)bp->b_data; 502 else 503 #endif 504 bap1 = (int32_t *)bp->b_data; 505 506 if (lastbn != -1) { 507 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); 508 memcpy(copy, bp->b_data, fs->fs_bsize); 509 510 for (i = last + 1; i < NINDIR(fs); i++) 511 BAP_ASSIGN(ip, i, 0); 512 513 if (!DOINGASYNC(vp)) { 514 error = bwrite(bp); 515 if (error) 516 allerror = error; 517 } else { 518 bawrite(bp); 519 } 520 521 #ifdef FFS2 522 if (ip->i_ump->um_fstype == UM_UFS2) 523 bap2 = (int64_t *)copy; 524 else 525 #endif 526 bap1 = (int32_t *)copy; 527 } 528 529 /* 530 * Recursively free totally unused blocks. 531 */ 532 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 533 i--, nlbn += factor) { 534 nb = BAP(ip, i); 535 if (nb == 0) 536 continue; 537 if (level > SINGLE) { 538 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 539 -1, level - 1, &blkcount); 540 if (error) 541 allerror = error; 542 blocksreleased += blkcount; 543 } 544 ffs_blkfree(ip, nb, fs->fs_bsize); 545 blocksreleased += nblocks; 546 } 547 548 /* 549 * Recursively free last partial block. 550 */ 551 if (level > SINGLE && lastbn >= 0) { 552 last = lastbn % factor; 553 nb = BAP(ip, i); 554 if (nb != 0) { 555 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 556 last, level - 1, &blkcount); 557 if (error) 558 allerror = error; 559 blocksreleased += blkcount; 560 } 561 } 562 if (copy != NULL) { 563 free(copy, M_TEMP, 0); 564 } else { 565 bp->b_flags |= B_INVAL; 566 brelse(bp); 567 } 568 569 *countp = blocksreleased; 570 return (allerror); 571 } 572