1 /* $NetBSD: ext2fs_inode.c,v 1.67 2008/12/17 20:51:38 cegger Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 32 * Modified for ext2fs by Manuel Bouyer. 33 */ 34 35 /* 36 * Copyright (c) 1997 Manuel Bouyer. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by Manuel Bouyer. 49 * 4. The name of the author may not be used to endorse or promote products 50 * derived from this software without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 53 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 54 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 55 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 56 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 57 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 58 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 59 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 60 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 61 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 62 * 63 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 64 * Modified for ext2fs by Manuel Bouyer. 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: ext2fs_inode.c,v 1.67 2008/12/17 20:51:38 cegger Exp $"); 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/mount.h> 73 #include <sys/proc.h> 74 #include <sys/file.h> 75 #include <sys/buf.h> 76 #include <sys/vnode.h> 77 #include <sys/kernel.h> 78 #include <sys/malloc.h> 79 #include <sys/trace.h> 80 #include <sys/resourcevar.h> 81 #include <sys/kauth.h> 82 83 #include <ufs/ufs/inode.h> 84 #include <ufs/ufs/ufsmount.h> 85 #include <ufs/ufs/ufs_extern.h> 86 87 #include <ufs/ext2fs/ext2fs.h> 88 #include <ufs/ext2fs/ext2fs_extern.h> 89 90 extern int prtactive; 91 92 static int ext2fs_indirtrunc(struct inode *, daddr_t, daddr_t, 93 daddr_t, int, long *); 94 95 /* 96 * Get the size of an inode. 97 */ 98 u_int64_t 99 ext2fs_size(struct inode *ip) 100 { 101 u_int64_t size = ip->i_e2fs_size; 102 103 if ((ip->i_e2fs_mode & IFMT) == IFREG) 104 size |= (u_int64_t)ip->i_e2fs_dacl << 32; 105 return size; 106 } 107 108 int 109 ext2fs_setsize(struct inode *ip, u_int64_t size) 110 { 111 if ((ip->i_e2fs_mode & IFMT) == IFREG || 112 ip->i_e2fs_mode == 0) { 113 ip->i_e2fs_dacl = size >> 32; 114 if (size >= 0x80000000U) { 115 struct m_ext2fs *fs = ip->i_e2fs; 116 117 if (fs->e2fs.e2fs_rev <= E2FS_REV0) { 118 /* Linux automagically upgrades to REV1 here! */ 119 return EFBIG; 120 } 121 if (!(fs->e2fs.e2fs_features_rocompat 122 & EXT2F_ROCOMPAT_LARGEFILE)) { 123 fs->e2fs.e2fs_features_rocompat |= 124 EXT2F_ROCOMPAT_LARGEFILE; 125 fs->e2fs_fmod = 1; 126 } 127 } 128 } else if (size >= 0x80000000U) 129 return EFBIG; 130 131 ip->i_e2fs_size = size; 132 133 return 0; 134 } 135 136 /* 137 * Last reference to an inode. If necessary, write or delete it. 138 */ 139 int 140 ext2fs_inactive(void *v) 141 { 142 struct vop_inactive_args /* { 143 struct vnode *a_vp; 144 bool *a_recycle; 145 } */ *ap = v; 146 struct vnode *vp = ap->a_vp; 147 struct inode *ip = VTOI(vp); 148 int error = 0; 149 150 if (prtactive && vp->v_usecount != 0) 151 vprint("ext2fs_inactive: pushing active", vp); 152 /* Get rid of inodes related to stale file handles. */ 153 if (ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime != 0) 154 goto out; 155 156 error = 0; 157 if (ip->i_e2fs_nlink == 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 158 /* Defer final inode free and update to reclaim.*/ 159 if (ext2fs_size(ip) != 0) { 160 error = ext2fs_truncate(vp, (off_t)0, 0, NOCRED); 161 } 162 ip->i_e2fs_dtime = time_second; 163 ip->i_flag |= IN_CHANGE | IN_UPDATE; 164 mutex_enter(&vp->v_interlock); 165 vp->v_iflag |= VI_FREEING; 166 mutex_exit(&vp->v_interlock); 167 ext2fs_vfree(vp, ip->i_number, ip->i_e2fs_mode); 168 } 169 if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { 170 ext2fs_update(vp, NULL, NULL, 0); 171 } 172 out: 173 /* 174 * If we are done with the inode, reclaim it 175 * so that it can be reused immediately. 176 */ 177 *ap->a_recycle = (ip->i_e2fs_dtime != 0); 178 VOP_UNLOCK(vp, 0); 179 return (error); 180 } 181 182 183 /* 184 * Update the access, modified, and inode change times as specified by the 185 * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is 186 * used to specify that the inode needs to be updated but that the times have 187 * already been set. The access and modified times are taken from the second 188 * and third parameters; the inode change time is always taken from the current 189 * time. If UPDATE_WAIT or UPDATE_DIROP is set, then wait for the disk 190 * write of the inode to complete. 191 */ 192 int 193 ext2fs_update(struct vnode *vp, const struct timespec *acc, 194 const struct timespec *mod, int updflags) 195 { 196 struct m_ext2fs *fs; 197 struct buf *bp; 198 struct inode *ip; 199 int error; 200 void *cp; 201 int flags; 202 203 if (vp->v_mount->mnt_flag & MNT_RDONLY) 204 return (0); 205 ip = VTOI(vp); 206 EXT2FS_ITIMES(ip, acc, mod, NULL); 207 if (updflags & UPDATE_CLOSE) 208 flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED); 209 else 210 flags = ip->i_flag & IN_MODIFIED; 211 if (flags == 0) 212 return (0); 213 fs = ip->i_e2fs; 214 215 error = bread(ip->i_devvp, 216 fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 217 (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); 218 if (error) { 219 brelse(bp, 0); 220 return (error); 221 } 222 ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED); 223 cp = (char *)bp->b_data + 224 (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE); 225 e2fs_isave(ip->i_din.e2fs_din, (struct ext2fs_dinode *)cp); 226 if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) != 0 && 227 (flags & IN_MODIFIED) != 0 && 228 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) 229 return (bwrite(bp)); 230 else { 231 bdwrite(bp); 232 return (0); 233 } 234 } 235 236 #define SINGLE 0 /* index of single indirect block */ 237 #define DOUBLE 1 /* index of double indirect block */ 238 #define TRIPLE 2 /* index of triple indirect block */ 239 /* 240 * Truncate the inode oip to at most length size, freeing the 241 * disk blocks. 242 */ 243 int 244 ext2fs_truncate(struct vnode *ovp, off_t length, int ioflag, 245 kauth_cred_t cred) 246 { 247 daddr_t lastblock; 248 struct inode *oip = VTOI(ovp); 249 daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; 250 /* XXX ondisk32 */ 251 int32_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 252 struct m_ext2fs *fs; 253 int offset, size, level; 254 long count, blocksreleased = 0; 255 int i, nblocks; 256 int error, allerror = 0; 257 off_t osize; 258 int sync; 259 struct ufsmount *ump = oip->i_ump; 260 261 if (ovp->v_type == VCHR || ovp->v_type == VBLK || 262 ovp->v_type == VFIFO || ovp->v_type == VSOCK) { 263 return 0; 264 } 265 266 if (length < 0) 267 return (EINVAL); 268 269 if (ovp->v_type == VLNK && 270 (ext2fs_size(oip) < ump->um_maxsymlinklen || 271 (ump->um_maxsymlinklen == 0 && oip->i_e2fs_nblock == 0))) { 272 KDASSERT(length == 0); 273 memset((char *)&oip->i_din.e2fs_din->e2di_shortlink, 0, 274 (u_int)ext2fs_size(oip)); 275 (void)ext2fs_setsize(oip, 0); 276 oip->i_flag |= IN_CHANGE | IN_UPDATE; 277 return (ext2fs_update(ovp, NULL, NULL, 0)); 278 } 279 if (ext2fs_size(oip) == length) { 280 oip->i_flag |= IN_CHANGE | IN_UPDATE; 281 return (ext2fs_update(ovp, NULL, NULL, 0)); 282 } 283 fs = oip->i_e2fs; 284 if (length > ump->um_maxfilesize) 285 return (EFBIG); 286 287 osize = ext2fs_size(oip); 288 289 /* 290 * Lengthen the size of the file. We must ensure that the 291 * last byte of the file is allocated. Since the smallest 292 * value of osize is 0, length will be at least 1. 293 */ 294 if (osize < length) { 295 uvm_vnp_setwritesize(ovp, length); 296 error = ufs_balloc_range(ovp, length - 1, 1, cred, 297 ioflag & IO_SYNC ? B_SYNC : 0); 298 if (error) { 299 (void) ext2fs_truncate(ovp, osize, ioflag & IO_SYNC, 300 cred); 301 return (error); 302 } 303 uvm_vnp_setsize(ovp, length); 304 oip->i_flag |= IN_CHANGE | IN_UPDATE; 305 KASSERT(error || ovp->v_size == ext2fs_size(oip)); 306 return (ext2fs_update(ovp, NULL, NULL, 0)); 307 } 308 /* 309 * Shorten the size of the file. If the file is not being 310 * truncated to a block boundry, the contents of the 311 * partial block following the end of the file must be 312 * zero'ed in case it ever become accessible again because 313 * of subsequent file growth. 314 */ 315 offset = blkoff(fs, length); 316 if (offset != 0) { 317 size = fs->e2fs_bsize; 318 319 /* XXXUBC we should handle more than just VREG */ 320 uvm_vnp_zerorange(ovp, length, size - offset); 321 } 322 (void)ext2fs_setsize(oip, length); 323 uvm_vnp_setsize(ovp, length); 324 /* 325 * Calculate index into inode's block list of 326 * last direct and indirect blocks (if any) 327 * which we want to keep. Lastblock is -1 when 328 * the file is truncated to 0. 329 */ 330 lastblock = lblkno(fs, length + fs->e2fs_bsize - 1) - 1; 331 lastiblock[SINGLE] = lastblock - NDADDR; 332 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 333 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 334 nblocks = btodb(fs->e2fs_bsize); 335 /* 336 * Update file and block pointers on disk before we start freeing 337 * blocks. If we crash before free'ing blocks below, the blocks 338 * will be returned to the free list. lastiblock values are also 339 * normalized to -1 for calls to ext2fs_indirtrunc below. 340 */ 341 memcpy((void *)oldblks, (void *)&oip->i_e2fs_blocks[0], sizeof oldblks); 342 sync = 0; 343 for (level = TRIPLE; level >= SINGLE; level--) { 344 if (lastiblock[level] < 0 && oldblks[NDADDR + level] != 0) { 345 sync = 1; 346 oip->i_e2fs_blocks[NDADDR + level] = 0; 347 lastiblock[level] = -1; 348 } 349 } 350 for (i = 0; i < NDADDR; i++) { 351 if (i > lastblock && oldblks[i] != 0) { 352 sync = 1; 353 oip->i_e2fs_blocks[i] = 0; 354 } 355 } 356 oip->i_flag |= IN_CHANGE | IN_UPDATE; 357 if (sync) { 358 error = ext2fs_update(ovp, NULL, NULL, UPDATE_WAIT); 359 if (error && !allerror) 360 allerror = error; 361 } 362 363 /* 364 * Having written the new inode to disk, save its new configuration 365 * and put back the old block pointers long enough to process them. 366 * Note that we save the new block configuration so we can check it 367 * when we are done. 368 */ 369 memcpy((void *)newblks, (void *)&oip->i_e2fs_blocks[0], sizeof newblks); 370 memcpy((void *)&oip->i_e2fs_blocks[0], (void *)oldblks, sizeof oldblks); 371 372 (void)ext2fs_setsize(oip, osize); 373 error = vtruncbuf(ovp, lastblock + 1, 0, 0); 374 if (error && !allerror) 375 allerror = error; 376 377 /* 378 * Indirect blocks first. 379 */ 380 indir_lbn[SINGLE] = -NDADDR; 381 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) -1; 382 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 383 for (level = TRIPLE; level >= SINGLE; level--) { 384 /* XXX ondisk32 */ 385 bn = fs2h32(oip->i_e2fs_blocks[NDADDR + level]); 386 if (bn != 0) { 387 error = ext2fs_indirtrunc(oip, indir_lbn[level], 388 fsbtodb(fs, bn), lastiblock[level], level, &count); 389 if (error) 390 allerror = error; 391 blocksreleased += count; 392 if (lastiblock[level] < 0) { 393 oip->i_e2fs_blocks[NDADDR + level] = 0; 394 ext2fs_blkfree(oip, bn); 395 blocksreleased += nblocks; 396 } 397 } 398 if (lastiblock[level] >= 0) 399 goto done; 400 } 401 402 /* 403 * All whole direct blocks or frags. 404 */ 405 for (i = NDADDR - 1; i > lastblock; i--) { 406 /* XXX ondisk32 */ 407 bn = fs2h32(oip->i_e2fs_blocks[i]); 408 if (bn == 0) 409 continue; 410 oip->i_e2fs_blocks[i] = 0; 411 ext2fs_blkfree(oip, bn); 412 blocksreleased += btodb(fs->e2fs_bsize); 413 } 414 415 done: 416 #ifdef DIAGNOSTIC 417 for (level = SINGLE; level <= TRIPLE; level++) 418 if (newblks[NDADDR + level] != 419 oip->i_e2fs_blocks[NDADDR + level]) 420 panic("ext2fs_truncate1"); 421 for (i = 0; i < NDADDR; i++) 422 if (newblks[i] != oip->i_e2fs_blocks[i]) 423 panic("ext2fs_truncate2"); 424 if (length == 0 && 425 (!LIST_EMPTY(&ovp->v_cleanblkhd) || 426 !LIST_EMPTY(&ovp->v_dirtyblkhd))) 427 panic("ext2fs_truncate3"); 428 #endif /* DIAGNOSTIC */ 429 /* 430 * Put back the real size. 431 */ 432 (void)ext2fs_setsize(oip, length); 433 oip->i_e2fs_nblock -= blocksreleased; 434 oip->i_flag |= IN_CHANGE; 435 KASSERT(ovp->v_type != VREG || ovp->v_size == ext2fs_size(oip)); 436 return (allerror); 437 } 438 439 /* 440 * Release blocks associated with the inode ip and stored in the indirect 441 * block bn. Blocks are free'd in LIFO order up to (but not including) 442 * lastbn. If level is greater than SINGLE, the block is an indirect block 443 * and recursive calls to indirtrunc must be used to cleanse other indirect 444 * blocks. 445 * 446 * NB: triple indirect blocks are untested. 447 */ 448 static int 449 ext2fs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, 450 int level, long *countp) 451 { 452 int i; 453 struct buf *bp; 454 struct m_ext2fs *fs = ip->i_e2fs; 455 int32_t *bap; /* XXX ondisk32 */ 456 struct vnode *vp; 457 daddr_t nb, nlbn, last; 458 int32_t *copy = NULL; /* XXX ondisk32 */ 459 long blkcount, factor; 460 int nblocks, blocksreleased = 0; 461 int error = 0, allerror = 0; 462 463 /* 464 * Calculate index in current block of last 465 * block to be kept. -1 indicates the entire 466 * block so we need not calculate the index. 467 */ 468 factor = 1; 469 for (i = SINGLE; i < level; i++) 470 factor *= NINDIR(fs); 471 last = lastbn; 472 if (lastbn > 0) 473 last /= factor; 474 nblocks = btodb(fs->e2fs_bsize); 475 /* 476 * Get buffer of block pointers, zero those entries corresponding 477 * to blocks to be free'd, and update on disk copy first. Since 478 * double(triple) indirect before single(double) indirect, calls 479 * to bmap on these blocks will fail. However, we already have 480 * the on disk address, so we have to set the b_blkno field 481 * explicitly instead of letting bread do everything for us. 482 */ 483 vp = ITOV(ip); 484 bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0); 485 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 486 /* Braces must be here in case trace evaluates to nothing. */ 487 trace(TR_BREADHIT, pack(vp, fs->e2fs_bsize), lbn); 488 } else { 489 trace(TR_BREADMISS, pack(vp, fs->e2fs_bsize), lbn); 490 curlwp->l_ru.ru_inblock++; /* pay for read */ 491 bp->b_flags |= B_READ; 492 if (bp->b_bcount > bp->b_bufsize) 493 panic("ext2fs_indirtrunc: bad buffer size"); 494 bp->b_blkno = dbn; 495 VOP_STRATEGY(vp, bp); 496 error = biowait(bp); 497 } 498 if (error) { 499 brelse(bp, 0); 500 *countp = 0; 501 return (error); 502 } 503 504 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ 505 if (lastbn >= 0) { 506 /* XXX ondisk32 */ 507 copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK); 508 memcpy((void *)copy, (void *)bap, (u_int)fs->e2fs_bsize); 509 memset((void *)&bap[last + 1], 0, 510 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (u_int32_t)); 511 error = bwrite(bp); 512 if (error) 513 allerror = error; 514 bap = copy; 515 } 516 517 /* 518 * Recursively free totally unused blocks. 519 */ 520 for (i = NINDIR(fs) - 1, 521 nlbn = lbn + 1 - i * factor; i > last; 522 i--, nlbn += factor) { 523 /* XXX ondisk32 */ 524 nb = fs2h32(bap[i]); 525 if (nb == 0) 526 continue; 527 if (level > SINGLE) { 528 error = ext2fs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 529 (daddr_t)-1, level - 1, 530 &blkcount); 531 if (error) 532 allerror = error; 533 blocksreleased += blkcount; 534 } 535 ext2fs_blkfree(ip, nb); 536 blocksreleased += nblocks; 537 } 538 539 /* 540 * Recursively free last partial block. 541 */ 542 if (level > SINGLE && lastbn >= 0) { 543 last = lastbn % factor; 544 /* XXX ondisk32 */ 545 nb = fs2h32(bap[i]); 546 if (nb != 0) { 547 error = ext2fs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 548 last, level - 1, &blkcount); 549 if (error) 550 allerror = error; 551 blocksreleased += blkcount; 552 } 553 } 554 555 if (copy != NULL) { 556 free(copy, M_TEMP); 557 } else { 558 brelse(bp, BC_INVAL); 559 } 560 561 *countp = blocksreleased; 562 return (allerror); 563 } 564