/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ufs_inode.c	7.32 (Berkeley) 05/04/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#include "kernel.h"
#include "malloc.h"

/*
 * Size of the in-core inode hash table.  When INOHSZ is a power of
 * two the cheaper mask form of INOHASH is used; otherwise hashing
 * falls back to an unsigned modulus.  The hash key mixes the device
 * number and the inode number.
 */
#define INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Head of each inode hash chain.  Each bucket is a doubly linked
 * circular list; the union lets an empty bucket point at itself
 * through either view (generic head pointers or inode chain
 * pointers), which is what insque/remque expect.
 */
union ihead {
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	/* The inode must fit in the vnode's private data area. */
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	/* Make every hash bucket an empty circular list (points at itself). */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
#ifdef QUOTA
	dqinit();
#endif /* QUOTA */
}

/*
 * Look up an vnode/inode by device,inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * xp  - an inode on the same filesystem, used to derive the device
 *       and mount point
 * ino - inode number to look up
 * ipp - output: the referenced, locked inode, or nil on error
 *
 * Returns 0 on success, an errno value on failure.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * First search the hash chain for an in-core copy.  If the
	 * inode is locked, record interest and sleep; the chain must
	 * be rescanned from the top afterwards since it may have
	 * changed while we slept.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* vget fails if the vnode is being discarded; retry. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * Iput() will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * Unlock and discard unneeded inode.
		 * (bread returns a buffer even on error; release it.)
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy the on-disk inode out of the filesystem block. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/*
		 * Devices use the special-file operations vector; if an
		 * alias vnode already exists for this device, switch to
		 * it and discard the one we just built.
		 */
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (i_mode = 0 marks it stale for ufs_inactive).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 * The inode must be locked on entry; panics otherwise.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 * (i_mode == 0 marks an inode with no useful disk contents.)
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * If the link count has dropped to zero on a writable
	 * filesystem, release the file's blocks and free the inode.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int i;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 * (Point the links at the inode itself so later remque/iget
	 * traversals see a detached, self-linked entry.)
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing to do if no timestamp/modify flags are pending. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	/* Read the filesystem block holding this inode's disk copy. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	/* Apply the requested timestamps, then clear the pending flags. */
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core dinode back into the buffer and write it. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 *
 * oip    - inode to truncate (must be locked by the caller -- assumed
 *          from the ILOCK'ed call site in ufs_inactive; TODO confirm)
 * length - new (maximum) file size in bytes
 * flags  - IO_SYNC forces synchronous writes of the partial block
 *
 * Returns 0 or the first error encountered (truncation continues past
 * individual block errors).
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	/* Growing or unchanged: just push out the inode timestamps. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		/* Purge stale cached pages mapping the tail of the block. */
		bn = bp->b_blkno;
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		brealloc(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 *
	 * tip is a private copy holding the old size and the old block
	 * pointers; the freeing below works from the copy while the
	 * real inode already shows the truncated state on disk.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* A partially-kept level means nothing below it is freed. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* The freeing passes must not have diverged from the real inode. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * countp returns the number of device blocks released.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 * factor = NINDIR(fs)^(level - SINGLE), the number of blocks
	 * addressed by one entry at this level.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 * A private copy of the old pointers is kept so the blocks can
	 * be freed after the zeroed block has been written out.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
		(u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * The loop above exits with i == last == lastbn / factor, so
	 * bap[i] is the partially-retained indirect block; the new
	 * lastbn for the recursion is the remainder within it.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode. If its already locked, set the WANT bit and sleep.
 *
 * NOTE(review): i_spare0 appears to record the locking process's pid
 * (it is what the self-deadlock panic checks and what is set on
 * acquisition), while i_spare1 records the pid of the last waiter;
 * u.u_spare[0] seems to count inode locks held by the process --
 * confirm against the inode.h/user.h definitions.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		/* Sleeping on a lock we hold would never wake up. */
		if (ip->i_spare0 == u.u_procp->p_pid)
			panic("locking against myself");
		ip->i_spare1 = u.u_procp->p_pid;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = u.u_procp->p_pid;
	u.u_spare[0]++;
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 */
iunlock(ip)
	register struct inode *ip;
{

	/* Unlocking an unlocked inode is a bug, but only warn. */
	if ((ip->i_flag & ILOCKED) == 0)
		vprint("iunlock: unlocked inode", ITOV(ip));
	ip->i_spare0 = 0;
	u.u_spare[0]--;
	ip->i_flag &= ~ILOCKED;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}