/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ffs_inode.c	7.30 (Berkeley) 05/02/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#include "kernel.h"
#include "malloc.h"

/*
 * Size of the in-core inode hash table.  When INOHSZ is a power of
 * two, the hash is a cheap mask of (dev + ino); otherwise fall back
 * to an unsigned modulus.
 */
#define INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Hash bucket header.  The union lets the same two words serve both
 * as the bucket's own forward/back links (so an empty bucket points
 * at itself) and as the head of a doubly-linked chain of inodes.
 * NOTE(review): this relies on the forward/back links being the
 * first members of struct inode (see the (struct inode *)ih cast in
 * iget below) -- confirm against ../ufs/inode.h.
 */
union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	/* The inode lives in the vnode's private data area; it must fit. */
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	/* Make every hash bucket an empty, self-pointing circular list. */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
#ifdef QUOTA
	dqinit();
#endif /* QUOTA */
}

/*
 * Look up a vnode/inode by device,inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * xp is an existing inode on the same filesystem, used only to supply
 * the device and mount point.  On success 0 is returned and *ipp points
 * to the locked inode; on failure an errno is returned and *ipp is 0.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * Search the hash chain.  If the inode is locked, record our
	 * interest and sleep; after waking we must rescan the whole
	 * chain because it may have changed while we slept.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* vget can fail if the vnode is being discarded; rescan. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.  The buffer is
		 * released even on error so it returns to the cache.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy this inode's slot out of the inode block, then let it go. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		/* FIFOs not configured into this kernel. */
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		/*
		 * checkalias returns a pre-existing vnode for the same
		 * device; if so, move our identity onto it and discard
		 * the vnode we just set up.
		 */
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode; zeroing i_mode marks it
			 * stale so ufs_inactive will get rid of it.
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 * The inode must be locked on entry.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 * (i_mode == 0 marks an inode that was never fully set up or
	 * has already been freed; VXLOCK means someone else is already
	 * tearing the vnode down.)
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * No links remain and the filesystem is writable: release the
	 * file's blocks and free the on-disk inode.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int i;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain and make its links
	 * self-pointing so stray traversals cannot wander off.
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode:
	 * name-cache entries, the device vnode reference, and any
	 * quota structures.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 *
 * ta supplies the new access time, tm the new modification time;
 * the change time always comes from the global "time".  Returns 0
 * or an errno from the buffer-cache I/O.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing marked dirty: nothing to write. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	/* Never write to a read-only filesystem. */
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	/* Read the filesystem block holding this inode's disk image. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core dinode into its slot in the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		/* Synchronous: caller needs the write ordered. */
		return (bwrite(bp));
	} else {
		/* Asynchronous: mark delayed-write and return. */
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode oip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * flags may carry IO_SYNC to force synchronous writes.  Returns 0 or
 * the first error encountered (freeing continues past errors).
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	/*
	 * Not shrinking the file: just push the timestamps to disk.
	 */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep. Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);	/* device blocks per fs block */
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		bn = bp->b_blkno;
		/*
		 * Remove stale page-cache mappings for the block before
		 * zeroing its tail past the new end of file.
		 */
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		brealloc(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 *
	 * tip keeps a private copy of the ORIGINAL block pointers
	 * (with the old size) so the freeing code below can walk
	 * them after oip's copies are zeroed and written out.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			/* Entire indirect block gone: free it too. */
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* Truncation point lies inside this level: done freeing. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/*
	 * The freeing code above must have zeroed exactly the pointers
	 * that were zeroed in oip before the iupdat; any divergence
	 * means blocks were freed that are still reachable on disk.
	 */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * The count of device blocks released is returned through countp.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept. -1 indicates the entire
	 * block so we need not calculate the index.
	 * factor is the number of file blocks addressed by one
	 * entry at this indirection level.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.  A private copy of the
	 * pointers is kept so they can still be walked after the
	 * zeroed block has been written back.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	/* Whole block going away: no point keeping it cached. */
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * (The loop above exits with i == last, so bap[i] is the
	 * entry for the partially-kept indirect block.)
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode. If it is already locked, set the WANT bit and sleep.
 * i_spare0 records the pid of the lock holder (used to catch
 * self-deadlock); i_spare1 records the pid of the last waiter.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		if (ip->i_spare0 == u.u_procp->p_pid)
			panic("locking against myself");
		ip->i_spare1 = u.u_procp->p_pid;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = u.u_procp->p_pid;
	/* per-process count of inode locks held -- evidently a debug aid */
	u.u_spare[0]++;
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 * Unlocking an inode that is not locked is reported (via vprint)
 * but not fatal.
 */
iunlock(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		vprint("iunlock: unlocked inode", ITOV(ip));
	ip->i_spare0 = 0;
	u.u_spare[0]--;
	ip->i_flag &= ~ILOCKED;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}