1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)lfs_inode.c 7.1.1.1 (Berkeley) 04/02/87 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "mount.h" 12 #include "dir.h" 13 #include "user.h" 14 #include "inode.h" 15 #include "fs.h" 16 #include "buf.h" 17 #include "cmap.h" 18 #ifdef QUOTA 19 #include "quota.h" 20 #endif 21 #include "kernel.h" 22 23 #define INOHSZ 512 24 #if ((INOHSZ&(INOHSZ-1)) == 0) 25 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 26 #else 27 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 28 #endif 29 30 union ihead { /* inode LRU cache, Chris Maltby */ 31 union ihead *ih_head[2]; 32 struct inode *ih_chain[2]; 33 } ihead[INOHSZ]; 34 35 struct inode *ifreeh, **ifreet; 36 37 /* 38 * Initialize hash links for inodes 39 * and build inode free list. 40 */ 41 ihinit() 42 { 43 register int i; 44 register struct inode *ip = inode; 45 register union ihead *ih = ihead; 46 47 for (i = INOHSZ; --i >= 0; ih++) { 48 ih->ih_head[0] = ih; 49 ih->ih_head[1] = ih; 50 } 51 ifreeh = ip; 52 ifreet = &ip->i_freef; 53 ip->i_freeb = &ifreeh; 54 ip->i_forw = ip; 55 ip->i_back = ip; 56 for (i = ninode; --i > 0; ) { 57 ++ip; 58 ip->i_forw = ip; 59 ip->i_back = ip; 60 *ifreet = ip; 61 ip->i_freeb = ifreet; 62 ifreet = &ip->i_freef; 63 } 64 ip->i_freef = NULL; 65 } 66 67 #ifdef notdef 68 /* 69 * Find an inode if it is incore. 70 * This is the equivalent, for inodes, 71 * of ``incore'' in bio.c or ``pfind'' in subr.c. 72 */ 73 struct inode * 74 ifind(dev, ino) 75 dev_t dev; 76 ino_t ino; 77 { 78 register struct inode *ip; 79 register union ihead *ih; 80 81 ih = &ihead[INOHASH(dev, ino)]; 82 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 83 if (ino==ip->i_number && dev==ip->i_dev) 84 return (ip); 85 return ((struct inode *)0); 86 } 87 #endif notdef 88 89 /* 90 * Look up an inode by device,inumber. 91 * If it is in core (in the inode structure), 92 * honor the locking protocol. 93 * If it is not in core, read it in from the 94 * specified device. 95 * If the inode is mounted on, perform 96 * the indicated indirection. 97 * In all cases, a pointer to a locked 98 * inode structure is returned. 99 * 100 * panic: no imt -- if the mounted file 101 * system is not in the mount table. 102 * "cannot happen" 103 */ 104 struct inode * 105 iget(dev, fs, ino) 106 dev_t dev; 107 register struct fs *fs; 108 ino_t ino; 109 { 110 register struct inode *ip; 111 register union ihead *ih; 112 register struct mount *mp; 113 register struct buf *bp; 114 register struct dinode *dp; 115 register struct inode *iq; 116 117 loop: 118 ih = &ihead[INOHASH(dev, ino)]; 119 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 120 if (ino == ip->i_number && dev == ip->i_dev) { 121 /* 122 * Following is essentially an inline expanded 123 * copy of igrab(), expanded inline for speed, 124 * and so that the test for a mounted on inode 125 * can be deferred until after we are sure that 126 * the inode isn't busy. 127 */ 128 if ((ip->i_flag&ILOCKED) != 0) { 129 ip->i_flag |= IWANT; 130 sleep((caddr_t)ip, PINOD); 131 goto loop; 132 } 133 if ((ip->i_flag&IMOUNT) != 0) { 134 for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) 135 if(mp->m_inodp == ip) { 136 dev = mp->m_dev; 137 fs = mp->m_bufp->b_un.b_fs; 138 ino = ROOTINO; 139 goto loop; 140 } 141 panic("no imt"); 142 } 143 if (ip->i_count == 0) { /* ino on free list */ 144 if (iq = ip->i_freef) 145 iq->i_freeb = ip->i_freeb; 146 else 147 ifreet = ip->i_freeb; 148 *ip->i_freeb = iq; 149 ip->i_freef = NULL; 150 ip->i_freeb = NULL; 151 } 152 ip->i_count++; 153 ip->i_flag |= ILOCKED; 154 return(ip); 155 } 156 157 if ((ip = ifreeh) == NULL) { 158 tablefull("inode"); 159 u.u_error = ENFILE; 160 return(NULL); 161 } 162 if (ip->i_count) 163 panic("free inode isn't"); 164 if (iq = ip->i_freef) 165 iq->i_freeb = &ifreeh; 166 ifreeh = iq; 167 ip->i_freef = NULL; 168 ip->i_freeb = NULL; 169 /* 170 * Now to take inode off the hash chain it was on 171 * (initially, or after an iflush, it is on a "hash chain" 172 * consisting entirely of itself, and pointed to by no-one, 173 * but that doesn't matter), and put it on the chain for 174 * its new (ino, dev) pair 175 */ 176 remque(ip); 177 insque(ip, ih); 178 ip->i_dev = dev; 179 ip->i_fs = fs; 180 ip->i_number = ino; 181 cacheinval(ip); 182 ip->i_flag = ILOCKED; 183 ip->i_count++; 184 ip->i_lastr = 0; 185 #ifdef QUOTA 186 dqrele(ip->i_dquot); 187 #endif 188 #ifdef SECSIZE 189 bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize, 190 fs->fs_dbsize); 191 #else SECSIZE 192 bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize); 193 #endif SECSIZE 194 /* 195 * Check I/O errors 196 */ 197 if ((bp->b_flags&B_ERROR) != 0) { 198 brelse(bp); 199 /* 200 * the inode doesn't contain anything useful, so it would 201 * be misleading to leave it on its hash chain. 202 * 'iput' will take care of putting it back on the free list. 203 */ 204 remque(ip); 205 ip->i_forw = ip; 206 ip->i_back = ip; 207 /* 208 * we also loose its inumber, just in case (as iput 209 * doesn't do that any more) - but as it isn't on its 210 * hash chain, I doubt if this is really necessary .. kre 211 * (probably the two methods are interchangable) 212 */ 213 ip->i_number = 0; 214 #ifdef QUOTA 215 ip->i_dquot = NODQUOT; 216 #endif 217 iput(ip); 218 return(NULL); 219 } 220 dp = bp->b_un.b_dino; 221 dp += itoo(fs, ino); 222 ip->i_ic = dp->di_ic; 223 brelse(bp); 224 #ifdef QUOTA 225 if (ip->i_mode == 0) 226 ip->i_dquot = NODQUOT; 227 else 228 ip->i_dquot = inoquota(ip); 229 #endif 230 return (ip); 231 } 232 233 /* 234 * Convert a pointer to an inode into a reference to an inode. 235 * 236 * This is basically the internal piece of iget (after the 237 * inode pointer is located) but without the test for mounted 238 * filesystems. It is caller's responsibility to check that 239 * the inode pointer is valid. 240 */ 241 igrab(ip) 242 register struct inode *ip; 243 { 244 while ((ip->i_flag&ILOCKED) != 0) { 245 ip->i_flag |= IWANT; 246 sleep((caddr_t)ip, PINOD); 247 } 248 if (ip->i_count == 0) { /* ino on free list */ 249 register struct inode *iq; 250 251 if (iq = ip->i_freef) 252 iq->i_freeb = ip->i_freeb; 253 else 254 ifreet = ip->i_freeb; 255 *ip->i_freeb = iq; 256 ip->i_freef = NULL; 257 ip->i_freeb = NULL; 258 } 259 ip->i_count++; 260 ip->i_flag |= ILOCKED; 261 } 262 263 /* 264 * Decrement reference count of 265 * an inode structure. 266 * On the last reference, 267 * write the inode out and if necessary, 268 * truncate and deallocate the file. 269 */ 270 iput(ip) 271 register struct inode *ip; 272 { 273 274 if ((ip->i_flag & ILOCKED) == 0) 275 panic("iput"); 276 IUNLOCK(ip); 277 irele(ip); 278 } 279 280 irele(ip) 281 register struct inode *ip; 282 { 283 int mode; 284 285 if (ip->i_count == 1) { 286 ip->i_flag |= ILOCKED; 287 if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) { 288 itrunc(ip, (u_long)0); 289 mode = ip->i_mode; 290 ip->i_mode = 0; 291 ip->i_rdev = 0; 292 ip->i_flag |= IUPD|ICHG; 293 ifree(ip, ip->i_number, mode); 294 #ifdef QUOTA 295 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 296 dqrele(ip->i_dquot); 297 ip->i_dquot = NODQUOT; 298 #endif 299 } 300 IUPDAT(ip, &time, &time, 0); 301 IUNLOCK(ip); 302 ip->i_flag = 0; 303 /* 304 * Put the inode on the end of the free list. 305 * Possibly in some cases it would be better to 306 * put the inode at the head of the free list, 307 * (eg: where i_mode == 0 || i_number == 0) 308 * but I will think about that later .. kre 309 * (i_number is rarely 0 - only after an i/o error in iget, 310 * where i_mode == 0, the inode will probably be wanted 311 * again soon for an ialloc, so possibly we should keep it) 312 */ 313 if (ifreeh) { 314 *ifreet = ip; 315 ip->i_freeb = ifreet; 316 } else { 317 ifreeh = ip; 318 ip->i_freeb = &ifreeh; 319 } 320 ip->i_freef = NULL; 321 ifreet = &ip->i_freef; 322 } else if (!(ip->i_flag & ILOCKED)) 323 ITIMES(ip, &time, &time); 324 ip->i_count--; 325 } 326 327 /* 328 * Check accessed and update flags on 329 * an inode structure. 330 * If any is on, update the inode 331 * with the current time. 332 * If waitfor is given, then must insure 333 * i/o order so wait for write to complete. 334 */ 335 iupdat(ip, ta, tm, waitfor) 336 register struct inode *ip; 337 struct timeval *ta, *tm; 338 int waitfor; 339 { 340 register struct buf *bp; 341 struct dinode *dp; 342 register struct fs *fs; 343 344 fs = ip->i_fs; 345 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) { 346 if (fs->fs_ronly) 347 return; 348 #ifdef SECSIZE 349 bp = bread(ip->i_dev, fsbtodb(fs, itod(fs, ip->i_number)), 350 (int)fs->fs_bsize, fs->fs_dbsize); 351 #else SECSIZE 352 bp = bread(ip->i_dev, fsbtodb(fs, itod(fs, ip->i_number)), 353 (int)fs->fs_bsize); 354 #endif SECSIZE 355 if (bp->b_flags & B_ERROR) { 356 brelse(bp); 357 return; 358 } 359 if (ip->i_flag&IACC) 360 ip->i_atime = ta->tv_sec; 361 if (ip->i_flag&IUPD) 362 ip->i_mtime = tm->tv_sec; 363 if (ip->i_flag&ICHG) 364 ip->i_ctime = time.tv_sec; 365 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 366 dp = bp->b_un.b_dino + itoo(fs, ip->i_number); 367 dp->di_ic = ip->i_ic; 368 if (waitfor) 369 bwrite(bp); 370 else 371 bdwrite(bp); 372 } 373 } 374 375 #define SINGLE 0 /* index of single indirect block */ 376 #define DOUBLE 1 /* index of double indirect block */ 377 #define TRIPLE 2 /* index of triple indirect block */ 378 /* 379 * Truncate the inode ip to at most 380 * length size. Free affected disk 381 * blocks -- the blocks of the file 382 * are removed in reverse order. 383 * 384 * NB: triple indirect blocks are untested. 385 */ 386 itrunc(oip, length) 387 register struct inode *oip; 388 u_long length; 389 { 390 register daddr_t lastblock; 391 daddr_t bn, lbn, lastiblock[NIADDR]; 392 register struct fs *fs; 393 register struct inode *ip; 394 struct buf *bp; 395 int offset, osize, size, count, level; 396 long nblocks, blocksreleased = 0; 397 register int i; 398 dev_t dev; 399 struct inode tip; 400 extern long indirtrunc(); 401 402 if (oip->i_size <= length) { 403 oip->i_flag |= ICHG|IUPD; 404 iupdat(oip, &time, &time, 1); 405 return; 406 } 407 /* 408 * Calculate index into inode's block list of 409 * last direct and indirect blocks (if any) 410 * which we want to keep. Lastblock is -1 when 411 * the file is truncated to 0. 412 */ 413 fs = oip->i_fs; 414 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 415 lastiblock[SINGLE] = lastblock - NDADDR; 416 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 417 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 418 nblocks = btodb(fs->fs_bsize); 419 /* 420 * Update the size of the file. If the file is not being 421 * truncated to a block boundry, the contents of the 422 * partial block following the end of the file must be 423 * zero'ed in case it ever become accessable again because 424 * of subsequent file growth. 425 */ 426 osize = oip->i_size; 427 offset = blkoff(fs, length); 428 if (offset == 0) { 429 oip->i_size = length; 430 } else { 431 lbn = lblkno(fs, length); 432 bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset)); 433 if (u.u_error || (long)bn < 0) 434 return; 435 oip->i_size = length; 436 size = blksize(fs, oip, lbn); 437 count = howmany(size, CLBYTES); 438 dev = oip->i_dev; 439 for (i = 0; i < count; i++) 440 #ifdef SECSIZE 441 munhash(dev, bn + i * CLBYTES / fs->fs_dbsize); 442 #else SECSIZE 443 munhash(dev, bn + i * CLBYTES / DEV_BSIZE); 444 #endif SECSIZE 445 bp = bread(dev, bn, size); 446 if (bp->b_flags & B_ERROR) { 447 u.u_error = EIO; 448 oip->i_size = osize; 449 brelse(bp); 450 return; 451 } 452 bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 453 bdwrite(bp); 454 } 455 /* 456 * Update file and block pointers 457 * on disk before we start freeing blocks. 458 * If we crash before free'ing blocks below, 459 * the blocks will be returned to the free list. 460 * lastiblock values are also normalized to -1 461 * for calls to indirtrunc below. 462 */ 463 tip = *oip; 464 tip.i_size = osize; 465 for (level = TRIPLE; level >= SINGLE; level--) 466 if (lastiblock[level] < 0) { 467 oip->i_ib[level] = 0; 468 lastiblock[level] = -1; 469 } 470 for (i = NDADDR - 1; i > lastblock; i--) 471 oip->i_db[i] = 0; 472 oip->i_flag |= ICHG|IUPD; 473 syncip(oip); 474 475 /* 476 * Indirect blocks first. 477 */ 478 ip = &tip; 479 for (level = TRIPLE; level >= SINGLE; level--) { 480 bn = ip->i_ib[level]; 481 if (bn != 0) { 482 blocksreleased += 483 indirtrunc(ip, bn, lastiblock[level], level); 484 if (lastiblock[level] < 0) { 485 ip->i_ib[level] = 0; 486 free(ip, bn, (off_t)fs->fs_bsize); 487 blocksreleased += nblocks; 488 } 489 } 490 if (lastiblock[level] >= 0) 491 goto done; 492 } 493 494 /* 495 * All whole direct blocks or frags. 496 */ 497 for (i = NDADDR - 1; i > lastblock; i--) { 498 register off_t bsize; 499 500 bn = ip->i_db[i]; 501 if (bn == 0) 502 continue; 503 ip->i_db[i] = 0; 504 bsize = (off_t)blksize(fs, ip, i); 505 free(ip, bn, bsize); 506 blocksreleased += btodb(bsize); 507 } 508 if (lastblock < 0) 509 goto done; 510 511 /* 512 * Finally, look for a change in size of the 513 * last direct block; release any frags. 514 */ 515 bn = ip->i_db[lastblock]; 516 if (bn != 0) { 517 off_t oldspace, newspace; 518 519 /* 520 * Calculate amount of space we're giving 521 * back as old block size minus new block size. 522 */ 523 oldspace = blksize(fs, ip, lastblock); 524 ip->i_size = length; 525 newspace = blksize(fs, ip, lastblock); 526 if (newspace == 0) 527 panic("itrunc: newspace"); 528 if (oldspace - newspace > 0) { 529 /* 530 * Block number of space to be free'd is 531 * the old block # plus the number of frags 532 * required for the storage we're keeping. 533 */ 534 bn += numfrags(fs, newspace); 535 free(ip, bn, oldspace - newspace); 536 blocksreleased += btodb(oldspace - newspace); 537 } 538 } 539 done: 540 /* BEGIN PARANOIA */ 541 for (level = SINGLE; level <= TRIPLE; level++) 542 if (ip->i_ib[level] != oip->i_ib[level]) 543 panic("itrunc1"); 544 for (i = 0; i < NDADDR; i++) 545 if (ip->i_db[i] != oip->i_db[i]) 546 panic("itrunc2"); 547 /* END PARANOIA */ 548 oip->i_blocks -= blocksreleased; 549 if (oip->i_blocks < 0) /* sanity */ 550 oip->i_blocks = 0; 551 oip->i_flag |= ICHG; 552 #ifdef QUOTA 553 (void) chkdq(oip, -blocksreleased, 0); 554 #endif 555 } 556 557 /* 558 * Release blocks associated with the inode ip and 559 * stored in the indirect block bn. Blocks are free'd 560 * in LIFO order up to (but not including) lastbn. If 561 * level is greater than SINGLE, the block is an indirect 562 * block and recursive calls to indirtrunc must be used to 563 * cleanse other indirect blocks. 564 * 565 * NB: triple indirect blocks are untested. 566 */ 567 long 568 indirtrunc(ip, bn, lastbn, level) 569 register struct inode *ip; 570 daddr_t bn, lastbn; 571 int level; 572 { 573 register int i; 574 struct buf *bp, *copy; 575 register daddr_t *bap; 576 register struct fs *fs = ip->i_fs; 577 daddr_t nb, last; 578 long factor; 579 int blocksreleased = 0, nblocks; 580 581 /* 582 * Calculate index in current block of last 583 * block to be kept. -1 indicates the entire 584 * block so we need not calculate the index. 585 */ 586 factor = 1; 587 for (i = SINGLE; i < level; i++) 588 factor *= NINDIR(fs); 589 last = lastbn; 590 if (lastbn > 0) 591 last /= factor; 592 nblocks = btodb(fs->fs_bsize); 593 /* 594 * Get buffer of block pointers, zero those 595 * entries corresponding to blocks to be free'd, 596 * and update on disk copy first. 597 */ 598 copy = geteblk((int)fs->fs_bsize); 599 #ifdef SECSIZE 600 bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize, 601 fs->fs_dbsize); 602 #else SECSIZE 603 bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); 604 #endif SECSIZE 605 if (bp->b_flags&B_ERROR) { 606 brelse(copy); 607 brelse(bp); 608 return (0); 609 } 610 bap = bp->b_un.b_daddr; 611 bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize); 612 bzero((caddr_t)&bap[last + 1], 613 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 614 bwrite(bp); 615 bp = copy, bap = bp->b_un.b_daddr; 616 617 /* 618 * Recursively free totally unused blocks. 619 */ 620 for (i = NINDIR(fs) - 1; i > last; i--) { 621 nb = bap[i]; 622 if (nb == 0) 623 continue; 624 if (level > SINGLE) 625 blocksreleased += 626 indirtrunc(ip, nb, (daddr_t)-1, level - 1); 627 free(ip, nb, (off_t)fs->fs_bsize); 628 blocksreleased += nblocks; 629 } 630 631 /* 632 * Recursively free last partial block. 633 */ 634 if (level > SINGLE && lastbn >= 0) { 635 last = lastbn % factor; 636 nb = bap[i]; 637 if (nb != 0) 638 blocksreleased += indirtrunc(ip, nb, last, level - 1); 639 } 640 brelse(bp); 641 return (blocksreleased); 642 } 643 644 /* 645 * Remove any inodes in the inode cache belonging to dev. 646 * 647 * There should not be any active ones, return error if any are found 648 * (nb: this is a user error, not a system err). 649 */ 650 #ifdef QUOTA 651 iflush(dev, iq) 652 dev_t dev; 653 struct inode *iq; 654 #else 655 iflush(dev) 656 dev_t dev; 657 #endif 658 { 659 register struct inode *ip; 660 661 for (ip = inode; ip < inodeNINODE; ip++) { 662 #ifdef QUOTA 663 if (ip != iq && ip->i_dev == dev) 664 #else 665 if (ip->i_dev == dev) 666 #endif 667 if (ip->i_count) 668 return (EBUSY); 669 else { 670 remque(ip); 671 ip->i_forw = ip; 672 ip->i_back = ip; 673 /* 674 * as i_count == 0, the inode was on the free 675 * list already, just leave it there, it will 676 * fall off the bottom eventually. We could 677 * perhaps move it to the head of the free 678 * list, but as umounts are done so 679 * infrequently, we would gain very little, 680 * while making the code bigger. 681 */ 682 #ifdef QUOTA 683 dqrele(ip->i_dquot); 684 ip->i_dquot = NODQUOT; 685 #endif 686 } 687 } 688 return (0); 689 } 690 691 /* 692 * Lock an inode. If its already locked, set the WANT bit and sleep. 693 */ 694 ilock(ip) 695 register struct inode *ip; 696 { 697 698 ILOCK(ip); 699 } 700 701 /* 702 * Unlock an inode. If WANT bit is on, wakeup. 703 */ 704 iunlock(ip) 705 register struct inode *ip; 706 { 707 708 IUNLOCK(ip); 709 } 710