/*
 * Copyright (c) 1982 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)lfs_inode.c	6.17 (Berkeley) 09/04/85
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "dir.h"
#include "user.h"
#include "inode.h"
#include "fs.h"
#include "buf.h"
#include "cmap.h"
#ifdef QUOTA
#include "quota.h"
#endif
#include "kernel.h"

#define INOHSZ  512
#if     ((INOHSZ&(INOHSZ-1)) == 0)
#define INOHASH(dev,ino)        (((dev)+(ino))&(INOHSZ-1))
#else
#define INOHASH(dev,ino)        (((unsigned)((dev)+(ino)))%INOHSZ)
#endif

union ihead {                           /* inode LRU cache, Chris Maltby */
        union ihead *ih_head[2];
        struct inode *ih_chain[2];
} ihead[INOHSZ];

struct inode *ifreeh, **ifreet;

/*
 * Initialize hash links for inodes
 * and build inode free list.
 */
ihinit()
{
        register int i;
        register struct inode *ip = inode;
        register union ihead *ih = ihead;

        for (i = INOHSZ; --i >= 0; ih++) {
                ih->ih_head[0] = ih;
                ih->ih_head[1] = ih;
        }
        ifreeh = ip;
        ifreet = &ip->i_freef;
        ip->i_freeb = &ifreeh;
        ip->i_forw = ip;
        ip->i_back = ip;
        for (i = ninode; --i > 0; ) {
                ++ip;
                ip->i_forw = ip;
                ip->i_back = ip;
                *ifreet = ip;
                ip->i_freeb = ifreet;
                ifreet = &ip->i_freef;
        }
        ip->i_freef = NULL;
}

#ifdef notdef
/*
 * Find an inode if it is incore.
 * This is the equivalent, for inodes,
 * of ``incore'' in bio.c or ``pfind'' in subr.c.
 */
struct inode *
ifind(dev, ino)
        dev_t dev;
        ino_t ino;
{
        register struct inode *ip;
        register union ihead *ih;

        ih = &ihead[INOHASH(dev, ino)];
        for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
                if (ino==ip->i_number && dev==ip->i_dev)
                        return (ip);
        return ((struct inode *)0);
}
#endif notdef
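
/*
 * Illustrative note, not in the original source: INOHSZ above is a
 * power of two, so the masking form of INOHASH is selected at compile
 * time and is equivalent to the modulus form.  For example, with
 * dev == 3 and ino == 1037:
 *
 *	((3 + 1037) & (512 - 1))      == 1040 & 511 == 16
 *	((unsigned)(3 + 1037) % 512)  == 16
 *
 * Both forms pick hash bucket 16; the AND is simply cheaper than a
 * division.
 */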

/*
 * Look up an inode by device, inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * If the inode is mounted on, perform
 * the indicated indirection.
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * panic: no imt -- if the mounted file
 *	system is not in the mount table.
 *	"cannot happen"
 */
struct inode *
iget(dev, fs, ino)
        dev_t dev;
        register struct fs *fs;
        ino_t ino;
{
        register struct inode *ip;
        register union ihead *ih;
        register struct mount *mp;
        register struct buf *bp;
        register struct dinode *dp;
        register struct inode *iq;

loop:
        ih = &ihead[INOHASH(dev, ino)];
        for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
                if (ino == ip->i_number && dev == ip->i_dev) {
                        /*
                         * Following is essentially an inline expansion of
                         * igrab(), done for speed, and so that the test for
                         * a mounted-on inode can be deferred until after we
                         * are sure that the inode isn't busy.
                         */
                        if ((ip->i_flag&ILOCKED) != 0) {
                                ip->i_flag |= IWANT;
                                sleep((caddr_t)ip, PINOD);
                                goto loop;
                        }
                        if ((ip->i_flag&IMOUNT) != 0) {
                                for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
                                        if (mp->m_inodp == ip) {
                                                dev = mp->m_dev;
                                                fs = mp->m_bufp->b_un.b_fs;
                                                ino = ROOTINO;
                                                goto loop;
                                        }
                                panic("no imt");
                        }
                        if (ip->i_count == 0) {         /* ino on free list */
                                if (iq = ip->i_freef)
                                        iq->i_freeb = ip->i_freeb;
                                else
                                        ifreet = ip->i_freeb;
                                *ip->i_freeb = iq;
                                ip->i_freef = NULL;
                                ip->i_freeb = NULL;
                        }
                        ip->i_count++;
                        ip->i_flag |= ILOCKED;
                        return (ip);
                }

        if ((ip = ifreeh) == NULL) {
                tablefull("inode");
                u.u_error = ENFILE;
                return (NULL);
        }
        if (ip->i_count)
                panic("free inode isn't");
        if (iq = ip->i_freef)
                iq->i_freeb = &ifreeh;
        ifreeh = iq;
        ip->i_freef = NULL;
        ip->i_freeb = NULL;
        /*
         * Now to take inode off the hash chain it was on
         * (initially, or after an iflush, it is on a "hash chain"
         * consisting entirely of itself, and pointed to by no-one,
         * but that doesn't matter), and put it on the chain for
         * its new (ino, dev) pair.
         */
        remque(ip);
        insque(ip, ih);
        ip->i_dev = dev;
        ip->i_fs = fs;
        ip->i_number = ino;
        cacheinval(ip);
        ip->i_flag = ILOCKED;
        ip->i_count++;
        ip->i_lastr = 0;
#ifdef QUOTA
        dqrele(ip->i_dquot);
#endif
        bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
        /*
         * Check I/O errors.
         */
        if ((bp->b_flags&B_ERROR) != 0) {
                brelse(bp);
                /*
                 * The inode doesn't contain anything useful, so it would
                 * be misleading to leave it on its hash chain.
                 * 'iput' will take care of putting it back on the free list.
                 */
                remque(ip);
                ip->i_forw = ip;
                ip->i_back = ip;
                /*
                 * We also lose its inumber, just in case (as iput
                 * doesn't do that any more) - but as it isn't on its
                 * hash chain, I doubt if this is really necessary .. kre
                 * (probably the two methods are interchangeable)
                 */
                ip->i_number = 0;
#ifdef QUOTA
                ip->i_dquot = NODQUOT;
#endif
                iput(ip);
                return (NULL);
        }
        dp = bp->b_un.b_dino;
        dp += itoo(fs, ino);
        ip->i_ic = dp->di_ic;
        brelse(bp);
#ifdef QUOTA
        if (ip->i_mode == 0)
                ip->i_dquot = NODQUOT;
        else
                ip->i_dquot = inoquota(ip);
#endif
        return (ip);
}

/*
 * Convert a pointer to an inode into a reference to an inode.
 *
 * This is basically the internal piece of iget (after the
 * inode pointer is located) but without the test for mounted
 * filesystems.  It is the caller's responsibility to check that
 * the inode pointer is valid.
 */
igrab(ip)
        register struct inode *ip;
{
        while ((ip->i_flag&ILOCKED) != 0) {
                ip->i_flag |= IWANT;
                sleep((caddr_t)ip, PINOD);
        }
        if (ip->i_count == 0) {         /* ino on free list */
                register struct inode *iq;

                if (iq = ip->i_freef)
                        iq->i_freeb = ip->i_freeb;
                else
                        ifreet = ip->i_freeb;
                *ip->i_freeb = iq;
                ip->i_freef = NULL;
                ip->i_freeb = NULL;
        }
        ip->i_count++;
        ip->i_flag |= ILOCKED;
}
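
#ifdef notdef
/*
 * Usage sketch, not part of the original source: iget() returns the
 * inode locked with its reference count raised, or NULL with
 * u.u_error set.  A caller (here a hypothetical example_lookup())
 * does its work on the locked inode and then drops both the lock and
 * the reference with iput().
 */
example_lookup(dev, fs, ino)
        dev_t dev;
        struct fs *fs;
        ino_t ino;
{
        register struct inode *ip;

        ip = iget(dev, fs, ino);
        if (ip == NULL)
                return;                 /* u.u_error set by iget */
        /* ... examine or modify the locked inode here ... */
        iput(ip);                       /* unlock and release the reference */
}
#endif notdef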

/*
 * Decrement reference count of
 * an inode structure.
 * On the last reference,
 * write the inode out and if necessary,
 * truncate and deallocate the file.
 */
iput(ip)
        register struct inode *ip;
{

        if ((ip->i_flag & ILOCKED) == 0)
                panic("iput");
        IUNLOCK(ip);
        irele(ip);
}

irele(ip)
        register struct inode *ip;
{
        int mode;

        if (ip->i_count == 1) {
                ip->i_flag |= ILOCKED;
                if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) {
                        itrunc(ip, (u_long)0);
                        mode = ip->i_mode;
                        ip->i_mode = 0;
                        ip->i_rdev = 0;
                        ip->i_flag |= IUPD|ICHG;
                        ifree(ip, ip->i_number, mode);
#ifdef QUOTA
                        (void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
                        dqrele(ip->i_dquot);
                        ip->i_dquot = NODQUOT;
#endif
                }
                IUPDAT(ip, &time, &time, 0);
                IUNLOCK(ip);
                ip->i_flag = 0;
                /*
                 * Put the inode on the end of the free list.
                 * Possibly in some cases it would be better to
                 * put the inode at the head of the free list,
                 * (eg: where i_mode == 0 || i_number == 0)
                 * but I will think about that later .. kre
                 * (i_number is rarely 0 - only after an i/o error in iget,
                 * where i_mode == 0, the inode will probably be wanted
                 * again soon for an ialloc, so possibly we should keep it)
                 */
                if (ifreeh) {
                        *ifreet = ip;
                        ip->i_freeb = ifreet;
                } else {
                        ifreeh = ip;
                        ip->i_freeb = &ifreeh;
                }
                ip->i_freef = NULL;
                ifreet = &ip->i_freef;
        } else if (!(ip->i_flag & ILOCKED))
                ITIMES(ip, &time, &time);
        ip->i_count--;
}

/*
 * Check accessed and update flags on
 * an inode structure.
 * If any is on, update the inode
 * with the current time.
 * If waitfor is given, then must ensure
 * i/o order so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
        register struct inode *ip;
        struct timeval *ta, *tm;
        int waitfor;
{
        register struct buf *bp;
        struct dinode *dp;
        register struct fs *fp;

        fp = ip->i_fs;
        if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
                if (fp->fs_ronly)
                        return;
                bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
                    (int)fp->fs_bsize);
                if (bp->b_flags & B_ERROR) {
                        brelse(bp);
                        return;
                }
                if (ip->i_flag&IACC)
                        ip->i_atime = ta->tv_sec;
                if (ip->i_flag&IUPD)
                        ip->i_mtime = tm->tv_sec;
                if (ip->i_flag&ICHG)
                        ip->i_ctime = time.tv_sec;
                ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
                dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
                dp->di_ic = ip->i_ic;
                if (waitfor)
                        bwrite(bp);
                else
                        bdwrite(bp);
        }
}
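
#ifdef notdef
/*
 * Usage sketch, not part of the original source: a caller that has
 * changed an inode (here a hypothetical example_change()) sets the
 * appropriate flag bits and lets iupdat() copy the in-core inode back
 * into its disk block.  A non-zero waitfor makes the write synchronous
 * (bwrite); zero lets it be a delayed write (bdwrite).
 */
example_change(ip)
        register struct inode *ip;
{
        ip->i_flag |= ICHG;             /* inode itself was changed */
        iupdat(ip, &time, &time, 1);    /* push it to disk synchronously */
}
#endif notdef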

#define SINGLE  0       /* index of single indirect block */
#define DOUBLE  1       /* index of double indirect block */
#define TRIPLE  2       /* index of triple indirect block */
/*
 * Truncate the inode ip to at most
 * length size.  Free affected disk
 * blocks -- the blocks of the file
 * are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
        register struct inode *oip;
        u_long length;
{
        register daddr_t lastblock;
        daddr_t bn, lastiblock[NIADDR];
        register struct fs *fs;
        register struct inode *ip;
        struct buf *bp;
        int offset, lbn, osize, size, count, level, s;
        long nblocks, blocksreleased = 0;
        register int i;
        dev_t dev;
        struct inode tip;
        extern long indirtrunc();
        extern struct cmap *mfind();

        if (oip->i_size <= length) {
                oip->i_flag |= ICHG|IUPD;
                iupdat(oip, &time, &time, 1);
                return;
        }
        /*
         * Calculate index into inode's block list of
         * last direct and indirect blocks (if any)
         * which we want to keep.  Lastblock is -1 when
         * the file is truncated to 0.
         */
        fs = oip->i_fs;
        lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
        lastiblock[SINGLE] = lastblock - NDADDR;
        lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
        lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
        nblocks = btodb(fs->fs_bsize);
        /*
         * Update the size of the file.  If the file is not being
         * truncated to a block boundary, the contents of the
         * partial block following the end of the file must be
         * zero'ed in case it ever becomes accessible again because
         * of subsequent file growth.
         */
        osize = oip->i_size;
        offset = blkoff(fs, length);
        if (offset == 0) {
                oip->i_size = length;
        } else {
                lbn = lblkno(fs, length);
                bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
                if (u.u_error || (long)bn < 0)
                        return;
                oip->i_size = length;
                size = blksize(fs, oip, lbn);
                count = howmany(size, DEV_BSIZE);
                dev = oip->i_dev;
                s = splimp();
                for (i = 0; i < count; i += CLSIZE)
                        if (mfind(dev, bn + i))
                                munhash(dev, bn + i);
                splx(s);
                bp = bread(dev, bn, size);
                if (bp->b_flags & B_ERROR) {
                        u.u_error = EIO;
                        oip->i_size = osize;
                        brelse(bp);
                        return;
                }
                bzero(bp->b_un.b_addr + offset, size - offset);
                bdwrite(bp);
        }
        /*
         * Update file and block pointers
         * on disk before we start freeing blocks.
         * If we crash before free'ing blocks below,
         * the blocks will be returned to the free list.
         * lastiblock values are also normalized to -1
         * for calls to indirtrunc below.
         */
        tip = *oip;
        tip.i_size = osize;
        for (level = TRIPLE; level >= SINGLE; level--)
                if (lastiblock[level] < 0) {
                        oip->i_ib[level] = 0;
                        lastiblock[level] = -1;
                }
        for (i = NDADDR - 1; i > lastblock; i--)
                oip->i_db[i] = 0;
        oip->i_flag |= ICHG|IUPD;
        syncip(oip);

        /*
         * Indirect blocks first.
         */
        ip = &tip;
        for (level = TRIPLE; level >= SINGLE; level--) {
                bn = ip->i_ib[level];
                if (bn != 0) {
                        blocksreleased +=
                            indirtrunc(ip, bn, lastiblock[level], level);
                        if (lastiblock[level] < 0) {
                                ip->i_ib[level] = 0;
                                free(ip, bn, (off_t)fs->fs_bsize);
                                blocksreleased += nblocks;
                        }
                }
                if (lastiblock[level] >= 0)
                        goto done;
        }

        /*
         * All whole direct blocks or frags.
         */
        for (i = NDADDR - 1; i > lastblock; i--) {
                register int bsize;

                bn = ip->i_db[i];
                if (bn == 0)
                        continue;
                ip->i_db[i] = 0;
                bsize = (off_t)blksize(fs, ip, i);
                free(ip, bn, bsize);
                blocksreleased += btodb(bsize);
        }
        if (lastblock < 0)
                goto done;

        /*
         * Finally, look for a change in size of the
         * last direct block; release any frags.
         */
        bn = ip->i_db[lastblock];
        if (bn != 0) {
                int oldspace, newspace;

                /*
                 * Calculate amount of space we're giving
                 * back as old block size minus new block size.
                 */
                oldspace = blksize(fs, ip, lastblock);
                ip->i_size = length;
                newspace = blksize(fs, ip, lastblock);
                if (newspace == 0)
                        panic("itrunc: newspace");
                if (oldspace - newspace > 0) {
                        /*
                         * Block number of space to be free'd is
                         * the old block # plus the number of frags
                         * required for the storage we're keeping.
                         */
                        bn += numfrags(fs, newspace);
                        free(ip, bn, oldspace - newspace);
                        blocksreleased += btodb(oldspace - newspace);
                }
        }
done:
/* BEGIN PARANOIA */
        for (level = SINGLE; level <= TRIPLE; level++)
                if (ip->i_ib[level] != oip->i_ib[level])
                        panic("itrunc1");
        for (i = 0; i < NDADDR; i++)
                if (ip->i_db[i] != oip->i_db[i])
                        panic("itrunc2");
/* END PARANOIA */
        oip->i_blocks -= blocksreleased;
        if (oip->i_blocks < 0)                  /* sanity */
                oip->i_blocks = 0;
        oip->i_flag |= ICHG;
#ifdef QUOTA
        (void) chkdq(oip, -blocksreleased, 0);
#endif
}
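
/*
 * Worked example, not in the original source, assuming an 8192 byte
 * block file system, so that NDADDR == 12 and NINDIR(fs) == 2048.
 * For itrunc(oip, 100000) above:
 *
 *	lastblock          = lblkno(fs, 100000 + 8191) - 1 = 13 - 1 = 12
 *	lastiblock[SINGLE] = 12 - 12   =  0
 *	lastiblock[DOUBLE] =  0 - 2048 < 0
 *	lastiblock[TRIPLE] = lastiblock[DOUBLE] - 2048*2048 < 0
 *
 * so all NDADDR direct blocks and entry 0 of the single indirect
 * block are kept, while the double and triple indirect trees (if
 * present) are freed in their entirety.
 */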

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * NB: triple indirect blocks are untested.
 */
long
indirtrunc(ip, bn, lastbn, level)
        register struct inode *ip;
        daddr_t bn, lastbn;
        int level;
{
        register int i;
        struct buf *bp, *copy;
        register daddr_t *bap;
        register struct fs *fs = ip->i_fs;
        daddr_t nb, last;
        long factor;
        int blocksreleased = 0, nblocks;

        /*
         * Calculate index in current block of last
         * block to be kept.  -1 indicates the entire
         * block so we need not calculate the index.
         */
        factor = 1;
        for (i = SINGLE; i < level; i++)
                factor *= NINDIR(fs);
        last = lastbn;
        if (lastbn > 0)
                last /= factor;
        nblocks = btodb(fs->fs_bsize);
        /*
         * Get buffer of block pointers, zero those
         * entries corresponding to blocks to be free'd,
         * and update on disk copy first.
         */
        copy = geteblk((int)fs->fs_bsize);
        bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
        if (bp->b_flags&B_ERROR) {
                brelse(copy);
                brelse(bp);
                return (0);
        }
        bap = bp->b_un.b_daddr;
        bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
        bzero((caddr_t)&bap[last + 1],
            (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
        bwrite(bp);
        bp = copy, bap = bp->b_un.b_daddr;

        /*
         * Recursively free totally unused blocks.
         */
        for (i = NINDIR(fs) - 1; i > last; i--) {
                nb = bap[i];
                if (nb == 0)
                        continue;
                if (level > SINGLE)
                        blocksreleased +=
                            indirtrunc(ip, nb, (daddr_t)-1, level - 1);
                free(ip, nb, (int)fs->fs_bsize);
                blocksreleased += nblocks;
        }

        /*
         * Recursively free last partial block.
         */
        if (level > SINGLE && lastbn >= 0) {
                last = lastbn % factor;
                nb = bap[i];
                if (nb != 0)
                        blocksreleased += indirtrunc(ip, nb, last, level - 1);
        }
        brelse(bp);
        return (blocksreleased);
}
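
/*
 * Worked example, not in the original source, continuing the 8192 byte
 * block case above (NINDIR(fs) == 2048).  For the call
 * indirtrunc(ip, bn, 0, SINGLE) that itrunc() makes in that example,
 * factor == 1 and last == 0, so entries 1 through 2047 are cleared in
 * the on-disk copy and the blocks they referenced are freed, while
 * entry 0 is kept.  Had bn been a double indirect block with
 * lastbn == 5000, factor would be 2048, last == 5000 / 2048 == 2,
 * entries 3..2047 would be freed wholesale, and entry 2 would be
 * descended into with lastbn % factor == 904.
 */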

/*
 * Remove any inodes in the inode cache belonging to dev.
 *
 * There should not be any active ones; return an error if any are found
 * (nb: this is a user error, not a system error).
 *
 * Also, count the references to dev by block devices - this really
 * has nothing to do with the object of the procedure, but as we have
 * to scan the inode table here anyway, we might as well get the
 * extra benefit.
 *
 * This is called from sumount()/sys3.c when dev is being unmounted.
 */
#ifdef QUOTA
iflush(dev, iq)
        dev_t dev;
        struct inode *iq;
#else
iflush(dev)
        dev_t dev;
#endif
{
        register struct inode *ip;
        register open = 0;

        for (ip = inode; ip < inodeNINODE; ip++) {
#ifdef QUOTA
                if (ip != iq && ip->i_dev == dev)
#else
                if (ip->i_dev == dev)
#endif
                        if (ip->i_count)
                                return (-1);
                        else {
                                remque(ip);
                                ip->i_forw = ip;
                                ip->i_back = ip;
                                /*
                                 * As i_count == 0, the inode was on the free
                                 * list already; just leave it there, it will
                                 * fall off the bottom eventually.  We could
                                 * perhaps move it to the head of the free
                                 * list, but as umounts are done so
                                 * infrequently, we would gain very little,
                                 * while making the code bigger.
                                 */
#ifdef QUOTA
                                dqrele(ip->i_dquot);
                                ip->i_dquot = NODQUOT;
#endif
                        }
                else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
                    ip->i_rdev == dev)
                        open++;
        }
        return (open);
}

/*
 * Lock an inode.  If it's already locked, set the WANT bit and sleep.
 */
ilock(ip)
        register struct inode *ip;
{

        ILOCK(ip);
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 */
iunlock(ip)
        register struct inode *ip;
{

        IUNLOCK(ip);
}