/*	lfs_inode.c	6.12	85/02/11	*/

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "dir.h"
#include "user.h"
#include "inode.h"
#include "fs.h"
#include "conf.h"
#include "buf.h"
#ifdef QUOTA
#include "quota.h"
#endif
#include "kernel.h"

#define	INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

union ihead {				/* inode LRU cache, Chris Maltby */
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

struct inode *ifreeh, **ifreet;
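
/*
 * Note (added): the in-core inode table is indexed two ways.  ihead[]
 * holds INOHSZ doubly linked hash chains keyed by INOHASH(dev, ino),
 * and ifreeh/ifreet head an LRU free list threaded through
 * i_freef/i_freeb.  Each hash chain is circular; its head is the ihead
 * entry itself, so an empty chain has both links pointing back at the
 * head, and chain walks stop on reaching (struct inode *)ih.  An inode
 * with i_count == 0 sits on the free list but stays on its hash chain,
 * so iget() can reclaim it by (dev, ino) until the slot is reused.
 */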

/*
 * Initialize hash links for inodes
 * and build inode free list.
 */
ihinit()
{
	register int i;
	register struct inode *ip = inode;
	register union ihead *ih = ihead;

	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
	ifreeh = ip;
	ifreet = &ip->i_freef;
	ip->i_freeb = &ifreeh;
	ip->i_forw = ip;
	ip->i_back = ip;
	for (i = ninode; --i > 0; ) {
		++ip;
		ip->i_forw = ip;
		ip->i_back = ip;
		*ifreet = ip;
		ip->i_freeb = ifreet;
		ifreet = &ip->i_freef;
	}
	ip->i_freef = NULL;
}

#ifdef notdef
/*
 * Find an inode if it is incore.
 * This is the equivalent, for inodes,
 * of ``incore'' in bio.c or ``pfind'' in subr.c.
 */
struct inode *
ifind(dev, ino)
	dev_t dev;
	ino_t ino;
{
	register struct inode *ip;
	register union ihead *ih;

	ih = &ihead[INOHASH(dev, ino)];
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
		if (ino == ip->i_number && dev == ip->i_dev)
			return (ip);
	return ((struct inode *)0);
}
#endif /* notdef */

/*
 * Look up an inode by device,inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * If the inode is mounted on, perform
 * the indicated indirection.
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * panic: no imt -- if the mounted file
 *	system is not in the mount table.
 *	"cannot happen"
 */
struct inode *
iget(dev, fs, ino)
	dev_t dev;
	register struct fs *fs;
	ino_t ino;
{
	register struct inode *ip;
	register union ihead *ih;
	register struct mount *mp;
	register struct buf *bp;
	register struct dinode *dp;
	register struct inode *iq;

loop:
	ih = &ihead[INOHASH(dev, ino)];
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
		if (ino == ip->i_number && dev == ip->i_dev) {
			/*
			 * Following is essentially an inline expanded
			 * copy of igrab(), expanded inline for speed,
			 * and so that the test for a mounted on inode
			 * can be deferred until after we are sure that
			 * the inode isn't busy.
			 */
			if ((ip->i_flag&ILOCKED) != 0) {
				ip->i_flag |= IWANT;
				sleep((caddr_t)ip, PINOD);
				goto loop;
			}
			if ((ip->i_flag&IMOUNT) != 0) {
				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
					if (mp->m_inodp == ip) {
						dev = mp->m_dev;
						fs = mp->m_bufp->b_un.b_fs;
						ino = ROOTINO;
						goto loop;
					}
				panic("no imt");
			}
			if (ip->i_count == 0) {		/* ino on free list */
				if (iq = ip->i_freef)
					iq->i_freeb = ip->i_freeb;
				else
					ifreet = ip->i_freeb;
				*ip->i_freeb = iq;
				ip->i_freef = NULL;
				ip->i_freeb = NULL;
			}
			ip->i_count++;
			ip->i_flag |= ILOCKED;
			return(ip);
		}

	if ((ip = ifreeh) == NULL) {
		tablefull("inode");
		u.u_error = ENFILE;
		return(NULL);
	}
	if (ip->i_count)
		panic("free inode isn't");
	if (iq = ip->i_freef)
		iq->i_freeb = &ifreeh;
	ifreeh = iq;
	ip->i_freef = NULL;
	ip->i_freeb = NULL;
	/*
	 * Now to take inode off the hash chain it was on
	 * (initially, or after an iflush, it is on a "hash chain"
	 * consisting entirely of itself, and pointed to by no-one,
	 * but that doesn't matter), and put it on the chain for
	 * its new (ino, dev) pair
	 */
	remque(ip);
	insque(ip, ih);
	ip->i_dev = dev;
	ip->i_fs = fs;
	ip->i_number = ino;
	cacheinval(ip);
	ip->i_flag = ILOCKED;
	ip->i_count++;
	ip->i_lastr = 0;
#ifdef QUOTA
	dqrele(ip->i_dquot);
#endif
	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
	/*
	 * Check I/O errors
	 */
	if ((bp->b_flags&B_ERROR) != 0) {
		brelse(bp);
		/*
		 * the inode doesn't contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * 'iput' will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * we also lose its inumber, just in case (as iput
		 * doesn't do that any more) - but as it isn't on its
		 * hash chain, I doubt if this is really necessary .. kre
		 * (probably the two methods are interchangeable)
		 */
		ip->i_number = 0;
#ifdef QUOTA
		ip->i_dquot = NODQUOT;
#endif
		iput(ip);
		return(NULL);
	}
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_ic = dp->di_ic;
	brelse(bp);
#ifdef QUOTA
	if (ip->i_mode == 0)
		ip->i_dquot = NODQUOT;
	else
		ip->i_dquot = inoquota(ip);
#endif
	return (ip);
}
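
/*
 * Usage sketch (added for illustration; no caller appears in this file):
 * iget() hands back the inode locked and with an extra reference, or
 * NULL with u.u_error set.  A caller typically does
 *
 *	ip = iget(dev, fs, ino);
 *	if (ip == NULL)
 *		return;
 *	...examine or modify the locked inode...
 *	iput(ip);
 *
 * iput() (below) is the matching release: it verifies ILOCKED, unlocks
 * the inode, and drops the reference via irele().
 */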

/*
 * Convert a pointer to an inode into a reference to an inode.
 *
 * This is basically the internal piece of iget (after the
 * inode pointer is located) but without the test for mounted
 * filesystems.  It is the caller's responsibility to check that
 * the inode pointer is valid.
 */
igrab(ip)
	register struct inode *ip;
{
	while ((ip->i_flag&ILOCKED) != 0) {
		ip->i_flag |= IWANT;
		sleep((caddr_t)ip, PINOD);
	}
	if (ip->i_count == 0) {		/* ino on free list */
		register struct inode *iq;

		if (iq = ip->i_freef)
			iq->i_freeb = ip->i_freeb;
		else
			ifreet = ip->i_freeb;
		*ip->i_freeb = iq;
		ip->i_freef = NULL;
		ip->i_freeb = NULL;
	}
	ip->i_count++;
	ip->i_flag |= ILOCKED;
}

/*
 * Decrement reference count of
 * an inode structure.
 * On the last reference,
 * write the inode out and if necessary,
 * truncate and deallocate the file.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	irele(ip);
}

/*
 * Release one reference to an inode; the work common to iput()
 * and to callers holding an unlocked reference.
 */
irele(ip)
	register struct inode *ip;
{
	int mode;

	if (ip->i_count == 1) {
		ip->i_flag |= ILOCKED;
		if (ip->i_nlink <= 0) {
			itrunc(ip, (u_long)0);
			mode = ip->i_mode;
			ip->i_mode = 0;
			ip->i_rdev = 0;
			ip->i_flag |= IUPD|ICHG;
			ifree(ip, ip->i_number, mode);
#ifdef QUOTA
			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
			dqrele(ip->i_dquot);
			ip->i_dquot = NODQUOT;
#endif
		}
		IUPDAT(ip, &time, &time, 0);
		IUNLOCK(ip);
		ip->i_flag = 0;
		/*
		 * Put the inode on the end of the free list.
		 * Possibly in some cases it would be better to
		 * put the inode at the head of the free list,
		 * (eg: where i_mode == 0 || i_number == 0)
		 * but I will think about that later .. kre
		 * (i_number is rarely 0 - only after an i/o error in iget,
		 * where i_mode == 0, the inode will probably be wanted
		 * again soon for an ialloc, so possibly we should keep it)
		 */
		if (ifreeh) {
			*ifreet = ip;
			ip->i_freeb = ifreet;
		} else {
			ifreeh = ip;
			ip->i_freeb = &ifreeh;
		}
		ip->i_freef = NULL;
		ifreet = &ip->i_freef;
	} else if (!(ip->i_flag & ILOCKED))
		ITIMES(ip, &time, &time);
	ip->i_count--;
}

/*
 * Check accessed and update flags on
 * an inode structure.
 * If any is on, update the inode
 * with the current time.
 * If waitfor is given, then must ensure
 * i/o order so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	register struct buf *bp;
	struct dinode *dp;
	register struct fs *fp;

	fp = ip->i_fs;
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
		if (fp->fs_ronly)
			return;
		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
			(int)fp->fs_bsize);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			return;
		}
		if (ip->i_flag&IACC)
			ip->i_atime = ta->tv_sec;
		if (ip->i_flag&IUPD)
			ip->i_mtime = tm->tv_sec;
		if (ip->i_flag&ICHG)
			ip->i_ctime = time.tv_sec;
		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
		dp->di_ic = ip->i_ic;
		if (waitfor)
			bwrite(bp);
		else
			bdwrite(bp);
	}
}
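
/*
 * For example (the same sequence appears in itrunc() below): code that
 * modifies an inode marks the in-core copy and then calls iupdat() to
 * push the new times out,
 *
 *	oip->i_flag |= ICHG|IUPD;
 *	iupdat(oip, &time, &time, 1);
 *
 * a non-zero waitfor argument selects bwrite() so the update is
 * synchronous; 0 allows a delayed write via bdwrite().
 */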

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most
 * length size.  Free affected disk
 * blocks -- the blocks of the file
 * are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
	register struct inode *oip;
	u_long length;
{
	register daddr_t lastblock;
	daddr_t bn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, lbn, osize, size, error, count, level, s;
	long nblocks, blocksreleased = 0;
	register int i;
	dev_t dev;
	struct inode tip;
	extern long indirtrunc();
	extern struct cmap *mfind();

	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		iupdat(oip, &time, &time, 1);
		return;
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zeroed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
		if (u.u_error || (long)bn < 0)
			return;
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		count = howmany(size, DEV_BSIZE);
		dev = oip->i_dev;
		s = splimp();
		for (i = 0; i < count; i += CLSIZE)
			if (mfind(dev, bn + i))
				munhash(dev, bn + i);
		splx(s);
		bp = bread(dev, bn, size);
		if (bp->b_flags & B_ERROR) {
			u.u_error = EIO;
			oip->i_size = osize;
			brelse(bp);
			return;
		}
		bzero(bp->b_un.b_addr + offset, size - offset);
		bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	syncip(oip);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			blocksreleased +=
			    indirtrunc(ip, bn, lastiblock[level], level);
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				free(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register int size;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		size = (off_t)blksize(fs, ip, i);
		free(ip, bn, size);
		blocksreleased += btodb(size);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		int oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			free(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
}
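
/*
 * A worked example (added) of the lastblock/lastiblock arithmetic in
 * itrunc() above, assuming an 8192-byte block file system with 4-byte
 * disk addresses, so NDADDR == 12 and NINDIR(fs) == 2048:
 *
 *	truncating to length 100000 keeps logical blocks 0..12, so
 *		lastblock          = lblkno(fs, 100000 + 8191) - 1 = 12
 *		lastiblock[SINGLE] = 12 - 12   =  0	(keep entry 0)
 *		lastiblock[DOUBLE] =  0 - 2048 <  0	(free it entirely)
 *		lastiblock[TRIPLE]             <  0	(free it entirely)
 *
 *	truncating to length 0 gives lastblock == -1, so every direct
 *	and indirect block is released.
 */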

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * NB: triple indirect blocks are untested.
 */
long
indirtrunc(ip, bn, lastbn, level)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
{
	register int i;
	struct buf *bp, *copy;
	register daddr_t *bap;
	register struct fs *fs = ip->i_fs;
	daddr_t nb, last;
	long factor;
	int blocksreleased = 0, nblocks;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	copy = geteblk((int)fs->fs_bsize);
	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
	if (bp->b_flags&B_ERROR) {
		brelse(copy);
		brelse(bp);
		return (0);
	}
	bap = bp->b_un.b_daddr;
	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	bwrite(bp);
	bp = copy, bap = bp->b_un.b_daddr;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE)
			blocksreleased +=
			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
		free(ip, nb, (int)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];		/* i == last after the loop above */
		if (nb != 0)
			blocksreleased += indirtrunc(ip, nb, last, level - 1);
	}
	brelse(bp);
	return (blocksreleased);
}
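
/*
 * Example (added) of the factor/last computation in indirtrunc() above:
 * for a double indirect block (level == DOUBLE) with lastbn == 5000 and
 * NINDIR(fs) == 2048, factor == 2048 and last == 5000 / 2048 == 2, so
 * entries 3..NINDIR(fs)-1 are freed outright and entry 2 is trimmed by
 * a recursive call with lastbn == 5000 % 2048 == 904.
 */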

/*
 * remove any inodes in the inode cache belonging to dev
 *
 * There should not be any active ones, return error if any are found
 * (nb: this is a user error, not a system error)
 *
 * Also, count the references to dev by block devices - this really
 * has nothing to do with the object of the procedure, but as we have
 * to scan the inode table here anyway, we might as well get the
 * extra benefit.
 *
 * this is called from sumount()/sys3.c when dev is being unmounted
 */
#ifdef QUOTA
iflush(dev, iq)
	dev_t dev;
	struct inode *iq;
#else
iflush(dev)
	dev_t dev;
#endif
{
	register struct inode *ip;
	register open = 0;

	for (ip = inode; ip < inodeNINODE; ip++) {
#ifdef QUOTA
		if (ip != iq && ip->i_dev == dev)
#else
		if (ip->i_dev == dev)
#endif
			if (ip->i_count)
				return(-1);
			else {
				remque(ip);
				ip->i_forw = ip;
				ip->i_back = ip;
				/*
				 * as i_count == 0, the inode was on the free
				 * list already, just leave it there, it will
				 * fall off the bottom eventually. We could
				 * perhaps move it to the head of the free
				 * list, but as umounts are done so
				 * infrequently, we would gain very little,
				 * while making the code bigger.
				 */
#ifdef QUOTA
				dqrele(ip->i_dquot);
				ip->i_dquot = NODQUOT;
#endif
			}
		/* this else pairs with the i_dev == dev test above */
		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
		    ip->i_rdev == dev)
			open++;
	}
	return (open);
}

/*
 * Lock an inode.  If it is already locked, set the WANT bit and sleep.
 */
ilock(ip)
	register struct inode *ip;
{

	ILOCK(ip);
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 */
iunlock(ip)
	register struct inode *ip;
{

	IUNLOCK(ip);
}
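
/*
 * Illustrative note (added; the unmount code itself is in sumount(),
 * not in this file): a negative return from iflush() means some inode
 * on dev is still in use and the unmount should fail; otherwise the
 * return value is the number of block-device inodes still holding dev
 * open, e.g.
 *
 *	if ((nopen = iflush(dev)) < 0)
 *		...refuse the unmount...
 */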