1 /* 2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)ufs_inode.c 7.26 (Berkeley) 12/30/89 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "mount.h" 23 #include "user.h" 24 #include "proc.h" 25 #include "file.h" 26 #include "buf.h" 27 #include "cmap.h" 28 #include "vnode.h" 29 #include "../ufs/inode.h" 30 #include "../ufs/fs.h" 31 #include "../ufs/ufsmount.h" 32 #ifdef QUOTA 33 #include "../ufs/quota.h" 34 #endif 35 #include "kernel.h" 36 #include "malloc.h" 37 38 #define INOHSZ 512 39 #if ((INOHSZ&(INOHSZ-1)) == 0) 40 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 41 #else 42 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 43 #endif 44 45 union ihead { 46 union ihead *ih_head[2]; 47 struct inode *ih_chain[2]; 48 } ihead[INOHSZ]; 49 50 int prtactive; /* 1 => print out reclaim of active vnodes */ 51 52 /* 53 * Initialize hash links for inodes. 54 */ 55 ufs_init() 56 { 57 register int i; 58 register union ihead *ih = ihead; 59 60 #ifndef lint 61 if (VN_MAXPRIVATE < sizeof(struct inode)) 62 panic("ihinit: too small"); 63 #endif /* not lint */ 64 for (i = INOHSZ; --i >= 0; ih++) { 65 ih->ih_head[0] = ih; 66 ih->ih_head[1] = ih; 67 } 68 } 69 70 /* 71 * Look up an vnode/inode by device,inumber. 72 * If it is in core (in the inode structure), 73 * honor the locking protocol. 74 * If it is not in core, read it in from the 75 * specified device. 76 * Callers must check for mount points!! 77 * In all cases, a pointer to a locked 78 * inode structure is returned. 79 */ 80 iget(xp, ino, ipp) 81 struct inode *xp; 82 ino_t ino; 83 struct inode **ipp; 84 { 85 dev_t dev = xp->i_dev; 86 struct mount *mntp = ITOV(xp)->v_mount; 87 register struct fs *fs = VFSTOUFS(mntp)->um_fs; 88 extern struct vnodeops ufs_vnodeops, spec_inodeops; 89 register struct inode *ip, *iq; 90 register struct vnode *vp; 91 struct vnode *nvp; 92 struct buf *bp; 93 struct dinode *dp; 94 union ihead *ih; 95 int error; 96 97 ih = &ihead[INOHASH(dev, ino)]; 98 loop: 99 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) { 100 if (ino != ip->i_number || dev != ip->i_dev) 101 continue; 102 if ((ip->i_flag&ILOCKED) != 0) { 103 ip->i_flag |= IWANT; 104 sleep((caddr_t)ip, PINOD); 105 goto loop; 106 } 107 if (vget(ITOV(ip))) 108 goto loop; 109 *ipp = ip; 110 return(0); 111 } 112 /* 113 * Allocate a new inode. 114 */ 115 if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) { 116 *ipp = 0; 117 return (error); 118 } 119 ip = VTOI(nvp); 120 ip->i_vnode = nvp; 121 ip->i_flag = 0; 122 ip->i_devvp = 0; 123 ip->i_mode = 0; 124 #ifdef QUOTA 125 ip->i_dquot = NODQUOT; 126 #endif 127 /* 128 * Put it onto its hash chain and lock it so that other requests for 129 * this inode will block if they arrive while we are sleeping waiting 130 * for old data structures to be purged or for the contents of the 131 * disk portion of this inode to be read. 132 */ 133 ip->i_dev = dev; 134 ip->i_number = ino; 135 insque(ip, ih); 136 ILOCK(ip); 137 /* 138 * Read in the disk contents for the inode. 139 */ 140 if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)), 141 (int)fs->fs_bsize, NOCRED, &bp)) { 142 /* 143 * Unlock and discard unneeded inode. 144 */ 145 iput(ip); 146 brelse(bp); 147 *ipp = 0; 148 return (error); 149 } 150 dp = bp->b_un.b_dino; 151 dp += itoo(fs, ino); 152 ip->i_din = *dp; 153 brelse(bp); 154 /* 155 * Initialize the associated vnode 156 */ 157 vp = ITOV(ip); 158 vp->v_type = IFTOVT(ip->i_mode); 159 if (vp->v_type == VCHR || vp->v_type == VBLK) { 160 vp->v_op = &spec_inodeops; 161 if (nvp = checkalias(vp, ip->i_rdev, mntp)) { 162 /* 163 * Reinitialize aliased inode. 164 */ 165 vp = nvp; 166 iq = VTOI(vp); 167 iq->i_vnode = vp; 168 iq->i_flag = 0; 169 ILOCK(iq); 170 iq->i_din = ip->i_din; 171 iq->i_dev = dev; 172 iq->i_number = ino; 173 insque(iq, ih); 174 /* 175 * Discard unneeded vnode 176 */ 177 ip->i_mode = 0; 178 iput(ip); 179 ip = iq; 180 } 181 } 182 if (ino == ROOTINO) 183 vp->v_flag |= VROOT; 184 /* 185 * Finish inode initialization. 186 */ 187 ip->i_fs = fs; 188 ip->i_devvp = VFSTOUFS(mntp)->um_devvp; 189 VREF(ip->i_devvp); 190 #ifdef QUOTA 191 if (ip->i_mode != 0) 192 ip->i_dquot = inoquota(ip); 193 #endif 194 /* 195 * Set up a generation number for this inode if it does not 196 * already have one. This should only happen on old filesystems. 197 */ 198 if (ip->i_gen == 0) { 199 if (++nextgennumber < (u_long)time.tv_sec) 200 nextgennumber = time.tv_sec; 201 ip->i_gen = nextgennumber; 202 if ((vp->v_mount->m_flag & M_RDONLY) == 0) 203 ip->i_flag |= IMOD; 204 } 205 *ipp = ip; 206 return (0); 207 } 208 209 /* 210 * Unlock and decrement the reference count of an inode structure. 211 */ 212 iput(ip) 213 register struct inode *ip; 214 { 215 216 if ((ip->i_flag & ILOCKED) == 0) 217 panic("iput"); 218 IUNLOCK(ip); 219 vrele(ITOV(ip)); 220 } 221 222 /* 223 * Last reference to an inode, write the inode out and if necessary, 224 * truncate and deallocate the file. 225 */ 226 ufs_inactive(vp) 227 struct vnode *vp; 228 { 229 register struct inode *ip = VTOI(vp); 230 int mode, error = 0; 231 232 if (prtactive && vp->v_usecount != 0) 233 vprint("ufs_inactive: pushing active", vp); 234 /* 235 * Get rid of inodes related to stale file handles. 236 */ 237 if (ip->i_mode == 0) { 238 if ((vp->v_flag & VXLOCK) == 0) 239 vgone(vp); 240 return (0); 241 } 242 ILOCK(ip); 243 if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) { 244 error = itrunc(ip, (u_long)0, 0); 245 mode = ip->i_mode; 246 ip->i_mode = 0; 247 ip->i_rdev = 0; 248 ip->i_flag |= IUPD|ICHG; 249 ifree(ip, ip->i_number, mode); 250 #ifdef QUOTA 251 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 252 dqrele(ip->i_dquot); 253 ip->i_dquot = NODQUOT; 254 #endif 255 } 256 IUPDAT(ip, &time, &time, 0); 257 /* 258 * If we are done with the inode, reclaim it 259 * so that it can be reused immediately. 260 */ 261 if (vp->v_usecount == 0 && ip->i_mode == 0) { 262 vinvalbuf(vp, 0); 263 IUNLOCK(ip); 264 ip->i_flag = 0; 265 if ((vp->v_flag & VXLOCK) == 0) 266 vgone(vp); 267 return (error); 268 } 269 IUNLOCK(ip); 270 ip->i_flag = 0; 271 return (error); 272 } 273 274 /* 275 * Reclaim an inode so that it can be used for other purposes. 276 */ 277 ufs_reclaim(vp) 278 register struct vnode *vp; 279 { 280 register struct inode *ip = VTOI(vp); 281 282 if (prtactive && vp->v_usecount != 0) 283 vprint("ufs_reclaim: pushing active", vp); 284 /* 285 * Remove the inode from its hash chain. 286 */ 287 remque(ip); 288 ip->i_forw = ip; 289 ip->i_back = ip; 290 /* 291 * Purge old data structures associated with the inode. 292 */ 293 cache_purge(vp); 294 if (ip->i_devvp) { 295 vrele(ip->i_devvp); 296 ip->i_devvp = 0; 297 } 298 #ifdef QUOTA 299 dqrele(ip->i_dquot); 300 ip->i_dquot = NODQUOT; 301 #endif 302 ip->i_flag = 0; 303 return (0); 304 } 305 306 /* 307 * Check accessed and update flags on an inode structure. 308 * If any is on, update the inode with the current time. 309 * If waitfor is given, then must ensure I/O order, 310 * so wait for write to complete. 311 */ 312 iupdat(ip, ta, tm, waitfor) 313 register struct inode *ip; 314 struct timeval *ta, *tm; 315 int waitfor; 316 { 317 struct buf *bp; 318 struct vnode *vp = ITOV(ip); 319 struct dinode *dp; 320 register struct fs *fs; 321 int error; 322 323 fs = ip->i_fs; 324 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0) 325 return (0); 326 if (vp->v_mount->m_flag & M_RDONLY) 327 return (0); 328 error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)), 329 (int)fs->fs_bsize, NOCRED, &bp); 330 if (error) { 331 brelse(bp); 332 return (error); 333 } 334 if (ip->i_flag&IACC) 335 ip->i_atime = ta->tv_sec; 336 if (ip->i_flag&IUPD) 337 ip->i_mtime = tm->tv_sec; 338 if (ip->i_flag&ICHG) 339 ip->i_ctime = time.tv_sec; 340 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 341 dp = bp->b_un.b_dino + itoo(fs, ip->i_number); 342 *dp = ip->i_din; 343 if (waitfor) { 344 return (bwrite(bp)); 345 } else { 346 bdwrite(bp); 347 return (0); 348 } 349 } 350 351 #define SINGLE 0 /* index of single indirect block */ 352 #define DOUBLE 1 /* index of double indirect block */ 353 #define TRIPLE 2 /* index of triple indirect block */ 354 /* 355 * Truncate the inode ip to at most length size. Free affected disk 356 * blocks -- the blocks of the file are removed in reverse order. 357 * 358 * NB: triple indirect blocks are untested. 359 */ 360 itrunc(oip, length, flags) 361 register struct inode *oip; 362 u_long length; 363 int flags; 364 { 365 register daddr_t lastblock; 366 daddr_t bn, lbn, lastiblock[NIADDR]; 367 register struct fs *fs; 368 register struct inode *ip; 369 struct buf *bp; 370 int offset, osize, size, level; 371 long count, nblocks, blocksreleased = 0; 372 register int i; 373 int aflags, error, allerror; 374 struct inode tip; 375 376 if (oip->i_size <= length) { 377 oip->i_flag |= ICHG|IUPD; 378 error = iupdat(oip, &time, &time, 1); 379 return (error); 380 } 381 /* 382 * Calculate index into inode's block list of 383 * last direct and indirect blocks (if any) 384 * which we want to keep. Lastblock is -1 when 385 * the file is truncated to 0. 386 */ 387 fs = oip->i_fs; 388 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 389 lastiblock[SINGLE] = lastblock - NDADDR; 390 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 391 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 392 nblocks = btodb(fs->fs_bsize); 393 /* 394 * Update the size of the file. If the file is not being 395 * truncated to a block boundry, the contents of the 396 * partial block following the end of the file must be 397 * zero'ed in case it ever become accessable again because 398 * of subsequent file growth. 399 */ 400 osize = oip->i_size; 401 offset = blkoff(fs, length); 402 if (offset == 0) { 403 oip->i_size = length; 404 } else { 405 lbn = lblkno(fs, length); 406 aflags = B_CLRBUF; 407 if (flags & IO_SYNC) 408 aflags |= B_SYNC; 409 if (error = balloc(oip, lbn, offset, &bp, aflags)) 410 return (error); 411 oip->i_size = length; 412 size = blksize(fs, oip, lbn); 413 bn = bp->b_blkno; 414 count = howmany(size, CLBYTES); 415 for (i = 0; i < count; i++) 416 munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE); 417 bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 418 brealloc(bp, size); 419 if (flags & IO_SYNC) 420 bwrite(bp); 421 else 422 bdwrite(bp); 423 } 424 /* 425 * Update file and block pointers 426 * on disk before we start freeing blocks. 427 * If we crash before free'ing blocks below, 428 * the blocks will be returned to the free list. 429 * lastiblock values are also normalized to -1 430 * for calls to indirtrunc below. 431 */ 432 tip = *oip; 433 tip.i_size = osize; 434 for (level = TRIPLE; level >= SINGLE; level--) 435 if (lastiblock[level] < 0) { 436 oip->i_ib[level] = 0; 437 lastiblock[level] = -1; 438 } 439 for (i = NDADDR - 1; i > lastblock; i--) 440 oip->i_db[i] = 0; 441 oip->i_flag |= ICHG|IUPD; 442 vinvalbuf(ITOV(oip), (length > 0)); 443 allerror = iupdat(oip, &time, &time, MNT_WAIT); 444 445 /* 446 * Indirect blocks first. 447 */ 448 ip = &tip; 449 for (level = TRIPLE; level >= SINGLE; level--) { 450 bn = ip->i_ib[level]; 451 if (bn != 0) { 452 error = indirtrunc(ip, bn, lastiblock[level], level, 453 &count); 454 if (error) 455 allerror = error; 456 blocksreleased += count; 457 if (lastiblock[level] < 0) { 458 ip->i_ib[level] = 0; 459 blkfree(ip, bn, (off_t)fs->fs_bsize); 460 blocksreleased += nblocks; 461 } 462 } 463 if (lastiblock[level] >= 0) 464 goto done; 465 } 466 467 /* 468 * All whole direct blocks or frags. 469 */ 470 for (i = NDADDR - 1; i > lastblock; i--) { 471 register off_t bsize; 472 473 bn = ip->i_db[i]; 474 if (bn == 0) 475 continue; 476 ip->i_db[i] = 0; 477 bsize = (off_t)blksize(fs, ip, i); 478 blkfree(ip, bn, bsize); 479 blocksreleased += btodb(bsize); 480 } 481 if (lastblock < 0) 482 goto done; 483 484 /* 485 * Finally, look for a change in size of the 486 * last direct block; release any frags. 487 */ 488 bn = ip->i_db[lastblock]; 489 if (bn != 0) { 490 off_t oldspace, newspace; 491 492 /* 493 * Calculate amount of space we're giving 494 * back as old block size minus new block size. 495 */ 496 oldspace = blksize(fs, ip, lastblock); 497 ip->i_size = length; 498 newspace = blksize(fs, ip, lastblock); 499 if (newspace == 0) 500 panic("itrunc: newspace"); 501 if (oldspace - newspace > 0) { 502 /* 503 * Block number of space to be free'd is 504 * the old block # plus the number of frags 505 * required for the storage we're keeping. 506 */ 507 bn += numfrags(fs, newspace); 508 blkfree(ip, bn, oldspace - newspace); 509 blocksreleased += btodb(oldspace - newspace); 510 } 511 } 512 done: 513 /* BEGIN PARANOIA */ 514 for (level = SINGLE; level <= TRIPLE; level++) 515 if (ip->i_ib[level] != oip->i_ib[level]) 516 panic("itrunc1"); 517 for (i = 0; i < NDADDR; i++) 518 if (ip->i_db[i] != oip->i_db[i]) 519 panic("itrunc2"); 520 /* END PARANOIA */ 521 oip->i_blocks -= blocksreleased; 522 if (oip->i_blocks < 0) /* sanity */ 523 oip->i_blocks = 0; 524 oip->i_flag |= ICHG; 525 #ifdef QUOTA 526 (void) chkdq(oip, -blocksreleased, 0); 527 #endif 528 return (allerror); 529 } 530 531 /* 532 * Release blocks associated with the inode ip and 533 * stored in the indirect block bn. Blocks are free'd 534 * in LIFO order up to (but not including) lastbn. If 535 * level is greater than SINGLE, the block is an indirect 536 * block and recursive calls to indirtrunc must be used to 537 * cleanse other indirect blocks. 538 * 539 * NB: triple indirect blocks are untested. 540 */ 541 indirtrunc(ip, bn, lastbn, level, countp) 542 register struct inode *ip; 543 daddr_t bn, lastbn; 544 int level; 545 long *countp; 546 { 547 register int i; 548 struct buf *bp; 549 register struct fs *fs = ip->i_fs; 550 register daddr_t *bap; 551 daddr_t *copy, nb, last; 552 long blkcount, factor; 553 int nblocks, blocksreleased = 0; 554 int error, allerror = 0; 555 556 /* 557 * Calculate index in current block of last 558 * block to be kept. -1 indicates the entire 559 * block so we need not calculate the index. 560 */ 561 factor = 1; 562 for (i = SINGLE; i < level; i++) 563 factor *= NINDIR(fs); 564 last = lastbn; 565 if (lastbn > 0) 566 last /= factor; 567 nblocks = btodb(fs->fs_bsize); 568 /* 569 * Get buffer of block pointers, zero those 570 * entries corresponding to blocks to be free'd, 571 * and update on disk copy first. 572 */ 573 error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize, 574 NOCRED, &bp); 575 if (error) { 576 brelse(bp); 577 *countp = 0; 578 return (error); 579 } 580 if ((bp->b_flags & B_CACHE) == 0) 581 reassignbuf(bp, ITOV(ip)); 582 bap = bp->b_un.b_daddr; 583 MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); 584 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); 585 bzero((caddr_t)&bap[last + 1], 586 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 587 if (last == -1) 588 bp->b_flags |= B_INVAL; 589 error = bwrite(bp); 590 if (error) 591 allerror = error; 592 bap = copy; 593 594 /* 595 * Recursively free totally unused blocks. 596 */ 597 for (i = NINDIR(fs) - 1; i > last; i--) { 598 nb = bap[i]; 599 if (nb == 0) 600 continue; 601 if (level > SINGLE) { 602 error = indirtrunc(ip, nb, (daddr_t)-1, level - 1, 603 &blkcount); 604 if (error) 605 allerror = error; 606 blocksreleased += blkcount; 607 } 608 blkfree(ip, nb, (off_t)fs->fs_bsize); 609 blocksreleased += nblocks; 610 } 611 612 /* 613 * Recursively free last partial block. 614 */ 615 if (level > SINGLE && lastbn >= 0) { 616 last = lastbn % factor; 617 nb = bap[i]; 618 if (nb != 0) { 619 error = indirtrunc(ip, nb, last, level - 1, &blkcount); 620 if (error) 621 allerror = error; 622 blocksreleased += blkcount; 623 } 624 } 625 FREE(copy, M_TEMP); 626 *countp = blocksreleased; 627 return (allerror); 628 } 629 630 /* 631 * Lock an inode. If its already locked, set the WANT bit and sleep. 632 */ 633 ilock(ip) 634 register struct inode *ip; 635 { 636 637 while (ip->i_flag & ILOCKED) { 638 ip->i_flag |= IWANT; 639 if (ip->i_spare0 == u.u_procp->p_pid) 640 panic("locking against myself"); 641 ip->i_spare1 = u.u_procp->p_pid; 642 (void) sleep((caddr_t)ip, PINOD); 643 } 644 ip->i_spare1 = 0; 645 ip->i_spare0 = u.u_procp->p_pid; 646 u.u_spare[0]++; 647 ip->i_flag |= ILOCKED; 648 } 649 650 /* 651 * Unlock an inode. If WANT bit is on, wakeup. 652 */ 653 iunlock(ip) 654 register struct inode *ip; 655 { 656 657 if ((ip->i_flag & ILOCKED) == 0) 658 vprint("iunlock: unlocked inode", ITOV(ip)); 659 ip->i_spare0 = 0; 660 u.u_spare[0]--; 661 ip->i_flag &= ~ILOCKED; 662 if (ip->i_flag&IWANT) { 663 ip->i_flag &= ~IWANT; 664 wakeup((caddr_t)ip); 665 } 666 } 667 668 /* 669 * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. 670 * The mode is shifted to select the owner/group/other fields. The 671 * super user is granted all permissions. 672 * 673 * NB: Called from vnode op table. It seems this could all be done 674 * using vattr's but... 675 */ 676 iaccess(ip, mode, cred) 677 register struct inode *ip; 678 register int mode; 679 struct ucred *cred; 680 { 681 register gid_t *gp; 682 int i; 683 684 /* 685 * If you're the super-user, you always get access. 686 */ 687 if (cred->cr_uid == 0) 688 return (0); 689 /* 690 * Access check is based on only one of owner, group, public. 691 * If not owner, then check group. If not a member of the 692 * group, then check public access. 693 */ 694 if (cred->cr_uid != ip->i_uid) { 695 mode >>= 3; 696 gp = cred->cr_groups; 697 for (i = 0; i < cred->cr_ngroups; i++, gp++) 698 if (ip->i_gid == *gp) 699 goto found; 700 mode >>= 3; 701 found: 702 ; 703 } 704 if ((ip->i_mode & mode) != 0) 705 return (0); 706 return (EACCES); 707 } 708