/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ufs_inode.c	7.28 (Berkeley) 02/08/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#ifdef QUOTA
#include "../ufs/quota.h"
#endif
#include "kernel.h"
#include "malloc.h"

#define	INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
}

/*
 * Look up a vnode/inode by device, inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return (0);
	}
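	/*
	 * Editor's sketch, not in the original source: a caller that
	 * already holds an inode on the same file system (here xp)
	 * typically uses iget() like this, with ip coming back locked
	 * (per the locking protocol described above) and released
	 * again via iput():
	 *
	 *	struct inode *ip;
	 *	int error;
	 *
	 *	if (error = iget(xp, ino, &ip))
	 *		return (error);
	 *	... examine or modify ip ...
	 *	iput(ip);
	 */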
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
#ifdef QUOTA
	ip->i_dquot = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode.
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode.
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one.  This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
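	/*
	 * Editor's note: ufs_inactive runs when the last reference to
	 * the vnode is dropped (vrele), not when the last name for the
	 * file is removed.  An unlinked-but-open file therefore keeps
	 * its blocks until this point; the i_nlink check below is what
	 * finally deallocates them:
	 *
	 *	fd = open("f", ...); unlink("f");	file lives on
	 *	close(fd);		last vrele -> ufs_inactive
	 */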
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
#ifdef QUOTA
		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
		dqrele(ip->i_dquot);
		ip->i_dquot = NODQUOT;
#endif
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0 &&
	    (vp->v_flag & VXLOCK) == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NODQUOT;
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
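 *
 * Worked example (editor's addition; the 8192-byte block size,
 * NDADDR == 12 and NINDIR(fs) == 2048 are illustrative assumptions):
 * truncating to length 100000 gives
 *
 *	lastblock = (100000 + 8192 - 1) / 8192 - 1 = 12
 *	lastiblock[SINGLE] = 12 - NDADDR = 0
 *	lastiblock[DOUBLE] = 0 - 2048 < 0
 *
 * so direct blocks 0-11 survive, only entry 0 of the single indirect
 * block is kept (it maps file block 12, which holds byte 99999), and
 * the double and triple indirect trees are freed entirely.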
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file.  If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		bn = bp->b_blkno;
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		brealloc(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
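	 *
	 * Editor's note: blocksreleased and i_blocks are kept in
	 * DEV_BSIZE units via btodb(), not in file system blocks;
	 * with the traditional 512-byte DEV_BSIZE (an assumption
	 * here, as is the 8192-byte block size), freeing one full
	 * block adds btodb(8192) == 16.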
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
	    NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
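	 *
	 * Editor's sketch of the index arithmetic (NINDIR(fs) == 2048
	 * assumed for illustration): at level == DOUBLE, factor ==
	 * 2048, so entry i of this block spans file blocks i*2048
	 * through i*2048 + 2047 of the region it maps.  Entries above
	 * last == lastbn / 2048 are freed whole by the loop below;
	 * the entry at last itself is then trimmed by the recursive
	 * call further down, which passes lastbn % 2048 as the new
	 * lastbn.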
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];	/* the loop above left i == the old last */
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode.  If it is already locked, set the WANT bit and sleep.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		if (ip->i_spare0 == u.u_procp->p_pid)
			panic("locking against myself");
		ip->i_spare1 = u.u_procp->p_pid;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = u.u_procp->p_pid;
	u.u_spare[0]++;
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode.  If the WANT bit is on, wakeup.
 */
iunlock(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		vprint("iunlock: unlocked inode", ITOV(ip));
	ip->i_spare0 = 0;
	u.u_spare[0]--;
	ip->i_flag &= ~ILOCKED;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}

/*
 * Check mode permission on inode pointer.  Mode is READ, WRITE or EXEC.
 * The mode is shifted to select the owner/group/other fields.  The
 * super-user is granted all permissions.
 *
 * NB: Called from vnode op table.  It seems this could all be done
 * using vattr's but...
 */
iaccess(ip, mode, cred)
	register struct inode *ip;
	register int mode;
	struct ucred *cred;
{
	register gid_t *gp;
	int i;

	/*
	 * If you're the super-user, you always get access.
	 */
	if (cred->cr_uid == 0)
		return (0);
	/*
	 * Access check is based on only one of owner, group, public.
	 * If not owner, then check group.  If not a member of the
	 * group, then check public access.
	 */
	if (cred->cr_uid != ip->i_uid) {
		mode >>= 3;
		gp = cred->cr_groups;
		for (i = 0; i < cred->cr_ngroups; i++, gp++)
			if (ip->i_gid == *gp)
				goto found;
		mode >>= 3;
found:
		;
	}
	if ((ip->i_mode & mode) != 0)
		return (0);
	return (EACCES);
}
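
/*
 * Editor's sketch, not part of the original file: iaccess() expects
 * the owner-field permission bits (assuming the usual inode.h values
 * IREAD == 0400, IWRITE == 0200, IEXEC == 0100) and shifts them down
 * to the group and other fields as needed.  A hypothetical write
 * check would read:
 *
 *	if (error = iaccess(ip, IWRITE, cred))
 *		return (error);
 *
 * For a file with mode 0644 owned by another uid whose group the
 * caller is not in, IWRITE (0200) becomes 0020 (group) and then 0002
 * (other); 0644 & 0002 == 0, so EACCES is returned, while the same
 * walk for IREAD ends at 0004 and 0644 & 0004 grants access.
 */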