/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ufs_inode.c	7.29 (Berkeley) 03/05/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#ifdef QUOTA
#include "../ufs/quota.h"
#endif
#include "kernel.h"
#include "malloc.h"

/*
 * Size of the in-core inode hash table.  When INOHSZ is a power of
 * two the hash can use the cheap mask form below; otherwise it falls
 * back to a modulus.
 */
#define INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Head of one doubly-linked inode hash chain.  The union lets an
 * empty chain's head point at itself (ih_head) while chain members
 * are threaded through their i_forw/i_back fields (ih_chain) by
 * insque/remque.
 */
union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	/* The inode must fit in the vnode's private data area. */
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	/* Mark every hash chain empty by pointing it back at itself. */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
}

/*
 * Look up an vnode/inode by device,inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * Returns 0 with *ipp set on success, or an errno with *ipp zeroed.
 */
iget(xp, ino, ipp)
	struct inode *xp;	/* any inode on the target filesystem */
	ino_t ino;		/* inode number wanted */
	struct inode **ipp;	/* OUT: locked, referenced inode */
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * First search the hash chain.  If the inode is locked, record
	 * interest (IWANT), sleep, and restart the whole scan: the chain
	 * may have changed while we slept.  vget failure likewise forces
	 * a rescan.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
#ifdef QUOTA
	ip->i_dquot = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.
		 * NOTE(review): bread appears to return a buffer even on
		 * error here, hence the brelse — confirm against bio code.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy the on-disk inode for this ino out of the filesystem block. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		/*
		 * checkalias may hand back an existing vnode for the same
		 * device; if so, move our identity onto it and discard the
		 * one we just built.
		 */
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (i_mode = 0 marks it stale for ufs_inactive).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 * The inode must be locked on entry; panics otherwise.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 * (i_mode == 0 means the inode was never fully initialized
	 * or was already deallocated.)
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * Link count zero on a writable filesystem: free the file's
	 * blocks and the inode itself.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
#ifdef QUOTA
		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
		dqrele(ip->i_dquot);
		ip->i_dquot = NODQUOT;
#endif
	}
	/* Flush any pending timestamp/inode updates to disk. */
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0 &&
	    (vp->v_flag & VXLOCK) == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 * Unhooks the inode from its hash chain and drops references held
 * on behalf of the inode (device vnode, quota).  Always returns 0.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 * Pointing i_forw/i_back at itself leaves it safely self-linked.
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NODQUOT;
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;	/* access / modification times to apply */
	int waitfor;			/* nonzero => synchronous write */
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing dirty, or read-only filesystem: nothing to push out. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	/* Read the filesystem block holding this inode's disk image. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	/* Apply whichever timestamps the flag bits request, then clear. */
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core dinode into its slot in the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;		/* new (smaller) file size in bytes */
	int flags;		/* IO_SYNC => synchronous metadata writes */
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	/* Growing or same size: just mark changed and flush the inode. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		bn = bp->b_blkno;
		/*
		 * Toss stale cached pages of this block.
		 * NOTE(review): munhash per CLBYTES cluster — presumably
		 * invalidates the buffer-cache mapping; verify against cmap.
		 */
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		/* Zero from the truncation point to the end of the block. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		brealloc(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 *
	 * tip keeps a private copy (with the old size) whose block
	 * pointers drive the actual freeing below, while oip's pointers
	 * are cleared and written out first for crash safety.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			/* Entire tree at this level gone: free its root too. */
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* A partially-kept level means nothing below it is freed. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* tip's pointers were only ever cleared in lockstep with oip's. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * *countp receives the number of device blocks released.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 * (factor = NINDIR^(level) blocks mapped per entry here.)
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/*
	 * Work from a private copy of the pointers so the zeroed
	 * on-disk block can be written out before any freeing starts.
	 */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * (The loop above exits with i == last, so bap[i] is the
	 * partially-kept entry.)
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode.  If its already locked, set the WANT bit and sleep.
 * i_spare0 records the pid of the lock holder so a recursive lock
 * attempt by the same process panics instead of deadlocking.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		if (ip->i_spare0 == u.u_procp->p_pid)
			panic("locking against myself");
		ip->i_spare1 = u.u_procp->p_pid;	/* pid we are waiting on behalf of */
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = u.u_procp->p_pid;
	u.u_spare[0]++;		/* per-process count of inode locks held */
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode. If WANT bit is on, wakeup.
656 */ 657 iunlock(ip) 658 register struct inode *ip; 659 { 660 661 if ((ip->i_flag & ILOCKED) == 0) 662 vprint("iunlock: unlocked inode", ITOV(ip)); 663 ip->i_spare0 = 0; 664 u.u_spare[0]--; 665 ip->i_flag &= ~ILOCKED; 666 if (ip->i_flag&IWANT) { 667 ip->i_flag &= ~IWANT; 668 wakeup((caddr_t)ip); 669 } 670 } 671 672 /* 673 * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. 674 * The mode is shifted to select the owner/group/other fields. The 675 * super user is granted all permissions. 676 * 677 * NB: Called from vnode op table. It seems this could all be done 678 * using vattr's but... 679 */ 680 iaccess(ip, mode, cred) 681 register struct inode *ip; 682 register int mode; 683 struct ucred *cred; 684 { 685 register gid_t *gp; 686 int i; 687 688 /* 689 * If you're the super-user, you always get access. 690 */ 691 if (cred->cr_uid == 0) 692 return (0); 693 /* 694 * Access check is based on only one of owner, group, public. 695 * If not owner, then check group. If not a member of the 696 * group, then check public access. 697 */ 698 if (cred->cr_uid != ip->i_uid) { 699 mode >>= 3; 700 gp = cred->cr_groups; 701 for (i = 0; i < cred->cr_ngroups; i++, gp++) 702 if (ip->i_gid == *gp) 703 goto found; 704 mode >>= 3; 705 found: 706 ; 707 } 708 if ((ip->i_mode & mode) != 0) 709 return (0); 710 return (EACCES); 711 } 712