1 /* 2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)ffs_inode.c 7.24 (Berkeley) 12/19/89 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "mount.h" 23 #include "user.h" 24 #include "file.h" 25 #include "buf.h" 26 #include "cmap.h" 27 #include "vnode.h" 28 #include "../ufs/inode.h" 29 #include "../ufs/fs.h" 30 #include "../ufs/ufsmount.h" 31 #ifdef QUOTA 32 #include "../ufs/quota.h" 33 #endif 34 #include "kernel.h" 35 #include "malloc.h" 36 37 #define INOHSZ 512 38 #if ((INOHSZ&(INOHSZ-1)) == 0) 39 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 40 #else 41 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 42 #endif 43 44 union ihead { 45 union ihead *ih_head[2]; 46 struct inode *ih_chain[2]; 47 } ihead[INOHSZ]; 48 49 int prtactive; /* 1 => print out reclaim of active vnodes */ 50 51 /* 52 * Initialize hash links for inodes. 53 */ 54 ufs_init() 55 { 56 register int i; 57 register union ihead *ih = ihead; 58 59 #ifndef lint 60 if (VN_MAXPRIVATE < sizeof(struct inode)) 61 panic("ihinit: too small"); 62 #endif /* not lint */ 63 for (i = INOHSZ; --i >= 0; ih++) { 64 ih->ih_head[0] = ih; 65 ih->ih_head[1] = ih; 66 } 67 } 68 69 /* 70 * Look up an vnode/inode by device,inumber. 71 * If it is in core (in the inode structure), 72 * honor the locking protocol. 73 * If it is not in core, read it in from the 74 * specified device. 75 * Callers must check for mount points!! 76 * In all cases, a pointer to a locked 77 * inode structure is returned. 78 */ 79 iget(xp, ino, ipp) 80 struct inode *xp; 81 ino_t ino; 82 struct inode **ipp; 83 { 84 dev_t dev = xp->i_dev; 85 struct mount *mntp = ITOV(xp)->v_mount; 86 register struct fs *fs = VFSTOUFS(mntp)->um_fs; 87 extern struct vnodeops ufs_vnodeops, spec_inodeops; 88 register struct inode *ip, *iq; 89 register struct vnode *vp; 90 struct vnode *nvp; 91 struct buf *bp; 92 struct dinode *dp; 93 union ihead *ih; 94 int error; 95 96 ih = &ihead[INOHASH(dev, ino)]; 97 loop: 98 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) { 99 if (ino != ip->i_number || dev != ip->i_dev) 100 continue; 101 if ((ip->i_flag&ILOCKED) != 0) { 102 ip->i_flag |= IWANT; 103 sleep((caddr_t)ip, PINOD); 104 goto loop; 105 } 106 if (vget(ITOV(ip))) 107 goto loop; 108 *ipp = ip; 109 return(0); 110 } 111 /* 112 * Allocate a new inode. 113 */ 114 if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) { 115 *ipp = 0; 116 return (error); 117 } 118 ip = VTOI(nvp); 119 ip->i_vnode = nvp; 120 ip->i_flag = 0; 121 ip->i_devvp = 0; 122 ip->i_lastr = 0; 123 ip->i_mode = 0; 124 #ifdef QUOTA 125 ip->i_dquot = NODQUOT; 126 #endif 127 /* 128 * Put it onto its hash chain and lock it so that other requests for 129 * this inode will block if they arrive while we are sleeping waiting 130 * for old data structures to be purged or for the contents of the 131 * disk portion of this inode to be read. 132 */ 133 ip->i_dev = dev; 134 ip->i_number = ino; 135 insque(ip, ih); 136 ILOCK(ip); 137 /* 138 * Read in the disk contents for the inode. 139 */ 140 if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)), 141 (int)fs->fs_bsize, NOCRED, &bp)) { 142 /* 143 * Unlock and discard unneeded inode. 144 */ 145 iput(ip); 146 brelse(bp); 147 *ipp = 0; 148 return (error); 149 } 150 dp = bp->b_un.b_dino; 151 dp += itoo(fs, ino); 152 ip->i_din = *dp; 153 brelse(bp); 154 /* 155 * Initialize the associated vnode 156 */ 157 vp = ITOV(ip); 158 vp->v_type = IFTOVT(ip->i_mode); 159 if (vp->v_type == VCHR || vp->v_type == VBLK) { 160 vp->v_op = &spec_inodeops; 161 if (nvp = checkalias(vp, ip->i_rdev, mntp)) { 162 /* 163 * Reinitialize aliased inode. 164 */ 165 vp = nvp; 166 iq = VTOI(vp); 167 iq->i_vnode = vp; 168 iq->i_lastr = 0; 169 iq->i_flag = 0; 170 ILOCK(iq); 171 iq->i_din = ip->i_din; 172 iq->i_dev = dev; 173 iq->i_number = ino; 174 insque(iq, ih); 175 /* 176 * Discard unneeded vnode 177 */ 178 ip->i_mode = 0; 179 iput(ip); 180 ip = iq; 181 } 182 } 183 if (ino == ROOTINO) 184 vp->v_flag |= VROOT; 185 /* 186 * Finish inode initialization. 187 */ 188 ip->i_fs = fs; 189 ip->i_devvp = VFSTOUFS(mntp)->um_devvp; 190 VREF(ip->i_devvp); 191 #ifdef QUOTA 192 if (ip->i_mode != 0) 193 ip->i_dquot = inoquota(ip); 194 #endif 195 /* 196 * Set up a generation number for this inode if it does not 197 * already have one. This should only happen on old filesystems. 198 */ 199 if (ip->i_gen == 0) { 200 if (++nextgennumber < (u_long)time.tv_sec) 201 nextgennumber = time.tv_sec; 202 ip->i_gen = nextgennumber; 203 if ((vp->v_mount->m_flag & M_RDONLY) == 0) 204 ip->i_flag |= IMOD; 205 } 206 *ipp = ip; 207 return (0); 208 } 209 210 /* 211 * Unlock and decrement the reference count of an inode structure. 212 */ 213 iput(ip) 214 register struct inode *ip; 215 { 216 217 if ((ip->i_flag & ILOCKED) == 0) 218 panic("iput"); 219 IUNLOCK(ip); 220 vrele(ITOV(ip)); 221 } 222 223 /* 224 * Last reference to an inode, write the inode out and if necessary, 225 * truncate and deallocate the file. 226 */ 227 ufs_inactive(vp) 228 struct vnode *vp; 229 { 230 register struct inode *ip = VTOI(vp); 231 int mode, error = 0; 232 233 if (prtactive && vp->v_count != 0) 234 vprint("ufs_inactive: pushing active", vp); 235 /* 236 * Get rid of inodes related to stale file handles. 237 */ 238 if (ip->i_mode == 0) { 239 if ((vp->v_flag & VXLOCK) == 0) 240 vgone(vp); 241 return (0); 242 } 243 ILOCK(ip); 244 if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) { 245 error = itrunc(ip, (u_long)0, 0); 246 mode = ip->i_mode; 247 ip->i_mode = 0; 248 ip->i_rdev = 0; 249 ip->i_flag |= IUPD|ICHG; 250 ifree(ip, ip->i_number, mode); 251 #ifdef QUOTA 252 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 253 dqrele(ip->i_dquot); 254 ip->i_dquot = NODQUOT; 255 #endif 256 } 257 IUPDAT(ip, &time, &time, 0); 258 /* 259 * If we are done with the inode, reclaim it 260 * so that it can be reused immediately. 261 */ 262 if (vp->v_count == 0 && ip->i_mode == 0) { 263 vinvalbuf(vp, 0); 264 IUNLOCK(ip); 265 ip->i_flag = 0; 266 if ((vp->v_flag & VXLOCK) == 0) 267 vgone(vp); 268 return (error); 269 } 270 IUNLOCK(ip); 271 ip->i_flag = 0; 272 return (error); 273 } 274 275 /* 276 * Reclaim an inode so that it can be used for other purposes. 277 */ 278 ufs_reclaim(vp) 279 register struct vnode *vp; 280 { 281 register struct inode *ip = VTOI(vp); 282 283 if (prtactive && vp->v_count != 0) 284 vprint("ufs_reclaim: pushing active", vp); 285 /* 286 * Remove the inode from its hash chain. 287 */ 288 remque(ip); 289 ip->i_forw = ip; 290 ip->i_back = ip; 291 /* 292 * Purge old data structures associated with the inode. 293 */ 294 cache_purge(vp); 295 if (ip->i_devvp) { 296 vrele(ip->i_devvp); 297 ip->i_devvp = 0; 298 } 299 #ifdef QUOTA 300 dqrele(ip->i_dquot); 301 ip->i_dquot = NODQUOT; 302 #endif 303 ip->i_flag = 0; 304 return (0); 305 } 306 307 /* 308 * Check accessed and update flags on an inode structure. 309 * If any is on, update the inode with the current time. 310 * If waitfor is given, then must ensure I/O order, 311 * so wait for write to complete. 312 */ 313 iupdat(ip, ta, tm, waitfor) 314 register struct inode *ip; 315 struct timeval *ta, *tm; 316 int waitfor; 317 { 318 struct buf *bp; 319 struct vnode *vp = ITOV(ip); 320 struct dinode *dp; 321 register struct fs *fs; 322 int error; 323 324 fs = ip->i_fs; 325 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0) 326 return (0); 327 if (vp->v_mount->m_flag & M_RDONLY) 328 return (0); 329 error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)), 330 (int)fs->fs_bsize, NOCRED, &bp); 331 if (error) { 332 brelse(bp); 333 return (error); 334 } 335 if (ip->i_flag&IACC) 336 ip->i_atime = ta->tv_sec; 337 if (ip->i_flag&IUPD) 338 ip->i_mtime = tm->tv_sec; 339 if (ip->i_flag&ICHG) 340 ip->i_ctime = time.tv_sec; 341 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 342 dp = bp->b_un.b_dino + itoo(fs, ip->i_number); 343 *dp = ip->i_din; 344 if (waitfor) { 345 return (bwrite(bp)); 346 } else { 347 bdwrite(bp); 348 return (0); 349 } 350 } 351 352 #define SINGLE 0 /* index of single indirect block */ 353 #define DOUBLE 1 /* index of double indirect block */ 354 #define TRIPLE 2 /* index of triple indirect block */ 355 /* 356 * Truncate the inode ip to at most length size. Free affected disk 357 * blocks -- the blocks of the file are removed in reverse order. 358 * 359 * NB: triple indirect blocks are untested. 360 */ 361 itrunc(oip, length, flags) 362 register struct inode *oip; 363 u_long length; 364 int flags; 365 { 366 register daddr_t lastblock; 367 daddr_t bn, lbn, lastiblock[NIADDR]; 368 register struct fs *fs; 369 register struct inode *ip; 370 struct buf *bp; 371 int offset, osize, size, level; 372 long count, nblocks, blocksreleased = 0; 373 register int i; 374 int aflags, error, allerror; 375 struct inode tip; 376 377 if (oip->i_size <= length) { 378 oip->i_flag |= ICHG|IUPD; 379 error = iupdat(oip, &time, &time, 1); 380 return (error); 381 } 382 /* 383 * Calculate index into inode's block list of 384 * last direct and indirect blocks (if any) 385 * which we want to keep. Lastblock is -1 when 386 * the file is truncated to 0. 387 */ 388 fs = oip->i_fs; 389 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 390 lastiblock[SINGLE] = lastblock - NDADDR; 391 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 392 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 393 nblocks = btodb(fs->fs_bsize); 394 /* 395 * Update the size of the file. If the file is not being 396 * truncated to a block boundry, the contents of the 397 * partial block following the end of the file must be 398 * zero'ed in case it ever become accessable again because 399 * of subsequent file growth. 400 */ 401 osize = oip->i_size; 402 offset = blkoff(fs, length); 403 if (offset == 0) { 404 oip->i_size = length; 405 } else { 406 lbn = lblkno(fs, length); 407 aflags = B_CLRBUF; 408 if (flags & IO_SYNC) 409 aflags |= B_SYNC; 410 if (error = balloc(oip, lbn, offset, &bp, aflags)) 411 return (error); 412 oip->i_size = length; 413 size = blksize(fs, oip, lbn); 414 bn = bp->b_blkno; 415 count = howmany(size, CLBYTES); 416 for (i = 0; i < count; i++) 417 munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE); 418 bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 419 brealloc(bp, size); 420 if (flags & IO_SYNC) 421 bwrite(bp); 422 else 423 bdwrite(bp); 424 } 425 /* 426 * Update file and block pointers 427 * on disk before we start freeing blocks. 428 * If we crash before free'ing blocks below, 429 * the blocks will be returned to the free list. 430 * lastiblock values are also normalized to -1 431 * for calls to indirtrunc below. 432 */ 433 tip = *oip; 434 tip.i_size = osize; 435 for (level = TRIPLE; level >= SINGLE; level--) 436 if (lastiblock[level] < 0) { 437 oip->i_ib[level] = 0; 438 lastiblock[level] = -1; 439 } 440 for (i = NDADDR - 1; i > lastblock; i--) 441 oip->i_db[i] = 0; 442 oip->i_flag |= ICHG|IUPD; 443 vinvalbuf(ITOV(oip), (length > 0)); 444 allerror = iupdat(oip, &time, &time, MNT_WAIT); 445 446 /* 447 * Indirect blocks first. 448 */ 449 ip = &tip; 450 for (level = TRIPLE; level >= SINGLE; level--) { 451 bn = ip->i_ib[level]; 452 if (bn != 0) { 453 error = indirtrunc(ip, bn, lastiblock[level], level, 454 &count); 455 if (error) 456 allerror = error; 457 blocksreleased += count; 458 if (lastiblock[level] < 0) { 459 ip->i_ib[level] = 0; 460 blkfree(ip, bn, (off_t)fs->fs_bsize); 461 blocksreleased += nblocks; 462 } 463 } 464 if (lastiblock[level] >= 0) 465 goto done; 466 } 467 468 /* 469 * All whole direct blocks or frags. 470 */ 471 for (i = NDADDR - 1; i > lastblock; i--) { 472 register off_t bsize; 473 474 bn = ip->i_db[i]; 475 if (bn == 0) 476 continue; 477 ip->i_db[i] = 0; 478 bsize = (off_t)blksize(fs, ip, i); 479 blkfree(ip, bn, bsize); 480 blocksreleased += btodb(bsize); 481 } 482 if (lastblock < 0) 483 goto done; 484 485 /* 486 * Finally, look for a change in size of the 487 * last direct block; release any frags. 488 */ 489 bn = ip->i_db[lastblock]; 490 if (bn != 0) { 491 off_t oldspace, newspace; 492 493 /* 494 * Calculate amount of space we're giving 495 * back as old block size minus new block size. 496 */ 497 oldspace = blksize(fs, ip, lastblock); 498 ip->i_size = length; 499 newspace = blksize(fs, ip, lastblock); 500 if (newspace == 0) 501 panic("itrunc: newspace"); 502 if (oldspace - newspace > 0) { 503 /* 504 * Block number of space to be free'd is 505 * the old block # plus the number of frags 506 * required for the storage we're keeping. 507 */ 508 bn += numfrags(fs, newspace); 509 blkfree(ip, bn, oldspace - newspace); 510 blocksreleased += btodb(oldspace - newspace); 511 } 512 } 513 done: 514 /* BEGIN PARANOIA */ 515 for (level = SINGLE; level <= TRIPLE; level++) 516 if (ip->i_ib[level] != oip->i_ib[level]) 517 panic("itrunc1"); 518 for (i = 0; i < NDADDR; i++) 519 if (ip->i_db[i] != oip->i_db[i]) 520 panic("itrunc2"); 521 /* END PARANOIA */ 522 oip->i_blocks -= blocksreleased; 523 if (oip->i_blocks < 0) /* sanity */ 524 oip->i_blocks = 0; 525 oip->i_flag |= ICHG; 526 #ifdef QUOTA 527 (void) chkdq(oip, -blocksreleased, 0); 528 #endif 529 return (allerror); 530 } 531 532 /* 533 * Release blocks associated with the inode ip and 534 * stored in the indirect block bn. Blocks are free'd 535 * in LIFO order up to (but not including) lastbn. If 536 * level is greater than SINGLE, the block is an indirect 537 * block and recursive calls to indirtrunc must be used to 538 * cleanse other indirect blocks. 539 * 540 * NB: triple indirect blocks are untested. 541 */ 542 indirtrunc(ip, bn, lastbn, level, countp) 543 register struct inode *ip; 544 daddr_t bn, lastbn; 545 int level; 546 long *countp; 547 { 548 register int i; 549 struct buf *bp; 550 register struct fs *fs = ip->i_fs; 551 register daddr_t *bap; 552 daddr_t *copy, nb, last; 553 long blkcount, factor; 554 int nblocks, blocksreleased = 0; 555 int error, allerror = 0; 556 557 /* 558 * Calculate index in current block of last 559 * block to be kept. -1 indicates the entire 560 * block so we need not calculate the index. 561 */ 562 factor = 1; 563 for (i = SINGLE; i < level; i++) 564 factor *= NINDIR(fs); 565 last = lastbn; 566 if (lastbn > 0) 567 last /= factor; 568 nblocks = btodb(fs->fs_bsize); 569 /* 570 * Get buffer of block pointers, zero those 571 * entries corresponding to blocks to be free'd, 572 * and update on disk copy first. 573 */ 574 error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize, 575 NOCRED, &bp); 576 if (error) { 577 brelse(bp); 578 *countp = 0; 579 return (error); 580 } 581 if ((bp->b_flags & B_CACHE) == 0) 582 reassignbuf(bp, ITOV(ip)); 583 bap = bp->b_un.b_daddr; 584 MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); 585 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); 586 bzero((caddr_t)&bap[last + 1], 587 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 588 if (last == -1) 589 bp->b_flags |= B_INVAL; 590 error = bwrite(bp); 591 if (error) 592 allerror = error; 593 bap = copy; 594 595 /* 596 * Recursively free totally unused blocks. 597 */ 598 for (i = NINDIR(fs) - 1; i > last; i--) { 599 nb = bap[i]; 600 if (nb == 0) 601 continue; 602 if (level > SINGLE) { 603 error = indirtrunc(ip, nb, (daddr_t)-1, level - 1, 604 &blkcount); 605 if (error) 606 allerror = error; 607 blocksreleased += blkcount; 608 } 609 blkfree(ip, nb, (off_t)fs->fs_bsize); 610 blocksreleased += nblocks; 611 } 612 613 /* 614 * Recursively free last partial block. 615 */ 616 if (level > SINGLE && lastbn >= 0) { 617 last = lastbn % factor; 618 nb = bap[i]; 619 if (nb != 0) { 620 error = indirtrunc(ip, nb, last, level - 1, &blkcount); 621 if (error) 622 allerror = error; 623 blocksreleased += blkcount; 624 } 625 } 626 FREE(copy, M_TEMP); 627 *countp = blocksreleased; 628 return (allerror); 629 } 630 631 /* 632 * Lock an inode. If its already locked, set the WANT bit and sleep. 633 */ 634 ilock(ip) 635 register struct inode *ip; 636 { 637 638 while (ip->i_flag & ILOCKED) { 639 ip->i_flag |= IWANT; 640 (void) sleep((caddr_t)ip, PINOD); 641 } 642 ip->i_flag |= ILOCKED; 643 } 644 645 /* 646 * Unlock an inode. If WANT bit is on, wakeup. 647 */ 648 iunlock(ip) 649 register struct inode *ip; 650 { 651 652 if ((ip->i_flag & ILOCKED) == 0) 653 vprint("iunlock: unlocked inode", ITOV(ip)); 654 ip->i_flag &= ~ILOCKED; 655 if (ip->i_flag&IWANT) { 656 ip->i_flag &= ~IWANT; 657 wakeup((caddr_t)ip); 658 } 659 } 660 661 /* 662 * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. 663 * The mode is shifted to select the owner/group/other fields. The 664 * super user is granted all permissions. 665 * 666 * NB: Called from vnode op table. It seems this could all be done 667 * using vattr's but... 668 */ 669 iaccess(ip, mode, cred) 670 register struct inode *ip; 671 register int mode; 672 struct ucred *cred; 673 { 674 register gid_t *gp; 675 int i; 676 677 /* 678 * If you're the super-user, you always get access. 679 */ 680 if (cred->cr_uid == 0) 681 return (0); 682 /* 683 * Access check is based on only one of owner, group, public. 684 * If not owner, then check group. If not a member of the 685 * group, then check public access. 686 */ 687 if (cred->cr_uid != ip->i_uid) { 688 mode >>= 3; 689 gp = cred->cr_groups; 690 for (i = 0; i < cred->cr_ngroups; i++, gp++) 691 if (ip->i_gid == *gp) 692 goto found; 693 mode >>= 3; 694 found: 695 ; 696 } 697 if ((ip->i_mode & mode) != 0) 698 return (0); 699 return (EACCES); 700 } 701