/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ufs_inode.c	7.21 (Berkeley) 11/22/89
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#ifdef QUOTA
#include "../ufs/quota.h"
#endif
#include "kernel.h"
#include "malloc.h"

/*
 * Size of the in-core inode hash table, and the hash function mapping
 * a (device, inode number) pair to a bucket.  When INOHSZ is a power
 * of two the cheaper mask form is used instead of a modulus.
 */
#define INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Hash bucket heads.  Each bucket is a doubly linked ring; the union
 * lets a bucket head be treated either as a pair of head pointers or
 * as the forw/back chain of the inodes linked through it (the bucket
 * head itself acts as the list sentinel).
 */
union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	/* An in-core inode must fit in a vnode's private area. */
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	/* Make each hash bucket an empty, self-referencing ring. */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
}

/*
 * Look up a vnode/inode by device,inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * xp is an existing inode on the same filesystem, used only to find
 * the device and mount point; the result is returned locked through
 * *ipp (set to 0 on error).  Returns 0 or an errno.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/* Search the hash chain for an in-core copy first. */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			/*
			 * Locked by someone else: sleep and rescan from
			 * the top, since the chain may have changed.
			 */
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* vget failure means the vnode is being reclaimed; retry. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_lastr = 0;
	ip->i_mode = 0;
#ifdef QUOTA
	ip->i_dquot = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.
		 * (bread returns the buffer even on failure, so it
		 * must still be released.)
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy the on-disk inode into the in-core inode. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/* Device nodes use the special-device vnode operations. */
		vp->v_rdev = ip->i_rdev;
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_lastr = 0;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (zeroed mode marks it stale for ufs_inactive).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 * The inode must be locked on entry.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 * Called from the vnode op table; returns 0 or an errno from the
 * truncation.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_count != 0)
		printf("ufs_inactive: pushing active ino %d dev 0x%x\n",
			ip->i_number, ip->i_dev);
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/* Link count zero on a writable filesystem: free the file. */
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
		error = itrunc(ip, (u_long)0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
#ifdef QUOTA
		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
		dqrele(ip->i_dquot);
		ip->i_dquot = NODQUOT;
#endif
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_count == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 * Called from the vnode op table; always returns 0.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);

	if (prtactive && vp->v_count != 0)
		printf("ufs_reclaim: pushing active ino %d dev 0x%x\n",
			ip->i_number, ip->i_dev);
	/*
	 * Remove the inode from its hash chain.
	 * Pointing i_forw/i_back at itself leaves it safely unlinked.
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NODQUOT;
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing to push out unless a time/modified flag is set. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	/* Never write to a read-only filesystem. */
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	/* Read the filesystem block containing the on-disk inode. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core inode into its slot in the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		/* Caller requires I/O ordering: synchronous write. */
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define SINGLE	0	/* index of single indirect block */
#define DOUBLE	1	/* index of double indirect block */
#define TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
	register struct inode *oip;
	u_long length;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int error, allerror = 0;
	struct inode tip;

	/* Growing or no-op: just mark the inode and push it out. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		/* Ensure the partial last block exists, then zero its tail. */
		lbn = lblkno(fs, length);
		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
		if (error)
			return (error);
		if ((long)bn < 0)
			panic("itrunc: hole");
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		/* Invalidate any cached pages mapping this block. */
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		error = bread(oip->i_devvp, bn, size, NOCRED, &bp);
		if (error) {
			oip->i_size = osize;	/* restore on failure */
			brelse(bp);
			return (error);
		}
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;	/* tip keeps the old block pointers for freeing */
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	allerror = syncip(oip, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				/* Entire indirect tree gone: free its root. */
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* tip's pointers must now agree with oip's surviving pointers. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * The total count of blocks released (in DEV_BSIZE units)
 * is returned through *countp.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	/* Work from a private copy; the zeroed original goes to disk now. */
	bap = bp->b_un.b_daddr;
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * NOTE: on loop exit above, i equals the old value of last,
	 * i.e. the index of the partially-retained indirect entry.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode. If its already locked, set the WANT bit and sleep.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 * Unlocking an unlocked inode is diagnosed but not fatal.
 */
iunlock(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		printf("unlocking unlocked inode %d on dev 0x%x\n",
			ip->i_number, ip->i_dev);
	ip->i_flag &= ~ILOCKED;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}

/*
 * Check mode permission on inode pointer.
Mode is READ, WRITE or EXEC. 655 * The mode is shifted to select the owner/group/other fields. The 656 * super user is granted all permissions. 657 * 658 * NB: Called from vnode op table. It seems this could all be done 659 * using vattr's but... 660 */ 661 iaccess(ip, mode, cred) 662 register struct inode *ip; 663 register int mode; 664 struct ucred *cred; 665 { 666 register gid_t *gp; 667 int i; 668 669 /* 670 * If you're the super-user, you always get access. 671 */ 672 if (cred->cr_uid == 0) 673 return (0); 674 /* 675 * Access check is based on only one of owner, group, public. 676 * If not owner, then check group. If not a member of the 677 * group, then check public access. 678 */ 679 if (cred->cr_uid != ip->i_uid) { 680 mode >>= 3; 681 gp = cred->cr_groups; 682 for (i = 0; i < cred->cr_ngroups; i++, gp++) 683 if (ip->i_gid == *gp) 684 goto found; 685 mode >>= 3; 686 found: 687 ; 688 } 689 if ((ip->i_mode & mode) != 0) 690 return (0); 691 return (EACCES); 692 } 693