xref: /csrg-svn/sys/ufs/ffs/ufs_inode.c (revision 39574)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)ufs_inode.c	7.20 (Berkeley) 11/21/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "user.h"
24 #include "file.h"
25 #include "buf.h"
26 #include "cmap.h"
27 #include "vnode.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 #include "../ufs/ufsmount.h"
31 #ifdef QUOTA
32 #include "../ufs/quota.h"
33 #endif
34 #include "kernel.h"
35 #include "malloc.h"
36 
/*
 * In-core inode hash table.  INOHASH maps a (device, inode number)
 * pair to a bucket index; when INOHSZ is a power of two the cheaper
 * mask form is selected at compile time, otherwise an unsigned
 * modulus is used.
 */
37 #define	INOHSZ	512
38 #if	((INOHSZ&(INOHSZ-1)) == 0)
39 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
40 #else
41 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
42 #endif
43 
/*
 * Each bucket is a doubly-linked ring, viewable either as generic
 * head pointers (ih_head) or as a chain of inodes (ih_chain).  An
 * empty bucket points back at itself (see ufs_init).
 */
44 union ihead {
45 	union  ihead *ih_head[2];
46 	struct inode *ih_chain[2];
47 } ihead[INOHSZ];
48 
49 int prtactive;	/* 1 => print out reclaim of active vnodes */
50 
51 /*
52  * Initialize hash links for inodes.
53  */
54 ufs_init()
55 {
56 	register int i;
57 	register union ihead *ih = ihead;
58 
59 #ifndef lint
60 	if (VN_MAXPRIVATE < sizeof(struct inode))
61 		panic("ihinit: too small");
62 #endif /* not lint */
63 	for (i = INOHSZ; --i >= 0; ih++) {
64 		ih->ih_head[0] = ih;
65 		ih->ih_head[1] = ih;
66 	}
67 }
68 
69 /*
70  * Look up a vnode/inode by device,inumber.
71  * If it is in core (in the inode structure),
72  * honor the locking protocol.
73  * If it is not in core, read it in from the
74  * specified device.
75  * Callers must check for mount points!!
76  * In all cases, a pointer to a locked
77  * inode structure is returned.
78  */
79 iget(xp, ino, ipp)
80 	struct inode *xp;
81 	ino_t ino;
82 	struct inode **ipp;
83 {
84 	dev_t dev = xp->i_dev;
85 	struct mount *mntp = ITOV(xp)->v_mount;
86 	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
87 	extern struct vnodeops ufs_vnodeops, spec_inodeops;
88 	register struct inode *ip, *iq;
89 	register struct vnode *vp;
90 	struct vnode *nvp;
91 	struct buf *bp;
92 	struct dinode *dp;
93 	union  ihead *ih;
94 	int error;
95 
96 	ih = &ihead[INOHASH(dev, ino)];
97 loop:
	/*
	 * Search the hash chain for an in-core inode with the same
	 * device and inode number.
	 */
98 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
99 		if (ino != ip->i_number || dev != ip->i_dev)
100 			continue;
		/*
		 * If the inode is locked, note that we want it and
		 * sleep; rescan from the top afterwards since the
		 * inode may have been removed while we slept.
		 */
101 		if ((ip->i_flag&ILOCKED) != 0) {
102 			ip->i_flag |= IWANT;
103 			sleep((caddr_t)ip, PINOD);
104 			goto loop;
105 		}
		/* vget fails if the vnode is being reclaimed; retry. */
106 		if (vget(ITOV(ip)))
107 			goto loop;
108 		*ipp = ip;
109 		return(0);
110 	}
111 	/*
112 	 * Allocate a new inode.
113 	 */
114 	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
115 		*ipp = 0;
116 		return (error);
117 	}
118 	ip = VTOI(nvp);
119 	ip->i_vnode = nvp;
120 	ip->i_flag = 0;
121 	ip->i_devvp = 0;
122 	ip->i_lastr = 0;
123 	ip->i_mode = 0;
124 #ifdef QUOTA
125 	ip->i_dquot = NODQUOT;
126 #endif
127 	/*
128 	 * Put it onto its hash chain and lock it so that other requests for
129 	 * this inode will block if they arrive while we are sleeping waiting
130 	 * for old data structures to be purged or for the contents of the
131 	 * disk portion of this inode to be read.
132 	 */
133 	ip->i_dev = dev;
134 	ip->i_number = ino;
135 	insque(ip, ih);
136 	ILOCK(ip);
137 	/*
138 	 * Read in the disk contents for the inode.
139 	 */
140 	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
141 	    (int)fs->fs_bsize, NOCRED, &bp)) {
142 		/*
143 		 * Unlock and discard unneeded inode.
144 		 */
145 		iput(ip);
146 		brelse(bp);
147 		*ipp = 0;
148 		return (error);
149 	}
	/* Copy this inode's image out of the filesystem block buffer. */
150 	dp = bp->b_un.b_dino;
151 	dp += itoo(fs, ino);
152 	ip->i_din = *dp;
153 	brelse(bp);
154 	/*
155 	 * Initialize the associated vnode
156 	 */
157 	vp = ITOV(ip);
158 	vp->v_type = IFTOVT(ip->i_mode);
159 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/*
		 * Device special file: switch to the special-device
		 * operations vector and check for an existing alias
		 * vnode for the same device.
		 */
160 		vp->v_rdev = ip->i_rdev;
161 		vp->v_op = &spec_inodeops;
162 		if (nvp = checkalias(vp, mntp)) {
163 			/*
164 			 * Reinitialize aliased inode.
165 			 */
166 			vp = nvp;
167 			iq = VTOI(vp);
168 			iq->i_vnode = vp;
169 			iq->i_lastr = 0;
170 			iq->i_flag = 0;
171 			ILOCK(iq);
172 			iq->i_din = ip->i_din;
173 			iq->i_dev = dev;
174 			iq->i_number = ino;
175 			insque(iq, ih);
176 			/*
177 			 * Discard unneeded vnode
178 			 */
179 			ip->i_mode = 0;
180 			iput(ip);
181 			ip = iq;
182 		}
183 	}
184 	if (ino == ROOTINO)
185 		vp->v_flag |= VROOT;
186 	/*
187 	 * Finish inode initialization.
188 	 */
189 	ip->i_fs = fs;
190 	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
191 	VREF(ip->i_devvp);
192 #ifdef QUOTA
193 	if (ip->i_mode != 0)
194 		ip->i_dquot = inoquota(ip);
195 #endif
196 	/*
197 	 * Set up a generation number for this inode if it does not
198 	 * already have one. This should only happen on old filesystems.
199 	 */
200 	if (ip->i_gen == 0) {
201 		if (++nextgennumber < (u_long)time.tv_sec)
202 			nextgennumber = time.tv_sec;
203 		ip->i_gen = nextgennumber;
204 		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
205 			ip->i_flag |= IMOD;
206 	}
207 	*ipp = ip;
208 	return (0);
209 }
210 
211 /*
212  * Unlock and decrement the reference count of an inode structure.
213  */
214 iput(ip)
215 	register struct inode *ip;
216 {
217 
218 	if ((ip->i_flag & ILOCKED) == 0)
219 		panic("iput");
220 	IUNLOCK(ip);
221 	vrele(ITOV(ip));
222 }
223 
224 /*
225  * Last reference to an inode, write the inode out and if necessary,
226  * truncate and deallocate the file.
227  */
228 ufs_inactive(vp)
229 	struct vnode *vp;
230 {
231 	register struct inode *ip = VTOI(vp);
232 	int mode, error = 0;
233 
234 	if (prtactive && vp->v_count != 0)
235 		printf("ufs_inactive: pushing active ino %d dev 0x%x\n",
236 			ip->i_number, ip->i_dev);
237 	/*
238 	 * Get rid of inodes related to stale file handles.
239 	 */
240 	if (ip->i_mode == 0) {
241 		vgone(vp);
242 		return (0);
243 	}
244 	ILOCK(ip);
	/*
	 * If the link count has dropped to zero and the filesystem is
	 * writable, the file itself is gone: release its blocks and
	 * return the inode to the free list.
	 */
245 	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
246 		error = itrunc(ip, (u_long)0);
247 		mode = ip->i_mode;
248 		ip->i_mode = 0;
249 		ip->i_rdev = 0;
250 		ip->i_flag |= IUPD|ICHG;
251 		ifree(ip, ip->i_number, mode);
252 #ifdef QUOTA
253 		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
254 		dqrele(ip->i_dquot);
255 		ip->i_dquot = NODQUOT;
256 #endif
257 	}
	/* Flush any pending timestamp/metadata changes to disk. */
258 	IUPDAT(ip, &time, &time, 0);
259 	IUNLOCK(ip);
260 	ip->i_flag = 0;
261 	/*
262 	 * If we are done with the inode, reclaim it
263 	 * so that it can be reused immediately.
264 	 */
265 	if (vp->v_count == 0 && ip->i_mode == 0)
266 		vgone(vp);
267 	return (error);
268 }
269 
270 /*
271  * Reclaim an inode so that it can be used for other purposes.
272  */
273 ufs_reclaim(vp)
274 	register struct vnode *vp;
275 {
276 	register struct inode *ip = VTOI(vp);
277 
278 	if (prtactive && vp->v_count != 0)
279 		printf("ufs_reclaim: pushing active ino %d dev 0x%x\n",
280 			ip->i_number, ip->i_dev);
281 	/*
282 	 * Remove the inode from its hash chain.
283 	 */
284 	remque(ip);
	/* Point the chain links at the inode itself so later chain
	 * operations on this (now unhashed) inode are harmless. */
285 	ip->i_forw = ip;
286 	ip->i_back = ip;
287 	/*
288 	 * Purge old data structures associated with the inode.
289 	 */
290 	cache_purge(vp);
	/* Drop the reference on the device vnode taken in iget. */
291 	if (ip->i_devvp) {
292 		vrele(ip->i_devvp);
293 		ip->i_devvp = 0;
294 	}
295 #ifdef QUOTA
296 	dqrele(ip->i_dquot);
297 	ip->i_dquot = NODQUOT;
298 #endif
299 	ip->i_flag = 0;
300 	return (0);
301 }
302 
303 /*
304  * Check accessed and update flags on an inode structure.
305  * If any is on, update the inode with the current time.
306  * If waitfor is given, then must ensure I/O order,
307  * so wait for write to complete.
308  */
309 iupdat(ip, ta, tm, waitfor)
310 	register struct inode *ip;
311 	struct timeval *ta, *tm;
312 	int waitfor;
313 {
314 	struct buf *bp;
315 	struct vnode *vp = ITOV(ip);
316 	struct dinode *dp;
317 	register struct fs *fs;
318 	int error;
319 
320 	fs = ip->i_fs;
	/* Nothing to do if no timestamp/modify flags are pending, or
	 * if the filesystem is mounted read-only. */
321 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
322 		return (0);
323 	if (vp->v_mount->m_flag & M_RDONLY)
324 		return (0);
	/* Read the filesystem block holding this inode's disk copy. */
325 	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
326 		(int)fs->fs_bsize, NOCRED, &bp);
327 	if (error) {
328 		brelse(bp);
329 		return (error);
330 	}
	/* Apply the caller-supplied timestamps (ta = access, tm =
	 * modify); the change time always comes from the current time. */
331 	if (ip->i_flag&IACC)
332 		ip->i_atime = ta->tv_sec;
333 	if (ip->i_flag&IUPD)
334 		ip->i_mtime = tm->tv_sec;
335 	if (ip->i_flag&ICHG)
336 		ip->i_ctime = time.tv_sec;
337 	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
338 	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
339 	*dp = ip->i_din;
	/* Synchronous write if the caller needs the inode on disk
	 * before proceeding; otherwise a delayed write suffices. */
340 	if (waitfor) {
341 		return (bwrite(bp));
342 	} else {
343 		bdwrite(bp);
344 		return (0);
345 	}
346 }
347 
348 #define	SINGLE	0	/* index of single indirect block */
349 #define	DOUBLE	1	/* index of double indirect block */
350 #define	TRIPLE	2	/* index of triple indirect block */
351 /*
352  * Truncate the inode ip to at most length size.  Free affected disk
353  * blocks -- the blocks of the file are removed in reverse order.
354  *
355  * NB: triple indirect blocks are untested.
356  */
357 itrunc(oip, length)
358 	register struct inode *oip;
359 	u_long length;
360 {
361 	register daddr_t lastblock;
362 	daddr_t bn, lbn, lastiblock[NIADDR];
363 	register struct fs *fs;
364 	register struct inode *ip;
365 	struct buf *bp;
366 	int offset, osize, size, level;
367 	long count, nblocks, blocksreleased = 0;
368 	register int i;
369 	int error, allerror = 0;
370 	struct inode tip;
371 
	/* File is already no larger than length: just mark the inode
	 * changed and flush it synchronously; nothing to free. */
372 	if (oip->i_size <= length) {
373 		oip->i_flag |= ICHG|IUPD;
374 		error = iupdat(oip, &time, &time, 1);
375 		return (error);
376 	}
377 	/*
378 	 * Calculate index into inode's block list of
379 	 * last direct and indirect blocks (if any)
380 	 * which we want to keep.  Lastblock is -1 when
381 	 * the file is truncated to 0.
382 	 */
383 	fs = oip->i_fs;
384 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
385 	lastiblock[SINGLE] = lastblock - NDADDR;
386 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
387 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
388 	nblocks = btodb(fs->fs_bsize);
389 	/*
390 	 * Update the size of the file. If the file is not being
391 	 * truncated to a block boundary, the contents of the
392 	 * partial block following the end of the file must be
393 	 * zeroed in case it ever becomes accessible again because
394 	 * of subsequent file growth.
395 	 */
396 	osize = oip->i_size;
397 	offset = blkoff(fs, length);
398 	if (offset == 0) {
399 		oip->i_size = length;
400 	} else {
401 		lbn = lblkno(fs, length);
402 		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
403 		if (error)
404 			return (error);
405 		if ((long)bn < 0)
406 			panic("itrunc: hole");
407 		oip->i_size = length;
408 		size = blksize(fs, oip, lbn);
409 		count = howmany(size, CLBYTES);
		/* Invalidate any cached core-map entries for the partial
		 * block before rewriting it through the buffer cache. */
410 		for (i = 0; i < count; i++)
411 			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
412 		error = bread(oip->i_devvp, bn, size, NOCRED, &bp);
413 		if (error) {
414 			oip->i_size = osize;
415 			brelse(bp);
416 			return (error);
417 		}
418 		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
419 		bdwrite(bp);
420 	}
421 	/*
422 	 * Update file and block pointers
423 	 * on disk before we start freeing blocks.
424 	 * If we crash before free'ing blocks below,
425 	 * the blocks will be returned to the free list.
426 	 * lastiblock values are also normalized to -1
427 	 * for calls to indirtrunc below.
428 	 */
	/* tip holds a private copy of the old block pointers (and old
	 * size) so the blocks can still be found and freed after the
	 * on-disk inode has been cleared by syncip below. */
429 	tip = *oip;
430 	tip.i_size = osize;
431 	for (level = TRIPLE; level >= SINGLE; level--)
432 		if (lastiblock[level] < 0) {
433 			oip->i_ib[level] = 0;
434 			lastiblock[level] = -1;
435 		}
436 	for (i = NDADDR - 1; i > lastblock; i--)
437 		oip->i_db[i] = 0;
438 	oip->i_flag |= ICHG|IUPD;
439 	allerror = syncip(oip);
440 
441 	/*
442 	 * Indirect blocks first.
443 	 */
444 	ip = &tip;
445 	for (level = TRIPLE; level >= SINGLE; level--) {
446 		bn = ip->i_ib[level];
447 		if (bn != 0) {
448 			error = indirtrunc(ip, bn, lastiblock[level], level,
449 				&count);
450 			if (error)
451 				allerror = error;
452 			blocksreleased += count;
453 			if (lastiblock[level] < 0) {
454 				ip->i_ib[level] = 0;
455 				blkfree(ip, bn, (off_t)fs->fs_bsize);
456 				blocksreleased += nblocks;
457 			}
458 		}
		/* Part of this level survives, so all lower levels and
		 * all direct blocks survive too; we are done freeing. */
459 		if (lastiblock[level] >= 0)
460 			goto done;
461 	}
462 
463 	/*
464 	 * All whole direct blocks or frags.
465 	 */
466 	for (i = NDADDR - 1; i > lastblock; i--) {
467 		register off_t bsize;
468 
469 		bn = ip->i_db[i];
470 		if (bn == 0)
471 			continue;
472 		ip->i_db[i] = 0;
473 		bsize = (off_t)blksize(fs, ip, i);
474 		blkfree(ip, bn, bsize);
475 		blocksreleased += btodb(bsize);
476 	}
477 	if (lastblock < 0)
478 		goto done;
479 
480 	/*
481 	 * Finally, look for a change in size of the
482 	 * last direct block; release any frags.
483 	 */
484 	bn = ip->i_db[lastblock];
485 	if (bn != 0) {
486 		off_t oldspace, newspace;
487 
488 		/*
489 		 * Calculate amount of space we're giving
490 		 * back as old block size minus new block size.
491 		 */
492 		oldspace = blksize(fs, ip, lastblock);
493 		ip->i_size = length;
494 		newspace = blksize(fs, ip, lastblock);
495 		if (newspace == 0)
496 			panic("itrunc: newspace");
497 		if (oldspace - newspace > 0) {
498 			/*
499 			 * Block number of space to be free'd is
500 			 * the old block # plus the number of frags
501 			 * required for the storage we're keeping.
502 			 */
503 			bn += numfrags(fs, newspace);
504 			blkfree(ip, bn, oldspace - newspace);
505 			blocksreleased += btodb(oldspace - newspace);
506 		}
507 	}
508 done:
509 /* BEGIN PARANOIA */
	/* The freeing pass worked on the copy (ip == &tip); verify it
	 * never diverged from the real inode's block pointers. */
510 	for (level = SINGLE; level <= TRIPLE; level++)
511 		if (ip->i_ib[level] != oip->i_ib[level])
512 			panic("itrunc1");
513 	for (i = 0; i < NDADDR; i++)
514 		if (ip->i_db[i] != oip->i_db[i])
515 			panic("itrunc2");
516 /* END PARANOIA */
517 	oip->i_blocks -= blocksreleased;
518 	if (oip->i_blocks < 0)			/* sanity */
519 		oip->i_blocks = 0;
520 	oip->i_flag |= ICHG;
521 #ifdef QUOTA
522 	(void) chkdq(oip, -blocksreleased, 0);
523 #endif
524 	return (allerror);
525 }
526 
527 /*
528  * Release blocks associated with the inode ip and
529  * stored in the indirect block bn.  Blocks are free'd
530  * in LIFO order up to (but not including) lastbn.  If
531  * level is greater than SINGLE, the block is an indirect
532  * block and recursive calls to indirtrunc must be used to
533  * cleanse other indirect blocks.
534  *
535  * NB: triple indirect blocks are untested.
536  */
537 indirtrunc(ip, bn, lastbn, level, countp)
538 	register struct inode *ip;
539 	daddr_t bn, lastbn;
540 	int level;
541 	long *countp;
542 {
543 	register int i;
544 	struct buf *bp;
545 	register struct fs *fs = ip->i_fs;
546 	register daddr_t *bap;
547 	daddr_t *copy, nb, last;
548 	long blkcount, factor;
549 	int nblocks, blocksreleased = 0;
550 	int error, allerror = 0;
551 
552 	/*
553 	 * Calculate index in current block of last
554 	 * block to be kept.  -1 indicates the entire
555 	 * block so we need not calculate the index.
556 	 */
	/* factor = number of file blocks addressed by one pointer at
	 * this indirection level (NINDIR to the power level-SINGLE). */
557 	factor = 1;
558 	for (i = SINGLE; i < level; i++)
559 		factor *= NINDIR(fs);
560 	last = lastbn;
561 	if (lastbn > 0)
562 		last /= factor;
563 	nblocks = btodb(fs->fs_bsize);
564 	/*
565 	 * Get buffer of block pointers, zero those
566 	 * entries corresponding to blocks to be free'd,
567 	 * and update on disk copy first.
568 	 */
569 	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
570 		NOCRED, &bp);
571 	if (error) {
572 		brelse(bp);
573 		*countp = 0;
574 		return (error);
575 	}
576 	bap = bp->b_un.b_daddr;
	/* Work from a private copy of the pointers so the on-disk
	 * indirect block can be cleared and written before freeing;
	 * a crash then loses blocks rather than corrupting the file. */
577 	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
578 	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
579 	bzero((caddr_t)&bap[last + 1],
580 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
581 	error = bwrite(bp);
582 	if (error)
583 		allerror = error;
584 	bap = copy;
585 
586 	/*
587 	 * Recursively free totally unused blocks.
588 	 */
589 	for (i = NINDIR(fs) - 1; i > last; i--) {
590 		nb = bap[i];
591 		if (nb == 0)
592 			continue;
593 		if (level > SINGLE) {
594 			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
595 				&blkcount);
596 			if (error)
597 				allerror = error;
598 			blocksreleased += blkcount;
599 		}
600 		blkfree(ip, nb, (off_t)fs->fs_bsize);
601 		blocksreleased += nblocks;
602 	}
603 
604 	/*
605 	 * Recursively free last partial block.
606 	 */
607 	if (level > SINGLE && lastbn >= 0) {
608 		last = lastbn % factor;
		/* i still equals the old value of last here (the loop
		 * above stopped at it): the index of the partially-
		 * retained indirect block. */
609 		nb = bap[i];
610 		if (nb != 0) {
611 			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
612 			if (error)
613 				allerror = error;
614 			blocksreleased += blkcount;
615 		}
616 	}
617 	FREE(copy, M_TEMP);
618 	*countp = blocksreleased;
619 	return (allerror);
620 }
621 
622 /*
623  * Lock an inode. If its already locked, set the WANT bit and sleep.
624  */
625 ilock(ip)
626 	register struct inode *ip;
627 {
628 
629 	while (ip->i_flag & ILOCKED) {
630 		ip->i_flag |= IWANT;
631 		(void) sleep((caddr_t)ip, PINOD);
632 	}
633 	ip->i_flag |= ILOCKED;
634 }
635 
636 /*
637  * Unlock an inode.  If WANT bit is on, wakeup.
638  */
639 iunlock(ip)
640 	register struct inode *ip;
641 {
642 
643 	if ((ip->i_flag & ILOCKED) == 0)
644 		printf("unlocking unlocked inode %d on dev 0x%x\n",
645 			ip->i_number, ip->i_dev);
646 	ip->i_flag &= ~ILOCKED;
647 	if (ip->i_flag&IWANT) {
648 		ip->i_flag &= ~IWANT;
649 		wakeup((caddr_t)ip);
650 	}
651 }
652 
653 /*
654  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
655  * The mode is shifted to select the owner/group/other fields. The
656  * super user is granted all permissions.
657  *
658  * NB: Called from vnode op table. It seems this could all be done
659  * using vattr's but...
660  */
661 iaccess(ip, mode, cred)
662 	register struct inode *ip;
663 	register int mode;
664 	struct ucred *cred;
665 {
666 	register gid_t *gp;
667 	int i;
668 
669 	/*
670 	 * If you're the super-user, you always get access.
671 	 */
672 	if (cred->cr_uid == 0)
673 		return (0);
674 	/*
675 	 * Access check is based on only one of owner, group, public.
676 	 * If not owner, then check group. If not a member of the
677 	 * group, then check public access.
678 	 */
679 	if (cred->cr_uid != ip->i_uid) {
680 		mode >>= 3;
681 		gp = cred->cr_groups;
682 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
683 			if (ip->i_gid == *gp)
684 				goto found;
685 		mode >>= 3;
686 found:
687 		;
688 	}
689 	if ((ip->i_mode & mode) != 0)
690 		return (0);
691 	return (EACCES);
692 }
693