xref: /csrg-svn/sys/ufs/ffs/ufs_inode.c (revision 24525)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ufs_inode.c	6.17 (Berkeley) 09/04/85
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mount.h"
12 #include "dir.h"
13 #include "user.h"
14 #include "inode.h"
15 #include "fs.h"
16 #include "buf.h"
17 #include "cmap.h"
18 #ifdef QUOTA
19 #include "quota.h"
20 #endif
21 #include "kernel.h"
22 
23 #define	INOHSZ	512
24 #if	((INOHSZ&(INOHSZ-1)) == 0)
25 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
26 #else
27 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
28 #endif
29 
30 union ihead {				/* inode LRU cache, Chris Maltby */
31 	union  ihead *ih_head[2];
32 	struct inode *ih_chain[2];
33 } ihead[INOHSZ];
34 
35 struct inode *ifreeh, **ifreet;
36 
37 /*
38  * Initialize hash links for inodes
39  * and build inode free list.
40  */
41 ihinit()
42 {
43 	register int i;
44 	register struct inode *ip = inode;
45 	register union  ihead *ih = ihead;
46 
47 	for (i = INOHSZ; --i >= 0; ih++) {
48 		ih->ih_head[0] = ih;
49 		ih->ih_head[1] = ih;
50 	}
51 	ifreeh = ip;
52 	ifreet = &ip->i_freef;
53 	ip->i_freeb = &ifreeh;
54 	ip->i_forw = ip;
55 	ip->i_back = ip;
56 	for (i = ninode; --i > 0; ) {
57 		++ip;
58 		ip->i_forw = ip;
59 		ip->i_back = ip;
60 		*ifreet = ip;
61 		ip->i_freeb = ifreet;
62 		ifreet = &ip->i_freef;
63 	}
64 	ip->i_freef = NULL;
65 }
66 
67 #ifdef notdef
68 /*
69  * Find an inode if it is incore.
70  * This is the equivalent, for inodes,
71  * of ``incore'' in bio.c or ``pfind'' in subr.c.
72  */
73 struct inode *
74 ifind(dev, ino)
75 	dev_t dev;
76 	ino_t ino;
77 {
78 	register struct inode *ip;
79 	register union  ihead *ih;
80 
81 	ih = &ihead[INOHASH(dev, ino)];
82 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
83 		if (ino==ip->i_number && dev==ip->i_dev)
84 			return (ip);
85 	return ((struct inode *)0);
86 }
87 #endif notdef
88 
89 /*
90  * Look up an inode by device,inumber.
91  * If it is in core (in the inode structure),
92  * honor the locking protocol.
93  * If it is not in core, read it in from the
94  * specified device.
95  * If the inode is mounted on, perform
96  * the indicated indirection.
97  * In all cases, a pointer to a locked
98  * inode structure is returned.
99  *
100  * panic: no imt -- if the mounted file
101  *	system is not in the mount table.
102  *	"cannot happen"
103  */
104 struct inode *
105 iget(dev, fs, ino)
106 	dev_t dev;
107 	register struct fs *fs;
108 	ino_t ino;
109 {
110 	register struct inode *ip;
111 	register union  ihead *ih;
112 	register struct mount *mp;
113 	register struct buf *bp;
114 	register struct dinode *dp;
115 	register struct inode *iq;
116 
117 
118 loop:
119 	ih = &ihead[INOHASH(dev, ino)];
120 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
121 		if (ino == ip->i_number && dev == ip->i_dev) {
122 			/*
123 			 * Following is essentially an inline expanded
124 			 * copy of igrab(), expanded inline for speed,
125 			 * and so that the test for a mounted on inode
126 			 * can be deferred until after we are sure that
127 			 * the inode isn't busy.
128 			 */
129 			if ((ip->i_flag&ILOCKED) != 0) {
130 				ip->i_flag |= IWANT;
131 				sleep((caddr_t)ip, PINOD);
132 				goto loop;
133 			}
134 			if ((ip->i_flag&IMOUNT) != 0) {
135 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
136 					if(mp->m_inodp == ip) {
137 						dev = mp->m_dev;
138 						fs = mp->m_bufp->b_un.b_fs;
139 						ino = ROOTINO;
140 						goto loop;
141 					}
142 				panic("no imt");
143 			}
144 			if (ip->i_count == 0) {		/* ino on free list */
145 				if (iq = ip->i_freef)
146 					iq->i_freeb = ip->i_freeb;
147 				else
148 					ifreet = ip->i_freeb;
149 				*ip->i_freeb = iq;
150 				ip->i_freef = NULL;
151 				ip->i_freeb = NULL;
152 			}
153 			ip->i_count++;
154 			ip->i_flag |= ILOCKED;
155 			return(ip);
156 		}
157 
158 	if ((ip = ifreeh) == NULL) {
159 		tablefull("inode");
160 		u.u_error = ENFILE;
161 		return(NULL);
162 	}
163 	if (ip->i_count)
164 		panic("free inode isn't");
165 	if (iq = ip->i_freef)
166 		iq->i_freeb = &ifreeh;
167 	ifreeh = iq;
168 	ip->i_freef = NULL;
169 	ip->i_freeb = NULL;
170 	/*
171 	 * Now to take inode off the hash chain it was on
172 	 * (initially, or after an iflush, it is on a "hash chain"
173 	 * consisting entirely of itself, and pointed to by no-one,
174 	 * but that doesn't matter), and put it on the chain for
175 	 * its new (ino, dev) pair
176 	 */
177 	remque(ip);
178 	insque(ip, ih);
179 	ip->i_dev = dev;
180 	ip->i_fs = fs;
181 	ip->i_number = ino;
182 	cacheinval(ip);
183 	ip->i_flag = ILOCKED;
184 	ip->i_count++;
185 	ip->i_lastr = 0;
186 #ifdef QUOTA
187 	dqrele(ip->i_dquot);
188 #endif
189 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
190 	/*
191 	 * Check I/O errors
192 	 */
193 	if ((bp->b_flags&B_ERROR) != 0) {
194 		brelse(bp);
195 		/*
196 		 * the inode doesn't contain anything useful, so it would
197 		 * be misleading to leave it on its hash chain.
198 		 * 'iput' will take care of putting it back on the free list.
199 		 */
200 		remque(ip);
201 		ip->i_forw = ip;
202 		ip->i_back = ip;
203 		/*
204 		 * we also loose its inumber, just in case (as iput
205 		 * doesn't do that any more) - but as it isn't on its
206 		 * hash chain, I doubt if this is really necessary .. kre
207 		 * (probably the two methods are interchangable)
208 		 */
209 		ip->i_number = 0;
210 #ifdef QUOTA
211 		ip->i_dquot = NODQUOT;
212 #endif
213 		iput(ip);
214 		return(NULL);
215 	}
216 	dp = bp->b_un.b_dino;
217 	dp += itoo(fs, ino);
218 	ip->i_ic = dp->di_ic;
219 	brelse(bp);
220 #ifdef QUOTA
221 	if (ip->i_mode == 0)
222 		ip->i_dquot = NODQUOT;
223 	else
224 		ip->i_dquot = inoquota(ip);
225 #endif
226 	return (ip);
227 }
228 
229 /*
230  * Convert a pointer to an inode into a reference to an inode.
231  *
232  * This is basically the internal piece of iget (after the
233  * inode pointer is located) but without the test for mounted
234  * filesystems.  It is caller's responsibility to check that
235  * the inode pointer is valid.
236  */
237 igrab(ip)
238 	register struct inode *ip;
239 {
240 	while ((ip->i_flag&ILOCKED) != 0) {
241 		ip->i_flag |= IWANT;
242 		sleep((caddr_t)ip, PINOD);
243 	}
244 	if (ip->i_count == 0) {		/* ino on free list */
245 		register struct inode *iq;
246 
247 		if (iq = ip->i_freef)
248 			iq->i_freeb = ip->i_freeb;
249 		else
250 			ifreet = ip->i_freeb;
251 		*ip->i_freeb = iq;
252 		ip->i_freef = NULL;
253 		ip->i_freeb = NULL;
254 	}
255 	ip->i_count++;
256 	ip->i_flag |= ILOCKED;
257 }
258 
259 /*
260  * Decrement reference count of
261  * an inode structure.
262  * On the last reference,
263  * write the inode out and if necessary,
264  * truncate and deallocate the file.
265  */
266 iput(ip)
267 	register struct inode *ip;
268 {
269 
270 	if ((ip->i_flag & ILOCKED) == 0)
271 		panic("iput");
272 	IUNLOCK(ip);
273 	irele(ip);
274 }
275 
276 irele(ip)
277 	register struct inode *ip;
278 {
279 	int mode;
280 
281 	if (ip->i_count == 1) {
282 		ip->i_flag |= ILOCKED;
283 		if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) {
284 			itrunc(ip, (u_long)0);
285 			mode = ip->i_mode;
286 			ip->i_mode = 0;
287 			ip->i_rdev = 0;
288 			ip->i_flag |= IUPD|ICHG;
289 			ifree(ip, ip->i_number, mode);
290 #ifdef QUOTA
291 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
292 			dqrele(ip->i_dquot);
293 			ip->i_dquot = NODQUOT;
294 #endif
295 		}
296 		IUPDAT(ip, &time, &time, 0);
297 		IUNLOCK(ip);
298 		ip->i_flag = 0;
299 		/*
300 		 * Put the inode on the end of the free list.
301 		 * Possibly in some cases it would be better to
302 		 * put the inode at the head of the free list,
303 		 * (eg: where i_mode == 0 || i_number == 0)
304 		 * but I will think about that later .. kre
305 		 * (i_number is rarely 0 - only after an i/o error in iget,
306 		 * where i_mode == 0, the inode will probably be wanted
307 		 * again soon for an ialloc, so possibly we should keep it)
308 		 */
309 		if (ifreeh) {
310 			*ifreet = ip;
311 			ip->i_freeb = ifreet;
312 		} else {
313 			ifreeh = ip;
314 			ip->i_freeb = &ifreeh;
315 		}
316 		ip->i_freef = NULL;
317 		ifreet = &ip->i_freef;
318 	} else if (!(ip->i_flag & ILOCKED))
319 		ITIMES(ip, &time, &time);
320 	ip->i_count--;
321 }
322 
323 /*
324  * Check accessed and update flags on
325  * an inode structure.
326  * If any is on, update the inode
327  * with the current time.
328  * If waitfor is given, then must insure
329  * i/o order so wait for write to complete.
330  */
331 iupdat(ip, ta, tm, waitfor)
332 	register struct inode *ip;
333 	struct timeval *ta, *tm;
334 	int waitfor;
335 {
336 	register struct buf *bp;
337 	struct dinode *dp;
338 	register struct fs *fp;
339 
340 	fp = ip->i_fs;
341 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
342 		if (fp->fs_ronly)
343 			return;
344 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
345 			(int)fp->fs_bsize);
346 		if (bp->b_flags & B_ERROR) {
347 			brelse(bp);
348 			return;
349 		}
350 		if (ip->i_flag&IACC)
351 			ip->i_atime = ta->tv_sec;
352 		if (ip->i_flag&IUPD)
353 			ip->i_mtime = tm->tv_sec;
354 		if (ip->i_flag&ICHG)
355 			ip->i_ctime = time.tv_sec;
356 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
357 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
358 		dp->di_ic = ip->i_ic;
359 		if (waitfor)
360 			bwrite(bp);
361 		else
362 			bdwrite(bp);
363 	}
364 }
365 
366 #define	SINGLE	0	/* index of single indirect block */
367 #define	DOUBLE	1	/* index of double indirect block */
368 #define	TRIPLE	2	/* index of triple indirect block */
369 /*
370  * Truncate the inode ip to at most
371  * length size.  Free affected disk
372  * blocks -- the blocks of the file
373  * are removed in reverse order.
374  *
375  * NB: triple indirect blocks are untested.
376  */
377 itrunc(oip, length)
378 	register struct inode *oip;
379 	u_long length;
380 {
381 	register daddr_t lastblock;
382 	daddr_t bn, lastiblock[NIADDR];
383 	register struct fs *fs;
384 	register struct inode *ip;
385 	struct buf *bp;
386 	int offset, lbn, osize, size, count, level, s;
387 	long nblocks, blocksreleased = 0;
388 	register int i;
389 	dev_t dev;
390 	struct inode tip;
391 	extern long indirtrunc();
392 	extern struct cmap *mfind();
393 
394 	if (oip->i_size <= length) {
395 		oip->i_flag |= ICHG|IUPD;
396 		iupdat(oip, &time, &time, 1);
397 		return;
398 	}
399 	/*
400 	 * Calculate index into inode's block list of
401 	 * last direct and indirect blocks (if any)
402 	 * which we want to keep.  Lastblock is -1 when
403 	 * the file is truncated to 0.
404 	 */
405 	fs = oip->i_fs;
406 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
407 	lastiblock[SINGLE] = lastblock - NDADDR;
408 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
409 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
410 	nblocks = btodb(fs->fs_bsize);
411 	/*
412 	 * Update the size of the file. If the file is not being
413 	 * truncated to a block boundry, the contents of the
414 	 * partial block following the end of the file must be
415 	 * zero'ed in case it ever become accessable again because
416 	 * of subsequent file growth.
417 	 */
418 	osize = oip->i_size;
419 	offset = blkoff(fs, length);
420 	if (offset == 0) {
421 		oip->i_size = length;
422 	} else {
423 		lbn = lblkno(fs, length);
424 		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
425 		if (u.u_error || (long)bn < 0)
426 			return;
427 		oip->i_size = length;
428 		size = blksize(fs, oip, lbn);
429 		count = howmany(size, DEV_BSIZE);
430 		dev = oip->i_dev;
431 		s = splimp();
432 		for (i = 0; i < count; i += CLSIZE)
433 			if (mfind(dev, bn + i))
434 				munhash(dev, bn + i);
435 		splx(s);
436 		bp = bread(dev, bn, size);
437 		if (bp->b_flags & B_ERROR) {
438 			u.u_error = EIO;
439 			oip->i_size = osize;
440 			brelse(bp);
441 			return;
442 		}
443 		bzero(bp->b_un.b_addr + offset, size - offset);
444 		bdwrite(bp);
445 	}
446 	/*
447 	 * Update file and block pointers
448 	 * on disk before we start freeing blocks.
449 	 * If we crash before free'ing blocks below,
450 	 * the blocks will be returned to the free list.
451 	 * lastiblock values are also normalized to -1
452 	 * for calls to indirtrunc below.
453 	 */
454 	tip = *oip;
455 	tip.i_size = osize;
456 	for (level = TRIPLE; level >= SINGLE; level--)
457 		if (lastiblock[level] < 0) {
458 			oip->i_ib[level] = 0;
459 			lastiblock[level] = -1;
460 		}
461 	for (i = NDADDR - 1; i > lastblock; i--)
462 		oip->i_db[i] = 0;
463 	oip->i_flag |= ICHG|IUPD;
464 	syncip(oip);
465 
466 	/*
467 	 * Indirect blocks first.
468 	 */
469 	ip = &tip;
470 	for (level = TRIPLE; level >= SINGLE; level--) {
471 		bn = ip->i_ib[level];
472 		if (bn != 0) {
473 			blocksreleased +=
474 			    indirtrunc(ip, bn, lastiblock[level], level);
475 			if (lastiblock[level] < 0) {
476 				ip->i_ib[level] = 0;
477 				free(ip, bn, (off_t)fs->fs_bsize);
478 				blocksreleased += nblocks;
479 			}
480 		}
481 		if (lastiblock[level] >= 0)
482 			goto done;
483 	}
484 
485 	/*
486 	 * All whole direct blocks or frags.
487 	 */
488 	for (i = NDADDR - 1; i > lastblock; i--) {
489 		register int bsize;
490 
491 		bn = ip->i_db[i];
492 		if (bn == 0)
493 			continue;
494 		ip->i_db[i] = 0;
495 		bsize = (off_t)blksize(fs, ip, i);
496 		free(ip, bn, bsize);
497 		blocksreleased += btodb(bsize);
498 	}
499 	if (lastblock < 0)
500 		goto done;
501 
502 	/*
503 	 * Finally, look for a change in size of the
504 	 * last direct block; release any frags.
505 	 */
506 	bn = ip->i_db[lastblock];
507 	if (bn != 0) {
508 		int oldspace, newspace;
509 
510 		/*
511 		 * Calculate amount of space we're giving
512 		 * back as old block size minus new block size.
513 		 */
514 		oldspace = blksize(fs, ip, lastblock);
515 		ip->i_size = length;
516 		newspace = blksize(fs, ip, lastblock);
517 		if (newspace == 0)
518 			panic("itrunc: newspace");
519 		if (oldspace - newspace > 0) {
520 			/*
521 			 * Block number of space to be free'd is
522 			 * the old block # plus the number of frags
523 			 * required for the storage we're keeping.
524 			 */
525 			bn += numfrags(fs, newspace);
526 			free(ip, bn, oldspace - newspace);
527 			blocksreleased += btodb(oldspace - newspace);
528 		}
529 	}
530 done:
531 /* BEGIN PARANOIA */
532 	for (level = SINGLE; level <= TRIPLE; level++)
533 		if (ip->i_ib[level] != oip->i_ib[level])
534 			panic("itrunc1");
535 	for (i = 0; i < NDADDR; i++)
536 		if (ip->i_db[i] != oip->i_db[i])
537 			panic("itrunc2");
538 /* END PARANOIA */
539 	oip->i_blocks -= blocksreleased;
540 	if (oip->i_blocks < 0)			/* sanity */
541 		oip->i_blocks = 0;
542 	oip->i_flag |= ICHG;
543 #ifdef QUOTA
544 	(void) chkdq(oip, -blocksreleased, 0);
545 #endif
546 }
547 
548 /*
549  * Release blocks associated with the inode ip and
550  * stored in the indirect block bn.  Blocks are free'd
551  * in LIFO order up to (but not including) lastbn.  If
552  * level is greater than SINGLE, the block is an indirect
553  * block and recursive calls to indirtrunc must be used to
554  * cleanse other indirect blocks.
555  *
556  * NB: triple indirect blocks are untested.
557  */
558 long
559 indirtrunc(ip, bn, lastbn, level)
560 	register struct inode *ip;
561 	daddr_t bn, lastbn;
562 	int level;
563 {
564 	register int i;
565 	struct buf *bp, *copy;
566 	register daddr_t *bap;
567 	register struct fs *fs = ip->i_fs;
568 	daddr_t nb, last;
569 	long factor;
570 	int blocksreleased = 0, nblocks;
571 
572 	/*
573 	 * Calculate index in current block of last
574 	 * block to be kept.  -1 indicates the entire
575 	 * block so we need not calculate the index.
576 	 */
577 	factor = 1;
578 	for (i = SINGLE; i < level; i++)
579 		factor *= NINDIR(fs);
580 	last = lastbn;
581 	if (lastbn > 0)
582 		last /= factor;
583 	nblocks = btodb(fs->fs_bsize);
584 	/*
585 	 * Get buffer of block pointers, zero those
586 	 * entries corresponding to blocks to be free'd,
587 	 * and update on disk copy first.
588 	 */
589 	copy = geteblk((int)fs->fs_bsize);
590 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
591 	if (bp->b_flags&B_ERROR) {
592 		brelse(copy);
593 		brelse(bp);
594 		return (0);
595 	}
596 	bap = bp->b_un.b_daddr;
597 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
598 	bzero((caddr_t)&bap[last + 1],
599 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
600 	bwrite(bp);
601 	bp = copy, bap = bp->b_un.b_daddr;
602 
603 	/*
604 	 * Recursively free totally unused blocks.
605 	 */
606 	for (i = NINDIR(fs) - 1; i > last; i--) {
607 		nb = bap[i];
608 		if (nb == 0)
609 			continue;
610 		if (level > SINGLE)
611 			blocksreleased +=
612 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
613 		free(ip, nb, (int)fs->fs_bsize);
614 		blocksreleased += nblocks;
615 	}
616 
617 	/*
618 	 * Recursively free last partial block.
619 	 */
620 	if (level > SINGLE && lastbn >= 0) {
621 		last = lastbn % factor;
622 		nb = bap[i];
623 		if (nb != 0)
624 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
625 	}
626 	brelse(bp);
627 	return (blocksreleased);
628 }
629 
630 /*
631  * remove any inodes in the inode cache belonging to dev
632  *
633  * There should not be any active ones, return error if any are found
634  * (nb: this is a user error, not a system err)
635  *
636  * Also, count the references to dev by block devices - this really
637  * has nothing to do with the object of the procedure, but as we have
638  * to scan the inode table here anyway, we might as well get the
639  * extra benefit.
640  *
641  * this is called from sumount()/sys3.c when dev is being unmounted
642  */
643 #ifdef QUOTA
644 iflush(dev, iq)
645 	dev_t dev;
646 	struct inode *iq;
647 #else
648 iflush(dev)
649 	dev_t dev;
650 #endif
651 {
652 	register struct inode *ip;
653 	register open = 0;
654 
655 	for (ip = inode; ip < inodeNINODE; ip++) {
656 #ifdef QUOTA
657 		if (ip != iq && ip->i_dev == dev)
658 #else
659 		if (ip->i_dev == dev)
660 #endif
661 			if (ip->i_count)
662 				return(-1);
663 			else {
664 				remque(ip);
665 				ip->i_forw = ip;
666 				ip->i_back = ip;
667 				/*
668 				 * as i_count == 0, the inode was on the free
669 				 * list already, just leave it there, it will
670 				 * fall off the bottom eventually. We could
671 				 * perhaps move it to the head of the free
672 				 * list, but as umounts are done so
673 				 * infrequently, we would gain very little,
674 				 * while making the code bigger.
675 				 */
676 #ifdef QUOTA
677 				dqrele(ip->i_dquot);
678 				ip->i_dquot = NODQUOT;
679 #endif
680 			}
681 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
682 		    ip->i_rdev == dev)
683 			open++;
684 	}
685 	return (open);
686 }
687 
688 /*
689  * Lock an inode. If its already locked, set the WANT bit and sleep.
690  */
691 ilock(ip)
692 	register struct inode *ip;
693 {
694 
695 	ILOCK(ip);
696 }
697 
698 /*
699  * Unlock an inode.  If WANT bit is on, wakeup.
700  */
701 iunlock(ip)
702 	register struct inode *ip;
703 {
704 
705 	IUNLOCK(ip);
706 }
707