xref: /csrg-svn/sys/ufs/ffs/ffs_inode.c (revision 16840)
1 /*	ffs_inode.c	6.10	84/08/03	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mount.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/inode.h"
9 #include "../h/fs.h"
10 #include "../h/conf.h"
11 #include "../h/buf.h"
12 #ifdef QUOTA
13 #include "../h/quota.h"
14 #endif
15 #include "../h/kernel.h"
16 
17 #define	INOHSZ	512
18 #if	((INOHSZ&(INOHSZ-1)) == 0)
19 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
20 #else
21 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
22 #endif
23 
24 union ihead {				/* inode LRU cache, Chris Maltby */
25 	union  ihead *ih_head[2];
26 	struct inode *ih_chain[2];
27 } ihead[INOHSZ];
28 
29 struct inode *ifreeh, **ifreet;
30 
31 /*
32  * Initialize hash links for inodes
33  * and build inode free list.
34  */
35 ihinit()
36 {
37 	register int i;
38 	register struct inode *ip = inode;
39 	register union  ihead *ih = ihead;
40 
41 	for (i = INOHSZ; --i >= 0; ih++) {
42 		ih->ih_head[0] = ih;
43 		ih->ih_head[1] = ih;
44 	}
45 	ifreeh = ip;
46 	ifreet = &ip->i_freef;
47 	ip->i_freeb = &ifreeh;
48 	ip->i_forw = ip;
49 	ip->i_back = ip;
50 	for (i = ninode; --i > 0; ) {
51 		++ip;
52 		ip->i_forw = ip;
53 		ip->i_back = ip;
54 		*ifreet = ip;
55 		ip->i_freeb = ifreet;
56 		ifreet = &ip->i_freef;
57 	}
58 	ip->i_freef = NULL;
59 }
60 
61 #ifdef notdef
62 /*
63  * Find an inode if it is incore.
64  * This is the equivalent, for inodes,
65  * of ``incore'' in bio.c or ``pfind'' in subr.c.
66  */
67 struct inode *
68 ifind(dev, ino)
69 	dev_t dev;
70 	ino_t ino;
71 {
72 	register struct inode *ip;
73 	register union  ihead *ih;
74 
75 	ih = &ihead[INOHASH(dev, ino)];
76 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
77 		if (ino==ip->i_number && dev==ip->i_dev)
78 			return (ip);
79 	return ((struct inode *)0);
80 }
81 #endif notdef
82 
83 /*
84  * Look up an inode by device,inumber.
85  * If it is in core (in the inode structure),
86  * honor the locking protocol.
87  * If it is not in core, read it in from the
88  * specified device.
89  * If the inode is mounted on, perform
90  * the indicated indirection.
91  * In all cases, a pointer to a locked
92  * inode structure is returned.
93  *
94  * panic: no imt -- if the mounted file
95  *	system is not in the mount table.
96  *	"cannot happen"
97  */
98 struct inode *
99 iget(dev, fs, ino)
100 	dev_t dev;
101 	register struct fs *fs;
102 	ino_t ino;
103 {
104 	register struct inode *ip;
105 	register union  ihead *ih;
106 	register struct mount *mp;
107 	register struct buf *bp;
108 	register struct dinode *dp;
109 	register struct inode *iq;
110 
111 
112 loop:
113 	ih = &ihead[INOHASH(dev, ino)];
114 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
115 		if (ino == ip->i_number && dev == ip->i_dev) {
116 			/*
117 			 * Following is essentially an inline expanded
118 			 * copy of igrab(), expanded inline for speed,
119 			 * and so that the test for a mounted on inode
120 			 * can be deferred until after we are sure that
121 			 * the inode isn't busy.
122 			 */
123 			if ((ip->i_flag&ILOCKED) != 0) {
124 				ip->i_flag |= IWANT;
125 				sleep((caddr_t)ip, PINOD);
126 				goto loop;
127 			}
128 			if ((ip->i_flag&IMOUNT) != 0) {
129 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
130 					if(mp->m_inodp == ip) {
131 						dev = mp->m_dev;
132 						fs = mp->m_bufp->b_un.b_fs;
133 						ino = ROOTINO;
134 						goto loop;
135 					}
136 				panic("no imt");
137 			}
138 			if (ip->i_count == 0) {		/* ino on free list */
139 				if (iq = ip->i_freef)
140 					iq->i_freeb = ip->i_freeb;
141 				else
142 					ifreet = ip->i_freeb;
143 				*ip->i_freeb = iq;
144 				ip->i_freef = NULL;
145 				ip->i_freeb = NULL;
146 			}
147 			ip->i_count++;
148 			ip->i_flag |= ILOCKED;
149 			return(ip);
150 		}
151 
152 	if ((ip = ifreeh) == NULL) {
153 		tablefull("inode");
154 		u.u_error = ENFILE;
155 		return(NULL);
156 	}
157 	if (ip->i_count)
158 		panic("free inode isn't");
159 	if (iq = ip->i_freef)
160 		iq->i_freeb = &ifreeh;
161 	ifreeh = iq;
162 	ip->i_freef = NULL;
163 	ip->i_freeb = NULL;
164 	/*
165 	 * Now to take inode off the hash chain it was on
166 	 * (initially, or after an iflush, it is on a "hash chain"
167 	 * consisting entirely of itself, and pointed to by no-one,
168 	 * but that doesn't matter), and put it on the chain for
169 	 * its new (ino, dev) pair
170 	 */
171 	remque(ip);
172 	insque(ip, ih);
173 	ip->i_dev = dev;
174 	ip->i_fs = fs;
175 	ip->i_number = ino;
176 	cacheinval(ip);
177 	ip->i_flag = ILOCKED;
178 	ip->i_count++;
179 	ip->i_lastr = 0;
180 #ifdef QUOTA
181 	dqrele(ip->i_dquot);
182 #endif
183 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
184 	/*
185 	 * Check I/O errors
186 	 */
187 	if ((bp->b_flags&B_ERROR) != 0) {
188 		brelse(bp);
189 		/*
190 		 * the inode doesn't contain anything useful, so it would
191 		 * be misleading to leave it on its hash chain.
192 		 * 'iput' will take care of putting it back on the free list.
193 		 */
194 		remque(ip);
195 		ip->i_forw = ip;
196 		ip->i_back = ip;
197 		/*
198 		 * we also loose its inumber, just in case (as iput
199 		 * doesn't do that any more) - but as it isn't on its
200 		 * hash chain, I doubt if this is really necessary .. kre
201 		 * (probably the two methods are interchangable)
202 		 */
203 		ip->i_number = 0;
204 #ifdef QUOTA
205 		ip->i_dquot = NODQUOT;
206 #endif
207 		iput(ip);
208 		return(NULL);
209 	}
210 	dp = bp->b_un.b_dino;
211 	dp += itoo(fs, ino);
212 	ip->i_ic = dp->di_ic;
213 	brelse(bp);
214 #ifdef QUOTA
215 	if (ip->i_mode == 0)
216 		ip->i_dquot = NODQUOT;
217 	else
218 		ip->i_dquot = inoquota(ip);
219 #endif
220 	return (ip);
221 }
222 
223 /*
224  * Convert a pointer to an inode into a reference to an inode.
225  *
226  * This is basically the internal piece of iget (after the
227  * inode pointer is located) but without the test for mounted
228  * filesystems.  It is caller's responsibility to check that
229  * the inode pointer is valid.
230  */
231 igrab(ip)
232 	register struct inode *ip;
233 {
234 	while ((ip->i_flag&ILOCKED) != 0) {
235 		ip->i_flag |= IWANT;
236 		sleep((caddr_t)ip, PINOD);
237 	}
238 	if (ip->i_count == 0) {		/* ino on free list */
239 		register struct inode *iq;
240 
241 		if (iq = ip->i_freef)
242 			iq->i_freeb = ip->i_freeb;
243 		else
244 			ifreet = ip->i_freeb;
245 		*ip->i_freeb = iq;
246 		ip->i_freef = NULL;
247 		ip->i_freeb = NULL;
248 	}
249 	ip->i_count++;
250 	ip->i_flag |= ILOCKED;
251 }
252 
253 /*
254  * Decrement reference count of
255  * an inode structure.
256  * On the last reference,
257  * write the inode out and if necessary,
258  * truncate and deallocate the file.
259  */
260 iput(ip)
261 	register struct inode *ip;
262 {
263 
264 	if ((ip->i_flag & ILOCKED) == 0)
265 		panic("iput");
266 	IUNLOCK(ip);
267 	irele(ip);
268 }
269 
270 irele(ip)
271 	register struct inode *ip;
272 {
273 	int mode;
274 
275 	if (ip->i_count == 1) {
276 		ip->i_flag |= ILOCKED;
277 		if (ip->i_nlink <= 0) {
278 			itrunc(ip, (u_long)0);
279 			mode = ip->i_mode;
280 			ip->i_mode = 0;
281 			ip->i_rdev = 0;
282 			ip->i_flag |= IUPD|ICHG;
283 			ifree(ip, ip->i_number, mode);
284 #ifdef QUOTA
285 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
286 			dqrele(ip->i_dquot);
287 			ip->i_dquot = NODQUOT;
288 #endif
289 		}
290 		IUPDAT(ip, &time, &time, 0);
291 		IUNLOCK(ip);
292 		ip->i_flag = 0;
293 		/*
294 		 * Put the inode on the end of the free list.
295 		 * Possibly in some cases it would be better to
296 		 * put the inode at the head of the free list,
297 		 * (eg: where i_mode == 0 || i_number == 0)
298 		 * but I will think about that later .. kre
299 		 * (i_number is rarely 0 - only after an i/o error in iget,
300 		 * where i_mode == 0, the inode will probably be wanted
301 		 * again soon for an ialloc, so possibly we should keep it)
302 		 */
303 		if (ifreeh) {
304 			*ifreet = ip;
305 			ip->i_freeb = ifreet;
306 		} else {
307 			ifreeh = ip;
308 			ip->i_freeb = &ifreeh;
309 		}
310 		ip->i_freef = NULL;
311 		ifreet = &ip->i_freef;
312 	} else if (!(ip->i_flag & ILOCKED))
313 		ITIMES(ip, &time, &time);
314 	ip->i_count--;
315 }
316 
317 /*
318  * Check accessed and update flags on
319  * an inode structure.
320  * If any is on, update the inode
321  * with the current time.
322  * If waitfor is given, then must insure
323  * i/o order so wait for write to complete.
324  */
325 iupdat(ip, ta, tm, waitfor)
326 	register struct inode *ip;
327 	struct timeval *ta, *tm;
328 	int waitfor;
329 {
330 	register struct buf *bp;
331 	struct dinode *dp;
332 	register struct fs *fp;
333 
334 	fp = ip->i_fs;
335 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
336 		if (fp->fs_ronly)
337 			return;
338 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
339 			(int)fp->fs_bsize);
340 		if (bp->b_flags & B_ERROR) {
341 			brelse(bp);
342 			return;
343 		}
344 		if (ip->i_flag&IACC)
345 			ip->i_atime = ta->tv_sec;
346 		if (ip->i_flag&IUPD)
347 			ip->i_mtime = tm->tv_sec;
348 		if (ip->i_flag&ICHG)
349 			ip->i_ctime = time.tv_sec;
350 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
351 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
352 		dp->di_ic = ip->i_ic;
353 		if (waitfor)
354 			bwrite(bp);
355 		else
356 			bdwrite(bp);
357 	}
358 }
359 
360 #define	SINGLE	0	/* index of single indirect block */
361 #define	DOUBLE	1	/* index of double indirect block */
362 #define	TRIPLE	2	/* index of triple indirect block */
363 /*
364  * Truncate the inode ip to at most
365  * length size.  Free affected disk
366  * blocks -- the blocks of the file
367  * are removed in reverse order.
368  *
369  * NB: triple indirect blocks are untested.
370  */
371 itrunc(oip, length)
372 	struct inode *oip;
373 	u_long length;
374 {
375 	register i;
376 	register daddr_t lastblock;
377 	daddr_t bn, lastiblock[NIADDR];
378 	register struct fs *fs;
379 	register struct inode *ip;
380 	struct inode tip;
381 	long blocksreleased = 0, nblocks;
382 	long indirtrunc();
383 	int level;
384 
385 	if (oip->i_size <= length) {
386 		oip->i_flag |= ICHG|IUPD;
387 		iupdat(oip, &time, &time, 1);
388 		return;
389 	}
390 	/*
391 	 * Calculate index into inode's block list of
392 	 * last direct and indirect blocks (if any)
393 	 * which we want to keep.  Lastblock is -1 when
394 	 * the file is truncated to 0.
395 	 */
396 	fs = oip->i_fs;
397 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
398 	lastiblock[SINGLE] = lastblock - NDADDR;
399 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
400 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
401 	nblocks = btodb(fs->fs_bsize);
402 	/*
403 	 * Update size of file and block pointers
404 	 * on disk before we start freeing blocks.
405 	 * If we crash before free'ing blocks below,
406 	 * the blocks will be returned to the free list.
407 	 * lastiblock values are also normalized to -1
408 	 * for calls to indirtrunc below.
409 	 * (? fsck doesn't check validity of pointers in indirect blocks)
410 	 */
411 	tip = *oip;
412 	for (level = TRIPLE; level >= SINGLE; level--)
413 		if (lastiblock[level] < 0) {
414 			oip->i_ib[level] = 0;
415 			lastiblock[level] = -1;
416 		}
417 	for (i = NDADDR - 1; i > lastblock; i--)
418 		oip->i_db[i] = 0;
419 	oip->i_size = length;
420 	oip->i_flag |= ICHG|IUPD;
421 	iupdat(oip, &time, &time, 1);
422 	ip = &tip;
423 
424 	/*
425 	 * Indirect blocks first.
426 	 */
427 	for (level = TRIPLE; level >= SINGLE; level--) {
428 		bn = ip->i_ib[level];
429 		if (bn != 0) {
430 			blocksreleased +=
431 			    indirtrunc(ip, bn, lastiblock[level], level);
432 			if (lastiblock[level] < 0) {
433 				ip->i_ib[level] = 0;
434 				free(ip, bn, (off_t)fs->fs_bsize);
435 				blocksreleased += nblocks;
436 			}
437 		}
438 		if (lastiblock[level] >= 0)
439 			goto done;
440 	}
441 
442 	/*
443 	 * All whole direct blocks or frags.
444 	 */
445 	for (i = NDADDR - 1; i > lastblock; i--) {
446 		register int size;
447 
448 		bn = ip->i_db[i];
449 		if (bn == 0)
450 			continue;
451 		ip->i_db[i] = 0;
452 		size = (off_t)blksize(fs, ip, i);
453 		free(ip, bn, size);
454 		blocksreleased += btodb(size);
455 	}
456 	if (lastblock < 0)
457 		goto done;
458 
459 	/*
460 	 * Finally, look for a change in size of the
461 	 * last direct block; release any frags.
462 	 */
463 	bn = ip->i_db[lastblock];
464 	if (bn != 0) {
465 		int oldspace, newspace;
466 
467 		/*
468 		 * Calculate amount of space we're giving
469 		 * back as old block size minus new block size.
470 		 */
471 		oldspace = blksize(fs, ip, lastblock);
472 		ip->i_size = length;
473 		newspace = blksize(fs, ip, lastblock);
474 		if (newspace == 0)
475 			panic("itrunc: newspace");
476 		if (oldspace - newspace > 0) {
477 			/*
478 			 * Block number of space to be free'd is
479 			 * the old block # plus the number of frags
480 			 * required for the storage we're keeping.
481 			 */
482 			bn += numfrags(fs, newspace);
483 			free(ip, bn, oldspace - newspace);
484 			blocksreleased += btodb(oldspace - newspace);
485 		}
486 	}
487 done:
488 /* BEGIN PARANOIA */
489 	for (level = SINGLE; level <= TRIPLE; level++)
490 		if (ip->i_ib[level] != oip->i_ib[level])
491 			panic("itrunc1");
492 	for (i = 0; i < NDADDR; i++)
493 		if (ip->i_db[i] != oip->i_db[i])
494 			panic("itrunc2");
495 /* END PARANOIA */
496 	oip->i_blocks -= blocksreleased;
497 	if (oip->i_blocks < 0)			/* sanity */
498 		oip->i_blocks = 0;
499 	oip->i_flag |= ICHG;
500 #ifdef QUOTA
501 	(void) chkdq(oip, -blocksreleased, 0);
502 #endif
503 }
504 
505 /*
506  * Release blocks associated with the inode ip and
507  * stored in the indirect block bn.  Blocks are free'd
508  * in LIFO order up to (but not including) lastbn.  If
509  * level is greater than SINGLE, the block is an indirect
510  * block and recursive calls to indirtrunc must be used to
511  * cleanse other indirect blocks.
512  *
513  * NB: triple indirect blocks are untested.
514  */
515 long
516 indirtrunc(ip, bn, lastbn, level)
517 	register struct inode *ip;
518 	daddr_t bn, lastbn;
519 	int level;
520 {
521 	register int i;
522 	struct buf *bp, *copy;
523 	register daddr_t *bap;
524 	register struct fs *fs = ip->i_fs;
525 	daddr_t nb, last;
526 	long factor;
527 	int blocksreleased = 0, nblocks;
528 
529 	/*
530 	 * Calculate index in current block of last
531 	 * block to be kept.  -1 indicates the entire
532 	 * block so we need not calculate the index.
533 	 */
534 	factor = 1;
535 	for (i = SINGLE; i < level; i++)
536 		factor *= NINDIR(fs);
537 	last = lastbn;
538 	if (lastbn > 0)
539 		last /= factor;
540 	nblocks = btodb(fs->fs_bsize);
541 	/*
542 	 * Get buffer of block pointers, zero those
543 	 * entries corresponding to blocks to be free'd,
544 	 * and update on disk copy first.
545 	 */
546 	copy = geteblk((int)fs->fs_bsize);
547 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
548 	if (bp->b_flags&B_ERROR) {
549 		brelse(copy);
550 		brelse(bp);
551 		return (0);
552 	}
553 	bap = bp->b_un.b_daddr;
554 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
555 	bzero((caddr_t)&bap[last + 1],
556 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
557 	bwrite(bp);
558 	bp = copy, bap = bp->b_un.b_daddr;
559 
560 	/*
561 	 * Recursively free totally unused blocks.
562 	 */
563 	for (i = NINDIR(fs) - 1; i > last; i--) {
564 		nb = bap[i];
565 		if (nb == 0)
566 			continue;
567 		if (level > SINGLE)
568 			blocksreleased +=
569 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
570 		free(ip, nb, (int)fs->fs_bsize);
571 		blocksreleased += nblocks;
572 	}
573 
574 	/*
575 	 * Recursively free last partial block.
576 	 */
577 	if (level > SINGLE && lastbn >= 0) {
578 		last = lastbn % factor;
579 		nb = bap[i];
580 		if (nb != 0)
581 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
582 	}
583 	brelse(bp);
584 	return (blocksreleased);
585 }
586 
587 /*
588  * remove any inodes in the inode cache belonging to dev
589  *
590  * There should not be any active ones, return error if any are found
591  * (nb: this is a user error, not a system err)
592  *
593  * Also, count the references to dev by block devices - this really
594  * has nothing to do with the object of the procedure, but as we have
595  * to scan the inode table here anyway, we might as well get the
596  * extra benefit.
597  *
598  * this is called from sumount()/sys3.c when dev is being unmounted
599  */
600 #ifdef QUOTA
601 iflush(dev, iq)
602 	dev_t dev;
603 	struct inode *iq;
604 #else
605 iflush(dev)
606 	dev_t dev;
607 #endif
608 {
609 	register struct inode *ip;
610 	register open = 0;
611 
612 	for (ip = inode; ip < inodeNINODE; ip++) {
613 #ifdef QUOTA
614 		if (ip != iq && ip->i_dev == dev)
615 #else
616 		if (ip->i_dev == dev)
617 #endif
618 			if (ip->i_count)
619 				return(-1);
620 			else {
621 				remque(ip);
622 				ip->i_forw = ip;
623 				ip->i_back = ip;
624 				/*
625 				 * as i_count == 0, the inode was on the free
626 				 * list already, just leave it there, it will
627 				 * fall off the bottom eventually. We could
628 				 * perhaps move it to the head of the free
629 				 * list, but as umounts are done so
630 				 * infrequently, we would gain very little,
631 				 * while making the code bigger.
632 				 */
633 #ifdef QUOTA
634 				dqrele(ip->i_dquot);
635 				ip->i_dquot = NODQUOT;
636 #endif
637 			}
638 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
639 		    ip->i_rdev == dev)
640 			open++;
641 	}
642 	return (open);
643 }
644 
645 /*
646  * Lock an inode. If its already locked, set the WANT bit and sleep.
647  */
648 ilock(ip)
649 	register struct inode *ip;
650 {
651 
652 	ILOCK(ip);
653 }
654 
655 /*
656  * Unlock an inode.  If WANT bit is on, wakeup.
657  */
658 iunlock(ip)
659 	register struct inode *ip;
660 {
661 
662 	IUNLOCK(ip);
663 }
664