xref: /csrg-svn/sys/ufs/ffs/ffs_inode.c (revision 16720)
1 /*	ffs_inode.c	6.8	84/07/15	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mount.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/inode.h"
9 #include "../h/fs.h"
10 #include "../h/conf.h"
11 #include "../h/buf.h"
12 #ifdef QUOTA
13 #include "../h/quota.h"
14 #endif
15 #include "../h/kernel.h"
16 
17 #define	INOHSZ	64
18 #if	((INOHSZ&(INOHSZ-1)) == 0)
19 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
20 #else
21 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
22 #endif
23 
24 union ihead {				/* inode LRU cache, Chris Maltby */
25 	union  ihead *ih_head[2];
26 	struct inode *ih_chain[2];
27 } ihead[INOHSZ];
28 
29 struct inode *ifreeh, **ifreet;
30 
31 /*
32  * Initialize hash links for inodes
33  * and build inode free list.
34  */
35 ihinit()
36 {
37 	register int i;
38 	register struct inode *ip = inode;
39 	register union  ihead *ih = ihead;
40 
41 	for (i = INOHSZ; --i >= 0; ih++) {
42 		ih->ih_head[0] = ih;
43 		ih->ih_head[1] = ih;
44 	}
45 	ifreeh = ip;
46 	ifreet = &ip->i_freef;
47 	ip->i_freeb = &ifreeh;
48 	ip->i_forw = ip;
49 	ip->i_back = ip;
50 	for (i = ninode; --i > 0; ) {
51 		++ip;
52 		ip->i_forw = ip;
53 		ip->i_back = ip;
54 		*ifreet = ip;
55 		ip->i_freeb = ifreet;
56 		ifreet = &ip->i_freef;
57 	}
58 	ip->i_freef = NULL;
59 }
60 
61 #ifdef notdef
62 /*
63  * Find an inode if it is incore.
64  * This is the equivalent, for inodes,
65  * of ``incore'' in bio.c or ``pfind'' in subr.c.
66  */
67 struct inode *
68 ifind(dev, ino)
69 	dev_t dev;
70 	ino_t ino;
71 {
72 	register struct inode *ip;
73 	register union  ihead *ih;
74 
75 	ih = &ihead[INOHASH(dev, ino)];
76 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
77 		if (ino==ip->i_number && dev==ip->i_dev)
78 			return (ip);
79 	return ((struct inode *)0);
80 }
81 #endif notdef
82 
83 /*
84  * Look up an inode by device,inumber.
85  * If it is in core (in the inode structure),
86  * honor the locking protocol.
87  * If it is not in core, read it in from the
88  * specified device.
89  * If the inode is mounted on, perform
90  * the indicated indirection.
91  * In all cases, a pointer to a locked
92  * inode structure is returned.
93  *
94  * panic: no imt -- if the mounted file
95  *	system is not in the mount table.
96  *	"cannot happen"
97  */
98 struct inode *
99 iget(dev, fs, ino)
100 	dev_t dev;
101 	register struct fs *fs;
102 	ino_t ino;
103 {
104 	register struct inode *ip;
105 	register union  ihead *ih;
106 	register struct mount *mp;
107 	register struct buf *bp;
108 	register struct dinode *dp;
109 	register struct inode *iq;
110 	struct inode *xp;
111 
112 
113 loop:
114 	ih = &ihead[INOHASH(dev, ino)];
115 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
116 		if (ino == ip->i_number && dev == ip->i_dev) {
117 			/*
118 			 * Following is essentially an inline expanded
119 			 * copy of igrab(), expanded inline for speed,
120 			 * and so that the test for a mounted on inode
121 			 * can be deferred until after we are sure that
122 			 * the inode isn't busy.
123 			 */
124 			if ((ip->i_flag&ILOCKED) != 0) {
125 				ip->i_flag |= IWANT;
126 				sleep((caddr_t)ip, PINOD);
127 				goto loop;
128 			}
129 			if ((ip->i_flag&IMOUNT) != 0) {
130 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
131 					if(mp->m_inodp == ip) {
132 						dev = mp->m_dev;
133 						fs = mp->m_bufp->b_un.b_fs;
134 						ino = ROOTINO;
135 						goto loop;
136 					}
137 				panic("no imt");
138 			}
139 			if (ip->i_count == 0) {		/* ino on free list */
140 				if (iq = ip->i_freef)
141 					iq->i_freeb = ip->i_freeb;
142 				else
143 					ifreet = ip->i_freeb;
144 				*ip->i_freeb = iq;
145 				ip->i_freef = NULL;
146 				ip->i_freeb = NULL;
147 			}
148 			ip->i_count++;
149 			ip->i_flag |= ILOCKED;
150 			return(ip);
151 		}
152 
153 	if ((ip = ifreeh) == NULL) {
154 		tablefull("inode");
155 		u.u_error = ENFILE;
156 		return(NULL);
157 	}
158 	if (ip->i_count)
159 		panic("free inode isn't");
160 	if (iq = ip->i_freef)
161 		iq->i_freeb = &ifreeh;
162 	ifreeh = iq;
163 	ip->i_freef = NULL;
164 	ip->i_freeb = NULL;
165 	/*
166 	 * Now to take inode off the hash chain it was on
167 	 * (initially, or after an iflush, it is on a "hash chain"
168 	 * consisting entirely of itself, and pointed to by no-one,
169 	 * but that doesn't matter), and put it on the chain for
170 	 * its new (ino, dev) pair
171 	 */
172 	remque(ip);
173 	insque(ip, ih);
174 	ip->i_dev = dev;
175 	ip->i_fs = fs;
176 	ip->i_number = ino;
177 	ip->i_id = ++nextinodeid;	/* also used in rename */
178 	/*
179 	 * At an absurd rate of 100 calls/second,
180 	 * this should occur once every 8 months.
181 	 */
182 	if (nextinodeid < 0)
183 		for (nextinodeid = 0, xp = inode; xp < inodeNINODE; xp++)
184 			xp->i_id = 0;
185 	ip->i_flag = ILOCKED;
186 	ip->i_count++;
187 	ip->i_lastr = 0;
188 #ifdef QUOTA
189 	dqrele(ip->i_dquot);
190 #endif
191 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
192 	/*
193 	 * Check I/O errors
194 	 */
195 	if ((bp->b_flags&B_ERROR) != 0) {
196 		brelse(bp);
197 		/*
198 		 * the inode doesn't contain anything useful, so it would
199 		 * be misleading to leave it on its hash chain.
200 		 * 'iput' will take care of putting it back on the free list.
201 		 */
202 		remque(ip);
203 		ip->i_forw = ip;
204 		ip->i_back = ip;
205 		/*
206 		 * we also loose its inumber, just in case (as iput
207 		 * doesn't do that any more) - but as it isn't on its
208 		 * hash chain, I doubt if this is really necessary .. kre
209 		 * (probably the two methods are interchangable)
210 		 */
211 		ip->i_number = 0;
212 #ifdef QUOTA
213 		ip->i_dquot = NODQUOT;
214 #endif
215 		iput(ip);
216 		return(NULL);
217 	}
218 	dp = bp->b_un.b_dino;
219 	dp += itoo(fs, ino);
220 	ip->i_ic = dp->di_ic;
221 	brelse(bp);
222 #ifdef QUOTA
223 	if (ip->i_mode == 0)
224 		ip->i_dquot = NODQUOT;
225 	else
226 		ip->i_dquot = inoquota(ip);
227 #endif
228 	return (ip);
229 }
230 
231 /*
232  * Convert a pointer to an inode into a reference to an inode.
233  *
234  * This is basically the internal piece of iget (after the
235  * inode pointer is located) but without the test for mounted
236  * filesystems.  It is caller's responsibility to check that
237  * the inode pointer is valid.
238  */
239 igrab(ip)
240 	register struct inode *ip;
241 {
242 	while ((ip->i_flag&ILOCKED) != 0) {
243 		ip->i_flag |= IWANT;
244 		sleep((caddr_t)ip, PINOD);
245 	}
246 	if (ip->i_count == 0) {		/* ino on free list */
247 		register struct inode *iq;
248 
249 		if (iq = ip->i_freef)
250 			iq->i_freeb = ip->i_freeb;
251 		else
252 			ifreet = ip->i_freeb;
253 		*ip->i_freeb = iq;
254 		ip->i_freef = NULL;
255 		ip->i_freeb = NULL;
256 	}
257 	ip->i_count++;
258 	ip->i_flag |= ILOCKED;
259 }
260 
261 /*
262  * Decrement reference count of
263  * an inode structure.
264  * On the last reference,
265  * write the inode out and if necessary,
266  * truncate and deallocate the file.
267  */
268 iput(ip)
269 	register struct inode *ip;
270 {
271 
272 	if ((ip->i_flag & ILOCKED) == 0)
273 		panic("iput");
274 	IUNLOCK(ip);
275 	irele(ip);
276 }
277 
278 irele(ip)
279 	register struct inode *ip;
280 {
281 	int mode;
282 
283 	if (ip->i_count == 1) {
284 		ip->i_flag |= ILOCKED;
285 		if (ip->i_nlink <= 0) {
286 			itrunc(ip, (u_long)0);
287 			mode = ip->i_mode;
288 			ip->i_mode = 0;
289 			ip->i_rdev = 0;
290 			ip->i_flag |= IUPD|ICHG;
291 			ifree(ip, ip->i_number, mode);
292 #ifdef QUOTA
293 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
294 			dqrele(ip->i_dquot);
295 			ip->i_dquot = NODQUOT;
296 #endif
297 		}
298 		IUPDAT(ip, &time, &time, 0);
299 		IUNLOCK(ip);
300 		ip->i_flag = 0;
301 		/*
302 		 * Put the inode on the end of the free list.
303 		 * Possibly in some cases it would be better to
304 		 * put the inode at the head of the free list,
305 		 * (eg: where i_mode == 0 || i_number == 0)
306 		 * but I will think about that later .. kre
307 		 * (i_number is rarely 0 - only after an i/o error in iget,
308 		 * where i_mode == 0, the inode will probably be wanted
309 		 * again soon for an ialloc, so possibly we should keep it)
310 		 */
311 		if (ifreeh) {
312 			*ifreet = ip;
313 			ip->i_freeb = ifreet;
314 		} else {
315 			ifreeh = ip;
316 			ip->i_freeb = &ifreeh;
317 		}
318 		ip->i_freef = NULL;
319 		ifreet = &ip->i_freef;
320 	} else if (!(ip->i_flag & ILOCKED))
321 		ITIMES(ip, &time, &time);
322 	ip->i_count--;
323 }
324 
325 /*
326  * Check accessed and update flags on
327  * an inode structure.
328  * If any is on, update the inode
329  * with the current time.
330  * If waitfor is given, then must insure
331  * i/o order so wait for write to complete.
332  */
333 iupdat(ip, ta, tm, waitfor)
334 	register struct inode *ip;
335 	struct timeval *ta, *tm;
336 	int waitfor;
337 {
338 	register struct buf *bp;
339 	struct dinode *dp;
340 	register struct fs *fp;
341 
342 	fp = ip->i_fs;
343 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
344 		if (fp->fs_ronly)
345 			return;
346 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
347 			(int)fp->fs_bsize);
348 		if (bp->b_flags & B_ERROR) {
349 			brelse(bp);
350 			return;
351 		}
352 		if (ip->i_flag&IACC)
353 			ip->i_atime = ta->tv_sec;
354 		if (ip->i_flag&IUPD)
355 			ip->i_mtime = tm->tv_sec;
356 		if (ip->i_flag&ICHG)
357 			ip->i_ctime = time.tv_sec;
358 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
359 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
360 		dp->di_ic = ip->i_ic;
361 		if (waitfor)
362 			bwrite(bp);
363 		else
364 			bdwrite(bp);
365 	}
366 }
367 
368 #define	SINGLE	0	/* index of single indirect block */
369 #define	DOUBLE	1	/* index of double indirect block */
370 #define	TRIPLE	2	/* index of triple indirect block */
371 /*
372  * Truncate the inode ip to at most
373  * length size.  Free affected disk
374  * blocks -- the blocks of the file
375  * are removed in reverse order.
376  *
377  * NB: triple indirect blocks are untested.
378  */
379 itrunc(oip, length)
380 	struct inode *oip;
381 	u_long length;
382 {
383 	register i;
384 	register daddr_t lastblock;
385 	daddr_t bn, lastiblock[NIADDR];
386 	register struct fs *fs;
387 	register struct inode *ip;
388 	struct inode tip;
389 	long blocksreleased = 0, nblocks;
390 	long indirtrunc();
391 	int level;
392 
393 	if (oip->i_size <= length) {
394 		oip->i_flag |= ICHG|IUPD;
395 		iupdat(oip, &time, &time, 1);
396 		return;
397 	}
398 	/*
399 	 * Calculate index into inode's block list of
400 	 * last direct and indirect blocks (if any)
401 	 * which we want to keep.  Lastblock is -1 when
402 	 * the file is truncated to 0.
403 	 */
404 	fs = oip->i_fs;
405 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
406 	lastiblock[SINGLE] = lastblock - NDADDR;
407 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
408 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
409 	nblocks = btodb(fs->fs_bsize);
410 	/*
411 	 * Update size of file and block pointers
412 	 * on disk before we start freeing blocks.
413 	 * If we crash before free'ing blocks below,
414 	 * the blocks will be returned to the free list.
415 	 * lastiblock values are also normalized to -1
416 	 * for calls to indirtrunc below.
417 	 * (? fsck doesn't check validity of pointers in indirect blocks)
418 	 */
419 	tip = *oip;
420 	for (level = TRIPLE; level >= SINGLE; level--)
421 		if (lastiblock[level] < 0) {
422 			oip->i_ib[level] = 0;
423 			lastiblock[level] = -1;
424 		}
425 	for (i = NDADDR - 1; i > lastblock; i--)
426 		oip->i_db[i] = 0;
427 	oip->i_size = length;
428 	oip->i_flag |= ICHG|IUPD;
429 	iupdat(oip, &time, &time, 1);
430 	ip = &tip;
431 
432 	/*
433 	 * Indirect blocks first.
434 	 */
435 	for (level = TRIPLE; level >= SINGLE; level--) {
436 		bn = ip->i_ib[level];
437 		if (bn != 0) {
438 			blocksreleased +=
439 			    indirtrunc(ip, bn, lastiblock[level], level);
440 			if (lastiblock[level] < 0) {
441 				ip->i_ib[level] = 0;
442 				free(ip, bn, (off_t)fs->fs_bsize);
443 				blocksreleased += nblocks;
444 			}
445 		}
446 		if (lastiblock[level] >= 0)
447 			goto done;
448 	}
449 
450 	/*
451 	 * All whole direct blocks or frags.
452 	 */
453 	for (i = NDADDR - 1; i > lastblock; i--) {
454 		register int size;
455 
456 		bn = ip->i_db[i];
457 		if (bn == 0)
458 			continue;
459 		ip->i_db[i] = 0;
460 		size = (off_t)blksize(fs, ip, i);
461 		free(ip, bn, size);
462 		blocksreleased += btodb(size);
463 	}
464 	if (lastblock < 0)
465 		goto done;
466 
467 	/*
468 	 * Finally, look for a change in size of the
469 	 * last direct block; release any frags.
470 	 */
471 	bn = ip->i_db[lastblock];
472 	if (bn != 0) {
473 		int oldspace, newspace;
474 
475 		/*
476 		 * Calculate amount of space we're giving
477 		 * back as old block size minus new block size.
478 		 */
479 		oldspace = blksize(fs, ip, lastblock);
480 		ip->i_size = length;
481 		newspace = blksize(fs, ip, lastblock);
482 		if (newspace == 0)
483 			panic("itrunc: newspace");
484 		if (oldspace - newspace > 0) {
485 			/*
486 			 * Block number of space to be free'd is
487 			 * the old block # plus the number of frags
488 			 * required for the storage we're keeping.
489 			 */
490 			bn += numfrags(fs, newspace);
491 			free(ip, bn, oldspace - newspace);
492 			blocksreleased += btodb(oldspace - newspace);
493 		}
494 	}
495 done:
496 /* BEGIN PARANOIA */
497 	for (level = SINGLE; level <= TRIPLE; level++)
498 		if (ip->i_ib[level] != oip->i_ib[level])
499 			panic("itrunc1");
500 	for (i = 0; i < NDADDR; i++)
501 		if (ip->i_db[i] != oip->i_db[i])
502 			panic("itrunc2");
503 /* END PARANOIA */
504 	oip->i_blocks -= blocksreleased;
505 	if (oip->i_blocks < 0)			/* sanity */
506 		oip->i_blocks = 0;
507 	oip->i_flag |= ICHG;
508 #ifdef QUOTA
509 	(void) chkdq(oip, -blocksreleased, 0);
510 #endif
511 }
512 
513 /*
514  * Release blocks associated with the inode ip and
515  * stored in the indirect block bn.  Blocks are free'd
516  * in LIFO order up to (but not including) lastbn.  If
517  * level is greater than SINGLE, the block is an indirect
518  * block and recursive calls to indirtrunc must be used to
519  * cleanse other indirect blocks.
520  *
521  * NB: triple indirect blocks are untested.
522  */
523 long
524 indirtrunc(ip, bn, lastbn, level)
525 	register struct inode *ip;
526 	daddr_t bn, lastbn;
527 	int level;
528 {
529 	register int i;
530 	struct buf *bp, *copy;
531 	register daddr_t *bap;
532 	register struct fs *fs = ip->i_fs;
533 	daddr_t nb, last;
534 	long factor;
535 	int blocksreleased = 0, nblocks;
536 
537 	/*
538 	 * Calculate index in current block of last
539 	 * block to be kept.  -1 indicates the entire
540 	 * block so we need not calculate the index.
541 	 */
542 	factor = 1;
543 	for (i = SINGLE; i < level; i++)
544 		factor *= NINDIR(fs);
545 	last = lastbn;
546 	if (lastbn > 0)
547 		last /= factor;
548 	nblocks = btodb(fs->fs_bsize);
549 	/*
550 	 * Get buffer of block pointers, zero those
551 	 * entries corresponding to blocks to be free'd,
552 	 * and update on disk copy first.
553 	 */
554 	copy = geteblk((int)fs->fs_bsize);
555 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
556 	if (bp->b_flags&B_ERROR) {
557 		brelse(copy);
558 		brelse(bp);
559 		return (0);
560 	}
561 	bap = bp->b_un.b_daddr;
562 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
563 	bzero((caddr_t)&bap[last + 1],
564 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
565 	bwrite(bp);
566 	bp = copy, bap = bp->b_un.b_daddr;
567 
568 	/*
569 	 * Recursively free totally unused blocks.
570 	 */
571 	for (i = NINDIR(fs) - 1; i > last; i--) {
572 		nb = bap[i];
573 		if (nb == 0)
574 			continue;
575 		if (level > SINGLE)
576 			blocksreleased +=
577 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
578 		free(ip, nb, (int)fs->fs_bsize);
579 		blocksreleased += nblocks;
580 	}
581 
582 	/*
583 	 * Recursively free last partial block.
584 	 */
585 	if (level > SINGLE && lastbn >= 0) {
586 		last = lastbn % factor;
587 		nb = bap[i];
588 		if (nb != 0)
589 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
590 	}
591 	brelse(bp);
592 	return (blocksreleased);
593 }
594 
595 /*
596  * remove any inodes in the inode cache belonging to dev
597  *
598  * There should not be any active ones, return error if any are found
599  * (nb: this is a user error, not a system err)
600  *
601  * Also, count the references to dev by block devices - this really
602  * has nothing to do with the object of the procedure, but as we have
603  * to scan the inode table here anyway, we might as well get the
604  * extra benefit.
605  *
606  * this is called from sumount()/sys3.c when dev is being unmounted
607  */
608 #ifdef QUOTA
609 iflush(dev, iq)
610 	dev_t dev;
611 	struct inode *iq;
612 #else
613 iflush(dev)
614 	dev_t dev;
615 #endif
616 {
617 	register struct inode *ip;
618 	register open = 0;
619 
620 	for (ip = inode; ip < inodeNINODE; ip++) {
621 #ifdef QUOTA
622 		if (ip != iq && ip->i_dev == dev)
623 #else
624 		if (ip->i_dev == dev)
625 #endif
626 			if (ip->i_count)
627 				return(-1);
628 			else {
629 				remque(ip);
630 				ip->i_forw = ip;
631 				ip->i_back = ip;
632 				/*
633 				 * as i_count == 0, the inode was on the free
634 				 * list already, just leave it there, it will
635 				 * fall off the bottom eventually. We could
636 				 * perhaps move it to the head of the free
637 				 * list, but as umounts are done so
638 				 * infrequently, we would gain very little,
639 				 * while making the code bigger.
640 				 */
641 #ifdef QUOTA
642 				dqrele(ip->i_dquot);
643 				ip->i_dquot = NODQUOT;
644 #endif
645 			}
646 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
647 		    ip->i_rdev == dev)
648 			open++;
649 	}
650 	return (open);
651 }
652 
653 /*
654  * Lock an inode. If its already locked, set the WANT bit and sleep.
655  */
656 ilock(ip)
657 	register struct inode *ip;
658 {
659 
660 	ILOCK(ip);
661 }
662 
663 /*
664  * Unlock an inode.  If WANT bit is on, wakeup.
665  */
666 iunlock(ip)
667 	register struct inode *ip;
668 {
669 
670 	IUNLOCK(ip);
671 }
672