xref: /csrg-svn/sys/ufs/ffs/ufs_inode.c (revision 16642)
1 /*	ufs_inode.c	6.5	84/06/27	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mount.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/inode.h"
9 #include "../h/fs.h"
10 #include "../h/conf.h"
11 #include "../h/buf.h"
12 #ifdef QUOTA
13 #include "../h/quota.h"
14 #endif
15 #include "../h/kernel.h"
16 
17 #define	INOHSZ	64
18 #if	((INOHSZ&(INOHSZ-1)) == 0)
19 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
20 #else
21 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
22 #endif
23 
24 union ihead {				/* inode LRU cache, Chris Maltby */
25 	union  ihead *ih_head[2];
26 	struct inode *ih_chain[2];
27 } ihead[INOHSZ];
28 
29 struct inode *ifreeh, **ifreet;
30 
31 /*
32  * Initialize hash links for inodes
33  * and build inode free list.
34  */
35 ihinit()
36 {
37 	register int i;
38 	register struct inode *ip = inode;
39 	register union  ihead *ih = ihead;
40 
41 	for (i = INOHSZ; --i >= 0; ih++) {
42 		ih->ih_head[0] = ih;
43 		ih->ih_head[1] = ih;
44 	}
45 	ifreeh = ip;
46 	ifreet = &ip->i_freef;
47 	ip->i_freeb = &ifreeh;
48 	ip->i_forw = ip;
49 	ip->i_back = ip;
50 	for (i = ninode; --i > 0; ) {
51 		++ip;
52 		ip->i_forw = ip;
53 		ip->i_back = ip;
54 		*ifreet = ip;
55 		ip->i_freeb = ifreet;
56 		ifreet = &ip->i_freef;
57 	}
58 	ip->i_freef = NULL;
59 }
60 
61 #ifdef notdef
62 /*
63  * Find an inode if it is incore.
64  * This is the equivalent, for inodes,
65  * of ``incore'' in bio.c or ``pfind'' in subr.c.
66  */
67 struct inode *
68 ifind(dev, ino)
69 	dev_t dev;
70 	ino_t ino;
71 {
72 	register struct inode *ip;
73 	register union  ihead *ih;
74 
75 	ih = &ihead[INOHASH(dev, ino)];
76 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
77 		if (ino==ip->i_number && dev==ip->i_dev)
78 			return (ip);
79 	return ((struct inode *)0);
80 }
81 #endif notdef
82 
83 /*
84  * Look up an inode by device,inumber.
85  * If it is in core (in the inode structure),
86  * honor the locking protocol.
87  * If it is not in core, read it in from the
88  * specified device.
89  * If the inode is mounted on, perform
90  * the indicated indirection.
91  * In all cases, a pointer to a locked
92  * inode structure is returned.
93  *
94  * panic: no imt -- if the mounted file
95  *	system is not in the mount table.
96  *	"cannot happen"
97  */
98 struct inode *
99 iget(dev, fs, ino)
100 	dev_t dev;
101 	register struct fs *fs;
102 	ino_t ino;
103 {
104 	register struct inode *ip;
105 	register union  ihead *ih;
106 	register struct mount *mp;
107 	register struct buf *bp;
108 	register struct dinode *dp;
109 	register struct inode *iq;
110 
111 loop:
112 	ih = &ihead[INOHASH(dev, ino)];
113 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
114 		if (ino == ip->i_number && dev == ip->i_dev) {
115 			/*
116 			 * Following is essentially an inline expanded
117 			 * copy of igrab(), expanded inline for speed,
118 			 * and so that the test for a mounted on inode
119 			 * can be deferred until after we are sure that
120 			 * the inode isn't busy.
121 			 */
122 			if ((ip->i_flag&ILOCKED) != 0) {
123 				ip->i_flag |= IWANT;
124 				sleep((caddr_t)ip, PINOD);
125 				goto loop;
126 			}
127 			if ((ip->i_flag&IMOUNT) != 0) {
128 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
129 					if(mp->m_inodp == ip) {
130 						dev = mp->m_dev;
131 						fs = mp->m_bufp->b_un.b_fs;
132 						ino = ROOTINO;
133 						goto loop;
134 					}
135 				panic("no imt");
136 			}
137 			if (ip->i_count == 0) {		/* ino on free list */
138 				if (iq = ip->i_freef)
139 					iq->i_freeb = ip->i_freeb;
140 				else
141 					ifreet = ip->i_freeb;
142 				*ip->i_freeb = iq;
143 				ip->i_freef = NULL;
144 				ip->i_freeb = NULL;
145 			}
146 			ip->i_count++;
147 			ip->i_flag |= ILOCKED;
148 			return(ip);
149 		}
150 
151 	if ((ip = ifreeh) == NULL) {
152 		tablefull("inode");
153 		u.u_error = ENFILE;
154 		return(NULL);
155 	}
156 	if (iq = ip->i_freef)
157 		iq->i_freeb = &ifreeh;
158 	ifreeh = iq;
159 	ip->i_freef = NULL;
160 	ip->i_freeb = NULL;
161 	/*
162 	 * Now to take inode off the hash chain it was on
163 	 * (initially, or after an iflush, it is on a "hash chain"
164 	 * consisting entirely of itself, and pointed to by no-one,
165 	 * but that doesn't matter), and put it on the chain for
166 	 * its new (ino, dev) pair
167 	 */
168 	remque(ip);
169 	insque(ip, ih);
170 #ifdef QUOTA
171 	dqrele(ip->i_dquot);
172 #endif
173 	ip->i_dev = dev;
174 	ip->i_fs = fs;
175 	ip->i_number = ino;
176 	ip->i_id = ++nextinodeid;	/* also used in rename */
177 	/*
178 	 * At an absurd rate of 100 calls/second,
179 	 * this should occur once every 16 months.
180 	 */
181 	if (nextinodeid == 0)
182 		panic("iget: wrap");
183 	ip->i_flag = ILOCKED;
184 	ip->i_count++;
185 	ip->i_lastr = 0;
186 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
187 	/*
188 	 * Check I/O errors
189 	 */
190 	if ((bp->b_flags&B_ERROR) != 0) {
191 		brelse(bp);
192 		/*
193 		 * the inode doesn't contain anything useful, so it would
194 		 * be misleading to leave it on its hash chain.
195 		 * 'iput' will take care of putting it back on the free list.
196 		 */
197 		remque(ip);
198 		ip->i_forw = ip;
199 		ip->i_back = ip;
200 		/*
201 		 * we also loose its inumber, just in case (as iput
202 		 * doesn't do that any more) - but as it isn't on its
203 		 * hash chain, I doubt if this is really necessary .. kre
204 		 * (probably the two methods are interchangable)
205 		 */
206 		ip->i_number = 0;
207 #ifdef QUOTA
208 		ip->i_dquot = NODQUOT;
209 #endif
210 		iput(ip);
211 		return(NULL);
212 	}
213 	dp = bp->b_un.b_dino;
214 	dp += itoo(fs, ino);
215 	ip->i_ic = dp->di_ic;
216 	brelse(bp);
217 #ifdef QUOTA
218 	if (ip->i_mode == 0)
219 		ip->i_dquot = NODQUOT;
220 	else
221 		ip->i_dquot = inoquota(ip);
222 #endif
223 	return (ip);
224 }
225 
226 /*
227  * Convert a pointer to an inode into a reference to an inode.
228  *
229  * This is basically the internal piece of iget (after the
230  * inode pointer is located) but without the test for mounted
231  * filesystems.  It is caller's responsibility to check that
232  * the inode pointer is valid.
233  */
234 igrab(ip)
235 	register struct inode *ip;
236 {
237 	while ((ip->i_flag&ILOCKED) != 0) {
238 		ip->i_flag |= IWANT;
239 		sleep((caddr_t)ip, PINOD);
240 	}
241 	if (ip->i_count == 0) {		/* ino on free list */
242 		register struct inode *iq;
243 
244 		if (iq = ip->i_freef)
245 			iq->i_freeb = ip->i_freeb;
246 		else
247 			ifreet = ip->i_freeb;
248 		*ip->i_freeb = iq;
249 		ip->i_freef = NULL;
250 		ip->i_freeb = NULL;
251 	}
252 	ip->i_count++;
253 	ip->i_flag |= ILOCKED;
254 }
255 
256 /*
257  * Decrement reference count of
258  * an inode structure.
259  * On the last reference,
260  * write the inode out and if necessary,
261  * truncate and deallocate the file.
262  */
263 iput(ip)
264 	register struct inode *ip;
265 {
266 
267 	if ((ip->i_flag & ILOCKED) == 0)
268 		panic("iput");
269 	iunlock(ip);
270 	irele(ip);
271 }
272 
273 irele(ip)
274 	register struct inode *ip;
275 {
276 	int mode;
277 
278 	if (ip->i_count == 1) {
279 		ip->i_flag |= ILOCKED;
280 		if (ip->i_nlink <= 0) {
281 			itrunc(ip, (u_long)0);
282 			mode = ip->i_mode;
283 			ip->i_mode = 0;
284 			ip->i_rdev = 0;
285 			ip->i_flag |= IUPD|ICHG;
286 			ifree(ip, ip->i_number, mode);
287 #ifdef QUOTA
288 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
289 			dqrele(ip->i_dquot);
290 			ip->i_dquot = NODQUOT;
291 #endif
292 		}
293 		IUPDAT(ip, &time, &time, 0);
294 		iunlock(ip);
295 		ip->i_flag = 0;
296 		/*
297 		 * Put the inode on the end of the free list.
298 		 * Possibly in some cases it would be better to
299 		 * put the inode at the head of the free list,
300 		 * (eg: where i_mode == 0 || i_number == 0)
301 		 * but I will think about that later .. kre
302 		 * (i_number is rarely 0 - only after an i/o error in iget,
303 		 * where i_mode == 0, the inode will probably be wanted
304 		 * again soon for an ialloc, so possibly we should keep it)
305 		 */
306 		if (ifreeh) {
307 			*ifreet = ip;
308 			ip->i_freeb = ifreet;
309 		} else {
310 			ifreeh = ip;
311 			ip->i_freeb = &ifreeh;
312 		}
313 		ip->i_freef = NULL;
314 		ifreet = &ip->i_freef;
315 	} else if (!(ip->i_flag & ILOCKED))
316 		ITIMES(ip, &time, &time);
317 	ip->i_count--;
318 }
319 
320 /*
321  * Check accessed and update flags on
322  * an inode structure.
323  * If any is on, update the inode
324  * with the current time.
325  * If waitfor is given, then must insure
326  * i/o order so wait for write to complete.
327  */
328 iupdat(ip, ta, tm, waitfor)
329 	register struct inode *ip;
330 	struct timeval *ta, *tm;
331 	int waitfor;
332 {
333 	register struct buf *bp;
334 	struct dinode *dp;
335 	register struct fs *fp;
336 
337 	fp = ip->i_fs;
338 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
339 		if (fp->fs_ronly)
340 			return;
341 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
342 			(int)fp->fs_bsize);
343 		if (bp->b_flags & B_ERROR) {
344 			brelse(bp);
345 			return;
346 		}
347 		if (ip->i_flag&IACC)
348 			ip->i_atime = ta->tv_sec;
349 		if (ip->i_flag&IUPD)
350 			ip->i_mtime = tm->tv_sec;
351 		if (ip->i_flag&ICHG)
352 			ip->i_ctime = time.tv_sec;
353 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
354 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
355 		dp->di_ic = ip->i_ic;
356 		if (waitfor)
357 			bwrite(bp);
358 		else
359 			bdwrite(bp);
360 	}
361 }
362 
363 #define	SINGLE	0	/* index of single indirect block */
364 #define	DOUBLE	1	/* index of double indirect block */
365 #define	TRIPLE	2	/* index of triple indirect block */
366 /*
367  * Truncate the inode ip to at most
368  * length size.  Free affected disk
369  * blocks -- the blocks of the file
370  * are removed in reverse order.
371  *
372  * NB: triple indirect blocks are untested.
373  */
374 itrunc(oip, length)
375 	struct inode *oip;
376 	u_long length;
377 {
378 	register i;
379 	register daddr_t lastblock;
380 	daddr_t bn, lastiblock[NIADDR];
381 	register struct fs *fs;
382 	register struct inode *ip;
383 	struct inode tip;
384 	long blocksreleased = 0, nblocks;
385 	long indirtrunc();
386 	int level;
387 
388 	if (oip->i_size <= length) {
389 		oip->i_flag |= ICHG|IUPD;
390 		iupdat(oip, &time, &time, 1);
391 		return;
392 	}
393 	/*
394 	 * Calculate index into inode's block list of
395 	 * last direct and indirect blocks (if any)
396 	 * which we want to keep.  Lastblock is -1 when
397 	 * the file is truncated to 0.
398 	 */
399 	fs = oip->i_fs;
400 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
401 	lastiblock[SINGLE] = lastblock - NDADDR;
402 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
403 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
404 	nblocks = btodb(fs->fs_bsize);
405 	/*
406 	 * Update size of file and block pointers
407 	 * on disk before we start freeing blocks.
408 	 * If we crash before free'ing blocks below,
409 	 * the blocks will be returned to the free list.
410 	 * lastiblock values are also normalized to -1
411 	 * for calls to indirtrunc below.
412 	 * (? fsck doesn't check validity of pointers in indirect blocks)
413 	 */
414 	tip = *oip;
415 	for (level = TRIPLE; level >= SINGLE; level--)
416 		if (lastiblock[level] < 0) {
417 			oip->i_ib[level] = 0;
418 			lastiblock[level] = -1;
419 		}
420 	for (i = NDADDR - 1; i > lastblock; i--)
421 		oip->i_db[i] = 0;
422 	oip->i_size = length;
423 	oip->i_flag |= ICHG|IUPD;
424 	iupdat(oip, &time, &time, 1);
425 	ip = &tip;
426 
427 	/*
428 	 * Indirect blocks first.
429 	 */
430 	for (level = TRIPLE; level >= SINGLE; level--) {
431 		bn = ip->i_ib[level];
432 		if (bn != 0) {
433 			blocksreleased +=
434 			    indirtrunc(ip, bn, lastiblock[level], level);
435 			if (lastiblock[level] < 0) {
436 				ip->i_ib[level] = 0;
437 				free(ip, bn, (off_t)fs->fs_bsize);
438 				blocksreleased += nblocks;
439 			}
440 		}
441 		if (lastiblock[level] >= 0)
442 			goto done;
443 	}
444 
445 	/*
446 	 * All whole direct blocks or frags.
447 	 */
448 	for (i = NDADDR - 1; i > lastblock; i--) {
449 		register int size;
450 
451 		bn = ip->i_db[i];
452 		if (bn == 0)
453 			continue;
454 		ip->i_db[i] = 0;
455 		size = (off_t)blksize(fs, ip, i);
456 		free(ip, bn, size);
457 		blocksreleased += btodb(size);
458 	}
459 	if (lastblock < 0)
460 		goto done;
461 
462 	/*
463 	 * Finally, look for a change in size of the
464 	 * last direct block; release any frags.
465 	 */
466 	bn = ip->i_db[lastblock];
467 	if (bn != 0) {
468 		int oldspace, newspace;
469 
470 		/*
471 		 * Calculate amount of space we're giving
472 		 * back as old block size minus new block size.
473 		 */
474 		oldspace = blksize(fs, ip, lastblock);
475 		ip->i_size = length;
476 		newspace = blksize(fs, ip, lastblock);
477 		if (newspace == 0)
478 			panic("itrunc: newspace");
479 		if (oldspace - newspace > 0) {
480 			/*
481 			 * Block number of space to be free'd is
482 			 * the old block # plus the number of frags
483 			 * required for the storage we're keeping.
484 			 */
485 			bn += numfrags(fs, newspace);
486 			free(ip, bn, oldspace - newspace);
487 			blocksreleased += btodb(oldspace - newspace);
488 		}
489 	}
490 done:
491 /* BEGIN PARANOIA */
492 	for (level = SINGLE; level <= TRIPLE; level++)
493 		if (ip->i_ib[level] != oip->i_ib[level])
494 			panic("itrunc1");
495 	for (i = 0; i < NDADDR; i++)
496 		if (ip->i_db[i] != oip->i_db[i])
497 			panic("itrunc2");
498 /* END PARANOIA */
499 	oip->i_blocks -= blocksreleased;
500 	if (oip->i_blocks < 0)			/* sanity */
501 		oip->i_blocks = 0;
502 	oip->i_flag |= ICHG;
503 #ifdef QUOTA
504 	(void) chkdq(oip, -blocksreleased, 0);
505 #endif
506 }
507 
508 /*
509  * Release blocks associated with the inode ip and
510  * stored in the indirect block bn.  Blocks are free'd
511  * in LIFO order up to (but not including) lastbn.  If
512  * level is greater than SINGLE, the block is an indirect
513  * block and recursive calls to indirtrunc must be used to
514  * cleanse other indirect blocks.
515  *
516  * NB: triple indirect blocks are untested.
517  */
518 long
519 indirtrunc(ip, bn, lastbn, level)
520 	register struct inode *ip;
521 	daddr_t bn, lastbn;
522 	int level;
523 {
524 	register int i;
525 	struct buf *bp, *copy;
526 	register daddr_t *bap;
527 	register struct fs *fs = ip->i_fs;
528 	daddr_t nb, last;
529 	long factor;
530 	int blocksreleased = 0, nblocks;
531 
532 	/*
533 	 * Calculate index in current block of last
534 	 * block to be kept.  -1 indicates the entire
535 	 * block so we need not calculate the index.
536 	 */
537 	factor = 1;
538 	for (i = SINGLE; i < level; i++)
539 		factor *= NINDIR(fs);
540 	last = lastbn;
541 	if (lastbn > 0)
542 		last /= factor;
543 	nblocks = btodb(fs->fs_bsize);
544 	/*
545 	 * Get buffer of block pointers, zero those
546 	 * entries corresponding to blocks to be free'd,
547 	 * and update on disk copy first.
548 	 */
549 	copy = geteblk((int)fs->fs_bsize);
550 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
551 	if (bp->b_flags&B_ERROR) {
552 		brelse(copy);
553 		brelse(bp);
554 		return (0);
555 	}
556 	bap = bp->b_un.b_daddr;
557 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
558 	bzero((caddr_t)&bap[last + 1],
559 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
560 	bwrite(bp);
561 	bp = copy, bap = bp->b_un.b_daddr;
562 
563 	/*
564 	 * Recursively free totally unused blocks.
565 	 */
566 	for (i = NINDIR(fs) - 1; i > last; i--) {
567 		nb = bap[i];
568 		if (nb == 0)
569 			continue;
570 		if (level > SINGLE)
571 			blocksreleased +=
572 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
573 		free(ip, nb, (int)fs->fs_bsize);
574 		blocksreleased += nblocks;
575 	}
576 
577 	/*
578 	 * Recursively free last partial block.
579 	 */
580 	if (level > SINGLE && lastbn >= 0) {
581 		last = lastbn % factor;
582 		nb = bap[i];
583 		if (nb != 0)
584 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
585 	}
586 	brelse(bp);
587 	return (blocksreleased);
588 }
589 
590 /*
591  * remove any inodes in the inode cache belonging to dev
592  *
593  * There should not be any active ones, return error if any are found
594  * (nb: this is a user error, not a system err)
595  *
596  * Also, count the references to dev by block devices - this really
597  * has nothing to do with the object of the procedure, but as we have
598  * to scan the inode table here anyway, we might as well get the
599  * extra benefit.
600  *
601  * this is called from sumount()/sys3.c when dev is being unmounted
602  */
603 #ifdef QUOTA
604 iflush(dev, iq)
605 	dev_t dev;
606 	struct inode *iq;
607 #else
608 iflush(dev)
609 	dev_t dev;
610 #endif
611 {
612 	register struct inode *ip;
613 	register open = 0;
614 
615 	for (ip = inode; ip < inodeNINODE; ip++) {
616 #ifdef QUOTA
617 		if (ip != iq && ip->i_dev == dev)
618 #else
619 		if (ip->i_dev == dev)
620 #endif
621 			if (ip->i_count)
622 				return(-1);
623 			else {
624 				remque(ip);
625 				ip->i_forw = ip;
626 				ip->i_back = ip;
627 				/*
628 				 * as i_count == 0, the inode was on the free
629 				 * list already, just leave it there, it will
630 				 * fall off the bottom eventually. We could
631 				 * perhaps move it to the head of the free
632 				 * list, but as umounts are done so
633 				 * infrequently, we would gain very little,
634 				 * while making the code bigger.
635 				 */
636 #ifdef QUOTA
637 				dqrele(ip->i_dquot);
638 				ip->i_dquot = NODQUOT;
639 #endif
640 			}
641 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
642 		    ip->i_rdev == dev)
643 			open++;
644 	}
645 	return (open);
646 }
647 
648 /*
649  * Lock an inode. If its already locked, set the WANT bit and sleep.
650  */
651 ilock(ip)
652 	register struct inode *ip;
653 {
654 
655 	ILOCK(ip);
656 }
657 
658 /*
659  * Unlock an inode.  If WANT bit is on, wakeup.
660  */
661 iunlock(ip)
662 	register struct inode *ip;
663 {
664 
665 	IUNLOCK(ip);
666 }
667