xref: /csrg-svn/sys/ufs/lfs/lfs_inode.c (revision 17942)
1 /*	lfs_inode.c	6.12	85/02/11	*/
2 
3 #include "param.h"
4 #include "systm.h"
5 #include "mount.h"
6 #include "dir.h"
7 #include "user.h"
8 #include "inode.h"
9 #include "fs.h"
10 #include "conf.h"
11 #include "buf.h"
12 #ifdef QUOTA
13 #include "quota.h"
14 #endif
15 #include "kernel.h"
16 
17 #define	INOHSZ	512
18 #if	((INOHSZ&(INOHSZ-1)) == 0)
19 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
20 #else
21 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
22 #endif
23 
24 union ihead {				/* inode LRU cache, Chris Maltby */
25 	union  ihead *ih_head[2];
26 	struct inode *ih_chain[2];
27 } ihead[INOHSZ];
28 
29 struct inode *ifreeh, **ifreet;
30 
31 /*
32  * Initialize hash links for inodes
33  * and build inode free list.
34  */
35 ihinit()
36 {
37 	register int i;
38 	register struct inode *ip = inode;
39 	register union  ihead *ih = ihead;
40 
41 	for (i = INOHSZ; --i >= 0; ih++) {
42 		ih->ih_head[0] = ih;
43 		ih->ih_head[1] = ih;
44 	}
45 	ifreeh = ip;
46 	ifreet = &ip->i_freef;
47 	ip->i_freeb = &ifreeh;
48 	ip->i_forw = ip;
49 	ip->i_back = ip;
50 	for (i = ninode; --i > 0; ) {
51 		++ip;
52 		ip->i_forw = ip;
53 		ip->i_back = ip;
54 		*ifreet = ip;
55 		ip->i_freeb = ifreet;
56 		ifreet = &ip->i_freef;
57 	}
58 	ip->i_freef = NULL;
59 }
60 
61 #ifdef notdef
62 /*
63  * Find an inode if it is incore.
64  * This is the equivalent, for inodes,
65  * of ``incore'' in bio.c or ``pfind'' in subr.c.
66  */
67 struct inode *
68 ifind(dev, ino)
69 	dev_t dev;
70 	ino_t ino;
71 {
72 	register struct inode *ip;
73 	register union  ihead *ih;
74 
75 	ih = &ihead[INOHASH(dev, ino)];
76 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
77 		if (ino==ip->i_number && dev==ip->i_dev)
78 			return (ip);
79 	return ((struct inode *)0);
80 }
81 #endif notdef
82 
83 /*
84  * Look up an inode by device,inumber.
85  * If it is in core (in the inode structure),
86  * honor the locking protocol.
87  * If it is not in core, read it in from the
88  * specified device.
89  * If the inode is mounted on, perform
90  * the indicated indirection.
91  * In all cases, a pointer to a locked
92  * inode structure is returned.
93  *
94  * panic: no imt -- if the mounted file
95  *	system is not in the mount table.
96  *	"cannot happen"
97  */
98 struct inode *
99 iget(dev, fs, ino)
100 	dev_t dev;
101 	register struct fs *fs;
102 	ino_t ino;
103 {
104 	register struct inode *ip;
105 	register union  ihead *ih;
106 	register struct mount *mp;
107 	register struct buf *bp;
108 	register struct dinode *dp;
109 	register struct inode *iq;
110 
111 
112 loop:
113 	ih = &ihead[INOHASH(dev, ino)];
114 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
115 		if (ino == ip->i_number && dev == ip->i_dev) {
116 			/*
117 			 * Following is essentially an inline expanded
118 			 * copy of igrab(), expanded inline for speed,
119 			 * and so that the test for a mounted on inode
120 			 * can be deferred until after we are sure that
121 			 * the inode isn't busy.
122 			 */
123 			if ((ip->i_flag&ILOCKED) != 0) {
124 				ip->i_flag |= IWANT;
125 				sleep((caddr_t)ip, PINOD);
126 				goto loop;
127 			}
128 			if ((ip->i_flag&IMOUNT) != 0) {
129 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
130 					if(mp->m_inodp == ip) {
131 						dev = mp->m_dev;
132 						fs = mp->m_bufp->b_un.b_fs;
133 						ino = ROOTINO;
134 						goto loop;
135 					}
136 				panic("no imt");
137 			}
138 			if (ip->i_count == 0) {		/* ino on free list */
139 				if (iq = ip->i_freef)
140 					iq->i_freeb = ip->i_freeb;
141 				else
142 					ifreet = ip->i_freeb;
143 				*ip->i_freeb = iq;
144 				ip->i_freef = NULL;
145 				ip->i_freeb = NULL;
146 			}
147 			ip->i_count++;
148 			ip->i_flag |= ILOCKED;
149 			return(ip);
150 		}
151 
152 	if ((ip = ifreeh) == NULL) {
153 		tablefull("inode");
154 		u.u_error = ENFILE;
155 		return(NULL);
156 	}
157 	if (ip->i_count)
158 		panic("free inode isn't");
159 	if (iq = ip->i_freef)
160 		iq->i_freeb = &ifreeh;
161 	ifreeh = iq;
162 	ip->i_freef = NULL;
163 	ip->i_freeb = NULL;
164 	/*
165 	 * Now to take inode off the hash chain it was on
166 	 * (initially, or after an iflush, it is on a "hash chain"
167 	 * consisting entirely of itself, and pointed to by no-one,
168 	 * but that doesn't matter), and put it on the chain for
169 	 * its new (ino, dev) pair
170 	 */
171 	remque(ip);
172 	insque(ip, ih);
173 	ip->i_dev = dev;
174 	ip->i_fs = fs;
175 	ip->i_number = ino;
176 	cacheinval(ip);
177 	ip->i_flag = ILOCKED;
178 	ip->i_count++;
179 	ip->i_lastr = 0;
180 #ifdef QUOTA
181 	dqrele(ip->i_dquot);
182 #endif
183 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
184 	/*
185 	 * Check I/O errors
186 	 */
187 	if ((bp->b_flags&B_ERROR) != 0) {
188 		brelse(bp);
189 		/*
190 		 * the inode doesn't contain anything useful, so it would
191 		 * be misleading to leave it on its hash chain.
192 		 * 'iput' will take care of putting it back on the free list.
193 		 */
194 		remque(ip);
195 		ip->i_forw = ip;
196 		ip->i_back = ip;
197 		/*
198 		 * we also loose its inumber, just in case (as iput
199 		 * doesn't do that any more) - but as it isn't on its
200 		 * hash chain, I doubt if this is really necessary .. kre
201 		 * (probably the two methods are interchangable)
202 		 */
203 		ip->i_number = 0;
204 #ifdef QUOTA
205 		ip->i_dquot = NODQUOT;
206 #endif
207 		iput(ip);
208 		return(NULL);
209 	}
210 	dp = bp->b_un.b_dino;
211 	dp += itoo(fs, ino);
212 	ip->i_ic = dp->di_ic;
213 	brelse(bp);
214 #ifdef QUOTA
215 	if (ip->i_mode == 0)
216 		ip->i_dquot = NODQUOT;
217 	else
218 		ip->i_dquot = inoquota(ip);
219 #endif
220 	return (ip);
221 }
222 
223 /*
224  * Convert a pointer to an inode into a reference to an inode.
225  *
226  * This is basically the internal piece of iget (after the
227  * inode pointer is located) but without the test for mounted
228  * filesystems.  It is caller's responsibility to check that
229  * the inode pointer is valid.
230  */
231 igrab(ip)
232 	register struct inode *ip;
233 {
234 	while ((ip->i_flag&ILOCKED) != 0) {
235 		ip->i_flag |= IWANT;
236 		sleep((caddr_t)ip, PINOD);
237 	}
238 	if (ip->i_count == 0) {		/* ino on free list */
239 		register struct inode *iq;
240 
241 		if (iq = ip->i_freef)
242 			iq->i_freeb = ip->i_freeb;
243 		else
244 			ifreet = ip->i_freeb;
245 		*ip->i_freeb = iq;
246 		ip->i_freef = NULL;
247 		ip->i_freeb = NULL;
248 	}
249 	ip->i_count++;
250 	ip->i_flag |= ILOCKED;
251 }
252 
253 /*
254  * Decrement reference count of
255  * an inode structure.
256  * On the last reference,
257  * write the inode out and if necessary,
258  * truncate and deallocate the file.
259  */
260 iput(ip)
261 	register struct inode *ip;
262 {
263 
264 	if ((ip->i_flag & ILOCKED) == 0)
265 		panic("iput");
266 	IUNLOCK(ip);
267 	irele(ip);
268 }
269 
270 irele(ip)
271 	register struct inode *ip;
272 {
273 	int mode;
274 
275 	if (ip->i_count == 1) {
276 		ip->i_flag |= ILOCKED;
277 		if (ip->i_nlink <= 0) {
278 			itrunc(ip, (u_long)0);
279 			mode = ip->i_mode;
280 			ip->i_mode = 0;
281 			ip->i_rdev = 0;
282 			ip->i_flag |= IUPD|ICHG;
283 			ifree(ip, ip->i_number, mode);
284 #ifdef QUOTA
285 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
286 			dqrele(ip->i_dquot);
287 			ip->i_dquot = NODQUOT;
288 #endif
289 		}
290 		IUPDAT(ip, &time, &time, 0);
291 		IUNLOCK(ip);
292 		ip->i_flag = 0;
293 		/*
294 		 * Put the inode on the end of the free list.
295 		 * Possibly in some cases it would be better to
296 		 * put the inode at the head of the free list,
297 		 * (eg: where i_mode == 0 || i_number == 0)
298 		 * but I will think about that later .. kre
299 		 * (i_number is rarely 0 - only after an i/o error in iget,
300 		 * where i_mode == 0, the inode will probably be wanted
301 		 * again soon for an ialloc, so possibly we should keep it)
302 		 */
303 		if (ifreeh) {
304 			*ifreet = ip;
305 			ip->i_freeb = ifreet;
306 		} else {
307 			ifreeh = ip;
308 			ip->i_freeb = &ifreeh;
309 		}
310 		ip->i_freef = NULL;
311 		ifreet = &ip->i_freef;
312 	} else if (!(ip->i_flag & ILOCKED))
313 		ITIMES(ip, &time, &time);
314 	ip->i_count--;
315 }
316 
317 /*
318  * Check accessed and update flags on
319  * an inode structure.
320  * If any is on, update the inode
321  * with the current time.
322  * If waitfor is given, then must insure
323  * i/o order so wait for write to complete.
324  */
325 iupdat(ip, ta, tm, waitfor)
326 	register struct inode *ip;
327 	struct timeval *ta, *tm;
328 	int waitfor;
329 {
330 	register struct buf *bp;
331 	struct dinode *dp;
332 	register struct fs *fp;
333 
334 	fp = ip->i_fs;
335 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
336 		if (fp->fs_ronly)
337 			return;
338 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
339 			(int)fp->fs_bsize);
340 		if (bp->b_flags & B_ERROR) {
341 			brelse(bp);
342 			return;
343 		}
344 		if (ip->i_flag&IACC)
345 			ip->i_atime = ta->tv_sec;
346 		if (ip->i_flag&IUPD)
347 			ip->i_mtime = tm->tv_sec;
348 		if (ip->i_flag&ICHG)
349 			ip->i_ctime = time.tv_sec;
350 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
351 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
352 		dp->di_ic = ip->i_ic;
353 		if (waitfor)
354 			bwrite(bp);
355 		else
356 			bdwrite(bp);
357 	}
358 }
359 
360 #define	SINGLE	0	/* index of single indirect block */
361 #define	DOUBLE	1	/* index of double indirect block */
362 #define	TRIPLE	2	/* index of triple indirect block */
363 /*
364  * Truncate the inode ip to at most
365  * length size.  Free affected disk
366  * blocks -- the blocks of the file
367  * are removed in reverse order.
368  *
369  * NB: triple indirect blocks are untested.
370  */
371 itrunc(oip, length)
372 	register struct inode *oip;
373 	u_long length;
374 {
375 	register daddr_t lastblock;
376 	daddr_t bn, lastiblock[NIADDR];
377 	register struct fs *fs;
378 	register struct inode *ip;
379 	struct buf *bp;
380 	int offset, lbn, osize, size, error, count, level, s;
381 	long nblocks, blocksreleased = 0;
382 	register int i;
383 	dev_t dev;
384 	struct inode tip;
385 	extern long indirtrunc();
386 	extern struct cmap *mfind();
387 
388 	if (oip->i_size <= length) {
389 		oip->i_flag |= ICHG|IUPD;
390 		iupdat(oip, &time, &time, 1);
391 		return;
392 	}
393 	/*
394 	 * Calculate index into inode's block list of
395 	 * last direct and indirect blocks (if any)
396 	 * which we want to keep.  Lastblock is -1 when
397 	 * the file is truncated to 0.
398 	 */
399 	fs = oip->i_fs;
400 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
401 	lastiblock[SINGLE] = lastblock - NDADDR;
402 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
403 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
404 	nblocks = btodb(fs->fs_bsize);
405 	/*
406 	 * Update the size of the file. If the file is not being
407 	 * truncated to a block boundry, the contents of the
408 	 * partial block following the end of the file must be
409 	 * zero'ed in case it ever become accessable again because
410 	 * of subsequent file growth.
411 	 */
412 	osize = oip->i_size;
413 	offset = blkoff(fs, length);
414 	if (offset == 0) {
415 		oip->i_size = length;
416 	} else {
417 		lbn = lblkno(fs, length);
418 		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
419 		if (u.u_error || (long)bn < 0)
420 			return;
421 		oip->i_size = length;
422 		size = blksize(fs, oip, lbn);
423 		count = howmany(size, DEV_BSIZE);
424 		dev = oip->i_dev;
425 		s = splimp();
426 		for (i = 0; i < count; i += CLSIZE)
427 			if (mfind(dev, bn + i))
428 				munhash(dev, bn + i);
429 		splx(s);
430 		bp = bread(dev, bn, size);
431 		if (bp->b_flags & B_ERROR) {
432 			u.u_error = EIO;
433 			oip->i_size = osize;
434 			brelse(bp);
435 			return;
436 		}
437 		bzero(bp->b_un.b_addr + offset, size - offset);
438 		bdwrite(bp);
439 	}
440 	/*
441 	 * Update file and block pointers
442 	 * on disk before we start freeing blocks.
443 	 * If we crash before free'ing blocks below,
444 	 * the blocks will be returned to the free list.
445 	 * lastiblock values are also normalized to -1
446 	 * for calls to indirtrunc below.
447 	 */
448 	tip = *oip;
449 	tip.i_size = osize;
450 	for (level = TRIPLE; level >= SINGLE; level--)
451 		if (lastiblock[level] < 0) {
452 			oip->i_ib[level] = 0;
453 			lastiblock[level] = -1;
454 		}
455 	for (i = NDADDR - 1; i > lastblock; i--)
456 		oip->i_db[i] = 0;
457 	oip->i_flag |= ICHG|IUPD;
458 	syncip(oip);
459 
460 	/*
461 	 * Indirect blocks first.
462 	 */
463 	ip = &tip;
464 	for (level = TRIPLE; level >= SINGLE; level--) {
465 		bn = ip->i_ib[level];
466 		if (bn != 0) {
467 			blocksreleased +=
468 			    indirtrunc(ip, bn, lastiblock[level], level);
469 			if (lastiblock[level] < 0) {
470 				ip->i_ib[level] = 0;
471 				free(ip, bn, (off_t)fs->fs_bsize);
472 				blocksreleased += nblocks;
473 			}
474 		}
475 		if (lastiblock[level] >= 0)
476 			goto done;
477 	}
478 
479 	/*
480 	 * All whole direct blocks or frags.
481 	 */
482 	for (i = NDADDR - 1; i > lastblock; i--) {
483 		register int size;
484 
485 		bn = ip->i_db[i];
486 		if (bn == 0)
487 			continue;
488 		ip->i_db[i] = 0;
489 		size = (off_t)blksize(fs, ip, i);
490 		free(ip, bn, size);
491 		blocksreleased += btodb(size);
492 	}
493 	if (lastblock < 0)
494 		goto done;
495 
496 	/*
497 	 * Finally, look for a change in size of the
498 	 * last direct block; release any frags.
499 	 */
500 	bn = ip->i_db[lastblock];
501 	if (bn != 0) {
502 		int oldspace, newspace;
503 
504 		/*
505 		 * Calculate amount of space we're giving
506 		 * back as old block size minus new block size.
507 		 */
508 		oldspace = blksize(fs, ip, lastblock);
509 		ip->i_size = length;
510 		newspace = blksize(fs, ip, lastblock);
511 		if (newspace == 0)
512 			panic("itrunc: newspace");
513 		if (oldspace - newspace > 0) {
514 			/*
515 			 * Block number of space to be free'd is
516 			 * the old block # plus the number of frags
517 			 * required for the storage we're keeping.
518 			 */
519 			bn += numfrags(fs, newspace);
520 			free(ip, bn, oldspace - newspace);
521 			blocksreleased += btodb(oldspace - newspace);
522 		}
523 	}
524 done:
525 /* BEGIN PARANOIA */
526 	for (level = SINGLE; level <= TRIPLE; level++)
527 		if (ip->i_ib[level] != oip->i_ib[level])
528 			panic("itrunc1");
529 	for (i = 0; i < NDADDR; i++)
530 		if (ip->i_db[i] != oip->i_db[i])
531 			panic("itrunc2");
532 /* END PARANOIA */
533 	oip->i_blocks -= blocksreleased;
534 	if (oip->i_blocks < 0)			/* sanity */
535 		oip->i_blocks = 0;
536 	oip->i_flag |= ICHG;
537 #ifdef QUOTA
538 	(void) chkdq(oip, -blocksreleased, 0);
539 #endif
540 }
541 
542 /*
543  * Release blocks associated with the inode ip and
544  * stored in the indirect block bn.  Blocks are free'd
545  * in LIFO order up to (but not including) lastbn.  If
546  * level is greater than SINGLE, the block is an indirect
547  * block and recursive calls to indirtrunc must be used to
548  * cleanse other indirect blocks.
549  *
550  * NB: triple indirect blocks are untested.
551  */
552 long
553 indirtrunc(ip, bn, lastbn, level)
554 	register struct inode *ip;
555 	daddr_t bn, lastbn;
556 	int level;
557 {
558 	register int i;
559 	struct buf *bp, *copy;
560 	register daddr_t *bap;
561 	register struct fs *fs = ip->i_fs;
562 	daddr_t nb, last;
563 	long factor;
564 	int blocksreleased = 0, nblocks;
565 
566 	/*
567 	 * Calculate index in current block of last
568 	 * block to be kept.  -1 indicates the entire
569 	 * block so we need not calculate the index.
570 	 */
571 	factor = 1;
572 	for (i = SINGLE; i < level; i++)
573 		factor *= NINDIR(fs);
574 	last = lastbn;
575 	if (lastbn > 0)
576 		last /= factor;
577 	nblocks = btodb(fs->fs_bsize);
578 	/*
579 	 * Get buffer of block pointers, zero those
580 	 * entries corresponding to blocks to be free'd,
581 	 * and update on disk copy first.
582 	 */
583 	copy = geteblk((int)fs->fs_bsize);
584 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
585 	if (bp->b_flags&B_ERROR) {
586 		brelse(copy);
587 		brelse(bp);
588 		return (0);
589 	}
590 	bap = bp->b_un.b_daddr;
591 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
592 	bzero((caddr_t)&bap[last + 1],
593 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
594 	bwrite(bp);
595 	bp = copy, bap = bp->b_un.b_daddr;
596 
597 	/*
598 	 * Recursively free totally unused blocks.
599 	 */
600 	for (i = NINDIR(fs) - 1; i > last; i--) {
601 		nb = bap[i];
602 		if (nb == 0)
603 			continue;
604 		if (level > SINGLE)
605 			blocksreleased +=
606 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
607 		free(ip, nb, (int)fs->fs_bsize);
608 		blocksreleased += nblocks;
609 	}
610 
611 	/*
612 	 * Recursively free last partial block.
613 	 */
614 	if (level > SINGLE && lastbn >= 0) {
615 		last = lastbn % factor;
616 		nb = bap[i];
617 		if (nb != 0)
618 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
619 	}
620 	brelse(bp);
621 	return (blocksreleased);
622 }
623 
624 /*
625  * remove any inodes in the inode cache belonging to dev
626  *
627  * There should not be any active ones, return error if any are found
628  * (nb: this is a user error, not a system err)
629  *
630  * Also, count the references to dev by block devices - this really
631  * has nothing to do with the object of the procedure, but as we have
632  * to scan the inode table here anyway, we might as well get the
633  * extra benefit.
634  *
635  * this is called from sumount()/sys3.c when dev is being unmounted
636  */
637 #ifdef QUOTA
638 iflush(dev, iq)
639 	dev_t dev;
640 	struct inode *iq;
641 #else
642 iflush(dev)
643 	dev_t dev;
644 #endif
645 {
646 	register struct inode *ip;
647 	register open = 0;
648 
649 	for (ip = inode; ip < inodeNINODE; ip++) {
650 #ifdef QUOTA
651 		if (ip != iq && ip->i_dev == dev)
652 #else
653 		if (ip->i_dev == dev)
654 #endif
655 			if (ip->i_count)
656 				return(-1);
657 			else {
658 				remque(ip);
659 				ip->i_forw = ip;
660 				ip->i_back = ip;
661 				/*
662 				 * as i_count == 0, the inode was on the free
663 				 * list already, just leave it there, it will
664 				 * fall off the bottom eventually. We could
665 				 * perhaps move it to the head of the free
666 				 * list, but as umounts are done so
667 				 * infrequently, we would gain very little,
668 				 * while making the code bigger.
669 				 */
670 #ifdef QUOTA
671 				dqrele(ip->i_dquot);
672 				ip->i_dquot = NODQUOT;
673 #endif
674 			}
675 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
676 		    ip->i_rdev == dev)
677 			open++;
678 	}
679 	return (open);
680 }
681 
682 /*
683  * Lock an inode. If its already locked, set the WANT bit and sleep.
684  */
685 ilock(ip)
686 	register struct inode *ip;
687 {
688 
689 	ILOCK(ip);
690 }
691 
692 /*
693  * Unlock an inode.  If WANT bit is on, wakeup.
694  */
695 iunlock(ip)
696 	register struct inode *ip;
697 {
698 
699 	IUNLOCK(ip);
700 }
701