xref: /csrg-svn/sys/ufs/ffs/ufs_inode.c (revision 16524)
1 /*	ufs_inode.c	6.4	84/05/22	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/mount.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/inode.h"
9 #include "../h/fs.h"
10 #include "../h/conf.h"
11 #include "../h/buf.h"
12 #ifdef QUOTA
13 #include "../h/quota.h"
14 #endif
15 #include "../h/kernel.h"
16 
/*
 * Inode hash table.
 *
 * INOHSZ is the number of hash buckets.  INOHASH folds a (dev, ino)
 * pair into a bucket index; when INOHSZ is a power of two the fold
 * is a mask, otherwise it is an unsigned remainder.
 */
#define	INOHSZ	64
#if	(INOHSZ & (INOHSZ - 1)) == 0
#define	INOHASH(dev,ino)	(((dev) + (ino)) & (INOHSZ - 1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev) + (ino))) % INOHSZ)
#endif

/*
 * Bucket header (inode LRU cache, Chris Maltby).
 *
 * Each header is a two-pointer cell that ihinit() points at itself,
 * i.e. an empty circular list.  The same two words are viewed either
 * as header links (ih_head) or as inode links (ih_chain); chain walks
 * stop when they come back to the header cast to an inode pointer.
 * NOTE(review): this presumably relies on i_forw/i_back being the
 * first two members of struct inode -- confirm against inode.h.
 */
union ihead {
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

/*
 * Free list of inodes: ifreeh is the head, ifreet addresses the
 * i_freef link of the last entry.  irele() does not trust ifreet
 * when ifreeh is NULL, so ifreet is only meaningful for a
 * non-empty list.
 */
struct inode *ifreeh, **ifreet;
30 
31 /*
32  * Initialize hash links for inodes
33  * and build inode free list.
34  */
35 ihinit()
36 {
37 	register int i;
38 	register struct inode *ip = inode;
39 	register union  ihead *ih = ihead;
40 
41 	for (i = INOHSZ; --i >= 0; ih++) {
42 		ih->ih_head[0] = ih;
43 		ih->ih_head[1] = ih;
44 	}
45 	ifreeh = ip;
46 	ifreet = &ip->i_freef;
47 	ip->i_freeb = &ifreeh;
48 	ip->i_forw = ip;
49 	ip->i_back = ip;
50 	for (i = ninode; --i > 0; ) {
51 		++ip;
52 		ip->i_forw = ip;
53 		ip->i_back = ip;
54 		*ifreet = ip;
55 		ip->i_freeb = ifreet;
56 		ifreet = &ip->i_freef;
57 	}
58 	ip->i_freef = NULL;
59 }
60 
#ifdef notdef
/*
 * Find an inode if it is incore.
 * This is the equivalent, for inodes,
 * of ``incore'' in bio.c or ``pfind'' in subr.c.
 *
 * Walks the hash chain selected by INOHASH(dev, ino) and returns
 * the first inode whose i_number and i_dev both match, or a null
 * pointer if the chain holds no such inode.  No locking is done
 * and no reference is taken.
 *
 * (Compiled out: the whole routine is under "notdef".)
 */
struct inode *
ifind(dev, ino)
	dev_t dev;
	ino_t ino;
{
	register struct inode *ip;
	register union  ihead *ih;

	ih = &ihead[INOHASH(dev, ino)];
	/* the chain is circular; reaching the header again ends the walk */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
		if (ino == ip->i_number && dev == ip->i_dev)
			return (ip);
	return ((struct inode *)0);
}
#endif /* notdef */
82 
83 /*
84  * Look up an inode by device,inumber.
85  * If it is in core (in the inode structure),
86  * honor the locking protocol.
87  * If it is not in core, read it in from the
88  * specified device.
89  * If the inode is mounted on, perform
90  * the indicated indirection.
91  * In all cases, a pointer to a locked
92  * inode structure is returned.
93  *
94  * panic: no imt -- if the mounted file
95  *	system is not in the mount table.
96  *	"cannot happen"
97  */
98 struct inode *
99 iget(dev, fs, ino)
100 	dev_t dev;
101 	register struct fs *fs;
102 	ino_t ino;
103 {
104 	register struct inode *ip;
105 	register union  ihead *ih;
106 	register struct mount *mp;
107 	register struct buf *bp;
108 	register struct dinode *dp;
109 	register struct inode *iq;
110 
111 loop:
112 	ih = &ihead[INOHASH(dev, ino)];
113 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
114 		if (ino == ip->i_number && dev == ip->i_dev) {
115 			if ((ip->i_flag&ILOCKED) != 0) {
116 				ip->i_flag |= IWANT;
117 				sleep((caddr_t)ip, PINOD);
118 				goto loop;
119 			}
120 			if ((ip->i_flag&IMOUNT) != 0) {
121 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
122 					if(mp->m_inodp == ip) {
123 						dev = mp->m_dev;
124 						fs = mp->m_bufp->b_un.b_fs;
125 						ino = ROOTINO;
126 						goto loop;
127 					}
128 				panic("no imt");
129 			}
130 			if (ip->i_count == 0) {		/* ino on free list */
131 				if (iq = ip->i_freef)
132 					iq->i_freeb = ip->i_freeb;
133 				else
134 					ifreet = ip->i_freeb;
135 				*ip->i_freeb = iq;
136 				ip->i_freef = NULL;
137 				ip->i_freeb = NULL;
138 			}
139 			ip->i_count++;
140 			ip->i_flag |= ILOCKED;
141 			return(ip);
142 		}
143 
144 	if ((ip = ifreeh) == NULL) {
145 		tablefull("inode");
146 		u.u_error = ENFILE;
147 		return(NULL);
148 	}
149 	if (iq = ip->i_freef)
150 		iq->i_freeb = &ifreeh;
151 	ifreeh = iq;
152 	ip->i_freef = NULL;
153 	ip->i_freeb = NULL;
154 	/*
155 	 * Now to take inode off the hash chain it was on
156 	 * (initially, or after an iflush, it is on a "hash chain"
157 	 * consisting entirely of itself, and pointed to by no-one,
158 	 * but that doesn't matter), and put it on the chain for
159 	 * its new (ino, dev) pair
160 	 */
161 	remque(ip);
162 	insque(ip, ih);
163 #ifdef QUOTA
164 	dqrele(ip->i_dquot);
165 #endif
166 	ip->i_dev = dev;
167 	ip->i_fs = fs;
168 	ip->i_number = ino;
169 	ip->i_flag = ILOCKED;
170 	ip->i_count++;
171 	ip->i_lastr = 0;
172 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
173 	/*
174 	 * Check I/O errors
175 	 */
176 	if ((bp->b_flags&B_ERROR) != 0) {
177 		brelse(bp);
178 		/*
179 		 * the inode doesn't contain anything useful, so it would
180 		 * be misleading to leave it on its hash chain.
181 		 * 'iput' will take care of putting it back on the free list.
182 		 */
183 		remque(ip);
184 		ip->i_forw = ip;
185 		ip->i_back = ip;
186 		/*
187 		 * we also loose its inumber, just in case (as iput
188 		 * doesn't do that any more) - but as it isn't on its
189 		 * hash chain, I doubt if this is really necessary .. kre
190 		 * (probably the two methods are interchangable)
191 		 */
192 		ip->i_number = 0;
193 #ifdef QUOTA
194 		ip->i_dquot = NODQUOT;
195 #endif
196 		iput(ip);
197 		return(NULL);
198 	}
199 	dp = bp->b_un.b_dino;
200 	dp += itoo(fs, ino);
201 	ip->i_ic = dp->di_ic;
202 	brelse(bp);
203 #ifdef QUOTA
204 	if (ip->i_mode == 0)
205 		ip->i_dquot = NODQUOT;
206 	else
207 		ip->i_dquot = inoquota(ip);
208 #endif
209 	return (ip);
210 }
211 
212 /*
213  * Decrement reference count of
214  * an inode structure.
215  * On the last reference,
216  * write the inode out and if necessary,
217  * truncate and deallocate the file.
218  */
219 iput(ip)
220 	register struct inode *ip;
221 {
222 
223 	if ((ip->i_flag & ILOCKED) == 0)
224 		panic("iput");
225 	iunlock(ip);
226 	irele(ip);
227 }
228 
229 irele(ip)
230 	register struct inode *ip;
231 {
232 	int mode;
233 
234 	if (ip->i_count == 1) {
235 		ip->i_flag |= ILOCKED;
236 		if (ip->i_nlink <= 0) {
237 			itrunc(ip, (u_long)0);
238 			mode = ip->i_mode;
239 			ip->i_mode = 0;
240 			ip->i_rdev = 0;
241 			ip->i_flag |= IUPD|ICHG;
242 			ifree(ip, ip->i_number, mode);
243 #ifdef QUOTA
244 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
245 			dqrele(ip->i_dquot);
246 			ip->i_dquot = NODQUOT;
247 #endif
248 		}
249 		IUPDAT(ip, &time, &time, 0);
250 		iunlock(ip);
251 		ip->i_flag = 0;
252 		/*
253 		 * Put the inode on the end of the free list.
254 		 * Possibly in some cases it would be better to
255 		 * put the inode at the head of the free list,
256 		 * (eg: where i_mode == 0 || i_number == 0)
257 		 * but I will think about that later .. kre
258 		 * (i_number is rarely 0 - only after an i/o error in iget,
259 		 * where i_mode == 0, the inode will probably be wanted
260 		 * again soon for an ialloc, so possibly we should keep it)
261 		 */
262 		if (ifreeh) {
263 			*ifreet = ip;
264 			ip->i_freeb = ifreet;
265 		} else {
266 			ifreeh = ip;
267 			ip->i_freeb = &ifreeh;
268 		}
269 		ip->i_freef = NULL;
270 		ifreet = &ip->i_freef;
271 	} else if (!(ip->i_flag & ILOCKED))
272 		ITIMES(ip, &time, &time);
273 	ip->i_count--;
274 }
275 
276 /*
277  * Check accessed and update flags on
278  * an inode structure.
279  * If any is on, update the inode
280  * with the current time.
281  * If waitfor is given, then must insure
282  * i/o order so wait for write to complete.
283  */
284 iupdat(ip, ta, tm, waitfor)
285 	register struct inode *ip;
286 	struct timeval *ta, *tm;
287 	int waitfor;
288 {
289 	register struct buf *bp;
290 	struct dinode *dp;
291 	register struct fs *fp;
292 
293 	fp = ip->i_fs;
294 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
295 		if (fp->fs_ronly)
296 			return;
297 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
298 			(int)fp->fs_bsize);
299 		if (bp->b_flags & B_ERROR) {
300 			brelse(bp);
301 			return;
302 		}
303 		if (ip->i_flag&IACC)
304 			ip->i_atime = ta->tv_sec;
305 		if (ip->i_flag&IUPD)
306 			ip->i_mtime = tm->tv_sec;
307 		if (ip->i_flag&ICHG)
308 			ip->i_ctime = time.tv_sec;
309 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
310 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
311 		dp->di_ic = ip->i_ic;
312 		if (waitfor)
313 			bwrite(bp);
314 		else
315 			bdwrite(bp);
316 	}
317 }
318 
319 #define	SINGLE	0	/* index of single indirect block */
320 #define	DOUBLE	1	/* index of double indirect block */
321 #define	TRIPLE	2	/* index of triple indirect block */
322 /*
323  * Truncate the inode ip to at most
324  * length size.  Free affected disk
325  * blocks -- the blocks of the file
326  * are removed in reverse order.
327  *
328  * NB: triple indirect blocks are untested.
329  */
330 itrunc(oip, length)
331 	struct inode *oip;
332 	u_long length;
333 {
334 	register i;
335 	register daddr_t lastblock;
336 	daddr_t bn, lastiblock[NIADDR];
337 	register struct fs *fs;
338 	register struct inode *ip;
339 	struct inode tip;
340 	long blocksreleased = 0, nblocks;
341 	long indirtrunc();
342 	int level;
343 
344 	if (oip->i_size <= length) {
345 		oip->i_flag |= ICHG|IUPD;
346 		iupdat(oip, &time, &time, 1);
347 		return;
348 	}
349 	/*
350 	 * Calculate index into inode's block list of
351 	 * last direct and indirect blocks (if any)
352 	 * which we want to keep.  Lastblock is -1 when
353 	 * the file is truncated to 0.
354 	 */
355 	fs = oip->i_fs;
356 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
357 	lastiblock[SINGLE] = lastblock - NDADDR;
358 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
359 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
360 	nblocks = btodb(fs->fs_bsize);
361 	/*
362 	 * Update size of file and block pointers
363 	 * on disk before we start freeing blocks.
364 	 * If we crash before free'ing blocks below,
365 	 * the blocks will be returned to the free list.
366 	 * lastiblock values are also normalized to -1
367 	 * for calls to indirtrunc below.
368 	 * (? fsck doesn't check validity of pointers in indirect blocks)
369 	 */
370 	tip = *oip;
371 	for (level = TRIPLE; level >= SINGLE; level--)
372 		if (lastiblock[level] < 0) {
373 			oip->i_ib[level] = 0;
374 			lastiblock[level] = -1;
375 		}
376 	for (i = NDADDR - 1; i > lastblock; i--)
377 		oip->i_db[i] = 0;
378 	oip->i_size = length;
379 	oip->i_flag |= ICHG|IUPD;
380 	iupdat(oip, &time, &time, 1);
381 	ip = &tip;
382 
383 	/*
384 	 * Indirect blocks first.
385 	 */
386 	for (level = TRIPLE; level >= SINGLE; level--) {
387 		bn = ip->i_ib[level];
388 		if (bn != 0) {
389 			blocksreleased +=
390 			    indirtrunc(ip, bn, lastiblock[level], level);
391 			if (lastiblock[level] < 0) {
392 				ip->i_ib[level] = 0;
393 				free(ip, bn, (off_t)fs->fs_bsize);
394 				blocksreleased += nblocks;
395 			}
396 		}
397 		if (lastiblock[level] >= 0)
398 			goto done;
399 	}
400 
401 	/*
402 	 * All whole direct blocks or frags.
403 	 */
404 	for (i = NDADDR - 1; i > lastblock; i--) {
405 		register int size;
406 
407 		bn = ip->i_db[i];
408 		if (bn == 0)
409 			continue;
410 		ip->i_db[i] = 0;
411 		size = (off_t)blksize(fs, ip, i);
412 		free(ip, bn, size);
413 		blocksreleased += btodb(size);
414 	}
415 	if (lastblock < 0)
416 		goto done;
417 
418 	/*
419 	 * Finally, look for a change in size of the
420 	 * last direct block; release any frags.
421 	 */
422 	bn = ip->i_db[lastblock];
423 	if (bn != 0) {
424 		int oldspace, newspace;
425 
426 		/*
427 		 * Calculate amount of space we're giving
428 		 * back as old block size minus new block size.
429 		 */
430 		oldspace = blksize(fs, ip, lastblock);
431 		ip->i_size = length;
432 		newspace = blksize(fs, ip, lastblock);
433 		if (newspace == 0)
434 			panic("itrunc: newspace");
435 		if (oldspace - newspace > 0) {
436 			/*
437 			 * Block number of space to be free'd is
438 			 * the old block # plus the number of frags
439 			 * required for the storage we're keeping.
440 			 */
441 			bn += numfrags(fs, newspace);
442 			free(ip, bn, oldspace - newspace);
443 			blocksreleased += btodb(oldspace - newspace);
444 		}
445 	}
446 done:
447 /* BEGIN PARANOIA */
448 	for (level = SINGLE; level <= TRIPLE; level++)
449 		if (ip->i_ib[level] != oip->i_ib[level])
450 			panic("itrunc1");
451 	for (i = 0; i < NDADDR; i++)
452 		if (ip->i_db[i] != oip->i_db[i])
453 			panic("itrunc2");
454 /* END PARANOIA */
455 	oip->i_blocks -= blocksreleased;
456 	if (oip->i_blocks < 0)			/* sanity */
457 		oip->i_blocks = 0;
458 	oip->i_flag |= ICHG;
459 #ifdef QUOTA
460 	(void) chkdq(oip, -blocksreleased, 0);
461 #endif
462 }
463 
464 /*
465  * Release blocks associated with the inode ip and
466  * stored in the indirect block bn.  Blocks are free'd
467  * in LIFO order up to (but not including) lastbn.  If
468  * level is greater than SINGLE, the block is an indirect
469  * block and recursive calls to indirtrunc must be used to
470  * cleanse other indirect blocks.
471  *
472  * NB: triple indirect blocks are untested.
473  */
474 long
475 indirtrunc(ip, bn, lastbn, level)
476 	register struct inode *ip;
477 	daddr_t bn, lastbn;
478 	int level;
479 {
480 	register int i;
481 	struct buf *bp, *copy;
482 	register daddr_t *bap;
483 	register struct fs *fs = ip->i_fs;
484 	daddr_t nb, last;
485 	long factor;
486 	int blocksreleased = 0, nblocks;
487 
488 	/*
489 	 * Calculate index in current block of last
490 	 * block to be kept.  -1 indicates the entire
491 	 * block so we need not calculate the index.
492 	 */
493 	factor = 1;
494 	for (i = SINGLE; i < level; i++)
495 		factor *= NINDIR(fs);
496 	last = lastbn;
497 	if (lastbn > 0)
498 		last /= factor;
499 	nblocks = btodb(fs->fs_bsize);
500 	/*
501 	 * Get buffer of block pointers, zero those
502 	 * entries corresponding to blocks to be free'd,
503 	 * and update on disk copy first.
504 	 */
505 	copy = geteblk((int)fs->fs_bsize);
506 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
507 	if (bp->b_flags&B_ERROR) {
508 		brelse(copy);
509 		brelse(bp);
510 		return (0);
511 	}
512 	bap = bp->b_un.b_daddr;
513 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
514 	bzero((caddr_t)&bap[last + 1],
515 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
516 	bwrite(bp);
517 	bp = copy, bap = bp->b_un.b_daddr;
518 
519 	/*
520 	 * Recursively free totally unused blocks.
521 	 */
522 	for (i = NINDIR(fs) - 1; i > last; i--) {
523 		nb = bap[i];
524 		if (nb == 0)
525 			continue;
526 		if (level > SINGLE)
527 			blocksreleased +=
528 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
529 		free(ip, nb, (int)fs->fs_bsize);
530 		blocksreleased += nblocks;
531 	}
532 
533 	/*
534 	 * Recursively free last partial block.
535 	 */
536 	if (level > SINGLE && lastbn >= 0) {
537 		last = lastbn % factor;
538 		nb = bap[i];
539 		if (nb != 0)
540 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
541 	}
542 	brelse(bp);
543 	return (blocksreleased);
544 }
545 
546 /*
547  * remove any inodes in the inode cache belonging to dev
548  *
549  * There should not be any active ones, return error if any are found
550  * (nb: this is a user error, not a system err)
551  *
552  * Also, count the references to dev by block devices - this really
553  * has nothing to do with the object of the procedure, but as we have
554  * to scan the inode table here anyway, we might as well get the
555  * extra benefit.
556  *
557  * this is called from sumount()/sys3.c when dev is being unmounted
558  */
559 #ifdef QUOTA
560 iflush(dev, iq)
561 	dev_t dev;
562 	struct inode *iq;
563 #else
564 iflush(dev)
565 	dev_t dev;
566 #endif
567 {
568 	register struct inode *ip;
569 	register open = 0;
570 
571 	for (ip = inode; ip < inodeNINODE; ip++) {
572 #ifdef QUOTA
573 		if (ip != iq && ip->i_dev == dev)
574 #else
575 		if (ip->i_dev == dev)
576 #endif
577 			if (ip->i_count)
578 				return(-1);
579 			else {
580 				remque(ip);
581 				ip->i_forw = ip;
582 				ip->i_back = ip;
583 				/*
584 				 * as i_count == 0, the inode was on the free
585 				 * list already, just leave it there, it will
586 				 * fall off the bottom eventually. We could
587 				 * perhaps move it to the head of the free
588 				 * list, but as umounts are done so
589 				 * infrequently, we would gain very little,
590 				 * while making the code bigger.
591 				 */
592 #ifdef QUOTA
593 				dqrele(ip->i_dquot);
594 				ip->i_dquot = NODQUOT;
595 #endif
596 			}
597 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
598 		    ip->i_rdev == dev)
599 			open++;
600 	}
601 	return (open);
602 }
603 
/*
 * Function form of the ILOCK macro: acquire the lock on ip.
 * Per the macro's contract, a caller finding the inode already
 * locked sets the WANT bit and sleeps until it is released.
 */
ilock(ip)
	register struct inode *ip;
{
	ILOCK(ip);
}
613 
/*
 * Function form of the IUNLOCK macro: release the lock on ip,
 * waking any sleeper that left the WANT bit on.
 */
iunlock(ip)
	register struct inode *ip;
{
	IUNLOCK(ip);
}
623