xref: /csrg-svn/sys/ufs/lfs/lfs_inode.c (revision 37736)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)lfs_inode.c	7.6 (Berkeley) 05/09/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "user.h"
24 #include "file.h"
25 #include "buf.h"
26 #include "cmap.h"
27 #include "vnode.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 #include "../ufs/ufsmount.h"
31 #ifdef QUOTA
32 #include "../ufs/quota.h"
33 #endif
34 #include "kernel.h"
35 #include "malloc.h"
36 
/* Number of inode hash buckets; hash is on (dev + inumber). */
#define	INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
/* INOHSZ is a power of two: cheap bit-mask instead of a modulus. */
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
/* Fall back to an unsigned modulus for non-power-of-two table sizes. */
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif
43 
/*
 * INSFREE(ip): append inode ip to the tail of the inode free list.
 * ifreeh is the list head pointer, ifreet points at the i_freef slot
 * of the current tail (i.e. where the next tail will be linked in).
 * The i_freeb field is a back-pointer to the pointer that references
 * this inode, which makes unlinking from the middle O(1).
 * Comments are kept outside the macro body to avoid any old-cpp
 * comment/token-pasting surprises inside the continuation lines.
 */
#define INSFREE(ip) {\
	if (ifreeh) { \
		*ifreet = (ip); \
		(ip)->i_freeb = ifreet; \
	} else { \
		ifreeh = (ip); \
		(ip)->i_freeb = &ifreeh; \
	} \
	(ip)->i_freef = NULL; \
	ifreet = &(ip)->i_freef; \
}
55 
union ihead {				/* inode LRU cache, Chris Maltby */
	union  ihead *ih_head[2];	/* bucket viewed as its own list head */
	struct inode *ih_chain[2];	/* [0] = forward, [1] = back chain */
} ihead[INOHSZ];

/*
 * ifreeh/ifreet: head pointer and tail slot of the inode free list
 * (see INSFREE above).  bdevlisth chains, via i_devlst, every in-core
 * inode that represents a block device.
 */
struct inode *ifreeh, **ifreet, *bdevlisth;
62 
63 /*
64  * Initialize hash links for inodes
65  * and build inode free list.
66  */
67 ihinit()
68 {
69 	register int i;
70 	register struct inode *ip = inode;
71 	register union  ihead *ih = ihead;
72 
73 	for (i = INOHSZ; --i >= 0; ih++) {
74 		ih->ih_head[0] = ih;
75 		ih->ih_head[1] = ih;
76 	}
77 	ifreeh = ip;
78 	ifreet = &ip->i_freef;
79 	ip->i_freeb = &ifreeh;
80 	ip->i_forw = ip;
81 	ip->i_back = ip;
82 	ITOV(ip)->v_data = (qaddr_t)ip;
83 	for (i = ninode; --i > 0; ) {
84 		++ip;
85 		ip->i_forw = ip;
86 		ip->i_back = ip;
87 		ITOV(ip)->v_data = (qaddr_t)ip;
88 		*ifreet = ip;
89 		ip->i_freeb = ifreet;
90 		ifreet = &ip->i_freef;
91 	}
92 	ip->i_freef = NULL;
93 }
94 
95 /*
96  * Look up an vnode/inode by device,inumber.
97  * If it is in core (in the inode structure),
98  * honor the locking protocol.
99  * If it is not in core, read it in from the
100  * specified device.
101  * Callers must check for mount points!!
102  * In all cases, a pointer to a locked
103  * inode structure is returned.
104  */
iget(xp, ino, ipp)
	struct inode *xp;	/* inode on the same filesystem (supplies dev/mount) */
	ino_t ino;		/* inumber to look up */
	struct inode **ipp;	/* out: locked, referenced inode; 0 on error */
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct inode *nip;
	struct buf *bp;
	struct dinode tdip, *dp;
	union  ihead *ih;
	int error;

loop:
	/* Search the hash chain for an in-core copy of (dev, ino). */
	ih = &ihead[INOHASH(dev, ino)];
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
		if (ino == ip->i_number && dev == ip->i_dev) {
			/*
			 * Following is essentially an inline expanded
			 * copy of igrab(), expanded inline for speed,
			 * and so that the test for a mounted on inode
			 * can be deferred until after we are sure that
			 * the inode isn't busy.
			 */
			if ((ip->i_flag&ILOCKED) != 0) {
				ip->i_flag |= IWANT;
				sleep((caddr_t)ip, PINOD);
				/* slept: chain may have changed, rescan */
				goto loop;
			}
			vp = ITOV(ip);
			if (vp->v_count == 0) {		/* ino on free list */
				/* unlink from free list via back pointer */
				if (iq = ip->i_freef)
					iq->i_freeb = ip->i_freeb;
				else
					ifreet = ip->i_freeb;
				*ip->i_freeb = iq;
				ip->i_freef = NULL;
				ip->i_freeb = NULL;
			}
			ip->i_flag |= ILOCKED;
			vp->v_count++;
			*ipp = ip;
			return(0);
		}
	/* Not in core: allocate a fresh in-core inode, already hashed and locked. */
	if (error = getnewino(dev, ino, &nip)) {
		*ipp = 0;
		return (error);
	}
	ip = nip;
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, &bp)) {
		/*
		 * The inode doesn't contain anything useful, so it would
		 * be misleading to leave it on its hash chain. Iput() will
		 * take care of putting it back on the free list. We also
		 * lose its inumber, just in case.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		ip->i_number = 0;
		INSFREE(ip);
		ip->i_flag = 0;
		brelse(bp);
		*ipp = 0;
		return(error);
	}
	/*
	 * Check to see if the new inode represents a block device
	 * for which we already have an inode (either because of
	 * bdevvp() or because of a different inode representing
	 * the same block device). If such an alias exists, put the
	 * just allocated inode back on the free list, and replace
	 * the contents of the existing inode with the contents of
	 * the new inode.
	 */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	if ((dp->di_mode & IFMT) != IFBLK) {
		/* Ordinary case: copy the on-disk inode into core. */
		ip->i_ic = dp->di_ic;
		brelse(bp);
	} else {
again:
		for (iq = bdevlisth; iq; iq = iq->i_devlst) {
			if (dp->di_rdev != ITOV(iq)->v_rdev)
				continue;
			/*
			 * igrab() may sleep; re-check the alias still
			 * matches afterwards, else restart the scan.
			 */
			igrab(iq);
			if (dp->di_rdev != ITOV(iq)->v_rdev) {
				iput(iq);
				goto again;
			}
			/*
			 * Discard unneeded inode.
			 */
			remque(ip);
			ip->i_forw = ip;
			ip->i_back = ip;
			ip->i_number = 0;
			INSFREE(ip);
			ip->i_flag = 0;
			/*
			 * Reinitialize aliased inode.
			 * We must release the buffer that we just read
			 * before doing the iupdat() to avoid a possible
			 * deadlock with updating an inode in the same
			 * disk block.
			 */
			ip = iq;
			vp = ITOV(iq);
			tdip.di_ic = dp->di_ic;	/* stash contents before brelse */
			brelse(bp);
			error = iupdat(ip, &time, &time, 1);
			ip->i_ic = tdip.di_ic;
			/* rehash the alias under its new (dev, ino) identity */
			remque(ip);
			insque(ip, ih);
			ip->i_dev = dev;
			ip->i_number = ino;
			if (ip->i_devvp) {
				vrele(ip->i_devvp);
				ip->i_devvp = 0;
			}
			cache_purge(vp);
			break;
		}
		if (iq == 0) {
			/* No alias found: keep this inode and record it as a bdev. */
			ip->i_ic = dp->di_ic;
			brelse(bp);
			ip->i_devlst = bdevlisth;
			bdevlisth = ip;
		}
	}
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	ip->i_devvp->v_count++;
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vinit(vp, mntp, IFTOVT(ip->i_mode), &ufs_vnodeops);
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_rdev = ip->i_rdev;
		vp->v_op = &blk_vnodeops;
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	*ipp = ip;
	return (0);
}
266 
267 /*
268  * Allocate a new inode.
269  *
270  * Put it onto its hash chain and lock it so that other requests for
271  * this inode will block if they arrive while we are sleeping waiting
272  * for old data structures to be purged or for the contents of the disk
273  * portion of this inode to be read.
274  */
getnewino(dev, ino, ipp)
	dev_t dev;		/* device the inode will belong to (may be NODEV) */
	ino_t ino;		/* inumber it will carry */
	struct inode **ipp;	/* out: locked inode on its new hash chain */
{
	union ihead *ih;
	register struct inode *ip, *iq;
	register struct vnode *vp;

	/*
	 * Remove the next inode from the free list.
	 */
	if ((ip = ifreeh) == NULL) {
		tablefull("inode");
		*ipp = 0;
		return(ENFILE);
	}
	vp = ITOV(ip);
	/* A free inode must have no vnode references. */
	if (vp->v_count)
		panic("free inode isn't");
	if (iq = ip->i_freef)
		iq->i_freeb = &ifreeh;
	ifreeh = iq;
	ip->i_freef = NULL;
	ip->i_freeb = NULL;
	/*
	 * Now to take inode off the hash chain it was on
	 * (initially, or after an iflush, it is on a "hash chain"
	 * consisting entirely of itself, and pointed to by no-one)
	 * and put it on the chain for its new (ino, dev) pair.
	 */
	remque(ip);
	ip->i_dev = dev;
	ip->i_number = ino;
	/* NODEV callers (bdevvp) want the inode hashed nowhere. */
	if (dev != NODEV) {
		ih = &ihead[INOHASH(dev, ino)];
		insque(ip, ih);
	}
	/* Lock before anyone can find it under the new identity. */
	ip->i_flag = ILOCKED;
	ip->i_lastr = 0;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NODQUOT;
#endif
	/* If the previous identity was a block device, unchain it from bdevlisth. */
	if (vp->v_type == VBLK) {
		if (bdevlisth == ip) {
			bdevlisth = ip->i_devlst;
		} else {
			for (iq = bdevlisth; iq; iq = iq->i_devlst) {
				if (iq->i_devlst != ip)
					continue;
				iq->i_devlst = ip->i_devlst;
				break;
			}
			if (iq == NULL)
				panic("missing bdev");
		}
	}
	*ipp = ip;
	return (0);
}
344 
345 /*
346  * Convert a pointer to an inode into a reference to an inode.
347  *
348  * This is basically the internal piece of iget (after the
349  * inode pointer is located) but without the test for mounted
350  * filesystems.  It is caller's responsibility to check that
351  * the inode pointer is valid.
352  */
igrab(ip)
	register struct inode *ip;
{
	register struct vnode *vp = ITOV(ip);

	/* Wait for any holder of the inode lock; may sleep. */
	while ((ip->i_flag&ILOCKED) != 0) {
		ip->i_flag |= IWANT;
		sleep((caddr_t)ip, PINOD);
	}
	if (vp->v_count == 0) {		/* ino on free list */
		register struct inode *iq;

		/* Unlink from the free list using the i_freeb back pointer. */
		if (iq = ip->i_freef)
			iq->i_freeb = ip->i_freeb;
		else
			ifreet = ip->i_freeb;
		*ip->i_freeb = iq;
		ip->i_freef = NULL;
		ip->i_freeb = NULL;
	}
	/* Take a reference and the lock for the caller. */
	vp->v_count++;
	ip->i_flag |= ILOCKED;
}
376 
377 /*
378  * Create a vnode for a block device.
379  * Used for root filesystem, argdev, and swap areas.
380  */
bdevvp(dev, vpp)
	dev_t dev;		/* block device to get a vnode for */
	struct vnode **vpp;	/* out: unlocked, referenced vnode */
{
	register struct inode *ip;
	register struct vnode *vp;
	struct inode *nip;
	int error;

	/*
	 * Check for the existence of an existing vnode.
	 */
again:
	for (ip = bdevlisth; ip; ip = ip->i_devlst) {
		vp = ITOV(ip);
		if (dev != vp->v_rdev)
			continue;
		/*
		 * igrab() may sleep; re-check the device still matches
		 * afterwards, else the inode was reused -- restart.
		 */
		igrab(ip);
		if (dev != vp->v_rdev) {
			iput(ip);
			goto again;
		}
		IUNLOCK(ip);
		*vpp = vp;
		return (0);
	}
	/* None found: allocate a fresh inode, deliberately unhashed (NODEV). */
	if (error = getnewino(NODEV, (ino_t)0, &nip)) {
		*vpp = 0;
		return (error);
	}
	ip = nip;
	ip->i_fs = 0;
	ip->i_devlst = bdevlisth;
	bdevlisth = ip;
	vp = ITOV(ip);
	vinit(vp, 0, VBLK, &blk_vnodeops);
	vp->v_rdev = dev;
	IUNLOCK(ip);
	*vpp = vp;
	return (0);
}
422 
423 /*
424  * Decrement reference count of
425  * an inode structure.
426  * On the last reference,
427  * write the inode out and if necessary,
428  * truncate and deallocate the file.
429  */
430 iput(ip)
431 	register struct inode *ip;
432 {
433 
434 	if ((ip->i_flag & ILOCKED) == 0)
435 		panic("iput");
436 	IUNLOCK(ip);
437 	vrele(ITOV(ip));
438 }
439 
440 
441 ufs_inactive(vp)
442 	struct vnode *vp;
443 {
444 	register struct inode *ip = VTOI(vp);
445 	int mode, error;
446 
447 	if (ITOV(ip)->v_count != 0)
448 		panic("ufs_inactive: not inactive");
449 	ip->i_flag |= ILOCKED;
450 	if (ip->i_nlink <= 0 && (ITOV(ip)->v_mount->m_flag&M_RDONLY) == 0) {
451 		error = itrunc(ip, (u_long)0);
452 		mode = ip->i_mode;
453 		ip->i_mode = 0;
454 		ip->i_rdev = 0;
455 		ip->i_flag |= IUPD|ICHG;
456 		ifree(ip, ip->i_number, mode);
457 #ifdef QUOTA
458 		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
459 		dqrele(ip->i_dquot);
460 		ip->i_dquot = NODQUOT;
461 #endif
462 	}
463 	IUPDAT(ip, &time, &time, 0);
464 	IUNLOCK(ip);
465 	ip->i_flag = 0;
466 	/*
467 	 * Put the inode on the end of the free list.
468 	 * Possibly in some cases it would be better to
469 	 * put the inode at the head of the free list,
470 	 * (eg: where i_mode == 0 || i_number == 0).
471 	 */
472 	INSFREE(ip);
473 	return (error);
474 }
475 
476 /*
477  * Check accessed and update flags on
478  * an inode structure.
479  * If any is on, update the inode
480  * with the current time.
481  * If waitfor is given, then must insure
482  * i/o order so wait for write to complete.
483  */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;	/* access and modification times to apply */
	int waitfor;			/* nonzero: synchronous write */
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing pending: done. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	/* NOTE(review): on a read-only mount the dirty flags are left set. */
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	/* Read the filesystem block holding this inode's disk copy. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;	/* ctime always from current time */
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core inode into its slot in the buffer and write. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	dp->di_ic = ip->i_ic;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}
522 
#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most
 * length size.  Free affected disk
 * blocks -- the blocks of the file
 * are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
	register struct inode *oip;
	u_long length;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int error, allerror = 0;
	struct inode tip;

	/* Growing or no-op "truncation": just flush times and return. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
		if (error)
			return (error);
		if ((long)bn < 0)
			panic("itrunc: hole");
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		count = howmany(size, CLBYTES);
		/* Evict any mapped pages of the partial block before rereading it. */
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		error = bread(oip->i_devvp, bn, size, &bp);
		if (error) {
			oip->i_size = osize;	/* back out the size change */
			brelse(bp);
			return (error);
		}
		/* Zero from the new EOF to the end of the block. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;		/* tip keeps the old block pointers for freeing */
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	allerror = syncip(oip);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* Some of this level survives: nothing below it is affected. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* The copy (tip) and the original must agree on the kept pointers. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}
703 
704 /*
705  * Release blocks associated with the inode ip and
706  * stored in the indirect block bn.  Blocks are free'd
707  * in LIFO order up to (but not including) lastbn.  If
708  * level is greater than SINGLE, the block is an indirect
709  * block and recursive calls to indirtrunc must be used to
710  * cleanse other indirect blocks.
711  *
712  * NB: triple indirect blocks are untested.
713  */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;	/* indirect block; last logical block to keep */
	int level;		/* SINGLE, DOUBLE or TRIPLE */
	long *countp;		/* out: device blocks released */
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/* Keep a private copy of the pointers; the buffer is zeroed and written. */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;		/* free from the saved copy from here on */

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		/* i == old last here (loop above exits with i == last). */
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
797 
798 /*
799  * Remove any inodes in the inode cache belonging to dev.
800  *
801  * There should not be any active ones, return error if any are found
802  * (nb: this is a user error, not a system err).
803  */
#ifdef QUOTA
iflush(dev, iq)
	dev_t dev;
	struct inode *iq;	/* quota inode to skip over */
#else
iflush(dev)
	dev_t dev;
#endif
{
	register struct inode *ip;

	/*
	 * Walk the whole static inode table; inodeNINODE is presumably
	 * the end of the table (inode + ninode) -- TODO confirm against
	 * its definition.
	 */
	for (ip = inode; ip < inodeNINODE; ip++) {
#ifdef QUOTA
		if (ip != iq && ip->i_dev == dev)
#else
		if (ip->i_dev == dev)
#endif
			/* NB: this else binds to the inner if, as intended. */
			if (ITOV(ip)->v_count)
				return (EBUSY);
			else {
				/* Take it off its hash chain onto a chain of itself. */
				remque(ip);
				ip->i_forw = ip;
				ip->i_back = ip;
				/*
				 * as v_count == 0, the inode was on the free
				 * list already, just leave it there, it will
				 * fall off the bottom eventually. We could
				 * perhaps move it to the head of the free
				 * list, but as umounts are done so
				 * infrequently, we would gain very little,
				 * while making the code bigger.
				 */
#ifdef QUOTA
				dqrele(ip->i_dquot);
				ip->i_dquot = NODQUOT;
#endif
				if (ip->i_devvp) {
					vrele(ip->i_devvp);
					ip->i_devvp = 0;
				}
			}
	}
	return (0);
}
848 
849 /*
850  * Lock an inode. If its already locked, set the WANT bit and sleep.
851  */
852 ilock(ip)
853 	register struct inode *ip;
854 {
855 
856 	while (ip->i_flag & ILOCKED) {
857 		ip->i_flag |= IWANT;
858 		(void) sleep((caddr_t)ip, PINOD);
859 	}
860 	ip->i_flag |= ILOCKED;
861 }
862 
863 /*
864  * Unlock an inode.  If WANT bit is on, wakeup.
865  */
866 iunlock(ip)
867 	register struct inode *ip;
868 {
869 
870 	if ((ip->i_flag & ILOCKED) == 0)
871 		printf("unlocking unlocked inode %d on dev 0x%x\n",
872 			ip->i_number, ip->i_dev);
873 	ip->i_flag &= ~ILOCKED;
874 	if (ip->i_flag&IWANT) {
875 		ip->i_flag &= ~IWANT;
876 		wakeup((caddr_t)ip);
877 	}
878 }
879 
880 /*
881  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
882  * The mode is shifted to select the owner/group/other fields. The
883  * super user is granted all permissions.
884  *
885  * NB: Called from vnode op table. It seems this could all be done
886  * using vattr's but...
887  */
888 iaccess(ip, mode, cred)
889 	register struct inode *ip;
890 	register int mode;
891 	struct ucred *cred;
892 {
893 	register gid_t *gp;
894 	register struct vnode *vp = ITOV(ip);
895 	int i;
896 
897 	/*
898 	 * If you're the super-user,
899 	 * you always get access.
900 	 */
901 	if (cred->cr_uid == 0)
902 		return (0);
903 	/*
904 	 * Access check is based on only one of owner, group, public.
905 	 * If not owner, then check group. If not a member of the
906 	 * group, then check public access.
907 	 */
908 	if (cred->cr_uid != ip->i_uid) {
909 		mode >>= 3;
910 		gp = cred->cr_groups;
911 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
912 			if (ip->i_gid == *gp)
913 				goto found;
914 		mode >>= 3;
915 found:
916 		;
917 	}
918 	if ((ip->i_mode & mode) != 0)
919 		return (0);
920 	return (EACCES);
921 }
922