xref: /csrg-svn/sys/ufs/lfs/lfs_inode.c (revision 38226)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)lfs_inode.c	7.7 (Berkeley) 06/06/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "user.h"
24 #include "file.h"
25 #include "buf.h"
26 #include "cmap.h"
27 #include "vnode.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 #include "../ufs/ufsmount.h"
31 #ifdef QUOTA
32 #include "../ufs/quota.h"
33 #endif
34 #include "kernel.h"
35 #include "malloc.h"
36 
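/*
 * Size of the in-core inode hash table.  When INOHSZ is a power of
 * two (the default 512 is), INOHASH can select a bucket with a cheap
 * mask; the modulus form below covers any other table size.
 */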
37 #define	INOHSZ	512
38 #if	((INOHSZ&(INOHSZ-1)) == 0)
39 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
40 #else
41 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
42 #endif
43 
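/*
 * INSFREE appends ip to the tail of the inode free list.  An inode's
 * i_freeb always points back at the word that points forward to it
 * (either ifreeh or the previous inode's i_freef), so unlinking an
 * inode from the list needs no special case for the head.
 */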
44 #define INSFREE(ip) {\
45 	if (ifreeh) { \
46 		*ifreet = (ip); \
47 		(ip)->i_freeb = ifreet; \
48 	} else { \
49 		ifreeh = (ip); \
50 		(ip)->i_freeb = &ifreeh; \
51 	} \
52 	(ip)->i_freef = NULL; \
53 	ifreet = &(ip)->i_freef; \
54 }
55 
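/*
 * Each hash bucket heads a doubly linked chain of in-core inodes.
 * The union lets an empty bucket point at itself and still be handed
 * to insque()/remque() as if it were an inode, since ih_chain[0] and
 * ih_chain[1] overlay the inode's i_forw and i_back links.
 */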
56 union ihead {				/* inode LRU cache, Chris Maltby */
57 	union  ihead *ih_head[2];
58 	struct inode *ih_chain[2];
59 } ihead[INOHSZ];
60 
61 struct inode *ifreeh, **ifreet, *bdevlisth;
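/*
 * ifreeh and ifreet are the head and tail pointer of the inode free
 * list (inodes whose vnodes have no references, kept in LRU order);
 * bdevlisth chains all block-device inodes together through i_devlst
 * so aliases for the same device can be found.
 */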
62 
63 /*
64  * Initialize hash links for inodes
65  * and build inode free list.
66  */
67 ihinit()
68 {
69 	register int i;
70 	register struct inode *ip = inode;
71 	register union  ihead *ih = ihead;
72 
73 	for (i = INOHSZ; --i >= 0; ih++) {
74 		ih->ih_head[0] = ih;
75 		ih->ih_head[1] = ih;
76 	}
77 	ifreeh = ip;
78 	ifreet = &ip->i_freef;
79 	ip->i_freeb = &ifreeh;
80 	ip->i_forw = ip;
81 	ip->i_back = ip;
82 	ITOV(ip)->v_data = (qaddr_t)ip;
83 	for (i = ninode; --i > 0; ) {
84 		++ip;
85 		ip->i_forw = ip;
86 		ip->i_back = ip;
87 		ITOV(ip)->v_data = (qaddr_t)ip;
88 		*ifreet = ip;
89 		ip->i_freeb = ifreet;
90 		ifreet = &ip->i_freef;
91 	}
92 	ip->i_freef = NULL;
93 }
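/*
 * On return from ihinit() every hash chain is empty (each head points
 * at itself), each of the ninode in-core inodes sits on a private
 * "hash chain" of one with its vnode's v_data pointing back at it,
 * and all of them are strung onto the free list in table order.
 */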
94 
95 /*
96  * Look up a vnode/inode by device, inumber.
97  * If it is in core (in the inode structure),
98  * honor the locking protocol.
99  * If it is not in core, read it in from the
100  * specified device.
101  * Callers must check for mount points!!
102  * In all cases, a pointer to a locked
103  * inode structure is returned.
104  */
105 iget(xp, ino, ipp)
106 	struct inode *xp;
107 	ino_t ino;
108 	struct inode **ipp;
109 {
110 	dev_t dev = xp->i_dev;
111 	struct mount *mntp = ITOV(xp)->v_mount;
112 	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
113 	register struct inode *ip, *iq;
114 	register struct vnode *vp;
115 	struct inode *nip;
116 	struct buf *bp;
117 	struct dinode tdip, *dp;
118 	union  ihead *ih;
119 	int error;
120 
121 loop:
122 	ih = &ihead[INOHASH(dev, ino)];
123 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
124 		if (ino == ip->i_number && dev == ip->i_dev) {
125 			/*
126 			 * Following is essentially an inline expansion
127 			 * of igrab(), done here for speed, and so that
128 			 * the test for a mounted-on inode
129 			 * can be deferred until after we are sure that
130 			 * the inode isn't busy.
131 			 */
132 			if ((ip->i_flag&ILOCKED) != 0) {
133 				ip->i_flag |= IWANT;
134 				sleep((caddr_t)ip, PINOD);
135 				goto loop;
136 			}
137 			vp = ITOV(ip);
138 			if (vp->v_count == 0) {		/* ino on free list */
139 				if (iq = ip->i_freef)
140 					iq->i_freeb = ip->i_freeb;
141 				else
142 					ifreet = ip->i_freeb;
143 				*ip->i_freeb = iq;
144 				ip->i_freef = NULL;
145 				ip->i_freeb = NULL;
146 			}
147 			ILOCK(ip);
148 			vp->v_count++;
149 			*ipp = ip;
150 			return(0);
151 		}
152 	if (error = getnewino(dev, ino, &nip)) {
153 		*ipp = 0;
154 		return (error);
155 	}
156 	ip = nip;
157 	/*
158 	 * Read in the disk contents for the inode.
159 	 */
160 	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
161 	    (int)fs->fs_bsize, &bp)) {
162 		/*
163 		 * The inode doesn't contain anything useful, so it would
164 		 * be misleading to leave it on its hash chain.  Put it back
165 		 * on the free list here and clear its inumber, just in
166 		 * case.
167 		 */
168 		remque(ip);
169 		ip->i_forw = ip;
170 		ip->i_back = ip;
171 		ip->i_number = 0;
172 		INSFREE(ip);
173 		iunlock(ip);
174 		ip->i_flag = 0;
175 		brelse(bp);
176 		*ipp = 0;
177 		return(error);
178 	}
179 	/*
180 	 * Check to see if the new inode represents a block device
181 	 * for which we already have an inode (either because of
182 	 * bdevvp() or because of a different inode representing
183 	 * the same block device). If such an alias exists, put the
184 	 * just allocated inode back on the free list, and replace
185 	 * the contents of the existing inode with the contents of
186 	 * the new inode.
187 	 */
188 	dp = bp->b_un.b_dino;
189 	dp += itoo(fs, ino);
190 	if ((dp->di_mode & IFMT) != IFBLK) {
191 		ip->i_ic = dp->di_ic;
192 		brelse(bp);
193 	} else {
194 again:
195 		for (iq = bdevlisth; iq; iq = iq->i_devlst) {
196 			if (dp->di_rdev != ITOV(iq)->v_rdev)
197 				continue;
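			/*
			 * igrab() may sleep waiting for the alias's inode
			 * lock, so the rdev match is rechecked below once
			 * the alias has been locked and referenced.
			 */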
198 			igrab(iq);
199 			if (dp->di_rdev != ITOV(iq)->v_rdev) {
200 				iput(iq);
201 				goto again;
202 			}
203 			/*
204 			 * Discard unneeded inode.
205 			 */
206 			remque(ip);
207 			ip->i_forw = ip;
208 			ip->i_back = ip;
209 			ip->i_number = 0;
210 			INSFREE(ip);
211 			iunlock(ip);
212 			ip->i_flag = 0;
213 			/*
214 			 * Reinitialize aliased inode.
215 			 * We must release the buffer that we just read
216 			 * before doing the iupdat() to avoid a possible
217 			 * deadlock with updating an inode in the same
218 			 * disk block.
219 			 */
220 			ip = iq;
221 			vp = ITOV(iq);
222 			tdip.di_ic = dp->di_ic;
223 			brelse(bp);
224 			error = iupdat(ip, &time, &time, 1);
225 			ip->i_ic = tdip.di_ic;
226 			remque(ip);
227 			insque(ip, ih);
228 			ip->i_dev = dev;
229 			ip->i_number = ino;
230 			if (ip->i_devvp) {
231 				vrele(ip->i_devvp);
232 				ip->i_devvp = 0;
233 			}
234 			cache_purge(vp);
235 			break;
236 		}
237 		if (iq == 0) {
238 			ip->i_ic = dp->di_ic;
239 			brelse(bp);
240 			ip->i_devlst = bdevlisth;
241 			bdevlisth = ip;
242 		}
243 	}
244 	/*
245 	 * Finish inode initialization.
246 	 */
247 	ip->i_fs = fs;
248 	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
249 	ip->i_devvp->v_count++;
250 	/*
251 	 * Initialize the associated vnode
252 	 */
253 	vp = ITOV(ip);
254 	vinit(vp, mntp, IFTOVT(ip->i_mode), &ufs_vnodeops);
255 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
256 		vp->v_rdev = ip->i_rdev;
257 		vp->v_op = &blk_vnodeops;
258 	}
259 	if (ino == ROOTINO)
260 		vp->v_flag |= VROOT;
261 #ifdef QUOTA
262 	if (ip->i_mode != 0)
263 		ip->i_dquot = inoquota(ip);
264 #endif
265 	*ipp = ip;
266 	return (0);
267 }
268 
269 /*
270  * Allocate a new inode.
271  *
272  * Put it onto its hash chain and lock it so that other requests for
273  * this inode will block if they arrive while we are sleeping waiting
274  * for old data structures to be purged or for the contents of the disk
275  * portion of this inode to be read.
276  */
277 getnewino(dev, ino, ipp)
278 	dev_t dev;
279 	ino_t ino;
280 	struct inode **ipp;
281 {
282 	union ihead *ih;
283 	register struct inode *ip, *iq;
284 	register struct vnode *vp;
285 
286 	/*
287 	 * Remove the next inode from the free list.
288 	 */
289 	if ((ip = ifreeh) == NULL) {
290 		tablefull("inode");
291 		*ipp = 0;
292 		return(ENFILE);
293 	}
294 	vp = ITOV(ip);
295 	if (vp->v_count)
296 		panic("free inode isn't");
297 	if (iq = ip->i_freef)
298 		iq->i_freeb = &ifreeh;
299 	ifreeh = iq;
300 	ip->i_freef = NULL;
301 	ip->i_freeb = NULL;
302 	/*
303 	 * Now to take inode off the hash chain it was on
304 	 * (initially, or after an iflush, it is on a "hash chain"
305 	 * consisting entirely of itself, and pointed to by no-one)
306 	 * and put it on the chain for its new (ino, dev) pair.
307 	 */
308 	remque(ip);
309 	ip->i_dev = dev;
310 	ip->i_number = ino;
311 	if (dev != NODEV) {
312 		ih = &ihead[INOHASH(dev, ino)];
313 		insque(ip, ih);
314 	}
315 	ip->i_flag = 0;
316 	ILOCK(ip);
317 	ip->i_lastr = 0;
318 	/*
319 	 * Purge old data structures associated with the inode.
320 	 */
321 	cache_purge(vp);
322 	if (ip->i_devvp) {
323 		vrele(ip->i_devvp);
324 		ip->i_devvp = 0;
325 	}
326 #ifdef QUOTA
327 	dqrele(ip->i_dquot);
328 	ip->i_dquot = NODQUOT;
329 #endif
330 	if (vp->v_type == VBLK) {
331 		if (bdevlisth == ip) {
332 			bdevlisth = ip->i_devlst;
333 		} else {
334 			for (iq = bdevlisth; iq; iq = iq->i_devlst) {
335 				if (iq->i_devlst != ip)
336 					continue;
337 				iq->i_devlst = ip->i_devlst;
338 				break;
339 			}
340 			if (iq == NULL)
341 				panic("missing bdev");
342 		}
343 	}
344 	*ipp = ip;
345 	return (0);
346 }
347 
348 /*
349  * Convert a pointer to an inode into a reference to an inode.
350  *
351  * This is basically the internal piece of iget (after the
352  * inode pointer is located) but without the test for mounted
353  * filesystems.  It is the caller's responsibility to check that
354  * the inode pointer is valid.
355  */
356 igrab(ip)
357 	register struct inode *ip;
358 {
359 	register struct vnode *vp = ITOV(ip);
360 
361 	while ((ip->i_flag&ILOCKED) != 0) {
362 		ip->i_flag |= IWANT;
363 		sleep((caddr_t)ip, PINOD);
364 	}
365 	if (vp->v_count == 0) {		/* ino on free list */
366 		register struct inode *iq;
367 
368 		if (iq = ip->i_freef)
369 			iq->i_freeb = ip->i_freeb;
370 		else
371 			ifreet = ip->i_freeb;
372 		*ip->i_freeb = iq;
373 		ip->i_freef = NULL;
374 		ip->i_freeb = NULL;
375 	}
376 	vp->v_count++;
377 	ILOCK(ip);
378 }
379 
380 /*
381  * Create a vnode for a block device.
382  * Used for root filesystem, argdev, and swap areas.
383  */
384 bdevvp(dev, vpp)
385 	dev_t dev;
386 	struct vnode **vpp;
387 {
388 	register struct inode *ip;
389 	register struct vnode *vp;
390 	struct inode *nip;
391 	int error;
392 
393 	/*
394 	 * Check whether a vnode already exists for this device.
395 	 */
396 again:
397 	for (ip = bdevlisth; ip; ip = ip->i_devlst) {
398 		vp = ITOV(ip);
399 		if (dev != vp->v_rdev)
400 			continue;
401 		igrab(ip);
402 		if (dev != vp->v_rdev) {
403 			iput(ip);
404 			goto again;
405 		}
406 		IUNLOCK(ip);
407 		*vpp = vp;
408 		return (0);
409 	}
410 	if (error = getnewino(NODEV, (ino_t)0, &nip)) {
411 		*vpp = 0;
412 		return (error);
413 	}
414 	ip = nip;
415 	ip->i_fs = 0;
416 	ip->i_devlst = bdevlisth;
417 	bdevlisth = ip;
418 	vp = ITOV(ip);
419 	vinit(vp, 0, VBLK, &blk_vnodeops);
420 	vp->v_rdev = dev;
421 	IUNLOCK(ip);
422 	*vpp = vp;
423 	return (0);
424 }
425 
426 /*
427  * Decrement reference count of
428  * an inode structure.
429  * On the last reference,
430  * write the inode out and if necessary,
431  * truncate and deallocate the file.
432  */
433 iput(ip)
434 	register struct inode *ip;
435 {
436 
437 	if ((ip->i_flag & ILOCKED) == 0)
438 		panic("iput");
439 	IUNLOCK(ip);
440 	vrele(ITOV(ip));
441 }
442 
443 
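/*
 * The last reference to a vnode/inode has gone away.  If the link
 * count is also zero and the filesystem is writable, release the
 * file's blocks and free the on-disk inode; in any case update the
 * inode and put it at the end of the free list for reuse.
 */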
444 ufs_inactive(vp)
445 	struct vnode *vp;
446 {
447 	register struct inode *ip = VTOI(vp);
448 	int mode, error = 0;
449 
450 	if (ITOV(ip)->v_count != 0)
451 		panic("ufs_inactive: not inactive");
452 	ILOCK(ip);
453 	if (ip->i_nlink <= 0 && (ITOV(ip)->v_mount->m_flag&M_RDONLY) == 0) {
454 		error = itrunc(ip, (u_long)0);
455 		mode = ip->i_mode;
456 		ip->i_mode = 0;
457 		ip->i_rdev = 0;
458 		ip->i_flag |= IUPD|ICHG;
459 		ifree(ip, ip->i_number, mode);
460 #ifdef QUOTA
461 		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
462 		dqrele(ip->i_dquot);
463 		ip->i_dquot = NODQUOT;
464 #endif
465 	}
466 	IUPDAT(ip, &time, &time, 0);
467 	IUNLOCK(ip);
468 	ip->i_flag = 0;
469 	/*
470 	 * Put the inode on the end of the free list.
471 	 * Possibly in some cases it would be better to
472 	 * put the inode at the head of the free list
473 	 * (e.g., where i_mode == 0 || i_number == 0).
474 	 */
475 	INSFREE(ip);
476 	return (error);
477 }
478 
479 /*
480  * Check accessed and update flags on
481  * an inode structure.
482  * If any is on, update the inode
483  * with the current time.
484  * If waitfor is given, I/O ordering must be
485  * ensured, so wait for the write to complete.
486  */
487 iupdat(ip, ta, tm, waitfor)
488 	register struct inode *ip;
489 	struct timeval *ta, *tm;
490 	int waitfor;
491 {
492 	struct buf *bp;
493 	struct vnode *vp = ITOV(ip);
494 	struct dinode *dp;
495 	register struct fs *fs;
496 	int error;
497 
498 	fs = ip->i_fs;
499 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
500 		return (0);
501 	if (vp->v_mount->m_flag & M_RDONLY)
502 		return (0);
503 	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
504 		(int)fs->fs_bsize, &bp);
505 	if (error) {
506 		brelse(bp);
507 		return (error);
508 	}
509 	if (ip->i_flag&IACC)
510 		ip->i_atime = ta->tv_sec;
511 	if (ip->i_flag&IUPD)
512 		ip->i_mtime = tm->tv_sec;
513 	if (ip->i_flag&ICHG)
514 		ip->i_ctime = time.tv_sec;
515 	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
516 	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
517 	dp->di_ic = ip->i_ic;
518 	if (waitfor) {
519 		return (bwrite(bp));
520 	} else {
521 		bdwrite(bp);
522 		return (0);
523 	}
524 }
525 
526 #define	SINGLE	0	/* index of single indirect block */
527 #define	DOUBLE	1	/* index of double indirect block */
528 #define	TRIPLE	2	/* index of triple indirect block */
529 /*
530  * Truncate the inode oip to at most
531  * length bytes.  Free affected disk
532  * blocks -- the blocks of the file
533  * are removed in reverse order.
534  *
535  * NB: triple indirect blocks are untested.
536  */
537 itrunc(oip, length)
538 	register struct inode *oip;
539 	u_long length;
540 {
541 	register daddr_t lastblock;
542 	daddr_t bn, lbn, lastiblock[NIADDR];
543 	register struct fs *fs;
544 	register struct inode *ip;
545 	struct buf *bp;
546 	int offset, osize, size, level;
547 	long count, nblocks, blocksreleased = 0;
548 	register int i;
549 	int error, allerror = 0;
550 	struct inode tip;
551 
552 	if (oip->i_size <= length) {
553 		oip->i_flag |= ICHG|IUPD;
554 		error = iupdat(oip, &time, &time, 1);
555 		return (error);
556 	}
557 	/*
558 	 * Calculate index into inode's block list of
559 	 * last direct and indirect blocks (if any)
560 	 * which we want to keep.  Lastblock is -1 when
561 	 * the file is truncated to 0.
562 	 */
563 	fs = oip->i_fs;
564 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
565 	lastiblock[SINGLE] = lastblock - NDADDR;
566 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
567 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
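	/*
	 * For example, truncating to length 0 leaves lastblock == -1 and
	 * every lastiblock[] entry negative, so all direct and indirect
	 * blocks of the file are released below.
	 */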
568 	nblocks = btodb(fs->fs_bsize);
569 	/*
570 	 * Update the size of the file. If the file is not being
571 	 * truncated to a block boundary, the contents of the
572 	 * partial block following the end of the file must be
573 	 * zero'ed in case it ever becomes accessible again because
574 	 * of subsequent file growth.
575 	 */
576 	osize = oip->i_size;
577 	offset = blkoff(fs, length);
578 	if (offset == 0) {
579 		oip->i_size = length;
580 	} else {
581 		lbn = lblkno(fs, length);
582 		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
583 		if (error)
584 			return (error);
585 		if ((long)bn < 0)
586 			panic("itrunc: hole");
587 		oip->i_size = length;
588 		size = blksize(fs, oip, lbn);
589 		count = howmany(size, CLBYTES);
590 		for (i = 0; i < count; i++)
591 			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
592 		error = bread(oip->i_devvp, bn, size, &bp);
593 		if (error) {
594 			oip->i_size = osize;
595 			brelse(bp);
596 			return (error);
597 		}
598 		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
599 		bdwrite(bp);
600 	}
601 	/*
602 	 * Update file and block pointers
603 	 * on disk before we start freeing blocks.
604 	 * If we crash before free'ing blocks below,
605 	 * the blocks will be returned to the free list.
606 	 * lastiblock values are also normalized to -1
607 	 * for calls to indirtrunc below.
608 	 */
609 	tip = *oip;
610 	tip.i_size = osize;
611 	for (level = TRIPLE; level >= SINGLE; level--)
612 		if (lastiblock[level] < 0) {
613 			oip->i_ib[level] = 0;
614 			lastiblock[level] = -1;
615 		}
616 	for (i = NDADDR - 1; i > lastblock; i--)
617 		oip->i_db[i] = 0;
618 	oip->i_flag |= ICHG|IUPD;
619 	allerror = syncip(oip);
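	/*
	 * The freeing below works from tip, the copy saved above: the
	 * on-disk inode already reflects the truncated state (written by
	 * syncip()), while tip still holds the old block pointers that
	 * are about to be released.
	 */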
620 
621 	/*
622 	 * Indirect blocks first.
623 	 */
624 	ip = &tip;
625 	for (level = TRIPLE; level >= SINGLE; level--) {
626 		bn = ip->i_ib[level];
627 		if (bn != 0) {
628 			error = indirtrunc(ip, bn, lastiblock[level], level,
629 				&count);
630 			if (error)
631 				allerror = error;
632 			blocksreleased += count;
633 			if (lastiblock[level] < 0) {
634 				ip->i_ib[level] = 0;
635 				blkfree(ip, bn, (off_t)fs->fs_bsize);
636 				blocksreleased += nblocks;
637 			}
638 		}
639 		if (lastiblock[level] >= 0)
640 			goto done;
641 	}
642 
643 	/*
644 	 * All whole direct blocks or frags.
645 	 */
646 	for (i = NDADDR - 1; i > lastblock; i--) {
647 		register off_t bsize;
648 
649 		bn = ip->i_db[i];
650 		if (bn == 0)
651 			continue;
652 		ip->i_db[i] = 0;
653 		bsize = (off_t)blksize(fs, ip, i);
654 		blkfree(ip, bn, bsize);
655 		blocksreleased += btodb(bsize);
656 	}
657 	if (lastblock < 0)
658 		goto done;
659 
660 	/*
661 	 * Finally, look for a change in size of the
662 	 * last direct block; release any frags.
663 	 */
664 	bn = ip->i_db[lastblock];
665 	if (bn != 0) {
666 		off_t oldspace, newspace;
667 
668 		/*
669 		 * Calculate amount of space we're giving
670 		 * back as old block size minus new block size.
671 		 */
672 		oldspace = blksize(fs, ip, lastblock);
673 		ip->i_size = length;
674 		newspace = blksize(fs, ip, lastblock);
675 		if (newspace == 0)
676 			panic("itrunc: newspace");
677 		if (oldspace - newspace > 0) {
678 			/*
679 			 * Block number of space to be free'd is
680 			 * the old block # plus the number of frags
681 			 * required for the storage we're keeping.
682 			 */
683 			bn += numfrags(fs, newspace);
684 			blkfree(ip, bn, oldspace - newspace);
685 			blocksreleased += btodb(oldspace - newspace);
686 		}
687 	}
688 done:
689 /* BEGIN PARANOIA */
690 	for (level = SINGLE; level <= TRIPLE; level++)
691 		if (ip->i_ib[level] != oip->i_ib[level])
692 			panic("itrunc1");
693 	for (i = 0; i < NDADDR; i++)
694 		if (ip->i_db[i] != oip->i_db[i])
695 			panic("itrunc2");
696 /* END PARANOIA */
697 	oip->i_blocks -= blocksreleased;
698 	if (oip->i_blocks < 0)			/* sanity */
699 		oip->i_blocks = 0;
700 	oip->i_flag |= ICHG;
701 #ifdef QUOTA
702 	(void) chkdq(oip, -blocksreleased, 0);
703 #endif
704 	return (allerror);
705 }
706 
707 /*
708  * Release blocks associated with the inode ip and
709  * stored in the indirect block bn.  Blocks are free'd
710  * in LIFO order up to (but not including) lastbn.  If
711  * level is greater than SINGLE, the block is an indirect
712  * block and recursive calls to indirtrunc must be used to
713  * cleanse other indirect blocks.
714  *
715  * NB: triple indirect blocks are untested.
716  */
717 indirtrunc(ip, bn, lastbn, level, countp)
718 	register struct inode *ip;
719 	daddr_t bn, lastbn;
720 	int level;
721 	long *countp;
722 {
723 	register int i;
724 	struct buf *bp;
725 	register struct fs *fs = ip->i_fs;
726 	register daddr_t *bap;
727 	daddr_t *copy, nb, last;
728 	long blkcount, factor;
729 	int nblocks, blocksreleased = 0;
730 	int error, allerror = 0;
731 
732 	/*
733 	 * Calculate index in current block of last
734 	 * block to be kept.  -1 indicates the entire
735 	 * block so we need not calculate the index.
736 	 */
737 	factor = 1;
738 	for (i = SINGLE; i < level; i++)
739 		factor *= NINDIR(fs);
740 	last = lastbn;
741 	if (lastbn > 0)
742 		last /= factor;
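	/*
	 * For example, at the DOUBLE level factor is NINDIR(fs), so last
	 * becomes the index within this indirect block of the last
	 * single-indirect block that is still (partially) needed.
	 */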
743 	nblocks = btodb(fs->fs_bsize);
744 	/*
745 	 * Get buffer of block pointers, zero those
746 	 * entries corresponding to blocks to be free'd,
747 	 * and update the on-disk copy first.
748 	 */
749 	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize, &bp);
750 	if (error) {
751 		brelse(bp);
752 		*countp = 0;
753 		return (error);
754 	}
755 	bap = bp->b_un.b_daddr;
756 	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
757 	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
758 	bzero((caddr_t)&bap[last + 1],
759 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
760 	error = bwrite(bp);
761 	if (error)
762 		allerror = error;
763 	bap = copy;
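	/*
	 * The indirect block on disk has now been cleared beyond "last"
	 * and written synchronously, so a crash cannot leave it pointing
	 * at blocks freed below; the freeing itself works from the
	 * in-core copy.
	 */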
764 
765 	/*
766 	 * Recursively free totally unused blocks.
767 	 */
768 	for (i = NINDIR(fs) - 1; i > last; i--) {
769 		nb = bap[i];
770 		if (nb == 0)
771 			continue;
772 		if (level > SINGLE) {
773 			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
774 				&blkcount);
775 			if (error)
776 				allerror = error;
777 			blocksreleased += blkcount;
778 		}
779 		blkfree(ip, nb, (off_t)fs->fs_bsize);
780 		blocksreleased += nblocks;
781 	}
782 
783 	/*
784 	 * Recursively free last partial block.
785 	 */
786 	if (level > SINGLE && lastbn >= 0) {
787 		last = lastbn % factor;
788 		nb = bap[i];
789 		if (nb != 0) {
790 			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
791 			if (error)
792 				allerror = error;
793 			blocksreleased += blkcount;
794 		}
795 	}
796 	FREE(copy, M_TEMP);
797 	*countp = blocksreleased;
798 	return (allerror);
799 }
800 
801 /*
802  * Remove any inodes in the inode cache belonging to dev.
803  *
804  * There should not be any active ones; return an error if any are found
805  * (NB: this is a user error, not a system error).
806  */
807 #ifdef QUOTA
808 iflush(dev, iq)
809 	dev_t dev;
810 	struct inode *iq;
811 #else
812 iflush(dev)
813 	dev_t dev;
814 #endif
815 {
816 	register struct inode *ip;
817 
818 	for (ip = inode; ip < inodeNINODE; ip++) {
819 #ifdef QUOTA
820 		if (ip != iq && ip->i_dev == dev)
821 #else
822 		if (ip->i_dev == dev)
823 #endif
824 			if (ITOV(ip)->v_count)
825 				return (EBUSY);
826 			else {
827 				remque(ip);
828 				ip->i_forw = ip;
829 				ip->i_back = ip;
830 				/*
831 				 * As v_count == 0, the inode was already on the
832 				 * free list; just leave it there and it will
833 				 * fall off the bottom eventually.  We could
834 				 * perhaps move it to the head of the free
835 				 * list, but as umounts are done so
836 				 * infrequently, we would gain very little
837 				 * while making the code bigger.
838 				 */
839 #ifdef QUOTA
840 				dqrele(ip->i_dquot);
841 				ip->i_dquot = NODQUOT;
842 #endif
843 				if (ip->i_devvp) {
844 					vrele(ip->i_devvp);
845 					ip->i_devvp = 0;
846 				}
847 			}
848 	}
849 	return (0);
850 }
851 
852 /*
853  * Lock an inode. If it's already locked, set the WANT bit and sleep.
854  */
855 ilock(ip)
856 	register struct inode *ip;
857 {
858 
859 	while (ip->i_flag & ILOCKED) {
860 		ip->i_flag |= IWANT;
861 		(void) sleep((caddr_t)ip, PINOD);
862 	}
863 	ip->i_flag |= ILOCKED;
864 }
865 
866 /*
867  * Unlock an inode.  If WANT bit is on, wakeup.
868  */
869 iunlock(ip)
870 	register struct inode *ip;
871 {
872 
873 	if ((ip->i_flag & ILOCKED) == 0)
874 		printf("unlocking unlocked inode %d on dev 0x%x\n",
875 			ip->i_number, ip->i_dev);
876 	ip->i_flag &= ~ILOCKED;
877 	if (ip->i_flag&IWANT) {
878 		ip->i_flag &= ~IWANT;
879 		wakeup((caddr_t)ip);
880 	}
881 }
882 
883 /*
884  * Check mode permission on an inode pointer. Mode is READ, WRITE or EXEC.
885  * The mode is shifted to select the owner/group/other fields. The
886  * super user is granted all permissions.
887  *
888  * NB: Called from vnode op table. It seems this could all be done
889  * using vattr's but...
890  */
891 iaccess(ip, mode, cred)
892 	register struct inode *ip;
893 	register int mode;
894 	struct ucred *cred;
895 {
896 	register gid_t *gp;
897 	register struct vnode *vp = ITOV(ip);
898 	int i;
899 
900 	/*
901 	 * If you're the super-user,
902 	 * you always get access.
903 	 */
904 	if (cred->cr_uid == 0)
905 		return (0);
906 	/*
907 	 * Access check is based on only one of owner, group, public.
908 	 * If not owner, then check group. If not a member of the
909 	 * group, then check public access.
910 	 */
911 	if (cred->cr_uid != ip->i_uid) {
912 		mode >>= 3;
913 		gp = cred->cr_groups;
914 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
915 			if (ip->i_gid == *gp)
916 				goto found;
917 		mode >>= 3;
918 found:
919 		;
920 	}
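	/*
	 * mode now holds the requested rwx bits in the owner (0700),
	 * group (0070), or other (0007) position, whichever applies
	 * to the caller.
	 */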
921 	if ((ip->i_mode & mode) != 0)
922 		return (0);
923 	return (EACCES);
924 }
925