xref: /csrg-svn/sys/ufs/lfs/lfs_inode.c (revision 39504)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)lfs_inode.c	7.18 (Berkeley) 11/10/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "user.h"
24 #include "file.h"
25 #include "buf.h"
26 #include "cmap.h"
27 #include "vnode.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 #include "../ufs/ufsmount.h"
31 #ifdef QUOTA
32 #include "../ufs/quota.h"
33 #endif
34 #include "kernel.h"
35 #include "malloc.h"
36 
37 #define	INOHSZ	512
38 #if	((INOHSZ&(INOHSZ-1)) == 0)
39 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
40 #else
41 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
42 #endif
43 
/*
 * In-core inode hash table, indexed by INOHASH(dev, ino).  Each
 * bucket heads a doubly linked chain of inodes; the union lets the
 * bucket itself stand in for an inode's forward/backward link words
 * so insque/remque can treat the head like a chain element.
 */
union ihead {
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];
48 
49 /*
50  * Initialize hash links for inodes.
51  */
52 ufs_init()
53 {
54 	register int i;
55 	register union ihead *ih = ihead;
56 
57 #ifndef lint
58 	if (VN_MAXPRIVATE < sizeof(struct inode))
59 		panic("ihinit: too small");
60 #endif /* not lint */
61 	for (i = INOHSZ; --i >= 0; ih++) {
62 		ih->ih_head[0] = ih;
63 		ih->ih_head[1] = ih;
64 	}
65 }
66 
/*
 * Look up a vnode/inode by (device, inumber).
 *
 * If it is in core (on its hash chain), honor the locking protocol:
 * sleep while another process holds it locked and rescan from the
 * top afterwards, since the inode may have been reused while we
 * slept.  If it is not in core, allocate a fresh vnode and read the
 * inode in from the specified device.
 *
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked inode structure is returned
 * through ipp; on failure an errno value is returned and *ipp is
 * set to 0.
 */
iget(xp, ino, ipp)
	struct inode *xp;	/* any inode on the target filesystem */
	ino_t ino;		/* inumber to look up */
	struct inode **ipp;	/* out: locked, referenced inode */
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union  ihead *ih;
	int error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * Scan the hash chain for an in-core copy.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			/* Held by someone else: sleep, then rescan. */
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* A failed vget means the vnode went away; rescan. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_lastr = 0;
	ip->i_mode = 0;
	ip->i_flags = 0;
#ifdef QUOTA
	ip->i_dquot = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy our dinode out of the block of inodes just read. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/*
		 * Device special files use the spec vnode ops.  If
		 * checkalias finds an existing alias vnode for this
		 * device, transfer this inode's identity to it and
		 * discard the vnode we just set up.
		 */
		vp->v_rdev = ip->i_rdev;
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_lastr = 0;
			iq->i_flags = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}
209 
210 /*
211  * Unlock and decrement the reference count of an inode structure.
212  */
213 iput(ip)
214 	register struct inode *ip;
215 {
216 
217 	if ((ip->i_flag & ILOCKED) == 0)
218 		panic("iput");
219 	IUNLOCK(ip);
220 	vrele(ITOV(ip));
221 }
222 
/*
 * Last reference to an inode: write the inode out and, if its link
 * count has reached zero, truncate and deallocate the file.  A
 * nonzero vnode reference count is noted on the console but
 * processing continues.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (vp->v_count != 0)
		printf("ufs_inactive: pushing active ino %d dev 0x%x\n",
			ip->i_number, ip->i_dev);
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * Zero link count on a writable filesystem: the file is gone.
	 * Release its blocks, then free the inode itself.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
		error = itrunc(ip, (u_long)0);
		mode = ip->i_mode;	/* save mode for ifree before clearing */
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
#ifdef QUOTA
		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
		dqrele(ip->i_dquot);
		ip->i_dquot = NODQUOT;
#endif
	}
	/* Flush any pending timestamp/flag changes to disk. */
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_count == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}
268 
/*
 * Reclaim an inode so that its vnode can be used for other purposes:
 * unhook it from its hash chain and release everything it holds.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);

	if (vp->v_count != 0)
		printf("ufs_reclaim: pushing active ino %d dev 0x%x\n",
			ip->i_number, ip->i_dev);
	/*
	 * Remove the inode from its hash chain.
	 */
	remque(ip);
	/* Leave the chain links pointing at the inode itself. */
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		/* Drop the reference on the device vnode taken in iget. */
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	dqrele(ip->i_dquot);
	ip->i_dquot = NODQUOT;
#endif
	ip->i_flag = 0;
	return (0);
}
301 
302 /*
303  * Check accessed and update flags on an inode structure.
304  * If any is on, update the inode with the current time.
305  * If waitfor is given, then must ensure I/O order,
306  * so wait for write to complete.
307  */
308 iupdat(ip, ta, tm, waitfor)
309 	register struct inode *ip;
310 	struct timeval *ta, *tm;
311 	int waitfor;
312 {
313 	struct buf *bp;
314 	struct vnode *vp = ITOV(ip);
315 	struct dinode *dp;
316 	register struct fs *fs;
317 	int error;
318 
319 	fs = ip->i_fs;
320 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
321 		return (0);
322 	if (vp->v_mount->m_flag & M_RDONLY)
323 		return (0);
324 	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
325 		(int)fs->fs_bsize, NOCRED, &bp);
326 	if (error) {
327 		brelse(bp);
328 		return (error);
329 	}
330 	if (ip->i_flag&IACC)
331 		ip->i_atime = ta->tv_sec;
332 	if (ip->i_flag&IUPD)
333 		ip->i_mtime = tm->tv_sec;
334 	if (ip->i_flag&ICHG)
335 		ip->i_ctime = time.tv_sec;
336 	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
337 	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
338 	*dp = ip->i_din;
339 	if (waitfor) {
340 		return (bwrite(bp));
341 	} else {
342 		bdwrite(bp);
343 		return (0);
344 	}
345 }
346 
#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 * Returns 0 or the first error encountered (freeing continues past
 * individual errors).
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
	register struct inode *oip;	/* inode to truncate */
	u_long length;			/* new maximum size in bytes */
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int error, allerror = 0;
	struct inode tip;

	/* Not shrinking: nothing to free, just flush the inode. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
		if (error)
			return (error);
		if ((long)bn < 0)
			panic("itrunc: hole");
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		count = howmany(size, CLBYTES);
		/* Toss any cached pages covering the truncated block. */
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		error = bread(oip->i_devvp, bn, size, NOCRED, &bp);
		if (error) {
			oip->i_size = osize;	/* back out the size change */
			brelse(bp);
			return (error);
		}
		/* Zero from the new EOF to the end of the block. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;	/* tip keeps the old pointers for the freeing passes */
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	allerror = syncip(oip);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			/* Entire tree at this level gone: free its root too. */
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}
525 
/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * The number of device blocks released is returned through countp.
 * Returns 0 or the first error encountered (freeing continues past
 * individual errors).
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;	/* indirect block; last logical block kept */
	int level;		/* SINGLE, DOUBLE or TRIPLE */
	long *countp;		/* out: device blocks released */
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/*
	 * Work from a private copy of the pointers so the on-disk
	 * block can be zeroed and written out before any freeing.
	 */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		/* i == old last here: the boundary block's slot. */
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
620 
621 /*
622  * Lock an inode. If its already locked, set the WANT bit and sleep.
623  */
624 ilock(ip)
625 	register struct inode *ip;
626 {
627 
628 	while (ip->i_flag & ILOCKED) {
629 		ip->i_flag |= IWANT;
630 		(void) sleep((caddr_t)ip, PINOD);
631 	}
632 	ip->i_flag |= ILOCKED;
633 }
634 
635 /*
636  * Unlock an inode.  If WANT bit is on, wakeup.
637  */
638 iunlock(ip)
639 	register struct inode *ip;
640 {
641 
642 	if ((ip->i_flag & ILOCKED) == 0)
643 		printf("unlocking unlocked inode %d on dev 0x%x\n",
644 			ip->i_number, ip->i_dev);
645 	ip->i_flag &= ~ILOCKED;
646 	if (ip->i_flag&IWANT) {
647 		ip->i_flag &= ~IWANT;
648 		wakeup((caddr_t)ip);
649 	}
650 }
651 
652 /*
653  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
654  * The mode is shifted to select the owner/group/other fields. The
655  * super user is granted all permissions.
656  *
657  * NB: Called from vnode op table. It seems this could all be done
658  * using vattr's but...
659  */
660 iaccess(ip, mode, cred)
661 	register struct inode *ip;
662 	register int mode;
663 	struct ucred *cred;
664 {
665 	register gid_t *gp;
666 	int i;
667 
668 	/*
669 	 * If you're the super-user, you always get access.
670 	 */
671 	if (cred->cr_uid == 0)
672 		return (0);
673 	/*
674 	 * Access check is based on only one of owner, group, public.
675 	 * If not owner, then check group. If not a member of the
676 	 * group, then check public access.
677 	 */
678 	if (cred->cr_uid != ip->i_uid) {
679 		mode >>= 3;
680 		gp = cred->cr_groups;
681 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
682 			if (ip->i_gid == *gp)
683 				goto found;
684 		mode >>= 3;
685 found:
686 		;
687 	}
688 	if ((ip->i_mode & mode) != 0)
689 		return (0);
690 	return (EACCES);
691 }
692