xref: /csrg-svn/sys/ufs/ffs/ufs_inode.c (revision 39517)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)ufs_inode.c	7.19 (Berkeley) 11/12/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "user.h"
24 #include "file.h"
25 #include "buf.h"
26 #include "cmap.h"
27 #include "vnode.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 #include "../ufs/ufsmount.h"
31 #ifdef QUOTA
32 #include "../ufs/quota.h"
33 #endif
34 #include "kernel.h"
35 #include "malloc.h"
36 
#define	INOHSZ	512		/* number of inode hash buckets */
#if	((INOHSZ&(INOHSZ-1)) == 0)
/* Power-of-two table size: hash with a mask instead of a modulus. */
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Head of an inode hash chain.  Each bucket is a doubly-linked list
 * threaded through i_forw/i_back (see insque/remque usage in iget and
 * ufs_reclaim); the head itself serves as the list terminator, so an
 * empty bucket points at itself (see ufs_init).
 */
union ihead {
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];
48 
49 /*
50  * Initialize hash links for inodes.
51  */
52 ufs_init()
53 {
54 	register int i;
55 	register union ihead *ih = ihead;
56 
57 #ifndef lint
58 	if (VN_MAXPRIVATE < sizeof(struct inode))
59 		panic("ihinit: too small");
60 #endif /* not lint */
61 	for (i = INOHSZ; --i >= 0; ih++) {
62 		ih->ih_head[0] = ih;
63 		ih->ih_head[1] = ih;
64 	}
65 }
66 
/*
 * Look up a vnode/inode by device, inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 *
 * xp is an existing inode on the same filesystem; it supplies the
 * device and mount point.  On success the locked, referenced inode
 * is returned via *ipp and 0 is returned; on error *ipp is zeroed
 * and an errno is returned.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union  ihead *ih;
	int error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * First search the hash chain.  If the inode is locked, sleep
	 * and rescan from the top, since it may have been reused while
	 * we slept.  A failing vget means the vnode is being reclaimed;
	 * start over in that case too.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_lastr = 0;
	ip->i_mode = 0;		/* i_mode == 0 marks the inode as not yet valid */
#ifdef QUOTA
	ip->i_dquot = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * Unlock and discard unneeded inode.  Since i_mode is
		 * still 0, ufs_inactive will vgone() the vnode for us.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/*
	 * Copy this inode's slot out of the buffer full of dinodes.
	 */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/*
		 * Special devices use the spec vnode ops and may be
		 * aliased by an existing vnode for the same device.
		 */
		vp->v_rdev = ip->i_rdev;
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_lastr = 0;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (i_mode = 0 makes ufs_inactive reclaim it).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
#ifdef QUOTA
	if (ip->i_mode != 0)
		ip->i_dquot = inoquota(ip);
#endif
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->m_flag & M_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}
208 
209 /*
210  * Unlock and decrement the reference count of an inode structure.
211  */
212 iput(ip)
213 	register struct inode *ip;
214 {
215 
216 	if ((ip->i_flag & ILOCKED) == 0)
217 		panic("iput");
218 	IUNLOCK(ip);
219 	vrele(ITOV(ip));
220 }
221 
/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	/* A nonzero reference count here indicates a vnode layer bug. */
	if (vp->v_count != 0)
		printf("ufs_inactive: pushing active ino %d dev 0x%x\n",
			ip->i_number, ip->i_dev);
	/*
	 * Get rid of inodes related to stale file handles.
	 * i_mode == 0 means the inode never became valid (see iget)
	 * or has already been freed.
	 */
	if (ip->i_mode == 0) {
		vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * If the link count has reached zero (and the filesystem is
	 * writable), release the file's blocks and free the on-disk
	 * inode.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->m_flag & M_RDONLY) == 0) {
		error = itrunc(ip, (u_long)0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
#ifdef QUOTA
		(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
		dqrele(ip->i_dquot);
		ip->i_dquot = NODQUOT;
#endif
	}
	/* Push any pending timestamp changes back to disk (delayed). */
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_count == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}
267 
268 /*
269  * Reclaim an inode so that it can be used for other purposes.
270  */
271 ufs_reclaim(vp)
272 	register struct vnode *vp;
273 {
274 	register struct inode *ip = VTOI(vp);
275 
276 	if (vp->v_count != 0)
277 		printf("ufs_reclaim: pushing active ino %d dev 0x%x\n",
278 			ip->i_number, ip->i_dev);
279 	/*
280 	 * Remove the inode from its hash chain.
281 	 */
282 	remque(ip);
283 	ip->i_forw = ip;
284 	ip->i_back = ip;
285 	/*
286 	 * Purge old data structures associated with the inode.
287 	 */
288 	cache_purge(vp);
289 	if (ip->i_devvp) {
290 		vrele(ip->i_devvp);
291 		ip->i_devvp = 0;
292 	}
293 #ifdef QUOTA
294 	dqrele(ip->i_dquot);
295 	ip->i_dquot = NODQUOT;
296 #endif
297 	ip->i_flag = 0;
298 	return (0);
299 }
300 
/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 *
 * ta supplies the new access time and tm the new modification
 * time; the change time always comes from the current time.
 * Returns 0 or an errno from the disk I/O.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing to do unless some timestamp/modified flag is set. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	/* Never write on a read-only filesystem. */
	if (vp->v_mount->m_flag & M_RDONLY)
		return (0);
	/* Read the filesystem block containing the on-disk inode. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the updated in-core dinode into its slot in the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}
345 
#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * Returns 0 or the first error encountered; freeing continues past
 * errors so that as many blocks as possible are released.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length)
	register struct inode *oip;
	u_long length;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int error, allerror = 0;
	struct inode tip;

	/* Growing or unchanged: just push the timestamps to disk. */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		error = balloc(oip, lbn, offset, &bn, B_CLRBUF);
		if (error)
			return (error);
		if ((long)bn < 0)
			panic("itrunc: hole");
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		count = howmany(size, CLBYTES);
		/*
		 * NOTE(review): munhash presumably invalidates cached
		 * core-map entries for the partial block -- confirm
		 * against the cmap code.
		 */
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		error = bread(oip->i_devvp, bn, size, NOCRED, &bp);
		if (error) {
			oip->i_size = osize;
			brelse(bp);
			return (error);
		}
		/* Zero from the new EOF to the end of the fragment. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 *
	 * tip keeps a private copy of the old block pointers so that
	 * they can still be freed after the on-disk inode's pointers
	 * have been cleared and synced.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	allerror = syncip(oip);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* A partially-kept level means nothing below it goes. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* By now the private copy and the real inode must agree. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	(void) chkdq(oip, -blocksreleased, 0);
#endif
	return (allerror);
}
524 
/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * The number of device blocks released is returned via countp;
 * the return value is 0 or the first I/O error encountered.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 * factor is the number of file blocks spanned by
	 * one slot at this indirection level.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/* Work from a private copy so the zeroed block can be written now. */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * At exit from the loop above, i == lastbn / factor: the
	 * slot of the indirect block that is only partially freed.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
619 
620 /*
621  * Lock an inode. If its already locked, set the WANT bit and sleep.
622  */
623 ilock(ip)
624 	register struct inode *ip;
625 {
626 
627 	while (ip->i_flag & ILOCKED) {
628 		ip->i_flag |= IWANT;
629 		(void) sleep((caddr_t)ip, PINOD);
630 	}
631 	ip->i_flag |= ILOCKED;
632 }
633 
634 /*
635  * Unlock an inode.  If WANT bit is on, wakeup.
636  */
637 iunlock(ip)
638 	register struct inode *ip;
639 {
640 
641 	if ((ip->i_flag & ILOCKED) == 0)
642 		printf("unlocking unlocked inode %d on dev 0x%x\n",
643 			ip->i_number, ip->i_dev);
644 	ip->i_flag &= ~ILOCKED;
645 	if (ip->i_flag&IWANT) {
646 		ip->i_flag &= ~IWANT;
647 		wakeup((caddr_t)ip);
648 	}
649 }
650 
651 /*
652  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
653  * The mode is shifted to select the owner/group/other fields. The
654  * super user is granted all permissions.
655  *
656  * NB: Called from vnode op table. It seems this could all be done
657  * using vattr's but...
658  */
659 iaccess(ip, mode, cred)
660 	register struct inode *ip;
661 	register int mode;
662 	struct ucred *cred;
663 {
664 	register gid_t *gp;
665 	int i;
666 
667 	/*
668 	 * If you're the super-user, you always get access.
669 	 */
670 	if (cred->cr_uid == 0)
671 		return (0);
672 	/*
673 	 * Access check is based on only one of owner, group, public.
674 	 * If not owner, then check group. If not a member of the
675 	 * group, then check public access.
676 	 */
677 	if (cred->cr_uid != ip->i_uid) {
678 		mode >>= 3;
679 		gp = cred->cr_groups;
680 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
681 			if (ip->i_gid == *gp)
682 				goto found;
683 		mode >>= 3;
684 found:
685 		;
686 	}
687 	if ((ip->i_mode & mode) != 0)
688 		return (0);
689 	return (EACCES);
690 }
691