xref: /netbsd-src/sys/ufs/ffs/ffs_inode.c (revision 17306b8fd0952c7489f93f0230818481e5a1e2c9)
1 /*	$NetBSD: ffs_inode.c,v 1.41 2001/05/30 11:57:18 mrg Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)ffs_inode.c	8.13 (Berkeley) 4/21/95
36  */
37 
38 #if defined(_KERNEL_OPT)
39 #include "opt_ffs.h"
40 #include "opt_quota.h"
41 #endif
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mount.h>
46 #include <sys/proc.h>
47 #include <sys/file.h>
48 #include <sys/buf.h>
49 #include <sys/vnode.h>
50 #include <sys/kernel.h>
51 #include <sys/malloc.h>
52 #include <sys/trace.h>
53 #include <sys/resourcevar.h>
54 
55 #include <ufs/ufs/quota.h>
56 #include <ufs/ufs/inode.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/ufs_extern.h>
59 #include <ufs/ufs/ufs_bswap.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
/*
 * Forward declaration of the file-local indirect block truncation helper
 * defined at the end of this file.  Arguments, in order, are:
 * ip, lbn, dbn, lastbn, level, countp.
 */
64 static int ffs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t,
65 			       ufs_daddr_t, int, long *));
66 
67 /*
68  * Update the access, modified, and inode change times as specified
69  * by the IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.
70  * The IN_MODIFIED flag is used to specify that the inode needs to be
71  * updated but that the times have already been set. The access
72  * and modified times are taken from the second and third parameters;
73  * the inode change time is always taken from the current time. If
74  * UPDATE_WAIT flag is set, or UPDATE_DIROP is set and we are not doing
75  * softupdates, then wait for the disk write of the inode to complete.
76  */
77 
78 int
79 ffs_update(v)
80 	void *v;
81 {
82 	struct vop_update_args /* {
83 		struct vnode *a_vp;
84 		struct timespec *a_access;
85 		struct timespec *a_modify;
86 		int a_flags;
87 	} */ *ap = v;
88 	struct fs *fs;
89 	struct buf *bp;
90 	struct inode *ip;
91 	int error;
92 	struct timespec ts;
93 	caddr_t cp;
94 	int waitfor, flags;
95 
96 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
97 		return (0);
98 	ip = VTOI(ap->a_vp);
99 	TIMEVAL_TO_TIMESPEC(&time, &ts);
100 	FFS_ITIMES(ip,
101 	    ap->a_access ? ap->a_access : &ts,
102 	    ap->a_modify ? ap->a_modify : &ts, &ts);
103 	flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED);
104 	if (flags == 0)
105 		return (0);
106 	fs = ip->i_fs;
107 
108 	if ((flags & IN_MODIFIED) != 0 &&
109 	    (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
110 		waitfor = ap->a_flags & UPDATE_WAIT;
111 		if ((ap->a_flags & UPDATE_DIROP) && !DOINGSOFTDEP(ap->a_vp))
112 			waitfor |= UPDATE_WAIT;
113 	} else
114 		waitfor = 0;
115 
116 	/*
117 	 * Ensure that uid and gid are correct. This is a temporary
118 	 * fix until fsck has been changed to do the update.
119 	 */
120 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
121 		ip->i_din.ffs_din.di_ouid = ip->i_ffs_uid;	/* XXX */
122 		ip->i_din.ffs_din.di_ogid = ip->i_ffs_gid;	/* XXX */
123 	}							/* XXX */
124 	error = bread(ip->i_devvp,
125 		      fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
126 		      (int)fs->fs_bsize, NOCRED, &bp);
127 	if (error) {
128 		brelse(bp);
129 		return (error);
130 	}
131 	ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED);
132 	if (DOINGSOFTDEP(ap->a_vp))
133 		softdep_update_inodeblock(ip, bp, waitfor);
134 	else if (ip->i_ffs_effnlink != ip->i_ffs_nlink)
135 		panic("ffs_update: bad link cnt");
136 	cp = (caddr_t)bp->b_data +
137 	    (ino_to_fsbo(fs, ip->i_number) * DINODE_SIZE);
138 #ifdef FFS_EI
139 	if (UFS_FSNEEDSWAP(fs))
140 		ffs_dinode_swap(&ip->i_din.ffs_din, (struct dinode *)cp);
141 	else
142 #endif
143 		memcpy(cp, &ip->i_din.ffs_din, DINODE_SIZE);
144 	if (waitfor) {
145 		return (bwrite(bp));
146 	} else {
147 		bdwrite(bp);
148 		return (0);
149 	}
150 }
151 
152 #define	SINGLE	0	/* index of single indirect block */
153 #define	DOUBLE	1	/* index of double indirect block */
154 #define	TRIPLE	2	/* index of triple indirect block */
155 /*
156  * Truncate the inode oip to at most length size, freeing the
157  * disk blocks.
158  */
159 int
160 ffs_truncate(v)
161 	void *v;
162 {
163 	struct vop_truncate_args /* {
164 		struct vnode *a_vp;
165 		off_t a_length;
166 		int a_flags;
167 		struct ucred *a_cred;
168 		struct proc *a_p;
169 	} */ *ap = v;
170 	struct vnode *ovp = ap->a_vp;
171 	ufs_daddr_t lastblock;
172 	struct inode *oip;
173 	ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
174 	ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
175 	off_t length = ap->a_length;
176 	struct fs *fs;
177 	int offset, size, level;
178 	long count, nblocks, blocksreleased = 0;
179 	int i;
180 	int error, allerror = 0;
181 	off_t osize;
182 
183 	if (length < 0)
184 		return (EINVAL);
185 	oip = VTOI(ovp);
186 	if (ovp->v_type == VLNK &&
187 	    (oip->i_ffs_size < ovp->v_mount->mnt_maxsymlinklen ||
188 	     (ovp->v_mount->mnt_maxsymlinklen == 0 &&
189 	      oip->i_din.ffs_din.di_blocks == 0))) {
190 		KDASSERT(length == 0);
191 		memset(&oip->i_ffs_shortlink, 0, (size_t)oip->i_ffs_size);
192 		oip->i_ffs_size = 0;
193 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
194 		return (VOP_UPDATE(ovp, NULL, NULL, UPDATE_WAIT));
195 	}
196 	if (oip->i_ffs_size == length) {
197 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
198 		return (VOP_UPDATE(ovp, NULL, NULL, 0));
199 	}
200 #ifdef QUOTA
201 	if ((error = getinoquota(oip)) != 0)
202 		return (error);
203 #endif
204 	fs = oip->i_fs;
205 	if (length > fs->fs_maxfilesize)
206 		return (EFBIG);
207 
208 	osize = oip->i_ffs_size;
209 	ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
210 
211 	/*
212 	 * Lengthen the size of the file. We must ensure that the
213 	 * last byte of the file is allocated. Since the smallest
214 	 * value of osize is 0, length will be at least 1.
215 	 */
216 
217 	if (osize < length) {
218 		ufs_balloc_range(ovp, length - 1, 1, ap->a_cred,
219 		    ap->a_flags & IO_SYNC ? B_SYNC : 0);
220 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
221 		return (VOP_UPDATE(ovp, NULL, NULL, 1));
222 	}
223 
224 	/*
225 	 * When truncating a regular file down to a non-block-aligned size,
226 	 * we must zero the part of last block which is past the new EOF.
227 	 * We must synchronously flush the zeroed pages to disk
228 	 * since the new pages will be invalidated as soon as we
229 	 * inform the VM system of the new, smaller size.
230 	 * We must to this before acquiring the GLOCK, since fetching
231 	 * the pages will acquire the GLOCK internally.
232 	 * So there is a window where another thread could see a whole
233 	 * zeroed page past EOF, but that's life.
234 	 */
235 
236 	offset = blkoff(fs, length);
237 	if (ovp->v_type == VREG && length < osize && offset != 0) {
238 		struct uvm_object *uobj;
239 		voff_t eoz;
240 
241 		size = blksize(fs, oip, lblkno(fs, length));
242 		eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize);
243 		uvm_vnp_zerorange(ovp, length, eoz - length);
244 		uobj = &ovp->v_uvm.u_obj;
245 		simple_lock(&uobj->vmobjlock);
246 		uobj->pgops->pgo_flush(uobj, length, eoz,
247 		    PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO);
248 		simple_unlock(&ovp->v_uvm.u_obj.vmobjlock);
249 	}
250 
251 	lockmgr(&ovp->v_glock, LK_EXCLUSIVE, NULL);
252 
253 	if (DOINGSOFTDEP(ovp)) {
254 		if (length > 0) {
255 			/*
256 			 * If a file is only partially truncated, then
257 			 * we have to clean up the data structures
258 			 * describing the allocation past the truncation
259 			 * point. Finding and deallocating those structures
260 			 * is a lot of work. Since partial truncation occurs
261 			 * rarely, we solve the problem by syncing the file
262 			 * so that it will have no data structures left.
263 			 */
264 			if ((error = VOP_FSYNC(ovp, ap->a_cred, FSYNC_WAIT,
265 			    0, 0, ap->a_p)) != 0) {
266 				lockmgr(&ovp->v_glock, LK_RELEASE, NULL);
267 				return (error);
268 			}
269 		} else {
270 			uvm_vnp_setsize(ovp, length);
271 #ifdef QUOTA
272  			(void) chkdq(oip, -oip->i_ffs_blocks, NOCRED, 0);
273 #endif
274 			softdep_setup_freeblocks(oip, length);
275 			(void) vinvalbuf(ovp, 0, ap->a_cred, ap->a_p, 0, 0);
276 			lockmgr(&ovp->v_glock, LK_RELEASE, NULL);
277 			oip->i_flag |= IN_CHANGE | IN_UPDATE;
278 			return (VOP_UPDATE(ovp, NULL, NULL, 0));
279 		}
280 	}
281 
282 	/*
283 	 * Reduce the size of the file.
284 	 */
285 	oip->i_ffs_size = length;
286 	uvm_vnp_setsize(ovp, length);
287 	/*
288 	 * Calculate index into inode's block list of
289 	 * last direct and indirect blocks (if any)
290 	 * which we want to keep.  Lastblock is -1 when
291 	 * the file is truncated to 0.
292 	 */
293 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
294 	lastiblock[SINGLE] = lastblock - NDADDR;
295 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
296 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
297 	nblocks = btodb(fs->fs_bsize);
298 	/*
299 	 * Update file and block pointers on disk before we start freeing
300 	 * blocks.  If we crash before free'ing blocks below, the blocks
301 	 * will be returned to the free list.  lastiblock values are also
302 	 * normalized to -1 for calls to ffs_indirtrunc below.
303 	 */
304 	memcpy((caddr_t)oldblks, (caddr_t)&oip->i_ffs_db[0], sizeof oldblks);
305 	for (level = TRIPLE; level >= SINGLE; level--)
306 		if (lastiblock[level] < 0) {
307 			oip->i_ffs_ib[level] = 0;
308 			lastiblock[level] = -1;
309 		}
310 	for (i = NDADDR - 1; i > lastblock; i--)
311 		oip->i_ffs_db[i] = 0;
312 	oip->i_flag |= IN_CHANGE | IN_UPDATE;
313 	error = VOP_UPDATE(ovp, NULL, NULL, UPDATE_WAIT);
314 	if (error && !allerror)
315 		allerror = error;
316 
317 	/*
318 	 * Having written the new inode to disk, save its new configuration
319 	 * and put back the old block pointers long enough to process them.
320 	 * Note that we save the new block configuration so we can check it
321 	 * when we are done.
322 	 */
323 	memcpy((caddr_t)newblks, (caddr_t)&oip->i_ffs_db[0], sizeof newblks);
324 	memcpy((caddr_t)&oip->i_ffs_db[0], (caddr_t)oldblks, sizeof oldblks);
325 	oip->i_ffs_size = osize;
326 	error = vtruncbuf(ovp, lastblock + 1, 0, 0);
327 	if (error && !allerror)
328 		allerror = error;
329 
330 	/*
331 	 * Indirect blocks first.
332 	 */
333 	indir_lbn[SINGLE] = -NDADDR;
334 	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
335 	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
336 	for (level = TRIPLE; level >= SINGLE; level--) {
337 		bn = ufs_rw32(oip->i_ffs_ib[level], UFS_FSNEEDSWAP(fs));
338 		if (bn != 0) {
339 			error = ffs_indirtrunc(oip, indir_lbn[level],
340 			    fsbtodb(fs, bn), lastiblock[level], level, &count);
341 			if (error)
342 				allerror = error;
343 			blocksreleased += count;
344 			if (lastiblock[level] < 0) {
345 				oip->i_ffs_ib[level] = 0;
346 				ffs_blkfree(oip, bn, fs->fs_bsize);
347 				blocksreleased += nblocks;
348 			}
349 		}
350 		if (lastiblock[level] >= 0)
351 			goto done;
352 	}
353 
354 	/*
355 	 * All whole direct blocks or frags.
356 	 */
357 	for (i = NDADDR - 1; i > lastblock; i--) {
358 		long bsize;
359 
360 		bn = ufs_rw32(oip->i_ffs_db[i], UFS_FSNEEDSWAP(fs));
361 		if (bn == 0)
362 			continue;
363 		oip->i_ffs_db[i] = 0;
364 		bsize = blksize(fs, oip, i);
365 		ffs_blkfree(oip, bn, bsize);
366 		blocksreleased += btodb(bsize);
367 	}
368 	if (lastblock < 0)
369 		goto done;
370 
371 	/*
372 	 * Finally, look for a change in size of the
373 	 * last direct block; release any frags.
374 	 */
375 	bn = ufs_rw32(oip->i_ffs_db[lastblock], UFS_FSNEEDSWAP(fs));
376 	if (bn != 0) {
377 		long oldspace, newspace;
378 
379 		/*
380 		 * Calculate amount of space we're giving
381 		 * back as old block size minus new block size.
382 		 */
383 		oldspace = blksize(fs, oip, lastblock);
384 		oip->i_ffs_size = length;
385 		newspace = blksize(fs, oip, lastblock);
386 		if (newspace == 0)
387 			panic("itrunc: newspace");
388 		if (oldspace - newspace > 0) {
389 			/*
390 			 * Block number of space to be free'd is
391 			 * the old block # plus the number of frags
392 			 * required for the storage we're keeping.
393 			 */
394 			bn += numfrags(fs, newspace);
395 			ffs_blkfree(oip, bn, oldspace - newspace);
396 			blocksreleased += btodb(oldspace - newspace);
397 		}
398 	}
399 
400 done:
401 #ifdef DIAGNOSTIC
402 	for (level = SINGLE; level <= TRIPLE; level++)
403 		if (newblks[NDADDR + level] != oip->i_ffs_ib[level])
404 			panic("itrunc1");
405 	for (i = 0; i < NDADDR; i++)
406 		if (newblks[i] != oip->i_ffs_db[i])
407 			panic("itrunc2");
408 	if (length == 0 &&
409 	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
410 		panic("itrunc3");
411 #endif /* DIAGNOSTIC */
412 	/*
413 	 * Put back the real size.
414 	 */
415 	oip->i_ffs_size = length;
416 	oip->i_ffs_blocks -= blocksreleased;
417 	if (oip->i_ffs_blocks < 0)			/* sanity */
418 		oip->i_ffs_blocks = 0;
419 	lockmgr(&ovp->v_glock, LK_RELEASE, NULL);
420 	oip->i_flag |= IN_CHANGE;
421 #ifdef QUOTA
422 	(void) chkdq(oip, -blocksreleased, NOCRED, 0);
423 #endif
424 	return (allerror);
425 }
426 
427 /*
428  * Release blocks associated with the inode ip and stored in the indirect
429  * block bn.  Blocks are free'd in LIFO order up to (but not including)
430  * lastbn.  If level is greater than SINGLE, the block is an indirect block
431  * and recursive calls to indirtrunc must be used to cleanse other indirect
432  * blocks.
433  *
434  * NB: triple indirect blocks are untested.
435  */
436 static int
437 ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
438 	struct inode *ip;
439 	ufs_daddr_t lbn, lastbn;
440 	ufs_daddr_t dbn;
441 	int level;
442 	long *countp;
443 {
444 	int i;
445 	struct buf *bp;
446 	struct fs *fs = ip->i_fs;
447 	ufs_daddr_t *bap;
448 	struct vnode *vp;
449 	ufs_daddr_t *copy = NULL, nb, nlbn, last;
450 	long blkcount, factor;
451 	int nblocks, blocksreleased = 0;
452 	int error = 0, allerror = 0;
453 
454 	/*
455 	 * Calculate index in current block of last
456 	 * block to be kept.  -1 indicates the entire
457 	 * block so we need not calculate the index.
458 	 */
459 	factor = 1;
460 	for (i = SINGLE; i < level; i++)
461 		factor *= NINDIR(fs);
462 	last = lastbn;
463 	if (lastbn > 0)
464 		last /= factor;
465 	nblocks = btodb(fs->fs_bsize);
466 	/*
467 	 * Get buffer of block pointers, zero those entries corresponding
468 	 * to blocks to be free'd, and update on disk copy first.  Since
469 	 * double(triple) indirect before single(double) indirect, calls
470 	 * to bmap on these blocks will fail.  However, we already have
471 	 * the on disk address, so we have to set the b_blkno field
472 	 * explicitly instead of letting bread do everything for us.
473 	 */
474 	vp = ITOV(ip);
475 	bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0);
476 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
477 		/* Braces must be here in case trace evaluates to nothing. */
478 		trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
479 	} else {
480 		trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
481 		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
482 		bp->b_flags |= B_READ;
483 		if (bp->b_bcount > bp->b_bufsize)
484 			panic("ffs_indirtrunc: bad buffer size");
485 		bp->b_blkno = dbn;
486 		VOP_STRATEGY(bp);
487 		error = biowait(bp);
488 	}
489 	if (error) {
490 		brelse(bp);
491 		*countp = 0;
492 		return (error);
493 	}
494 
495 	bap = (ufs_daddr_t *)bp->b_data;
496 	if (lastbn >= 0) {
497 		copy = (ufs_daddr_t *) malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
498 		memcpy((caddr_t)copy, (caddr_t)bap, (u_int)fs->fs_bsize);
499 		memset((caddr_t)&bap[last + 1], 0,
500 		  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
501 		error = bwrite(bp);
502 		if (error)
503 			allerror = error;
504 		bap = copy;
505 	}
506 
507 	/*
508 	 * Recursively free totally unused blocks.
509 	 */
510 	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
511 	    i--, nlbn += factor) {
512 		nb = ufs_rw32(bap[i], UFS_FSNEEDSWAP(fs));
513 		if (nb == 0)
514 			continue;
515 		if (level > SINGLE) {
516 			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
517 					       (ufs_daddr_t)-1, level - 1,
518 					       &blkcount);
519 			if (error)
520 				allerror = error;
521 			blocksreleased += blkcount;
522 		}
523 		ffs_blkfree(ip, nb, fs->fs_bsize);
524 		blocksreleased += nblocks;
525 	}
526 
527 	/*
528 	 * Recursively free last partial block.
529 	 */
530 	if (level > SINGLE && lastbn >= 0) {
531 		last = lastbn % factor;
532 		nb = ufs_rw32(bap[i], UFS_FSNEEDSWAP(fs));
533 		if (nb != 0) {
534 			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
535 					       last, level - 1, &blkcount);
536 			if (error)
537 				allerror = error;
538 			blocksreleased += blkcount;
539 		}
540 	}
541 
542 	if (copy != NULL) {
543 		FREE(copy, M_TEMP);
544 	} else {
545 		bp->b_flags |= B_INVAL;
546 		brelse(bp);
547 	}
548 
549 	*countp = blocksreleased;
550 	return (allerror);
551 }
552