xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_inode.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: ext2fs_inode.c,v 1.43 2008/11/23 23:52:35 tedu Exp $	*/
2 /*	$NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_inode.c	8.8 (Berkeley) 10/19/94
34  * Modified for ext2fs by Manuel Bouyer.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/mount.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/buf.h>
43 #include <sys/vnode.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/resourcevar.h>
47 
48 #include <uvm/uvm_extern.h>
49 
50 #include <ufs/ufs/quota.h>
51 #include <ufs/ufs/inode.h>
52 #include <ufs/ufs/ufsmount.h>
53 #include <ufs/ufs/ufs_extern.h>
54 
55 #include <ufs/ext2fs/ext2fs.h>
56 #include <ufs/ext2fs/ext2fs_extern.h>
57 
/*
 * Forward declaration: ext2fs_indirtrunc() is file-local and is called from
 * ext2fs_truncate() before its definition appears later in this file.
 */
static int ext2fs_indirtrunc(struct inode *, int32_t, int32_t,
				int32_t, int, long *);
60 
61 /*
62  * Get the size of an inode.
63  */
64 u_int64_t
65 ext2fs_size(struct inode *ip)
66 {
67         u_int64_t size = ip->i_e2fs_size;
68 
69         if ((ip->i_e2fs_mode & IFMT) == IFREG)
70                 size |= (u_int64_t)ip->i_e2fs_dacl << 32;
71 
72         return (size);
73 }
74 
75 int
76 ext2fs_setsize(struct inode *ip, u_int64_t size)
77 {
78         if ((ip->i_e2fs_mode & IFMT) == IFREG ||
79             ip->i_e2fs_mode == 0) {
80                 ip->i_e2fs_dacl = size >> 32;
81                 if (size >= 0x80000000U) {
82                         struct m_ext2fs *fs = ip->i_e2fs;
83 
84                         if (fs->e2fs.e2fs_rev <= E2FS_REV0) {
85                                 /* Linux automagically upgrades to REV1 here! */
86                                 return (EFBIG);
87                         }
88                         if (!(fs->e2fs.e2fs_features_rocompat
89                             & EXT2F_ROCOMPAT_LARGEFILE)) {
90                                 fs->e2fs.e2fs_features_rocompat |=
91                                     EXT2F_ROCOMPAT_LARGEFILE;
92                                 fs->e2fs_fmod = 1;
93                         }
94                 }
95         } else if (size >= 0x80000000U)
96                 return (EFBIG);
97 
98         ip->i_e2fs_size = size;
99 
100         return (0);
101 }
102 
103 
104 /*
105  * Last reference to an inode.  If necessary, write or delete it.
106  */
107 int
108 ext2fs_inactive(void *v)
109 {
110 	struct vop_inactive_args *ap = v;
111 	struct vnode *vp = ap->a_vp;
112 	struct inode *ip = VTOI(vp);
113 	struct proc *p = ap->a_p;
114 	struct timespec ts;
115 	int error = 0;
116 #ifdef DIAGNOSTIC
117 	extern int prtactive;
118 
119 	if (prtactive && vp->v_usecount != 0)
120 		vprint("ext2fs_inactive: pushing active", vp);
121 #endif
122 
123 	/* Get rid of inodes related to stale file handles. */
124 	if (ip->i_e2din == NULL || ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime)
125 		goto out;
126 
127 	error = 0;
128 	if (ip->i_e2fs_nlink == 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
129 		if (ext2fs_size(ip) != 0) {
130 			error = ext2fs_truncate(ip, (off_t)0, 0, NOCRED);
131 		}
132 		getnanotime(&ts);
133 		ip->i_e2fs_dtime = ts.tv_sec;
134 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
135 		ext2fs_inode_free(ip, ip->i_number, ip->i_e2fs_mode);
136 	}
137 	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
138 		ext2fs_update(ip, NULL, NULL, 0);
139 	}
140 out:
141 	VOP_UNLOCK(vp, 0, p);
142 	/*
143 	 * If we are done with the inode, reclaim it
144 	 * so that it can be reused immediately.
145 	 */
146 	if (ip->i_e2din == NULL || ip->i_e2fs_dtime != 0)
147 		vrecycle(vp, p);
148 	return (error);
149 }
150 
151 
152 /*
153  * Update the access, modified, and inode change times as specified by the
154  * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is
155  * used to specify that the inode needs to be updated but that the times have
156  * already been set. The access and modified times are taken from the second
157  * and third parameters; the inode change time is always taken from the current
158  * time. If waitfor is set, then wait for the disk write of the inode to
159  * complete.
160  */
161 int
162 ext2fs_update(struct inode *ip, struct timespec *atime, struct timespec *mtime,
163     int waitfor)
164 {
165 	struct m_ext2fs *fs;
166 	struct buf *bp;
167 	int error;
168 	struct timespec ts;
169 	caddr_t cp;
170 
171 	if (ITOV(ip)->v_mount->mnt_flag & MNT_RDONLY)
172 		return (0);
173 	getnanotime(&ts);
174 	EXT2FS_ITIMES(ip,
175 	    atime ? atime : &ts,
176 	    mtime ? mtime : &ts);
177 	if ((ip->i_flag & IN_MODIFIED) == 0)
178 		return (0);
179 	ip->i_flag &= ~IN_MODIFIED;
180 	fs = ip->i_e2fs;
181 	error = bread(ip->i_devvp,
182 			  fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
183 			  (int)fs->e2fs_bsize, NOCRED, &bp);
184 	if (error) {
185 		brelse(bp);
186 		return (error);
187 	}
188 	ip->i_flag &= ~(IN_MODIFIED);
189 	cp = (caddr_t)bp->b_data +
190 	    (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE(fs));
191 
192 	/*
193 	 * See note about 16-bit UID/GID limitation in ext2fs_vget(). Now
194 	 * that we are about to write the inode, construct the split UID and
195 	 * GID fields out of the two 32-bit fields we kept in memory.
196 	 */
197 	ip->i_e2fs_uid_low = (u_int16_t)ip->i_e2fs_uid;
198 	ip->i_e2fs_gid_low = (u_int16_t)ip->i_e2fs_gid;
199 	ip->i_e2fs_uid_high = ip->i_e2fs_uid >> 16;
200 	ip->i_e2fs_gid_high = ip->i_e2fs_gid >> 16;
201 
202 	e2fs_isave(ip->i_e2din, (struct ext2fs_dinode *)cp);
203 	if (waitfor)
204 		return (bwrite(bp));
205 	else {
206 		bdwrite(bp);
207 		return (0);
208 	}
209 }
210 
211 #define	SINGLE	0	/* index of single indirect block */
212 #define	DOUBLE	1	/* index of double indirect block */
213 #define	TRIPLE	2	/* index of triple indirect block */
214 /*
215  * Truncate the inode oip to at most length size, freeing the
216  * disk blocks.
217  */
218 int
219 ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
220 {
221 	struct vnode *ovp = ITOV(oip);
222 	int32_t lastblock;
223 	int32_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
224 	int32_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
225 	struct m_ext2fs *fs;
226 	struct buf *bp;
227 	int offset, size, level;
228 	long count, nblocks, vflags, blocksreleased = 0;
229 	int i;
230 	int aflags, error, allerror;
231 	off_t osize;
232 
233 	if (length < 0)
234 		return (EINVAL);
235 
236 	if (ovp->v_type != VREG &&
237 	    ovp->v_type != VDIR &&
238 	    ovp->v_type != VLNK)
239 		return (0);
240 
241 	if (ovp->v_type == VLNK &&
242 		(ext2fs_size(oip) < ovp->v_mount->mnt_maxsymlinklen ||
243 		 (ovp->v_mount->mnt_maxsymlinklen == 0 &&
244 		  oip->i_e2fs_nblock == 0))) {
245 #ifdef DIAGNOSTIC
246 		if (length != 0)
247 			panic("ext2fs_truncate: partial truncate of symlink");
248 #endif
249 		bzero((char *)&oip->i_e2din->e2di_shortlink,
250 			(u_int)ext2fs_size(oip));
251 		(void)ext2fs_setsize(oip, 0);
252 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
253 		return (ext2fs_update(oip, NULL, NULL, 1));
254 	}
255 
256 	if (ext2fs_size(oip) == length) {
257 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
258 		return (ext2fs_update(oip, NULL, NULL, 0));
259 	}
260 	fs = oip->i_e2fs;
261 	osize = ext2fs_size(oip);
262 	/*
263 	 * Lengthen the size of the file. We must ensure that the
264 	 * last byte of the file is allocated. Since the smallest
265 	 * value of osize is 0, length will be at least 1.
266 	 */
267 	if (osize < length) {
268 #if 0 /* XXX */
269 		if (length > fs->fs_maxfilesize)
270 			return (EFBIG);
271 #endif
272 		offset = blkoff(fs, length - 1);
273 		lbn = lblkno(fs, length - 1);
274 		aflags = B_CLRBUF;
275 		if (flags & IO_SYNC)
276 			aflags |= B_SYNC;
277 		error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp,
278 		    aflags);
279 		if (error)
280 			return (error);
281 		(void)ext2fs_setsize(oip, length);
282 		uvm_vnp_setsize(ovp, length);
283 		uvm_vnp_uncache(ovp);
284 		if (aflags & B_SYNC)
285 			bwrite(bp);
286 		else
287 			bawrite(bp);
288 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
289 		return (ext2fs_update(oip, NULL, NULL, 1));
290 	}
291 	/*
292 	 * Shorten the size of the file. If the file is not being
293 	 * truncated to a block boundry, the contents of the
294 	 * partial block following the end of the file must be
295 	 * zero'ed in case it ever become accessible again because
296 	 * of subsequent file growth.
297 	 */
298 	offset = blkoff(fs, length);
299 	if (offset == 0) {
300 		(void)ext2fs_setsize(oip, length);
301 	} else {
302 		lbn = lblkno(fs, length);
303 		aflags = B_CLRBUF;
304 		if (flags & IO_SYNC)
305 			aflags |= B_SYNC;
306 		error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp,
307 		    aflags);
308 		if (error)
309 			return (error);
310 		(void)ext2fs_setsize(oip, length);
311 		size = fs->e2fs_bsize;
312 		uvm_vnp_setsize(ovp, length);
313 		uvm_vnp_uncache(ovp);
314 		bzero(bp->b_data + offset, (u_int)(size - offset));
315 		bp->b_bcount = size;
316 		if (aflags & B_SYNC)
317 			bwrite(bp);
318 		else
319 			bawrite(bp);
320 	}
321 	/*
322 	 * Calculate index into inode's block list of
323 	 * last direct and indirect blocks (if any)
324 	 * which we want to keep.  Lastblock is -1 when
325 	 * the file is truncated to 0.
326 	 */
327 	lastblock = lblkno(fs, length + fs->e2fs_bsize - 1) - 1;
328 	lastiblock[SINGLE] = lastblock - NDADDR;
329 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
330 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
331 	nblocks = btodb(fs->e2fs_bsize);
332 	/*
333 	 * Update file and block pointers on disk before we start freeing
334 	 * blocks.  If we crash before free'ing blocks below, the blocks
335 	 * will be returned to the free list.  lastiblock values are also
336 	 * normalized to -1 for calls to ext2fs_indirtrunc below.
337 	 */
338 	memcpy((caddr_t)oldblks, (caddr_t)&oip->i_e2fs_blocks[0], sizeof oldblks);
339 	for (level = TRIPLE; level >= SINGLE; level--)
340 		if (lastiblock[level] < 0) {
341 			oip->i_e2fs_blocks[NDADDR + level] = 0;
342 			lastiblock[level] = -1;
343 		}
344 	for (i = NDADDR - 1; i > lastblock; i--)
345 		oip->i_e2fs_blocks[i] = 0;
346 	oip->i_flag |= IN_CHANGE | IN_UPDATE;
347 	if ((error = ext2fs_update(oip, NULL, NULL, 1)) != 0)
348 		allerror = error;
349 	/*
350 	 * Having written the new inode to disk, save its new configuration
351 	 * and put back the old block pointers long enough to process them.
352 	 * Note that we save the new block configuration so we can check it
353 	 * when we are done.
354 	 */
355 	bcopy((caddr_t)&oip->i_e2fs_blocks[0], (caddr_t)newblks, sizeof newblks);
356 	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_e2fs_blocks[0], sizeof oldblks);
357 	(void)ext2fs_setsize(oip, osize);
358 	vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
359 	allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0);
360 
361 	/*
362 	 * Indirect blocks first.
363 	 */
364 	indir_lbn[SINGLE] = -NDADDR;
365 	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) -1;
366 	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
367 	for (level = TRIPLE; level >= SINGLE; level--) {
368 		bn = fs2h32(oip->i_e2fs_blocks[NDADDR + level]);
369 		if (bn != 0) {
370 			error = ext2fs_indirtrunc(oip, indir_lbn[level],
371 			    fsbtodb(fs, bn), lastiblock[level], level, &count);
372 			if (error)
373 				allerror = error;
374 			blocksreleased += count;
375 			if (lastiblock[level] < 0) {
376 				oip->i_e2fs_blocks[NDADDR + level] = 0;
377 				ext2fs_blkfree(oip, bn);
378 				blocksreleased += nblocks;
379 			}
380 		}
381 		if (lastiblock[level] >= 0)
382 			goto done;
383 	}
384 
385 	/*
386 	 * All whole direct blocks or frags.
387 	 */
388 	for (i = NDADDR - 1; i > lastblock; i--) {
389 		bn = fs2h32(oip->i_e2fs_blocks[i]);
390 		if (bn == 0)
391 			continue;
392 		oip->i_e2fs_blocks[i] = 0;
393 		ext2fs_blkfree(oip, bn);
394 		blocksreleased += btodb(fs->e2fs_bsize);
395 	}
396 
397 done:
398 #ifdef DIAGNOSTIC
399 	for (level = SINGLE; level <= TRIPLE; level++)
400 		if (newblks[NDADDR + level] !=
401 		    oip->i_e2fs_blocks[NDADDR + level])
402 			panic("ext2fs_truncate1");
403 	for (i = 0; i < NDADDR; i++)
404 		if (newblks[i] != oip->i_e2fs_blocks[i])
405 			panic("ext2fs_truncate2");
406 	if (length == 0 &&
407 	    (!LIST_EMPTY(&ovp->v_cleanblkhd) ||
408 	     !LIST_EMPTY(&ovp->v_dirtyblkhd)))
409 		panic("ext2fs_truncate3");
410 #endif /* DIAGNOSTIC */
411 	/*
412 	 * Put back the real size.
413 	 */
414 	(void)ext2fs_setsize(oip, length);
415 	if (blocksreleased >= oip->i_e2fs_nblock)
416 		oip->i_e2fs_nblock = 0;
417 	else
418 		oip->i_e2fs_nblock -= blocksreleased;
419 	oip->i_flag |= IN_CHANGE;
420 	return (allerror);
421 }
422 
423 /*
424  * Release blocks associated with the inode ip and stored in the indirect
425  * block bn.  Blocks are free'd in LIFO order up to (but not including)
426  * lastbn.  If level is greater than SINGLE, the block is an indirect block
427  * and recursive calls to indirtrunc must be used to cleanse other indirect
428  * blocks.
429  *
430  * NB: triple indirect blocks are untested.
431  */
432 static int
433 ext2fs_indirtrunc(struct inode *ip, int32_t lbn, int32_t dbn, int32_t lastbn, int level, long *countp)
434 {
435 	int i;
436 	struct buf *bp;
437 	struct m_ext2fs *fs = ip->i_e2fs;
438 	int32_t *bap;
439 	struct vnode *vp;
440 	int32_t *copy = NULL, nb, nlbn, last;
441 	long blkcount, factor;
442 	int nblocks, blocksreleased = 0;
443 	int error = 0, allerror = 0;
444 
445 	/*
446 	 * Calculate index in current block of last
447 	 * block to be kept.  -1 indicates the entire
448 	 * block so we need not calculate the index.
449 	 */
450 	factor = 1;
451 	for (i = SINGLE; i < level; i++)
452 		factor *= NINDIR(fs);
453 	last = lastbn;
454 	if (lastbn > 0)
455 		last /= factor;
456 	nblocks = btodb(fs->e2fs_bsize);
457 	/*
458 	 * Get buffer of block pointers, zero those entries corresponding
459 	 * to blocks to be free'd, and update on disk copy first.  Since
460 	 * double(triple) indirect before single(double) indirect, calls
461 	 * to bmap on these blocks will fail.  However, we already have
462 	 * the on disk address, so we have to set the b_blkno field
463 	 * explicitly instead of letting bread do everything for us.
464 	 */
465 	vp = ITOV(ip);
466 	bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0);
467 	if (!(bp->b_flags & (B_DONE | B_DELWRI))) {
468 		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
469 		bcstats.pendingreads++;
470 		bcstats.numreads++;
471 		bp->b_flags |= B_READ;
472 		if (bp->b_bcount > bp->b_bufsize)
473 			panic("ext2fs_indirtrunc: bad buffer size");
474 		bp->b_blkno = dbn;
475 		VOP_STRATEGY(bp);
476 		error = biowait(bp);
477 	}
478 	if (error) {
479 		brelse(bp);
480 		*countp = 0;
481 		return (error);
482 	}
483 
484 	bap = (int32_t *)bp->b_data;
485 	if (lastbn >= 0) {
486 		copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK);
487 		memcpy((caddr_t)copy, (caddr_t)bap, (u_int)fs->e2fs_bsize);
488 		memset((caddr_t)&bap[last + 1], 0,
489 			(u_int)(NINDIR(fs) - (last + 1)) * sizeof (u_int32_t));
490 		error = bwrite(bp);
491 		if (error)
492 			allerror = error;
493 		bap = copy;
494 	}
495 
496 	/*
497 	 * Recursively free totally unused blocks.
498 	 */
499 	for (i = NINDIR(fs) - 1,
500 		nlbn = lbn + 1 - i * factor; i > last;
501 		i--, nlbn += factor) {
502 		nb = fs2h32(bap[i]);
503 		if (nb == 0)
504 			continue;
505 		if (level > SINGLE) {
506 			error = ext2fs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
507 						   (int32_t)-1, level - 1,
508 						   &blkcount);
509 			if (error)
510 				allerror = error;
511 			blocksreleased += blkcount;
512 		}
513 		ext2fs_blkfree(ip, nb);
514 		blocksreleased += nblocks;
515 	}
516 
517 	/*
518 	 * Recursively free last partial block.
519 	 */
520 	if (level > SINGLE && lastbn >= 0) {
521 		last = lastbn % factor;
522 		nb = fs2h32(bap[i]);
523 		if (nb != 0) {
524 			error = ext2fs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
525 						   last, level - 1, &blkcount);
526 			if (error)
527 				allerror = error;
528 			blocksreleased += blkcount;
529 		}
530 	}
531 
532 	if (copy != NULL) {
533 		free(copy, M_TEMP);
534 	} else {
535 		bp->b_flags |= B_INVAL;
536 		brelse(bp);
537 	}
538 
539 	*countp = blocksreleased;
540 	return (allerror);
541 }
542