xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_vfsops.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: ext2fs_vfsops.c,v 1.81 2014/07/13 15:07:01 pelikan Exp $	*/
2 /*	$NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1989, 1991, 1993, 1994
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *	notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *	notice, this list of conditions and the following disclaimer in the
16  *	documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *	may be used to endorse or promote products derived from this software
19  *	without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.14 (Berkeley) 11/28/94
34  * Modified for ext2fs by Manuel Bouyer.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/namei.h>
40 #include <sys/proc.h>
41 #include <sys/kernel.h>
42 #include <sys/vnode.h>
43 #include <sys/socket.h>
44 #include <sys/mount.h>
45 #include <sys/buf.h>
46 #include <sys/device.h>
47 #include <sys/disk.h>
48 #include <sys/mbuf.h>
49 #include <sys/file.h>
50 #include <sys/disklabel.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/malloc.h>
54 #include <sys/pool.h>
55 #include <sys/lock.h>
56 #include <sys/dkio.h>
57 #include <sys/specdev.h>
58 
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/dir.h>
63 #include <ufs/ufs/ufs_extern.h>
64 
65 #include <ufs/ext2fs/ext2fs.h>
66 #include <ufs/ext2fs/ext2fs_extern.h>
67 
extern struct lock ufs_hashlock;

/* Superblock write-back and superblock sanity check, both defined below. */
int ext2fs_sbupdate(struct ufsmount *, int);
static int	e2fs_sbcheck(struct ext2fs *, int);

/*
 * VFS operations vector for ext2fs.  The initializers are positional, so
 * their order has to follow the member order of struct vfsops.  The ufs_*
 * entries are routines that are not defined in this file.
 */
const struct vfsops ext2fs_vfsops = {
	ext2fs_mount,
	ufs_start,
	ext2fs_unmount,
	ufs_root,
	ufs_quotactl,
	ext2fs_statfs,
	ext2fs_sync,
	ext2fs_vget,
	ext2fs_fhtovp,
	ext2fs_vptofh,
	ext2fs_init,
	ext2fs_sysctl,
	ufs_check_export
};

/*
 * Pools set up by ext2fs_init(): one sized for struct inode, one sized for
 * struct ext2fs_dinode.  ext2fs_vget() allocates from both.
 */
struct pool ext2fs_inode_pool;
struct pool ext2fs_dinode_pool;

/* Counter used by ext2fs_vget() to hand out inode generation numbers. */
extern u_long ext2gennumber;
93 
94 int
95 ext2fs_init(struct vfsconf *vfsp)
96 {
97 	pool_init(&ext2fs_inode_pool, sizeof(struct inode), 0, 0, 0,
98 	    "ext2inopl", &pool_allocator_nointr);
99 	pool_init(&ext2fs_dinode_pool, sizeof(struct ext2fs_dinode), 0, 0, 0,
100 	    "ext2dinopl", &pool_allocator_nointr);
101 
102 	return (ufs_init(vfsp));
103 }
104 
105 /*
106  * Called by main() when ext2fs is going to be mounted as root.
107  *
108  * Name is updated by mount(8) after booting.
109  */
110 #define ROOTNAME	"root_device"
111 
112 int
113 ext2fs_mountroot(void)
114 {
115 	struct m_ext2fs *fs;
116         struct mount *mp;
117 	struct proc *p = curproc;	/* XXX */
118 	struct ufsmount *ump;
119 	int error;
120 
121 	/*
122 	 * Get vnodes for swapdev and rootdev.
123 	 */
124 	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
125 		panic("ext2fs_mountroot: can't setup bdevvp's");
126 
127 	if ((error = vfs_rootmountalloc("ext2fs", "root_device", &mp)) != 0) {
128 		vrele(rootvp);
129 		return (error);
130 	}
131 
132 	if ((error = ext2fs_mountfs(rootvp, mp, p)) != 0) {
133 		mp->mnt_vfc->vfc_refcount--;
134 		vfs_unbusy(mp);
135 		free(mp, M_MOUNT, sizeof *mp);
136 		vrele(rootvp);
137 		return (error);
138 	}
139 
140 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
141 	ump = VFSTOUFS(mp);
142 	fs = ump->um_e2fs;
143 	memset(fs->e2fs_fsmnt, 0, sizeof(fs->e2fs_fsmnt));
144 	strlcpy(fs->e2fs_fsmnt, mp->mnt_stat.f_mntonname, sizeof(fs->e2fs_fsmnt));
145 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
146 		memset(fs->e2fs.e2fs_fsmnt, 0, sizeof(fs->e2fs.e2fs_fsmnt));
147 		strlcpy(fs->e2fs.e2fs_fsmnt, mp->mnt_stat.f_mntonname,
148 		    sizeof(fs->e2fs.e2fs_fsmnt));
149 	}
150 	(void)ext2fs_statfs(mp, &mp->mnt_stat, p);
151 	vfs_unbusy(mp);
152 	inittodr(fs->e2fs.e2fs_wtime);
153 	return (0);
154 }
155 
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both fresh mounts and MNT_UPDATE requests.  `data' is a user
 * pointer to a struct ufs_args; `path' is the mount point name.
 *
 * For an update the function may downgrade the filesystem to read-only,
 * reload it (MNT_RELOAD), upgrade it to read/write (MNT_WANTRDWR), and,
 * when no device name was supplied, process export requests and return.
 * Otherwise the device name is looked up, checked to be a block device the
 * caller may access, and either mounted via ext2fs_mountfs() or (update
 * case) compared against the device already in use.
 *
 * Returns 0 on success or an errno value.
 */
int
ext2fs_mount(struct mount *mp, const char *path, void *data,
    struct nameidata *ndp, struct proc *p)
{
	struct vnode *devvp;
	struct ufs_args args;
	struct ufsmount *ump = NULL;
	struct m_ext2fs *fs;
	char fname[MNAMELEN];
	char fspec[MNAMELEN];
	int error, flags;
	mode_t accessmode;

	error = copyin(data, &args, sizeof(struct ufs_args));
	if (error)
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_e2fs;
		/* Read/write -> read-only downgrade. */
		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ext2fs_flushfiles(mp, flags, p);
			/*
			 * Only mark the filesystem clean when the flush and
			 * the descriptor update succeeded and no error state
			 * is recorded in the superblock.
			 */
			if (error == 0 &&
			    ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
			    (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
				(void)ext2fs_sbupdate(ump, MNT_WAIT);
			}
			if (error)
				return (error);
			fs->e2fs_ronly = 1;
		}
		if (mp->mnt_flag & MNT_RELOAD) {
			error = ext2fs_reload(mp, ndp->ni_cnd.cn_cred, p);
			if (error)
				return (error);
		}
		/* Read-only -> read/write upgrade. */
		if (fs->e2fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (suser(p, 0) != 0) {
				devvp = ump->um_devvp;
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
				error = VOP_ACCESS(devvp, VREAD | VWRITE,
				    p->p_ucred, p);
				VOP_UNLOCK(devvp, 0, p);
				if (error)
					return (error);
			}
			fs->e2fs_ronly = 0;
			/*
			 * While mounted read/write the state is "not clean";
			 * anything that was not clean before is flagged as
			 * having errors.
			 */
			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
				fs->e2fs.e2fs_state = 0;
			else
				fs->e2fs.e2fs_state = E2FS_ERRORS;
			fs->e2fs_fmod = 1;
		}
		if (args.fspec == NULL) {
			/*
			 * Process export requests.
			 */
			return (vfs_export(mp, &ump->um_export,
			    &args.export_info));
		}
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	error = copyinstr(args.fspec, fspec, sizeof(fspec), NULL);
	if (error)
		goto error;

	/* If disk_map() fails, use the name exactly as supplied. */
	if (disk_map(fspec, fname, MNAMELEN, DM_OPENBLCK) == -1)
		memcpy(fname, fspec, sizeof(fname));

	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p);
	if ((error = namei(ndp)) != 0)
		goto error;
	devvp = ndp->ni_vp;

	if (devvp->v_type != VBLK) {
		error = ENOTBLK;
		goto error_devvp;
	}
	if (major(devvp->v_rdev) >= nblkdev) {
		error = ENXIO;
		goto error_devvp;
	}
	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (suser(p, 0) != 0) {
		accessmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
		VOP_UNLOCK(devvp, 0, p);
		if (error)
			goto error_devvp;
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ext2fs_mountfs(devvp, mp, p);
	else {
		/*
		 * Update: the named device must be the one already mounted.
		 * On a match, drop the reference obtained by the lookup.
		 */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* XXX needs translation */
		else
			vrele(devvp);
	}
	if (error)
		goto error_devvp;
	ump = VFSTOUFS(mp);
	fs = ump->um_e2fs;

	/*
	 * Record the mount point name.
	 * NOTE(review): the on-disk copy below is taken from
	 * mp->mnt_stat.f_mntonname before that field is refreshed from
	 * `path' a few lines further down -- confirm that the caller has
	 * already filled in f_mntonname at this point.
	 */
	memset(fs->e2fs_fsmnt, 0, sizeof(fs->e2fs_fsmnt));
	strlcpy(fs->e2fs_fsmnt, path, sizeof(fs->e2fs_fsmnt));
	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
		memset(fs->e2fs.e2fs_fsmnt, 0, sizeof(fs->e2fs.e2fs_fsmnt));
		strlcpy(fs->e2fs.e2fs_fsmnt, mp->mnt_stat.f_mntonname,
		    sizeof(fs->e2fs.e2fs_fsmnt));
	}
	memcpy(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN);

	/*
	 * The superblock was marked modified (read/write mount or upgrade):
	 * push the superblock and group descriptors out now.
	 */
	if (fs->e2fs_fmod != 0) {	/* XXX */
		fs->e2fs_fmod = 0;
		if (fs->e2fs.e2fs_state == 0)
			fs->e2fs.e2fs_wtime = time_second;
		else
			printf("%s: file system not clean; please fsck(8)\n",
			    mp->mnt_stat.f_mntfromname);
		ext2fs_cgupdate(ump, MNT_WAIT);
	}

	goto success;

error_devvp:
	/* Error with devvp held. */
	vrele(devvp);

error:
	/* Error with no state to backout. */

success:
	return (error);
}
320 
321 int ext2fs_reload_vnode(struct vnode *, void *args);
322 
323 struct ext2fs_reload_args {
324 	struct m_ext2fs *fs;
325 	struct proc *p;
326 	struct ucred *cred;
327 	struct vnode *devvp;
328 };
329 
330 int
331 ext2fs_reload_vnode(struct vnode *vp, void *args)
332 {
333 	struct ext2fs_reload_args *era = args;
334 	struct buf *bp;
335 	struct inode *ip;
336 	int error;
337 	caddr_t cp;
338 
339 	/*
340 	 * Step 4: invalidate all inactive vnodes.
341 	 */
342 	if (vp->v_usecount == 0) {
343 		vgonel(vp, era->p);
344 		return (0);
345 	}
346 
347 	/*
348 	 * Step 5: invalidate all cached file data.
349 	 */
350 	if (vget(vp, LK_EXCLUSIVE, era->p))
351 		return (0);
352 
353 	if (vinvalbuf(vp, 0, era->cred, era->p, 0, 0))
354 		panic("ext2fs_reload: dirty2");
355 	/*
356 	 * Step 6: re-read inode data for all active vnodes.
357 	 */
358 	ip = VTOI(vp);
359 	error = bread(era->devvp,
360 	    fsbtodb(era->fs, ino_to_fsba(era->fs, ip->i_number)),
361 	    (int)era->fs->e2fs_bsize, &bp);
362 	if (error) {
363 		vput(vp);
364 		return (error);
365 	}
366 	cp = (caddr_t)bp->b_data +
367 	    (ino_to_fsbo(era->fs, ip->i_number) * EXT2_DINODE_SIZE(era->fs));
368 	e2fs_iload((struct ext2fs_dinode *)cp, ip->i_e2din);
369 	brelse(bp);
370 	vput(vp);
371 	return (0);
372 }
373 
374 static off_t
375 ext2fs_maxfilesize(struct m_ext2fs *fs)
376 {
377 	bool huge = fs->e2fs.e2fs_features_rocompat & EXT2F_ROCOMPAT_HUGE_FILE;
378 	off_t b = fs->e2fs_bsize / 4;
379 	off_t physically, logically;
380 
381 	physically = dbtob(huge ? ((1ULL << 48) - 1) : UINT_MAX);
382 	logically = (12ULL + b + b*b + b*b*b) * fs->e2fs_bsize;
383 
384 	return MIN(logically, physically);
385 }
386 
387 static int
388 e2fs_sbfill(struct vnode *devvp, struct m_ext2fs *fs, struct ext2fs *sb)
389 {
390 	struct buf *bp = NULL;
391 	int i, error;
392 
393 	/* XXX assume hardware block size == 512 */
394 	fs->e2fs_ncg = howmany(sb->e2fs_bcount - sb->e2fs_first_dblock,
395 	    sb->e2fs_bpg);
396 	fs->e2fs_fsbtodb = sb->e2fs_log_bsize + 1;
397 	fs->e2fs_bsize = 1024 << sb->e2fs_log_bsize;
398 	fs->e2fs_bshift = LOG_MINBSIZE + sb->e2fs_log_bsize;
399 	fs->e2fs_fsize = 1024 << sb->e2fs_log_fsize;
400 
401 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
402 	fs->e2fs_bmask = ~fs->e2fs_qbmask;
403 
404 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE(fs);
405 	fs->e2fs_itpg = sb->e2fs_ipg / fs->e2fs_ipb;
406 
407 	/* Re-read group descriptors from the disk. */
408 	fs->e2fs_ngdb = howmany(fs->e2fs_ncg,
409 	    fs->e2fs_bsize / sizeof(struct ext2_gd));
410 	fs->e2fs_gd = mallocarray(fs->e2fs_ngdb, fs->e2fs_bsize,
411 	    M_UFSMNT, M_WAITOK);
412 
413 	for (i = 0; i < fs->e2fs_ngdb; ++i) {
414 		daddr_t dblk = ((fs->e2fs_bsize > 1024) ? 0 : 1) + i + 1;
415 		size_t gdesc = i * fs->e2fs_bsize / sizeof(struct ext2_gd);
416 		struct ext2_gd *gd;
417 
418 		error = bread(devvp, fsbtodb(fs, dblk), fs->e2fs_bsize, &bp);
419 		if (error) {
420 			size_t gdescs_space = fs->e2fs_ngdb * fs->e2fs_bsize;
421 
422 			free(fs->e2fs_gd, M_UFSMNT, gdescs_space);
423 			fs->e2fs_gd = NULL;
424 			brelse(bp);
425 			return (error);
426 		}
427 
428 		gd = (struct ext2_gd *) bp->b_data;
429 		e2fs_cgload(gd, fs->e2fs_gd + gdesc, fs->e2fs_bsize);
430 		brelse(bp);
431 		bp = NULL;
432 	}
433 
434 	if ((sb->e2fs_features_rocompat & EXT2F_ROCOMPAT_LARGEFILE) == 0 ||
435 	    (sb->e2fs_rev == E2FS_REV0))
436 		fs->e2fs_maxfilesize = INT_MAX;
437 	else
438 		fs->e2fs_maxfilesize = ext2fs_maxfilesize(fs);
439 
440 	if (sb->e2fs_features_incompat & EXT2F_INCOMPAT_EXTENTS)
441 		fs->e2fs_maxfilesize *= 4;
442 
443 	return (0);
444 }
445 
446 /*
447  * Reload all incore data for a filesystem (used after running fsck on
448  * the root filesystem and finding things to fix). The filesystem must
449  * be mounted read-only.
450  *
451  * Things to do to update the mount:
452  *	1) invalidate all cached meta-data.
453  *	2) re-read superblock from disk.
454  *	3) re-read summary information from disk.
455  *	4) invalidate all inactive vnodes.
456  *	5) invalidate all cached file data.
457  *	6) re-read inode data for all active vnodes.
458  */
459 int
460 ext2fs_reload(struct mount *mountp, struct ucred *cred, struct proc *p)
461 {
462 	struct vnode *devvp;
463 	struct buf *bp;
464 	struct m_ext2fs *fs;
465 	struct ext2fs *newfs;
466 	int error;
467 	struct ext2fs_reload_args era;
468 
469 	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
470 		return (EINVAL);
471 	/*
472 	 * Step 1: invalidate all cached meta-data.
473 	 */
474 	devvp = VFSTOUFS(mountp)->um_devvp;
475 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
476 		panic("ext2fs_reload: dirty1");
477 
478 	/*
479 	 * Step 2: re-read superblock from disk.
480 	 */
481 	error = bread(devvp, (daddr_t)(SBOFF / DEV_BSIZE), SBSIZE, &bp);
482 	if (error) {
483 		brelse(bp);
484 		return (error);
485 	}
486 	newfs = (struct ext2fs *)bp->b_data;
487 	error = e2fs_sbcheck(newfs, (mountp->mnt_flag & MNT_RDONLY));
488 	if (error) {
489 		brelse(bp);
490 		return (error);
491 	}
492 
493 	fs = VFSTOUFS(mountp)->um_e2fs;
494 	/*
495 	 * Copy in the new superblock, compute in-memory values
496 	 * and load group descriptors.
497 	 */
498 	e2fs_sbload(newfs, &fs->e2fs);
499 	if ((error = e2fs_sbfill(devvp, fs, newfs)) != 0)
500 		return (error);
501 
502 	era.p = p;
503 	era.cred = cred;
504 	era.fs = fs;
505 	era.devvp = devvp;
506 
507 	error = vfs_mount_foreach_vnode(mountp, ext2fs_reload_vnode, &era);
508 
509 	return (error);
510 }
511 
512 /*
513  * Common code for mount and mountroot
514  */
515 int
516 ext2fs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
517 {
518 	struct ufsmount *ump;
519 	struct buf *bp;
520 	struct ext2fs *fs;
521 	dev_t dev;
522 	int error, ronly;
523 	struct ucred *cred;
524 
525 	dev = devvp->v_rdev;
526 	cred = p ? p->p_ucred : NOCRED;
527 	/*
528 	 * Disallow multiple mounts of the same device.
529 	 * Disallow mounting of a device that is currently in use
530 	 * (except for root, which might share swap device for miniroot).
531 	 * Flush out any old buffers remaining from a previous use.
532 	 */
533 	if ((error = vfs_mountedon(devvp)) != 0)
534 		return (error);
535 	if (vcount(devvp) > 1 && devvp != rootvp)
536 		return (EBUSY);
537 	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
538 		return (error);
539 
540 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
541 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
542 	if (error)
543 		return (error);
544 
545 	bp = NULL;
546 	ump = NULL;
547 
548 	/*
549 	 * Read the superblock from disk.
550 	 */
551 	error = bread(devvp, (daddr_t)(SBOFF / DEV_BSIZE), SBSIZE, &bp);
552 	if (error)
553 		goto out;
554 	fs = (struct ext2fs *)bp->b_data;
555 	error = e2fs_sbcheck(fs, ronly);
556 	if (error)
557 		goto out;
558 
559 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
560 	ump->um_e2fs = malloc(sizeof(struct m_ext2fs), M_UFSMNT,
561 	    M_WAITOK | M_ZERO);
562 
563 	/*
564 	 * Copy in the superblock, compute in-memory values
565 	 * and load group descriptors.
566 	 */
567 	e2fs_sbload(fs, &ump->um_e2fs->e2fs);
568 	if ((error = e2fs_sbfill(devvp, ump->um_e2fs, fs)) != 0)
569 		goto out;
570 	brelse(bp);
571 	bp = NULL;
572 	fs = &ump->um_e2fs->e2fs;
573 	ump->um_e2fs->e2fs_ronly = ronly;
574 	ump->um_fstype = UM_EXT2FS;
575 
576 	if (ronly == 0) {
577 		if (fs->e2fs_state == E2FS_ISCLEAN)
578 			fs->e2fs_state = 0;
579 		else
580 			fs->e2fs_state = E2FS_ERRORS;
581 		ump->um_e2fs->e2fs_fmod = 1;
582 	}
583 
584 	mp->mnt_data = (qaddr_t)ump;
585 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
586 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
587 	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
588 	mp->mnt_flag |= MNT_LOCAL;
589 	ump->um_mountp = mp;
590 	ump->um_dev = dev;
591 	ump->um_devvp = devvp;
592 	ump->um_nindir = NINDIR(ump->um_e2fs);
593 	ump->um_bptrtodb = ump->um_e2fs->e2fs_fsbtodb;
594 	ump->um_seqinc = 1; /* no frags */
595 	devvp->v_specmountpoint = mp;
596 	return (0);
597 out:
598 	if (bp)
599 		brelse(bp);
600 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
601 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
602 	VOP_UNLOCK(devvp, 0, p);
603 	if (ump) {
604 		free(ump->um_e2fs, M_UFSMNT, sizeof *ump->um_e2fs);
605 		free(ump, M_UFSMNT, sizeof *ump);
606 		mp->mnt_data = (qaddr_t)0;
607 	}
608 	return (error);
609 }
610 
611 /*
612  * unmount system call
613  */
614 int
615 ext2fs_unmount(struct mount *mp, int mntflags, struct proc *p)
616 {
617 	struct ufsmount *ump;
618 	struct m_ext2fs *fs;
619 	int error, flags;
620 	size_t gdescs_space;
621 
622 	flags = 0;
623 	if (mntflags & MNT_FORCE)
624 		flags |= FORCECLOSE;
625 	if ((error = ext2fs_flushfiles(mp, flags, p)) != 0)
626 		return (error);
627 	ump = VFSTOUFS(mp);
628 	fs = ump->um_e2fs;
629 	gdescs_space = fs->e2fs_ngdb * fs->e2fs_bsize;
630 
631 	if (!fs->e2fs_ronly && ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
632 	    (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
633 		fs->e2fs.e2fs_state = E2FS_ISCLEAN;
634 		(void) ext2fs_sbupdate(ump, MNT_WAIT);
635 	}
636 
637 	if (ump->um_devvp->v_type != VBAD)
638 		ump->um_devvp->v_specmountpoint = NULL;
639 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
640 	error = VOP_CLOSE(ump->um_devvp, fs->e2fs_ronly ? FREAD : FREAD|FWRITE,
641 	    NOCRED, p);
642 	vput(ump->um_devvp);
643 	free(fs->e2fs_gd, M_UFSMNT, gdescs_space);
644 	free(fs, M_UFSMNT, sizeof *fs);
645 	free(ump, M_UFSMNT, sizeof *ump);
646 	mp->mnt_data = (qaddr_t)0;
647 	mp->mnt_flag &= ~MNT_LOCAL;
648 	return (error);
649 }
650 
651 /*
652  * Flush out all the files in a filesystem.
653  */
654 int
655 ext2fs_flushfiles(struct mount *mp, int flags, struct proc *p)
656 {
657 	struct ufsmount *ump;
658 	int error;
659 
660 	ump = VFSTOUFS(mp);
661 	/*
662 	 * Flush all the files.
663 	 */
664 	if ((error = vflush(mp, NULL, flags)) != 0)
665 		return (error);
666 	/*
667 	 * Flush filesystem metadata.
668 	 */
669 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
670 	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
671 	VOP_UNLOCK(ump->um_devvp, 0, p);
672 	return (error);
673 }
674 
675 /*
676  * Get file system statistics.
677  */
678 int
679 ext2fs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p)
680 {
681 	struct ufsmount *ump;
682 	struct m_ext2fs *fs;
683 	u_int32_t overhead, overhead_per_group;
684 	int i, ngroups;
685 
686 	ump = VFSTOUFS(mp);
687 	fs = ump->um_e2fs;
688 	if (fs->e2fs.e2fs_magic != E2FS_MAGIC)
689 		panic("ext2fs_statfs");
690 
691 	/*
692 	 * Compute the overhead (FS structures)
693 	 */
694 	overhead_per_group = 1 /* block bitmap */ + 1 /* inode bitmap */ +
695 	    fs->e2fs_itpg;
696 	overhead = fs->e2fs.e2fs_first_dblock +
697 	    fs->e2fs_ncg * overhead_per_group;
698 	if (fs->e2fs.e2fs_rev > E2FS_REV0 &&
699 	    fs->e2fs.e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
700 		for (i = 0, ngroups = 0; i < fs->e2fs_ncg; i++) {
701 			if (cg_has_sb(i))
702 				ngroups++;
703 		}
704 	} else {
705 		ngroups = fs->e2fs_ncg;
706 	}
707 	overhead += ngroups * (1 + fs->e2fs_ngdb);
708 
709 	sbp->f_bsize = fs->e2fs_bsize;
710 	sbp->f_iosize = fs->e2fs_bsize;
711 	sbp->f_blocks = fs->e2fs.e2fs_bcount - overhead;
712 	sbp->f_bfree = fs->e2fs.e2fs_fbcount;
713 	sbp->f_bavail = sbp->f_bfree - fs->e2fs.e2fs_rbcount;
714 	sbp->f_files =  fs->e2fs.e2fs_icount;
715 	sbp->f_ffree = fs->e2fs.e2fs_ficount;
716 	if (sbp != &mp->mnt_stat) {
717 		memcpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, MNAMELEN);
718 		memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, MNAMELEN);
719 		memcpy(sbp->f_mntfromspec, mp->mnt_stat.f_mntfromspec, MNAMELEN);
720 	}
721 	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
722 	return (0);
723 }
724 
725 int ext2fs_sync_vnode(struct vnode *vp, void *);
726 
727 struct ext2fs_sync_args {
728 	int allerror;
729 	int waitfor;
730 	struct proc *p;
731 	struct ucred *cred;
732 };
733 
734 int
735 ext2fs_sync_vnode(struct vnode *vp, void *args)
736 {
737 	struct ext2fs_sync_args *esa = args;
738 	struct inode *ip;
739 	int error;
740 
741 	ip = VTOI(vp);
742 	if (vp->v_type == VNON ||
743 	    ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
744 	    LIST_EMPTY(&vp->v_dirtyblkhd)) ||
745 	    esa->waitfor == MNT_LAZY) {
746 		return (0);
747 	}
748 
749 	if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, esa->p))
750 		return (0);
751 
752 	if ((error = VOP_FSYNC(vp, esa->cred, esa->waitfor, esa->p)) != 0)
753 		esa->allerror = error;
754 	vput(vp);
755 	return (0);
756 }
757 /*
758  * Go through the disk queues to initiate sandbagged IO;
759  * go through the inodes to write those that have been modified;
760  * initiate the writing of the super block if it has been modified.
761  *
762  * Should always be called with the mount point locked.
763  */
764 int
765 ext2fs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)
766 {
767 	struct ufsmount *ump = VFSTOUFS(mp);
768 	struct m_ext2fs *fs;
769 	int error, allerror = 0;
770 	struct ext2fs_sync_args esa;
771 
772 	fs = ump->um_e2fs;
773 	if (fs->e2fs_ronly != 0) {		/* XXX */
774 		printf("fs = %s\n", fs->e2fs_fsmnt);
775 		panic("update: rofs mod");
776 	}
777 
778 	/*
779 	 * Write back each (modified) inode.
780 	 */
781 	esa.p = p;
782 	esa.cred = cred;
783 	esa.allerror = 0;
784 	esa.waitfor = waitfor;
785 
786 	vfs_mount_foreach_vnode(mp, ext2fs_sync_vnode, &esa);
787 	if (esa.allerror != 0)
788 		allerror = esa.allerror;
789 
790 	/*
791 	 * Force stale file system control information to be flushed.
792 	 */
793 	if (waitfor != MNT_LAZY) {
794 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
795 		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
796 			allerror = error;
797 		VOP_UNLOCK(ump->um_devvp, 0, p);
798 	}
799 	/*
800 	 * Write back modified superblock.
801 	 */
802 	if (fs->e2fs_fmod != 0) {
803 		fs->e2fs_fmod = 0;
804 		fs->e2fs.e2fs_wtime = time_second;
805 		if ((error = ext2fs_cgupdate(ump, waitfor)))
806 			allerror = error;
807 	}
808 	return (allerror);
809 }
810 
811 /*
812  * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
813  * in from disk.  If it is in core, wait for the lock bit to clear, then
814  * return the inode locked.  Detection and handling of mount points must be
815  * done by the calling routine.
816  */
817 int
818 ext2fs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
819 {
820 	struct m_ext2fs *fs;
821 	struct inode *ip;
822 	struct ext2fs_dinode *dp;
823 	struct ufsmount *ump;
824 	struct buf *bp;
825 	struct vnode *vp;
826 	dev_t dev;
827 	int error;
828 
829 	if (ino > (ufsino_t)-1)
830 		panic("ext2fs_vget: alien ino_t %llu",
831 		    (unsigned long long)ino);
832 
833 	ump = VFSTOUFS(mp);
834 	dev = ump->um_dev;
835 
836  retry:
837 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
838 		return (0);
839 
840 	/* Allocate a new vnode/inode. */
841 	if ((error = getnewvnode(VT_EXT2FS, mp, &ext2fs_vops, &vp)) != 0) {
842 		*vpp = NULL;
843 		return (error);
844 	}
845 
846 	ip = pool_get(&ext2fs_inode_pool, PR_WAITOK|PR_ZERO);
847 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
848 	vp->v_data = ip;
849 	ip->i_vnode = vp;
850 	ip->i_ump = ump;
851 	ip->i_e2fs = fs = ump->um_e2fs;
852 	ip->i_dev = dev;
853 	ip->i_number = ino;
854 	ip->i_e2fs_last_lblk = 0;
855 	ip->i_e2fs_last_blk = 0;
856 
857 	/*
858 	 * Put it onto its hash chain and lock it so that other requests for
859 	 * this inode will block if they arrive while we are sleeping waiting
860 	 * for old data structures to be purged or for the contents of the
861 	 * disk portion of this inode to be read.
862 	 */
863 	error = ufs_ihashins(ip);
864 
865 	if (error) {
866 		vrele(vp);
867 
868 		if (error == EEXIST)
869 			goto retry;
870 
871 		return (error);
872 	}
873 
874 	/* Read in the disk contents for the inode, copy into the inode. */
875 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
876 	    (int)fs->e2fs_bsize, &bp);
877 	if (error) {
878 		/*
879 		 * The inode does not contain anything useful, so it would
880 	 	 * be misleading to leave it on its hash chain. With mode
881 		 * still zero, it will be unlinked and returned to the free
882 		 * list by vput().
883 		 */
884 		vput(vp);
885 		brelse(bp);
886 		*vpp = NULL;
887 		return (error);
888 	}
889 
890 	dp = (struct ext2fs_dinode *) ((char *)bp->b_data
891 	    + EXT2_DINODE_SIZE(fs) * ino_to_fsbo(fs, ino));
892 
893 	ip->i_e2din = pool_get(&ext2fs_dinode_pool, PR_WAITOK);
894 	e2fs_iload(dp, ip->i_e2din);
895 	brelse(bp);
896 
897 	ip->i_effnlink = ip->i_e2fs_nlink;
898 
899 	/*
900 	 * The fields for storing the UID and GID of an ext2fs inode are
901 	 * limited to 16 bits. To overcome this limitation, Linux decided to
902 	 * scatter the highest bits of these values into a previously reserved
903 	 * area on the disk inode. We deal with this situation by having two
904 	 * 32-bit fields *out* of the disk inode to hold the complete values.
905 	 * Now that we are reading in the inode, compute these fields.
906 	 */
907 	ip->i_e2fs_uid = ip->i_e2fs_uid_low | (ip->i_e2fs_uid_high << 16);
908 	ip->i_e2fs_gid = ip->i_e2fs_gid_low | (ip->i_e2fs_gid_high << 16);
909 
910 	/* If the inode was deleted, reset all fields */
911 	if (ip->i_e2fs_dtime != 0) {
912 		ip->i_e2fs_mode = ip->i_e2fs_nblock = 0;
913 		(void)ext2fs_setsize(ip, 0);
914 	}
915 
916 	/*
917 	 * Initialize the vnode from the inode, check for aliases.
918 	 * Note that the underlying vnode may have changed.
919 	 */
920 	error = ext2fs_vinit(mp, &ext2fs_specvops, EXT2FS_FIFOOPS, &vp);
921 	if (error) {
922 		vput(vp);
923 		*vpp = NULL;
924 		return (error);
925 	}
926 
927 	/*
928 	 * Finish inode initialization now that aliasing has been resolved.
929 	 */
930 	vref(ip->i_devvp);
931 	/*
932 	 * Set up a generation number for this inode if it does not
933 	 * already have one. This should only happen on old filesystems.
934 	 */
935 	if (ip->i_e2fs_gen == 0) {
936 		if (++ext2gennumber < (u_long)time_second)
937 			ext2gennumber = time_second;
938 		ip->i_e2fs_gen = ext2gennumber;
939 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
940 			ip->i_flag |= IN_MODIFIED;
941 	}
942 
943 	*vpp = vp;
944 	return (0);
945 }
946 
947 /*
948  * File handle to vnode
949  *
950  * Have to be really careful about stale file handles:
951  * - check that the inode number is valid
952  * - call ext2fs_vget() to get the locked inode
953  * - check for an unallocated inode (i_mode == 0)
954  * - check that the given client host has export rights and return
955  *   those rights via. exflagsp and credanonp
956  */
957 int
958 ext2fs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
959 {
960 	struct inode *ip;
961 	struct vnode *nvp;
962 	int error;
963 	struct ufid *ufhp;
964 	struct m_ext2fs *fs;
965 
966 	ufhp = (struct ufid *)fhp;
967 	fs = VFSTOUFS(mp)->um_e2fs;
968 	if ((ufhp->ufid_ino < EXT2_FIRSTINO && ufhp->ufid_ino != EXT2_ROOTINO) ||
969 	    ufhp->ufid_ino > fs->e2fs_ncg * fs->e2fs.e2fs_ipg)
970 		return (ESTALE);
971 
972 	if ((error = VFS_VGET(mp, ufhp->ufid_ino, &nvp)) != 0) {
973 		*vpp = NULLVP;
974 		return (error);
975 	}
976 	ip = VTOI(nvp);
977 	if (ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime != 0 ||
978 	    ip->i_e2fs_gen != ufhp->ufid_gen) {
979 		vput(nvp);
980 		*vpp = NULLVP;
981 		return (ESTALE);
982 	}
983 	*vpp = nvp;
984 	return (0);
985 }
986 
987 /*
988  * Vnode pointer to File handle
989  */
990 /* ARGSUSED */
991 int
992 ext2fs_vptofh(struct vnode *vp, struct fid *fhp)
993 {
994 	struct inode *ip;
995 	struct ufid *ufhp;
996 
997 	ip = VTOI(vp);
998 	ufhp = (struct ufid *)fhp;
999 	ufhp->ufid_len = sizeof(struct ufid);
1000 	ufhp->ufid_ino = ip->i_number;
1001 	ufhp->ufid_gen = ip->i_e2fs_gen;
1002 	return (0);
1003 }
1004 
1005 /*
1006  * no sysctl for ext2fs
1007  */
1008 
1009 int
1010 ext2fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1011     void *newp, size_t newlen, struct proc *p)
1012 {
1013 	return (EOPNOTSUPP);
1014 }
1015 
1016 /*
1017  * Write a superblock and associated information back to disk.
1018  */
1019 int
1020 ext2fs_sbupdate(struct ufsmount *mp, int waitfor)
1021 {
1022 	struct m_ext2fs *fs = mp->um_e2fs;
1023 	struct buf *bp;
1024 	int error = 0;
1025 
1026 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1027 	e2fs_sbsave(&fs->e2fs, (struct ext2fs *) bp->b_data);
1028 	if (waitfor == MNT_WAIT)
1029 		error = bwrite(bp);
1030 	else
1031 		bawrite(bp);
1032 	return (error);
1033 }
1034 
1035 int
1036 ext2fs_cgupdate(struct ufsmount *mp, int waitfor)
1037 {
1038 	struct m_ext2fs *fs = mp->um_e2fs;
1039 	struct buf *bp;
1040 	int i, error = 0, allerror = 0;
1041 
1042 	allerror = ext2fs_sbupdate(mp, waitfor);
1043 	for (i = 0; i < fs->e2fs_ngdb; i++) {
1044 		bp = getblk(mp->um_devvp, fsbtodb(fs, ((fs->e2fs_bsize>1024)?0:1)+i+1),
1045 		    fs->e2fs_bsize, 0, 0);
1046 		e2fs_cgsave(&fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)], (struct ext2_gd*)bp->b_data, fs->e2fs_bsize);
1047 		if (waitfor == MNT_WAIT)
1048 			error = bwrite(bp);
1049 		else
1050 			bawrite(bp);
1051 	}
1052 
1053 	if (!allerror && error)
1054 		allerror = error;
1055 	return (allerror);
1056 }
1057 
1058 /* This is called before the superblock is copied.  Watch out for endianity! */
1059 static int
1060 e2fs_sbcheck(struct ext2fs *fs, int ronly)
1061 {
1062 	u_int32_t tmp;
1063 
1064 	tmp = letoh16(fs->e2fs_magic);
1065 	if (tmp != E2FS_MAGIC) {
1066 		printf("ext2fs: wrong magic number 0x%x\n", tmp);
1067 		return (EIO);		/* XXX needs translation */
1068 	}
1069 
1070 	tmp = letoh32(fs->e2fs_log_bsize);
1071 	if (tmp > 2) {
1072 		/* skewed log(block size): 1024 -> 0 | 2048 -> 1 | 4096 -> 2 */
1073 		tmp += 10;
1074 		printf("ext2fs: wrong log2(block size) %d\n", tmp);
1075 		return (EIO);	   /* XXX needs translation */
1076 	}
1077 
1078 	if (fs->e2fs_bpg == 0) {
1079 		printf("ext2fs: zero blocks per group\n");
1080 		return (EIO);
1081 	}
1082 
1083 	tmp = letoh32(fs->e2fs_rev);
1084 	if (tmp > E2FS_REV1) {
1085 		printf("ext2fs: wrong revision number 0x%x\n", tmp);
1086 		return (EIO);		/* XXX needs translation */
1087 	}
1088 	else if (tmp == E2FS_REV0)
1089 		return (0);
1090 
1091 	tmp = letoh32(fs->e2fs_first_ino);
1092 	if (tmp != EXT2_FIRSTINO) {
1093 		printf("ext2fs: first inode at 0x%x\n", tmp);
1094 		return (EINVAL);      /* XXX needs translation */
1095 	}
1096 
1097 	tmp = letoh32(fs->e2fs_features_incompat);
1098 	if (tmp & ~(EXT2F_INCOMPAT_SUPP | EXT4F_RO_INCOMPAT_SUPP)) {
1099 		printf("ext2fs: unsupported incompat features 0x%x\n", tmp);
1100 		return (EINVAL);      /* XXX needs translation */
1101 	}
1102 
1103 	if (!ronly && (tmp & EXT4F_RO_INCOMPAT_SUPP)) {
1104 		printf("ext4fs: only read-only support right now\n");
1105 		return (EROFS);      /* XXX needs translation */
1106 	}
1107 
1108 	if (tmp & EXT2F_INCOMPAT_RECOVER) {
1109 		printf("ext2fs: your file system says it needs recovery\n");
1110 		if (!ronly)
1111 			return (EROFS);	/* XXX needs translation */
1112 	}
1113 
1114 	tmp = letoh32(fs->e2fs_features_rocompat);
1115 	if (!ronly && (tmp & ~EXT2F_ROCOMPAT_SUPP)) {
1116 		printf("ext2fs: unsupported R/O compat features 0x%x\n", tmp);
1117 		return (EROFS);      /* XXX needs translation */
1118 	}
1119 
1120 	return (0);
1121 }
1122