xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_vfsops.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: ext2fs_vfsops.c,v 1.64 2011/07/04 20:35:35 deraadt Exp $	*/
2 /*	$NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1989, 1991, 1993, 1994
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *	notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *	notice, this list of conditions and the following disclaimer in the
16  *	documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *	may be used to endorse or promote products derived from this software
19  *	without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.14 (Berkeley) 11/28/94
34  * Modified for ext2fs by Manuel Bouyer.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/namei.h>
40 #include <sys/proc.h>
41 #include <sys/kernel.h>
42 #include <sys/vnode.h>
43 #include <sys/socket.h>
44 #include <sys/mount.h>
45 #include <sys/buf.h>
46 #include <sys/device.h>
47 #include <sys/disk.h>
48 #include <sys/mbuf.h>
49 #include <sys/file.h>
50 #include <sys/disklabel.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/malloc.h>
54 #include <sys/pool.h>
55 #include <sys/lock.h>
56 #include <sys/dkio.h>
57 #include <sys/specdev.h>
58 
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/dir.h>
63 #include <ufs/ufs/ufs_extern.h>
64 
65 #include <ufs/ext2fs/ext2fs.h>
66 #include <ufs/ext2fs/ext2fs_extern.h>
67 
68 extern struct lock ufs_hashlock;
69 
70 int ext2fs_sbupdate(struct ufsmount *, int);
71 static int ext2fs_checksb(struct ext2fs *, int);
72 
/*
 * VFS operations vector for ext2fs.
 *
 * The initializer is positional, so the order of the entries below must
 * match the member order of struct vfsops exactly.  The ufs_* entries are
 * not defined in this file (presumably the generic UFS routines declared
 * in ufs_extern.h); the ext2fs_* entries are the ext2-specific
 * implementations.
 */
const struct vfsops ext2fs_vfsops = {
	ext2fs_mount,
	ufs_start,
	ext2fs_unmount,
	ufs_root,
	ufs_quotactl,
	ext2fs_statfs,
	ext2fs_sync,
	ext2fs_vget,
	ext2fs_fhtovp,
	ext2fs_vptofh,
	ext2fs_init,
	ext2fs_sysctl,
	ufs_check_export
};
88 
89 struct pool ext2fs_inode_pool;
90 struct pool ext2fs_dinode_pool;
91 
92 extern u_long ext2gennumber;
93 
94 int
95 ext2fs_init(struct vfsconf *vfsp)
96 {
97 	pool_init(&ext2fs_inode_pool, sizeof(struct inode), 0, 0, 0,
98 	    "ext2inopl", &pool_allocator_nointr);
99 	pool_init(&ext2fs_dinode_pool, sizeof(struct ext2fs_dinode), 0, 0, 0,
100 	    "ext2dinopl", &pool_allocator_nointr);
101 
102 	return (ufs_init(vfsp));
103 }
104 
105 /*
106  * Called by main() when ext2fs is going to be mounted as root.
107  *
108  * Name is updated by mount(8) after booting.
109  */
110 #define ROOTNAME	"root_device"
111 
112 int
113 ext2fs_mountroot(void)
114 {
115 	struct m_ext2fs *fs;
116         struct mount *mp;
117 	struct proc *p = curproc;	/* XXX */
118 	struct ufsmount *ump;
119 	int error;
120 
121 	/*
122 	 * Get vnodes for swapdev and rootdev.
123 	 */
124 	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
125 		panic("ext2fs_mountroot: can't setup bdevvp's");
126 
127 	if ((error = vfs_rootmountalloc("ext2fs", "root_device", &mp)) != 0) {
128 		vrele(rootvp);
129 		return (error);
130 	}
131 
132 	if ((error = ext2fs_mountfs(rootvp, mp, p)) != 0) {
133 		mp->mnt_vfc->vfc_refcount--;
134 		vfs_unbusy(mp);
135 		free(mp, M_MOUNT);
136 		vrele(rootvp);
137 		return (error);
138 	}
139 
140 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
141 	ump = VFSTOUFS(mp);
142 	fs = ump->um_e2fs;
143 	bzero(fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt));
144 	(void)copystr(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt,
145 	    sizeof(fs->e2fs_fsmnt) - 1, NULL);
146 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
147 		bzero(fs->e2fs.e2fs_fsmnt, sizeof(fs->e2fs.e2fs_fsmnt));
148 		(void)copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
149 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, NULL);
150 	}
151 	(void)ext2fs_statfs(mp, &mp->mnt_stat, p);
152 	vfs_unbusy(mp);
153 	inittodr(fs->e2fs.e2fs_wtime);
154 	return (0);
155 }
156 
157 /*
158  * VFS Operations.
159  *
160  * mount system call
161  */
162 int
163 ext2fs_mount(struct mount *mp, const char *path, void *data,
164     struct nameidata *ndp, struct proc *p)
165 {
166 	struct vnode *devvp;
167 	struct ufs_args args;
168 	struct ufsmount *ump = NULL;
169 	struct m_ext2fs *fs;
170 	size_t size;
171 	int error, flags;
172 	mode_t accessmode;
173 	char *fspec = NULL;
174 
175 	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
176 	if (error)
177 		return (error);
178 
179 	/*
180 	 * If updating, check whether changing from read-only to
181 	 * read/write; if there is no device name, that's all we do.
182 	 */
183 	if (mp->mnt_flag & MNT_UPDATE) {
184 		ump = VFSTOUFS(mp);
185 		fs = ump->um_e2fs;
186 		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
187 			flags = WRITECLOSE;
188 			if (mp->mnt_flag & MNT_FORCE)
189 				flags |= FORCECLOSE;
190 			error = ext2fs_flushfiles(mp, flags, p);
191 			if (error == 0 &&
192 			    ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
193 			    (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
194 				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
195 				(void)ext2fs_sbupdate(ump, MNT_WAIT);
196 			}
197 			if (error)
198 				return (error);
199 			fs->e2fs_ronly = 1;
200 		}
201 		if (mp->mnt_flag & MNT_RELOAD) {
202 			error = ext2fs_reload(mp, ndp->ni_cnd.cn_cred, p);
203 			if (error)
204 				return (error);
205 		}
206 		if (fs->e2fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
207 			/*
208 			 * If upgrade to read-write by non-root, then verify
209 			 * that user has necessary permissions on the device.
210 			 */
211 			if (suser(p, 0) != 0) {
212 				devvp = ump->um_devvp;
213 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
214 				error = VOP_ACCESS(devvp, VREAD | VWRITE,
215 				    p->p_ucred, p);
216 				VOP_UNLOCK(devvp, 0, p);
217 				if (error)
218 					return (error);
219 			}
220 			fs->e2fs_ronly = 0;
221 			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
222 				fs->e2fs.e2fs_state = 0;
223 			else
224 				fs->e2fs.e2fs_state = E2FS_ERRORS;
225 			fs->e2fs_fmod = 1;
226 		}
227 		if (args.fspec == 0) {
228 			/*
229 			 * Process export requests.
230 			 */
231 			return (vfs_export(mp, &ump->um_export,
232 			    &args.export_info));
233 		}
234 	}
235 	/*
236 	 * Not an update, or updating the name: look up the name
237 	 * and verify that it refers to a sensible block device.
238 	 */
239 	fspec = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
240 	error = copyinstr(args.fspec, fspec, MNAMELEN - 1, &size);
241 	if (error)
242 		goto error;
243 	disk_map(fspec, fspec, MNAMELEN, DM_OPENBLCK);
244 
245 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, p);
246 	if ((error = namei(ndp)) != 0)
247 		goto error;
248 	devvp = ndp->ni_vp;
249 
250 	if (devvp->v_type != VBLK) {
251 		error = ENOTBLK;
252 		goto error_devvp;
253 	}
254 	if (major(devvp->v_rdev) >= nblkdev) {
255 		error = ENXIO;
256 		goto error_devvp;
257 	}
258 	/*
259 	 * If mount by non-root, then verify that user has necessary
260 	 * permissions on the device.
261 	 */
262 	if (suser(p, 0) != 0) {
263 		accessmode = VREAD;
264 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
265 			accessmode |= VWRITE;
266 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
267 		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
268 		VOP_UNLOCK(devvp, 0, p);
269 		if (error)
270 			goto error_devvp;
271 	}
272 	if ((mp->mnt_flag & MNT_UPDATE) == 0)
273 		error = ext2fs_mountfs(devvp, mp, p);
274 	else {
275 		if (devvp != ump->um_devvp)
276 			error = EINVAL;	/* XXX needs translation */
277 		else
278 			vrele(devvp);
279 	}
280 	if (error)
281 		goto error_devvp;
282 	ump = VFSTOUFS(mp);
283 	fs = ump->um_e2fs;
284 	(void)copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1,
285 	    &size);
286 	bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size);
287 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
288 		(void)copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
289 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, &size);
290 		bzero(fs->e2fs.e2fs_fsmnt, sizeof(fs->e2fs.e2fs_fsmnt) - size);
291 	}
292 	bcopy(fs->e2fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
293 	size = strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN - 1);
294 	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
295 	if (fs->e2fs_fmod != 0) {	/* XXX */
296 		fs->e2fs_fmod = 0;
297 		if (fs->e2fs.e2fs_state == 0)
298 			fs->e2fs.e2fs_wtime = time_second;
299 		else
300 			printf("%s: file system not clean; please fsck(8)\n",
301 				mp->mnt_stat.f_mntfromname);
302 		(void)ext2fs_cgupdate(ump, MNT_WAIT);
303 	}
304 
305 	goto success;
306 
307 error_devvp:
308 	/* Error with devvp held. */
309 	vrele(devvp);
310 
311 error:
312 	/* Error with no state to backout. */
313 
314 success:
315 	if (fspec)
316 		free(fspec, M_MOUNT);
317 
318 	return (error);
319 }
320 
321 int ext2fs_reload_vnode(struct vnode *, void *args);
322 
323 struct ext2fs_reload_args {
324 	struct m_ext2fs *fs;
325 	struct proc *p;
326 	struct ucred *cred;
327 	struct vnode *devvp;
328 };
329 
330 int
331 ext2fs_reload_vnode(struct vnode *vp, void *args)
332 {
333 	struct ext2fs_reload_args *era = args;
334 	struct buf *bp;
335 	struct inode *ip;
336 	int error;
337 	caddr_t cp;
338 
339 	/*
340 	 * Step 4: invalidate all inactive vnodes.
341 	 */
342 	if (vp->v_usecount == 0) {
343 		vgonel(vp, era->p);
344 		return (0);
345 	}
346 
347 	/*
348 	 * Step 5: invalidate all cached file data.
349 	 */
350 	if (vget(vp, LK_EXCLUSIVE, era->p))
351 		return (0);
352 
353 	if (vinvalbuf(vp, 0, era->cred, era->p, 0, 0))
354 		panic("ext2fs_reload: dirty2");
355 	/*
356 	 * Step 6: re-read inode data for all active vnodes.
357 	 */
358 	ip = VTOI(vp);
359 	error = bread(era->devvp,
360 	    fsbtodb(era->fs, ino_to_fsba(era->fs, ip->i_number)),
361 	    (int)era->fs->e2fs_bsize, &bp);
362 	if (error) {
363 		vput(vp);
364 		return (error);
365 	}
366 	cp = (caddr_t)bp->b_data +
367 	    (ino_to_fsbo(era->fs, ip->i_number) * EXT2_DINODE_SIZE(era->fs));
368 	e2fs_iload((struct ext2fs_dinode *)cp, ip->i_e2din);
369 	brelse(bp);
370 	vput(vp);
371 	return (0);
372 }
373 
374 /*
375  * Reload all incore data for a filesystem (used after running fsck on
376  * the root filesystem and finding things to fix). The filesystem must
377  * be mounted read-only.
378  *
379  * Things to do to update the mount:
380  *	1) invalidate all cached meta-data.
381  *	2) re-read superblock from disk.
382  *	3) re-read summary information from disk.
383  *	4) invalidate all inactive vnodes.
384  *	5) invalidate all cached file data.
385  *	6) re-read inode data for all active vnodes.
386  */
387 int
388 ext2fs_reload(struct mount *mountp, struct ucred *cred, struct proc *p)
389 {
390 	struct vnode *devvp;
391 	struct buf *bp;
392 	struct m_ext2fs *fs;
393 	struct ext2fs *newfs;
394 	int i, error;
395 	struct ext2fs_reload_args era;
396 
397 	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
398 		return (EINVAL);
399 	/*
400 	 * Step 1: invalidate all cached meta-data.
401 	 */
402 	devvp = VFSTOUFS(mountp)->um_devvp;
403 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
404 		panic("ext2fs_reload: dirty1");
405 
406 	/*
407 	 * Step 2: re-read superblock from disk.
408 	 */
409 	error = bread(devvp, (daddr64_t)(SBOFF / DEV_BSIZE), SBSIZE, &bp);
410 	if (error) {
411 		brelse(bp);
412 		return (error);
413 	}
414 	newfs = (struct ext2fs *)bp->b_data;
415 	error = ext2fs_checksb(newfs, (mountp->mnt_flag & MNT_RDONLY) != 0);
416 	if (error) {
417 		brelse(bp);
418 		return (error);
419 	}
420 
421 	fs = VFSTOUFS(mountp)->um_e2fs;
422 	/*
423 	 * copy in new superblock, and compute in-memory values
424 	 */
425 	e2fs_sbload(newfs, &fs->e2fs);
426 	fs->e2fs_ncg =
427 	    howmany(fs->e2fs.e2fs_bcount - fs->e2fs.e2fs_first_dblock,
428 	    fs->e2fs.e2fs_bpg);
429 	/* XXX assume hw bsize = 512 */
430 	fs->e2fs_fsbtodb = fs->e2fs.e2fs_log_bsize + 1;
431 	fs->e2fs_bsize = 1024 << fs->e2fs.e2fs_log_bsize;
432 	fs->e2fs_bshift = LOG_MINBSIZE + fs->e2fs.e2fs_log_bsize;
433 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
434 	fs->e2fs_bmask = ~fs->e2fs_qbmask;
435 	fs->e2fs_ngdb = howmany(fs->e2fs_ncg,
436 	    fs->e2fs_bsize / sizeof(struct ext2_gd));
437 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE(fs);
438 	fs->e2fs_itpg = fs->e2fs.e2fs_ipg/fs->e2fs_ipb;
439 
440 	/*
441 	 * Step 3: re-read summary information from disk.
442 	 */
443 
444 	for (i=0; i < fs->e2fs_ngdb; i++) {
445 		error = bread(devvp ,
446 		    fsbtodb(fs, ((fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
447 		    fs->e2fs_bsize, &bp);
448 		if (error) {
449 			brelse(bp);
450 			return (error);
451 		}
452 		e2fs_cgload((struct ext2_gd*)bp->b_data,
453 		    &fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)],
454 		    fs->e2fs_bsize);
455 		brelse(bp);
456 	}
457 
458 	era.p = p;
459 	era.cred = cred;
460 	era.fs = fs;
461 	era.devvp = devvp;
462 
463 	error = vfs_mount_foreach_vnode(mountp, ext2fs_reload_vnode, &era);
464 
465 	return (error);
466 }
467 
468 /*
469  * Common code for mount and mountroot
470  */
471 int
472 ext2fs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
473 {
474 	struct ufsmount *ump;
475 	struct buf *bp;
476 	struct ext2fs *fs;
477 	struct m_ext2fs *m_fs;
478 	dev_t dev;
479 	int error, i, ronly;
480 	struct ucred *cred;
481 
482 	dev = devvp->v_rdev;
483 	cred = p ? p->p_ucred : NOCRED;
484 	/*
485 	 * Disallow multiple mounts of the same device.
486 	 * Disallow mounting of a device that is currently in use
487 	 * (except for root, which might share swap device for miniroot).
488 	 * Flush out any old buffers remaining from a previous use.
489 	 */
490 	if ((error = vfs_mountedon(devvp)) != 0)
491 		return (error);
492 	if (vcount(devvp) > 1 && devvp != rootvp)
493 		return (EBUSY);
494 	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
495 		return (error);
496 
497 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
498 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
499 	if (error)
500 		return (error);
501 
502 	bp = NULL;
503 	ump = NULL;
504 
505 #ifdef DEBUG_EXT2
506 	printf("ext2 sb size: %d\n", sizeof(struct ext2fs));
507 #endif
508 	error = bread(devvp, (daddr64_t)(SBOFF / DEV_BSIZE), SBSIZE, &bp);
509 	if (error)
510 		goto out;
511 	fs = (struct ext2fs *)bp->b_data;
512 	error = ext2fs_checksb(fs, ronly);
513 	if (error)
514 		goto out;
515 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
516 	ump->um_e2fs = malloc(sizeof(struct m_ext2fs), M_UFSMNT,
517 	    M_WAITOK | M_ZERO);
518 	e2fs_sbload((struct ext2fs*)bp->b_data, &ump->um_e2fs->e2fs);
519 	brelse(bp);
520 	bp = NULL;
521 	m_fs = ump->um_e2fs;
522 	m_fs->e2fs_ronly = ronly;
523 	ump->um_fstype = UM_EXT2FS;
524 
525 #ifdef DEBUG_EXT2
526 	printf("ext2 ino size %d\n", EXT2_DINODE_SIZE(m_fs));
527 #endif
528 	if (ronly == 0) {
529 		if (m_fs->e2fs.e2fs_state == E2FS_ISCLEAN)
530 			m_fs->e2fs.e2fs_state = 0;
531 		else
532 			m_fs->e2fs.e2fs_state = E2FS_ERRORS;
533 		m_fs->e2fs_fmod = 1;
534 	}
535 
536 	/* compute dynamic sb infos */
537 	m_fs->e2fs_ncg =
538 		howmany(m_fs->e2fs.e2fs_bcount - m_fs->e2fs.e2fs_first_dblock,
539 		m_fs->e2fs.e2fs_bpg);
540 	/* XXX assume hw bsize = 512 */
541 	m_fs->e2fs_fsbtodb = m_fs->e2fs.e2fs_log_bsize + 1;
542 	m_fs->e2fs_bsize = 1024 << m_fs->e2fs.e2fs_log_bsize;
543 	m_fs->e2fs_bshift = LOG_MINBSIZE + m_fs->e2fs.e2fs_log_bsize;
544 	m_fs->e2fs_qbmask = m_fs->e2fs_bsize - 1;
545 	m_fs->e2fs_bmask = ~m_fs->e2fs_qbmask;
546 	m_fs->e2fs_ngdb = howmany(m_fs->e2fs_ncg,
547 		m_fs->e2fs_bsize / sizeof(struct ext2_gd));
548 	m_fs->e2fs_ipb = m_fs->e2fs_bsize / EXT2_DINODE_SIZE(m_fs);
549 	m_fs->e2fs_itpg = m_fs->e2fs.e2fs_ipg/m_fs->e2fs_ipb;
550 
551 	m_fs->e2fs_gd = malloc(m_fs->e2fs_ngdb * m_fs->e2fs_bsize,
552 	    M_UFSMNT, M_WAITOK);
553 	for (i=0; i < m_fs->e2fs_ngdb; i++) {
554 		error = bread(devvp ,
555 		    fsbtodb(m_fs, ((m_fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
556 		    m_fs->e2fs_bsize, &bp);
557 		if (error) {
558 			free(m_fs->e2fs_gd, M_UFSMNT);
559 			goto out;
560 		}
561 		e2fs_cgload((struct ext2_gd*)bp->b_data,
562 		    &m_fs->e2fs_gd[i * m_fs->e2fs_bsize
563 		    / sizeof(struct ext2_gd)],
564 		    m_fs->e2fs_bsize);
565 		brelse(bp);
566 		bp = NULL;
567 	}
568 
569 	mp->mnt_data = (qaddr_t)ump;
570 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
571 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
572 	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
573 	mp->mnt_flag |= MNT_LOCAL;
574 	ump->um_mountp = mp;
575 	ump->um_dev = dev;
576 	ump->um_devvp = devvp;
577 	ump->um_nindir = NINDIR(m_fs);
578 	ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
579 	ump->um_seqinc = 1; /* no frags */
580 	devvp->v_specmountpoint = mp;
581 	return (0);
582 out:
583 	if (bp)
584 		brelse(bp);
585 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
586 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
587 	VOP_UNLOCK(devvp, 0, p);
588 	if (ump) {
589 		free(ump->um_e2fs, M_UFSMNT);
590 		free(ump, M_UFSMNT);
591 		mp->mnt_data = (qaddr_t)0;
592 	}
593 	return (error);
594 }
595 
596 /*
597  * unmount system call
598  */
599 int
600 ext2fs_unmount(struct mount *mp, int mntflags, struct proc *p)
601 {
602 	struct ufsmount *ump;
603 	struct m_ext2fs *fs;
604 	int error, flags;
605 
606 	flags = 0;
607 	if (mntflags & MNT_FORCE)
608 		flags |= FORCECLOSE;
609 	if ((error = ext2fs_flushfiles(mp, flags, p)) != 0)
610 		return (error);
611 	ump = VFSTOUFS(mp);
612 	fs = ump->um_e2fs;
613 	if (fs->e2fs_ronly == 0 &&
614 		ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
615 		(fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
616 		fs->e2fs.e2fs_state = E2FS_ISCLEAN;
617 		(void) ext2fs_sbupdate(ump, MNT_WAIT);
618 	}
619 
620 	if (ump->um_devvp->v_type != VBAD)
621 		ump->um_devvp->v_specmountpoint = NULL;
622 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
623 	error = VOP_CLOSE(ump->um_devvp, fs->e2fs_ronly ? FREAD : FREAD|FWRITE,
624 	    NOCRED, p);
625 	vput(ump->um_devvp);
626 	free(fs->e2fs_gd, M_UFSMNT);
627 	free(fs, M_UFSMNT);
628 	free(ump, M_UFSMNT);
629 	mp->mnt_data = (qaddr_t)0;
630 	mp->mnt_flag &= ~MNT_LOCAL;
631 	return (error);
632 }
633 
634 /*
635  * Flush out all the files in a filesystem.
636  */
637 int
638 ext2fs_flushfiles(struct mount *mp, int flags, struct proc *p)
639 {
640 	struct ufsmount *ump;
641 	int error;
642 
643 	ump = VFSTOUFS(mp);
644 	/*
645 	 * Flush all the files.
646 	 */
647 	if ((error = vflush(mp, NULL, flags)) != 0)
648 		return (error);
649 	/*
650 	 * Flush filesystem metadata.
651 	 */
652 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
653 	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
654 	VOP_UNLOCK(ump->um_devvp, 0, p);
655 	return (error);
656 }
657 
658 /*
659  * Get file system statistics.
660  */
661 int
662 ext2fs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p)
663 {
664 	struct ufsmount *ump;
665 	struct m_ext2fs *fs;
666 	u_int32_t overhead, overhead_per_group;
667 	int i, ngroups;
668 
669 	ump = VFSTOUFS(mp);
670 	fs = ump->um_e2fs;
671 	if (fs->e2fs.e2fs_magic != E2FS_MAGIC)
672 		panic("ext2fs_statfs");
673 
674 	/*
675 	 * Compute the overhead (FS structures)
676 	 */
677 	overhead_per_group = 1 /* block bitmap */ + 1 /* inode bitmap */ +
678 	    fs->e2fs_itpg;
679 	overhead = fs->e2fs.e2fs_first_dblock +
680 	    fs->e2fs_ncg * overhead_per_group;
681 	if (fs->e2fs.e2fs_rev > E2FS_REV0 &&
682 	    fs->e2fs.e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
683 		for (i = 0, ngroups = 0; i < fs->e2fs_ncg; i++) {
684 			if (cg_has_sb(i))
685 				ngroups++;
686 		}
687 	} else {
688 		ngroups = fs->e2fs_ncg;
689 	}
690 	overhead += ngroups * (1 + fs->e2fs_ngdb);
691 
692 	sbp->f_bsize = fs->e2fs_bsize;
693 	sbp->f_iosize = fs->e2fs_bsize;
694 	sbp->f_blocks = fs->e2fs.e2fs_bcount - overhead;
695 	sbp->f_bfree = fs->e2fs.e2fs_fbcount;
696 	sbp->f_bavail = sbp->f_bfree - fs->e2fs.e2fs_rbcount;
697 	sbp->f_files =  fs->e2fs.e2fs_icount;
698 	sbp->f_ffree = fs->e2fs.e2fs_ficount;
699 	if (sbp != &mp->mnt_stat) {
700 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
701 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
702 	}
703 	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
704 	return (0);
705 }
706 
707 int ext2fs_sync_vnode(struct vnode *vp, void *);
708 
709 struct ext2fs_sync_args {
710 	int allerror;
711 	int waitfor;
712 	struct proc *p;
713 	struct ucred *cred;
714 };
715 
716 int
717 ext2fs_sync_vnode(struct vnode *vp, void *args)
718 {
719 	struct ext2fs_sync_args *esa = args;
720 	struct inode *ip;
721 	int error;
722 
723 	ip = VTOI(vp);
724 	if (vp->v_type == VNON ||
725 	    ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
726 	    LIST_EMPTY(&vp->v_dirtyblkhd)) ||
727 	    esa->waitfor == MNT_LAZY) {
728 		return (0);
729 	}
730 
731 	if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, esa->p))
732 		return (0);
733 
734 	if ((error = VOP_FSYNC(vp, esa->cred, esa->waitfor, esa->p)) != 0)
735 		esa->allerror = error;
736 	vput(vp);
737 	return (0);
738 }
739 /*
740  * Go through the disk queues to initiate sandbagged IO;
741  * go through the inodes to write those that have been modified;
742  * initiate the writing of the super block if it has been modified.
743  *
744  * Should always be called with the mount point locked.
745  */
746 int
747 ext2fs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)
748 {
749 	struct ufsmount *ump = VFSTOUFS(mp);
750 	struct m_ext2fs *fs;
751 	int error, allerror = 0;
752 	struct ext2fs_sync_args esa;
753 
754 	fs = ump->um_e2fs;
755 	if (fs->e2fs_ronly != 0) {		/* XXX */
756 		printf("fs = %s\n", fs->e2fs_fsmnt);
757 		panic("update: rofs mod");
758 	}
759 
760 	/*
761 	 * Write back each (modified) inode.
762 	 */
763 	esa.p = p;
764 	esa.cred = cred;
765 	esa.allerror = 0;
766 	esa.waitfor = waitfor;
767 
768 	vfs_mount_foreach_vnode(mp, ext2fs_sync_vnode, &esa);
769 	if (esa.allerror != 0)
770 		allerror = esa.allerror;
771 
772 	/*
773 	 * Force stale file system control information to be flushed.
774 	 */
775 	if (waitfor != MNT_LAZY) {
776 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
777 		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
778 			allerror = error;
779 		VOP_UNLOCK(ump->um_devvp, 0, p);
780 	}
781 	/*
782 	 * Write back modified superblock.
783 	 */
784 	if (fs->e2fs_fmod != 0) {
785 		fs->e2fs_fmod = 0;
786 		fs->e2fs.e2fs_wtime = time_second;
787 		if ((error = ext2fs_cgupdate(ump, waitfor)))
788 			allerror = error;
789 	}
790 	return (allerror);
791 }
792 
793 /*
794  * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
795  * in from disk.  If it is in core, wait for the lock bit to clear, then
796  * return the inode locked.  Detection and handling of mount points must be
797  * done by the calling routine.
798  */
799 int
800 ext2fs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
801 {
802 	struct m_ext2fs *fs;
803 	struct inode *ip;
804 	struct ext2fs_dinode *dp;
805 	struct ufsmount *ump;
806 	struct buf *bp;
807 	struct vnode *vp;
808 	dev_t dev;
809 	int error;
810 
811 	ump = VFSTOUFS(mp);
812 	dev = ump->um_dev;
813 
814  retry:
815 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
816 		return (0);
817 
818 	/* Allocate a new vnode/inode. */
819 	if ((error = getnewvnode(VT_EXT2FS, mp, &ext2fs_vops, &vp)) != 0) {
820 		*vpp = NULL;
821 		return (error);
822 	}
823 
824 	ip = pool_get(&ext2fs_inode_pool, PR_WAITOK|PR_ZERO);
825 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
826 	vp->v_data = ip;
827 	ip->i_vnode = vp;
828 	ip->i_ump = ump;
829 	ip->i_e2fs = fs = ump->um_e2fs;
830 	ip->i_dev = dev;
831 	ip->i_number = ino;
832 	ip->i_e2fs_last_lblk = 0;
833 	ip->i_e2fs_last_blk = 0;
834 
835 	/*
836 	 * Put it onto its hash chain and lock it so that other requests for
837 	 * this inode will block if they arrive while we are sleeping waiting
838 	 * for old data structures to be purged or for the contents of the
839 	 * disk portion of this inode to be read.
840 	 */
841 	error = ufs_ihashins(ip);
842 
843 	if (error) {
844 		vrele(vp);
845 
846 		if (error == EEXIST)
847 			goto retry;
848 
849 		return (error);
850 	}
851 
852 	/* Read in the disk contents for the inode, copy into the inode. */
853 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
854 	    (int)fs->e2fs_bsize, &bp);
855 	if (error) {
856 		/*
857 		 * The inode does not contain anything useful, so it would
858 	 	 * be misleading to leave it on its hash chain. With mode
859 		 * still zero, it will be unlinked and returned to the free
860 		 * list by vput().
861 		 */
862 		vput(vp);
863 		brelse(bp);
864 		*vpp = NULL;
865 		return (error);
866 	}
867 
868 	dp = (struct ext2fs_dinode *) ((char *)bp->b_data
869 	    + EXT2_DINODE_SIZE(fs) * ino_to_fsbo(fs, ino));
870 
871 	ip->i_e2din = pool_get(&ext2fs_dinode_pool, PR_WAITOK);
872 	e2fs_iload(dp, ip->i_e2din);
873 	brelse(bp);
874 
875 	ip->i_effnlink = ip->i_e2fs_nlink;
876 
877 	/*
878 	 * The fields for storing the UID and GID of an ext2fs inode are
879 	 * limited to 16 bits. To overcome this limitation, Linux decided to
880 	 * scatter the highest bits of these values into a previously reserved
881 	 * area on the disk inode. We deal with this situation by having two
882 	 * 32-bit fields *out* of the disk inode to hold the complete values.
883 	 * Now that we are reading in the inode, compute these fields.
884 	 */
885 	ip->i_e2fs_uid = ip->i_e2fs_uid_low | (ip->i_e2fs_uid_high << 16);
886 	ip->i_e2fs_gid = ip->i_e2fs_gid_low | (ip->i_e2fs_gid_high << 16);
887 
888 	/* If the inode was deleted, reset all fields */
889 	if (ip->i_e2fs_dtime != 0) {
890 		ip->i_e2fs_mode = ip->i_e2fs_nblock = 0;
891 		(void)ext2fs_setsize(ip, 0);
892 	}
893 
894 	/*
895 	 * Initialize the vnode from the inode, check for aliases.
896 	 * Note that the underlying vnode may have changed.
897 	 */
898 	error = ext2fs_vinit(mp, &ext2fs_specvops, EXT2FS_FIFOOPS, &vp);
899 	if (error) {
900 		vput(vp);
901 		*vpp = NULL;
902 		return (error);
903 	}
904 
905 	/*
906 	 * Finish inode initialization now that aliasing has been resolved.
907 	 */
908 	vref(ip->i_devvp);
909 	/*
910 	 * Set up a generation number for this inode if it does not
911 	 * already have one. This should only happen on old filesystems.
912 	 */
913 	if (ip->i_e2fs_gen == 0) {
914 		if (++ext2gennumber < (u_long)time_second)
915 			ext2gennumber = time_second;
916 		ip->i_e2fs_gen = ext2gennumber;
917 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
918 			ip->i_flag |= IN_MODIFIED;
919 	}
920 
921 	*vpp = vp;
922 	return (0);
923 }
924 
925 /*
926  * File handle to vnode
927  *
928  * Have to be really careful about stale file handles:
929  * - check that the inode number is valid
930  * - call ext2fs_vget() to get the locked inode
931  * - check for an unallocated inode (i_mode == 0)
932  * - check that the given client host has export rights and return
933  *   those rights via. exflagsp and credanonp
934  */
935 int
936 ext2fs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
937 {
938 	struct inode *ip;
939 	struct vnode *nvp;
940 	int error;
941 	struct ufid *ufhp;
942 	struct m_ext2fs *fs;
943 
944 	ufhp = (struct ufid *)fhp;
945 	fs = VFSTOUFS(mp)->um_e2fs;
946 	if ((ufhp->ufid_ino < EXT2_FIRSTINO && ufhp->ufid_ino != EXT2_ROOTINO) ||
947 	    ufhp->ufid_ino > fs->e2fs_ncg * fs->e2fs.e2fs_ipg)
948 		return (ESTALE);
949 
950 	if ((error = VFS_VGET(mp, ufhp->ufid_ino, &nvp)) != 0) {
951 		*vpp = NULLVP;
952 		return (error);
953 	}
954 	ip = VTOI(nvp);
955 	if (ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime != 0 ||
956 	    ip->i_e2fs_gen != ufhp->ufid_gen) {
957 		vput(nvp);
958 		*vpp = NULLVP;
959 		return (ESTALE);
960 	}
961 	*vpp = nvp;
962 	return (0);
963 }
964 
965 /*
966  * Vnode pointer to File handle
967  */
968 /* ARGSUSED */
969 int
970 ext2fs_vptofh(struct vnode *vp, struct fid *fhp)
971 {
972 	struct inode *ip;
973 	struct ufid *ufhp;
974 
975 	ip = VTOI(vp);
976 	ufhp = (struct ufid *)fhp;
977 	ufhp->ufid_len = sizeof(struct ufid);
978 	ufhp->ufid_ino = ip->i_number;
979 	ufhp->ufid_gen = ip->i_e2fs_gen;
980 	return (0);
981 }
982 
/*
 * no sysctl for ext2fs
 *
 * Every request is rejected with EOPNOTSUPP; none of the arguments are
 * examined.
 */

int
ext2fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	const int unsupported = EOPNOTSUPP;

	return (unsupported);
}
993 
994 /*
995  * Write a superblock and associated information back to disk.
996  */
997 int
998 ext2fs_sbupdate(struct ufsmount *mp, int waitfor)
999 {
1000 	struct m_ext2fs *fs = mp->um_e2fs;
1001 	struct buf *bp;
1002 	int error = 0;
1003 
1004 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1005 	e2fs_sbsave(&fs->e2fs, (struct ext2fs *) bp->b_data);
1006 	if (waitfor == MNT_WAIT)
1007 		error = bwrite(bp);
1008 	else
1009 		bawrite(bp);
1010 	return (error);
1011 }
1012 
1013 int
1014 ext2fs_cgupdate(struct ufsmount *mp, int waitfor)
1015 {
1016 	struct m_ext2fs *fs = mp->um_e2fs;
1017 	struct buf *bp;
1018 	int i, error = 0, allerror = 0;
1019 
1020 	allerror = ext2fs_sbupdate(mp, waitfor);
1021 	for (i = 0; i < fs->e2fs_ngdb; i++) {
1022 		bp = getblk(mp->um_devvp, fsbtodb(fs, ((fs->e2fs_bsize>1024)?0:1)+i+1),
1023 		    fs->e2fs_bsize, 0, 0);
1024 		e2fs_cgsave(&fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)], (struct ext2_gd*)bp->b_data, fs->e2fs_bsize);
1025 		if (waitfor == MNT_WAIT)
1026 			error = bwrite(bp);
1027 		else
1028 			bawrite(bp);
1029 	}
1030 
1031 	if (!allerror && error)
1032 		allerror = error;
1033 	return (allerror);
1034 }
1035 
1036 static int
1037 ext2fs_checksb(struct ext2fs *fs, int ronly)
1038 {
1039 	if (fs2h16(fs->e2fs_magic) != E2FS_MAGIC) {
1040 		return (EIO);		/* XXX needs translation */
1041 	}
1042 	if (fs2h32(fs->e2fs_rev) > E2FS_REV1) {
1043 #ifdef DIAGNOSTIC
1044 		printf("Ext2 fs: unsupported revision number: %x\n",
1045 		    fs2h32(fs->e2fs_rev));
1046 #endif
1047 		return (EIO);		/* XXX needs translation */
1048 	}
1049 	if (fs2h32(fs->e2fs_log_bsize) > 2) { /* block size = 1024|2048|4096 */
1050 #ifdef DIAGNOSTIC
1051 		printf("Ext2 fs: bad block size: %d (expected <=2 for ext2 fs)\n",
1052 		    fs2h32(fs->e2fs_log_bsize));
1053 #endif
1054 		return (EIO);	   /* XXX needs translation */
1055 	}
1056 	if (fs2h32(fs->e2fs_rev) > E2FS_REV0) {
1057 		if (fs2h32(fs->e2fs_first_ino) != EXT2_FIRSTINO) {
1058 			printf("Ext2 fs: unsupported first inode position");
1059 			return (EINVAL);      /* XXX needs translation */
1060 		}
1061 		if (fs2h32(fs->e2fs_features_incompat) &
1062 		    ~EXT2F_INCOMPAT_SUPP) {
1063 			printf("Ext2 fs: unsupported optional feature\n");
1064 			return (EINVAL);      /* XXX needs translation */
1065 		}
1066 		if (!ronly && fs2h32(fs->e2fs_features_rocompat) &
1067 		    ~EXT2F_ROCOMPAT_SUPP) {
1068 			return (EROFS);      /* XXX needs translation */
1069 		}
1070 	}
1071 	return (0);
1072 }
1073