xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_vfsops.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: ext2fs_vfsops.c,v 1.14 2001/03/04 06:32:41 csapuntz Exp $	*/
2 /*	$NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1989, 1991, 1993, 1994
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *	notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *	notice, this list of conditions and the following disclaimer in the
16  *	documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *	must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *	may be used to endorse or promote products derived from this software
23  *	without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)ffs_vfsops.c	8.14 (Berkeley) 11/28/94
38  * Modified for ext2fs by Manuel Bouyer.
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/socket.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/device.h>
51 #include <sys/mbuf.h>
52 #include <sys/file.h>
53 #include <sys/disklabel.h>
54 #include <sys/ioctl.h>
55 #include <sys/errno.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/lock.h>
59 
60 #include <miscfs/specfs/specdev.h>
61 
62 #include <ufs/ufs/quota.h>
63 #include <ufs/ufs/ufsmount.h>
64 #include <ufs/ufs/inode.h>
65 #include <ufs/ufs/dir.h>
66 #include <ufs/ufs/ufs_extern.h>
67 
68 #include <ufs/ext2fs/ext2fs.h>
69 #include <ufs/ext2fs/ext2fs_extern.h>
70 
71 extern struct lock ufs_hashlock;
72 
73 int ext2fs_sbupdate __P((struct ufsmount *, int));
74 static int ext2fs_checksb __P((struct ext2fs *, int));
75 
76 extern struct vnodeopv_desc ext2fs_vnodeop_opv_desc;
77 extern struct vnodeopv_desc ext2fs_specop_opv_desc;
78 extern struct vnodeopv_desc ext2fs_fifoop_opv_desc;
79 
/*
 * NULL-terminated table of the vnode operation vector descriptors that
 * belong to ext2fs.  Judging by their names these cover ordinary vnodes,
 * special (device) vnodes and fifos; the descriptors themselves are
 * defined elsewhere (see the extern declarations above).
 */
struct vnodeopv_desc *ext2fs_vnodeopv_descs[] = {
	&ext2fs_vnodeop_opv_desc,
	&ext2fs_specop_opv_desc,
	&ext2fs_fifoop_opv_desc,
	NULL,
};
86 
/*
 * VFS operations vector for ext2fs.  The initializer is positional, so
 * the order of the entries must match the layout of struct vfsops.
 * ext2fs supplies its own routines for most slots and borrows the
 * generic ufs_start, ufs_root, ufs_quotactl and ufs_check_export.
 */
struct vfsops ext2fs_vfsops = {
	ext2fs_mount,
	ufs_start,
	ext2fs_unmount,
	ufs_root,
	ufs_quotactl,
	ext2fs_statfs,
	ext2fs_sync,
	ext2fs_vget,
	ext2fs_fhtovp,
	ext2fs_vptofh,
	ext2fs_init,
	ext2fs_sysctl,
	ufs_check_export
};
102 
103 struct pool ext2fs_inode_pool;
104 
105 extern u_long ext2gennumber;
106 
107 /*
108  * Called by main() when ext2fs is going to be mounted as root.
109  *
110  * Name is updated by mount(8) after booting.
111  */
112 #define ROOTNAME	"root_device"
113 
114 int
115 ext2fs_mountroot()
116 {
117 	extern struct vnode *rootvp;
118 	register struct m_ext2fs *fs;
119         struct mount *mp;
120 	struct proc *p = curproc;	/* XXX */
121 	struct ufsmount *ump;
122 	int error;
123 
124 	/*
125 	 * Get vnodes for swapdev and rootdev.
126 	 */
127 	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
128 		panic("ext2fs_mountroot: can't setup bdevvp's");
129 
130 	if ((error = vfs_rootmountalloc("ext2fs", "root_device", &mp)) != 0) {
131 		vrele(rootvp);
132 		return (error);
133 	}
134 
135 	if ((error = ext2fs_mountfs(rootvp, mp, p)) != 0) {
136 		mp->mnt_vfc->vfc_refcount--;
137 		vfs_unbusy(mp, p);
138 		free(mp, M_MOUNT);
139 		vrele(rootvp);
140 		return (error);
141 	}
142 	simple_lock(&mountlist_slock);
143 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
144 	simple_unlock(&mountlist_slock);
145 	ump = VFSTOUFS(mp);
146 	fs = ump->um_e2fs;
147 	bzero(fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt));
148 	(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt,
149 	    sizeof(fs->e2fs_fsmnt) - 1, 0);
150 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
151 		bzero(fs->e2fs.e2fs_fsmnt, sizeof(fs->e2fs.e2fs_fsmnt));
152 		(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
153 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, 0);
154 	}
155 	(void)ext2fs_statfs(mp, &mp->mnt_stat, p);
156 	vfs_unbusy(mp, p);
157 	inittodr(fs->e2fs.e2fs_wtime);
158 	return (0);
159 }
160 
161 /*
162  * VFS Operations.
163  *
164  * mount system call
165  */
166 int
167 ext2fs_mount(mp, path, data, ndp, p)
168 	register struct mount *mp;
169 	const char *path;
170 	void *data;
171 	struct nameidata *ndp;
172 	struct proc *p;
173 {
174 	struct vnode *devvp;
175 	struct ufs_args args;
176 	struct ufsmount *ump = NULL;
177 	register struct m_ext2fs *fs;
178 	size_t size;
179 	int error, flags;
180 	mode_t accessmode;
181 
182 	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
183 	if (error)
184 		return (error);
185 	/*
186 	 * If updating, check whether changing from read-only to
187 	 * read/write; if there is no device name, that's all we do.
188 	 */
189 	if (mp->mnt_flag & MNT_UPDATE) {
190 		ump = VFSTOUFS(mp);
191 		fs = ump->um_e2fs;
192 		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193 			flags = WRITECLOSE;
194 			if (mp->mnt_flag & MNT_FORCE)
195 				flags |= FORCECLOSE;
196 			error = ext2fs_flushfiles(mp, flags, p);
197 			if (error == 0 &&
198 				ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
199 				(fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
200 				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
201 				(void) ext2fs_sbupdate(ump, MNT_WAIT);
202 			}
203 			if (error)
204 				return (error);
205 			fs->e2fs_ronly = 1;
206 		}
207 		if (mp->mnt_flag & MNT_RELOAD) {
208 			error = ext2fs_reload(mp, ndp->ni_cnd.cn_cred, p);
209 			if (error)
210 				return (error);
211 		}
212 		if (fs->e2fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
213 			/*
214 			 * If upgrade to read-write by non-root, then verify
215 			 * that user has necessary permissions on the device.
216 			 */
217 			if (p->p_ucred->cr_uid != 0) {
218 				devvp = ump->um_devvp;
219 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
220 				error = VOP_ACCESS(devvp, VREAD | VWRITE,
221 						   p->p_ucred, p);
222 				if (error) {
223 					VOP_UNLOCK(devvp, 0, p);
224 					return (error);
225 				}
226 				VOP_UNLOCK(devvp, 0, p);
227 			}
228 			fs->e2fs_ronly = 0;
229 			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
230 				fs->e2fs.e2fs_state = 0;
231 			else
232 				fs->e2fs.e2fs_state = E2FS_ERRORS;
233 			fs->e2fs_fmod = 1;
234 		}
235 		if (args.fspec == 0) {
236 			/*
237 			 * Process export requests.
238 			 */
239 			return (vfs_export(mp, &ump->um_export, &args.export));
240 		}
241 	}
242 	/*
243 	 * Not an update, or updating the name: look up the name
244 	 * and verify that it refers to a sensible block device.
245 	 */
246 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
247 	if ((error = namei(ndp)) != 0)
248 		return (error);
249 	devvp = ndp->ni_vp;
250 
251 	if (devvp->v_type != VBLK) {
252 		vrele(devvp);
253 		return (ENOTBLK);
254 	}
255 	if (major(devvp->v_rdev) >= nblkdev) {
256 		vrele(devvp);
257 		return (ENXIO);
258 	}
259 	/*
260 	 * If mount by non-root, then verify that user has necessary
261 	 * permissions on the device.
262 	 */
263 	if (p->p_ucred->cr_uid != 0) {
264 		accessmode = VREAD;
265 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
266 			accessmode |= VWRITE;
267 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
268 		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
269 		if (error) {
270 			vput(devvp);
271 			return (error);
272 		}
273 		VOP_UNLOCK(devvp, 0, p);
274 	}
275 	if ((mp->mnt_flag & MNT_UPDATE) == 0)
276 		error = ext2fs_mountfs(devvp, mp, p);
277 	else {
278 		if (devvp != ump->um_devvp)
279 			error = EINVAL;	/* XXX needs translation */
280 		else
281 			vrele(devvp);
282 	}
283 	if (error) {
284 		vrele(devvp);
285 		return (error);
286 	}
287 	ump = VFSTOUFS(mp);
288 	fs = ump->um_e2fs;
289 	(void) copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1,
290 	    &size);
291 	bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size);
292 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
293 		(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
294 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, &size);
295 		bzero(fs->e2fs.e2fs_fsmnt, sizeof(fs->e2fs.e2fs_fsmnt) - size);
296 	}
297 	bcopy(fs->e2fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
298 	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
299 		&size);
300 	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
301 	if (fs->e2fs_fmod != 0) {	/* XXX */
302 		fs->e2fs_fmod = 0;
303 		if (fs->e2fs.e2fs_state == 0)
304 			fs->e2fs.e2fs_wtime = time.tv_sec;
305 		else
306 			printf("%s: file system not clean; please fsck(8)\n",
307 				mp->mnt_stat.f_mntfromname);
308 		(void) ext2fs_cgupdate(ump, MNT_WAIT);
309 	}
310 	return (0);
311 }
312 
313 /*
314  * Reload all incore data for a filesystem (used after running fsck on
315  * the root filesystem and finding things to fix). The filesystem must
316  * be mounted read-only.
317  *
318  * Things to do to update the mount:
319  *	1) invalidate all cached meta-data.
320  *	2) re-read superblock from disk.
321  *	3) re-read summary information from disk.
322  *	4) invalidate all inactive vnodes.
323  *	5) invalidate all cached file data.
324  *	6) re-read inode data for all active vnodes.
325  */
326 int
327 ext2fs_reload(mountp, cred, p)
328 	register struct mount *mountp;
329 	struct ucred *cred;
330 	struct proc *p;
331 {
332 	register struct vnode *vp, *nvp, *devvp;
333 	struct inode *ip;
334 	struct buf *bp;
335 	struct m_ext2fs *fs;
336 	struct ext2fs *newfs;
337 	struct partinfo dpart;
338 	int i, size, error;
339 	caddr_t cp;
340 
341 	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
342 		return (EINVAL);
343 	/*
344 	 * Step 1: invalidate all cached meta-data.
345 	 */
346 	devvp = VFSTOUFS(mountp)->um_devvp;
347 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
348 		panic("ext2fs_reload: dirty1");
349 
350 	/*
351 	 * Step 2: re-read superblock from disk.
352 	 */
353 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
354 		size = DEV_BSIZE;
355 	else
356 		size = dpart.disklab->d_secsize;
357 	error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, NOCRED, &bp);
358 	if (error) {
359 		brelse(bp);
360 		return (error);
361 	}
362 	newfs = (struct ext2fs *)bp->b_data;
363 	error = ext2fs_checksb(newfs, (mountp->mnt_flag & MNT_RDONLY) != 0);
364 	if (error) {
365 		brelse(bp);
366 		return (error);
367 	}
368 
369 	fs = VFSTOUFS(mountp)->um_e2fs;
370 	/*
371 	 * copy in new superblock, and compute in-memory values
372 	 */
373 	e2fs_sbload(newfs, &fs->e2fs);
374 	fs->e2fs_ncg =
375 	    howmany(fs->e2fs.e2fs_bcount - fs->e2fs.e2fs_first_dblock,
376 	    fs->e2fs.e2fs_bpg);
377 	/* XXX assume hw bsize = 512 */
378 	fs->e2fs_fsbtodb = fs->e2fs.e2fs_log_bsize + 1;
379 	fs->e2fs_bsize = 1024 << fs->e2fs.e2fs_log_bsize;
380 	fs->e2fs_bshift = LOG_MINBSIZE + fs->e2fs.e2fs_log_bsize;
381 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
382 	fs->e2fs_bmask = ~fs->e2fs_qbmask;
383 	fs->e2fs_ngdb = howmany(fs->e2fs_ncg,
384 			fs->e2fs_bsize / sizeof(struct ext2_gd));
385 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE;
386 	fs->e2fs_itpg = fs->e2fs.e2fs_ipg/fs->e2fs_ipb;
387 
388 	/*
389 	 * Step 3: re-read summary information from disk.
390 	 */
391 
392 	for (i=0; i < fs->e2fs_ngdb; i++) {
393 		error = bread(devvp ,
394 		    fsbtodb(fs, ((fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
395 		    fs->e2fs_bsize, NOCRED, &bp);
396 		if (error) {
397 			brelse(bp);
398 			return (error);
399 		}
400 		e2fs_cgload((struct ext2_gd*)bp->b_data,
401 		    &fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)],
402 		    fs->e2fs_bsize);
403 		brelse(bp);
404 	}
405 
406 loop:
407 	simple_lock(&mntvnode_slock);
408 	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
409 		if (vp->v_mount != mountp) {
410 			simple_unlock(&mntvnode_slock);
411 			goto loop;
412 		}
413 
414 		nvp = vp->v_mntvnodes.le_next;
415 		/*
416 		 * Step 4: invalidate all inactive vnodes.
417 		 */
418 		if (vrecycle(vp, &mntvnode_slock, p))
419 			goto loop;
420 
421 		/*
422 		 * Step 5: invalidate all cached file data.
423 		 */
424 		simple_lock(&vp->v_interlock);
425 		simple_unlock(&mntvnode_slock);
426 		if (vget(vp, LK_EXCLUSIVE  | LK_INTERLOCK, p))
427 			goto loop;
428 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
429 			panic("ext2fs_reload: dirty2");
430 		/*
431 		 * Step 6: re-read inode data for all active vnodes.
432 		 */
433 		ip = VTOI(vp);
434 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
435 				  (int)fs->e2fs_bsize, NOCRED, &bp);
436 		if (error) {
437 			vput(vp);
438 			return (error);
439 		}
440 		cp = (caddr_t)bp->b_data +
441 		    (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE);
442 		e2fs_iload((struct ext2fs_dinode *)cp, &ip->i_din.e2fs_din);
443 		brelse(bp);
444 		vput(vp);
445 		simple_lock(&mntvnode_slock);
446 	}
447 	simple_unlock(&mntvnode_slock);
448 	return (0);
449 }
450 
451 /*
452  * Common code for mount and mountroot
453  */
454 int
455 ext2fs_mountfs(devvp, mp, p)
456 	register struct vnode *devvp;
457 	struct mount *mp;
458 	struct proc *p;
459 {
460 	register struct ufsmount *ump;
461 	struct buf *bp;
462 	register struct ext2fs *fs;
463 	register struct m_ext2fs *m_fs;
464 	dev_t dev;
465 	struct partinfo dpart;
466 	int error, i, size, ronly;
467 	struct ucred *cred;
468 	extern struct vnode *rootvp;
469 
470 	dev = devvp->v_rdev;
471 	cred = p ? p->p_ucred : NOCRED;
472 	/*
473 	 * Disallow multiple mounts of the same device.
474 	 * Disallow mounting of a device that is currently in use
475 	 * (except for root, which might share swap device for miniroot).
476 	 * Flush out any old buffers remaining from a previous use.
477 	 */
478 	if ((error = vfs_mountedon(devvp)) != 0)
479 		return (error);
480 	if (vcount(devvp) > 1 && devvp != rootvp)
481 		return (EBUSY);
482 	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
483 		return (error);
484 
485 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
486 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
487 	if (error)
488 		return (error);
489 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
490 		size = DEV_BSIZE;
491 	else
492 		size = dpart.disklab->d_secsize;
493 
494 	bp = NULL;
495 	ump = NULL;
496 
497 #ifdef DEBUG_EXT2
498 	printf("sb size: %d ino size %d\n", sizeof(struct ext2fs),
499 	    EXT2_DINODE_SIZE);
500 #endif
501 	error = bread(devvp, (SBOFF / DEV_BSIZE), SBSIZE, cred, &bp);
502 	if (error)
503 		goto out;
504 	fs = (struct ext2fs *)bp->b_data;
505 	error = ext2fs_checksb(fs, ronly);
506 	if (error)
507 		goto out;
508 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
509 	memset((caddr_t)ump, 0, sizeof *ump);
510 	ump->um_e2fs = malloc(sizeof(struct m_ext2fs), M_UFSMNT, M_WAITOK);
511 	memset((caddr_t)ump->um_e2fs, 0, sizeof(struct m_ext2fs));
512 	e2fs_sbload((struct ext2fs*)bp->b_data, &ump->um_e2fs->e2fs);
513 	brelse(bp);
514 	bp = NULL;
515 	m_fs = ump->um_e2fs;
516 	m_fs->e2fs_ronly = ronly;
517 	if (ronly == 0) {
518 		if (m_fs->e2fs.e2fs_state == E2FS_ISCLEAN)
519 			m_fs->e2fs.e2fs_state = 0;
520 		else
521 			m_fs->e2fs.e2fs_state = E2FS_ERRORS;
522 		m_fs->e2fs_fmod = 1;
523 	}
524 
525 	/* compute dynamic sb infos */
526 	m_fs->e2fs_ncg =
527 		howmany(m_fs->e2fs.e2fs_bcount - m_fs->e2fs.e2fs_first_dblock,
528 		m_fs->e2fs.e2fs_bpg);
529 	/* XXX assume hw bsize = 512 */
530 	m_fs->e2fs_fsbtodb = m_fs->e2fs.e2fs_log_bsize + 1;
531 	m_fs->e2fs_bsize = 1024 << m_fs->e2fs.e2fs_log_bsize;
532 	m_fs->e2fs_bshift = LOG_MINBSIZE + m_fs->e2fs.e2fs_log_bsize;
533 	m_fs->e2fs_qbmask = m_fs->e2fs_bsize - 1;
534 	m_fs->e2fs_bmask = ~m_fs->e2fs_qbmask;
535 	m_fs->e2fs_ngdb = howmany(m_fs->e2fs_ncg,
536 		m_fs->e2fs_bsize / sizeof(struct ext2_gd));
537 	m_fs->e2fs_ipb = m_fs->e2fs_bsize / EXT2_DINODE_SIZE;
538 	m_fs->e2fs_itpg = m_fs->e2fs.e2fs_ipg/m_fs->e2fs_ipb;
539 
540 	m_fs->e2fs_gd = malloc(m_fs->e2fs_ngdb * m_fs->e2fs_bsize,
541 		M_UFSMNT, M_WAITOK);
542 	for (i=0; i < m_fs->e2fs_ngdb; i++) {
543 		error = bread(devvp ,
544 		    fsbtodb(m_fs, ((m_fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
545 		    m_fs->e2fs_bsize, NOCRED, &bp);
546 		if (error) {
547 			free(m_fs->e2fs_gd, M_UFSMNT);
548 			goto out;
549 		}
550 		e2fs_cgload((struct ext2_gd*)bp->b_data,
551 		    &m_fs->e2fs_gd[
552 			i * m_fs->e2fs_bsize / sizeof(struct ext2_gd)],
553 		    m_fs->e2fs_bsize);
554 		brelse(bp);
555 		bp = NULL;
556 	}
557 
558 	mp->mnt_data = (qaddr_t)ump;
559 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
560 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
561 	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
562 	mp->mnt_flag |= MNT_LOCAL;
563 	ump->um_mountp = mp;
564 	ump->um_dev = dev;
565 	ump->um_devvp = devvp;
566 	ump->um_nindir = NINDIR(m_fs);
567 	ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
568 	ump->um_seqinc = 1; /* no frags */
569 	devvp->v_specmountpoint = mp;
570 	return (0);
571 out:
572 	if (bp)
573 		brelse(bp);
574 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
575 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
576 	VOP_UNLOCK(devvp, 0, p);
577 	if (ump) {
578 		free(ump->um_e2fs, M_UFSMNT);
579 		free(ump, M_UFSMNT);
580 		mp->mnt_data = (qaddr_t)0;
581 	}
582 	return (error);
583 }
584 
585 /*
586  * unmount system call
587  */
588 int
589 ext2fs_unmount(mp, mntflags, p)
590 	struct mount *mp;
591 	int mntflags;
592 	struct proc *p;
593 {
594 	register struct ufsmount *ump;
595 	register struct m_ext2fs *fs;
596 	int error, flags;
597 
598 	flags = 0;
599 	if (mntflags & MNT_FORCE)
600 		flags |= FORCECLOSE;
601 	if ((error = ext2fs_flushfiles(mp, flags, p)) != 0)
602 		return (error);
603 	ump = VFSTOUFS(mp);
604 	fs = ump->um_e2fs;
605 	if (fs->e2fs_ronly == 0 &&
606 		ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
607 		(fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
608 		fs->e2fs.e2fs_state = E2FS_ISCLEAN;
609 		(void) ext2fs_sbupdate(ump, MNT_WAIT);
610 	}
611 
612 	if (ump->um_devvp->v_type != VBAD)
613 		ump->um_devvp->v_specmountpoint = NULL;
614 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
615 	error = VOP_CLOSE(ump->um_devvp, fs->e2fs_ronly ? FREAD : FREAD|FWRITE,
616 		NOCRED, p);
617 	vput(ump->um_devvp);
618 	free(fs->e2fs_gd, M_UFSMNT);
619 	free(fs, M_UFSMNT);
620 	free(ump, M_UFSMNT);
621 	mp->mnt_data = (qaddr_t)0;
622 	mp->mnt_flag &= ~MNT_LOCAL;
623 	return (error);
624 }
625 
626 /*
627  * Flush out all the files in a filesystem.
628  */
629 int
630 ext2fs_flushfiles(mp, flags, p)
631 	register struct mount *mp;
632 	int flags;
633 	struct proc *p;
634 {
635 	register struct ufsmount *ump;
636 	int error;
637 
638 	ump = VFSTOUFS(mp);
639 	/*
640 	 * Flush all the files.
641 	 */
642 	if ((error = vflush(mp, NULL, flags)) != 0)
643 		return (error);
644 	/*
645 	 * Flush filesystem metadata.
646 	 */
647 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
648 	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
649 	VOP_UNLOCK(ump->um_devvp, 0, p);
650 	return (error);
651 }
652 
653 /*
654  * Get file system statistics.
655  */
656 int
657 ext2fs_statfs(mp, sbp, p)
658 	struct mount *mp;
659 	register struct statfs *sbp;
660 	struct proc *p;
661 {
662 	register struct ufsmount *ump;
663 	register struct m_ext2fs *fs;
664 	u_int32_t overhead, overhead_per_group;
665 	int i, ngroups;
666 
667 	ump = VFSTOUFS(mp);
668 	fs = ump->um_e2fs;
669 	if (fs->e2fs.e2fs_magic != E2FS_MAGIC)
670 		panic("ext2fs_statfs");
671 
672 	/*
673 	 * Compute the overhead (FS structures)
674 	 */
675 	overhead_per_group = 1 /* block bitmap */ +
676 				 1 /* inode bitmap */ +
677 				 fs->e2fs_itpg;
678 	overhead = fs->e2fs.e2fs_first_dblock +
679 		   fs->e2fs_ncg * overhead_per_group;
680 	if (fs->e2fs.e2fs_rev > E2FS_REV0 &&
681 	    fs->e2fs.e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
682 		for (i = 0, ngroups = 0; i < fs->e2fs_ncg; i++) {
683 			if (cg_has_sb(i))
684 				ngroups++;
685 		}
686 	} else {
687 		ngroups = fs->e2fs_ncg;
688 	}
689 	overhead += ngroups * (1 + fs->e2fs_ngdb);
690 
691 	sbp->f_bsize = fs->e2fs_bsize;
692 	sbp->f_iosize = fs->e2fs_bsize;
693 	sbp->f_blocks = fs->e2fs.e2fs_bcount - overhead;
694 	sbp->f_bfree = fs->e2fs.e2fs_fbcount;
695 	sbp->f_bavail = sbp->f_bfree - fs->e2fs.e2fs_rbcount;
696 	sbp->f_files =  fs->e2fs.e2fs_icount;
697 	sbp->f_ffree = fs->e2fs.e2fs_ficount;
698 	if (sbp != &mp->mnt_stat) {
699 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
700 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
701 	}
702 	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
703 	return (0);
704 }
705 
706 /*
707  * Go through the disk queues to initiate sandbagged IO;
708  * go through the inodes to write those that have been modified;
709  * initiate the writing of the super block if it has been modified.
710  *
711  * Note: we are always called with the filesystem marked `MPBUSY'.
712  */
713 int
714 ext2fs_sync(mp, waitfor, cred, p)
715 	struct mount *mp;
716 	int waitfor;
717 	struct ucred *cred;
718 	struct proc *p;
719 {
720 	register struct vnode *vp, *nvp;
721 	register struct inode *ip;
722 	register struct ufsmount *ump = VFSTOUFS(mp);
723 	register struct m_ext2fs *fs;
724 	int error, allerror = 0;
725 
726 	fs = ump->um_e2fs;
727 	if (fs->e2fs_ronly != 0) {		/* XXX */
728 		printf("fs = %s\n", fs->e2fs_fsmnt);
729 		panic("update: rofs mod");
730 	}
731 
732 	/*
733 	 * Write back each (modified) inode.
734 	 */
735 	simple_lock(&mntvnode_slock);
736 loop:
737 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
738 		/*
739 		 * If the vnode that we are about to sync is no longer
740 		 * associated with this mount point, start over.
741 		 */
742 		if (vp->v_mount != mp)
743 			goto loop;
744 		simple_lock(&vp->v_interlock);
745 		nvp = vp->v_mntvnodes.le_next;
746 		ip = VTOI(vp);
747 		if (vp->v_type == VNON || ((ip->i_flag &
748 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
749 		     vp->v_dirtyblkhd.lh_first == NULL) ||
750 		     waitfor == MNT_LAZY) {
751 			simple_unlock(&vp->v_interlock);
752 			continue;
753 		}
754 		simple_unlock(&mntvnode_slock);
755 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
756 		if (error) {
757 			simple_lock(&mntvnode_slock);
758 			if (error == ENOENT)
759 				goto loop;
760 			continue;
761 		}
762 		if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
763 			allerror = error;
764 		vput(vp);
765 		simple_lock(&mntvnode_slock);
766 	}
767 	simple_unlock(&mntvnode_slock);
768 	/*
769 	 * Force stale file system control information to be flushed.
770 	 */
771 	if (waitfor != MNT_LAZY) {
772 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
773 		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
774 			allerror = error;
775 		VOP_UNLOCK(ump->um_devvp, 0, p);
776 	}
777 	/*
778 	 * Write back modified superblock.
779 	 */
780 	if (fs->e2fs_fmod != 0) {
781 		fs->e2fs_fmod = 0;
782 		fs->e2fs.e2fs_wtime = time.tv_sec;
783 		if ((error = ext2fs_cgupdate(ump, waitfor)))
784 			allerror = error;
785 	}
786 	return (allerror);
787 }
788 
789 /*
790  * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
791  * in from disk.  If it is in core, wait for the lock bit to clear, then
792  * return the inode locked.  Detection and handling of mount points must be
793  * done by the calling routine.
794  */
795 int
796 ext2fs_vget(mp, ino, vpp)
797 	struct mount *mp;
798 	ino_t ino;
799 	struct vnode **vpp;
800 {
801 	register struct m_ext2fs *fs;
802 	register struct inode *ip;
803 	struct ufsmount *ump;
804 	struct buf *bp;
805 	struct vnode *vp;
806 	dev_t dev;
807 	int error;
808 
809 	ump = VFSTOUFS(mp);
810 	dev = ump->um_dev;
811 
812  retry:
813 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
814 		return (0);
815 
816 	/* Allocate a new vnode/inode. */
817 	if ((error = getnewvnode(VT_EXT2FS, mp, ext2fs_vnodeop_p, &vp)) != 0) {
818 		*vpp = NULL;
819 		return (error);
820 	}
821 	MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2FSNODE, M_WAITOK);
822 	bzero((caddr_t)ip, sizeof(struct inode));
823 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
824 	vp->v_data = ip;
825 	ip->i_vnode = vp;
826 	ip->i_e2fs = fs = ump->um_e2fs;
827 	ip->i_dev = dev;
828 	ip->i_number = ino;
829 	ip->i_e2fs_last_lblk = 0;
830 	ip->i_e2fs_last_blk = 0;
831 
832 	/*
833 	 * Put it onto its hash chain and lock it so that other requests for
834 	 * this inode will block if they arrive while we are sleeping waiting
835 	 * for old data structures to be purged or for the contents of the
836 	 * disk portion of this inode to be read.
837 	 */
838 	error = ufs_ihashins(ip);
839 
840 	if (error) {
841 		vrele(vp);
842 
843 		if (error == EEXIST)
844 			goto retry;
845 
846 		return (error);
847 	}
848 
849 	/* Read in the disk contents for the inode, copy into the inode. */
850 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
851 			  (int)fs->e2fs_bsize, NOCRED, &bp);
852 	if (error) {
853 		/*
854 		 * The inode does not contain anything useful, so it would
855 	 	 * be misleading to leave it on its hash chain. With mode
856 		 * still zero, it will be unlinked and returned to the free
857 		 * list by vput().
858 		 */
859 		vput(vp);
860 		brelse(bp);
861 		*vpp = NULL;
862 		return (error);
863 	}
864 	bcopy(((struct ext2fs_dinode*)bp->b_data + ino_to_fsbo(fs, ino)),
865 				&ip->i_din, sizeof(struct ext2fs_dinode));
866 	ip->i_effnlink = ip->i_e2fs_nlink;
867 	brelse(bp);
868 
869 	/*
870 	 * Initialize the vnode from the inode, check for aliases.
871 	 * Note that the underlying vnode may have changed.
872 	 */
873 	error = ufs_vinit(mp, ext2fs_specop_p, EXT2FS_FIFOOPS, &vp);
874 	if (error) {
875 		vput(vp);
876 		*vpp = NULL;
877 		return (error);
878 	}
879 	/*
880 	 * Finish inode initialization now that aliasing has been resolved.
881 	 */
882 	ip->i_devvp = ump->um_devvp;
883 	VREF(ip->i_devvp);
884 	/*
885 	 * Set up a generation number for this inode if it does not
886 	 * already have one. This should only happen on old filesystems.
887 	 */
888 	if (ip->i_e2fs_gen == 0) {
889 		if (++ext2gennumber < (u_long)time.tv_sec)
890 			ext2gennumber = time.tv_sec;
891 		ip->i_e2fs_gen = ext2gennumber;
892 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
893 			ip->i_flag |= IN_MODIFIED;
894 	}
895 
896 	*vpp = vp;
897 	return (0);
898 }
899 
900 /*
901  * File handle to vnode
902  *
903  * Have to be really careful about stale file handles:
904  * - check that the inode number is valid
905  * - call ext2fs_vget() to get the locked inode
906  * - check for an unallocated inode (i_mode == 0)
907  * - check that the given client host has export rights and return
908  *   those rights via. exflagsp and credanonp
909  */
910 int
911 ext2fs_fhtovp(mp, fhp, vpp)
912 	register struct mount *mp;
913 	struct fid *fhp;
914 	struct vnode **vpp;
915 {
916 	register struct inode *ip;
917 	struct vnode *nvp;
918 	int error;
919 	register struct ufid *ufhp;
920 	struct m_ext2fs *fs;
921 
922 	ufhp = (struct ufid *)fhp;
923 	fs = VFSTOUFS(mp)->um_e2fs;
924 	if ((ufhp->ufid_ino < EXT2_FIRSTINO && ufhp->ufid_ino != EXT2_ROOTINO) ||
925 		ufhp->ufid_ino >= fs->e2fs_ncg * fs->e2fs.e2fs_ipg)
926 		return (ESTALE);
927 
928 	if ((error = VFS_VGET(mp, ufhp->ufid_ino, &nvp)) != 0) {
929 		*vpp = NULLVP;
930 		return (error);
931 	}
932 	ip = VTOI(nvp);
933 	if (ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime != 0 ||
934 		ip->i_e2fs_gen != ufhp->ufid_gen) {
935 		vput(nvp);
936 		*vpp = NULLVP;
937 		return (ESTALE);
938 	}
939 	*vpp = nvp;
940 	return (0);
941 }
942 
943 /*
944  * Vnode pointer to File handle
945  */
946 /* ARGSUSED */
947 int
948 ext2fs_vptofh(vp, fhp)
949 	struct vnode *vp;
950 	struct fid *fhp;
951 {
952 	register struct inode *ip;
953 	register struct ufid *ufhp;
954 
955 	ip = VTOI(vp);
956 	ufhp = (struct ufid *)fhp;
957 	ufhp->ufid_len = sizeof(struct ufid);
958 	ufhp->ufid_ino = ip->i_number;
959 	ufhp->ufid_gen = ip->i_e2fs_gen;
960 	return (0);
961 }
962 
963 /*
964  * no sysctl for ext2fs
965  */
966 
967 int
968 ext2fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
969 	int *name;
970 	u_int namelen;
971 	void *oldp;
972 	size_t *oldlenp;
973 	void *newp;
974 	size_t newlen;
975 	struct proc *p;
976 {
977 	return (EOPNOTSUPP);
978 }
979 
980 /*
981  * Write a superblock and associated information back to disk.
982  */
983 int
984 ext2fs_sbupdate(mp, waitfor)
985 	struct ufsmount *mp;
986 	int waitfor;
987 {
988 	register struct m_ext2fs *fs = mp->um_e2fs;
989 	register struct buf *bp;
990 	int error = 0;
991 
992 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
993 	bcopy((caddr_t)(&fs->e2fs), bp->b_data, SBSIZE);
994 	if (waitfor == MNT_WAIT)
995 		error = bwrite(bp);
996 	else
997 		bawrite(bp);
998 	return (error);
999 }
1000 
1001 int
1002 ext2fs_cgupdate(mp, waitfor)
1003 	struct ufsmount *mp;
1004 	int waitfor;
1005 {
1006 	register struct m_ext2fs *fs = mp->um_e2fs;
1007 	register struct buf *bp;
1008 	int i, error = 0, allerror = 0;
1009 
1010 	allerror = ext2fs_sbupdate(mp, waitfor);
1011 	for (i = 0; i < fs->e2fs_ngdb; i++) {
1012 		bp = getblk(mp->um_devvp, fsbtodb(fs, ((fs->e2fs_bsize>1024)?0:1)+i+1),
1013 		    fs->e2fs_bsize, 0, 0);
1014 		e2fs_cgsave(&fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)], (struct ext2_gd*)bp->b_data, fs->e2fs_bsize);
1015 		if (waitfor == MNT_WAIT)
1016 			error = bwrite(bp);
1017 		else
1018 			bawrite(bp);
1019 	}
1020 
1021 	if (!allerror && error)
1022 		allerror = error;
1023 	return (allerror);
1024 }
1025 
1026 static int
1027 ext2fs_checksb(fs, ronly)
1028 	struct ext2fs *fs;
1029 	int ronly;
1030 {
1031 	if (fs2h16(fs->e2fs_magic) != E2FS_MAGIC) {
1032 		return (EIO);		/* XXX needs translation */
1033 	}
1034 	if (fs2h32(fs->e2fs_rev) > E2FS_REV1) {
1035 #ifdef DIAGNOSTIC
1036 		printf("Ext2 fs: unsupported revision number: %x\n",
1037 		    fs2h32(fs->e2fs_rev));
1038 #endif
1039 		return (EIO);		/* XXX needs translation */
1040 	}
1041 	if (fs2h32(fs->e2fs_log_bsize) > 2) { /* block size = 1024|2048|4096 */
1042 #ifdef DIAGNOSTIC
1043 		printf("Ext2 fs: bad block size: %d (expected <=2 for ext2 fs)\n",
1044 		    fs2h32(fs->e2fs_log_bsize));
1045 #endif
1046 		return (EIO);	   /* XXX needs translation */
1047 	}
1048 	if (fs2h32(fs->e2fs_rev) > E2FS_REV0) {
1049 		if (fs2h32(fs->e2fs_first_ino) != EXT2_FIRSTINO ||
1050 		    fs2h16(fs->e2fs_inode_size) != EXT2_DINODE_SIZE) {
1051 			printf("Ext2 fs: unsupported inode size\n");
1052 			return (EINVAL);      /* XXX needs translation */
1053 		}
1054 		if (fs2h32(fs->e2fs_features_incompat) &
1055 		    ~EXT2F_INCOMPAT_SUPP) {
1056 			printf("Ext2 fs: unsupported optionnal feature\n");
1057 			return (EINVAL);      /* XXX needs translation */
1058 		}
1059 		if (!ronly && fs2h32(fs->e2fs_features_rocompat) &
1060 		    ~EXT2F_ROCOMPAT_SUPP) {
1061 			return (EROFS);      /* XXX needs translation */
1062 		}
1063 	}
1064 	return (0);
1065 }
1066