xref: /netbsd-src/sys/ufs/ffs/ffs_vfsops.c (revision cac8e449158efc7261bebc8657cbb0125a2cfdde)
1 /*	$NetBSD: ffs_vfsops.c,v 1.232 2008/07/31 15:37:56 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1989, 1991, 1993, 1994
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.232 2008/07/31 15:37:56 hannken Exp $");
65 
66 #if defined(_KERNEL_OPT)
67 #include "opt_ffs.h"
68 #include "opt_quota.h"
69 #include "opt_softdep.h"
70 #include "opt_wapbl.h"
71 #endif
72 
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/namei.h>
76 #include <sys/proc.h>
77 #include <sys/kernel.h>
78 #include <sys/vnode.h>
79 #include <sys/socket.h>
80 #include <sys/mount.h>
81 #include <sys/buf.h>
82 #include <sys/device.h>
83 #include <sys/mbuf.h>
84 #include <sys/file.h>
85 #include <sys/disklabel.h>
86 #include <sys/ioctl.h>
87 #include <sys/errno.h>
88 #include <sys/malloc.h>
89 #include <sys/pool.h>
90 #include <sys/lock.h>
91 #include <sys/sysctl.h>
92 #include <sys/conf.h>
93 #include <sys/kauth.h>
94 #include <sys/wapbl.h>
95 #include <sys/fstrans.h>
96 #include <sys/module.h>
97 
98 #include <miscfs/genfs/genfs.h>
99 #include <miscfs/specfs/specdev.h>
100 
101 #include <ufs/ufs/quota.h>
102 #include <ufs/ufs/ufsmount.h>
103 #include <ufs/ufs/inode.h>
104 #include <ufs/ufs/dir.h>
105 #include <ufs/ufs/ufs_extern.h>
106 #include <ufs/ufs/ufs_bswap.h>
107 #include <ufs/ufs/ufs_wapbl.h>
108 
109 #include <ufs/ffs/fs.h>
110 #include <ufs/ffs/ffs_extern.h>
111 
/*
 * Declare ffs as a kernel module of class MODULE_CLASS_VFS.
 * NOTE(review): the MODULE() macro is defined elsewhere; presumably it is
 * what binds the static ffs_modcmd() below to this module -- confirm.
 */
112 MODULE(MODULE_CLASS_VFS, ffs, NULL);
113 
/*
 * sysctl log handle: ffs_modcmd() passes it to every sysctl_createv()
 * call on MODULE_CMD_INIT and to sysctl_teardown() on MODULE_CMD_FINI.
 */
114 static struct sysctllog *ffs_sysctl_log;
115 
116 /* how many times ffs_init() was called */
117 int ffs_initcount = 0;
118 
/* Defined elsewhere; not referenced in the part of the file shown here. */
119 extern kmutex_t ufs_hashlock;
120 
/*
 * Vnode operation vector descriptions, defined elsewhere and collected
 * into ffs_vnodeopv_descs[] below.
 */
121 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
122 extern const struct vnodeopv_desc ffs_specop_opv_desc;
123 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
124 
/*
 * NULL-terminated list of the vnode operation vector descriptions
 * (regular, special-device and fifo) that ffs supplies.  The list is
 * published through the ffs_vfsops initializer below.
 */
125 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
126 	&ffs_vnodeop_opv_desc,
127 	&ffs_specop_opv_desc,
128 	&ffs_fifoop_opv_desc,
129 	NULL,
130 };
131 
/*
 * VFS operations switch for ffs.  ffs_modcmd() registers it with
 * vfs_attach() and removes it with vfs_detach().
 *
 * This is a positional initializer: every entry has to stay in the order
 * in which struct vfsops declares its members (the structure is declared
 * elsewhere -- check its definition before adding, removing or moving an
 * entry).  Operations that are not ffs specific use the shared ufs_* and
 * genfs_* implementations.
 */
132 struct vfsops ffs_vfsops = {
133 	MOUNT_FFS,
134 	sizeof (struct ufs_args),
135 	ffs_mount,
136 	ufs_start,
137 	ffs_unmount,
138 	ufs_root,
139 	ufs_quotactl,
140 	ffs_statvfs,
141 	ffs_sync,
142 	ffs_vget,
143 	ffs_fhtovp,
144 	ffs_vptofh,
145 	ffs_init,
146 	ffs_reinit,
147 	ffs_done,
148 	ffs_mountroot,
149 	ffs_snapshot,
150 	ffs_extattrctl,
151 	ffs_suspendctl,
152 	genfs_renamelock_enter,
153 	genfs_renamelock_exit,
154 	ffs_full_fsync,
155 	ffs_vnodeopv_descs,
156 	0,
157 	{ NULL, NULL },
158 };
159 
/*
 * genfs operation hooks for ffs.  Only the size hook is an ffs function;
 * allocation and mark-update use the ufs implementations and write uses
 * the genfs one.  The table is not referenced in the part of the file
 * shown here.
 */
160 static const struct genfs_ops ffs_genfsops = {
161 	.gop_size = ffs_gop_size,
162 	.gop_alloc = ufs_gop_alloc,
163 	.gop_write = genfs_gop_write,
164 	.gop_markupdate = ufs_gop_markupdate,
165 };
166 
/*
 * ffs implementations of the ufs operation hooks.  ffs_mountfs() stores
 * a pointer to this table in ump->um_ops for every ffs mount.
 */
167 static const struct ufs_ops ffs_ufsops = {
168 	.uo_itimes = ffs_itimes,
169 	.uo_update = ffs_update,
170 	.uo_truncate = ffs_truncate,
171 	.uo_valloc = ffs_valloc,
172 	.uo_vfree = ffs_vfree,
173 	.uo_balloc = ffs_balloc,
174 };
175 
176 static int
177 ffs_modcmd(modcmd_t cmd, void *arg)
178 {
179 	int error;
180 
181 #if 0
182 	extern int doasyncfree;
183 #endif
184 	extern int ffs_log_changeopt;
185 
186 	switch (cmd) {
187 	case MODULE_CMD_INIT:
188 		error = vfs_attach(&ffs_vfsops);
189 		if (error != 0)
190 			break;
191 
192 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
193 			       CTLFLAG_PERMANENT,
194 			       CTLTYPE_NODE, "vfs", NULL,
195 			       NULL, 0, NULL, 0,
196 			       CTL_VFS, CTL_EOL);
197 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
198 			       CTLFLAG_PERMANENT,
199 			       CTLTYPE_NODE, "ffs",
200 			       SYSCTL_DESCR("Berkeley Fast File System"),
201 			       NULL, 0, NULL, 0,
202 			       CTL_VFS, 1, CTL_EOL);
203 
204 		/*
205 		 * @@@ should we even bother with these first three?
206 		 */
207 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
208 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
209 			       CTLTYPE_INT, "doclusterread", NULL,
210 			       sysctl_notavail, 0, NULL, 0,
211 			       CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
212 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
213 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
214 			       CTLTYPE_INT, "doclusterwrite", NULL,
215 			       sysctl_notavail, 0, NULL, 0,
216 			       CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
217 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
218 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
219 			       CTLTYPE_INT, "doreallocblks", NULL,
220 			       sysctl_notavail, 0, NULL, 0,
221 			       CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
222 #if 0
223 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
224 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
225 			       CTLTYPE_INT, "doasyncfree",
226 			       SYSCTL_DESCR("Release dirty blocks asynchronously"),
227 			       NULL, 0, &doasyncfree, 0,
228 			       CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
229 #endif
230 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
231 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
232 			       CTLTYPE_INT, "log_changeopt",
233 			       SYSCTL_DESCR("Log changes in optimization strategy"),
234 			       NULL, 0, &ffs_log_changeopt, 0,
235 			       CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
236 		break;
237 	case MODULE_CMD_FINI:
238 		error = vfs_detach(&ffs_vfsops);
239 		if (error != 0)
240 			break;
241 		sysctl_teardown(&ffs_sysctl_log);
242 		break;
243 	default:
244 		error = ENOTTY;
245 		break;
246 	}
247 
248 	return (error);
249 }
250 
/*
 * Pool caches used by ffs.  They are only defined here; nothing in the
 * part of the file shown here initializes or uses them.
 * NOTE(review): going by the names, one cache for in-core inodes and one
 * for each of the two on-disk inode formats -- confirm at the point
 * where they are created.
 */
251 pool_cache_t ffs_inode_cache;
252 pool_cache_t ffs_dinode1_cache;
253 pool_cache_t ffs_dinode2_cache;
254 
/*
 * Superblock compatibility helpers, defined later in the file.
 * ffs_oldfscompat_read() is called by ffs_reload() and ffs_mountfs() on
 * a superblock freshly copied from disk; its last argument is the
 * location the superblock was read from.
 */
255 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
256 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
257 
258 /*
259  * Called by main() when ffs is going to be mounted as root.
260  */
261 
262 int
263 ffs_mountroot(void)
264 {
265 	struct fs *fs;
266 	struct mount *mp;
267 	struct lwp *l = curlwp;			/* XXX */
268 	struct ufsmount *ump;
269 	int error;
270 
271 	if (device_class(root_device) != DV_DISK)
272 		return (ENODEV);
273 
274 	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
275 		vrele(rootvp);
276 		return (error);
277 	}
278 
279 	/*
280 	 * We always need to be able to mount the root file system.
281 	 */
282 	mp->mnt_flag |= MNT_FORCE;
283 	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
284 		vfs_unbusy(mp, false, NULL);
285 		vfs_destroy(mp);
286 		return (error);
287 	}
288 	mp->mnt_flag &= ~MNT_FORCE;
289 	mutex_enter(&mountlist_lock);
290 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
291 	mutex_exit(&mountlist_lock);
292 	ump = VFSTOUFS(mp);
293 	fs = ump->um_fs;
294 	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
295 	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
296 	(void)ffs_statvfs(mp, &mp->mnt_stat);
297 	vfs_unbusy(mp, false, NULL);
298 	setrootfstime((time_t)fs->fs_time);
299 	return (0);
300 }
301 
/*
 * When nonzero, ffs_mount() sets MNT_LOG on every request it handles.
 * Nothing in the part of the file shown here assigns it, so it keeps its
 * initial value of 0 unless changed from outside.
 * NOTE(review): looks like a debugging knob -- confirm.
 */
302 static int dolog;
303 
304 /*
305  * VFS Operations.
306  *
307  * mount system call
308  */
309 int
310 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
311 {
312 	struct lwp *l = curlwp;
313 	struct nameidata nd;
314 	struct vnode *vp, *devvp = NULL;
315 	struct ufs_args *args = data;
316 	struct ufsmount *ump = NULL;
317 	struct fs *fs;
318 	int error = 0, flags, update;
319 	mode_t accessmode;
320 
321 	if (dolog)
322 		mp->mnt_flag |= MNT_LOG;
323 
324 	if (*data_len < sizeof *args)
325 		return EINVAL;
326 
327 	if (mp->mnt_flag & MNT_GETARGS) {
328 		ump = VFSTOUFS(mp);
329 		if (ump == NULL)
330 			return EIO;
331 		args->fspec = NULL;
332 		*data_len = sizeof *args;
333 		return 0;
334 	}
335 
336 #if !defined(SOFTDEP)
337 	mp->mnt_flag &= ~MNT_SOFTDEP;
338 #endif
339 
340 	update = mp->mnt_flag & MNT_UPDATE;
341 
342 	/* Check arguments */
343 	if (args->fspec != NULL) {
344 		/*
345 		 * Look up the name and verify that it's sane.
346 		 */
347 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, args->fspec);
348 		if ((error = namei(&nd)) != 0)
349 			return (error);
350 		devvp = nd.ni_vp;
351 
352 		if (!update) {
353 			/*
354 			 * Be sure this is a valid block device
355 			 */
356 			if (devvp->v_type != VBLK)
357 				error = ENOTBLK;
358 			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
359 				error = ENXIO;
360 		} else {
361 			/*
362 			 * Be sure we're still naming the same device
363 			 * used for our initial mount
364 			 */
365 			ump = VFSTOUFS(mp);
366 			if (devvp != ump->um_devvp) {
367 				if (devvp->v_rdev != ump->um_devvp->v_rdev)
368 					error = EINVAL;
369 				else {
370 					vrele(devvp);
371 					devvp = ump->um_devvp;
372 					vref(devvp);
373 				}
374 			}
375 		}
376 	} else {
377 		if (!update) {
378 			/* New mounts must have a filename for the device */
379 			return (EINVAL);
380 		} else {
381 			/* Use the extant mount */
382 			ump = VFSTOUFS(mp);
383 			devvp = ump->um_devvp;
384 			vref(devvp);
385 		}
386 	}
387 
388 	/*
389 	 * Mark the device and any existing vnodes as involved in
390 	 * softdep processing.
391 	 */
392 	if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
393 		devvp->v_uflag |= VU_SOFTDEP;
394 		mutex_enter(&mntvnode_lock);
395 		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
396 			if (vp->v_mount != mp || vismarker(vp))
397 				continue;
398 			vp->v_uflag |= VU_SOFTDEP;
399 		}
400 		mutex_exit(&mntvnode_lock);
401 	}
402 
403 	/*
404 	 * If mount by non-root, then verify that user has necessary
405 	 * permissions on the device.
406 	 */
407 	if (error == 0 && kauth_authorize_generic(l->l_cred,
408 	    KAUTH_GENERIC_ISSUSER, NULL) != 0) {
409 		accessmode = VREAD;
410 		if (update ?
411 		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
412 		    (mp->mnt_flag & MNT_RDONLY) == 0)
413 			accessmode |= VWRITE;
414 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
415 		error = VOP_ACCESS(devvp, accessmode, l->l_cred);
416 		VOP_UNLOCK(devvp, 0);
417 	}
418 
419 	if (error) {
420 		vrele(devvp);
421 		return (error);
422 	}
423 
424 #ifdef WAPBL
425 	/*
426 	 * WAPBL can only be enabled on a r/w mount
427 	 * that does not use softdep.
428 	 */
429 	if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
430 		mp->mnt_flag &= ~MNT_LOG;
431 	}
432 	if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_LOG)) ==
433 			(MNT_SOFTDEP | MNT_LOG)) {
434 		printf("%s fs is journalled, ignoring soft update mode\n",
435 			VFSTOUFS(mp)->um_fs->fs_fsmnt);
436 		mp->mnt_flag &= ~MNT_SOFTDEP;
437 	}
438 #else /* !WAPBL */
439 	mp->mnt_flag &= ~MNT_LOG;
440 #endif /* !WAPBL */
441 
442 	if (!update) {
443 		int xflags;
444 
445 		if (mp->mnt_flag & MNT_RDONLY)
446 			xflags = FREAD;
447 		else
448 			xflags = FREAD | FWRITE;
449 		error = VOP_OPEN(devvp, xflags, FSCRED);
450 		if (error)
451 			goto fail;
452 		error = ffs_mountfs(devvp, mp, l);
453 		if (error) {
454 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
455 			(void)VOP_CLOSE(devvp, xflags, NOCRED);
456 			VOP_UNLOCK(devvp, 0);
457 			goto fail;
458 		}
459 
460 		ump = VFSTOUFS(mp);
461 		fs = ump->um_fs;
462 		if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
463 		    (MNT_SOFTDEP | MNT_ASYNC)) {
464 			printf("%s fs uses soft updates, "
465 			    "ignoring async mode\n",
466 			    fs->fs_fsmnt);
467 			mp->mnt_flag &= ~MNT_ASYNC;
468 		}
469 	} else {
470 		/*
471 		 * Update the mount.
472 		 */
473 
474 		/*
475 		 * The initial mount got a reference on this
476 		 * device, so drop the one obtained via
477 		 * namei(), above.
478 		 */
479 		vrele(devvp);
480 
481 		ump = VFSTOUFS(mp);
482 		fs = ump->um_fs;
483 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
484 			/*
485 			 * Changing from r/w to r/o
486 			 */
487 			flags = WRITECLOSE;
488 			if (mp->mnt_flag & MNT_FORCE)
489 				flags |= FORCECLOSE;
490 			if (mp->mnt_flag & MNT_SOFTDEP)
491 				error = softdep_flushfiles(mp, flags, l);
492 			else
493 				error = ffs_flushfiles(mp, flags, l);
494 			if (fs->fs_pendingblocks != 0 ||
495 			    fs->fs_pendinginodes != 0) {
496 				printf("%s: update error: blocks %" PRId64
497 				       " files %d\n",
498 				    fs->fs_fsmnt, fs->fs_pendingblocks,
499 				    fs->fs_pendinginodes);
500 				fs->fs_pendingblocks = 0;
501 				fs->fs_pendinginodes = 0;
502 			}
503 			if (error == 0)
504 				error = UFS_WAPBL_BEGIN(mp);
505 			if (error == 0 &&
506 			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
507 			    fs->fs_clean & FS_WASCLEAN) {
508 				if (mp->mnt_flag & MNT_SOFTDEP)
509 					fs->fs_flags &= ~FS_DOSOFTDEP;
510 				fs->fs_clean = FS_ISCLEAN;
511 				(void) ffs_sbupdate(ump, MNT_WAIT);
512 			}
513 			if (error == 0)
514 				UFS_WAPBL_END(mp);
515 			if (error)
516 				return (error);
517 		}
518 
519 #ifdef WAPBL
520 		if ((mp->mnt_flag & MNT_LOG) == 0) {
521 			error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
522 			if (error)
523 				return error;
524 		}
525 #endif /* WAPBL */
526 
527 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
528 			/*
529 			 * Finish change from r/w to r/o
530 			 */
531 			fs->fs_ronly = 1;
532 			fs->fs_fmod = 0;
533 		}
534 
535 		/*
536 		 * Flush soft dependencies if disabling it via an update
537 		 * mount. This may leave some items to be processed,
538 		 * so don't do this yet XXX.
539 		 */
540 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
541 		    !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
542 #ifdef notyet
543 			flags = WRITECLOSE;
544 			if (mp->mnt_flag & MNT_FORCE)
545 				flags |= FORCECLOSE;
546 			error = softdep_flushfiles(mp, flags, l);
547 			if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
548 				fs->fs_flags &= ~FS_DOSOFTDEP;
549 				(void) ffs_sbupdate(ump, MNT_WAIT);
550 #elif defined(SOFTDEP)
551 			mp->mnt_flag |= MNT_SOFTDEP;
552 #endif
553 		}
554 
555 		/*
556 		 * When upgrading to a softdep mount, we must first flush
557 		 * all vnodes. (not done yet -- see above)
558 		 */
559 		if (!(fs->fs_flags & FS_DOSOFTDEP) &&
560 		    (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
561 #ifdef notyet
562 			flags = WRITECLOSE;
563 			if (mp->mnt_flag & MNT_FORCE)
564 				flags |= FORCECLOSE;
565 			error = ffs_flushfiles(mp, flags, l);
566 #else
567 			mp->mnt_flag &= ~MNT_SOFTDEP;
568 #endif
569 		}
570 
571 		if (mp->mnt_flag & MNT_RELOAD) {
572 			error = ffs_reload(mp, l->l_cred, l);
573 			if (error)
574 				return (error);
575 		}
576 
577 		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
578 			/*
579 			 * Changing from read-only to read/write
580 			 */
581 			fs->fs_ronly = 0;
582 			fs->fs_clean <<= 1;
583 			fs->fs_fmod = 1;
584 			if ((fs->fs_flags & FS_DOSOFTDEP)) {
585 				error = softdep_mount(devvp, mp, fs,
586 				    l->l_cred);
587 				if (error)
588 					return (error);
589 			}
590 #ifdef WAPBL
591 			if (fs->fs_flags & FS_DOWAPBL) {
592 				printf("%s: replaying log to disk\n",
593 				    fs->fs_fsmnt);
594 				KDASSERT(mp->mnt_wapbl_replay);
595 				error = wapbl_replay_write(mp->mnt_wapbl_replay,
596 							   devvp);
597 				if (error) {
598 					return error;
599 				}
600 				wapbl_replay_stop(mp->mnt_wapbl_replay);
601 				fs->fs_clean = FS_WASCLEAN;
602 			}
603 #endif /* WAPBL */
604 			if (fs->fs_snapinum[0] != 0)
605 				ffs_snapshot_mount(mp);
606 		}
607 
608 #ifdef WAPBL
609 		error = ffs_wapbl_start(mp);
610 		if (error)
611 			return error;
612 #endif /* WAPBL */
613 
614 		if (args->fspec == NULL)
615 			return EINVAL;
616 		if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
617 		    (MNT_SOFTDEP | MNT_ASYNC)) {
618 			printf("%s fs uses soft updates, ignoring async mode\n",
619 			    fs->fs_fsmnt);
620 			mp->mnt_flag &= ~MNT_ASYNC;
621 		}
622 	}
623 
624 	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
625 	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
626 	if (error == 0)
627 		(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
628 		    sizeof(fs->fs_fsmnt));
629 	if (mp->mnt_flag & MNT_SOFTDEP)
630 		fs->fs_flags |= FS_DOSOFTDEP;
631 	else
632 		fs->fs_flags &= ~FS_DOSOFTDEP;
633 	if (fs->fs_fmod != 0) {	/* XXX */
634 		int err;
635 
636 		fs->fs_fmod = 0;
637 		if (fs->fs_clean & FS_WASCLEAN)
638 			fs->fs_time = time_second;
639 		else {
640 			printf("%s: file system not clean (fs_clean=%#x); "
641 			    "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
642 			    fs->fs_clean);
643 			printf("%s: lost blocks %" PRId64 " files %d\n",
644 			    mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
645 			    fs->fs_pendinginodes);
646 		}
647 		err = UFS_WAPBL_BEGIN(mp);
648 		if (err == 0) {
649 			(void) ffs_cgupdate(ump, MNT_WAIT);
650 			UFS_WAPBL_END(mp);
651 		}
652 	}
653 	return (error);
654 
655 fail:
656 	vrele(devvp);
657 	return (error);
658 }
659 
660 /*
661  * Reload all incore data for a filesystem (used after running fsck on
662  * the root filesystem and finding things to fix). The filesystem must
663  * be mounted read-only.
664  *
665  * Things to do to update the mount:
666  *	1) invalidate all cached meta-data.
667  *	2) re-read superblock from disk.
668  *	3) re-read summary information from disk.
669  *	4) invalidate all inactive vnodes.
670  *	5) invalidate all cached file data.
671  *	6) re-read inode data for all active vnodes.
672  */
673 int
674 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
675 {
676 	struct vnode *vp, *mvp, *devvp;
677 	struct inode *ip;
678 	void *space;
679 	struct buf *bp;
680 	struct fs *fs, *newfs;
681 	struct partinfo dpart;
682 	int i, blks, size, error;
683 	int32_t *lp;
684 	struct ufsmount *ump;
685 	daddr_t sblockloc;
686 
687 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
688 		return (EINVAL);
689 
690 	ump = VFSTOUFS(mp);
691 	/*
692 	 * Step 1: invalidate all cached meta-data.
693 	 */
694 	devvp = ump->um_devvp;
695 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
696 	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
697 	VOP_UNLOCK(devvp, 0);
698 	if (error)
699 		panic("ffs_reload: dirty1");
700 	/*
701 	 * Step 2: re-read superblock from disk.
702 	 */
703 	fs = ump->um_fs;
704 	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED) != 0)
705 		size = DEV_BSIZE;
706 	else
707 		size = dpart.disklab->d_secsize;
708 	/* XXX we don't handle possibility that superblock moved. */
709 	error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
710 		      NOCRED, 0, &bp);
711 	if (error) {
712 		brelse(bp, 0);
713 		return (error);
714 	}
715 	newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
716 	memcpy(newfs, bp->b_data, fs->fs_sbsize);
717 #ifdef FFS_EI
718 	if (ump->um_flags & UFS_NEEDSWAP) {
719 		ffs_sb_swap((struct fs*)bp->b_data, newfs);
720 		fs->fs_flags |= FS_SWAPPED;
721 	} else
722 #endif
723 		fs->fs_flags &= ~FS_SWAPPED;
724 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
725 	     newfs->fs_magic != FS_UFS2_MAGIC)||
726 	     newfs->fs_bsize > MAXBSIZE ||
727 	     newfs->fs_bsize < sizeof(struct fs)) {
728 		brelse(bp, 0);
729 		free(newfs, M_UFSMNT);
730 		return (EIO);		/* XXX needs translation */
731 	}
732 	/* Store off old fs_sblockloc for fs_oldfscompat_read. */
733 	sblockloc = fs->fs_sblockloc;
734 	/*
735 	 * Copy pointer fields back into superblock before copying in	XXX
736 	 * new superblock. These should really be in the ufsmount.	XXX
737 	 * Note that important parameters (eg fs_ncg) are unchanged.
738 	 */
739 	newfs->fs_csp = fs->fs_csp;
740 	newfs->fs_maxcluster = fs->fs_maxcluster;
741 	newfs->fs_contigdirs = fs->fs_contigdirs;
742 	newfs->fs_ronly = fs->fs_ronly;
743 	newfs->fs_active = fs->fs_active;
744 	memcpy(fs, newfs, (u_int)fs->fs_sbsize);
745 	brelse(bp, 0);
746 	free(newfs, M_UFSMNT);
747 
748 	/* Recheck for apple UFS filesystem */
749 	ump->um_flags &= ~UFS_ISAPPLEUFS;
750 	/* First check to see if this is tagged as an Apple UFS filesystem
751 	 * in the disklabel
752 	 */
753 	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
754 		(dpart.part->p_fstype == FS_APPLEUFS)) {
755 		ump->um_flags |= UFS_ISAPPLEUFS;
756 	}
757 #ifdef APPLE_UFS
758 	else {
759 		/* Manually look for an apple ufs label, and if a valid one
760 		 * is found, then treat it like an Apple UFS filesystem anyway
761 		 */
762 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
763 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
764 		if (error) {
765 			brelse(bp, 0);
766 			return (error);
767 		}
768 		error = ffs_appleufs_validate(fs->fs_fsmnt,
769 			(struct appleufslabel *)bp->b_data, NULL);
770 		if (error == 0)
771 			ump->um_flags |= UFS_ISAPPLEUFS;
772 		brelse(bp, 0);
773 		bp = NULL;
774 	}
775 #else
776 	if (ump->um_flags & UFS_ISAPPLEUFS)
777 		return (EIO);
778 #endif
779 
780 	if (UFS_MPISAPPLEUFS(ump)) {
781 		/* see comment about NeXT below */
782 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
783 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
784 		mp->mnt_iflag |= IMNT_DTYPE;
785 	} else {
786 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
787 		ump->um_dirblksiz = DIRBLKSIZ;
788 		if (ump->um_maxsymlinklen > 0)
789 			mp->mnt_iflag |= IMNT_DTYPE;
790 		else
791 			mp->mnt_iflag &= ~IMNT_DTYPE;
792 	}
793 	ffs_oldfscompat_read(fs, ump, sblockloc);
794 	mutex_enter(&ump->um_lock);
795 	ump->um_maxfilesize = fs->fs_maxfilesize;
796 
797 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
798 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
799 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
800 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
801 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
802 			mutex_exit(&ump->um_lock);
803 			return (EINVAL);
804 		}
805 	}
806 
807 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
808 		fs->fs_pendingblocks = 0;
809 		fs->fs_pendinginodes = 0;
810 	}
811 	mutex_exit(&ump->um_lock);
812 
813 	ffs_statvfs(mp, &mp->mnt_stat);
814 	/*
815 	 * Step 3: re-read summary information from disk.
816 	 */
817 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
818 	space = fs->fs_csp;
819 	for (i = 0; i < blks; i += fs->fs_frag) {
820 		size = fs->fs_bsize;
821 		if (i + fs->fs_frag > blks)
822 			size = (blks - i) * fs->fs_fsize;
823 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
824 			      NOCRED, 0, &bp);
825 		if (error) {
826 			brelse(bp, 0);
827 			return (error);
828 		}
829 #ifdef FFS_EI
830 		if (UFS_FSNEEDSWAP(fs))
831 			ffs_csum_swap((struct csum *)bp->b_data,
832 			    (struct csum *)space, size);
833 		else
834 #endif
835 			memcpy(space, bp->b_data, (size_t)size);
836 		space = (char *)space + size;
837 		brelse(bp, 0);
838 	}
839 	if ((fs->fs_flags & FS_DOSOFTDEP))
840 		softdep_mount(devvp, mp, fs, cred);
841 	if (fs->fs_snapinum[0] != 0)
842 		ffs_snapshot_mount(mp);
843 	/*
844 	 * We no longer know anything about clusters per cylinder group.
845 	 */
846 	if (fs->fs_contigsumsize > 0) {
847 		lp = fs->fs_maxcluster;
848 		for (i = 0; i < fs->fs_ncg; i++)
849 			*lp++ = fs->fs_contigsumsize;
850 	}
851 
852 	/* Allocate a marker vnode. */
853 	if ((mvp = vnalloc(mp)) == NULL)
854 		return ENOMEM;
855 	/*
856 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
857 	 * and vclean() can be called indirectly
858 	 */
859 	mutex_enter(&mntvnode_lock);
860  loop:
861 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
862 		vmark(mvp, vp);
863 		if (vp->v_mount != mp || vismarker(vp))
864 			continue;
865 		/*
866 		 * Step 4: invalidate all inactive vnodes.
867 		 */
868 		if (vrecycle(vp, &mntvnode_lock, l)) {
869 			mutex_enter(&mntvnode_lock);
870 			(void)vunmark(mvp);
871 			goto loop;
872 		}
873 		/*
874 		 * Step 5: invalidate all cached file data.
875 		 */
876 		mutex_enter(&vp->v_interlock);
877 		mutex_exit(&mntvnode_lock);
878 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
879 			(void)vunmark(mvp);
880 			goto loop;
881 		}
882 		if (vinvalbuf(vp, 0, cred, l, 0, 0))
883 			panic("ffs_reload: dirty2");
884 		/*
885 		 * Step 6: re-read inode data for all active vnodes.
886 		 */
887 		ip = VTOI(vp);
888 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
889 			      (int)fs->fs_bsize, NOCRED, 0, &bp);
890 		if (error) {
891 			brelse(bp, 0);
892 			vput(vp);
893 			(void)vunmark(mvp);
894 			break;
895 		}
896 		ffs_load_inode(bp, ip, fs, ip->i_number);
897 		ip->i_ffs_effnlink = ip->i_nlink;
898 		brelse(bp, 0);
899 		vput(vp);
900 		mutex_enter(&mntvnode_lock);
901 	}
902 	mutex_exit(&mntvnode_lock);
903 	vnfree(mvp);
904 	return (error);
905 }
906 
907 /*
908  * Possible superblock locations ordered from most to least likely.
 *
 * ffs_mountfs() probes the entries in order, dividing each one by the
 * device sector size to get the block number to read, and gives up with
 * EINVAL when it reaches an entry equal to -1, so the list supplied by
 * SBLOCKSEARCH has to end with -1.
909  */
910 static const int sblock_try[] = SBLOCKSEARCH;
911 
912 /*
913  * Common code for mount and mountroot
914  */
915 int
916 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
917 {
918 	struct ufsmount *ump;
919 	struct buf *bp;
920 	struct fs *fs;
921 	dev_t dev;
922 	struct partinfo dpart;
923 	void *space;
924 	daddr_t sblockloc, fsblockloc;
925 	int blks, fstype;
926 	int error, i, size, ronly, bset = 0;
927 #ifdef FFS_EI
928 	int needswap = 0;		/* keep gcc happy */
929 #endif
930 	int32_t *lp;
931 	kauth_cred_t cred;
932 	u_int32_t sbsize = 8192;	/* keep gcc happy*/
933 
934 	dev = devvp->v_rdev;
935 	cred = l ? l->l_cred : NOCRED;
936 
937 	/* Flush out any old buffers remaining from a previous use. */
938 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
939 	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
940 	VOP_UNLOCK(devvp, 0);
941 	if (error)
942 		return (error);
943 
944 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
945 	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0)
946 		size = DEV_BSIZE;
947 	else
948 		size = dpart.disklab->d_secsize;
949 
950 	bp = NULL;
951 	ump = NULL;
952 	fs = NULL;
953 	sblockloc = 0;
954 	fstype = 0;
955 
956 	error = fstrans_mount(mp);
957 	if (error)
958 		return error;
959 
960 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
961 	memset(ump, 0, sizeof *ump);
962 	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
963 	error = ffs_snapshot_init(ump);
964 	if (error)
965 		goto out;
966 	ump->um_ops = &ffs_ufsops;
967 
968 #ifdef WAPBL
969  sbagain:
970 #endif
971 	/*
972 	 * Try reading the superblock in each of its possible locations.
973 	 */
974 	for (i = 0; ; i++) {
975 		if (bp != NULL) {
976 			brelse(bp, BC_NOCACHE);
977 			bp = NULL;
978 		}
979 		if (sblock_try[i] == -1) {
980 			error = EINVAL;
981 			fs = NULL;
982 			goto out;
983 		}
984 		error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
985 			      0, &bp);
986 		if (error) {
987 			fs = NULL;
988 			goto out;
989 		}
990 		fs = (struct fs*)bp->b_data;
991 		fsblockloc = sblockloc = sblock_try[i];
992 		if (fs->fs_magic == FS_UFS1_MAGIC) {
993 			sbsize = fs->fs_sbsize;
994 			fstype = UFS1;
995 #ifdef FFS_EI
996 			needswap = 0;
997 		} else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
998 			sbsize = bswap32(fs->fs_sbsize);
999 			fstype = UFS1;
1000 			needswap = 1;
1001 #endif
1002 		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
1003 			sbsize = fs->fs_sbsize;
1004 			fstype = UFS2;
1005 #ifdef FFS_EI
1006 			needswap = 0;
1007 		} else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
1008 			sbsize = bswap32(fs->fs_sbsize);
1009 			fstype = UFS2;
1010 			needswap = 1;
1011 #endif
1012 		} else
1013 			continue;
1014 
1015 
1016 		/* fs->fs_sblockloc isn't defined for old filesystems */
1017 		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
1018 			if (sblockloc == SBLOCK_UFS2)
1019 				/*
1020 				 * This is likely to be the first alternate
1021 				 * in a filesystem with 64k blocks.
1022 				 * Don't use it.
1023 				 */
1024 				continue;
1025 			fsblockloc = sblockloc;
1026 		} else {
1027 			fsblockloc = fs->fs_sblockloc;
1028 #ifdef FFS_EI
1029 			if (needswap)
1030 				fsblockloc = bswap64(fsblockloc);
1031 #endif
1032 		}
1033 
1034 		/* Check we haven't found an alternate superblock */
1035 		if (fsblockloc != sblockloc)
1036 			continue;
1037 
1038 		/* Validate size of superblock */
1039 		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
1040 			continue;
1041 
1042 		/* Ok seems to be a good superblock */
1043 		break;
1044 	}
1045 
1046 	fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
1047 	memcpy(fs, bp->b_data, sbsize);
1048 	ump->um_fs = fs;
1049 
1050 #ifdef FFS_EI
1051 	if (needswap) {
1052 		ffs_sb_swap((struct fs*)bp->b_data, fs);
1053 		fs->fs_flags |= FS_SWAPPED;
1054 	} else
1055 #endif
1056 		fs->fs_flags &= ~FS_SWAPPED;
1057 
1058 #ifdef WAPBL
1059 	if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
1060 		error = ffs_wapbl_replay_start(mp, fs, devvp);
1061 		if (error)
1062 			goto out;
1063 
1064 		if (!ronly) {
1065 			/* XXX fsmnt may be stale. */
1066 			printf("%s: replaying log to disk\n", fs->fs_fsmnt);
1067 			error = wapbl_replay_write(mp->mnt_wapbl_replay, devvp);
1068 			if (error)
1069 				goto out;
1070 			wapbl_replay_stop(mp->mnt_wapbl_replay);
1071 			fs->fs_clean = FS_WASCLEAN;
1072 		} else {
1073 			/* XXX fsmnt may be stale */
1074 			printf("%s: replaying log to memory\n", fs->fs_fsmnt);
1075 		}
1076 
1077 		/* Force a re-read of the superblock */
1078 		brelse(bp, BC_INVAL);
1079 		bp = NULL;
1080 		free(fs, M_UFSMNT);
1081 		fs = NULL;
1082 		goto sbagain;
1083 	}
1084 #else /* !WAPBL */
1085 	if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1086 		error = EPERM;
1087 		goto out;
1088 	}
1089 #endif /* !WAPBL */
1090 
1091 	ffs_oldfscompat_read(fs, ump, sblockloc);
1092 	ump->um_maxfilesize = fs->fs_maxfilesize;
1093 
1094 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1095 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1096 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
1097 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1098 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
1099 			error = EINVAL;
1100 			goto out;
1101 		}
1102 	}
1103 
1104 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1105 		fs->fs_pendingblocks = 0;
1106 		fs->fs_pendinginodes = 0;
1107 	}
1108 
1109 	ump->um_fstype = fstype;
1110 	if (fs->fs_sbsize < SBLOCKSIZE)
1111 		brelse(bp, BC_INVAL);
1112 	else
1113 		brelse(bp, 0);
1114 	bp = NULL;
1115 
1116 	/* First check to see if this is tagged as an Apple UFS filesystem
1117 	 * in the disklabel
1118 	 */
1119 	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
1120 		(dpart.part->p_fstype == FS_APPLEUFS)) {
1121 		ump->um_flags |= UFS_ISAPPLEUFS;
1122 	}
1123 #ifdef APPLE_UFS
1124 	else {
1125 		/* Manually look for an apple ufs label, and if a valid one
1126 		 * is found, then treat it like an Apple UFS filesystem anyway
1127 		 */
1128 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
1129 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1130 		if (error)
1131 			goto out;
1132 		error = ffs_appleufs_validate(fs->fs_fsmnt,
1133 			(struct appleufslabel *)bp->b_data, NULL);
1134 		if (error == 0) {
1135 			ump->um_flags |= UFS_ISAPPLEUFS;
1136 		}
1137 		brelse(bp, 0);
1138 		bp = NULL;
1139 	}
1140 #else
1141 	if (ump->um_flags & UFS_ISAPPLEUFS) {
1142 		error = EINVAL;
1143 		goto out;
1144 	}
1145 #endif
1146 
1147 #if 0
1148 /*
1149  * XXX This code changes the behaviour of mounting dirty filesystems, to
1150  * XXX require "mount -f ..." to mount them.  This doesn't match what
1151  * XXX mount(8) describes and is disabled for now.
1152  */
1153 	/*
1154 	 * If the file system is not clean, don't allow it to be mounted
1155 	 * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1156 	 * for the root file system.)
1157 	 */
1158 	if (fs->fs_flags & FS_DOWAPBL) {
1159 		/*
1160 		 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1161 		 * bit is set, although there's a window in unmount where it
1162 		 * could be FS_ISCLEAN
1163 		 */
1164 		if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1165 		    (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1166 			error = EPERM;
1167 			goto out;
1168 		}
1169 	} else
1170 		if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1171 		    (mp->mnt_flag & MNT_FORCE) == 0) {
1172 			error = EPERM;
1173 			goto out;
1174 		}
1175 #endif
1176 
1177 	/*
1178 	 * verify that we can access the last block in the fs
1179 	 * if we're mounting read/write.
1180 	 */
1181 
1182 	if (!ronly) {
1183 		error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
1184 		    cred, 0, &bp);
1185 		if (bp->b_bcount != fs->fs_fsize)
1186 			error = EINVAL;
1187 		if (error) {
1188 			bset = BC_INVAL;
1189 			goto out;
1190 		}
1191 		brelse(bp, BC_INVAL);
1192 		bp = NULL;
1193 	}
1194 
1195 	fs->fs_ronly = ronly;
1196 	/* Don't bump fs_clean if we're replaying journal */
1197 	if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1198 		if (ronly == 0) {
1199 			fs->fs_clean <<= 1;
1200 			fs->fs_fmod = 1;
1201 		}
1202 	size = fs->fs_cssize;
1203 	blks = howmany(size, fs->fs_fsize);
1204 	if (fs->fs_contigsumsize > 0)
1205 		size += fs->fs_ncg * sizeof(int32_t);
1206 	size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1207 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
1208 	fs->fs_csp = space;
1209 	for (i = 0; i < blks; i += fs->fs_frag) {
1210 		size = fs->fs_bsize;
1211 		if (i + fs->fs_frag > blks)
1212 			size = (blks - i) * fs->fs_fsize;
1213 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
1214 			      cred, 0, &bp);
1215 		if (error) {
1216 			free(fs->fs_csp, M_UFSMNT);
1217 			goto out;
1218 		}
1219 #ifdef FFS_EI
1220 		if (needswap)
1221 			ffs_csum_swap((struct csum *)bp->b_data,
1222 				(struct csum *)space, size);
1223 		else
1224 #endif
1225 			memcpy(space, bp->b_data, (u_int)size);
1226 
1227 		space = (char *)space + size;
1228 		brelse(bp, 0);
1229 		bp = NULL;
1230 	}
1231 	if (fs->fs_contigsumsize > 0) {
1232 		fs->fs_maxcluster = lp = space;
1233 		for (i = 0; i < fs->fs_ncg; i++)
1234 			*lp++ = fs->fs_contigsumsize;
1235 		space = lp;
1236 	}
1237 	size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1238 	fs->fs_contigdirs = space;
1239 	space = (char *)space + size;
1240 	memset(fs->fs_contigdirs, 0, size);
1241 		/* Compatibility for old filesystems - XXX */
1242 	if (fs->fs_avgfilesize <= 0)
1243 		fs->fs_avgfilesize = AVFILESIZ;
1244 	if (fs->fs_avgfpdir <= 0)
1245 		fs->fs_avgfpdir = AFPDIR;
1246 	fs->fs_active = NULL;
1247 	mp->mnt_data = ump;
1248 	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1249 	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1250 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1251 	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1252 	if (UFS_MPISAPPLEUFS(ump)) {
1253 		/* NeXT used to keep short symlinks in the inode even
1254 		 * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1255 		 * is probably -1, but we still need to be able to identify
1256 		 * short symlinks.
1257 		 */
1258 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1259 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1260 		mp->mnt_iflag |= IMNT_DTYPE;
1261 	} else {
1262 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1263 		ump->um_dirblksiz = DIRBLKSIZ;
1264 		if (ump->um_maxsymlinklen > 0)
1265 			mp->mnt_iflag |= IMNT_DTYPE;
1266 		else
1267 			mp->mnt_iflag &= ~IMNT_DTYPE;
1268 	}
1269 	mp->mnt_fs_bshift = fs->fs_bshift;
1270 	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
1271 	mp->mnt_flag |= MNT_LOCAL;
1272 	mp->mnt_iflag |= IMNT_MPSAFE;
1273 #ifdef FFS_EI
1274 	if (needswap)
1275 		ump->um_flags |= UFS_NEEDSWAP;
1276 #endif
1277 	ump->um_mountp = mp;
1278 	ump->um_dev = dev;
1279 	ump->um_devvp = devvp;
1280 	ump->um_nindir = fs->fs_nindir;
1281 	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1282 	ump->um_bptrtodb = fs->fs_fsbtodb;
1283 	ump->um_seqinc = fs->fs_frag;
1284 	for (i = 0; i < MAXQUOTAS; i++)
1285 		ump->um_quotas[i] = NULLVP;
1286 	devvp->v_specmountpoint = mp;
1287 	if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
1288 		error = softdep_mount(devvp, mp, fs, cred);
1289 		if (error) {
1290 			free(fs->fs_csp, M_UFSMNT);
1291 			goto out;
1292 		}
1293 	}
1294 	/* Snapshots do not work yet with WAPBL. */
1295 	if (ronly == 0 && fs->fs_snapinum[0] != 0 && (mp->mnt_flag & MNT_LOG)) {
1296 		printf("%s fs has snapshots -- logging not supported yet\n",
1297 		    fs->fs_fsmnt);
1298 		error = EINVAL;
1299 		free(fs->fs_csp, M_UFSMNT);
1300 		goto out;
1301 	}
1302 	if (ronly == 0 && fs->fs_snapinum[0] != 0)
1303 		ffs_snapshot_mount(mp);
1304 
1305 #ifdef WAPBL
1306 	if (!ronly) {
1307 		KDASSERT(fs->fs_ronly == 0);
1308 		/*
1309 		 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1310 		 * needs to create a new log file in-filesystem.
1311 		 */
1312 		ffs_statvfs(mp, &mp->mnt_stat);
1313 
1314 		error = ffs_wapbl_start(mp);
1315 		if (error) {
1316 			free(fs->fs_csp, M_UFSMNT);
1317 			goto out;
1318 		}
1319 	}
1320 #endif /* WAPBL */
1321 #ifdef UFS_EXTATTR
1322 	/*
1323 	 * Initialize file-backed extended attributes on UFS1 file
1324 	 * systems.
1325 	 */
1326 	if (ump->um_fstype == UFS1) {
1327 		ufs_extattr_uepm_init(&ump->um_extattr);
1328 #ifdef UFS_EXTATTR_AUTOSTART
1329 		/*
1330 		 * XXX Just ignore errors.  Not clear that we should
1331 		 * XXX fail the mount in this case.
1332 		 */
1333 		(void) ufs_extattr_autostart(mp, l);
1334 #endif
1335 	}
1336 #endif /* UFS_EXTATTR */
1337 	return (0);
1338 out:
1339 #ifdef WAPBL
1340 	if (mp->mnt_wapbl_replay) {
1341 		if (wapbl_replay_isopen(mp->mnt_wapbl_replay))
1342 			wapbl_replay_stop(mp->mnt_wapbl_replay);
1343 		wapbl_replay_free(mp->mnt_wapbl_replay);
1344 		mp->mnt_wapbl_replay = 0;
1345 	}
1346 #endif
1347 
1348 	fstrans_unmount(mp);
1349 	if (fs)
1350 		free(fs, M_UFSMNT);
1351 	devvp->v_specmountpoint = NULL;
1352 	if (bp)
1353 		brelse(bp, bset);
1354 	if (ump) {
1355 		if (ump->um_oldfscompat)
1356 			free(ump->um_oldfscompat, M_UFSMNT);
1357 		mutex_destroy(&ump->um_lock);
1358 		free(ump, M_UFSMNT);
1359 		mp->mnt_data = NULL;
1360 	}
1361 	return (error);
1362 }
1363 
1364 /*
1365  * Sanity checks for loading old filesystem superblocks.
1366  * See ffs_oldfscompat_write below for unwound actions.
1367  *
1368  * XXX - Parts get retired eventually.
1369  * Unfortunately new bits get added.
 *
 * Does nothing unless the superblock carries the UFS1 magic and
 * FS_FLAGS_UPDATED is clear in fs_old_flags.  Otherwise a 512-byte
 * region starting at fs_old_postbl_start, plus fs_old_npsect,
 * fs_old_interleave and fs_old_trackskew, are saved in
 * ump->um_oldfscompat (allocated here on first use), and the in-core
 * superblock is then filled in from its fs_old_* counterparts.
 *
 *	fs		in-core superblock, modified in place
 *	ump		mount structure that owns the save area
 *	sblockloc	value stored into fs->fs_sblockloc
1370  */
1371 static void
1372 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1373 {
1374 	off_t maxfilesize;
1375 	int32_t *extrasave;
1376
1377 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1378 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1379 		return;
1380
	/*
	 * Save area layout: 512 bytes of superblock data followed by
	 * three int32_t values.  An existing save area is reused.
	 */
1381 	if (!ump->um_oldfscompat)
1382 		ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1383 		    M_UFSMNT, M_WAITOK);
1384
1385 	memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
	/* The three extra words live immediately after the 512 bytes. */
1386 	extrasave = ump->um_oldfscompat;
1387 	extrasave += 512/sizeof(int32_t);
1388 	extrasave[0] = fs->fs_old_npsect;
1389 	extrasave[1] = fs->fs_old_interleave;
1390 	extrasave[2] = fs->fs_old_trackskew;
1391
1392 	/* These fields will be overwritten by their
1393 	 * original values in ffs_oldfscompat_write, so it is harmless
1394 	 * to modify them here.
1395 	 */
1396 	fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1397 	fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1398 	fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1399 	fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1400
1401 	fs->fs_maxbsize = fs->fs_bsize;
1402 	fs->fs_time = fs->fs_old_time;
1403 	fs->fs_size = fs->fs_old_size;
1404 	fs->fs_dsize = fs->fs_old_dsize;
1405 	fs->fs_csaddr = fs->fs_old_csaddr;
1406 	fs->fs_sblockloc = sblockloc;
1407
	/* Take the on-disk flags but keep the in-core FS_INTERNAL bits. */
1408 	fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1409
	/* Fixed geometry values are substituted for FS_42POSTBLFMT. */
1410 	if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1411 		fs->fs_old_nrpos = 8;
1412 		fs->fs_old_npsect = fs->fs_old_nsect;
1413 		fs->fs_old_interleave = 1;
1414 		fs->fs_old_trackskew = 0;
1415 	}
1416
	/*
	 * Inode formats older than FS_44INODEFMT get a 2^39 byte file size
	 * limit, and the qbmask/qfmask values are derived from
	 * fs_bmask/fs_fmask.
	 */
1417 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1418 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1419 		fs->fs_qbmask = ~fs->fs_bmask;
1420 		fs->fs_qfmask = ~fs->fs_fmask;
1421 	}
1422
	/* Clamp the limit to one byte less than 2^31 blocks. */
1423 	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1424 	if (fs->fs_maxfilesize > maxfilesize)
1425 		fs->fs_maxfilesize = maxfilesize;
1426
1427 	/* Compatibility for old filesystems */
1428 	if (fs->fs_avgfilesize <= 0)
1429 		fs->fs_avgfilesize = AVFILESIZ;
1430 	if (fs->fs_avgfpdir <= 0)
1431 		fs->fs_avgfpdir = AFPDIR;
1432
1433 #if 0
1434 	if (bigcgs) {
1435 		fs->fs_save_cgsize = fs->fs_cgsize;
1436 		fs->fs_cgsize = fs->fs_bsize;
1437 	}
1438 #endif
1439 }
1440 
1441 /*
1442  * Unwinding superblock updates for old filesystems.
1443  * See ffs_oldfscompat_read above for details.
1444  *
1445  * XXX - Parts get retired eventually.
1446  * Unfortunately new bits get added.
 *
 * Does nothing unless the superblock carries the UFS1 magic and
 * FS_FLAGS_UPDATED is clear in fs_old_flags.  Otherwise the current
 * time, summary totals and flags are copied into their fs_old_*
 * counterparts, and the data held in ump->um_oldfscompat is copied
 * back over the 512 bytes at fs_old_postbl_start and into
 * fs_old_npsect, fs_old_interleave and fs_old_trackskew.
 *
 *	fs	superblock image to rewrite (ffs_sbupdate passes the
 *		copy held in the output buffer)
 *	ump	mount structure holding the save area
 *
 * NOTE(review): ump->um_oldfscompat is dereferenced without a NULL
 * check; presumably ffs_oldfscompat_read has always populated it when
 * the guard below passes -- confirm.
1447  */
1448 static void
1449 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1450 {
1451 	int32_t *extrasave;
1452
1453 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1454 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1455 		return;
1456
	/* Propagate current values into the old-format fields. */
1457 	fs->fs_old_time = fs->fs_time;
1458 	fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1459 	fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1460 	fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1461 	fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1462 	fs->fs_old_flags = fs->fs_flags;
1463
1464 #if 0
1465 	if (bigcgs) {
1466 		fs->fs_cgsize = fs->fs_save_cgsize;
1467 	}
1468 #endif
1469
	/*
	 * Put back the saved 512-byte region and the three words stored
	 * right after it in the save area.
	 */
1470 	memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1471 	extrasave = ump->um_oldfscompat;
1472 	extrasave += 512/sizeof(int32_t);
1473 	fs->fs_old_npsect = extrasave[0];
1474 	fs->fs_old_interleave = extrasave[1];
1475 	fs->fs_old_trackskew = extrasave[2];
1476
1477 }
1478 
1479 /*
1480  * unmount system call
 *
 * Flushes every file on the mount, writes the superblock back marked
 * clean when that is appropriate, stops WAPBL (when configured), closes
 * the underlying device and frees the per-mount structures.
 *
 *	mp		mount point being unmounted
 *	mntflags	MNT_FORCE requests a forced close of the files
 *
 * Returns 0 on success, or the error from the flush step or from
 * ffs_wapbl_stop(); in both error cases the mount structures are left
 * allocated.
1481  */
1482 int
1483 ffs_unmount(struct mount *mp, int mntflags)
1484 {
1485 	struct lwp *l = curlwp;
1486 	struct ufsmount *ump = VFSTOUFS(mp);
1487 	struct fs *fs = ump->um_fs;
1488 	int error, flags, penderr;
1489 #ifdef WAPBL
1490 	extern int doforce;
1491 #endif
1492
1493 	penderr = 0;
1494 	flags = 0;
1495 	if (mntflags & MNT_FORCE)
1496 		flags |= FORCECLOSE;
1497 #ifdef UFS_EXTATTR
	/*
	 * NOTE(review): extended attributes are stopped before the flush
	 * below, so a failed flush returns with them already torn down.
	 */
1498 	if (ump->um_fstype == UFS1) {
1499 		ufs_extattr_stop(mp, l);
1500 		ufs_extattr_uepm_destroy(&ump->um_extattr);
1501 	}
1502 #endif /* UFS_EXTATTR */
	/* Soft-dependency mounts use their own flush routine. */
1503 	if (mp->mnt_flag & MNT_SOFTDEP) {
1504 		if ((error = softdep_flushfiles(mp, flags, l)) != 0)
1505 			return (error);
1506 	} else {
1507 		if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1508 			return (error);
1509 	}
	/*
	 * Pending counts that are still non-zero are reported and
	 * cleared; penderr then prevents marking the file system clean.
	 */
1510 	mutex_enter(&ump->um_lock);
1511 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1512 		printf("%s: unmount pending error: blocks %" PRId64
1513 		       " files %d\n",
1514 		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
1515 		fs->fs_pendingblocks = 0;
1516 		fs->fs_pendinginodes = 0;
1517 		penderr = 1;
1518 	}
1519 	mutex_exit(&ump->um_lock);
	/*
	 * The superblock is only rewritten for a read/write mount whose
	 * cylinder group update succeeded and whose fs_clean has
	 * FS_WASCLEAN set.  A failure of UFS_WAPBL_BEGIN skips the
	 * update but does not abort the unmount.
	 */
1520 	error = UFS_WAPBL_BEGIN(mp);
1521 	if (error == 0)
1522 		if (fs->fs_ronly == 0 &&
1523 		    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1524 		    fs->fs_clean & FS_WASCLEAN) {
1525 			/*
1526 			 * XXXX don't mark fs clean in the case of softdep
1527 			 * pending block errors, until they are fixed.
1528 			 */
1529 			if (penderr == 0) {
1530 				if (mp->mnt_flag & MNT_SOFTDEP)
1531 					fs->fs_flags &= ~FS_DOSOFTDEP;
1532 				fs->fs_clean = FS_ISCLEAN;
1533 			}
1534 			fs->fs_fmod = 0;
			/* Best effort: the write error is ignored. */
1535 			(void) ffs_sbupdate(ump, MNT_WAIT);
1536 		}
1537 	if (error == 0)
1538 		UFS_WAPBL_END(mp);
1539 #ifdef WAPBL
1540 	KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
	/* Discard any replay state left attached to the mount. */
1541 	if (mp->mnt_wapbl_replay) {
1542 		KDASSERT(fs->fs_ronly);
1543 		wapbl_replay_stop(mp->mnt_wapbl_replay);
1544 		wapbl_replay_free(mp->mnt_wapbl_replay);
1545 		mp->mnt_wapbl_replay = 0;
1546 	}
1547 	error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1548 	if (error) {
1549 		return error;
1550 	}
1551 #endif /* WAPBL */
	/* Detach the device vnode from this mount and close it. */
1552 	if (ump->um_devvp->v_type != VBAD)
1553 		ump->um_devvp->v_specmountpoint = NULL;
1554 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1555 	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1556 		NOCRED);
1557 	vput(ump->um_devvp);
	/* Release the summary area, superblock copy and compat save area. */
1558 	free(fs->fs_csp, M_UFSMNT);
1559 	free(fs, M_UFSMNT);
1560 	if (ump->um_oldfscompat != NULL)
1561 		free(ump->um_oldfscompat, M_UFSMNT);
1562 	softdep_unmount(mp);
1563 	mutex_destroy(&ump->um_lock);
1564 	ffs_snapshot_fini(ump);
1565 	free(ump, M_UFSMNT);
1566 	mp->mnt_data = NULL;
1567 	mp->mnt_flag &= ~MNT_LOCAL;
1568 	fstrans_unmount(mp);
1569 	return (0);
1570 }
1571 
1572 /*
1573  * Flush out all the files in a filesystem.
1574  */
1575 int
1576 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1577 {
1578 	extern int doforce;
1579 	struct ufsmount *ump;
1580 	int error;
1581 
1582 	if (!doforce)
1583 		flags &= ~FORCECLOSE;
1584 	ump = VFSTOUFS(mp);
1585 #ifdef QUOTA
1586 	if (mp->mnt_flag & MNT_QUOTA) {
1587 		int i;
1588 		if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0)
1589 			return (error);
1590 		for (i = 0; i < MAXQUOTAS; i++) {
1591 			if (ump->um_quotas[i] == NULLVP)
1592 				continue;
1593 			quotaoff(l, mp, i);
1594 		}
1595 		/*
1596 		 * Here we fall through to vflush again to ensure
1597 		 * that we have gotten rid of all the system vnodes.
1598 		 */
1599 	}
1600 #endif
1601 	if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1602 		return (error);
1603 	ffs_snapshot_unmount(mp);
1604 	/*
1605 	 * Flush all the files.
1606 	 */
1607 	error = vflush(mp, NULLVP, flags);
1608 	if (error)
1609 		return (error);
1610 	/*
1611 	 * Flush filesystem metadata.
1612 	 */
1613 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1614 	error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1615 	VOP_UNLOCK(ump->um_devvp, 0);
1616 	if (flags & FORCECLOSE) /* XXXDBJ */
1617 		error = 0;
1618 
1619 #ifdef WAPBL
1620 	if (error)
1621 		return error;
1622 	if (mp->mnt_wapbl) {
1623 		error = wapbl_flush(mp->mnt_wapbl, 1);
1624 		if (flags & FORCECLOSE)
1625 			error = 0;
1626 	}
1627 #endif
1628 
1629 	return (error);
1630 }
1631 
1632 /*
1633  * Get file system statistics.
1634  */
1635 int
1636 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1637 {
1638 	struct ufsmount *ump;
1639 	struct fs *fs;
1640 
1641 	ump = VFSTOUFS(mp);
1642 	fs = ump->um_fs;
1643 	mutex_enter(&ump->um_lock);
1644 	sbp->f_bsize = fs->fs_bsize;
1645 	sbp->f_frsize = fs->fs_fsize;
1646 	sbp->f_iosize = fs->fs_bsize;
1647 	sbp->f_blocks = fs->fs_dsize;
1648 	sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1649 		fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1650 	sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1651 	    fs->fs_minfree) / (u_int64_t) 100;
1652 	if (sbp->f_bfree > sbp->f_bresvd)
1653 		sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1654 	else
1655 		sbp->f_bavail = 0;
1656 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1657 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1658 	sbp->f_favail = sbp->f_ffree;
1659 	sbp->f_fresvd = 0;
1660 	mutex_exit(&ump->um_lock);
1661 	copy_statvfs_info(sbp, mp);
1662 
1663 	return (0);
1664 }
1665 
1666 /*
1667  * Go through the disk queues to initiate sandbagged IO;
1668  * go through the inodes to write those that have been modified;
1669  * initiate the writing of the super block if it has been modified.
1670  *
1671  * Note: we are always called with the filesystem marked `MPBUSY'.
1672  */
1673 int
1674 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1675 {
1676 	struct lwp *l = curlwp;
1677 	struct vnode *vp, *mvp;
1678 	struct inode *ip;
1679 	struct ufsmount *ump = VFSTOUFS(mp);
1680 	struct fs *fs;
1681 	int error, count, allerror = 0;
1682 
1683 	fs = ump->um_fs;
1684 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1685 		printf("fs = %s\n", fs->fs_fsmnt);
1686 		panic("update: rofs mod");
1687 	}
1688 
1689 	/* Allocate a marker vnode. */
1690 	if ((mvp = vnalloc(mp)) == NULL)
1691 		return (ENOMEM);
1692 
1693 	fstrans_start(mp, FSTRANS_SHARED);
1694 	/*
1695 	 * Write back each (modified) inode.
1696 	 */
1697 	mutex_enter(&mntvnode_lock);
1698 loop:
1699 	/*
1700 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1701 	 * and vclean() can be called indirectly
1702 	 */
1703 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
1704 		vmark(mvp, vp);
1705 		/*
1706 		 * If the vnode that we are about to sync is no longer
1707 		 * associated with this mount point, start over.
1708 		 */
1709 		if (vp->v_mount != mp || vismarker(vp))
1710 			continue;
1711 		mutex_enter(&vp->v_interlock);
1712 		ip = VTOI(vp);
1713 		/* XXXpooka: why wapbl check? */
1714 		if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1715 		    vp->v_type == VNON || ((ip->i_flag &
1716 		    (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
1717 		    (LIST_EMPTY(&vp->v_dirtyblkhd) || (mp->mnt_wapbl)) &&
1718 		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
1719 		{
1720 			mutex_exit(&vp->v_interlock);
1721 			continue;
1722 		}
1723 		if (vp->v_type == VBLK &&
1724 		    fstrans_getstate(mp) == FSTRANS_SUSPENDING) {
1725 			mutex_exit(&vp->v_interlock);
1726 			continue;
1727 		}
1728 		mutex_exit(&mntvnode_lock);
1729 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1730 		if (error) {
1731 			mutex_enter(&mntvnode_lock);
1732 			if (error == ENOENT) {
1733 				(void)vunmark(mvp);
1734 				goto loop;
1735 			}
1736 			continue;
1737 		}
1738 		if (vp->v_type == VREG && waitfor == MNT_LAZY) {
1739 			error = UFS_WAPBL_BEGIN(vp->v_mount);
1740 			if (!error) {
1741 				error = ffs_update(vp, NULL, NULL, 0);
1742 				UFS_WAPBL_END(vp->v_mount);
1743 			}
1744 		} else {
1745 			error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1746 			    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1747 		}
1748 		if (error)
1749 			allerror = error;
1750 		vput(vp);
1751 		mutex_enter(&mntvnode_lock);
1752 	}
1753 	mutex_exit(&mntvnode_lock);
1754 	/*
1755 	 * Force stale file system control information to be flushed.
1756 	 */
1757 	if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
1758 		if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
1759 			allerror = error;
1760 		/* Flushed work items may create new vnodes to clean */
1761 		if (allerror == 0 && count) {
1762 			mutex_enter(&mntvnode_lock);
1763 			goto loop;
1764 		}
1765 	}
1766 	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1767 	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1768 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1769 		if ((error = VOP_FSYNC(ump->um_devvp, cred,
1770 		    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1771 		    0, 0)) != 0)
1772 			allerror = error;
1773 		VOP_UNLOCK(ump->um_devvp, 0);
1774 		if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1775 			mutex_enter(&mntvnode_lock);
1776 			goto loop;
1777 		}
1778 	}
1779 #ifdef QUOTA
1780 	qsync(mp);
1781 #endif
1782 	/*
1783 	 * Write back modified superblock.
1784 	 */
1785 	if (fs->fs_fmod != 0) {
1786 		fs->fs_fmod = 0;
1787 		fs->fs_time = time_second;
1788 		error = UFS_WAPBL_BEGIN(mp);
1789 		if (error)
1790 			allerror = error;
1791 		else {
1792 			if ((error = ffs_cgupdate(ump, waitfor)))
1793 				allerror = error;
1794 				UFS_WAPBL_END(mp);
1795 		}
1796 	}
1797 
1798 #ifdef WAPBL
1799 	if (mp->mnt_wapbl) {
1800 		error = wapbl_flush(mp->mnt_wapbl, 0);
1801 		if (error)
1802 			allerror = error;
1803 	}
1804 #endif
1805 
1806 	fstrans_done(mp);
1807 	vnfree(mvp);
1808 	return (allerror);
1809 }
1810 
1811 /*
1812  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1813  * in from disk.  If it is in core, wait for the lock bit to clear, then
1814  * return the inode locked.  Detection and handling of mount points must be
1815  * done by the calling routine.
 *
 *	mp	mount point the inode belongs to
 *	ino	inode number to look up
 *	vpp	receives the vnode on success, NULL on failure
 *
 * Returns 0 on success, or the error from getnewvnode() or bread().
1816  */
1817 int
1818 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1819 {
1820 	struct fs *fs;
1821 	struct inode *ip;
1822 	struct ufsmount *ump;
1823 	struct buf *bp;
1824 	struct vnode *vp;
1825 	dev_t dev;
1826 	int error;
1827
1828 	ump = VFSTOUFS(mp);
1829 	dev = ump->um_dev;
1830
1831  retry:
	/* Fast path: the inode is already in the hash table. */
1832 	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1833 		return (0);
1834
1835 	/* Allocate a new vnode/inode. */
1836 	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1837 		*vpp = NULL;
1838 		return (error);
1839 	}
1840 	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1841
1842 	/*
1843 	 * If someone beat us to it, put back the freshly allocated
1844 	 * vnode/inode pair and retry.
1845 	 */
1846 	mutex_enter(&ufs_hashlock);
1847 	if (ufs_ihashget(dev, ino, 0) != NULL) {
1848 		mutex_exit(&ufs_hashlock);
1849 		ungetnewvnode(vp);
1850 		pool_cache_put(ffs_inode_cache, ip);
1851 		goto retry;
1852 	}
1853
1854 	vp->v_vflag |= VV_LOCKSWORK;
1855 	if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
1856 		vp->v_uflag |= VU_SOFTDEP;
1857
1858 	/*
1859 	 * XXX MFS ends up here, too, to allocate an inode.  Should we
1860 	 * XXX create another pool for MFS inodes?
1861 	 */
1862
	/* Start from a zeroed inode and link it to the vnode and mount. */
1863 	memset(ip, 0, sizeof(struct inode));
1864 	vp->v_data = ip;
1865 	ip->i_vnode = vp;
1866 	ip->i_ump = ump;
1867 	ip->i_fs = fs = ump->um_fs;
1868 	ip->i_dev = dev;
1869 	ip->i_number = ino;
1870 	LIST_INIT(&ip->i_pcbufhd);
1871 #ifdef QUOTA
1872 	ufsquota_init(ip);
1873 #endif
1874
1875 	/*
1876 	 * Initialize genfs node, we might proceed to destroy it in
1877 	 * error branches.
1878 	 */
1879 	genfs_node_init(vp, &ffs_genfsops);
1880
1881 	/*
1882 	 * Put it onto its hash chain and lock it so that other requests for
1883 	 * this inode will block if they arrive while we are sleeping waiting
1884 	 * for old data structures to be purged or for the contents of the
1885 	 * disk portion of this inode to be read.
1886 	 */
1887
1888 	ufs_ihashins(ip);
1889 	mutex_exit(&ufs_hashlock);
1890
1891 	/* Read in the disk contents for the inode, copy into the inode. */
1892 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1893 		      (int)fs->fs_bsize, NOCRED, 0, &bp);
1894 	if (error) {
1895
1896 		/*
1897 		 * The inode does not contain anything useful, so it would
1898 		 * be misleading to leave it on its hash chain. With mode
1899 		 * still zero, it will be unlinked and returned to the free
1900 		 * list by vput().
1901 		 */
1902
		/*
		 * NOTE(review): bp is released even though bread()
		 * failed; this presumes bread() hands back a buffer on
		 * error in this kernel version -- confirm.
		 */
1903 		vput(vp);
1904 		brelse(bp, 0);
1905 		*vpp = NULL;
1906 		return (error);
1907 	}
	/* The dinode copy comes from the pool matching the fs type. */
1908 	if (ip->i_ump->um_fstype == UFS1)
1909 		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1910 		    PR_WAITOK);
1911 	else
1912 		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1913 		    PR_WAITOK);
1914 	ffs_load_inode(bp, ip, fs, ino);
	/* Without soft dependencies the effective link count is i_nlink. */
1915 	if (DOINGSOFTDEP(vp))
1916 		softdep_load_inodeblock(ip);
1917 	else
1918 		ip->i_ffs_effnlink = ip->i_nlink;
1919 	brelse(bp, 0);
1920
1921 	/*
1922 	 * Initialize the vnode from the inode, check for aliases.
1923 	 * Note that the underlying vnode may have changed.
1924 	 */
1925
1926 	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1927
1928 	/*
1929 	 * Finish inode initialization now that aliasing has been resolved.
1930 	 */
1931
1932 	ip->i_devvp = ump->um_devvp;
1933 	VREF(ip->i_devvp);
1934
1935 	/*
1936 	 * Ensure that uid and gid are correct. This is a temporary
1937 	 * fix until fsck has been changed to do the update.
1938 	 */
1939
1940 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {		/* XXX */
1941 		ip->i_uid = ip->i_ffs1_ouid;			/* XXX */
1942 		ip->i_gid = ip->i_ffs1_ogid;			/* XXX */
1943 	}							/* XXX */
1944 	uvm_vnp_setsize(vp, ip->i_size);
1945 	*vpp = vp;
1946 	return (0);
1947 }
1948 
1949 /*
1950  * File handle to vnode
1951  *
1952  * Have to be really careful about stale file handles:
1953  * - check that the inode number is valid
1954  * - call ffs_vget() to get the locked inode
1955  * - check for an unallocated inode (i_mode == 0)
1956  * - check that the given client host has export rights and return
1957  *   those rights via. exflagsp and credanonp
1958  */
1959 int
1960 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1961 {
1962 	struct ufid ufh;
1963 	struct fs *fs;
1964 
1965 	if (fhp->fid_len != sizeof(struct ufid))
1966 		return EINVAL;
1967 
1968 	memcpy(&ufh, fhp, sizeof(ufh));
1969 	fs = VFSTOUFS(mp)->um_fs;
1970 	if (ufh.ufid_ino < ROOTINO ||
1971 	    ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1972 		return (ESTALE);
1973 	return (ufs_fhtovp(mp, &ufh, vpp));
1974 }
1975 
1976 /*
1977  * Vnode pointer to File handle
1978  */
1979 /* ARGSUSED */
1980 int
1981 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1982 {
1983 	struct inode *ip;
1984 	struct ufid ufh;
1985 
1986 	if (*fh_size < sizeof(struct ufid)) {
1987 		*fh_size = sizeof(struct ufid);
1988 		return E2BIG;
1989 	}
1990 	ip = VTOI(vp);
1991 	*fh_size = sizeof(struct ufid);
1992 	memset(&ufh, 0, sizeof(ufh));
1993 	ufh.ufid_len = sizeof(struct ufid);
1994 	ufh.ufid_ino = ip->i_number;
1995 	ufh.ufid_gen = ip->i_gen;
1996 	memcpy(fhp, &ufh, sizeof(ufh));
1997 	return (0);
1998 }
1999 
2000 void
2001 ffs_init(void)
2002 {
2003 	if (ffs_initcount++ > 0)
2004 		return;
2005 
2006 	ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
2007 	    "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
2008 	ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
2009 	    "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
2010 	ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
2011 	    "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
2012 	softdep_initialize();
2013 	ufs_init();
2014 }
2015 
/*
 * Re-initialise the FFS module by calling the soft dependency and UFS
 * re-initialisation hooks, in that order.
 */
2016 void
2017 ffs_reinit(void)
2018 {
2019 	softdep_reinitialize();
2020 	ufs_reinit();
2021 }
2022 
2023 void
2024 ffs_done(void)
2025 {
2026 	if (--ffs_initcount > 0)
2027 		return;
2028 
2029 	/* XXX softdep cleanup ? */
2030 	ufs_done();
2031 	pool_cache_destroy(ffs_dinode2_cache);
2032 	pool_cache_destroy(ffs_dinode1_cache);
2033 	pool_cache_destroy(ffs_inode_cache);
2034 }
2035 
2036 /*
2037  * Write a superblock and associated information back to disk.
 *
 *	mp	UFS mount whose in-core superblock (mp->um_fs) is written
 *	waitfor	MNT_WAIT writes synchronously with bwrite(); any other
 *		value starts an asynchronous write with bawrite()
 *
 * Returns the error from ffs_getblk() or, for MNT_WAIT, from bwrite();
 * otherwise 0.
2038  */
2039 int
2040 ffs_sbupdate(struct ufsmount *mp, int waitfor)
2041 {
2042 	struct fs *fs = mp->um_fs;
2043 	struct buf *bp;
2044 	int error = 0;
2045 	u_int32_t saveflag;
2046
	/*
	 * NOTE(review): the shift presumably converts the superblock
	 * location from a byte offset to a device block number -- confirm.
	 */
2047 	error = ffs_getblk(mp->um_devvp,
2048 	    fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb), FFS_NOBLK,
2049 	    fs->fs_sbsize, false, &bp);
2050 	if (error)
2051 		return error;
	/*
	 * The FS_INTERNAL flag bits are kept out of the on-disk copy:
	 * they are cleared before the copy and restored in core after it.
	 */
2052 	saveflag = fs->fs_flags & FS_INTERNAL;
2053 	fs->fs_flags &= ~FS_INTERNAL;
2054
2055 	memcpy(bp->b_data, fs, fs->fs_sbsize);
2056
	/*
	 * The old-format fixups and any byte swapping are applied to the
	 * buffer copy only.
	 */
2057 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
2058 #ifdef FFS_EI
2059 	if (mp->um_flags & UFS_NEEDSWAP)
2060 		ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
2061 #endif
2062 	fs->fs_flags |= saveflag;
2063
2064 	if (waitfor == MNT_WAIT)
2065 		error = bwrite(bp);
2066 	else
2067 		bawrite(bp);
2068 	return (error);
2069 }
2070 
2071 int
2072 ffs_cgupdate(struct ufsmount *mp, int waitfor)
2073 {
2074 	struct fs *fs = mp->um_fs;
2075 	struct buf *bp;
2076 	int blks;
2077 	void *space;
2078 	int i, size, error = 0, allerror = 0;
2079 
2080 	allerror = ffs_sbupdate(mp, waitfor);
2081 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
2082 	space = fs->fs_csp;
2083 	for (i = 0; i < blks; i += fs->fs_frag) {
2084 		size = fs->fs_bsize;
2085 		if (i + fs->fs_frag > blks)
2086 			size = (blks - i) * fs->fs_fsize;
2087 		error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
2088 		    FFS_NOBLK, size, false, &bp);
2089 		if (error)
2090 			break;
2091 #ifdef FFS_EI
2092 		if (mp->um_flags & UFS_NEEDSWAP)
2093 			ffs_csum_swap((struct csum*)space,
2094 			    (struct csum*)bp->b_data, size);
2095 		else
2096 #endif
2097 			memcpy(bp->b_data, space, (u_int)size);
2098 		space = (char *)space + size;
2099 		if (waitfor == MNT_WAIT)
2100 			error = bwrite(bp);
2101 		else
2102 			bawrite(bp);
2103 	}
2104 	if (!allerror && error)
2105 		allerror = error;
2106 	return (allerror);
2107 }
2108 
/*
 * Extended attribute control.  With UFS_EXTATTR configured, requests on
 * a UFS1 mount go to ufs_extattrctl(); everything else goes to
 * vfs_stdextattrctl().  The callee's return value is passed through.
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * Only UFS1 uses file-backed extended attributes; UFS2 has
	 * native ones.
	 */
	if (ump->um_fstype == UFS1)
		return ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname);
#endif
	return vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname);
}
2123 
2124 int
2125 ffs_suspendctl(struct mount *mp, int cmd)
2126 {
2127 	int error;
2128 	struct lwp *l = curlwp;
2129 
2130 	switch (cmd) {
2131 	case SUSPEND_SUSPEND:
2132 		if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2133 			return error;
2134 		error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2135 		if (error == 0)
2136 			error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2137 		if (error != 0) {
2138 			(void) fstrans_setstate(mp, FSTRANS_NORMAL);
2139 			return error;
2140 		}
2141 		return 0;
2142 
2143 	case SUSPEND_RESUME:
2144 		return fstrans_setstate(mp, FSTRANS_NORMAL);
2145 
2146 	default:
2147 		return EINVAL;
2148 	}
2149 }
2150