xref: /netbsd-src/sys/ufs/ffs/ffs_vfsops.c (revision cb861154c176d3dcc8ff846f449e3c16a5f5edb5)
1 /*	$NetBSD: ffs_vfsops.c,v 1.266 2011/04/27 07:24:53 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1989, 1991, 1993, 1994
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.266 2011/04/27 07:24:53 hannken Exp $");
65 
66 #if defined(_KERNEL_OPT)
67 #include "opt_ffs.h"
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
70 #endif
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
75 #include <sys/proc.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
80 #include <sys/buf.h>
81 #include <sys/device.h>
82 #include <sys/mbuf.h>
83 #include <sys/file.h>
84 #include <sys/disklabel.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/malloc.h>
88 #include <sys/pool.h>
89 #include <sys/lock.h>
90 #include <sys/sysctl.h>
91 #include <sys/conf.h>
92 #include <sys/kauth.h>
93 #include <sys/wapbl.h>
94 #include <sys/fstrans.h>
95 #include <sys/module.h>
96 
97 #include <miscfs/genfs/genfs.h>
98 #include <miscfs/specfs/specdev.h>
99 
100 #include <ufs/ufs/quota.h>
101 #include <ufs/ufs/ufsmount.h>
102 #include <ufs/ufs/inode.h>
103 #include <ufs/ufs/dir.h>
104 #include <ufs/ufs/ufs_extern.h>
105 #include <ufs/ufs/ufs_bswap.h>
106 #include <ufs/ufs/ufs_wapbl.h>
107 
108 #include <ufs/ffs/fs.h>
109 #include <ufs/ffs/ffs_extern.h>
110 
111 MODULE(MODULE_CLASS_VFS, ffs, NULL);
112 
113 static int	ffs_vfs_fsync(vnode_t *, int);
114 
115 static struct sysctllog *ffs_sysctl_log;
116 
117 /* how many times ffs_init() was called */
118 int ffs_initcount = 0;
119 
120 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
121 extern const struct vnodeopv_desc ffs_specop_opv_desc;
122 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
123 
124 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
125 	&ffs_vnodeop_opv_desc,
126 	&ffs_specop_opv_desc,
127 	&ffs_fifoop_opv_desc,
128 	NULL,
129 };
130 
131 struct vfsops ffs_vfsops = {
132 	MOUNT_FFS,
133 	sizeof (struct ufs_args),
134 	ffs_mount,
135 	ufs_start,
136 	ffs_unmount,
137 	ufs_root,
138 	ufs_quotactl,
139 	ffs_statvfs,
140 	ffs_sync,
141 	ffs_vget,
142 	ffs_fhtovp,
143 	ffs_vptofh,
144 	ffs_init,
145 	ffs_reinit,
146 	ffs_done,
147 	ffs_mountroot,
148 	ffs_snapshot,
149 	ffs_extattrctl,
150 	ffs_suspendctl,
151 	genfs_renamelock_enter,
152 	genfs_renamelock_exit,
153 	ffs_vfs_fsync,
154 	ffs_vnodeopv_descs,
155 	0,
156 	{ NULL, NULL },
157 };
158 
159 static const struct genfs_ops ffs_genfsops = {
160 	.gop_size = ffs_gop_size,
161 	.gop_alloc = ufs_gop_alloc,
162 	.gop_write = genfs_gop_write,
163 	.gop_markupdate = ufs_gop_markupdate,
164 };
165 
166 static const struct ufs_ops ffs_ufsops = {
167 	.uo_itimes = ffs_itimes,
168 	.uo_update = ffs_update,
169 	.uo_truncate = ffs_truncate,
170 	.uo_valloc = ffs_valloc,
171 	.uo_vfree = ffs_vfree,
172 	.uo_balloc = ffs_balloc,
173 	.uo_unmark_vnode = (void (*)(vnode_t *))nullop,
174 };
175 
176 static int
177 ffs_modcmd(modcmd_t cmd, void *arg)
178 {
179 	int error;
180 
181 #if 0
182 	extern int doasyncfree;
183 #endif
184 	extern int ffs_log_changeopt;
185 
186 	switch (cmd) {
187 	case MODULE_CMD_INIT:
188 		error = vfs_attach(&ffs_vfsops);
189 		if (error != 0)
190 			break;
191 
192 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
193 			       CTLFLAG_PERMANENT,
194 			       CTLTYPE_NODE, "vfs", NULL,
195 			       NULL, 0, NULL, 0,
196 			       CTL_VFS, CTL_EOL);
197 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
198 			       CTLFLAG_PERMANENT,
199 			       CTLTYPE_NODE, "ffs",
200 			       SYSCTL_DESCR("Berkeley Fast File System"),
201 			       NULL, 0, NULL, 0,
202 			       CTL_VFS, 1, CTL_EOL);
203 
204 		/*
205 		 * @@@ should we even bother with these first three?
206 		 */
207 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
208 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
209 			       CTLTYPE_INT, "doclusterread", NULL,
210 			       sysctl_notavail, 0, NULL, 0,
211 			       CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
212 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
213 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
214 			       CTLTYPE_INT, "doclusterwrite", NULL,
215 			       sysctl_notavail, 0, NULL, 0,
216 			       CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
217 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
218 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
219 			       CTLTYPE_INT, "doreallocblks", NULL,
220 			       sysctl_notavail, 0, NULL, 0,
221 			       CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
222 #if 0
223 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
224 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
225 			       CTLTYPE_INT, "doasyncfree",
226 			       SYSCTL_DESCR("Release dirty blocks asynchronously"),
227 			       NULL, 0, &doasyncfree, 0,
228 			       CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
229 #endif
230 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
231 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
232 			       CTLTYPE_INT, "log_changeopt",
233 			       SYSCTL_DESCR("Log changes in optimization strategy"),
234 			       NULL, 0, &ffs_log_changeopt, 0,
235 			       CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
236 		break;
237 	case MODULE_CMD_FINI:
238 		error = vfs_detach(&ffs_vfsops);
239 		if (error != 0)
240 			break;
241 		sysctl_teardown(&ffs_sysctl_log);
242 		break;
243 	default:
244 		error = ENOTTY;
245 		break;
246 	}
247 
248 	return (error);
249 }
250 
251 pool_cache_t ffs_inode_cache;
252 pool_cache_t ffs_dinode1_cache;
253 pool_cache_t ffs_dinode2_cache;
254 
255 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
256 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
257 
258 /*
259  * Called by main() when ffs is going to be mounted as root.
260  */
261 
262 int
263 ffs_mountroot(void)
264 {
265 	struct fs *fs;
266 	struct mount *mp;
267 	struct lwp *l = curlwp;			/* XXX */
268 	struct ufsmount *ump;
269 	int error;
270 
271 	if (device_class(root_device) != DV_DISK)
272 		return (ENODEV);
273 
274 	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
275 		vrele(rootvp);
276 		return (error);
277 	}
278 
279 	/*
280 	 * We always need to be able to mount the root file system.
281 	 */
282 	mp->mnt_flag |= MNT_FORCE;
283 	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
284 		vfs_unbusy(mp, false, NULL);
285 		vfs_destroy(mp);
286 		return (error);
287 	}
288 	mp->mnt_flag &= ~MNT_FORCE;
289 	mutex_enter(&mountlist_lock);
290 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
291 	mutex_exit(&mountlist_lock);
292 	ump = VFSTOUFS(mp);
293 	fs = ump->um_fs;
294 	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
295 	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
296 	(void)ffs_statvfs(mp, &mp->mnt_stat);
297 	vfs_unbusy(mp, false, NULL);
298 	setrootfstime((time_t)fs->fs_time);
299 	return (0);
300 }
301 
302 /*
303  * VFS Operations.
304  *
305  * mount system call
306  */
307 int
308 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
309 {
310 	struct lwp *l = curlwp;
311 	struct vnode *devvp = NULL;
312 	struct ufs_args *args = data;
313 	struct ufsmount *ump = NULL;
314 	struct fs *fs;
315 	int error = 0, flags, update;
316 	mode_t accessmode;
317 
318 	if (*data_len < sizeof *args)
319 		return EINVAL;
320 
321 	if (mp->mnt_flag & MNT_GETARGS) {
322 		ump = VFSTOUFS(mp);
323 		if (ump == NULL)
324 			return EIO;
325 		args->fspec = NULL;
326 		*data_len = sizeof *args;
327 		return 0;
328 	}
329 
330 	update = mp->mnt_flag & MNT_UPDATE;
331 
332 	/* Check arguments */
333 	if (args->fspec != NULL) {
334 		/*
335 		 * Look up the name and verify that it's sane.
336 		 */
337 		error = namei_simple_user(args->fspec,
338 					NSM_FOLLOW_NOEMULROOT, &devvp);
339 		if (error != 0)
340 			return (error);
341 
342 		if (!update) {
343 			/*
344 			 * Be sure this is a valid block device
345 			 */
346 			if (devvp->v_type != VBLK)
347 				error = ENOTBLK;
348 			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
349 				error = ENXIO;
350 		} else {
351 			/*
352 			 * Be sure we're still naming the same device
353 			 * used for our initial mount
354 			 */
355 			ump = VFSTOUFS(mp);
356 			if (devvp != ump->um_devvp) {
357 				if (devvp->v_rdev != ump->um_devvp->v_rdev)
358 					error = EINVAL;
359 				else {
360 					vrele(devvp);
361 					devvp = ump->um_devvp;
362 					vref(devvp);
363 				}
364 			}
365 		}
366 	} else {
367 		if (!update) {
368 			/* New mounts must have a filename for the device */
369 			return (EINVAL);
370 		} else {
371 			/* Use the extant mount */
372 			ump = VFSTOUFS(mp);
373 			devvp = ump->um_devvp;
374 			vref(devvp);
375 		}
376 	}
377 
378 	/*
379 	 * If mount by non-root, then verify that user has necessary
380 	 * permissions on the device.
381 	 *
382 	 * Permission to update a mount is checked higher, so here we presume
383 	 * updating the mount is okay (for example, as far as securelevel goes)
384 	 * which leaves us with the normal check.
385 	 */
386 	if (error == 0) {
387 		accessmode = VREAD;
388 		if (update ?
389 		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
390 		    (mp->mnt_flag & MNT_RDONLY) == 0)
391 			accessmode |= VWRITE;
392 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
393 		error = genfs_can_mount(devvp, accessmode, l->l_cred);
394 		VOP_UNLOCK(devvp);
395 	}
396 
397 	if (error) {
398 		vrele(devvp);
399 		return (error);
400 	}
401 
402 #ifdef WAPBL
403 	/* WAPBL can only be enabled on a r/w mount. */
404 	if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
405 		mp->mnt_flag &= ~MNT_LOG;
406 	}
407 #else /* !WAPBL */
408 	mp->mnt_flag &= ~MNT_LOG;
409 #endif /* !WAPBL */
410 
411 	if (!update) {
412 		int xflags;
413 
414 		if (mp->mnt_flag & MNT_RDONLY)
415 			xflags = FREAD;
416 		else
417 			xflags = FREAD | FWRITE;
418 		error = VOP_OPEN(devvp, xflags, FSCRED);
419 		if (error)
420 			goto fail;
421 		error = ffs_mountfs(devvp, mp, l);
422 		if (error) {
423 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
424 			(void)VOP_CLOSE(devvp, xflags, NOCRED);
425 			VOP_UNLOCK(devvp);
426 			goto fail;
427 		}
428 
429 		ump = VFSTOUFS(mp);
430 		fs = ump->um_fs;
431 	} else {
432 		/*
433 		 * Update the mount.
434 		 */
435 
436 		/*
437 		 * The initial mount got a reference on this
438 		 * device, so drop the one obtained via
439 		 * namei(), above.
440 		 */
441 		vrele(devvp);
442 
443 		ump = VFSTOUFS(mp);
444 		fs = ump->um_fs;
445 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
446 			/*
447 			 * Changing from r/w to r/o
448 			 */
449 			flags = WRITECLOSE;
450 			if (mp->mnt_flag & MNT_FORCE)
451 				flags |= FORCECLOSE;
452 			error = ffs_flushfiles(mp, flags, l);
453 			if (error == 0)
454 				error = UFS_WAPBL_BEGIN(mp);
455 			if (error == 0 &&
456 			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
457 			    fs->fs_clean & FS_WASCLEAN) {
458 				if (mp->mnt_flag & MNT_SOFTDEP)
459 					fs->fs_flags &= ~FS_DOSOFTDEP;
460 				fs->fs_clean = FS_ISCLEAN;
461 				(void) ffs_sbupdate(ump, MNT_WAIT);
462 			}
463 			if (error == 0)
464 				UFS_WAPBL_END(mp);
465 			if (error)
466 				return (error);
467 		}
468 
469 #ifdef WAPBL
470 		if ((mp->mnt_flag & MNT_LOG) == 0) {
471 			error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
472 			if (error)
473 				return error;
474 		}
475 #endif /* WAPBL */
476 
477 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
478 			/*
479 			 * Finish change from r/w to r/o
480 			 */
481 			fs->fs_ronly = 1;
482 			fs->fs_fmod = 0;
483 		}
484 
485 		if (mp->mnt_flag & MNT_RELOAD) {
486 			error = ffs_reload(mp, l->l_cred, l);
487 			if (error)
488 				return (error);
489 		}
490 
491 		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
492 			/*
493 			 * Changing from read-only to read/write
494 			 */
495 #ifndef QUOTA2
496 			if (fs->fs_flags & FS_DOQUOTA2) {
497 				ump->um_flags |= UFS_QUOTA2;
498 				uprintf("%s: options QUOTA2 not enabled%s\n",
499 				    mp->mnt_stat.f_mntonname,
500 				    (mp->mnt_flag & MNT_FORCE) ? "" :
501 				    ", not mounting");
502 				return EINVAL;
503 			}
504 #endif
505 			fs->fs_ronly = 0;
506 			fs->fs_clean <<= 1;
507 			fs->fs_fmod = 1;
508 #ifdef WAPBL
509 			if (fs->fs_flags & FS_DOWAPBL) {
510 				printf("%s: replaying log to disk\n",
511 				    fs->fs_fsmnt);
512 				KDASSERT(mp->mnt_wapbl_replay);
513 				error = wapbl_replay_write(mp->mnt_wapbl_replay,
514 							   devvp);
515 				if (error) {
516 					return error;
517 				}
518 				wapbl_replay_stop(mp->mnt_wapbl_replay);
519 				fs->fs_clean = FS_WASCLEAN;
520 			}
521 #endif /* WAPBL */
522 			if (fs->fs_snapinum[0] != 0)
523 				ffs_snapshot_mount(mp);
524 		}
525 
526 #ifdef WAPBL
527 		error = ffs_wapbl_start(mp);
528 		if (error)
529 			return error;
530 #endif /* WAPBL */
531 
532 #ifdef QUOTA2
533 		if (!fs->fs_ronly) {
534 			error = ffs_quota2_mount(mp);
535 			if (error) {
536 				return error;
537 			}
538 		}
539 #endif
540 		if (args->fspec == NULL)
541 			return 0;
542 	}
543 
544 	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
545 	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
546 	if (error == 0)
547 		(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
548 		    sizeof(fs->fs_fsmnt));
549 	fs->fs_flags &= ~FS_DOSOFTDEP;
550 	if (fs->fs_fmod != 0) {	/* XXX */
551 		int err;
552 
553 		fs->fs_fmod = 0;
554 		if (fs->fs_clean & FS_WASCLEAN)
555 			fs->fs_time = time_second;
556 		else {
557 			printf("%s: file system not clean (fs_clean=%#x); "
558 			    "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
559 			    fs->fs_clean);
560 			printf("%s: lost blocks %" PRId64 " files %d\n",
561 			    mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
562 			    fs->fs_pendinginodes);
563 		}
564 		err = UFS_WAPBL_BEGIN(mp);
565 		if (err == 0) {
566 			(void) ffs_cgupdate(ump, MNT_WAIT);
567 			UFS_WAPBL_END(mp);
568 		}
569 	}
570 	if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
571 		printf("%s: `-o softdep' is no longer supported, "
572 		    "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
573 		mp->mnt_flag &= ~MNT_SOFTDEP;
574 	}
575 
576 	return (error);
577 
578 fail:
579 	vrele(devvp);
580 	return (error);
581 }
582 
583 /*
584  * Reload all incore data for a filesystem (used after running fsck on
585  * the root filesystem and finding things to fix). The filesystem must
586  * be mounted read-only.
587  *
588  * Things to do to update the mount:
589  *	1) invalidate all cached meta-data.
590  *	2) re-read superblock from disk.
591  *	3) re-read summary information from disk.
592  *	4) invalidate all inactive vnodes.
593  *	5) invalidate all cached file data.
594  *	6) re-read inode data for all active vnodes.
595  */
596 int
597 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
598 {
599 	struct vnode *vp, *mvp, *devvp;
600 	struct inode *ip;
601 	void *space;
602 	struct buf *bp;
603 	struct fs *fs, *newfs;
604 	struct partinfo dpart;
605 	int i, bsize, blks, error;
606 	int32_t *lp;
607 	struct ufsmount *ump;
608 	daddr_t sblockloc;
609 
610 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
611 		return (EINVAL);
612 
613 	ump = VFSTOUFS(mp);
614 	/*
615 	 * Step 1: invalidate all cached meta-data.
616 	 */
617 	devvp = ump->um_devvp;
618 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
619 	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
620 	VOP_UNLOCK(devvp);
621 	if (error)
622 		panic("ffs_reload: dirty1");
623 	/*
624 	 * Step 2: re-read superblock from disk.
625 	 */
626 	fs = ump->um_fs;
627 
628 	/* XXX we don't handle possibility that superblock moved. */
629 	error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize,
630 		      NOCRED, 0, &bp);
631 	if (error) {
632 		brelse(bp, 0);
633 		return (error);
634 	}
635 	newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
636 	memcpy(newfs, bp->b_data, fs->fs_sbsize);
637 #ifdef FFS_EI
638 	if (ump->um_flags & UFS_NEEDSWAP) {
639 		ffs_sb_swap((struct fs*)bp->b_data, newfs);
640 		fs->fs_flags |= FS_SWAPPED;
641 	} else
642 #endif
643 		fs->fs_flags &= ~FS_SWAPPED;
644 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
645 	     newfs->fs_magic != FS_UFS2_MAGIC)||
646 	     newfs->fs_bsize > MAXBSIZE ||
647 	     newfs->fs_bsize < sizeof(struct fs)) {
648 		brelse(bp, 0);
649 		free(newfs, M_UFSMNT);
650 		return (EIO);		/* XXX needs translation */
651 	}
652 	/* Store off old fs_sblockloc for fs_oldfscompat_read. */
653 	sblockloc = fs->fs_sblockloc;
654 	/*
655 	 * Copy pointer fields back into superblock before copying in	XXX
656 	 * new superblock. These should really be in the ufsmount.	XXX
657 	 * Note that important parameters (eg fs_ncg) are unchanged.
658 	 */
659 	newfs->fs_csp = fs->fs_csp;
660 	newfs->fs_maxcluster = fs->fs_maxcluster;
661 	newfs->fs_contigdirs = fs->fs_contigdirs;
662 	newfs->fs_ronly = fs->fs_ronly;
663 	newfs->fs_active = fs->fs_active;
664 	memcpy(fs, newfs, (u_int)fs->fs_sbsize);
665 	brelse(bp, 0);
666 	free(newfs, M_UFSMNT);
667 
668 	/* Recheck for apple UFS filesystem */
669 	ump->um_flags &= ~UFS_ISAPPLEUFS;
670 	/* First check to see if this is tagged as an Apple UFS filesystem
671 	 * in the disklabel
672 	 */
673 	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
674 		(dpart.part->p_fstype == FS_APPLEUFS)) {
675 		ump->um_flags |= UFS_ISAPPLEUFS;
676 	}
677 #ifdef APPLE_UFS
678 	else {
679 		/* Manually look for an apple ufs label, and if a valid one
680 		 * is found, then treat it like an Apple UFS filesystem anyway
681 		 *
682 		 * EINVAL is most probably a blocksize or alignment problem,
683 		 * it is unlikely that this is an Apple UFS filesystem then.
684 		 */
685 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
686 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
687 		if (error && error != EINVAL) {
688 			brelse(bp, 0);
689 			return (error);
690 		}
691 		if (error == 0) {
692 			error = ffs_appleufs_validate(fs->fs_fsmnt,
693 				(struct appleufslabel *)bp->b_data, NULL);
694 			if (error == 0)
695 				ump->um_flags |= UFS_ISAPPLEUFS;
696 		}
697 		brelse(bp, 0);
698 		bp = NULL;
699 	}
700 #else
701 	if (ump->um_flags & UFS_ISAPPLEUFS)
702 		return (EIO);
703 #endif
704 
705 	if (UFS_MPISAPPLEUFS(ump)) {
706 		/* see comment about NeXT below */
707 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
708 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
709 		mp->mnt_iflag |= IMNT_DTYPE;
710 	} else {
711 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
712 		ump->um_dirblksiz = DIRBLKSIZ;
713 		if (ump->um_maxsymlinklen > 0)
714 			mp->mnt_iflag |= IMNT_DTYPE;
715 		else
716 			mp->mnt_iflag &= ~IMNT_DTYPE;
717 	}
718 	ffs_oldfscompat_read(fs, ump, sblockloc);
719 
720 	mutex_enter(&ump->um_lock);
721 	ump->um_maxfilesize = fs->fs_maxfilesize;
722 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
723 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
724 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
725 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
726 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
727 			mutex_exit(&ump->um_lock);
728 			return (EINVAL);
729 		}
730 	}
731 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
732 		fs->fs_pendingblocks = 0;
733 		fs->fs_pendinginodes = 0;
734 	}
735 	mutex_exit(&ump->um_lock);
736 
737 	ffs_statvfs(mp, &mp->mnt_stat);
738 	/*
739 	 * Step 3: re-read summary information from disk.
740 	 */
741 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
742 	space = fs->fs_csp;
743 	for (i = 0; i < blks; i += fs->fs_frag) {
744 		bsize = fs->fs_bsize;
745 		if (i + fs->fs_frag > blks)
746 			bsize = (blks - i) * fs->fs_fsize;
747 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
748 			      NOCRED, 0, &bp);
749 		if (error) {
750 			brelse(bp, 0);
751 			return (error);
752 		}
753 #ifdef FFS_EI
754 		if (UFS_FSNEEDSWAP(fs))
755 			ffs_csum_swap((struct csum *)bp->b_data,
756 			    (struct csum *)space, bsize);
757 		else
758 #endif
759 			memcpy(space, bp->b_data, (size_t)bsize);
760 		space = (char *)space + bsize;
761 		brelse(bp, 0);
762 	}
763 	if (fs->fs_snapinum[0] != 0)
764 		ffs_snapshot_mount(mp);
765 	/*
766 	 * We no longer know anything about clusters per cylinder group.
767 	 */
768 	if (fs->fs_contigsumsize > 0) {
769 		lp = fs->fs_maxcluster;
770 		for (i = 0; i < fs->fs_ncg; i++)
771 			*lp++ = fs->fs_contigsumsize;
772 	}
773 
774 	/* Allocate a marker vnode. */
775 	if ((mvp = vnalloc(mp)) == NULL)
776 		return ENOMEM;
777 	/*
778 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
779 	 * and vclean() can be called indirectly
780 	 */
781 	mutex_enter(&mntvnode_lock);
782  loop:
783 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
784 		vmark(mvp, vp);
785 		if (vp->v_mount != mp || vismarker(vp))
786 			continue;
787 		/*
788 		 * Step 4: invalidate all inactive vnodes.
789 		 */
790 		if (vrecycle(vp, &mntvnode_lock, l)) {
791 			mutex_enter(&mntvnode_lock);
792 			(void)vunmark(mvp);
793 			goto loop;
794 		}
795 		/*
796 		 * Step 5: invalidate all cached file data.
797 		 */
798 		mutex_enter(&vp->v_interlock);
799 		mutex_exit(&mntvnode_lock);
800 		if (vget(vp, LK_EXCLUSIVE)) {
801 			(void)vunmark(mvp);
802 			goto loop;
803 		}
804 		if (vinvalbuf(vp, 0, cred, l, 0, 0))
805 			panic("ffs_reload: dirty2");
806 		/*
807 		 * Step 6: re-read inode data for all active vnodes.
808 		 */
809 		ip = VTOI(vp);
810 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
811 			      (int)fs->fs_bsize, NOCRED, 0, &bp);
812 		if (error) {
813 			brelse(bp, 0);
814 			vput(vp);
815 			(void)vunmark(mvp);
816 			break;
817 		}
818 		ffs_load_inode(bp, ip, fs, ip->i_number);
819 		brelse(bp, 0);
820 		vput(vp);
821 		mutex_enter(&mntvnode_lock);
822 	}
823 	mutex_exit(&mntvnode_lock);
824 	vnfree(mvp);
825 	return (error);
826 }
827 
828 /*
829  * Possible superblock locations ordered from most to least likely.
830  */
831 static const int sblock_try[] = SBLOCKSEARCH;
832 
833 /*
834  * Common code for mount and mountroot
835  */
836 int
837 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
838 {
839 	struct ufsmount *ump;
840 	struct buf *bp;
841 	struct fs *fs;
842 	dev_t dev;
843 	struct partinfo dpart;
844 	void *space;
845 	daddr_t sblockloc, fsblockloc;
846 	int blks, fstype;
847 	int error, i, bsize, ronly, bset = 0;
848 #ifdef FFS_EI
849 	int needswap = 0;		/* keep gcc happy */
850 #endif
851 	int32_t *lp;
852 	kauth_cred_t cred;
853 	u_int32_t sbsize = 8192;	/* keep gcc happy*/
854 	int32_t fsbsize;
855 
856 	dev = devvp->v_rdev;
857 	cred = l ? l->l_cred : NOCRED;
858 
859 	/* Flush out any old buffers remaining from a previous use. */
860 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
861 	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
862 	VOP_UNLOCK(devvp);
863 	if (error)
864 		return (error);
865 
866 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
867 
868 	bp = NULL;
869 	ump = NULL;
870 	fs = NULL;
871 	sblockloc = 0;
872 	fstype = 0;
873 
874 	error = fstrans_mount(mp);
875 	if (error)
876 		return error;
877 
878 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
879 	memset(ump, 0, sizeof *ump);
880 	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
881 	error = ffs_snapshot_init(ump);
882 	if (error)
883 		goto out;
884 	ump->um_ops = &ffs_ufsops;
885 
886 #ifdef WAPBL
887  sbagain:
888 #endif
889 	/*
890 	 * Try reading the superblock in each of its possible locations.
891 	 */
892 	for (i = 0; ; i++) {
893 		if (bp != NULL) {
894 			brelse(bp, BC_NOCACHE);
895 			bp = NULL;
896 		}
897 		if (sblock_try[i] == -1) {
898 			error = EINVAL;
899 			fs = NULL;
900 			goto out;
901 		}
902 		error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred,
903 			      0, &bp);
904 		if (error) {
905 			fs = NULL;
906 			goto out;
907 		}
908 		fs = (struct fs*)bp->b_data;
909 		fsblockloc = sblockloc = sblock_try[i];
910 		if (fs->fs_magic == FS_UFS1_MAGIC) {
911 			sbsize = fs->fs_sbsize;
912 			fstype = UFS1;
913 			fsbsize = fs->fs_bsize;
914 #ifdef FFS_EI
915 			needswap = 0;
916 		} else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
917 			sbsize = bswap32(fs->fs_sbsize);
918 			fstype = UFS1;
919 			fsbsize = bswap32(fs->fs_bsize);
920 			needswap = 1;
921 #endif
922 		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
923 			sbsize = fs->fs_sbsize;
924 			fstype = UFS2;
925 			fsbsize = fs->fs_bsize;
926 #ifdef FFS_EI
927 			needswap = 0;
928 		} else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
929 			sbsize = bswap32(fs->fs_sbsize);
930 			fstype = UFS2;
931 			fsbsize = bswap32(fs->fs_bsize);
932 			needswap = 1;
933 #endif
934 		} else
935 			continue;
936 
937 
938 		/* fs->fs_sblockloc isn't defined for old filesystems */
939 		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
940 			if (sblockloc == SBLOCK_UFS2)
941 				/*
942 				 * This is likely to be the first alternate
943 				 * in a filesystem with 64k blocks.
944 				 * Don't use it.
945 				 */
946 				continue;
947 			fsblockloc = sblockloc;
948 		} else {
949 			fsblockloc = fs->fs_sblockloc;
950 #ifdef FFS_EI
951 			if (needswap)
952 				fsblockloc = bswap64(fsblockloc);
953 #endif
954 		}
955 
956 		/* Check we haven't found an alternate superblock */
957 		if (fsblockloc != sblockloc)
958 			continue;
959 
960 		/* Validate size of superblock */
961 		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
962 			continue;
963 
964 		/* Check that we can handle the file system blocksize */
965 		if (fsbsize > MAXBSIZE) {
966 			printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
967 			    fsbsize, MAXBSIZE);
968 			continue;
969 		}
970 
971 		/* Ok seems to be a good superblock */
972 		break;
973 	}
974 
975 	fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
976 	memcpy(fs, bp->b_data, sbsize);
977 	ump->um_fs = fs;
978 
979 #ifdef FFS_EI
980 	if (needswap) {
981 		ffs_sb_swap((struct fs*)bp->b_data, fs);
982 		fs->fs_flags |= FS_SWAPPED;
983 	} else
984 #endif
985 		fs->fs_flags &= ~FS_SWAPPED;
986 
987 #ifdef WAPBL
988 	if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
989 		error = ffs_wapbl_replay_start(mp, fs, devvp);
990 		if (error && (mp->mnt_flag & MNT_FORCE) == 0)
991 			goto out;
992 		if (!error) {
993 			if (!ronly) {
994 				/* XXX fsmnt may be stale. */
995 				printf("%s: replaying log to disk\n",
996 				    fs->fs_fsmnt);
997 				error = wapbl_replay_write(mp->mnt_wapbl_replay,
998 				    devvp);
999 				if (error)
1000 					goto out;
1001 				wapbl_replay_stop(mp->mnt_wapbl_replay);
1002 				fs->fs_clean = FS_WASCLEAN;
1003 			} else {
1004 				/* XXX fsmnt may be stale */
1005 				printf("%s: replaying log to memory\n",
1006 				    fs->fs_fsmnt);
1007 			}
1008 
1009 			/* Force a re-read of the superblock */
1010 			brelse(bp, BC_INVAL);
1011 			bp = NULL;
1012 			free(fs, M_UFSMNT);
1013 			fs = NULL;
1014 			goto sbagain;
1015 		}
1016 	}
1017 #else /* !WAPBL */
1018 	if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1019 		error = EPERM;
1020 		goto out;
1021 	}
1022 #endif /* !WAPBL */
1023 
1024 	ffs_oldfscompat_read(fs, ump, sblockloc);
1025 	ump->um_maxfilesize = fs->fs_maxfilesize;
1026 
1027 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1028 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1029 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
1030 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1031 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
1032 			error = EINVAL;
1033 			goto out;
1034 		}
1035 	}
1036 
1037 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1038 		fs->fs_pendingblocks = 0;
1039 		fs->fs_pendinginodes = 0;
1040 	}
1041 
1042 	ump->um_fstype = fstype;
1043 	if (fs->fs_sbsize < SBLOCKSIZE)
1044 		brelse(bp, BC_INVAL);
1045 	else
1046 		brelse(bp, 0);
1047 	bp = NULL;
1048 
1049 	/* First check to see if this is tagged as an Apple UFS filesystem
1050 	 * in the disklabel
1051 	 */
1052 	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
1053 		(dpart.part->p_fstype == FS_APPLEUFS)) {
1054 		ump->um_flags |= UFS_ISAPPLEUFS;
1055 	}
1056 #ifdef APPLE_UFS
1057 	else {
1058 		/* Manually look for an apple ufs label, and if a valid one
1059 		 * is found, then treat it like an Apple UFS filesystem anyway
1060 		 */
1061 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
1062 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1063 		if (error)
1064 			goto out;
1065 		error = ffs_appleufs_validate(fs->fs_fsmnt,
1066 			(struct appleufslabel *)bp->b_data, NULL);
1067 		if (error == 0) {
1068 			ump->um_flags |= UFS_ISAPPLEUFS;
1069 		}
1070 		brelse(bp, 0);
1071 		bp = NULL;
1072 	}
1073 #else
1074 	if (ump->um_flags & UFS_ISAPPLEUFS) {
1075 		error = EINVAL;
1076 		goto out;
1077 	}
1078 #endif
1079 
1080 #if 0
1081 /*
1082  * XXX This code changes the behaviour of mounting dirty filesystems, to
1083  * XXX require "mount -f ..." to mount them.  This doesn't match what
1084  * XXX mount(8) describes and is disabled for now.
1085  */
1086 	/*
1087 	 * If the file system is not clean, don't allow it to be mounted
1088 	 * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1089 	 * for the root file system.)
1090 	 */
1091 	if (fs->fs_flags & FS_DOWAPBL) {
1092 		/*
1093 		 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1094 		 * bit is set, although there's a window in unmount where it
1095 		 * could be FS_ISCLEAN
1096 		 */
1097 		if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1098 		    (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1099 			error = EPERM;
1100 			goto out;
1101 		}
1102 	} else
1103 		if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1104 		    (mp->mnt_flag & MNT_FORCE) == 0) {
1105 			error = EPERM;
1106 			goto out;
1107 		}
1108 #endif
1109 
1110 	/*
1111 	 * verify that we can access the last block in the fs
1112 	 * if we're mounting read/write.
1113 	 */
1114 
1115 	if (!ronly) {
1116 		error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
1117 		    cred, 0, &bp);
1118 		if (bp->b_bcount != fs->fs_fsize)
1119 			error = EINVAL;
1120 		if (error) {
1121 			bset = BC_INVAL;
1122 			goto out;
1123 		}
1124 		brelse(bp, BC_INVAL);
1125 		bp = NULL;
1126 	}
1127 
1128 	fs->fs_ronly = ronly;
1129 	/* Don't bump fs_clean if we're replaying journal */
1130 	if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1131 		if (ronly == 0) {
1132 			fs->fs_clean <<= 1;
1133 			fs->fs_fmod = 1;
1134 		}
1135 	bsize = fs->fs_cssize;
1136 	blks = howmany(bsize, fs->fs_fsize);
1137 	if (fs->fs_contigsumsize > 0)
1138 		bsize += fs->fs_ncg * sizeof(int32_t);
1139 	bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1140 	space = malloc((u_long)bsize, M_UFSMNT, M_WAITOK);
1141 	fs->fs_csp = space;
1142 	for (i = 0; i < blks; i += fs->fs_frag) {
1143 		bsize = fs->fs_bsize;
1144 		if (i + fs->fs_frag > blks)
1145 			bsize = (blks - i) * fs->fs_fsize;
1146 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
1147 			      cred, 0, &bp);
1148 		if (error) {
1149 			free(fs->fs_csp, M_UFSMNT);
1150 			goto out;
1151 		}
1152 #ifdef FFS_EI
1153 		if (needswap)
1154 			ffs_csum_swap((struct csum *)bp->b_data,
1155 				(struct csum *)space, bsize);
1156 		else
1157 #endif
1158 			memcpy(space, bp->b_data, (u_int)bsize);
1159 
1160 		space = (char *)space + bsize;
1161 		brelse(bp, 0);
1162 		bp = NULL;
1163 	}
1164 	if (fs->fs_contigsumsize > 0) {
1165 		fs->fs_maxcluster = lp = space;
1166 		for (i = 0; i < fs->fs_ncg; i++)
1167 			*lp++ = fs->fs_contigsumsize;
1168 		space = lp;
1169 	}
1170 	bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1171 	fs->fs_contigdirs = space;
1172 	space = (char *)space + bsize;
1173 	memset(fs->fs_contigdirs, 0, bsize);
1174 		/* Compatibility for old filesystems - XXX */
1175 	if (fs->fs_avgfilesize <= 0)
1176 		fs->fs_avgfilesize = AVFILESIZ;
1177 	if (fs->fs_avgfpdir <= 0)
1178 		fs->fs_avgfpdir = AFPDIR;
1179 	fs->fs_active = NULL;
1180 	mp->mnt_data = ump;
1181 	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1182 	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1183 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1184 	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1185 	if (UFS_MPISAPPLEUFS(ump)) {
1186 		/* NeXT used to keep short symlinks in the inode even
1187 		 * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1188 		 * is probably -1, but we still need to be able to identify
1189 		 * short symlinks.
1190 		 */
1191 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1192 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1193 		mp->mnt_iflag |= IMNT_DTYPE;
1194 	} else {
1195 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1196 		ump->um_dirblksiz = DIRBLKSIZ;
1197 		if (ump->um_maxsymlinklen > 0)
1198 			mp->mnt_iflag |= IMNT_DTYPE;
1199 		else
1200 			mp->mnt_iflag &= ~IMNT_DTYPE;
1201 	}
1202 	mp->mnt_fs_bshift = fs->fs_bshift;
1203 	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
1204 	mp->mnt_flag |= MNT_LOCAL;
1205 	mp->mnt_iflag |= IMNT_MPSAFE;
1206 #ifdef FFS_EI
1207 	if (needswap)
1208 		ump->um_flags |= UFS_NEEDSWAP;
1209 #endif
1210 	ump->um_mountp = mp;
1211 	ump->um_dev = dev;
1212 	ump->um_devvp = devvp;
1213 	ump->um_nindir = fs->fs_nindir;
1214 	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1215 	ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
1216 	ump->um_seqinc = fs->fs_frag;
1217 	for (i = 0; i < MAXQUOTAS; i++)
1218 		ump->um_quotas[i] = NULLVP;
1219 	devvp->v_specmountpoint = mp;
1220 	if (ronly == 0 && fs->fs_snapinum[0] != 0)
1221 		ffs_snapshot_mount(mp);
1222 #ifdef WAPBL
1223 	if (!ronly) {
1224 		KDASSERT(fs->fs_ronly == 0);
1225 		/*
1226 		 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1227 		 * needs to create a new log file in-filesystem.
1228 		 */
1229 		ffs_statvfs(mp, &mp->mnt_stat);
1230 
1231 		error = ffs_wapbl_start(mp);
1232 		if (error) {
1233 			free(fs->fs_csp, M_UFSMNT);
1234 			goto out;
1235 		}
1236 	}
1237 #endif /* WAPBL */
1238 	if (ronly == 0) {
1239 #ifdef QUOTA2
1240 		error = ffs_quota2_mount(mp);
1241 		if (error) {
1242 			free(fs->fs_csp, M_UFSMNT);
1243 			goto out;
1244 		}
1245 #else
1246 		if (fs->fs_flags & FS_DOQUOTA2) {
1247 			ump->um_flags |= UFS_QUOTA2;
1248 			uprintf("%s: options QUOTA2 not enabled%s\n",
1249 			    mp->mnt_stat.f_mntonname,
1250 			    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1251 			if ((mp->mnt_flag & MNT_FORCE) == 0) {
1252 				error = EINVAL;
1253 				free(fs->fs_csp, M_UFSMNT);
1254 				goto out;
1255 			}
1256 		}
1257 #endif
1258 	}
1259 
1260 #ifdef UFS_EXTATTR
1261 	/*
1262 	 * Initialize file-backed extended attributes on UFS1 file
1263 	 * systems.
1264 	 */
1265 	if (ump->um_fstype == UFS1) {
1266 		ufs_extattr_uepm_init(&ump->um_extattr);
1267 #ifdef UFS_EXTATTR_AUTOSTART
1268 		/*
1269 		 * XXX Just ignore errors.  Not clear that we should
1270 		 * XXX fail the mount in this case.
1271 		 */
1272 		(void) ufs_extattr_autostart(mp, l);
1273 #endif
1274 	}
1275 #endif /* UFS_EXTATTR */
1276 	return (0);
1277 out:
1278 #ifdef WAPBL
1279 	if (mp->mnt_wapbl_replay) {
1280 		wapbl_replay_stop(mp->mnt_wapbl_replay);
1281 		wapbl_replay_free(mp->mnt_wapbl_replay);
1282 		mp->mnt_wapbl_replay = 0;
1283 	}
1284 #endif
1285 
1286 	fstrans_unmount(mp);
1287 	if (fs)
1288 		free(fs, M_UFSMNT);
1289 	devvp->v_specmountpoint = NULL;
1290 	if (bp)
1291 		brelse(bp, bset);
1292 	if (ump) {
1293 		if (ump->um_oldfscompat)
1294 			free(ump->um_oldfscompat, M_UFSMNT);
1295 		mutex_destroy(&ump->um_lock);
1296 		free(ump, M_UFSMNT);
1297 		mp->mnt_data = NULL;
1298 	}
1299 	return (error);
1300 }
1301 
1302 /*
1303  * Sanity checks for loading old filesystem superblocks.
1304  * See ffs_oldfscompat_write below for unwound actions.
1305  *
1306  * XXX - Parts get retired eventually.
1307  * Unfortunately new bits get added.
1308  */
1309 static void
1310 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1311 {
1312 	off_t maxfilesize;
1313 	int32_t *extrasave;
1314 
1315 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1316 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1317 		return;
1318 
1319 	if (!ump->um_oldfscompat)
1320 		ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1321 		    M_UFSMNT, M_WAITOK);
1322 
1323 	memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1324 	extrasave = ump->um_oldfscompat;
1325 	extrasave += 512/sizeof(int32_t);
1326 	extrasave[0] = fs->fs_old_npsect;
1327 	extrasave[1] = fs->fs_old_interleave;
1328 	extrasave[2] = fs->fs_old_trackskew;
1329 
1330 	/* These fields will be overwritten by their
1331 	 * original values in fs_oldfscompat_write, so it is harmless
1332 	 * to modify them here.
1333 	 */
1334 	fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1335 	fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1336 	fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1337 	fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1338 
1339 	fs->fs_maxbsize = fs->fs_bsize;
1340 	fs->fs_time = fs->fs_old_time;
1341 	fs->fs_size = fs->fs_old_size;
1342 	fs->fs_dsize = fs->fs_old_dsize;
1343 	fs->fs_csaddr = fs->fs_old_csaddr;
1344 	fs->fs_sblockloc = sblockloc;
1345 
1346 	fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1347 
1348 	if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1349 		fs->fs_old_nrpos = 8;
1350 		fs->fs_old_npsect = fs->fs_old_nsect;
1351 		fs->fs_old_interleave = 1;
1352 		fs->fs_old_trackskew = 0;
1353 	}
1354 
1355 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1356 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1357 		fs->fs_qbmask = ~fs->fs_bmask;
1358 		fs->fs_qfmask = ~fs->fs_fmask;
1359 	}
1360 
1361 	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1362 	if (fs->fs_maxfilesize > maxfilesize)
1363 		fs->fs_maxfilesize = maxfilesize;
1364 
1365 	/* Compatibility for old filesystems */
1366 	if (fs->fs_avgfilesize <= 0)
1367 		fs->fs_avgfilesize = AVFILESIZ;
1368 	if (fs->fs_avgfpdir <= 0)
1369 		fs->fs_avgfpdir = AFPDIR;
1370 
1371 #if 0
1372 	if (bigcgs) {
1373 		fs->fs_save_cgsize = fs->fs_cgsize;
1374 		fs->fs_cgsize = fs->fs_bsize;
1375 	}
1376 #endif
1377 }
1378 
1379 /*
1380  * Unwinding superblock updates for old filesystems.
1381  * See ffs_oldfscompat_read above for details.
1382  *
1383  * XXX - Parts get retired eventually.
1384  * Unfortunately new bits get added.
1385  */
1386 static void
1387 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1388 {
1389 	int32_t *extrasave;
1390 
1391 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1392 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1393 		return;
1394 
1395 	fs->fs_old_time = fs->fs_time;
1396 	fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1397 	fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1398 	fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1399 	fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1400 	fs->fs_old_flags = fs->fs_flags;
1401 
1402 #if 0
1403 	if (bigcgs) {
1404 		fs->fs_cgsize = fs->fs_save_cgsize;
1405 	}
1406 #endif
1407 
1408 	memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1409 	extrasave = ump->um_oldfscompat;
1410 	extrasave += 512/sizeof(int32_t);
1411 	fs->fs_old_npsect = extrasave[0];
1412 	fs->fs_old_interleave = extrasave[1];
1413 	fs->fs_old_trackskew = extrasave[2];
1414 
1415 }
1416 
1417 /*
1418  * unmount vfs operation
1419  */
1420 int
1421 ffs_unmount(struct mount *mp, int mntflags)
1422 {
1423 	struct lwp *l = curlwp;
1424 	struct ufsmount *ump = VFSTOUFS(mp);
1425 	struct fs *fs = ump->um_fs;
1426 	int error, flags;
1427 #ifdef WAPBL
1428 	extern int doforce;
1429 #endif
1430 
1431 	flags = 0;
1432 	if (mntflags & MNT_FORCE)
1433 		flags |= FORCECLOSE;
1434 	if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1435 		return (error);
1436 	error = UFS_WAPBL_BEGIN(mp);
1437 	if (error == 0)
1438 		if (fs->fs_ronly == 0 &&
1439 		    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1440 		    fs->fs_clean & FS_WASCLEAN) {
1441 			fs->fs_clean = FS_ISCLEAN;
1442 			fs->fs_fmod = 0;
1443 			(void) ffs_sbupdate(ump, MNT_WAIT);
1444 		}
1445 	if (error == 0)
1446 		UFS_WAPBL_END(mp);
1447 #ifdef WAPBL
1448 	KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1449 	if (mp->mnt_wapbl_replay) {
1450 		KDASSERT(fs->fs_ronly);
1451 		wapbl_replay_stop(mp->mnt_wapbl_replay);
1452 		wapbl_replay_free(mp->mnt_wapbl_replay);
1453 		mp->mnt_wapbl_replay = 0;
1454 	}
1455 	error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1456 	if (error) {
1457 		return error;
1458 	}
1459 #endif /* WAPBL */
1460 #ifdef UFS_EXTATTR
1461 	if (ump->um_fstype == UFS1) {
1462 		ufs_extattr_stop(mp, l);
1463 		ufs_extattr_uepm_destroy(&ump->um_extattr);
1464 	}
1465 #endif /* UFS_EXTATTR */
1466 
1467 	if (ump->um_devvp->v_type != VBAD)
1468 		ump->um_devvp->v_specmountpoint = NULL;
1469 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1470 	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1471 		NOCRED);
1472 	vput(ump->um_devvp);
1473 	free(fs->fs_csp, M_UFSMNT);
1474 	free(fs, M_UFSMNT);
1475 	if (ump->um_oldfscompat != NULL)
1476 		free(ump->um_oldfscompat, M_UFSMNT);
1477 	mutex_destroy(&ump->um_lock);
1478 	ffs_snapshot_fini(ump);
1479 	free(ump, M_UFSMNT);
1480 	mp->mnt_data = NULL;
1481 	mp->mnt_flag &= ~MNT_LOCAL;
1482 	fstrans_unmount(mp);
1483 	return (0);
1484 }
1485 
1486 /*
1487  * Flush out all the files in a filesystem.
1488  */
1489 int
1490 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1491 {
1492 	extern int doforce;
1493 	struct ufsmount *ump;
1494 	int error;
1495 
1496 	if (!doforce)
1497 		flags &= ~FORCECLOSE;
1498 	ump = VFSTOUFS(mp);
1499 #ifdef QUOTA
1500 	if ((error = quota1_umount(mp, flags)) != 0)
1501 		return (error);
1502 #endif
1503 #ifdef QUOTA2
1504 	if ((error = quota2_umount(mp, flags)) != 0)
1505 		return (error);
1506 #endif
1507 	if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1508 		return (error);
1509 	ffs_snapshot_unmount(mp);
1510 	/*
1511 	 * Flush all the files.
1512 	 */
1513 	error = vflush(mp, NULLVP, flags);
1514 	if (error)
1515 		return (error);
1516 	/*
1517 	 * Flush filesystem metadata.
1518 	 */
1519 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1520 	error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1521 	VOP_UNLOCK(ump->um_devvp);
1522 	if (flags & FORCECLOSE) /* XXXDBJ */
1523 		error = 0;
1524 
1525 #ifdef WAPBL
1526 	if (error)
1527 		return error;
1528 	if (mp->mnt_wapbl) {
1529 		error = wapbl_flush(mp->mnt_wapbl, 1);
1530 		if (flags & FORCECLOSE)
1531 			error = 0;
1532 	}
1533 #endif
1534 
1535 	return (error);
1536 }
1537 
1538 /*
1539  * Get file system statistics.
1540  */
1541 int
1542 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1543 {
1544 	struct ufsmount *ump;
1545 	struct fs *fs;
1546 
1547 	ump = VFSTOUFS(mp);
1548 	fs = ump->um_fs;
1549 	mutex_enter(&ump->um_lock);
1550 	sbp->f_bsize = fs->fs_bsize;
1551 	sbp->f_frsize = fs->fs_fsize;
1552 	sbp->f_iosize = fs->fs_bsize;
1553 	sbp->f_blocks = fs->fs_dsize;
1554 	sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1555 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1556 	sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1557 	    fs->fs_minfree) / (u_int64_t) 100;
1558 	if (sbp->f_bfree > sbp->f_bresvd)
1559 		sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1560 	else
1561 		sbp->f_bavail = 0;
1562 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1563 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1564 	sbp->f_favail = sbp->f_ffree;
1565 	sbp->f_fresvd = 0;
1566 	mutex_exit(&ump->um_lock);
1567 	copy_statvfs_info(sbp, mp);
1568 
1569 	return (0);
1570 }
1571 
1572 /*
1573  * Go through the disk queues to initiate sandbagged IO;
1574  * go through the inodes to write those that have been modified;
1575  * initiate the writing of the super block if it has been modified.
1576  *
1577  * Note: we are always called with the filesystem marked `MPBUSY'.
1578  */
1579 int
1580 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1581 {
1582 	struct vnode *vp, *mvp, *nvp;
1583 	struct inode *ip;
1584 	struct ufsmount *ump = VFSTOUFS(mp);
1585 	struct fs *fs;
1586 	int error, allerror = 0;
1587 	bool is_suspending;
1588 
1589 	fs = ump->um_fs;
1590 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1591 		printf("fs = %s\n", fs->fs_fsmnt);
1592 		panic("update: rofs mod");
1593 	}
1594 
1595 	/* Allocate a marker vnode. */
1596 	if ((mvp = vnalloc(mp)) == NULL)
1597 		return (ENOMEM);
1598 
1599 	fstrans_start(mp, FSTRANS_SHARED);
1600 	is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1601 	/*
1602 	 * Write back each (modified) inode.
1603 	 */
1604 	mutex_enter(&mntvnode_lock);
1605 loop:
1606 	/*
1607 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1608 	 * and vclean() can be called indirectly
1609 	 */
1610 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1611 		nvp = TAILQ_NEXT(vp, v_mntvnodes);
1612 		/*
1613 		 * If the vnode that we are about to sync is no longer
1614 		 * associated with this mount point, start over.
1615 		 */
1616 		if (vp->v_mount != mp)
1617 			goto loop;
1618 		/*
1619 		 * Don't interfere with concurrent scans of this FS.
1620 		 */
1621 		if (vismarker(vp))
1622 			continue;
1623 		mutex_enter(&vp->v_interlock);
1624 		ip = VTOI(vp);
1625 
1626 		/*
1627 		 * Skip the vnode/inode if inaccessible.
1628 		 */
1629 		if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1630 		    vp->v_type == VNON) {
1631 			mutex_exit(&vp->v_interlock);
1632 			continue;
1633 		}
1634 
1635 		/*
1636 		 * We deliberately update inode times here.  This will
1637 		 * prevent a massive queue of updates accumulating, only
1638 		 * to be handled by a call to unmount.
1639 		 *
1640 		 * XXX It would be better to have the syncer trickle these
1641 		 * out.  Adjustment needed to allow registering vnodes for
1642 		 * sync when the vnode is clean, but the inode dirty.  Or
1643 		 * have ufs itself trickle out inode updates.
1644 		 *
1645 		 * If doing a lazy sync, we don't care about metadata or
1646 		 * data updates, because they are handled by each vnode's
1647 		 * synclist entry.  In this case we are only interested in
1648 		 * writing back modified inodes.
1649 		 */
1650 		if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1651 		    IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1652 		    (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1653 		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1654 			mutex_exit(&vp->v_interlock);
1655 			continue;
1656 		}
1657 		if (vp->v_type == VBLK && is_suspending) {
1658 			mutex_exit(&vp->v_interlock);
1659 			continue;
1660 		}
1661 		vmark(mvp, vp);
1662 		mutex_exit(&mntvnode_lock);
1663 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
1664 		if (error) {
1665 			mutex_enter(&mntvnode_lock);
1666 			nvp = vunmark(mvp);
1667 			if (error == ENOENT) {
1668 				goto loop;
1669 			}
1670 			continue;
1671 		}
1672 		if (waitfor == MNT_LAZY) {
1673 			error = UFS_WAPBL_BEGIN(vp->v_mount);
1674 			if (!error) {
1675 				error = ffs_update(vp, NULL, NULL,
1676 				    UPDATE_CLOSE);
1677 				UFS_WAPBL_END(vp->v_mount);
1678 			}
1679 		} else {
1680 			error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1681 			    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1682 		}
1683 		if (error)
1684 			allerror = error;
1685 		vput(vp);
1686 		mutex_enter(&mntvnode_lock);
1687 		nvp = vunmark(mvp);
1688 	}
1689 	mutex_exit(&mntvnode_lock);
1690 	/*
1691 	 * Force stale file system control information to be flushed.
1692 	 */
1693 	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1694 	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1695 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1696 		if ((error = VOP_FSYNC(ump->um_devvp, cred,
1697 		    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1698 		    0, 0)) != 0)
1699 			allerror = error;
1700 		VOP_UNLOCK(ump->um_devvp);
1701 		if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1702 			mutex_enter(&mntvnode_lock);
1703 			goto loop;
1704 		}
1705 	}
1706 #if defined(QUOTA) || defined(QUOTA2)
1707 	qsync(mp);
1708 #endif
1709 	/*
1710 	 * Write back modified superblock.
1711 	 */
1712 	if (fs->fs_fmod != 0) {
1713 		fs->fs_fmod = 0;
1714 		fs->fs_time = time_second;
1715 		error = UFS_WAPBL_BEGIN(mp);
1716 		if (error)
1717 			allerror = error;
1718 		else {
1719 			if ((error = ffs_cgupdate(ump, waitfor)))
1720 				allerror = error;
1721 			UFS_WAPBL_END(mp);
1722 		}
1723 	}
1724 
1725 #ifdef WAPBL
1726 	if (mp->mnt_wapbl) {
1727 		error = wapbl_flush(mp->mnt_wapbl, 0);
1728 		if (error)
1729 			allerror = error;
1730 	}
1731 #endif
1732 
1733 	fstrans_done(mp);
1734 	vnfree(mvp);
1735 	return (allerror);
1736 }
1737 
1738 /*
1739  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1740  * in from disk.  If it is in core, wait for the lock bit to clear, then
1741  * return the inode locked.  Detection and handling of mount points must be
1742  * done by the calling routine.
1743  */
1744 int
1745 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1746 {
1747 	struct fs *fs;
1748 	struct inode *ip;
1749 	struct ufsmount *ump;
1750 	struct buf *bp;
1751 	struct vnode *vp;
1752 	dev_t dev;
1753 	int error;
1754 
1755 	ump = VFSTOUFS(mp);
1756 	dev = ump->um_dev;
1757 
1758  retry:
1759 	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1760 		return (0);
1761 
1762 	/* Allocate a new vnode/inode. */
1763 	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1764 		*vpp = NULL;
1765 		return (error);
1766 	}
1767 	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1768 
1769 	/*
1770 	 * If someone beat us to it, put back the freshly allocated
1771 	 * vnode/inode pair and retry.
1772 	 */
1773 	mutex_enter(&ufs_hashlock);
1774 	if (ufs_ihashget(dev, ino, 0) != NULL) {
1775 		mutex_exit(&ufs_hashlock);
1776 		ungetnewvnode(vp);
1777 		pool_cache_put(ffs_inode_cache, ip);
1778 		goto retry;
1779 	}
1780 
1781 	vp->v_vflag |= VV_LOCKSWORK;
1782 
1783 	/*
1784 	 * XXX MFS ends up here, too, to allocate an inode.  Should we
1785 	 * XXX create another pool for MFS inodes?
1786 	 */
1787 
1788 	memset(ip, 0, sizeof(struct inode));
1789 	vp->v_data = ip;
1790 	ip->i_vnode = vp;
1791 	ip->i_ump = ump;
1792 	ip->i_fs = fs = ump->um_fs;
1793 	ip->i_dev = dev;
1794 	ip->i_number = ino;
1795 #if defined(QUOTA) || defined(QUOTA2)
1796 	ufsquota_init(ip);
1797 #endif
1798 
1799 	/*
1800 	 * Initialize genfs node, we might proceed to destroy it in
1801 	 * error branches.
1802 	 */
1803 	genfs_node_init(vp, &ffs_genfsops);
1804 
1805 	/*
1806 	 * Put it onto its hash chain and lock it so that other requests for
1807 	 * this inode will block if they arrive while we are sleeping waiting
1808 	 * for old data structures to be purged or for the contents of the
1809 	 * disk portion of this inode to be read.
1810 	 */
1811 
1812 	ufs_ihashins(ip);
1813 	mutex_exit(&ufs_hashlock);
1814 
1815 	/* Read in the disk contents for the inode, copy into the inode. */
1816 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1817 		      (int)fs->fs_bsize, NOCRED, 0, &bp);
1818 	if (error) {
1819 
1820 		/*
1821 		 * The inode does not contain anything useful, so it would
1822 		 * be misleading to leave it on its hash chain. With mode
1823 		 * still zero, it will be unlinked and returned to the free
1824 		 * list by vput().
1825 		 */
1826 
1827 		vput(vp);
1828 		brelse(bp, 0);
1829 		*vpp = NULL;
1830 		return (error);
1831 	}
1832 	if (ip->i_ump->um_fstype == UFS1)
1833 		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1834 		    PR_WAITOK);
1835 	else
1836 		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1837 		    PR_WAITOK);
1838 	ffs_load_inode(bp, ip, fs, ino);
1839 	brelse(bp, 0);
1840 
1841 	/*
1842 	 * Initialize the vnode from the inode, check for aliases.
1843 	 * Note that the underlying vnode may have changed.
1844 	 */
1845 
1846 	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1847 
1848 	/*
1849 	 * Finish inode initialization now that aliasing has been resolved.
1850 	 */
1851 
1852 	ip->i_devvp = ump->um_devvp;
1853 	vref(ip->i_devvp);
1854 
1855 	/*
1856 	 * Ensure that uid and gid are correct. This is a temporary
1857 	 * fix until fsck has been changed to do the update.
1858 	 */
1859 
1860 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {		/* XXX */
1861 		ip->i_uid = ip->i_ffs1_ouid;			/* XXX */
1862 		ip->i_gid = ip->i_ffs1_ogid;			/* XXX */
1863 	}							/* XXX */
1864 	uvm_vnp_setsize(vp, ip->i_size);
1865 	*vpp = vp;
1866 	return (0);
1867 }
1868 
1869 /*
1870  * File handle to vnode
1871  *
1872  * Have to be really careful about stale file handles:
1873  * - check that the inode number is valid
1874  * - call ffs_vget() to get the locked inode
1875  * - check for an unallocated inode (i_mode == 0)
1876  * - check that the given client host has export rights and return
1877  *   those rights via. exflagsp and credanonp
1878  */
1879 int
1880 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1881 {
1882 	struct ufid ufh;
1883 	struct fs *fs;
1884 
1885 	if (fhp->fid_len != sizeof(struct ufid))
1886 		return EINVAL;
1887 
1888 	memcpy(&ufh, fhp, sizeof(ufh));
1889 	fs = VFSTOUFS(mp)->um_fs;
1890 	if (ufh.ufid_ino < ROOTINO ||
1891 	    ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1892 		return (ESTALE);
1893 	return (ufs_fhtovp(mp, &ufh, vpp));
1894 }
1895 
1896 /*
1897  * Vnode pointer to File handle
1898  */
1899 /* ARGSUSED */
1900 int
1901 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1902 {
1903 	struct inode *ip;
1904 	struct ufid ufh;
1905 
1906 	if (*fh_size < sizeof(struct ufid)) {
1907 		*fh_size = sizeof(struct ufid);
1908 		return E2BIG;
1909 	}
1910 	ip = VTOI(vp);
1911 	*fh_size = sizeof(struct ufid);
1912 	memset(&ufh, 0, sizeof(ufh));
1913 	ufh.ufid_len = sizeof(struct ufid);
1914 	ufh.ufid_ino = ip->i_number;
1915 	ufh.ufid_gen = ip->i_gen;
1916 	memcpy(fhp, &ufh, sizeof(ufh));
1917 	return (0);
1918 }
1919 
1920 void
1921 ffs_init(void)
1922 {
1923 	if (ffs_initcount++ > 0)
1924 		return;
1925 
1926 	ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1927 	    "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1928 	ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1929 	    "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1930 	ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1931 	    "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1932 	ufs_init();
1933 }
1934 
/*
 * Reinitialisation hook: simply forwards to ufs_reinit().
 */
void
ffs_reinit(void)
{
	ufs_reinit();
}
1941 
1942 void
1943 ffs_done(void)
1944 {
1945 	if (--ffs_initcount > 0)
1946 		return;
1947 
1948 	ufs_done();
1949 	pool_cache_destroy(ffs_dinode2_cache);
1950 	pool_cache_destroy(ffs_dinode1_cache);
1951 	pool_cache_destroy(ffs_inode_cache);
1952 }
1953 
1954 /*
1955  * Write a superblock and associated information back to disk.
1956  */
1957 int
1958 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1959 {
1960 	struct fs *fs = mp->um_fs;
1961 	struct buf *bp;
1962 	int error = 0;
1963 	u_int32_t saveflag;
1964 
1965 	error = ffs_getblk(mp->um_devvp,
1966 	    fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
1967 	    fs->fs_sbsize, false, &bp);
1968 	if (error)
1969 		return error;
1970 	saveflag = fs->fs_flags & FS_INTERNAL;
1971 	fs->fs_flags &= ~FS_INTERNAL;
1972 
1973 	memcpy(bp->b_data, fs, fs->fs_sbsize);
1974 
1975 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1976 #ifdef FFS_EI
1977 	if (mp->um_flags & UFS_NEEDSWAP)
1978 		ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1979 #endif
1980 	fs->fs_flags |= saveflag;
1981 
1982 	if (waitfor == MNT_WAIT)
1983 		error = bwrite(bp);
1984 	else
1985 		bawrite(bp);
1986 	return (error);
1987 }
1988 
1989 int
1990 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1991 {
1992 	struct fs *fs = mp->um_fs;
1993 	struct buf *bp;
1994 	int blks;
1995 	void *space;
1996 	int i, size, error = 0, allerror = 0;
1997 
1998 	allerror = ffs_sbupdate(mp, waitfor);
1999 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
2000 	space = fs->fs_csp;
2001 	for (i = 0; i < blks; i += fs->fs_frag) {
2002 		size = fs->fs_bsize;
2003 		if (i + fs->fs_frag > blks)
2004 			size = (blks - i) * fs->fs_fsize;
2005 		error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
2006 		    FFS_NOBLK, size, false, &bp);
2007 		if (error)
2008 			break;
2009 #ifdef FFS_EI
2010 		if (mp->um_flags & UFS_NEEDSWAP)
2011 			ffs_csum_swap((struct csum*)space,
2012 			    (struct csum*)bp->b_data, size);
2013 		else
2014 #endif
2015 			memcpy(bp->b_data, space, (u_int)size);
2016 		space = (char *)space + size;
2017 		if (waitfor == MNT_WAIT)
2018 			error = bwrite(bp);
2019 		else
2020 			bawrite(bp);
2021 	}
2022 	if (!allerror && error)
2023 		allerror = error;
2024 	return (allerror);
2025 }
2026 
/*
 * Extended attribute control.  When built with UFS_EXTATTR, requests
 * on a UFS1 mount go to ufs_extattrctl(); every other request goes to
 * vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	const struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (ump->um_fstype == UFS1)
		return ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname);
#endif
	return vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname);
}
2041 
2042 int
2043 ffs_suspendctl(struct mount *mp, int cmd)
2044 {
2045 	int error;
2046 	struct lwp *l = curlwp;
2047 
2048 	switch (cmd) {
2049 	case SUSPEND_SUSPEND:
2050 		if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2051 			return error;
2052 		error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2053 		if (error == 0)
2054 			error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2055 #ifdef WAPBL
2056 		if (error == 0 && mp->mnt_wapbl)
2057 			error = wapbl_flush(mp->mnt_wapbl, 1);
2058 #endif
2059 		if (error != 0) {
2060 			(void) fstrans_setstate(mp, FSTRANS_NORMAL);
2061 			return error;
2062 		}
2063 		return 0;
2064 
2065 	case SUSPEND_RESUME:
2066 		return fstrans_setstate(mp, FSTRANS_NORMAL);
2067 
2068 	default:
2069 		return EINVAL;
2070 	}
2071 }
2072 
2073 /*
2074  * Synch vnode for a mounted file system.
2075  */
2076 static int
2077 ffs_vfs_fsync(vnode_t *vp, int flags)
2078 {
2079 	int error, i, pflags;
2080 #ifdef WAPBL
2081 	struct mount *mp;
2082 #endif
2083 
2084 	KASSERT(vp->v_type == VBLK);
2085 	KASSERT(vp->v_specmountpoint != NULL);
2086 
2087 	/*
2088 	 * Flush all dirty data associated with the vnode.
2089 	 */
2090 	pflags = PGO_ALLPAGES | PGO_CLEANIT;
2091 	if ((flags & FSYNC_WAIT) != 0)
2092 		pflags |= PGO_SYNCIO;
2093 	mutex_enter(&vp->v_interlock);
2094 	error = VOP_PUTPAGES(vp, 0, 0, pflags);
2095 	if (error)
2096 		return error;
2097 
2098 #ifdef WAPBL
2099 	mp = vp->v_specmountpoint;
2100 	if (mp && mp->mnt_wapbl) {
2101 		/*
2102 		 * Don't bother writing out metadata if the syncer is
2103 		 * making the request.  We will let the sync vnode
2104 		 * write it out in a single burst through a call to
2105 		 * VFS_SYNC().
2106 		 */
2107 		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2108 			return 0;
2109 
2110 		/*
2111 		 * Don't flush the log if the vnode being flushed
2112 		 * contains no dirty buffers that could be in the log.
2113 		 */
2114 		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2115 			error = wapbl_flush(mp->mnt_wapbl, 0);
2116 			if (error)
2117 				return error;
2118 		}
2119 
2120 		if ((flags & FSYNC_WAIT) != 0) {
2121 			mutex_enter(&vp->v_interlock);
2122 			while (vp->v_numoutput)
2123 				cv_wait(&vp->v_cv, &vp->v_interlock);
2124 			mutex_exit(&vp->v_interlock);
2125 		}
2126 
2127 		return 0;
2128 	}
2129 #endif /* WAPBL */
2130 
2131 	error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
2132 	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2133 		i = 1;
2134 		(void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2135 		    kauth_cred_get());
2136 	}
2137 
2138 	return error;
2139 }
2140