1 /* $NetBSD: ffs_vfsops.c,v 1.338 2015/12/23 23:31:28 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1991, 1993, 1994 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.338 2015/12/23 23:31:28 christos Exp $"); 65 66 #if defined(_KERNEL_OPT) 67 #include "opt_ffs.h" 68 #include "opt_quota.h" 69 #include "opt_wapbl.h" 70 #endif 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/namei.h> 75 #include <sys/proc.h> 76 #include <sys/kernel.h> 77 #include <sys/vnode.h> 78 #include <sys/socket.h> 79 #include <sys/mount.h> 80 #include <sys/buf.h> 81 #include <sys/device.h> 82 #include <sys/disk.h> 83 #include <sys/mbuf.h> 84 #include <sys/file.h> 85 #include <sys/disklabel.h> 86 #include <sys/ioctl.h> 87 #include <sys/errno.h> 88 #include <sys/kmem.h> 89 #include <sys/pool.h> 90 #include <sys/lock.h> 91 #include <sys/sysctl.h> 92 #include <sys/conf.h> 93 #include <sys/kauth.h> 94 #include <sys/wapbl.h> 95 #include <sys/fstrans.h> 96 #include <sys/module.h> 97 98 #include <miscfs/genfs/genfs.h> 99 #include <miscfs/specfs/specdev.h> 100 101 #include <ufs/ufs/quota.h> 102 #include <ufs/ufs/ufsmount.h> 103 #include <ufs/ufs/inode.h> 104 #include <ufs/ufs/dir.h> 105 #include <ufs/ufs/ufs_extern.h> 106 #include <ufs/ufs/ufs_bswap.h> 107 #include <ufs/ufs/ufs_wapbl.h> 108 109 #include <ufs/ffs/fs.h> 110 #include <ufs/ffs/ffs_extern.h> 111 112 #ifdef WAPBL 113 MODULE(MODULE_CLASS_VFS, ffs, "wapbl"); 114 #else 115 MODULE(MODULE_CLASS_VFS, ffs, NULL); 116 #endif 117 118 static int ffs_vfs_fsync(vnode_t *, int); 119 static int ffs_superblock_validate(struct fs *); 120 static int ffs_is_appleufs(struct vnode *, struct fs *); 121 122 static int ffs_init_vnode(struct ufsmount *, struct vnode *, ino_t); 123 static void ffs_deinit_vnode(struct ufsmount *, struct vnode *); 124 125 static struct sysctllog *ffs_sysctl_log; 126 127 static kauth_listener_t ffs_snapshot_listener; 128 129 /* how many times ffs_init() was called */ 130 int ffs_initcount = 0; 131 132 #ifdef DEBUG_FFS_MOUNT 133 #define DPRINTF(_fmt, args...) printf("%s: " _fmt "\n", __func__, ##args) 134 #else 135 #define DPRINTF(_fmt, args...) do {} while (/*CONSTCOND*/0) 136 #endif 137 138 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc; 139 extern const struct vnodeopv_desc ffs_specop_opv_desc; 140 extern const struct vnodeopv_desc ffs_fifoop_opv_desc; 141 142 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = { 143 &ffs_vnodeop_opv_desc, 144 &ffs_specop_opv_desc, 145 &ffs_fifoop_opv_desc, 146 NULL, 147 }; 148 149 struct vfsops ffs_vfsops = { 150 .vfs_name = MOUNT_FFS, 151 .vfs_min_mount_data = sizeof (struct ufs_args), 152 .vfs_mount = ffs_mount, 153 .vfs_start = ufs_start, 154 .vfs_unmount = ffs_unmount, 155 .vfs_root = ufs_root, 156 .vfs_quotactl = ufs_quotactl, 157 .vfs_statvfs = ffs_statvfs, 158 .vfs_sync = ffs_sync, 159 .vfs_vget = ufs_vget, 160 .vfs_loadvnode = ffs_loadvnode, 161 .vfs_newvnode = ffs_newvnode, 162 .vfs_fhtovp = ffs_fhtovp, 163 .vfs_vptofh = ffs_vptofh, 164 .vfs_init = ffs_init, 165 .vfs_reinit = ffs_reinit, 166 .vfs_done = ffs_done, 167 .vfs_mountroot = ffs_mountroot, 168 .vfs_snapshot = ffs_snapshot, 169 .vfs_extattrctl = ffs_extattrctl, 170 .vfs_suspendctl = ffs_suspendctl, 171 .vfs_renamelock_enter = genfs_renamelock_enter, 172 .vfs_renamelock_exit = genfs_renamelock_exit, 173 .vfs_fsync = ffs_vfs_fsync, 174 .vfs_opv_descs = ffs_vnodeopv_descs 175 }; 176 177 static const struct genfs_ops ffs_genfsops = { 178 .gop_size = ffs_gop_size, 179 .gop_alloc = ufs_gop_alloc, 180 .gop_write = genfs_gop_write, 181 .gop_markupdate = ufs_gop_markupdate, 182 }; 183 184 static const struct ufs_ops ffs_ufsops = { 185 .uo_itimes = ffs_itimes, 186 .uo_update = ffs_update, 187 .uo_truncate = ffs_truncate, 188 .uo_balloc = ffs_balloc, 189 .uo_snapgone = ffs_snapgone, 190 .uo_bufrd = ffs_bufrd, 191 .uo_bufwr = ffs_bufwr, 192 }; 193 194 static int 195 ffs_checkrange(struct mount *mp, uint32_t ino) 196 { 197 struct fs *fs = VFSTOUFS(mp)->um_fs; 198 199 if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) { 200 DPRINTF("out of range %u\n", ino); 201 return ESTALE; 202 } 203 204 /* 205 * Need to check if inode is initialized because ffsv2 does 206 * lazy initialization and we can get here from nfs_fhtovp 207 */ 208 if (fs->fs_magic != FS_UFS2_MAGIC) 209 return 0; 210 211 struct buf *bp; 212 int cg = ino_to_cg(fs, ino); 213 struct ufsmount *ump = VFSTOUFS(mp); 214 215 int error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)), 216 (int)fs->fs_cgsize, B_MODIFY, &bp); 217 if (error) { 218 DPRINTF("error %d reading cg %d ino %u\n", error, cg, ino); 219 return error; 220 } 221 222 const int needswap = UFS_FSNEEDSWAP(fs); 223 224 struct cg *cgp = (struct cg *)bp->b_data; 225 if (!cg_chkmagic(cgp, needswap)) { 226 brelse(bp, 0); 227 DPRINTF("bad cylinder group magic cg %d ino %u\n", cg, ino); 228 return ESTALE; 229 } 230 231 int32_t initediblk = ufs_rw32(cgp->cg_initediblk, needswap); 232 brelse(bp, 0); 233 234 if (cg * fs->fs_ipg + initediblk < ino) { 235 DPRINTF("cg=%d fs->fs_ipg=%d initediblk=%d ino=%u\n", 236 cg, fs->fs_ipg, initediblk, ino); 237 return ESTALE; 238 } 239 return 0; 240 } 241 242 static int 243 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 244 void *arg0, void *arg1, void *arg2, void *arg3) 245 { 246 vnode_t *vp = arg2; 247 int result = KAUTH_RESULT_DEFER; 248 249 if (action != KAUTH_SYSTEM_FS_SNAPSHOT) 250 return result; 251 252 if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred)) 253 result = KAUTH_RESULT_ALLOW; 254 255 return result; 256 } 257 258 static int 259 ffs_modcmd(modcmd_t cmd, void *arg) 260 { 261 int error; 262 263 #if 0 264 extern int doasyncfree; 265 #endif 266 #ifdef UFS_EXTATTR 267 extern int ufs_extattr_autocreate; 268 #endif 269 extern int ffs_log_changeopt; 270 271 switch (cmd) { 272 case MODULE_CMD_INIT: 273 error = vfs_attach(&ffs_vfsops); 274 if (error != 0) 275 break; 276 277 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 278 CTLFLAG_PERMANENT, 279 CTLTYPE_NODE, "ffs", 280 SYSCTL_DESCR("Berkeley Fast File System"), 281 NULL, 0, NULL, 0, 282 CTL_VFS, 1, CTL_EOL); 283 /* 284 * @@@ should we even bother with these first three? 285 */ 286 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 287 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 288 CTLTYPE_INT, "doclusterread", NULL, 289 sysctl_notavail, 0, NULL, 0, 290 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL); 291 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 292 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 293 CTLTYPE_INT, "doclusterwrite", NULL, 294 sysctl_notavail, 0, NULL, 0, 295 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL); 296 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 297 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 298 CTLTYPE_INT, "doreallocblks", NULL, 299 sysctl_notavail, 0, NULL, 0, 300 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL); 301 #if 0 302 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 303 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 304 CTLTYPE_INT, "doasyncfree", 305 SYSCTL_DESCR("Release dirty blocks asynchronously"), 306 NULL, 0, &doasyncfree, 0, 307 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL); 308 #endif 309 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 310 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 311 CTLTYPE_INT, "log_changeopt", 312 SYSCTL_DESCR("Log changes in optimization strategy"), 313 NULL, 0, &ffs_log_changeopt, 0, 314 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL); 315 #ifdef UFS_EXTATTR 316 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 317 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 318 CTLTYPE_INT, "extattr_autocreate", 319 SYSCTL_DESCR("Size of attribute for " 320 "backing file autocreation"), 321 NULL, 0, &ufs_extattr_autocreate, 0, 322 CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL); 323 324 #endif /* UFS_EXTATTR */ 325 326 ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 327 ffs_snapshot_cb, NULL); 328 if (ffs_snapshot_listener == NULL) 329 printf("ffs_modcmd: can't listen on system scope.\n"); 330 331 break; 332 case MODULE_CMD_FINI: 333 error = vfs_detach(&ffs_vfsops); 334 if (error != 0) 335 break; 336 sysctl_teardown(&ffs_sysctl_log); 337 if (ffs_snapshot_listener != NULL) 338 kauth_unlisten_scope(ffs_snapshot_listener); 339 break; 340 default: 341 error = ENOTTY; 342 break; 343 } 344 345 return (error); 346 } 347 348 pool_cache_t ffs_inode_cache; 349 pool_cache_t ffs_dinode1_cache; 350 pool_cache_t ffs_dinode2_cache; 351 352 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t); 353 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); 354 355 /* 356 * Called by main() when ffs is going to be mounted as root. 357 */ 358 359 int 360 ffs_mountroot(void) 361 { 362 struct fs *fs; 363 struct mount *mp; 364 struct lwp *l = curlwp; /* XXX */ 365 struct ufsmount *ump; 366 int error; 367 368 if (device_class(root_device) != DV_DISK) 369 return (ENODEV); 370 371 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) { 372 vrele(rootvp); 373 return (error); 374 } 375 376 /* 377 * We always need to be able to mount the root file system. 378 */ 379 mp->mnt_flag |= MNT_FORCE; 380 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { 381 vfs_unbusy(mp, false, NULL); 382 vfs_destroy(mp); 383 return (error); 384 } 385 mp->mnt_flag &= ~MNT_FORCE; 386 mountlist_append(mp); 387 ump = VFSTOUFS(mp); 388 fs = ump->um_fs; 389 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt)); 390 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); 391 (void)ffs_statvfs(mp, &mp->mnt_stat); 392 vfs_unbusy(mp, false, NULL); 393 setrootfstime((time_t)fs->fs_time); 394 return (0); 395 } 396 397 /* 398 * VFS Operations. 399 * 400 * mount system call 401 */ 402 int 403 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) 404 { 405 struct lwp *l = curlwp; 406 struct vnode *devvp = NULL; 407 struct ufs_args *args = data; 408 struct ufsmount *ump = NULL; 409 struct fs *fs; 410 int error = 0, flags, update; 411 mode_t accessmode; 412 413 if (args == NULL) { 414 DPRINTF("NULL args"); 415 return EINVAL; 416 } 417 if (*data_len < sizeof(*args)) { 418 DPRINTF("bad size args %zu != %zu", *data_len, sizeof(*args)); 419 return EINVAL; 420 } 421 422 if (mp->mnt_flag & MNT_GETARGS) { 423 ump = VFSTOUFS(mp); 424 if (ump == NULL) { 425 DPRINTF("no ump"); 426 return EIO; 427 } 428 args->fspec = NULL; 429 *data_len = sizeof *args; 430 return 0; 431 } 432 433 update = mp->mnt_flag & MNT_UPDATE; 434 435 /* Check arguments */ 436 if (args->fspec != NULL) { 437 /* 438 * Look up the name and verify that it's sane. 439 */ 440 error = namei_simple_user(args->fspec, 441 NSM_FOLLOW_NOEMULROOT, &devvp); 442 if (error != 0) { 443 DPRINTF("namei_simple_user returned %d", error); 444 return error; 445 } 446 447 if (!update) { 448 /* 449 * Be sure this is a valid block device 450 */ 451 if (devvp->v_type != VBLK) { 452 DPRINTF("non block device %d", devvp->v_type); 453 error = ENOTBLK; 454 } else if (bdevsw_lookup(devvp->v_rdev) == NULL) { 455 DPRINTF("can't find block device 0x%jx", 456 devvp->v_rdev); 457 error = ENXIO; 458 } 459 } else { 460 /* 461 * Be sure we're still naming the same device 462 * used for our initial mount 463 */ 464 ump = VFSTOUFS(mp); 465 if (devvp != ump->um_devvp) { 466 if (devvp->v_rdev != ump->um_devvp->v_rdev) { 467 DPRINTF("wrong device 0x%jx != 0x%jx", 468 (uintmax_t)devvp->v_rdev, 469 (uintmax_t)ump->um_devvp->v_rdev); 470 error = EINVAL; 471 } else { 472 vrele(devvp); 473 devvp = ump->um_devvp; 474 vref(devvp); 475 } 476 } 477 } 478 } else { 479 if (!update) { 480 /* New mounts must have a filename for the device */ 481 DPRINTF("no filename for mount"); 482 return EINVAL; 483 } else { 484 /* Use the extant mount */ 485 ump = VFSTOUFS(mp); 486 devvp = ump->um_devvp; 487 vref(devvp); 488 } 489 } 490 491 /* 492 * If mount by non-root, then verify that user has necessary 493 * permissions on the device. 494 * 495 * Permission to update a mount is checked higher, so here we presume 496 * updating the mount is okay (for example, as far as securelevel goes) 497 * which leaves us with the normal check. 498 */ 499 if (error == 0) { 500 accessmode = VREAD; 501 if (update ? 502 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : 503 (mp->mnt_flag & MNT_RDONLY) == 0) 504 accessmode |= VWRITE; 505 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 506 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 507 KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, 508 KAUTH_ARG(accessmode)); 509 if (error) { 510 DPRINTF("kauth returned %d", error); 511 } 512 VOP_UNLOCK(devvp); 513 } 514 515 if (error) { 516 vrele(devvp); 517 return (error); 518 } 519 520 #ifdef WAPBL 521 /* WAPBL can only be enabled on a r/w mount. */ 522 if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) { 523 mp->mnt_flag &= ~MNT_LOG; 524 } 525 #else /* !WAPBL */ 526 mp->mnt_flag &= ~MNT_LOG; 527 #endif /* !WAPBL */ 528 529 if (!update) { 530 int xflags; 531 532 if (mp->mnt_flag & MNT_RDONLY) 533 xflags = FREAD; 534 else 535 xflags = FREAD | FWRITE; 536 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 537 error = VOP_OPEN(devvp, xflags, FSCRED); 538 VOP_UNLOCK(devvp); 539 if (error) { 540 DPRINTF("VOP_OPEN returned %d", error); 541 goto fail; 542 } 543 error = ffs_mountfs(devvp, mp, l); 544 if (error) { 545 DPRINTF("ffs_mountfs returned %d", error); 546 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 547 (void)VOP_CLOSE(devvp, xflags, NOCRED); 548 VOP_UNLOCK(devvp); 549 goto fail; 550 } 551 552 ump = VFSTOUFS(mp); 553 fs = ump->um_fs; 554 } else { 555 /* 556 * Update the mount. 557 */ 558 559 /* 560 * The initial mount got a reference on this 561 * device, so drop the one obtained via 562 * namei(), above. 563 */ 564 vrele(devvp); 565 566 ump = VFSTOUFS(mp); 567 fs = ump->um_fs; 568 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 569 /* 570 * Changing from r/w to r/o 571 */ 572 flags = WRITECLOSE; 573 if (mp->mnt_flag & MNT_FORCE) 574 flags |= FORCECLOSE; 575 error = ffs_flushfiles(mp, flags, l); 576 if (error == 0) 577 error = UFS_WAPBL_BEGIN(mp); 578 if (error == 0 && 579 ffs_cgupdate(ump, MNT_WAIT) == 0 && 580 fs->fs_clean & FS_WASCLEAN) { 581 if (mp->mnt_flag & MNT_SOFTDEP) 582 fs->fs_flags &= ~FS_DOSOFTDEP; 583 fs->fs_clean = FS_ISCLEAN; 584 (void) ffs_sbupdate(ump, MNT_WAIT); 585 } 586 if (error) { 587 DPRINTF("wapbl %d", error); 588 return error; 589 } 590 UFS_WAPBL_END(mp); 591 } 592 593 #ifdef WAPBL 594 if ((mp->mnt_flag & MNT_LOG) == 0) { 595 error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE); 596 if (error) { 597 DPRINTF("ffs_wapbl_stop returned %d", error); 598 return error; 599 } 600 } 601 #endif /* WAPBL */ 602 603 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 604 /* 605 * Finish change from r/w to r/o 606 */ 607 fs->fs_ronly = 1; 608 fs->fs_fmod = 0; 609 } 610 611 if (mp->mnt_flag & MNT_RELOAD) { 612 error = ffs_reload(mp, l->l_cred, l); 613 if (error) { 614 DPRINTF("ffs_reload returned %d", error); 615 return error; 616 } 617 } 618 619 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { 620 /* 621 * Changing from read-only to read/write 622 */ 623 #ifndef QUOTA2 624 if (fs->fs_flags & FS_DOQUOTA2) { 625 ump->um_flags |= UFS_QUOTA2; 626 uprintf("%s: options QUOTA2 not enabled%s\n", 627 mp->mnt_stat.f_mntonname, 628 (mp->mnt_flag & MNT_FORCE) ? "" : 629 ", not mounting"); 630 DPRINTF("ffs_quota2 %d", EINVAL); 631 return EINVAL; 632 } 633 #endif 634 fs->fs_ronly = 0; 635 fs->fs_clean <<= 1; 636 fs->fs_fmod = 1; 637 #ifdef WAPBL 638 if (fs->fs_flags & FS_DOWAPBL) { 639 const char *nm = mp->mnt_stat.f_mntonname; 640 if (!mp->mnt_wapbl_replay) { 641 printf("%s: log corrupted;" 642 " replay cancelled\n", nm); 643 return EFTYPE; 644 } 645 printf("%s: replaying log to disk\n", nm); 646 error = wapbl_replay_write(mp->mnt_wapbl_replay, 647 devvp); 648 if (error) { 649 DPRINTF("%s: wapbl_replay_write %d", 650 nm, error); 651 return error; 652 } 653 wapbl_replay_stop(mp->mnt_wapbl_replay); 654 fs->fs_clean = FS_WASCLEAN; 655 } 656 #endif /* WAPBL */ 657 if (fs->fs_snapinum[0] != 0) 658 ffs_snapshot_mount(mp); 659 } 660 661 #ifdef WAPBL 662 error = ffs_wapbl_start(mp); 663 if (error) { 664 DPRINTF("ffs_wapbl_start returned %d", error); 665 return error; 666 } 667 #endif /* WAPBL */ 668 669 #ifdef QUOTA2 670 if (!fs->fs_ronly) { 671 error = ffs_quota2_mount(mp); 672 if (error) { 673 DPRINTF("ffs_quota2_mount returned %d", error); 674 return error; 675 } 676 } 677 #endif 678 679 if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata)) 680 ump->um_discarddata = ffs_discard_init(devvp, fs); 681 682 if (args->fspec == NULL) 683 return 0; 684 } 685 686 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, 687 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); 688 if (error == 0) 689 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, 690 sizeof(fs->fs_fsmnt)); 691 else { 692 DPRINTF("set_statvfs_info returned %d", error); 693 } 694 fs->fs_flags &= ~FS_DOSOFTDEP; 695 if (fs->fs_fmod != 0) { /* XXX */ 696 int err; 697 698 fs->fs_fmod = 0; 699 if (fs->fs_clean & FS_WASCLEAN) 700 fs->fs_time = time_second; 701 else { 702 printf("%s: file system not clean (fs_clean=%#x); " 703 "please fsck(8)\n", mp->mnt_stat.f_mntfromname, 704 fs->fs_clean); 705 printf("%s: lost blocks %" PRId64 " files %d\n", 706 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks, 707 fs->fs_pendinginodes); 708 } 709 err = UFS_WAPBL_BEGIN(mp); 710 if (err == 0) { 711 (void) ffs_cgupdate(ump, MNT_WAIT); 712 UFS_WAPBL_END(mp); 713 } 714 } 715 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) { 716 printf("%s: `-o softdep' is no longer supported, " 717 "consider `-o log'\n", mp->mnt_stat.f_mntfromname); 718 mp->mnt_flag &= ~MNT_SOFTDEP; 719 } 720 721 return (error); 722 723 fail: 724 vrele(devvp); 725 return (error); 726 } 727 728 /* 729 * Reload all incore data for a filesystem (used after running fsck on 730 * the root filesystem and finding things to fix). The filesystem must 731 * be mounted read-only. 732 * 733 * Things to do to update the mount: 734 * 1) invalidate all cached meta-data. 735 * 2) re-read superblock from disk. 736 * 3) re-read summary information from disk. 737 * 4) invalidate all inactive vnodes. 738 * 5) invalidate all cached file data. 739 * 6) re-read inode data for all active vnodes. 740 */ 741 int 742 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) 743 { 744 struct vnode *vp, *devvp; 745 struct inode *ip; 746 void *space; 747 struct buf *bp; 748 struct fs *fs, *newfs; 749 int i, bsize, blks, error; 750 int32_t *lp, fs_sbsize; 751 struct ufsmount *ump; 752 daddr_t sblockloc; 753 struct vnode_iterator *marker; 754 755 if ((mp->mnt_flag & MNT_RDONLY) == 0) 756 return (EINVAL); 757 758 ump = VFSTOUFS(mp); 759 760 /* 761 * Step 1: invalidate all cached meta-data. 762 */ 763 devvp = ump->um_devvp; 764 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 765 error = vinvalbuf(devvp, 0, cred, l, 0, 0); 766 VOP_UNLOCK(devvp); 767 if (error) 768 panic("ffs_reload: dirty1"); 769 770 /* 771 * Step 2: re-read superblock from disk. XXX: We don't handle 772 * possibility that superblock moved. Which implies that we don't 773 * want its size to change either. 774 */ 775 fs = ump->um_fs; 776 fs_sbsize = fs->fs_sbsize; 777 error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs_sbsize, 778 0, &bp); 779 if (error) 780 return (error); 781 newfs = kmem_alloc(fs_sbsize, KM_SLEEP); 782 memcpy(newfs, bp->b_data, fs_sbsize); 783 784 #ifdef FFS_EI 785 if (ump->um_flags & UFS_NEEDSWAP) { 786 ffs_sb_swap((struct fs *)bp->b_data, newfs); 787 newfs->fs_flags |= FS_SWAPPED; 788 } else 789 #endif 790 newfs->fs_flags &= ~FS_SWAPPED; 791 792 brelse(bp, 0); 793 794 if ((newfs->fs_magic != FS_UFS1_MAGIC) && 795 (newfs->fs_magic != FS_UFS2_MAGIC)) { 796 kmem_free(newfs, fs_sbsize); 797 return (EIO); /* XXX needs translation */ 798 } 799 if (!ffs_superblock_validate(newfs)) { 800 kmem_free(newfs, fs_sbsize); 801 return (EINVAL); 802 } 803 804 /* 805 * The current implementation doesn't handle the possibility that 806 * these values may have changed. 807 */ 808 if ((newfs->fs_sbsize != fs_sbsize) || 809 (newfs->fs_cssize != fs->fs_cssize) || 810 (newfs->fs_contigsumsize != fs->fs_contigsumsize) || 811 (newfs->fs_ncg != fs->fs_ncg)) { 812 kmem_free(newfs, fs_sbsize); 813 return (EINVAL); 814 } 815 816 /* Store off old fs_sblockloc for fs_oldfscompat_read. */ 817 sblockloc = fs->fs_sblockloc; 818 /* 819 * Copy pointer fields back into superblock before copying in XXX 820 * new superblock. These should really be in the ufsmount. XXX 821 * Note that important parameters (eg fs_ncg) are unchanged. 822 */ 823 newfs->fs_csp = fs->fs_csp; 824 newfs->fs_maxcluster = fs->fs_maxcluster; 825 newfs->fs_contigdirs = fs->fs_contigdirs; 826 newfs->fs_ronly = fs->fs_ronly; 827 newfs->fs_active = fs->fs_active; 828 memcpy(fs, newfs, (u_int)fs_sbsize); 829 kmem_free(newfs, fs_sbsize); 830 831 /* 832 * Recheck for Apple UFS filesystem. 833 */ 834 ump->um_flags &= ~UFS_ISAPPLEUFS; 835 if (ffs_is_appleufs(devvp, fs)) { 836 #ifdef APPLE_UFS 837 ump->um_flags |= UFS_ISAPPLEUFS; 838 #else 839 DPRINTF("AppleUFS not supported"); 840 return (EIO); /* XXX: really? */ 841 #endif 842 } 843 844 if (UFS_MPISAPPLEUFS(ump)) { 845 /* see comment about NeXT below */ 846 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 847 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 848 mp->mnt_iflag |= IMNT_DTYPE; 849 } else { 850 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 851 ump->um_dirblksiz = UFS_DIRBLKSIZ; 852 if (ump->um_maxsymlinklen > 0) 853 mp->mnt_iflag |= IMNT_DTYPE; 854 else 855 mp->mnt_iflag &= ~IMNT_DTYPE; 856 } 857 ffs_oldfscompat_read(fs, ump, sblockloc); 858 859 mutex_enter(&ump->um_lock); 860 ump->um_maxfilesize = fs->fs_maxfilesize; 861 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 862 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 863 mp->mnt_stat.f_mntonname, fs->fs_flags, 864 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 865 if ((mp->mnt_flag & MNT_FORCE) == 0) { 866 mutex_exit(&ump->um_lock); 867 return (EINVAL); 868 } 869 } 870 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 871 fs->fs_pendingblocks = 0; 872 fs->fs_pendinginodes = 0; 873 } 874 mutex_exit(&ump->um_lock); 875 876 ffs_statvfs(mp, &mp->mnt_stat); 877 /* 878 * Step 3: re-read summary information from disk. 879 */ 880 blks = howmany(fs->fs_cssize, fs->fs_fsize); 881 space = fs->fs_csp; 882 for (i = 0; i < blks; i += fs->fs_frag) { 883 bsize = fs->fs_bsize; 884 if (i + fs->fs_frag > blks) 885 bsize = (blks - i) * fs->fs_fsize; 886 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 887 0, &bp); 888 if (error) { 889 return (error); 890 } 891 #ifdef FFS_EI 892 if (UFS_FSNEEDSWAP(fs)) 893 ffs_csum_swap((struct csum *)bp->b_data, 894 (struct csum *)space, bsize); 895 else 896 #endif 897 memcpy(space, bp->b_data, (size_t)bsize); 898 space = (char *)space + bsize; 899 brelse(bp, 0); 900 } 901 /* 902 * We no longer know anything about clusters per cylinder group. 903 */ 904 if (fs->fs_contigsumsize > 0) { 905 lp = fs->fs_maxcluster; 906 for (i = 0; i < fs->fs_ncg; i++) 907 *lp++ = fs->fs_contigsumsize; 908 } 909 910 vfs_vnode_iterator_init(mp, &marker); 911 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { 912 /* 913 * Step 4: invalidate all inactive vnodes. 914 */ 915 if (vrecycle(vp)) 916 continue; 917 /* 918 * Step 5: invalidate all cached file data. 919 */ 920 if (vn_lock(vp, LK_EXCLUSIVE)) { 921 vrele(vp); 922 continue; 923 } 924 if (vinvalbuf(vp, 0, cred, l, 0, 0)) 925 panic("ffs_reload: dirty2"); 926 /* 927 * Step 6: re-read inode data for all active vnodes. 928 */ 929 ip = VTOI(vp); 930 error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), 931 (int)fs->fs_bsize, 0, &bp); 932 if (error) { 933 vput(vp); 934 break; 935 } 936 ffs_load_inode(bp, ip, fs, ip->i_number); 937 brelse(bp, 0); 938 vput(vp); 939 } 940 vfs_vnode_iterator_destroy(marker); 941 return (error); 942 } 943 944 /* 945 * Possible superblock locations ordered from most to least likely. 946 */ 947 static const int sblock_try[] = SBLOCKSEARCH; 948 949 950 static int 951 ffs_superblock_validate(struct fs *fs) 952 { 953 int32_t i, fs_bshift = 0, fs_fshift = 0, fs_fragshift = 0, fs_frag; 954 int32_t fs_inopb; 955 956 /* Check the superblock size */ 957 if (fs->fs_sbsize > SBLOCKSIZE || fs->fs_sbsize < sizeof(struct fs)) 958 return 0; 959 960 /* Check the file system blocksize */ 961 if (fs->fs_bsize > MAXBSIZE || fs->fs_bsize < MINBSIZE) 962 return 0; 963 if (!powerof2(fs->fs_bsize)) 964 return 0; 965 966 /* Check the size of frag blocks */ 967 if (!powerof2(fs->fs_fsize)) 968 return 0; 969 if (fs->fs_fsize == 0) 970 return 0; 971 972 /* 973 * XXX: these values are just zero-checked to prevent obvious 974 * bugs. We need more strict checks. 975 */ 976 if (fs->fs_size == 0) 977 return 0; 978 if (fs->fs_cssize == 0) 979 return 0; 980 if (fs->fs_ipg == 0) 981 return 0; 982 if (fs->fs_fpg == 0) 983 return 0; 984 if (fs->fs_ncg == 0) 985 return 0; 986 if (fs->fs_maxbpg == 0) 987 return 0; 988 989 /* Check the number of inodes per block */ 990 if (fs->fs_magic == FS_UFS1_MAGIC) 991 fs_inopb = fs->fs_bsize / sizeof(struct ufs1_dinode); 992 else /* fs->fs_magic == FS_UFS2_MAGIC */ 993 fs_inopb = fs->fs_bsize / sizeof(struct ufs2_dinode); 994 if (fs->fs_inopb != fs_inopb) 995 return 0; 996 997 /* Block size cannot be smaller than fragment size */ 998 if (fs->fs_bsize < fs->fs_fsize) 999 return 0; 1000 1001 /* Compute fs_bshift and ensure it is consistent */ 1002 for (i = fs->fs_bsize; i > 1; i >>= 1) 1003 fs_bshift++; 1004 if (fs->fs_bshift != fs_bshift) 1005 return 0; 1006 1007 /* Compute fs_fshift and ensure it is consistent */ 1008 for (i = fs->fs_fsize; i > 1; i >>= 1) 1009 fs_fshift++; 1010 if (fs->fs_fshift != fs_fshift) 1011 return 0; 1012 1013 /* Compute fs_fragshift and ensure it is consistent */ 1014 for (i = fs->fs_frag; i > 1; i >>= 1) 1015 fs_fragshift++; 1016 if (fs->fs_fragshift != fs_fragshift) 1017 return 0; 1018 1019 /* Check the masks */ 1020 if (fs->fs_bmask != ~(fs->fs_bsize - 1)) 1021 return 0; 1022 if (fs->fs_fmask != ~(fs->fs_fsize - 1)) 1023 return 0; 1024 1025 /* 1026 * Now that the shifts and masks are sanitized, we can use the ffs_ API. 1027 */ 1028 1029 /* Check the number of frag blocks */ 1030 if ((fs_frag = ffs_numfrags(fs, fs->fs_bsize)) > MAXFRAG) 1031 return 0; 1032 if (fs->fs_frag != fs_frag) 1033 return 0; 1034 1035 /* Check the size of cylinder groups */ 1036 if ((fs->fs_cgsize < sizeof(struct cg)) || 1037 (fs->fs_cgsize > fs->fs_bsize)) 1038 return 0; 1039 1040 return 1; 1041 } 1042 1043 static int 1044 ffs_is_appleufs(struct vnode *devvp, struct fs *fs) 1045 { 1046 struct dkwedge_info dkw; 1047 int ret = 0; 1048 1049 /* 1050 * First check to see if this is tagged as an Apple UFS filesystem 1051 * in the disklabel. 1052 */ 1053 if (getdiskinfo(devvp, &dkw) == 0 && 1054 strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0) 1055 ret = 1; 1056 #ifdef APPLE_UFS 1057 else { 1058 struct appleufslabel *applefs; 1059 struct buf *bp; 1060 daddr_t blkno = APPLEUFS_LABEL_OFFSET / DEV_BSIZE; 1061 int error; 1062 1063 /* 1064 * Manually look for an Apple UFS label, and if a valid one 1065 * is found, then treat it like an Apple UFS filesystem anyway. 1066 */ 1067 error = bread(devvp, blkno, APPLEUFS_LABEL_SIZE, 0, &bp); 1068 if (error) { 1069 DPRINTF("bread@0x%jx returned %d", (intmax_t)blkno, error); 1070 return 0; 1071 } 1072 applefs = (struct appleufslabel *)bp->b_data; 1073 error = ffs_appleufs_validate(fs->fs_fsmnt, applefs, NULL); 1074 if (error == 0) 1075 ret = 1; 1076 brelse(bp, 0); 1077 } 1078 #endif 1079 1080 return ret; 1081 } 1082 1083 /* 1084 * Common code for mount and mountroot 1085 */ 1086 int 1087 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) 1088 { 1089 struct ufsmount *ump = NULL; 1090 struct buf *bp = NULL; 1091 struct fs *fs = NULL; 1092 dev_t dev; 1093 void *space; 1094 daddr_t sblockloc = 0; 1095 int blks, fstype = 0; 1096 int error, i, bsize, ronly, bset = 0; 1097 #ifdef FFS_EI 1098 int needswap = 0; /* keep gcc happy */ 1099 #endif 1100 int32_t *lp; 1101 kauth_cred_t cred; 1102 u_int32_t allocsbsize, fs_sbsize = 0; 1103 1104 dev = devvp->v_rdev; 1105 cred = l ? l->l_cred : NOCRED; 1106 1107 /* Flush out any old buffers remaining from a previous use. */ 1108 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1109 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); 1110 VOP_UNLOCK(devvp); 1111 if (error) { 1112 DPRINTF("vinvalbuf returned %d", error); 1113 return error; 1114 } 1115 1116 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 1117 1118 error = fstrans_mount(mp); 1119 if (error) { 1120 DPRINTF("fstrans_mount returned %d", error); 1121 return error; 1122 } 1123 1124 ump = kmem_zalloc(sizeof(*ump), KM_SLEEP); 1125 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE); 1126 error = ffs_snapshot_init(ump); 1127 if (error) { 1128 DPRINTF("ffs_snapshot_init returned %d", error); 1129 goto out; 1130 } 1131 ump->um_ops = &ffs_ufsops; 1132 1133 #ifdef WAPBL 1134 sbagain: 1135 #endif 1136 /* 1137 * Try reading the superblock in each of its possible locations. 1138 */ 1139 for (i = 0; ; i++) { 1140 daddr_t fs_sblockloc; 1141 1142 if (bp != NULL) { 1143 brelse(bp, BC_NOCACHE); 1144 bp = NULL; 1145 } 1146 if (sblock_try[i] == -1) { 1147 DPRINTF("no superblock found"); 1148 error = EINVAL; 1149 fs = NULL; 1150 goto out; 1151 } 1152 1153 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, 1154 0, &bp); 1155 if (error) { 1156 DPRINTF("bread@0x%x returned %d", 1157 sblock_try[i] / DEV_BSIZE, error); 1158 fs = NULL; 1159 goto out; 1160 } 1161 fs = (struct fs *)bp->b_data; 1162 1163 sblockloc = sblock_try[i]; 1164 DPRINTF("fs_magic 0x%x", fs->fs_magic); 1165 1166 /* 1167 * Swap: here, we swap fs->fs_sbsize in order to get the correct 1168 * size to read the superblock. Once read, we swap the whole 1169 * superblock structure. 1170 */ 1171 if (fs->fs_magic == FS_UFS1_MAGIC) { 1172 fs_sbsize = fs->fs_sbsize; 1173 fstype = UFS1; 1174 #ifdef FFS_EI 1175 needswap = 0; 1176 } else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) { 1177 fs_sbsize = bswap32(fs->fs_sbsize); 1178 fstype = UFS1; 1179 needswap = 1; 1180 #endif 1181 } else if (fs->fs_magic == FS_UFS2_MAGIC) { 1182 fs_sbsize = fs->fs_sbsize; 1183 fstype = UFS2; 1184 #ifdef FFS_EI 1185 needswap = 0; 1186 } else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) { 1187 fs_sbsize = bswap32(fs->fs_sbsize); 1188 fstype = UFS2; 1189 needswap = 1; 1190 #endif 1191 } else 1192 continue; 1193 1194 /* fs->fs_sblockloc isn't defined for old filesystems */ 1195 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) { 1196 if (sblockloc == SBLOCK_UFS2) 1197 /* 1198 * This is likely to be the first alternate 1199 * in a filesystem with 64k blocks. 1200 * Don't use it. 1201 */ 1202 continue; 1203 fs_sblockloc = sblockloc; 1204 } else { 1205 fs_sblockloc = fs->fs_sblockloc; 1206 #ifdef FFS_EI 1207 if (needswap) 1208 fs_sblockloc = bswap64(fs_sblockloc); 1209 #endif 1210 } 1211 1212 /* Check we haven't found an alternate superblock */ 1213 if (fs_sblockloc != sblockloc) 1214 continue; 1215 1216 /* Check the superblock size */ 1217 if (fs_sbsize > SBLOCKSIZE || fs_sbsize < sizeof(struct fs)) 1218 continue; 1219 fs = kmem_alloc((u_long)fs_sbsize, KM_SLEEP); 1220 memcpy(fs, bp->b_data, fs_sbsize); 1221 1222 /* Swap the whole superblock structure, if necessary. */ 1223 #ifdef FFS_EI 1224 if (needswap) { 1225 ffs_sb_swap((struct fs*)bp->b_data, fs); 1226 fs->fs_flags |= FS_SWAPPED; 1227 } else 1228 #endif 1229 fs->fs_flags &= ~FS_SWAPPED; 1230 1231 /* 1232 * Now that everything is swapped, the superblock is ready to 1233 * be sanitized. 1234 */ 1235 if (!ffs_superblock_validate(fs)) { 1236 kmem_free(fs, fs_sbsize); 1237 continue; 1238 } 1239 1240 /* Ok seems to be a good superblock */ 1241 break; 1242 } 1243 1244 ump->um_fs = fs; 1245 1246 #ifdef WAPBL 1247 if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) { 1248 error = ffs_wapbl_replay_start(mp, fs, devvp); 1249 if (error && (mp->mnt_flag & MNT_FORCE) == 0) { 1250 DPRINTF("ffs_wapbl_replay_start returned %d", error); 1251 goto out; 1252 } 1253 if (!error) { 1254 if (!ronly) { 1255 /* XXX fsmnt may be stale. */ 1256 printf("%s: replaying log to disk\n", 1257 fs->fs_fsmnt); 1258 error = wapbl_replay_write(mp->mnt_wapbl_replay, 1259 devvp); 1260 if (error) { 1261 DPRINTF("wapbl_replay_write returned %d", 1262 error); 1263 goto out; 1264 } 1265 wapbl_replay_stop(mp->mnt_wapbl_replay); 1266 fs->fs_clean = FS_WASCLEAN; 1267 } else { 1268 /* XXX fsmnt may be stale */ 1269 printf("%s: replaying log to memory\n", 1270 fs->fs_fsmnt); 1271 } 1272 1273 /* Force a re-read of the superblock */ 1274 brelse(bp, BC_INVAL); 1275 bp = NULL; 1276 kmem_free(fs, fs_sbsize); 1277 fs = NULL; 1278 goto sbagain; 1279 } 1280 } 1281 #else /* !WAPBL */ 1282 if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) { 1283 error = EPERM; 1284 DPRINTF("no force %d", error); 1285 goto out; 1286 } 1287 #endif /* !WAPBL */ 1288 1289 ffs_oldfscompat_read(fs, ump, sblockloc); 1290 ump->um_maxfilesize = fs->fs_maxfilesize; 1291 1292 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 1293 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 1294 mp->mnt_stat.f_mntonname, fs->fs_flags, 1295 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1296 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1297 error = EINVAL; 1298 DPRINTF("no force %d", error); 1299 goto out; 1300 } 1301 } 1302 1303 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 1304 fs->fs_pendingblocks = 0; 1305 fs->fs_pendinginodes = 0; 1306 } 1307 1308 ump->um_fstype = fstype; 1309 if (fs->fs_sbsize < SBLOCKSIZE) 1310 brelse(bp, BC_INVAL); 1311 else 1312 brelse(bp, 0); 1313 bp = NULL; 1314 1315 if (ffs_is_appleufs(devvp, fs)) { 1316 #ifdef APPLE_UFS 1317 ump->um_flags |= UFS_ISAPPLEUFS; 1318 #else 1319 DPRINTF("AppleUFS not supported"); 1320 error = EINVAL; 1321 goto out; 1322 #endif 1323 } 1324 1325 #if 0 1326 /* 1327 * XXX This code changes the behaviour of mounting dirty filesystems, to 1328 * XXX require "mount -f ..." to mount them. This doesn't match what 1329 * XXX mount(8) describes and is disabled for now. 1330 */ 1331 /* 1332 * If the file system is not clean, don't allow it to be mounted 1333 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set 1334 * for the root file system.) 1335 */ 1336 if (fs->fs_flags & FS_DOWAPBL) { 1337 /* 1338 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL 1339 * bit is set, although there's a window in unmount where it 1340 * could be FS_ISCLEAN 1341 */ 1342 if ((mp->mnt_flag & MNT_FORCE) == 0 && 1343 (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) { 1344 error = EPERM; 1345 goto out; 1346 } 1347 } else 1348 if ((fs->fs_clean & FS_ISCLEAN) == 0 && 1349 (mp->mnt_flag & MNT_FORCE) == 0) { 1350 error = EPERM; 1351 goto out; 1352 } 1353 #endif 1354 1355 /* 1356 * Verify that we can access the last block in the fs 1357 * if we're mounting read/write. 1358 */ 1359 if (!ronly) { 1360 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1), 1361 fs->fs_fsize, 0, &bp); 1362 if (error) { 1363 DPRINTF("bread@0x%jx returned %d", 1364 (intmax_t)FFS_FSBTODB(fs, fs->fs_size - 1), 1365 error); 1366 bset = BC_INVAL; 1367 goto out; 1368 } 1369 if (bp->b_bcount != fs->fs_fsize) { 1370 DPRINTF("bcount %x != fsize %x", bp->b_bcount, 1371 fs->fs_fsize); 1372 error = EINVAL; 1373 bset = BC_INVAL; 1374 goto out; 1375 } 1376 brelse(bp, BC_INVAL); 1377 bp = NULL; 1378 } 1379 1380 fs->fs_ronly = ronly; 1381 /* Don't bump fs_clean if we're replaying journal */ 1382 if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) { 1383 if (ronly == 0) { 1384 fs->fs_clean <<= 1; 1385 fs->fs_fmod = 1; 1386 } 1387 } 1388 1389 bsize = fs->fs_cssize; 1390 blks = howmany(bsize, fs->fs_fsize); 1391 if (fs->fs_contigsumsize > 0) 1392 bsize += fs->fs_ncg * sizeof(int32_t); 1393 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1394 allocsbsize = bsize; 1395 space = kmem_alloc((u_long)allocsbsize, KM_SLEEP); 1396 fs->fs_csp = space; 1397 1398 for (i = 0; i < blks; i += fs->fs_frag) { 1399 bsize = fs->fs_bsize; 1400 if (i + fs->fs_frag > blks) 1401 bsize = (blks - i) * fs->fs_fsize; 1402 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 1403 0, &bp); 1404 if (error) { 1405 DPRINTF("bread@0x%jx %d", 1406 (intmax_t)FFS_FSBTODB(fs, fs->fs_csaddr + i), 1407 error); 1408 goto out1; 1409 } 1410 #ifdef FFS_EI 1411 if (needswap) 1412 ffs_csum_swap((struct csum *)bp->b_data, 1413 (struct csum *)space, bsize); 1414 else 1415 #endif 1416 memcpy(space, bp->b_data, (u_int)bsize); 1417 1418 space = (char *)space + bsize; 1419 brelse(bp, 0); 1420 bp = NULL; 1421 } 1422 if (fs->fs_contigsumsize > 0) { 1423 fs->fs_maxcluster = lp = space; 1424 for (i = 0; i < fs->fs_ncg; i++) 1425 *lp++ = fs->fs_contigsumsize; 1426 space = lp; 1427 } 1428 bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1429 fs->fs_contigdirs = space; 1430 space = (char *)space + bsize; 1431 memset(fs->fs_contigdirs, 0, bsize); 1432 1433 /* Compatibility for old filesystems - XXX */ 1434 if (fs->fs_avgfilesize <= 0) 1435 fs->fs_avgfilesize = AVFILESIZ; 1436 if (fs->fs_avgfpdir <= 0) 1437 fs->fs_avgfpdir = AFPDIR; 1438 fs->fs_active = NULL; 1439 1440 mp->mnt_data = ump; 1441 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; 1442 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS); 1443 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 1444 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN; 1445 if (UFS_MPISAPPLEUFS(ump)) { 1446 /* NeXT used to keep short symlinks in the inode even 1447 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen 1448 * is probably -1, but we still need to be able to identify 1449 * short symlinks. 1450 */ 1451 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 1452 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 1453 mp->mnt_iflag |= IMNT_DTYPE; 1454 } else { 1455 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 1456 ump->um_dirblksiz = UFS_DIRBLKSIZ; 1457 if (ump->um_maxsymlinklen > 0) 1458 mp->mnt_iflag |= IMNT_DTYPE; 1459 else 1460 mp->mnt_iflag &= ~IMNT_DTYPE; 1461 } 1462 mp->mnt_fs_bshift = fs->fs_bshift; 1463 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ 1464 mp->mnt_flag |= MNT_LOCAL; 1465 mp->mnt_iflag |= IMNT_MPSAFE; 1466 #ifdef FFS_EI 1467 if (needswap) 1468 ump->um_flags |= UFS_NEEDSWAP; 1469 #endif 1470 ump->um_mountp = mp; 1471 ump->um_dev = dev; 1472 ump->um_devvp = devvp; 1473 ump->um_nindir = fs->fs_nindir; 1474 ump->um_lognindir = ffs(fs->fs_nindir) - 1; 1475 ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT; 1476 ump->um_seqinc = fs->fs_frag; 1477 for (i = 0; i < MAXQUOTAS; i++) 1478 ump->um_quotas[i] = NULLVP; 1479 spec_node_setmountedfs(devvp, mp); 1480 if (ronly == 0 && fs->fs_snapinum[0] != 0) 1481 ffs_snapshot_mount(mp); 1482 #ifdef WAPBL 1483 if (!ronly) { 1484 KDASSERT(fs->fs_ronly == 0); 1485 /* 1486 * ffs_wapbl_start() needs mp->mnt_stat initialised if it 1487 * needs to create a new log file in-filesystem. 1488 */ 1489 error = ffs_statvfs(mp, &mp->mnt_stat); 1490 if (error) { 1491 DPRINTF("ffs_statvfs returned %d", error); 1492 goto out1; 1493 } 1494 1495 error = ffs_wapbl_start(mp); 1496 if (error) { 1497 DPRINTF("ffs_wapbl_start returned %d", error); 1498 goto out1; 1499 } 1500 } 1501 #endif /* WAPBL */ 1502 if (ronly == 0) { 1503 #ifdef QUOTA2 1504 error = ffs_quota2_mount(mp); 1505 if (error) { 1506 DPRINTF("ffs_quota2_mount returned %d", error); 1507 goto out1; 1508 } 1509 #else 1510 if (fs->fs_flags & FS_DOQUOTA2) { 1511 ump->um_flags |= UFS_QUOTA2; 1512 uprintf("%s: options QUOTA2 not enabled%s\n", 1513 mp->mnt_stat.f_mntonname, 1514 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1515 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1516 error = EINVAL; 1517 DPRINTF("quota disabled %d", error); 1518 goto out1; 1519 } 1520 } 1521 #endif 1522 } 1523 1524 if (mp->mnt_flag & MNT_DISCARD) 1525 ump->um_discarddata = ffs_discard_init(devvp, fs); 1526 1527 return (0); 1528 out1: 1529 kmem_free(fs->fs_csp, allocsbsize); 1530 out: 1531 #ifdef WAPBL 1532 if (mp->mnt_wapbl_replay) { 1533 wapbl_replay_stop(mp->mnt_wapbl_replay); 1534 wapbl_replay_free(mp->mnt_wapbl_replay); 1535 mp->mnt_wapbl_replay = 0; 1536 } 1537 #endif 1538 1539 fstrans_unmount(mp); 1540 if (fs) 1541 kmem_free(fs, fs->fs_sbsize); 1542 spec_node_setmountedfs(devvp, NULL); 1543 if (bp) 1544 brelse(bp, bset); 1545 if (ump) { 1546 if (ump->um_oldfscompat) 1547 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1548 mutex_destroy(&ump->um_lock); 1549 kmem_free(ump, sizeof(*ump)); 1550 mp->mnt_data = NULL; 1551 } 1552 return (error); 1553 } 1554 1555 /* 1556 * Sanity checks for loading old filesystem superblocks. 1557 * See ffs_oldfscompat_write below for unwound actions. 1558 * 1559 * XXX - Parts get retired eventually. 1560 * Unfortunately new bits get added. 1561 */ 1562 static void 1563 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc) 1564 { 1565 off_t maxfilesize; 1566 int32_t *extrasave; 1567 1568 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1569 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1570 return; 1571 1572 if (!ump->um_oldfscompat) 1573 ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t), 1574 KM_SLEEP); 1575 1576 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512); 1577 extrasave = ump->um_oldfscompat; 1578 extrasave += 512/sizeof(int32_t); 1579 extrasave[0] = fs->fs_old_npsect; 1580 extrasave[1] = fs->fs_old_interleave; 1581 extrasave[2] = fs->fs_old_trackskew; 1582 1583 /* These fields will be overwritten by their 1584 * original values in fs_oldfscompat_write, so it is harmless 1585 * to modify them here. 1586 */ 1587 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; 1588 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; 1589 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; 1590 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; 1591 1592 fs->fs_maxbsize = fs->fs_bsize; 1593 fs->fs_time = fs->fs_old_time; 1594 fs->fs_size = fs->fs_old_size; 1595 fs->fs_dsize = fs->fs_old_dsize; 1596 fs->fs_csaddr = fs->fs_old_csaddr; 1597 fs->fs_sblockloc = sblockloc; 1598 1599 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL); 1600 1601 if (fs->fs_old_postblformat == FS_42POSTBLFMT) { 1602 fs->fs_old_nrpos = 8; 1603 fs->fs_old_npsect = fs->fs_old_nsect; 1604 fs->fs_old_interleave = 1; 1605 fs->fs_old_trackskew = 0; 1606 } 1607 1608 if (fs->fs_old_inodefmt < FS_44INODEFMT) { 1609 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 1610 fs->fs_qbmask = ~fs->fs_bmask; 1611 fs->fs_qfmask = ~fs->fs_fmask; 1612 } 1613 1614 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; 1615 if (fs->fs_maxfilesize > maxfilesize) 1616 fs->fs_maxfilesize = maxfilesize; 1617 1618 /* Compatibility for old filesystems */ 1619 if (fs->fs_avgfilesize <= 0) 1620 fs->fs_avgfilesize = AVFILESIZ; 1621 if (fs->fs_avgfpdir <= 0) 1622 fs->fs_avgfpdir = AFPDIR; 1623 1624 #if 0 1625 if (bigcgs) { 1626 fs->fs_save_cgsize = fs->fs_cgsize; 1627 fs->fs_cgsize = fs->fs_bsize; 1628 } 1629 #endif 1630 } 1631 1632 /* 1633 * Unwinding superblock updates for old filesystems. 1634 * See ffs_oldfscompat_read above for details. 1635 * 1636 * XXX - Parts get retired eventually. 1637 * Unfortunately new bits get added. 1638 */ 1639 static void 1640 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump) 1641 { 1642 int32_t *extrasave; 1643 1644 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1645 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1646 return; 1647 1648 fs->fs_old_time = fs->fs_time; 1649 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; 1650 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; 1651 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; 1652 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; 1653 fs->fs_old_flags = fs->fs_flags; 1654 1655 #if 0 1656 if (bigcgs) { 1657 fs->fs_cgsize = fs->fs_save_cgsize; 1658 } 1659 #endif 1660 1661 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512); 1662 extrasave = ump->um_oldfscompat; 1663 extrasave += 512/sizeof(int32_t); 1664 fs->fs_old_npsect = extrasave[0]; 1665 fs->fs_old_interleave = extrasave[1]; 1666 fs->fs_old_trackskew = extrasave[2]; 1667 1668 } 1669 1670 /* 1671 * unmount vfs operation 1672 */ 1673 int 1674 ffs_unmount(struct mount *mp, int mntflags) 1675 { 1676 struct lwp *l = curlwp; 1677 struct ufsmount *ump = VFSTOUFS(mp); 1678 struct fs *fs = ump->um_fs; 1679 int error, flags; 1680 u_int32_t bsize; 1681 #ifdef WAPBL 1682 extern int doforce; 1683 #endif 1684 1685 if (ump->um_discarddata) { 1686 ffs_discard_finish(ump->um_discarddata, mntflags); 1687 ump->um_discarddata = NULL; 1688 } 1689 1690 flags = 0; 1691 if (mntflags & MNT_FORCE) 1692 flags |= FORCECLOSE; 1693 if ((error = ffs_flushfiles(mp, flags, l)) != 0) 1694 return (error); 1695 error = UFS_WAPBL_BEGIN(mp); 1696 if (error == 0) 1697 if (fs->fs_ronly == 0 && 1698 ffs_cgupdate(ump, MNT_WAIT) == 0 && 1699 fs->fs_clean & FS_WASCLEAN) { 1700 fs->fs_clean = FS_ISCLEAN; 1701 fs->fs_fmod = 0; 1702 (void) ffs_sbupdate(ump, MNT_WAIT); 1703 } 1704 if (error == 0) 1705 UFS_WAPBL_END(mp); 1706 #ifdef WAPBL 1707 KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl)); 1708 if (mp->mnt_wapbl_replay) { 1709 KDASSERT(fs->fs_ronly); 1710 wapbl_replay_stop(mp->mnt_wapbl_replay); 1711 wapbl_replay_free(mp->mnt_wapbl_replay); 1712 mp->mnt_wapbl_replay = 0; 1713 } 1714 error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE)); 1715 if (error) { 1716 return error; 1717 } 1718 #endif /* WAPBL */ 1719 1720 if (ump->um_devvp->v_type != VBAD) 1721 spec_node_setmountedfs(ump->um_devvp, NULL); 1722 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1723 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE, 1724 NOCRED); 1725 vput(ump->um_devvp); 1726 1727 bsize = fs->fs_cssize; 1728 if (fs->fs_contigsumsize > 0) 1729 bsize += fs->fs_ncg * sizeof(int32_t); 1730 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1731 kmem_free(fs->fs_csp, bsize); 1732 1733 kmem_free(fs, fs->fs_sbsize); 1734 if (ump->um_oldfscompat != NULL) 1735 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1736 mutex_destroy(&ump->um_lock); 1737 ffs_snapshot_fini(ump); 1738 kmem_free(ump, sizeof(*ump)); 1739 mp->mnt_data = NULL; 1740 mp->mnt_flag &= ~MNT_LOCAL; 1741 fstrans_unmount(mp); 1742 return (0); 1743 } 1744 1745 /* 1746 * Flush out all the files in a filesystem. 1747 */ 1748 int 1749 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l) 1750 { 1751 extern int doforce; 1752 struct ufsmount *ump; 1753 int error; 1754 1755 if (!doforce) 1756 flags &= ~FORCECLOSE; 1757 ump = VFSTOUFS(mp); 1758 #ifdef QUOTA 1759 if ((error = quota1_umount(mp, flags)) != 0) 1760 return (error); 1761 #endif 1762 #ifdef QUOTA2 1763 if ((error = quota2_umount(mp, flags)) != 0) 1764 return (error); 1765 #endif 1766 #ifdef UFS_EXTATTR 1767 if (ump->um_fstype == UFS1) { 1768 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) 1769 ufs_extattr_stop(mp, l); 1770 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED) 1771 ufs_extattr_uepm_destroy(&ump->um_extattr); 1772 mp->mnt_flag &= ~MNT_EXTATTR; 1773 } 1774 #endif 1775 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) 1776 return (error); 1777 ffs_snapshot_unmount(mp); 1778 /* 1779 * Flush all the files. 1780 */ 1781 error = vflush(mp, NULLVP, flags); 1782 if (error) 1783 return (error); 1784 /* 1785 * Flush filesystem metadata. 1786 */ 1787 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1788 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0); 1789 VOP_UNLOCK(ump->um_devvp); 1790 if (flags & FORCECLOSE) /* XXXDBJ */ 1791 error = 0; 1792 1793 #ifdef WAPBL 1794 if (error) 1795 return error; 1796 if (mp->mnt_wapbl) { 1797 error = wapbl_flush(mp->mnt_wapbl, 1); 1798 if (flags & FORCECLOSE) 1799 error = 0; 1800 } 1801 #endif 1802 1803 return (error); 1804 } 1805 1806 /* 1807 * Get file system statistics. 1808 */ 1809 int 1810 ffs_statvfs(struct mount *mp, struct statvfs *sbp) 1811 { 1812 struct ufsmount *ump; 1813 struct fs *fs; 1814 1815 ump = VFSTOUFS(mp); 1816 fs = ump->um_fs; 1817 mutex_enter(&ump->um_lock); 1818 sbp->f_bsize = fs->fs_bsize; 1819 sbp->f_frsize = fs->fs_fsize; 1820 sbp->f_iosize = fs->fs_bsize; 1821 sbp->f_blocks = fs->fs_dsize; 1822 sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) + 1823 fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks); 1824 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t) 1825 fs->fs_minfree) / (u_int64_t) 100; 1826 if (sbp->f_bfree > sbp->f_bresvd) 1827 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; 1828 else 1829 sbp->f_bavail = 0; 1830 sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; 1831 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; 1832 sbp->f_favail = sbp->f_ffree; 1833 sbp->f_fresvd = 0; 1834 mutex_exit(&ump->um_lock); 1835 copy_statvfs_info(sbp, mp); 1836 1837 return (0); 1838 } 1839 1840 struct ffs_sync_ctx { 1841 int waitfor; 1842 bool is_suspending; 1843 }; 1844 1845 static bool 1846 ffs_sync_selector(void *cl, struct vnode *vp) 1847 { 1848 struct ffs_sync_ctx *c = cl; 1849 struct inode *ip; 1850 1851 ip = VTOI(vp); 1852 /* 1853 * Skip the vnode/inode if inaccessible. 1854 */ 1855 if (ip == NULL || vp->v_type == VNON) 1856 return false; 1857 1858 /* 1859 * We deliberately update inode times here. This will 1860 * prevent a massive queue of updates accumulating, only 1861 * to be handled by a call to unmount. 1862 * 1863 * XXX It would be better to have the syncer trickle these 1864 * out. Adjustment needed to allow registering vnodes for 1865 * sync when the vnode is clean, but the inode dirty. Or 1866 * have ufs itself trickle out inode updates. 1867 * 1868 * If doing a lazy sync, we don't care about metadata or 1869 * data updates, because they are handled by each vnode's 1870 * synclist entry. In this case we are only interested in 1871 * writing back modified inodes. 1872 */ 1873 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | 1874 IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 && 1875 (c->waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) && 1876 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) 1877 return false; 1878 1879 if (vp->v_type == VBLK && c->is_suspending) 1880 return false; 1881 1882 return true; 1883 } 1884 1885 /* 1886 * Go through the disk queues to initiate sandbagged IO; 1887 * go through the inodes to write those that have been modified; 1888 * initiate the writing of the super block if it has been modified. 1889 * 1890 * Note: we are always called with the filesystem marked `MPBUSY'. 1891 */ 1892 int 1893 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) 1894 { 1895 struct vnode *vp; 1896 struct ufsmount *ump = VFSTOUFS(mp); 1897 struct fs *fs; 1898 struct vnode_iterator *marker; 1899 int error, allerror = 0; 1900 bool is_suspending; 1901 struct ffs_sync_ctx ctx; 1902 1903 fs = ump->um_fs; 1904 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 1905 printf("fs = %s\n", fs->fs_fsmnt); 1906 panic("update: rofs mod"); 1907 } 1908 1909 fstrans_start(mp, FSTRANS_SHARED); 1910 is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING); 1911 /* 1912 * Write back each (modified) inode. 1913 */ 1914 vfs_vnode_iterator_init(mp, &marker); 1915 1916 ctx.waitfor = waitfor; 1917 ctx.is_suspending = is_suspending; 1918 while ((vp = vfs_vnode_iterator_next(marker, ffs_sync_selector, &ctx))) 1919 { 1920 error = vn_lock(vp, LK_EXCLUSIVE); 1921 if (error) { 1922 vrele(vp); 1923 continue; 1924 } 1925 if (waitfor == MNT_LAZY) { 1926 error = UFS_WAPBL_BEGIN(vp->v_mount); 1927 if (!error) { 1928 error = ffs_update(vp, NULL, NULL, 1929 UPDATE_CLOSE); 1930 UFS_WAPBL_END(vp->v_mount); 1931 } 1932 } else { 1933 error = VOP_FSYNC(vp, cred, FSYNC_NOLOG | 1934 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0); 1935 } 1936 if (error) 1937 allerror = error; 1938 vput(vp); 1939 } 1940 vfs_vnode_iterator_destroy(marker); 1941 1942 /* 1943 * Force stale file system control information to be flushed. 1944 */ 1945 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 || 1946 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) { 1947 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1948 if ((error = VOP_FSYNC(ump->um_devvp, cred, 1949 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG, 1950 0, 0)) != 0) 1951 allerror = error; 1952 VOP_UNLOCK(ump->um_devvp); 1953 } 1954 #if defined(QUOTA) || defined(QUOTA2) 1955 qsync(mp); 1956 #endif 1957 /* 1958 * Write back modified superblock. 1959 */ 1960 if (fs->fs_fmod != 0) { 1961 fs->fs_fmod = 0; 1962 fs->fs_time = time_second; 1963 error = UFS_WAPBL_BEGIN(mp); 1964 if (error) 1965 allerror = error; 1966 else { 1967 if ((error = ffs_cgupdate(ump, waitfor))) 1968 allerror = error; 1969 UFS_WAPBL_END(mp); 1970 } 1971 } 1972 1973 #ifdef WAPBL 1974 if (mp->mnt_wapbl) { 1975 error = wapbl_flush(mp->mnt_wapbl, 0); 1976 if (error) 1977 allerror = error; 1978 } 1979 #endif 1980 1981 fstrans_done(mp); 1982 return (allerror); 1983 } 1984 1985 /* 1986 * Load inode from disk and initialize vnode. 1987 */ 1988 static int 1989 ffs_init_vnode(struct ufsmount *ump, struct vnode *vp, ino_t ino) 1990 { 1991 struct fs *fs; 1992 struct inode *ip; 1993 struct buf *bp; 1994 int error; 1995 1996 fs = ump->um_fs; 1997 1998 /* Read in the disk contents for the inode. */ 1999 error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)), 2000 (int)fs->fs_bsize, 0, &bp); 2001 if (error) 2002 return error; 2003 2004 /* Allocate and initialize inode. */ 2005 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK); 2006 memset(ip, 0, sizeof(struct inode)); 2007 ip->i_ump = ump; 2008 ip->i_fs = fs; 2009 ip->i_dev = ump->um_dev; 2010 ip->i_number = ino; 2011 if (ump->um_fstype == UFS1) 2012 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache, 2013 PR_WAITOK); 2014 else 2015 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache, 2016 PR_WAITOK); 2017 ffs_load_inode(bp, ip, fs, ino); 2018 brelse(bp, 0); 2019 ip->i_vnode = vp; 2020 #if defined(QUOTA) || defined(QUOTA2) 2021 ufsquota_init(ip); 2022 #endif 2023 2024 /* Initialise vnode with this inode. */ 2025 vp->v_tag = VT_UFS; 2026 vp->v_op = ffs_vnodeop_p; 2027 vp->v_vflag |= VV_LOCKSWORK; 2028 vp->v_data = ip; 2029 2030 /* Initialize genfs node. */ 2031 genfs_node_init(vp, &ffs_genfsops); 2032 2033 return 0; 2034 } 2035 2036 /* 2037 * Undo ffs_init_vnode(). 2038 */ 2039 static void 2040 ffs_deinit_vnode(struct ufsmount *ump, struct vnode *vp) 2041 { 2042 struct inode *ip = VTOI(vp); 2043 2044 if (ump->um_fstype == UFS1) 2045 pool_cache_put(ffs_dinode1_cache, ip->i_din.ffs1_din); 2046 else 2047 pool_cache_put(ffs_dinode2_cache, ip->i_din.ffs2_din); 2048 pool_cache_put(ffs_inode_cache, ip); 2049 2050 genfs_node_destroy(vp); 2051 vp->v_data = NULL; 2052 } 2053 2054 /* 2055 * Read an inode from disk and initialize this vnode / inode pair. 2056 * Caller assures no other thread will try to load this inode. 2057 */ 2058 int 2059 ffs_loadvnode(struct mount *mp, struct vnode *vp, 2060 const void *key, size_t key_len, const void **new_key) 2061 { 2062 ino_t ino; 2063 struct fs *fs; 2064 struct inode *ip; 2065 struct ufsmount *ump; 2066 int error; 2067 2068 KASSERT(key_len == sizeof(ino)); 2069 memcpy(&ino, key, key_len); 2070 ump = VFSTOUFS(mp); 2071 fs = ump->um_fs; 2072 2073 error = ffs_init_vnode(ump, vp, ino); 2074 if (error) 2075 return error; 2076 2077 ip = VTOI(vp); 2078 if (ip->i_mode == 0) { 2079 ffs_deinit_vnode(ump, vp); 2080 2081 return ENOENT; 2082 } 2083 2084 /* Initialize the vnode from the inode. */ 2085 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2086 2087 /* Finish inode initialization. */ 2088 ip->i_devvp = ump->um_devvp; 2089 vref(ip->i_devvp); 2090 2091 /* 2092 * Ensure that uid and gid are correct. This is a temporary 2093 * fix until fsck has been changed to do the update. 2094 */ 2095 2096 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ 2097 ip->i_uid = ip->i_ffs1_ouid; /* XXX */ 2098 ip->i_gid = ip->i_ffs1_ogid; /* XXX */ 2099 } /* XXX */ 2100 uvm_vnp_setsize(vp, ip->i_size); 2101 *new_key = &ip->i_number; 2102 return 0; 2103 } 2104 2105 /* 2106 * Create a new inode on disk and initialize this vnode / inode pair. 2107 */ 2108 int 2109 ffs_newvnode(struct mount *mp, struct vnode *dvp, struct vnode *vp, 2110 struct vattr *vap, kauth_cred_t cred, 2111 size_t *key_len, const void **new_key) 2112 { 2113 ino_t ino; 2114 struct fs *fs; 2115 struct inode *ip; 2116 struct timespec ts; 2117 struct ufsmount *ump; 2118 int error, mode; 2119 2120 KASSERT(dvp->v_mount == mp); 2121 KASSERT(vap->va_type != VNON); 2122 2123 *key_len = sizeof(ino); 2124 ump = VFSTOUFS(mp); 2125 fs = ump->um_fs; 2126 mode = MAKEIMODE(vap->va_type, vap->va_mode); 2127 2128 /* Allocate fresh inode. */ 2129 error = ffs_valloc(dvp, mode, cred, &ino); 2130 if (error) 2131 return error; 2132 2133 /* Attach inode to vnode. */ 2134 error = ffs_init_vnode(ump, vp, ino); 2135 if (error) { 2136 if (UFS_WAPBL_BEGIN(mp) == 0) { 2137 ffs_vfree(dvp, ino, mode); 2138 UFS_WAPBL_END(mp); 2139 } 2140 return error; 2141 } 2142 2143 ip = VTOI(vp); 2144 if (ip->i_mode || DIP(ip, size) || DIP(ip, blocks)) { 2145 printf("free ino %" PRId64 " on %s:\n", ino, fs->fs_fsmnt); 2146 printf("dmode %x mode %x dgen %x gen %x\n", 2147 DIP(ip, mode), ip->i_mode, 2148 DIP(ip, gen), ip->i_gen); 2149 printf("size %" PRIx64 " blocks %" PRIx64 "\n", 2150 DIP(ip, size), DIP(ip, blocks)); 2151 panic("ffs_init_vnode: dup alloc"); 2152 } 2153 2154 /* Set uid / gid. */ 2155 if (cred == NOCRED || cred == FSCRED) { 2156 ip->i_gid = 0; 2157 ip->i_uid = 0; 2158 } else { 2159 ip->i_gid = VTOI(dvp)->i_gid; 2160 ip->i_uid = kauth_cred_geteuid(cred); 2161 } 2162 DIP_ASSIGN(ip, gid, ip->i_gid); 2163 DIP_ASSIGN(ip, uid, ip->i_uid); 2164 2165 #if defined(QUOTA) || defined(QUOTA2) 2166 error = UFS_WAPBL_BEGIN(mp); 2167 if (error) { 2168 ffs_deinit_vnode(ump, vp); 2169 2170 return error; 2171 } 2172 error = chkiq(ip, 1, cred, 0); 2173 if (error) { 2174 ffs_vfree(dvp, ino, mode); 2175 UFS_WAPBL_END(mp); 2176 ffs_deinit_vnode(ump, vp); 2177 2178 return error; 2179 } 2180 UFS_WAPBL_END(mp); 2181 #endif 2182 2183 /* Set type and finalize. */ 2184 ip->i_flags = 0; 2185 DIP_ASSIGN(ip, flags, 0); 2186 ip->i_mode = mode; 2187 DIP_ASSIGN(ip, mode, mode); 2188 if (vap->va_rdev != VNOVAL) { 2189 /* 2190 * Want to be able to use this to make badblock 2191 * inodes, so don't truncate the dev number. 2192 */ 2193 if (ump->um_fstype == UFS1) 2194 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 2195 UFS_MPNEEDSWAP(ump)); 2196 else 2197 ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, 2198 UFS_MPNEEDSWAP(ump)); 2199 } 2200 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2201 ip->i_devvp = ump->um_devvp; 2202 vref(ip->i_devvp); 2203 2204 /* Set up a new generation number for this inode. */ 2205 ip->i_gen++; 2206 DIP_ASSIGN(ip, gen, ip->i_gen); 2207 if (fs->fs_magic == FS_UFS2_MAGIC) { 2208 vfs_timestamp(&ts); 2209 ip->i_ffs2_birthtime = ts.tv_sec; 2210 ip->i_ffs2_birthnsec = ts.tv_nsec; 2211 } 2212 2213 uvm_vnp_setsize(vp, ip->i_size); 2214 *new_key = &ip->i_number; 2215 return 0; 2216 } 2217 2218 /* 2219 * File handle to vnode 2220 * 2221 * Have to be really careful about stale file handles: 2222 * - check that the inode number is valid 2223 * - call ffs_vget() to get the locked inode 2224 * - check for an unallocated inode (i_mode == 0) 2225 * - check that the given client host has export rights and return 2226 * those rights via. exflagsp and credanonp 2227 */ 2228 int 2229 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 2230 { 2231 struct ufid ufh; 2232 int error; 2233 2234 if (fhp->fid_len != sizeof(struct ufid)) 2235 return EINVAL; 2236 2237 memcpy(&ufh, fhp, sizeof(ufh)); 2238 if ((error = ffs_checkrange(mp, ufh.ufid_ino)) != 0) 2239 return error; 2240 2241 return (ufs_fhtovp(mp, &ufh, vpp)); 2242 } 2243 2244 /* 2245 * Vnode pointer to File handle 2246 */ 2247 /* ARGSUSED */ 2248 int 2249 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) 2250 { 2251 struct inode *ip; 2252 struct ufid ufh; 2253 2254 if (*fh_size < sizeof(struct ufid)) { 2255 *fh_size = sizeof(struct ufid); 2256 return E2BIG; 2257 } 2258 ip = VTOI(vp); 2259 *fh_size = sizeof(struct ufid); 2260 memset(&ufh, 0, sizeof(ufh)); 2261 ufh.ufid_len = sizeof(struct ufid); 2262 ufh.ufid_ino = ip->i_number; 2263 ufh.ufid_gen = ip->i_gen; 2264 memcpy(fhp, &ufh, sizeof(ufh)); 2265 return (0); 2266 } 2267 2268 void 2269 ffs_init(void) 2270 { 2271 if (ffs_initcount++ > 0) 2272 return; 2273 2274 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0, 2275 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL); 2276 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0, 2277 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL); 2278 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0, 2279 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL); 2280 ufs_init(); 2281 } 2282 2283 void 2284 ffs_reinit(void) 2285 { 2286 ufs_reinit(); 2287 } 2288 2289 void 2290 ffs_done(void) 2291 { 2292 if (--ffs_initcount > 0) 2293 return; 2294 2295 ufs_done(); 2296 pool_cache_destroy(ffs_dinode2_cache); 2297 pool_cache_destroy(ffs_dinode1_cache); 2298 pool_cache_destroy(ffs_inode_cache); 2299 } 2300 2301 /* 2302 * Write a superblock and associated information back to disk. 2303 */ 2304 int 2305 ffs_sbupdate(struct ufsmount *mp, int waitfor) 2306 { 2307 struct fs *fs = mp->um_fs; 2308 struct buf *bp; 2309 int error; 2310 u_int32_t saveflag; 2311 2312 error = ffs_getblk(mp->um_devvp, 2313 fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK, 2314 fs->fs_sbsize, false, &bp); 2315 if (error) 2316 return error; 2317 saveflag = fs->fs_flags & FS_INTERNAL; 2318 fs->fs_flags &= ~FS_INTERNAL; 2319 2320 memcpy(bp->b_data, fs, fs->fs_sbsize); 2321 2322 ffs_oldfscompat_write((struct fs *)bp->b_data, mp); 2323 #ifdef FFS_EI 2324 if (mp->um_flags & UFS_NEEDSWAP) 2325 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data); 2326 #endif 2327 fs->fs_flags |= saveflag; 2328 2329 if (waitfor == MNT_WAIT) 2330 error = bwrite(bp); 2331 else 2332 bawrite(bp); 2333 return (error); 2334 } 2335 2336 int 2337 ffs_cgupdate(struct ufsmount *mp, int waitfor) 2338 { 2339 struct fs *fs = mp->um_fs; 2340 struct buf *bp; 2341 int blks; 2342 void *space; 2343 int i, size, error = 0, allerror = 0; 2344 2345 allerror = ffs_sbupdate(mp, waitfor); 2346 blks = howmany(fs->fs_cssize, fs->fs_fsize); 2347 space = fs->fs_csp; 2348 for (i = 0; i < blks; i += fs->fs_frag) { 2349 size = fs->fs_bsize; 2350 if (i + fs->fs_frag > blks) 2351 size = (blks - i) * fs->fs_fsize; 2352 error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), 2353 FFS_NOBLK, size, false, &bp); 2354 if (error) 2355 break; 2356 #ifdef FFS_EI 2357 if (mp->um_flags & UFS_NEEDSWAP) 2358 ffs_csum_swap((struct csum*)space, 2359 (struct csum*)bp->b_data, size); 2360 else 2361 #endif 2362 memcpy(bp->b_data, space, (u_int)size); 2363 space = (char *)space + size; 2364 if (waitfor == MNT_WAIT) 2365 error = bwrite(bp); 2366 else 2367 bawrite(bp); 2368 } 2369 if (!allerror && error) 2370 allerror = error; 2371 return (allerror); 2372 } 2373 2374 int 2375 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp, 2376 int attrnamespace, const char *attrname) 2377 { 2378 #ifdef UFS_EXTATTR 2379 /* 2380 * File-backed extended attributes are only supported on UFS1. 2381 * UFS2 has native extended attributes. 2382 */ 2383 if (VFSTOUFS(mp)->um_fstype == UFS1) 2384 return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname)); 2385 #endif 2386 return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname)); 2387 } 2388 2389 int 2390 ffs_suspendctl(struct mount *mp, int cmd) 2391 { 2392 int error; 2393 struct lwp *l = curlwp; 2394 2395 switch (cmd) { 2396 case SUSPEND_SUSPEND: 2397 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0) 2398 return error; 2399 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred); 2400 if (error == 0) 2401 error = fstrans_setstate(mp, FSTRANS_SUSPENDED); 2402 #ifdef WAPBL 2403 if (error == 0 && mp->mnt_wapbl) 2404 error = wapbl_flush(mp->mnt_wapbl, 1); 2405 #endif 2406 if (error != 0) { 2407 (void) fstrans_setstate(mp, FSTRANS_NORMAL); 2408 return error; 2409 } 2410 return 0; 2411 2412 case SUSPEND_RESUME: 2413 return fstrans_setstate(mp, FSTRANS_NORMAL); 2414 2415 default: 2416 return EINVAL; 2417 } 2418 } 2419 2420 /* 2421 * Synch vnode for a mounted file system. 2422 */ 2423 static int 2424 ffs_vfs_fsync(vnode_t *vp, int flags) 2425 { 2426 int error, i, pflags; 2427 #ifdef WAPBL 2428 struct mount *mp; 2429 #endif 2430 2431 KASSERT(vp->v_type == VBLK); 2432 KASSERT(spec_node_getmountedfs(vp) != NULL); 2433 2434 /* 2435 * Flush all dirty data associated with the vnode. 2436 */ 2437 pflags = PGO_ALLPAGES | PGO_CLEANIT; 2438 if ((flags & FSYNC_WAIT) != 0) 2439 pflags |= PGO_SYNCIO; 2440 mutex_enter(vp->v_interlock); 2441 error = VOP_PUTPAGES(vp, 0, 0, pflags); 2442 if (error) 2443 return error; 2444 2445 #ifdef WAPBL 2446 mp = spec_node_getmountedfs(vp); 2447 if (mp && mp->mnt_wapbl) { 2448 /* 2449 * Don't bother writing out metadata if the syncer is 2450 * making the request. We will let the sync vnode 2451 * write it out in a single burst through a call to 2452 * VFS_SYNC(). 2453 */ 2454 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0) 2455 return 0; 2456 2457 /* 2458 * Don't flush the log if the vnode being flushed 2459 * contains no dirty buffers that could be in the log. 2460 */ 2461 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 2462 error = wapbl_flush(mp->mnt_wapbl, 0); 2463 if (error) 2464 return error; 2465 } 2466 2467 if ((flags & FSYNC_WAIT) != 0) { 2468 mutex_enter(vp->v_interlock); 2469 while (vp->v_numoutput) 2470 cv_wait(&vp->v_cv, vp->v_interlock); 2471 mutex_exit(vp->v_interlock); 2472 } 2473 2474 return 0; 2475 } 2476 #endif /* WAPBL */ 2477 2478 error = vflushbuf(vp, flags); 2479 if (error == 0 && (flags & FSYNC_CACHE) != 0) { 2480 i = 1; 2481 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, 2482 kauth_cred_get()); 2483 } 2484 2485 return error; 2486 } 2487