1 /* $NetBSD: ffs_vfsops.c,v 1.350 2017/03/10 20:38:28 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1991, 1993, 1994 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.350 2017/03/10 20:38:28 jdolecek Exp $"); 65 66 #if defined(_KERNEL_OPT) 67 #include "opt_ffs.h" 68 #include "opt_quota.h" 69 #include "opt_wapbl.h" 70 #endif 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/namei.h> 75 #include <sys/proc.h> 76 #include <sys/kernel.h> 77 #include <sys/vnode.h> 78 #include <sys/socket.h> 79 #include <sys/mount.h> 80 #include <sys/buf.h> 81 #include <sys/device.h> 82 #include <sys/disk.h> 83 #include <sys/mbuf.h> 84 #include <sys/file.h> 85 #include <sys/disklabel.h> 86 #include <sys/ioctl.h> 87 #include <sys/errno.h> 88 #include <sys/kmem.h> 89 #include <sys/pool.h> 90 #include <sys/lock.h> 91 #include <sys/sysctl.h> 92 #include <sys/conf.h> 93 #include <sys/kauth.h> 94 #include <sys/wapbl.h> 95 #include <sys/module.h> 96 97 #include <miscfs/genfs/genfs.h> 98 #include <miscfs/specfs/specdev.h> 99 100 #include <ufs/ufs/quota.h> 101 #include <ufs/ufs/ufsmount.h> 102 #include <ufs/ufs/inode.h> 103 #include <ufs/ufs/dir.h> 104 #include <ufs/ufs/ufs_extern.h> 105 #include <ufs/ufs/ufs_bswap.h> 106 #include <ufs/ufs/ufs_wapbl.h> 107 108 #include <ufs/ffs/fs.h> 109 #include <ufs/ffs/ffs_extern.h> 110 111 #ifdef WAPBL 112 MODULE(MODULE_CLASS_VFS, ffs, "wapbl"); 113 #else 114 MODULE(MODULE_CLASS_VFS, ffs, NULL); 115 #endif 116 117 static int ffs_vfs_fsync(vnode_t *, int); 118 static int ffs_superblock_validate(struct fs *); 119 static int ffs_is_appleufs(struct vnode *, struct fs *); 120 121 static int ffs_init_vnode(struct ufsmount *, struct vnode *, ino_t); 122 static void ffs_deinit_vnode(struct ufsmount *, struct vnode *); 123 124 static struct sysctllog *ffs_sysctl_log; 125 126 static kauth_listener_t ffs_snapshot_listener; 127 128 /* how many times ffs_init() was called */ 129 int ffs_initcount = 0; 130 131 #ifdef DEBUG_FFS_MOUNT 132 #define DPRINTF(_fmt, args...) printf("%s: " _fmt "\n", __func__, ##args) 133 #else 134 #define DPRINTF(_fmt, args...) do {} while (/*CONSTCOND*/0) 135 #endif 136 137 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc; 138 extern const struct vnodeopv_desc ffs_specop_opv_desc; 139 extern const struct vnodeopv_desc ffs_fifoop_opv_desc; 140 141 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = { 142 &ffs_vnodeop_opv_desc, 143 &ffs_specop_opv_desc, 144 &ffs_fifoop_opv_desc, 145 NULL, 146 }; 147 148 struct vfsops ffs_vfsops = { 149 .vfs_name = MOUNT_FFS, 150 .vfs_min_mount_data = sizeof (struct ufs_args), 151 .vfs_mount = ffs_mount, 152 .vfs_start = ufs_start, 153 .vfs_unmount = ffs_unmount, 154 .vfs_root = ufs_root, 155 .vfs_quotactl = ufs_quotactl, 156 .vfs_statvfs = ffs_statvfs, 157 .vfs_sync = ffs_sync, 158 .vfs_vget = ufs_vget, 159 .vfs_loadvnode = ffs_loadvnode, 160 .vfs_newvnode = ffs_newvnode, 161 .vfs_fhtovp = ffs_fhtovp, 162 .vfs_vptofh = ffs_vptofh, 163 .vfs_init = ffs_init, 164 .vfs_reinit = ffs_reinit, 165 .vfs_done = ffs_done, 166 .vfs_mountroot = ffs_mountroot, 167 .vfs_snapshot = ffs_snapshot, 168 .vfs_extattrctl = ffs_extattrctl, 169 .vfs_suspendctl = genfs_suspendctl, 170 .vfs_renamelock_enter = genfs_renamelock_enter, 171 .vfs_renamelock_exit = genfs_renamelock_exit, 172 .vfs_fsync = ffs_vfs_fsync, 173 .vfs_opv_descs = ffs_vnodeopv_descs 174 }; 175 176 static const struct genfs_ops ffs_genfsops = { 177 .gop_size = ffs_gop_size, 178 .gop_alloc = ufs_gop_alloc, 179 .gop_write = genfs_gop_write, 180 .gop_markupdate = ufs_gop_markupdate, 181 }; 182 183 static const struct ufs_ops ffs_ufsops = { 184 .uo_itimes = ffs_itimes, 185 .uo_update = ffs_update, 186 .uo_truncate = ffs_truncate, 187 .uo_balloc = ffs_balloc, 188 .uo_snapgone = ffs_snapgone, 189 .uo_bufrd = ffs_bufrd, 190 .uo_bufwr = ffs_bufwr, 191 }; 192 193 static int 194 ffs_checkrange(struct mount *mp, uint32_t ino) 195 { 196 struct fs *fs = VFSTOUFS(mp)->um_fs; 197 198 if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) { 199 DPRINTF("out of range %u\n", ino); 200 return ESTALE; 201 } 202 203 /* 204 * Need to check if inode is initialized because ffsv2 does 205 * lazy initialization and we can get here from nfs_fhtovp 206 */ 207 if (fs->fs_magic != FS_UFS2_MAGIC) 208 return 0; 209 210 struct buf *bp; 211 int cg = ino_to_cg(fs, ino); 212 struct ufsmount *ump = VFSTOUFS(mp); 213 214 int error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)), 215 (int)fs->fs_cgsize, B_MODIFY, &bp); 216 if (error) { 217 DPRINTF("error %d reading cg %d ino %u\n", error, cg, ino); 218 return error; 219 } 220 221 const int needswap = UFS_FSNEEDSWAP(fs); 222 223 struct cg *cgp = (struct cg *)bp->b_data; 224 if (!cg_chkmagic(cgp, needswap)) { 225 brelse(bp, 0); 226 DPRINTF("bad cylinder group magic cg %d ino %u\n", cg, ino); 227 return ESTALE; 228 } 229 230 int32_t initediblk = ufs_rw32(cgp->cg_initediblk, needswap); 231 brelse(bp, 0); 232 233 if (cg * fs->fs_ipg + initediblk < ino) { 234 DPRINTF("cg=%d fs->fs_ipg=%d initediblk=%d ino=%u\n", 235 cg, fs->fs_ipg, initediblk, ino); 236 return ESTALE; 237 } 238 return 0; 239 } 240 241 static int 242 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 243 void *arg0, void *arg1, void *arg2, void *arg3) 244 { 245 vnode_t *vp = arg2; 246 int result = KAUTH_RESULT_DEFER; 247 248 if (action != KAUTH_SYSTEM_FS_SNAPSHOT) 249 return result; 250 251 if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred)) 252 result = KAUTH_RESULT_ALLOW; 253 254 return result; 255 } 256 257 static int 258 ffs_modcmd(modcmd_t cmd, void *arg) 259 { 260 int error; 261 262 #if 0 263 extern int doasyncfree; 264 #endif 265 #ifdef UFS_EXTATTR 266 extern int ufs_extattr_autocreate; 267 #endif 268 extern int ffs_log_changeopt; 269 270 switch (cmd) { 271 case MODULE_CMD_INIT: 272 error = vfs_attach(&ffs_vfsops); 273 if (error != 0) 274 break; 275 276 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 277 CTLFLAG_PERMANENT, 278 CTLTYPE_NODE, "ffs", 279 SYSCTL_DESCR("Berkeley Fast File System"), 280 NULL, 0, NULL, 0, 281 CTL_VFS, 1, CTL_EOL); 282 /* 283 * @@@ should we even bother with these first three? 284 */ 285 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 286 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 287 CTLTYPE_INT, "doclusterread", NULL, 288 sysctl_notavail, 0, NULL, 0, 289 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL); 290 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 291 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 292 CTLTYPE_INT, "doclusterwrite", NULL, 293 sysctl_notavail, 0, NULL, 0, 294 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL); 295 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 296 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 297 CTLTYPE_INT, "doreallocblks", NULL, 298 sysctl_notavail, 0, NULL, 0, 299 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL); 300 #if 0 301 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 302 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 303 CTLTYPE_INT, "doasyncfree", 304 SYSCTL_DESCR("Release dirty blocks asynchronously"), 305 NULL, 0, &doasyncfree, 0, 306 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL); 307 #endif 308 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 309 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 310 CTLTYPE_INT, "log_changeopt", 311 SYSCTL_DESCR("Log changes in optimization strategy"), 312 NULL, 0, &ffs_log_changeopt, 0, 313 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL); 314 #ifdef UFS_EXTATTR 315 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 316 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 317 CTLTYPE_INT, "extattr_autocreate", 318 SYSCTL_DESCR("Size of attribute for " 319 "backing file autocreation"), 320 NULL, 0, &ufs_extattr_autocreate, 0, 321 CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL); 322 323 #endif /* UFS_EXTATTR */ 324 325 ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 326 ffs_snapshot_cb, NULL); 327 if (ffs_snapshot_listener == NULL) 328 printf("ffs_modcmd: can't listen on system scope.\n"); 329 330 break; 331 case MODULE_CMD_FINI: 332 error = vfs_detach(&ffs_vfsops); 333 if (error != 0) 334 break; 335 sysctl_teardown(&ffs_sysctl_log); 336 if (ffs_snapshot_listener != NULL) 337 kauth_unlisten_scope(ffs_snapshot_listener); 338 break; 339 default: 340 error = ENOTTY; 341 break; 342 } 343 344 return (error); 345 } 346 347 pool_cache_t ffs_inode_cache; 348 pool_cache_t ffs_dinode1_cache; 349 pool_cache_t ffs_dinode2_cache; 350 351 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t); 352 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); 353 354 /* 355 * Called by main() when ffs is going to be mounted as root. 356 */ 357 358 int 359 ffs_mountroot(void) 360 { 361 struct fs *fs; 362 struct mount *mp; 363 struct lwp *l = curlwp; /* XXX */ 364 struct ufsmount *ump; 365 int error; 366 367 if (device_class(root_device) != DV_DISK) 368 return (ENODEV); 369 370 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) { 371 vrele(rootvp); 372 return (error); 373 } 374 375 /* 376 * We always need to be able to mount the root file system. 377 */ 378 mp->mnt_flag |= MNT_FORCE; 379 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { 380 vfs_unbusy(mp, false, NULL); 381 vfs_destroy(mp); 382 return (error); 383 } 384 mp->mnt_flag &= ~MNT_FORCE; 385 mountlist_append(mp); 386 ump = VFSTOUFS(mp); 387 fs = ump->um_fs; 388 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt)); 389 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); 390 (void)ffs_statvfs(mp, &mp->mnt_stat); 391 vfs_unbusy(mp, false, NULL); 392 setrootfstime((time_t)fs->fs_time); 393 return (0); 394 } 395 396 /* 397 * VFS Operations. 398 * 399 * mount system call 400 */ 401 int 402 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) 403 { 404 struct lwp *l = curlwp; 405 struct vnode *devvp = NULL; 406 struct ufs_args *args = data; 407 struct ufsmount *ump = NULL; 408 struct fs *fs; 409 int error = 0, flags, update; 410 mode_t accessmode; 411 412 if (args == NULL) { 413 DPRINTF("NULL args"); 414 return EINVAL; 415 } 416 if (*data_len < sizeof(*args)) { 417 DPRINTF("bad size args %zu != %zu", *data_len, sizeof(*args)); 418 return EINVAL; 419 } 420 421 if (mp->mnt_flag & MNT_GETARGS) { 422 ump = VFSTOUFS(mp); 423 if (ump == NULL) { 424 DPRINTF("no ump"); 425 return EIO; 426 } 427 args->fspec = NULL; 428 *data_len = sizeof *args; 429 return 0; 430 } 431 432 update = mp->mnt_flag & MNT_UPDATE; 433 434 /* Check arguments */ 435 if (args->fspec != NULL) { 436 /* 437 * Look up the name and verify that it's sane. 438 */ 439 error = namei_simple_user(args->fspec, 440 NSM_FOLLOW_NOEMULROOT, &devvp); 441 if (error != 0) { 442 DPRINTF("namei_simple_user returned %d", error); 443 return error; 444 } 445 446 if (!update) { 447 /* 448 * Be sure this is a valid block device 449 */ 450 if (devvp->v_type != VBLK) { 451 DPRINTF("non block device %d", devvp->v_type); 452 error = ENOTBLK; 453 } else if (bdevsw_lookup(devvp->v_rdev) == NULL) { 454 DPRINTF("can't find block device 0x%jx", 455 devvp->v_rdev); 456 error = ENXIO; 457 } 458 } else { 459 /* 460 * Be sure we're still naming the same device 461 * used for our initial mount 462 */ 463 ump = VFSTOUFS(mp); 464 if (devvp != ump->um_devvp) { 465 if (devvp->v_rdev != ump->um_devvp->v_rdev) { 466 DPRINTF("wrong device 0x%jx != 0x%jx", 467 (uintmax_t)devvp->v_rdev, 468 (uintmax_t)ump->um_devvp->v_rdev); 469 error = EINVAL; 470 } else { 471 vrele(devvp); 472 devvp = ump->um_devvp; 473 vref(devvp); 474 } 475 } 476 } 477 } else { 478 if (!update) { 479 /* New mounts must have a filename for the device */ 480 DPRINTF("no filename for mount"); 481 return EINVAL; 482 } else { 483 /* Use the extant mount */ 484 ump = VFSTOUFS(mp); 485 devvp = ump->um_devvp; 486 vref(devvp); 487 } 488 } 489 490 /* 491 * If mount by non-root, then verify that user has necessary 492 * permissions on the device. 493 * 494 * Permission to update a mount is checked higher, so here we presume 495 * updating the mount is okay (for example, as far as securelevel goes) 496 * which leaves us with the normal check. 497 */ 498 if (error == 0) { 499 accessmode = VREAD; 500 if (update ? 501 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : 502 (mp->mnt_flag & MNT_RDONLY) == 0) 503 accessmode |= VWRITE; 504 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 505 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 506 KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, 507 KAUTH_ARG(accessmode)); 508 if (error) { 509 DPRINTF("kauth returned %d", error); 510 } 511 VOP_UNLOCK(devvp); 512 } 513 514 if (error) { 515 vrele(devvp); 516 return (error); 517 } 518 519 #ifdef WAPBL 520 /* WAPBL can only be enabled on a r/w mount. */ 521 if (((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) || 522 (mp->mnt_iflag & IMNT_WANTRDONLY)) { 523 mp->mnt_flag &= ~MNT_LOG; 524 } 525 #else /* !WAPBL */ 526 mp->mnt_flag &= ~MNT_LOG; 527 #endif /* !WAPBL */ 528 529 if (!update) { 530 int xflags; 531 532 if (mp->mnt_flag & MNT_RDONLY) 533 xflags = FREAD; 534 else 535 xflags = FREAD | FWRITE; 536 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 537 error = VOP_OPEN(devvp, xflags, FSCRED); 538 VOP_UNLOCK(devvp); 539 if (error) { 540 DPRINTF("VOP_OPEN returned %d", error); 541 goto fail; 542 } 543 error = ffs_mountfs(devvp, mp, l); 544 if (error) { 545 DPRINTF("ffs_mountfs returned %d", error); 546 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 547 (void)VOP_CLOSE(devvp, xflags, NOCRED); 548 VOP_UNLOCK(devvp); 549 goto fail; 550 } 551 552 ump = VFSTOUFS(mp); 553 fs = ump->um_fs; 554 } else { 555 /* 556 * Update the mount. 557 */ 558 559 /* 560 * The initial mount got a reference on this 561 * device, so drop the one obtained via 562 * namei(), above. 563 */ 564 vrele(devvp); 565 566 ump = VFSTOUFS(mp); 567 fs = ump->um_fs; 568 if (fs->fs_ronly == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) { 569 /* 570 * Changing from r/w to r/o 571 */ 572 flags = WRITECLOSE; 573 if (mp->mnt_flag & MNT_FORCE) 574 flags |= FORCECLOSE; 575 error = ffs_flushfiles(mp, flags, l); 576 if (error) 577 return error; 578 579 error = UFS_WAPBL_BEGIN(mp); 580 if (error) { 581 DPRINTF("wapbl %d", error); 582 return error; 583 } 584 585 if (ffs_cgupdate(ump, MNT_WAIT) == 0 && 586 fs->fs_clean & FS_WASCLEAN) { 587 if (mp->mnt_flag & MNT_SOFTDEP) 588 fs->fs_flags &= ~FS_DOSOFTDEP; 589 fs->fs_clean = FS_ISCLEAN; 590 (void) ffs_sbupdate(ump, MNT_WAIT); 591 } 592 593 UFS_WAPBL_END(mp); 594 } 595 596 #ifdef WAPBL 597 if ((mp->mnt_flag & MNT_LOG) == 0) { 598 error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE); 599 if (error) { 600 DPRINTF("ffs_wapbl_stop returned %d", error); 601 return error; 602 } 603 } 604 #endif /* WAPBL */ 605 606 if (fs->fs_ronly == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) { 607 /* 608 * Finish change from r/w to r/o 609 */ 610 fs->fs_ronly = 1; 611 fs->fs_fmod = 0; 612 } 613 614 if (mp->mnt_flag & MNT_RELOAD) { 615 error = ffs_reload(mp, l->l_cred, l); 616 if (error) { 617 DPRINTF("ffs_reload returned %d", error); 618 return error; 619 } 620 } 621 622 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { 623 /* 624 * Changing from read-only to read/write 625 */ 626 #ifndef QUOTA2 627 if (fs->fs_flags & FS_DOQUOTA2) { 628 ump->um_flags |= UFS_QUOTA2; 629 uprintf("%s: options QUOTA2 not enabled%s\n", 630 mp->mnt_stat.f_mntonname, 631 (mp->mnt_flag & MNT_FORCE) ? "" : 632 ", not mounting"); 633 DPRINTF("ffs_quota2 %d", EINVAL); 634 return EINVAL; 635 } 636 #endif 637 fs->fs_ronly = 0; 638 fs->fs_clean <<= 1; 639 fs->fs_fmod = 1; 640 #ifdef WAPBL 641 if (fs->fs_flags & FS_DOWAPBL) { 642 const char *nm = mp->mnt_stat.f_mntonname; 643 if (!mp->mnt_wapbl_replay) { 644 printf("%s: log corrupted;" 645 " replay cancelled\n", nm); 646 return EFTYPE; 647 } 648 printf("%s: replaying log to disk\n", nm); 649 error = wapbl_replay_write(mp->mnt_wapbl_replay, 650 devvp); 651 if (error) { 652 DPRINTF("%s: wapbl_replay_write %d", 653 nm, error); 654 return error; 655 } 656 wapbl_replay_stop(mp->mnt_wapbl_replay); 657 fs->fs_clean = FS_WASCLEAN; 658 } 659 #endif /* WAPBL */ 660 if (fs->fs_snapinum[0] != 0) 661 ffs_snapshot_mount(mp); 662 } 663 664 #ifdef WAPBL 665 error = ffs_wapbl_start(mp); 666 if (error) { 667 DPRINTF("ffs_wapbl_start returned %d", error); 668 return error; 669 } 670 #endif /* WAPBL */ 671 672 #ifdef QUOTA2 673 if (!fs->fs_ronly) { 674 error = ffs_quota2_mount(mp); 675 if (error) { 676 DPRINTF("ffs_quota2_mount returned %d", error); 677 return error; 678 } 679 } 680 #endif 681 682 if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata)) 683 ump->um_discarddata = ffs_discard_init(devvp, fs); 684 685 if (args->fspec == NULL) 686 return 0; 687 } 688 689 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, 690 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); 691 if (error == 0) 692 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, 693 sizeof(fs->fs_fsmnt)); 694 else { 695 DPRINTF("set_statvfs_info returned %d", error); 696 } 697 fs->fs_flags &= ~FS_DOSOFTDEP; 698 if (fs->fs_fmod != 0) { /* XXX */ 699 int err; 700 701 fs->fs_fmod = 0; 702 if (fs->fs_clean & FS_WASCLEAN) 703 fs->fs_time = time_second; 704 else { 705 printf("%s: file system not clean (fs_clean=%#x); " 706 "please fsck(8)\n", mp->mnt_stat.f_mntfromname, 707 fs->fs_clean); 708 printf("%s: lost blocks %" PRId64 " files %d\n", 709 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks, 710 fs->fs_pendinginodes); 711 } 712 err = UFS_WAPBL_BEGIN(mp); 713 if (err == 0) { 714 (void) ffs_cgupdate(ump, MNT_WAIT); 715 UFS_WAPBL_END(mp); 716 } 717 } 718 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) { 719 printf("%s: `-o softdep' is no longer supported, " 720 "consider `-o log'\n", mp->mnt_stat.f_mntfromname); 721 mp->mnt_flag &= ~MNT_SOFTDEP; 722 } 723 724 return (error); 725 726 fail: 727 vrele(devvp); 728 return (error); 729 } 730 731 /* 732 * Reload all incore data for a filesystem (used after running fsck on 733 * the root filesystem and finding things to fix). The filesystem must 734 * be mounted read-only. 735 * 736 * Things to do to update the mount: 737 * 1) invalidate all cached meta-data. 738 * 2) re-read superblock from disk. 739 * 3) re-read summary information from disk. 740 * 4) invalidate all inactive vnodes. 741 * 5) invalidate all cached file data. 742 * 6) re-read inode data for all active vnodes. 743 */ 744 int 745 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) 746 { 747 struct vnode *vp, *devvp; 748 struct inode *ip; 749 void *space; 750 struct buf *bp; 751 struct fs *fs, *newfs; 752 int i, bsize, blks, error; 753 int32_t *lp, fs_sbsize; 754 struct ufsmount *ump; 755 daddr_t sblockloc; 756 struct vnode_iterator *marker; 757 758 if ((mp->mnt_flag & MNT_RDONLY) == 0) 759 return (EINVAL); 760 761 ump = VFSTOUFS(mp); 762 763 /* 764 * Step 1: invalidate all cached meta-data. 765 */ 766 devvp = ump->um_devvp; 767 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 768 error = vinvalbuf(devvp, 0, cred, l, 0, 0); 769 VOP_UNLOCK(devvp); 770 if (error) 771 panic("%s: dirty1", __func__); 772 773 /* 774 * Step 2: re-read superblock from disk. XXX: We don't handle 775 * possibility that superblock moved. Which implies that we don't 776 * want its size to change either. 777 */ 778 fs = ump->um_fs; 779 fs_sbsize = fs->fs_sbsize; 780 error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs_sbsize, 781 0, &bp); 782 if (error) 783 return (error); 784 newfs = kmem_alloc(fs_sbsize, KM_SLEEP); 785 memcpy(newfs, bp->b_data, fs_sbsize); 786 787 #ifdef FFS_EI 788 if (ump->um_flags & UFS_NEEDSWAP) { 789 ffs_sb_swap((struct fs *)bp->b_data, newfs); 790 newfs->fs_flags |= FS_SWAPPED; 791 } else 792 #endif 793 newfs->fs_flags &= ~FS_SWAPPED; 794 795 brelse(bp, 0); 796 797 if ((newfs->fs_magic != FS_UFS1_MAGIC) && 798 (newfs->fs_magic != FS_UFS2_MAGIC)) { 799 kmem_free(newfs, fs_sbsize); 800 return (EIO); /* XXX needs translation */ 801 } 802 if (!ffs_superblock_validate(newfs)) { 803 kmem_free(newfs, fs_sbsize); 804 return (EINVAL); 805 } 806 807 /* 808 * The current implementation doesn't handle the possibility that 809 * these values may have changed. 810 */ 811 if ((newfs->fs_sbsize != fs_sbsize) || 812 (newfs->fs_cssize != fs->fs_cssize) || 813 (newfs->fs_contigsumsize != fs->fs_contigsumsize) || 814 (newfs->fs_ncg != fs->fs_ncg)) { 815 kmem_free(newfs, fs_sbsize); 816 return (EINVAL); 817 } 818 819 /* Store off old fs_sblockloc for fs_oldfscompat_read. */ 820 sblockloc = fs->fs_sblockloc; 821 /* 822 * Copy pointer fields back into superblock before copying in XXX 823 * new superblock. These should really be in the ufsmount. XXX 824 * Note that important parameters (eg fs_ncg) are unchanged. 825 */ 826 newfs->fs_csp = fs->fs_csp; 827 newfs->fs_maxcluster = fs->fs_maxcluster; 828 newfs->fs_contigdirs = fs->fs_contigdirs; 829 newfs->fs_ronly = fs->fs_ronly; 830 newfs->fs_active = fs->fs_active; 831 memcpy(fs, newfs, (u_int)fs_sbsize); 832 kmem_free(newfs, fs_sbsize); 833 834 /* 835 * Recheck for Apple UFS filesystem. 836 */ 837 ump->um_flags &= ~UFS_ISAPPLEUFS; 838 if (ffs_is_appleufs(devvp, fs)) { 839 #ifdef APPLE_UFS 840 ump->um_flags |= UFS_ISAPPLEUFS; 841 #else 842 DPRINTF("AppleUFS not supported"); 843 return (EIO); /* XXX: really? */ 844 #endif 845 } 846 847 if (UFS_MPISAPPLEUFS(ump)) { 848 /* see comment about NeXT below */ 849 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 850 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 851 mp->mnt_iflag |= IMNT_DTYPE; 852 } else { 853 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 854 ump->um_dirblksiz = UFS_DIRBLKSIZ; 855 if (ump->um_maxsymlinklen > 0) 856 mp->mnt_iflag |= IMNT_DTYPE; 857 else 858 mp->mnt_iflag &= ~IMNT_DTYPE; 859 } 860 ffs_oldfscompat_read(fs, ump, sblockloc); 861 862 mutex_enter(&ump->um_lock); 863 ump->um_maxfilesize = fs->fs_maxfilesize; 864 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 865 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 866 mp->mnt_stat.f_mntonname, fs->fs_flags, 867 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 868 if ((mp->mnt_flag & MNT_FORCE) == 0) { 869 mutex_exit(&ump->um_lock); 870 return (EINVAL); 871 } 872 } 873 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 874 fs->fs_pendingblocks = 0; 875 fs->fs_pendinginodes = 0; 876 } 877 mutex_exit(&ump->um_lock); 878 879 ffs_statvfs(mp, &mp->mnt_stat); 880 /* 881 * Step 3: re-read summary information from disk. 882 */ 883 blks = howmany(fs->fs_cssize, fs->fs_fsize); 884 space = fs->fs_csp; 885 for (i = 0; i < blks; i += fs->fs_frag) { 886 bsize = fs->fs_bsize; 887 if (i + fs->fs_frag > blks) 888 bsize = (blks - i) * fs->fs_fsize; 889 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 890 0, &bp); 891 if (error) { 892 return (error); 893 } 894 #ifdef FFS_EI 895 if (UFS_FSNEEDSWAP(fs)) 896 ffs_csum_swap((struct csum *)bp->b_data, 897 (struct csum *)space, bsize); 898 else 899 #endif 900 memcpy(space, bp->b_data, (size_t)bsize); 901 space = (char *)space + bsize; 902 brelse(bp, 0); 903 } 904 /* 905 * We no longer know anything about clusters per cylinder group. 906 */ 907 if (fs->fs_contigsumsize > 0) { 908 lp = fs->fs_maxcluster; 909 for (i = 0; i < fs->fs_ncg; i++) 910 *lp++ = fs->fs_contigsumsize; 911 } 912 913 vfs_vnode_iterator_init(mp, &marker); 914 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { 915 /* 916 * Step 4: invalidate all inactive vnodes. 917 */ 918 if (vrecycle(vp)) 919 continue; 920 /* 921 * Step 5: invalidate all cached file data. 922 */ 923 if (vn_lock(vp, LK_EXCLUSIVE)) { 924 vrele(vp); 925 continue; 926 } 927 if (vinvalbuf(vp, 0, cred, l, 0, 0)) 928 panic("%s: dirty2", __func__); 929 /* 930 * Step 6: re-read inode data for all active vnodes. 931 */ 932 ip = VTOI(vp); 933 error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), 934 (int)fs->fs_bsize, 0, &bp); 935 if (error) { 936 vput(vp); 937 break; 938 } 939 ffs_load_inode(bp, ip, fs, ip->i_number); 940 brelse(bp, 0); 941 vput(vp); 942 } 943 vfs_vnode_iterator_destroy(marker); 944 return (error); 945 } 946 947 /* 948 * Possible superblock locations ordered from most to least likely. 949 */ 950 static const int sblock_try[] = SBLOCKSEARCH; 951 952 953 static int 954 ffs_superblock_validate(struct fs *fs) 955 { 956 int32_t i, fs_bshift = 0, fs_fshift = 0, fs_fragshift = 0, fs_frag; 957 int32_t fs_inopb; 958 959 /* Check the superblock size */ 960 if (fs->fs_sbsize > SBLOCKSIZE || fs->fs_sbsize < sizeof(struct fs)) 961 return 0; 962 963 /* Check the file system blocksize */ 964 if (fs->fs_bsize > MAXBSIZE || fs->fs_bsize < MINBSIZE) 965 return 0; 966 if (!powerof2(fs->fs_bsize)) 967 return 0; 968 969 /* Check the size of frag blocks */ 970 if (!powerof2(fs->fs_fsize)) 971 return 0; 972 if (fs->fs_fsize == 0) 973 return 0; 974 975 /* 976 * XXX: these values are just zero-checked to prevent obvious 977 * bugs. We need more strict checks. 978 */ 979 if (fs->fs_size == 0) 980 return 0; 981 if (fs->fs_cssize == 0) 982 return 0; 983 if (fs->fs_ipg == 0) 984 return 0; 985 if (fs->fs_fpg == 0) 986 return 0; 987 if (fs->fs_ncg == 0) 988 return 0; 989 if (fs->fs_maxbpg == 0) 990 return 0; 991 992 /* Check the number of inodes per block */ 993 if (fs->fs_magic == FS_UFS1_MAGIC) 994 fs_inopb = fs->fs_bsize / sizeof(struct ufs1_dinode); 995 else /* fs->fs_magic == FS_UFS2_MAGIC */ 996 fs_inopb = fs->fs_bsize / sizeof(struct ufs2_dinode); 997 if (fs->fs_inopb != fs_inopb) 998 return 0; 999 1000 /* Block size cannot be smaller than fragment size */ 1001 if (fs->fs_bsize < fs->fs_fsize) 1002 return 0; 1003 1004 /* Compute fs_bshift and ensure it is consistent */ 1005 for (i = fs->fs_bsize; i > 1; i >>= 1) 1006 fs_bshift++; 1007 if (fs->fs_bshift != fs_bshift) 1008 return 0; 1009 1010 /* Compute fs_fshift and ensure it is consistent */ 1011 for (i = fs->fs_fsize; i > 1; i >>= 1) 1012 fs_fshift++; 1013 if (fs->fs_fshift != fs_fshift) 1014 return 0; 1015 1016 /* Compute fs_fragshift and ensure it is consistent */ 1017 for (i = fs->fs_frag; i > 1; i >>= 1) 1018 fs_fragshift++; 1019 if (fs->fs_fragshift != fs_fragshift) 1020 return 0; 1021 1022 /* Check the masks */ 1023 if (fs->fs_bmask != ~(fs->fs_bsize - 1)) 1024 return 0; 1025 if (fs->fs_fmask != ~(fs->fs_fsize - 1)) 1026 return 0; 1027 1028 /* 1029 * Now that the shifts and masks are sanitized, we can use the ffs_ API. 1030 */ 1031 1032 /* Check the number of frag blocks */ 1033 if ((fs_frag = ffs_numfrags(fs, fs->fs_bsize)) > MAXFRAG) 1034 return 0; 1035 if (fs->fs_frag != fs_frag) 1036 return 0; 1037 1038 /* Check the size of cylinder groups */ 1039 if ((fs->fs_cgsize < sizeof(struct cg)) || 1040 (fs->fs_cgsize > fs->fs_bsize)) 1041 return 0; 1042 1043 return 1; 1044 } 1045 1046 static int 1047 ffs_is_appleufs(struct vnode *devvp, struct fs *fs) 1048 { 1049 struct dkwedge_info dkw; 1050 int ret = 0; 1051 1052 /* 1053 * First check to see if this is tagged as an Apple UFS filesystem 1054 * in the disklabel. 1055 */ 1056 if (getdiskinfo(devvp, &dkw) == 0 && 1057 strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0) 1058 ret = 1; 1059 #ifdef APPLE_UFS 1060 else { 1061 struct appleufslabel *applefs; 1062 struct buf *bp; 1063 daddr_t blkno = APPLEUFS_LABEL_OFFSET / DEV_BSIZE; 1064 int error; 1065 1066 /* 1067 * Manually look for an Apple UFS label, and if a valid one 1068 * is found, then treat it like an Apple UFS filesystem anyway. 1069 */ 1070 error = bread(devvp, blkno, APPLEUFS_LABEL_SIZE, 0, &bp); 1071 if (error) { 1072 DPRINTF("bread@0x%jx returned %d", (intmax_t)blkno, error); 1073 return 0; 1074 } 1075 applefs = (struct appleufslabel *)bp->b_data; 1076 error = ffs_appleufs_validate(fs->fs_fsmnt, applefs, NULL); 1077 if (error == 0) 1078 ret = 1; 1079 brelse(bp, 0); 1080 } 1081 #endif 1082 1083 return ret; 1084 } 1085 1086 /* 1087 * Common code for mount and mountroot 1088 */ 1089 int 1090 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) 1091 { 1092 struct ufsmount *ump = NULL; 1093 struct buf *bp = NULL; 1094 struct fs *fs = NULL; 1095 dev_t dev; 1096 void *space; 1097 daddr_t sblockloc = 0; 1098 int blks, fstype = 0; 1099 int error, i, bsize, ronly, bset = 0; 1100 #ifdef FFS_EI 1101 int needswap = 0; /* keep gcc happy */ 1102 #endif 1103 int32_t *lp; 1104 kauth_cred_t cred; 1105 u_int32_t allocsbsize, fs_sbsize = 0; 1106 1107 dev = devvp->v_rdev; 1108 cred = l ? l->l_cred : NOCRED; 1109 1110 /* Flush out any old buffers remaining from a previous use. */ 1111 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1112 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); 1113 VOP_UNLOCK(devvp); 1114 if (error) { 1115 DPRINTF("vinvalbuf returned %d", error); 1116 return error; 1117 } 1118 1119 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 1120 1121 ump = kmem_zalloc(sizeof(*ump), KM_SLEEP); 1122 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE); 1123 error = ffs_snapshot_init(ump); 1124 if (error) { 1125 DPRINTF("ffs_snapshot_init returned %d", error); 1126 goto out; 1127 } 1128 ump->um_ops = &ffs_ufsops; 1129 1130 #ifdef WAPBL 1131 sbagain: 1132 #endif 1133 /* 1134 * Try reading the superblock in each of its possible locations. 1135 */ 1136 for (i = 0; ; i++) { 1137 daddr_t fs_sblockloc; 1138 1139 if (bp != NULL) { 1140 brelse(bp, BC_NOCACHE); 1141 bp = NULL; 1142 } 1143 if (sblock_try[i] == -1) { 1144 DPRINTF("no superblock found"); 1145 error = EINVAL; 1146 fs = NULL; 1147 goto out; 1148 } 1149 1150 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, 1151 0, &bp); 1152 if (error) { 1153 DPRINTF("bread@0x%x returned %d", 1154 sblock_try[i] / DEV_BSIZE, error); 1155 fs = NULL; 1156 goto out; 1157 } 1158 fs = (struct fs *)bp->b_data; 1159 1160 sblockloc = sblock_try[i]; 1161 DPRINTF("fs_magic 0x%x", fs->fs_magic); 1162 1163 /* 1164 * Swap: here, we swap fs->fs_sbsize in order to get the correct 1165 * size to read the superblock. Once read, we swap the whole 1166 * superblock structure. 1167 */ 1168 if (fs->fs_magic == FS_UFS1_MAGIC) { 1169 fs_sbsize = fs->fs_sbsize; 1170 fstype = UFS1; 1171 #ifdef FFS_EI 1172 needswap = 0; 1173 } else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) { 1174 fs_sbsize = bswap32(fs->fs_sbsize); 1175 fstype = UFS1; 1176 needswap = 1; 1177 #endif 1178 } else if (fs->fs_magic == FS_UFS2_MAGIC) { 1179 fs_sbsize = fs->fs_sbsize; 1180 fstype = UFS2; 1181 #ifdef FFS_EI 1182 needswap = 0; 1183 } else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) { 1184 fs_sbsize = bswap32(fs->fs_sbsize); 1185 fstype = UFS2; 1186 needswap = 1; 1187 #endif 1188 } else 1189 continue; 1190 1191 /* fs->fs_sblockloc isn't defined for old filesystems */ 1192 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) { 1193 if (sblockloc == SBLOCK_UFS2) 1194 /* 1195 * This is likely to be the first alternate 1196 * in a filesystem with 64k blocks. 1197 * Don't use it. 1198 */ 1199 continue; 1200 fs_sblockloc = sblockloc; 1201 } else { 1202 fs_sblockloc = fs->fs_sblockloc; 1203 #ifdef FFS_EI 1204 if (needswap) 1205 fs_sblockloc = bswap64(fs_sblockloc); 1206 #endif 1207 } 1208 1209 /* Check we haven't found an alternate superblock */ 1210 if (fs_sblockloc != sblockloc) 1211 continue; 1212 1213 /* Check the superblock size */ 1214 if (fs_sbsize > SBLOCKSIZE || fs_sbsize < sizeof(struct fs)) 1215 continue; 1216 fs = kmem_alloc((u_long)fs_sbsize, KM_SLEEP); 1217 memcpy(fs, bp->b_data, fs_sbsize); 1218 1219 /* Swap the whole superblock structure, if necessary. */ 1220 #ifdef FFS_EI 1221 if (needswap) { 1222 ffs_sb_swap((struct fs*)bp->b_data, fs); 1223 fs->fs_flags |= FS_SWAPPED; 1224 } else 1225 #endif 1226 fs->fs_flags &= ~FS_SWAPPED; 1227 1228 /* 1229 * Now that everything is swapped, the superblock is ready to 1230 * be sanitized. 1231 */ 1232 if (!ffs_superblock_validate(fs)) { 1233 kmem_free(fs, fs_sbsize); 1234 continue; 1235 } 1236 1237 /* Ok seems to be a good superblock */ 1238 break; 1239 } 1240 1241 ump->um_fs = fs; 1242 1243 #ifdef WAPBL 1244 if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) { 1245 error = ffs_wapbl_replay_start(mp, fs, devvp); 1246 if (error && (mp->mnt_flag & MNT_FORCE) == 0) { 1247 DPRINTF("ffs_wapbl_replay_start returned %d", error); 1248 goto out; 1249 } 1250 if (!error) { 1251 if (!ronly) { 1252 /* XXX fsmnt may be stale. */ 1253 printf("%s: replaying log to disk\n", 1254 fs->fs_fsmnt); 1255 error = wapbl_replay_write(mp->mnt_wapbl_replay, 1256 devvp); 1257 if (error) { 1258 DPRINTF("wapbl_replay_write returned %d", 1259 error); 1260 goto out; 1261 } 1262 wapbl_replay_stop(mp->mnt_wapbl_replay); 1263 fs->fs_clean = FS_WASCLEAN; 1264 } else { 1265 /* XXX fsmnt may be stale */ 1266 printf("%s: replaying log to memory\n", 1267 fs->fs_fsmnt); 1268 } 1269 1270 /* Force a re-read of the superblock */ 1271 brelse(bp, BC_INVAL); 1272 bp = NULL; 1273 kmem_free(fs, fs_sbsize); 1274 fs = NULL; 1275 goto sbagain; 1276 } 1277 } 1278 #else /* !WAPBL */ 1279 if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) { 1280 error = EPERM; 1281 DPRINTF("no force %d", error); 1282 goto out; 1283 } 1284 #endif /* !WAPBL */ 1285 1286 ffs_oldfscompat_read(fs, ump, sblockloc); 1287 ump->um_maxfilesize = fs->fs_maxfilesize; 1288 1289 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 1290 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 1291 mp->mnt_stat.f_mntonname, fs->fs_flags, 1292 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1293 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1294 error = EINVAL; 1295 DPRINTF("no force %d", error); 1296 goto out; 1297 } 1298 } 1299 1300 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 1301 fs->fs_pendingblocks = 0; 1302 fs->fs_pendinginodes = 0; 1303 } 1304 1305 ump->um_fstype = fstype; 1306 if (fs->fs_sbsize < SBLOCKSIZE) 1307 brelse(bp, BC_INVAL); 1308 else 1309 brelse(bp, 0); 1310 bp = NULL; 1311 1312 if (ffs_is_appleufs(devvp, fs)) { 1313 #ifdef APPLE_UFS 1314 ump->um_flags |= UFS_ISAPPLEUFS; 1315 #else 1316 DPRINTF("AppleUFS not supported"); 1317 error = EINVAL; 1318 goto out; 1319 #endif 1320 } 1321 1322 #if 0 1323 /* 1324 * XXX This code changes the behaviour of mounting dirty filesystems, to 1325 * XXX require "mount -f ..." to mount them. This doesn't match what 1326 * XXX mount(8) describes and is disabled for now. 1327 */ 1328 /* 1329 * If the file system is not clean, don't allow it to be mounted 1330 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set 1331 * for the root file system.) 1332 */ 1333 if (fs->fs_flags & FS_DOWAPBL) { 1334 /* 1335 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL 1336 * bit is set, although there's a window in unmount where it 1337 * could be FS_ISCLEAN 1338 */ 1339 if ((mp->mnt_flag & MNT_FORCE) == 0 && 1340 (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) { 1341 error = EPERM; 1342 goto out; 1343 } 1344 } else 1345 if ((fs->fs_clean & FS_ISCLEAN) == 0 && 1346 (mp->mnt_flag & MNT_FORCE) == 0) { 1347 error = EPERM; 1348 goto out; 1349 } 1350 #endif 1351 1352 /* 1353 * Verify that we can access the last block in the fs 1354 * if we're mounting read/write. 1355 */ 1356 if (!ronly) { 1357 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1), 1358 fs->fs_fsize, 0, &bp); 1359 if (error) { 1360 DPRINTF("bread@0x%jx returned %d", 1361 (intmax_t)FFS_FSBTODB(fs, fs->fs_size - 1), 1362 error); 1363 bset = BC_INVAL; 1364 goto out; 1365 } 1366 if (bp->b_bcount != fs->fs_fsize) { 1367 DPRINTF("bcount %x != fsize %x", bp->b_bcount, 1368 fs->fs_fsize); 1369 error = EINVAL; 1370 bset = BC_INVAL; 1371 goto out; 1372 } 1373 brelse(bp, BC_INVAL); 1374 bp = NULL; 1375 } 1376 1377 fs->fs_ronly = ronly; 1378 /* Don't bump fs_clean if we're replaying journal */ 1379 if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) { 1380 if (ronly == 0) { 1381 fs->fs_clean <<= 1; 1382 fs->fs_fmod = 1; 1383 } 1384 } 1385 1386 bsize = fs->fs_cssize; 1387 blks = howmany(bsize, fs->fs_fsize); 1388 if (fs->fs_contigsumsize > 0) 1389 bsize += fs->fs_ncg * sizeof(int32_t); 1390 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1391 allocsbsize = bsize; 1392 space = kmem_alloc((u_long)allocsbsize, KM_SLEEP); 1393 fs->fs_csp = space; 1394 1395 for (i = 0; i < blks; i += fs->fs_frag) { 1396 bsize = fs->fs_bsize; 1397 if (i + fs->fs_frag > blks) 1398 bsize = (blks - i) * fs->fs_fsize; 1399 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 1400 0, &bp); 1401 if (error) { 1402 DPRINTF("bread@0x%jx %d", 1403 (intmax_t)FFS_FSBTODB(fs, fs->fs_csaddr + i), 1404 error); 1405 goto out1; 1406 } 1407 #ifdef FFS_EI 1408 if (needswap) 1409 ffs_csum_swap((struct csum *)bp->b_data, 1410 (struct csum *)space, bsize); 1411 else 1412 #endif 1413 memcpy(space, bp->b_data, (u_int)bsize); 1414 1415 space = (char *)space + bsize; 1416 brelse(bp, 0); 1417 bp = NULL; 1418 } 1419 if (fs->fs_contigsumsize > 0) { 1420 fs->fs_maxcluster = lp = space; 1421 for (i = 0; i < fs->fs_ncg; i++) 1422 *lp++ = fs->fs_contigsumsize; 1423 space = lp; 1424 } 1425 bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1426 fs->fs_contigdirs = space; 1427 space = (char *)space + bsize; 1428 memset(fs->fs_contigdirs, 0, bsize); 1429 1430 /* Compatibility for old filesystems - XXX */ 1431 if (fs->fs_avgfilesize <= 0) 1432 fs->fs_avgfilesize = AVFILESIZ; 1433 if (fs->fs_avgfpdir <= 0) 1434 fs->fs_avgfpdir = AFPDIR; 1435 fs->fs_active = NULL; 1436 1437 mp->mnt_data = ump; 1438 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; 1439 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS); 1440 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 1441 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN; 1442 if (UFS_MPISAPPLEUFS(ump)) { 1443 /* NeXT used to keep short symlinks in the inode even 1444 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen 1445 * is probably -1, but we still need to be able to identify 1446 * short symlinks. 1447 */ 1448 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 1449 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 1450 mp->mnt_iflag |= IMNT_DTYPE; 1451 } else { 1452 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 1453 ump->um_dirblksiz = UFS_DIRBLKSIZ; 1454 if (ump->um_maxsymlinklen > 0) 1455 mp->mnt_iflag |= IMNT_DTYPE; 1456 else 1457 mp->mnt_iflag &= ~IMNT_DTYPE; 1458 } 1459 mp->mnt_fs_bshift = fs->fs_bshift; 1460 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ 1461 mp->mnt_flag |= MNT_LOCAL; 1462 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO; 1463 #ifdef FFS_EI 1464 if (needswap) 1465 ump->um_flags |= UFS_NEEDSWAP; 1466 #endif 1467 ump->um_mountp = mp; 1468 ump->um_dev = dev; 1469 ump->um_devvp = devvp; 1470 ump->um_nindir = fs->fs_nindir; 1471 ump->um_lognindir = ffs(fs->fs_nindir) - 1; 1472 ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT; 1473 ump->um_seqinc = fs->fs_frag; 1474 for (i = 0; i < MAXQUOTAS; i++) 1475 ump->um_quotas[i] = NULLVP; 1476 spec_node_setmountedfs(devvp, mp); 1477 if (ronly == 0 && fs->fs_snapinum[0] != 0) 1478 ffs_snapshot_mount(mp); 1479 #ifdef WAPBL 1480 if (!ronly) { 1481 KDASSERT(fs->fs_ronly == 0); 1482 /* 1483 * ffs_wapbl_start() needs mp->mnt_stat initialised if it 1484 * needs to create a new log file in-filesystem. 1485 */ 1486 error = ffs_statvfs(mp, &mp->mnt_stat); 1487 if (error) { 1488 DPRINTF("ffs_statvfs returned %d", error); 1489 goto out1; 1490 } 1491 1492 error = ffs_wapbl_start(mp); 1493 if (error) { 1494 DPRINTF("ffs_wapbl_start returned %d", error); 1495 goto out1; 1496 } 1497 } 1498 #endif /* WAPBL */ 1499 if (ronly == 0) { 1500 #ifdef QUOTA2 1501 error = ffs_quota2_mount(mp); 1502 if (error) { 1503 DPRINTF("ffs_quota2_mount returned %d", error); 1504 goto out1; 1505 } 1506 #else 1507 if (fs->fs_flags & FS_DOQUOTA2) { 1508 ump->um_flags |= UFS_QUOTA2; 1509 uprintf("%s: options QUOTA2 not enabled%s\n", 1510 mp->mnt_stat.f_mntonname, 1511 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1512 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1513 error = EINVAL; 1514 DPRINTF("quota disabled %d", error); 1515 goto out1; 1516 } 1517 } 1518 #endif 1519 } 1520 1521 if (mp->mnt_flag & MNT_DISCARD) 1522 ump->um_discarddata = ffs_discard_init(devvp, fs); 1523 1524 return (0); 1525 out1: 1526 kmem_free(fs->fs_csp, allocsbsize); 1527 out: 1528 #ifdef WAPBL 1529 if (mp->mnt_wapbl_replay) { 1530 wapbl_replay_stop(mp->mnt_wapbl_replay); 1531 wapbl_replay_free(mp->mnt_wapbl_replay); 1532 mp->mnt_wapbl_replay = 0; 1533 } 1534 #endif 1535 1536 if (fs) 1537 kmem_free(fs, fs->fs_sbsize); 1538 spec_node_setmountedfs(devvp, NULL); 1539 if (bp) 1540 brelse(bp, bset); 1541 if (ump) { 1542 if (ump->um_oldfscompat) 1543 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1544 mutex_destroy(&ump->um_lock); 1545 kmem_free(ump, sizeof(*ump)); 1546 mp->mnt_data = NULL; 1547 } 1548 return (error); 1549 } 1550 1551 /* 1552 * Sanity checks for loading old filesystem superblocks. 1553 * See ffs_oldfscompat_write below for unwound actions. 1554 * 1555 * XXX - Parts get retired eventually. 1556 * Unfortunately new bits get added. 1557 */ 1558 static void 1559 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc) 1560 { 1561 off_t maxfilesize; 1562 int32_t *extrasave; 1563 1564 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1565 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1566 return; 1567 1568 if (!ump->um_oldfscompat) 1569 ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t), 1570 KM_SLEEP); 1571 1572 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512); 1573 extrasave = ump->um_oldfscompat; 1574 extrasave += 512/sizeof(int32_t); 1575 extrasave[0] = fs->fs_old_npsect; 1576 extrasave[1] = fs->fs_old_interleave; 1577 extrasave[2] = fs->fs_old_trackskew; 1578 1579 /* These fields will be overwritten by their 1580 * original values in fs_oldfscompat_write, so it is harmless 1581 * to modify them here. 1582 */ 1583 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; 1584 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; 1585 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; 1586 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; 1587 1588 fs->fs_maxbsize = fs->fs_bsize; 1589 fs->fs_time = fs->fs_old_time; 1590 fs->fs_size = fs->fs_old_size; 1591 fs->fs_dsize = fs->fs_old_dsize; 1592 fs->fs_csaddr = fs->fs_old_csaddr; 1593 fs->fs_sblockloc = sblockloc; 1594 1595 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL); 1596 1597 if (fs->fs_old_postblformat == FS_42POSTBLFMT) { 1598 fs->fs_old_nrpos = 8; 1599 fs->fs_old_npsect = fs->fs_old_nsect; 1600 fs->fs_old_interleave = 1; 1601 fs->fs_old_trackskew = 0; 1602 } 1603 1604 if (fs->fs_magic == FS_UFS1_MAGIC && 1605 fs->fs_old_inodefmt < FS_44INODEFMT) { 1606 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 1607 fs->fs_qbmask = ~fs->fs_bmask; 1608 fs->fs_qfmask = ~fs->fs_fmask; 1609 } 1610 1611 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; 1612 if (fs->fs_maxfilesize > maxfilesize) 1613 fs->fs_maxfilesize = maxfilesize; 1614 1615 /* Compatibility for old filesystems */ 1616 if (fs->fs_avgfilesize <= 0) 1617 fs->fs_avgfilesize = AVFILESIZ; 1618 if (fs->fs_avgfpdir <= 0) 1619 fs->fs_avgfpdir = AFPDIR; 1620 1621 #if 0 1622 if (bigcgs) { 1623 fs->fs_save_cgsize = fs->fs_cgsize; 1624 fs->fs_cgsize = fs->fs_bsize; 1625 } 1626 #endif 1627 } 1628 1629 /* 1630 * Unwinding superblock updates for old filesystems. 1631 * See ffs_oldfscompat_read above for details. 1632 * 1633 * XXX - Parts get retired eventually. 1634 * Unfortunately new bits get added. 1635 */ 1636 static void 1637 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump) 1638 { 1639 int32_t *extrasave; 1640 1641 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1642 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1643 return; 1644 1645 fs->fs_old_time = fs->fs_time; 1646 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; 1647 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; 1648 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; 1649 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; 1650 fs->fs_old_flags = fs->fs_flags; 1651 1652 #if 0 1653 if (bigcgs) { 1654 fs->fs_cgsize = fs->fs_save_cgsize; 1655 } 1656 #endif 1657 1658 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512); 1659 extrasave = ump->um_oldfscompat; 1660 extrasave += 512/sizeof(int32_t); 1661 fs->fs_old_npsect = extrasave[0]; 1662 fs->fs_old_interleave = extrasave[1]; 1663 fs->fs_old_trackskew = extrasave[2]; 1664 1665 } 1666 1667 /* 1668 * unmount vfs operation 1669 */ 1670 int 1671 ffs_unmount(struct mount *mp, int mntflags) 1672 { 1673 struct lwp *l = curlwp; 1674 struct ufsmount *ump = VFSTOUFS(mp); 1675 struct fs *fs = ump->um_fs; 1676 int error, flags; 1677 u_int32_t bsize; 1678 #ifdef WAPBL 1679 extern int doforce; 1680 #endif 1681 1682 if (ump->um_discarddata) { 1683 ffs_discard_finish(ump->um_discarddata, mntflags); 1684 ump->um_discarddata = NULL; 1685 } 1686 1687 flags = 0; 1688 if (mntflags & MNT_FORCE) 1689 flags |= FORCECLOSE; 1690 if ((error = ffs_flushfiles(mp, flags, l)) != 0) 1691 return (error); 1692 error = UFS_WAPBL_BEGIN(mp); 1693 if (error == 0) 1694 if (fs->fs_ronly == 0 && 1695 ffs_cgupdate(ump, MNT_WAIT) == 0 && 1696 fs->fs_clean & FS_WASCLEAN) { 1697 fs->fs_clean = FS_ISCLEAN; 1698 fs->fs_fmod = 0; 1699 (void) ffs_sbupdate(ump, MNT_WAIT); 1700 } 1701 if (error == 0) 1702 UFS_WAPBL_END(mp); 1703 #ifdef WAPBL 1704 KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl)); 1705 if (mp->mnt_wapbl_replay) { 1706 KDASSERT(fs->fs_ronly); 1707 wapbl_replay_stop(mp->mnt_wapbl_replay); 1708 wapbl_replay_free(mp->mnt_wapbl_replay); 1709 mp->mnt_wapbl_replay = 0; 1710 } 1711 error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE)); 1712 if (error) { 1713 return error; 1714 } 1715 #endif /* WAPBL */ 1716 1717 if (ump->um_devvp->v_type != VBAD) 1718 spec_node_setmountedfs(ump->um_devvp, NULL); 1719 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1720 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE, 1721 NOCRED); 1722 vput(ump->um_devvp); 1723 1724 bsize = fs->fs_cssize; 1725 if (fs->fs_contigsumsize > 0) 1726 bsize += fs->fs_ncg * sizeof(int32_t); 1727 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1728 kmem_free(fs->fs_csp, bsize); 1729 1730 kmem_free(fs, fs->fs_sbsize); 1731 if (ump->um_oldfscompat != NULL) 1732 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1733 mutex_destroy(&ump->um_lock); 1734 ffs_snapshot_fini(ump); 1735 kmem_free(ump, sizeof(*ump)); 1736 mp->mnt_data = NULL; 1737 mp->mnt_flag &= ~MNT_LOCAL; 1738 return (0); 1739 } 1740 1741 /* 1742 * Flush out all the files in a filesystem. 1743 */ 1744 int 1745 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l) 1746 { 1747 extern int doforce; 1748 struct ufsmount *ump; 1749 int error; 1750 1751 if (!doforce) 1752 flags &= ~FORCECLOSE; 1753 ump = VFSTOUFS(mp); 1754 #ifdef QUOTA 1755 if ((error = quota1_umount(mp, flags)) != 0) 1756 return (error); 1757 #endif 1758 #ifdef QUOTA2 1759 if ((error = quota2_umount(mp, flags)) != 0) 1760 return (error); 1761 #endif 1762 #ifdef UFS_EXTATTR 1763 if (ump->um_fstype == UFS1) { 1764 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) 1765 ufs_extattr_stop(mp, l); 1766 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED) 1767 ufs_extattr_uepm_destroy(&ump->um_extattr); 1768 mp->mnt_flag &= ~MNT_EXTATTR; 1769 } 1770 #endif 1771 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) 1772 return (error); 1773 ffs_snapshot_unmount(mp); 1774 /* 1775 * Flush all the files. 1776 */ 1777 error = vflush(mp, NULLVP, flags); 1778 if (error) 1779 return (error); 1780 /* 1781 * Flush filesystem metadata. 1782 */ 1783 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1784 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0); 1785 VOP_UNLOCK(ump->um_devvp); 1786 if (flags & FORCECLOSE) /* XXXDBJ */ 1787 error = 0; 1788 1789 #ifdef WAPBL 1790 if (error) 1791 return error; 1792 if (mp->mnt_wapbl) { 1793 error = wapbl_flush(mp->mnt_wapbl, 1); 1794 if (flags & FORCECLOSE) 1795 error = 0; 1796 } 1797 #endif 1798 1799 return (error); 1800 } 1801 1802 /* 1803 * Get file system statistics. 1804 */ 1805 int 1806 ffs_statvfs(struct mount *mp, struct statvfs *sbp) 1807 { 1808 struct ufsmount *ump; 1809 struct fs *fs; 1810 1811 ump = VFSTOUFS(mp); 1812 fs = ump->um_fs; 1813 mutex_enter(&ump->um_lock); 1814 sbp->f_bsize = fs->fs_bsize; 1815 sbp->f_frsize = fs->fs_fsize; 1816 sbp->f_iosize = fs->fs_bsize; 1817 sbp->f_blocks = fs->fs_dsize; 1818 sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) + 1819 fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks); 1820 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t) 1821 fs->fs_minfree) / (u_int64_t) 100; 1822 if (sbp->f_bfree > sbp->f_bresvd) 1823 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; 1824 else 1825 sbp->f_bavail = 0; 1826 sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; 1827 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; 1828 sbp->f_favail = sbp->f_ffree; 1829 sbp->f_fresvd = 0; 1830 mutex_exit(&ump->um_lock); 1831 copy_statvfs_info(sbp, mp); 1832 1833 return (0); 1834 } 1835 1836 struct ffs_sync_ctx { 1837 int waitfor; 1838 }; 1839 1840 static bool 1841 ffs_sync_selector(void *cl, struct vnode *vp) 1842 { 1843 struct ffs_sync_ctx *c = cl; 1844 struct inode *ip; 1845 1846 ip = VTOI(vp); 1847 /* 1848 * Skip the vnode/inode if inaccessible. 1849 */ 1850 if (ip == NULL || vp->v_type == VNON) 1851 return false; 1852 1853 /* 1854 * We deliberately update inode times here. This will 1855 * prevent a massive queue of updates accumulating, only 1856 * to be handled by a call to unmount. 1857 * 1858 * XXX It would be better to have the syncer trickle these 1859 * out. Adjustment needed to allow registering vnodes for 1860 * sync when the vnode is clean, but the inode dirty. Or 1861 * have ufs itself trickle out inode updates. 1862 * 1863 * If doing a lazy sync, we don't care about metadata or 1864 * data updates, because they are handled by each vnode's 1865 * synclist entry. In this case we are only interested in 1866 * writing back modified inodes. 1867 */ 1868 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | 1869 IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 && 1870 (c->waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) && 1871 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) 1872 return false; 1873 1874 return true; 1875 } 1876 1877 /* 1878 * Go through the disk queues to initiate sandbagged IO; 1879 * go through the inodes to write those that have been modified; 1880 * initiate the writing of the super block if it has been modified. 1881 * 1882 * Note: we are always called with the filesystem marked `MPBUSY'. 1883 */ 1884 int 1885 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) 1886 { 1887 struct vnode *vp; 1888 struct ufsmount *ump = VFSTOUFS(mp); 1889 struct fs *fs; 1890 struct vnode_iterator *marker; 1891 int error, allerror = 0; 1892 struct ffs_sync_ctx ctx; 1893 1894 fs = ump->um_fs; 1895 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 1896 panic("%s: rofs mod, fs=%s", __func__, fs->fs_fsmnt); 1897 } 1898 1899 /* 1900 * Write back each (modified) inode. 1901 */ 1902 vfs_vnode_iterator_init(mp, &marker); 1903 1904 ctx.waitfor = waitfor; 1905 while ((vp = vfs_vnode_iterator_next(marker, ffs_sync_selector, &ctx))) 1906 { 1907 error = vn_lock(vp, 1908 LK_EXCLUSIVE | (waitfor == MNT_LAZY ? LK_NOWAIT : 0)); 1909 if (error) { 1910 vrele(vp); 1911 continue; 1912 } 1913 if (waitfor == MNT_LAZY) { 1914 error = UFS_WAPBL_BEGIN(vp->v_mount); 1915 if (!error) { 1916 error = ffs_update(vp, NULL, NULL, 1917 UPDATE_CLOSE); 1918 UFS_WAPBL_END(vp->v_mount); 1919 } 1920 } else { 1921 error = VOP_FSYNC(vp, cred, FSYNC_NOLOG | 1922 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0); 1923 } 1924 if (error) 1925 allerror = error; 1926 vput(vp); 1927 } 1928 vfs_vnode_iterator_destroy(marker); 1929 1930 /* 1931 * Force stale file system control information to be flushed. 1932 */ 1933 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 || 1934 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) { 1935 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1936 if ((error = VOP_FSYNC(ump->um_devvp, cred, 1937 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG, 1938 0, 0)) != 0) 1939 allerror = error; 1940 VOP_UNLOCK(ump->um_devvp); 1941 } 1942 #if defined(QUOTA) || defined(QUOTA2) 1943 qsync(mp); 1944 #endif 1945 /* 1946 * Write back modified superblock. 1947 */ 1948 if (fs->fs_fmod != 0) { 1949 fs->fs_fmod = 0; 1950 fs->fs_time = time_second; 1951 error = UFS_WAPBL_BEGIN(mp); 1952 if (error) 1953 allerror = error; 1954 else { 1955 if ((error = ffs_cgupdate(ump, waitfor))) 1956 allerror = error; 1957 UFS_WAPBL_END(mp); 1958 } 1959 } 1960 1961 #ifdef WAPBL 1962 if (mp->mnt_wapbl) { 1963 error = wapbl_flush(mp->mnt_wapbl, (waitfor == MNT_WAIT)); 1964 if (error) 1965 allerror = error; 1966 } 1967 #endif 1968 1969 return (allerror); 1970 } 1971 1972 /* 1973 * Load inode from disk and initialize vnode. 1974 */ 1975 static int 1976 ffs_init_vnode(struct ufsmount *ump, struct vnode *vp, ino_t ino) 1977 { 1978 struct fs *fs; 1979 struct inode *ip; 1980 struct buf *bp; 1981 int error; 1982 1983 fs = ump->um_fs; 1984 1985 /* Read in the disk contents for the inode. */ 1986 error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)), 1987 (int)fs->fs_bsize, 0, &bp); 1988 if (error) 1989 return error; 1990 1991 /* Allocate and initialize inode. */ 1992 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK); 1993 memset(ip, 0, sizeof(struct inode)); 1994 ip->i_ump = ump; 1995 ip->i_fs = fs; 1996 ip->i_dev = ump->um_dev; 1997 ip->i_number = ino; 1998 if (ump->um_fstype == UFS1) 1999 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache, 2000 PR_WAITOK); 2001 else 2002 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache, 2003 PR_WAITOK); 2004 ffs_load_inode(bp, ip, fs, ino); 2005 brelse(bp, 0); 2006 ip->i_vnode = vp; 2007 #if defined(QUOTA) || defined(QUOTA2) 2008 ufsquota_init(ip); 2009 #endif 2010 2011 /* Initialise vnode with this inode. */ 2012 vp->v_tag = VT_UFS; 2013 vp->v_op = ffs_vnodeop_p; 2014 vp->v_vflag |= VV_LOCKSWORK; 2015 vp->v_data = ip; 2016 2017 /* Initialize genfs node. */ 2018 genfs_node_init(vp, &ffs_genfsops); 2019 2020 return 0; 2021 } 2022 2023 /* 2024 * Undo ffs_init_vnode(). 2025 */ 2026 static void 2027 ffs_deinit_vnode(struct ufsmount *ump, struct vnode *vp) 2028 { 2029 struct inode *ip = VTOI(vp); 2030 2031 if (ump->um_fstype == UFS1) 2032 pool_cache_put(ffs_dinode1_cache, ip->i_din.ffs1_din); 2033 else 2034 pool_cache_put(ffs_dinode2_cache, ip->i_din.ffs2_din); 2035 pool_cache_put(ffs_inode_cache, ip); 2036 2037 genfs_node_destroy(vp); 2038 vp->v_data = NULL; 2039 } 2040 2041 /* 2042 * Read an inode from disk and initialize this vnode / inode pair. 2043 * Caller assures no other thread will try to load this inode. 2044 */ 2045 int 2046 ffs_loadvnode(struct mount *mp, struct vnode *vp, 2047 const void *key, size_t key_len, const void **new_key) 2048 { 2049 ino_t ino; 2050 struct fs *fs; 2051 struct inode *ip; 2052 struct ufsmount *ump; 2053 int error; 2054 2055 KASSERT(key_len == sizeof(ino)); 2056 memcpy(&ino, key, key_len); 2057 ump = VFSTOUFS(mp); 2058 fs = ump->um_fs; 2059 2060 error = ffs_init_vnode(ump, vp, ino); 2061 if (error) 2062 return error; 2063 2064 ip = VTOI(vp); 2065 if (ip->i_mode == 0) { 2066 ffs_deinit_vnode(ump, vp); 2067 2068 return ENOENT; 2069 } 2070 2071 /* Initialize the vnode from the inode. */ 2072 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2073 2074 /* Finish inode initialization. */ 2075 ip->i_devvp = ump->um_devvp; 2076 vref(ip->i_devvp); 2077 2078 /* 2079 * Ensure that uid and gid are correct. This is a temporary 2080 * fix until fsck has been changed to do the update. 2081 */ 2082 2083 if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ 2084 fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ 2085 ip->i_uid = ip->i_ffs1_ouid; /* XXX */ 2086 ip->i_gid = ip->i_ffs1_ogid; /* XXX */ 2087 } /* XXX */ 2088 uvm_vnp_setsize(vp, ip->i_size); 2089 *new_key = &ip->i_number; 2090 return 0; 2091 } 2092 2093 /* 2094 * Create a new inode on disk and initialize this vnode / inode pair. 2095 */ 2096 int 2097 ffs_newvnode(struct mount *mp, struct vnode *dvp, struct vnode *vp, 2098 struct vattr *vap, kauth_cred_t cred, 2099 size_t *key_len, const void **new_key) 2100 { 2101 ino_t ino; 2102 struct fs *fs; 2103 struct inode *ip; 2104 struct timespec ts; 2105 struct ufsmount *ump; 2106 int error, mode; 2107 2108 KASSERT(dvp->v_mount == mp); 2109 KASSERT(vap->va_type != VNON); 2110 2111 *key_len = sizeof(ino); 2112 ump = VFSTOUFS(mp); 2113 fs = ump->um_fs; 2114 mode = MAKEIMODE(vap->va_type, vap->va_mode); 2115 2116 /* Allocate fresh inode. */ 2117 error = ffs_valloc(dvp, mode, cred, &ino); 2118 if (error) 2119 return error; 2120 2121 /* Attach inode to vnode. */ 2122 error = ffs_init_vnode(ump, vp, ino); 2123 if (error) { 2124 if (UFS_WAPBL_BEGIN(mp) == 0) { 2125 ffs_vfree(dvp, ino, mode); 2126 UFS_WAPBL_END(mp); 2127 } 2128 return error; 2129 } 2130 2131 ip = VTOI(vp); 2132 if (ip->i_mode) { 2133 panic("%s: dup alloc ino=%" PRId64 " on %s: mode %x/%x " 2134 "gen %x/%x size %" PRIx64 " blocks %" PRIx64, 2135 __func__, ino, fs->fs_fsmnt, DIP(ip, mode), ip->i_mode, 2136 DIP(ip, gen), ip->i_gen, DIP(ip, size), DIP(ip, blocks)); 2137 } 2138 if (DIP(ip, size) || DIP(ip, blocks)) { 2139 printf("%s: ino=%" PRId64 " on %s: " 2140 "gen %x/%x has non zero blocks %" PRIx64 " or size %" 2141 PRIx64 "\n", 2142 __func__, ino, fs->fs_fsmnt, DIP(ip, gen), ip->i_gen, 2143 DIP(ip, blocks), DIP(ip, size)); 2144 if ((ip)->i_ump->um_fstype == UFS1) 2145 panic("%s: dirty filesystem?", __func__); 2146 DIP_ASSIGN(ip, blocks, 0); 2147 DIP_ASSIGN(ip, size, 0); 2148 } 2149 2150 /* Set uid / gid. */ 2151 if (cred == NOCRED || cred == FSCRED) { 2152 ip->i_gid = 0; 2153 ip->i_uid = 0; 2154 } else { 2155 ip->i_gid = VTOI(dvp)->i_gid; 2156 ip->i_uid = kauth_cred_geteuid(cred); 2157 } 2158 DIP_ASSIGN(ip, gid, ip->i_gid); 2159 DIP_ASSIGN(ip, uid, ip->i_uid); 2160 2161 #if defined(QUOTA) || defined(QUOTA2) 2162 error = UFS_WAPBL_BEGIN(mp); 2163 if (error) { 2164 ffs_deinit_vnode(ump, vp); 2165 2166 return error; 2167 } 2168 error = chkiq(ip, 1, cred, 0); 2169 if (error) { 2170 ffs_vfree(dvp, ino, mode); 2171 UFS_WAPBL_END(mp); 2172 ffs_deinit_vnode(ump, vp); 2173 2174 return error; 2175 } 2176 UFS_WAPBL_END(mp); 2177 #endif 2178 2179 /* Set type and finalize. */ 2180 ip->i_flags = 0; 2181 DIP_ASSIGN(ip, flags, 0); 2182 ip->i_mode = mode; 2183 DIP_ASSIGN(ip, mode, mode); 2184 if (vap->va_rdev != VNOVAL) { 2185 /* 2186 * Want to be able to use this to make badblock 2187 * inodes, so don't truncate the dev number. 2188 */ 2189 if (ump->um_fstype == UFS1) 2190 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 2191 UFS_MPNEEDSWAP(ump)); 2192 else 2193 ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, 2194 UFS_MPNEEDSWAP(ump)); 2195 } 2196 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2197 ip->i_devvp = ump->um_devvp; 2198 vref(ip->i_devvp); 2199 2200 /* Set up a new generation number for this inode. */ 2201 ip->i_gen++; 2202 DIP_ASSIGN(ip, gen, ip->i_gen); 2203 if (fs->fs_magic == FS_UFS2_MAGIC) { 2204 vfs_timestamp(&ts); 2205 ip->i_ffs2_birthtime = ts.tv_sec; 2206 ip->i_ffs2_birthnsec = ts.tv_nsec; 2207 } 2208 2209 uvm_vnp_setsize(vp, ip->i_size); 2210 *new_key = &ip->i_number; 2211 return 0; 2212 } 2213 2214 /* 2215 * File handle to vnode 2216 * 2217 * Have to be really careful about stale file handles: 2218 * - check that the inode number is valid 2219 * - call ffs_vget() to get the locked inode 2220 * - check for an unallocated inode (i_mode == 0) 2221 * - check that the given client host has export rights and return 2222 * those rights via. exflagsp and credanonp 2223 */ 2224 int 2225 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 2226 { 2227 struct ufid ufh; 2228 int error; 2229 2230 if (fhp->fid_len != sizeof(struct ufid)) 2231 return EINVAL; 2232 2233 memcpy(&ufh, fhp, sizeof(ufh)); 2234 if ((error = ffs_checkrange(mp, ufh.ufid_ino)) != 0) 2235 return error; 2236 2237 return (ufs_fhtovp(mp, &ufh, vpp)); 2238 } 2239 2240 /* 2241 * Vnode pointer to File handle 2242 */ 2243 /* ARGSUSED */ 2244 int 2245 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) 2246 { 2247 struct inode *ip; 2248 struct ufid ufh; 2249 2250 if (*fh_size < sizeof(struct ufid)) { 2251 *fh_size = sizeof(struct ufid); 2252 return E2BIG; 2253 } 2254 ip = VTOI(vp); 2255 *fh_size = sizeof(struct ufid); 2256 memset(&ufh, 0, sizeof(ufh)); 2257 ufh.ufid_len = sizeof(struct ufid); 2258 ufh.ufid_ino = ip->i_number; 2259 ufh.ufid_gen = ip->i_gen; 2260 memcpy(fhp, &ufh, sizeof(ufh)); 2261 return (0); 2262 } 2263 2264 void 2265 ffs_init(void) 2266 { 2267 if (ffs_initcount++ > 0) 2268 return; 2269 2270 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0, 2271 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL); 2272 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0, 2273 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL); 2274 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0, 2275 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL); 2276 ufs_init(); 2277 } 2278 2279 void 2280 ffs_reinit(void) 2281 { 2282 ufs_reinit(); 2283 } 2284 2285 void 2286 ffs_done(void) 2287 { 2288 if (--ffs_initcount > 0) 2289 return; 2290 2291 ufs_done(); 2292 pool_cache_destroy(ffs_dinode2_cache); 2293 pool_cache_destroy(ffs_dinode1_cache); 2294 pool_cache_destroy(ffs_inode_cache); 2295 } 2296 2297 /* 2298 * Write a superblock and associated information back to disk. 2299 */ 2300 int 2301 ffs_sbupdate(struct ufsmount *mp, int waitfor) 2302 { 2303 struct fs *fs = mp->um_fs; 2304 struct buf *bp; 2305 int error; 2306 u_int32_t saveflag; 2307 2308 error = ffs_getblk(mp->um_devvp, 2309 fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK, 2310 fs->fs_sbsize, false, &bp); 2311 if (error) 2312 return error; 2313 saveflag = fs->fs_flags & FS_INTERNAL; 2314 fs->fs_flags &= ~FS_INTERNAL; 2315 2316 memcpy(bp->b_data, fs, fs->fs_sbsize); 2317 2318 ffs_oldfscompat_write((struct fs *)bp->b_data, mp); 2319 #ifdef FFS_EI 2320 if (mp->um_flags & UFS_NEEDSWAP) 2321 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data); 2322 #endif 2323 fs->fs_flags |= saveflag; 2324 2325 if (waitfor == MNT_WAIT) 2326 error = bwrite(bp); 2327 else 2328 bawrite(bp); 2329 return (error); 2330 } 2331 2332 int 2333 ffs_cgupdate(struct ufsmount *mp, int waitfor) 2334 { 2335 struct fs *fs = mp->um_fs; 2336 struct buf *bp; 2337 int blks; 2338 void *space; 2339 int i, size, error = 0, allerror = 0; 2340 2341 UFS_WAPBL_JLOCK_ASSERT(mp); 2342 2343 allerror = ffs_sbupdate(mp, waitfor); 2344 blks = howmany(fs->fs_cssize, fs->fs_fsize); 2345 space = fs->fs_csp; 2346 for (i = 0; i < blks; i += fs->fs_frag) { 2347 size = fs->fs_bsize; 2348 if (i + fs->fs_frag > blks) 2349 size = (blks - i) * fs->fs_fsize; 2350 error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), 2351 FFS_NOBLK, size, false, &bp); 2352 if (error) 2353 break; 2354 #ifdef FFS_EI 2355 if (mp->um_flags & UFS_NEEDSWAP) 2356 ffs_csum_swap((struct csum*)space, 2357 (struct csum*)bp->b_data, size); 2358 else 2359 #endif 2360 memcpy(bp->b_data, space, (u_int)size); 2361 space = (char *)space + size; 2362 if (waitfor == MNT_WAIT) 2363 error = bwrite(bp); 2364 else 2365 bawrite(bp); 2366 } 2367 if (!allerror && error) 2368 allerror = error; 2369 return (allerror); 2370 } 2371 2372 int 2373 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp, 2374 int attrnamespace, const char *attrname) 2375 { 2376 #ifdef UFS_EXTATTR 2377 /* 2378 * File-backed extended attributes are only supported on UFS1. 2379 * UFS2 has native extended attributes. 2380 */ 2381 if (VFSTOUFS(mp)->um_fstype == UFS1) 2382 return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname)); 2383 #endif 2384 return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname)); 2385 } 2386 2387 /* 2388 * Synch vnode for a mounted file system. 2389 */ 2390 static int 2391 ffs_vfs_fsync(vnode_t *vp, int flags) 2392 { 2393 int error, i, pflags; 2394 #ifdef WAPBL 2395 struct mount *mp; 2396 #endif 2397 2398 KASSERT(vp->v_type == VBLK); 2399 KASSERT(spec_node_getmountedfs(vp) != NULL); 2400 2401 /* 2402 * Flush all dirty data associated with the vnode. 2403 */ 2404 pflags = PGO_ALLPAGES | PGO_CLEANIT; 2405 if ((flags & FSYNC_WAIT) != 0) 2406 pflags |= PGO_SYNCIO; 2407 mutex_enter(vp->v_interlock); 2408 error = VOP_PUTPAGES(vp, 0, 0, pflags); 2409 if (error) 2410 return error; 2411 2412 #ifdef WAPBL 2413 mp = spec_node_getmountedfs(vp); 2414 if (mp && mp->mnt_wapbl) { 2415 /* 2416 * Don't bother writing out metadata if the syncer is 2417 * making the request. We will let the sync vnode 2418 * write it out in a single burst through a call to 2419 * VFS_SYNC(). 2420 */ 2421 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0) 2422 return 0; 2423 2424 /* 2425 * Don't flush the log if the vnode being flushed 2426 * contains no dirty buffers that could be in the log. 2427 */ 2428 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 2429 error = wapbl_flush(mp->mnt_wapbl, 0); 2430 if (error) 2431 return error; 2432 } 2433 2434 if ((flags & FSYNC_WAIT) != 0) { 2435 mutex_enter(vp->v_interlock); 2436 while (vp->v_numoutput) 2437 cv_wait(&vp->v_cv, vp->v_interlock); 2438 mutex_exit(vp->v_interlock); 2439 } 2440 2441 return 0; 2442 } 2443 #endif /* WAPBL */ 2444 2445 error = vflushbuf(vp, flags); 2446 if (error == 0 && (flags & FSYNC_CACHE) != 0) { 2447 i = 1; 2448 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, 2449 kauth_cred_get()); 2450 } 2451 2452 return error; 2453 } 2454