1 /* $NetBSD: ffs_vfsops.c,v 1.356 2018/01/28 10:02:00 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1991, 1993, 1994 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.356 2018/01/28 10:02:00 hannken Exp $"); 65 66 #if defined(_KERNEL_OPT) 67 #include "opt_ffs.h" 68 #include "opt_quota.h" 69 #include "opt_wapbl.h" 70 #endif 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/namei.h> 75 #include <sys/proc.h> 76 #include <sys/kernel.h> 77 #include <sys/vnode.h> 78 #include <sys/socket.h> 79 #include <sys/mount.h> 80 #include <sys/buf.h> 81 #include <sys/device.h> 82 #include <sys/disk.h> 83 #include <sys/mbuf.h> 84 #include <sys/file.h> 85 #include <sys/disklabel.h> 86 #include <sys/ioctl.h> 87 #include <sys/errno.h> 88 #include <sys/kmem.h> 89 #include <sys/pool.h> 90 #include <sys/lock.h> 91 #include <sys/sysctl.h> 92 #include <sys/conf.h> 93 #include <sys/kauth.h> 94 #include <sys/wapbl.h> 95 #include <sys/module.h> 96 97 #include <miscfs/genfs/genfs.h> 98 #include <miscfs/specfs/specdev.h> 99 100 #include <ufs/ufs/quota.h> 101 #include <ufs/ufs/ufsmount.h> 102 #include <ufs/ufs/inode.h> 103 #include <ufs/ufs/dir.h> 104 #include <ufs/ufs/ufs_extern.h> 105 #include <ufs/ufs/ufs_bswap.h> 106 #include <ufs/ufs/ufs_wapbl.h> 107 108 #include <ufs/ffs/fs.h> 109 #include <ufs/ffs/ffs_extern.h> 110 111 #ifdef WAPBL 112 MODULE(MODULE_CLASS_VFS, ffs, "wapbl"); 113 #else 114 MODULE(MODULE_CLASS_VFS, ffs, NULL); 115 #endif 116 117 static int ffs_vfs_fsync(vnode_t *, int); 118 static int ffs_superblock_validate(struct fs *); 119 static int ffs_is_appleufs(struct vnode *, struct fs *); 120 121 static int ffs_init_vnode(struct ufsmount *, struct vnode *, ino_t); 122 static void ffs_deinit_vnode(struct ufsmount *, struct vnode *); 123 124 static struct sysctllog *ffs_sysctl_log; 125 126 static kauth_listener_t ffs_snapshot_listener; 127 128 /* how many times ffs_init() was called */ 129 int ffs_initcount = 0; 130 131 #ifdef DEBUG_FFS_MOUNT 132 #define DPRINTF(_fmt, args...) printf("%s: " _fmt "\n", __func__, ##args) 133 #else 134 #define DPRINTF(_fmt, args...) do {} while (/*CONSTCOND*/0) 135 #endif 136 137 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc; 138 extern const struct vnodeopv_desc ffs_specop_opv_desc; 139 extern const struct vnodeopv_desc ffs_fifoop_opv_desc; 140 141 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = { 142 &ffs_vnodeop_opv_desc, 143 &ffs_specop_opv_desc, 144 &ffs_fifoop_opv_desc, 145 NULL, 146 }; 147 148 struct vfsops ffs_vfsops = { 149 .vfs_name = MOUNT_FFS, 150 .vfs_min_mount_data = sizeof (struct ufs_args), 151 .vfs_mount = ffs_mount, 152 .vfs_start = ufs_start, 153 .vfs_unmount = ffs_unmount, 154 .vfs_root = ufs_root, 155 .vfs_quotactl = ufs_quotactl, 156 .vfs_statvfs = ffs_statvfs, 157 .vfs_sync = ffs_sync, 158 .vfs_vget = ufs_vget, 159 .vfs_loadvnode = ffs_loadvnode, 160 .vfs_newvnode = ffs_newvnode, 161 .vfs_fhtovp = ffs_fhtovp, 162 .vfs_vptofh = ffs_vptofh, 163 .vfs_init = ffs_init, 164 .vfs_reinit = ffs_reinit, 165 .vfs_done = ffs_done, 166 .vfs_mountroot = ffs_mountroot, 167 .vfs_snapshot = ffs_snapshot, 168 .vfs_extattrctl = ffs_extattrctl, 169 .vfs_suspendctl = genfs_suspendctl, 170 .vfs_renamelock_enter = genfs_renamelock_enter, 171 .vfs_renamelock_exit = genfs_renamelock_exit, 172 .vfs_fsync = ffs_vfs_fsync, 173 .vfs_opv_descs = ffs_vnodeopv_descs 174 }; 175 176 static const struct genfs_ops ffs_genfsops = { 177 .gop_size = ffs_gop_size, 178 .gop_alloc = ufs_gop_alloc, 179 .gop_write = genfs_gop_write, 180 .gop_markupdate = ufs_gop_markupdate, 181 }; 182 183 static const struct ufs_ops ffs_ufsops = { 184 .uo_itimes = ffs_itimes, 185 .uo_update = ffs_update, 186 .uo_truncate = ffs_truncate, 187 .uo_balloc = ffs_balloc, 188 .uo_snapgone = ffs_snapgone, 189 .uo_bufrd = ffs_bufrd, 190 .uo_bufwr = ffs_bufwr, 191 }; 192 193 static int 194 ffs_checkrange(struct mount *mp, uint32_t ino) 195 { 196 struct fs *fs = VFSTOUFS(mp)->um_fs; 197 198 if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) { 199 DPRINTF("out of range %u\n", ino); 200 return ESTALE; 201 } 202 203 /* 204 * Need to check if inode is initialized because ffsv2 does 205 * lazy initialization and we can get here from nfs_fhtovp 206 */ 207 if (fs->fs_magic != FS_UFS2_MAGIC) 208 return 0; 209 210 struct buf *bp; 211 int cg = ino_to_cg(fs, ino); 212 struct ufsmount *ump = VFSTOUFS(mp); 213 214 int error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)), 215 (int)fs->fs_cgsize, B_MODIFY, &bp); 216 if (error) { 217 DPRINTF("error %d reading cg %d ino %u\n", error, cg, ino); 218 return error; 219 } 220 221 const int needswap = UFS_FSNEEDSWAP(fs); 222 223 struct cg *cgp = (struct cg *)bp->b_data; 224 if (!cg_chkmagic(cgp, needswap)) { 225 brelse(bp, 0); 226 DPRINTF("bad cylinder group magic cg %d ino %u\n", cg, ino); 227 return ESTALE; 228 } 229 230 int32_t initediblk = ufs_rw32(cgp->cg_initediblk, needswap); 231 brelse(bp, 0); 232 233 if (cg * fs->fs_ipg + initediblk < ino) { 234 DPRINTF("cg=%d fs->fs_ipg=%d initediblk=%d ino=%u\n", 235 cg, fs->fs_ipg, initediblk, ino); 236 return ESTALE; 237 } 238 return 0; 239 } 240 241 static int 242 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 243 void *arg0, void *arg1, void *arg2, void *arg3) 244 { 245 vnode_t *vp = arg2; 246 int result = KAUTH_RESULT_DEFER; 247 248 if (action != KAUTH_SYSTEM_FS_SNAPSHOT) 249 return result; 250 251 if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred)) 252 result = KAUTH_RESULT_ALLOW; 253 254 return result; 255 } 256 257 static int 258 ffs_modcmd(modcmd_t cmd, void *arg) 259 { 260 int error; 261 262 #if 0 263 extern int doasyncfree; 264 #endif 265 #ifdef UFS_EXTATTR 266 extern int ufs_extattr_autocreate; 267 #endif 268 extern int ffs_log_changeopt; 269 270 switch (cmd) { 271 case MODULE_CMD_INIT: 272 error = vfs_attach(&ffs_vfsops); 273 if (error != 0) 274 break; 275 276 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 277 CTLFLAG_PERMANENT, 278 CTLTYPE_NODE, "ffs", 279 SYSCTL_DESCR("Berkeley Fast File System"), 280 NULL, 0, NULL, 0, 281 CTL_VFS, 1, CTL_EOL); 282 /* 283 * @@@ should we even bother with these first three? 284 */ 285 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 286 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 287 CTLTYPE_INT, "doclusterread", NULL, 288 sysctl_notavail, 0, NULL, 0, 289 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL); 290 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 291 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 292 CTLTYPE_INT, "doclusterwrite", NULL, 293 sysctl_notavail, 0, NULL, 0, 294 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL); 295 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 296 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 297 CTLTYPE_INT, "doreallocblks", NULL, 298 sysctl_notavail, 0, NULL, 0, 299 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL); 300 #if 0 301 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 302 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 303 CTLTYPE_INT, "doasyncfree", 304 SYSCTL_DESCR("Release dirty blocks asynchronously"), 305 NULL, 0, &doasyncfree, 0, 306 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL); 307 #endif 308 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 309 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 310 CTLTYPE_INT, "log_changeopt", 311 SYSCTL_DESCR("Log changes in optimization strategy"), 312 NULL, 0, &ffs_log_changeopt, 0, 313 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL); 314 #ifdef UFS_EXTATTR 315 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL, 316 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 317 CTLTYPE_INT, "extattr_autocreate", 318 SYSCTL_DESCR("Size of attribute for " 319 "backing file autocreation"), 320 NULL, 0, &ufs_extattr_autocreate, 0, 321 CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL); 322 323 #endif /* UFS_EXTATTR */ 324 325 ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 326 ffs_snapshot_cb, NULL); 327 if (ffs_snapshot_listener == NULL) 328 printf("ffs_modcmd: can't listen on system scope.\n"); 329 330 break; 331 case MODULE_CMD_FINI: 332 error = vfs_detach(&ffs_vfsops); 333 if (error != 0) 334 break; 335 sysctl_teardown(&ffs_sysctl_log); 336 if (ffs_snapshot_listener != NULL) 337 kauth_unlisten_scope(ffs_snapshot_listener); 338 break; 339 default: 340 error = ENOTTY; 341 break; 342 } 343 344 return (error); 345 } 346 347 pool_cache_t ffs_inode_cache; 348 pool_cache_t ffs_dinode1_cache; 349 pool_cache_t ffs_dinode2_cache; 350 351 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t); 352 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); 353 354 /* 355 * Called by main() when ffs is going to be mounted as root. 356 */ 357 358 int 359 ffs_mountroot(void) 360 { 361 struct fs *fs; 362 struct mount *mp; 363 struct lwp *l = curlwp; /* XXX */ 364 struct ufsmount *ump; 365 int error; 366 367 if (device_class(root_device) != DV_DISK) 368 return (ENODEV); 369 370 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) { 371 vrele(rootvp); 372 return (error); 373 } 374 375 /* 376 * We always need to be able to mount the root file system. 377 */ 378 mp->mnt_flag |= MNT_FORCE; 379 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { 380 vfs_unbusy(mp); 381 vfs_rele(mp); 382 return (error); 383 } 384 mp->mnt_flag &= ~MNT_FORCE; 385 mountlist_append(mp); 386 ump = VFSTOUFS(mp); 387 fs = ump->um_fs; 388 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt)); 389 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); 390 (void)ffs_statvfs(mp, &mp->mnt_stat); 391 vfs_unbusy(mp); 392 setrootfstime((time_t)fs->fs_time); 393 return (0); 394 } 395 396 /* 397 * VFS Operations. 398 * 399 * mount system call 400 */ 401 int 402 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) 403 { 404 struct lwp *l = curlwp; 405 struct vnode *devvp = NULL; 406 struct ufs_args *args = data; 407 struct ufsmount *ump = NULL; 408 struct fs *fs; 409 int error = 0, flags, update; 410 mode_t accessmode; 411 412 if (args == NULL) { 413 DPRINTF("NULL args"); 414 return EINVAL; 415 } 416 if (*data_len < sizeof(*args)) { 417 DPRINTF("bad size args %zu != %zu", *data_len, sizeof(*args)); 418 return EINVAL; 419 } 420 421 ump = VFSTOUFS(mp); 422 if ((mp->mnt_flag & (MNT_GETARGS|MNT_UPDATE)) && ump == NULL) { 423 DPRINTF("no ump"); 424 return EIO; 425 } 426 427 if (mp->mnt_flag & MNT_GETARGS) { 428 args->fspec = NULL; 429 *data_len = sizeof *args; 430 return 0; 431 } 432 433 update = mp->mnt_flag & MNT_UPDATE; 434 435 /* Check arguments */ 436 if (args->fspec == NULL) { 437 if (!update) { 438 /* New mounts must have a filename for the device */ 439 DPRINTF("no filename for mount"); 440 return EINVAL; 441 } 442 } else { 443 /* 444 * Look up the name and verify that it's sane. 445 */ 446 error = namei_simple_user(args->fspec, 447 NSM_FOLLOW_NOEMULROOT, &devvp); 448 if (error != 0) { 449 DPRINTF("namei_simple_user returned %d", error); 450 return error; 451 } 452 453 /* 454 * Be sure this is a valid block device 455 */ 456 if (devvp->v_type != VBLK) { 457 DPRINTF("non block device %d", devvp->v_type); 458 error = ENOTBLK; 459 goto fail; 460 } 461 462 if (bdevsw_lookup(devvp->v_rdev) == NULL) { 463 DPRINTF("can't find block device 0x%jx", 464 devvp->v_rdev); 465 error = ENXIO; 466 goto fail; 467 } 468 469 if (update) { 470 /* 471 * Be sure we're still naming the same device 472 * used for our initial mount 473 */ 474 if (devvp != ump->um_devvp && 475 devvp->v_rdev != ump->um_devvp->v_rdev) { 476 DPRINTF("wrong device 0x%jx != 0x%jx", 477 (uintmax_t)devvp->v_rdev, 478 (uintmax_t)ump->um_devvp->v_rdev); 479 error = EINVAL; 480 goto fail; 481 } 482 vrele(devvp); 483 devvp = NULL; 484 } 485 } 486 487 if (devvp == NULL) { 488 devvp = ump->um_devvp; 489 vref(devvp); 490 } 491 492 /* 493 * If mount by non-root, then verify that user has necessary 494 * permissions on the device. 495 * 496 * Permission to update a mount is checked higher, so here we presume 497 * updating the mount is okay (for example, as far as securelevel goes) 498 * which leaves us with the normal check. 499 */ 500 accessmode = VREAD; 501 if (update ? (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : 502 (mp->mnt_flag & MNT_RDONLY) == 0) 503 accessmode |= VWRITE; 504 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 505 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 506 KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(accessmode)); 507 VOP_UNLOCK(devvp); 508 if (error) { 509 DPRINTF("kauth returned %d", error); 510 goto fail; 511 } 512 513 #ifdef WAPBL 514 /* WAPBL can only be enabled on a r/w mount. */ 515 if (((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) || 516 (mp->mnt_iflag & IMNT_WANTRDONLY)) { 517 mp->mnt_flag &= ~MNT_LOG; 518 } 519 #else /* !WAPBL */ 520 mp->mnt_flag &= ~MNT_LOG; 521 #endif /* !WAPBL */ 522 523 if (!update) { 524 int xflags; 525 526 if (mp->mnt_flag & MNT_RDONLY) 527 xflags = FREAD; 528 else 529 xflags = FREAD | FWRITE; 530 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 531 error = VOP_OPEN(devvp, xflags, FSCRED); 532 VOP_UNLOCK(devvp); 533 if (error) { 534 DPRINTF("VOP_OPEN returned %d", error); 535 goto fail; 536 } 537 error = ffs_mountfs(devvp, mp, l); 538 if (error) { 539 DPRINTF("ffs_mountfs returned %d", error); 540 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 541 (void)VOP_CLOSE(devvp, xflags, NOCRED); 542 VOP_UNLOCK(devvp); 543 goto fail; 544 } 545 546 ump = VFSTOUFS(mp); 547 fs = ump->um_fs; 548 } else { 549 /* 550 * Update the mount. 551 */ 552 553 /* 554 * The initial mount got a reference on this 555 * device, so drop the one obtained via 556 * namei(), above. 557 */ 558 vrele(devvp); 559 560 ump = VFSTOUFS(mp); 561 fs = ump->um_fs; 562 if (fs->fs_ronly == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) { 563 /* 564 * Changing from r/w to r/o 565 */ 566 flags = WRITECLOSE; 567 if (mp->mnt_flag & MNT_FORCE) 568 flags |= FORCECLOSE; 569 error = ffs_flushfiles(mp, flags, l); 570 if (error) 571 return error; 572 573 error = UFS_WAPBL_BEGIN(mp); 574 if (error) { 575 DPRINTF("wapbl %d", error); 576 return error; 577 } 578 579 if (ffs_cgupdate(ump, MNT_WAIT) == 0 && 580 fs->fs_clean & FS_WASCLEAN) { 581 if (mp->mnt_flag & MNT_SOFTDEP) 582 fs->fs_flags &= ~FS_DOSOFTDEP; 583 fs->fs_clean = FS_ISCLEAN; 584 (void) ffs_sbupdate(ump, MNT_WAIT); 585 } 586 587 UFS_WAPBL_END(mp); 588 } 589 590 #ifdef WAPBL 591 if ((mp->mnt_flag & MNT_LOG) == 0) { 592 error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE); 593 if (error) { 594 DPRINTF("ffs_wapbl_stop returned %d", error); 595 return error; 596 } 597 } 598 #endif /* WAPBL */ 599 600 if (fs->fs_ronly == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) { 601 /* 602 * Finish change from r/w to r/o 603 */ 604 fs->fs_ronly = 1; 605 fs->fs_fmod = 0; 606 } 607 608 if (mp->mnt_flag & MNT_RELOAD) { 609 error = ffs_reload(mp, l->l_cred, l); 610 if (error) { 611 DPRINTF("ffs_reload returned %d", error); 612 return error; 613 } 614 } 615 616 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { 617 /* 618 * Changing from read-only to read/write 619 */ 620 #ifndef QUOTA2 621 if (fs->fs_flags & FS_DOQUOTA2) { 622 ump->um_flags |= UFS_QUOTA2; 623 uprintf("%s: options QUOTA2 not enabled%s\n", 624 mp->mnt_stat.f_mntonname, 625 (mp->mnt_flag & MNT_FORCE) ? "" : 626 ", not mounting"); 627 DPRINTF("ffs_quota2 %d", EINVAL); 628 return EINVAL; 629 } 630 #endif 631 fs->fs_ronly = 0; 632 fs->fs_clean <<= 1; 633 fs->fs_fmod = 1; 634 #ifdef WAPBL 635 if (fs->fs_flags & FS_DOWAPBL) { 636 const char *nm = mp->mnt_stat.f_mntonname; 637 if (!mp->mnt_wapbl_replay) { 638 printf("%s: log corrupted;" 639 " replay cancelled\n", nm); 640 return EFTYPE; 641 } 642 printf("%s: replaying log to disk\n", nm); 643 error = wapbl_replay_write(mp->mnt_wapbl_replay, 644 devvp); 645 if (error) { 646 DPRINTF("%s: wapbl_replay_write %d", 647 nm, error); 648 return error; 649 } 650 wapbl_replay_stop(mp->mnt_wapbl_replay); 651 fs->fs_clean = FS_WASCLEAN; 652 } 653 #endif /* WAPBL */ 654 if (fs->fs_snapinum[0] != 0) 655 ffs_snapshot_mount(mp); 656 } 657 658 #ifdef WAPBL 659 error = ffs_wapbl_start(mp); 660 if (error) { 661 DPRINTF("ffs_wapbl_start returned %d", error); 662 return error; 663 } 664 #endif /* WAPBL */ 665 666 #ifdef QUOTA2 667 if (!fs->fs_ronly) { 668 error = ffs_quota2_mount(mp); 669 if (error) { 670 DPRINTF("ffs_quota2_mount returned %d", error); 671 return error; 672 } 673 } 674 #endif 675 676 if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata)) 677 ump->um_discarddata = ffs_discard_init(devvp, fs); 678 679 if (args->fspec == NULL) 680 return 0; 681 } 682 683 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, 684 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); 685 if (error == 0) 686 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, 687 sizeof(fs->fs_fsmnt)); 688 else { 689 DPRINTF("set_statvfs_info returned %d", error); 690 } 691 fs->fs_flags &= ~FS_DOSOFTDEP; 692 if (fs->fs_fmod != 0) { /* XXX */ 693 int err; 694 695 fs->fs_fmod = 0; 696 if (fs->fs_clean & FS_WASCLEAN) 697 fs->fs_time = time_second; 698 else { 699 printf("%s: file system not clean (fs_clean=%#x); " 700 "please fsck(8)\n", mp->mnt_stat.f_mntfromname, 701 fs->fs_clean); 702 printf("%s: lost blocks %" PRId64 " files %d\n", 703 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks, 704 fs->fs_pendinginodes); 705 } 706 err = UFS_WAPBL_BEGIN(mp); 707 if (err == 0) { 708 (void) ffs_cgupdate(ump, MNT_WAIT); 709 UFS_WAPBL_END(mp); 710 } 711 } 712 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) { 713 printf("%s: `-o softdep' is no longer supported, " 714 "consider `-o log'\n", mp->mnt_stat.f_mntfromname); 715 mp->mnt_flag &= ~MNT_SOFTDEP; 716 } 717 718 return (error); 719 720 fail: 721 vrele(devvp); 722 return (error); 723 } 724 725 /* 726 * Reload all incore data for a filesystem (used after running fsck on 727 * the root filesystem and finding things to fix). The filesystem must 728 * be mounted read-only. 729 * 730 * Things to do to update the mount: 731 * 1) invalidate all cached meta-data. 732 * 2) re-read superblock from disk. 733 * 3) re-read summary information from disk. 734 * 4) invalidate all inactive vnodes. 735 * 5) invalidate all cached file data. 736 * 6) re-read inode data for all active vnodes. 737 */ 738 int 739 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) 740 { 741 struct vnode *vp, *devvp; 742 struct inode *ip; 743 void *space; 744 struct buf *bp; 745 struct fs *fs, *newfs; 746 int i, bsize, blks, error; 747 int32_t *lp, fs_sbsize; 748 struct ufsmount *ump; 749 daddr_t sblockloc; 750 struct vnode_iterator *marker; 751 752 if ((mp->mnt_flag & MNT_RDONLY) == 0) 753 return (EINVAL); 754 755 ump = VFSTOUFS(mp); 756 757 /* 758 * Step 1: invalidate all cached meta-data. 759 */ 760 devvp = ump->um_devvp; 761 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 762 error = vinvalbuf(devvp, 0, cred, l, 0, 0); 763 VOP_UNLOCK(devvp); 764 if (error) 765 panic("%s: dirty1", __func__); 766 767 /* 768 * Step 2: re-read superblock from disk. XXX: We don't handle 769 * possibility that superblock moved. Which implies that we don't 770 * want its size to change either. 771 */ 772 fs = ump->um_fs; 773 fs_sbsize = fs->fs_sbsize; 774 error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs_sbsize, 775 0, &bp); 776 if (error) 777 return (error); 778 newfs = kmem_alloc(fs_sbsize, KM_SLEEP); 779 memcpy(newfs, bp->b_data, fs_sbsize); 780 781 #ifdef FFS_EI 782 if (ump->um_flags & UFS_NEEDSWAP) { 783 ffs_sb_swap((struct fs *)bp->b_data, newfs); 784 newfs->fs_flags |= FS_SWAPPED; 785 } else 786 #endif 787 newfs->fs_flags &= ~FS_SWAPPED; 788 789 brelse(bp, 0); 790 791 if ((newfs->fs_magic != FS_UFS1_MAGIC) && 792 (newfs->fs_magic != FS_UFS2_MAGIC)) { 793 kmem_free(newfs, fs_sbsize); 794 return (EIO); /* XXX needs translation */ 795 } 796 if (!ffs_superblock_validate(newfs)) { 797 kmem_free(newfs, fs_sbsize); 798 return (EINVAL); 799 } 800 801 /* 802 * The current implementation doesn't handle the possibility that 803 * these values may have changed. 804 */ 805 if ((newfs->fs_sbsize != fs_sbsize) || 806 (newfs->fs_cssize != fs->fs_cssize) || 807 (newfs->fs_contigsumsize != fs->fs_contigsumsize) || 808 (newfs->fs_ncg != fs->fs_ncg)) { 809 kmem_free(newfs, fs_sbsize); 810 return (EINVAL); 811 } 812 813 /* Store off old fs_sblockloc for fs_oldfscompat_read. */ 814 sblockloc = fs->fs_sblockloc; 815 /* 816 * Copy pointer fields back into superblock before copying in XXX 817 * new superblock. These should really be in the ufsmount. XXX 818 * Note that important parameters (eg fs_ncg) are unchanged. 819 */ 820 newfs->fs_csp = fs->fs_csp; 821 newfs->fs_maxcluster = fs->fs_maxcluster; 822 newfs->fs_contigdirs = fs->fs_contigdirs; 823 newfs->fs_ronly = fs->fs_ronly; 824 newfs->fs_active = fs->fs_active; 825 memcpy(fs, newfs, (u_int)fs_sbsize); 826 kmem_free(newfs, fs_sbsize); 827 828 /* 829 * Recheck for Apple UFS filesystem. 830 */ 831 ump->um_flags &= ~UFS_ISAPPLEUFS; 832 if (ffs_is_appleufs(devvp, fs)) { 833 #ifdef APPLE_UFS 834 ump->um_flags |= UFS_ISAPPLEUFS; 835 #else 836 DPRINTF("AppleUFS not supported"); 837 return (EIO); /* XXX: really? */ 838 #endif 839 } 840 841 if (UFS_MPISAPPLEUFS(ump)) { 842 /* see comment about NeXT below */ 843 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 844 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 845 mp->mnt_iflag |= IMNT_DTYPE; 846 } else { 847 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 848 ump->um_dirblksiz = UFS_DIRBLKSIZ; 849 if (ump->um_maxsymlinklen > 0) 850 mp->mnt_iflag |= IMNT_DTYPE; 851 else 852 mp->mnt_iflag &= ~IMNT_DTYPE; 853 } 854 ffs_oldfscompat_read(fs, ump, sblockloc); 855 856 mutex_enter(&ump->um_lock); 857 ump->um_maxfilesize = fs->fs_maxfilesize; 858 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 859 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 860 mp->mnt_stat.f_mntonname, fs->fs_flags, 861 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 862 if ((mp->mnt_flag & MNT_FORCE) == 0) { 863 mutex_exit(&ump->um_lock); 864 return (EINVAL); 865 } 866 } 867 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 868 fs->fs_pendingblocks = 0; 869 fs->fs_pendinginodes = 0; 870 } 871 mutex_exit(&ump->um_lock); 872 873 ffs_statvfs(mp, &mp->mnt_stat); 874 /* 875 * Step 3: re-read summary information from disk. 876 */ 877 blks = howmany(fs->fs_cssize, fs->fs_fsize); 878 space = fs->fs_csp; 879 for (i = 0; i < blks; i += fs->fs_frag) { 880 bsize = fs->fs_bsize; 881 if (i + fs->fs_frag > blks) 882 bsize = (blks - i) * fs->fs_fsize; 883 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 884 0, &bp); 885 if (error) { 886 return (error); 887 } 888 #ifdef FFS_EI 889 if (UFS_FSNEEDSWAP(fs)) 890 ffs_csum_swap((struct csum *)bp->b_data, 891 (struct csum *)space, bsize); 892 else 893 #endif 894 memcpy(space, bp->b_data, (size_t)bsize); 895 space = (char *)space + bsize; 896 brelse(bp, 0); 897 } 898 /* 899 * We no longer know anything about clusters per cylinder group. 900 */ 901 if (fs->fs_contigsumsize > 0) { 902 lp = fs->fs_maxcluster; 903 for (i = 0; i < fs->fs_ncg; i++) 904 *lp++ = fs->fs_contigsumsize; 905 } 906 907 vfs_vnode_iterator_init(mp, &marker); 908 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { 909 /* 910 * Step 4: invalidate all inactive vnodes. 911 */ 912 if (vrecycle(vp)) 913 continue; 914 /* 915 * Step 5: invalidate all cached file data. 916 */ 917 if (vn_lock(vp, LK_EXCLUSIVE)) { 918 vrele(vp); 919 continue; 920 } 921 if (vinvalbuf(vp, 0, cred, l, 0, 0)) 922 panic("%s: dirty2", __func__); 923 /* 924 * Step 6: re-read inode data for all active vnodes. 925 */ 926 ip = VTOI(vp); 927 error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), 928 (int)fs->fs_bsize, 0, &bp); 929 if (error) { 930 vput(vp); 931 break; 932 } 933 ffs_load_inode(bp, ip, fs, ip->i_number); 934 brelse(bp, 0); 935 vput(vp); 936 } 937 vfs_vnode_iterator_destroy(marker); 938 return (error); 939 } 940 941 /* 942 * Possible superblock locations ordered from most to least likely. 943 */ 944 static const int sblock_try[] = SBLOCKSEARCH; 945 946 947 static int 948 ffs_superblock_validate(struct fs *fs) 949 { 950 int32_t i, fs_bshift = 0, fs_fshift = 0, fs_fragshift = 0, fs_frag; 951 int32_t fs_inopb; 952 953 /* Check the superblock size */ 954 if (fs->fs_sbsize > SBLOCKSIZE || fs->fs_sbsize < sizeof(struct fs)) 955 return 0; 956 957 /* Check the file system blocksize */ 958 if (fs->fs_bsize > MAXBSIZE || fs->fs_bsize < MINBSIZE) 959 return 0; 960 if (!powerof2(fs->fs_bsize)) 961 return 0; 962 963 /* Check the size of frag blocks */ 964 if (!powerof2(fs->fs_fsize)) 965 return 0; 966 if (fs->fs_fsize == 0) 967 return 0; 968 969 /* 970 * XXX: these values are just zero-checked to prevent obvious 971 * bugs. We need more strict checks. 972 */ 973 if (fs->fs_size == 0) 974 return 0; 975 if (fs->fs_cssize == 0) 976 return 0; 977 if (fs->fs_ipg == 0) 978 return 0; 979 if (fs->fs_fpg == 0) 980 return 0; 981 if (fs->fs_ncg == 0) 982 return 0; 983 if (fs->fs_maxbpg == 0) 984 return 0; 985 986 /* Check the number of inodes per block */ 987 if (fs->fs_magic == FS_UFS1_MAGIC) 988 fs_inopb = fs->fs_bsize / sizeof(struct ufs1_dinode); 989 else /* fs->fs_magic == FS_UFS2_MAGIC */ 990 fs_inopb = fs->fs_bsize / sizeof(struct ufs2_dinode); 991 if (fs->fs_inopb != fs_inopb) 992 return 0; 993 994 /* Block size cannot be smaller than fragment size */ 995 if (fs->fs_bsize < fs->fs_fsize) 996 return 0; 997 998 /* Compute fs_bshift and ensure it is consistent */ 999 for (i = fs->fs_bsize; i > 1; i >>= 1) 1000 fs_bshift++; 1001 if (fs->fs_bshift != fs_bshift) 1002 return 0; 1003 1004 /* Compute fs_fshift and ensure it is consistent */ 1005 for (i = fs->fs_fsize; i > 1; i >>= 1) 1006 fs_fshift++; 1007 if (fs->fs_fshift != fs_fshift) 1008 return 0; 1009 1010 /* Compute fs_fragshift and ensure it is consistent */ 1011 for (i = fs->fs_frag; i > 1; i >>= 1) 1012 fs_fragshift++; 1013 if (fs->fs_fragshift != fs_fragshift) 1014 return 0; 1015 1016 /* Check the masks */ 1017 if (fs->fs_bmask != ~(fs->fs_bsize - 1)) 1018 return 0; 1019 if (fs->fs_fmask != ~(fs->fs_fsize - 1)) 1020 return 0; 1021 1022 /* 1023 * Now that the shifts and masks are sanitized, we can use the ffs_ API. 1024 */ 1025 1026 /* Check the number of frag blocks */ 1027 if ((fs_frag = ffs_numfrags(fs, fs->fs_bsize)) > MAXFRAG) 1028 return 0; 1029 if (fs->fs_frag != fs_frag) 1030 return 0; 1031 1032 /* Check the size of cylinder groups */ 1033 if ((fs->fs_cgsize < sizeof(struct cg)) || 1034 (fs->fs_cgsize > fs->fs_bsize)) 1035 return 0; 1036 1037 return 1; 1038 } 1039 1040 static int 1041 ffs_is_appleufs(struct vnode *devvp, struct fs *fs) 1042 { 1043 struct dkwedge_info dkw; 1044 int ret = 0; 1045 1046 /* 1047 * First check to see if this is tagged as an Apple UFS filesystem 1048 * in the disklabel. 1049 */ 1050 if (getdiskinfo(devvp, &dkw) == 0 && 1051 strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0) 1052 ret = 1; 1053 #ifdef APPLE_UFS 1054 else { 1055 struct appleufslabel *applefs; 1056 struct buf *bp; 1057 daddr_t blkno = APPLEUFS_LABEL_OFFSET / DEV_BSIZE; 1058 int error; 1059 1060 /* 1061 * Manually look for an Apple UFS label, and if a valid one 1062 * is found, then treat it like an Apple UFS filesystem anyway. 1063 */ 1064 error = bread(devvp, blkno, APPLEUFS_LABEL_SIZE, 0, &bp); 1065 if (error) { 1066 DPRINTF("bread@0x%jx returned %d", (intmax_t)blkno, error); 1067 return 0; 1068 } 1069 applefs = (struct appleufslabel *)bp->b_data; 1070 error = ffs_appleufs_validate(fs->fs_fsmnt, applefs, NULL); 1071 if (error == 0) 1072 ret = 1; 1073 brelse(bp, 0); 1074 } 1075 #endif 1076 1077 return ret; 1078 } 1079 1080 /* 1081 * Common code for mount and mountroot 1082 */ 1083 int 1084 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) 1085 { 1086 struct ufsmount *ump = NULL; 1087 struct buf *bp = NULL; 1088 struct fs *fs = NULL; 1089 dev_t dev; 1090 void *space; 1091 daddr_t sblockloc = 0; 1092 int blks, fstype = 0; 1093 int error, i, bsize, ronly, bset = 0; 1094 #ifdef FFS_EI 1095 int needswap = 0; /* keep gcc happy */ 1096 #endif 1097 int32_t *lp; 1098 kauth_cred_t cred; 1099 u_int32_t allocsbsize, fs_sbsize = 0; 1100 1101 dev = devvp->v_rdev; 1102 cred = l ? l->l_cred : NOCRED; 1103 1104 /* Flush out any old buffers remaining from a previous use. */ 1105 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1106 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0); 1107 VOP_UNLOCK(devvp); 1108 if (error) { 1109 DPRINTF("vinvalbuf returned %d", error); 1110 return error; 1111 } 1112 1113 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 1114 1115 ump = kmem_zalloc(sizeof(*ump), KM_SLEEP); 1116 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE); 1117 error = ffs_snapshot_init(ump); 1118 if (error) { 1119 DPRINTF("ffs_snapshot_init returned %d", error); 1120 goto out; 1121 } 1122 ump->um_ops = &ffs_ufsops; 1123 1124 #ifdef WAPBL 1125 sbagain: 1126 #endif 1127 /* 1128 * Try reading the superblock in each of its possible locations. 1129 */ 1130 for (i = 0; ; i++) { 1131 daddr_t fs_sblockloc; 1132 1133 if (bp != NULL) { 1134 brelse(bp, BC_NOCACHE); 1135 bp = NULL; 1136 } 1137 if (sblock_try[i] == -1) { 1138 DPRINTF("no superblock found"); 1139 error = EINVAL; 1140 fs = NULL; 1141 goto out; 1142 } 1143 1144 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, 1145 0, &bp); 1146 if (error) { 1147 DPRINTF("bread@0x%x returned %d", 1148 sblock_try[i] / DEV_BSIZE, error); 1149 fs = NULL; 1150 goto out; 1151 } 1152 fs = (struct fs *)bp->b_data; 1153 1154 sblockloc = sblock_try[i]; 1155 DPRINTF("fs_magic 0x%x", fs->fs_magic); 1156 1157 /* 1158 * Swap: here, we swap fs->fs_sbsize in order to get the correct 1159 * size to read the superblock. Once read, we swap the whole 1160 * superblock structure. 1161 */ 1162 if (fs->fs_magic == FS_UFS1_MAGIC) { 1163 fs_sbsize = fs->fs_sbsize; 1164 fstype = UFS1; 1165 #ifdef FFS_EI 1166 needswap = 0; 1167 } else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) { 1168 fs_sbsize = bswap32(fs->fs_sbsize); 1169 fstype = UFS1; 1170 needswap = 1; 1171 #endif 1172 } else if (fs->fs_magic == FS_UFS2_MAGIC) { 1173 fs_sbsize = fs->fs_sbsize; 1174 fstype = UFS2; 1175 #ifdef FFS_EI 1176 needswap = 0; 1177 } else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) { 1178 fs_sbsize = bswap32(fs->fs_sbsize); 1179 fstype = UFS2; 1180 needswap = 1; 1181 #endif 1182 } else 1183 continue; 1184 1185 /* fs->fs_sblockloc isn't defined for old filesystems */ 1186 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) { 1187 if (sblockloc == SBLOCK_UFS2) 1188 /* 1189 * This is likely to be the first alternate 1190 * in a filesystem with 64k blocks. 1191 * Don't use it. 1192 */ 1193 continue; 1194 fs_sblockloc = sblockloc; 1195 } else { 1196 fs_sblockloc = fs->fs_sblockloc; 1197 #ifdef FFS_EI 1198 if (needswap) 1199 fs_sblockloc = bswap64(fs_sblockloc); 1200 #endif 1201 } 1202 1203 /* Check we haven't found an alternate superblock */ 1204 if (fs_sblockloc != sblockloc) 1205 continue; 1206 1207 /* Check the superblock size */ 1208 if (fs_sbsize > SBLOCKSIZE || fs_sbsize < sizeof(struct fs)) 1209 continue; 1210 fs = kmem_alloc((u_long)fs_sbsize, KM_SLEEP); 1211 memcpy(fs, bp->b_data, fs_sbsize); 1212 1213 /* Swap the whole superblock structure, if necessary. */ 1214 #ifdef FFS_EI 1215 if (needswap) { 1216 ffs_sb_swap((struct fs*)bp->b_data, fs); 1217 fs->fs_flags |= FS_SWAPPED; 1218 } else 1219 #endif 1220 fs->fs_flags &= ~FS_SWAPPED; 1221 1222 /* 1223 * Now that everything is swapped, the superblock is ready to 1224 * be sanitized. 1225 */ 1226 if (!ffs_superblock_validate(fs)) { 1227 kmem_free(fs, fs_sbsize); 1228 continue; 1229 } 1230 1231 /* Ok seems to be a good superblock */ 1232 break; 1233 } 1234 1235 ump->um_fs = fs; 1236 1237 #ifdef WAPBL 1238 if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) { 1239 error = ffs_wapbl_replay_start(mp, fs, devvp); 1240 if (error && (mp->mnt_flag & MNT_FORCE) == 0) { 1241 DPRINTF("ffs_wapbl_replay_start returned %d", error); 1242 goto out; 1243 } 1244 if (!error) { 1245 if (!ronly) { 1246 /* XXX fsmnt may be stale. */ 1247 printf("%s: replaying log to disk\n", 1248 fs->fs_fsmnt); 1249 error = wapbl_replay_write(mp->mnt_wapbl_replay, 1250 devvp); 1251 if (error) { 1252 DPRINTF("wapbl_replay_write returned %d", 1253 error); 1254 goto out; 1255 } 1256 wapbl_replay_stop(mp->mnt_wapbl_replay); 1257 fs->fs_clean = FS_WASCLEAN; 1258 } else { 1259 /* XXX fsmnt may be stale */ 1260 printf("%s: replaying log to memory\n", 1261 fs->fs_fsmnt); 1262 } 1263 1264 /* Force a re-read of the superblock */ 1265 brelse(bp, BC_INVAL); 1266 bp = NULL; 1267 kmem_free(fs, fs_sbsize); 1268 fs = NULL; 1269 goto sbagain; 1270 } 1271 } 1272 #else /* !WAPBL */ 1273 if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) { 1274 error = EPERM; 1275 DPRINTF("no force %d", error); 1276 goto out; 1277 } 1278 #endif /* !WAPBL */ 1279 1280 ffs_oldfscompat_read(fs, ump, sblockloc); 1281 ump->um_maxfilesize = fs->fs_maxfilesize; 1282 1283 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) { 1284 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n", 1285 mp->mnt_stat.f_mntonname, fs->fs_flags, 1286 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1287 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1288 error = EINVAL; 1289 DPRINTF("no force %d", error); 1290 goto out; 1291 } 1292 } 1293 1294 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { 1295 fs->fs_pendingblocks = 0; 1296 fs->fs_pendinginodes = 0; 1297 } 1298 1299 ump->um_fstype = fstype; 1300 if (fs->fs_sbsize < SBLOCKSIZE) 1301 brelse(bp, BC_INVAL); 1302 else 1303 brelse(bp, 0); 1304 bp = NULL; 1305 1306 if (ffs_is_appleufs(devvp, fs)) { 1307 #ifdef APPLE_UFS 1308 ump->um_flags |= UFS_ISAPPLEUFS; 1309 #else 1310 DPRINTF("AppleUFS not supported"); 1311 error = EINVAL; 1312 goto out; 1313 #endif 1314 } 1315 1316 #if 0 1317 /* 1318 * XXX This code changes the behaviour of mounting dirty filesystems, to 1319 * XXX require "mount -f ..." to mount them. This doesn't match what 1320 * XXX mount(8) describes and is disabled for now. 1321 */ 1322 /* 1323 * If the file system is not clean, don't allow it to be mounted 1324 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set 1325 * for the root file system.) 1326 */ 1327 if (fs->fs_flags & FS_DOWAPBL) { 1328 /* 1329 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL 1330 * bit is set, although there's a window in unmount where it 1331 * could be FS_ISCLEAN 1332 */ 1333 if ((mp->mnt_flag & MNT_FORCE) == 0 && 1334 (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) { 1335 error = EPERM; 1336 goto out; 1337 } 1338 } else 1339 if ((fs->fs_clean & FS_ISCLEAN) == 0 && 1340 (mp->mnt_flag & MNT_FORCE) == 0) { 1341 error = EPERM; 1342 goto out; 1343 } 1344 #endif 1345 1346 /* 1347 * Verify that we can access the last block in the fs 1348 * if we're mounting read/write. 1349 */ 1350 if (!ronly) { 1351 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1), 1352 fs->fs_fsize, 0, &bp); 1353 if (error) { 1354 DPRINTF("bread@0x%jx returned %d", 1355 (intmax_t)FFS_FSBTODB(fs, fs->fs_size - 1), 1356 error); 1357 bset = BC_INVAL; 1358 goto out; 1359 } 1360 if (bp->b_bcount != fs->fs_fsize) { 1361 DPRINTF("bcount %x != fsize %x", bp->b_bcount, 1362 fs->fs_fsize); 1363 error = EINVAL; 1364 bset = BC_INVAL; 1365 goto out; 1366 } 1367 brelse(bp, BC_INVAL); 1368 bp = NULL; 1369 } 1370 1371 fs->fs_ronly = ronly; 1372 /* Don't bump fs_clean if we're replaying journal */ 1373 if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) { 1374 if (ronly == 0) { 1375 fs->fs_clean <<= 1; 1376 fs->fs_fmod = 1; 1377 } 1378 } 1379 1380 bsize = fs->fs_cssize; 1381 blks = howmany(bsize, fs->fs_fsize); 1382 if (fs->fs_contigsumsize > 0) 1383 bsize += fs->fs_ncg * sizeof(int32_t); 1384 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1385 allocsbsize = bsize; 1386 space = kmem_alloc((u_long)allocsbsize, KM_SLEEP); 1387 fs->fs_csp = space; 1388 1389 for (i = 0; i < blks; i += fs->fs_frag) { 1390 bsize = fs->fs_bsize; 1391 if (i + fs->fs_frag > blks) 1392 bsize = (blks - i) * fs->fs_fsize; 1393 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize, 1394 0, &bp); 1395 if (error) { 1396 DPRINTF("bread@0x%jx %d", 1397 (intmax_t)FFS_FSBTODB(fs, fs->fs_csaddr + i), 1398 error); 1399 goto out1; 1400 } 1401 #ifdef FFS_EI 1402 if (needswap) 1403 ffs_csum_swap((struct csum *)bp->b_data, 1404 (struct csum *)space, bsize); 1405 else 1406 #endif 1407 memcpy(space, bp->b_data, (u_int)bsize); 1408 1409 space = (char *)space + bsize; 1410 brelse(bp, 0); 1411 bp = NULL; 1412 } 1413 if (fs->fs_contigsumsize > 0) { 1414 fs->fs_maxcluster = lp = space; 1415 for (i = 0; i < fs->fs_ncg; i++) 1416 *lp++ = fs->fs_contigsumsize; 1417 space = lp; 1418 } 1419 bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1420 fs->fs_contigdirs = space; 1421 space = (char *)space + bsize; 1422 memset(fs->fs_contigdirs, 0, bsize); 1423 1424 /* Compatibility for old filesystems - XXX */ 1425 if (fs->fs_avgfilesize <= 0) 1426 fs->fs_avgfilesize = AVFILESIZ; 1427 if (fs->fs_avgfpdir <= 0) 1428 fs->fs_avgfpdir = AFPDIR; 1429 fs->fs_active = NULL; 1430 1431 mp->mnt_data = ump; 1432 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev; 1433 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS); 1434 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 1435 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN; 1436 if (UFS_MPISAPPLEUFS(ump)) { 1437 /* NeXT used to keep short symlinks in the inode even 1438 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen 1439 * is probably -1, but we still need to be able to identify 1440 * short symlinks. 1441 */ 1442 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN; 1443 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ; 1444 mp->mnt_iflag |= IMNT_DTYPE; 1445 } else { 1446 ump->um_maxsymlinklen = fs->fs_maxsymlinklen; 1447 ump->um_dirblksiz = UFS_DIRBLKSIZ; 1448 if (ump->um_maxsymlinklen > 0) 1449 mp->mnt_iflag |= IMNT_DTYPE; 1450 else 1451 mp->mnt_iflag &= ~IMNT_DTYPE; 1452 } 1453 mp->mnt_fs_bshift = fs->fs_bshift; 1454 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ 1455 mp->mnt_flag |= MNT_LOCAL; 1456 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO; 1457 #ifdef FFS_EI 1458 if (needswap) 1459 ump->um_flags |= UFS_NEEDSWAP; 1460 #endif 1461 ump->um_mountp = mp; 1462 ump->um_dev = dev; 1463 ump->um_devvp = devvp; 1464 ump->um_nindir = fs->fs_nindir; 1465 ump->um_lognindir = ffs(fs->fs_nindir) - 1; 1466 ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT; 1467 ump->um_seqinc = fs->fs_frag; 1468 for (i = 0; i < MAXQUOTAS; i++) 1469 ump->um_quotas[i] = NULLVP; 1470 spec_node_setmountedfs(devvp, mp); 1471 if (ronly == 0 && fs->fs_snapinum[0] != 0) 1472 ffs_snapshot_mount(mp); 1473 #ifdef WAPBL 1474 if (!ronly) { 1475 KDASSERT(fs->fs_ronly == 0); 1476 /* 1477 * ffs_wapbl_start() needs mp->mnt_stat initialised if it 1478 * needs to create a new log file in-filesystem. 1479 */ 1480 error = ffs_statvfs(mp, &mp->mnt_stat); 1481 if (error) { 1482 DPRINTF("ffs_statvfs returned %d", error); 1483 goto out1; 1484 } 1485 1486 error = ffs_wapbl_start(mp); 1487 if (error) { 1488 DPRINTF("ffs_wapbl_start returned %d", error); 1489 goto out1; 1490 } 1491 } 1492 #endif /* WAPBL */ 1493 if (ronly == 0) { 1494 #ifdef QUOTA2 1495 error = ffs_quota2_mount(mp); 1496 if (error) { 1497 DPRINTF("ffs_quota2_mount returned %d", error); 1498 goto out1; 1499 } 1500 #else 1501 if (fs->fs_flags & FS_DOQUOTA2) { 1502 ump->um_flags |= UFS_QUOTA2; 1503 uprintf("%s: options QUOTA2 not enabled%s\n", 1504 mp->mnt_stat.f_mntonname, 1505 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting"); 1506 if ((mp->mnt_flag & MNT_FORCE) == 0) { 1507 error = EINVAL; 1508 DPRINTF("quota disabled %d", error); 1509 goto out1; 1510 } 1511 } 1512 #endif 1513 } 1514 1515 if (mp->mnt_flag & MNT_DISCARD) 1516 ump->um_discarddata = ffs_discard_init(devvp, fs); 1517 1518 return (0); 1519 out1: 1520 kmem_free(fs->fs_csp, allocsbsize); 1521 out: 1522 #ifdef WAPBL 1523 if (mp->mnt_wapbl_replay) { 1524 wapbl_replay_stop(mp->mnt_wapbl_replay); 1525 wapbl_replay_free(mp->mnt_wapbl_replay); 1526 mp->mnt_wapbl_replay = 0; 1527 } 1528 #endif 1529 1530 if (fs) 1531 kmem_free(fs, fs->fs_sbsize); 1532 spec_node_setmountedfs(devvp, NULL); 1533 if (bp) 1534 brelse(bp, bset); 1535 if (ump) { 1536 if (ump->um_oldfscompat) 1537 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1538 mutex_destroy(&ump->um_lock); 1539 kmem_free(ump, sizeof(*ump)); 1540 mp->mnt_data = NULL; 1541 } 1542 return (error); 1543 } 1544 1545 /* 1546 * Sanity checks for loading old filesystem superblocks. 1547 * See ffs_oldfscompat_write below for unwound actions. 1548 * 1549 * XXX - Parts get retired eventually. 1550 * Unfortunately new bits get added. 1551 */ 1552 static void 1553 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc) 1554 { 1555 off_t maxfilesize; 1556 int32_t *extrasave; 1557 1558 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1559 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1560 return; 1561 1562 if (!ump->um_oldfscompat) 1563 ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t), 1564 KM_SLEEP); 1565 1566 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512); 1567 extrasave = ump->um_oldfscompat; 1568 extrasave += 512/sizeof(int32_t); 1569 extrasave[0] = fs->fs_old_npsect; 1570 extrasave[1] = fs->fs_old_interleave; 1571 extrasave[2] = fs->fs_old_trackskew; 1572 1573 /* These fields will be overwritten by their 1574 * original values in fs_oldfscompat_write, so it is harmless 1575 * to modify them here. 1576 */ 1577 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; 1578 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; 1579 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; 1580 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; 1581 1582 fs->fs_maxbsize = fs->fs_bsize; 1583 fs->fs_time = fs->fs_old_time; 1584 fs->fs_size = fs->fs_old_size; 1585 fs->fs_dsize = fs->fs_old_dsize; 1586 fs->fs_csaddr = fs->fs_old_csaddr; 1587 fs->fs_sblockloc = sblockloc; 1588 1589 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL); 1590 1591 if (fs->fs_old_postblformat == FS_42POSTBLFMT) { 1592 fs->fs_old_nrpos = 8; 1593 fs->fs_old_npsect = fs->fs_old_nsect; 1594 fs->fs_old_interleave = 1; 1595 fs->fs_old_trackskew = 0; 1596 } 1597 1598 if (fs->fs_magic == FS_UFS1_MAGIC && 1599 fs->fs_old_inodefmt < FS_44INODEFMT) { 1600 fs->fs_maxfilesize = (u_quad_t) 1LL << 39; 1601 fs->fs_qbmask = ~fs->fs_bmask; 1602 fs->fs_qfmask = ~fs->fs_fmask; 1603 } 1604 1605 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; 1606 if (fs->fs_maxfilesize > maxfilesize) 1607 fs->fs_maxfilesize = maxfilesize; 1608 1609 /* Compatibility for old filesystems */ 1610 if (fs->fs_avgfilesize <= 0) 1611 fs->fs_avgfilesize = AVFILESIZ; 1612 if (fs->fs_avgfpdir <= 0) 1613 fs->fs_avgfpdir = AFPDIR; 1614 1615 #if 0 1616 if (bigcgs) { 1617 fs->fs_save_cgsize = fs->fs_cgsize; 1618 fs->fs_cgsize = fs->fs_bsize; 1619 } 1620 #endif 1621 } 1622 1623 /* 1624 * Unwinding superblock updates for old filesystems. 1625 * See ffs_oldfscompat_read above for details. 1626 * 1627 * XXX - Parts get retired eventually. 1628 * Unfortunately new bits get added. 1629 */ 1630 static void 1631 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump) 1632 { 1633 int32_t *extrasave; 1634 1635 if ((fs->fs_magic != FS_UFS1_MAGIC) || 1636 (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1637 return; 1638 1639 fs->fs_old_time = fs->fs_time; 1640 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; 1641 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; 1642 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; 1643 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; 1644 fs->fs_old_flags = fs->fs_flags; 1645 1646 #if 0 1647 if (bigcgs) { 1648 fs->fs_cgsize = fs->fs_save_cgsize; 1649 } 1650 #endif 1651 1652 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512); 1653 extrasave = ump->um_oldfscompat; 1654 extrasave += 512/sizeof(int32_t); 1655 fs->fs_old_npsect = extrasave[0]; 1656 fs->fs_old_interleave = extrasave[1]; 1657 fs->fs_old_trackskew = extrasave[2]; 1658 1659 } 1660 1661 /* 1662 * unmount vfs operation 1663 */ 1664 int 1665 ffs_unmount(struct mount *mp, int mntflags) 1666 { 1667 struct lwp *l = curlwp; 1668 struct ufsmount *ump = VFSTOUFS(mp); 1669 struct fs *fs = ump->um_fs; 1670 int error, flags; 1671 u_int32_t bsize; 1672 #ifdef WAPBL 1673 extern int doforce; 1674 #endif 1675 1676 if (ump->um_discarddata) { 1677 ffs_discard_finish(ump->um_discarddata, mntflags); 1678 ump->um_discarddata = NULL; 1679 } 1680 1681 flags = 0; 1682 if (mntflags & MNT_FORCE) 1683 flags |= FORCECLOSE; 1684 if ((error = ffs_flushfiles(mp, flags, l)) != 0) 1685 return (error); 1686 error = UFS_WAPBL_BEGIN(mp); 1687 if (error == 0) 1688 if (fs->fs_ronly == 0 && 1689 ffs_cgupdate(ump, MNT_WAIT) == 0 && 1690 fs->fs_clean & FS_WASCLEAN) { 1691 fs->fs_clean = FS_ISCLEAN; 1692 fs->fs_fmod = 0; 1693 (void) ffs_sbupdate(ump, MNT_WAIT); 1694 } 1695 if (error == 0) 1696 UFS_WAPBL_END(mp); 1697 #ifdef WAPBL 1698 KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl)); 1699 if (mp->mnt_wapbl_replay) { 1700 KDASSERT(fs->fs_ronly); 1701 wapbl_replay_stop(mp->mnt_wapbl_replay); 1702 wapbl_replay_free(mp->mnt_wapbl_replay); 1703 mp->mnt_wapbl_replay = 0; 1704 } 1705 error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE)); 1706 if (error) { 1707 return error; 1708 } 1709 #endif /* WAPBL */ 1710 1711 if (ump->um_devvp->v_type != VBAD) 1712 spec_node_setmountedfs(ump->um_devvp, NULL); 1713 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1714 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE, 1715 NOCRED); 1716 vput(ump->um_devvp); 1717 1718 bsize = fs->fs_cssize; 1719 if (fs->fs_contigsumsize > 0) 1720 bsize += fs->fs_ncg * sizeof(int32_t); 1721 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs); 1722 kmem_free(fs->fs_csp, bsize); 1723 1724 kmem_free(fs, fs->fs_sbsize); 1725 if (ump->um_oldfscompat != NULL) 1726 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t)); 1727 mutex_destroy(&ump->um_lock); 1728 ffs_snapshot_fini(ump); 1729 kmem_free(ump, sizeof(*ump)); 1730 mp->mnt_data = NULL; 1731 mp->mnt_flag &= ~MNT_LOCAL; 1732 return (0); 1733 } 1734 1735 /* 1736 * Flush out all the files in a filesystem. 1737 */ 1738 int 1739 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l) 1740 { 1741 extern int doforce; 1742 struct ufsmount *ump; 1743 int error; 1744 1745 if (!doforce) 1746 flags &= ~FORCECLOSE; 1747 ump = VFSTOUFS(mp); 1748 #ifdef QUOTA 1749 if ((error = quota1_umount(mp, flags)) != 0) 1750 return (error); 1751 #endif 1752 #ifdef QUOTA2 1753 if ((error = quota2_umount(mp, flags)) != 0) 1754 return (error); 1755 #endif 1756 #ifdef UFS_EXTATTR 1757 if (ump->um_fstype == UFS1) { 1758 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED) 1759 ufs_extattr_stop(mp, l); 1760 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED) 1761 ufs_extattr_uepm_destroy(&ump->um_extattr); 1762 mp->mnt_flag &= ~MNT_EXTATTR; 1763 } 1764 #endif 1765 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) 1766 return (error); 1767 ffs_snapshot_unmount(mp); 1768 /* 1769 * Flush all the files. 1770 */ 1771 error = vflush(mp, NULLVP, flags); 1772 if (error) 1773 return (error); 1774 /* 1775 * Flush filesystem metadata. 1776 */ 1777 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1778 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0); 1779 VOP_UNLOCK(ump->um_devvp); 1780 if (flags & FORCECLOSE) /* XXXDBJ */ 1781 error = 0; 1782 1783 #ifdef WAPBL 1784 if (error) 1785 return error; 1786 if (mp->mnt_wapbl) { 1787 error = wapbl_flush(mp->mnt_wapbl, 1); 1788 if (flags & FORCECLOSE) 1789 error = 0; 1790 } 1791 #endif 1792 1793 return (error); 1794 } 1795 1796 /* 1797 * Get file system statistics. 1798 */ 1799 int 1800 ffs_statvfs(struct mount *mp, struct statvfs *sbp) 1801 { 1802 struct ufsmount *ump; 1803 struct fs *fs; 1804 1805 ump = VFSTOUFS(mp); 1806 fs = ump->um_fs; 1807 mutex_enter(&ump->um_lock); 1808 sbp->f_bsize = fs->fs_bsize; 1809 sbp->f_frsize = fs->fs_fsize; 1810 sbp->f_iosize = fs->fs_bsize; 1811 sbp->f_blocks = fs->fs_dsize; 1812 sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) + 1813 fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks); 1814 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t) 1815 fs->fs_minfree) / (u_int64_t) 100; 1816 if (sbp->f_bfree > sbp->f_bresvd) 1817 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd; 1818 else 1819 sbp->f_bavail = 0; 1820 sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; 1821 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; 1822 sbp->f_favail = sbp->f_ffree; 1823 sbp->f_fresvd = 0; 1824 mutex_exit(&ump->um_lock); 1825 copy_statvfs_info(sbp, mp); 1826 1827 return (0); 1828 } 1829 1830 struct ffs_sync_ctx { 1831 int waitfor; 1832 }; 1833 1834 static bool 1835 ffs_sync_selector(void *cl, struct vnode *vp) 1836 { 1837 struct ffs_sync_ctx *c = cl; 1838 struct inode *ip; 1839 1840 KASSERT(mutex_owned(vp->v_interlock)); 1841 1842 ip = VTOI(vp); 1843 /* 1844 * Skip the vnode/inode if inaccessible. 1845 */ 1846 if (ip == NULL || vp->v_type == VNON) 1847 return false; 1848 1849 /* 1850 * We deliberately update inode times here. This will 1851 * prevent a massive queue of updates accumulating, only 1852 * to be handled by a call to unmount. 1853 * 1854 * XXX It would be better to have the syncer trickle these 1855 * out. Adjustment needed to allow registering vnodes for 1856 * sync when the vnode is clean, but the inode dirty. Or 1857 * have ufs itself trickle out inode updates. 1858 * 1859 * If doing a lazy sync, we don't care about metadata or 1860 * data updates, because they are handled by each vnode's 1861 * synclist entry. In this case we are only interested in 1862 * writing back modified inodes. 1863 */ 1864 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | 1865 IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 && 1866 (c->waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) && 1867 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) 1868 return false; 1869 1870 return true; 1871 } 1872 1873 /* 1874 * Go through the disk queues to initiate sandbagged IO; 1875 * go through the inodes to write those that have been modified; 1876 * initiate the writing of the super block if it has been modified. 1877 * 1878 * Note: we are always called with the filesystem marked `MPBUSY'. 1879 */ 1880 int 1881 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) 1882 { 1883 struct vnode *vp; 1884 struct ufsmount *ump = VFSTOUFS(mp); 1885 struct fs *fs; 1886 struct vnode_iterator *marker; 1887 int error, allerror = 0; 1888 struct ffs_sync_ctx ctx; 1889 1890 fs = ump->um_fs; 1891 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ 1892 panic("%s: rofs mod, fs=%s", __func__, fs->fs_fsmnt); 1893 } 1894 1895 /* 1896 * Write back each (modified) inode. 1897 */ 1898 vfs_vnode_iterator_init(mp, &marker); 1899 1900 ctx.waitfor = waitfor; 1901 while ((vp = vfs_vnode_iterator_next(marker, ffs_sync_selector, &ctx))) 1902 { 1903 error = vn_lock(vp, 1904 LK_EXCLUSIVE | (waitfor == MNT_LAZY ? LK_NOWAIT : 0)); 1905 if (error) { 1906 vrele(vp); 1907 continue; 1908 } 1909 if (waitfor == MNT_LAZY) { 1910 error = UFS_WAPBL_BEGIN(vp->v_mount); 1911 if (!error) { 1912 error = ffs_update(vp, NULL, NULL, 1913 UPDATE_CLOSE); 1914 UFS_WAPBL_END(vp->v_mount); 1915 } 1916 } else { 1917 error = VOP_FSYNC(vp, cred, FSYNC_NOLOG | 1918 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0); 1919 } 1920 if (error) 1921 allerror = error; 1922 vput(vp); 1923 } 1924 vfs_vnode_iterator_destroy(marker); 1925 1926 /* 1927 * Force stale file system control information to be flushed. 1928 */ 1929 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 || 1930 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) { 1931 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1932 if ((error = VOP_FSYNC(ump->um_devvp, cred, 1933 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG, 1934 0, 0)) != 0) 1935 allerror = error; 1936 VOP_UNLOCK(ump->um_devvp); 1937 } 1938 #if defined(QUOTA) || defined(QUOTA2) 1939 qsync(mp); 1940 #endif 1941 /* 1942 * Write back modified superblock. 1943 */ 1944 if (fs->fs_fmod != 0) { 1945 fs->fs_fmod = 0; 1946 fs->fs_time = time_second; 1947 error = UFS_WAPBL_BEGIN(mp); 1948 if (error) 1949 allerror = error; 1950 else { 1951 if ((error = ffs_cgupdate(ump, waitfor))) 1952 allerror = error; 1953 UFS_WAPBL_END(mp); 1954 } 1955 } 1956 1957 #ifdef WAPBL 1958 if (mp->mnt_wapbl) { 1959 error = wapbl_flush(mp->mnt_wapbl, (waitfor == MNT_WAIT)); 1960 if (error) 1961 allerror = error; 1962 } 1963 #endif 1964 1965 return (allerror); 1966 } 1967 1968 /* 1969 * Load inode from disk and initialize vnode. 1970 */ 1971 static int 1972 ffs_init_vnode(struct ufsmount *ump, struct vnode *vp, ino_t ino) 1973 { 1974 struct fs *fs; 1975 struct inode *ip; 1976 struct buf *bp; 1977 int error; 1978 1979 fs = ump->um_fs; 1980 1981 /* Read in the disk contents for the inode. */ 1982 error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)), 1983 (int)fs->fs_bsize, 0, &bp); 1984 if (error) 1985 return error; 1986 1987 /* Allocate and initialize inode. */ 1988 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK); 1989 memset(ip, 0, sizeof(struct inode)); 1990 ip->i_ump = ump; 1991 ip->i_fs = fs; 1992 ip->i_dev = ump->um_dev; 1993 ip->i_number = ino; 1994 if (ump->um_fstype == UFS1) 1995 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache, 1996 PR_WAITOK); 1997 else 1998 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache, 1999 PR_WAITOK); 2000 ffs_load_inode(bp, ip, fs, ino); 2001 brelse(bp, 0); 2002 ip->i_vnode = vp; 2003 #if defined(QUOTA) || defined(QUOTA2) 2004 ufsquota_init(ip); 2005 #endif 2006 2007 /* Initialise vnode with this inode. */ 2008 vp->v_tag = VT_UFS; 2009 vp->v_op = ffs_vnodeop_p; 2010 vp->v_vflag |= VV_LOCKSWORK; 2011 vp->v_data = ip; 2012 2013 /* Initialize genfs node. */ 2014 genfs_node_init(vp, &ffs_genfsops); 2015 2016 return 0; 2017 } 2018 2019 /* 2020 * Undo ffs_init_vnode(). 2021 */ 2022 static void 2023 ffs_deinit_vnode(struct ufsmount *ump, struct vnode *vp) 2024 { 2025 struct inode *ip = VTOI(vp); 2026 2027 genfs_node_destroy(vp); 2028 vp->v_data = NULL; 2029 2030 if (ump->um_fstype == UFS1) 2031 pool_cache_put(ffs_dinode1_cache, ip->i_din.ffs1_din); 2032 else 2033 pool_cache_put(ffs_dinode2_cache, ip->i_din.ffs2_din); 2034 pool_cache_put(ffs_inode_cache, ip); 2035 } 2036 2037 /* 2038 * Read an inode from disk and initialize this vnode / inode pair. 2039 * Caller assures no other thread will try to load this inode. 2040 */ 2041 int 2042 ffs_loadvnode(struct mount *mp, struct vnode *vp, 2043 const void *key, size_t key_len, const void **new_key) 2044 { 2045 ino_t ino; 2046 struct fs *fs; 2047 struct inode *ip; 2048 struct ufsmount *ump; 2049 int error; 2050 2051 KASSERT(key_len == sizeof(ino)); 2052 memcpy(&ino, key, key_len); 2053 ump = VFSTOUFS(mp); 2054 fs = ump->um_fs; 2055 2056 error = ffs_init_vnode(ump, vp, ino); 2057 if (error) 2058 return error; 2059 2060 ip = VTOI(vp); 2061 if (ip->i_mode == 0) { 2062 ffs_deinit_vnode(ump, vp); 2063 2064 return ENOENT; 2065 } 2066 2067 /* Initialize the vnode from the inode. */ 2068 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2069 2070 /* Finish inode initialization. */ 2071 ip->i_devvp = ump->um_devvp; 2072 vref(ip->i_devvp); 2073 2074 /* 2075 * Ensure that uid and gid are correct. This is a temporary 2076 * fix until fsck has been changed to do the update. 2077 */ 2078 2079 if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ 2080 fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ 2081 ip->i_uid = ip->i_ffs1_ouid; /* XXX */ 2082 ip->i_gid = ip->i_ffs1_ogid; /* XXX */ 2083 } /* XXX */ 2084 uvm_vnp_setsize(vp, ip->i_size); 2085 *new_key = &ip->i_number; 2086 return 0; 2087 } 2088 2089 /* 2090 * Create a new inode on disk and initialize this vnode / inode pair. 2091 */ 2092 int 2093 ffs_newvnode(struct mount *mp, struct vnode *dvp, struct vnode *vp, 2094 struct vattr *vap, kauth_cred_t cred, 2095 size_t *key_len, const void **new_key) 2096 { 2097 ino_t ino; 2098 struct fs *fs; 2099 struct inode *ip; 2100 struct timespec ts; 2101 struct ufsmount *ump; 2102 int error, mode; 2103 2104 KASSERT(dvp->v_mount == mp); 2105 KASSERT(vap->va_type != VNON); 2106 2107 *key_len = sizeof(ino); 2108 ump = VFSTOUFS(mp); 2109 fs = ump->um_fs; 2110 mode = MAKEIMODE(vap->va_type, vap->va_mode); 2111 2112 /* Allocate fresh inode. */ 2113 error = ffs_valloc(dvp, mode, cred, &ino); 2114 if (error) 2115 return error; 2116 2117 /* Attach inode to vnode. */ 2118 error = ffs_init_vnode(ump, vp, ino); 2119 if (error) { 2120 if (UFS_WAPBL_BEGIN(mp) == 0) { 2121 ffs_vfree(dvp, ino, mode); 2122 UFS_WAPBL_END(mp); 2123 } 2124 return error; 2125 } 2126 2127 ip = VTOI(vp); 2128 if (ip->i_mode) { 2129 panic("%s: dup alloc ino=%" PRId64 " on %s: mode %o/%o " 2130 "gen %x/%x size %" PRIx64 " blocks %" PRIx64, 2131 __func__, ino, fs->fs_fsmnt, DIP(ip, mode), ip->i_mode, 2132 DIP(ip, gen), ip->i_gen, DIP(ip, size), DIP(ip, blocks)); 2133 } 2134 if (DIP(ip, size) || DIP(ip, blocks)) { 2135 printf("%s: ino=%" PRId64 " on %s: " 2136 "gen %x/%x has non zero blocks %" PRIx64 " or size %" 2137 PRIx64 "\n", 2138 __func__, ino, fs->fs_fsmnt, DIP(ip, gen), ip->i_gen, 2139 DIP(ip, blocks), DIP(ip, size)); 2140 if ((ip)->i_ump->um_fstype == UFS1) 2141 panic("%s: dirty filesystem?", __func__); 2142 DIP_ASSIGN(ip, blocks, 0); 2143 DIP_ASSIGN(ip, size, 0); 2144 } 2145 2146 /* Set uid / gid. */ 2147 if (cred == NOCRED || cred == FSCRED) { 2148 ip->i_gid = 0; 2149 ip->i_uid = 0; 2150 } else { 2151 ip->i_gid = VTOI(dvp)->i_gid; 2152 ip->i_uid = kauth_cred_geteuid(cred); 2153 } 2154 DIP_ASSIGN(ip, gid, ip->i_gid); 2155 DIP_ASSIGN(ip, uid, ip->i_uid); 2156 2157 #if defined(QUOTA) || defined(QUOTA2) 2158 error = UFS_WAPBL_BEGIN(mp); 2159 if (error) { 2160 ffs_deinit_vnode(ump, vp); 2161 2162 return error; 2163 } 2164 error = chkiq(ip, 1, cred, 0); 2165 if (error) { 2166 ffs_vfree(dvp, ino, mode); 2167 UFS_WAPBL_END(mp); 2168 ffs_deinit_vnode(ump, vp); 2169 2170 return error; 2171 } 2172 UFS_WAPBL_END(mp); 2173 #endif 2174 2175 /* Set type and finalize. */ 2176 ip->i_flags = 0; 2177 DIP_ASSIGN(ip, flags, 0); 2178 ip->i_mode = mode; 2179 DIP_ASSIGN(ip, mode, mode); 2180 if (vap->va_rdev != VNOVAL) { 2181 /* 2182 * Want to be able to use this to make badblock 2183 * inodes, so don't truncate the dev number. 2184 */ 2185 if (ump->um_fstype == UFS1) 2186 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 2187 UFS_MPNEEDSWAP(ump)); 2188 else 2189 ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, 2190 UFS_MPNEEDSWAP(ump)); 2191 } 2192 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); 2193 ip->i_devvp = ump->um_devvp; 2194 vref(ip->i_devvp); 2195 2196 /* Set up a new generation number for this inode. */ 2197 ip->i_gen++; 2198 DIP_ASSIGN(ip, gen, ip->i_gen); 2199 if (fs->fs_magic == FS_UFS2_MAGIC) { 2200 vfs_timestamp(&ts); 2201 ip->i_ffs2_birthtime = ts.tv_sec; 2202 ip->i_ffs2_birthnsec = ts.tv_nsec; 2203 } 2204 2205 uvm_vnp_setsize(vp, ip->i_size); 2206 *new_key = &ip->i_number; 2207 return 0; 2208 } 2209 2210 /* 2211 * File handle to vnode 2212 * 2213 * Have to be really careful about stale file handles: 2214 * - check that the inode number is valid 2215 * - call ffs_vget() to get the locked inode 2216 * - check for an unallocated inode (i_mode == 0) 2217 * - check that the given client host has export rights and return 2218 * those rights via. exflagsp and credanonp 2219 */ 2220 int 2221 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 2222 { 2223 struct ufid ufh; 2224 int error; 2225 2226 if (fhp->fid_len != sizeof(struct ufid)) 2227 return EINVAL; 2228 2229 memcpy(&ufh, fhp, sizeof(ufh)); 2230 if ((error = ffs_checkrange(mp, ufh.ufid_ino)) != 0) 2231 return error; 2232 2233 return (ufs_fhtovp(mp, &ufh, vpp)); 2234 } 2235 2236 /* 2237 * Vnode pointer to File handle 2238 */ 2239 /* ARGSUSED */ 2240 int 2241 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) 2242 { 2243 struct inode *ip; 2244 struct ufid ufh; 2245 2246 if (*fh_size < sizeof(struct ufid)) { 2247 *fh_size = sizeof(struct ufid); 2248 return E2BIG; 2249 } 2250 ip = VTOI(vp); 2251 *fh_size = sizeof(struct ufid); 2252 memset(&ufh, 0, sizeof(ufh)); 2253 ufh.ufid_len = sizeof(struct ufid); 2254 ufh.ufid_ino = ip->i_number; 2255 ufh.ufid_gen = ip->i_gen; 2256 memcpy(fhp, &ufh, sizeof(ufh)); 2257 return (0); 2258 } 2259 2260 void 2261 ffs_init(void) 2262 { 2263 if (ffs_initcount++ > 0) 2264 return; 2265 2266 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0, 2267 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL); 2268 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0, 2269 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL); 2270 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0, 2271 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL); 2272 ufs_init(); 2273 } 2274 2275 void 2276 ffs_reinit(void) 2277 { 2278 ufs_reinit(); 2279 } 2280 2281 void 2282 ffs_done(void) 2283 { 2284 if (--ffs_initcount > 0) 2285 return; 2286 2287 ufs_done(); 2288 pool_cache_destroy(ffs_dinode2_cache); 2289 pool_cache_destroy(ffs_dinode1_cache); 2290 pool_cache_destroy(ffs_inode_cache); 2291 } 2292 2293 /* 2294 * Write a superblock and associated information back to disk. 2295 */ 2296 int 2297 ffs_sbupdate(struct ufsmount *mp, int waitfor) 2298 { 2299 struct fs *fs = mp->um_fs; 2300 struct buf *bp; 2301 int error; 2302 u_int32_t saveflag; 2303 2304 error = ffs_getblk(mp->um_devvp, 2305 fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK, 2306 fs->fs_sbsize, false, &bp); 2307 if (error) 2308 return error; 2309 saveflag = fs->fs_flags & FS_INTERNAL; 2310 fs->fs_flags &= ~FS_INTERNAL; 2311 2312 memcpy(bp->b_data, fs, fs->fs_sbsize); 2313 2314 ffs_oldfscompat_write((struct fs *)bp->b_data, mp); 2315 #ifdef FFS_EI 2316 if (mp->um_flags & UFS_NEEDSWAP) 2317 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data); 2318 #endif 2319 fs->fs_flags |= saveflag; 2320 2321 if (waitfor == MNT_WAIT) 2322 error = bwrite(bp); 2323 else 2324 bawrite(bp); 2325 return (error); 2326 } 2327 2328 int 2329 ffs_cgupdate(struct ufsmount *mp, int waitfor) 2330 { 2331 struct fs *fs = mp->um_fs; 2332 struct buf *bp; 2333 int blks; 2334 void *space; 2335 int i, size, error = 0, allerror = 0; 2336 2337 UFS_WAPBL_JLOCK_ASSERT(mp); 2338 2339 allerror = ffs_sbupdate(mp, waitfor); 2340 blks = howmany(fs->fs_cssize, fs->fs_fsize); 2341 space = fs->fs_csp; 2342 for (i = 0; i < blks; i += fs->fs_frag) { 2343 size = fs->fs_bsize; 2344 if (i + fs->fs_frag > blks) 2345 size = (blks - i) * fs->fs_fsize; 2346 error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), 2347 FFS_NOBLK, size, false, &bp); 2348 if (error) 2349 break; 2350 #ifdef FFS_EI 2351 if (mp->um_flags & UFS_NEEDSWAP) 2352 ffs_csum_swap((struct csum*)space, 2353 (struct csum*)bp->b_data, size); 2354 else 2355 #endif 2356 memcpy(bp->b_data, space, (u_int)size); 2357 space = (char *)space + size; 2358 if (waitfor == MNT_WAIT) 2359 error = bwrite(bp); 2360 else 2361 bawrite(bp); 2362 } 2363 if (!allerror && error) 2364 allerror = error; 2365 return (allerror); 2366 } 2367 2368 int 2369 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp, 2370 int attrnamespace, const char *attrname) 2371 { 2372 #ifdef UFS_EXTATTR 2373 /* 2374 * File-backed extended attributes are only supported on UFS1. 2375 * UFS2 has native extended attributes. 2376 */ 2377 if (VFSTOUFS(mp)->um_fstype == UFS1) 2378 return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname)); 2379 #endif 2380 return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname)); 2381 } 2382 2383 /* 2384 * Synch vnode for a mounted file system. 2385 */ 2386 static int 2387 ffs_vfs_fsync(vnode_t *vp, int flags) 2388 { 2389 int error, i, pflags; 2390 #ifdef WAPBL 2391 struct mount *mp; 2392 #endif 2393 2394 KASSERT(vp->v_type == VBLK); 2395 KASSERT(spec_node_getmountedfs(vp) != NULL); 2396 2397 /* 2398 * Flush all dirty data associated with the vnode. 2399 */ 2400 pflags = PGO_ALLPAGES | PGO_CLEANIT; 2401 if ((flags & FSYNC_WAIT) != 0) 2402 pflags |= PGO_SYNCIO; 2403 mutex_enter(vp->v_interlock); 2404 error = VOP_PUTPAGES(vp, 0, 0, pflags); 2405 if (error) 2406 return error; 2407 2408 #ifdef WAPBL 2409 mp = spec_node_getmountedfs(vp); 2410 if (mp && mp->mnt_wapbl) { 2411 /* 2412 * Don't bother writing out metadata if the syncer is 2413 * making the request. We will let the sync vnode 2414 * write it out in a single burst through a call to 2415 * VFS_SYNC(). 2416 */ 2417 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0) 2418 return 0; 2419 2420 /* 2421 * Don't flush the log if the vnode being flushed 2422 * contains no dirty buffers that could be in the log. 2423 */ 2424 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 2425 error = wapbl_flush(mp->mnt_wapbl, 0); 2426 if (error) 2427 return error; 2428 } 2429 2430 if ((flags & FSYNC_WAIT) != 0) { 2431 mutex_enter(vp->v_interlock); 2432 while (vp->v_numoutput) 2433 cv_wait(&vp->v_cv, vp->v_interlock); 2434 mutex_exit(vp->v_interlock); 2435 } 2436 2437 return 0; 2438 } 2439 #endif /* WAPBL */ 2440 2441 error = vflushbuf(vp, flags); 2442 if (error == 0 && (flags & FSYNC_CACHE) != 0) { 2443 i = 1; 2444 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, 2445 kauth_cred_get()); 2446 } 2447 2448 return error; 2449 } 2450