/*	$NetBSD: ffs_vfsops.c,v 1.223 2008/04/17 09:52:47 hannken Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.223 2008/04/17 09:52:47 hannken Exp $");

#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#include "opt_softdep.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/* how many times ffs_init() was called */
int ffs_initcount = 0;

extern kmutex_t ufs_hashlock;

extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern const struct vnodeopv_desc ffs_specop_opv_desc;
extern const struct vnodeopv_desc ffs_fifoop_opv_desc;

const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
	&ffs_vnodeop_opv_desc,
	&ffs_specop_opv_desc,
	&ffs_fifoop_opv_desc,
	NULL,
};

struct vfsops ffs_vfsops = {
	MOUNT_FFS,
	sizeof (struct ufs_args),
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statvfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_reinit,
	ffs_done,
	ffs_mountroot,
	ffs_snapshot,
	ffs_extattrctl,
	ffs_suspendctl,
	genfs_renamelock_enter,
	genfs_renamelock_exit,
	ffs_vnodeopv_descs,
	0,
	{ NULL, NULL },
};
VFS_ATTACH(ffs_vfsops);

static const struct genfs_ops ffs_genfsops = {
	.gop_size = ffs_gop_size,
	.gop_alloc = ufs_gop_alloc,
	.gop_write = genfs_gop_write,
	.gop_markupdate = ufs_gop_markupdate,
};

static const struct ufs_ops ffs_ufsops = {
	.uo_itimes = ffs_itimes,
	.uo_update = ffs_update,
	.uo_truncate = ffs_truncate,
	.uo_valloc = ffs_valloc,
	.uo_vfree = ffs_vfree,
	.uo_balloc = ffs_balloc,
};

pool_cache_t ffs_inode_cache;
pool_cache_t ffs_dinode1_cache;
pool_cache_t ffs_dinode2_cache;

static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);

/*
 * Called by main() when ffs is going to be mounted as root.
 */

int
ffs_mountroot(void)
{
	struct fs *fs;
	struct mount *mp;
	struct lwp *l = curlwp;		/* XXX */
	struct ufsmount *ump;
	int error;

	if (device_class(root_device) != DV_DISK)
		return (ENODEV);

	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
		vrele(rootvp);
		return (error);
	}
	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
		vfs_unbusy(mp, false);
		vfs_destroy(mp);
		return (error);
	}
	mutex_enter(&mountlist_lock);
	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mutex_exit(&mountlist_lock);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
	(void)ffs_statvfs(mp, &mp->mnt_stat);
	vfs_unbusy(mp, false);
	setrootfstime((time_t)fs->fs_time);
	return (0);
}

/*
 * VFS Operations.
 *
 * mount system call
 */
int
ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct nameidata nd;
	struct vnode *vp, *devvp = NULL;
	struct ufs_args *args = data;
	struct ufsmount *ump = NULL;
	struct fs *fs;
	int error = 0, flags, update;
	mode_t accessmode;

	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		ump = VFSTOUFS(mp);
		if (ump == NULL)
			return EIO;
		args->fspec = NULL;
		*data_len = sizeof *args;
		return 0;
	}

#if !defined(SOFTDEP)
	mp->mnt_flag &= ~MNT_SOFTDEP;
#endif

	update = mp->mnt_flag & MNT_UPDATE;

	/* Check arguments */
	if (args->fspec != NULL) {
		/*
		 * Look up the name and verify that it's sane.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, args->fspec);
		if ((error = namei(&nd)) != 0)
			return (error);
		devvp = nd.ni_vp;

		if (!update) {
			/*
			 * Be sure this is a valid block device
			 */
			if (devvp->v_type != VBLK)
				error = ENOTBLK;
			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
				error = ENXIO;
		} else {
			/*
			 * Be sure we're still naming the same device
			 * used for our initial mount
			 */
			ump = VFSTOUFS(mp);
			if (devvp != ump->um_devvp) {
				if (devvp->v_rdev != ump->um_devvp->v_rdev)
					error = EINVAL;
				else {
					vrele(devvp);
					devvp = ump->um_devvp;
					vref(devvp);
				}
			}
		}
	} else {
		if (!update) {
			/* New mounts must have a filename for the device */
			return (EINVAL);
		} else {
			/* Use the extant mount */
			ump = VFSTOUFS(mp);
			devvp = ump->um_devvp;
			vref(devvp);
		}
	}

	/*
	 * Mark the device and any existing vnodes as involved in
	 * softdep processing.
	 */
	if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
		devvp->v_uflag |= VU_SOFTDEP;
		mutex_enter(&mntvnode_lock);
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (vp->v_mount != mp || vismarker(vp))
				continue;
			vp->v_uflag |= VU_SOFTDEP;
		}
		mutex_exit(&mntvnode_lock);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (error == 0 && kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL) != 0) {
		accessmode = VREAD;
		if (update ?
		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
		    (mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_ACCESS(devvp, accessmode, l->l_cred);
		VOP_UNLOCK(devvp, 0);
	}

	if (error) {
		vrele(devvp);
		return (error);
	}

	if (!update) {
		int xflags;

		if (mp->mnt_flag & MNT_RDONLY)
			xflags = FREAD;
		else
			xflags = FREAD|FWRITE;
		error = VOP_OPEN(devvp, xflags, FSCRED);
		if (error)
			goto fail;
		error = ffs_mountfs(devvp, mp, l);
		if (error) {
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			(void)VOP_CLOSE(devvp, xflags, NOCRED);
			VOP_UNLOCK(devvp, 0);
			goto fail;
		}

		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
		    (MNT_SOFTDEP | MNT_ASYNC)) {
			printf("%s fs uses soft updates, "
			    "ignoring async mode\n",
			    fs->fs_fsmnt);
			mp->mnt_flag &= ~MNT_ASYNC;
		}
	} else {
		/*
		 * Update the mount.
		 */

		/*
		 * The initial mount got a reference on this
		 * device, so drop the one obtained via
		 * namei(), above.
		 */
		vrele(devvp);

		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Changing from r/w to r/o
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP)
				error = softdep_flushfiles(mp, flags, l);
			else
				error = ffs_flushfiles(mp, flags, l);
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("%s: update error: blocks %" PRId64
				    " files %d\n",
				    fs->fs_fsmnt, fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if (error == 0 &&
			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
			    fs->fs_clean & FS_WASCLEAN) {
				if (mp->mnt_flag & MNT_SOFTDEP)
					fs->fs_flags &= ~FS_DOSOFTDEP;
				fs->fs_clean = FS_ISCLEAN;
				(void) ffs_sbupdate(ump, MNT_WAIT);
			}
			if (error)
				return (error);
			fs->fs_ronly = 1;
			fs->fs_fmod = 0;
		}

		/*
		 * Flush soft dependencies if disabling it via an update
		 * mount. This may leave some items to be processed,
		 * so don't do this yet XXX.
		 */
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
#ifdef notyet
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = softdep_flushfiles(mp, flags, l);
			if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
				fs->fs_flags &= ~FS_DOSOFTDEP;
			(void) ffs_sbupdate(ump, MNT_WAIT);
#elif defined(SOFTDEP)
			mp->mnt_flag |= MNT_SOFTDEP;
#endif
		}

		/*
		 * When upgrading to a softdep mount, we must first flush
		 * all vnodes. (not done yet -- see above)
		 */
		if (!(fs->fs_flags & FS_DOSOFTDEP) &&
		    (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
#ifdef notyet
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ffs_flushfiles(mp, flags, l);
#else
			mp->mnt_flag &= ~MNT_SOFTDEP;
#endif
		}

		if (mp->mnt_flag & MNT_RELOAD) {
			error = ffs_reload(mp, l->l_cred, l);
			if (error)
				return (error);
		}

		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
			/*
			 * Changing from read-only to read/write
			 */
			fs->fs_ronly = 0;
			fs->fs_clean <<= 1;
			fs->fs_fmod = 1;
			if ((fs->fs_flags & FS_DOSOFTDEP)) {
				error = softdep_mount(devvp, mp, fs,
				    l->l_cred);
				if (error)
					return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
		}
		if (args->fspec == NULL)
			return EINVAL;
		if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
		    (MNT_SOFTDEP | MNT_ASYNC)) {
			printf("%s fs uses soft updates, ignoring async mode\n",
			    fs->fs_fsmnt);
			mp->mnt_flag &= ~MNT_ASYNC;
		}
	}

	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
	if (error == 0)
		(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
		    sizeof(fs->fs_fsmnt));
	if (mp->mnt_flag & MNT_SOFTDEP)
		fs->fs_flags |= FS_DOSOFTDEP;
	else
		fs->fs_flags &= ~FS_DOSOFTDEP;
	if (fs->fs_fmod != 0) {	/* XXX */
		fs->fs_fmod = 0;
		if (fs->fs_clean & FS_WASCLEAN)
			fs->fs_time = time_second;
		else {
			printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
			    mp->mnt_stat.f_mntfromname, fs->fs_clean);
			printf("%s: lost blocks %" PRId64 " files %d\n",
			    mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
		}
		(void) ffs_cgupdate(ump, MNT_WAIT);
	}
	return (error);

fail:
	vrele(devvp);
	return (error);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 * 1) invalidate all cached meta-data.
 * 2) re-read superblock from disk.
 * 3) re-read summary information from disk.
 * 4) invalidate all inactive vnodes.
 * 5) invalidate all cached file data.
 * 6) re-read inode data for all active vnodes.
 */
int
ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct partinfo dpart;
	int i, blks, size, error;
	int32_t *lp;
	struct ufsmount *ump;
	daddr_t sblockloc;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);

	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = ump->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
	VOP_UNLOCK(devvp, 0);
	if (error)
		panic("ffs_reload: dirty1");
	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = ump->um_fs;
	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;
	/* XXX we don't handle possibility that superblock moved. */
	error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
	    NOCRED, &bp);
	if (error) {
		brelse(bp, 0);
		return (error);
	}
	newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	memcpy(newfs, bp->b_data, fs->fs_sbsize);
#ifdef FFS_EI
	if (ump->um_flags & UFS_NEEDSWAP) {
		ffs_sb_swap((struct fs *)bp->b_data, newfs);
		fs->fs_flags |= FS_SWAPPED;
	} else
#endif
		fs->fs_flags &= ~FS_SWAPPED;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp, 0);
		free(newfs, M_UFSMNT);
		return (EIO);		/* XXX needs translation */
	}
	/* Store off old fs_sblockloc for ffs_oldfscompat_read. */
	sblockloc = fs->fs_sblockloc;
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_ronly = fs->fs_ronly;
	newfs->fs_active = fs->fs_active;
	memcpy(fs, newfs, (u_int)fs->fs_sbsize);
	brelse(bp, 0);
	free(newfs, M_UFSMNT);

	/* Recheck for apple UFS filesystem */
	ump->um_flags &= ~UFS_ISAPPLEUFS;
	/* First check to see if this is tagged as an Apple UFS filesystem
	 * in the disklabel
	 */
	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
	    (dpart.part->p_fstype == FS_APPLEUFS)) {
		ump->um_flags |= UFS_ISAPPLEUFS;
	}
#ifdef APPLE_UFS
	else {
		/* Manually look for an apple ufs label, and if a valid one
		 * is found, then treat it like an Apple UFS filesystem anyway
		 */
		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
		    APPLEUFS_LABEL_SIZE, cred, &bp);
		if (error) {
			brelse(bp, 0);
			return (error);
		}
		error = ffs_appleufs_validate(fs->fs_fsmnt,
		    (struct appleufslabel *)bp->b_data, NULL);
		if (error == 0)
			ump->um_flags |= UFS_ISAPPLEUFS;
		brelse(bp, 0);
		bp = NULL;
	}
#else
	if (ump->um_flags & UFS_ISAPPLEUFS)
		return (EIO);
#endif

	if (UFS_MPISAPPLEUFS(ump)) {
		/* see comment about NeXT below */
		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
		mp->mnt_iflag |= IMNT_DTYPE;
	} else {
		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
		ump->um_dirblksiz = DIRBLKSIZ;
		if (ump->um_maxsymlinklen > 0)
			mp->mnt_iflag |= IMNT_DTYPE;
		else
			mp->mnt_iflag &= ~IMNT_DTYPE;
	}
	ffs_oldfscompat_read(fs, ump, sblockloc);
	mutex_enter(&ump->um_lock);
	ump->um_maxfilesize = fs->fs_maxfilesize;
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	mutex_exit(&ump->um_lock);

	ffs_statvfs(mp, &mp->mnt_stat);
	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			return (error);
		}
#ifdef FFS_EI
		if (UFS_FSNEEDSWAP(fs))
			ffs_csum_swap((struct csum *)bp->b_data,
			    (struct csum *)space, size);
		else
#endif
			memcpy(space, bp->b_data, (size_t)size);
		space = (char *)space + size;
		brelse(bp, 0);
	}
	if ((fs->fs_flags & FS_DOSOFTDEP))
		softdep_mount(devvp, mp, fs, cred);
	if (fs->fs_snapinum[0] != 0)
		ffs_snapshot_mount(mp);
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

	/* Allocate a marker vnode. */
	if ((mvp = vnalloc(mp)) == NULL)
		return ENOMEM;
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() can be called indirectly
	 */
	mutex_enter(&mntvnode_lock);
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * Step 4: invalidate all inactive vnodes.
		 */
		if (vrecycle(vp, &mntvnode_lock, l)) {
			mutex_enter(&mntvnode_lock);
			(void)vunmark(mvp);
			goto loop;
		}
		/*
		 * Step 5: invalidate all cached file data.
		 */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&mntvnode_lock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			(void)vunmark(mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, cred, l, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 6: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			vput(vp);
			(void)vunmark(mvp);
			break;
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_ffs_effnlink = ip->i_nlink;
		brelse(bp, 0);
		vput(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
	return (error);
}

/*
 * Possible superblock locations ordered from most to least likely.
 */
static const int sblock_try[] = SBLOCKSEARCH;

/*
 * Common code for mount and mountroot
 */
int
ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct partinfo dpart;
	void *space;
	daddr_t sblockloc, fsblockloc;
	int blks, fstype;
	int error, i, size, ronly, bset = 0;
#ifdef FFS_EI
	int needswap = 0;		/* keep gcc happy */
#endif
	int32_t *lp;
	kauth_cred_t cred;
	u_int32_t sbsize = 8192;	/* keep gcc happy */

	dev = devvp->v_rdev;
	cred = l ? l->l_cred : NOCRED;

	/* Flush out any old buffers remaining from a previous use. */
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
	VOP_UNLOCK(devvp, 0);
	if (error)
		return (error);

	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;

	bp = NULL;
	ump = NULL;
	fs = NULL;
	sblockloc = 0;
	fstype = 0;

	error = fstrans_mount(mp);
	if (error)
		return error;

	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; ; i++) {
		if (bp != NULL) {
			brelse(bp, BC_NOCACHE);
			bp = NULL;
		}
		if (sblock_try[i] == -1) {
			error = EINVAL;
			fs = NULL;
			goto out;
		}
		error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
		    &bp);
		if (error) {
			fs = NULL;
			goto out;
		}
		fs = (struct fs *)bp->b_data;
		fsblockloc = sblockloc = sblock_try[i];
		if (fs->fs_magic == FS_UFS1_MAGIC) {
			sbsize = fs->fs_sbsize;
			fstype = UFS1;
#ifdef FFS_EI
			needswap = 0;
		} else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
			sbsize = bswap32(fs->fs_sbsize);
			fstype = UFS1;
			needswap = 1;
#endif
		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
			sbsize = fs->fs_sbsize;
			fstype = UFS2;
#ifdef FFS_EI
			needswap = 0;
		} else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
			sbsize = bswap32(fs->fs_sbsize);
			fstype = UFS2;
			needswap = 1;
#endif
		} else
			continue;

		/* fs->fs_sblockloc isn't defined for old filesystems */
		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
			if (sblockloc == SBLOCK_UFS2)
				/*
				 * This is likely to be the first alternate
				 * in a filesystem with 64k blocks.
				 * Don't use it.
				 */
				continue;
			fsblockloc = sblockloc;
		} else {
			fsblockloc = fs->fs_sblockloc;
#ifdef FFS_EI
			if (needswap)
				fsblockloc = bswap64(fsblockloc);
#endif
		}

		/* Check we haven't found an alternate superblock */
		if (fsblockloc != sblockloc)
			continue;

		/* Validate size of superblock */
		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
			continue;

		/* Ok seems to be a good superblock */
		break;
	}

	fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
	memcpy(fs, bp->b_data, sbsize);

	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
	memset(ump, 0, sizeof *ump);
	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
	error = ffs_snapshot_init(ump);
	if (error)
		goto out;
	ump->um_fs = fs;
	ump->um_ops = &ffs_ufsops;

#ifdef FFS_EI
	if (needswap) {
		ffs_sb_swap((struct fs *)bp->b_data, fs);
		fs->fs_flags |= FS_SWAPPED;
	} else
#endif
		fs->fs_flags &= ~FS_SWAPPED;

	ffs_oldfscompat_read(fs, ump, sblockloc);
	ump->um_maxfilesize = fs->fs_maxfilesize;

	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}

	ump->um_fstype = fstype;
	if (fs->fs_sbsize < SBLOCKSIZE)
		brelse(bp, BC_INVAL);
	else
		brelse(bp, 0);
	bp = NULL;

	/* First check to see if this is tagged as an Apple UFS filesystem
	 * in the disklabel
	 */
	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
	    (dpart.part->p_fstype == FS_APPLEUFS)) {
		ump->um_flags |= UFS_ISAPPLEUFS;
	}
#ifdef APPLE_UFS
	else {
		/* Manually look for an apple ufs label, and if a valid one
		 * is found, then treat it like an Apple UFS filesystem anyway
		 */
		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
		    APPLEUFS_LABEL_SIZE, cred, &bp);
		if (error)
			goto out;
		error = ffs_appleufs_validate(fs->fs_fsmnt,
		    (struct appleufslabel *)bp->b_data, NULL);
		if (error == 0) {
			ump->um_flags |= UFS_ISAPPLEUFS;
		}
		brelse(bp, 0);
		bp = NULL;
	}
#else
	if (ump->um_flags & UFS_ISAPPLEUFS) {
		error = EINVAL;
		goto out;
	}
#endif

	/*
	 * verify that we can access the last block in the fs
	 * if we're mounting read/write.
	 */

	if (!ronly) {
		error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
		    cred, &bp);
		if (bp->b_bcount != fs->fs_fsize)
			error = EINVAL;
		if (error) {
			bset = BC_INVAL;
			goto out;
		}
		brelse(bp, BC_INVAL);
		bp = NULL;
	}

	fs->fs_ronly = ronly;
	if (ronly == 0) {
		fs->fs_clean <<= 1;
		fs->fs_fmod = 1;
	}
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp);
		if (error) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
#ifdef FFS_EI
		if (needswap)
			ffs_csum_swap((struct csum *)bp->b_data,
			    (struct csum *)space, size);
		else
#endif
			memcpy(space, bp->b_data, (u_int)size);

		space = (char *)space + size;
		brelse(bp, 0);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
	fs->fs_contigdirs = space;
	space = (char *)space + size;
	memset(fs->fs_contigdirs, 0, size);
	/* Compatibility for old filesystems - XXX */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
	if (UFS_MPISAPPLEUFS(ump)) {
		/* NeXT used to keep short symlinks in the inode even
		 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
		 * is probably -1, but we still need to be able to identify
		 * short symlinks.
		 */
		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
		mp->mnt_iflag |= IMNT_DTYPE;
	} else {
		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
		ump->um_dirblksiz = DIRBLKSIZ;
		if (ump->um_maxsymlinklen > 0)
			mp->mnt_iflag |= IMNT_DTYPE;
		else
			mp->mnt_iflag &= ~IMNT_DTYPE;
	}
	mp->mnt_fs_bshift = fs->fs_bshift;
	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_iflag |= IMNT_MPSAFE;
#ifdef FFS_EI
	if (needswap)
		ump->um_flags |= UFS_NEEDSWAP;
#endif
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
	devvp->v_specmountpoint = mp;
	if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
		error = softdep_mount(devvp, mp, fs, cred);
		if (error) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
	}
	if (ronly == 0 && fs->fs_snapinum[0] != 0)
		ffs_snapshot_mount(mp);
#ifdef UFS_EXTATTR
	/*
	 * Initialize file-backed extended attributes on UFS1 file
	 * systems.
	 */
	if (ump->um_fstype == UFS1) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		/*
		 * XXX Just ignore errors. Not clear that we should
		 * XXX fail the mount in this case.
		 */
		(void) ufs_extattr_autostart(mp, l);
#endif
	}
#endif /* UFS_EXTATTR */
	return (0);
out:
	fstrans_unmount(mp);
	if (fs)
		free(fs, M_UFSMNT);
	devvp->v_specmountpoint = NULL;
	if (bp)
		brelse(bp, bset);
	if (ump) {
		if (ump->um_oldfscompat)
			free(ump->um_oldfscompat, M_UFSMNT);
		mutex_destroy(&ump->um_lock);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	return (error);
}

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
{
	off_t maxfilesize;
	int32_t *extrasave;

	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		return;

	if (!ump->um_oldfscompat)
		ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
		    M_UFSMNT, M_WAITOK);

	memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
	extrasave = ump->um_oldfscompat;
	extrasave += 512/sizeof(int32_t);
	extrasave[0] = fs->fs_old_npsect;
	extrasave[1] = fs->fs_old_interleave;
	extrasave[2] = fs->fs_old_trackskew;

	/* These fields will be overwritten by their
	 * original values in ffs_oldfscompat_write, so it is harmless
	 * to modify them here.
	 */
	fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
	fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
	fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
	fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;

	fs->fs_maxbsize = fs->fs_bsize;
	fs->fs_time = fs->fs_old_time;
	fs->fs_size = fs->fs_old_size;
	fs->fs_dsize = fs->fs_old_dsize;
	fs->fs_csaddr = fs->fs_old_csaddr;
	fs->fs_sblockloc = sblockloc;

	fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);

	if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
		fs->fs_old_nrpos = 8;
		fs->fs_old_npsect = fs->fs_old_nsect;
		fs->fs_old_interleave = 1;
		fs->fs_old_trackskew = 0;
	}

	if (fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}

	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
	if (fs->fs_maxfilesize > maxfilesize)
		fs->fs_maxfilesize = maxfilesize;

	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;

#if 0
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
#endif
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
{
	int32_t *extrasave;

	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		return;

	fs->fs_old_time = fs->fs_time;
	fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
	fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
	fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
	fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
	fs->fs_old_flags = fs->fs_flags;

#if 0
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
	}
#endif

	memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
	extrasave = ump->um_oldfscompat;
	extrasave += 512/sizeof(int32_t);
	fs->fs_old_npsect = extrasave[0];
	fs->fs_old_interleave = extrasave[1];
	fs->fs_old_trackskew = extrasave[2];
}

/*
 * unmount system call
 */
int
ffs_unmount(struct mount *mp, int mntflags)
{
	struct lwp *l = curlwp;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	int error, flags, penderr;

	penderr = 0;
	flags = 0;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
#ifdef UFS_EXTATTR
	if (ump->um_fstype == UFS1) {
		ufs_extattr_stop(mp, l);
		ufs_extattr_uepm_destroy(&ump->um_extattr);
	}
#endif /* UFS_EXTATTR */
	if (mp->mnt_flag & MNT_SOFTDEP) {
		if ((error = softdep_flushfiles(mp, flags, l)) != 0)
			return (error);
	} else {
		if ((error = ffs_flushfiles(mp, flags, l)) != 0)
			return (error);
	}
	mutex_enter(&ump->um_lock);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: unmount pending error: blocks %" PRId64
		    " files %d\n",
		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
		penderr = 1;
	}
	mutex_exit(&ump->um_lock);
	if (fs->fs_ronly == 0 &&
	    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
	    fs->fs_clean & FS_WASCLEAN) {
		/*
		 * XXXX don't mark fs clean in the case of softdep
		 * pending block errors, until they are fixed.
		 */
		if (penderr == 0) {
			if (mp->mnt_flag & MNT_SOFTDEP)
				fs->fs_flags &= ~FS_DOSOFTDEP;
			fs->fs_clean = FS_ISCLEAN;
		}
		fs->fs_fmod = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	if (ump->um_devvp->v_type != VBAD)
		ump->um_devvp->v_specmountpoint = NULL;
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
	    NOCRED);
	vput(ump->um_devvp);
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	if (ump->um_oldfscompat != NULL)
		free(ump->um_oldfscompat, M_UFSMNT);
	softdep_unmount(mp);
	mutex_destroy(&ump->um_lock);
	ffs_snapshot_fini(ump);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	mp->mnt_flag &= ~MNT_LOCAL;
	fstrans_unmount(mp);
	return (0);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
{
	extern int doforce;
	struct ufsmount *ump;
	int error;

	if (!doforce)
		flags &= ~FORCECLOSE;
	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			if (ump->um_quotas[i] == NULLVP)
				continue;
			quotaoff(l, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
		return (error);
	ffs_snapshot_unmount(mp);
	/*
	 * Flush all the files.
	 */
	error = vflush(mp, NULLVP, flags);
	if (error)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}

/*
 * Get file system statistics.
 */
int
ffs_statvfs(struct mount *mp, struct statvfs *sbp)
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	mutex_enter(&ump->um_lock);
	sbp->f_bsize = fs->fs_bsize;
	sbp->f_frsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
	    fs->fs_minfree) / (u_int64_t) 100;
	if (sbp->f_bfree > sbp->f_bresvd)
		sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
	else
		sbp->f_bavail = 0;
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	sbp->f_favail = sbp->f_ffree;
	sbp->f_fresvd = 0;
	mutex_exit(&ump->um_lock);
	copy_statvfs_info(sbp, mp);

	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
{
	struct lwp *l = curlwp;
	struct vnode *vp, *mvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, allerror = 0;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {	/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("update: rofs mod");
	}

	/* Allocate a marker vnode. */
	if ((mvp = vnalloc(mp)) == NULL)
		return (ENOMEM);

	fstrans_start(mp, FSTRANS_SHARED);
	/*
	 * Write back each (modified) inode.
	 */
	mutex_enter(&mntvnode_lock);
loop:
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() can be called indirectly
	 */
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
		vmark(mvp, vp);
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		mutex_enter(&vp->v_interlock);
		ip = VTOI(vp);
		if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
		    vp->v_type == VNON || ((ip->i_flag &
		    (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
		    LIST_EMPTY(&vp->v_dirtyblkhd) &&
		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
		{
			mutex_exit(&vp->v_interlock);
			continue;
		}
		if (vp->v_type == VBLK &&
		    fstrans_getstate(mp) == FSTRANS_SUSPENDING) {
			mutex_exit(&vp->v_interlock);
			continue;
		}
		mutex_exit(&mntvnode_lock);
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
		if (error) {
			mutex_enter(&mntvnode_lock);
			if (error == ENOENT) {
				(void)vunmark(mvp);
				goto loop;
			}
			continue;
		}
		if (vp->v_type == VREG && waitfor == MNT_LAZY)
			error = ffs_update(vp, NULL, NULL, 0);
		else
			error = VOP_FSYNC(vp, cred,
			    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
		if (error)
			allerror = error;
		vput(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			mutex_enter(&mntvnode_lock);
			goto loop;
		}
	}
	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			mutex_enter(&mntvnode_lock);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0) {
		fs->fs_fmod = 0;
		fs->fs_time = time_second;
		if ((error = ffs_cgupdate(ump, waitfor)))
			allerror = error;
	}
	fstrans_done(mp);
	vnfree(mvp);
	return (allerror);
}

/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
int
ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

retry:
	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
		return (0);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}
	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);

	/*
	 * If someone beat us to it, put back the freshly allocated
	 * vnode/inode pair and retry.
	 */
	mutex_enter(&ufs_hashlock);
	if (ufs_ihashget(dev, ino, 0) != NULL) {
		mutex_exit(&ufs_hashlock);
		ungetnewvnode(vp);
		pool_cache_put(ffs_inode_cache, ip);
		goto retry;
	}

	vp->v_vflag |= VV_LOCKSWORK;
	if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
		vp->v_uflag |= VU_SOFTDEP;

	/*
	 * XXX MFS ends up here, too, to allocate an inode.  Should we
	 * XXX create another pool for MFS inodes?
	 */

	memset(ip, 0, sizeof(struct inode));
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	LIST_INIT(&ip->i_pcbufhd);
#ifdef QUOTA
	ufsquota_init(ip);
#endif

	/*
	 * Initialize genfs node, we might proceed to destroy it in
	 * error branches.
	 */
	genfs_node_init(vp, &ffs_genfsops);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */

	ufs_ihashins(ip);
	mutex_exit(&ufs_hashlock);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {

		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */

		vput(vp);
		brelse(bp, 0);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
		    PR_WAITOK);
	else
		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
		    PR_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_ffs_effnlink = ip->i_nlink;
	brelse(bp, 0);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */

	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */

	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */

	if (fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_ffs1_ouid;		/* XXX */
		ip->i_gid = ip->i_ffs1_ogid;		/* XXX */
	}						/* XXX */
	uvm_vnp_setsize(vp, ip->i_size);
	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
int
ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
{
	struct ufid ufh;
	struct fs *fs;

	if (fhp->fid_len != sizeof(struct ufid))
		return EINVAL;

	memcpy(&ufh, fhp, sizeof(ufh));
	fs = VFSTOUFS(mp)->um_fs;
	if (ufh.ufid_ino < ROOTINO ||
	    ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_fhtovp(mp, &ufh, vpp));
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
int
ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
{
	struct inode *ip;
	struct ufid ufh;

	if (*fh_size < sizeof(struct ufid)) {
		*fh_size = sizeof(struct ufid);
		return E2BIG;
	}
	ip = VTOI(vp);
	*fh_size = sizeof(struct ufid);
	memset(&ufh, 0, sizeof(ufh));
	ufh.ufid_len = sizeof(struct ufid);
	ufh.ufid_ino = ip->i_number;
	ufh.ufid_gen = ip->i_gen;
	memcpy(fhp, &ufh, sizeof(ufh));
	return (0);
}

void
ffs_init(void)
{
	if (ffs_initcount++ > 0)
		return;

	ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
	    "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
	ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
	    "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
	ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
	    "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
	softdep_initialize();
	ufs_init();
}

void
ffs_reinit(void)
{
	softdep_reinitialize();
	ufs_reinit();
}

void
ffs_done(void)
{
	if (--ffs_initcount > 0)
		return;

	/* XXX softdep cleanup ? */
	ufs_done();
	pool_cache_destroy(ffs_dinode2_cache);
	pool_cache_destroy(ffs_dinode1_cache);
	pool_cache_destroy(ffs_inode_cache);
}

SYSCTL_SETUP(sysctl_vfs_ffs_setup, "sysctl vfs.ffs subtree setup")
{
#if 0
	extern int doasyncfree;
#endif
	extern int ffs_log_changeopt;

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "vfs", NULL,
		       NULL, 0, NULL, 0,
		       CTL_VFS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "ffs",
		       SYSCTL_DESCR("Berkeley Fast File System"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, 1, CTL_EOL);

	/*
	 * @@@ should we even bother with these first three?
	 */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "doclusterread", NULL,
		       sysctl_notavail, 0, NULL, 0,
		       CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "doclusterwrite", NULL,
		       sysctl_notavail, 0, NULL, 0,
		       CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "doreallocblks", NULL,
		       sysctl_notavail, 0, NULL, 0,
		       CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
#if 0
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "doasyncfree",
		       SYSCTL_DESCR("Release dirty blocks asynchronously"),
		       NULL, 0, &doasyncfree, 0,
		       CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
#endif
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "log_changeopt",
		       SYSCTL_DESCR("Log changes in optimization strategy"),
		       NULL, 0, &ffs_log_changeopt, 0,
		       CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
}

/*
 * Write a superblock and associated information back to disk.
 */
int
ffs_sbupdate(struct ufsmount *mp, int waitfor)
{
	struct fs *fs = mp->um_fs;
	struct buf *bp;
	int error = 0;
	u_int32_t saveflag;

	/*
	 * fs_sblockloc is a byte offset; fs_fshift - fs_fsbtodb equals
	 * log2(DEV_BSIZE), so the shift below converts it to the
	 * DEV_BSIZE block number that getblk() expects.
	 */
	bp = getblk(mp->um_devvp,
	    fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
	    (int)fs->fs_sbsize, 0, 0);
	saveflag = fs->fs_flags & FS_INTERNAL;
	fs->fs_flags &= ~FS_INTERNAL;

	memcpy(bp->b_data, fs, fs->fs_sbsize);

	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
#ifdef FFS_EI
	if (mp->um_flags & UFS_NEEDSWAP)
		ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
#endif
	fs->fs_flags |= saveflag;

	if (waitfor == MNT_WAIT)
		error = bwrite(bp);
	else
		bawrite(bp);
	return (error);
}

int
ffs_cgupdate(struct ufsmount *mp, int waitfor)
{
	struct fs *fs = mp->um_fs;
	struct buf *bp;
	int blks;
	void *space;
	int i, size, error = 0, allerror = 0;

	allerror = ffs_sbupdate(mp, waitfor);
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0);
#ifdef FFS_EI
		if (mp->um_flags & UFS_NEEDSWAP)
			ffs_csum_swap((struct csum *)space,
			    (struct csum *)bp->b_data, size);
		else
#endif
			memcpy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		if (waitfor == MNT_WAIT)
			error = bwrite(bp);
		else
			bawrite(bp);
	}
	if (!allerror && error)
		allerror = error;
	return (allerror);
}

int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (VFSTOUFS(mp)->um_fstype == UFS1)
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname));
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}

int
ffs_suspendctl(struct mount *mp, int cmd)
{
	int error;
	struct lwp *l = curlwp;

	switch (cmd) {
	case SUSPEND_SUSPEND:
		if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
			return error;
		error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
		if (error == 0)
			error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
		if (error != 0) {
			(void) fstrans_setstate(mp, FSTRANS_NORMAL);
			return error;
		}
		return 0;

	case SUSPEND_RESUME:
		return fstrans_setstate(mp, FSTRANS_NORMAL);

	default:
		return EINVAL;
	}
}