1 /*- 2 * Copyright (c) 2011, 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/nlookup.h> 38 #include <sys/vnode.h> 39 #include <sys/mount.h> 40 #include <sys/fcntl.h> 41 #include <sys/buf.h> 42 #include <sys/uuid.h> 43 #include <sys/vfsops.h> 44 #include <sys/sysctl.h> 45 #include <sys/socket.h> 46 47 #include "hammer2.h" 48 #include "hammer2_disk.h" 49 #include "hammer2_mount.h" 50 51 struct hammer2_sync_info { 52 int error; 53 int waitfor; 54 }; 55 56 TAILQ_HEAD(hammer2_mntlist, hammer2_mount); 57 static struct hammer2_mntlist hammer2_mntlist; 58 static struct lock hammer2_mntlk; 59 60 int hammer2_debug; 61 int hammer2_cluster_enable = 1; 62 int hammer2_hardlink_enable = 1; 63 long hammer2_iod_file_read; 64 long hammer2_iod_meta_read; 65 long hammer2_iod_indr_read; 66 long hammer2_iod_file_write; 67 long hammer2_iod_meta_write; 68 long hammer2_iod_indr_write; 69 long hammer2_iod_volu_write; 70 long hammer2_ioa_file_read; 71 long hammer2_ioa_meta_read; 72 long hammer2_ioa_indr_read; 73 long hammer2_ioa_file_write; 74 long hammer2_ioa_meta_write; 75 long hammer2_ioa_indr_write; 76 long hammer2_ioa_volu_write; 77 78 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 79 80 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 81 &hammer2_debug, 0, ""); 82 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 83 &hammer2_cluster_enable, 0, ""); 84 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 85 &hammer2_hardlink_enable, 0, ""); 86 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 87 &hammer2_iod_file_read, 0, ""); 88 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 89 &hammer2_iod_meta_read, 0, ""); 90 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 91 &hammer2_iod_indr_read, 0, ""); 92 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 93 &hammer2_iod_file_write, 0, ""); 94 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 95 &hammer2_iod_meta_write, 0, ""); 96 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 97 &hammer2_iod_indr_write, 0, ""); 98 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 99 &hammer2_iod_volu_write, 0, ""); 100 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 101 &hammer2_ioa_file_read, 0, ""); 102 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 103 &hammer2_ioa_meta_read, 0, ""); 104 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 105 &hammer2_ioa_indr_read, 0, ""); 106 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW, 107 &hammer2_ioa_file_write, 0, ""); 108 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW, 109 &hammer2_ioa_meta_write, 0, ""); 110 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW, 111 &hammer2_ioa_indr_write, 0, ""); 112 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW, 113 &hammer2_ioa_volu_write, 0, ""); 114 115 static int hammer2_vfs_init(struct vfsconf *conf); 116 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 117 struct ucred *cred); 118 static int hammer2_remount(struct mount *, char *, struct vnode *, 119 struct ucred *); 120 static int hammer2_vfs_unmount(struct mount *mp, int mntflags); 121 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp); 122 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, 123 struct ucred *cred); 124 static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 125 struct ucred *cred); 126 static int hammer2_vfs_sync(struct mount *mp, int waitfor); 127 static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 128 ino_t ino, struct vnode **vpp); 129 static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 130 struct fid *fhp, struct vnode **vpp); 131 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp); 132 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 133 int *exflagsp, struct ucred **credanonp); 134 135 static int hammer2_install_volume_header(hammer2_mount_t *hmp); 136 static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data); 137 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 138 139 static int hammer2_rcvdmsg(kdmsg_msg_t *msg); 140 static void hammer2_autodmsg(kdmsg_msg_t *msg); 141 142 /* 143 * HAMMER2 vfs operations. 144 */ 145 static struct vfsops hammer2_vfsops = { 146 .vfs_init = hammer2_vfs_init, 147 .vfs_sync = hammer2_vfs_sync, 148 .vfs_mount = hammer2_vfs_mount, 149 .vfs_unmount = hammer2_vfs_unmount, 150 .vfs_root = hammer2_vfs_root, 151 .vfs_statfs = hammer2_vfs_statfs, 152 .vfs_statvfs = hammer2_vfs_statvfs, 153 .vfs_vget = hammer2_vfs_vget, 154 .vfs_vptofh = hammer2_vfs_vptofh, 155 .vfs_fhtovp = hammer2_vfs_fhtovp, 156 .vfs_checkexp = hammer2_vfs_checkexp 157 }; 158 159 MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", ""); 160 161 VFS_SET(hammer2_vfsops, hammer2, 0); 162 MODULE_VERSION(hammer2, 1); 163 164 static 165 int 166 hammer2_vfs_init(struct vfsconf *conf) 167 { 168 int error; 169 170 error = 0; 171 172 if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref)) 173 error = EINVAL; 174 if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data)) 175 error = EINVAL; 176 if (HAMMER2_ALLOCREF_BYTES != sizeof(struct hammer2_allocref)) 177 error = EINVAL; 178 if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data)) 179 error = EINVAL; 180 181 if (error) 182 kprintf("HAMMER2 structure size mismatch; cannot continue.\n"); 183 184 lockinit(&hammer2_mntlk, "mntlk", 0, 0); 185 TAILQ_INIT(&hammer2_mntlist); 186 187 return (error); 188 } 189 190 /* 191 * Mount or remount HAMMER2 fileystem from physical media 192 * 193 * mountroot 194 * mp mount point structure 195 * path NULL 196 * data <unused> 197 * cred <unused> 198 * 199 * mount 200 * mp mount point structure 201 * path path to mount point 202 * data pointer to argument structure in user space 203 * volume volume path (device@LABEL form) 204 * hflags user mount flags 205 * cred user credentials 206 * 207 * RETURNS: 0 Success 208 * !0 error number 209 */ 210 static 211 int 212 hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 213 struct ucred *cred) 214 { 215 struct hammer2_mount_info info; 216 hammer2_pfsmount_t *pmp; 217 hammer2_mount_t *hmp; 218 hammer2_key_t lhc; 219 struct vnode *devvp; 220 struct nlookupdata nd; 221 hammer2_chain_t *parent; 222 hammer2_chain_t *schain; 223 hammer2_chain_t *rchain; 224 struct file *fp; 225 char devstr[MNAMELEN]; 226 size_t size; 227 size_t done; 228 char *dev; 229 char *label; 230 int ronly = 1; 231 int create_hmp; 232 int error; 233 234 hmp = NULL; 235 pmp = NULL; 236 dev = NULL; 237 label = NULL; 238 devvp = NULL; 239 240 kprintf("hammer2_mount\n"); 241 242 if (path == NULL) { 243 /* 244 * Root mount 245 */ 246 bzero(&info, sizeof(info)); 247 info.cluster_fd = -1; 248 return (EOPNOTSUPP); 249 } else { 250 /* 251 * Non-root mount or updating a mount 252 */ 253 error = copyin(data, &info, sizeof(info)); 254 if (error) 255 return (error); 256 257 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 258 if (error) 259 return (error); 260 261 /* Extract device and label */ 262 dev = devstr; 263 label = strchr(devstr, '@'); 264 if (label == NULL || 265 ((label + 1) - dev) > done) { 266 return (EINVAL); 267 } 268 *label = '\0'; 269 label++; 270 if (*label == '\0') 271 return (EINVAL); 272 273 if (mp->mnt_flag & MNT_UPDATE) { 274 /* Update mount */ 275 /* HAMMER2 implements NFS export via mountctl */ 276 hmp = MPTOHMP(mp); 277 devvp = hmp->devvp; 278 error = hammer2_remount(mp, path, devvp, cred); 279 return error; 280 } 281 } 282 283 /* 284 * PFS mount 285 * 286 * Lookup name and verify it refers to a block device. 287 */ 288 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 289 if (error == 0) 290 error = nlookup(&nd); 291 if (error == 0) 292 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 293 nlookup_done(&nd); 294 295 if (error == 0) { 296 if (vn_isdisk(devvp, &error)) 297 error = vfs_mountedon(devvp); 298 } 299 300 /* 301 * Determine if the device has already been mounted. After this 302 * check hmp will be non-NULL if we are doing the second or more 303 * hammer2 mounts from the same device. 304 */ 305 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 306 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 307 if (hmp->devvp == devvp) 308 break; 309 } 310 311 /* 312 * Open the device if this isn't a secondary mount 313 */ 314 if (hmp) { 315 create_hmp = 0; 316 } else { 317 create_hmp = 1; 318 if (error == 0 && vcount(devvp) > 0) 319 error = EBUSY; 320 321 /* 322 * Now open the device 323 */ 324 if (error == 0) { 325 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 326 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 327 error = vinvalbuf(devvp, V_SAVE, 0, 0); 328 if (error == 0) { 329 error = VOP_OPEN(devvp, 330 ronly ? FREAD : FREAD | FWRITE, 331 FSCRED, NULL); 332 } 333 vn_unlock(devvp); 334 } 335 if (error && devvp) { 336 vrele(devvp); 337 devvp = NULL; 338 } 339 if (error) { 340 lockmgr(&hammer2_mntlk, LK_RELEASE); 341 return error; 342 } 343 } 344 345 /* 346 * Block device opened successfully, finish initializing the 347 * mount structure. 348 * 349 * From this point on we have to call hammer2_unmount() on failure. 350 */ 351 pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO); 352 mp->mnt_data = (qaddr_t)pmp; 353 pmp->mp = mp; 354 355 kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); 356 kdmsg_iocom_init(&pmp->iocom, pmp, 357 KDMSG_IOCOMF_AUTOCONN | 358 KDMSG_IOCOMF_AUTOSPAN | 359 KDMSG_IOCOMF_AUTOCIRC, 360 pmp->mmsg, hammer2_rcvdmsg); 361 362 if (create_hmp) { 363 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 364 hmp->ronly = ronly; 365 hmp->devvp = devvp; 366 kmalloc_create(&hmp->minode, "HAMMER2-inodes"); 367 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 368 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 369 } 370 ccms_domain_init(&pmp->ccms_dom); 371 pmp->hmp = hmp; 372 ++hmp->pmp_count; 373 lockmgr(&hammer2_mntlk, LK_RELEASE); 374 kprintf("hammer2_mount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 375 376 mp->mnt_flag = MNT_LOCAL; 377 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 378 379 if (create_hmp) { 380 /* 381 * vchain setup. vchain.data is special cased to NULL. 382 * vchain.refs is initialized and will never drop to 0. 383 */ 384 hmp->vchain.refs = 1; 385 hmp->vchain.data = (void *)&hmp->voldata; 386 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 387 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 388 hmp->vchain.bref_flush = hmp->vchain.bref; 389 ccms_cst_init(&hmp->vchain.cst, NULL); 390 /* hmp->vchain.u.xxx is left NULL */ 391 lockinit(&hmp->alloclk, "h2alloc", 0, 0); 392 lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); 393 394 /* 395 * Install the volume header 396 */ 397 error = hammer2_install_volume_header(hmp); 398 if (error) { 399 hammer2_vfs_unmount(mp, MNT_FORCE); 400 return error; 401 } 402 } 403 404 /* 405 * required mount structure initializations 406 */ 407 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 408 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 409 410 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 411 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 412 413 /* 414 * Optional fields 415 */ 416 mp->mnt_iosize_max = MAXPHYS; 417 418 /* 419 * First locate the super-root inode, which is key 0 relative to the 420 * volume header's blockset. 421 * 422 * Then locate the root inode by scanning the directory keyspace 423 * represented by the label. 424 */ 425 if (create_hmp) { 426 parent = &hmp->vchain; 427 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 428 schain = hammer2_chain_lookup(hmp, &parent, 429 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 0); 430 hammer2_chain_unlock(hmp, parent); 431 if (schain == NULL) { 432 kprintf("hammer2_mount: invalid super-root\n"); 433 hammer2_vfs_unmount(mp, MNT_FORCE); 434 return EINVAL; 435 } 436 hammer2_chain_ref(hmp, schain); /* for hmp->schain */ 437 hmp->schain = schain; /* left locked */ 438 } else { 439 schain = hmp->schain; 440 hammer2_chain_lock(hmp, schain, HAMMER2_RESOLVE_ALWAYS); 441 } 442 443 parent = schain; 444 lhc = hammer2_dirhash(label, strlen(label)); 445 rchain = hammer2_chain_lookup(hmp, &parent, 446 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 447 0); 448 while (rchain) { 449 if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE && 450 rchain->u.ip && 451 strcmp(label, rchain->data->ipdata.filename) == 0) { 452 break; 453 } 454 rchain = hammer2_chain_next(hmp, &parent, rchain, 455 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 456 0); 457 } 458 hammer2_chain_unlock(hmp, parent); 459 if (rchain == NULL) { 460 kprintf("hammer2_mount: PFS label not found\n"); 461 hammer2_vfs_unmount(mp, MNT_FORCE); 462 return EINVAL; 463 } 464 if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { 465 hammer2_chain_unlock(hmp, rchain); 466 kprintf("hammer2_mount: PFS label already mounted!\n"); 467 hammer2_vfs_unmount(mp, MNT_FORCE); 468 return EBUSY; 469 } 470 atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 471 472 hammer2_chain_ref(hmp, rchain); /* for pmp->rchain */ 473 hammer2_chain_unlock(hmp, rchain); 474 pmp->rchain = rchain; /* left held & unlocked */ 475 pmp->iroot = rchain->u.ip; /* implied hold from rchain */ 476 pmp->iroot->pmp = pmp; 477 478 kprintf("iroot %p\n", pmp->iroot); 479 480 /* 481 * Ref the cluster management messaging descriptor. The mount 482 * program deals with the other end of the communications pipe. 483 */ 484 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 485 if (fp == NULL) { 486 kprintf("hammer2_mount: bad cluster_fd!\n"); 487 hammer2_vfs_unmount(mp, MNT_FORCE); 488 return EBADF; 489 } 490 hammer2_cluster_reconnect(pmp, fp); 491 492 /* 493 * Finish setup 494 */ 495 vfs_getnewfsid(mp); 496 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 497 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 498 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 499 500 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 501 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 502 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 503 copyinstr(path, mp->mnt_stat.f_mntonname, 504 sizeof(mp->mnt_stat.f_mntonname) - 1, 505 &size); 506 507 /* 508 * Initial statfs to prime mnt_stat. 509 */ 510 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 511 512 return 0; 513 } 514 515 static 516 int 517 hammer2_remount(struct mount *mp, char *path, struct vnode *devvp, 518 struct ucred *cred) 519 { 520 return (0); 521 } 522 523 static 524 int 525 hammer2_vfs_unmount(struct mount *mp, int mntflags) 526 { 527 hammer2_pfsmount_t *pmp; 528 hammer2_mount_t *hmp; 529 int flags; 530 int error = 0; 531 int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 532 struct vnode *devvp; 533 534 pmp = MPTOPMP(mp); 535 hmp = pmp->hmp; 536 flags = 0; 537 538 if (mntflags & MNT_FORCE) 539 flags |= FORCECLOSE; 540 541 hammer2_mount_exlock(hmp); 542 543 /* 544 * If mount initialization proceeded far enough we must flush 545 * its vnodes. 546 */ 547 if (pmp->iroot) 548 error = vflush(mp, 0, flags); 549 550 if (error) 551 return error; 552 553 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 554 --hmp->pmp_count; 555 kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 556 557 /* 558 * Flush any left over chains. The voldata lock is only used 559 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX. 560 */ 561 hammer2_voldata_lock(hmp); 562 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 563 HAMMER2_CHAIN_MODIFIED_AUX | 564 HAMMER2_CHAIN_SUBMODIFIED)) { 565 hammer2_voldata_unlock(hmp); 566 hammer2_vfs_sync(mp, MNT_WAIT); 567 } else { 568 hammer2_voldata_unlock(hmp); 569 } 570 if (hmp->pmp_count == 0) { 571 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 572 HAMMER2_CHAIN_MODIFIED_AUX | 573 HAMMER2_CHAIN_SUBMODIFIED)) { 574 kprintf("hammer2_unmount: chains left over after " 575 "final sync\n"); 576 if (hammer2_debug & 0x0010) 577 Debugger("entered debugger"); 578 } 579 } 580 581 /* 582 * Cleanup the root and super-root chain elements (which should be 583 * clean). 584 */ 585 pmp->iroot = NULL; 586 if (pmp->rchain) { 587 atomic_clear_int(&pmp->rchain->flags, HAMMER2_CHAIN_MOUNTED); 588 KKASSERT(pmp->rchain->refs == 1); 589 hammer2_chain_drop(hmp, pmp->rchain); 590 pmp->rchain = NULL; 591 } 592 ccms_domain_uninit(&pmp->ccms_dom); 593 594 /* 595 * Kill cluster controller 596 */ 597 kdmsg_iocom_uninit(&pmp->iocom); 598 599 /* 600 * If no PFS's left drop the master hammer2_mount for the device. 601 */ 602 if (hmp->pmp_count == 0) { 603 if (hmp->schain) { 604 KKASSERT(hmp->schain->refs == 1); 605 hammer2_chain_drop(hmp, hmp->schain); 606 hmp->schain = NULL; 607 } 608 609 /* 610 * Finish up with the device vnode 611 */ 612 if ((devvp = hmp->devvp) != NULL) { 613 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 614 hmp->devvp = NULL; 615 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE)); 616 vrele(devvp); 617 devvp = NULL; 618 } 619 } 620 hammer2_mount_unlock(hmp); 621 622 pmp->mp = NULL; 623 pmp->hmp = NULL; 624 mp->mnt_data = NULL; 625 626 kmalloc_destroy(&pmp->mmsg); 627 628 kfree(pmp, M_HAMMER2); 629 if (hmp->pmp_count == 0) { 630 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 631 kmalloc_destroy(&hmp->minode); 632 kmalloc_destroy(&hmp->mchain); 633 kfree(hmp, M_HAMMER2); 634 } 635 lockmgr(&hammer2_mntlk, LK_RELEASE); 636 return (error); 637 } 638 639 static 640 int 641 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 642 ino_t ino, struct vnode **vpp) 643 { 644 kprintf("hammer2_vget\n"); 645 return (EOPNOTSUPP); 646 } 647 648 static 649 int 650 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 651 { 652 hammer2_pfsmount_t *pmp; 653 hammer2_mount_t *hmp; 654 int error; 655 struct vnode *vp; 656 657 pmp = MPTOPMP(mp); 658 hmp = pmp->hmp; 659 hammer2_mount_exlock(hmp); 660 if (pmp->iroot == NULL) { 661 *vpp = NULL; 662 error = EINVAL; 663 } else { 664 hammer2_chain_lock(hmp, &pmp->iroot->chain, 665 HAMMER2_RESOLVE_ALWAYS | 666 HAMMER2_RESOLVE_SHARED); 667 vp = hammer2_igetv(pmp->iroot, &error); 668 hammer2_chain_unlock(hmp, &pmp->iroot->chain); 669 *vpp = vp; 670 if (vp == NULL) 671 kprintf("vnodefail\n"); 672 } 673 hammer2_mount_unlock(hmp); 674 675 return (error); 676 } 677 678 /* 679 * Filesystem status 680 * 681 * XXX incorporate pmp->iroot->ip_data.inode_quota and data_quota 682 */ 683 static 684 int 685 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 686 { 687 hammer2_pfsmount_t *pmp; 688 hammer2_mount_t *hmp; 689 690 pmp = MPTOPMP(mp); 691 hmp = MPTOHMP(mp); 692 693 mp->mnt_stat.f_files = pmp->iroot->ip_data.inode_count + 694 pmp->iroot->delta_icount; 695 mp->mnt_stat.f_ffree = 0; 696 mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 697 mp->mnt_stat.f_bfree = (hmp->voldata.allocator_size - 698 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 699 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 700 701 *sbp = mp->mnt_stat; 702 return (0); 703 } 704 705 static 706 int 707 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 708 { 709 hammer2_pfsmount_t *pmp; 710 hammer2_mount_t *hmp; 711 712 pmp = MPTOPMP(mp); 713 hmp = MPTOHMP(mp); 714 715 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 716 mp->mnt_vstat.f_files = pmp->iroot->ip_data.inode_count + 717 pmp->iroot->delta_icount; 718 mp->mnt_vstat.f_ffree = 0; 719 mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 720 mp->mnt_vstat.f_bfree = (hmp->voldata.allocator_size - 721 hmp->voldata.allocator_beg) / HAMMER2_PBUFSIZE; 722 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 723 724 *sbp = mp->mnt_vstat; 725 return (0); 726 } 727 728 /* 729 * Sync the entire filesystem; this is called from the filesystem syncer 730 * process periodically and whenever a user calls sync(1) on the hammer 731 * mountpoint. 732 * 733 * Currently is actually called from the syncer! \o/ 734 * 735 * This task will have to snapshot the state of the dirty inode chain. 736 * From that, it will have to make sure all of the inodes on the dirty 737 * chain have IO initiated. We make sure that io is initiated for the root 738 * block. 739 * 740 * If waitfor is set, we wait for media to acknowledge the new rootblock. 741 * 742 * THINKS: side A vs side B, to have sync not stall all I/O? 743 */ 744 static 745 int 746 hammer2_vfs_sync(struct mount *mp, int waitfor) 747 { 748 struct hammer2_sync_info info; 749 hammer2_mount_t *hmp; 750 int flags; 751 int error; 752 int haswork; 753 int i; 754 755 hmp = MPTOHMP(mp); 756 757 flags = VMSC_GETVP; 758 if (waitfor & MNT_LAZY) 759 flags |= VMSC_ONEPASS; 760 761 info.error = 0; 762 info.waitfor = MNT_NOWAIT; 763 vmntvnodescan(mp, flags | VMSC_NOWAIT, 764 hammer2_sync_scan1, 765 hammer2_sync_scan2, &info); 766 if (info.error == 0 && (waitfor & MNT_WAIT)) { 767 info.waitfor = waitfor; 768 vmntvnodescan(mp, flags, 769 hammer2_sync_scan1, 770 hammer2_sync_scan2, &info); 771 772 } 773 #if 0 774 if (waitfor == MNT_WAIT) { 775 /* XXX */ 776 } else { 777 /* XXX */ 778 } 779 #endif 780 hammer2_chain_lock(hmp, &hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 781 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 782 HAMMER2_CHAIN_MODIFIED_AUX | 783 HAMMER2_CHAIN_SUBMODIFIED)) { 784 hammer2_chain_flush(hmp, &hmp->vchain, 0); 785 haswork = 1; 786 } else { 787 haswork = 0; 788 } 789 hammer2_chain_unlock(hmp, &hmp->vchain); 790 791 error = 0; 792 793 /* 794 * We can't safely flush the volume header until we have 795 * flushed any device buffers which have built up. 796 */ 797 #if 0 798 if ((waitfor & MNT_LAZY) == 0) { 799 waitfor = MNT_NOWAIT; 800 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 801 error = VOP_FSYNC(hmp->devvp, waitfor, 0); 802 vn_unlock(hmp->devvp); 803 } 804 #endif 805 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 806 error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 807 vn_unlock(hmp->devvp); 808 809 if (error == 0 && haswork) { 810 struct buf *bp; 811 812 /* 813 * Synchronize the disk before flushing the volume 814 * header. 815 */ 816 bp = getpbuf(NULL); 817 bp->b_bio1.bio_offset = 0; 818 bp->b_bufsize = 0; 819 bp->b_bcount = 0; 820 bp->b_cmd = BUF_CMD_FLUSH; 821 bp->b_bio1.bio_done = biodone_sync; 822 bp->b_bio1.bio_flags |= BIO_SYNC; 823 vn_strategy(hmp->devvp, &bp->b_bio1); 824 biowait(&bp->b_bio1, "h2vol"); 825 relpbuf(bp, NULL); 826 827 /* 828 * Then we can safely flush the version of the volume header 829 * synchronized by the flush code. 830 */ 831 i = hmp->volhdrno + 1; 832 if (i >= HAMMER2_NUM_VOLHDRS) 833 i = 0; 834 if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 835 hmp->volsync.volu_size) { 836 i = 0; 837 } 838 kprintf("sync volhdr %d %jd\n", 839 i, (intmax_t)hmp->volsync.volu_size); 840 bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 841 HAMMER2_PBUFSIZE, 0, 0); 842 bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 843 bawrite(bp); 844 hmp->volhdrno = i; 845 } 846 return (error); 847 } 848 849 /* 850 * Sync passes. 851 * 852 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code 853 * won't flush on those flags. The syncer code above will do a 854 * general meta-data flush globally that will catch these flags. 855 */ 856 static int 857 hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data) 858 { 859 hammer2_inode_t *ip; 860 861 ip = VTOI(vp); 862 if (vp->v_type == VNON || ip == NULL || 863 ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | 864 HAMMER2_CHAIN_DIRTYEMBED)) == 0 && 865 RB_EMPTY(&vp->v_rbdirty_tree))) { 866 return(-1); 867 } 868 return(0); 869 } 870 871 static int 872 hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 873 { 874 struct hammer2_sync_info *info = data; 875 hammer2_inode_t *ip; 876 int error; 877 878 ip = VTOI(vp); 879 if (vp->v_type == VNON || vp->v_type == VBAD || 880 ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | 881 HAMMER2_CHAIN_DIRTYEMBED)) == 0 && 882 RB_EMPTY(&vp->v_rbdirty_tree))) { 883 return(0); 884 } 885 error = VOP_FSYNC(vp, MNT_NOWAIT, 0); 886 if (error) 887 info->error = error; 888 return(0); 889 } 890 891 static 892 int 893 hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp) 894 { 895 return (0); 896 } 897 898 static 899 int 900 hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 901 struct fid *fhp, struct vnode **vpp) 902 { 903 return (0); 904 } 905 906 static 907 int 908 hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 909 int *exflagsp, struct ucred **credanonp) 910 { 911 return (0); 912 } 913 914 /* 915 * Support code for hammer2_mount(). Read, verify, and install the volume 916 * header into the HMP 917 * 918 * XXX read four volhdrs and use the one with the highest TID whos CRC 919 * matches. 920 * 921 * XXX check iCRCs. 922 * 923 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to 924 * nonexistant locations. 925 * 926 * XXX Record selected volhdr and ring updates to each of 4 volhdrs 927 */ 928 static 929 int 930 hammer2_install_volume_header(hammer2_mount_t *hmp) 931 { 932 hammer2_volume_data_t *vd; 933 struct buf *bp; 934 hammer2_crc32_t crc0, crc, bcrc0, bcrc; 935 int error_reported; 936 int error; 937 int valid; 938 int i; 939 940 error_reported = 0; 941 error = 0; 942 valid = 0; 943 bp = NULL; 944 945 /* 946 * There are up to 4 copies of the volume header (syncs iterate 947 * between them so there is no single master). We don't trust the 948 * volu_size field so we don't know precisely how large the filesystem 949 * is, so depend on the OS to return an error if we go beyond the 950 * block device's EOF. 951 */ 952 for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) { 953 error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 954 HAMMER2_VOLUME_BYTES, &bp); 955 if (error) { 956 brelse(bp); 957 bp = NULL; 958 continue; 959 } 960 961 vd = (struct hammer2_volume_data *) bp->b_data; 962 if ((vd->magic != HAMMER2_VOLUME_ID_HBO) && 963 (vd->magic != HAMMER2_VOLUME_ID_ABO)) { 964 brelse(bp); 965 bp = NULL; 966 continue; 967 } 968 969 if (vd->magic == HAMMER2_VOLUME_ID_ABO) { 970 /* XXX: Reversed-endianness filesystem */ 971 kprintf("hammer2: reverse-endian filesystem detected"); 972 brelse(bp); 973 bp = NULL; 974 continue; 975 } 976 977 crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0]; 978 crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF, 979 HAMMER2_VOLUME_ICRC0_SIZE); 980 bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1]; 981 bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF, 982 HAMMER2_VOLUME_ICRC1_SIZE); 983 if ((crc0 != crc) || (bcrc0 != bcrc)) { 984 kprintf("hammer2 volume header crc " 985 "mismatch copy #%d %08x/%08x\n", 986 i, crc0, crc); 987 error_reported = 1; 988 brelse(bp); 989 bp = NULL; 990 continue; 991 } 992 if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) { 993 valid = 1; 994 hmp->voldata = *vd; 995 hmp->volhdrno = i; 996 } 997 brelse(bp); 998 bp = NULL; 999 } 1000 if (valid) { 1001 hmp->volsync = hmp->voldata; 1002 error = 0; 1003 if (error_reported || bootverbose || 1) { /* 1/DEBUG */ 1004 kprintf("hammer2: using volume header #%d\n", 1005 hmp->volhdrno); 1006 } 1007 } else { 1008 error = EINVAL; 1009 kprintf("hammer2: no valid volume headers found!\n"); 1010 } 1011 return (error); 1012 } 1013 1014 /* 1015 * Reconnect using the passed file pointer. The caller must ref the 1016 * fp for us. 1017 */ 1018 void 1019 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) 1020 { 1021 size_t name_len; 1022 1023 /* 1024 * Closes old comm descriptor, kills threads, cleans up 1025 * states, then installs the new descriptor and creates 1026 * new threads. 1027 */ 1028 kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2"); 1029 1030 /* 1031 * Setup LNK_CONN fields for autoinitiated state machine 1032 */ 1033 pmp->iocom.auto_lnk_conn.pfs_clid = pmp->iroot->ip_data.pfs_clid; 1034 pmp->iocom.auto_lnk_conn.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; 1035 pmp->iocom.auto_lnk_conn.pfs_type = pmp->iroot->ip_data.pfs_type; 1036 pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 1037 pmp->iocom.auto_lnk_conn.peer_type = pmp->hmp->voldata.peer_type; 1038 1039 /* 1040 * Filter adjustment. Clients do not need visibility into other 1041 * clients (otherwise millions of clients would present a serious 1042 * problem). The fs_label also serves to restrict the namespace. 1043 */ 1044 pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2; 1045 pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 1046 switch (pmp->iroot->ip_data.pfs_type) { 1047 case DMSG_PFSTYPE_CLIENT: 1048 pmp->iocom.auto_lnk_conn.peer_mask &= 1049 ~(1LLU << DMSG_PFSTYPE_CLIENT); 1050 break; 1051 default: 1052 break; 1053 } 1054 1055 name_len = pmp->iroot->ip_data.name_len; 1056 if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label)) 1057 name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1; 1058 bcopy(pmp->iroot->ip_data.filename, 1059 pmp->iocom.auto_lnk_conn.fs_label, 1060 name_len); 1061 pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0; 1062 1063 /* 1064 * Setup LNK_SPAN fields for autoinitiated state machine 1065 */ 1066 pmp->iocom.auto_lnk_span.pfs_clid = pmp->iroot->ip_data.pfs_clid; 1067 pmp->iocom.auto_lnk_span.pfs_fsid = pmp->iroot->ip_data.pfs_fsid; 1068 pmp->iocom.auto_lnk_span.pfs_type = pmp->iroot->ip_data.pfs_type; 1069 pmp->iocom.auto_lnk_span.peer_type = pmp->hmp->voldata.peer_type; 1070 pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 1071 name_len = pmp->iroot->ip_data.name_len; 1072 if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label)) 1073 name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1; 1074 bcopy(pmp->iroot->ip_data.filename, 1075 pmp->iocom.auto_lnk_span.fs_label, 1076 name_len); 1077 pmp->iocom.auto_lnk_span.fs_label[name_len] = 0; 1078 1079 kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg); 1080 } 1081 1082 static int 1083 hammer2_rcvdmsg(kdmsg_msg_t *msg) 1084 { 1085 switch(msg->any.head.cmd & DMSGF_TRANSMASK) { 1086 case DMSG_DBG_SHELL: 1087 /* 1088 * (non-transaction) 1089 * Execute shell command (not supported atm) 1090 */ 1091 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 1092 break; 1093 case DMSG_DBG_SHELL | DMSGF_REPLY: 1094 /* 1095 * (non-transaction) 1096 */ 1097 if (msg->aux_data) { 1098 msg->aux_data[msg->aux_size - 1] = 0; 1099 kprintf("HAMMER2 DBG: %s\n", msg->aux_data); 1100 } 1101 break; 1102 default: 1103 /* 1104 * Unsupported message received. We only need to 1105 * reply if it's a transaction in order to close our end. 1106 * Ignore any one-way messages are any further messages 1107 * associated with the transaction. 1108 * 1109 * NOTE: This case also includes DMSG_LNK_ERROR messages 1110 * which might be one-way, replying to those would 1111 * cause an infinite ping-pong. 1112 */ 1113 if (msg->any.head.cmd & DMSGF_CREATE) 1114 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 1115 break; 1116 } 1117 return(0); 1118 } 1119 1120 /* 1121 * This function is called after KDMSG has automatically handled processing 1122 * of a LNK layer message (typically CONN, SPAN, or CIRC). 1123 * 1124 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which 1125 * advertises all available hammer2 super-root volumes. 1126 */ 1127 static void 1128 hammer2_autodmsg(kdmsg_msg_t *msg) 1129 { 1130 hammer2_pfsmount_t *pmp = msg->iocom->handle; 1131 hammer2_mount_t *hmp = pmp->hmp; 1132 int copyid; 1133 1134 /* 1135 * We only care about replies to our LNK_CONN auto-request. kdmsg 1136 * has already processed the reply, we use this calback as a shim 1137 * to know when we can advertise available super-root volumes. 1138 */ 1139 if ((msg->any.head.cmd & DMSGF_TRANSMASK) != 1140 (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) || 1141 msg->state == NULL) { 1142 return; 1143 } 1144 1145 kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd); 1146 1147 if (msg->any.head.cmd & DMSGF_CREATE) { 1148 kprintf("HAMMER2: VOLDATA DUMP\n"); 1149 1150 /* 1151 * Dump the configuration stored in the volume header 1152 */ 1153 hammer2_voldata_lock(hmp); 1154 for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) { 1155 if (hmp->voldata.copyinfo[copyid].copyid == 0) 1156 continue; 1157 hammer2_volconf_update(pmp, copyid); 1158 } 1159 hammer2_voldata_unlock(hmp); 1160 } 1161 if ((msg->any.head.cmd & DMSGF_DELETE) && 1162 msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) { 1163 kprintf("HAMMER2: CONN WAS TERMINATED\n"); 1164 } 1165 } 1166 1167 /* 1168 * Volume configuration updates are passed onto the userland service 1169 * daemon via the open LNK_CONN transaction. 1170 */ 1171 void 1172 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) 1173 { 1174 hammer2_mount_t *hmp = pmp->hmp; 1175 kdmsg_msg_t *msg; 1176 1177 /* XXX interlock against connection state termination */ 1178 kprintf("volconf update %p\n", pmp->iocom.conn_state); 1179 if (pmp->iocom.conn_state) { 1180 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n"); 1181 msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state, 1182 DMSG_LNK_VOLCONF, NULL, NULL); 1183 msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index]; 1184 msg->any.lnk_volconf.mediaid = hmp->voldata.fsid; 1185 msg->any.lnk_volconf.index = index; 1186 kdmsg_msg_write(msg); 1187 } 1188 } 1189