1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/nlookup.h> 39 #include <sys/vnode.h> 40 #include <sys/mount.h> 41 #include <sys/fcntl.h> 42 #include <sys/buf.h> 43 #include <sys/uuid.h> 44 #include <sys/vfsops.h> 45 #include <sys/sysctl.h> 46 #include <sys/socket.h> 47 #include <sys/objcache.h> 48 49 #include <sys/proc.h> 50 #include <sys/namei.h> 51 #include <sys/mountctl.h> 52 #include <sys/dirent.h> 53 #include <sys/uio.h> 54 55 #include <sys/mutex.h> 56 #include <sys/mutex2.h> 57 58 #include "hammer2.h" 59 #include "hammer2_disk.h" 60 #include "hammer2_mount.h" 61 #include "hammer2_lz4.h" 62 63 #include "zlib/hammer2_zlib.h" 64 65 #define REPORT_REFS_ERRORS 1 /* XXX remove me */ 66 67 MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache"); 68 69 struct hammer2_sync_info { 70 int error; 71 int waitfor; 72 }; 73 74 TAILQ_HEAD(hammer2_mntlist, hammer2_dev); 75 TAILQ_HEAD(hammer2_pfslist, hammer2_pfs); 76 static struct hammer2_mntlist hammer2_mntlist; 77 static struct hammer2_pfslist hammer2_pfslist; 78 static struct lock hammer2_mntlk; 79 80 int hammer2_debug; 81 int hammer2_cluster_enable = 1; 82 int hammer2_hardlink_enable = 1; 83 int hammer2_flush_pipe = 100; 84 int hammer2_synchronous_flush = 1; 85 int hammer2_dio_count; 86 long hammer2_limit_dirty_chains; 87 long hammer2_iod_file_read; 88 long hammer2_iod_meta_read; 89 long hammer2_iod_indr_read; 90 long hammer2_iod_fmap_read; 91 long hammer2_iod_volu_read; 92 long hammer2_iod_file_write; 93 long hammer2_iod_meta_write; 94 long hammer2_iod_indr_write; 95 long hammer2_iod_fmap_write; 96 long hammer2_iod_volu_write; 97 long hammer2_ioa_file_read; 98 long hammer2_ioa_meta_read; 99 long hammer2_ioa_indr_read; 100 long hammer2_ioa_fmap_read; 101 long hammer2_ioa_volu_read; 102 long hammer2_ioa_fmap_write; 103 long hammer2_ioa_file_write; 104 long hammer2_ioa_meta_write; 105 long hammer2_ioa_indr_write; 106 long hammer2_ioa_volu_write; 107 108 MALLOC_DECLARE(M_HAMMER2_CBUFFER); 109 MALLOC_DEFINE(M_HAMMER2_CBUFFER, "HAMMER2-compbuffer", 110 "Buffer used for compression."); 111 112 MALLOC_DECLARE(M_HAMMER2_DEBUFFER); 113 MALLOC_DEFINE(M_HAMMER2_DEBUFFER, "HAMMER2-decompbuffer", 114 "Buffer used for decompression."); 115 116 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 117 118 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 119 &hammer2_debug, 0, ""); 120 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 121 &hammer2_cluster_enable, 0, ""); 122 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 123 &hammer2_hardlink_enable, 0, ""); 124 SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW, 125 &hammer2_flush_pipe, 0, ""); 126 SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW, 127 &hammer2_synchronous_flush, 0, ""); 128 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW, 129 &hammer2_limit_dirty_chains, 0, ""); 130 SYSCTL_INT(_vfs_hammer2, OID_AUTO, dio_count, CTLFLAG_RD, 131 &hammer2_dio_count, 0, ""); 132 133 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 134 &hammer2_iod_file_read, 0, ""); 135 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 136 &hammer2_iod_meta_read, 0, ""); 137 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 138 &hammer2_iod_indr_read, 0, ""); 139 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW, 140 &hammer2_iod_fmap_read, 0, ""); 141 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW, 142 
&hammer2_iod_volu_read, 0, ""); 143 144 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 145 &hammer2_iod_file_write, 0, ""); 146 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 147 &hammer2_iod_meta_write, 0, ""); 148 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 149 &hammer2_iod_indr_write, 0, ""); 150 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW, 151 &hammer2_iod_fmap_write, 0, ""); 152 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 153 &hammer2_iod_volu_write, 0, ""); 154 155 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 156 &hammer2_ioa_file_read, 0, ""); 157 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 158 &hammer2_ioa_meta_read, 0, ""); 159 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 160 &hammer2_ioa_indr_read, 0, ""); 161 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW, 162 &hammer2_ioa_fmap_read, 0, ""); 163 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW, 164 &hammer2_ioa_volu_read, 0, ""); 165 166 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW, 167 &hammer2_ioa_file_write, 0, ""); 168 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW, 169 &hammer2_ioa_meta_write, 0, ""); 170 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW, 171 &hammer2_ioa_indr_write, 0, ""); 172 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW, 173 &hammer2_ioa_fmap_write, 0, ""); 174 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW, 175 &hammer2_ioa_volu_write, 0, ""); 176 177 static int hammer2_vfs_init(struct vfsconf *conf); 178 static int hammer2_vfs_uninit(struct vfsconf *vfsp); 179 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 180 struct ucred *cred); 181 static int hammer2_remount(hammer2_dev_t *, struct mount *, char *, 182 struct vnode *, struct ucred *); 183 static int hammer2_recovery(hammer2_dev_t *hmp); 184 static int hammer2_vfs_unmount(struct mount *mp, int mntflags); 185 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp); 186 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, 187 struct ucred *cred); 188 static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 189 struct ucred *cred); 190 static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 191 ino_t ino, struct vnode **vpp); 192 static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 193 struct fid *fhp, struct vnode **vpp); 194 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp); 195 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 196 int *exflagsp, struct ucred **credanonp); 197 198 static int hammer2_install_volume_header(hammer2_dev_t *hmp); 199 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 200 201 static void hammer2_update_pmps(hammer2_dev_t *hmp); 202 203 static void hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp); 204 static void hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp, 205 hammer2_dev_t *hmp); 206 207 /* 208 * HAMMER2 vfs operations. 
209 */ 210 static struct vfsops hammer2_vfsops = { 211 .vfs_init = hammer2_vfs_init, 212 .vfs_uninit = hammer2_vfs_uninit, 213 .vfs_sync = hammer2_vfs_sync, 214 .vfs_mount = hammer2_vfs_mount, 215 .vfs_unmount = hammer2_vfs_unmount, 216 .vfs_root = hammer2_vfs_root, 217 .vfs_statfs = hammer2_vfs_statfs, 218 .vfs_statvfs = hammer2_vfs_statvfs, 219 .vfs_vget = hammer2_vfs_vget, 220 .vfs_vptofh = hammer2_vfs_vptofh, 221 .vfs_fhtovp = hammer2_vfs_fhtovp, 222 .vfs_checkexp = hammer2_vfs_checkexp 223 }; 224 225 MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", ""); 226 227 VFS_SET(hammer2_vfsops, hammer2, 0); 228 MODULE_VERSION(hammer2, 1); 229 230 static 231 int 232 hammer2_vfs_init(struct vfsconf *conf) 233 { 234 static struct objcache_malloc_args margs_read; 235 static struct objcache_malloc_args margs_write; 236 static struct objcache_malloc_args margs_vop; 237 238 int error; 239 240 error = 0; 241 242 if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref)) 243 error = EINVAL; 244 if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data)) 245 error = EINVAL; 246 if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data)) 247 error = EINVAL; 248 249 if (error) 250 kprintf("HAMMER2 structure size mismatch; cannot continue.\n"); 251 252 margs_read.objsize = 65536; 253 margs_read.mtype = M_HAMMER2_DEBUFFER; 254 255 margs_write.objsize = 32768; 256 margs_write.mtype = M_HAMMER2_CBUFFER; 257 258 margs_vop.objsize = sizeof(hammer2_xop_t); 259 margs_vop.mtype = M_HAMMER2; 260 261 /* 262 * Note thaht for the XOPS cache we want backing store allocations 263 * to use M_ZERO. This is not allowed in objcache_get() (to avoid 264 * confusion), so use the backing store function that does it. This 265 * means that initial XOPS objects are zerod but REUSED objects are 266 * not. So we are responsible for cleaning the object up sufficiently 267 * for our needs before objcache_put()ing it back (typically just the 268 * FIFO indices). 269 */ 270 cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc, 271 0, 1, NULL, NULL, NULL, 272 objcache_malloc_alloc, 273 objcache_malloc_free, 274 &margs_read); 275 cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc, 276 0, 1, NULL, NULL, NULL, 277 objcache_malloc_alloc, 278 objcache_malloc_free, 279 &margs_write); 280 cache_xops = objcache_create(margs_vop.mtype->ks_shortdesc, 281 0, 1, NULL, NULL, NULL, 282 objcache_malloc_alloc_zero, 283 objcache_malloc_free, 284 &margs_vop); 285 286 287 lockinit(&hammer2_mntlk, "mntlk", 0, 0); 288 TAILQ_INIT(&hammer2_mntlist); 289 TAILQ_INIT(&hammer2_pfslist); 290 291 hammer2_limit_dirty_chains = desiredvnodes / 10; 292 293 return (error); 294 } 295 296 static 297 int 298 hammer2_vfs_uninit(struct vfsconf *vfsp __unused) 299 { 300 objcache_destroy(cache_buffer_read); 301 objcache_destroy(cache_buffer_write); 302 objcache_destroy(cache_xops); 303 return 0; 304 } 305 306 /* 307 * Core PFS allocator. Used to allocate the pmp structure for PFS cluster 308 * mounts and the spmp structure for media (hmp) structures. 309 * 310 * pmp->modify_tid tracks new modify_tid transaction ids for front-end 311 * transactions. Note that synchronization does not use this field. 312 * (typically frontend operations and synchronization cannot run on the 313 * same PFS node at the same time). 
314 * 315 * XXX check locking 316 */ 317 hammer2_pfs_t * 318 hammer2_pfsalloc(hammer2_chain_t *chain, const hammer2_inode_data_t *ripdata, 319 hammer2_tid_t modify_tid) 320 { 321 hammer2_inode_t *iroot; 322 hammer2_pfs_t *pmp; 323 int count; 324 int i; 325 int j; 326 327 /* 328 * Locate or create the PFS based on the cluster id. If ripdata 329 * is NULL this is a spmp which is unique and is always allocated. 330 */ 331 if (ripdata) { 332 TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) { 333 if (bcmp(&pmp->pfs_clid, &ripdata->meta.pfs_clid, 334 sizeof(pmp->pfs_clid)) == 0) { 335 break; 336 } 337 } 338 } else { 339 pmp = NULL; 340 } 341 342 if (pmp == NULL) { 343 pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO); 344 hammer2_trans_manage_init(pmp); 345 kmalloc_create(&pmp->minode, "HAMMER2-inodes"); 346 kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); 347 lockinit(&pmp->lock, "pfslk", 0, 0); 348 spin_init(&pmp->inum_spin, "hm2pfsalloc_inum"); 349 RB_INIT(&pmp->inum_tree); 350 TAILQ_INIT(&pmp->unlinkq); 351 spin_init(&pmp->list_spin, "hm2pfsalloc_list"); 352 353 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) 354 hammer2_xop_group_init(pmp, &pmp->xop_groups[j]); 355 356 /* 357 * Save the last media transaction id for the flusher. Set 358 * initial 359 */ 360 if (ripdata) 361 pmp->pfs_clid = ripdata->meta.pfs_clid; 362 TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry); 363 364 /* 365 * The synchronization thread may start too early, make 366 * sure it stays frozen until we are ready to let it go. 367 * XXX 368 */ 369 /* 370 pmp->primary_thr.flags = HAMMER2_THREAD_FROZEN | 371 HAMMER2_THREAD_REMASTER; 372 */ 373 } 374 375 /* 376 * Create the PFS's root inode. 377 */ 378 if ((iroot = pmp->iroot) == NULL) { 379 iroot = hammer2_inode_get(pmp, NULL, NULL); 380 pmp->iroot = iroot; 381 hammer2_inode_ref(iroot); 382 hammer2_inode_unlock(iroot); 383 } 384 385 /* 386 * Stop here if no chain is passed in. 387 */ 388 if (chain == NULL) 389 goto done; 390 391 /* 392 * When a chain is passed in we must add it to the PFS's root 393 * inode, update pmp->pfs_types[], and update the syncronization 394 * threads. 395 * 396 * At the moment empty spots can develop due to removals or failures. 397 * Ultimately we want to re-fill these spots but doing so might 398 * confused running code. XXX 399 */ 400 hammer2_inode_ref(iroot); 401 hammer2_mtx_ex(&iroot->lock); 402 j = iroot->cluster.nchains; 403 404 kprintf("add PFS to pmp %p[%d]\n", pmp, j); 405 406 if (j == HAMMER2_MAXCLUSTER) { 407 kprintf("hammer2_mount: cluster full!\n"); 408 /* XXX fatal error? */ 409 } else { 410 KKASSERT(chain->pmp == NULL); 411 chain->pmp = pmp; 412 hammer2_chain_ref(chain); 413 iroot->cluster.array[j].chain = chain; 414 pmp->pfs_types[j] = ripdata->meta.pfs_type; 415 pmp->pfs_names[j] = kstrdup(ripdata->filename, M_HAMMER2); 416 417 /* 418 * If the PFS is already mounted we must account 419 * for the mount_count here. 420 */ 421 if (pmp->mp) 422 ++chain->hmp->mount_count; 423 424 /* 425 * May have to fixup dirty chain tracking. Previous 426 * pmp was NULL so nothing to undo. 427 */ 428 if (chain->flags & HAMMER2_CHAIN_MODIFIED) 429 hammer2_pfs_memory_inc(pmp); 430 ++j; 431 } 432 iroot->cluster.nchains = j; 433 434 /* 435 * Update nmasters from any PFS inode which is part of the cluster. 436 * It is possible that this will result in a value which is too 437 * high. MASTER PFSs are authoritative for pfs_nmasters and will 438 * override this value later on. 
439 * 440 * (This informs us of masters that might not currently be 441 * discoverable by this mount). 442 */ 443 if (ripdata && pmp->pfs_nmasters < ripdata->meta.pfs_nmasters) { 444 pmp->pfs_nmasters = ripdata->meta.pfs_nmasters; 445 } 446 447 /* 448 * Count visible masters. Masters are usually added with 449 * ripdata->meta.pfs_nmasters set to 1. This detects when there 450 * are more (XXX and must update the master inodes). 451 */ 452 count = 0; 453 for (i = 0; i < iroot->cluster.nchains; ++i) { 454 if (pmp->pfs_types[i] == HAMMER2_PFSTYPE_MASTER) 455 ++count; 456 } 457 if (pmp->pfs_nmasters < count) 458 pmp->pfs_nmasters = count; 459 460 /* 461 * Create missing synchronization and support threads. 462 * 463 * Single-node masters (including snapshots) have nothing to 464 * synchronize and do not require this thread. 465 * 466 * Multi-node masters or any number of soft masters, slaves, copy, 467 * or other PFS types need the thread. 468 * 469 * Each thread is responsible for its particular cluster index. 470 * We use independent threads so stalls or mismatches related to 471 * any given target do not affect other targets. 472 */ 473 for (i = 0; i < iroot->cluster.nchains; ++i) { 474 /* 475 * Single-node masters (including snapshots) have nothing 476 * to synchronize and will make direct xops support calls, 477 * thus they do not require this thread. 478 * 479 * Note that there can be thousands of snapshots. We do not 480 * want to create thousands of threads. 481 */ 482 if (pmp->pfs_nmasters <= 1 && 483 pmp->pfs_types[i] == HAMMER2_PFSTYPE_MASTER) { 484 continue; 485 } 486 487 /* 488 * Sync support thread 489 */ 490 if (pmp->sync_thrs[i].td == NULL) { 491 hammer2_thr_create(&pmp->sync_thrs[i], pmp, 492 "h2nod", i, -1, 493 hammer2_primary_sync_thread); 494 } 495 } 496 497 /* 498 * Create missing Xop threads 499 */ 500 if (pmp->mp) 501 hammer2_xop_helper_create(pmp); 502 503 hammer2_mtx_unlock(&iroot->lock); 504 hammer2_inode_drop(iroot); 505 done: 506 return pmp; 507 } 508 509 /* 510 * Destroy a PFS, typically only occurs after the last mount on a device 511 * has gone away. 512 */ 513 static void 514 hammer2_pfsfree(hammer2_pfs_t *pmp) 515 { 516 hammer2_inode_t *iroot; 517 int i; 518 int j; 519 520 /* 521 * Cleanup our reference on iroot. iroot is (should) not be needed 522 * by the flush code. 523 */ 524 TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry); 525 526 iroot = pmp->iroot; 527 if (iroot) { 528 for (i = 0; i < iroot->cluster.nchains; ++i) { 529 hammer2_thr_delete(&pmp->sync_thrs[i]); 530 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) 531 hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]); 532 } 533 #if REPORT_REFS_ERRORS 534 if (pmp->iroot->refs != 1) 535 kprintf("PMP->IROOT %p REFS WRONG %d\n", 536 pmp->iroot, pmp->iroot->refs); 537 #else 538 KKASSERT(pmp->iroot->refs == 1); 539 #endif 540 /* ref for pmp->iroot */ 541 hammer2_inode_drop(pmp->iroot); 542 pmp->iroot = NULL; 543 } 544 545 kmalloc_destroy(&pmp->mmsg); 546 kmalloc_destroy(&pmp->minode); 547 548 kfree(pmp, M_HAMMER2); 549 } 550 551 /* 552 * Remove all references to hmp from the pfs list. Any PFS which becomes 553 * empty is terminated and freed. 554 * 555 * XXX inefficient. 
556 */ 557 static void 558 hammer2_pfsfree_scan(hammer2_dev_t *hmp) 559 { 560 hammer2_pfs_t *pmp; 561 hammer2_inode_t *iroot; 562 hammer2_cluster_t *cluster; 563 hammer2_chain_t *rchain; 564 int didfreeze; 565 int i; 566 int j; 567 568 again: 569 TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) { 570 if ((iroot = pmp->iroot) == NULL) 571 continue; 572 if (hmp->spmp == pmp) { 573 kprintf("unmount hmp %p remove spmp %p\n", 574 hmp, pmp); 575 hmp->spmp = NULL; 576 } 577 578 /* 579 * Determine if this PFS is affected. If it is we must 580 * freeze all management threads and lock its iroot. 581 * 582 * Freezing a management thread forces it idle, operations 583 * in-progress will be aborted and it will have to start 584 * over again when unfrozen, or exit if told to exit. 585 */ 586 cluster = &iroot->cluster; 587 for (i = 0; i < cluster->nchains; ++i) { 588 rchain = cluster->array[i].chain; 589 if (rchain == NULL || rchain->hmp != hmp) 590 continue; 591 break; 592 } 593 if (i != cluster->nchains) { 594 /* 595 * Make sure all synchronization threads are locked 596 * down. 597 */ 598 for (i = 0; i < iroot->cluster.nchains; ++i) { 599 hammer2_thr_freeze_async(&pmp->sync_thrs[i]); 600 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) { 601 hammer2_thr_freeze_async( 602 &pmp->xop_groups[j].thrs[i]); 603 } 604 } 605 for (i = 0; i < iroot->cluster.nchains; ++i) { 606 hammer2_thr_freeze(&pmp->sync_thrs[i]); 607 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) { 608 hammer2_thr_freeze( 609 &pmp->xop_groups[j].thrs[i]); 610 } 611 } 612 613 /* 614 * Lock the inode and clean out matching chains. 615 * Note that we cannot use hammer2_inode_lock_*() 616 * here because that would attempt to validate the 617 * cluster that we are in the middle of ripping 618 * apart. 619 * 620 * WARNING! We are working directly on the inodes 621 * embedded cluster. 622 */ 623 hammer2_mtx_ex(&iroot->lock); 624 625 /* 626 * Remove the chain from matching elements of the PFS. 627 */ 628 for (i = 0; i < cluster->nchains; ++i) { 629 rchain = cluster->array[i].chain; 630 if (rchain == NULL || rchain->hmp != hmp) 631 continue; 632 hammer2_thr_delete(&pmp->sync_thrs[i]); 633 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) { 634 hammer2_thr_delete( 635 &pmp->xop_groups[j].thrs[i]); 636 } 637 rchain = cluster->array[i].chain; 638 cluster->array[i].chain = NULL; 639 pmp->pfs_types[i] = 0; 640 if (pmp->pfs_names[i]) { 641 kfree(pmp->pfs_names[i], M_HAMMER2); 642 pmp->pfs_names[i] = NULL; 643 } 644 hammer2_chain_drop(rchain); 645 646 /* focus hint */ 647 if (cluster->focus == rchain) 648 cluster->focus = NULL; 649 } 650 hammer2_mtx_unlock(&iroot->lock); 651 didfreeze = 1; /* remaster, unfreeze down below */ 652 } else { 653 didfreeze = 0; 654 } 655 656 /* 657 * Cleanup trailing chains. Do not reorder chains (for now). 658 * XXX might remove more than we intended. 659 */ 660 while (i > 0) { 661 if (cluster->array[i - 1].chain) 662 break; 663 --i; 664 } 665 cluster->nchains = i; 666 667 /* 668 * If the PMP has no elements remaining we can destroy it. 669 * (this will transition management threads from frozen->exit). 670 */ 671 if (cluster->nchains == 0) { 672 kprintf("unmount hmp %p last ref to PMP=%p\n", 673 hmp, pmp); 674 hammer2_pfsfree(pmp); 675 goto again; 676 } 677 678 /* 679 * If elements still remain we need to set the REMASTER 680 * flag and unfreeze it. 
681 */ 682 if (didfreeze) { 683 for (i = 0; i < iroot->cluster.nchains; ++i) { 684 hammer2_thr_remaster(&pmp->sync_thrs[i]); 685 hammer2_thr_unfreeze(&pmp->sync_thrs[i]); 686 for (j = 0; j < HAMMER2_XOPGROUPS; ++j) { 687 hammer2_thr_remaster( 688 &pmp->xop_groups[j].thrs[i]); 689 hammer2_thr_unfreeze( 690 &pmp->xop_groups[j].thrs[i]); 691 } 692 } 693 } 694 } 695 } 696 697 /* 698 * Mount or remount HAMMER2 fileystem from physical media 699 * 700 * mountroot 701 * mp mount point structure 702 * path NULL 703 * data <unused> 704 * cred <unused> 705 * 706 * mount 707 * mp mount point structure 708 * path path to mount point 709 * data pointer to argument structure in user space 710 * volume volume path (device@LABEL form) 711 * hflags user mount flags 712 * cred user credentials 713 * 714 * RETURNS: 0 Success 715 * !0 error number 716 */ 717 static 718 int 719 hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 720 struct ucred *cred) 721 { 722 struct hammer2_mount_info info; 723 hammer2_pfs_t *pmp; 724 hammer2_pfs_t *spmp; 725 hammer2_dev_t *hmp; 726 hammer2_key_t key_next; 727 hammer2_key_t key_dummy; 728 hammer2_key_t lhc; 729 struct vnode *devvp; 730 struct nlookupdata nd; 731 hammer2_chain_t *parent; 732 hammer2_chain_t *chain; 733 hammer2_cluster_t *cluster; 734 const hammer2_inode_data_t *ripdata; 735 hammer2_blockref_t bref; 736 struct file *fp; 737 char devstr[MNAMELEN]; 738 size_t size; 739 size_t done; 740 char *dev; 741 char *label; 742 int ronly = 1; 743 int error; 744 int cache_index; 745 int i; 746 747 hmp = NULL; 748 pmp = NULL; 749 dev = NULL; 750 label = NULL; 751 devvp = NULL; 752 cache_index = -1; 753 754 kprintf("hammer2_mount\n"); 755 756 if (path == NULL) { 757 /* 758 * Root mount 759 */ 760 bzero(&info, sizeof(info)); 761 info.cluster_fd = -1; 762 return (EOPNOTSUPP); 763 } else { 764 /* 765 * Non-root mount or updating a mount 766 */ 767 error = copyin(data, &info, sizeof(info)); 768 if (error) 769 return (error); 770 771 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 772 if (error) 773 return (error); 774 775 /* Extract device and label */ 776 dev = devstr; 777 label = strchr(devstr, '@'); 778 if (label == NULL || 779 ((label + 1) - dev) > done) { 780 return (EINVAL); 781 } 782 *label = '\0'; 783 label++; 784 if (*label == '\0') 785 return (EINVAL); 786 787 if (mp->mnt_flag & MNT_UPDATE) { 788 /* 789 * Update mount. Note that pmp->iroot->cluster is 790 * an inode-embedded cluster and thus cannot be 791 * directly locked. 792 * 793 * XXX HAMMER2 needs to implement NFS export via 794 * mountctl. 795 */ 796 pmp = MPTOPMP(mp); 797 cluster = &pmp->iroot->cluster; 798 for (i = 0; i < cluster->nchains; ++i) { 799 if (cluster->array[i].chain == NULL) 800 continue; 801 hmp = cluster->array[i].chain->hmp; 802 devvp = hmp->devvp; 803 error = hammer2_remount(hmp, mp, path, 804 devvp, cred); 805 if (error) 806 break; 807 } 808 809 return error; 810 } 811 } 812 813 /* 814 * HMP device mount 815 * 816 * Lookup name and verify it refers to a block device. 817 */ 818 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 819 if (error == 0) 820 error = nlookup(&nd); 821 if (error == 0) 822 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 823 nlookup_done(&nd); 824 825 if (error == 0) { 826 if (vn_isdisk(devvp, &error)) 827 error = vfs_mountedon(devvp); 828 } 829 830 /* 831 * Determine if the device has already been mounted. After this 832 * check hmp will be non-NULL if we are doing the second or more 833 * hammer2 mounts from the same device. 
834 */ 835 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 836 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 837 if (hmp->devvp == devvp) 838 break; 839 } 840 841 /* 842 * Open the device if this isn't a secondary mount and construct 843 * the H2 device mount (hmp). 844 */ 845 if (hmp == NULL) { 846 hammer2_chain_t *schain; 847 hammer2_xid_t xid; 848 849 if (error == 0 && vcount(devvp) > 0) 850 error = EBUSY; 851 852 /* 853 * Now open the device 854 */ 855 if (error == 0) { 856 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 857 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 858 error = vinvalbuf(devvp, V_SAVE, 0, 0); 859 if (error == 0) { 860 error = VOP_OPEN(devvp, 861 ronly ? FREAD : FREAD | FWRITE, 862 FSCRED, NULL); 863 } 864 vn_unlock(devvp); 865 } 866 if (error && devvp) { 867 vrele(devvp); 868 devvp = NULL; 869 } 870 if (error) { 871 lockmgr(&hammer2_mntlk, LK_RELEASE); 872 return error; 873 } 874 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 875 ksnprintf(hmp->devrepname, sizeof(hmp->devrepname), "%s", dev); 876 hmp->ronly = ronly; 877 hmp->devvp = devvp; 878 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 879 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 880 RB_INIT(&hmp->iotree); 881 spin_init(&hmp->io_spin, "hm2mount_io"); 882 spin_init(&hmp->list_spin, "hm2mount_list"); 883 TAILQ_INIT(&hmp->flushq); 884 885 lockinit(&hmp->vollk, "h2vol", 0, 0); 886 887 /* 888 * vchain setup. vchain.data is embedded. 889 * vchain.refs is initialized and will never drop to 0. 890 * 891 * NOTE! voldata is not yet loaded. 892 */ 893 hmp->vchain.hmp = hmp; 894 hmp->vchain.refs = 1; 895 hmp->vchain.data = (void *)&hmp->voldata; 896 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 897 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 898 hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid; 899 900 hammer2_chain_core_init(&hmp->vchain); 901 /* hmp->vchain.u.xxx is left NULL */ 902 903 /* 904 * fchain setup. fchain.data is embedded. 905 * fchain.refs is initialized and will never drop to 0. 906 * 907 * The data is not used but needs to be initialized to 908 * pass assertion muster. We use this chain primarily 909 * as a placeholder for the freemap's top-level RBTREE 910 * so it does not interfere with the volume's topology 911 * RBTREE. 912 */ 913 hmp->fchain.hmp = hmp; 914 hmp->fchain.refs = 1; 915 hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset; 916 hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP; 917 hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 918 hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid; 919 hmp->fchain.bref.methods = 920 HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) | 921 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 922 923 hammer2_chain_core_init(&hmp->fchain); 924 /* hmp->fchain.u.xxx is left NULL */ 925 926 /* 927 * Install the volume header and initialize fields from 928 * voldata. 929 */ 930 error = hammer2_install_volume_header(hmp); 931 if (error) { 932 hammer2_unmount_helper(mp, NULL, hmp); 933 lockmgr(&hammer2_mntlk, LK_RELEASE); 934 hammer2_vfs_unmount(mp, MNT_FORCE); 935 return error; 936 } 937 938 /* 939 * Really important to get these right or flush will get 940 * confused. 941 */ 942 hmp->spmp = hammer2_pfsalloc(NULL, NULL, 0); 943 kprintf("alloc spmp %p tid %016jx\n", 944 hmp->spmp, hmp->voldata.mirror_tid); 945 spmp = hmp->spmp; 946 947 /* 948 * Dummy-up vchain and fchain's modify_tid. mirror_tid 949 * is inherited from the volume header. 
950 */ 951 xid = 0; 952 hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid; 953 hmp->vchain.bref.modify_tid = hmp->vchain.bref.mirror_tid; 954 hmp->vchain.pmp = spmp; 955 hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid; 956 hmp->fchain.bref.modify_tid = hmp->fchain.bref.mirror_tid; 957 hmp->fchain.pmp = spmp; 958 959 /* 960 * First locate the super-root inode, which is key 0 961 * relative to the volume header's blockset. 962 * 963 * Then locate the root inode by scanning the directory keyspace 964 * represented by the label. 965 */ 966 parent = hammer2_chain_lookup_init(&hmp->vchain, 0); 967 schain = hammer2_chain_lookup(&parent, &key_dummy, 968 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 969 &cache_index, 0); 970 hammer2_chain_lookup_done(parent); 971 if (schain == NULL) { 972 kprintf("hammer2_mount: invalid super-root\n"); 973 hammer2_unmount_helper(mp, NULL, hmp); 974 lockmgr(&hammer2_mntlk, LK_RELEASE); 975 hammer2_vfs_unmount(mp, MNT_FORCE); 976 return EINVAL; 977 } 978 if (schain->error) { 979 kprintf("hammer2_mount: error %s reading super-root\n", 980 hammer2_error_str(schain->error)); 981 hammer2_chain_unlock(schain); 982 hammer2_chain_drop(schain); 983 schain = NULL; 984 hammer2_unmount_helper(mp, NULL, hmp); 985 lockmgr(&hammer2_mntlk, LK_RELEASE); 986 hammer2_vfs_unmount(mp, MNT_FORCE); 987 return EINVAL; 988 } 989 990 /* 991 * The super-root always uses an inode_tid of 1 when 992 * creating PFSs. 993 */ 994 spmp->inode_tid = 1; 995 spmp->modify_tid = schain->bref.modify_tid + 1; 996 997 /* 998 * Sanity-check schain's pmp and finish initialization. 999 * Any chain belonging to the super-root topology should 1000 * have a NULL pmp (not even set to spmp). 1001 */ 1002 ripdata = &hammer2_chain_rdata(schain)->ipdata; 1003 KKASSERT(schain->pmp == NULL); 1004 spmp->pfs_clid = ripdata->meta.pfs_clid; 1005 1006 /* 1007 * Replace the dummy spmp->iroot with a real one. It's 1008 * easier to just do a wholesale replacement than to try 1009 * to update the chain and fixup the iroot fields. 1010 * 1011 * The returned inode is locked with the supplied cluster. 1012 */ 1013 cluster = hammer2_cluster_from_chain(schain); 1014 hammer2_inode_drop(spmp->iroot); 1015 spmp->iroot = NULL; 1016 spmp->iroot = hammer2_inode_get(spmp, NULL, cluster); 1017 spmp->spmp_hmp = hmp; 1018 spmp->pfs_types[0] = ripdata->meta.pfs_type; 1019 hammer2_inode_ref(spmp->iroot); 1020 hammer2_inode_unlock(spmp->iroot); 1021 hammer2_cluster_unlock(cluster); 1022 hammer2_cluster_drop(cluster); 1023 schain = NULL; 1024 /* leave spmp->iroot with one ref */ 1025 1026 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1027 error = hammer2_recovery(hmp); 1028 /* XXX do something with error */ 1029 } 1030 hammer2_update_pmps(hmp); 1031 hammer2_iocom_init(hmp); 1032 1033 /* 1034 * Ref the cluster management messaging descriptor. The mount 1035 * program deals with the other end of the communications pipe. 1036 */ 1037 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 1038 if (fp) { 1039 hammer2_cluster_reconnect(hmp, fp); 1040 } else { 1041 kprintf("hammer2_mount: bad cluster_fd!\n"); 1042 } 1043 } else { 1044 spmp = hmp->spmp; 1045 } 1046 1047 /* 1048 * Lookup the mount point under the media-localized super-root. 1049 * Scanning hammer2_pfslist doesn't help us because it represents 1050 * PFS cluster ids which can aggregate several named PFSs together. 1051 * 1052 * cluster->pmp will incorrectly point to spmp and must be fixed 1053 * up later on. 
1054 */ 1055 hammer2_inode_lock(spmp->iroot, 0); 1056 parent = hammer2_inode_chain(spmp->iroot, 0, HAMMER2_RESOLVE_ALWAYS); 1057 lhc = hammer2_dirhash(label, strlen(label)); 1058 chain = hammer2_chain_lookup(&parent, &key_next, 1059 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1060 &cache_index, 0); 1061 while (chain) { 1062 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 1063 strcmp(label, chain->data->ipdata.filename) == 0) { 1064 break; 1065 } 1066 chain = hammer2_chain_next(&parent, chain, &key_next, 1067 key_next, 1068 lhc + HAMMER2_DIRHASH_LOMASK, 1069 &cache_index, 0); 1070 } 1071 if (parent) { 1072 hammer2_chain_unlock(parent); 1073 hammer2_chain_drop(parent); 1074 } 1075 hammer2_inode_unlock(spmp->iroot); 1076 1077 /* 1078 * PFS could not be found? 1079 */ 1080 if (chain == NULL) { 1081 kprintf("hammer2_mount: PFS label not found\n"); 1082 hammer2_unmount_helper(mp, NULL, hmp); 1083 lockmgr(&hammer2_mntlk, LK_RELEASE); 1084 hammer2_vfs_unmount(mp, MNT_FORCE); 1085 1086 return EINVAL; 1087 } 1088 1089 /* 1090 * Acquire the pmp structure (it should have already been allocated 1091 * via hammer2_update_pmps() so do not pass cluster in to add to 1092 * available chains). 1093 * 1094 * Check if the cluster has already been mounted. A cluster can 1095 * only be mounted once, use null mounts to mount additional copies. 1096 */ 1097 ripdata = &chain->data->ipdata; 1098 bref = chain->bref; 1099 pmp = hammer2_pfsalloc(NULL, ripdata, bref.modify_tid); 1100 hammer2_chain_unlock(chain); 1101 hammer2_chain_drop(chain); 1102 1103 if (pmp->mp) { 1104 kprintf("hammer2_mount: PFS already mounted!\n"); 1105 hammer2_unmount_helper(mp, NULL, hmp); 1106 lockmgr(&hammer2_mntlk, LK_RELEASE); 1107 hammer2_vfs_unmount(mp, MNT_FORCE); 1108 1109 return EBUSY; 1110 } 1111 1112 /* 1113 * Finish the mount 1114 */ 1115 kprintf("hammer2_mount hmp=%p pmp=%p\n", hmp, pmp); 1116 1117 mp->mnt_flag = MNT_LOCAL; 1118 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 1119 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 1120 1121 /* 1122 * required mount structure initializations 1123 */ 1124 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 1125 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 1126 1127 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 1128 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 1129 1130 /* 1131 * Optional fields 1132 */ 1133 mp->mnt_iosize_max = MAXPHYS; 1134 1135 /* 1136 * Connect up mount pointers. 1137 */ 1138 hammer2_mount_helper(mp, pmp); 1139 1140 lockmgr(&hammer2_mntlk, LK_RELEASE); 1141 1142 /* 1143 * Finish setup 1144 */ 1145 vfs_getnewfsid(mp); 1146 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 1147 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 1148 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 1149 1150 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 1151 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 1152 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 1153 copyinstr(path, mp->mnt_stat.f_mntonname, 1154 sizeof(mp->mnt_stat.f_mntonname) - 1, 1155 &size); 1156 1157 /* 1158 * Initial statfs to prime mnt_stat. 1159 */ 1160 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 1161 1162 return 0; 1163 } 1164 1165 /* 1166 * Scan PFSs under the super-root and create hammer2_pfs structures. 
1167 */ 1168 static 1169 void 1170 hammer2_update_pmps(hammer2_dev_t *hmp) 1171 { 1172 const hammer2_inode_data_t *ripdata; 1173 hammer2_chain_t *parent; 1174 hammer2_chain_t *chain; 1175 hammer2_blockref_t bref; 1176 hammer2_pfs_t *spmp; 1177 hammer2_pfs_t *pmp; 1178 hammer2_key_t key_next; 1179 int cache_index = -1; 1180 1181 /* 1182 * Lookup mount point under the media-localized super-root. 1183 * 1184 * cluster->pmp will incorrectly point to spmp and must be fixed 1185 * up later on. 1186 */ 1187 spmp = hmp->spmp; 1188 hammer2_inode_lock(spmp->iroot, 0); 1189 parent = hammer2_inode_chain(spmp->iroot, 0, HAMMER2_RESOLVE_ALWAYS); 1190 chain = hammer2_chain_lookup(&parent, &key_next, 1191 HAMMER2_KEY_MIN, HAMMER2_KEY_MAX, 1192 &cache_index, 0); 1193 while (chain) { 1194 if (chain->bref.type != HAMMER2_BREF_TYPE_INODE) 1195 continue; 1196 ripdata = &chain->data->ipdata; 1197 bref = chain->bref; 1198 kprintf("ADD LOCAL PFS: %s\n", ripdata->filename); 1199 1200 pmp = hammer2_pfsalloc(chain, ripdata, bref.modify_tid); 1201 chain = hammer2_chain_next(&parent, chain, &key_next, 1202 key_next, HAMMER2_KEY_MAX, 1203 &cache_index, 0); 1204 } 1205 if (parent) { 1206 hammer2_chain_unlock(parent); 1207 hammer2_chain_drop(parent); 1208 } 1209 hammer2_inode_unlock(spmp->iroot); 1210 } 1211 1212 static 1213 int 1214 hammer2_remount(hammer2_dev_t *hmp, struct mount *mp, char *path, 1215 struct vnode *devvp, struct ucred *cred) 1216 { 1217 int error; 1218 1219 if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 1220 error = hammer2_recovery(hmp); 1221 } else { 1222 error = 0; 1223 } 1224 return error; 1225 } 1226 1227 static 1228 int 1229 hammer2_vfs_unmount(struct mount *mp, int mntflags) 1230 { 1231 hammer2_pfs_t *pmp; 1232 int flags; 1233 int error = 0; 1234 1235 pmp = MPTOPMP(mp); 1236 1237 if (pmp == NULL) 1238 return(0); 1239 1240 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 1241 1242 /* 1243 * If mount initialization proceeded far enough we must flush 1244 * its vnodes and sync the underlying mount points. Three syncs 1245 * are required to fully flush the filesystem (freemap updates lag 1246 * by one flush, and one extra for safety). 1247 */ 1248 if (mntflags & MNT_FORCE) 1249 flags = FORCECLOSE; 1250 else 1251 flags = 0; 1252 if (pmp->iroot) { 1253 error = vflush(mp, 0, flags); 1254 if (error) 1255 goto failed; 1256 hammer2_vfs_sync(mp, MNT_WAIT); 1257 hammer2_vfs_sync(mp, MNT_WAIT); 1258 hammer2_vfs_sync(mp, MNT_WAIT); 1259 } 1260 1261 /* 1262 * Cleanup the frontend support XOPS threads 1263 */ 1264 hammer2_xop_helper_cleanup(pmp); 1265 1266 /* 1267 * Cleanup our reference on ihidden. 1268 */ 1269 if (pmp->ihidden) { 1270 hammer2_inode_drop(pmp->ihidden); 1271 pmp->ihidden = NULL; 1272 } 1273 if (pmp->mp) 1274 hammer2_unmount_helper(mp, pmp, NULL); 1275 1276 error = 0; 1277 failed: 1278 lockmgr(&hammer2_mntlk, LK_RELEASE); 1279 1280 return (error); 1281 } 1282 1283 /* 1284 * Mount helper, hook the system mount into our PFS. 1285 * The mount lock is held. 1286 * 1287 * We must bump the mount_count on related devices for any 1288 * mounted PFSs. 1289 */ 1290 static 1291 void 1292 hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp) 1293 { 1294 hammer2_cluster_t *cluster; 1295 hammer2_chain_t *rchain; 1296 int i; 1297 1298 mp->mnt_data = (qaddr_t)pmp; 1299 pmp->mp = mp; 1300 1301 /* 1302 * After pmp->mp is set we have to adjust hmp->mount_count. 
1303 */ 1304 cluster = &pmp->iroot->cluster; 1305 for (i = 0; i < cluster->nchains; ++i) { 1306 rchain = cluster->array[i].chain; 1307 if (rchain == NULL) 1308 continue; 1309 ++rchain->hmp->mount_count; 1310 kprintf("hammer2_mount hmp=%p ++mount_count=%d\n", 1311 rchain->hmp, rchain->hmp->mount_count); 1312 } 1313 1314 /* 1315 * Create missing Xop threads 1316 */ 1317 hammer2_xop_helper_create(pmp); 1318 } 1319 1320 /* 1321 * Mount helper, unhook the system mount from our PFS. 1322 * The mount lock is held. 1323 * 1324 * If hmp is supplied a mount responsible for being the first to open 1325 * the block device failed and the block device and all PFSs using the 1326 * block device must be cleaned up. 1327 * 1328 * If pmp is supplied multiple devices might be backing the PFS and each 1329 * must be disconnect. This might not be the last PFS using some of the 1330 * underlying devices. Also, we have to adjust our hmp->mount_count 1331 * accounting for the devices backing the pmp which is now undergoing an 1332 * unmount. 1333 */ 1334 static 1335 void 1336 hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp, hammer2_dev_t *hmp) 1337 { 1338 hammer2_cluster_t *cluster; 1339 hammer2_chain_t *rchain; 1340 struct vnode *devvp; 1341 int dumpcnt; 1342 int ronly = 0; 1343 int i; 1344 1345 /* 1346 * If no device supplied this is a high-level unmount and we have to 1347 * to disconnect the mount, adjust mount_count, and locate devices 1348 * that might now have no mounts. 1349 */ 1350 if (pmp) { 1351 KKASSERT(hmp == NULL); 1352 KKASSERT((void *)(intptr_t)mp->mnt_data == pmp); 1353 pmp->mp = NULL; 1354 mp->mnt_data = NULL; 1355 1356 /* 1357 * After pmp->mp is cleared we have to account for 1358 * mount_count. 1359 */ 1360 cluster = &pmp->iroot->cluster; 1361 for (i = 0; i < cluster->nchains; ++i) { 1362 rchain = cluster->array[i].chain; 1363 if (rchain == NULL) 1364 continue; 1365 --rchain->hmp->mount_count; 1366 kprintf("hammer2_unmount hmp=%p --mount_count=%d\n", 1367 rchain->hmp, rchain->hmp->mount_count); 1368 /* scrapping hmp now may invalidate the pmp */ 1369 } 1370 again: 1371 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 1372 if (hmp->mount_count == 0) { 1373 hammer2_unmount_helper(NULL, NULL, hmp); 1374 goto again; 1375 } 1376 } 1377 return; 1378 } 1379 1380 /* 1381 * Try to terminate the block device. We can't terminate it if 1382 * there are still PFSs referencing it. 1383 */ 1384 kprintf("hammer2_unmount hmp=%p mount_count=%d\n", 1385 hmp, hmp->mount_count); 1386 if (hmp->mount_count) 1387 return; 1388 1389 hammer2_pfsfree_scan(hmp); 1390 hammer2_dev_exlock(hmp); /* XXX order */ 1391 1392 /* 1393 * Cycle the volume data lock as a safety (probably not needed any 1394 * more). To ensure everything is out we need to flush at least 1395 * three times. (1) The running of the unlinkq can dirty the 1396 * filesystem, (2) A normal flush can dirty the freemap, and 1397 * (3) ensure that the freemap is fully synchronized. 1398 * 1399 * The next mount's recovery scan can clean everything up but we want 1400 * to leave the filesystem in a 100% clean state on a normal unmount. 
1401 */ 1402 #if 0 1403 hammer2_voldata_lock(hmp); 1404 hammer2_voldata_unlock(hmp); 1405 #endif 1406 hammer2_iocom_uninit(hmp); 1407 1408 if ((hmp->vchain.flags | hmp->fchain.flags) & 1409 HAMMER2_CHAIN_FLUSH_MASK) { 1410 kprintf("hammer2_unmount: chains left over " 1411 "after final sync\n"); 1412 kprintf(" vchain %08x\n", hmp->vchain.flags); 1413 kprintf(" fchain %08x\n", hmp->fchain.flags); 1414 1415 if (hammer2_debug & 0x0010) 1416 Debugger("entered debugger"); 1417 } 1418 1419 KKASSERT(hmp->spmp == NULL); 1420 1421 /* 1422 * Finish up with the device vnode 1423 */ 1424 if ((devvp = hmp->devvp) != NULL) { 1425 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1426 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 1427 hmp->devvp = NULL; 1428 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL); 1429 vn_unlock(devvp); 1430 vrele(devvp); 1431 devvp = NULL; 1432 } 1433 1434 /* 1435 * Clear vchain/fchain flags that might prevent final cleanup 1436 * of these chains. 1437 */ 1438 if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) { 1439 atomic_clear_int(&hmp->vchain.flags, 1440 HAMMER2_CHAIN_MODIFIED); 1441 hammer2_pfs_memory_wakeup(hmp->vchain.pmp); 1442 hammer2_chain_drop(&hmp->vchain); 1443 } 1444 if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) { 1445 atomic_clear_int(&hmp->vchain.flags, 1446 HAMMER2_CHAIN_UPDATE); 1447 hammer2_chain_drop(&hmp->vchain); 1448 } 1449 1450 if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) { 1451 atomic_clear_int(&hmp->fchain.flags, 1452 HAMMER2_CHAIN_MODIFIED); 1453 hammer2_pfs_memory_wakeup(hmp->fchain.pmp); 1454 hammer2_chain_drop(&hmp->fchain); 1455 } 1456 if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) { 1457 atomic_clear_int(&hmp->fchain.flags, 1458 HAMMER2_CHAIN_UPDATE); 1459 hammer2_chain_drop(&hmp->fchain); 1460 } 1461 1462 /* 1463 * Final drop of embedded freemap root chain to 1464 * clean up fchain.core (fchain structure is not 1465 * flagged ALLOCATED so it is cleaned out and then 1466 * left to rot). 1467 */ 1468 hammer2_chain_drop(&hmp->fchain); 1469 1470 /* 1471 * Final drop of embedded volume root chain to clean 1472 * up vchain.core (vchain structure is not flagged 1473 * ALLOCATED so it is cleaned out and then left to 1474 * rot). 
1475 */ 1476 dumpcnt = 50; 1477 hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v'); 1478 dumpcnt = 50; 1479 hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f'); 1480 hammer2_dev_unlock(hmp); 1481 hammer2_chain_drop(&hmp->vchain); 1482 1483 hammer2_io_cleanup(hmp, &hmp->iotree); 1484 if (hmp->iofree_count) { 1485 kprintf("io_cleanup: %d I/O's left hanging\n", 1486 hmp->iofree_count); 1487 } 1488 1489 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 1490 kmalloc_destroy(&hmp->mchain); 1491 kfree(hmp, M_HAMMER2); 1492 } 1493 1494 static 1495 int 1496 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 1497 ino_t ino, struct vnode **vpp) 1498 { 1499 kprintf("hammer2_vget\n"); 1500 return (EOPNOTSUPP); 1501 } 1502 1503 static 1504 int 1505 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 1506 { 1507 hammer2_pfs_t *pmp; 1508 int error; 1509 struct vnode *vp; 1510 1511 pmp = MPTOPMP(mp); 1512 if (pmp->iroot == NULL) { 1513 *vpp = NULL; 1514 return EINVAL; 1515 } 1516 1517 error = 0; 1518 hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_SHARED); 1519 1520 while (pmp->inode_tid == 0) { 1521 hammer2_xop_vfsroot_t *xop; 1522 hammer2_inode_meta_t *meta; 1523 1524 xop = &hammer2_xop_alloc(pmp->iroot)->xop_vfsroot; 1525 hammer2_xop_start(&xop->head, hammer2_xop_vfsroot); 1526 error = hammer2_xop_collect(&xop->head, 0); 1527 1528 if (error == 0) { 1529 meta = &xop->head.cluster.focus->data->ipdata.meta; 1530 pmp->iroot->meta = *meta; 1531 pmp->iroot->bref = xop->head.cluster.focus->bref; 1532 pmp->inode_tid = meta->pfs_inum + 1; 1533 if (pmp->inode_tid < HAMMER2_INODE_START) 1534 pmp->inode_tid = HAMMER2_INODE_START; 1535 pmp->modify_tid = pmp->iroot->bref.modify_tid + 1; 1536 kprintf("PFS: Starting inode %jd\n", 1537 (intmax_t)pmp->inode_tid); 1538 kprintf("PMP focus good set nextino=%ld mod=%016jx\n", 1539 pmp->inode_tid, pmp->modify_tid); 1540 wakeup(&pmp->iroot); 1541 1542 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1543 1544 /* 1545 * Prime the mount info. 1546 */ 1547 hammer2_vfs_statfs(mp, &mp->mnt_stat, NULL); 1548 1549 /* 1550 * With the cluster operational, check for and 1551 * install ihidden if needed. The install_hidden 1552 * code needs to get a transaction so we must unlock 1553 * iroot around it. 1554 * 1555 * This is only applicable PFS mounts, there is no 1556 * hidden directory in the spmp. 1557 */ 1558 hammer2_inode_unlock(pmp->iroot); 1559 hammer2_inode_install_hidden(pmp); 1560 hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_SHARED); 1561 1562 break; 1563 } 1564 1565 /* 1566 * Loop, try again 1567 */ 1568 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1569 hammer2_inode_unlock(pmp->iroot); 1570 error = tsleep(&pmp->iroot, PCATCH, "h2root", hz); 1571 hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_SHARED); 1572 if (error == EINTR) 1573 break; 1574 } 1575 1576 if (error) { 1577 hammer2_inode_unlock(pmp->iroot); 1578 *vpp = NULL; 1579 } else { 1580 vp = hammer2_igetv(pmp->iroot, &error); 1581 hammer2_inode_unlock(pmp->iroot); 1582 *vpp = vp; 1583 } 1584 1585 return (error); 1586 } 1587 1588 /* 1589 * Filesystem status 1590 * 1591 * XXX incorporate ipdata->meta.inode_quota and data_quota 1592 */ 1593 static 1594 int 1595 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1596 { 1597 hammer2_pfs_t *pmp; 1598 hammer2_dev_t *hmp; 1599 hammer2_blockref_t bref; 1600 1601 /* 1602 * NOTE: iroot might not have validated the cluster yet. 
1603 */ 1604 pmp = MPTOPMP(mp); 1605 if (pmp->iroot->cluster.focus == NULL) 1606 return EINVAL; 1607 1608 KKASSERT(pmp->iroot->cluster.nchains >= 1); 1609 hmp = pmp->iroot->cluster.focus->hmp; /* iroot retains focus */ 1610 bref = pmp->iroot->cluster.focus->bref; /* no lock */ 1611 1612 mp->mnt_stat.f_files = bref.inode_count; 1613 mp->mnt_stat.f_ffree = 0; 1614 mp->mnt_stat.f_blocks = (bref.data_count + 1615 hmp->voldata.allocator_free) / 1616 mp->mnt_vstat.f_bsize; 1617 mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / 1618 mp->mnt_vstat.f_bsize; 1619 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 1620 1621 *sbp = mp->mnt_stat; 1622 return (0); 1623 } 1624 1625 static 1626 int 1627 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1628 { 1629 hammer2_pfs_t *pmp; 1630 hammer2_dev_t *hmp; 1631 hammer2_blockref_t bref; 1632 1633 /* 1634 * NOTE: iroot might not have validated the cluster yet. 1635 */ 1636 pmp = MPTOPMP(mp); 1637 if (pmp->iroot->cluster.focus == NULL) 1638 return EINVAL; 1639 1640 KKASSERT(pmp->iroot->cluster.nchains >= 1); 1641 hmp = pmp->iroot->cluster.focus->hmp; /* iroot retains focus */ 1642 bref = pmp->iroot->cluster.focus->bref; /* no lock */ 1643 1644 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 1645 mp->mnt_vstat.f_files = bref.inode_count; 1646 mp->mnt_vstat.f_ffree = 0; 1647 mp->mnt_vstat.f_blocks = (bref.data_count + 1648 hmp->voldata.allocator_free) / 1649 mp->mnt_vstat.f_bsize; 1650 mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / 1651 mp->mnt_vstat.f_bsize; 1652 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 1653 1654 *sbp = mp->mnt_vstat; 1655 return (0); 1656 } 1657 1658 /* 1659 * Mount-time recovery (RW mounts) 1660 * 1661 * Updates to the free block table are allowed to lag flushes by one 1662 * transaction. In case of a crash, then on a fresh mount we must do an 1663 * incremental scan of the last committed transaction id and make sure that 1664 * all related blocks have been marked allocated. 1665 * 1666 * The super-root topology and each PFS has its own transaction id domain, 1667 * so we must track PFS boundary transitions. 
1668 */ 1669 struct hammer2_recovery_elm { 1670 TAILQ_ENTRY(hammer2_recovery_elm) entry; 1671 hammer2_chain_t *chain; 1672 hammer2_tid_t sync_tid; 1673 }; 1674 1675 TAILQ_HEAD(hammer2_recovery_list, hammer2_recovery_elm); 1676 1677 struct hammer2_recovery_info { 1678 struct hammer2_recovery_list list; 1679 int depth; 1680 }; 1681 1682 static int hammer2_recovery_scan(hammer2_dev_t *hmp, 1683 hammer2_chain_t *parent, 1684 struct hammer2_recovery_info *info, 1685 hammer2_tid_t sync_tid); 1686 1687 #define HAMMER2_RECOVERY_MAXDEPTH 10 1688 1689 static 1690 int 1691 hammer2_recovery(hammer2_dev_t *hmp) 1692 { 1693 struct hammer2_recovery_info info; 1694 struct hammer2_recovery_elm *elm; 1695 hammer2_chain_t *parent; 1696 hammer2_tid_t sync_tid; 1697 hammer2_tid_t mirror_tid; 1698 int error; 1699 int cumulative_error = 0; 1700 1701 hammer2_trans_init(hmp->spmp, 0); 1702 1703 sync_tid = hmp->voldata.freemap_tid; 1704 mirror_tid = hmp->voldata.mirror_tid; 1705 1706 kprintf("hammer2 mount \"%s\": ", hmp->devrepname); 1707 if (sync_tid >= mirror_tid) { 1708 kprintf(" no recovery needed\n"); 1709 } else { 1710 kprintf(" freemap recovery %016jx-%016jx\n", 1711 sync_tid + 1, mirror_tid); 1712 } 1713 1714 TAILQ_INIT(&info.list); 1715 info.depth = 0; 1716 parent = hammer2_chain_lookup_init(&hmp->vchain, 0); 1717 cumulative_error = hammer2_recovery_scan(hmp, parent, 1718 &info, sync_tid); 1719 hammer2_chain_lookup_done(parent); 1720 1721 while ((elm = TAILQ_FIRST(&info.list)) != NULL) { 1722 TAILQ_REMOVE(&info.list, elm, entry); 1723 parent = elm->chain; 1724 sync_tid = elm->sync_tid; 1725 kfree(elm, M_HAMMER2); 1726 1727 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1728 error = hammer2_recovery_scan(hmp, parent, 1729 &info, hmp->voldata.freemap_tid); 1730 hammer2_chain_unlock(parent); 1731 hammer2_chain_drop(parent); /* drop elm->chain ref */ 1732 if (error) 1733 cumulative_error = error; 1734 } 1735 hammer2_trans_done(hmp->spmp); 1736 1737 return cumulative_error; 1738 } 1739 1740 static 1741 int 1742 hammer2_recovery_scan(hammer2_dev_t *hmp, hammer2_chain_t *parent, 1743 struct hammer2_recovery_info *info, 1744 hammer2_tid_t sync_tid) 1745 { 1746 const hammer2_inode_data_t *ripdata; 1747 hammer2_chain_t *chain; 1748 int cache_index; 1749 int cumulative_error = 0; 1750 int error; 1751 1752 /* 1753 * Adjust freemap to ensure that the block(s) are marked allocated. 1754 */ 1755 if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) { 1756 hammer2_freemap_adjust(hmp, &parent->bref, 1757 HAMMER2_FREEMAP_DORECOVER); 1758 } 1759 1760 /* 1761 * Check type for recursive scan 1762 */ 1763 switch(parent->bref.type) { 1764 case HAMMER2_BREF_TYPE_VOLUME: 1765 /* data already instantiated */ 1766 break; 1767 case HAMMER2_BREF_TYPE_INODE: 1768 /* 1769 * Must instantiate data for DIRECTDATA test and also 1770 * for recursion. 
1771 */ 1772 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1773 ripdata = &hammer2_chain_rdata(parent)->ipdata; 1774 if (ripdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1775 /* not applicable to recovery scan */ 1776 hammer2_chain_unlock(parent); 1777 return 0; 1778 } 1779 hammer2_chain_unlock(parent); 1780 break; 1781 case HAMMER2_BREF_TYPE_INDIRECT: 1782 /* 1783 * Must instantiate data for recursion 1784 */ 1785 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1786 hammer2_chain_unlock(parent); 1787 break; 1788 case HAMMER2_BREF_TYPE_DATA: 1789 case HAMMER2_BREF_TYPE_FREEMAP: 1790 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1791 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1792 /* not applicable to recovery scan */ 1793 return 0; 1794 break; 1795 default: 1796 return EDOM; 1797 } 1798 1799 /* 1800 * Defer operation if depth limit reached or if we are crossing a 1801 * PFS boundary. 1802 */ 1803 if (info->depth >= HAMMER2_RECOVERY_MAXDEPTH) { 1804 struct hammer2_recovery_elm *elm; 1805 1806 elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK); 1807 elm->chain = parent; 1808 elm->sync_tid = sync_tid; 1809 hammer2_chain_ref(parent); 1810 TAILQ_INSERT_TAIL(&info->list, elm, entry); 1811 /* unlocked by caller */ 1812 1813 return(0); 1814 } 1815 1816 1817 /* 1818 * Recursive scan of the last flushed transaction only. We are 1819 * doing this without pmp assignments so don't leave the chains 1820 * hanging around after we are done with them. 1821 */ 1822 cache_index = 0; 1823 chain = hammer2_chain_scan(parent, NULL, &cache_index, 1824 HAMMER2_LOOKUP_NODATA); 1825 while (chain) { 1826 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 1827 if (chain->bref.mirror_tid > sync_tid) { 1828 ++info->depth; 1829 error = hammer2_recovery_scan(hmp, chain, 1830 info, sync_tid); 1831 --info->depth; 1832 if (error) 1833 cumulative_error = error; 1834 } 1835 1836 /* 1837 * Flush the recovery at the PFS boundary to stage it for 1838 * the final flush of the super-root topology. 1839 */ 1840 if ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) && 1841 (chain->flags & HAMMER2_CHAIN_ONFLUSH)) { 1842 hammer2_flush(chain, 1); 1843 } 1844 chain = hammer2_chain_scan(parent, chain, &cache_index, 1845 HAMMER2_LOOKUP_NODATA); 1846 } 1847 1848 return cumulative_error; 1849 } 1850 1851 /* 1852 * Sync a mount point; this is called on a per-mount basis from the 1853 * filesystem syncer process periodically and whenever a user issues 1854 * a sync. 1855 */ 1856 int 1857 hammer2_vfs_sync(struct mount *mp, int waitfor) 1858 { 1859 hammer2_xop_flush_t *xop; 1860 struct hammer2_sync_info info; 1861 hammer2_inode_t *iroot; 1862 hammer2_pfs_t *pmp; 1863 int flags; 1864 int error; 1865 1866 pmp = MPTOPMP(mp); 1867 iroot = pmp->iroot; 1868 KKASSERT(iroot); 1869 KKASSERT(iroot->pmp == pmp); 1870 1871 /* 1872 * We can't acquire locks on existing vnodes while in a transaction 1873 * without risking a deadlock. This assumes that vfsync() can be 1874 * called without the vnode locked (which it can in DragonFly). 1875 * Otherwise we'd have to implement a multi-pass or flag the lock 1876 * failures and retry. 1877 * 1878 * The reclamation code interlocks with the sync list's token 1879 * (by removing the vnode from the scan list) before unlocking 1880 * the inode, giving us time to ref the inode. 1881 */ 1882 /*flags = VMSC_GETVP;*/ 1883 flags = 0; 1884 if (waitfor & MNT_LAZY) 1885 flags |= VMSC_ONEPASS; 1886 1887 #if 0 1888 /* 1889 * Preflush the vnodes using a normal transaction before interlocking 1890 * with a flush transaction. 
1891 */ 1892 hammer2_trans_init(pmp, 0); 1893 info.error = 0; 1894 info.waitfor = MNT_NOWAIT; 1895 vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info); 1896 hammer2_trans_done(pmp); 1897 #endif 1898 1899 /* 1900 * Start our flush transaction. This does not return until all 1901 * concurrent transactions have completed and will prevent any 1902 * new transactions from running concurrently, except for the 1903 * buffer cache transactions. 1904 * 1905 * For efficiency do an async pass before making sure with a 1906 * synchronous pass on all related buffer cache buffers. It 1907 * should theoretically not be possible for any new file buffers 1908 * to be instantiated during this sequence. 1909 */ 1910 hammer2_trans_init(pmp, HAMMER2_TRANS_ISFLUSH | 1911 HAMMER2_TRANS_PREFLUSH); 1912 hammer2_inode_run_unlinkq(pmp); 1913 1914 info.error = 0; 1915 info.waitfor = MNT_NOWAIT; 1916 vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info); 1917 info.waitfor = MNT_WAIT; 1918 vsyncscan(mp, flags, hammer2_sync_scan2, &info); 1919 1920 /* 1921 * Clear PREFLUSH. This prevents (or asserts on) any new logical 1922 * buffer cache flushes which occur during the flush. Device buffers 1923 * are not affected. 1924 */ 1925 hammer2_bioq_sync(pmp); 1926 atomic_clear_int(&pmp->trans.flags, HAMMER2_TRANS_PREFLUSH); 1927 1928 /* 1929 * Use the XOP interface to concurrently flush all nodes to 1930 * synchronize the PFSROOT subtopology to the media. A standard 1931 * end-of-scan ENOENT error indicates cluster sufficiency. 1932 * 1933 * Note that this flush will not be visible on crash recovery until 1934 * we flush the super-root topology in the next loop. 1935 * 1936 * XXX For now wait for all flushes to complete. 1937 */ 1938 if (iroot) { 1939 xop = &hammer2_xop_alloc(iroot)->xop_flush; 1940 hammer2_xop_start(&xop->head, hammer2_inode_xop_flush); 1941 error = hammer2_xop_collect(&xop->head, 1942 HAMMER2_XOP_COLLECT_WAITALL); 1943 if (error == ENOENT) 1944 error = 0; 1945 } else { 1946 error = 0; 1947 } 1948 hammer2_trans_done(pmp); 1949 1950 return (error); 1951 } 1952 1953 /* 1954 * Sync passes. 1955 */ 1956 static int 1957 hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 1958 { 1959 struct hammer2_sync_info *info = data; 1960 hammer2_inode_t *ip; 1961 int error; 1962 1963 /* 1964 * Degenerate cases. Note that ip == NULL typically means the 1965 * syncer vnode itself and we don't want to vclrisdirty() in that 1966 * situation. 1967 */ 1968 ip = VTOI(vp); 1969 if (ip == NULL) { 1970 return(0); 1971 } 1972 if (vp->v_type == VNON || vp->v_type == VBAD) { 1973 vclrisdirty(vp); 1974 return(0); 1975 } 1976 1977 /* 1978 * VOP_FSYNC will start a new transaction so replicate some code 1979 * here to do it inline (see hammer2_vop_fsync()). 1980 * 1981 * WARNING: The vfsync interacts with the buffer cache and might 1982 * block, we can't hold the inode lock at that time. 1983 * However, we MUST ref ip before blocking to ensure that 1984 * it isn't ripped out from under us (since we do not 1985 * hold a lock on the vnode). 
	 */
	hammer2_inode_ref(ip);
	if ((ip->flags & HAMMER2_INODE_MODIFIED) ||
	    !RB_EMPTY(&vp->v_rbdirty_tree)) {
		vfsync(vp, info->waitfor, 1, NULL, NULL);
		hammer2_inode_fsync(ip, NULL);
	}
	if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrisdirty(vp);
	}

	hammer2_inode_drop(ip);
#if 1
	error = 0;
	if (error)
		info->error = error;
#endif
	return(0);
}

static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	return (0);
}

static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
		   struct fid *fhp, struct vnode **vpp)
{
	return (0);
}

static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		     int *exflagsp, struct ucred **credanonp)
{
	return (0);
}

/*
 * Support code for hammer2_vfs_mount().  Read, verify, and install the
 * volume header into the HMP.
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to
 *     nonexistent locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs.
 */
static
int
hammer2_install_volume_header(hammer2_dev_t *hmp)
{
	hammer2_volume_data_t *vd;
	struct buf *bp;
	hammer2_crc32_t crc0, crc, bcrc0, bcrc;
	int error_reported;
	int error;
	int valid;
	int i;

	error_reported = 0;
	error = 0;
	valid = 0;
	bp = NULL;

	/*
	 * There are up to 4 copies of the volume header (syncs iterate
	 * between them so there is no single master).  We don't trust the
	 * volu_size field so we don't know precisely how large the
	 * filesystem is, so depend on the OS to return an error if we go
	 * beyond the block device's EOF.
	 */
	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
		error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
			      HAMMER2_VOLUME_BYTES, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		vd = (struct hammer2_volume_data *) bp->b_data;
		if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
		    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
			/* XXX: Reversed-endianness filesystem */
			kprintf("hammer2: reverse-endian filesystem "
				"detected\n");
			brelse(bp);
			bp = NULL;
			continue;
		}

		crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
		crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
				      HAMMER2_VOLUME_ICRC0_SIZE);
		bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
		bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
				       HAMMER2_VOLUME_ICRC1_SIZE);
		if ((crc0 != crc) || (bcrc0 != bcrc)) {
			kprintf("hammer2 volume header crc "
				"mismatch copy #%d %08x/%08x\n",
				i, crc0, crc);
			error_reported = 1;
			brelse(bp);
			bp = NULL;
			continue;
		}
		if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
			valid = 1;
			hmp->voldata = *vd;
			hmp->volhdrno = i;
		}
		brelse(bp);
		bp = NULL;
	}
	if (valid) {
		hmp->volsync = hmp->voldata;
		error = 0;
		if (error_reported || bootverbose || 1) { /* 1/DEBUG */
			kprintf("hammer2: using volume header #%d\n",
				hmp->volhdrno);
		}
	} else {
		error = EINVAL;
		kprintf("hammer2: no valid volume headers found!\n");
	}
	return (error);
}

/*
 * This handles hysteresis on regular file flushes.  Because the BIOs are
 * routed to a thread it is possible for an excessive number to build up
 * and cause long front-end stalls long before the runningbuffspace limit
 * is hit, so we implement hammer2_flush_pipe to control the hysteresis.
 *
 * This is a particular problem when compression is used.
 */
void
hammer2_lwinprog_ref(hammer2_pfs_t *pmp)
{
	atomic_add_int(&pmp->count_lwinprog, 1);
}

void
hammer2_lwinprog_drop(hammer2_pfs_t *pmp)
{
	int lwinprog;

	lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1);
	if ((lwinprog & HAMMER2_LWINPROG_WAITING) &&
	    (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) {
		atomic_clear_int(&pmp->count_lwinprog,
				 HAMMER2_LWINPROG_WAITING);
		wakeup(&pmp->count_lwinprog);
	}
	if ((lwinprog & HAMMER2_LWINPROG_WAITING0) &&
	    (lwinprog & HAMMER2_LWINPROG_MASK) <= 0) {
		atomic_clear_int(&pmp->count_lwinprog,
				 HAMMER2_LWINPROG_WAITING0);
		wakeup(&pmp->count_lwinprog);
	}
}

void
hammer2_lwinprog_wait(hammer2_pfs_t *pmp, int flush_pipe)
{
	int lwinprog;
	int lwflag = (flush_pipe) ? HAMMER2_LWINPROG_WAITING :
				    HAMMER2_LWINPROG_WAITING0;

	for (;;) {
		lwinprog = pmp->count_lwinprog;
		cpu_ccfence();
		if ((lwinprog & HAMMER2_LWINPROG_MASK) <= flush_pipe)
			break;
		tsleep_interlock(&pmp->count_lwinprog, 0);
		atomic_set_int(&pmp->count_lwinprog, lwflag);
		lwinprog = pmp->count_lwinprog;
		if ((lwinprog & HAMMER2_LWINPROG_MASK) <= flush_pipe)
			break;
		tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz);
	}
}
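
/*
 * Illustrative usage, assuming a hypothetical caller (this pattern is not
 * taken from this file): a front-end logical write path would typically
 * account for a queued write and throttle itself against hammer2_flush_pipe
 * before handing the BIO to the write thread, which drops the count when
 * the logical write completes:
 *
 *	hammer2_lwinprog_ref(pmp);			(queueing a write)
 *	hammer2_lwinprog_wait(pmp, hammer2_flush_pipe);	(throttle producer)
 *	...hand the BIO to the write/strategy thread...
 *
 *	hammer2_lwinprog_drop(pmp);	(in the consumer on completion;
 *					 wakes throttled producers)
 */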

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 */
void
hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
{
	uint32_t waiting;
	uint32_t count;
	uint32_t limit;
#if 0
	static int zzticks;
#endif

	/*
	 * Atomically check the condition and wait.  Also do an early
	 * speedup of the syncer to try to avoid hitting the wait.
	 */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		count = waiting & HAMMER2_DIRTYCHAIN_MASK;

		limit = pmp->mp->mnt_nvnodelistsize / 10;
		if (limit < hammer2_limit_dirty_chains)
			limit = hammer2_limit_dirty_chains;
		if (limit < 1000)
			limit = 1000;

#if 0
		if ((int)(ticks - zzticks) > hz) {
			zzticks = ticks;
			kprintf("count %ld %ld\n", count, limit);
		}
#endif

		/*
		 * Block if there are too many dirty chains present; wait
		 * for the flush to clean some out.
		 */
		if (count > limit) {
			tsleep_interlock(&pmp->inmem_dirty_chains, 0);
			if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
					      waiting,
					      waiting |
					      HAMMER2_DIRTYCHAIN_WAITING)) {
				speedup_syncer(pmp->mp);
				tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
				       "chnmem", hz);
			}
			continue;	/* loop on success or fail */
		}

		/*
		 * Try to start an early flush before we are forced to block.
		 */
		if (count > limit * 7 / 10)
			speedup_syncer(pmp->mp);
		break;
	}
}

void
hammer2_pfs_memory_inc(hammer2_pfs_t *pmp)
{
	if (pmp) {
		atomic_add_int(&pmp->inmem_dirty_chains, 1);
	}
}

void
hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
{
	uint32_t waiting;

	if (pmp == NULL)
		return;

	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
				      waiting,
				      (waiting - 1) &
				      ~HAMMER2_DIRTYCHAIN_WAITING)) {
			break;
		}
	}

	if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
		wakeup(&pmp->inmem_dirty_chains);
}
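
/*
 * Illustrative pairing, assuming a hypothetical caller (this pattern is
 * not taken from this file): a path about to dirty a chain would throttle
 * with hammer2_pfs_memory_wait() and account for the new dirty chain with
 * hammer2_pfs_memory_inc(); hammer2_pfs_memory_wakeup() is the flush-side
 * counterpart once a dirty chain has been cleaned:
 *
 *	hammer2_pfs_memory_wait(pmp);	(may block; kicks the syncer)
 *	hammer2_pfs_memory_inc(pmp);	(account for the new dirty chain)
 *	...
 *	hammer2_pfs_memory_wakeup(pmp);	(on flush: drop the count and wake)
 */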

/*
 * Debugging
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *parent;

	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n",
		tab, tab, "", pfx,
		chain, chain->bref.type,
		chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s [%08x] (%s) refs=%d",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		  chain->data) ?
		 (char *)chain->data->ipdata.filename : "?"),
		chain->refs);

	parent = chain->parent;
	if (parent)
		kprintf("\n%*.*s p=%p [pflags %08x prefs %d",
			tab, tab, "",
			parent, parent->flags, parent->refs);
	if (RB_EMPTY(&chain->core.rbtree)) {
		kprintf("\n");
	} else {
		kprintf(" {\n");
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'a');
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}
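
/*
 * Example invocation (illustrative only; the starting chain and the count
 * are assumptions, not taken from this file): dump at most 1000 chains
 * from a device's volume root, e.g. from a debugger or ad-hoc debug hook:
 *
 *	int dumpcnt = 1000;
 *
 *	hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
 */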