/*-
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/uuid.h>
#include <sys/vfsops.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/objcache.h>

#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include <sys/mutex.h>
#include <sys/mutex2.h>

#include "hammer2.h"
#include "hammer2_disk.h"
#include "hammer2_mount.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define REPORT_REFS_ERRORS	1	/* XXX remove me */

MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");

struct hammer2_sync_info {
	hammer2_trans_t trans;
	int error;
	int waitfor;
};

TAILQ_HEAD(hammer2_mntlist, hammer2_mount);
static struct hammer2_mntlist hammer2_mntlist;
static struct lock hammer2_mntlk;

int hammer2_debug;
int hammer2_cluster_enable = 1;
int hammer2_hardlink_enable = 1;
long hammer2_iod_file_read;
long hammer2_iod_meta_read;
long hammer2_iod_indr_read;
long hammer2_iod_fmap_read;
long hammer2_iod_volu_read;
long hammer2_iod_file_write;
long hammer2_iod_meta_write;
long hammer2_iod_indr_write;
long hammer2_iod_fmap_write;
long hammer2_iod_volu_write;
long hammer2_ioa_file_read;
long hammer2_ioa_meta_read;
long hammer2_ioa_indr_read;
long hammer2_ioa_fmap_read;
long hammer2_ioa_volu_read;
long hammer2_ioa_fmap_write;
long hammer2_ioa_file_write;
long hammer2_ioa_meta_write;
long hammer2_ioa_indr_write;
long hammer2_ioa_volu_write;

MALLOC_DECLARE(C_BUFFER);
MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression.");

MALLOC_DECLARE(D_BUFFER);
MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression.");

MALLOC_DECLARE(W_BIOQUEUE);
MALLOC_DEFINE(W_BIOQUEUE, "wbioqueue", "Writing bio queue.");

MALLOC_DECLARE(W_MTX);
MALLOC_DEFINE(W_MTX, "wmutex", "Mutex for write thread.");

SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");

SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW,
	   &hammer2_debug, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW,
	   &hammer2_cluster_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW,
	   &hammer2_hardlink_enable, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW,
	    &hammer2_iod_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW,
	    &hammer2_iod_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW,
	    &hammer2_iod_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW,
	    &hammer2_iod_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW,
	    &hammer2_iod_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW,
	    &hammer2_iod_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW,
	    &hammer2_iod_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW,
	    &hammer2_iod_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
	    &hammer2_iod_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
	    &hammer2_iod_volu_write, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW,
	    &hammer2_ioa_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW,
	    &hammer2_ioa_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW,
	    &hammer2_ioa_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW,
	    &hammer2_ioa_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW,
	    &hammer2_ioa_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
	    &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
	    &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
	    &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
	    &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
	    &hammer2_ioa_volu_write, 0, "");

static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
				struct ucred *cred);
static int hammer2_remount(hammer2_mount_t *, char *, struct vnode *,
				struct ucred *);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_sync(struct mount *mp, int waitfor);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
				ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
				struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
				int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_mount_t *hmp);
static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

static void hammer2_write_thread(void *arg);

/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase, int ioflag, int pblksize,
				int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase, int ioflag,
				int pblksize, int *errorp, int comp_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
				hammer2_trans_t *trans, hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase,
				int ioflag, int pblksize, int *errorp);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase,
				int *errorp);
static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp,
				int ioflag, int pblksize, int *errorp);

static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
static void hammer2_autodmsg(kdmsg_msg_t *msg);

/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
	.vfs_init	= hammer2_vfs_init,
	.vfs_uninit	= hammer2_vfs_uninit,
	.vfs_sync	= hammer2_vfs_sync,
	.vfs_mount	= hammer2_vfs_mount,
	.vfs_unmount	= hammer2_vfs_unmount,
	.vfs_root 	= hammer2_vfs_root,
	.vfs_statfs	= hammer2_vfs_statfs,
	.vfs_statvfs	= hammer2_vfs_statvfs,
	.vfs_vget	= hammer2_vfs_vget,
	.vfs_vptofh	= hammer2_vfs_vptofh,
	.vfs_fhtovp	= hammer2_vfs_fhtovp,
	.vfs_checkexp	= hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

static
int
hammer2_vfs_init(struct vfsconf *conf)
{
	static struct objcache_malloc_args margs_read;
	static struct objcache_malloc_args margs_write;

	int error;

	error = 0;

	if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
		error = EINVAL;
	if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
		error = EINVAL;
	if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
		error = EINVAL;

	if (error)
		kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

	margs_read.objsize = 65536;
	margs_read.mtype = D_BUFFER;

	margs_write.objsize = 32768;
	margs_write.mtype = C_BUFFER;

	cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_read);
	cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_write);

	lockinit(&hammer2_mntlk, "mntlk", 0, 0);
	TAILQ_INIT(&hammer2_mntlist);

	return (error);
}

static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
	objcache_destroy(cache_buffer_read);
	objcache_destroy(cache_buffer_write);
	return 0;
}

/*
 * Mount or remount HAMMER2 filesystem from physical media
 *
 *	mountroot
 *		mp		mount point structure
 *		path		NULL
 *		data		<unused>
 *		cred		<unused>
 *
 *	mount
 *		mp		mount point structure
 *		path		path to mount point
 *		data		pointer to argument structure in user space
 *			volume	volume path (device@LABEL form)
 *			hflags	user mount flags
 *		cred		user credentials
 *
 * RETURNS:	0	Success
 *		!0	error number
 */
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
		  struct ucred *cred)
{
	struct hammer2_mount_info info;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_key_t key_next;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	struct vnode *devvp;
	struct nlookupdata nd;
	hammer2_chain_t *parent;
	hammer2_chain_t *schain;
	hammer2_chain_t *rchain;
	struct file *fp;
	char devstr[MNAMELEN];
	size_t size;
	size_t done;
	char *dev;
	char *label;
	int ronly = 1;
	int error;
	int cache_index;
	int i;

	hmp = NULL;
	pmp = NULL;
	dev = NULL;
	label = NULL;
	devvp = NULL;
	cache_index = -1;
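
	/*
	 * The userland volume path arrives in "device@LABEL" form (see
	 * the comment above).  A hypothetical example for illustration
	 * only: "/dev/ad0s1d@ROOT" is split below into dev = "/dev/ad0s1d"
	 * and label = "ROOT"; the label then selects the PFS under the
	 * device's super-root.
	 */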
	kprintf("hammer2_mount\n");

	if (path == NULL) {
		/*
		 * Root mount
		 */
		bzero(&info, sizeof(info));
		info.cluster_fd = -1;
		return (EOPNOTSUPP);
	} else {
		/*
		 * Non-root mount or updating a mount
		 */
		error = copyin(data, &info, sizeof(info));
		if (error)
			return (error);

		error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done);
		if (error)
			return (error);

		/* Extract device and label */
		dev = devstr;
		label = strchr(devstr, '@');
		if (label == NULL ||
		    ((label + 1) - dev) > done) {
			return (EINVAL);
		}
		*label = '\0';
		label++;
		if (*label == '\0')
			return (EINVAL);

		if (mp->mnt_flag & MNT_UPDATE) {
			/* Update mount */
			/* HAMMER2 implements NFS export via mountctl */
			pmp = MPTOPMP(mp);
			for (i = 0; i < pmp->cluster.nchains; ++i) {
				hmp = pmp->cluster.chains[i]->hmp;
				devvp = hmp->devvp;
				error = hammer2_remount(hmp, path, devvp,
							cred);
				if (error)
					break;
			}
			return error;
		}
	}

	/*
	 * PFS mount
	 *
	 * Lookup name and verify it refers to a block device.
	 */
	error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
	nlookup_done(&nd);

	if (error == 0) {
		if (vn_isdisk(devvp, &error))
			error = vfs_mountedon(devvp);
	}

	/*
	 * Determine if the device has already been mounted.  After this
	 * check hmp will be non-NULL if we are doing the second or more
	 * hammer2 mounts from the same device.
	 */
	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
	TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
		if (hmp->devvp == devvp)
			break;
	}

	/*
	 * Open the device if this isn't a secondary mount and construct
	 * the H2 device mount (hmp).
	 */
	if (hmp == NULL) {
		if (error == 0 && vcount(devvp) > 0)
			error = EBUSY;

		/*
		 * Now open the device
		 */
		if (error == 0) {
			ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = vinvalbuf(devvp, V_SAVE, 0, 0);
			if (error == 0) {
				error = VOP_OPEN(devvp,
						 ronly ? FREAD : FREAD | FWRITE,
						 FSCRED, NULL);
			}
			vn_unlock(devvp);
		}
		if (error && devvp) {
			vrele(devvp);
			devvp = NULL;
		}
		if (error) {
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			return error;
		}
		hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO);
		hmp->ronly = ronly;
		hmp->devvp = devvp;
		kmalloc_create(&hmp->mchain, "HAMMER2-chains");
		TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry);

		lockinit(&hmp->alloclk, "h2alloc", 0, 0);
		lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE);
		TAILQ_INIT(&hmp->transq);

		/*
		 * vchain setup. vchain.data is embedded.
		 * vchain.refs is initialized and will never drop to 0.
		 */
		hmp->vchain.hmp = hmp;
		hmp->vchain.refs = 1;
		hmp->vchain.data = (void *)&hmp->voldata;
		hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
		hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->vchain.delete_tid = HAMMER2_MAX_TID;
		hammer2_chain_core_alloc(NULL, &hmp->vchain, NULL);
		/* hmp->vchain.u.xxx is left NULL */
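
		/*
		 * (Sketch of the data_off encoding assumed here: the low
		 *  bits of bref.data_off carry the block radix, so
		 *  "0 | HAMMER2_PBUFRADIX" describes a 1 << HAMMER2_PBUFRADIX
		 *  byte block at device offset 0.  The write paths below
		 *  recover the byte offset by masking off
		 *  HAMMER2_OFF_MASK_RADIX.)
		 */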

		/*
		 * fchain setup.  fchain.data is embedded.
		 * fchain.refs is initialized and will never drop to 0.
		 *
		 * The data is not used but needs to be initialized to
		 * pass assertion muster.  We use this chain primarily
		 * as a placeholder for the freemap's top-level RBTREE
		 * so it does not interfere with the volume's topology
		 * RBTREE.
		 */
		hmp->fchain.hmp = hmp;
		hmp->fchain.refs = 1;
		hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset;
		hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP;
		hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->fchain.bref.methods =
			HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) |
			HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
		hmp->fchain.delete_tid = HAMMER2_MAX_TID;

		hammer2_chain_core_alloc(NULL, &hmp->fchain, NULL);
		/* hmp->fchain.u.xxx is left NULL */

		/*
		 * Install the volume header
		 */
		error = hammer2_install_volume_header(hmp);
		if (error) {
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return error;
		}

		/*
		 * First locate the super-root inode, which is key 0
		 * relative to the volume header's blockset.
		 *
		 * Then locate the root inode by scanning the directory
		 * keyspace represented by the label.
		 */
		parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
		schain = hammer2_chain_lookup(&parent, &key_dummy,
				      HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
				      &cache_index, 0);
		hammer2_chain_lookup_done(parent);
		if (schain == NULL) {
			kprintf("hammer2_mount: invalid super-root\n");
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EINVAL;
		}

		/*
		 * NOTE: inode_get sucks up schain's lock.
		 */
		atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT);
		hmp->sroot = hammer2_inode_get(NULL, NULL, schain);
		hammer2_inode_ref(hmp->sroot);
		hammer2_inode_unlock_ex(hmp->sroot, schain);
		schain = NULL;
		/* leave hmp->sroot with one ref */
	}

	/*
	 * Block device opened successfully, finish initializing the
	 * mount structure.
	 *
	 * From this point on we have to call hammer2_unmount() on failure.
	 */
	pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);

	kmalloc_create(&pmp->minode, "HAMMER2-inodes");
	kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");

	spin_init(&pmp->inum_spin);
	RB_INIT(&pmp->inum_tree);

	kdmsg_iocom_init(&pmp->iocom, pmp,
			 KDMSG_IOCOMF_AUTOCONN |
			 KDMSG_IOCOMF_AUTOSPAN |
			 KDMSG_IOCOMF_AUTOCIRC,
			 pmp->mmsg, hammer2_rcvdmsg);

	ccms_domain_init(&pmp->ccms_dom);
	++hmp->pmp_count;
	lockmgr(&hammer2_mntlk, LK_RELEASE);
	kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n",
		hmp, pmp, hmp->pmp_count);

	mp->mnt_flag = MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;	/* all entry pts are SMP */

	/*
	 * required mount structure initializations
	 */
	mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;

	/*
	 * Optional fields
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_data = (qaddr_t)pmp;
	pmp->mp = mp;

	/*
	 * Lookup mount point under the media-localized super-root.
	 */
	parent = hammer2_inode_lock_ex(hmp->sroot);
	lhc = hammer2_dirhash(label, strlen(label));
	rchain = hammer2_chain_lookup(&parent, &key_next,
				      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				      &cache_index, 0);
	while (rchain) {
		if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    strcmp(label, rchain->data->ipdata.filename) == 0) {
			break;
		}
		rchain = hammer2_chain_next(&parent, rchain, &key_next,
					    key_next,
					    lhc + HAMMER2_DIRHASH_LOMASK,
					    &cache_index, 0);
	}
	hammer2_inode_unlock_ex(hmp->sroot, parent);

	if (rchain == NULL) {
		kprintf("hammer2_mount: PFS label not found\n");
		--hmp->pmp_count;
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EINVAL;
	}
	if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
		hammer2_chain_unlock(rchain);
		kprintf("hammer2_mount: PFS label already mounted!\n");
		--hmp->pmp_count;
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EBUSY;
	}
#if 0
	if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
		kprintf("hammer2_mount: PFS label currently recycling\n");
		--hmp->pmp_count;
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EBUSY;
	}
#endif

	atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);

	/*
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 */
	hammer2_chain_ref(rchain);		/* for pmp->rchain */
	pmp->cluster.nchains = 1;
	pmp->cluster.chains[0] = rchain;
	pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
	hammer2_inode_ref(pmp->iroot);		/* ref for pmp->iroot */

	KKASSERT(rchain->pmp == NULL);		/* tracking pmp for rchain */
	rchain->pmp = pmp;
	atomic_add_long(&pmp->inmem_chains, 1);

	hammer2_inode_unlock_ex(pmp->iroot, rchain);

	kprintf("iroot %p\n", pmp->iroot);

	/*
	 * The logical file buffer bio write thread handles things
	 * like physical block assignment and compression.
	 */
	mtx_init(&pmp->wthread_mtx);
	bioq_init(&pmp->wthread_bioq);
	pmp->wthread_destroy = 0;
	lwkt_create(hammer2_write_thread, pmp,
		    &pmp->wthread_td, NULL, 0, -1, "hwrite-%s", label);

	/*
	 * Ref the cluster management messaging descriptor.  The mount
	 * program deals with the other end of the communications pipe.
	 */
	fp = holdfp(curproc->p_fd, info.cluster_fd, -1);
	if (fp == NULL) {
		kprintf("hammer2_mount: bad cluster_fd!\n");
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EBADF;
	}
	hammer2_cluster_reconnect(pmp, fp);

	/*
	 * Finish setup
	 */
	vfs_getnewfsid(mp);
	vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);

	copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
	copyinstr(path, mp->mnt_stat.f_mntonname,
		  sizeof(mp->mnt_stat.f_mntonname) - 1,
		  &size);

	/*
	 * Initial statfs to prime mnt_stat.
	 */
	hammer2_vfs_statfs(mp, &mp->mnt_stat, cred);

	return 0;
}

/*
 * Handle bioq for strategy write
 */
static
void
hammer2_write_thread(void *arg)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_trans_t trans;
	struct vnode *vp;
	hammer2_inode_t *last_ip;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	hammer2_chain_t **parentp;
	hammer2_inode_data_t *ipdata;
	hammer2_key_t lbase;
	int lblksize;
	int pblksize;
	int error;

	pmp = arg;

	mtx_lock(&pmp->wthread_mtx);
	while (pmp->wthread_destroy == 0) {
		if (bioq_first(&pmp->wthread_bioq) == NULL) {
			mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
				 0, "h2bioqw", 0);
		}
		last_ip = NULL;
		parent = NULL;
		parentp = &parent;

		while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
			mtx_unlock(&pmp->wthread_mtx);

			error = 0;
			bp = bio->bio_buf;
			vp = bp->b_vp;
			ip = VTOI(vp);

			/*
			 * Cache transaction for multi-buffer flush efficiency.
			 * Lock the ip separately for each buffer to allow
			 * interleaving with frontend writes.
			 */
			if (last_ip != ip) {
				if (last_ip)
					hammer2_trans_done(&trans);
				hammer2_trans_init(&trans, ip->pmp,
						   HAMMER2_TRANS_BUFCACHE);
				last_ip = ip;
			}
			parent = hammer2_inode_lock_ex(ip);

			/*
			 * Inode is modified, flush size and mtime changes
			 * to ensure that the file size remains consistent
			 * with the buffers being flushed.
			 */
			if (ip->flags & (HAMMER2_INODE_RESIZED |
					 HAMMER2_INODE_MTIME)) {
				hammer2_inode_fsync(&trans, ip, parentp);
			}
			ipdata = hammer2_chain_modify_ip(&trans, ip,
							 parentp, 0);
			lblksize = hammer2_calc_logical(ip, bio->bio_offset,
							&lbase, NULL);
			pblksize = hammer2_calc_physical(ip, lbase);
			hammer2_write_file_core(bp, &trans, ip, ipdata,
						parentp,
						lbase, IO_ASYNC,
						pblksize, &error);
			hammer2_inode_unlock_ex(ip, parent);
			if (error) {
				kprintf("hammer2: error in buffer write\n");
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bio);
			mtx_lock(&pmp->wthread_mtx);
		}

		/*
		 * Clean out transaction cache
		 */
		if (last_ip)
			hammer2_trans_done(&trans);
	}
	pmp->wthread_destroy = -1;
	wakeup(&pmp->wthread_destroy);

	mtx_unlock(&pmp->wthread_mtx);
}

/*
 * Return a chain suitable for I/O, creating the chain if necessary
 * and assigning its physical block.
 */
static
hammer2_chain_t *
hammer2_assign_physical(hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_chain_t **parentp,
			hammer2_key_t lbase, int pblksize, int *errorp)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_off_t pbase;
	hammer2_key_t key_dummy;
	int pradix = hammer2_getradix(pblksize);
	int cache_index = -1;

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	*errorp = 0;
	KKASSERT(pblksize >= HAMMER2_MIN_ALLOC);
retry:
	parent = *parentp;
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);	/* extra lock */
	chain = hammer2_chain_lookup(&parent, &key_dummy,
				     lbase, lbase,
				     &cache_index, HAMMER2_LOOKUP_NODATA);

	if (chain == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		*errorp = hammer2_chain_create(trans, &parent, &chain,
					       lbase, HAMMER2_PBUFRADIX,
					       HAMMER2_BREF_TYPE_DATA,
					       pblksize);
		if (chain == NULL) {
			hammer2_chain_lookup_done(parent);
			panic("hammer2_chain_create: par=%p error=%d\n",
				parent, *errorp);
			goto retry;
		}

		pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
		/*ip->delta_dcount += pblksize;*/
	} else {
		switch (chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode.  The
			 * caller is responsible for marking the inode
			 * modified and copying the data to the embedded
			 * area.
			 */
			pbase = NOOFFSET;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			if (chain->bytes != pblksize) {
				hammer2_chain_resize(trans, ip,
						     parent, &chain,
						     pradix,
						     HAMMER2_MODIFY_OPTDATA);
			}
			hammer2_chain_modify(trans, &chain,
					     HAMMER2_MODIFY_OPTDATA);
			pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			pbase = NOOFFSET;
			break;
		}
	}

	/*
	 * Cleanup.  If chain wound up being the inode (i.e. DIRECTDATA),
	 * we might have to replace *parentp.
	 */
	hammer2_chain_lookup_done(parent);
	if (chain) {
		if (*parentp != chain &&
		    (*parentp)->core == chain->core) {
			parent = *parentp;
			*parentp = chain;		/* eats lock */
			hammer2_chain_unlock(parent);
			hammer2_chain_lock(chain, 0);	/* need another */
		}
		/* else chain already locked for return */
	}
	return (chain);
}

/*
 * From hammer2_vnops.c.
 * The core write function which determines which path to take
 * depending on compression settings.
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
			hammer2_chain_t **parentp,
			hammer2_key_t lbase, int ioflag, int pblksize,
			int *errorp)
{
	hammer2_chain_t *chain;

	switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
	case HAMMER2_COMP_NONE:
		/*
		 * We have to assign physical storage to the buffer
		 * we intend to dirty or write now to avoid deadlocks
		 * in the strategy code later.
		 *
		 * This can return NOOFFSET for inode-embedded data.
		 * The strategy code will take care of it in that case.
		 */
		chain = hammer2_assign_physical(trans, ip, parentp,
						lbase, pblksize,
						errorp);
		hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
		if (chain)
			hammer2_chain_unlock(chain);
		break;
	case HAMMER2_COMP_AUTOZERO:
		/*
		 * Check for zero-fill only
		 */
		hammer2_zero_check_and_write(bp, trans, ip,
					     ipdata, parentp, lbase,
					     ioflag, pblksize, errorp);
		break;
	case HAMMER2_COMP_LZ4:
	case HAMMER2_COMP_ZLIB:
	default:
		/*
		 * Check for zero-fill and attempt compression.
		 */
		hammer2_compress_and_write(bp, trans, ip,
					   ipdata, parentp,
					   lbase, ioflag,
					   pblksize, errorp,
					   ipdata->comp_algo);
		break;
	}
	ipdata = &ip->chain->data->ipdata;	/* reload */
}

/*
 * From hammer2_vnops.c.
 * Generic function that performs the compression in the compression
 * write path.  The compression algorithm is determined by the settings
 * obtained from the inode.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
	hammer2_chain_t **parentp,
	hammer2_key_t lbase, int ioflag, int pblksize,
	int *errorp, int comp_algo)
{
	hammer2_chain_t *chain;
	int comp_size;
	int comp_block_size;
	char *comp_buffer;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
		return;
	}

	comp_size = 0;
	comp_buffer = NULL;

	KKASSERT(pblksize / 2 <= 32768);

	if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
		z_stream strm_compress;
		int comp_level;
		int ret;

		switch(HAMMER2_DEC_COMP(comp_algo)) {
		case HAMMER2_COMP_LZ4:
			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			comp_size = LZ4_compress_limitedOutput(
					bp->b_data,
					&comp_buffer[sizeof(int)],
					pblksize,
					pblksize / 2 - sizeof(int));
			/*
			 * We need to prefix with the size, LZ4
			 * doesn't do it for us.  Add the related
			 * overhead.
			 */
			*(int *)comp_buffer = comp_size;
			if (comp_size)
				comp_size += sizeof(int);
			break;
		case HAMMER2_COMP_ZLIB:
			comp_level = HAMMER2_DEC_LEVEL(comp_algo);
			if (comp_level == 0)
				comp_level = 6;	/* default zlib compression */
			else if (comp_level < 6)
				comp_level = 6;
			else if (comp_level > 9)
				comp_level = 9;
			ret = deflateInit(&strm_compress, comp_level);
			if (ret != Z_OK) {
				kprintf("HAMMER2 ZLIB: fatal error "
					"on deflateInit.\n");
			}

			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			strm_compress.next_in = bp->b_data;
			strm_compress.avail_in = pblksize;
			strm_compress.next_out = comp_buffer;
			strm_compress.avail_out = pblksize / 2;
			ret = deflate(&strm_compress, Z_FINISH);
			if (ret == Z_STREAM_END) {
				comp_size = pblksize / 2 -
					    strm_compress.avail_out;
			} else {
				comp_size = 0;
			}
			ret = deflateEnd(&strm_compress);
			break;
		default:
			kprintf("Error: Unknown compression method.\n");
			kprintf("Comp_method = %d.\n", comp_algo);
			break;
		}
	}

	if (comp_size == 0) {
		/*
		 * compression failed or turned off
		 */
		comp_block_size = pblksize;	/* safety */
		if (++ip->comp_heuristic > 128)
			ip->comp_heuristic = 8;
	} else {
		/*
		 * compression succeeded
		 */
		ip->comp_heuristic = 0;
		if (comp_size <= 1024) {
			comp_block_size = 1024;
		} else if (comp_size <= 2048) {
			comp_block_size = 2048;
		} else if (comp_size <= 4096) {
			comp_block_size = 4096;
		} else if (comp_size <= 8192) {
			comp_block_size = 8192;
		} else if (comp_size <= 16384) {
			comp_block_size = 16384;
		} else if (comp_size <= 32768) {
			comp_block_size = 32768;
		} else {
			panic("hammer2: WRITE PATH: "
			      "Weird comp_size value.");
			/* NOT REACHED */
			comp_block_size = pblksize;
		}
	}

	chain = hammer2_assign_physical(trans, ip, parentp,
					lbase, comp_block_size,
					errorp);
	ipdata = &ip->chain->data->ipdata;	/* RELOAD */

	if (*errorp) {
		kprintf("WRITE PATH: An error occurred while "
			"assigning physical space.\n");
		KKASSERT(chain == NULL);
	} else {
		/* Get device offset */
		hammer2_off_t pbase;
		hammer2_off_t pmask;
		hammer2_off_t peof;
		size_t boff;
		size_t psize;
		struct buf *dbp;
		int temp_check;

		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			KKASSERT(chain->data->ipdata.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
			break;
		case HAMMER2_BREF_TYPE_DATA:
			psize = hammer2_devblksize(chain->bytes);
			pmask = (hammer2_off_t)psize - 1;
			pbase = chain->bref.data_off & ~pmask;
			boff = chain->bref.data_off &
			       (HAMMER2_OFF_MASK & pmask);
			peof = (pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

			/*
			 * Optimize out the read-before-write
			 * if possible.
			 */
			if (comp_block_size == psize) {
				dbp = getblk(chain->hmp->devvp, pbase,
					     psize, 0, 0);
			} else {
				*errorp = bread(chain->hmp->devvp,
						pbase, psize, &dbp);
				if (*errorp) {
					kprintf("hammer2: WRITE PATH: "
						"dbp bread error\n");
					break;
				}
			}

			/*
			 * When loading the block make sure we don't
			 * leave garbage after the compressed data.
			 */
			if (comp_size) {
				chain->bref.methods =
					HAMMER2_ENC_COMP(comp_algo) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(comp_buffer, dbp->b_data + boff,
				      comp_size);
				if (comp_size != comp_block_size) {
					bzero(dbp->b_data + boff +
						comp_size,
					      comp_block_size -
						comp_size);
				}
			} else {
				chain->bref.methods =
					HAMMER2_ENC_COMP(
						HAMMER2_COMP_NONE) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(bp->b_data, dbp->b_data + boff,
				      pblksize);
			}

			/*
			 * Device buffer is now valid, chain is no
			 * longer in the initial state.
			 */
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_INITIAL);

			/* Now write the related dbp. */
			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				bwrite(dbp);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				bdwrite(dbp);
			*/
			} else if (ioflag & IO_ASYNC) {
				bawrite(dbp);
			} else if (hammer2_cluster_enable) {
				cluster_write(dbp, peof,
					      HAMMER2_PBUFSIZE,
					      4/*XXX*/);
			} else {
				bdwrite(dbp);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
			      chain->bref.type);
			/* NOT REACHED */
			break;
		}

		hammer2_chain_unlock(chain);
	}
	if (comp_buffer)
		objcache_put(cache_buffer_write, comp_buffer);
}

/*
 * Function that performs zero-checking and writing without compression;
 * it corresponds to the default zero-checking path.
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
	hammer2_chain_t **parentp,
	hammer2_key_t lbase, int ioflag, int pblksize, int *errorp)
{
	hammer2_chain_t *chain;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
	} else {
		chain = hammer2_assign_physical(trans, ip, parentp,
						lbase, pblksize, errorp);
		hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
		if (chain)
			hammer2_chain_unlock(chain);
	}
}

/*
 * A function to test whether a block of data contains only zeros,
 * returns TRUE (non-zero) if the block is all zeros.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
	size_t i;

	for (i = 0; i < bytes; i += sizeof(long)) {
		if (*(const long *)(buf + i) != 0)
			return (0);
	}
	return (1);
}

/*
 * Function to "write" a block that contains only zeros.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip,
	hammer2_inode_data_t *ipdata, hammer2_chain_t **parentp,
	hammer2_key_t lbase, int *errorp __unused)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_dummy;
	int cache_index = -1;

	parent = hammer2_chain_lookup_init(*parentp, 0);

	chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
				     &cache_index, HAMMER2_LOOKUP_NODATA);
	if (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			bzero(chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
		} else {
			hammer2_chain_delete(trans, chain, 0);
		}
		hammer2_chain_unlock(chain);
	}
	hammer2_chain_lookup_done(parent);
}

/*
 * Function to write the data as it is, without performing any sort of
 * compression.  This function is used in the no-compression path and
 * in the default zero-checking path.
 */
static
void
hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag,
	int pblksize, int *errorp)
{
	hammer2_off_t pbase;
	hammer2_off_t pmask;
	hammer2_off_t peof;
	struct buf *dbp;
	size_t boff;
	size_t psize;
	int error;
	int temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

	KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		KKASSERT(chain->data->ipdata.op_flags &
			 HAMMER2_OPFLAG_DIRECTDATA);
		KKASSERT(bp->b_loffset == 0);
		bcopy(bp->b_data, chain->data->ipdata.u.data,
		      HAMMER2_EMBEDDED_BYTES);
		error = 0;
		break;
	case HAMMER2_BREF_TYPE_DATA:
		psize = hammer2_devblksize(chain->bytes);
		pmask = (hammer2_off_t)psize - 1;
		pbase = chain->bref.data_off & ~pmask;
		boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask);
		peof = (pbase + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;

		if (psize == pblksize) {
			dbp = getblk(chain->hmp->devvp, pbase,
				     psize, 0, 0);
			error = 0;
		} else {
			error = bread(chain->hmp->devvp, pbase, psize, &dbp);
			if (error) {
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
		}

		chain->bref.methods = HAMMER2_ENC_COMP(HAMMER2_COMP_NONE) +
				      HAMMER2_ENC_CHECK(temp_check);
		bcopy(bp->b_data, dbp->b_data + boff, chain->bytes);

		/*
		 * Device buffer is now valid, chain is no
		 * longer in the initial state.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

		if (ioflag & IO_SYNC) {
			/*
			 * Synchronous I/O requested.
			 */
			bwrite(dbp);
		/*
		} else if ((ioflag & IO_DIRECT) && loff + n == pblksize) {
			bdwrite(dbp);
		*/
		} else if (ioflag & IO_ASYNC) {
			bawrite(dbp);
		} else if (hammer2_cluster_enable) {
			cluster_write(dbp, peof, HAMMER2_PBUFSIZE, 4/*XXX*/);
		} else {
			bdwrite(dbp);
		}
		break;
	default:
		panic("hammer2_write_bp: bad chain type %d\n",
		      chain->bref.type);
		/* NOT REACHED */
		error = 0;
		break;
	}
	*errorp = error;
}

static
int
hammer2_remount(hammer2_mount_t *hmp, char *path, struct vnode *devvp,
		struct ucred *cred)
{
	return (0);
}

static
int
hammer2_vfs_unmount(struct mount *mp, int mntflags)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_chain_t *rchain;
	int flags;
	int error = 0;
	int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
	int dumpcnt;
	int i;
	struct vnode *devvp;

	pmp = MPTOPMP(mp);

	ccms_domain_uninit(&pmp->ccms_dom);
	kdmsg_iocom_uninit(&pmp->iocom);	/* XXX chain dependency */

	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);

	/*
	 * If mount initialization proceeded far enough we must flush
	 * its vnodes.
	 */
	if (mntflags & MNT_FORCE)
		flags = FORCECLOSE;
	else
		flags = 0;
	if (pmp->iroot) {
		error = vflush(mp, 0, flags);
		if (error)
			goto failed;
	}

	if (pmp->wthread_td) {
		mtx_lock(&pmp->wthread_mtx);
		pmp->wthread_destroy = 1;
		wakeup(&pmp->wthread_bioq);
		while (pmp->wthread_destroy != -1) {
			mtxsleep(&pmp->wthread_destroy,
				 &pmp->wthread_mtx, 0,
				 "umount-sleep", 0);
		}
		mtx_unlock(&pmp->wthread_mtx);
		pmp->wthread_td = NULL;
	}

	for (i = 0; i < pmp->cluster.nchains; ++i) {
		hmp = pmp->cluster.chains[i]->hmp;

		hammer2_mount_exlock(hmp);

		--hmp->pmp_count;
		kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n",
			hmp, hmp->pmp_count);

		/*
		 * Flush any left over chains.  The voldata lock is only used
		 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
		 */
		hammer2_voldata_lock(hmp);
		if ((hmp->vchain.flags | hmp->fchain.flags) &
		    (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_SUBMODIFIED)) {
			hammer2_voldata_unlock(hmp, 0);
			hammer2_vfs_sync(mp, MNT_WAIT);
			hammer2_vfs_sync(mp, MNT_WAIT);
		} else {
			hammer2_voldata_unlock(hmp, 0);
		}
		if (hmp->pmp_count == 0) {
			if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
						 HAMMER2_CHAIN_SUBMODIFIED)) {
				kprintf("hammer2_unmount: chains left over "
					"after final sync\n");
				if (hammer2_debug & 0x0010)
					Debugger("entered debugger");
			}
		}

		/*
		 * Cleanup the root and super-root chain elements
		 * (which should be clean).
		 */
		if (pmp->iroot) {
#if REPORT_REFS_ERRORS
			if (pmp->iroot->refs != 1)
				kprintf("PMP->IROOT %p REFS WRONG %d\n",
					pmp->iroot, pmp->iroot->refs);
#else
			KKASSERT(pmp->iroot->refs == 1);
#endif
			/* ref for pmp->iroot */
			hammer2_inode_drop(pmp->iroot);
			pmp->iroot = NULL;
		}

		rchain = pmp->cluster.chains[i];
		if (rchain) {
			atomic_clear_int(&rchain->flags,
					 HAMMER2_CHAIN_MOUNTED);
#if REPORT_REFS_ERRORS
			if (rchain->refs != 1)
				kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
					rchain, rchain->refs);
#else
			KKASSERT(rchain->refs == 1);
#endif
			hammer2_chain_drop(rchain);
			pmp->cluster.chains[i] = NULL;
		}

		/*
		 * If no PFS's left drop the master hammer2_mount for the
		 * device.
		 */
		if (hmp->pmp_count == 0) {
			if (hmp->sroot) {
				hammer2_inode_drop(hmp->sroot);
				hmp->sroot = NULL;
			}

			/*
			 * Finish up with the device vnode
			 */
			if ((devvp = hmp->devvp) != NULL) {
				vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
				hmp->devvp = NULL;
				VOP_CLOSE(devvp,
					  (ronly ? FREAD : FREAD|FWRITE));
				vrele(devvp);
				devvp = NULL;
			}

			/*
			 * Final drop of embedded freemap root chain to
			 * clean up fchain.core (fchain structure is not
			 * flagged ALLOCATED so it is cleaned out and then
			 * left to rot).
			 */
			hammer2_chain_drop(&hmp->fchain);

			/*
			 * Final drop of embedded volume root chain to clean
			 * up vchain.core (vchain structure is not flagged
			 * ALLOCATED so it is cleaned out and then left to
			 * rot).
			 */
			dumpcnt = 50;
			hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
			hammer2_mount_unlock(hmp);
			hammer2_chain_drop(&hmp->vchain);

			TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
			kmalloc_destroy(&hmp->mchain);
			kfree(hmp, M_HAMMER2);
		} else {
			hammer2_mount_unlock(hmp);
		}
	}

	pmp->mp = NULL;
	mp->mnt_data = NULL;

	kmalloc_destroy(&pmp->mmsg);
	kmalloc_destroy(&pmp->minode);

	kfree(pmp, M_HAMMER2);
	error = 0;

failed:
	lockmgr(&hammer2_mntlk, LK_RELEASE);

	return (error);
}

static
int
hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
		 ino_t ino, struct vnode **vpp)
{
	kprintf("hammer2_vget\n");
	return (EOPNOTSUPP);
}

static
int
hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
{
	hammer2_pfsmount_t *pmp;
	hammer2_chain_t *parent;
	int error;
	struct vnode *vp;

	pmp = MPTOPMP(mp);
	if (pmp->iroot == NULL) {
		*vpp = NULL;
		error = EINVAL;
	} else {
		parent = hammer2_inode_lock_sh(pmp->iroot);
		vp = hammer2_igetv(pmp->iroot, &error);
		hammer2_inode_unlock_sh(pmp->iroot, parent);
		*vpp = vp;
		if (vp == NULL)
			kprintf("vnodefail\n");
	}

	return (error);
}
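
/*
 * (Note on units: both statfs paths below report block counts in
 *  HAMMER2_PBUFSIZE units, derived from the volume header's
 *  allocator_size and allocator_free fields; f_bavail simply mirrors
 *  f_bfree since quotas are not incorporated yet.)
 */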

/*
 * Filesystem status
 *
 * XXX incorporate ipdata->inode_quota and data_quota
 */
static
int
hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->cluster.nchains >= 1);
	hmp = pmp->cluster.chains[0]->hmp;	/* XXX */

	mp->mnt_stat.f_files = pmp->inode_count;
	mp->mnt_stat.f_ffree = 0;
	mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;

	*sbp = mp->mnt_stat;
	return (0);
}

static
int
hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->cluster.nchains >= 1);
	hmp = pmp->cluster.chains[0]->hmp;	/* XXX */

	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_files = pmp->inode_count;
	mp->mnt_vstat.f_ffree = 0;
	mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;

	*sbp = mp->mnt_vstat;
	return (0);
}

/*
 * Sync the entire filesystem; this is called from the filesystem syncer
 * process periodically and whenever a user calls sync(1) on the hammer
 * mountpoint.
 *
 * Currently is actually called from the syncer! \o/
 *
 * This task will have to snapshot the state of the dirty inode chain.
 * From that, it will have to make sure all of the inodes on the dirty
 * chain have IO initiated.  We make sure that io is initiated for the root
 * block.
 *
 * If waitfor is set, we wait for media to acknowledge the new rootblock.
 *
 * THINKS: side A vs side B, to have sync not stall all I/O?
 */
static
int
hammer2_vfs_sync(struct mount *mp, int waitfor)
{
	struct hammer2_sync_info info;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	int flags;
	int error;
	int total_error;
	int i;

	pmp = MPTOPMP(mp);

	/*
	 * We can't acquire locks on existing vnodes while in a transaction
	 * without risking a deadlock.  This assumes that vfsync() can be
	 * called without the vnode locked (which it can in DragonFly).
	 * Otherwise we'd have to implement a multi-pass or flag the lock
	 * failures and retry.
	 */
	/*flags = VMSC_GETVP;*/
	flags = 0;
	if (waitfor & MNT_LAZY)
		flags |= VMSC_ONEPASS;

	hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH);

	/*
	 * vfsync the vnodes. XXX
	 */
	info.error = 0;
	info.waitfor = MNT_NOWAIT;
	vmntvnodescan(mp, flags | VMSC_NOWAIT,
		      hammer2_sync_scan1,
		      hammer2_sync_scan2, &info);
	if (info.error == 0 && (waitfor & MNT_WAIT)) {
		info.waitfor = waitfor;
		vmntvnodescan(mp, flags,
			      hammer2_sync_scan1,
			      hammer2_sync_scan2, &info);
	}
#if 0
	if (waitfor == MNT_WAIT) {
		/* XXX */
	} else {
		/* XXX */
	}
#endif

	total_error = 0;
	for (i = 0; i < pmp->cluster.nchains; ++i) {
		hmp = pmp->cluster.chains[i]->hmp;

		/*
		 * Media mounts have two 'roots', vchain for the topology
		 * and fchain for the free block table.  Flush both.
		 *
		 * Note that the topology and free block table are handled
		 * independently, so the free block table can wind up being
		 * ahead of the topology.  We depend on the bulk free scan
		 * code to deal with any loose ends.
		 */
		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
					 HAMMER2_CHAIN_SUBMODIFIED)) {
			hammer2_chain_flush(&info.trans, &hmp->vchain);
		}
		hammer2_chain_unlock(&hmp->vchain);

		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
					 HAMMER2_CHAIN_SUBMODIFIED)) {
			/* this will also modify vchain as a side effect */
			hammer2_chain_flush(&info.trans, &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);

		error = 0;

		/*
		 * We can't safely flush the volume header until we have
		 * flushed any device buffers which have built up.
		 *
		 * XXX this isn't being incremental
		 */
		vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
		vn_unlock(hmp->devvp);

		/*
		 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
		 * volume header needs synchronization via hmp->volsync.
		 *
		 * XXX synchronize the flag & data with only this flush XXX
		 */
		if (error == 0 &&
		    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
			struct buf *bp;

			/*
			 * Synchronize the disk before flushing the volume
			 * header.
			 */
			bp = getpbuf(NULL);
			bp->b_bio1.bio_offset = 0;
			bp->b_bufsize = 0;
			bp->b_bcount = 0;
			bp->b_cmd = BUF_CMD_FLUSH;
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			vn_strategy(hmp->devvp, &bp->b_bio1);
			biowait(&bp->b_bio1, "h2vol");
			relpbuf(bp, NULL);

			/*
			 * Then we can safely flush the version of the
			 * volume header synchronized by the flush code.
			 */
			i = hmp->volhdrno + 1;
			if (i >= HAMMER2_NUM_VOLHDRS)
				i = 0;
			if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
			    hmp->volsync.volu_size) {
				i = 0;
			}
			kprintf("sync volhdr %d %jd\n",
				i, (intmax_t)hmp->volsync.volu_size);
			bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
				    HAMMER2_PBUFSIZE, 0, 0);
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_VOLUMESYNC);
			bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
			bawrite(bp);
			hmp->volhdrno = i;
		}
		if (error)
			total_error = error;
	}

	hammer2_trans_done(&info.trans);
	return (total_error);
}

/*
 * Sync passes.
 *
 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code
 *	 won't flush on those flags.  The syncer code above will do a
 *	 general meta-data flush globally that will catch these flags.
 */
static int
hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	hammer2_inode_t *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}
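
/*
 * (Division of labor assumed here: hammer2_sync_scan1() is the unlocked
 *  fast-pass callback for vmntvnodescan(); returning -1 skips vnodes
 *  with no dirty inode state and an empty dirty-buffer tree, so only
 *  the remaining vnodes reach the locked hammer2_sync_scan2() pass
 *  below.)
 */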
1817 * 1818 * WARNING: The vfsync interacts with the buffer cache and might 1819 * block, we can't hold the inode lock at that time. 1820 */ 1821 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1822 if (ip->vp) 1823 vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL); 1824 parent = hammer2_inode_lock_ex(ip); 1825 hammer2_chain_flush(&info->trans, parent); 1826 hammer2_inode_unlock_ex(ip, parent); 1827 error = 0; 1828 #if 0 1829 error = VOP_FSYNC(vp, MNT_NOWAIT, 0); 1830 #endif 1831 if (error) 1832 info->error = error; 1833 return(0); 1834 } 1835 1836 static 1837 int 1838 hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp) 1839 { 1840 return (0); 1841 } 1842 1843 static 1844 int 1845 hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 1846 struct fid *fhp, struct vnode **vpp) 1847 { 1848 return (0); 1849 } 1850 1851 static 1852 int 1853 hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 1854 int *exflagsp, struct ucred **credanonp) 1855 { 1856 return (0); 1857 } 1858 1859 /* 1860 * Support code for hammer2_mount(). Read, verify, and install the volume 1861 * header into the HMP 1862 * 1863 * XXX read four volhdrs and use the one with the highest TID whos CRC 1864 * matches. 1865 * 1866 * XXX check iCRCs. 1867 * 1868 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to 1869 * nonexistant locations. 1870 * 1871 * XXX Record selected volhdr and ring updates to each of 4 volhdrs 1872 */ 1873 static 1874 int 1875 hammer2_install_volume_header(hammer2_mount_t *hmp) 1876 { 1877 hammer2_volume_data_t *vd; 1878 struct buf *bp; 1879 hammer2_crc32_t crc0, crc, bcrc0, bcrc; 1880 int error_reported; 1881 int error; 1882 int valid; 1883 int i; 1884 1885 error_reported = 0; 1886 error = 0; 1887 valid = 0; 1888 bp = NULL; 1889 1890 /* 1891 * There are up to 4 copies of the volume header (syncs iterate 1892 * between them so there is no single master). We don't trust the 1893 * volu_size field so we don't know precisely how large the filesystem 1894 * is, so depend on the OS to return an error if we go beyond the 1895 * block device's EOF. 
1896 */ 1897 for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) { 1898 error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 1899 HAMMER2_VOLUME_BYTES, &bp); 1900 if (error) { 1901 brelse(bp); 1902 bp = NULL; 1903 continue; 1904 } 1905 1906 vd = (struct hammer2_volume_data *) bp->b_data; 1907 if ((vd->magic != HAMMER2_VOLUME_ID_HBO) && 1908 (vd->magic != HAMMER2_VOLUME_ID_ABO)) { 1909 brelse(bp); 1910 bp = NULL; 1911 continue; 1912 } 1913 1914 if (vd->magic == HAMMER2_VOLUME_ID_ABO) { 1915 /* XXX: Reversed-endianness filesystem */ 1916 kprintf("hammer2: reverse-endian filesystem detected"); 1917 brelse(bp); 1918 bp = NULL; 1919 continue; 1920 } 1921 1922 crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0]; 1923 crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF, 1924 HAMMER2_VOLUME_ICRC0_SIZE); 1925 bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1]; 1926 bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF, 1927 HAMMER2_VOLUME_ICRC1_SIZE); 1928 if ((crc0 != crc) || (bcrc0 != bcrc)) { 1929 kprintf("hammer2 volume header crc " 1930 "mismatch copy #%d %08x/%08x\n", 1931 i, crc0, crc); 1932 error_reported = 1; 1933 brelse(bp); 1934 bp = NULL; 1935 continue; 1936 } 1937 if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) { 1938 valid = 1; 1939 hmp->voldata = *vd; 1940 hmp->volhdrno = i; 1941 } 1942 brelse(bp); 1943 bp = NULL; 1944 } 1945 if (valid) { 1946 hmp->volsync = hmp->voldata; 1947 error = 0; 1948 if (error_reported || bootverbose || 1) { /* 1/DEBUG */ 1949 kprintf("hammer2: using volume header #%d\n", 1950 hmp->volhdrno); 1951 } 1952 } else { 1953 error = EINVAL; 1954 kprintf("hammer2: no valid volume headers found!\n"); 1955 } 1956 return (error); 1957 } 1958 1959 /* 1960 * Reconnect using the passed file pointer. The caller must ref the 1961 * fp for us. 1962 */ 1963 void 1964 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) 1965 { 1966 hammer2_inode_data_t *ipdata; 1967 hammer2_chain_t *parent; 1968 hammer2_mount_t *hmp; 1969 size_t name_len; 1970 1971 hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 1972 1973 /* 1974 * Closes old comm descriptor, kills threads, cleans up 1975 * states, then installs the new descriptor and creates 1976 * new threads. 1977 */ 1978 kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2"); 1979 1980 /* 1981 * Setup LNK_CONN fields for autoinitiated state machine 1982 */ 1983 parent = hammer2_inode_lock_ex(pmp->iroot); 1984 ipdata = &parent->data->ipdata; 1985 pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid; 1986 pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid; 1987 pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type; 1988 pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 1989 pmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type; 1990 1991 /* 1992 * Filter adjustment. Clients do not need visibility into other 1993 * clients (otherwise millions of clients would present a serious 1994 * problem). The fs_label also serves to restrict the namespace. 
	pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2;
	pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
	switch (ipdata->pfs_type) {
	case DMSG_PFSTYPE_CLIENT:
		pmp->iocom.auto_lnk_conn.peer_mask &=
				~(1LLU << DMSG_PFSTYPE_CLIENT);
		break;
	default:
		break;
	}

	name_len = ipdata->name_len;
	if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label))
		name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1;
	bcopy(ipdata->filename,
	      pmp->iocom.auto_lnk_conn.fs_label,
	      name_len);
	pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;

	/*
	 * Set up LNK_SPAN fields for the autoinitiated state machine.
	 */
	pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid;
	pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid;
	pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type;
	pmp->iocom.auto_lnk_span.peer_type = hmp->voldata.peer_type;
	pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
	name_len = ipdata->name_len;
	if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label))
		name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1;
	bcopy(ipdata->filename,
	      pmp->iocom.auto_lnk_span.fs_label,
	      name_len);
	pmp->iocom.auto_lnk_span.fs_label[name_len] = 0;
	hammer2_inode_unlock_ex(pmp->iroot, parent);

	kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg);
}

static int
hammer2_rcvdmsg(kdmsg_msg_t *msg)
{
	switch (msg->any.head.cmd & DMSGF_TRANSMASK) {
	case DMSG_DBG_SHELL:
		/*
		 * (non-transaction)
		 * Execute shell command (not supported atm).
		 */
		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		break;
	case DMSG_DBG_SHELL | DMSGF_REPLY:
		/*
		 * (non-transaction)
		 */
		if (msg->aux_data) {
			msg->aux_data[msg->aux_size - 1] = 0;
			kprintf("HAMMER2 DBG: %s\n", msg->aux_data);
		}
		break;
	default:
		/*
		 * Unsupported message received.  We only need to reply
		 * if it's a transaction, in order to close our end.
		 * Ignore any one-way messages and any further messages
		 * associated with the transaction.
		 *
		 * NOTE: This case also includes DMSG_LNK_ERROR messages
		 *	 which might be one-way; replying to those would
		 *	 cause an infinite ping-pong.
		 */
		if (msg->any.head.cmd & DMSGF_CREATE)
			kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		break;
	}
	return(0);
}

/*
 * This function is called after KDMSG has automatically handled processing
 * of a LNK layer message (typically CONN, SPAN, or CIRC).
 *
 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages, which
 * advertise all available hammer2 super-root volumes.
 */
static void
hammer2_autodmsg(kdmsg_msg_t *msg)
{
	hammer2_pfsmount_t *pmp = msg->iocom->handle;
	hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp;	/* XXX */
	int copyid;

	/*
	 * We only care about replies to our LNK_CONN auto-request.  kdmsg
	 * has already processed the reply, we use this callback as a shim
	 * to know when we can advertise available super-root volumes.
	 */
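	/*
	 * Reading aid (intent as understood from the check below, not an
	 * authoritative protocol note): masking with DMSGF_TRANSMASK keeps
	 * only the command plus the transaction begin/reply bits, so the
	 * early return filters out everything except the CREATE|REPLY open
	 * of our own LNK_CONN transaction, as well as stray replies that
	 * carry no state.
	 */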
	if ((msg->any.head.cmd & DMSGF_TRANSMASK) !=
	    (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) ||
	    msg->state == NULL) {
		return;
	}

	kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd);

	if (msg->any.head.cmd & DMSGF_CREATE) {
		kprintf("HAMMER2: VOLDATA DUMP\n");

		/*
		 * Dump the configuration stored in the volume header.
		 */
		hammer2_voldata_lock(hmp);
		for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) {
			if (hmp->voldata.copyinfo[copyid].copyid == 0)
				continue;
			hammer2_volconf_update(pmp, copyid);
		}
		hammer2_voldata_unlock(hmp, 0);
	}
	if ((msg->any.head.cmd & DMSGF_DELETE) &&
	    msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) {
		kprintf("HAMMER2: CONN WAS TERMINATED\n");
	}
}

/*
 * Volume configuration updates are passed onto the userland service
 * daemon via the open LNK_CONN transaction.
 */
void
hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index)
{
	hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp;	/* XXX */
	kdmsg_msg_t *msg;

	/* XXX interlock against connection state termination */
	kprintf("volconf update %p\n", pmp->iocom.conn_state);
	if (pmp->iocom.conn_state) {
		kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n");
		msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state,
					    DMSG_LNK_VOLCONF, NULL, NULL);
		msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index];
		msg->any.lnk_volconf.mediaid = hmp->voldata.fsid;
		msg->any.lnk_volconf.index = index;
		kdmsg_msg_write(msg);
	}
}

void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp)
{
	hammer2_chain_layer_t *layer;
	hammer2_chain_t *scan;
	hammer2_chain_t *first_parent;

	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	first_parent = chain->core ? TAILQ_FIRST(&chain->core->ownerq) : NULL;
	kprintf("%*.*schain %p.%d [%08x][core=%p fp=%p] (%s) "
		"np=%p dt=%s refs=%d",
		tab, tab, "",
		chain, chain->bref.type, chain->flags,
		chain->core,
		first_parent,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		  chain->data) ? (char *)chain->data->ipdata.filename : "?"),
		(first_parent ? TAILQ_NEXT(chain, core_entry) : NULL),
		(chain->delete_tid == HAMMER2_MAX_TID ? "max" : "fls"),
		chain->refs);
	if (first_parent)
		kprintf(" [fpflags %08x fprefs %d]",
			first_parent->flags,
			first_parent->refs);
	if (chain->core == NULL || TAILQ_EMPTY(&chain->core->layerq))
		kprintf("\n");
	else
		kprintf(" {\n");
	if (chain->core) {
		/* guard: chain->core may be NULL, as checked above */
		TAILQ_FOREACH(layer, &chain->core->layerq, entry) {
			RB_FOREACH(scan, hammer2_chain_tree, &layer->rbtree) {
				hammer2_dump_chain(scan, tab + 4, countp);
			}
		}
	}
	if (chain->core && !TAILQ_EMPTY(&chain->core->layerq)) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}
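/*
 * Example usage of hammer2_dump_chain() (a hypothetical debug hook, not
 * part of this file's interface): bound the recursion with a line budget
 * so a large chain topology cannot flood the console.  The countp budget
 * decrements per chain printed and the dump truncates with "..." when it
 * hits zero.
 */
#if 0
static void
hammer2_debug_dump_pfs(hammer2_pfsmount_t *pmp)
{
	int count = 1000;	/* print at most ~1000 chains, then "..." */

	hammer2_dump_chain(pmp->cluster.chains[0], 0, &count);
}
#endif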