1 /*- 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/nlookup.h> 39 #include <sys/vnode.h> 40 #include <sys/mount.h> 41 #include <sys/fcntl.h> 42 #include <sys/buf.h> 43 #include <sys/uuid.h> 44 #include <sys/vfsops.h> 45 #include <sys/sysctl.h> 46 #include <sys/socket.h> 47 #include <sys/objcache.h> 48 49 #include <sys/proc.h> 50 #include <sys/namei.h> 51 #include <sys/mountctl.h> 52 #include <sys/dirent.h> 53 #include <sys/uio.h> 54 55 #include <sys/mutex.h> 56 #include <sys/mutex2.h> 57 58 #include "hammer2.h" 59 #include "hammer2_disk.h" 60 #include "hammer2_mount.h" 61 62 #include "hammer2.h" 63 #include "hammer2_lz4.h" 64 65 #include "zlib/hammer2_zlib.h" 66 67 #define REPORT_REFS_ERRORS 1 /* XXX remove me */ 68 69 MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache"); 70 71 struct hammer2_sync_info { 72 hammer2_trans_t trans; 73 int error; 74 int waitfor; 75 }; 76 77 TAILQ_HEAD(hammer2_mntlist, hammer2_mount); 78 static struct hammer2_mntlist hammer2_mntlist; 79 static struct lock hammer2_mntlk; 80 81 int hammer2_debug; 82 int hammer2_cluster_enable = 1; 83 int hammer2_hardlink_enable = 1; 84 long hammer2_iod_file_read; 85 long hammer2_iod_meta_read; 86 long hammer2_iod_indr_read; 87 long hammer2_iod_fmap_read; 88 long hammer2_iod_volu_read; 89 long hammer2_iod_file_write; 90 long hammer2_iod_meta_write; 91 long hammer2_iod_indr_write; 92 long hammer2_iod_fmap_write; 93 long hammer2_iod_volu_write; 94 long hammer2_ioa_file_read; 95 long hammer2_ioa_meta_read; 96 long hammer2_ioa_indr_read; 97 long hammer2_ioa_fmap_read; 98 long hammer2_ioa_volu_read; 99 long hammer2_ioa_fmap_write; 100 long hammer2_ioa_file_write; 101 long hammer2_ioa_meta_write; 102 long hammer2_ioa_indr_write; 103 long hammer2_ioa_volu_write; 104 105 MALLOC_DECLARE(C_BUFFER); 106 MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression."); 107 108 MALLOC_DECLARE(D_BUFFER); 109 MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for 
decompression."); 110 111 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 112 113 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 114 &hammer2_debug, 0, ""); 115 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 116 &hammer2_cluster_enable, 0, ""); 117 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 118 &hammer2_hardlink_enable, 0, ""); 119 120 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 121 &hammer2_iod_file_read, 0, ""); 122 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 123 &hammer2_iod_meta_read, 0, ""); 124 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 125 &hammer2_iod_indr_read, 0, ""); 126 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW, 127 &hammer2_iod_fmap_read, 0, ""); 128 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW, 129 &hammer2_iod_volu_read, 0, ""); 130 131 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 132 &hammer2_iod_file_write, 0, ""); 133 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 134 &hammer2_iod_meta_write, 0, ""); 135 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 136 &hammer2_iod_indr_write, 0, ""); 137 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW, 138 &hammer2_iod_fmap_write, 0, ""); 139 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 140 &hammer2_iod_volu_write, 0, ""); 141 142 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 143 &hammer2_ioa_file_read, 0, ""); 144 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 145 &hammer2_ioa_meta_read, 0, ""); 146 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 147 &hammer2_ioa_indr_read, 0, ""); 148 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW, 149 &hammer2_ioa_fmap_read, 0, ""); 150 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW, 151 &hammer2_ioa_volu_read, 0, ""); 152 153 
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
	   &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
	   &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
	   &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
	   &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
	   &hammer2_ioa_volu_write, 0, "");

/*
 * Forward declarations for the VFS entry points registered in
 * hammer2_vfsops below, plus internal helpers.
 */
static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
				struct ucred *cred);
static int hammer2_remount(hammer2_mount_t *, char *, struct vnode *,
				struct ucred *);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_sync(struct mount *mp, int waitfor);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
				ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
				struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
				int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_mount_t *hmp);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

/* Backend thread servicing the logical-buffer write bioq */
static void hammer2_write_thread(void *arg);

/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase, int ioflag, int pblksize,
				int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase, int ioflag,
				int pblksize, int *errorp, int comp_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
				hammer2_trans_t *trans, hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase,
				int ioflag, int pblksize, int *errorp);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_chain_t **parentp,
				hammer2_key_t lbase,
				int *errorp);
static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp,
				int ioflag, int pblksize, int *errorp);

/* Cluster-messaging (kdmsg) receive and auto-reply handlers */
static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
static void hammer2_autodmsg(kdmsg_msg_t *msg);


/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
	.vfs_init	= hammer2_vfs_init,
	.vfs_uninit = hammer2_vfs_uninit,
	.vfs_sync	= hammer2_vfs_sync,
	.vfs_mount	= hammer2_vfs_mount,
	.vfs_unmount	= hammer2_vfs_unmount,
	.vfs_root 	= hammer2_vfs_root,
	.vfs_statfs	= hammer2_vfs_statfs,
	.vfs_statvfs	= hammer2_vfs_statvfs,
	.vfs_vget	= hammer2_vfs_vget,
	.vfs_vptofh	= hammer2_vfs_vptofh,
	.vfs_fhtovp	= hammer2_vfs_fhtovp,
	.vfs_checkexp	= hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

/*
 * Module initialization.
 *
 * Sanity checks that the on-media structure sizes compiled into this
 * module match the constants in the disk format, then creates the
 * shared object caches used by the compression write path
 * (cache_buffer_write, 32KB C_BUFFER objects) and the decompression
 * read path (cache_buffer_read, 64KB D_BUFFER objects), and
 * initializes the global device-mount list and its lock.
 *
 * Returns 0 on success or EINVAL on a structure size mismatch.
 *
 * NOTE(review): on a size mismatch the object caches and mntlist are
 * still initialized before the error is returned -- confirm uninit is
 * invoked in that case so the caches are not leaked.
 */
static
int
hammer2_vfs_init(struct vfsconf *conf)
{
	static struct objcache_malloc_args margs_read;
	static struct objcache_malloc_args margs_write;

	int error;

	error = 0;

	if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
		error = EINVAL;
	if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
		error = EINVAL;
	if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
		error = EINVAL;

	if (error)
		kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

	margs_read.objsize = 65536;
	margs_read.mtype = D_BUFFER;

	margs_write.objsize = 32768;
	margs_write.mtype = C_BUFFER;

	cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_read);
	cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_write);

	lockinit(&hammer2_mntlk, "mntlk", 0, 0);
	TAILQ_INIT(&hammer2_mntlist);

	return (error);
}

/*
 * Module teardown: destroy the buffer object caches created by
 * hammer2_vfs_init().
 */
static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
	objcache_destroy(cache_buffer_read);
	objcache_destroy(cache_buffer_write);
	return 0;
}

/*
 * Mount or
remount HAMMER2 fileystem from physical media 300 * 301 * mountroot 302 * mp mount point structure 303 * path NULL 304 * data <unused> 305 * cred <unused> 306 * 307 * mount 308 * mp mount point structure 309 * path path to mount point 310 * data pointer to argument structure in user space 311 * volume volume path (device@LABEL form) 312 * hflags user mount flags 313 * cred user credentials 314 * 315 * RETURNS: 0 Success 316 * !0 error number 317 */ 318 static 319 int 320 hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 321 struct ucred *cred) 322 { 323 struct hammer2_mount_info info; 324 hammer2_pfsmount_t *pmp; 325 hammer2_mount_t *hmp; 326 hammer2_key_t key_next; 327 hammer2_key_t key_dummy; 328 hammer2_key_t lhc; 329 struct vnode *devvp; 330 struct nlookupdata nd; 331 hammer2_chain_t *parent; 332 hammer2_chain_t *schain; 333 hammer2_chain_t *rchain; 334 struct file *fp; 335 char devstr[MNAMELEN]; 336 size_t size; 337 size_t done; 338 char *dev; 339 char *label; 340 int ronly = 1; 341 int error; 342 int cache_index; 343 int i; 344 345 hmp = NULL; 346 pmp = NULL; 347 dev = NULL; 348 label = NULL; 349 devvp = NULL; 350 cache_index = -1; 351 352 kprintf("hammer2_mount\n"); 353 354 if (path == NULL) { 355 /* 356 * Root mount 357 */ 358 bzero(&info, sizeof(info)); 359 info.cluster_fd = -1; 360 return (EOPNOTSUPP); 361 } else { 362 /* 363 * Non-root mount or updating a mount 364 */ 365 error = copyin(data, &info, sizeof(info)); 366 if (error) 367 return (error); 368 369 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 370 if (error) 371 return (error); 372 373 /* Extract device and label */ 374 dev = devstr; 375 label = strchr(devstr, '@'); 376 if (label == NULL || 377 ((label + 1) - dev) > done) { 378 return (EINVAL); 379 } 380 *label = '\0'; 381 label++; 382 if (*label == '\0') 383 return (EINVAL); 384 385 if (mp->mnt_flag & MNT_UPDATE) { 386 /* Update mount */ 387 /* HAMMER2 implements NFS export via mountctl */ 388 pmp = MPTOPMP(mp); 389 
for (i = 0; i < pmp->cluster.nchains; ++i) { 390 hmp = pmp->cluster.chains[i]->hmp; 391 devvp = hmp->devvp; 392 error = hammer2_remount(hmp, path, devvp, cred); 393 if (error) 394 break; 395 } 396 return error; 397 } 398 } 399 400 /* 401 * PFS mount 402 * 403 * Lookup name and verify it refers to a block device. 404 */ 405 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 406 if (error == 0) 407 error = nlookup(&nd); 408 if (error == 0) 409 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 410 nlookup_done(&nd); 411 412 if (error == 0) { 413 if (vn_isdisk(devvp, &error)) 414 error = vfs_mountedon(devvp); 415 } 416 417 /* 418 * Determine if the device has already been mounted. After this 419 * check hmp will be non-NULL if we are doing the second or more 420 * hammer2 mounts from the same device. 421 */ 422 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 423 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 424 if (hmp->devvp == devvp) 425 break; 426 } 427 428 /* 429 * Open the device if this isn't a secondary mount and construct 430 * the H2 device mount (hmp). 431 */ 432 if (hmp == NULL) { 433 if (error == 0 && vcount(devvp) > 0) 434 error = EBUSY; 435 436 /* 437 * Now open the device 438 */ 439 if (error == 0) { 440 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 441 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 442 error = vinvalbuf(devvp, V_SAVE, 0, 0); 443 if (error == 0) { 444 error = VOP_OPEN(devvp, 445 ronly ? 
FREAD : FREAD | FWRITE, 446 FSCRED, NULL); 447 } 448 vn_unlock(devvp); 449 } 450 if (error && devvp) { 451 vrele(devvp); 452 devvp = NULL; 453 } 454 if (error) { 455 lockmgr(&hammer2_mntlk, LK_RELEASE); 456 return error; 457 } 458 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 459 hmp->ronly = ronly; 460 hmp->devvp = devvp; 461 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 462 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 463 464 lockinit(&hmp->alloclk, "h2alloc", 0, 0); 465 lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); 466 TAILQ_INIT(&hmp->transq); 467 468 /* 469 * vchain setup. vchain.data is embedded. 470 * vchain.refs is initialized and will never drop to 0. 471 */ 472 hmp->vchain.hmp = hmp; 473 hmp->vchain.refs = 1; 474 hmp->vchain.data = (void *)&hmp->voldata; 475 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 476 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 477 hmp->vchain.delete_tid = HAMMER2_MAX_TID; 478 hammer2_chain_core_alloc(NULL, &hmp->vchain, NULL); 479 /* hmp->vchain.u.xxx is left NULL */ 480 481 /* 482 * fchain setup. fchain.data is embedded. 483 * fchain.refs is initialized and will never drop to 0. 484 * 485 * The data is not used but needs to be initialized to 486 * pass assertion muster. We use this chain primarily 487 * as a placeholder for the freemap's top-level RBTREE 488 * so it does not interfere with the volume's topology 489 * RBTREE. 
490 */ 491 hmp->fchain.hmp = hmp; 492 hmp->fchain.refs = 1; 493 hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset; 494 hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP; 495 hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 496 hmp->fchain.bref.methods = 497 HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) | 498 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 499 hmp->fchain.delete_tid = HAMMER2_MAX_TID; 500 501 hammer2_chain_core_alloc(NULL, &hmp->fchain, NULL); 502 /* hmp->fchain.u.xxx is left NULL */ 503 504 /* 505 * Install the volume header 506 */ 507 error = hammer2_install_volume_header(hmp); 508 if (error) { 509 hammer2_vfs_unmount(mp, MNT_FORCE); 510 return error; 511 } 512 513 /* 514 * First locate the super-root inode, which is key 0 515 * relative to the volume header's blockset. 516 * 517 * Then locate the root inode by scanning the directory keyspace 518 * represented by the label. 519 */ 520 parent = hammer2_chain_lookup_init(&hmp->vchain, 0); 521 schain = hammer2_chain_lookup(&parent, &key_dummy, 522 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 523 &cache_index, 0); 524 hammer2_chain_lookup_done(parent); 525 if (schain == NULL) { 526 kprintf("hammer2_mount: invalid super-root\n"); 527 hammer2_vfs_unmount(mp, MNT_FORCE); 528 return EINVAL; 529 } 530 531 /* 532 * NOTE: inode_get sucks up schain's lock. 533 */ 534 atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT); 535 hmp->sroot = hammer2_inode_get(NULL, NULL, schain); 536 hammer2_inode_ref(hmp->sroot); 537 hammer2_inode_unlock_ex(hmp->sroot, schain); 538 schain = NULL; 539 /* leave hmp->sroot with one ref */ 540 } 541 542 /* 543 * Block device opened successfully, finish initializing the 544 * mount structure. 545 * 546 * From this point on we have to call hammer2_unmount() on failure. 
547 */ 548 pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO); 549 550 kmalloc_create(&pmp->minode, "HAMMER2-inodes"); 551 kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); 552 553 spin_init(&pmp->inum_spin); 554 RB_INIT(&pmp->inum_tree); 555 556 kdmsg_iocom_init(&pmp->iocom, pmp, 557 KDMSG_IOCOMF_AUTOCONN | 558 KDMSG_IOCOMF_AUTOSPAN | 559 KDMSG_IOCOMF_AUTOCIRC, 560 pmp->mmsg, hammer2_rcvdmsg); 561 562 ccms_domain_init(&pmp->ccms_dom); 563 ++hmp->pmp_count; 564 lockmgr(&hammer2_mntlk, LK_RELEASE); 565 kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n", 566 hmp, pmp, hmp->pmp_count); 567 568 mp->mnt_flag = MNT_LOCAL; 569 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 570 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 571 572 /* 573 * required mount structure initializations 574 */ 575 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 576 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 577 578 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 579 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 580 581 /* 582 * Optional fields 583 */ 584 mp->mnt_iosize_max = MAXPHYS; 585 mp->mnt_data = (qaddr_t)pmp; 586 pmp->mp = mp; 587 588 /* 589 * Lookup mount point under the media-localized super-root. 
590 */ 591 parent = hammer2_inode_lock_ex(hmp->sroot); 592 lhc = hammer2_dirhash(label, strlen(label)); 593 rchain = hammer2_chain_lookup(&parent, &key_next, 594 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 595 &cache_index, 0); 596 while (rchain) { 597 if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE && 598 strcmp(label, rchain->data->ipdata.filename) == 0) { 599 break; 600 } 601 rchain = hammer2_chain_next(&parent, rchain, &key_next, 602 key_next, 603 lhc + HAMMER2_DIRHASH_LOMASK, 604 &cache_index, 0); 605 } 606 hammer2_inode_unlock_ex(hmp->sroot, parent); 607 608 if (rchain == NULL) { 609 kprintf("hammer2_mount: PFS label not found\n"); 610 --hmp->pmp_count; 611 hammer2_vfs_unmount(mp, MNT_FORCE); 612 return EINVAL; 613 } 614 if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { 615 hammer2_chain_unlock(rchain); 616 kprintf("hammer2_mount: PFS label already mounted!\n"); 617 --hmp->pmp_count; 618 hammer2_vfs_unmount(mp, MNT_FORCE); 619 return EBUSY; 620 } 621 #if 0 622 if (rchain->flags & HAMMER2_CHAIN_RECYCLE) { 623 kprintf("hammer2_mount: PFS label currently recycling\n"); 624 --hmp->pmp_count; 625 hammer2_vfs_unmount(mp, MNT_FORCE); 626 return EBUSY; 627 } 628 #endif 629 630 atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 631 632 /* 633 * NOTE: *_get() integrates chain's lock into the inode lock. 634 */ 635 hammer2_chain_ref(rchain); /* for pmp->rchain */ 636 pmp->cluster.nchains = 1; 637 pmp->cluster.chains[0] = rchain; 638 pmp->iroot = hammer2_inode_get(pmp, NULL, rchain); 639 hammer2_inode_ref(pmp->iroot); /* ref for pmp->iroot */ 640 641 KKASSERT(rchain->pmp == NULL); /* tracking pmp for rchain */ 642 rchain->pmp = pmp; 643 atomic_add_long(&pmp->inmem_chains, 1); 644 645 hammer2_inode_unlock_ex(pmp->iroot, rchain); 646 647 kprintf("iroot %p\n", pmp->iroot); 648 649 /* 650 * The logical file buffer bio write thread handles things 651 * like physical block assignment and compression. 
652 */ 653 mtx_init(&pmp->wthread_mtx); 654 bioq_init(&pmp->wthread_bioq); 655 pmp->wthread_destroy = 0; 656 lwkt_create(hammer2_write_thread, pmp, 657 &pmp->wthread_td, NULL, 0, -1, "hwrite-%s", label); 658 659 /* 660 * Ref the cluster management messaging descriptor. The mount 661 * program deals with the other end of the communications pipe. 662 */ 663 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 664 if (fp == NULL) { 665 kprintf("hammer2_mount: bad cluster_fd!\n"); 666 hammer2_vfs_unmount(mp, MNT_FORCE); 667 return EBADF; 668 } 669 hammer2_cluster_reconnect(pmp, fp); 670 671 /* 672 * Finish setup 673 */ 674 vfs_getnewfsid(mp); 675 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 676 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 677 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 678 679 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 680 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 681 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 682 copyinstr(path, mp->mnt_stat.f_mntonname, 683 sizeof(mp->mnt_stat.f_mntonname) - 1, 684 &size); 685 686 /* 687 * Initial statfs to prime mnt_stat. 
688 */ 689 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 690 691 return 0; 692 } 693 694 /* 695 * Handle bioq for strategy write 696 */ 697 static 698 void 699 hammer2_write_thread(void *arg) 700 { 701 hammer2_pfsmount_t *pmp; 702 struct bio *bio; 703 struct buf *bp; 704 hammer2_trans_t trans; 705 struct vnode *vp; 706 hammer2_inode_t *last_ip; 707 hammer2_inode_t *ip; 708 hammer2_chain_t *parent; 709 hammer2_chain_t **parentp; 710 hammer2_inode_data_t *ipdata; 711 hammer2_key_t lbase; 712 int lblksize; 713 int pblksize; 714 int error; 715 716 pmp = arg; 717 718 mtx_lock(&pmp->wthread_mtx); 719 while (pmp->wthread_destroy == 0) { 720 if (bioq_first(&pmp->wthread_bioq) == NULL) { 721 mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx, 722 0, "h2bioqw", 0); 723 } 724 last_ip = NULL; 725 parent = NULL; 726 parentp = &parent; 727 728 while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) { 729 mtx_unlock(&pmp->wthread_mtx); 730 731 error = 0; 732 bp = bio->bio_buf; 733 vp = bp->b_vp; 734 ip = VTOI(vp); 735 736 /* 737 * Cache transaction for multi-buffer flush efficiency. 738 * Lock the ip separately for each buffer to allow 739 * interleaving with frontend writes. 740 */ 741 if (last_ip != ip) { 742 if (last_ip) 743 hammer2_trans_done(&trans); 744 hammer2_trans_init(&trans, ip->pmp, 745 HAMMER2_TRANS_BUFCACHE); 746 last_ip = ip; 747 } 748 parent = hammer2_inode_lock_ex(ip); 749 750 /* 751 * Inode is modified, flush size and mtime changes 752 * to ensure that the file size remains consistent 753 * with the buffers being flushed. 
			 */
			if (ip->flags & (HAMMER2_INODE_RESIZED |
					 HAMMER2_INODE_MTIME)) {
				hammer2_inode_fsync(&trans, ip, parentp);
			}
			ipdata = hammer2_chain_modify_ip(&trans, ip,
							 parentp, 0);
			lblksize = hammer2_calc_logical(ip, bio->bio_offset,
							&lbase, NULL);
			pblksize = hammer2_calc_physical(ip, lbase);
			hammer2_write_file_core(bp, &trans, ip, ipdata,
						parentp,
						lbase, IO_ASYNC,
						pblksize, &error);
			hammer2_inode_unlock_ex(ip, parent);
			if (error) {
				/* report failure to the originator */
				kprintf("hammer2: error in buffer write\n");
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bio);
			mtx_lock(&pmp->wthread_mtx);
		}

		/*
		 * Clean out transaction cache
		 */
		if (last_ip)
			hammer2_trans_done(&trans);
	}
	/* signal unmount path waiting for this thread to exit */
	pmp->wthread_destroy = -1;
	wakeup(&pmp->wthread_destroy);

	mtx_unlock(&pmp->wthread_mtx);
}

/*
 * Return a chain suitable for I/O, creating the chain if necessary
 * and assigning its physical block.
 *
 * Looks up (or creates) the DATA chain at logical offset lbase under
 * *parentp, sized to pblksize.  The returned chain is locked; *errorp
 * is set on failure.  *parentp may be replaced when the lookup resolves
 * to the inode itself (DIRECTDATA case).
 */
static
hammer2_chain_t *
hammer2_assign_physical(hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_chain_t **parentp,
			hammer2_key_t lbase, int pblksize, int *errorp)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_off_t pbase;
	hammer2_key_t key_dummy;
	int pradix = hammer2_getradix(pblksize);
	int cache_index = -1;

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	*errorp = 0;
	KKASSERT(pblksize >= HAMMER2_MIN_ALLOC);
retry:
	parent = *parentp;
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */
	chain = hammer2_chain_lookup(&parent, &key_dummy,
				     lbase, lbase,
				     &cache_index, HAMMER2_LOOKUP_NODATA);

	if (chain == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		*errorp = hammer2_chain_create(trans, &parent, &chain,
					       lbase, HAMMER2_PBUFRADIX,
					       HAMMER2_BREF_TYPE_DATA,
					       pblksize);
		if (chain == NULL) {
			/*
			 * NOTE(review): the goto retry below is
			 * unreachable after panic(); presumably it was
			 * meant for a recoverable (e.g. EAGAIN) case --
			 * confirm intended behavior.
			 */
			hammer2_chain_lookup_done(parent);
			panic("hammer2_chain_create: par=%p error=%d\n",
				parent, *errorp);
			goto retry;
		}

		pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
		/*ip->delta_dcount += pblksize;*/
	} else {
		switch (chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode.  The
			 * caller is responsible for marking the inode
			 * modified and copying the data to the embedded
			 * area.
			 */
			pbase = NOOFFSET;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			/* resize existing block if the target size changed */
			if (chain->bytes != pblksize) {
				hammer2_chain_resize(trans, ip,
						     parent, &chain,
						     pradix,
						     HAMMER2_MODIFY_OPTDATA);
			}
			hammer2_chain_modify(trans, &chain,
					     HAMMER2_MODIFY_OPTDATA);
			pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			pbase = NOOFFSET;
			break;
		}
	}

	/*
	 * Cleanup.  If chain wound up being the inode (i.e. DIRECTDATA),
	 * we might have to replace *parentp.
	 */
	hammer2_chain_lookup_done(parent);
	if (chain) {
		if (*parentp != chain &&
		    (*parentp)->core == chain->core) {
			parent = *parentp;
			*parentp = chain;		/* eats lock */
			hammer2_chain_unlock(parent);
			hammer2_chain_lock(chain, 0);	/* need another */
		}
		/* else chain already locked for return */
	}
	return (chain);
}

/*
 * From hammer2_vnops.c.
 * The core write function which determines which path to take
 * depending on compression settings.
 *
 * Dispatches on the inode's comp_algo: no compression, zero-check only
 * (AUTOZERO), or zero-check plus compression (LZ4/ZLIB/default).
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
			hammer2_chain_t **parentp,
			hammer2_key_t lbase, int ioflag, int pblksize,
			int *errorp)
{
	hammer2_chain_t *chain;

	switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
	case HAMMER2_COMP_NONE:
		/*
		 * We have to assign physical storage to the buffer
		 * we intend to dirty or write now to avoid deadlocks
		 * in the strategy code later.
		 *
		 * This can return NOOFFSET for inode-embedded data.
		 * The strategy code will take care of it in that case.
		 */
		chain = hammer2_assign_physical(trans, ip, parentp,
						lbase, pblksize,
						errorp);
		hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
		if (chain)
			hammer2_chain_unlock(chain);
		break;
	case HAMMER2_COMP_AUTOZERO:
		/*
		 * Check for zero-fill only
		 */
		hammer2_zero_check_and_write(bp, trans, ip,
					     ipdata, parentp, lbase,
					     ioflag, pblksize, errorp);
		break;
	case HAMMER2_COMP_LZ4:
	case HAMMER2_COMP_ZLIB:
	default:
		/*
		 * Check for zero-fill and attempt compression.
		 */
		hammer2_compress_and_write(bp, trans, ip,
					   ipdata, parentp,
					   lbase, ioflag,
					   pblksize, errorp,
					   ipdata->comp_algo);
		break;
	}
	ipdata = &ip->chain->data->ipdata;	/* reload */
}

/*
 * From hammer2_vnops.c
 * Generic function that will perform the compression in compression
 * write path. The compression algorithm is determined by the settings
 * obtained from inode.
 *
 * Zero-filled blocks short-circuit to zero_write().  A per-inode
 * heuristic (comp_heuristic) skips the compression attempt for a while
 * after repeated failures.  The compressed (or raw, on failure) data is
 * copied into a device buffer and written according to ioflag.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
	hammer2_chain_t **parentp,
	hammer2_key_t lbase, int ioflag, int pblksize,
	int *errorp, int comp_algo)
{
	hammer2_chain_t *chain;
	int comp_size;
	int comp_block_size;
	char *comp_buffer;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
		return;
	}

	comp_size = 0;
	comp_buffer = NULL;

	/* compressed output is capped at half a block (32KB cache objs) */
	KKASSERT(pblksize / 2 <= 32768);

	if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
		z_stream strm_compress;
		int comp_level;
		int ret;

		switch(HAMMER2_DEC_COMP(comp_algo)) {
		case HAMMER2_COMP_LZ4:
			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			comp_size = LZ4_compress_limitedOutput(
					bp->b_data,
					&comp_buffer[sizeof(int)],
					pblksize,
					pblksize / 2 - sizeof(int));
			/*
			 * We need to prefix with the size, LZ4
			 * doesn't do it for us.  Add the related
			 * overhead.
			 */
			*(int *)comp_buffer = comp_size;
			if (comp_size)
				comp_size += sizeof(int);
			break;
		case HAMMER2_COMP_ZLIB:
			comp_level = HAMMER2_DEC_LEVEL(comp_algo);
			if (comp_level == 0)
				comp_level = 6;	/* default zlib compression */
			else if (comp_level < 6)
				comp_level = 6;
			else if (comp_level > 9)
				comp_level = 9;
			/*
			 * NOTE(review): strm_compress is an uninitialized
			 * stack z_stream and a deflateInit() failure is
			 * only logged, not handled -- presumably the
			 * in-kernel zlib port tolerates this; confirm.
			 */
			ret = deflateInit(&strm_compress, comp_level);
			if (ret != Z_OK) {
				kprintf("HAMMER2 ZLIB: fatal error "
					"on deflateInit.\n");
			}

			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			strm_compress.next_in = bp->b_data;
			strm_compress.avail_in = pblksize;
			strm_compress.next_out = comp_buffer;
			strm_compress.avail_out = pblksize / 2;
			ret = deflate(&strm_compress, Z_FINISH);
			if (ret == Z_STREAM_END) {
				comp_size = pblksize / 2 -
					    strm_compress.avail_out;
			} else {
				comp_size = 0;
			}
			ret = deflateEnd(&strm_compress);
			break;
		default:
			kprintf("Error: Unknown compression method.\n");
			kprintf("Comp_method = %d.\n", comp_algo);
			break;
		}
	}

	if (comp_size == 0) {
		/*
		 * compression failed or turned off
		 */
		comp_block_size = pblksize;	/* safety */
		if (++ip->comp_heuristic > 128)
			ip->comp_heuristic = 8;
	} else {
		/*
		 * compression succeeded; round the allocation up to the
		 * next power-of-2 block size
		 */
		ip->comp_heuristic = 0;
		if (comp_size <= 1024) {
			comp_block_size = 1024;
		} else if (comp_size <= 2048) {
			comp_block_size = 2048;
		} else if (comp_size <= 4096) {
			comp_block_size = 4096;
		} else if (comp_size <= 8192) {
			comp_block_size = 8192;
		} else if (comp_size <= 16384) {
			comp_block_size = 16384;
		} else if (comp_size <= 32768) {
			comp_block_size = 32768;
		} else {
			panic("hammer2: WRITE PATH: "
			      "Weird comp_size value.");
			/* NOT REACHED */
			comp_block_size = pblksize;
		}
	}

	chain = hammer2_assign_physical(trans, ip, parentp,
					lbase, comp_block_size,
					errorp);
	ipdata = &ip->chain->data->ipdata;	/* RELOAD */

	if (*errorp) {
		kprintf("WRITE PATH: An error occurred while "
			"assigning physical space.\n");
		KKASSERT(chain == NULL);
	} else {
		/* Get device offset */
		hammer2_off_t pbase;
		hammer2_off_t pmask;
		hammer2_off_t peof;
		size_t boff;
		size_t psize;
		struct buf *dbp;
		int temp_check;

		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/* data is embedded directly in the inode */
			KKASSERT(chain->data->ipdata.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
			break;
		case HAMMER2_BREF_TYPE_DATA:
			psize = hammer2_devblksize(chain->bytes);
			pmask = (hammer2_off_t)psize - 1;
			pbase = chain->bref.data_off & ~pmask;
			boff = chain->bref.data_off &
			       (HAMMER2_OFF_MASK & pmask);
			peof = (pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

			/*
			 * Optimize out the read-before-write
			 * if possible.
			 */
			if (comp_block_size == psize) {
				dbp = getblk(chain->hmp->devvp, pbase,
					     psize, 0, 0);
			} else {
				*errorp = bread(chain->hmp->devvp,
						pbase, psize, &dbp);
				if (*errorp) {
					kprintf("hammer2: WRITE PATH: "
						"dbp bread error\n");
					break;
				}
			}

			/*
			 * When loading the block make sure we don't
			 * leave garbage after the compressed data.
			 */
			if (comp_size) {
				chain->bref.methods =
					HAMMER2_ENC_COMP(comp_algo) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(comp_buffer, dbp->b_data + boff,
				      comp_size);
				if (comp_size != comp_block_size) {
					bzero(dbp->b_data + boff +
						comp_size,
					      comp_block_size -
					      comp_size);
				}
			} else {
				/* store uncompressed on failure */
				chain->bref.methods =
					HAMMER2_ENC_COMP(
						HAMMER2_COMP_NONE) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(bp->b_data, dbp->b_data + boff,
				      pblksize);
			}

			/*
			 * Device buffer is now valid, chain is no
			 * longer in the initial state.
			 */
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_INITIAL);

			/* Now write the related bdp. */
			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				bwrite(dbp);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				bdwrite(dbp);
			*/
			} else if (ioflag & IO_ASYNC) {
				bawrite(dbp);
			} else if (hammer2_cluster_enable) {
				cluster_write(dbp, peof,
					      HAMMER2_PBUFSIZE,
					      4/*XXX*/);
			} else {
				bdwrite(dbp);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
				chain->bref.type);
			/* NOT REACHED */
			break;
		}

		hammer2_chain_unlock(chain);
	}
	if (comp_buffer)
		objcache_put(cache_buffer_write, comp_buffer);
}

/*
 * Function that performs zero-checking and writing without compression,
 * it corresponds to default zero-checking path.
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
	hammer2_chain_t **parentp,
	hammer2_key_t lbase, int ioflag, int pblksize, int *errorp)
{
	hammer2_chain_t *chain;

	if (test_block_zeros(bp->b_data, pblksize)) {
		/* all-zero block: record as a hole instead of writing */
		zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
	} else {
		chain = hammer2_assign_physical(trans, ip, parentp,
						lbase, pblksize, errorp);
		hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
		if (chain)
			hammer2_chain_unlock(chain);
	}
}

/*
 * A function to test whether a block of data contains only zeros,
 * returns TRUE (non-zero) if the block is all zeros.
 *
 * NOTE(review): scans sizeof(long) bytes at a time, so it assumes
 * bytes is a multiple of sizeof(long) and buf is suitably aligned;
 * callers pass block-sized, buffer-cache-backed data -- confirm.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
	size_t i;

	for (i = 0; i < bytes; i += sizeof(long)) {
		if (*(const long *)(buf + i) != 0)
			return (0);
	}
	return (1);
}

/*
 * Function to "write" a block that contains only zeros.
 *
 * No media write occurs.  If a chain already exists at lbase it is
 * either zeroed in place (inode-embedded data) or deleted so the
 * logical offset reads back as a hole.  errorp is unused.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip,
	hammer2_inode_data_t *ipdata, hammer2_chain_t **parentp,
	hammer2_key_t lbase, int *errorp __unused)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_dummy;
	int cache_index = -1;

	parent = hammer2_chain_lookup_init(*parentp, 0);

	/* NODATA: we only need the meta-data, not the block contents */
	chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
				     &cache_index, HAMMER2_LOOKUP_NODATA);
	if (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			/* embedded data: zero it in place */
			bzero(chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
		} else {
			/* regular data block: delete to create a hole */
			hammer2_chain_delete(trans, chain, 0);
		}
		hammer2_chain_unlock(chain);
	}
	hammer2_chain_lookup_done(parent);
}

/*
 * Function to write the data as it is, without performing any sort of
 * compression. This function is used in path without compression and
 * default zero-checking path.
1263 */ 1264 static 1265 void 1266 hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag, 1267 int pblksize, int *errorp) 1268 { 1269 hammer2_off_t pbase; 1270 hammer2_off_t pmask; 1271 hammer2_off_t peof; 1272 struct buf *dbp; 1273 size_t boff; 1274 size_t psize; 1275 int error; 1276 int temp_check = HAMMER2_DEC_CHECK(chain->bref.methods); 1277 1278 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); 1279 1280 switch(chain->bref.type) { 1281 case HAMMER2_BREF_TYPE_INODE: 1282 KKASSERT(chain->data->ipdata.op_flags & 1283 HAMMER2_OPFLAG_DIRECTDATA); 1284 KKASSERT(bp->b_loffset == 0); 1285 bcopy(bp->b_data, chain->data->ipdata.u.data, 1286 HAMMER2_EMBEDDED_BYTES); 1287 error = 0; 1288 break; 1289 case HAMMER2_BREF_TYPE_DATA: 1290 psize = hammer2_devblksize(chain->bytes); 1291 pmask = (hammer2_off_t)psize - 1; 1292 pbase = chain->bref.data_off & ~pmask; 1293 boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask); 1294 peof = (pbase + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64; 1295 1296 if (psize == pblksize) { 1297 dbp = getblk(chain->hmp->devvp, pbase, 1298 psize, 0, 0); 1299 error = 0; 1300 } else { 1301 error = bread(chain->hmp->devvp, pbase, psize, &dbp); 1302 if (error) { 1303 kprintf("hammer2: WRITE PATH: " 1304 "dbp bread error\n"); 1305 break; 1306 } 1307 } 1308 1309 chain->bref.methods = HAMMER2_ENC_COMP(HAMMER2_COMP_NONE) + 1310 HAMMER2_ENC_CHECK(temp_check); 1311 bcopy(bp->b_data, dbp->b_data + boff, chain->bytes); 1312 1313 /* 1314 * Device buffer is now valid, chain is no 1315 * longer in the initial state. 1316 */ 1317 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1318 1319 if (ioflag & IO_SYNC) { 1320 /* 1321 * Synchronous I/O requested. 
1322 */ 1323 bwrite(dbp); 1324 /* 1325 } else if ((ioflag & IO_DIRECT) && loff + n == pblksize) { 1326 bdwrite(dbp); 1327 */ 1328 } else if (ioflag & IO_ASYNC) { 1329 bawrite(dbp); 1330 } else if (hammer2_cluster_enable) { 1331 cluster_write(dbp, peof, HAMMER2_PBUFSIZE, 4/*XXX*/); 1332 } else { 1333 bdwrite(dbp); 1334 } 1335 break; 1336 default: 1337 panic("hammer2_write_bp: bad chain type %d\n", 1338 chain->bref.type); 1339 /* NOT REACHED */ 1340 error = 0; 1341 break; 1342 } 1343 *errorp = error; 1344 } 1345 1346 static 1347 int 1348 hammer2_remount(hammer2_mount_t *hmp, char *path, struct vnode *devvp, 1349 struct ucred *cred) 1350 { 1351 return (0); 1352 } 1353 1354 static 1355 int 1356 hammer2_vfs_unmount(struct mount *mp, int mntflags) 1357 { 1358 hammer2_pfsmount_t *pmp; 1359 hammer2_mount_t *hmp; 1360 hammer2_chain_t *rchain; 1361 int flags; 1362 int error = 0; 1363 int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 1364 int dumpcnt; 1365 int i; 1366 struct vnode *devvp; 1367 1368 pmp = MPTOPMP(mp); 1369 1370 ccms_domain_uninit(&pmp->ccms_dom); 1371 kdmsg_iocom_uninit(&pmp->iocom); /* XXX chain dependency */ 1372 1373 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 1374 1375 /* 1376 * If mount initialization proceeded far enough we must flush 1377 * its vnodes. 
1378 */ 1379 if (mntflags & MNT_FORCE) 1380 flags = FORCECLOSE; 1381 else 1382 flags = 0; 1383 if (pmp->iroot) { 1384 error = vflush(mp, 0, flags); 1385 if (error) 1386 goto failed; 1387 } 1388 1389 if (pmp->wthread_td) { 1390 mtx_lock(&pmp->wthread_mtx); 1391 pmp->wthread_destroy = 1; 1392 wakeup(&pmp->wthread_bioq); 1393 while (pmp->wthread_destroy != -1) { 1394 mtxsleep(&pmp->wthread_destroy, 1395 &pmp->wthread_mtx, 0, 1396 "umount-sleep", 0); 1397 } 1398 mtx_unlock(&pmp->wthread_mtx); 1399 pmp->wthread_td = NULL; 1400 } 1401 1402 for (i = 0; i < pmp->cluster.nchains; ++i) { 1403 hmp = pmp->cluster.chains[i]->hmp; 1404 1405 hammer2_mount_exlock(hmp); 1406 1407 --hmp->pmp_count; 1408 kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", 1409 hmp, hmp->pmp_count); 1410 1411 /* 1412 * Flush any left over chains. The voldata lock is only used 1413 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX. 1414 */ 1415 hammer2_voldata_lock(hmp); 1416 if ((hmp->vchain.flags | hmp->fchain.flags) & 1417 (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_SUBMODIFIED)) { 1418 hammer2_voldata_unlock(hmp, 0); 1419 hammer2_vfs_sync(mp, MNT_WAIT); 1420 hammer2_vfs_sync(mp, MNT_WAIT); 1421 } else { 1422 hammer2_voldata_unlock(hmp, 0); 1423 } 1424 if (hmp->pmp_count == 0) { 1425 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 1426 HAMMER2_CHAIN_SUBMODIFIED)) { 1427 kprintf("hammer2_unmount: chains left over " 1428 "after final sync\n"); 1429 if (hammer2_debug & 0x0010) 1430 Debugger("entered debugger"); 1431 } 1432 } 1433 1434 /* 1435 * Cleanup the root and super-root chain elements 1436 * (which should be clean). 
1437 */ 1438 if (pmp->iroot) { 1439 #if REPORT_REFS_ERRORS 1440 if (pmp->iroot->refs != 1) 1441 kprintf("PMP->IROOT %p REFS WRONG %d\n", 1442 pmp->iroot, pmp->iroot->refs); 1443 #else 1444 KKASSERT(pmp->iroot->refs == 1); 1445 #endif 1446 /* ref for pmp->iroot */ 1447 hammer2_inode_drop(pmp->iroot); 1448 pmp->iroot = NULL; 1449 } 1450 1451 rchain = pmp->cluster.chains[i]; 1452 if (rchain) { 1453 atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 1454 #if REPORT_REFS_ERRORS 1455 if (rchain->refs != 1) 1456 kprintf("PMP->RCHAIN %p REFS WRONG %d\n", 1457 rchain, rchain->refs); 1458 #else 1459 KKASSERT(rchain->refs == 1); 1460 #endif 1461 hammer2_chain_drop(rchain); 1462 pmp->cluster.chains[i] = NULL; 1463 } 1464 1465 /* 1466 * If no PFS's left drop the master hammer2_mount for the 1467 * device. 1468 */ 1469 if (hmp->pmp_count == 0) { 1470 if (hmp->sroot) { 1471 hammer2_inode_drop(hmp->sroot); 1472 hmp->sroot = NULL; 1473 } 1474 1475 /* 1476 * Finish up with the device vnode 1477 */ 1478 if ((devvp = hmp->devvp) != NULL) { 1479 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 1480 hmp->devvp = NULL; 1481 VOP_CLOSE(devvp, 1482 (ronly ? FREAD : FREAD|FWRITE)); 1483 vrele(devvp); 1484 devvp = NULL; 1485 } 1486 1487 /* 1488 * Final drop of embedded freemap root chain to 1489 * clean up fchain.core (fchain structure is not 1490 * flagged ALLOCATED so it is cleaned out and then 1491 * left to rot). 1492 */ 1493 hammer2_chain_drop(&hmp->fchain); 1494 1495 /* 1496 * Final drop of embedded volume root chain to clean 1497 * up vchain.core (vchain structure is not flagged 1498 * ALLOCATED so it is cleaned out and then left to 1499 * rot). 
1500 */ 1501 dumpcnt = 50; 1502 hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt); 1503 hammer2_mount_unlock(hmp); 1504 hammer2_chain_drop(&hmp->vchain); 1505 1506 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 1507 kmalloc_destroy(&hmp->mchain); 1508 kfree(hmp, M_HAMMER2); 1509 } else { 1510 hammer2_mount_unlock(hmp); 1511 } 1512 } 1513 1514 pmp->mp = NULL; 1515 mp->mnt_data = NULL; 1516 1517 kmalloc_destroy(&pmp->mmsg); 1518 kmalloc_destroy(&pmp->minode); 1519 1520 kfree(pmp, M_HAMMER2); 1521 error = 0; 1522 1523 failed: 1524 lockmgr(&hammer2_mntlk, LK_RELEASE); 1525 1526 return (error); 1527 } 1528 1529 static 1530 int 1531 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 1532 ino_t ino, struct vnode **vpp) 1533 { 1534 kprintf("hammer2_vget\n"); 1535 return (EOPNOTSUPP); 1536 } 1537 1538 static 1539 int 1540 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 1541 { 1542 hammer2_pfsmount_t *pmp; 1543 hammer2_chain_t *parent; 1544 int error; 1545 struct vnode *vp; 1546 1547 pmp = MPTOPMP(mp); 1548 if (pmp->iroot == NULL) { 1549 *vpp = NULL; 1550 error = EINVAL; 1551 } else { 1552 parent = hammer2_inode_lock_sh(pmp->iroot); 1553 vp = hammer2_igetv(pmp->iroot, &error); 1554 hammer2_inode_unlock_sh(pmp->iroot, parent); 1555 *vpp = vp; 1556 if (vp == NULL) 1557 kprintf("vnodefail\n"); 1558 } 1559 1560 return (error); 1561 } 1562 1563 /* 1564 * Filesystem status 1565 * 1566 * XXX incorporate ipdata->inode_quota and data_quota 1567 */ 1568 static 1569 int 1570 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1571 { 1572 hammer2_pfsmount_t *pmp; 1573 hammer2_mount_t *hmp; 1574 1575 pmp = MPTOPMP(mp); 1576 KKASSERT(pmp->cluster.nchains >= 1); 1577 hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 1578 1579 mp->mnt_stat.f_files = pmp->inode_count; 1580 mp->mnt_stat.f_ffree = 0; 1581 mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 1582 mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE; 1583 
mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 1584 1585 *sbp = mp->mnt_stat; 1586 return (0); 1587 } 1588 1589 static 1590 int 1591 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1592 { 1593 hammer2_pfsmount_t *pmp; 1594 hammer2_mount_t *hmp; 1595 1596 pmp = MPTOPMP(mp); 1597 KKASSERT(pmp->cluster.nchains >= 1); 1598 hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 1599 1600 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 1601 mp->mnt_vstat.f_files = pmp->inode_count; 1602 mp->mnt_vstat.f_ffree = 0; 1603 mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 1604 mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE; 1605 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 1606 1607 *sbp = mp->mnt_vstat; 1608 return (0); 1609 } 1610 1611 /* 1612 * Sync the entire filesystem; this is called from the filesystem syncer 1613 * process periodically and whenever a user calls sync(1) on the hammer 1614 * mountpoint. 1615 * 1616 * Currently is actually called from the syncer! \o/ 1617 * 1618 * This task will have to snapshot the state of the dirty inode chain. 1619 * From that, it will have to make sure all of the inodes on the dirty 1620 * chain have IO initiated. We make sure that io is initiated for the root 1621 * block. 1622 * 1623 * If waitfor is set, we wait for media to acknowledge the new rootblock. 1624 * 1625 * THINKS: side A vs side B, to have sync not stall all I/O? 1626 */ 1627 static 1628 int 1629 hammer2_vfs_sync(struct mount *mp, int waitfor) 1630 { 1631 struct hammer2_sync_info info; 1632 hammer2_pfsmount_t *pmp; 1633 hammer2_mount_t *hmp; 1634 int flags; 1635 int error; 1636 int total_error; 1637 int i; 1638 1639 pmp = MPTOPMP(mp); 1640 1641 /* 1642 * We can't acquire locks on existing vnodes while in a transaction 1643 * without risking a deadlock. This assumes that vfsync() can be 1644 * called without the vnode locked (which it can in DragonFly). 
1645 * Otherwise we'd have to implement a multi-pass or flag the lock 1646 * failures and retry. 1647 */ 1648 /*flags = VMSC_GETVP;*/ 1649 flags = 0; 1650 if (waitfor & MNT_LAZY) 1651 flags |= VMSC_ONEPASS; 1652 1653 hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH); 1654 1655 /* 1656 * vfsync the vnodes. XXX 1657 */ 1658 info.error = 0; 1659 info.waitfor = MNT_NOWAIT; 1660 vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info); 1661 if (info.error == 0 && (waitfor & MNT_WAIT)) { 1662 info.waitfor = waitfor; 1663 vsyncscan(mp, flags, hammer2_sync_scan2, &info); 1664 1665 } 1666 #if 0 1667 if (waitfor == MNT_WAIT) { 1668 /* XXX */ 1669 } else { 1670 /* XXX */ 1671 } 1672 #endif 1673 1674 total_error = 0; 1675 for (i = 0; i < pmp->cluster.nchains; ++i) { 1676 hmp = pmp->cluster.chains[i]->hmp; 1677 1678 /* 1679 * Media mounts have two 'roots', vchain for the topology 1680 * and fchain for the free block table. Flush both. 1681 * 1682 * Note that the topology and free block table are handled 1683 * independently, so the free block table can wind up being 1684 * ahead of the topology. We depend on the bulk free scan 1685 * code to deal with any loose ends. 1686 */ 1687 hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 1688 if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | 1689 HAMMER2_CHAIN_SUBMODIFIED)) { 1690 hammer2_chain_flush(&info.trans, &hmp->vchain); 1691 } 1692 hammer2_chain_unlock(&hmp->vchain); 1693 1694 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 1695 if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED | 1696 HAMMER2_CHAIN_SUBMODIFIED)) { 1697 /* this will also modify vchain as a side effect */ 1698 hammer2_chain_flush(&info.trans, &hmp->fchain); 1699 } 1700 hammer2_chain_unlock(&hmp->fchain); 1701 1702 error = 0; 1703 1704 /* 1705 * We can't safely flush the volume header until we have 1706 * flushed any device buffers which have built up. 
1707 * 1708 * XXX this isn't being incremental 1709 */ 1710 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 1711 error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 1712 vn_unlock(hmp->devvp); 1713 1714 /* 1715 * The flush code sets CHAIN_VOLUMESYNC to indicate that the 1716 * volume header needs synchronization via hmp->volsync. 1717 * 1718 * XXX synchronize the flag & data with only this flush XXX 1719 */ 1720 if (error == 0 && 1721 (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) { 1722 struct buf *bp; 1723 1724 /* 1725 * Synchronize the disk before flushing the volume 1726 * header. 1727 */ 1728 bp = getpbuf(NULL); 1729 bp->b_bio1.bio_offset = 0; 1730 bp->b_bufsize = 0; 1731 bp->b_bcount = 0; 1732 bp->b_cmd = BUF_CMD_FLUSH; 1733 bp->b_bio1.bio_done = biodone_sync; 1734 bp->b_bio1.bio_flags |= BIO_SYNC; 1735 vn_strategy(hmp->devvp, &bp->b_bio1); 1736 biowait(&bp->b_bio1, "h2vol"); 1737 relpbuf(bp, NULL); 1738 1739 /* 1740 * Then we can safely flush the version of the 1741 * volume header synchronized by the flush code. 1742 */ 1743 i = hmp->volhdrno + 1; 1744 if (i >= HAMMER2_NUM_VOLHDRS) 1745 i = 0; 1746 if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 1747 hmp->volsync.volu_size) { 1748 i = 0; 1749 } 1750 kprintf("sync volhdr %d %jd\n", 1751 i, (intmax_t)hmp->volsync.volu_size); 1752 bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 1753 HAMMER2_PBUFSIZE, 0, 0); 1754 atomic_clear_int(&hmp->vchain.flags, 1755 HAMMER2_CHAIN_VOLUMESYNC); 1756 bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 1757 bawrite(bp); 1758 hmp->volhdrno = i; 1759 } 1760 if (error) 1761 total_error = error; 1762 } 1763 1764 hammer2_trans_done(&info.trans); 1765 return (total_error); 1766 } 1767 1768 /* 1769 * Sync passes. 1770 * 1771 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code 1772 * won't flush on those flags. The syncer code above will do a 1773 * general meta-data flush globally that will catch these flags. 
 */

/*
 * Per-vnode callback for vsyncscan() (see hammer2_vfs_sync()).
 * Flushes one inode: vfsync's its buffers and then flushes its chain
 * within the caller-supplied transaction.  Always returns 0 so the
 * scan continues; errors are accumulated in info->error.
 */
static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer2_sync_info *info = data;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	int error;

	/* skip vnodes with no hammer2 inode or already-dead vnodes */
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);
	if (vp->v_type == VNON || vp->v_type == VBAD) {
		vclrisdirty(vp);
		return(0);
	}
	/* nothing to do if neither the inode nor its buffers are dirty */
	if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrisdirty(vp);
		return(0);
	}

	/*
	 * VOP_FSYNC will start a new transaction so replicate some code
	 * here to do it inline (see hammer2_vop_fsync()).
	 *
	 * WARNING: The vfsync interacts with the buffer cache and might
	 *          block, we can't hold the inode lock at that time.
	 */
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (ip->vp)
		vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
	parent = hammer2_inode_lock_ex(ip);
	hammer2_chain_flush(&info->trans, parent);
	hammer2_inode_unlock_ex(ip, parent);
	error = 0;
#if 0
	error = VOP_FSYNC(vp, MNT_NOWAIT, 0);
#endif
	if (error)
		info->error = error;
	return(0);
}

/*
 * Vnode-to-filehandle conversion.
 * NOTE(review): returns success without filling in fhp -- looks like
 * an unimplemented stub; confirm before relying on NFS export.
 */
static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	return (0);
}

/*
 * Filehandle-to-vnode conversion.
 * NOTE(review): returns success without setting *vpp -- stub; confirm.
 */
static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
	       struct fid *fhp, struct vnode **vpp)
{
	return (0);
}

/*
 * Export permission check.
 * NOTE(review): unconditionally grants access without setting
 * *exflagsp/*credanonp -- stub; confirm.
 */
static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		 int *exflagsp, struct ucred **credanonp)
{
	return (0);
}

/*
 * Support code for hammer2_mount().  Read, verify, and install the volume
 * header into the HMP
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
1850 * 1851 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to 1852 * nonexistant locations. 1853 * 1854 * XXX Record selected volhdr and ring updates to each of 4 volhdrs 1855 */ 1856 static 1857 int 1858 hammer2_install_volume_header(hammer2_mount_t *hmp) 1859 { 1860 hammer2_volume_data_t *vd; 1861 struct buf *bp; 1862 hammer2_crc32_t crc0, crc, bcrc0, bcrc; 1863 int error_reported; 1864 int error; 1865 int valid; 1866 int i; 1867 1868 error_reported = 0; 1869 error = 0; 1870 valid = 0; 1871 bp = NULL; 1872 1873 /* 1874 * There are up to 4 copies of the volume header (syncs iterate 1875 * between them so there is no single master). We don't trust the 1876 * volu_size field so we don't know precisely how large the filesystem 1877 * is, so depend on the OS to return an error if we go beyond the 1878 * block device's EOF. 1879 */ 1880 for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) { 1881 error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64, 1882 HAMMER2_VOLUME_BYTES, &bp); 1883 if (error) { 1884 brelse(bp); 1885 bp = NULL; 1886 continue; 1887 } 1888 1889 vd = (struct hammer2_volume_data *) bp->b_data; 1890 if ((vd->magic != HAMMER2_VOLUME_ID_HBO) && 1891 (vd->magic != HAMMER2_VOLUME_ID_ABO)) { 1892 brelse(bp); 1893 bp = NULL; 1894 continue; 1895 } 1896 1897 if (vd->magic == HAMMER2_VOLUME_ID_ABO) { 1898 /* XXX: Reversed-endianness filesystem */ 1899 kprintf("hammer2: reverse-endian filesystem detected"); 1900 brelse(bp); 1901 bp = NULL; 1902 continue; 1903 } 1904 1905 crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0]; 1906 crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF, 1907 HAMMER2_VOLUME_ICRC0_SIZE); 1908 bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1]; 1909 bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF, 1910 HAMMER2_VOLUME_ICRC1_SIZE); 1911 if ((crc0 != crc) || (bcrc0 != bcrc)) { 1912 kprintf("hammer2 volume header crc " 1913 "mismatch copy #%d %08x/%08x\n", 1914 i, crc0, crc); 1915 error_reported = 1; 1916 brelse(bp); 1917 bp = 
NULL; 1918 continue; 1919 } 1920 if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) { 1921 valid = 1; 1922 hmp->voldata = *vd; 1923 hmp->volhdrno = i; 1924 } 1925 brelse(bp); 1926 bp = NULL; 1927 } 1928 if (valid) { 1929 hmp->volsync = hmp->voldata; 1930 error = 0; 1931 if (error_reported || bootverbose || 1) { /* 1/DEBUG */ 1932 kprintf("hammer2: using volume header #%d\n", 1933 hmp->volhdrno); 1934 } 1935 } else { 1936 error = EINVAL; 1937 kprintf("hammer2: no valid volume headers found!\n"); 1938 } 1939 return (error); 1940 } 1941 1942 /* 1943 * Reconnect using the passed file pointer. The caller must ref the 1944 * fp for us. 1945 */ 1946 void 1947 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) 1948 { 1949 hammer2_inode_data_t *ipdata; 1950 hammer2_chain_t *parent; 1951 hammer2_mount_t *hmp; 1952 size_t name_len; 1953 1954 hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 1955 1956 /* 1957 * Closes old comm descriptor, kills threads, cleans up 1958 * states, then installs the new descriptor and creates 1959 * new threads. 1960 */ 1961 kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2"); 1962 1963 /* 1964 * Setup LNK_CONN fields for autoinitiated state machine 1965 */ 1966 parent = hammer2_inode_lock_ex(pmp->iroot); 1967 ipdata = &parent->data->ipdata; 1968 pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid; 1969 pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid; 1970 pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type; 1971 pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1; 1972 pmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type; 1973 1974 /* 1975 * Filter adjustment. Clients do not need visibility into other 1976 * clients (otherwise millions of clients would present a serious 1977 * problem). The fs_label also serves to restrict the namespace. 
1978 */ 1979 pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2; 1980 pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1; 1981 switch (ipdata->pfs_type) { 1982 case DMSG_PFSTYPE_CLIENT: 1983 pmp->iocom.auto_lnk_conn.peer_mask &= 1984 ~(1LLU << DMSG_PFSTYPE_CLIENT); 1985 break; 1986 default: 1987 break; 1988 } 1989 1990 name_len = ipdata->name_len; 1991 if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label)) 1992 name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1; 1993 bcopy(ipdata->filename, 1994 pmp->iocom.auto_lnk_conn.fs_label, 1995 name_len); 1996 pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0; 1997 1998 /* 1999 * Setup LNK_SPAN fields for autoinitiated state machine 2000 */ 2001 pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid; 2002 pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid; 2003 pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type; 2004 pmp->iocom.auto_lnk_span.peer_type = hmp->voldata.peer_type; 2005 pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1; 2006 name_len = ipdata->name_len; 2007 if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label)) 2008 name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1; 2009 bcopy(ipdata->filename, 2010 pmp->iocom.auto_lnk_span.fs_label, 2011 name_len); 2012 pmp->iocom.auto_lnk_span.fs_label[name_len] = 0; 2013 hammer2_inode_unlock_ex(pmp->iroot, parent); 2014 2015 kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg); 2016 } 2017 2018 static int 2019 hammer2_rcvdmsg(kdmsg_msg_t *msg) 2020 { 2021 switch(msg->any.head.cmd & DMSGF_TRANSMASK) { 2022 case DMSG_DBG_SHELL: 2023 /* 2024 * (non-transaction) 2025 * Execute shell command (not supported atm) 2026 */ 2027 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 2028 break; 2029 case DMSG_DBG_SHELL | DMSGF_REPLY: 2030 /* 2031 * (non-transaction) 2032 */ 2033 if (msg->aux_data) { 2034 msg->aux_data[msg->aux_size - 1] = 0; 2035 kprintf("HAMMER2 DBG: %s\n", msg->aux_data); 2036 } 2037 break; 2038 default: 2039 /* 2040 * Unsupported 
message received. We only need to 2041 * reply if it's a transaction in order to close our end. 2042 * Ignore any one-way messages are any further messages 2043 * associated with the transaction. 2044 * 2045 * NOTE: This case also includes DMSG_LNK_ERROR messages 2046 * which might be one-way, replying to those would 2047 * cause an infinite ping-pong. 2048 */ 2049 if (msg->any.head.cmd & DMSGF_CREATE) 2050 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP); 2051 break; 2052 } 2053 return(0); 2054 } 2055 2056 /* 2057 * This function is called after KDMSG has automatically handled processing 2058 * of a LNK layer message (typically CONN, SPAN, or CIRC). 2059 * 2060 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which 2061 * advertises all available hammer2 super-root volumes. 2062 */ 2063 static void 2064 hammer2_autodmsg(kdmsg_msg_t *msg) 2065 { 2066 hammer2_pfsmount_t *pmp = msg->iocom->handle; 2067 hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 2068 int copyid; 2069 2070 /* 2071 * We only care about replies to our LNK_CONN auto-request. kdmsg 2072 * has already processed the reply, we use this calback as a shim 2073 * to know when we can advertise available super-root volumes. 
2074 */ 2075 if ((msg->any.head.cmd & DMSGF_TRANSMASK) != 2076 (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) || 2077 msg->state == NULL) { 2078 return; 2079 } 2080 2081 kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd); 2082 2083 if (msg->any.head.cmd & DMSGF_CREATE) { 2084 kprintf("HAMMER2: VOLDATA DUMP\n"); 2085 2086 /* 2087 * Dump the configuration stored in the volume header 2088 */ 2089 hammer2_voldata_lock(hmp); 2090 for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) { 2091 if (hmp->voldata.copyinfo[copyid].copyid == 0) 2092 continue; 2093 hammer2_volconf_update(pmp, copyid); 2094 } 2095 hammer2_voldata_unlock(hmp, 0); 2096 } 2097 if ((msg->any.head.cmd & DMSGF_DELETE) && 2098 msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) { 2099 kprintf("HAMMER2: CONN WAS TERMINATED\n"); 2100 } 2101 } 2102 2103 /* 2104 * Volume configuration updates are passed onto the userland service 2105 * daemon via the open LNK_CONN transaction. 2106 */ 2107 void 2108 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) 2109 { 2110 hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 2111 kdmsg_msg_t *msg; 2112 2113 /* XXX interlock against connection state termination */ 2114 kprintf("volconf update %p\n", pmp->iocom.conn_state); 2115 if (pmp->iocom.conn_state) { 2116 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n"); 2117 msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state, 2118 DMSG_LNK_VOLCONF, NULL, NULL); 2119 msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index]; 2120 msg->any.lnk_volconf.mediaid = hmp->voldata.fsid; 2121 msg->any.lnk_volconf.index = index; 2122 kdmsg_msg_write(msg); 2123 } 2124 } 2125 2126 void 2127 hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp) 2128 { 2129 hammer2_chain_layer_t *layer; 2130 hammer2_chain_t *scan; 2131 hammer2_chain_t *first_parent; 2132 2133 --*countp; 2134 if (*countp == 0) { 2135 kprintf("%*.*s...\n", tab, tab, ""); 2136 return; 2137 } 2138 if (*countp < 0) 2139 return; 2140 
first_parent = chain->core ? TAILQ_FIRST(&chain->core->ownerq) : NULL; 2141 kprintf("%*.*schain %p.%d [%08x][core=%p fp=%p] (%s) np=%p dt=%s refs=%d", 2142 tab, tab, "", 2143 chain, chain->bref.type, chain->flags, 2144 chain->core, 2145 first_parent, 2146 ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && 2147 chain->data) ? (char *)chain->data->ipdata.filename : "?"), 2148 (first_parent ? TAILQ_NEXT(chain, core_entry) : NULL), 2149 (chain->delete_tid == HAMMER2_MAX_TID ? "max" : "fls"), 2150 chain->refs); 2151 if (first_parent) 2152 kprintf(" [fpflags %08x fprefs %d\n", 2153 first_parent->flags, 2154 first_parent->refs); 2155 if (chain->core == NULL || TAILQ_EMPTY(&chain->core->layerq)) 2156 kprintf("\n"); 2157 else 2158 kprintf(" {\n"); 2159 TAILQ_FOREACH(layer, &chain->core->layerq, entry) { 2160 RB_FOREACH(scan, hammer2_chain_tree, &layer->rbtree) { 2161 hammer2_dump_chain(scan, tab + 4, countp); 2162 } 2163 } 2164 if (chain->core && !TAILQ_EMPTY(&chain->core->layerq)) { 2165 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) 2166 kprintf("%*.*s}(%s)\n", tab, tab, "", 2167 chain->data->ipdata.filename); 2168 else 2169 kprintf("%*.*s}\n", tab, tab, ""); 2170 } 2171 } 2172