/*-
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/uuid.h>
#include <sys/vfsops.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/objcache.h>

#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include <sys/mutex.h>
#include <sys/mutex2.h>

#include "hammer2.h"
#include "hammer2_disk.h"
#include "hammer2_mount.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define REPORT_REFS_ERRORS 1	/* XXX remove me */

MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");

struct hammer2_sync_info {
        hammer2_trans_t trans;
        int error;
        int waitfor;
};

TAILQ_HEAD(hammer2_mntlist, hammer2_mount);
static struct hammer2_mntlist hammer2_mntlist;
static struct lock hammer2_mntlk;

int hammer2_debug;
int hammer2_cluster_enable = 1;
int hammer2_hardlink_enable = 1;
long hammer2_iod_file_read;
long hammer2_iod_meta_read;
long hammer2_iod_indr_read;
long hammer2_iod_fmap_read;
long hammer2_iod_volu_read;
long hammer2_iod_file_write;
long hammer2_iod_meta_write;
long hammer2_iod_indr_write;
long hammer2_iod_fmap_write;
long hammer2_iod_volu_write;
long hammer2_ioa_file_read;
long hammer2_ioa_meta_read;
long hammer2_ioa_indr_read;
long hammer2_ioa_fmap_read;
long hammer2_ioa_volu_read;
long hammer2_ioa_fmap_write;
long hammer2_ioa_file_write;
long hammer2_ioa_meta_write;
long hammer2_ioa_indr_write;
long hammer2_ioa_volu_write;

MALLOC_DECLARE(C_BUFFER);
MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression.");

MALLOC_DECLARE(D_BUFFER);
MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression.");

MALLOC_DECLARE(W_BIOQUEUE);
MALLOC_DEFINE(W_BIOQUEUE, "wbioqueue", "Writing bio queue.");

MALLOC_DECLARE(W_MTX);
MALLOC_DEFINE(W_MTX, "wmutex", "Mutex for write thread.");

SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");

SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW,
           &hammer2_debug, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW,
           &hammer2_cluster_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW,
           &hammer2_hardlink_enable, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW,
            &hammer2_iod_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW,
            &hammer2_iod_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW,
            &hammer2_iod_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW,
            &hammer2_iod_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW,
            &hammer2_iod_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW,
            &hammer2_iod_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW,
            &hammer2_iod_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW,
            &hammer2_iod_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
            &hammer2_iod_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
            &hammer2_iod_volu_write, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW,
            &hammer2_ioa_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW,
            &hammer2_ioa_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW,
            &hammer2_ioa_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW,
            &hammer2_ioa_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW,
            &hammer2_ioa_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
            &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
            &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
            &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
            &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
            &hammer2_ioa_volu_write, 0, "");

static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                struct ucred *cred);
static int hammer2_remount(struct mount *, char *, struct vnode *,
                                struct ucred *);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
                                struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
                                struct ucred *cred);
static int hammer2_vfs_sync(struct mount *mp, int waitfor);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
                                ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
                                struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                                int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_mount_t *hmp);
static int hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

static void hammer2_write_thread(void *arg);

/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
                                hammer2_chain_t **parentp,
                                hammer2_key_t lbase, int ioflag, int pblksize,
                                int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
                                hammer2_chain_t **parentp,
                                hammer2_key_t lbase, int ioflag,
                                int pblksize, int *errorp, int comp_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
                                hammer2_trans_t *trans, hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
                                hammer2_chain_t **parentp,
                                hammer2_key_t lbase,
                                int ioflag, int pblksize, int *errorp);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
                                hammer2_chain_t **parentp,
                                hammer2_key_t lbase,
                                int *errorp);
static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp,
                                int ioflag, int pblksize, int *errorp);

static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
static void hammer2_autodmsg(kdmsg_msg_t *msg);

/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
        .vfs_init       = hammer2_vfs_init,
        .vfs_uninit     = hammer2_vfs_uninit,
        .vfs_sync       = hammer2_vfs_sync,
        .vfs_mount      = hammer2_vfs_mount,
        .vfs_unmount    = hammer2_vfs_unmount,
        .vfs_root       = hammer2_vfs_root,
        .vfs_statfs     = hammer2_vfs_statfs,
        .vfs_statvfs    = hammer2_vfs_statvfs,
        .vfs_vget       = hammer2_vfs_vget,
        .vfs_vptofh     = hammer2_vfs_vptofh,
        .vfs_fhtovp     = hammer2_vfs_fhtovp,
        .vfs_checkexp   = hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

static
int
hammer2_vfs_init(struct vfsconf *conf)
{
        static struct objcache_malloc_args margs_read;
        static struct objcache_malloc_args margs_write;

        int error;

        error = 0;

        if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
                error = EINVAL;
        if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
                error = EINVAL;
        if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
                error = EINVAL;

        if (error)
                kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

        margs_read.objsize = 65536;
        margs_read.mtype = D_BUFFER;

        margs_write.objsize = 32768;
        margs_write.mtype = C_BUFFER;

        cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
                                0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
                                objcache_malloc_free, &margs_read);
        cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
                                0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
                                objcache_malloc_free, &margs_write);

        lockinit(&hammer2_mntlk, "mntlk", 0, 0);
        TAILQ_INIT(&hammer2_mntlist);

        return (error);
}

static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
        objcache_destroy(cache_buffer_read);
        objcache_destroy(cache_buffer_write);
        return 0;
}

/*
 * Mount or remount HAMMER2 filesystem from physical media
 *
 *      mountroot
 *              mp              mount point structure
 *              path            NULL
 *              data            <unused>
 *              cred            <unused>
 *
 *      mount
 *              mp              mount point structure
 *              path            path to mount point
 *              data            pointer to argument structure in user space
 *                      volume  volume path (device@LABEL form)
 *                      hflags  user mount flags
 *              cred            user credentials
 *
 * RETURNS:     0       Success
 *              !0      error number
 */
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                  struct ucred *cred)
{
        struct hammer2_mount_info info;
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
        hammer2_key_t key_next;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        struct vnode *devvp;
        struct nlookupdata nd;
        hammer2_chain_t *parent;
        hammer2_chain_t *schain;
        hammer2_chain_t *rchain;
        struct file *fp;
        char devstr[MNAMELEN];
        size_t size;
        size_t done;
        char *dev;
        char *label;
        int ronly = 1;
        int error;
        int cache_index;

        hmp = NULL;
        pmp = NULL;
        dev = NULL;
        label = NULL;
        devvp = NULL;
        cache_index = -1;
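
        /*
         * NOTE: The volume specification copied in from userland below is
         *       expected in "device@LABEL" form (see the function comment
         *       above), e.g. a hypothetical "/dev/ad0s1d@LOCAL".  It is
         *       split at the '@' into the block device path and the name
         *       of the PFS to mount.
         */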
kprintf("hammer2_mount\n"); 359 360 if (path == NULL) { 361 /* 362 * Root mount 363 */ 364 bzero(&info, sizeof(info)); 365 info.cluster_fd = -1; 366 return (EOPNOTSUPP); 367 } else { 368 /* 369 * Non-root mount or updating a mount 370 */ 371 error = copyin(data, &info, sizeof(info)); 372 if (error) 373 return (error); 374 375 error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done); 376 if (error) 377 return (error); 378 379 /* Extract device and label */ 380 dev = devstr; 381 label = strchr(devstr, '@'); 382 if (label == NULL || 383 ((label + 1) - dev) > done) { 384 return (EINVAL); 385 } 386 *label = '\0'; 387 label++; 388 if (*label == '\0') 389 return (EINVAL); 390 391 if (mp->mnt_flag & MNT_UPDATE) { 392 /* Update mount */ 393 /* HAMMER2 implements NFS export via mountctl */ 394 hmp = MPTOHMP(mp); 395 devvp = hmp->devvp; 396 error = hammer2_remount(mp, path, devvp, cred); 397 return error; 398 } 399 } 400 401 /* 402 * PFS mount 403 * 404 * Lookup name and verify it refers to a block device. 405 */ 406 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 407 if (error == 0) 408 error = nlookup(&nd); 409 if (error == 0) 410 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 411 nlookup_done(&nd); 412 413 if (error == 0) { 414 if (vn_isdisk(devvp, &error)) 415 error = vfs_mountedon(devvp); 416 } 417 418 /* 419 * Determine if the device has already been mounted. After this 420 * check hmp will be non-NULL if we are doing the second or more 421 * hammer2 mounts from the same device. 422 */ 423 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 424 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 425 if (hmp->devvp == devvp) 426 break; 427 } 428 429 /* 430 * Open the device if this isn't a secondary mount and construct 431 * the H2 device mount (hmp). 432 */ 433 if (hmp == NULL) { 434 if (error == 0 && vcount(devvp) > 0) 435 error = EBUSY; 436 437 /* 438 * Now open the device 439 */ 440 if (error == 0) { 441 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 442 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 443 error = vinvalbuf(devvp, V_SAVE, 0, 0); 444 if (error == 0) { 445 error = VOP_OPEN(devvp, 446 ronly ? FREAD : FREAD | FWRITE, 447 FSCRED, NULL); 448 } 449 vn_unlock(devvp); 450 } 451 if (error && devvp) { 452 vrele(devvp); 453 devvp = NULL; 454 } 455 if (error) { 456 lockmgr(&hammer2_mntlk, LK_RELEASE); 457 return error; 458 } 459 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 460 hmp->ronly = ronly; 461 hmp->devvp = devvp; 462 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 463 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 464 465 lockinit(&hmp->alloclk, "h2alloc", 0, 0); 466 lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); 467 TAILQ_INIT(&hmp->transq); 468 469 /* 470 * vchain setup. vchain.data is embedded. 471 * vchain.refs is initialized and will never drop to 0. 472 */ 473 hmp->vchain.hmp = hmp; 474 hmp->vchain.refs = 1; 475 hmp->vchain.data = (void *)&hmp->voldata; 476 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 477 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 478 hmp->vchain.delete_tid = HAMMER2_MAX_TID; 479 hammer2_chain_core_alloc(NULL, &hmp->vchain, NULL); 480 /* hmp->vchain.u.xxx is left NULL */ 481 482 /* 483 * fchain setup. fchain.data is embedded. 484 * fchain.refs is initialized and will never drop to 0. 485 * 486 * The data is not used but needs to be initialized to 487 * pass assertion muster. 
                 * We use this chain primarily as a placeholder for
                 * the freemap's top-level RBTREE so it does not
                 * interfere with the volume's topology RBTREE.
                 */
                hmp->fchain.hmp = hmp;
                hmp->fchain.refs = 1;
                hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset;
                hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP;
                hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
                hmp->fchain.bref.methods =
                        HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) |
                        HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
                hmp->fchain.delete_tid = HAMMER2_MAX_TID;

                hammer2_chain_core_alloc(NULL, &hmp->fchain, NULL);
                /* hmp->fchain.u.xxx is left NULL */

                /*
                 * Install the volume header
                 */
                error = hammer2_install_volume_header(hmp);
                if (error) {
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return error;
                }

                /*
                 * First locate the super-root inode, which is key 0
                 * relative to the volume header's blockset.
                 *
                 * Then locate the root inode by scanning the directory
                 * keyspace represented by the label.
                 */
                parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
                schain = hammer2_chain_lookup(&parent, &key_dummy,
                                HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
                                &cache_index, 0);
                hammer2_chain_lookup_done(parent);
                if (schain == NULL) {
                        kprintf("hammer2_mount: invalid super-root\n");
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return EINVAL;
                }
                hammer2_chain_ref(schain);      /* for hmp->schain */
                hmp->schain = schain;           /* left locked for inode_get */
                hmp->sroot = hammer2_inode_get(NULL, NULL, schain);
                hammer2_inode_ref(hmp->sroot);  /* for hmp->sroot */
                hammer2_inode_unlock_ex(hmp->sroot, schain);
                schain = NULL;

                mtx_init(&hmp->wthread_mtx);
                bioq_init(&hmp->wthread_bioq);
                hmp->wthread_destroy = 0;

                /*
                 * Launch threads.
                 */
                lwkt_create(hammer2_write_thread, hmp,
                            NULL, NULL, 0, -1, "hammer2-write");
        }

        /*
         * Block device opened successfully, finish initializing the
         * mount structure.
         *
         * From this point on we have to call hammer2_unmount() on failure.
         */
        pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
        pmp->mount_cluster = kmalloc(sizeof(hammer2_cluster_t), M_HAMMER2,
                                     M_WAITOK | M_ZERO);
        pmp->cluster = pmp->mount_cluster;

        kmalloc_create(&pmp->minode, "HAMMER2-inodes");
        kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");

        pmp->mount_cluster->hmp = hmp;
        spin_init(&pmp->inum_spin);
        RB_INIT(&pmp->inum_tree);

        kdmsg_iocom_init(&pmp->iocom, pmp,
                         KDMSG_IOCOMF_AUTOCONN |
                         KDMSG_IOCOMF_AUTOSPAN |
                         KDMSG_IOCOMF_AUTOCIRC,
                         pmp->mmsg, hammer2_rcvdmsg);

        ccms_domain_init(&pmp->ccms_dom);
        ++hmp->pmp_count;
        lockmgr(&hammer2_mntlk, LK_RELEASE);
        kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n",
                hmp, pmp, hmp->pmp_count);

        mp->mnt_flag = MNT_LOCAL;
        mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;   /* all entry pts are SMP */

        /*
         * required mount structure initializations
         */
        mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;

        mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;

        /*
         * Optional fields
         */
        mp->mnt_iosize_max = MAXPHYS;
        mp->mnt_data = (qaddr_t)pmp;
        pmp->mp = mp;

        /*
         * schain only has 1 ref now for its hmp->schain assignment.
         * Setup for lookup (which will lock it).
         */
        parent = hammer2_chain_lookup_init(hmp->schain, 0);
        lhc = hammer2_dirhash(label, strlen(label));
        rchain = hammer2_chain_lookup(&parent, &key_next,
                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                      &cache_index, 0);
        while (rchain) {
                if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                    strcmp(label, rchain->data->ipdata.filename) == 0) {
                        break;
                }
                rchain = hammer2_chain_next(&parent, rchain, &key_next,
                                            key_next,
                                            lhc + HAMMER2_DIRHASH_LOMASK,
                                            &cache_index, 0);
        }
        hammer2_chain_lookup_done(parent);
        if (rchain == NULL) {
                kprintf("hammer2_mount: PFS label not found\n");
                hammer2_vfs_unmount(mp, MNT_FORCE);
                return EINVAL;
        }
        if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
                hammer2_chain_unlock(rchain);
                kprintf("hammer2_mount: PFS label already mounted!\n");
                hammer2_vfs_unmount(mp, MNT_FORCE);
                return EBUSY;
        }
        if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
                kprintf("hammer2_mount: PFS label currently recycling\n");
                hammer2_vfs_unmount(mp, MNT_FORCE);
                return EBUSY;
        }

        atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);

        /*
         * NOTE: *_get() integrates chain's lock into the inode lock.
         */
        hammer2_chain_ref(rchain);              /* for pmp->rchain */
        pmp->mount_cluster->rchain = rchain;    /* left held & unlocked */
        pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
        hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */

        KKASSERT(rchain->pmp == NULL);          /* tracking pmp for rchain */
        rchain->pmp = pmp;
        atomic_add_long(&pmp->inmem_chains, 1);

        hammer2_inode_unlock_ex(pmp->iroot, rchain);

        kprintf("iroot %p\n", pmp->iroot);

        /*
         * Ref the cluster management messaging descriptor.  The mount
         * program deals with the other end of the communications pipe.
         */
        fp = holdfp(curproc->p_fd, info.cluster_fd, -1);
        if (fp == NULL) {
                kprintf("hammer2_mount: bad cluster_fd!\n");
                hammer2_vfs_unmount(mp, MNT_FORCE);
                return EBADF;
        }
        hammer2_cluster_reconnect(pmp, fp);

        /*
         * Finish setup
         */
        vfs_getnewfsid(mp);
        vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
        vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
        vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);

        copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
        bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
        bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
        copyinstr(path, mp->mnt_stat.f_mntonname,
                  sizeof(mp->mnt_stat.f_mntonname) - 1,
                  &size);

        /*
         * Initial statfs to prime mnt_stat.
         */
        hammer2_vfs_statfs(mp, &mp->mnt_stat, cred);

        return 0;
}

/*
 * Handle bioq for strategy write
 */
static
void
hammer2_write_thread(void *arg)
{
        hammer2_mount_t *hmp;
        struct bio *bio;
        struct buf *bp;
        hammer2_trans_t trans;
        struct vnode *vp;
        hammer2_inode_t *last_ip;
        hammer2_inode_t *ip;
        hammer2_chain_t *parent;
        hammer2_chain_t **parentp;
        hammer2_inode_data_t *ipdata;
        hammer2_key_t lbase;
        int lblksize;
        int pblksize;
        int error;

        hmp = arg;

        mtx_lock(&hmp->wthread_mtx);
        while (hmp->wthread_destroy == 0) {
                if (bioq_first(&hmp->wthread_bioq) == NULL) {
                        mtxsleep(&hmp->wthread_bioq, &hmp->wthread_mtx,
                                 0, "h2bioqw", 0);
                }
                last_ip = NULL;
                parent = NULL;
                parentp = &parent;

                while ((bio = bioq_takefirst(&hmp->wthread_bioq)) != NULL) {
                        mtx_unlock(&hmp->wthread_mtx);

                        error = 0;
                        bp = bio->bio_buf;
                        vp = bp->b_vp;
                        ip = VTOI(vp);

                        /*
                         * Cache transaction for multi-buffer flush
                         * efficiency.  Lock the ip separately for each
                         * buffer to allow interleaving with frontend
                         * writes.
                         */
                        if (last_ip != ip) {
                                if (last_ip)
                                        hammer2_trans_done(&trans);
                                hammer2_trans_init(&trans, ip->pmp,
                                                   HAMMER2_TRANS_BUFCACHE);
                                last_ip = ip;
                        }
                        parent = hammer2_inode_lock_ex(ip);

                        /*
                         * Inode is modified, flush size and mtime changes
                         * to ensure that the file size remains consistent
                         * with the buffers being flushed.
                         */
                        if (ip->flags & (HAMMER2_INODE_RESIZED |
                                         HAMMER2_INODE_MTIME)) {
                                hammer2_inode_fsync(&trans, ip, parentp);
                        }
                        ipdata = hammer2_chain_modify_ip(&trans, ip,
                                                         parentp, 0);
                        lblksize = hammer2_calc_logical(ip, bio->bio_offset,
                                                        &lbase, NULL);
                        pblksize = hammer2_calc_physical(ip, lbase);
                        hammer2_write_file_core(bp, &trans, ip, ipdata,
                                                parentp,
                                                lbase, IO_ASYNC,
                                                pblksize, &error);
                        hammer2_inode_unlock_ex(ip, parent);
                        if (error) {
                                kprintf("hammer2: error in buffer write\n");
                                bp->b_flags |= B_ERROR;
                                bp->b_error = EIO;
                        }
                        biodone(bio);
                        mtx_lock(&hmp->wthread_mtx);
                }

                /*
                 * Clean out transaction cache
                 */
                if (last_ip)
                        hammer2_trans_done(&trans);
        }
        hmp->wthread_destroy = -1;
        wakeup(&hmp->wthread_destroy);

        mtx_unlock(&hmp->wthread_mtx);
}

/*
 * Return a chain suitable for I/O, creating the chain if necessary
 * and assigning its physical block.
 */
static
hammer2_chain_t *
hammer2_assign_physical(hammer2_trans_t *trans,
                        hammer2_inode_t *ip, hammer2_chain_t **parentp,
                        hammer2_key_t lbase, int pblksize, int *errorp)
{
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_off_t pbase;
        hammer2_key_t key_dummy;
        int pradix = hammer2_getradix(pblksize);
        int cache_index = -1;

        /*
         * Locate the chain associated with lbase, return a locked chain.
         * However, do not instantiate any data reference (which utilizes a
         * device buffer) because we will be using direct IO via the
         * logical buffer cache buffer.
         */
        *errorp = 0;
        KKASSERT(pblksize >= HAMMER2_MIN_ALLOC);
retry:
        parent = *parentp;
        hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */
        chain = hammer2_chain_lookup(&parent, &key_dummy,
                                     lbase, lbase,
                                     &cache_index, HAMMER2_LOOKUP_NODATA);

        if (chain == NULL) {
                /*
                 * We found a hole, create a new chain entry.
                 *
                 * NOTE: DATA chains are created without device backing
                 *       store (nor do we want any).
                 */
                *errorp = hammer2_chain_create(trans, &parent, &chain,
                                               lbase, HAMMER2_PBUFRADIX,
                                               HAMMER2_BREF_TYPE_DATA,
                                               pblksize);
                if (chain == NULL) {
                        hammer2_chain_lookup_done(parent);
                        panic("hammer2_chain_create: par=%p error=%d\n",
                              parent, *errorp);
                        goto retry;
                }

                pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                /*ip->delta_dcount += pblksize;*/
        } else {
                switch (chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        /*
                         * The data is embedded in the inode.  The
                         * caller is responsible for marking the inode
                         * modified and copying the data to the embedded
                         * area.
                         */
                        pbase = NOOFFSET;
                        break;
                case HAMMER2_BREF_TYPE_DATA:
                        if (chain->bytes != pblksize) {
                                hammer2_chain_resize(trans, ip,
                                                     parent, &chain,
                                                     pradix,
                                                     HAMMER2_MODIFY_OPTDATA);
                        }
                        hammer2_chain_modify(trans, &chain,
                                             HAMMER2_MODIFY_OPTDATA);
                        pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                        break;
                default:
                        panic("hammer2_assign_physical: bad type");
                        /* NOT REACHED */
                        pbase = NOOFFSET;
                        break;
                }
        }

        /*
         * Cleanup.  If chain wound up being the inode (i.e. DIRECTDATA),
         * we might have to replace *parentp.
         */
        hammer2_chain_lookup_done(parent);
        if (chain) {
                if (*parentp != chain &&
                    (*parentp)->core == chain->core) {
                        parent = *parentp;
                        *parentp = chain;               /* eats lock */
                        hammer2_chain_unlock(parent);
                        hammer2_chain_lock(chain, 0);   /* need another */
                }
                /* else chain already locked for return */
        }
        return (chain);
}

/*
 * From hammer2_vnops.c.
 * The core write function which determines which path to take
 * depending on compression settings.
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
                        hammer2_chain_t **parentp,
                        hammer2_key_t lbase, int ioflag, int pblksize,
                        int *errorp)
{
        hammer2_chain_t *chain;

        switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
        case HAMMER2_COMP_NONE:
                /*
                 * We have to assign physical storage to the buffer
                 * we intend to dirty or write now to avoid deadlocks
                 * in the strategy code later.
                 *
                 * This can return NOOFFSET for inode-embedded data.
                 * The strategy code will take care of it in that case.
                 */
                chain = hammer2_assign_physical(trans, ip, parentp,
                                                lbase, pblksize,
                                                errorp);
                hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
                if (chain)
                        hammer2_chain_unlock(chain);
                break;
        case HAMMER2_COMP_AUTOZERO:
                /*
                 * Check for zero-fill only
                 */
                hammer2_zero_check_and_write(bp, trans, ip,
                                             ipdata, parentp, lbase,
                                             ioflag, pblksize, errorp);
                break;
        case HAMMER2_COMP_LZ4:
        case HAMMER2_COMP_ZLIB:
        default:
                /*
                 * Check for zero-fill and attempt compression.
                 */
                hammer2_compress_and_write(bp, trans, ip,
                                           ipdata, parentp,
                                           lbase, ioflag,
                                           pblksize, errorp,
                                           ipdata->comp_algo);
                break;
        }
        ipdata = &ip->chain->data->ipdata;      /* reload */
}

/*
 * From hammer2_vnops.c
 * Generic function that performs the compression in the compressed
 * write path.  The compression algorithm is determined by the settings
 * obtained from the inode.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
        hammer2_chain_t **parentp,
        hammer2_key_t lbase, int ioflag, int pblksize,
        int *errorp, int comp_algo)
{
        hammer2_chain_t *chain;
        int comp_size;
        int comp_block_size;
        char *comp_buffer;

        if (test_block_zeros(bp->b_data, pblksize)) {
                zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
                return;
        }

        comp_size = 0;
        comp_buffer = NULL;

        KKASSERT(pblksize / 2 <= 32768);

        if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
                z_stream strm_compress;
                int comp_level;
                int ret;

                switch(HAMMER2_DEC_COMP(comp_algo)) {
                case HAMMER2_COMP_LZ4:
                        comp_buffer = objcache_get(cache_buffer_write,
                                                   M_INTWAIT);
                        comp_size = LZ4_compress_limitedOutput(
                                        bp->b_data,
                                        &comp_buffer[sizeof(int)],
                                        pblksize,
                                        pblksize / 2 - sizeof(int));
                        /*
                         * We need to prefix with the size, LZ4
                         * doesn't do it for us.  Add the related
                         * overhead.
                         */
                        *(int *)comp_buffer = comp_size;
                        if (comp_size)
                                comp_size += sizeof(int);
                        break;
                case HAMMER2_COMP_ZLIB:
                        comp_level = HAMMER2_DEC_LEVEL(comp_algo);
                        if (comp_level == 0)
                                comp_level = 6; /* default zlib compression */
                        else if (comp_level < 6)
                                comp_level = 6;
                        else if (comp_level > 9)
                                comp_level = 9;
                        ret = deflateInit(&strm_compress, comp_level);
                        if (ret != Z_OK) {
                                kprintf("HAMMER2 ZLIB: fatal error "
                                        "on deflateInit.\n");
                        }

                        comp_buffer = objcache_get(cache_buffer_write,
                                                   M_INTWAIT);
                        strm_compress.next_in = bp->b_data;
                        strm_compress.avail_in = pblksize;
                        strm_compress.next_out = comp_buffer;
                        strm_compress.avail_out = pblksize / 2;
                        ret = deflate(&strm_compress, Z_FINISH);
                        if (ret == Z_STREAM_END) {
                                comp_size = pblksize / 2 -
                                            strm_compress.avail_out;
                        } else {
                                comp_size = 0;
                        }
                        ret = deflateEnd(&strm_compress);
                        break;
                default:
                        kprintf("Error: Unknown compression method.\n");
                        kprintf("Comp_method = %d.\n", comp_algo);
                        break;
                }
        }

        if (comp_size == 0) {
                /*
                 * compression failed or turned off
                 */
                comp_block_size = pblksize;     /* safety */
                if (++ip->comp_heuristic > 128)
                        ip->comp_heuristic = 8;
        } else {
                /*
                 * compression succeeded
                 */
                ip->comp_heuristic = 0;
                if (comp_size <= 1024) {
                        comp_block_size = 1024;
                } else if (comp_size <= 2048) {
                        comp_block_size = 2048;
                } else if (comp_size <= 4096) {
                        comp_block_size = 4096;
                } else if (comp_size <= 8192) {
                        comp_block_size = 8192;
                } else if (comp_size <= 16384) {
                        comp_block_size = 16384;
                } else if (comp_size <= 32768) {
                        comp_block_size = 32768;
                } else {
                        panic("hammer2: WRITE PATH: "
                              "Weird comp_size value.");
                        /* NOT REACHED */
                        comp_block_size = pblksize;
                }
        }

        chain = hammer2_assign_physical(trans, ip, parentp,
                                        lbase, comp_block_size,
                                        errorp);
        ipdata = &ip->chain->data->ipdata;      /* RELOAD */

        if (*errorp) {
                kprintf("WRITE PATH: An error occurred while "
                        "assigning physical space.\n");
                KKASSERT(chain == NULL);
        } else {
                /* Get device offset */
                hammer2_off_t pbase;
                hammer2_off_t pmask;
                hammer2_off_t peof;
                size_t boff;
                size_t psize;
                struct buf *dbp;
                int temp_check;

                KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

                switch(chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        KKASSERT(chain->data->ipdata.op_flags &
                                 HAMMER2_OPFLAG_DIRECTDATA);
                        KKASSERT(bp->b_loffset == 0);
                        bcopy(bp->b_data, chain->data->ipdata.u.data,
                              HAMMER2_EMBEDDED_BYTES);
                        break;
                case HAMMER2_BREF_TYPE_DATA:
                        psize = hammer2_devblksize(chain->bytes);
                        pmask = (hammer2_off_t)psize - 1;
                        pbase = chain->bref.data_off & ~pmask;
                        boff = chain->bref.data_off &
                               (HAMMER2_OFF_MASK & pmask);
                        peof = (pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

                        /*
                         * Optimize out the read-before-write
                         * if possible.
                         */
                        if (comp_block_size == psize) {
                                dbp = getblk(chain->hmp->devvp, pbase,
                                             psize, 0, 0);
                        } else {
                                *errorp = bread(chain->hmp->devvp,
                                                pbase, psize, &dbp);
                                if (*errorp) {
                                        kprintf("hammer2: WRITE PATH: "
                                                "dbp bread error\n");
                                        break;
                                }
                        }

                        /*
                         * When loading the block make sure we don't
                         * leave garbage after the compressed data.
                         */
                        if (comp_size) {
                                chain->bref.methods =
                                        HAMMER2_ENC_COMP(comp_algo) +
                                        HAMMER2_ENC_CHECK(temp_check);
                                bcopy(comp_buffer, dbp->b_data + boff,
                                      comp_size);
                                if (comp_size != comp_block_size) {
                                        bzero(dbp->b_data + boff +
                                                comp_size,
                                              comp_block_size -
                                                comp_size);
                                }
                        } else {
                                chain->bref.methods =
                                        HAMMER2_ENC_COMP(
                                                HAMMER2_COMP_NONE) +
                                        HAMMER2_ENC_CHECK(temp_check);
                                bcopy(bp->b_data, dbp->b_data + boff,
                                      pblksize);
                        }

                        /*
                         * Device buffer is now valid, chain is no
                         * longer in the initial state.
                         */
                        atomic_clear_int(&chain->flags,
                                         HAMMER2_CHAIN_INITIAL);

                        /* Now write the related bdp. */
                        if (ioflag & IO_SYNC) {
                                /*
                                 * Synchronous I/O requested.
                                 */
                                bwrite(dbp);
                        /*
                        } else if ((ioflag & IO_DIRECT) &&
                                   loff + n == pblksize) {
                                bdwrite(dbp);
                        */
                        } else if (ioflag & IO_ASYNC) {
                                bawrite(dbp);
                        } else if (hammer2_cluster_enable) {
                                cluster_write(dbp, peof,
                                              HAMMER2_PBUFSIZE,
                                              4/*XXX*/);
                        } else {
                                bdwrite(dbp);
                        }
                        break;
                default:
                        panic("hammer2_write_bp: bad chain type %d\n",
                              chain->bref.type);
                        /* NOT REACHED */
                        break;
                }

                hammer2_chain_unlock(chain);
        }
        if (comp_buffer)
                objcache_put(cache_buffer_write, comp_buffer);
}

/*
 * Function that performs zero-checking and writing without compression;
 * it corresponds to the default zero-checking path.
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
        hammer2_chain_t **parentp,
        hammer2_key_t lbase, int ioflag, int pblksize, int *errorp)
{
        hammer2_chain_t *chain;

        if (test_block_zeros(bp->b_data, pblksize)) {
                zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
        } else {
                chain = hammer2_assign_physical(trans, ip, parentp,
                                                lbase, pblksize, errorp);
                hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
                if (chain)
                        hammer2_chain_unlock(chain);
        }
}

/*
 * A function to test whether a block of data contains only zeros;
 * returns TRUE (non-zero) if the block is all zeros.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
        size_t i;

        for (i = 0; i < bytes; i += sizeof(long)) {
                if (*(const long *)(buf + i) != 0)
                        return (0);
        }
        return (1);
}

/*
 * Function to "write" a block that contains only zeros.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip,
           hammer2_inode_data_t *ipdata, hammer2_chain_t **parentp,
           hammer2_key_t lbase, int *errorp __unused)
{
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_key_t key_dummy;
        int cache_index = -1;

        parent = hammer2_chain_lookup_init(*parentp, 0);

        chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
                                     &cache_index, HAMMER2_LOOKUP_NODATA);
        if (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        bzero(chain->data->ipdata.u.data,
                              HAMMER2_EMBEDDED_BYTES);
                } else {
                        hammer2_chain_delete(trans, chain, 0);
                }
                hammer2_chain_unlock(chain);
        }
        hammer2_chain_lookup_done(parent);
}

/*
 * Function to write the data as it is, without performing any sort of
 * compression.  This function is used in the uncompressed path and in
 * the default zero-checking path.
 */
static
void
hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag,
                 int pblksize, int *errorp)
{
        hammer2_off_t pbase;
        hammer2_off_t pmask;
        hammer2_off_t peof;
        struct buf *dbp;
        size_t boff;
        size_t psize;
        int error;
        int temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

        KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
                KKASSERT(chain->data->ipdata.op_flags &
                         HAMMER2_OPFLAG_DIRECTDATA);
                KKASSERT(bp->b_loffset == 0);
                bcopy(bp->b_data, chain->data->ipdata.u.data,
                      HAMMER2_EMBEDDED_BYTES);
                error = 0;
                break;
        case HAMMER2_BREF_TYPE_DATA:
                psize = hammer2_devblksize(chain->bytes);
                pmask = (hammer2_off_t)psize - 1;
                pbase = chain->bref.data_off & ~pmask;
                boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask);
                peof = (pbase + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;

                if (psize == pblksize) {
                        dbp = getblk(chain->hmp->devvp, pbase,
                                     psize, 0, 0);
                        error = 0;
                } else {
                        error = bread(chain->hmp->devvp, pbase, psize, &dbp);
                        if (error) {
                                kprintf("hammer2: WRITE PATH: "
                                        "dbp bread error\n");
                                break;
                        }
                }

                chain->bref.methods = HAMMER2_ENC_COMP(HAMMER2_COMP_NONE) +
                                      HAMMER2_ENC_CHECK(temp_check);
                bcopy(bp->b_data, dbp->b_data + boff, chain->bytes);

                /*
                 * Device buffer is now valid, chain is no
                 * longer in the initial state.
                 */
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

                if (ioflag & IO_SYNC) {
                        /*
                         * Synchronous I/O requested.
                         */
                        bwrite(dbp);
                /*
                } else if ((ioflag & IO_DIRECT) && loff + n == pblksize) {
                        bdwrite(dbp);
                */
                } else if (ioflag & IO_ASYNC) {
                        bawrite(dbp);
                } else if (hammer2_cluster_enable) {
                        cluster_write(dbp, peof, HAMMER2_PBUFSIZE, 4/*XXX*/);
                } else {
                        bdwrite(dbp);
                }
                break;
        default:
                panic("hammer2_write_bp: bad chain type %d\n",
                      chain->bref.type);
                /* NOT REACHED */
                error = 0;
                break;
        }
        *errorp = error;
}

static
int
hammer2_remount(struct mount *mp, char *path, struct vnode *devvp,
                struct ucred *cred)
{
        return (0);
}

static
int
hammer2_vfs_unmount(struct mount *mp, int mntflags)
{
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
        hammer2_cluster_t *cluster;
        int flags;
        int error = 0;
        int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
        int dumpcnt;
        struct vnode *devvp;

        pmp = MPTOPMP(mp);
        cluster = pmp->mount_cluster;
        hmp = cluster->hmp;
        flags = 0;

        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;

        hammer2_mount_exlock(hmp);

        /*
         * If mount initialization proceeded far enough we must flush
         * its vnodes.
         */
        if (pmp->iroot)
                error = vflush(mp, 0, flags);

        if (error) {
                hammer2_mount_unlock(hmp);
                return error;
        }

        lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
        --hmp->pmp_count;
        kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count);

        /*
         * Flush any left over chains.  The voldata lock is only used
         * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
         */
        hammer2_voldata_lock(hmp);
        if ((hmp->vchain.flags | hmp->fchain.flags) &
            (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_SUBMODIFIED)) {
                hammer2_voldata_unlock(hmp, 0);
                hammer2_vfs_sync(mp, MNT_WAIT);
                hammer2_vfs_sync(mp, MNT_WAIT);
        } else {
                hammer2_voldata_unlock(hmp, 0);
        }
        if (hmp->pmp_count == 0) {
                if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
                                         HAMMER2_CHAIN_SUBMODIFIED)) {
                        kprintf("hammer2_unmount: chains left over after "
                                "final sync\n");
                        if (hammer2_debug & 0x0010)
                                Debugger("entered debugger");
                }
        }

        /*
         * Cleanup the root and super-root chain elements (which should be
         * clean).
         */
        if (pmp->iroot) {
#if REPORT_REFS_ERRORS
                if (pmp->iroot->refs != 1)
                        kprintf("PMP->IROOT %p REFS WRONG %d\n",
                                pmp->iroot, pmp->iroot->refs);
#else
                KKASSERT(pmp->iroot->refs == 1);
#endif
                hammer2_inode_drop(pmp->iroot);     /* ref for pmp->iroot */
                pmp->iroot = NULL;
        }
        if (cluster->rchain) {
                atomic_clear_int(&cluster->rchain->flags,
                                 HAMMER2_CHAIN_MOUNTED);
#if REPORT_REFS_ERRORS
                if (cluster->rchain->refs != 1)
                        kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
                                cluster->rchain, cluster->rchain->refs);
#else
                KKASSERT(cluster->rchain->refs == 1);
#endif
                hammer2_chain_drop(cluster->rchain);
                cluster->rchain = NULL;
        }
        ccms_domain_uninit(&pmp->ccms_dom);

        /*
         * Kill cluster controller
         */
        kdmsg_iocom_uninit(&pmp->iocom);

        /*
         * If no PFS's are left, drop the master hammer2_mount for the
         * device.
         */
        if (hmp->pmp_count == 0) {
                if (hmp->sroot) {
                        hammer2_inode_drop(hmp->sroot);
                        hmp->sroot = NULL;
                }
                if (hmp->schain) {
#if REPORT_REFS_ERRORS
                        if (hmp->schain->refs != 1)
                                kprintf("HMP->SCHAIN %p REFS WRONG %d\n",
                                        hmp->schain, hmp->schain->refs);
#else
                        KKASSERT(hmp->schain->refs == 1);
#endif
                        hammer2_chain_drop(hmp->schain);
                        hmp->schain = NULL;
                }

                /*
                 * Finish up with the device vnode
                 */
                if ((devvp = hmp->devvp) != NULL) {
                        vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
                        hmp->devvp = NULL;
                        VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE));
                        vrele(devvp);
                        devvp = NULL;
                }

                /*
                 * Final drop of embedded freemap root chain to clean up
                 * fchain.core (fchain structure is not flagged ALLOCATED
                 * so it is cleaned out and then left to rot).
                 */
                hammer2_chain_drop(&hmp->fchain);

                /*
                 * Final drop of embedded volume root chain to clean up
                 * vchain.core (vchain structure is not flagged ALLOCATED
                 * so it is cleaned out and then left to rot).
                 */
                dumpcnt = 50;
                hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
                hammer2_mount_unlock(hmp);
                hammer2_chain_drop(&hmp->vchain);
        } else {
                hammer2_mount_unlock(hmp);
        }

        pmp->mp = NULL;
        mp->mnt_data = NULL;

        pmp->mount_cluster = NULL;
        pmp->cluster = NULL;    /* XXX */

        kmalloc_destroy(&pmp->mmsg);
        kmalloc_destroy(&pmp->minode);

        cluster->hmp = NULL;

        kfree(cluster, M_HAMMER2);
        kfree(pmp, M_HAMMER2);
        if (hmp->pmp_count == 0) {
                mtx_lock(&hmp->wthread_mtx);
                hmp->wthread_destroy = 1;
                wakeup(&hmp->wthread_bioq);
                while (hmp->wthread_destroy != -1) {
                        mtxsleep(&hmp->wthread_destroy, &hmp->wthread_mtx, 0,
                                 "umount-sleep", 0);
                }
                mtx_unlock(&hmp->wthread_mtx);

                TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
                kmalloc_destroy(&hmp->mchain);
                kfree(hmp, M_HAMMER2);
        }
        lockmgr(&hammer2_mntlk, LK_RELEASE);

        return (error);
}

static
int
hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
                 ino_t ino, struct vnode **vpp)
{
        kprintf("hammer2_vget\n");
        return (EOPNOTSUPP);
}

static
int
hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
{
        hammer2_pfsmount_t *pmp;
        hammer2_chain_t *parent;
        int error;
        struct vnode *vp;

        pmp = MPTOPMP(mp);
        if (pmp->iroot == NULL) {
                *vpp = NULL;
                error = EINVAL;
        } else {
                parent = hammer2_inode_lock_sh(pmp->iroot);
                vp = hammer2_igetv(pmp->iroot, &error);
                hammer2_inode_unlock_sh(pmp->iroot, parent);
                *vpp = vp;
                if (vp == NULL)
                        kprintf("vnodefail\n");
        }

        return (error);
}

/*
 * Filesystem status
 *
 * XXX incorporate ipdata->inode_quota and data_quota
 */
static
int
hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;

        pmp = MPTOPMP(mp);
        hmp = MPTOHMP(mp);

        mp->mnt_stat.f_files = pmp->inode_count;
        mp->mnt_stat.f_ffree = 0;
        mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;

        *sbp = mp->mnt_stat;
        return (0);
}
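
/*
 * NOTE: f_blocks and f_bfree above, and in the statvfs variant below, are
 *       expressed in HAMMER2_PBUFSIZE units to match the f_bsize reported
 *       to userland.  Assuming a 64KB HAMMER2_PBUFSIZE, an allocator_size
 *       of 1GB would report f_blocks = 16384.
 */
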
static
int
hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
{
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;

        pmp = MPTOPMP(mp);
        hmp = MPTOHMP(mp);

        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_files = pmp->inode_count;
        mp->mnt_vstat.f_ffree = 0;
        mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;

        *sbp = mp->mnt_vstat;
        return (0);
}

/*
 * Sync the entire filesystem; this is called from the filesystem syncer
 * process periodically and whenever a user calls sync(1) on the hammer2
 * mountpoint.
 *
 * Currently is actually called from the syncer! \o/
 *
 * This task will have to snapshot the state of the dirty inode chain.
 * From that, it will have to make sure all of the inodes on the dirty
 * chain have IO initiated.  We make sure that io is initiated for the root
 * block.
 *
 * If waitfor is set, we wait for media to acknowledge the new rootblock.
 *
 * THINKS: side A vs side B, to have sync not stall all I/O?
 */
static
int
hammer2_vfs_sync(struct mount *mp, int waitfor)
{
        struct hammer2_sync_info info;
        hammer2_pfsmount_t *pmp;
        hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        int flags;
        int error;
        int i;

        pmp = MPTOPMP(mp);

        /*
         * We can't acquire locks on existing vnodes while in a transaction
         * without risking a deadlock.  This assumes that vfsync() can be
         * called without the vnode locked (which it can in DragonFly).
         * Otherwise we'd have to implement a multi-pass or flag the lock
         * failures and retry.
         */
        /*flags = VMSC_GETVP;*/
        flags = 0;
        if (waitfor & MNT_LAZY)
                flags |= VMSC_ONEPASS;

        hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH);

        info.error = 0;
        info.waitfor = MNT_NOWAIT;
        vmntvnodescan(mp, flags | VMSC_NOWAIT,
                      hammer2_sync_scan1,
                      hammer2_sync_scan2, &info);
        if (info.error == 0 && (waitfor & MNT_WAIT)) {
                info.waitfor = waitfor;
                vmntvnodescan(mp, flags,
                              hammer2_sync_scan1,
                              hammer2_sync_scan2, &info);
        }
#if 0
        if (waitfor == MNT_WAIT) {
                /* XXX */
        } else {
                /* XXX */
        }
#endif

        cluster = pmp->cluster;
        hmp = cluster->hmp;

        hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
        if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
                                 HAMMER2_CHAIN_SUBMODIFIED)) {
                hammer2_chain_flush(&info.trans, &hmp->vchain);
        }
        hammer2_chain_unlock(&hmp->vchain);

#if 1
        /*
         * Rollup flush.  The fsyncs above basically just flushed
         * data blocks.  The flush below gets all the meta-data.
         */
        hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
        if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
                                 HAMMER2_CHAIN_SUBMODIFIED)) {
                /* this will modify vchain as a side effect */
                hammer2_chain_flush(&info.trans, &hmp->fchain);
        }
        hammer2_chain_unlock(&hmp->fchain);
#endif

        error = 0;

        /*
         * We can't safely flush the volume header until we have
         * flushed any device buffers which have built up.
         *
         * XXX this isn't being incremental
         */
        vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
        error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
        vn_unlock(hmp->devvp);

        /*
         * The flush code sets CHAIN_VOLUMESYNC to indicate that the
         * volume header needs synchronization via hmp->volsync.
         *
         * XXX synchronize the flag & data with only this flush XXX
         */
        if (error == 0 && (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
                struct buf *bp;

                /*
                 * Synchronize the disk before flushing the volume
                 * header.
                 */
                bp = getpbuf(NULL);
                bp->b_bio1.bio_offset = 0;
                bp->b_bufsize = 0;
                bp->b_bcount = 0;
                bp->b_cmd = BUF_CMD_FLUSH;
                bp->b_bio1.bio_done = biodone_sync;
                bp->b_bio1.bio_flags |= BIO_SYNC;
                vn_strategy(hmp->devvp, &bp->b_bio1);
                biowait(&bp->b_bio1, "h2vol");
                relpbuf(bp, NULL);

                /*
                 * Then we can safely flush the version of the volume header
                 * synchronized by the flush code.
                 */
                i = hmp->volhdrno + 1;
                if (i >= HAMMER2_NUM_VOLHDRS)
                        i = 0;
                if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
                    hmp->volsync.volu_size) {
                        i = 0;
                }
                kprintf("sync volhdr %d %jd\n",
                        i, (intmax_t)hmp->volsync.volu_size);
                bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
                            HAMMER2_PBUFSIZE, 0, 0);
                atomic_clear_int(&hmp->vchain.flags, HAMMER2_CHAIN_VOLUMESYNC);
                bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
                bawrite(bp);
                hmp->volhdrno = i;
        }
        hammer2_trans_done(&info.trans);
        return (error);
}

/*
 * Sync passes.
 *
 * NOTE: We don't test SUBMODIFIED or MOVED here because the fsync code
 *       won't flush on those flags.  The syncer code above will do a
 *       general meta-data flush globally that will catch these flags.
 */
static int
hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
        hammer2_inode_t *ip;

        ip = VTOI(vp);
        if (vp->v_type == VNON || ip == NULL ||
            ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
             RB_EMPTY(&vp->v_rbdirty_tree))) {
                return(-1);
        }
        return(0);
}

static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
        struct hammer2_sync_info *info = data;
        hammer2_inode_t *ip;
        hammer2_chain_t *parent;
        int error;

        ip = VTOI(vp);
        if (vp->v_type == VNON || vp->v_type == VBAD ||
            ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
             RB_EMPTY(&vp->v_rbdirty_tree))) {
                return(0);
        }

        /*
         * VOP_FSYNC will start a new transaction so replicate some code
         * here to do it inline (see hammer2_vop_fsync()).
         *
         * WARNING: The vfsync interacts with the buffer cache and might
         *          block, we can't hold the inode lock at that time.
         */
        atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
        if (ip->vp)
                vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
        parent = hammer2_inode_lock_ex(ip);
        hammer2_chain_flush(&info->trans, parent);
        hammer2_inode_unlock_ex(ip, parent);
        error = 0;
#if 0
        error = VOP_FSYNC(vp, MNT_NOWAIT, 0);
#endif
        if (error)
                info->error = error;
        return(0);
}

static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
        return (0);
}

static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
                   struct fid *fhp, struct vnode **vpp)
{
        return (0);
}

static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                     int *exflagsp, struct ucred **credanonp)
{
        return (0);
}

/*
 * Support code for hammer2_mount().  Read, verify, and install the volume
 * header into the HMP.
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to
 *     nonexistent locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs
 */
static
int
hammer2_install_volume_header(hammer2_mount_t *hmp)
{
        hammer2_volume_data_t *vd;
        struct buf *bp;
        hammer2_crc32_t crc0, crc, bcrc0, bcrc;
        int error_reported;
        int error;
        int valid;
        int i;

        error_reported = 0;
        error = 0;
        valid = 0;
        bp = NULL;

        /*
         * There are up to 4 copies of the volume header (syncs iterate
         * between them so there is no single master).  We don't trust the
         * volu_size field so we don't know precisely how large the
         * filesystem is, so depend on the OS to return an error if we go
         * beyond the block device's EOF.
         */
        for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
                error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
                              HAMMER2_VOLUME_BYTES, &bp);
                if (error) {
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                vd = (struct hammer2_volume_data *) bp->b_data;
                if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
                    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
                        /* XXX: Reversed-endianness filesystem */
                        kprintf("hammer2: reverse-endian filesystem "
                                "detected\n");
                        brelse(bp);
                        bp = NULL;
                        continue;
                }

                crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
                crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
                                      HAMMER2_VOLUME_ICRC0_SIZE);
                bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
                bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
                                       HAMMER2_VOLUME_ICRC1_SIZE);
                if ((crc0 != crc) || (bcrc0 != bcrc)) {
                        kprintf("hammer2 volume header crc "
                                "mismatch copy #%d %08x/%08x\n",
                                i, crc0, crc);
                        error_reported = 1;
                        brelse(bp);
                        bp = NULL;
                        continue;
                }
                if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
                        valid = 1;
                        hmp->voldata = *vd;
                        hmp->volhdrno = i;
                }
                brelse(bp);
                bp = NULL;
        }
        if (valid) {
                hmp->volsync = hmp->voldata;
                error = 0;
                if (error_reported || bootverbose || 1) { /* 1/DEBUG */
                        kprintf("hammer2: using volume header #%d\n",
                                hmp->volhdrno);
                }
        } else {
                error = EINVAL;
                kprintf("hammer2: no valid volume headers found!\n");
        }
        return (error);
}

/*
 * Reconnect using the passed file pointer.  The caller must ref the
 * fp for us.
 */
void
hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
{
        hammer2_inode_data_t *ipdata;
        hammer2_chain_t *parent;
        hammer2_mount_t *hmp;
        size_t name_len;

        hmp = pmp->mount_cluster->hmp;

        /*
         * Closes old comm descriptor, kills threads, cleans up
         * states, then installs the new descriptor and creates
         * new threads.
         */
        kdmsg_iocom_reconnect(&pmp->iocom, fp, "hammer2");

        /*
         * Setup LNK_CONN fields for autoinitiated state machine
         */
        parent = hammer2_inode_lock_ex(pmp->iroot);
        ipdata = &parent->data->ipdata;
        pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid;
        pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid;
        pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type;
        pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
        pmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type;

        /*
         * Filter adjustment.  Clients do not need visibility into other
         * clients (otherwise millions of clients would present a serious
         * problem).  The fs_label also serves to restrict the namespace.
         */
        pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2;
        pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
        switch (ipdata->pfs_type) {
        case DMSG_PFSTYPE_CLIENT:
                pmp->iocom.auto_lnk_conn.peer_mask &=
                                ~(1LLU << DMSG_PFSTYPE_CLIENT);
                break;
        default:
                break;
        }

        name_len = ipdata->name_len;
        if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label))
                name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1;
        bcopy(ipdata->filename,
              pmp->iocom.auto_lnk_conn.fs_label,
              name_len);
        pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;

        /*
         * Setup LNK_SPAN fields for autoinitiated state machine
         */
        pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid;
        pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid;
        pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type;
        pmp->iocom.auto_lnk_span.peer_type = hmp->voldata.peer_type;
        pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
        name_len = ipdata->name_len;
        if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label))
                name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1;
        bcopy(ipdata->filename,
              pmp->iocom.auto_lnk_span.fs_label,
              name_len);
        pmp->iocom.auto_lnk_span.fs_label[name_len] = 0;
        hammer2_inode_unlock_ex(pmp->iroot, parent);

        kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg);
}

static int
hammer2_rcvdmsg(kdmsg_msg_t *msg)
{
        switch(msg->any.head.cmd & DMSGF_TRANSMASK) {
        case DMSG_DBG_SHELL:
                /*
                 * (non-transaction)
                 * Execute shell command (not supported atm)
                 */
                kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
                break;
        case DMSG_DBG_SHELL | DMSGF_REPLY:
                /*
                 * (non-transaction)
                 */
                if (msg->aux_data) {
                        msg->aux_data[msg->aux_size - 1] = 0;
                        kprintf("HAMMER2 DBG: %s\n", msg->aux_data);
                }
                break;
        default:
                /*
                 * Unsupported message received.  We only need to
                 * reply if it's a transaction in order to close our end.
                 * Ignore any one-way messages or any further messages
                 * associated with the transaction.
                 *
                 * NOTE: This case also includes DMSG_LNK_ERROR messages
                 *       which might be one-way, replying to those would
                 *       cause an infinite ping-pong.
                 */
                if (msg->any.head.cmd & DMSGF_CREATE)
                        kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
                break;
        }
        return(0);
}

/*
 * This function is called after KDMSG has automatically handled processing
 * of a LNK layer message (typically CONN, SPAN, or CIRC).
 *
 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which
 * advertise all available hammer2 super-root volumes.
 */
static void
hammer2_autodmsg(kdmsg_msg_t *msg)
{
        hammer2_pfsmount_t *pmp = msg->iocom->handle;
        hammer2_mount_t *hmp = pmp->mount_cluster->hmp;
        int copyid;

        /*
         * We only care about replies to our LNK_CONN auto-request.  kdmsg
         * has already processed the reply, we use this callback as a shim
         * to know when we can advertise available super-root volumes.
         */
        if ((msg->any.head.cmd & DMSGF_TRANSMASK) !=
            (DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY) ||
            msg->state == NULL) {
                return;
        }

        kprintf("LNK_CONN REPLY RECEIVED CMD %08x\n", msg->any.head.cmd);

        if (msg->any.head.cmd & DMSGF_CREATE) {
                kprintf("HAMMER2: VOLDATA DUMP\n");

                /*
                 * Dump the configuration stored in the volume header
                 */
                hammer2_voldata_lock(hmp);
                for (copyid = 0; copyid < HAMMER2_COPYID_COUNT; ++copyid) {
                        if (hmp->voldata.copyinfo[copyid].copyid == 0)
                                continue;
                        hammer2_volconf_update(pmp, copyid);
                }
                hammer2_voldata_unlock(hmp, 0);
        }
        if ((msg->any.head.cmd & DMSGF_DELETE) &&
            msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) {
                kprintf("HAMMER2: CONN WAS TERMINATED\n");
        }
}

/*
 * Volume configuration updates are passed onto the userland service
 * daemon via the open LNK_CONN transaction.
 */
void
hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index)
{
        hammer2_mount_t *hmp = pmp->mount_cluster->hmp;
        kdmsg_msg_t *msg;

        /* XXX interlock against connection state termination */
        kprintf("volconf update %p\n", pmp->iocom.conn_state);
        if (pmp->iocom.conn_state) {
                kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n");
                msg = kdmsg_msg_alloc_state(pmp->iocom.conn_state,
                                            DMSG_LNK_VOLCONF, NULL, NULL);
                msg->any.lnk_volconf.copy = hmp->voldata.copyinfo[index];
                msg->any.lnk_volconf.mediaid = hmp->voldata.fsid;
                msg->any.lnk_volconf.index = index;
                kdmsg_msg_write(msg);
        }
}

void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp)
{
        hammer2_chain_layer_t *layer;
        hammer2_chain_t *scan;
        hammer2_chain_t *first_parent;

        --*countp;
        if (*countp == 0) {
                kprintf("%*.*s...\n", tab, tab, "");
                return;
        }
        if (*countp < 0)
                return;
        first_parent = chain->core ? TAILQ_FIRST(&chain->core->ownerq) : NULL;
        kprintf("%*.*schain %p.%d [%08x][core=%p fp=%p] (%s) np=%p dt=%s refs=%d",
                tab, tab, "",
                chain, chain->bref.type, chain->flags,
                chain->core,
                first_parent,
                ((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                  chain->data) ? (char *)chain->data->ipdata.filename : "?"),
                (first_parent ? TAILQ_NEXT(chain, core_entry) : NULL),
                (chain->delete_tid == HAMMER2_MAX_TID ? "max" : "fls"),
                chain->refs);
        if (first_parent)
                kprintf(" [fpflags %08x fprefs %d\n",
                        first_parent->flags,
                        first_parent->refs);
        if (chain->core == NULL || TAILQ_EMPTY(&chain->core->layerq))
                kprintf("\n");
        else
                kprintf(" {\n");
        TAILQ_FOREACH(layer, &chain->core->layerq, entry) {
                RB_FOREACH(scan, hammer2_chain_tree, &layer->rbtree) {
                        hammer2_dump_chain(scan, tab + 4, countp);
                }
        }
        if (chain->core && !TAILQ_EMPTY(&chain->core->layerq)) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
                        kprintf("%*.*s}(%s)\n", tab, tab, "",
                                chain->data->ipdata.filename);
                else
                        kprintf("%*.*s}\n", tab, tab, "");
        }
}
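
/*
 * Usage sketch: hammer2_dump_chain() is driven with a countdown so the
 * dump stays bounded, as in the unmount path above:
 *
 *      int dumpcnt = 50;
 *
 *      hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
 *
 * The count is decremented once per chain visited; the dump prints "..."
 * and stops recursing once the budget is exhausted.
 */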