/*
 * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/nlookup.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/uuid.h>
#include <sys/vfsops.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/objcache.h>

#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include <sys/mutex.h>
#include <sys/mutex2.h>

#include "hammer2.h"
#include "hammer2_disk.h"
#include "hammer2_mount.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define REPORT_REFS_ERRORS	1	/* XXX remove me */

MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");

struct hammer2_sync_info {
	hammer2_trans_t trans;
	int error;
	int waitfor;
};

TAILQ_HEAD(hammer2_mntlist, hammer2_mount);
TAILQ_HEAD(hammer2_pfslist, hammer2_pfsmount);
static struct hammer2_mntlist hammer2_mntlist;
static struct hammer2_pfslist hammer2_pfslist;
static struct lock hammer2_mntlk;

int hammer2_debug;
int hammer2_cluster_enable = 1;
int hammer2_hardlink_enable = 1;
int hammer2_flush_pipe = 100;
int hammer2_synchronous_flush = 1;
int hammer2_dio_count;
long hammer2_limit_dirty_chains;
long hammer2_iod_file_read;
long hammer2_iod_meta_read;
long hammer2_iod_indr_read;
long hammer2_iod_fmap_read;
long hammer2_iod_volu_read;
long hammer2_iod_file_write;
long hammer2_iod_meta_write;
long hammer2_iod_indr_write;
long hammer2_iod_fmap_write;
long hammer2_iod_volu_write;
long hammer2_ioa_file_read;
long hammer2_ioa_meta_read;
long hammer2_ioa_indr_read;
long hammer2_ioa_fmap_read;
long hammer2_ioa_volu_read;
long hammer2_ioa_fmap_write;
long hammer2_ioa_file_write;
long hammer2_ioa_meta_write;
long hammer2_ioa_indr_write;
long hammer2_ioa_volu_write;

MALLOC_DECLARE(C_BUFFER);
MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression.");

MALLOC_DECLARE(D_BUFFER);
MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression.");

SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem");

SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW,
	   &hammer2_debug, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW,
	   &hammer2_cluster_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW,
	   &hammer2_hardlink_enable, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW,
	   &hammer2_flush_pipe, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW,
	   &hammer2_synchronous_flush, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW,
	   &hammer2_limit_dirty_chains, 0, "");
SYSCTL_INT(_vfs_hammer2, OID_AUTO, dio_count, CTLFLAG_RD,
	   &hammer2_dio_count, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW,
	   &hammer2_iod_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW,
	   &hammer2_iod_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW,
	   &hammer2_iod_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW,
	   &hammer2_iod_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW,
	   &hammer2_iod_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW,
	   &hammer2_iod_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW,
	   &hammer2_iod_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW,
	   &hammer2_iod_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
	   &hammer2_iod_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
	   &hammer2_iod_volu_write, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW,
	   &hammer2_ioa_file_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW,
	   &hammer2_ioa_meta_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW,
	   &hammer2_ioa_indr_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW,
	   &hammer2_ioa_fmap_read, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW,
	   &hammer2_ioa_volu_read, 0, "");

SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW,
	   &hammer2_ioa_file_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW,
	   &hammer2_ioa_meta_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW,
	   &hammer2_ioa_indr_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW,
	   &hammer2_ioa_fmap_write, 0, "");
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW,
	   &hammer2_ioa_volu_write, 0, "");
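/*
 * The knobs and counters above are exported under the vfs.hammer2 sysctl
 * tree and can be inspected or adjusted from userland, e.g. (illustrative):
 *
 *	sysctl vfs.hammer2.debug=1
 *	sysctl vfs.hammer2.limit_dirty_chains
 */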
static int hammer2_vfs_init(struct vfsconf *conf);
static int hammer2_vfs_uninit(struct vfsconf *vfsp);
static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
				struct ucred *cred);
static int hammer2_remount(hammer2_mount_t *, struct mount *, char *,
				struct vnode *, struct ucred *);
static int hammer2_recovery(hammer2_mount_t *hmp);
static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp,
				struct ucred *cred);
static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
				ino_t ino, struct vnode **vpp);
static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
				struct fid *fhp, struct vnode **vpp);
static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
				int *exflagsp, struct ucred **credanonp);

static int hammer2_install_volume_header(hammer2_mount_t *hmp);
static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

static void hammer2_write_thread(void *arg);

static void hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp);
static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp);

/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag, int pblksize,
				int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				const hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag,
				int pblksize, int *errorp, int comp_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
				hammer2_trans_t *trans, hammer2_inode_t *ip,
				const hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int ioflag, int pblksize, int *errorp);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				const hammer2_inode_data_t *ipdata,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int *errorp);
static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
				int ioflag, int pblksize, int *errorp);

static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
static void hammer2_autodmsg(kdmsg_msg_t *msg);
static int hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
/*
 * HAMMER2 vfs operations.
 */
static struct vfsops hammer2_vfsops = {
	.vfs_init	= hammer2_vfs_init,
	.vfs_uninit	= hammer2_vfs_uninit,
	.vfs_sync	= hammer2_vfs_sync,
	.vfs_mount	= hammer2_vfs_mount,
	.vfs_unmount	= hammer2_vfs_unmount,
	.vfs_root	= hammer2_vfs_root,
	.vfs_statfs	= hammer2_vfs_statfs,
	.vfs_statvfs	= hammer2_vfs_statvfs,
	.vfs_vget	= hammer2_vfs_vget,
	.vfs_vptofh	= hammer2_vfs_vptofh,
	.vfs_fhtovp	= hammer2_vfs_fhtovp,
	.vfs_checkexp	= hammer2_vfs_checkexp
};

MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", "");

VFS_SET(hammer2_vfsops, hammer2, 0);
MODULE_VERSION(hammer2, 1);

static
int
hammer2_vfs_init(struct vfsconf *conf)
{
	static struct objcache_malloc_args margs_read;
	static struct objcache_malloc_args margs_write;

	int error;

	error = 0;

	if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref))
		error = EINVAL;
	if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data))
		error = EINVAL;
	if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data))
		error = EINVAL;

	if (error)
		kprintf("HAMMER2 structure size mismatch; cannot continue.\n");

	margs_read.objsize = 65536;
	margs_read.mtype = D_BUFFER;

	margs_write.objsize = 32768;
	margs_write.mtype = C_BUFFER;

	cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_read);
	cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc,
				0, 1, NULL, NULL, NULL, objcache_malloc_alloc,
				objcache_malloc_free, &margs_write);

	lockinit(&hammer2_mntlk, "mntlk", 0, 0);
	TAILQ_INIT(&hammer2_mntlist);
	TAILQ_INIT(&hammer2_pfslist);

	hammer2_limit_dirty_chains = desiredvnodes / 10;

	hammer2_trans_manage_init();

	return (error);
}
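/*
 * NOTE: The read-side (decompression) buffers above are sized to the 64KB
 *	 logical block size (HAMMER2_PBUFSIZE) while the write-side
 *	 (compression) buffers are half that, because a compression result
 *	 that does not fit in half the block is discarded and the block is
 *	 written uncompressed (see hammer2_compress_and_write()).
 */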
static
int
hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
{
	objcache_destroy(cache_buffer_read);
	objcache_destroy(cache_buffer_write);
	return 0;
}

/*
 * Core PFS allocator.  Used to allocate the pmp structure for PFS cluster
 * mounts and the spmp structure for media (hmp) structures.
 */
static hammer2_pfsmount_t *
hammer2_pfsalloc(const hammer2_inode_data_t *ipdata, hammer2_tid_t alloc_tid)
{
	hammer2_pfsmount_t *pmp;

	pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
	kmalloc_create(&pmp->minode, "HAMMER2-inodes");
	kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
	lockinit(&pmp->lock, "pfslk", 0, 0);
	spin_init(&pmp->inum_spin);
	RB_INIT(&pmp->inum_tree);
	TAILQ_INIT(&pmp->unlinkq);
	spin_init(&pmp->unlinkq_spin);

	pmp->alloc_tid = alloc_tid + 1;	  /* our first media transaction id */
	pmp->flush_tid = pmp->alloc_tid;
	if (ipdata) {
		pmp->inode_tid = ipdata->pfs_inum + 1;
		pmp->pfs_clid = ipdata->pfs_clid;
	}
	mtx_init(&pmp->wthread_mtx);
	bioq_init(&pmp->wthread_bioq);

	return pmp;
}

/*
 * Mount or remount HAMMER2 filesystem from physical media
 *
 *	mountroot
 *		mp		mount point structure
 *		path		NULL
 *		data		<unused>
 *		cred		<unused>
 *
 *	mount
 *		mp		mount point structure
 *		path		path to mount point
 *		data		pointer to argument structure in user space
 *			volume	volume path (device@LABEL form)
 *			hflags	user mount flags
 *		cred		user credentials
 *
 * RETURNS:	0	Success
 *		!0	error number
 */
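/*
 * Example of the device@LABEL convention described above (illustrative
 * only, actual device and label names will differ):
 *
 *	mount_hammer2 /dev/ad0s1a@LOCAL /mnt
 *
 * The portion before the '@' names the block device, the portion after
 * it selects the PFS label to look up under that device's super-root.
 */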
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
		  struct ucred *cred)
{
	struct hammer2_mount_info info;
	hammer2_pfsmount_t *pmp;
	hammer2_pfsmount_t *spmp;
	hammer2_mount_t *hmp;
	hammer2_key_t key_next;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	struct vnode *devvp;
	struct nlookupdata nd;
	hammer2_chain_t *parent;
	hammer2_chain_t *rchain;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	const hammer2_inode_data_t *ipdata;
	hammer2_blockref_t bref;
	struct file *fp;
	char devstr[MNAMELEN];
	size_t size;
	size_t done;
	char *dev;
	char *label;
	int ronly = 1;
	int error;
	int cache_index;
	int ddflag;
	int i;

	hmp = NULL;
	pmp = NULL;
	dev = NULL;
	label = NULL;
	devvp = NULL;
	cache_index = -1;

	kprintf("hammer2_mount\n");

	if (path == NULL) {
		/*
		 * Root mount
		 */
		bzero(&info, sizeof(info));
		info.cluster_fd = -1;
		return (EOPNOTSUPP);
	} else {
		/*
		 * Non-root mount or updating a mount
		 */
		error = copyin(data, &info, sizeof(info));
		if (error)
			return (error);

		error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done);
		if (error)
			return (error);

		/* Extract device and label */
		dev = devstr;
		label = strchr(devstr, '@');
		if (label == NULL ||
		    ((label + 1) - dev) > done) {
			return (EINVAL);
		}
		*label = '\0';
		label++;
		if (*label == '\0')
			return (EINVAL);

		if (mp->mnt_flag & MNT_UPDATE) {
			/* Update mount */
			/* HAMMER2 implements NFS export via mountctl */
			pmp = MPTOPMP(mp);
			for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
				hmp = pmp->iroot->cluster.array[i]->hmp;
				devvp = hmp->devvp;
				error = hammer2_remount(hmp, mp, path,
							devvp, cred);
				if (error)
					break;
			}
			/*hammer2_inode_install_hidden(pmp);*/

			return error;
		}
	}

	/*
	 * HMP device mount
	 *
	 * Lookup name and verify it refers to a block device.
	 */
	error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
	nlookup_done(&nd);

	if (error == 0) {
		if (vn_isdisk(devvp, &error))
			error = vfs_mountedon(devvp);
	}

	/*
	 * Determine if the device has already been mounted.  After this
	 * check hmp will be non-NULL if we are doing the second or more
	 * hammer2 mounts from the same device.
	 */
	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
	TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
		if (hmp->devvp == devvp)
			break;
	}

	/*
	 * Open the device if this isn't a secondary mount and construct
	 * the H2 device mount (hmp).
	 */
	if (hmp == NULL) {
		hammer2_chain_t *schain;
		hammer2_xid_t xid;

		if (error == 0 && vcount(devvp) > 0)
			error = EBUSY;

		/*
		 * Now open the device
		 */
		if (error == 0) {
			ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = vinvalbuf(devvp, V_SAVE, 0, 0);
			if (error == 0) {
				error = VOP_OPEN(devvp,
					 ronly ? FREAD : FREAD | FWRITE,
					 FSCRED, NULL);
			}
			vn_unlock(devvp);
		}
		if (error && devvp) {
			vrele(devvp);
			devvp = NULL;
		}
		if (error) {
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			return error;
		}
		hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO);
		hmp->ronly = ronly;
		hmp->devvp = devvp;
		kmalloc_create(&hmp->mchain, "HAMMER2-chains");
		TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry);
		RB_INIT(&hmp->iotree);

		lockinit(&hmp->vollk, "h2vol", 0, 0);

		/*
		 * vchain setup.  vchain.data is embedded.
		 * vchain.refs is initialized and will never drop to 0.
		 *
		 * NOTE! voldata is not yet loaded.
		 */
		hmp->vchain.hmp = hmp;
		hmp->vchain.refs = 1;
		hmp->vchain.data = (void *)&hmp->voldata;
		hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
		hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
		hmp->vchain.delete_xid = HAMMER2_XID_MAX;

		hammer2_chain_core_alloc(NULL, &hmp->vchain, NULL);
		/* hmp->vchain.u.xxx is left NULL */

		/*
		 * fchain setup.  fchain.data is embedded.
		 * fchain.refs is initialized and will never drop to 0.
		 *
		 * The data is not used but needs to be initialized to
		 * pass assertion muster.  We use this chain primarily
		 * as a placeholder for the freemap's top-level RBTREE
		 * so it does not interfere with the volume's topology
		 * RBTREE.
		 */
		hmp->fchain.hmp = hmp;
		hmp->fchain.refs = 1;
		hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset;
		hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP;
		hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX;
		hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid;
		hmp->fchain.bref.methods =
			HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) |
			HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);
		hmp->fchain.delete_xid = HAMMER2_XID_MAX;

		hammer2_chain_core_alloc(NULL, &hmp->fchain, NULL);
		/* hmp->fchain.u.xxx is left NULL */
		/*
		 * Install the volume header and initialize fields from
		 * voldata.
		 */
		error = hammer2_install_volume_header(hmp);
		if (error) {
			++hmp->pmp_count;
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return error;
		}

		/*
		 * Really important to get these right or flush will get
		 * confused.
		 */
		hmp->spmp = hammer2_pfsalloc(NULL, hmp->voldata.mirror_tid);
		kprintf("alloc spmp %p tid %016jx\n",
			hmp->spmp, hmp->voldata.mirror_tid);
		spmp = hmp->spmp;
		spmp->inode_tid = 1;

		xid = 0;
		hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid;
		hmp->vchain.bref.modify_tid = hmp->vchain.bref.mirror_tid;
		hmp->vchain.modify_xid = xid;
		hmp->vchain.update_xlo = xid;
		hmp->vchain.update_xhi = xid;
		hmp->vchain.pmp = spmp;
		hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid;
		hmp->fchain.bref.modify_tid = hmp->fchain.bref.mirror_tid;
		hmp->fchain.modify_xid = xid;
		hmp->fchain.update_xlo = xid;
		hmp->fchain.update_xhi = xid;
		hmp->fchain.pmp = spmp;

		/*
		 * First locate the super-root inode, which is key 0
		 * relative to the volume header's blockset.
		 *
		 * Then locate the root inode by scanning the directory
		 * keyspace represented by the label.
		 */
		parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
		schain = hammer2_chain_lookup(&parent, &key_dummy,
				      HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
				      &cache_index, 0, &ddflag);
		hammer2_chain_lookup_done(parent);
		if (schain == NULL) {
			kprintf("hammer2_mount: invalid super-root\n");
			++hmp->pmp_count;
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EINVAL;
		}

		/*
		 * Sanity-check schain's pmp, finish initializing spmp.
		 */
		KKASSERT(schain->pmp == spmp);
		spmp->pfs_clid = schain->data->ipdata.pfs_clid;

		/*
		 * NOTE: The CHAIN_PFSROOT is not set on the super-root inode.
		 * NOTE: inode_get sucks up schain's lock.
		 */
		cluster = hammer2_cluster_from_chain(schain);
		spmp->iroot = hammer2_inode_get(spmp, NULL, cluster);
		spmp->spmp_hmp = hmp;
		hammer2_inode_ref(spmp->iroot);
		hammer2_inode_unlock_ex(spmp->iroot, cluster);
		schain = NULL;
		/* leave spmp->iroot with one ref */

		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = hammer2_recovery(hmp);
			/* XXX do something with error */
		}
		++hmp->pmp_count;

		/*
		 * XXX RDONLY stuff is totally broken FIXME XXX
		 *
		 * Automatic LNK_CONN
		 * Automatic handling of received LNK_SPAN
		 * Automatic handling of received LNK_CIRC
		 * No automatic LNK_SPAN generation - we do this ourselves
		 * No automatic LNK_CIRC generation - we do this ourselves
		 */
		kdmsg_iocom_init(&hmp->iocom, hmp,
				 KDMSG_IOCOMF_AUTOCONN |
				 KDMSG_IOCOMF_AUTORXSPAN |
				 KDMSG_IOCOMF_AUTORXCIRC,
				 hmp->mchain, hammer2_rcvdmsg);

		/*
		 * Ref the cluster management messaging descriptor.  The mount
		 * program deals with the other end of the communications
		 * pipe.
		 */
		fp = holdfp(curproc->p_fd, info.cluster_fd, -1);
		if (fp) {
			hammer2_cluster_reconnect(hmp, fp);
		} else {
			kprintf("hammer2_mount: bad cluster_fd!\n");
		}
	} else {
		spmp = hmp->spmp;
		++hmp->pmp_count;
	}
	/*
	 * Lookup mount point under the media-localized super-root.
	 *
	 * cluster->pmp will incorrectly point to spmp and must be fixed
	 * up later on.
	 */
	cparent = hammer2_inode_lock_ex(spmp->iroot);
	lhc = hammer2_dirhash(label, strlen(label));
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				      0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE &&
		    strcmp(label,
		       hammer2_cluster_data(cluster)->ipdata.filename) == 0) {
			break;
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK, 0);
	}
	hammer2_inode_unlock_ex(spmp->iroot, cparent);

	if (cluster == NULL) {
		kprintf("hammer2_mount: PFS label not found\n");
		hammer2_vfs_unmount_hmp1(mp, hmp);
		hammer2_vfs_unmount_hmp2(mp, hmp);
		lockmgr(&hammer2_mntlk, LK_RELEASE);
		hammer2_vfs_unmount(mp, MNT_FORCE);
		return EINVAL;
	}

	for (i = 0; i < cluster->nchains; ++i) {
		rchain = cluster->array[i];
		KKASSERT(rchain->pmp == NULL);
		if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
			kprintf("hammer2_mount: PFS label already mounted!\n");
			hammer2_cluster_unlock(cluster);
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EBUSY;
		}
#if 0
		if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
			kprintf("hammer2_mount: PFS label is recycling\n");
			hammer2_cluster_unlock(cluster);
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EBUSY;
		}
#endif
	}

	/*
	 * Check to see if the cluster id is already mounted at the mount
	 * point.  If it is, add us to the cluster.
	 */
	ipdata = &hammer2_cluster_data(cluster)->ipdata;
	hammer2_cluster_bref(cluster, &bref);
	TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
		if (pmp->spmp_hmp == NULL &&
		    bcmp(&pmp->pfs_clid, &ipdata->pfs_clid,
			 sizeof(pmp->pfs_clid)) == 0) {
			break;
		}
	}

	if (pmp) {
		int i;
		int j;

		hammer2_inode_ref(pmp->iroot);
		ccms_thread_lock(&pmp->iroot->topo_cst, CCMS_STATE_EXCLUSIVE);

		if (pmp->iroot->cluster.nchains + cluster->nchains >
		    HAMMER2_MAXCLUSTER) {
			kprintf("hammer2_mount: cluster full!\n");

			ccms_thread_unlock(&pmp->iroot->topo_cst);
			hammer2_inode_drop(pmp->iroot);

			hammer2_cluster_unlock(cluster);
			hammer2_vfs_unmount_hmp1(mp, hmp);
			hammer2_vfs_unmount_hmp2(mp, hmp);
			lockmgr(&hammer2_mntlk, LK_RELEASE);
			hammer2_vfs_unmount(mp, MNT_FORCE);
			return EBUSY;
		}
		kprintf("hammer2_vfs_mount: Adding pfs to existing cluster\n");
		j = pmp->iroot->cluster.nchains;
		for (i = 0; i < cluster->nchains; ++i) {
			rchain = cluster->array[i];
			KKASSERT(rchain->pmp == NULL);
			rchain->pmp = pmp;
			hammer2_chain_ref(cluster->array[i]);
			pmp->iroot->cluster.array[j] = cluster->array[i];
			++j;
		}
		pmp->iroot->cluster.nchains = j;
		ccms_thread_unlock(&pmp->iroot->topo_cst);
		hammer2_inode_drop(pmp->iroot);
		hammer2_cluster_unlock(cluster);
		lockmgr(&hammer2_mntlk, LK_RELEASE);

		kprintf("ok\n");
		hammer2_inode_install_hidden(pmp);

		return ERANGE;
	}
	/*
	 * Block device opened successfully, finish initializing the
	 * mount structure.
	 *
	 * From this point on we have to call hammer2_vfs_unmount() on
	 * failure.
	 */
	pmp = hammer2_pfsalloc(ipdata, bref.mirror_tid);
	kprintf("PMP mirror_tid is %016jx\n", bref.mirror_tid);
	for (i = 0; i < cluster->nchains; ++i) {
		rchain = cluster->array[i];
		KKASSERT(rchain->pmp == NULL);
		rchain->pmp = pmp;
		atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
	}
	cluster->pmp = pmp;

	ccms_domain_init(&pmp->ccms_dom);
	TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry);
	lockmgr(&hammer2_mntlk, LK_RELEASE);

	kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n",
		hmp, pmp, hmp->pmp_count);

	mp->mnt_flag = MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;	/* all entry pts are SMP */
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */

	/*
	 * required mount structure initializations
	 */
	mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;

	/*
	 * Optional fields
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_data = (qaddr_t)pmp;
	pmp->mp = mp;

	/*
	 * After this point hammer2_vfs_unmount() has visibility on hmp
	 * and manual hmp1/hmp2 calls are not needed on fatal errors.
	 */
	pmp->iroot = hammer2_inode_get(pmp, NULL, cluster);
	hammer2_inode_ref(pmp->iroot);		/* ref for pmp->iroot */
	hammer2_inode_unlock_ex(pmp->iroot, cluster);

	/*
	 * The logical file buffer bio write thread handles things
	 * like physical block assignment and compression.
	 *
	 * (only applicable to pfs mounts, not applicable to spmp)
	 */
	pmp->wthread_destroy = 0;
	lwkt_create(hammer2_write_thread, pmp,
		    &pmp->wthread_td, NULL, 0, -1, "hwrite-%s", label);

	/*
	 * With the cluster operational install ihidden.
	 * (only applicable to pfs mounts, not applicable to spmp)
	 */
	hammer2_inode_install_hidden(pmp);

	/*
	 * Finish setup
	 */
	vfs_getnewfsid(mp);
	vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);

	copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
	copyinstr(path, mp->mnt_stat.f_mntonname,
		  sizeof(mp->mnt_stat.f_mntonname) - 1,
		  &size);
	/*
	 * Initial statfs to prime mnt_stat.
	 */
	hammer2_vfs_statfs(mp, &mp->mnt_stat, cred);

	return 0;
}

/*
 * Handle bioq for strategy write
 */
static
void
hammer2_write_thread(void *arg)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_trans_t trans;
	struct vnode *vp;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_inode_data_t *wipdata;
	hammer2_key_t lbase;
	int lblksize;
	int pblksize;
	int error;

	pmp = arg;

	mtx_lock(&pmp->wthread_mtx);
	while (pmp->wthread_destroy == 0) {
		if (bioq_first(&pmp->wthread_bioq) == NULL) {
			mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
				 0, "h2bioqw", 0);
		}
		cparent = NULL;

		hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE);

		while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
			/*
			 * dummy bio for synchronization.  The transaction
			 * must be reinitialized.
			 */
			if (bio->bio_buf == NULL) {
				bio->bio_flags |= BIO_DONE;
				wakeup(bio);
				hammer2_trans_done(&trans);
				hammer2_trans_init(&trans, pmp,
						   HAMMER2_TRANS_BUFCACHE);
				continue;
			}

			/*
			 * else normal bio processing
			 */
			mtx_unlock(&pmp->wthread_mtx);

			hammer2_lwinprog_drop(pmp);

			error = 0;
			bp = bio->bio_buf;
			vp = bp->b_vp;
			ip = VTOI(vp);

			/*
			 * Inode is modified, flush size and mtime changes
			 * to ensure that the file size remains consistent
			 * with the buffers being flushed.
			 *
			 * NOTE: The inode_fsync() call only flushes the
			 *	 inode's meta-data state, it doesn't try
			 *	 to flush underlying buffers or chains.
			 */
			cparent = hammer2_inode_lock_ex(ip);
			if (ip->flags & (HAMMER2_INODE_RESIZED |
					 HAMMER2_INODE_MTIME)) {
				hammer2_inode_fsync(&trans, ip, cparent);
			}
			wipdata = hammer2_cluster_modify_ip(&trans, ip,
							    cparent, 0);
			lblksize = hammer2_calc_logical(ip, bio->bio_offset,
							&lbase, NULL);
			pblksize = hammer2_calc_physical(ip, wipdata, lbase);
			hammer2_write_file_core(bp, &trans, ip, wipdata,
						cparent,
						lbase, IO_ASYNC,
						pblksize, &error);
			hammer2_cluster_modsync(cparent);
			hammer2_inode_unlock_ex(ip, cparent);
			if (error) {
				kprintf("hammer2: error in buffer write\n");
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bio);
			mtx_lock(&pmp->wthread_mtx);
		}
		hammer2_trans_done(&trans);
	}
	pmp->wthread_destroy = -1;
	wakeup(&pmp->wthread_destroy);

	mtx_unlock(&pmp->wthread_mtx);
}

void
hammer2_bioq_sync(hammer2_pfsmount_t *pmp)
{
	struct bio sync_bio;

	bzero(&sync_bio, sizeof(sync_bio));	/* dummy with no bio_buf */
	mtx_lock(&pmp->wthread_mtx);
	if (pmp->wthread_destroy == 0 &&
	    TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
		while ((sync_bio.bio_flags & BIO_DONE) == 0)
			mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0);
	}
	mtx_unlock(&pmp->wthread_mtx);
}
/*
 * Return a chain suitable for I/O, creating the chain if necessary
 * and assigning its physical block.
 */
static
hammer2_cluster_t *
hammer2_assign_physical(hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int pblksize, int *errorp)
{
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_key_t key_dummy;
	int pradix = hammer2_getradix(pblksize);
	int ddflag;

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	*errorp = 0;
	KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
retry:
	dparent = hammer2_cluster_lookup_init(cparent, 0);
	cluster = hammer2_cluster_lookup(dparent, &key_dummy,
				     lbase, lbase,
				     HAMMER2_LOOKUP_NODATA, &ddflag);

	if (cluster == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		*errorp = hammer2_cluster_create(trans, dparent, &cluster,
					       lbase, HAMMER2_PBUFRADIX,
					       HAMMER2_BREF_TYPE_DATA,
					       pblksize);
		if (cluster == NULL) {
			hammer2_cluster_lookup_done(dparent);
			panic("hammer2_cluster_create: par=%p error=%d\n",
				dparent->focus, *errorp);
			goto retry;
		}
		/*ip->delta_dcount += pblksize;*/
	} else {
		switch (hammer2_cluster_type(cluster)) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode.  The
			 * caller is responsible for marking the inode
			 * modified and copying the data to the embedded
			 * area.
			 */
			break;
		case HAMMER2_BREF_TYPE_DATA:
			if (hammer2_cluster_bytes(cluster) != pblksize) {
				hammer2_cluster_resize(trans, ip,
						     dparent, cluster,
						     pradix,
						     HAMMER2_MODIFY_OPTDATA);
			}
			hammer2_cluster_modify(trans, cluster,
					     HAMMER2_MODIFY_OPTDATA);
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			break;
		}
	}

	/*
	 * Cleanup.  If cluster wound up being the inode itself, i.e.
	 * the DIRECTDATA case for offset 0, then we need to update cparent.
	 * The caller expects cparent to not become stale.
	 */
	hammer2_cluster_lookup_done(dparent);
	/* dparent = NULL; safety */
	if (cluster && ddflag)
		hammer2_cluster_replace_locked(cparent, cluster);
	return (cluster);
}

/*
 * From hammer2_vnops.c.
 * The core write function which determines which path to take
 * depending on compression settings.
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
			hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int ioflag, int pblksize,
			int *errorp)
{
	hammer2_cluster_t *cluster;
	switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
	case HAMMER2_COMP_NONE:
		/*
		 * We have to assign physical storage to the buffer
		 * we intend to dirty or write now to avoid deadlocks
		 * in the strategy code later.
		 *
		 * This can return NOOFFSET for inode-embedded data.
		 * The strategy code will take care of it in that case.
		 */
		cluster = hammer2_assign_physical(trans, ip, cparent,
						lbase, pblksize,
						errorp);
		hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp);
		if (cluster)
			hammer2_cluster_unlock(cluster);
		break;
	case HAMMER2_COMP_AUTOZERO:
		/*
		 * Check for zero-fill only
		 */
		hammer2_zero_check_and_write(bp, trans, ip,
					     ipdata, cparent, lbase,
					     ioflag, pblksize, errorp);
		break;
	case HAMMER2_COMP_LZ4:
	case HAMMER2_COMP_ZLIB:
	default:
		/*
		 * Check for zero-fill and attempt compression.
		 */
		hammer2_compress_and_write(bp, trans, ip,
					   ipdata, cparent,
					   lbase, ioflag,
					   pblksize, errorp,
					   ipdata->comp_algo);
		break;
	}
}

/*
 * Generic function that will perform the compression in the compression
 * write path.  The compression algorithm is determined by the settings
 * obtained from the inode.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, const hammer2_inode_data_t *ipdata,
	hammer2_cluster_t *cparent,
	hammer2_key_t lbase, int ioflag, int pblksize,
	int *errorp, int comp_algo)
{
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	int comp_size;
	int comp_block_size;
	int i;
	char *comp_buffer;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp);
		return;
	}

	comp_size = 0;
	comp_buffer = NULL;

	KKASSERT(pblksize / 2 <= 32768);

	if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
		z_stream strm_compress;
		int comp_level;
		int ret;

		switch(HAMMER2_DEC_COMP(comp_algo)) {
		case HAMMER2_COMP_LZ4:
			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			comp_size = LZ4_compress_limitedOutput(
					bp->b_data,
					&comp_buffer[sizeof(int)],
					pblksize,
					pblksize / 2 - sizeof(int));
			/*
			 * We need to prefix with the size, LZ4
			 * doesn't do it for us.  Add the related
			 * overhead.
			 */
			*(int *)comp_buffer = comp_size;
			if (comp_size)
				comp_size += sizeof(int);
			break;
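		/*
		 * NOTE: The LZ4 case above lays the block out as a native
		 *	 int holding the compressed length followed by the
		 *	 LZ4 payload; that prefix is why sizeof(int) is
		 *	 added to comp_size and subtracted from the output
		 *	 limit.
		 */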
		case HAMMER2_COMP_ZLIB:
			comp_level = HAMMER2_DEC_LEVEL(comp_algo);
			if (comp_level == 0)
				comp_level = 6;	/* default zlib compression */
			else if (comp_level < 6)
				comp_level = 6;
			else if (comp_level > 9)
				comp_level = 9;
			ret = deflateInit(&strm_compress, comp_level);
			if (ret != Z_OK) {
				kprintf("HAMMER2 ZLIB: fatal error "
					"on deflateInit.\n");
			}

			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			strm_compress.next_in = bp->b_data;
			strm_compress.avail_in = pblksize;
			strm_compress.next_out = comp_buffer;
			strm_compress.avail_out = pblksize / 2;
			ret = deflate(&strm_compress, Z_FINISH);
			if (ret == Z_STREAM_END) {
				comp_size = pblksize / 2 -
					    strm_compress.avail_out;
			} else {
				comp_size = 0;
			}
			ret = deflateEnd(&strm_compress);
			break;
		default:
			kprintf("Error: Unknown compression method.\n");
			kprintf("Comp_method = %d.\n", comp_algo);
			break;
		}
	}

	if (comp_size == 0) {
		/*
		 * compression failed or turned off
		 */
		comp_block_size = pblksize;	/* safety */
		if (++ip->comp_heuristic > 128)
			ip->comp_heuristic = 8;
	} else {
		/*
		 * compression succeeded
		 */
		ip->comp_heuristic = 0;
		if (comp_size <= 1024) {
			comp_block_size = 1024;
		} else if (comp_size <= 2048) {
			comp_block_size = 2048;
		} else if (comp_size <= 4096) {
			comp_block_size = 4096;
		} else if (comp_size <= 8192) {
			comp_block_size = 8192;
		} else if (comp_size <= 16384) {
			comp_block_size = 16384;
		} else if (comp_size <= 32768) {
			comp_block_size = 32768;
		} else {
			panic("hammer2: WRITE PATH: "
			      "Weird comp_size value.");
			/* NOT REACHED */
			comp_block_size = pblksize;
		}
	}
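	/*
	 * NOTE: The comp_block_size values selected above are the
	 *	 power-of-two physical allocation sizes the blockref can
	 *	 express.  When the compressed data is copied out below the
	 *	 remainder of the chosen block is zero-filled so no stale
	 *	 data follows the compressed payload.
	 */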
	cluster = hammer2_assign_physical(trans, ip, cparent,
					  lbase, comp_block_size,
					  errorp);
	ipdata = &hammer2_cluster_data(cparent)->ipdata;

	if (*errorp) {
		kprintf("WRITE PATH: An error occurred while "
			"assigning physical space.\n");
		KKASSERT(cluster == NULL);
		goto done;
	}

	for (i = 0; i < cluster->nchains; ++i) {
		hammer2_io_t *dio;
		char *bdata;
		int temp_check;

		chain = cluster->array[i];
		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			KKASSERT(chain->data->ipdata.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
			break;
		case HAMMER2_BREF_TYPE_DATA:
			temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

			/*
			 * Optimize out the read-before-write
			 * if possible.
			 */
			*errorp = hammer2_io_newnz(chain->hmp,
						   chain->bref.data_off,
						   chain->bytes,
						   &dio);
			if (*errorp) {
				hammer2_io_brelse(&dio);
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
			bdata = hammer2_io_data(dio, chain->bref.data_off);

			/*
			 * When loading the block make sure we don't
			 * leave garbage after the compressed data.
			 */
			if (comp_size) {
				chain->bref.methods =
					HAMMER2_ENC_COMP(comp_algo) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(comp_buffer, bdata, comp_size);
				if (comp_size != comp_block_size) {
					bzero(bdata + comp_size,
					      comp_block_size - comp_size);
				}
			} else {
				chain->bref.methods =
					HAMMER2_ENC_COMP(
						HAMMER2_COMP_NONE) +
					HAMMER2_ENC_CHECK(temp_check);
				bcopy(bp->b_data, bdata, pblksize);
			}

			/*
			 * Device buffer is now valid, chain is no
			 * longer in the initial state.
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

			/* Now write the related bdp. */
			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				hammer2_io_bwrite(&dio);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				hammer2_io_bdwrite(&dio);
			*/
			} else if (ioflag & IO_ASYNC) {
				hammer2_io_bawrite(&dio);
			} else {
				hammer2_io_bdwrite(&dio);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
				chain->bref.type);
			/* NOT REACHED */
			break;
		}
	}
done:
	if (cluster)
		hammer2_cluster_unlock(cluster);
	if (comp_buffer)
		objcache_put(cache_buffer_write, comp_buffer);
}

/*
 * Function that performs zero-checking and writing without compression;
 * it corresponds to the default zero-checking path.
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip, const hammer2_inode_data_t *ipdata,
	hammer2_cluster_t *cparent,
	hammer2_key_t lbase, int ioflag, int pblksize, int *errorp)
{
	hammer2_cluster_t *cluster;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp);
	} else {
		cluster = hammer2_assign_physical(trans, ip, cparent,
						  lbase, pblksize, errorp);
		hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp);
		if (cluster)
			hammer2_cluster_unlock(cluster);
	}
}

/*
 * A function to test whether a block of data contains only zeros,
 * returns TRUE (non-zero) if the block is all zeros.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
	size_t i;

	for (i = 0; i < bytes; i += sizeof(long)) {
		if (*(const long *)(buf + i) != 0)
			return (0);
	}
	return (1);
}
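/*
 * NOTE: test_block_zeros() scans the buffer a long at a time, so it
 *	 assumes the length is a multiple of sizeof(long) and the buffer
 *	 is suitably aligned.  Both hold for the full logical file blocks
 *	 (pblksize) it is called on here.
 */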
/*
 * Function to "write" a block that contains only zeros.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans,
	   hammer2_inode_t *ip, const hammer2_inode_data_t *ipdata,
	   hammer2_cluster_t *cparent,
	   hammer2_key_t lbase, int *errorp __unused)
{
	hammer2_cluster_t *cluster;
	hammer2_media_data_t *data;
	hammer2_key_t key_dummy;
	int ddflag;

	cparent = hammer2_cluster_lookup_init(cparent, 0);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
				     HAMMER2_LOOKUP_NODATA, &ddflag);
	if (cluster) {
		data = hammer2_cluster_wdata(cluster);

		if (ddflag) {
			KKASSERT(cluster->focus->flags &
				 HAMMER2_CHAIN_MODIFIED);
			bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES);
			hammer2_cluster_modsync(cluster);
		} else {
			hammer2_cluster_delete(trans, cluster, 0);
		}
		hammer2_cluster_unlock(cluster);
	}
	hammer2_cluster_lookup_done(cparent);
}

/*
 * Function to write the data as it is, without performing any sort of
 * compression.  This function is used in the no-compression path and in
 * the default zero-checking path.
 */
static
void
hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
				int pblksize, int *errorp)
{
	hammer2_chain_t *chain;
	hammer2_io_t *dio;
	char *bdata;
	int error;
	int i;
	int temp_check;

	error = 0;	/* XXX TODO below */

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i];

		temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);

		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			KKASSERT(chain->data->ipdata.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, chain->data->ipdata.u.data,
			      HAMMER2_EMBEDDED_BYTES);
			error = 0;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			error = hammer2_io_newnz(chain->hmp,
						 chain->bref.data_off,
						 chain->bytes, &dio);
			if (error) {
				hammer2_io_bqrelse(&dio);
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
			bdata = hammer2_io_data(dio, chain->bref.data_off);

			chain->bref.methods = HAMMER2_ENC_COMP(
							HAMMER2_COMP_NONE) +
					      HAMMER2_ENC_CHECK(temp_check);
			bcopy(bp->b_data, bdata, chain->bytes);

			/*
			 * Device buffer is now valid, chain is no
			 * longer in the initial state.
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				hammer2_io_bwrite(&dio);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				hammer2_io_bdwrite(&dio);
			*/
			} else if (ioflag & IO_ASYNC) {
				hammer2_io_bawrite(&dio);
			} else {
				hammer2_io_bdwrite(&dio);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
			      chain->bref.type);
			/* NOT REACHED */
			error = 0;
			break;
		}
		KKASSERT(error == 0);	/* XXX TODO */
	}
	*errorp = error;
}

static
int
hammer2_remount(hammer2_mount_t *hmp, struct mount *mp, char *path,
		struct vnode *devvp, struct ucred *cred)
{
	int error;

	if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
		error = hammer2_recovery(hmp);
	} else {
		error = 0;
	}
	return error;
}
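/*
 * NOTE: hammer2_remount() above currently only has to handle the
 *	 read-only -> read-write transition (MNTK_WANTRDWR), which runs
 *	 the mount-time recovery pass; all other remount requests fall
 *	 through with nothing to do.
 */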
static
int
hammer2_vfs_unmount(struct mount *mp, int mntflags)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_chain_t *rchain;
	hammer2_cluster_t *cluster;
	int flags;
	int error = 0;
	int i;

	pmp = MPTOPMP(mp);

	if (pmp == NULL)
		return(0);

	lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
	TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);

	/*
	 * If mount initialization proceeded far enough we must flush
	 * its vnodes.
	 */
	if (mntflags & MNT_FORCE)
		flags = FORCECLOSE;
	else
		flags = 0;
	if (pmp->iroot) {
		error = vflush(mp, 0, flags);
		if (error)
			goto failed;
	}

	ccms_domain_uninit(&pmp->ccms_dom);

	if (pmp->wthread_td) {
		mtx_lock(&pmp->wthread_mtx);
		pmp->wthread_destroy = 1;
		wakeup(&pmp->wthread_bioq);
		while (pmp->wthread_destroy != -1) {
			mtxsleep(&pmp->wthread_destroy,
				&pmp->wthread_mtx, 0,
				"umount-sleep", 0);
		}
		mtx_unlock(&pmp->wthread_mtx);
		pmp->wthread_td = NULL;
	}

	/*
	 * Cleanup our reference on ihidden.
	 */
	if (pmp->ihidden) {
		hammer2_inode_drop(pmp->ihidden);
		pmp->ihidden = NULL;
	}

	/*
	 * Cleanup our reference on iroot.  iroot is (should) not be needed
	 * by the flush code.
	 */
	if (pmp->iroot) {
		cluster = &pmp->iroot->cluster;
		for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
			rchain = pmp->iroot->cluster.array[i];
			if (rchain == NULL)
				continue;
			hmp = rchain->hmp;
			hammer2_vfs_unmount_hmp1(mp, hmp);

			atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
#if REPORT_REFS_ERRORS
			if (rchain->refs != 1)
				kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
					rchain, rchain->refs);
#else
			KKASSERT(rchain->refs == 1);
#endif
			hammer2_chain_drop(rchain);
			cluster->array[i] = NULL;
			hammer2_vfs_unmount_hmp2(mp, hmp);
		}
		cluster->focus = NULL;

#if REPORT_REFS_ERRORS
		if (pmp->iroot->refs != 1)
			kprintf("PMP->IROOT %p REFS WRONG %d\n",
				pmp->iroot, pmp->iroot->refs);
#else
		KKASSERT(pmp->iroot->refs == 1);
#endif
		/* ref for pmp->iroot */
		hammer2_inode_drop(pmp->iroot);
		pmp->iroot = NULL;
	}

	pmp->mp = NULL;
	mp->mnt_data = NULL;

	kmalloc_destroy(&pmp->mmsg);
	kmalloc_destroy(&pmp->minode);

	kfree(pmp, M_HAMMER2);
	error = 0;

failed:
	lockmgr(&hammer2_mntlk, LK_RELEASE);

	return (error);
}

static
void
hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp)
{
	hammer2_mount_exlock(hmp);
	--hmp->pmp_count;

	kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count);

	kdmsg_iocom_uninit(&hmp->iocom);	/* XXX chain depend deadlck? */

	/*
	 * Flush any left over chains.  The voldata lock is only used
	 * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
	 *
	 * Flush twice to ensure that the freemap is completely
	 * synchronized.  If we only do it once the next mount's
	 * recovery scan will have to do some fixups (which isn't
	 * bad, but we don't want it to have to do it except when
	 * recovering from a crash).
	 */
	hammer2_voldata_lock(hmp);
	if (((hmp->vchain.flags | hmp->fchain.flags) &
	     HAMMER2_CHAIN_MODIFIED) ||
	    hmp->vchain.update_xhi > hmp->vchain.update_xlo ||
	    hmp->fchain.update_xhi > hmp->fchain.update_xlo) {
		hammer2_voldata_unlock(hmp);
		hammer2_vfs_sync(mp, MNT_WAIT);
		/*hammer2_vfs_sync(mp, MNT_WAIT);*/
	} else {
		hammer2_voldata_unlock(hmp);
	}
	if (hmp->pmp_count == 0) {
		if (((hmp->vchain.flags | hmp->fchain.flags) &
		     HAMMER2_CHAIN_MODIFIED) ||
		    hmp->vchain.update_xhi > hmp->vchain.update_xlo ||
		    hmp->fchain.update_xhi > hmp->fchain.update_xlo) {
			kprintf("hammer2_unmount: chains left over "
				"after final sync\n");
			kprintf("    vchain %08x update_xlo/hi %08x/%08x\n",
				hmp->vchain.flags,
				hmp->vchain.update_xlo,
				hmp->vchain.update_xhi);
			kprintf("    fchain %08x update_xlo/hi %08x/%08x\n",
				hmp->fchain.flags,
				hmp->fchain.update_xlo,
				hmp->fchain.update_xhi);

			if (hammer2_debug & 0x0010)
				Debugger("entered debugger");
		}
	}
}

static
void
hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp)
{
	hammer2_pfsmount_t *spmp;
	struct vnode *devvp;
	int dumpcnt;
	int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
	/*
	 * If no PFS's left drop the master hammer2_mount for the
	 * device.
	 */
	if (hmp->pmp_count == 0) {
		/*
		 * Clean up SPMP and the super-root inode
		 */
		spmp = hmp->spmp;
		if (spmp) {
			if (spmp->iroot) {
				hammer2_inode_drop(spmp->iroot);
				spmp->iroot = NULL;
			}
			hmp->spmp = NULL;
			kmalloc_destroy(&spmp->mmsg);
			kmalloc_destroy(&spmp->minode);
			kfree(spmp, M_HAMMER2);
		}

		/*
		 * Finish up with the device vnode
		 */
		if ((devvp = hmp->devvp) != NULL) {
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
			hmp->devvp = NULL;
			VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
			vn_unlock(devvp);
			vrele(devvp);
			devvp = NULL;
		}

		/*
		 * Clear vchain/fchain flags that might prevent final cleanup
		 * of these chains.
		 */
		if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) {
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_MODIFIED);
			hammer2_chain_drop(&hmp->vchain);
		}
		if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_CREATE) {
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_FLUSH_CREATE);
			hammer2_chain_drop(&hmp->vchain);
		}
		if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_DELETE) {
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_FLUSH_DELETE);
			hammer2_chain_drop(&hmp->vchain);
		}

		if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) {
			atomic_clear_int(&hmp->fchain.flags,
					 HAMMER2_CHAIN_MODIFIED);
			hammer2_chain_drop(&hmp->fchain);
		}
		if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_CREATE) {
			atomic_clear_int(&hmp->fchain.flags,
					 HAMMER2_CHAIN_FLUSH_CREATE);
			hammer2_chain_drop(&hmp->fchain);
		}
		if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_DELETE) {
			atomic_clear_int(&hmp->fchain.flags,
					 HAMMER2_CHAIN_FLUSH_DELETE);
			hammer2_chain_drop(&hmp->fchain);
		}

		/*
		 * Final drop of embedded freemap root chain to
		 * clean up fchain.core (fchain structure is not
		 * flagged ALLOCATED so it is cleaned out and then
		 * left to rot).
		 */
		hammer2_chain_drop(&hmp->fchain);
		/*
		 * Final drop of embedded volume root chain to clean
		 * up vchain.core (vchain structure is not flagged
		 * ALLOCATED so it is cleaned out and then left to
		 * rot).
		 */
		dumpcnt = 50;
		hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
		dumpcnt = 50;
		hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f');
		hammer2_mount_unlock(hmp);
		hammer2_chain_drop(&hmp->vchain);

		hammer2_io_cleanup(hmp, &hmp->iotree);
		if (hmp->iofree_count) {
			kprintf("io_cleanup: %d I/O's left hanging\n",
				hmp->iofree_count);
		}

		TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
		kmalloc_destroy(&hmp->mchain);
		kfree(hmp, M_HAMMER2);
	} else {
		hammer2_mount_unlock(hmp);
	}
}

static
int
hammer2_vfs_vget(struct mount *mp, struct vnode *dvp,
	       ino_t ino, struct vnode **vpp)
{
	kprintf("hammer2_vget\n");
	return (EOPNOTSUPP);
}

static
int
hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
{
	hammer2_pfsmount_t *pmp;
	hammer2_cluster_t *cparent;
	int error;
	struct vnode *vp;

	pmp = MPTOPMP(mp);
	if (pmp->iroot == NULL) {
		*vpp = NULL;
		error = EINVAL;
	} else {
		cparent = hammer2_inode_lock_sh(pmp->iroot);
		vp = hammer2_igetv(pmp->iroot, cparent, &error);
		hammer2_inode_unlock_sh(pmp->iroot, cparent);
		*vpp = vp;
		if (vp == NULL)
			kprintf("vnodefail\n");
	}

	return (error);
}

/*
 * Filesystem status
 *
 * XXX incorporate ipdata->inode_quota and data_quota
 */
static
int
hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->iroot->cluster.nchains >= 1);
	hmp = pmp->iroot->cluster.focus->hmp;	/* XXX */

	mp->mnt_stat.f_files = pmp->inode_count;
	mp->mnt_stat.f_ffree = 0;
	mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;

	*sbp = mp->mnt_stat;
	return (0);
}

static
int
hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;

	pmp = MPTOPMP(mp);
	KKASSERT(pmp->iroot->cluster.nchains >= 1);
	hmp = pmp->iroot->cluster.focus->hmp;	/* XXX */

	mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_files = pmp->inode_count;
	mp->mnt_vstat.f_ffree = 0;
	mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE;
	mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree;

	*sbp = mp->mnt_vstat;
	return (0);
}
struct hammer2_recovery_elm {
	TAILQ_ENTRY(hammer2_recovery_elm) entry;
	hammer2_chain_t *chain;
	hammer2_tid_t sync_tid;
};

TAILQ_HEAD(hammer2_recovery_list, hammer2_recovery_elm);

struct hammer2_recovery_info {
	struct hammer2_recovery_list list;
	int	depth;
};

static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
			hammer2_chain_t *parent,
			struct hammer2_recovery_info *info,
			hammer2_tid_t sync_tid);

#define HAMMER2_RECOVERY_MAXDEPTH	10

static
int
hammer2_recovery(hammer2_mount_t *hmp)
{
	hammer2_trans_t trans;
	struct hammer2_recovery_info info;
	struct hammer2_recovery_elm *elm;
	hammer2_chain_t *parent;
	hammer2_tid_t sync_tid;
	int error;
	int cumulative_error = 0;

	hammer2_trans_init(&trans, hmp->spmp, 0);

	sync_tid = 0;
	TAILQ_INIT(&info.list);
	info.depth = 0;
	parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
	cumulative_error = hammer2_recovery_scan(&trans, hmp, parent,
						 &info, sync_tid);
	hammer2_chain_lookup_done(parent);

	while ((elm = TAILQ_FIRST(&info.list)) != NULL) {
		TAILQ_REMOVE(&info.list, elm, entry);
		parent = elm->chain;
		sync_tid = elm->sync_tid;
		kfree(elm, M_HAMMER2);

		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_NOREF);
		error = hammer2_recovery_scan(&trans, hmp, parent,
					      &info, sync_tid);
		hammer2_chain_unlock(parent);
		if (error)
			cumulative_error = error;
	}
	hammer2_trans_done(&trans);

	return cumulative_error;
}

static
int
hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
		      hammer2_chain_t *parent,
		      struct hammer2_recovery_info *info,
		      hammer2_tid_t sync_tid)
{
	hammer2_chain_t *chain;
	int cache_index;
	int cumulative_error = 0;
	int pfs_boundary = 0;
	int error;

	/*
	 * Adjust freemap to ensure that the block(s) are marked allocated.
	 */
	if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) {
		hammer2_freemap_adjust(trans, hmp, &parent->bref,
				       HAMMER2_FREEMAP_DORECOVER);
	}

	/*
	 * Check type for recursive scan
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
		/* data already instantiated */
		break;
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Must instantiate data for DIRECTDATA test and also
		 * for recursion.
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
			/* not applicable to recovery scan */
			hammer2_chain_unlock(parent);
			return 0;
		}
		if ((parent->data->ipdata.op_flags & HAMMER2_OPFLAG_PFSROOT) &&
		    info->depth != 0) {
			pfs_boundary = 1;
			sync_tid = parent->bref.mirror_tid - 1;
		}
		hammer2_chain_unlock(parent);
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Must instantiate data for recursion
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_unlock(parent);
		break;
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/* not applicable to recovery scan */
		return 0;
		break;
	default:
		return EDOM;
	}

	/*
	 * Defer operation if depth limit reached or if we are crossing a
	 * PFS boundary.
	 */
	if (info->depth >= HAMMER2_RECOVERY_MAXDEPTH || pfs_boundary) {
		struct hammer2_recovery_elm *elm;

		elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK);
		elm->chain = parent;
		elm->sync_tid = sync_tid;
		hammer2_chain_ref(parent);
		TAILQ_INSERT_TAIL(&info->list, elm, entry);
		/* unlocked by caller */

		return(0);
	}

	/*
	 * Recursive scan of the last flushed transaction only.  We are
	 * doing this without pmp assignments so don't leave the chains
	 * hanging around after we are done with them.
	 */
	cache_index = 0;
	chain = hammer2_chain_scan(parent, NULL, &cache_index,
				   HAMMER2_LOOKUP_NODATA);
	while (chain) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
		if (chain->bref.mirror_tid >= sync_tid) {
			++info->depth;
			error = hammer2_recovery_scan(trans, hmp, chain,
						      info, sync_tid);
			--info->depth;
			if (error)
				cumulative_error = error;
		}
		chain = hammer2_chain_scan(parent, chain, &cache_index,
					   HAMMER2_LOOKUP_NODATA);
	}

	return cumulative_error;
}

/*
 * Sync the entire filesystem; this is called from the filesystem syncer
 * process periodically and whenever a user calls sync(1) on the hammer
 * mountpoint.
 *
 * Currently is actually called from the syncer! \o/
 *
 * This task will have to snapshot the state of the dirty inode chain.
 * From that, it will have to make sure all of the inodes on the dirty
 * chain have IO initiated.  We make sure that io is initiated for the root
 * block.
 *
 * If waitfor is set, we wait for media to acknowledge the new rootblock.
 *
 * THINKS: side A vs side B, to have sync not stall all I/O?
 */
int
hammer2_vfs_sync(struct mount *mp, int waitfor)
{
	struct hammer2_sync_info info;
	hammer2_inode_t *iroot;
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	int flags;
	int error;
	int total_error;
	int force_fchain;
	int i;
	int j;

	pmp = MPTOPMP(mp);
	iroot = pmp->iroot;
	KKASSERT(iroot);
	KKASSERT(iroot->pmp == pmp);

	/*
	 * We can't acquire locks on existing vnodes while in a transaction
	 * without risking a deadlock.  This assumes that vfsync() can be
	 * called without the vnode locked (which it can in DragonFly).
	 * Otherwise we'd have to implement a multi-pass or flag the lock
	 * failures and retry.
	 *
	 * The reclamation code interlocks with the sync list's token
	 * (by removing the vnode from the scan list) before unlocking
	 * the inode, giving us time to ref the inode.
	 */
	/*flags = VMSC_GETVP;*/
	flags = 0;
	if (waitfor & MNT_LAZY)
		flags |= VMSC_ONEPASS;

	/*
	 * Start our flush transaction.  This does not return until all
	 * concurrent transactions have completed and will prevent any
	 * new transactions from running concurrently, except for the
	 * buffer cache transactions.
	 *
	 * For efficiency do an async pass before making sure with a
	 * synchronous pass on all related buffer cache buffers.  It
	 * should theoretically not be possible for any new file buffers
	 * to be instantiated during this sequence.
	 */
	hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH |
					     HAMMER2_TRANS_PREFLUSH);
	hammer2_run_unlinkq(&info.trans, pmp);

	info.error = 0;
	info.waitfor = MNT_NOWAIT;
	vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info);
	info.waitfor = MNT_WAIT;
	vsyncscan(mp, flags, hammer2_sync_scan2, &info);

	/*
	 * Clear PREFLUSH.  This prevents (or asserts on) any new logical
	 * buffer cache flushes which occur during the flush.  Device buffers
	 * are not affected.
	 */
#if 0
	if (info.error == 0 && (waitfor & MNT_WAIT)) {
		info.waitfor = waitfor;
		vsyncscan(mp, flags, hammer2_sync_scan2, &info);
	}
#endif
	hammer2_bioq_sync(info.trans.pmp);
	atomic_clear_int(&info.trans.flags, HAMMER2_TRANS_PREFLUSH);

	total_error = 0;

	/*
	 * Flush all storage elements making up the cluster
	 *
	 * We must also flush any deleted siblings because the super-root
	 * flush won't do it for us.  They all must be staged or the
	 * super-root flush will not be able to update its block table
	 * properly.
	 *
	 * XXX currently done serially instead of concurrently
	 */
	for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
		chain = iroot->cluster.array[i];
		if (chain) {
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
			hammer2_flush(&info.trans, &chain);
			hammer2_chain_unlock(chain);
		}
		if (chain) {
			hammer2_chain_t *nchain;

			chain = TAILQ_FIRST(&chain->core->ownerq);
			hammer2_chain_ref(chain);
			while (chain) {
				hammer2_chain_lock(chain,
						   HAMMER2_RESOLVE_ALWAYS);
				hammer2_flush(&info.trans, &chain);
				hammer2_chain_unlock(chain);
				nchain = TAILQ_NEXT(chain, core_entry);
				if (nchain)
					hammer2_chain_ref(nchain);
				hammer2_chain_drop(chain);
				chain = nchain;
			}
		}
	}
#if 0
	hammer2_trans_done(&info.trans);
#endif

	/*
	 * Flush all volume roots to synchronize PFS flushes with the
	 * storage media.  Use a super-root transaction for each one.
	 *
	 * The flush code will detect super-root -> pfs-root chain
	 * transitions using the last pfs-root flush.
	 */
	for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
		chain = iroot->cluster.array[i];
		if (chain == NULL)
			continue;

		hmp = chain->hmp;

		/*
		 * We only have to flush each hmp once
		 */
		for (j = i - 1; j >= 0; --j) {
			if (iroot->cluster.array[j] &&
			    iroot->cluster.array[j]->hmp == hmp)
				break;
		}
		if (j >= 0)
			continue;
		hammer2_trans_spmp(&info.trans, hmp->spmp);

		/*
		 * Force an update of the XID from the PFS root to the
		 * topology root.  We couldn't do this from the PFS
		 * transaction because a SPMP transaction is needed.
		 * This does not modify blocks, instead what it does is
		 * allow the flush code to find the transition point and
		 * then update on the way back up.
		 */
		parent = TAILQ_LAST(&chain->above->ownerq, h2_core_list);
		KKASSERT(chain->pmp != parent->pmp);
		hammer2_chain_setsubmod(&info.trans, parent);

		/*
		 * Media mounts have two 'roots', vchain for the topology
		 * and fchain for the free block table.  Flush both.
		 *
		 * Note that the topology and free block table are handled
		 * independently, so the free block table can wind up being
		 * ahead of the topology.  We depend on the bulk free scan
		 * code to deal with any loose ends.
		 */
		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->fchain.update_xhi > hmp->fchain.update_xlo) {
			/*
			 * This will also modify vchain as a side effect,
			 * mark vchain as modified now.
			 */
			hammer2_voldata_modify(hmp);
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
		hammer2_chain_unlock(&hmp->vchain);

		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->vchain.update_xhi > hmp->vchain.update_xlo) {
			chain = &hmp->vchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->vchain);
			force_fchain = 1;
		} else {
			force_fchain = 0;
		}
		hammer2_chain_unlock(&hmp->vchain);

#if 0
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
		    hmp->fchain.update_xhi > hmp->fchain.update_xlo ||
		    force_fchain) {
			/* this will also modify vchain as a side effect */
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, &chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
#endif

		error = 0;

		/*
		 * We can't safely flush the volume header until we have
		 * flushed any device buffers which have built up.
		 *
		 * XXX this isn't being incremental
		 */
		vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
		vn_unlock(hmp->devvp);

		/*
		 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
		 * volume header needs synchronization via hmp->volsync.
		 *
		 * XXX synchronize the flag & data with only this flush XXX
		 */
		if (error == 0 &&
		    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
			struct buf *bp;

			/*
			 * Synchronize the disk before flushing the volume
			 * header.
			 */
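			/*
			 * The empty BUF_CMD_FLUSH pbuf below acts as a
			 * device-level cache flush; biowait() makes it
			 * synchronous so the header cannot pass data that
			 * is still sitting in the drive's write cache.
			 */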
			bp = getpbuf(NULL);
			bp->b_bio1.bio_offset = 0;
			bp->b_bufsize = 0;
			bp->b_bcount = 0;
			bp->b_cmd = BUF_CMD_FLUSH;
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			vn_strategy(hmp->devvp, &bp->b_bio1);
			biowait(&bp->b_bio1, "h2vol");
			relpbuf(bp, NULL);

			/*
			 * Then we can safely flush the version of the
			 * volume header synchronized by the flush code.
			 */
			i = hmp->volhdrno + 1;
			if (i >= HAMMER2_NUM_VOLHDRS)
				i = 0;
			if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
			    hmp->volsync.volu_size) {
				i = 0;
			}
			kprintf("sync volhdr %d %jd\n",
				i, (intmax_t)hmp->volsync.volu_size);
			bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
				    HAMMER2_PBUFSIZE, 0, 0);
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_VOLUMESYNC);
			bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
			bawrite(bp);
			hmp->volhdrno = i;
		}
		if (error)
			total_error = error;

#if 0
		hammer2_trans_done(&info.trans);
#endif
	}
	hammer2_trans_done(&info.trans);

	return (total_error);
}

/*
 * Sync passes.
 */
static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer2_sync_info *info = data;
	hammer2_inode_t *ip;
	int error;

	/*
	 *
	 */
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);
	if (vp->v_type == VNON || vp->v_type == VBAD) {
		vclrisdirty(vp);
		return(0);
	}
	if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrisdirty(vp);
		return(0);
	}

	/*
	 * VOP_FSYNC will start a new transaction so replicate some code
	 * here to do it inline (see hammer2_vop_fsync()).
	 *
	 * WARNING: The vfsync interacts with the buffer cache and might
	 *	    block, we can't hold the inode lock at that time.
	 *	    However, we MUST ref ip before blocking to ensure that
	 *	    it isn't ripped out from under us (since we do not
	 *	    hold a lock on the vnode).
	 */
	hammer2_inode_ref(ip);
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (vp)
		vfsync(vp, MNT_NOWAIT, 1, NULL, NULL);

	hammer2_inode_drop(ip);
#if 1
	error = 0;
	if (error)
		info->error = error;
#endif
	return(0);
}
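
/*
 * NFS export / filehandle support entry points.  These are currently
 * placeholder implementations which simply return success.
 */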
static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	return (0);
}

static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
		   struct fid *fhp, struct vnode **vpp)
{
	return (0);
}

static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		     int *exflagsp, struct ucred **credanonp)
{
	return (0);
}

/*
 * Support code for hammer2_mount().  Read, verify, and install the volume
 * header into the HMP.
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems w/ less than 4 volhdrs, make sure to not write to
 *     nonexistent locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs
 */
static
int
hammer2_install_volume_header(hammer2_mount_t *hmp)
{
	hammer2_volume_data_t *vd;
	struct buf *bp;
	hammer2_crc32_t crc0, crc, bcrc0, bcrc;
	int error_reported;
	int error;
	int valid;
	int i;

	error_reported = 0;
	error = 0;
	valid = 0;
	bp = NULL;

	/*
	 * There are up to 4 copies of the volume header (syncs iterate
	 * between them so there is no single master).  We don't trust the
	 * volu_size field so we don't know precisely how large the filesystem
	 * is, so depend on the OS to return an error if we go beyond the
	 * block device's EOF.
	 */
	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
		error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
			      HAMMER2_VOLUME_BYTES, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		vd = (struct hammer2_volume_data *) bp->b_data;
		if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
		    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
			/* XXX: Reversed-endianness filesystem */
			kprintf("hammer2: reverse-endian filesystem "
				"detected\n");
			brelse(bp);
			bp = NULL;
			continue;
		}

		crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
		crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
				      HAMMER2_VOLUME_ICRC0_SIZE);
		bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
		bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
				       HAMMER2_VOLUME_ICRC1_SIZE);
		if ((crc0 != crc) || (bcrc0 != bcrc)) {
			kprintf("hammer2 volume header crc "
				"mismatch copy #%d %08x/%08x\n",
				i, crc0, crc);
			error_reported = 1;
			brelse(bp);
			bp = NULL;
			continue;
		}
		if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
			valid = 1;
			hmp->voldata = *vd;
			hmp->volhdrno = i;
		}
		brelse(bp);
		bp = NULL;
	}
	if (valid) {
		hmp->volsync = hmp->voldata;
		error = 0;
		if (error_reported || bootverbose || 1) { /* 1/DEBUG */
			kprintf("hammer2: using volume header #%d\n",
				hmp->volhdrno);
		}
	} else {
		error = EINVAL;
		kprintf("hammer2: no valid volume headers found!\n");
	}
	return (error);
}

/*
 * Reconnect using the passed file pointer.  The caller must ref the
 * fp for us.
 */
void
hammer2_cluster_reconnect(hammer2_mount_t *hmp, struct file *fp)
{
	size_t name_len;
	const char *name = "disk-volume";

	/*
	 * Closes old comm descriptor, kills threads, cleans up
	 * states, then installs the new descriptor and creates
	 * new threads.
	 */
	kdmsg_iocom_reconnect(&hmp->iocom, fp, "hammer2");

	/*
	 * Setup LNK_CONN fields for autoinitiated state machine.  We
	 * will use SPANs to advertise multiple PFSs so only pass the
	 * fsid and HAMMER2_PFSTYPE_SUPROOT for the AUTOCONN.
	 *
	 * We are not initiating a LNK_SPAN so we do not have to set up
	 * iocom.auto_lnk_span.
	 */
	bzero(&hmp->iocom.auto_lnk_conn.pfs_clid,
	      sizeof(hmp->iocom.auto_lnk_conn.pfs_clid));
	hmp->iocom.auto_lnk_conn.pfs_fsid = hmp->voldata.fsid;
	hmp->iocom.auto_lnk_conn.pfs_type = HAMMER2_PFSTYPE_SUPROOT;
	hmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
#if 0
	hmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type;
#endif
	hmp->iocom.auto_lnk_conn.peer_type = DMSG_PEER_HAMMER2;

	/*
	 * Filter adjustment.  Clients do not need visibility into other
	 * clients (otherwise millions of clients would present a serious
	 * problem).  The fs_label also serves to restrict the namespace.
	 */
	hmp->iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_HAMMER2;
	hmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;

#if 0
	switch (ipdata->pfs_type) {
	case DMSG_PFSTYPE_CLIENT:
		hmp->iocom.auto_lnk_conn.peer_mask &=
				~(1LLU << DMSG_PFSTYPE_CLIENT);
		break;
	default:
		break;
	}
#endif

	name_len = strlen(name);
	if (name_len >= sizeof(hmp->iocom.auto_lnk_conn.fs_label))
		name_len = sizeof(hmp->iocom.auto_lnk_conn.fs_label) - 1;
	bcopy(name, hmp->iocom.auto_lnk_conn.fs_label, name_len);
	hmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;

	kdmsg_iocom_autoinitiate(&hmp->iocom, hammer2_autodmsg);
}

static int
hammer2_rcvdmsg(kdmsg_msg_t *msg)
{
	kprintf("RCVMSG %08x\n", msg->tcmd);

	switch(msg->tcmd) {
	case DMSG_DBG_SHELL:
		/*
		 * (non-transaction)
		 * Execute shell command (not supported atm)
		 */
		kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
		break;
	case DMSG_DBG_SHELL | DMSGF_REPLY:
		/*
		 * (non-transaction)
		 */
		if (msg->aux_data) {
			msg->aux_data[msg->aux_size - 1] = 0;
			kprintf("HAMMER2 DBG: %s\n", msg->aux_data);
		}
		break;
	default:
		/*
		 * Unsupported message received.  We only need to
		 * reply if it's a transaction in order to close our end.
		 * Ignore any one-way messages or any further messages
		 * associated with the transaction.
		 *
		 * NOTE: This case also includes DMSG_LNK_ERROR messages
		 *	 which might be one-way, replying to those would
		 *	 cause an infinite ping-pong.
		 */
		if (msg->any.head.cmd & DMSGF_CREATE)
			kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
		break;
	}
	return(0);
}

/*
 * This function is called after KDMSG has automatically handled processing
 * of a LNK layer message (typically CONN, SPAN, or CIRC).
 *
 * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which
 * advertise all available hammer2 super-root volumes.
 */
static void
hammer2_autodmsg(kdmsg_msg_t *msg)
{
	hammer2_mount_t *hmp = msg->iocom->handle;
	int copyid;

	kprintf("RCAMSG %08x\n", msg->tcmd);

	switch(msg->tcmd) {
	case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY:
	case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY:
		if (msg->any.head.cmd & DMSGF_CREATE) {
			kprintf("HAMMER2: VOLDATA DUMP\n");

			/*
			 * Dump the configuration stored in the volume header.
			 * This will typically be import/export access rights,
			 * master encryption keys (encrypted), etc.
			 */
			hammer2_voldata_lock(hmp);
			copyid = 0;
			while (copyid < HAMMER2_COPYID_COUNT) {
				if (hmp->voldata.copyinfo[copyid].copyid)
					hammer2_volconf_update(hmp, copyid);
				++copyid;
			}
			hammer2_voldata_unlock(hmp);

			kprintf("HAMMER2: INITIATE SPANs\n");
			hammer2_update_spans(hmp);
		}
		if ((msg->any.head.cmd & DMSGF_DELETE) &&
		    msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) {
			kprintf("HAMMER2: CONN WAS TERMINATED\n");
		}
		break;
	default:
		break;
	}
}

/*
 * Update LNK_SPAN state
 */
void
hammer2_update_spans(hammer2_mount_t *hmp)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_pfsmount_t *spmp;
	hammer2_key_t key_next;
	kdmsg_msg_t *rmsg;
	size_t name_len;
	int ddflag;

	/*
	 * Lookup mount point under the media-localized super-root.
	 *
	 * cluster->pmp will incorrectly point to spmp and must be fixed
	 * up later on.
	 */
	spmp = hmp->spmp;
	cparent = hammer2_inode_lock_ex(spmp->iroot);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
					 HAMMER2_KEY_MIN,
					 HAMMER2_KEY_MAX,
					 0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE)
			continue;
		ipdata = &hammer2_cluster_data(cluster)->ipdata;
		kprintf("UPDATE SPANS: %s\n", ipdata->filename);

		rmsg = kdmsg_msg_alloc(&hmp->iocom, NULL,
				       DMSG_LNK_SPAN | DMSGF_CREATE,
				       hammer2_lnk_span_reply, NULL);
		rmsg->any.lnk_span.pfs_clid = ipdata->pfs_clid;
		rmsg->any.lnk_span.pfs_fsid = ipdata->pfs_fsid;
		rmsg->any.lnk_span.pfs_type = ipdata->pfs_type;
		rmsg->any.lnk_span.peer_type = DMSG_PEER_HAMMER2;
		rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;
		name_len = ipdata->name_len;
		if (name_len >= sizeof(rmsg->any.lnk_span.fs_label))
			name_len = sizeof(rmsg->any.lnk_span.fs_label) - 1;
		bcopy(ipdata->filename, rmsg->any.lnk_span.fs_label, name_len);

		kdmsg_msg_write(rmsg);

		cluster = hammer2_cluster_next(cparent, cluster,
					       &key_next,
					       key_next,
					       HAMMER2_KEY_MAX,
					       0);
	}
	hammer2_inode_unlock_ex(spmp->iroot, cparent);
}

static
int
hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
{
	if ((state->txcmd & DMSGF_DELETE) == 0 &&
	    (msg->any.head.cmd & DMSGF_DELETE)) {
		kdmsg_msg_reply(msg, 0);
	}
	return 0;
}

/*
 * Volume configuration updates are passed onto the userland service
 * daemon via the open LNK_CONN transaction.
 */
void
hammer2_volconf_update(hammer2_mount_t *hmp, int index)
{
	kdmsg_msg_t *msg;

	/* XXX interlock against connection state termination */
	kprintf("volconf update %p\n", hmp->iocom.conn_state);
	if (hmp->iocom.conn_state) {
		kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n");
		msg = kdmsg_msg_alloc_state(hmp->iocom.conn_state,
					    DMSG_LNK_HAMMER2_VOLCONF,
					    NULL, NULL);
		H2_LNK_VOLCONF(msg)->copy = hmp->voldata.copyinfo[index];
		H2_LNK_VOLCONF(msg)->mediaid = hmp->voldata.fsid;
		H2_LNK_VOLCONF(msg)->index = index;
		kdmsg_msg_write(msg);
	}
}

/*
 * This handles hysteresis on regular file flushes.  Because the BIOs are
 * routed to a thread it is possible for an excessive number to build up
 * and cause long front-end stalls long before the runningbuffspace limit
 * is hit, so we implement hammer2_flush_pipe to control the
 * hysteresis.
 *
 * This is a particular problem when compression is used.
 */
void
hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp)
{
	atomic_add_int(&pmp->count_lwinprog, 1);
}

void
hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1);
	if ((lwinprog & HAMMER2_LWINPROG_WAITING) &&
	    (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) {
		atomic_clear_int(&pmp->count_lwinprog,
				 HAMMER2_LWINPROG_WAITING);
		wakeup(&pmp->count_lwinprog);
	}
}

void
hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	for (;;) {
		lwinprog = pmp->count_lwinprog;
		cpu_ccfence();
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		tsleep_interlock(&pmp->count_lwinprog, 0);
		atomic_set_int(&pmp->count_lwinprog, HAMMER2_LWINPROG_WAITING);
		lwinprog = pmp->count_lwinprog;
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz);
	}
}

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 */
void
hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
{
	long waiting;
	long count;
	long limit;
#if 0
	static int zzticks;
#endif

	/*
	 * Atomic check condition and wait.  Also do an early speedup of
	 * the syncer to try to avoid hitting the wait.
	 */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		count = waiting & HAMMER2_DIRTYCHAIN_MASK;

		limit = pmp->mp->mnt_nvnodelistsize / 10;
		if (limit < hammer2_limit_dirty_chains)
			limit = hammer2_limit_dirty_chains;
		if (limit < 1000)
			limit = 1000;

#if 0
		if ((int)(ticks - zzticks) > hz) {
			zzticks = ticks;
			kprintf("count %ld %ld\n", count, limit);
		}
#endif

		/*
		 * Block if there are too many dirty chains present, wait
		 * for the flush to clean some out.
		 */
		if (count > limit) {
			tsleep_interlock(&pmp->inmem_dirty_chains, 0);
			if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
					       waiting,
				       waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
				speedup_syncer(pmp->mp);
				tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
				       "chnmem", hz);
			}
			continue;	/* loop on success or fail */
		}

		/*
		 * Try to start an early flush before we are forced to block.
		 */
		if (count > limit * 7 / 10)
			speedup_syncer(pmp->mp);
		break;
	}
}

/*
 * Account for a newly dirtied chain.
 */
void
hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
{
	if (pmp)
		atomic_add_long(&pmp->inmem_dirty_chains, 1);
}

/*
 * Release one dirty-chain count and wake up anyone blocked in
 * hammer2_pfs_memory_wait().
 */
void
hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp)
{
	long waiting;

	if (pmp == NULL)
		return;

	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
				       waiting,
				       (waiting - 1) &
					~HAMMER2_DIRTYCHAIN_WAITING)) {
			break;
		}
	}

	if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
		wakeup(&pmp->inmem_dirty_chains);
}

/*
 * Debugging
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *first_parent;

	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	first_parent = chain->core ? TAILQ_FIRST(&chain->core->ownerq) : NULL;
	kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n",
		tab, tab, "", pfx,
		chain, chain->bref.type,
		chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s [%08x] (%s) mod=%08x del=%08x "
		"lo=%08x hi=%08x refs=%d\n",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		  chain->data) ? (char *)chain->data->ipdata.filename : "?"),
		chain->modify_xid,
		chain->delete_xid,
		chain->update_xlo,
		chain->update_xhi,
		chain->refs);

	kprintf("%*.*s core %p [%08x]",
		tab, tab, "",
		chain->core, (chain->core ? chain->core->flags : 0));

	if (first_parent)
		kprintf("\n%*.*s fp=%p np=%p [fpflags %08x fprefs %d",
			tab, tab, "",
			first_parent,
			(first_parent ? TAILQ_NEXT(first_parent, core_entry) :
					NULL),
			first_parent->flags,
			first_parent->refs);
	if (chain->core == NULL || RB_EMPTY(&chain->core->rbtree))
		kprintf("\n");
	else
		kprintf(" {\n");
	if (chain->core) {
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core->rbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'a');
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core->dbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'r');
		TAILQ_FOREACH(scan, &chain->core->dbq, db_entry)
			hammer2_dump_chain(scan, tab + 4, countp, 'd');
	}
	if (chain->core && !RB_EMPTY(&chain->core->rbtree)) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}