/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 */ 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/nlookup.h> 39 #include <sys/vnode.h> 40 #include <sys/mount.h> 41 #include <sys/fcntl.h> 42 #include <sys/buf.h> 43 #include <sys/uuid.h> 44 #include <sys/vfsops.h> 45 #include <sys/sysctl.h> 46 #include <sys/socket.h> 47 #include <sys/objcache.h> 48 49 #include <sys/proc.h> 50 #include <sys/namei.h> 51 #include <sys/mountctl.h> 52 #include <sys/dirent.h> 53 #include <sys/uio.h> 54 55 #include <sys/mutex.h> 56 #include <sys/mutex2.h> 57 58 #include "hammer2.h" 59 #include "hammer2_disk.h" 60 #include "hammer2_mount.h" 61 #include "hammer2_lz4.h" 62 63 #include "zlib/hammer2_zlib.h" 64 65 #define REPORT_REFS_ERRORS 1 /* XXX remove me */ 66 67 MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache"); 68 69 struct hammer2_sync_info { 70 hammer2_trans_t trans; 71 int error; 72 int waitfor; 73 }; 74 75 TAILQ_HEAD(hammer2_mntlist, hammer2_mount); 76 TAILQ_HEAD(hammer2_pfslist, hammer2_pfsmount); 77 static struct hammer2_mntlist hammer2_mntlist; 78 static struct hammer2_pfslist hammer2_pfslist; 79 static struct lock hammer2_mntlk; 80 81 int hammer2_debug; 82 int hammer2_cluster_enable = 1; 83 int hammer2_hardlink_enable = 1; 84 int hammer2_flush_pipe = 100; 85 int hammer2_synchronous_flush = 1; 86 int hammer2_dio_count; 87 long hammer2_limit_dirty_chains; 88 long hammer2_iod_file_read; 89 long hammer2_iod_meta_read; 90 long hammer2_iod_indr_read; 91 long hammer2_iod_fmap_read; 92 long hammer2_iod_volu_read; 93 long hammer2_iod_file_write; 94 long hammer2_iod_meta_write; 95 long hammer2_iod_indr_write; 96 long hammer2_iod_fmap_write; 97 long hammer2_iod_volu_write; 98 long hammer2_ioa_file_read; 99 long hammer2_ioa_meta_read; 100 long hammer2_ioa_indr_read; 101 long hammer2_ioa_fmap_read; 102 long hammer2_ioa_volu_read; 103 long hammer2_ioa_fmap_write; 104 long hammer2_ioa_file_write; 105 long hammer2_ioa_meta_write; 106 long hammer2_ioa_indr_write; 107 long hammer2_ioa_volu_write; 108 109 MALLOC_DECLARE(C_BUFFER); 110 MALLOC_DEFINE(C_BUFFER, "compbuffer", "Buffer used for compression."); 111 112 MALLOC_DECLARE(D_BUFFER); 113 MALLOC_DEFINE(D_BUFFER, "decompbuffer", "Buffer used for decompression."); 114 115 SYSCTL_NODE(_vfs, OID_AUTO, hammer2, CTLFLAG_RW, 0, "HAMMER2 filesystem"); 116 117 SYSCTL_INT(_vfs_hammer2, OID_AUTO, debug, CTLFLAG_RW, 118 &hammer2_debug, 0, ""); 119 SYSCTL_INT(_vfs_hammer2, OID_AUTO, cluster_enable, CTLFLAG_RW, 120 &hammer2_cluster_enable, 0, ""); 121 SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, 122 &hammer2_hardlink_enable, 0, ""); 123 SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW, 124 &hammer2_flush_pipe, 0, ""); 125 SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW, 126 &hammer2_synchronous_flush, 0, ""); 127 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW, 128 &hammer2_limit_dirty_chains, 0, ""); 129 SYSCTL_INT(_vfs_hammer2, OID_AUTO, dio_count, CTLFLAG_RD, 130 &hammer2_dio_count, 0, ""); 131 132 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_read, CTLFLAG_RW, 133 &hammer2_iod_file_read, 0, ""); 134 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_read, CTLFLAG_RW, 135 &hammer2_iod_meta_read, 0, ""); 136 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_read, CTLFLAG_RW, 137 &hammer2_iod_indr_read, 0, ""); 138 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_read, CTLFLAG_RW, 139 &hammer2_iod_fmap_read, 0, ""); 140 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_read, CTLFLAG_RW, 141 &hammer2_iod_volu_read, 0, ""); 142 143 
SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_file_write, CTLFLAG_RW, 144 &hammer2_iod_file_write, 0, ""); 145 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_meta_write, CTLFLAG_RW, 146 &hammer2_iod_meta_write, 0, ""); 147 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_indr_write, CTLFLAG_RW, 148 &hammer2_iod_indr_write, 0, ""); 149 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW, 150 &hammer2_iod_fmap_write, 0, ""); 151 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW, 152 &hammer2_iod_volu_write, 0, ""); 153 154 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_read, CTLFLAG_RW, 155 &hammer2_ioa_file_read, 0, ""); 156 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_read, CTLFLAG_RW, 157 &hammer2_ioa_meta_read, 0, ""); 158 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_read, CTLFLAG_RW, 159 &hammer2_ioa_indr_read, 0, ""); 160 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_read, CTLFLAG_RW, 161 &hammer2_ioa_fmap_read, 0, ""); 162 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_read, CTLFLAG_RW, 163 &hammer2_ioa_volu_read, 0, ""); 164 165 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_file_write, CTLFLAG_RW, 166 &hammer2_ioa_file_write, 0, ""); 167 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_meta_write, CTLFLAG_RW, 168 &hammer2_ioa_meta_write, 0, ""); 169 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_indr_write, CTLFLAG_RW, 170 &hammer2_ioa_indr_write, 0, ""); 171 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_fmap_write, CTLFLAG_RW, 172 &hammer2_ioa_fmap_write, 0, ""); 173 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, ioa_volu_write, CTLFLAG_RW, 174 &hammer2_ioa_volu_write, 0, ""); 175 176 static int hammer2_vfs_init(struct vfsconf *conf); 177 static int hammer2_vfs_uninit(struct vfsconf *vfsp); 178 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, 179 struct ucred *cred); 180 static int hammer2_remount(hammer2_mount_t *, struct mount *, char *, 181 struct vnode *, struct ucred *); 182 static int hammer2_recovery(hammer2_mount_t *hmp); 183 static int hammer2_vfs_unmount(struct mount *mp, int mntflags); 184 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp); 185 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, 186 struct ucred *cred); 187 static int hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 188 struct ucred *cred); 189 static int hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 190 ino_t ino, struct vnode **vpp); 191 static int hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp, 192 struct fid *fhp, struct vnode **vpp); 193 static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp); 194 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam, 195 int *exflagsp, struct ucred **credanonp); 196 197 static int hammer2_install_volume_header(hammer2_mount_t *hmp); 198 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 199 200 static void hammer2_write_thread(void *arg); 201 202 static void hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp); 203 static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp); 204 205 /* 206 * Functions for compression in threads, 207 * from hammer2_vnops.c 208 */ 209 static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, 210 hammer2_inode_t *ip, 211 const hammer2_inode_data_t *ripdata, 212 hammer2_cluster_t *cparent, 213 hammer2_key_t lbase, int ioflag, int pblksize, 214 int *errorp); 215 static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, 216 hammer2_inode_t *ip, 217 const hammer2_inode_data_t 
*ripdata, 218 hammer2_cluster_t *cparent, 219 hammer2_key_t lbase, int ioflag, 220 int pblksize, int *errorp, 221 int comp_algo, int check_algo); 222 static void hammer2_zero_check_and_write(struct buf *bp, 223 hammer2_trans_t *trans, hammer2_inode_t *ip, 224 const hammer2_inode_data_t *ripdata, 225 hammer2_cluster_t *cparent, 226 hammer2_key_t lbase, 227 int ioflag, int pblksize, int *errorp, 228 int check_algo); 229 static int test_block_zeros(const char *buf, size_t bytes); 230 static void zero_write(struct buf *bp, hammer2_trans_t *trans, 231 hammer2_inode_t *ip, 232 const hammer2_inode_data_t *ripdata, 233 hammer2_cluster_t *cparent, 234 hammer2_key_t lbase, 235 int *errorp); 236 static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, 237 int ioflag, int pblksize, int *errorp, 238 int check_algo); 239 240 /* 241 * HAMMER2 vfs operations. 242 */ 243 static struct vfsops hammer2_vfsops = { 244 .vfs_init = hammer2_vfs_init, 245 .vfs_uninit = hammer2_vfs_uninit, 246 .vfs_sync = hammer2_vfs_sync, 247 .vfs_mount = hammer2_vfs_mount, 248 .vfs_unmount = hammer2_vfs_unmount, 249 .vfs_root = hammer2_vfs_root, 250 .vfs_statfs = hammer2_vfs_statfs, 251 .vfs_statvfs = hammer2_vfs_statvfs, 252 .vfs_vget = hammer2_vfs_vget, 253 .vfs_vptofh = hammer2_vfs_vptofh, 254 .vfs_fhtovp = hammer2_vfs_fhtovp, 255 .vfs_checkexp = hammer2_vfs_checkexp 256 }; 257 258 MALLOC_DEFINE(M_HAMMER2, "HAMMER2-mount", ""); 259 260 VFS_SET(hammer2_vfsops, hammer2, 0); 261 MODULE_VERSION(hammer2, 1); 262 263 static 264 int 265 hammer2_vfs_init(struct vfsconf *conf) 266 { 267 static struct objcache_malloc_args margs_read; 268 static struct objcache_malloc_args margs_write; 269 270 int error; 271 272 error = 0; 273 274 if (HAMMER2_BLOCKREF_BYTES != sizeof(struct hammer2_blockref)) 275 error = EINVAL; 276 if (HAMMER2_INODE_BYTES != sizeof(struct hammer2_inode_data)) 277 error = EINVAL; 278 if (HAMMER2_VOLUME_BYTES != sizeof(struct hammer2_volume_data)) 279 error = EINVAL; 280 281 if (error) 282 kprintf("HAMMER2 structure size mismatch; cannot continue.\n"); 283 284 margs_read.objsize = 65536; 285 margs_read.mtype = D_BUFFER; 286 287 margs_write.objsize = 32768; 288 margs_write.mtype = C_BUFFER; 289 290 cache_buffer_read = objcache_create(margs_read.mtype->ks_shortdesc, 291 0, 1, NULL, NULL, NULL, objcache_malloc_alloc, 292 objcache_malloc_free, &margs_read); 293 cache_buffer_write = objcache_create(margs_write.mtype->ks_shortdesc, 294 0, 1, NULL, NULL, NULL, objcache_malloc_alloc, 295 objcache_malloc_free, &margs_write); 296 297 lockinit(&hammer2_mntlk, "mntlk", 0, 0); 298 TAILQ_INIT(&hammer2_mntlist); 299 TAILQ_INIT(&hammer2_pfslist); 300 301 hammer2_limit_dirty_chains = desiredvnodes / 10; 302 303 hammer2_trans_manage_init(); 304 305 return (error); 306 } 307 308 static 309 int 310 hammer2_vfs_uninit(struct vfsconf *vfsp __unused) 311 { 312 objcache_destroy(cache_buffer_read); 313 objcache_destroy(cache_buffer_write); 314 return 0; 315 } 316 317 /* 318 * Core PFS allocator. Used to allocate the pmp structure for PFS cluster 319 * mounts and the spmp structure for media (hmp) structures. 
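 *
 * When ripdata is non-NULL (a normal PFS mount) the new pmp inherits
 * inode_tid and pfs_clid from the PFS root inode data; the spmp case
 * passes a NULL ripdata and only seeds alloc_tid/flush_tid from the
 * caller-supplied transaction id (the volume header's mirror_tid).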
 */
static hammer2_pfsmount_t *
hammer2_pfsalloc(const hammer2_inode_data_t *ripdata, hammer2_tid_t alloc_tid)
{
        hammer2_pfsmount_t *pmp;

        pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
        kmalloc_create(&pmp->minode, "HAMMER2-inodes");
        kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
        lockinit(&pmp->lock, "pfslk", 0, 0);
        spin_init(&pmp->inum_spin, "hm2pfsalloc_inum");
        RB_INIT(&pmp->inum_tree);
        TAILQ_INIT(&pmp->unlinkq);
        spin_init(&pmp->list_spin, "hm2pfsalloc_list");

        pmp->alloc_tid = alloc_tid + 1;   /* our first media transaction id */
        pmp->flush_tid = pmp->alloc_tid;
        if (ripdata) {
                pmp->inode_tid = ripdata->pfs_inum + 1;
                pmp->pfs_clid = ripdata->pfs_clid;
        }
        hammer2_mtx_init(&pmp->wthread_mtx, "h2wthr");
        bioq_init(&pmp->wthread_bioq);

        return pmp;
}

/*
 * Mount or remount HAMMER2 filesystem from physical media
 *
 *      mountroot
 *              mp              mount point structure
 *              path            NULL
 *              data            <unused>
 *              cred            <unused>
 *
 *      mount
 *              mp              mount point structure
 *              path            path to mount point
 *              data            pointer to argument structure in user space
 *                      volume  volume path (device@LABEL form)
 *                      hflags  user mount flags
 *              cred            user credentials
 *
 * RETURNS:     0       Success
 *              !0      error number
 */
static
int
hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                  struct ucred *cred)
{
        struct hammer2_mount_info info;
        hammer2_pfsmount_t *pmp;
        hammer2_pfsmount_t *spmp;
        hammer2_mount_t *hmp;
        hammer2_key_t key_next;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        struct vnode *devvp;
        struct nlookupdata nd;
        hammer2_chain_t *parent;
        hammer2_chain_t *rchain;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *cparent;
        const hammer2_inode_data_t *ripdata;
        hammer2_blockref_t bref;
        struct file *fp;
        char devstr[MNAMELEN];
        size_t size;
        size_t done;
        char *dev;
        char *label;
        int ronly = 1;
        int error;
        int cache_index;
        int ddflag;
        int i;

        hmp = NULL;
        pmp = NULL;
        dev = NULL;
        label = NULL;
        devvp = NULL;
        cache_index = -1;

        kprintf("hammer2_mount\n");

        if (path == NULL) {
                /*
                 * Root mount
                 */
                bzero(&info, sizeof(info));
                info.cluster_fd = -1;
                return (EOPNOTSUPP);
        } else {
                /*
                 * Non-root mount or updating a mount
                 */
                error = copyin(data, &info, sizeof(info));
                if (error)
                        return (error);

                error = copyinstr(info.volume, devstr, MNAMELEN - 1, &done);
                if (error)
                        return (error);

                /* Extract device and label */
                dev = devstr;
                label = strchr(devstr, '@');
                if (label == NULL ||
                    ((label + 1) - dev) > done) {
                        return (EINVAL);
                }
                *label = '\0';
                label++;
                if (*label == '\0')
                        return (EINVAL);

                if (mp->mnt_flag & MNT_UPDATE) {
                        /*
                         * Update mount.  Note that pmp->iroot->cluster is
                         * an inode-embedded cluster and thus cannot be
                         * directly locked.
                         *
                         * XXX HAMMER2 needs to implement NFS export via
                         *     mountctl.
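                         *
                         * hammer2_remount() below currently only acts on a
                         * RO -> RW transition (MNTK_WANTRDWR), in which case
                         * it runs the mount-time recovery pass.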
447 */ 448 pmp = MPTOPMP(mp); 449 cluster = &pmp->iroot->cluster; 450 for (i = 0; i < cluster->nchains; ++i) { 451 hmp = cluster->array[i].chain->hmp; 452 devvp = hmp->devvp; 453 error = hammer2_remount(hmp, mp, path, 454 devvp, cred); 455 if (error) 456 break; 457 } 458 /*hammer2_inode_install_hidden(pmp);*/ 459 460 return error; 461 } 462 } 463 464 /* 465 * HMP device mount 466 * 467 * Lookup name and verify it refers to a block device. 468 */ 469 error = nlookup_init(&nd, dev, UIO_SYSSPACE, NLC_FOLLOW); 470 if (error == 0) 471 error = nlookup(&nd); 472 if (error == 0) 473 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 474 nlookup_done(&nd); 475 476 if (error == 0) { 477 if (vn_isdisk(devvp, &error)) 478 error = vfs_mountedon(devvp); 479 } 480 481 /* 482 * Determine if the device has already been mounted. After this 483 * check hmp will be non-NULL if we are doing the second or more 484 * hammer2 mounts from the same device. 485 */ 486 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 487 TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) { 488 if (hmp->devvp == devvp) 489 break; 490 } 491 492 /* 493 * Open the device if this isn't a secondary mount and construct 494 * the H2 device mount (hmp). 495 */ 496 if (hmp == NULL) { 497 hammer2_chain_t *schain; 498 hammer2_xid_t xid; 499 500 if (error == 0 && vcount(devvp) > 0) 501 error = EBUSY; 502 503 /* 504 * Now open the device 505 */ 506 if (error == 0) { 507 ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 508 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 509 error = vinvalbuf(devvp, V_SAVE, 0, 0); 510 if (error == 0) { 511 error = VOP_OPEN(devvp, 512 ronly ? FREAD : FREAD | FWRITE, 513 FSCRED, NULL); 514 } 515 vn_unlock(devvp); 516 } 517 if (error && devvp) { 518 vrele(devvp); 519 devvp = NULL; 520 } 521 if (error) { 522 lockmgr(&hammer2_mntlk, LK_RELEASE); 523 return error; 524 } 525 hmp = kmalloc(sizeof(*hmp), M_HAMMER2, M_WAITOK | M_ZERO); 526 hmp->ronly = ronly; 527 hmp->devvp = devvp; 528 kmalloc_create(&hmp->mchain, "HAMMER2-chains"); 529 TAILQ_INSERT_TAIL(&hammer2_mntlist, hmp, mntentry); 530 RB_INIT(&hmp->iotree); 531 spin_init(&hmp->io_spin, "hm2mount_io"); 532 spin_init(&hmp->list_spin, "hm2mount_list"); 533 TAILQ_INIT(&hmp->flushq); 534 535 lockinit(&hmp->vollk, "h2vol", 0, 0); 536 537 /* 538 * vchain setup. vchain.data is embedded. 539 * vchain.refs is initialized and will never drop to 0. 540 * 541 * NOTE! voldata is not yet loaded. 542 */ 543 hmp->vchain.hmp = hmp; 544 hmp->vchain.refs = 1; 545 hmp->vchain.data = (void *)&hmp->voldata; 546 hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; 547 hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 548 hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid; 549 550 hammer2_chain_core_alloc(NULL, &hmp->vchain); 551 /* hmp->vchain.u.xxx is left NULL */ 552 553 /* 554 * fchain setup. fchain.data is embedded. 555 * fchain.refs is initialized and will never drop to 0. 556 * 557 * The data is not used but needs to be initialized to 558 * pass assertion muster. We use this chain primarily 559 * as a placeholder for the freemap's top-level RBTREE 560 * so it does not interfere with the volume's topology 561 * RBTREE. 
562 */ 563 hmp->fchain.hmp = hmp; 564 hmp->fchain.refs = 1; 565 hmp->fchain.data = (void *)&hmp->voldata.freemap_blockset; 566 hmp->fchain.bref.type = HAMMER2_BREF_TYPE_FREEMAP; 567 hmp->fchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; 568 hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid; 569 hmp->fchain.bref.methods = 570 HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP) | 571 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 572 573 hammer2_chain_core_alloc(NULL, &hmp->fchain); 574 /* hmp->fchain.u.xxx is left NULL */ 575 576 /* 577 * Install the volume header and initialize fields from 578 * voldata. 579 */ 580 error = hammer2_install_volume_header(hmp); 581 if (error) { 582 ++hmp->pmp_count; 583 hammer2_vfs_unmount_hmp1(mp, hmp); 584 hammer2_vfs_unmount_hmp2(mp, hmp); 585 lockmgr(&hammer2_mntlk, LK_RELEASE); 586 hammer2_vfs_unmount(mp, MNT_FORCE); 587 return error; 588 } 589 590 /* 591 * Really important to get these right or flush will get 592 * confused. 593 */ 594 hmp->spmp = hammer2_pfsalloc(NULL, hmp->voldata.mirror_tid); 595 kprintf("alloc spmp %p tid %016jx\n", 596 hmp->spmp, hmp->voldata.mirror_tid); 597 spmp = hmp->spmp; 598 spmp->inode_tid = 1; 599 600 xid = 0; 601 hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid; 602 hmp->vchain.bref.modify_tid = hmp->vchain.bref.mirror_tid; 603 hmp->vchain.pmp = spmp; 604 hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid; 605 hmp->fchain.bref.modify_tid = hmp->fchain.bref.mirror_tid; 606 hmp->fchain.pmp = spmp; 607 608 /* 609 * First locate the super-root inode, which is key 0 610 * relative to the volume header's blockset. 611 * 612 * Then locate the root inode by scanning the directory keyspace 613 * represented by the label. 614 */ 615 parent = hammer2_chain_lookup_init(&hmp->vchain, 0); 616 schain = hammer2_chain_lookup(&parent, &key_dummy, 617 HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, 618 &cache_index, 0, &ddflag); 619 hammer2_chain_lookup_done(parent); 620 if (schain == NULL) { 621 kprintf("hammer2_mount: invalid super-root\n"); 622 ++hmp->pmp_count; 623 hammer2_vfs_unmount_hmp1(mp, hmp); 624 hammer2_vfs_unmount_hmp2(mp, hmp); 625 lockmgr(&hammer2_mntlk, LK_RELEASE); 626 hammer2_vfs_unmount(mp, MNT_FORCE); 627 return EINVAL; 628 } 629 630 /* 631 * Sanity-check schain's pmp, finish initializing spmp. 632 */ 633 ripdata = &hammer2_chain_rdata(schain)->ipdata; 634 KKASSERT(schain->pmp == spmp); 635 spmp->pfs_clid = ripdata->pfs_clid; 636 637 /* 638 * NOTE: inode_get sucks up schain's lock. 639 */ 640 cluster = hammer2_cluster_from_chain(schain); 641 spmp->iroot = hammer2_inode_get(spmp, NULL, cluster); 642 spmp->spmp_hmp = hmp; 643 hammer2_inode_ref(spmp->iroot); 644 hammer2_inode_unlock_ex(spmp->iroot, cluster); 645 schain = NULL; 646 /* leave spmp->iroot with one ref */ 647 648 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 649 error = hammer2_recovery(hmp); 650 /* XXX do something with error */ 651 } 652 ++hmp->pmp_count; 653 654 hammer2_iocom_init(hmp); 655 656 /* 657 * Ref the cluster management messaging descriptor. The mount 658 * program deals with the other end of the communications pipe. 659 */ 660 fp = holdfp(curproc->p_fd, info.cluster_fd, -1); 661 if (fp) { 662 hammer2_cluster_reconnect(hmp, fp); 663 } else { 664 kprintf("hammer2_mount: bad cluster_fd!\n"); 665 } 666 } else { 667 spmp = hmp->spmp; 668 ++hmp->pmp_count; 669 } 670 671 /* 672 * Lookup mount point under the media-localized super-root. 673 * 674 * cluster->pmp will incorrectly point to spmp and must be fixed 675 * up later on. 
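 *
 * The PFS is located by its label: hammer2_dirhash(label) produces the
 * base key and the loop below scans the collision range
 * (lhc ... lhc + HAMMER2_DIRHASH_LOMASK) until it finds an inode whose
 * filename matches the label exactly.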
676 */ 677 cparent = hammer2_inode_lock_ex(spmp->iroot); 678 lhc = hammer2_dirhash(label, strlen(label)); 679 cluster = hammer2_cluster_lookup(cparent, &key_next, 680 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 681 0, &ddflag); 682 while (cluster) { 683 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE && 684 strcmp(label, 685 hammer2_cluster_rdata(cluster)->ipdata.filename) == 0) { 686 break; 687 } 688 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 689 key_next, 690 lhc + HAMMER2_DIRHASH_LOMASK, 0); 691 } 692 hammer2_inode_unlock_ex(spmp->iroot, cparent); 693 694 if (cluster == NULL) { 695 kprintf("hammer2_mount: PFS label not found\n"); 696 hammer2_vfs_unmount_hmp1(mp, hmp); 697 hammer2_vfs_unmount_hmp2(mp, hmp); 698 lockmgr(&hammer2_mntlk, LK_RELEASE); 699 hammer2_vfs_unmount(mp, MNT_FORCE); 700 return EINVAL; 701 } 702 703 for (i = 0; i < cluster->nchains; ++i) { 704 rchain = cluster->array[i].chain; 705 if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { 706 kprintf("hammer2_mount: PFS label already mounted!\n"); 707 hammer2_cluster_unlock(cluster); 708 hammer2_vfs_unmount_hmp1(mp, hmp); 709 hammer2_vfs_unmount_hmp2(mp, hmp); 710 lockmgr(&hammer2_mntlk, LK_RELEASE); 711 hammer2_vfs_unmount(mp, MNT_FORCE); 712 return EBUSY; 713 } 714 KKASSERT(rchain->pmp == NULL); 715 #if 0 716 if (rchain->flags & HAMMER2_CHAIN_RECYCLE) { 717 kprintf("hammer2_mount: PFS label is recycling\n"); 718 hammer2_cluster_unlock(cluster); 719 hammer2_vfs_unmount_hmp1(mp, hmp); 720 hammer2_vfs_unmount_hmp2(mp, hmp); 721 lockmgr(&hammer2_mntlk, LK_RELEASE); 722 hammer2_vfs_unmount(mp, MNT_FORCE); 723 return EBUSY; 724 } 725 #endif 726 } 727 728 /* 729 * Check to see if the cluster id is already mounted at the mount 730 * point. If it is, add us to the cluster. 731 */ 732 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 733 hammer2_cluster_bref(cluster, &bref); 734 TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) { 735 if (pmp->spmp_hmp == NULL && 736 bcmp(&pmp->pfs_clid, &ripdata->pfs_clid, 737 sizeof(pmp->pfs_clid)) == 0) { 738 break; 739 } 740 } 741 742 if (pmp) { 743 int i; 744 int j; 745 746 /* 747 * Directly lock the inode->lock, do not run through 748 * hammer2_inode_lock*(). 749 */ 750 hammer2_inode_ref(pmp->iroot); 751 hammer2_mtx_ex(&pmp->iroot->lock); 752 753 if (pmp->iroot->cluster.nchains + cluster->nchains > 754 HAMMER2_MAXCLUSTER) { 755 kprintf("hammer2_mount: cluster full!\n"); 756 757 hammer2_mtx_unlock(&pmp->iroot->lock); 758 hammer2_inode_drop(pmp->iroot); 759 760 hammer2_cluster_unlock(cluster); 761 hammer2_vfs_unmount_hmp1(mp, hmp); 762 hammer2_vfs_unmount_hmp2(mp, hmp); 763 lockmgr(&hammer2_mntlk, LK_RELEASE); 764 hammer2_vfs_unmount(mp, MNT_FORCE); 765 return EBUSY; 766 } 767 kprintf("hammer2_vfs_mount: Adding pfs to existing cluster\n"); 768 j = pmp->iroot->cluster.nchains; 769 for (i = 0; i < cluster->nchains; ++i) { 770 rchain = cluster->array[i].chain; 771 KKASSERT(rchain->pmp == NULL); 772 rchain->pmp = pmp; 773 hammer2_chain_ref(rchain); 774 pmp->iroot->cluster.array[j].chain = rchain; 775 ++j; 776 } 777 pmp->iroot->cluster.nchains = j; 778 hammer2_mtx_unlock(&pmp->iroot->lock); 779 hammer2_inode_drop(pmp->iroot); 780 hammer2_cluster_unlock(cluster); 781 lockmgr(&hammer2_mntlk, LK_RELEASE); 782 783 kprintf("ok\n"); 784 hammer2_inode_install_hidden(pmp); 785 786 return ERANGE; 787 } 788 789 /* 790 * Block device opened successfully, finish initializing the 791 * mount structure. 792 * 793 * From this point on we have to call hammer2_unmount() on failure. 
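 *
 * The new pmp is seeded from the PFS root inode data and the cluster's
 * bref.mirror_tid, each member chain is flagged HAMMER2_CHAIN_MOUNTED,
 * and the pmp is entered on hammer2_pfslist so a later mount of the
 * same cluster id can attach to it instead of allocating a new one.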
794 */ 795 pmp = hammer2_pfsalloc(ripdata, bref.mirror_tid); 796 kprintf("PMP mirror_tid is %016jx\n", bref.mirror_tid); 797 for (i = 0; i < cluster->nchains; ++i) { 798 rchain = cluster->array[i].chain; 799 KKASSERT(rchain->pmp == NULL); 800 rchain->pmp = pmp; 801 atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 802 } 803 cluster->pmp = pmp; 804 805 TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry); 806 lockmgr(&hammer2_mntlk, LK_RELEASE); 807 808 kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n", 809 hmp, pmp, hmp->pmp_count); 810 811 mp->mnt_flag = MNT_LOCAL; 812 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; /* all entry pts are SMP */ 813 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 814 815 /* 816 * required mount structure initializations 817 */ 818 mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE; 819 mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE; 820 821 mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; 822 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 823 824 /* 825 * Optional fields 826 */ 827 mp->mnt_iosize_max = MAXPHYS; 828 mp->mnt_data = (qaddr_t)pmp; 829 pmp->mp = mp; 830 831 /* 832 * After this point hammer2_vfs_unmount() has visibility on hmp 833 * and manual hmp1/hmp2 calls are not needed on fatal errors. 834 */ 835 pmp->iroot = hammer2_inode_get(pmp, NULL, cluster); 836 hammer2_inode_ref(pmp->iroot); /* ref for pmp->iroot */ 837 hammer2_inode_unlock_ex(pmp->iroot, cluster); 838 839 /* 840 * The logical file buffer bio write thread handles things 841 * like physical block assignment and compression. 842 * 843 * (only applicable to pfs mounts, not applicable to spmp) 844 */ 845 pmp->wthread_destroy = 0; 846 lwkt_create(hammer2_write_thread, pmp, 847 &pmp->wthread_td, NULL, 0, -1, "hwrite-%s", label); 848 849 /* 850 * With the cluster operational install ihidden. 851 * (only applicable to pfs mounts, not applicable to spmp) 852 */ 853 hammer2_inode_install_hidden(pmp); 854 855 /* 856 * Finish setup 857 */ 858 vfs_getnewfsid(mp); 859 vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops); 860 vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops); 861 vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops); 862 863 copyinstr(info.volume, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 864 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 865 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 866 copyinstr(path, mp->mnt_stat.f_mntonname, 867 sizeof(mp->mnt_stat.f_mntonname) - 1, 868 &size); 869 870 /* 871 * Initial statfs to prime mnt_stat. 872 */ 873 hammer2_vfs_statfs(mp, &mp->mnt_stat, cred); 874 875 return 0; 876 } 877 878 /* 879 * Handle bioq for strategy write 880 */ 881 static 882 void 883 hammer2_write_thread(void *arg) 884 { 885 hammer2_pfsmount_t *pmp; 886 struct bio *bio; 887 struct buf *bp; 888 hammer2_trans_t trans; 889 struct vnode *vp; 890 hammer2_inode_t *ip; 891 hammer2_cluster_t *cparent; 892 hammer2_inode_data_t *wipdata; 893 hammer2_key_t lbase; 894 int lblksize; 895 int pblksize; 896 int error; 897 898 pmp = arg; 899 900 hammer2_mtx_ex(&pmp->wthread_mtx); 901 while (pmp->wthread_destroy == 0) { 902 if (bioq_first(&pmp->wthread_bioq) == NULL) { 903 mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx, 904 0, "h2bioqw", 0); 905 } 906 cparent = NULL; 907 908 hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE); 909 910 while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) { 911 /* 912 * dummy bio for synchronization. The transaction 913 * must be reinitialized. 
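                         *
                         * (Such a bio is queued by hammer2_bioq_sync() with
                         * bio_buf left NULL; the waiter sleeps until BIO_DONE
                         * is set and wakeup() is called below.)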
914 */ 915 if (bio->bio_buf == NULL) { 916 bio->bio_flags |= BIO_DONE; 917 wakeup(bio); 918 hammer2_trans_done(&trans); 919 hammer2_trans_init(&trans, pmp, 920 HAMMER2_TRANS_BUFCACHE); 921 continue; 922 } 923 924 /* 925 * else normal bio processing 926 */ 927 hammer2_mtx_unlock(&pmp->wthread_mtx); 928 929 hammer2_lwinprog_drop(pmp); 930 931 error = 0; 932 bp = bio->bio_buf; 933 vp = bp->b_vp; 934 ip = VTOI(vp); 935 936 /* 937 * Inode is modified, flush size and mtime changes 938 * to ensure that the file size remains consistent 939 * with the buffers being flushed. 940 * 941 * NOTE: The inode_fsync() call only flushes the 942 * inode's meta-data state, it doesn't try 943 * to flush underlying buffers or chains. 944 */ 945 cparent = hammer2_inode_lock_ex(ip); 946 if (ip->flags & (HAMMER2_INODE_RESIZED | 947 HAMMER2_INODE_MTIME)) { 948 hammer2_inode_fsync(&trans, ip, cparent); 949 } 950 wipdata = hammer2_cluster_modify_ip(&trans, ip, 951 cparent, 0); 952 lblksize = hammer2_calc_logical(ip, bio->bio_offset, 953 &lbase, NULL); 954 pblksize = hammer2_calc_physical(ip, wipdata, lbase); 955 hammer2_write_file_core(bp, &trans, ip, wipdata, 956 cparent, 957 lbase, IO_ASYNC, 958 pblksize, &error); 959 hammer2_cluster_modsync(cparent); 960 hammer2_inode_unlock_ex(ip, cparent); 961 if (error) { 962 kprintf("hammer2: error in buffer write\n"); 963 bp->b_flags |= B_ERROR; 964 bp->b_error = EIO; 965 } 966 biodone(bio); 967 hammer2_mtx_ex(&pmp->wthread_mtx); 968 } 969 hammer2_trans_done(&trans); 970 } 971 pmp->wthread_destroy = -1; 972 wakeup(&pmp->wthread_destroy); 973 974 hammer2_mtx_unlock(&pmp->wthread_mtx); 975 } 976 977 void 978 hammer2_bioq_sync(hammer2_pfsmount_t *pmp) 979 { 980 struct bio sync_bio; 981 982 bzero(&sync_bio, sizeof(sync_bio)); /* dummy with no bio_buf */ 983 hammer2_mtx_ex(&pmp->wthread_mtx); 984 if (pmp->wthread_destroy == 0 && 985 TAILQ_FIRST(&pmp->wthread_bioq.queue)) { 986 bioq_insert_tail(&pmp->wthread_bioq, &sync_bio); 987 while ((sync_bio.bio_flags & BIO_DONE) == 0) 988 mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0); 989 } 990 hammer2_mtx_unlock(&pmp->wthread_mtx); 991 } 992 993 /* 994 * Return a chain suitable for I/O, creating the chain if necessary 995 * and assigning its physical block. 996 */ 997 static 998 hammer2_cluster_t * 999 hammer2_assign_physical(hammer2_trans_t *trans, 1000 hammer2_inode_t *ip, hammer2_cluster_t *cparent, 1001 hammer2_key_t lbase, int pblksize, int *errorp) 1002 { 1003 hammer2_cluster_t *cluster; 1004 hammer2_cluster_t *dparent; 1005 hammer2_key_t key_dummy; 1006 int pradix = hammer2_getradix(pblksize); 1007 int ddflag; 1008 1009 /* 1010 * Locate the chain associated with lbase, return a locked chain. 1011 * However, do not instantiate any data reference (which utilizes a 1012 * device buffer) because we will be using direct IO via the 1013 * logical buffer cache buffer. 1014 */ 1015 *errorp = 0; 1016 KKASSERT(pblksize >= HAMMER2_ALLOC_MIN); 1017 retry: 1018 dparent = hammer2_cluster_lookup_init(cparent, 0); 1019 cluster = hammer2_cluster_lookup(dparent, &key_dummy, 1020 lbase, lbase, 1021 HAMMER2_LOOKUP_NODATA, &ddflag); 1022 1023 if (cluster == NULL) { 1024 /* 1025 * We found a hole, create a new chain entry. 1026 * 1027 * NOTE: DATA chains are created without device backing 1028 * store (nor do we want any). 
1029 */ 1030 *errorp = hammer2_cluster_create(trans, dparent, &cluster, 1031 lbase, HAMMER2_PBUFRADIX, 1032 HAMMER2_BREF_TYPE_DATA, 1033 pblksize, 0); 1034 if (cluster == NULL) { 1035 hammer2_cluster_lookup_done(dparent); 1036 panic("hammer2_cluster_create: par=%p error=%d\n", 1037 dparent->focus, *errorp); 1038 goto retry; 1039 } 1040 /*ip->delta_dcount += pblksize;*/ 1041 } else { 1042 switch (hammer2_cluster_type(cluster)) { 1043 case HAMMER2_BREF_TYPE_INODE: 1044 /* 1045 * The data is embedded in the inode. The 1046 * caller is responsible for marking the inode 1047 * modified and copying the data to the embedded 1048 * area. 1049 */ 1050 break; 1051 case HAMMER2_BREF_TYPE_DATA: 1052 if (hammer2_cluster_need_resize(cluster, pblksize)) { 1053 hammer2_cluster_resize(trans, ip, 1054 dparent, cluster, 1055 pradix, 1056 HAMMER2_MODIFY_OPTDATA); 1057 } 1058 1059 /* 1060 * DATA buffers must be marked modified whether the 1061 * data is in a logical buffer or not. We also have 1062 * to make this call to fixup the chain data pointers 1063 * after resizing in case this is an encrypted or 1064 * compressed buffer. 1065 */ 1066 hammer2_cluster_modify(trans, cluster, 1067 HAMMER2_MODIFY_OPTDATA); 1068 break; 1069 default: 1070 panic("hammer2_assign_physical: bad type"); 1071 /* NOT REACHED */ 1072 break; 1073 } 1074 } 1075 1076 /* 1077 * Cleanup. If cluster wound up being the inode itself, i.e. 1078 * the DIRECTDATA case for offset 0, then we need to update cparent. 1079 * The caller expects cparent to not become stale. 1080 */ 1081 hammer2_cluster_lookup_done(dparent); 1082 /* dparent = NULL; safety */ 1083 if (cluster && ddflag) 1084 hammer2_cluster_replace_locked(cparent, cluster); 1085 return (cluster); 1086 } 1087 1088 /* 1089 * bio queued from hammer2_vnops.c. 1090 * 1091 * The core write function which determines which path to take 1092 * depending on compression settings. We also have to locate the 1093 * related clusters so we can calculate and set the check data for 1094 * the blockref. 1095 */ 1096 static 1097 void 1098 hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, 1099 hammer2_inode_t *ip, 1100 const hammer2_inode_data_t *ripdata, 1101 hammer2_cluster_t *cparent, 1102 hammer2_key_t lbase, int ioflag, int pblksize, 1103 int *errorp) 1104 { 1105 hammer2_cluster_t *cluster; 1106 1107 switch(HAMMER2_DEC_ALGO(ripdata->comp_algo)) { 1108 case HAMMER2_COMP_NONE: 1109 /* 1110 * We have to assign physical storage to the buffer 1111 * we intend to dirty or write now to avoid deadlocks 1112 * in the strategy code later. 1113 * 1114 * This can return NOOFFSET for inode-embedded data. 1115 * The strategy code will take care of it in that case. 1116 */ 1117 cluster = hammer2_assign_physical(trans, ip, cparent, 1118 lbase, pblksize, 1119 errorp); 1120 hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp, 1121 ripdata->check_algo); 1122 if (cluster) 1123 hammer2_cluster_unlock(cluster); 1124 break; 1125 case HAMMER2_COMP_AUTOZERO: 1126 /* 1127 * Check for zero-fill only 1128 */ 1129 hammer2_zero_check_and_write(bp, trans, ip, 1130 ripdata, cparent, lbase, 1131 ioflag, pblksize, errorp, 1132 ripdata->check_algo); 1133 break; 1134 case HAMMER2_COMP_LZ4: 1135 case HAMMER2_COMP_ZLIB: 1136 default: 1137 /* 1138 * Check for zero-fill and attempt compression. 
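                 *
                 * hammer2_compress_and_write() runs the same all-zeros test
                 * first, then tries LZ4 or ZLIB according to comp_algo; if
                 * the result does not fit in pblksize / 2 the block is
                 * written uncompressed and ip->comp_heuristic backs off
                 * further compression attempts for a while.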
1139 */ 1140 hammer2_compress_and_write(bp, trans, ip, 1141 ripdata, cparent, 1142 lbase, ioflag, 1143 pblksize, errorp, 1144 ripdata->comp_algo, 1145 ripdata->check_algo); 1146 break; 1147 } 1148 } 1149 1150 /* 1151 * Generic function that will perform the compression in compression 1152 * write path. The compression algorithm is determined by the settings 1153 * obtained from inode. 1154 */ 1155 static 1156 void 1157 hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, 1158 hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata, 1159 hammer2_cluster_t *cparent, 1160 hammer2_key_t lbase, int ioflag, int pblksize, 1161 int *errorp, int comp_algo, int check_algo) 1162 { 1163 hammer2_cluster_t *cluster; 1164 hammer2_chain_t *chain; 1165 int comp_size; 1166 int comp_block_size; 1167 int i; 1168 char *comp_buffer; 1169 1170 if (test_block_zeros(bp->b_data, pblksize)) { 1171 zero_write(bp, trans, ip, ripdata, cparent, lbase, errorp); 1172 return; 1173 } 1174 1175 comp_size = 0; 1176 comp_buffer = NULL; 1177 1178 KKASSERT(pblksize / 2 <= 32768); 1179 1180 if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) { 1181 z_stream strm_compress; 1182 int comp_level; 1183 int ret; 1184 1185 switch(HAMMER2_DEC_ALGO(comp_algo)) { 1186 case HAMMER2_COMP_LZ4: 1187 comp_buffer = objcache_get(cache_buffer_write, 1188 M_INTWAIT); 1189 comp_size = LZ4_compress_limitedOutput( 1190 bp->b_data, 1191 &comp_buffer[sizeof(int)], 1192 pblksize, 1193 pblksize / 2 - sizeof(int)); 1194 /* 1195 * We need to prefix with the size, LZ4 1196 * doesn't do it for us. Add the related 1197 * overhead. 1198 */ 1199 *(int *)comp_buffer = comp_size; 1200 if (comp_size) 1201 comp_size += sizeof(int); 1202 break; 1203 case HAMMER2_COMP_ZLIB: 1204 comp_level = HAMMER2_DEC_LEVEL(comp_algo); 1205 if (comp_level == 0) 1206 comp_level = 6; /* default zlib compression */ 1207 else if (comp_level < 6) 1208 comp_level = 6; 1209 else if (comp_level > 9) 1210 comp_level = 9; 1211 ret = deflateInit(&strm_compress, comp_level); 1212 if (ret != Z_OK) { 1213 kprintf("HAMMER2 ZLIB: fatal error " 1214 "on deflateInit.\n"); 1215 } 1216 1217 comp_buffer = objcache_get(cache_buffer_write, 1218 M_INTWAIT); 1219 strm_compress.next_in = bp->b_data; 1220 strm_compress.avail_in = pblksize; 1221 strm_compress.next_out = comp_buffer; 1222 strm_compress.avail_out = pblksize / 2; 1223 ret = deflate(&strm_compress, Z_FINISH); 1224 if (ret == Z_STREAM_END) { 1225 comp_size = pblksize / 2 - 1226 strm_compress.avail_out; 1227 } else { 1228 comp_size = 0; 1229 } 1230 ret = deflateEnd(&strm_compress); 1231 break; 1232 default: 1233 kprintf("Error: Unknown compression method.\n"); 1234 kprintf("Comp_method = %d.\n", comp_algo); 1235 break; 1236 } 1237 } 1238 1239 if (comp_size == 0) { 1240 /* 1241 * compression failed or turned off 1242 */ 1243 comp_block_size = pblksize; /* safety */ 1244 if (++ip->comp_heuristic > 128) 1245 ip->comp_heuristic = 8; 1246 } else { 1247 /* 1248 * compression succeeded 1249 */ 1250 ip->comp_heuristic = 0; 1251 if (comp_size <= 1024) { 1252 comp_block_size = 1024; 1253 } else if (comp_size <= 2048) { 1254 comp_block_size = 2048; 1255 } else if (comp_size <= 4096) { 1256 comp_block_size = 4096; 1257 } else if (comp_size <= 8192) { 1258 comp_block_size = 8192; 1259 } else if (comp_size <= 16384) { 1260 comp_block_size = 16384; 1261 } else if (comp_size <= 32768) { 1262 comp_block_size = 32768; 1263 } else { 1264 panic("hammer2: WRITE PATH: " 1265 "Weird comp_size value."); 1266 /* NOT REACHED */ 1267 comp_block_size = 
                                pblksize;
                }
        }

        cluster = hammer2_assign_physical(trans, ip, cparent,
                                          lbase, comp_block_size,
                                          errorp);
        ripdata = NULL;

        if (*errorp) {
                kprintf("WRITE PATH: An error occurred while "
                        "assigning physical space.\n");
                KKASSERT(cluster == NULL);
                goto done;
        }

        for (i = 0; i < cluster->nchains; ++i) {
                hammer2_inode_data_t *wipdata;
                hammer2_io_t *dio;
                char *bdata;

                chain = cluster->array[i].chain;        /* XXX */
                KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

                switch(chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        wipdata = &hammer2_chain_wdata(chain)->ipdata;
                        KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA);
                        KKASSERT(bp->b_loffset == 0);
                        bcopy(bp->b_data, wipdata->u.data,
                              HAMMER2_EMBEDDED_BYTES);
                        break;
                case HAMMER2_BREF_TYPE_DATA:
                        /*
                         * Optimize out the read-before-write
                         * if possible.
                         */
                        *errorp = hammer2_io_newnz(chain->hmp,
                                                   chain->bref.data_off,
                                                   chain->bytes,
                                                   &dio);
                        if (*errorp) {
                                hammer2_io_brelse(&dio);
                                kprintf("hammer2: WRITE PATH: "
                                        "dbp bread error\n");
                                break;
                        }
                        bdata = hammer2_io_data(dio, chain->bref.data_off);

                        /*
                         * When loading the block make sure we don't
                         * leave garbage after the compressed data.
                         */
                        if (comp_size) {
                                chain->bref.methods =
                                        HAMMER2_ENC_COMP(comp_algo) +
                                        HAMMER2_ENC_CHECK(check_algo);
                                bcopy(comp_buffer, bdata, comp_size);
                                if (comp_size != comp_block_size) {
                                        bzero(bdata + comp_size,
                                              comp_block_size - comp_size);
                                }
                        } else {
                                chain->bref.methods =
                                        HAMMER2_ENC_COMP(
                                                HAMMER2_COMP_NONE) +
                                        HAMMER2_ENC_CHECK(check_algo);
                                bcopy(bp->b_data, bdata, pblksize);
                        }

                        /*
                         * The flush code doesn't calculate check codes for
                         * file data (doing so can result in excessive I/O),
                         * so we do it here.
                         */
                        hammer2_chain_setcheck(chain, bdata);

                        /*
                         * Device buffer is now valid, chain is no longer in
                         * the initial state.
                         *
                         * (No blockref table worries with file data)
                         */
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

                        /* Now write the related bdp. */
                        if (ioflag & IO_SYNC) {
                                /*
                                 * Synchronous I/O requested.
                                 */
                                hammer2_io_bwrite(&dio);
                        /*
                        } else if ((ioflag & IO_DIRECT) &&
                                   loff + n == pblksize) {
                                hammer2_io_bdwrite(&dio);
                        */
                        } else if (ioflag & IO_ASYNC) {
                                hammer2_io_bawrite(&dio);
                        } else {
                                hammer2_io_bdwrite(&dio);
                        }
                        break;
                default:
                        panic("hammer2_write_bp: bad chain type %d\n",
                              chain->bref.type);
                        /* NOT REACHED */
                        break;
                }
        }
done:
        if (cluster)
                hammer2_cluster_unlock(cluster);
        if (comp_buffer)
                objcache_put(cache_buffer_write, comp_buffer);
}

/*
 * Function that performs zero-checking and writing without compression;
 * it corresponds to the default zero-checking path.
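 *
 * All-zero blocks are handed to zero_write(), which either zeroes the
 * inode's embedded data area or deletes the existing data chain;
 * everything else goes through hammer2_write_bp() uncompressed.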
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata,
        hammer2_cluster_t *cparent,
        hammer2_key_t lbase, int ioflag, int pblksize, int *errorp,
        int check_algo)
{
        hammer2_cluster_t *cluster;

        if (test_block_zeros(bp->b_data, pblksize)) {
                zero_write(bp, trans, ip, ripdata, cparent, lbase, errorp);
        } else {
                cluster = hammer2_assign_physical(trans, ip, cparent,
                                                  lbase, pblksize, errorp);
                hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp,
                                 check_algo);
                if (cluster)
                        hammer2_cluster_unlock(cluster);
        }
}

/*
 * A function to test whether a block of data contains only zeros;
 * returns TRUE (non-zero) if the block is all zeros.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
        size_t i;

        for (i = 0; i < bytes; i += sizeof(long)) {
                if (*(const long *)(buf + i) != 0)
                        return (0);
        }
        return (1);
}

/*
 * Function to "write" a block that contains only zeros.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, const hammer2_inode_data_t *ripdata,
        hammer2_cluster_t *cparent,
        hammer2_key_t lbase, int *errorp __unused)
{
        hammer2_cluster_t *cluster;
        hammer2_media_data_t *data;
        hammer2_key_t key_dummy;
        int ddflag;

        cparent = hammer2_cluster_lookup_init(cparent, 0);
        cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
                                         HAMMER2_LOOKUP_NODATA, &ddflag);
        if (cluster) {
                data = hammer2_cluster_wdata(cluster);

                if (ddflag) {
                        KKASSERT(cluster->focus->flags &
                                 HAMMER2_CHAIN_MODIFIED);
                        bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES);
                        hammer2_cluster_modsync(cluster);
                } else {
                        hammer2_cluster_delete(trans, cparent, cluster,
                                               HAMMER2_DELETE_PERMANENT);
                }
                hammer2_cluster_unlock(cluster);
        }
        hammer2_cluster_lookup_done(cparent);
}

/*
 * Function to write the data as it is, without performing any sort of
 * compression.  This function is used in the no-compression path and
 * in the default zero-checking path.
1465 */ 1466 static 1467 void 1468 hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag, 1469 int pblksize, int *errorp, int check_algo) 1470 { 1471 hammer2_chain_t *chain; 1472 hammer2_inode_data_t *wipdata; 1473 hammer2_io_t *dio; 1474 char *bdata; 1475 int error; 1476 int i; 1477 1478 error = 0; /* XXX TODO below */ 1479 1480 for (i = 0; i < cluster->nchains; ++i) { 1481 chain = cluster->array[i].chain; /* XXX */ 1482 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); 1483 1484 switch(chain->bref.type) { 1485 case HAMMER2_BREF_TYPE_INODE: 1486 wipdata = &hammer2_chain_wdata(chain)->ipdata; 1487 KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA); 1488 KKASSERT(bp->b_loffset == 0); 1489 bcopy(bp->b_data, wipdata->u.data, 1490 HAMMER2_EMBEDDED_BYTES); 1491 error = 0; 1492 break; 1493 case HAMMER2_BREF_TYPE_DATA: 1494 error = hammer2_io_newnz(chain->hmp, 1495 chain->bref.data_off, 1496 chain->bytes, &dio); 1497 if (error) { 1498 hammer2_io_bqrelse(&dio); 1499 kprintf("hammer2: WRITE PATH: " 1500 "dbp bread error\n"); 1501 break; 1502 } 1503 bdata = hammer2_io_data(dio, chain->bref.data_off); 1504 1505 chain->bref.methods = HAMMER2_ENC_COMP( 1506 HAMMER2_COMP_NONE) + 1507 HAMMER2_ENC_CHECK(check_algo); 1508 bcopy(bp->b_data, bdata, chain->bytes); 1509 1510 /* 1511 * The flush code doesn't calculate check codes for 1512 * file data (doing so can result in excessive I/O), 1513 * so we do it here. 1514 */ 1515 hammer2_chain_setcheck(chain, bdata); 1516 1517 /* 1518 * Device buffer is now valid, chain is no longer in 1519 * the initial state. 1520 * 1521 * (No blockref table worries with file data) 1522 */ 1523 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1524 1525 if (ioflag & IO_SYNC) { 1526 /* 1527 * Synchronous I/O requested. 1528 */ 1529 hammer2_io_bwrite(&dio); 1530 /* 1531 } else if ((ioflag & IO_DIRECT) && 1532 loff + n == pblksize) { 1533 hammer2_io_bdwrite(&dio); 1534 */ 1535 } else if (ioflag & IO_ASYNC) { 1536 hammer2_io_bawrite(&dio); 1537 } else { 1538 hammer2_io_bdwrite(&dio); 1539 } 1540 break; 1541 default: 1542 panic("hammer2_write_bp: bad chain type %d\n", 1543 chain->bref.type); 1544 /* NOT REACHED */ 1545 error = 0; 1546 break; 1547 } 1548 KKASSERT(error == 0); /* XXX TODO */ 1549 } 1550 *errorp = error; 1551 } 1552 1553 static 1554 int 1555 hammer2_remount(hammer2_mount_t *hmp, struct mount *mp, char *path, 1556 struct vnode *devvp, struct ucred *cred) 1557 { 1558 int error; 1559 1560 if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 1561 error = hammer2_recovery(hmp); 1562 } else { 1563 error = 0; 1564 } 1565 return error; 1566 } 1567 1568 static 1569 int 1570 hammer2_vfs_unmount(struct mount *mp, int mntflags) 1571 { 1572 hammer2_pfsmount_t *pmp; 1573 hammer2_mount_t *hmp; 1574 hammer2_chain_t *rchain; 1575 hammer2_cluster_t *cluster; 1576 int flags; 1577 int error = 0; 1578 int i; 1579 1580 pmp = MPTOPMP(mp); 1581 1582 if (pmp == NULL) 1583 return(0); 1584 1585 lockmgr(&hammer2_mntlk, LK_EXCLUSIVE); 1586 TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry); 1587 1588 /* 1589 * If mount initialization proceeded far enough we must flush 1590 * its vnodes. 
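 *
 * (MNT_FORCE maps to a FORCECLOSE vflush; the write thread, ihidden,
 * and iroot references are then torn down below.)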
1591 */ 1592 if (mntflags & MNT_FORCE) 1593 flags = FORCECLOSE; 1594 else 1595 flags = 0; 1596 if (pmp->iroot) { 1597 error = vflush(mp, 0, flags); 1598 if (error) 1599 goto failed; 1600 } 1601 1602 if (pmp->wthread_td) { 1603 hammer2_mtx_ex(&pmp->wthread_mtx); 1604 pmp->wthread_destroy = 1; 1605 wakeup(&pmp->wthread_bioq); 1606 while (pmp->wthread_destroy != -1) { 1607 mtxsleep(&pmp->wthread_destroy, 1608 &pmp->wthread_mtx, 0, 1609 "umount-sleep", 0); 1610 } 1611 hammer2_mtx_unlock(&pmp->wthread_mtx); 1612 pmp->wthread_td = NULL; 1613 } 1614 1615 /* 1616 * Cleanup our reference on ihidden. 1617 */ 1618 if (pmp->ihidden) { 1619 hammer2_inode_drop(pmp->ihidden); 1620 pmp->ihidden = NULL; 1621 } 1622 1623 /* 1624 * Cleanup our reference on iroot. iroot is (should) not be needed 1625 * by the flush code. 1626 */ 1627 if (pmp->iroot) { 1628 cluster = &pmp->iroot->cluster; 1629 for (i = 0; i < pmp->iroot->cluster.nchains; ++i) { 1630 rchain = pmp->iroot->cluster.array[i].chain; 1631 if (rchain == NULL) 1632 continue; 1633 hmp = rchain->hmp; 1634 hammer2_vfs_unmount_hmp1(mp, hmp); 1635 1636 atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); 1637 #if REPORT_REFS_ERRORS 1638 if (rchain->refs != 1) 1639 kprintf("PMP->RCHAIN %p REFS WRONG %d\n", 1640 rchain, rchain->refs); 1641 #else 1642 KKASSERT(rchain->refs == 1); 1643 #endif 1644 hammer2_chain_drop(rchain); 1645 cluster->array[i].chain = NULL; 1646 hammer2_vfs_unmount_hmp2(mp, hmp); 1647 } 1648 cluster->focus = NULL; 1649 1650 #if REPORT_REFS_ERRORS 1651 if (pmp->iroot->refs != 1) 1652 kprintf("PMP->IROOT %p REFS WRONG %d\n", 1653 pmp->iroot, pmp->iroot->refs); 1654 #else 1655 KKASSERT(pmp->iroot->refs == 1); 1656 #endif 1657 /* ref for pmp->iroot */ 1658 hammer2_inode_drop(pmp->iroot); 1659 pmp->iroot = NULL; 1660 } 1661 1662 pmp->mp = NULL; 1663 mp->mnt_data = NULL; 1664 1665 kmalloc_destroy(&pmp->mmsg); 1666 kmalloc_destroy(&pmp->minode); 1667 1668 kfree(pmp, M_HAMMER2); 1669 error = 0; 1670 1671 failed: 1672 lockmgr(&hammer2_mntlk, LK_RELEASE); 1673 1674 return (error); 1675 } 1676 1677 static 1678 void 1679 hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp) 1680 { 1681 hammer2_mount_exlock(hmp); 1682 --hmp->pmp_count; 1683 1684 kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count); 1685 1686 /* 1687 * Cycle the volume data lock as a safety (probably not needed any 1688 * more). To ensure everything is out we need to flush at least 1689 * three times. (1) The running of the unlinkq can dirty the 1690 * filesystem, (2) A normal flush can dirty the freemap, and 1691 * (3) ensure that the freemap is fully synchronized. 1692 * 1693 * The next mount's recovery scan can clean everything up but we want 1694 * to leave the filesystem in a 100% clean state on a normal unmount. 1695 */ 1696 hammer2_voldata_lock(hmp); 1697 hammer2_voldata_unlock(hmp); 1698 if (mp->mnt_data) { 1699 hammer2_vfs_sync(mp, MNT_WAIT); 1700 hammer2_vfs_sync(mp, MNT_WAIT); 1701 hammer2_vfs_sync(mp, MNT_WAIT); 1702 } 1703 1704 /* 1705 * XXX chain depend deadlock? 
1706 */ 1707 hammer2_iocom_uninit(hmp); 1708 1709 if (hmp->pmp_count == 0) { 1710 if ((hmp->vchain.flags | hmp->fchain.flags) & 1711 HAMMER2_CHAIN_FLUSH_MASK) { 1712 kprintf("hammer2_unmount: chains left over " 1713 "after final sync\n"); 1714 kprintf(" vchain %08x\n", hmp->vchain.flags); 1715 kprintf(" fchain %08x\n", hmp->fchain.flags); 1716 1717 if (hammer2_debug & 0x0010) 1718 Debugger("entered debugger"); 1719 } 1720 } 1721 } 1722 1723 static 1724 void 1725 hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp) 1726 { 1727 hammer2_pfsmount_t *spmp; 1728 struct vnode *devvp; 1729 int dumpcnt; 1730 int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); 1731 1732 /* 1733 * If no PFS's left drop the master hammer2_mount for the 1734 * device. 1735 */ 1736 if (hmp->pmp_count == 0) { 1737 /* 1738 * Clean up SPMP and the super-root inode 1739 */ 1740 spmp = hmp->spmp; 1741 if (spmp) { 1742 if (spmp->iroot) { 1743 hammer2_inode_drop(spmp->iroot); 1744 spmp->iroot = NULL; 1745 } 1746 hmp->spmp = NULL; 1747 kmalloc_destroy(&spmp->mmsg); 1748 kmalloc_destroy(&spmp->minode); 1749 kfree(spmp, M_HAMMER2); 1750 } 1751 1752 /* 1753 * Finish up with the device vnode 1754 */ 1755 if ((devvp = hmp->devvp) != NULL) { 1756 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1757 vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); 1758 hmp->devvp = NULL; 1759 VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL); 1760 vn_unlock(devvp); 1761 vrele(devvp); 1762 devvp = NULL; 1763 } 1764 1765 /* 1766 * Clear vchain/fchain flags that might prevent final cleanup 1767 * of these chains. 1768 */ 1769 if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) { 1770 atomic_clear_int(&hmp->vchain.flags, 1771 HAMMER2_CHAIN_MODIFIED); 1772 hammer2_pfs_memory_wakeup(hmp->vchain.pmp); 1773 hammer2_chain_drop(&hmp->vchain); 1774 } 1775 if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) { 1776 atomic_clear_int(&hmp->vchain.flags, 1777 HAMMER2_CHAIN_UPDATE); 1778 hammer2_chain_drop(&hmp->vchain); 1779 } 1780 1781 if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) { 1782 atomic_clear_int(&hmp->fchain.flags, 1783 HAMMER2_CHAIN_MODIFIED); 1784 hammer2_pfs_memory_wakeup(hmp->fchain.pmp); 1785 hammer2_chain_drop(&hmp->fchain); 1786 } 1787 if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) { 1788 atomic_clear_int(&hmp->fchain.flags, 1789 HAMMER2_CHAIN_UPDATE); 1790 hammer2_chain_drop(&hmp->fchain); 1791 } 1792 1793 /* 1794 * Final drop of embedded freemap root chain to 1795 * clean up fchain.core (fchain structure is not 1796 * flagged ALLOCATED so it is cleaned out and then 1797 * left to rot). 1798 */ 1799 hammer2_chain_drop(&hmp->fchain); 1800 1801 /* 1802 * Final drop of embedded volume root chain to clean 1803 * up vchain.core (vchain structure is not flagged 1804 * ALLOCATED so it is cleaned out and then left to 1805 * rot). 
1806 */ 1807 dumpcnt = 50; 1808 hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v'); 1809 dumpcnt = 50; 1810 hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f'); 1811 hammer2_mount_unlock(hmp); 1812 hammer2_chain_drop(&hmp->vchain); 1813 1814 hammer2_io_cleanup(hmp, &hmp->iotree); 1815 if (hmp->iofree_count) { 1816 kprintf("io_cleanup: %d I/O's left hanging\n", 1817 hmp->iofree_count); 1818 } 1819 1820 TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry); 1821 kmalloc_destroy(&hmp->mchain); 1822 kfree(hmp, M_HAMMER2); 1823 } else { 1824 hammer2_mount_unlock(hmp); 1825 } 1826 } 1827 1828 static 1829 int 1830 hammer2_vfs_vget(struct mount *mp, struct vnode *dvp, 1831 ino_t ino, struct vnode **vpp) 1832 { 1833 kprintf("hammer2_vget\n"); 1834 return (EOPNOTSUPP); 1835 } 1836 1837 static 1838 int 1839 hammer2_vfs_root(struct mount *mp, struct vnode **vpp) 1840 { 1841 hammer2_pfsmount_t *pmp; 1842 hammer2_cluster_t *cparent; 1843 int error; 1844 struct vnode *vp; 1845 1846 pmp = MPTOPMP(mp); 1847 if (pmp->iroot == NULL) { 1848 *vpp = NULL; 1849 error = EINVAL; 1850 } else { 1851 cparent = hammer2_inode_lock_sh(pmp->iroot); 1852 vp = hammer2_igetv(pmp->iroot, cparent, &error); 1853 hammer2_inode_unlock_sh(pmp->iroot, cparent); 1854 *vpp = vp; 1855 if (vp == NULL) 1856 kprintf("vnodefail\n"); 1857 } 1858 1859 return (error); 1860 } 1861 1862 /* 1863 * Filesystem status 1864 * 1865 * XXX incorporate ipdata->inode_quota and data_quota 1866 */ 1867 static 1868 int 1869 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1870 { 1871 hammer2_pfsmount_t *pmp; 1872 hammer2_mount_t *hmp; 1873 1874 pmp = MPTOPMP(mp); 1875 KKASSERT(pmp->iroot->cluster.nchains >= 1); 1876 hmp = pmp->iroot->cluster.focus->hmp; /* XXX */ 1877 1878 mp->mnt_stat.f_files = pmp->inode_count; 1879 mp->mnt_stat.f_ffree = 0; 1880 mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 1881 mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE; 1882 mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; 1883 1884 *sbp = mp->mnt_stat; 1885 return (0); 1886 } 1887 1888 static 1889 int 1890 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1891 { 1892 hammer2_pfsmount_t *pmp; 1893 hammer2_mount_t *hmp; 1894 1895 pmp = MPTOPMP(mp); 1896 KKASSERT(pmp->iroot->cluster.nchains >= 1); 1897 hmp = pmp->iroot->cluster.focus->hmp; /* XXX */ 1898 1899 mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; 1900 mp->mnt_vstat.f_files = pmp->inode_count; 1901 mp->mnt_vstat.f_ffree = 0; 1902 mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE; 1903 mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / HAMMER2_PBUFSIZE; 1904 mp->mnt_vstat.f_bavail = mp->mnt_vstat.f_bfree; 1905 1906 *sbp = mp->mnt_vstat; 1907 return (0); 1908 } 1909 1910 /* 1911 * Mount-time recovery (RW mounts) 1912 * 1913 * Updates to the free block table are allowed to lag flushes by one 1914 * transaction. In case of a crash, then on a fresh mount we must do an 1915 * incremental scan of the last committed transaction id and make sure that 1916 * all related blocks have been marked allocated. 1917 * 1918 * The super-root topology and each PFS has its own transaction id domain, 1919 * so we must track PFS boundary transitions. 
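 *
 * Recursion depth is capped at HAMMER2_RECOVERY_MAXDEPTH; deeper chains
 * and PFS boundary crossings are queued as hammer2_recovery_elm entries
 * (chain plus the sync_tid to resume with) and processed iteratively by
 * hammer2_recovery().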
1920 */ 1921 struct hammer2_recovery_elm { 1922 TAILQ_ENTRY(hammer2_recovery_elm) entry; 1923 hammer2_chain_t *chain; 1924 hammer2_tid_t sync_tid; 1925 }; 1926 1927 TAILQ_HEAD(hammer2_recovery_list, hammer2_recovery_elm); 1928 1929 struct hammer2_recovery_info { 1930 struct hammer2_recovery_list list; 1931 int depth; 1932 }; 1933 1934 static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp, 1935 hammer2_chain_t *parent, 1936 struct hammer2_recovery_info *info, 1937 hammer2_tid_t sync_tid); 1938 1939 #define HAMMER2_RECOVERY_MAXDEPTH 10 1940 1941 static 1942 int 1943 hammer2_recovery(hammer2_mount_t *hmp) 1944 { 1945 hammer2_trans_t trans; 1946 struct hammer2_recovery_info info; 1947 struct hammer2_recovery_elm *elm; 1948 hammer2_chain_t *parent; 1949 hammer2_tid_t sync_tid; 1950 int error; 1951 int cumulative_error = 0; 1952 1953 hammer2_trans_init(&trans, hmp->spmp, 0); 1954 1955 sync_tid = 0; 1956 TAILQ_INIT(&info.list); 1957 info.depth = 0; 1958 parent = hammer2_chain_lookup_init(&hmp->vchain, 0); 1959 cumulative_error = hammer2_recovery_scan(&trans, hmp, parent, 1960 &info, sync_tid); 1961 hammer2_chain_lookup_done(parent); 1962 1963 while ((elm = TAILQ_FIRST(&info.list)) != NULL) { 1964 TAILQ_REMOVE(&info.list, elm, entry); 1965 parent = elm->chain; 1966 sync_tid = elm->sync_tid; 1967 kfree(elm, M_HAMMER2); 1968 1969 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1970 HAMMER2_RESOLVE_NOREF); 1971 error = hammer2_recovery_scan(&trans, hmp, parent, 1972 &info, sync_tid); 1973 hammer2_chain_unlock(parent); 1974 if (error) 1975 cumulative_error = error; 1976 } 1977 hammer2_trans_done(&trans); 1978 1979 return cumulative_error; 1980 } 1981 1982 static 1983 int 1984 hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp, 1985 hammer2_chain_t *parent, 1986 struct hammer2_recovery_info *info, 1987 hammer2_tid_t sync_tid) 1988 { 1989 const hammer2_inode_data_t *ripdata; 1990 hammer2_chain_t *chain; 1991 int cache_index; 1992 int cumulative_error = 0; 1993 int pfs_boundary = 0; 1994 int error; 1995 1996 /* 1997 * Adjust freemap to ensure that the block(s) are marked allocated. 1998 */ 1999 if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) { 2000 hammer2_freemap_adjust(trans, hmp, &parent->bref, 2001 HAMMER2_FREEMAP_DORECOVER); 2002 } 2003 2004 /* 2005 * Check type for recursive scan 2006 */ 2007 switch(parent->bref.type) { 2008 case HAMMER2_BREF_TYPE_VOLUME: 2009 /* data already instantiated */ 2010 break; 2011 case HAMMER2_BREF_TYPE_INODE: 2012 /* 2013 * Must instantiate data for DIRECTDATA test and also 2014 * for recursion. 

static
int
hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
		      hammer2_chain_t *parent,
		      struct hammer2_recovery_info *info,
		      hammer2_tid_t sync_tid)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_chain_t *chain;
	int cache_index;
	int cumulative_error = 0;
	int pfs_boundary = 0;
	int error;

	/*
	 * Adjust freemap to ensure that the block(s) are marked allocated.
	 */
	if (parent->bref.type != HAMMER2_BREF_TYPE_VOLUME) {
		hammer2_freemap_adjust(trans, hmp, &parent->bref,
				       HAMMER2_FREEMAP_DORECOVER);
	}

	/*
	 * Check type for recursive scan
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
		/* data already instantiated */
		break;
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Must instantiate data for DIRECTDATA test and also
		 * for recursion.
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		ripdata = &hammer2_chain_rdata(parent)->ipdata;
		if (ripdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
			/* not applicable to recovery scan */
			hammer2_chain_unlock(parent);
			return 0;
		}
		if ((ripdata->op_flags & HAMMER2_OPFLAG_PFSROOT) &&
		    info->depth != 0) {
			pfs_boundary = 1;
			sync_tid = parent->bref.mirror_tid - 1;
		}
		hammer2_chain_unlock(parent);
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Must instantiate data for recursion
		 */
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_unlock(parent);
		break;
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/* not applicable to recovery scan */
		return 0;
		break;
	default:
		return EDOM;
	}

	/*
	 * Defer operation if depth limit reached or if we are crossing a
	 * PFS boundary.
	 */
	if (info->depth >= HAMMER2_RECOVERY_MAXDEPTH || pfs_boundary) {
		struct hammer2_recovery_elm *elm;

		elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK);
		elm->chain = parent;
		elm->sync_tid = sync_tid;
		hammer2_chain_ref(parent);
		TAILQ_INSERT_TAIL(&info->list, elm, entry);
		/* unlocked by caller */

		return(0);
	}

	/*
	 * Recursive scan of the last flushed transaction only.  We are
	 * doing this without pmp assignments so don't leave the chains
	 * hanging around after we are done with them.
	 */
	cache_index = 0;
	chain = hammer2_chain_scan(parent, NULL, &cache_index,
				   HAMMER2_LOOKUP_NODATA);
	while (chain) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
		if (chain->bref.mirror_tid >= sync_tid) {
			++info->depth;
			error = hammer2_recovery_scan(trans, hmp, chain,
						      info, sync_tid);
			--info->depth;
			if (error)
				cumulative_error = error;
		}
		chain = hammer2_chain_scan(parent, chain, &cache_index,
					   HAMMER2_LOOKUP_NODATA);
	}

	return cumulative_error;
}
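
/*
 * A minimal sketch of the deferral pattern used by the scan above
 * (illustrative only, not compiled).  Rather than recursing past
 * HAMMER2_RECOVERY_MAXDEPTH or across a PFS boundary, the chain is queued
 * with a ref held and hammer2_recovery() drains the queue iteratively,
 * which bounds kernel stack depth.
 */
#if 0
	struct hammer2_recovery_elm *elm;

	/* defer: instead of recursing deeper, queue the chain with a ref */
	elm = kmalloc(sizeof(*elm), M_HAMMER2, M_ZERO | M_WAITOK);
	elm->chain = parent;
	elm->sync_tid = sync_tid;
	hammer2_chain_ref(parent);
	TAILQ_INSERT_TAIL(&info->list, elm, entry);

	/* drain: the top level replays each deferred subtree iteratively */
	while ((elm = TAILQ_FIRST(&info->list)) != NULL) {
		TAILQ_REMOVE(&info->list, elm, entry);
		/* ... lock elm->chain, rescan it, then free elm ... */
	}
#endif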

/*
 * Sync the entire filesystem; this is called from the filesystem syncer
 * process periodically and whenever a user issues a sync(1) against the
 * hammer2 mountpoint.
 *
 * Currently this is in fact called from the syncer.
 *
 * This task has to snapshot the state of the dirty inode chain and make
 * sure that I/O is initiated for all of the inodes on that chain.  We
 * also make sure that I/O is initiated for the root block.
 *
 * If waitfor is set, we wait for the media to acknowledge the new root
 * block.
 *
 * THINKS: side A vs side B, to have sync not stall all I/O?
 */
int
hammer2_vfs_sync(struct mount *mp, int waitfor)
{
	struct hammer2_sync_info info;
	hammer2_inode_t *iroot;
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	int flags;
	int error;
	int total_error;
	int force_fchain;
	int i;
	int j;

	pmp = MPTOPMP(mp);
	iroot = pmp->iroot;
	KKASSERT(iroot);
	KKASSERT(iroot->pmp == pmp);

	/*
	 * We can't acquire locks on existing vnodes while in a transaction
	 * without risking a deadlock.  This assumes that vfsync() can be
	 * called without the vnode locked (which it can in DragonFly).
	 * Otherwise we'd have to implement a multi-pass scheme or flag the
	 * lock failures and retry.
	 *
	 * The reclamation code interlocks with the sync list's token
	 * (by removing the vnode from the scan list) before unlocking
	 * the inode, giving us time to ref the inode.
	 */
	/*flags = VMSC_GETVP;*/
	flags = 0;
	if (waitfor & MNT_LAZY)
		flags |= VMSC_ONEPASS;

	/*
	 * Start our flush transaction.  This does not return until all
	 * concurrent transactions have completed and will prevent any
	 * new transactions from running concurrently, except for the
	 * buffer cache transactions.
	 *
	 * For efficiency do an asynchronous pass first, then make sure
	 * with a synchronous pass over all related buffer cache buffers.
	 * It should theoretically not be possible for any new file buffers
	 * to be instantiated during this sequence.
	 */
	hammer2_trans_init(&info.trans, pmp, HAMMER2_TRANS_ISFLUSH |
					     HAMMER2_TRANS_PREFLUSH);
	hammer2_run_unlinkq(&info.trans, pmp);

	info.error = 0;
	info.waitfor = MNT_NOWAIT;
	vsyncscan(mp, flags | VMSC_NOWAIT, hammer2_sync_scan2, &info);
	info.waitfor = MNT_WAIT;
	vsyncscan(mp, flags, hammer2_sync_scan2, &info);

	/*
	 * Clear PREFLUSH.  This prevents (or asserts on) any new logical
	 * buffer cache flushes which occur during the flush.  Device buffers
	 * are not affected.
	 */
#if 0
	if (info.error == 0 && (waitfor & MNT_WAIT)) {
		info.waitfor = waitfor;
		vsyncscan(mp, flags, hammer2_sync_scan2, &info);
	}
#endif
	hammer2_bioq_sync(info.trans.pmp);
	atomic_clear_int(&info.trans.flags, HAMMER2_TRANS_PREFLUSH);

	total_error = 0;

	/*
	 * Flush all storage elements making up the cluster.
	 *
	 * We must also flush any deleted siblings because the super-root
	 * flush won't do it for us.  They all must be staged or the
	 * super-root flush will not be able to update its block table
	 * properly.
	 *
	 * XXX currently done serially instead of concurrently
	 */
	for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
		chain = iroot->cluster.array[i].chain;
		if (chain) {
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
			hammer2_flush(&info.trans, chain);
			hammer2_chain_unlock(chain);
		}
	}
#if 0
	hammer2_trans_done(&info.trans);
#endif

	/*
	 * Flush all volume roots to synchronize PFS flushes with the
	 * storage media.  Use a super-root transaction for each one.
	 *
	 * The flush code will detect super-root -> pfs-root chain
	 * transitions using the last pfs-root flush.
	 */
	for (i = 0; iroot && i < iroot->cluster.nchains; ++i) {
		hammer2_chain_t *tmp;

		chain = iroot->cluster.array[i].chain;
		if (chain == NULL)
			continue;

		hmp = chain->hmp;

		/*
		 * We only have to flush each hmp once.
		 */
		for (j = i - 1; j >= 0; --j) {
			if ((tmp = iroot->cluster.array[j].chain) != NULL) {
				if (tmp->hmp == hmp)
					break;
			}
		}
		if (j >= 0)
			continue;
		hammer2_trans_spmp(&info.trans, hmp->spmp);

		/*
		 * Force an update of the XID from the PFS root to the
		 * topology root.  We couldn't do this from the PFS
		 * transaction because a SPMP transaction is needed.
		 * This does not modify blocks; it simply lets the flush
		 * code find the transition point and then update on the
		 * way back up.
		 */
		parent = chain->parent;
		KKASSERT(chain->pmp != parent->pmp);
		hammer2_chain_setflush(&info.trans, parent);

		/*
		 * Media mounts have two 'roots', vchain for the topology
		 * and fchain for the free block table.  Flush both.
		 *
		 * Note that the topology and free block table are handled
		 * independently, so the free block table can wind up being
		 * ahead of the topology.  We depend on the bulk free scan
		 * code to deal with any loose ends.
		 */
		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
			/*
			 * This will also modify vchain as a side effect;
			 * mark vchain as modified now.
			 */
			hammer2_voldata_modify(hmp);
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
		hammer2_chain_unlock(&hmp->vchain);

		hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
		if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
			chain = &hmp->vchain;
			hammer2_flush(&info.trans, chain);
			KKASSERT(chain == &hmp->vchain);
			force_fchain = 1;
		} else {
			force_fchain = 0;
		}
		hammer2_chain_unlock(&hmp->vchain);

#if 0
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if ((hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) ||
		    force_fchain) {
			/* this will also modify vchain as a side effect */
			chain = &hmp->fchain;
			hammer2_flush(&info.trans, chain);
			KKASSERT(chain == &hmp->fchain);
		}
		hammer2_chain_unlock(&hmp->fchain);
#endif

		error = 0;

		/*
		 * We can't safely flush the volume header until we have
		 * flushed any device buffers which have built up.
		 *
		 * XXX this isn't being incremental
		 */
		vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
		vn_unlock(hmp->devvp);

		/*
		 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
		 * volume header needs synchronization via hmp->volsync.
		 *
		 * XXX synchronize the flag & data with only this flush XXX
		 */
		if (error == 0 &&
		    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
			struct buf *bp;

			/*
			 * Synchronize the disk before flushing the volume
			 * header.
			 */
			bp = getpbuf(NULL);
			bp->b_bio1.bio_offset = 0;
			bp->b_bufsize = 0;
			bp->b_bcount = 0;
			bp->b_cmd = BUF_CMD_FLUSH;
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			vn_strategy(hmp->devvp, &bp->b_bio1);
			biowait(&bp->b_bio1, "h2vol");
			relpbuf(bp, NULL);

			/*
			 * Then we can safely flush the version of the
			 * volume header synchronized by the flush code.
			 */
			i = hmp->volhdrno + 1;
			if (i >= HAMMER2_NUM_VOLHDRS)
				i = 0;
			if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
			    hmp->volsync.volu_size) {
				i = 0;
			}
			kprintf("sync volhdr %d %jd\n",
				i, (intmax_t)hmp->volsync.volu_size);
			bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
				    HAMMER2_PBUFSIZE, 0, 0);
			atomic_clear_int(&hmp->vchain.flags,
					 HAMMER2_CHAIN_VOLUMESYNC);
			bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
			bawrite(bp);
			hmp->volhdrno = i;
		}
		if (error)
			total_error = error;

#if 0
		hammer2_trans_done(&info.trans);
#endif
	}
	hammer2_trans_done(&info.trans);

	return (total_error);
}
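
/*
 * A minimal sketch of the volume header rotation performed at the tail of
 * hammer2_vfs_sync() above (illustrative only, not compiled).  The next
 * header slot is chosen round-robin and a slot that would fall past the
 * end of a small volume wraps back to slot 0.
 */
#if 0
	int next = hmp->volhdrno + 1;

	if (next >= HAMMER2_NUM_VOLHDRS)
		next = 0;
	if (next * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
	    hmp->volsync.volu_size) {
		next = 0;
	}
	/* hmp->volsync is written to slot <next>, which becomes volhdrno */
	hmp->volhdrno = next;
#endif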

/*
 * Sync passes.
 */
static int
hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer2_sync_info *info = data;
	hammer2_inode_t *ip;
	int error;

	/*
	 * Degenerate cases: ignore vnodes without a hammer2 inode and
	 * vnodes in a VNON/VBAD state, as well as vnodes with nothing
	 * dirty to flush.
	 */
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);
	if (vp->v_type == VNON || vp->v_type == VBAD) {
		vclrisdirty(vp);
		return(0);
	}
	if ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree)) {
		vclrisdirty(vp);
		return(0);
	}

	/*
	 * VOP_FSYNC will start a new transaction so replicate some code
	 * here to do it inline (see hammer2_vop_fsync()).
	 *
	 * WARNING: The vfsync interacts with the buffer cache and might
	 *          block; we can't hold the inode lock at that time.
	 *          However, we MUST ref ip before blocking to ensure that
	 *          it isn't ripped out from under us (since we do not
	 *          hold a lock on the vnode).
	 */
	hammer2_inode_ref(ip);
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (vp)
		vfsync(vp, MNT_NOWAIT, 1, NULL, NULL);

	hammer2_inode_drop(ip);
#if 1
	error = 0;
	if (error)
		info->error = error;
#endif
	return(0);
}

static
int
hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	return (0);
}

static
int
hammer2_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
		   struct fid *fhp, struct vnode **vpp)
{
	return (0);
}

static
int
hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
		     int *exflagsp, struct ucred **credanonp)
{
	return (0);
}

/*
 * Support code for hammer2_vfs_mount().  Read, verify, and install the
 * volume header into the HMP.
 *
 * XXX read four volhdrs and use the one with the highest TID whose CRC
 *     matches.
 *
 * XXX check iCRCs.
 *
 * XXX For filesystems with fewer than 4 volhdrs, make sure to not write
 *     to nonexistent locations.
 *
 * XXX Record selected volhdr and ring updates to each of 4 volhdrs.
 */
static
int
hammer2_install_volume_header(hammer2_mount_t *hmp)
{
	hammer2_volume_data_t *vd;
	struct buf *bp;
	hammer2_crc32_t crc0, crc, bcrc0, bcrc;
	int error_reported;
	int error;
	int valid;
	int i;

	error_reported = 0;
	error = 0;
	valid = 0;
	bp = NULL;

	/*
	 * There are up to 4 copies of the volume header (syncs iterate
	 * between them so there is no single master).  We don't trust the
	 * volu_size field so we don't know precisely how large the
	 * filesystem is; instead we depend on the OS to return an error
	 * if we go beyond the block device's EOF.
	 */
	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
		error = bread(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
			      HAMMER2_VOLUME_BYTES, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		vd = (struct hammer2_volume_data *) bp->b_data;
		if ((vd->magic != HAMMER2_VOLUME_ID_HBO) &&
		    (vd->magic != HAMMER2_VOLUME_ID_ABO)) {
			brelse(bp);
			bp = NULL;
			continue;
		}

		if (vd->magic == HAMMER2_VOLUME_ID_ABO) {
			/* XXX: Reversed-endianness filesystem */
			kprintf("hammer2: reverse-endian filesystem "
				"detected\n");
			brelse(bp);
			bp = NULL;
			continue;
		}

		crc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT0];
		crc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC0_OFF,
				      HAMMER2_VOLUME_ICRC0_SIZE);
		bcrc = vd->icrc_sects[HAMMER2_VOL_ICRC_SECT1];
		bcrc0 = hammer2_icrc32(bp->b_data + HAMMER2_VOLUME_ICRC1_OFF,
				       HAMMER2_VOLUME_ICRC1_SIZE);
		if ((crc0 != crc) || (bcrc0 != bcrc)) {
			kprintf("hammer2 volume header crc "
				"mismatch copy #%d %08x/%08x\n",
				i, crc0, crc);
			error_reported = 1;
			brelse(bp);
			bp = NULL;
			continue;
		}
		if (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid) {
			valid = 1;
			hmp->voldata = *vd;
			hmp->volhdrno = i;
		}
		brelse(bp);
		bp = NULL;
	}
	if (valid) {
		hmp->volsync = hmp->voldata;
		error = 0;
		if (error_reported || bootverbose || 1) { /* 1/DEBUG */
			kprintf("hammer2: using volume header #%d\n",
				hmp->volhdrno);
		}
	} else {
		error = EINVAL;
		kprintf("hammer2: no valid volume headers found!\n");
	}
	return (error);
}
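
/*
 * A minimal sketch of the selection rule implemented above (illustrative
 * only, not compiled): every readable copy whose two section iCRCs match
 * is a candidate, and the candidate with the highest mirror_tid is the
 * one installed into hmp->voldata.
 */
#if 0
	if (crc == crc0 && bcrc == bcrc0 &&
	    (valid == 0 || hmp->voldata.mirror_tid < vd->mirror_tid)) {
		hmp->voldata = *vd;		/* adopt this copy */
		hmp->volhdrno = i;
		valid = 1;
	}
#endif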

/*
 * This handles hysteresis on regular file flushes.  Because the BIOs are
 * routed to a thread it is possible for an excessive number to build up
 * and cause long front-end stalls long before the runningbuffspace limit
 * is hit, so we implement hammer2_flush_pipe to control the hysteresis.
 *
 * This is a particular problem when compression is used.
 */
void
hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp)
{
	atomic_add_int(&pmp->count_lwinprog, 1);
}

void
hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	lwinprog = atomic_fetchadd_int(&pmp->count_lwinprog, -1);
	if ((lwinprog & HAMMER2_LWINPROG_WAITING) &&
	    (lwinprog & HAMMER2_LWINPROG_MASK) <= hammer2_flush_pipe * 2 / 3) {
		atomic_clear_int(&pmp->count_lwinprog,
				 HAMMER2_LWINPROG_WAITING);
		wakeup(&pmp->count_lwinprog);
	}
}

void
hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
{
	int lwinprog;

	for (;;) {
		lwinprog = pmp->count_lwinprog;
		cpu_ccfence();
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		tsleep_interlock(&pmp->count_lwinprog, 0);
		atomic_set_int(&pmp->count_lwinprog, HAMMER2_LWINPROG_WAITING);
		lwinprog = pmp->count_lwinprog;
		if ((lwinprog & HAMMER2_LWINPROG_MASK) < hammer2_flush_pipe)
			break;
		tsleep(&pmp->count_lwinprog, PINTERLOCKED, "h2wpipe", hz);
	}
}

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 */
void
hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
{
	uint32_t waiting;
	uint32_t count;
	uint32_t limit;
#if 0
	static int zzticks;
#endif

	/*
	 * Atomically check the condition and wait.  Also do an early
	 * speedup of the syncer to try to avoid hitting the wait.
	 */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		count = waiting & HAMMER2_DIRTYCHAIN_MASK;

		limit = pmp->mp->mnt_nvnodelistsize / 10;
		if (limit < hammer2_limit_dirty_chains)
			limit = hammer2_limit_dirty_chains;
		if (limit < 1000)
			limit = 1000;

#if 0
		if ((int)(ticks - zzticks) > hz) {
			zzticks = ticks;
			kprintf("count %u %u\n", count, limit);
		}
#endif

		/*
		 * Block if there are too many dirty chains present, wait
		 * for the flush to clean some out.
		 */
		if (count > limit) {
			tsleep_interlock(&pmp->inmem_dirty_chains, 0);
			if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
					      waiting,
					      waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
				speedup_syncer(pmp->mp);
				tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
				       "chnmem", hz);
			}
			continue;	/* loop on success or fail */
		}

		/*
		 * Try to start an early flush before we are forced to block.
		 */
		if (count > limit * 7 / 10)
			speedup_syncer(pmp->mp);
		break;
	}
}

void
hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
{
	if (pmp) {
		atomic_add_int(&pmp->inmem_dirty_chains, 1);
	}
}

void
hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp)
{
	uint32_t waiting;

	if (pmp == NULL)
		return;

	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		if (atomic_cmpset_int(&pmp->inmem_dirty_chains,
				      waiting,
				      (waiting - 1) &
				      ~HAMMER2_DIRTYCHAIN_WAITING)) {
			break;
		}
	}

	if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
		wakeup(&pmp->inmem_dirty_chains);
}

/*
 * Debugging
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *parent;

	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n",
		tab, tab, "", pfx,
		chain, chain->bref.type,
		chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s [%08x] (%s) refs=%d\n",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		  chain->data) ? (char *)chain->data->ipdata.filename : "?"),
		chain->refs);

	kprintf("%*.*s core [%08x]",
		tab, tab, "",
		chain->core.flags);

	parent = chain->parent;
	if (parent)
		kprintf("\n%*.*s p=%p [pflags %08x prefs %d]",
			tab, tab, "",
			parent, parent->flags, parent->refs);
	if (RB_EMPTY(&chain->core.rbtree)) {
		kprintf("\n");
	} else {
		kprintf(" {\n");
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree)
			hammer2_dump_chain(scan, tab + 4, countp, 'a');
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}
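
/*
 * A minimal sketch of how the dirty-chain accounting above is meant to be
 * paired by its callers (illustrative only, not compiled; the real call
 * sites live in the chain/inode code rather than in this file).  A
 * producer waits for room and bumps the count before dirtying a chain;
 * whoever cleans the chain decrements the count and wakes any waiters.
 */
#if 0
	/* producer side, before dirtying a chain */
	hammer2_pfs_memory_wait(pmp);
	hammer2_pfs_memory_inc(pmp);

	/* consumer side, after a dirty chain has been flushed or discarded */
	hammer2_pfs_memory_wakeup(pmp);
#endif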