1 /* 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! local ipdata pointers must be reloaded on any modifying operation 40 * to the inode as its underlying chain may have changed. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/mountctl.h> 53 #include <sys/dirent.h> 54 #include <sys/uio.h> 55 #include <sys/objcache.h> 56 57 #include "hammer2.h" 58 #include "hammer2_lz4.h" 59 60 #include "zlib/hammer2_zlib.h" 61 62 #define ZFOFFSET (-2LL) 63 64 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 65 int seqcount); 66 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 67 int ioflag, int seqcount); 68 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 69 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 70 static void hammer2_decompress_LZ4_callback(struct bio *bio); 71 static void hammer2_decompress_ZLIB_callback(struct bio *bio); 72 73 struct objcache *cache_buffer_read; 74 struct objcache *cache_buffer_write; 75 76 /* 77 * Callback used in read path in case that a block is compressed with LZ4. 78 */ 79 static 80 void 81 hammer2_decompress_LZ4_callback(struct bio *bio) 82 { 83 struct buf *bp = bio->bio_buf; 84 struct buf *obp; 85 struct bio *obio; 86 int loff; 87 88 /* 89 * If BIO_DONE is already set the device buffer was already 90 * fully valid (B_CACHE). 
	 * If it is not set then I/O was issued
	 * and we have to run I/O completion as the last bio.
	 *
	 * Nobody is waiting for our device I/O to complete, we are
	 * responsible for bqrelse()ing it which means we also have to do
	 * the equivalent of biowait() and clear BIO_DONE (which breadcb()
	 * may have set).
	 *
	 * Any preexisting device buffer should match the requested size,
	 * but due to bigblock recycling and other factors there is some
	 * fragility there, so we assert that the device buffer covers
	 * the request.
	 */
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(bp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);

	obio = bio->bio_caller_info1.ptr;
	obp = obio->bio_buf;
	loff = obio->bio_caller_info3.value;

	if (bp->b_flags & B_ERROR) {
		obp->b_flags |= B_ERROR;
		obp->b_error = bp->b_error;
	} else if (obio->bio_caller_info2.index &&
		   obio->bio_caller_info1.uvalue32 !=
		    crc32(bp->b_data, bp->b_bufsize)) {
		obp->b_flags |= B_ERROR;
		obp->b_error = EIO;
	} else {
		KKASSERT(obp->b_bufsize <= 65536);

		char *buffer;
		char *compressed_buffer;
		int *compressed_size;

		buffer = bp->b_data + loff;
		compressed_size = (int*)buffer;
		compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
		KKASSERT((unsigned int)*compressed_size <= 65536);
		int result = LZ4_decompress_safe(&buffer[sizeof(int)],
				compressed_buffer, *compressed_size,
				obp->b_bufsize);
		if (result < 0) {
			kprintf("READ PATH: Error during decompression. "
				"bio %016jx/%d loff=%d\n",
				(intmax_t)bio->bio_offset,
				bio->bio_buf->b_bufsize, loff);
			/* make sure it isn't random garbage */
			bzero(compressed_buffer, obp->b_bufsize);
		}
		KKASSERT(result <= obp->b_bufsize);
		bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
		if (result < obp->b_bufsize)
			bzero(obp->b_data + result, obp->b_bufsize - result);
		objcache_put(cache_buffer_read, compressed_buffer);
		obp->b_resid = 0;
		obp->b_flags |= B_AGE;
	}
	biodone(obio);
	bqrelse(bp);
}

/*
 * Callback used in read path in case that a block is compressed with ZLIB.
 * It is almost identical to the LZ4 callback, so in theory they could be
 * unified, but we didn't want to change the bio structure for that.
 */
static
void
hammer2_decompress_ZLIB_callback(struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	struct buf *obp;
	struct bio *obio;
	int loff;

	/*
	 * If BIO_DONE is already set the device buffer was already
	 * fully valid (B_CACHE).  If it is not set then I/O was issued
	 * and we have to run I/O completion as the last bio.
	 *
	 * Nobody is waiting for our device I/O to complete, we are
	 * responsible for bqrelse()ing it which means we also have to do
	 * the equivalent of biowait() and clear BIO_DONE (which breadcb()
	 * may have set).
	 *
	 * Any preexisting device buffer should match the requested size,
	 * but due to bigblock recycling and other factors there is some
	 * fragility there, so we assert that the device buffer covers
	 * the request.
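	 *
	 * The decompression below is a single-shot zlib inflate over the
	 * compressed extent.  A minimal sketch of the call sequence used
	 * (illustrative only; error handling elided, "out"/"outsize" are
	 * hypothetical stand-ins for the destination buffer):
	 *
	 *	z_stream strm;
	 *	bzero(&strm, sizeof(strm));
	 *	inflateInit(&strm);
	 *	strm.next_in   = bp->b_data + loff;
	 *	strm.avail_in  = bp->b_bufsize - loff;
	 *	strm.next_out  = out;
	 *	strm.avail_out = outsize;
	 *	ret = inflate(&strm, Z_FINISH);		-- expect Z_STREAM_END
	 *	inflateEnd(&strm);
	 *
	 * Unlike the LZ4 path, which expects a 4-byte payload-length header
	 * in front of the compressed data, the zlib stream is
	 * self-terminating and is inflated straight out of the device
	 * buffer subset.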
	 */
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(bp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);

	obio = bio->bio_caller_info1.ptr;
	obp = obio->bio_buf;
	loff = obio->bio_caller_info3.value;

	if (bp->b_flags & B_ERROR) {
		obp->b_flags |= B_ERROR;
		obp->b_error = bp->b_error;
	} else if (obio->bio_caller_info2.index &&
		   obio->bio_caller_info1.uvalue32 !=
		    crc32(bp->b_data, bp->b_bufsize)) {
		obp->b_flags |= B_ERROR;
		obp->b_error = EIO;
	} else {
		KKASSERT(obp->b_bufsize <= 65536);

		char *buffer;
		char *compressed_buffer;
		int ret;

		z_stream strm_decompress;

		strm_decompress.avail_in = 0;
		strm_decompress.next_in = Z_NULL;

		ret = inflateInit(&strm_decompress);

		if (ret != Z_OK)
			kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");

		buffer = bp->b_data + loff;
		compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
		strm_decompress.next_in = buffer;

		/* XXX supply proper size, subset of device bp */
		strm_decompress.avail_in = bp->b_bufsize - loff;
		strm_decompress.next_out = compressed_buffer;
		strm_decompress.avail_out = obp->b_bufsize;

		ret = inflate(&strm_decompress, Z_FINISH);
		if (ret != Z_STREAM_END) {
			kprintf("HAMMER2 ZLIB: Fatal error during decompression.\n");
			bzero(compressed_buffer, obp->b_bufsize);
		}
		bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
		int result = obp->b_bufsize - strm_decompress.avail_out;
		if (result < obp->b_bufsize)
			bzero(obp->b_data + result, strm_decompress.avail_out);
		objcache_put(cache_buffer_read, compressed_buffer);
		obp->b_resid = 0;
		obp->b_flags |= B_AGE;
		ret = inflateEnd(&strm_decompress);
	}
	biodone(obio);
	bqrelse(bp);
}

static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(vp);
		return (0);
	}

	/*
	 * Detect updates to the embedded data which may be synchronized by
	 * the strategy code.  Simply mark the inode modified so it gets
	 * picked up by our normal flush.
	 */
	parent = hammer2_inode_lock_ex(ip);
	KKASSERT(parent);

	/*
	 * Check for deleted inodes and recycle immediately.
	 */
	if (parent->flags & HAMMER2_CHAIN_DELETED) {
		hammer2_inode_unlock_ex(ip, parent);
		vrecycle(vp);
	} else {
		hammer2_inode_unlock_ex(ip, parent);
	}
	return (0);
}

/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_chain_t *chain;
	hammer2_inode_t *ip;
#if 0
	hammer2_trans_t trans;
#endif
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);

	/*
	 * Set SUBMODIFIED so we can detect and propagate the DESTROYED
	 * bit in the flush code.
313 * 314 * ip->chain might be stale, correct it before checking as older 315 * versions of the chain are likely marked deleted even if the 316 * file hasn't been. XXX ip->chain should never be stale on 317 * reclaim. 318 */ 319 chain = hammer2_inode_lock_ex(ip); 320 #if 0 321 if (chain->next_parent) 322 kprintf("RECLAIM DUPLINKED IP: %p ip->ch=%p ch=%p np=%p\n", 323 ip, ip->chain, chain, chain->next_parent); 324 #endif 325 326 /* 327 * The final close of a deleted file or directory marks it for 328 * destruction. The DESTROYED flag allows the flusher to shortcut 329 * any modified blocks still unflushed (that is, just ignore them). 330 * 331 * HAMMER2 usually does not try to optimize the freemap by returning 332 * deleted blocks to it as it does not usually know how many snapshots 333 * might be referencing portions of the file/dir. XXX TODO. 334 * 335 * XXX TODO - However, any modified file as-of when a snapshot is made 336 * cannot use this optimization as some of the modifications 337 * may wind up being part of the snapshot. 338 */ 339 vp->v_data = NULL; 340 ip->vp = NULL; 341 if (chain->flags & HAMMER2_CHAIN_DELETED) { 342 KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); 343 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED | 344 HAMMER2_CHAIN_SUBMODIFIED); 345 } 346 #if 0 347 /* 348 * XXX chains will be flushed on sync, no need to do it here. 349 */ 350 if (chain->flags & (HAMMER2_CHAIN_MODIFIED | 351 HAMMER2_CHAIN_DELETED | 352 HAMMER2_CHAIN_SUBMODIFIED)) { 353 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 354 hammer2_chain_flush(&trans, chain); 355 hammer2_trans_done(&trans); 356 } 357 #endif 358 hammer2_inode_unlock_ex(ip, chain); /* unlock */ 359 hammer2_inode_drop(ip); /* vp ref */ 360 /* chain no longer referenced */ 361 /* chain = NULL; not needed */ 362 363 /* 364 * XXX handle background sync when ip dirty, kernel will no longer 365 * notify us regarding this inode because there is no longer a 366 * vnode attached to it. 367 */ 368 369 return (0); 370 } 371 372 static 373 int 374 hammer2_vop_fsync(struct vop_fsync_args *ap) 375 { 376 hammer2_inode_t *ip; 377 hammer2_trans_t trans; 378 hammer2_chain_t *chain; 379 struct vnode *vp; 380 381 vp = ap->a_vp; 382 ip = VTOI(vp); 383 384 /* 385 * WARNING: The vfsync interacts with the buffer cache and might 386 * block, we can't hold the inode lock and we can't 387 * have a flush transaction pending. 388 */ 389 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 390 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 391 392 /* 393 * Calling chain_flush here creates a lot of duplicative 394 * COW operations due to non-optimal vnode ordering. 395 * 396 * Only do it for an actual fsync() syscall. The other forms 397 * which call this function will eventually call chain_flush 398 * on the volume root as a catch-all, which is far more optimal. 
399 */ 400 chain = hammer2_inode_lock_ex(ip); 401 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 402 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME)) 403 hammer2_inode_fsync(&trans, ip, &chain); 404 405 if (ap->a_flags & VOP_FSYNC_SYSCALL) { 406 hammer2_chain_flush(&trans, chain); 407 } 408 hammer2_inode_unlock_ex(ip, chain); 409 hammer2_trans_done(&trans); 410 411 return (0); 412 } 413 414 static 415 int 416 hammer2_vop_access(struct vop_access_args *ap) 417 { 418 hammer2_inode_t *ip = VTOI(ap->a_vp); 419 hammer2_inode_data_t *ipdata; 420 hammer2_chain_t *chain; 421 uid_t uid; 422 gid_t gid; 423 int error; 424 425 chain = hammer2_inode_lock_sh(ip); 426 ipdata = &chain->data->ipdata; 427 uid = hammer2_to_unix_xid(&ipdata->uid); 428 gid = hammer2_to_unix_xid(&ipdata->gid); 429 error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); 430 hammer2_inode_unlock_sh(ip, chain); 431 432 return (error); 433 } 434 435 static 436 int 437 hammer2_vop_getattr(struct vop_getattr_args *ap) 438 { 439 hammer2_inode_data_t *ipdata; 440 hammer2_chain_t *chain; 441 hammer2_pfsmount_t *pmp; 442 hammer2_inode_t *ip; 443 struct vnode *vp; 444 struct vattr *vap; 445 446 vp = ap->a_vp; 447 vap = ap->a_vap; 448 449 ip = VTOI(vp); 450 pmp = ip->pmp; 451 452 chain = hammer2_inode_lock_sh(ip); 453 ipdata = &chain->data->ipdata; 454 455 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 456 vap->va_fileid = ipdata->inum; 457 vap->va_mode = ipdata->mode; 458 vap->va_nlink = ipdata->nlinks; 459 vap->va_uid = hammer2_to_unix_xid(&ipdata->uid); 460 vap->va_gid = hammer2_to_unix_xid(&ipdata->gid); 461 vap->va_rmajor = 0; 462 vap->va_rminor = 0; 463 vap->va_size = ip->size; /* protected by shared lock */ 464 vap->va_blocksize = HAMMER2_PBUFSIZE; 465 vap->va_flags = ipdata->uflags; 466 hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime); 467 hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime); 468 hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); 469 vap->va_gen = 1; 470 vap->va_bytes = vap->va_size; /* XXX */ 471 vap->va_type = hammer2_get_vtype(chain); 472 vap->va_filerev = 0; 473 vap->va_uid_uuid = ipdata->uid; 474 vap->va_gid_uuid = ipdata->gid; 475 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 476 VA_FSID_UUID_VALID; 477 478 hammer2_inode_unlock_sh(ip, chain); 479 480 return (0); 481 } 482 483 static 484 int 485 hammer2_vop_setattr(struct vop_setattr_args *ap) 486 { 487 hammer2_inode_data_t *ipdata; 488 hammer2_inode_t *ip; 489 hammer2_chain_t *chain; 490 hammer2_trans_t trans; 491 struct vnode *vp; 492 struct vattr *vap; 493 int error; 494 int kflags = 0; 495 int domtime = 0; 496 uint64_t ctime; 497 498 vp = ap->a_vp; 499 vap = ap->a_vap; 500 hammer2_update_time(&ctime); 501 502 ip = VTOI(vp); 503 504 if (ip->pmp->ronly) 505 return(EROFS); 506 507 hammer2_chain_memory_wait(ip->pmp); 508 hammer2_trans_init(&trans, ip->pmp, 0); 509 chain = hammer2_inode_lock_ex(ip); 510 ipdata = &chain->data->ipdata; 511 error = 0; 512 513 if (vap->va_flags != VNOVAL) { 514 u_int32_t flags; 515 516 flags = ipdata->uflags; 517 error = vop_helper_setattr_flags(&flags, vap->va_flags, 518 hammer2_to_unix_xid(&ipdata->uid), 519 ap->a_cred); 520 if (error == 0) { 521 if (ipdata->uflags != flags) { 522 ipdata = hammer2_chain_modify_ip(&trans, ip, 523 &chain, 0); 524 ipdata->uflags = flags; 525 ipdata->ctime = ctime; 526 kflags |= NOTE_ATTRIB; 527 } 528 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 529 error = 0; 530 goto done; 531 } 532 } 533 goto done; 534 } 535 if (ipdata->uflags & 
(IMMUTABLE | APPEND)) { 536 error = EPERM; 537 goto done; 538 } 539 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 540 mode_t cur_mode = ipdata->mode; 541 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 542 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 543 uuid_t uuid_uid; 544 uuid_t uuid_gid; 545 546 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 547 ap->a_cred, 548 &cur_uid, &cur_gid, &cur_mode); 549 if (error == 0) { 550 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 551 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 552 if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) || 553 bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) || 554 ipdata->mode != cur_mode 555 ) { 556 ipdata = hammer2_chain_modify_ip(&trans, ip, 557 &chain, 0); 558 ipdata->uid = uuid_uid; 559 ipdata->gid = uuid_gid; 560 ipdata->mode = cur_mode; 561 ipdata->ctime = ctime; 562 } 563 kflags |= NOTE_ATTRIB; 564 } 565 } 566 567 /* 568 * Resize the file 569 */ 570 if (vap->va_size != VNOVAL && ip->size != vap->va_size) { 571 switch(vp->v_type) { 572 case VREG: 573 if (vap->va_size == ip->size) 574 break; 575 hammer2_inode_unlock_ex(ip, chain); 576 if (vap->va_size < ip->size) { 577 hammer2_truncate_file(ip, vap->va_size); 578 } else { 579 hammer2_extend_file(ip, vap->va_size); 580 } 581 chain = hammer2_inode_lock_ex(ip); 582 ipdata = &chain->data->ipdata; /* RELOAD */ 583 domtime = 1; 584 break; 585 default: 586 error = EINVAL; 587 goto done; 588 } 589 } 590 #if 0 591 /* atime not supported */ 592 if (vap->va_atime.tv_sec != VNOVAL) { 593 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 594 ipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 595 kflags |= NOTE_ATTRIB; 596 } 597 #endif 598 if (vap->va_mtime.tv_sec != VNOVAL) { 599 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 600 ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 601 kflags |= NOTE_ATTRIB; 602 } 603 if (vap->va_mode != (mode_t)VNOVAL) { 604 mode_t cur_mode = ipdata->mode; 605 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 606 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 607 608 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 609 cur_uid, cur_gid, &cur_mode); 610 if (error == 0 && ipdata->mode != cur_mode) { 611 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 612 ipdata->mode = cur_mode; 613 ipdata->ctime = ctime; 614 kflags |= NOTE_ATTRIB; 615 } 616 } 617 618 /* 619 * If a truncation occurred we must call inode_fsync() now in order 620 * to trim the related data chains, otherwise a later expansion can 621 * cause havoc. 
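	 *
	 * Example (hypothetical): a setattr truncating a 1MB file to 4KB,
	 * followed later by an extension back to 1MB.  Without the
	 * inode_fsync() here the stale data chains beyond 4KB would still
	 * be indexed when the expansion occurs.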
	 */
	hammer2_inode_fsync(&trans, ip, &chain);
done:
	hammer2_inode_unlock_ex(ip, chain);
	hammer2_trans_done(&trans);
	return (error);
}

static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	hammer2_inode_data_t *ipdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_chain_t *xchain;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int cache_index = -1;
	int ncookies;
	int error;
	int dtype;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Set up directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	parent = hammer2_inode_lock_sh(ip);
	ipdata = &parent->data->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	chain = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			hammer2_inode_unlock_sh(ip, parent);
			xchain = hammer2_inode_lock_sh(xip);
			parent = hammer2_inode_lock_sh(ip);
			hammer2_inode_drop(xip);
			if (xip == ip->pip) {
				inum = xchain->data->ipdata.inum &
				       HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock_sh(xip, xchain);
				break;
			}
			hammer2_inode_unlock_sh(xip, xchain);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;

	/*
	 * parent is the inode chain, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
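	 *
	 * The resulting directory offset space looks roughly like this
	 * (a sketch derived from the artificial-entry handling above):
	 *
	 *	saveoff 0		'.'  (artificial entry)
	 *	saveoff 1		'..' (artificial entry)
	 *	saveoff >= 0x8000	real entries keyed by dirhash, with
	 *				HAMMER2_DIRHASH_VISIBLE (bit 63)
	 *				stripped before being returned in
	 *				uio_offset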
	 */
	if (error) {
		goto done;
	}
	chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey,
				     &cache_index, HAMMER2_LOOKUP_SHARED);
	if (chain == NULL) {
		chain = hammer2_chain_lookup(&parent, &key_next,
					     lkey, (hammer2_key_t)-1,
					     &cache_index,
					     HAMMER2_LOOKUP_SHARED);
	}
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			dtype = hammer2_get_dtype(chain);
			saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     chain->data->ipdata.inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     chain->data->ipdata.name_len,
					     chain->data->ipdata.filename);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n",
				chain->bref.type);
		}

		/*
		 * Keys may not be returned in order so once we have a
		 * placemarker (chain) the scan must allow the full range
		 * or some entries will be missed.
		 */
		chain = hammer2_chain_next(&parent, chain, &key_next,
					   key_next, (hammer2_key_t)-1,
					   &cache_index, HAMMER2_LOOKUP_SHARED);
		if (chain) {
			saveoff = (chain->bref.key &
				   HAMMER2_DIRHASH_USERMSK) + 1;
		} else {
			saveoff = (hammer2_key_t)-1;
		}
		if (cookie_index == ncookies)
			break;
	}
	if (chain)
		hammer2_chain_unlock(chain);
done:
	hammer2_inode_unlock_sh(ip, parent);
	if (ap->a_eofflag)
		*ap->a_eofflag = (chain == NULL);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
	if (error && cookie_index == 0) {
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return (error);
}

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	int error;

	vp = ap->a_vp;
	if (vp->v_type != VLNK)
		return (EINVAL);
	ip = VTOI(vp);

	error = hammer2_read_file(ip, ap->a_uio, 0);
	return (error);
}

static
int
hammer2_vop_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	struct uio *uio;
	int error;
	int seqcount;
	int bigread;

	/*
	 * Read operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

	seqcount = ap->a_ioflag >> 16;
	bigread = (uio->uio_resid > 100 * 1024 * 1024);

	error = hammer2_read_file(ip, uio, seqcount);
	return (error);
}

static
int
hammer2_vop_write(struct vop_write_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	thread_t td;
	struct vnode *vp;
	struct uio *uio;
	int error;
	int seqcount;
	int bigwrite;

	/*
	 * Write operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;
	if (ip->pmp->ronly)
		return (EROFS);

	seqcount = ap->a_ioflag >> 16;
	bigwrite = (uio->uio_resid > 100 * 1024 * 1024);

	/*
	 * Check resource limit
	 */
	if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
	    uio->uio_offset + uio->uio_resid >
	     td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * The transaction interlocks against flush initiations
	 * (note: but will run concurrently with the actual flush).
	 */
	hammer2_trans_init(&trans, ip->pmp, 0);
	error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
	hammer2_trans_done(&trans);

	return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.
 *
 * The passed ip is not locked.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
	hammer2_off_t size;
	struct buf *bp;
	int error;

	error = 0;

	/*
	 * UIO read loop.
	 */
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	size = ip->size;
	ccms_thread_unlock(&ip->topo_cst);

	while (uio->uio_resid > 0 && uio->uio_offset < size) {
		hammer2_key_t lbase;
		hammer2_key_t leof;
		int lblksize;
		int loff;
		int n;

		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, &leof);

		error = cluster_read(ip->vp, leof, lbase, lblksize,
				     uio->uio_resid, seqcount * BKVASIZE,
				     &bp);

		if (error)
			break;
		loff = (int)(uio->uio_offset - lbase);
		n = lblksize - loff;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > size - uio->uio_offset)
			n = (int)(size - uio->uio_offset);
		bp->b_flags |= B_AGE;
		uiomove((char *)bp->b_data + loff, n, uio);
		bqrelse(bp);
	}
	return (error);
}

/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip,
		   struct uio *uio, int ioflag, int seqcount)
{
	hammer2_key_t old_eof;
	hammer2_key_t new_eof;
	struct buf *bp;
	int kflags;
	int error;
	int modified;

	/*
	 * Setup if append
	 */
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->size;
	old_eof = ip->size;
	ccms_thread_unlock(&ip->topo_cst);

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
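	 *
	 * E.g. (hypothetical numbers) an 8192 byte write at offset 1000
	 * into a 100 byte file sets new_eof = 9192 before the loop runs;
	 * if uiomove() fails partway through, the cleanup code at the
	 * bottom truncates the file back to old_eof = 100.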
1002 */ 1003 kflags = 0; 1004 error = 0; 1005 modified = 0; 1006 1007 if (uio->uio_offset + uio->uio_resid > old_eof) { 1008 new_eof = uio->uio_offset + uio->uio_resid; 1009 modified = 1; 1010 hammer2_extend_file(ip, new_eof); 1011 kflags |= NOTE_EXTEND; 1012 } else { 1013 new_eof = old_eof; 1014 } 1015 1016 /* 1017 * UIO write loop 1018 */ 1019 while (uio->uio_resid > 0) { 1020 hammer2_key_t lbase; 1021 int trivial; 1022 int lblksize; 1023 int loff; 1024 int n; 1025 int rem_size; 1026 1027 /* 1028 * Don't allow the buffer build to blow out the buffer 1029 * cache. 1030 */ 1031 if ((ioflag & IO_RECURSE) == 0) 1032 bwillwrite(HAMMER2_PBUFSIZE); 1033 1034 /* 1035 * This nominally tells us how much we can cluster and 1036 * what the logical buffer size needs to be. Currently 1037 * we don't try to cluster the write and just handle one 1038 * block at a time. 1039 */ 1040 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 1041 &lbase, NULL); 1042 loff = (int)(uio->uio_offset - lbase); 1043 1044 if (uio->uio_resid < lblksize) { 1045 rem_size = (int)uio->uio_resid; 1046 } 1047 else { 1048 rem_size = 0; 1049 } 1050 1051 KKASSERT(lblksize <= 65536); 1052 1053 /* 1054 * Calculate bytes to copy this transfer and whether the 1055 * copy completely covers the buffer or not. 1056 */ 1057 trivial = 0; 1058 n = lblksize - loff; 1059 if (n > uio->uio_resid) { 1060 n = uio->uio_resid; 1061 if (loff == lbase && uio->uio_offset + n == new_eof) 1062 trivial = 1; 1063 } else if (loff == 0) { 1064 trivial = 1; 1065 } 1066 1067 /* 1068 * Get the buffer 1069 */ 1070 if (uio->uio_segflg == UIO_NOCOPY) { 1071 /* 1072 * Issuing a write with the same data backing the 1073 * buffer. Instantiate the buffer to collect the 1074 * backing vm pages, then read-in any missing bits. 1075 * 1076 * This case is used by vop_stdputpages(). 1077 */ 1078 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1079 if ((bp->b_flags & B_CACHE) == 0) { 1080 bqrelse(bp); 1081 error = bread(ip->vp, lbase, lblksize, &bp); 1082 } 1083 } else if (trivial) { 1084 /* 1085 * Even though we are entirely overwriting the buffer 1086 * we may still have to zero it out to avoid a 1087 * mmap/write visibility issue. 1088 */ 1089 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1090 if ((bp->b_flags & B_CACHE) == 0) 1091 vfs_bio_clrbuf(bp); 1092 } else { 1093 /* 1094 * Partial overwrite, read in any missing bits then 1095 * replace the portion being written. 1096 * 1097 * (The strategy code will detect zero-fill physical 1098 * blocks for this case). 1099 */ 1100 error = bread(ip->vp, lbase, lblksize, &bp); 1101 if (error == 0) 1102 bheavy(bp); 1103 } 1104 1105 if (error) { 1106 brelse(bp); 1107 break; 1108 } 1109 1110 /* 1111 * Ok, copy the data in 1112 */ 1113 error = uiomove(bp->b_data + loff, n, uio); 1114 kflags |= NOTE_WRITE; 1115 modified = 1; 1116 if (error) { 1117 brelse(bp); 1118 break; 1119 } 1120 bdwrite(bp); 1121 if (error) 1122 break; 1123 } 1124 1125 /* 1126 * Cleanup. If we extended the file EOF but failed to write through 1127 * the entire write is a failure and we have to back-up. 
1128 */ 1129 if (error && new_eof != old_eof) { 1130 hammer2_truncate_file(ip, old_eof); 1131 } else if (modified) { 1132 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1133 hammer2_update_time(&ip->mtime); 1134 atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME); 1135 ccms_thread_unlock(&ip->topo_cst); 1136 } 1137 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1138 hammer2_knote(ip->vp, kflags); 1139 1140 return error; 1141 } 1142 1143 /* 1144 * Truncate the size of a file. The inode must not be locked. 1145 */ 1146 static 1147 void 1148 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1149 { 1150 hammer2_key_t lbase; 1151 int nblksize; 1152 1153 if (ip->vp) { 1154 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1155 nvtruncbuf(ip->vp, nsize, 1156 nblksize, (int)nsize & (nblksize - 1), 1157 0); 1158 } 1159 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1160 ip->size = nsize; 1161 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1162 ccms_thread_unlock(&ip->topo_cst); 1163 } 1164 1165 /* 1166 * Extend the size of a file. The inode must not be locked. 1167 */ 1168 static 1169 void 1170 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1171 { 1172 hammer2_key_t lbase; 1173 hammer2_key_t osize; 1174 int oblksize; 1175 int nblksize; 1176 1177 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1178 osize = ip->size; 1179 ip->size = nsize; 1180 ccms_thread_unlock(&ip->topo_cst); 1181 1182 if (ip->vp) { 1183 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL); 1184 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1185 nvextendbuf(ip->vp, 1186 osize, nsize, 1187 oblksize, nblksize, 1188 -1, -1, 0); 1189 } 1190 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1191 } 1192 1193 static 1194 int 1195 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1196 { 1197 hammer2_inode_t *ip; 1198 hammer2_inode_t *dip; 1199 hammer2_chain_t *parent; 1200 hammer2_chain_t *chain; 1201 hammer2_chain_t *ochain; 1202 hammer2_trans_t trans; 1203 hammer2_key_t key_next; 1204 hammer2_key_t lhc; 1205 struct namecache *ncp; 1206 const uint8_t *name; 1207 size_t name_len; 1208 int error = 0; 1209 int cache_index = -1; 1210 struct vnode *vp; 1211 1212 dip = VTOI(ap->a_dvp); 1213 ncp = ap->a_nch->ncp; 1214 name = ncp->nc_name; 1215 name_len = ncp->nc_nlen; 1216 lhc = hammer2_dirhash(name, name_len); 1217 1218 /* 1219 * Note: In DragonFly the kernel handles '.' and '..'. 1220 */ 1221 parent = hammer2_inode_lock_sh(dip); 1222 chain = hammer2_chain_lookup(&parent, &key_next, 1223 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1224 &cache_index, HAMMER2_LOOKUP_SHARED); 1225 while (chain) { 1226 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 1227 name_len == chain->data->ipdata.name_len && 1228 bcmp(name, chain->data->ipdata.filename, name_len) == 0) { 1229 break; 1230 } 1231 chain = hammer2_chain_next(&parent, chain, &key_next, 1232 key_next, 1233 lhc + HAMMER2_DIRHASH_LOMASK, 1234 &cache_index, HAMMER2_LOOKUP_SHARED); 1235 } 1236 hammer2_inode_unlock_sh(dip, parent); 1237 1238 /* 1239 * If the inode represents a forwarding entry for a hardlink we have 1240 * to locate the actual inode. The original ip is saved for possible 1241 * deconsolidation. (ip) will only be set to non-NULL when we have 1242 * to locate the real file via a hardlink. ip will be referenced but 1243 * not locked in that situation. chain is passed in locked and 1244 * returned locked. 1245 * 1246 * XXX what kind of chain lock? 
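	 *
	 * Rough example, as assumed from the lookup logic here: after
	 * "ln a b" both directory entries are OBJTYPE_HARDLINK forwarders
	 * and the real inode is a hidden hardlink target in a common
	 * parent directory, so resolving either name passes through
	 * hammer2_hardlink_find().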
	 */
	ochain = NULL;
	if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		error = hammer2_hardlink_find(dip, &chain, &ochain);
		if (error) {
			kprintf("hammer2: unable to find hardlink\n");
			if (chain) {
				hammer2_chain_unlock(chain);
				chain = NULL;
			}
			goto failed;
		}
	}

	/*
	 * Deconsolidate any hardlink whose nlinks == 1.  Ignore errors.
	 * If an error occurs chain and ip are left alone.
	 *
	 * XXX upgrade shared lock?
	 */
	if (ochain && chain &&
	    chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) {
		kprintf("hammer2: need to unconsolidate hardlink for %s\n",
			chain->data->ipdata.filename);
		/* XXX retain shared lock on dip? (currently not held) */
		hammer2_trans_init(&trans, dip->pmp, 0);
		hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
		hammer2_trans_done(&trans);
	}

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *	 same chain element, for example for hardlinks.  This
	 *	 use case does not 'reattach' inode associations that
	 *	 might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock_ex()
	 *	    will handle it properly.
	 */
	if (chain) {
		ip = hammer2_inode_get(dip->pmp, dip, chain);
		vp = hammer2_igetv(ip, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock_ex(ip, chain);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
1308 */ 1309 if (vp) 1310 vrele(vp); 1311 } else { 1312 error = ENOENT; 1313 cache_setvp(ap->a_nch, NULL); 1314 } 1315 failed: 1316 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1317 ("resolve error %d/%p chain %p ap %p\n", 1318 error, ap->a_nch->ncp->nc_vp, chain, ap)); 1319 if (ochain) 1320 hammer2_chain_drop(ochain); 1321 return error; 1322 } 1323 1324 static 1325 int 1326 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1327 { 1328 hammer2_inode_t *dip; 1329 hammer2_inode_t *ip; 1330 hammer2_chain_t *parent; 1331 int error; 1332 1333 dip = VTOI(ap->a_dvp); 1334 1335 if ((ip = dip->pip) == NULL) { 1336 *ap->a_vpp = NULL; 1337 return ENOENT; 1338 } 1339 parent = hammer2_inode_lock_ex(ip); 1340 *ap->a_vpp = hammer2_igetv(ip, &error); 1341 hammer2_inode_unlock_ex(ip, parent); 1342 1343 return error; 1344 } 1345 1346 static 1347 int 1348 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1349 { 1350 hammer2_inode_t *dip; 1351 hammer2_inode_t *nip; 1352 hammer2_trans_t trans; 1353 hammer2_chain_t *chain; 1354 struct namecache *ncp; 1355 const uint8_t *name; 1356 size_t name_len; 1357 int error; 1358 1359 dip = VTOI(ap->a_dvp); 1360 if (dip->pmp->ronly) 1361 return (EROFS); 1362 1363 ncp = ap->a_nch->ncp; 1364 name = ncp->nc_name; 1365 name_len = ncp->nc_nlen; 1366 1367 hammer2_chain_memory_wait(dip->pmp); 1368 hammer2_trans_init(&trans, dip->pmp, 0); 1369 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1370 name, name_len, &chain, &error); 1371 if (error) { 1372 KKASSERT(nip == NULL); 1373 *ap->a_vpp = NULL; 1374 } else { 1375 *ap->a_vpp = hammer2_igetv(nip, &error); 1376 hammer2_inode_unlock_ex(nip, chain); 1377 } 1378 hammer2_trans_done(&trans); 1379 1380 if (error == 0) { 1381 cache_setunresolved(ap->a_nch); 1382 cache_setvp(ap->a_nch, *ap->a_vpp); 1383 } 1384 return error; 1385 } 1386 1387 /* 1388 * Return the largest contiguous physical disk range for the logical 1389 * request, in bytes. 1390 * 1391 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1392 */ 1393 static 1394 int 1395 hammer2_vop_bmap(struct vop_bmap_args *ap) 1396 { 1397 *ap->a_doffsetp = NOOFFSET; 1398 if (ap->a_runp) 1399 *ap->a_runp = 0; 1400 if (ap->a_runb) 1401 *ap->a_runb = 0; 1402 return (EOPNOTSUPP); 1403 #if 0 1404 struct vnode *vp; 1405 hammer2_inode_t *ip; 1406 hammer2_chain_t *parent; 1407 hammer2_chain_t *chain; 1408 hammer2_key_t key_next; 1409 hammer2_key_t lbeg; 1410 hammer2_key_t lend; 1411 hammer2_off_t pbeg; 1412 hammer2_off_t pbytes; 1413 hammer2_off_t array[HAMMER2_BMAP_COUNT][2]; 1414 int loff; 1415 int ai; 1416 int cache_index; 1417 1418 /* 1419 * Only supported on regular files 1420 * 1421 * Only supported for read operations (required for cluster_read). 1422 * The block allocation is delayed for write operations. 
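	 *
	 * The coalescing loop below merges physically contiguous runs.
	 * E.g. (hypothetical numbers) array[0] = {0x10000, 16384} and
	 * array[1] = {0x14000, 16384}: since 0x10000 + 16384 == 0x14000,
	 * the runs merge into a single 32KB result.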
1423 */ 1424 vp = ap->a_vp; 1425 if (vp->v_type != VREG) 1426 return (EOPNOTSUPP); 1427 if (ap->a_cmd != BUF_CMD_READ) 1428 return (EOPNOTSUPP); 1429 1430 ip = VTOI(vp); 1431 bzero(array, sizeof(array)); 1432 1433 /* 1434 * Calculate logical range 1435 */ 1436 KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0); 1437 lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI; 1438 lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1; 1439 if (lend < lbeg) 1440 lend = lbeg; 1441 loff = ap->a_loffset & HAMMER2_OFF_MASK_LO; 1442 1443 parent = hammer2_inode_lock_sh(ip); 1444 chain = hammer2_chain_lookup(&parent, &key_next, 1445 lbeg, lend, 1446 &cache_index, 1447 HAMMER2_LOOKUP_NODATA | 1448 HAMMER2_LOOKUP_SHARED); 1449 if (chain == NULL) { 1450 *ap->a_doffsetp = ZFOFFSET; 1451 hammer2_inode_unlock_sh(ip, parent); 1452 return (0); 1453 } 1454 1455 while (chain) { 1456 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1457 ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE; 1458 KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT); 1459 array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK; 1460 array[ai][1] = chain->bytes; 1461 } 1462 chain = hammer2_chain_next(&parent, chain, &key_next, 1463 key_next, lend, 1464 &cache_index, 1465 HAMMER2_LOOKUP_NODATA | 1466 HAMMER2_LOOKUP_SHARED); 1467 } 1468 hammer2_inode_unlock_sh(ip, parent); 1469 1470 /* 1471 * If the requested loffset is not mappable physically we can't 1472 * bmap. The caller will have to access the file data via a 1473 * device buffer. 1474 */ 1475 if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_MINIOSIZE) { 1476 *ap->a_doffsetp = NOOFFSET; 1477 return (0); 1478 } 1479 1480 /* 1481 * Calculate the physical disk offset range for array[0] 1482 */ 1483 pbeg = array[0][0] + loff; 1484 pbytes = array[0][1] - loff; 1485 1486 for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) { 1487 if (array[ai][0] != pbeg + pbytes) 1488 break; 1489 pbytes += array[ai][1]; 1490 } 1491 1492 *ap->a_doffsetp = pbeg; 1493 if (ap->a_runp) 1494 *ap->a_runp = pbytes; 1495 return (0); 1496 #endif 1497 } 1498 1499 static 1500 int 1501 hammer2_vop_open(struct vop_open_args *ap) 1502 { 1503 return vop_stdopen(ap); 1504 } 1505 1506 /* 1507 * hammer2_vop_advlock { vp, id, op, fl, flags } 1508 */ 1509 static 1510 int 1511 hammer2_vop_advlock(struct vop_advlock_args *ap) 1512 { 1513 hammer2_inode_t *ip = VTOI(ap->a_vp); 1514 hammer2_chain_t *parent; 1515 hammer2_off_t size; 1516 1517 parent = hammer2_inode_lock_sh(ip); 1518 size = parent->data->ipdata.size; 1519 hammer2_inode_unlock_sh(ip, parent); 1520 return (lf_advlock(ap, &ip->advlock, size)); 1521 } 1522 1523 1524 static 1525 int 1526 hammer2_vop_close(struct vop_close_args *ap) 1527 { 1528 return vop_stdclose(ap); 1529 } 1530 1531 /* 1532 * hammer2_vop_nlink { nch, dvp, vp, cred } 1533 * 1534 * Create a hardlink from (vp) to {dvp, nch}. 1535 */ 1536 static 1537 int 1538 hammer2_vop_nlink(struct vop_nlink_args *ap) 1539 { 1540 hammer2_inode_t *dip; /* target directory to create link in */ 1541 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1542 hammer2_chain_t *chain; 1543 hammer2_trans_t trans; 1544 struct namecache *ncp; 1545 const uint8_t *name; 1546 size_t name_len; 1547 int error; 1548 1549 dip = VTOI(ap->a_dvp); 1550 if (dip->pmp->ronly) 1551 return (EROFS); 1552 1553 ncp = ap->a_nch->ncp; 1554 name = ncp->nc_name; 1555 name_len = ncp->nc_nlen; 1556 1557 /* 1558 * ip represents the file being hardlinked. The file could be a 1559 * normal file or a hardlink target if it has already been hardlinked. 
1560 * If ip is a hardlinked target then ip->pip represents the location 1561 * of the hardlinked target, NOT the location of the hardlink pointer. 1562 * 1563 * Bump nlinks and potentially also create or move the hardlink 1564 * target in the parent directory common to (ip) and (dip). The 1565 * consolidation code can modify ip->chain and ip->pip. The 1566 * returned chain is locked. 1567 */ 1568 ip = VTOI(ap->a_vp); 1569 hammer2_chain_memory_wait(ip->pmp); 1570 hammer2_trans_init(&trans, ip->pmp, 0); 1571 1572 chain = hammer2_inode_lock_ex(ip); 1573 error = hammer2_hardlink_consolidate(&trans, ip, &chain, dip, 1); 1574 if (error) 1575 goto done; 1576 1577 /* 1578 * Create a directory entry connected to the specified chain. 1579 * The hardlink consolidation code has already adjusted ip->pip 1580 * to the common parent directory containing the actual hardlink 1581 * 1582 * (which may be different from dip where we created our hardlink 1583 * entry. ip->chain always represents the actual hardlink and not 1584 * any of the pointers to the actual hardlink). 1585 */ 1586 error = hammer2_inode_connect(&trans, 1, 1587 dip, &chain, 1588 name, name_len); 1589 if (error == 0) { 1590 cache_setunresolved(ap->a_nch); 1591 cache_setvp(ap->a_nch, ap->a_vp); 1592 } 1593 done: 1594 hammer2_inode_unlock_ex(ip, chain); 1595 hammer2_trans_done(&trans); 1596 1597 return error; 1598 } 1599 1600 /* 1601 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1602 * 1603 * The operating system has already ensured that the directory entry 1604 * does not exist and done all appropriate namespace locking. 1605 */ 1606 static 1607 int 1608 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1609 { 1610 hammer2_inode_t *dip; 1611 hammer2_inode_t *nip; 1612 hammer2_trans_t trans; 1613 hammer2_chain_t *nchain; 1614 struct namecache *ncp; 1615 const uint8_t *name; 1616 size_t name_len; 1617 int error; 1618 1619 dip = VTOI(ap->a_dvp); 1620 if (dip->pmp->ronly) 1621 return (EROFS); 1622 1623 ncp = ap->a_nch->ncp; 1624 name = ncp->nc_name; 1625 name_len = ncp->nc_nlen; 1626 hammer2_chain_memory_wait(dip->pmp); 1627 hammer2_trans_init(&trans, dip->pmp, 0); 1628 1629 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1630 name, name_len, &nchain, &error); 1631 if (error) { 1632 KKASSERT(nip == NULL); 1633 *ap->a_vpp = NULL; 1634 } else { 1635 *ap->a_vpp = hammer2_igetv(nip, &error); 1636 hammer2_inode_unlock_ex(nip, nchain); 1637 } 1638 hammer2_trans_done(&trans); 1639 1640 if (error == 0) { 1641 cache_setunresolved(ap->a_nch); 1642 cache_setvp(ap->a_nch, *ap->a_vpp); 1643 } 1644 return error; 1645 } 1646 1647 /* 1648 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1649 */ 1650 static 1651 int 1652 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1653 { 1654 hammer2_inode_t *dip; 1655 hammer2_inode_t *nip; 1656 hammer2_chain_t *nparent; 1657 hammer2_trans_t trans; 1658 struct namecache *ncp; 1659 const uint8_t *name; 1660 size_t name_len; 1661 int error; 1662 1663 dip = VTOI(ap->a_dvp); 1664 if (dip->pmp->ronly) 1665 return (EROFS); 1666 1667 ncp = ap->a_nch->ncp; 1668 name = ncp->nc_name; 1669 name_len = ncp->nc_nlen; 1670 hammer2_chain_memory_wait(dip->pmp); 1671 hammer2_trans_init(&trans, dip->pmp, 0); 1672 1673 ap->a_vap->va_type = VLNK; /* enforce type */ 1674 1675 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1676 name, name_len, &nparent, &error); 1677 if (error) { 1678 KKASSERT(nip == NULL); 1679 *ap->a_vpp = NULL; 1680 hammer2_trans_done(&trans); 1681 return error; 1682 } 1683 
*ap->a_vpp = hammer2_igetv(nip, &error); 1684 1685 /* 1686 * Build the softlink (~like file data) and finalize the namecache. 1687 */ 1688 if (error == 0) { 1689 size_t bytes; 1690 struct uio auio; 1691 struct iovec aiov; 1692 hammer2_inode_data_t *nipdata; 1693 1694 nipdata = &nip->chain->data->ipdata; 1695 bytes = strlen(ap->a_target); 1696 1697 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1698 KKASSERT(nipdata->op_flags & 1699 HAMMER2_OPFLAG_DIRECTDATA); 1700 bcopy(ap->a_target, nipdata->u.data, bytes); 1701 nipdata->size = bytes; 1702 nip->size = bytes; 1703 hammer2_inode_unlock_ex(nip, nparent); 1704 } else { 1705 hammer2_inode_unlock_ex(nip, nparent); 1706 bzero(&auio, sizeof(auio)); 1707 bzero(&aiov, sizeof(aiov)); 1708 auio.uio_iov = &aiov; 1709 auio.uio_segflg = UIO_SYSSPACE; 1710 auio.uio_rw = UIO_WRITE; 1711 auio.uio_resid = bytes; 1712 auio.uio_iovcnt = 1; 1713 auio.uio_td = curthread; 1714 aiov.iov_base = ap->a_target; 1715 aiov.iov_len = bytes; 1716 error = hammer2_write_file(nip, &auio, IO_APPEND, 0); 1717 nipdata = &nip->chain->data->ipdata; /* RELOAD */ 1718 /* XXX handle error */ 1719 error = 0; 1720 } 1721 } else { 1722 hammer2_inode_unlock_ex(nip, nparent); 1723 } 1724 hammer2_trans_done(&trans); 1725 1726 /* 1727 * Finalize namecache 1728 */ 1729 if (error == 0) { 1730 cache_setunresolved(ap->a_nch); 1731 cache_setvp(ap->a_nch, *ap->a_vpp); 1732 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1733 } 1734 return error; 1735 } 1736 1737 /* 1738 * hammer2_vop_nremove { nch, dvp, cred } 1739 */ 1740 static 1741 int 1742 hammer2_vop_nremove(struct vop_nremove_args *ap) 1743 { 1744 hammer2_inode_t *dip; 1745 hammer2_trans_t trans; 1746 struct namecache *ncp; 1747 const uint8_t *name; 1748 size_t name_len; 1749 int error; 1750 1751 dip = VTOI(ap->a_dvp); 1752 if (dip->pmp->ronly) 1753 return(EROFS); 1754 1755 ncp = ap->a_nch->ncp; 1756 name = ncp->nc_name; 1757 name_len = ncp->nc_nlen; 1758 hammer2_chain_memory_wait(dip->pmp); 1759 hammer2_trans_init(&trans, dip->pmp, 0); 1760 error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL); 1761 hammer2_trans_done(&trans); 1762 if (error == 0) { 1763 cache_unlink(ap->a_nch); 1764 } 1765 return (error); 1766 } 1767 1768 /* 1769 * hammer2_vop_nrmdir { nch, dvp, cred } 1770 */ 1771 static 1772 int 1773 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1774 { 1775 hammer2_inode_t *dip; 1776 hammer2_trans_t trans; 1777 struct namecache *ncp; 1778 const uint8_t *name; 1779 size_t name_len; 1780 int error; 1781 1782 dip = VTOI(ap->a_dvp); 1783 if (dip->pmp->ronly) 1784 return(EROFS); 1785 1786 ncp = ap->a_nch->ncp; 1787 name = ncp->nc_name; 1788 name_len = ncp->nc_nlen; 1789 1790 hammer2_chain_memory_wait(dip->pmp); 1791 hammer2_trans_init(&trans, dip->pmp, 0); 1792 error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL); 1793 hammer2_trans_done(&trans); 1794 if (error == 0) { 1795 cache_unlink(ap->a_nch); 1796 } 1797 return (error); 1798 } 1799 1800 /* 1801 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1802 */ 1803 static 1804 int 1805 hammer2_vop_nrename(struct vop_nrename_args *ap) 1806 { 1807 struct namecache *fncp; 1808 struct namecache *tncp; 1809 hammer2_inode_t *fdip; 1810 hammer2_inode_t *tdip; 1811 hammer2_inode_t *ip; 1812 hammer2_chain_t *chain; 1813 hammer2_trans_t trans; 1814 const uint8_t *fname; 1815 size_t fname_len; 1816 const uint8_t *tname; 1817 size_t tname_len; 1818 int error; 1819 int hlink; 1820 1821 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1822 return(EXDEV); 1823 if (ap->a_fdvp->v_mount != 
ap->a_fnch->ncp->nc_vp->v_mount) 1824 return(EXDEV); 1825 1826 fdip = VTOI(ap->a_fdvp); /* source directory */ 1827 tdip = VTOI(ap->a_tdvp); /* target directory */ 1828 1829 if (fdip->pmp->ronly) 1830 return(EROFS); 1831 1832 fncp = ap->a_fnch->ncp; /* entry name in source */ 1833 fname = fncp->nc_name; 1834 fname_len = fncp->nc_nlen; 1835 1836 tncp = ap->a_tnch->ncp; /* entry name in target */ 1837 tname = tncp->nc_name; 1838 tname_len = tncp->nc_nlen; 1839 1840 hammer2_chain_memory_wait(tdip->pmp); 1841 hammer2_trans_init(&trans, tdip->pmp, 0); 1842 1843 /* 1844 * ip is the inode being renamed. If this is a hardlink then 1845 * ip represents the actual file and not the hardlink marker. 1846 */ 1847 ip = VTOI(fncp->nc_vp); 1848 chain = NULL; 1849 1850 /* 1851 * Keep a tight grip on the inode so the temporary unlinking from 1852 * the source location prior to linking to the target location 1853 * does not cause the chain to be destroyed. 1854 * 1855 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 1856 * unlinking elements from their directories. Locking 1857 * the nlinks field does not lock the whole inode. 1858 */ 1859 hammer2_inode_ref(ip); 1860 1861 /* 1862 * Remove target if it exists 1863 */ 1864 error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1, NULL); 1865 if (error && error != ENOENT) 1866 goto done; 1867 cache_setunresolved(ap->a_tnch); 1868 1869 /* 1870 * When renaming a hardlinked file we may have to re-consolidate 1871 * the location of the hardlink target. Since the element is simply 1872 * being moved, nlinks is not modified in this case. 1873 * 1874 * If ip represents a regular file the consolidation code essentially 1875 * does nothing other than return the same locked chain that was 1876 * passed in. 1877 * 1878 * The returned chain will be locked. 1879 * 1880 * WARNING! We do not currently have a local copy of ipdata but 1881 * we do use one later remember that it must be reloaded 1882 * on any modification to the inode, including connects. 1883 */ 1884 chain = hammer2_inode_lock_ex(ip); 1885 error = hammer2_hardlink_consolidate(&trans, ip, &chain, tdip, 0); 1886 if (error) 1887 goto done; 1888 1889 /* 1890 * Disconnect (fdip, fname) from the source directory. This will 1891 * disconnect (ip) if it represents a direct file. If (ip) represents 1892 * a hardlink the HARDLINK pointer object will be removed but the 1893 * hardlink will stay intact. 1894 * 1895 * The target chain may be marked DELETED but will not be destroyed 1896 * since we retain our hold on ip and chain. 1897 */ 1898 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1, &hlink); 1899 KKASSERT(error != EAGAIN); 1900 if (error) 1901 goto done; 1902 1903 /* 1904 * Reconnect ip to target directory using chain. Chains cannot 1905 * actually be moved, so this will duplicate the chain in the new 1906 * spot and assign it to the ip, replacing the old chain. 1907 * 1908 * WARNING: chain locks can lock buffer cache buffers, to avoid 1909 * deadlocks we want to unlock before issuing a cache_*() 1910 * op (that might have to lock a vnode). 1911 */ 1912 error = hammer2_inode_connect(&trans, hlink, 1913 tdip, &chain, 1914 tname, tname_len); 1915 if (error == 0) { 1916 KKASSERT(chain != NULL); 1917 hammer2_inode_repoint(ip, (hlink ? 
ip->pip : tdip), chain); 1918 cache_rename(ap->a_fnch, ap->a_tnch); 1919 } 1920 done: 1921 hammer2_inode_unlock_ex(ip, chain); 1922 hammer2_inode_drop(ip); 1923 hammer2_trans_done(&trans); 1924 1925 return (error); 1926 } 1927 1928 /* 1929 * Strategy code 1930 * 1931 * WARNING: The strategy code cannot safely use hammer2 transactions 1932 * as this can deadlock against vfs_sync's vfsync() call 1933 * if multiple flushes are queued. 1934 */ 1935 static int hammer2_strategy_read(struct vop_strategy_args *ap); 1936 static int hammer2_strategy_write(struct vop_strategy_args *ap); 1937 static void hammer2_strategy_read_callback(hammer2_chain_t *chain, 1938 struct buf *dbp, char *data, void *arg); 1939 1940 static 1941 int 1942 hammer2_vop_strategy(struct vop_strategy_args *ap) 1943 { 1944 struct bio *biop; 1945 struct buf *bp; 1946 int error; 1947 1948 biop = ap->a_bio; 1949 bp = biop->bio_buf; 1950 1951 switch(bp->b_cmd) { 1952 case BUF_CMD_READ: 1953 error = hammer2_strategy_read(ap); 1954 ++hammer2_iod_file_read; 1955 break; 1956 case BUF_CMD_WRITE: 1957 error = hammer2_strategy_write(ap); 1958 ++hammer2_iod_file_write; 1959 break; 1960 default: 1961 bp->b_error = error = EINVAL; 1962 bp->b_flags |= B_ERROR; 1963 biodone(biop); 1964 break; 1965 } 1966 1967 return (error); 1968 } 1969 1970 static 1971 int 1972 hammer2_strategy_read(struct vop_strategy_args *ap) 1973 { 1974 struct buf *bp; 1975 struct bio *bio; 1976 struct bio *nbio; 1977 hammer2_inode_t *ip; 1978 hammer2_chain_t *parent; 1979 hammer2_chain_t *chain; 1980 hammer2_key_t key_dummy; 1981 hammer2_key_t lbase; 1982 int loff; 1983 int cache_index = -1; 1984 1985 bio = ap->a_bio; 1986 bp = bio->bio_buf; 1987 ip = VTOI(ap->a_vp); 1988 nbio = push_bio(bio); 1989 1990 lbase = bio->bio_offset; 1991 chain = NULL; 1992 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 1993 1994 parent = hammer2_inode_lock_sh(ip); 1995 chain = hammer2_chain_lookup(&parent, &key_dummy, 1996 lbase, lbase, 1997 &cache_index, 1998 HAMMER2_LOOKUP_NODATA | 1999 HAMMER2_LOOKUP_SHARED); 2000 2001 if (chain == NULL) { 2002 /* 2003 * Data is zero-fill 2004 */ 2005 bp->b_resid = 0; 2006 bp->b_error = 0; 2007 bzero(bp->b_data, bp->b_bcount); 2008 biodone(nbio); 2009 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2010 /* 2011 * Data is embedded in the inode (copy from inode). 2012 */ 2013 hammer2_chain_load_async(chain, hammer2_strategy_read_callback, 2014 nbio); 2015 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2016 /* 2017 * Data is on-media, issue device I/O and copy. 2018 * 2019 * XXX direct-IO shortcut could go here XXX. 2020 */ 2021 if (HAMMER2_DEC_COMP(chain->bref.methods) == HAMMER2_COMP_LZ4) { 2022 /* 2023 * Block compression is determined by bref.methods value. 
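			 *
			 * Sketch of the dispatch below (the actual code uses
			 * an if/else chain; names as used in this function):
			 *
			 *	switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
			 *	case HAMMER2_COMP_LZ4:
			 *		breadcb(..., decompress_LZ4_callback);
			 *	case HAMMER2_COMP_ZLIB:
			 *		breadcb(..., decompress_ZLIB_callback);
			 *	default:
			 *		hammer2_chain_load_async(...);
			 *	}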
			 */
			hammer2_blockref_t *bref;
			hammer2_off_t pbase;
			hammer2_off_t pmask;
			size_t psize;

			bref = &chain->bref;
			psize = hammer2_devblksize(chain->bytes);
			pmask = (hammer2_off_t)psize - 1;
			pbase = bref->data_off & ~pmask;
			loff = (int)((bref->data_off &
				      ~HAMMER2_OFF_MASK_RADIX) - pbase);
			nbio->bio_caller_info3.value = loff;
			breadcb(chain->hmp->devvp, pbase, psize,
				hammer2_decompress_LZ4_callback, nbio);
			/* XXX async read dev blk not protected by chain lk */
			hammer2_chain_unlock(chain);
		} else if (HAMMER2_DEC_COMP(chain->bref.methods) ==
			   HAMMER2_COMP_ZLIB) {
			hammer2_blockref_t *bref;
			hammer2_off_t pbase;
			hammer2_off_t pmask;
			size_t psize;

			bref = &chain->bref;
			psize = hammer2_devblksize(chain->bytes);
			pmask = (hammer2_off_t)psize - 1;
			pbase = bref->data_off & ~pmask;
			loff = (int)((bref->data_off &
				      ~HAMMER2_OFF_MASK_RADIX) - pbase);
			nbio->bio_caller_info3.value = loff;
			breadcb(chain->hmp->devvp, pbase, psize,
				hammer2_decompress_ZLIB_callback, nbio);
			/* XXX async read dev blk not protected by chain lk */
			hammer2_chain_unlock(chain);
		} else {
			hammer2_chain_load_async(chain,
						 hammer2_strategy_read_callback,
						 nbio);
		}
	} else {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
		chain = NULL;
	}
	hammer2_inode_unlock_sh(ip, parent);
	return (0);
}

/*
 * Read callback for block that is not compressed.
 */
static
void
hammer2_strategy_read_callback(hammer2_chain_t *chain, struct buf *dbp,
			       char *data, void *arg)
{
	struct bio *nbio = arg;
	struct buf *bp = nbio->bio_buf;

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
		hammer2_chain_unlock(chain);
		biodone(nbio);
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		KKASSERT(chain->bytes <= bp->b_bcount);
		bcopy(data, bp->b_data, chain->bytes);
		if (chain->bytes < bp->b_bcount) {
			bzero(bp->b_data + chain->bytes,
			      bp->b_bcount - chain->bytes);
		}
		bp->b_flags |= B_NOTMETA;
		bp->b_resid = 0;
		bp->b_error = 0;
		hammer2_chain_unlock(chain);
		biodone(nbio);
	} else {
		if (dbp)
			bqrelse(dbp);
		panic("hammer2_strategy_read: unknown bref type");
		/*hammer2_chain_unlock(chain);*/
		/*chain = NULL;*/
	}
}

static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	/*
	 * XXX temporary because all write handling is currently
	 * in the vop_write path (which is incorrect and won't catch
	 * certain file modifications via mmap()).  What we need
	 * to do is have the strategy_write code queue the bio to
	 * one or more support threads which will do the complex
	 * logical->physical work and have the vop_write path just do
	 * normal operations on the logical buffer.
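	 *
	 * A support-thread consumer of this queue might look roughly like
	 * the following (a sketch only; the real thread would live in the
	 * mount code, and the mtxsleep flags/timeout are elided):
	 *
	 *	mtx_lock(&hmp->wthread_mtx);
	 *	for (;;) {
	 *		bio = bioq_takefirst(&hmp->wthread_bioq);
	 *		if (bio == NULL) {
	 *			mtxsleep(&hmp->wthread_bioq,
	 *				 &hmp->wthread_mtx, 0, "h2bioq", 0);
	 *			continue;
	 *		}
	 *		mtx_unlock(&hmp->wthread_mtx);
	 *		-- logical->physical translation and write here
	 *		mtx_lock(&hmp->wthread_mtx);
	 *	}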
	 */
	hammer2_mount_t *hmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	hmp = ip->pmp->mount_cluster->hmp;

	mtx_lock(&hmp->wthread_mtx);
	bioq_insert_tail(&hmp->wthread_bioq, ap->a_bio);
	wakeup(&hmp->wthread_bioq);
	mtx_unlock(&hmp->wthread_mtx);
	return(0);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	return (error);
}

static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfsmount_t *pmp;
	int rc;

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	return (rc);
}

struct vop_ops hammer2_vnode_vops = {
	.vop_default = vop_defaultop,
	.vop_fsync = hammer2_vop_fsync,
	.vop_getpages = vop_stdgetpages,
	.vop_putpages = vop_stdputpages,
	.vop_access = hammer2_vop_access,
	.vop_advlock = hammer2_vop_advlock,
	.vop_close = hammer2_vop_close,
	.vop_nlink = hammer2_vop_nlink,
	.vop_ncreate = hammer2_vop_ncreate,
	.vop_nsymlink = hammer2_vop_nsymlink,
	.vop_nremove = hammer2_vop_nremove,
	.vop_nrmdir = hammer2_vop_nrmdir,
	.vop_nrename = hammer2_vop_nrename,
	.vop_getattr = hammer2_vop_getattr,
	.vop_setattr = hammer2_vop_setattr,
	.vop_readdir = hammer2_vop_readdir,
	.vop_readlink = hammer2_vop_readlink,
	.vop_read = hammer2_vop_read,
	.vop_write = hammer2_vop_write,
	.vop_open = hammer2_vop_open,
	.vop_inactive = hammer2_vop_inactive,
	.vop_reclaim = hammer2_vop_reclaim,
	.vop_nresolve = hammer2_vop_nresolve,
	.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
	.vop_nmkdir = hammer2_vop_nmkdir,
	.vop_ioctl = hammer2_vop_ioctl,
	.vop_mountctl = hammer2_vop_mountctl,
	.vop_bmap = hammer2_vop_bmap,
	.vop_strategy = hammer2_vop_strategy,
};

struct vop_ops hammer2_spec_vops = {

};

struct vop_ops hammer2_fifo_vops = {

};