1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! local ipdata pointers must be reloaded on any modifying operation 40 * to the inode as its underlying chain may have changed. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/mountctl.h> 53 #include <sys/dirent.h> 54 #include <sys/uio.h> 55 #include <sys/objcache.h> 56 #include <sys/event.h> 57 #include <sys/file.h> 58 #include <vfs/fifofs/fifo.h> 59 60 #include "hammer2.h" 61 #include "hammer2_lz4.h" 62 63 #include "zlib/hammer2_zlib.h" 64 65 #define ZFOFFSET (-2LL) 66 67 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 68 int seqcount); 69 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 70 int ioflag, int seqcount); 71 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 72 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 73 74 struct objcache *cache_buffer_read; 75 struct objcache *cache_buffer_write; 76 77 /* 78 * Callback used in read path in case that a block is compressed with LZ4. 
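 *
 * The first sizeof(int) bytes of the device data hold the compressed
 * length, followed by the LZ4 payload.  The payload is decompressed into
 * a scratch buffer taken from cache_buffer_read and then copied into the
 * logical buffer, zero-filling any remainder.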
 */
static
void
hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
{
	struct buf *bp;
	char *compressed_buffer;
	int compressed_size;
	int result;

	bp = bio->bio_buf;

#if 0
	if bio->bio_caller_info2.index &&
	      bio->bio_caller_info1.uvalue32 !=
	      crc32(bp->b_data, bp->b_bufsize) --- return error
#endif

	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	compressed_size = *(const int *)data;
	KKASSERT(compressed_size <= bytes - sizeof(int));

	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
				     compressed_buffer,
				     compressed_size,
				     bp->b_bufsize);
	if (result < 0) {
		kprintf("READ PATH: Error during decompression."
			"bio %016jx/%d\n",
			(intmax_t)bio->bio_offset, bytes);
		/* make sure it isn't random garbage */
		bzero(compressed_buffer, bp->b_bufsize);
	}
	KKASSERT(result <= bp->b_bufsize);
	bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
	if (result < bp->b_bufsize)
		bzero(bp->b_data + result, bp->b_bufsize - result);
	objcache_put(cache_buffer_read, compressed_buffer);
	bp->b_resid = 0;
	bp->b_flags |= B_AGE;
}

/*
 * Callback used in the read path when a block is compressed with ZLIB.
 * It is almost identical to the LZ4 callback, so in theory the two could
 * be unified, but we did not want to change the bio structure for that.
 */
static
void
hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
{
	struct buf *bp;
	char *compressed_buffer;
	z_stream strm_decompress;
	int result;
	int ret;

	bp = bio->bio_buf;

	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	strm_decompress.avail_in = 0;
	strm_decompress.next_in = Z_NULL;

	ret = inflateInit(&strm_decompress);

	if (ret != Z_OK)
		kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");

	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	strm_decompress.next_in = __DECONST(char *, data);

	/* XXX supply proper size, subset of device bp */
	strm_decompress.avail_in = bytes;
	strm_decompress.next_out = compressed_buffer;
	strm_decompress.avail_out = bp->b_bufsize;

	ret = inflate(&strm_decompress, Z_FINISH);
	if (ret != Z_STREAM_END) {
		kprintf("HAMMER2 ZLIB: Fatal error during decompression.\n");
		bzero(compressed_buffer, bp->b_bufsize);
	}
	bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
	result = bp->b_bufsize - strm_decompress.avail_out;
	if (result < bp->b_bufsize)
		bzero(bp->b_data + result, strm_decompress.avail_out);
	objcache_put(cache_buffer_read, compressed_buffer);
	ret = inflateEnd(&strm_decompress);

	bp->b_resid = 0;
	bp->b_flags |= B_AGE;
}

static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(vp);
		return (0);
	}

	/*
	 * Detect updates to the embedded data which may be synchronized by
	 * the strategy code.
Simply mark the inode modified so it gets 205 * picked up by our normal flush. 206 */ 207 cparent = hammer2_inode_lock_ex(ip); 208 KKASSERT(cparent); 209 210 /* 211 * Check for deleted inodes and recycle immediately. 212 */ 213 if (hammer2_cluster_unlinked(cparent) & HAMMER2_CHAIN_UNLINKED) { 214 hammer2_inode_unlock_ex(ip, cparent); 215 vrecycle(vp); 216 } else { 217 hammer2_inode_unlock_ex(ip, cparent); 218 } 219 return (0); 220 } 221 222 /* 223 * Reclaim a vnode so that it can be reused; after the inode is 224 * disassociated, the filesystem must manage it alone. 225 */ 226 static 227 int 228 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 229 { 230 hammer2_cluster_t *cluster; 231 hammer2_inode_t *ip; 232 hammer2_pfsmount_t *pmp; 233 struct vnode *vp; 234 235 vp = ap->a_vp; 236 ip = VTOI(vp); 237 if (ip == NULL) 238 return(0); 239 240 /* 241 * Inode must be locked for reclaim. 242 */ 243 pmp = ip->pmp; 244 cluster = hammer2_inode_lock_ex(ip); 245 246 /* 247 * The final close of a deleted file or directory marks it for 248 * destruction. The DELETED flag allows the flusher to shortcut 249 * any modified blocks still unflushed (that is, just ignore them). 250 * 251 * HAMMER2 usually does not try to optimize the freemap by returning 252 * deleted blocks to it as it does not usually know how many snapshots 253 * might be referencing portions of the file/dir. 254 */ 255 vp->v_data = NULL; 256 ip->vp = NULL; 257 258 /* 259 * NOTE! We do not attempt to flush chains here, flushing is 260 * really fragile and could also deadlock. 261 */ 262 vclrisdirty(vp); 263 264 /* 265 * A reclaim can occur at any time so we cannot safely start a 266 * transaction to handle reclamation of unlinked files. Instead, 267 * the ip is left with a reference and placed on a linked list and 268 * handled later on. 269 */ 270 if (hammer2_cluster_unlinked(cluster)) { 271 hammer2_inode_unlink_t *ipul; 272 273 ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO); 274 ipul->ip = ip; 275 276 spin_lock(&pmp->unlinkq_spin); 277 TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry); 278 spin_unlock(&pmp->unlinkq_spin); 279 hammer2_inode_unlock_ex(ip, cluster); /* unlock */ 280 /* retain ref from vp for ipul */ 281 } else { 282 hammer2_inode_unlock_ex(ip, cluster); /* unlock */ 283 hammer2_inode_drop(ip); /* vp ref */ 284 } 285 /* cluster no longer referenced */ 286 /* cluster = NULL; not needed */ 287 288 /* 289 * XXX handle background sync when ip dirty, kernel will no longer 290 * notify us regarding this inode because there is no longer a 291 * vnode attached to it. 292 */ 293 294 return (0); 295 } 296 297 static 298 int 299 hammer2_vop_fsync(struct vop_fsync_args *ap) 300 { 301 hammer2_inode_t *ip; 302 hammer2_trans_t trans; 303 hammer2_cluster_t *cluster; 304 struct vnode *vp; 305 306 vp = ap->a_vp; 307 ip = VTOI(vp); 308 309 #if 0 310 /* XXX can't do this yet */ 311 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 312 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 313 #endif 314 hammer2_trans_init(&trans, ip->pmp, 0); 315 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 316 317 /* 318 * Calling chain_flush here creates a lot of duplicative 319 * COW operations due to non-optimal vnode ordering. 320 * 321 * Only do it for an actual fsync() syscall. The other forms 322 * which call this function will eventually call chain_flush 323 * on the volume root as a catch-all, which is far more optimal. 
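	 *
	 * In the current code the dirty data buffers are pushed out via the
	 * vfsync() call above and only the inode meta-data is synchronized
	 * here (via hammer2_inode_fsync()) when a resize or mtime update
	 * is still pending.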
324 */ 325 cluster = hammer2_inode_lock_ex(ip); 326 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 327 vclrisdirty(vp); 328 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME)) 329 hammer2_inode_fsync(&trans, ip, cluster); 330 331 #if 0 332 /* 333 * XXX creates discontinuity w/modify_tid 334 */ 335 if (ap->a_flags & VOP_FSYNC_SYSCALL) { 336 hammer2_flush(&trans, cluster); 337 } 338 #endif 339 hammer2_inode_unlock_ex(ip, cluster); 340 hammer2_trans_done(&trans); 341 342 return (0); 343 } 344 345 static 346 int 347 hammer2_vop_access(struct vop_access_args *ap) 348 { 349 hammer2_inode_t *ip = VTOI(ap->a_vp); 350 const hammer2_inode_data_t *ipdata; 351 hammer2_cluster_t *cluster; 352 uid_t uid; 353 gid_t gid; 354 int error; 355 356 cluster = hammer2_inode_lock_sh(ip); 357 ipdata = &hammer2_cluster_data(cluster)->ipdata; 358 uid = hammer2_to_unix_xid(&ipdata->uid); 359 gid = hammer2_to_unix_xid(&ipdata->gid); 360 error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); 361 hammer2_inode_unlock_sh(ip, cluster); 362 363 return (error); 364 } 365 366 static 367 int 368 hammer2_vop_getattr(struct vop_getattr_args *ap) 369 { 370 const hammer2_inode_data_t *ipdata; 371 hammer2_cluster_t *cluster; 372 hammer2_pfsmount_t *pmp; 373 hammer2_inode_t *ip; 374 struct vnode *vp; 375 struct vattr *vap; 376 377 vp = ap->a_vp; 378 vap = ap->a_vap; 379 380 ip = VTOI(vp); 381 pmp = ip->pmp; 382 383 cluster = hammer2_inode_lock_sh(ip); 384 ipdata = &hammer2_cluster_data(cluster)->ipdata; 385 KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); 386 387 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 388 vap->va_fileid = ipdata->inum; 389 vap->va_mode = ipdata->mode; 390 vap->va_nlink = ipdata->nlinks; 391 vap->va_uid = hammer2_to_unix_xid(&ipdata->uid); 392 vap->va_gid = hammer2_to_unix_xid(&ipdata->gid); 393 vap->va_rmajor = 0; 394 vap->va_rminor = 0; 395 vap->va_size = ip->size; /* protected by shared lock */ 396 vap->va_blocksize = HAMMER2_PBUFSIZE; 397 vap->va_flags = ipdata->uflags; 398 hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime); 399 hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime); 400 hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); 401 vap->va_gen = 1; 402 vap->va_bytes = vap->va_size; /* XXX */ 403 vap->va_type = hammer2_get_vtype(ipdata); 404 vap->va_filerev = 0; 405 vap->va_uid_uuid = ipdata->uid; 406 vap->va_gid_uuid = ipdata->gid; 407 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 408 VA_FSID_UUID_VALID; 409 410 hammer2_inode_unlock_sh(ip, cluster); 411 412 return (0); 413 } 414 415 static 416 int 417 hammer2_vop_setattr(struct vop_setattr_args *ap) 418 { 419 const hammer2_inode_data_t *ripdata; 420 hammer2_inode_data_t *wipdata; 421 hammer2_inode_t *ip; 422 hammer2_cluster_t *cluster; 423 hammer2_trans_t trans; 424 struct vnode *vp; 425 struct vattr *vap; 426 int error; 427 int kflags = 0; 428 int domtime = 0; 429 int dosync = 0; 430 uint64_t ctime; 431 432 vp = ap->a_vp; 433 vap = ap->a_vap; 434 hammer2_update_time(&ctime); 435 436 ip = VTOI(vp); 437 438 if (ip->pmp->ronly) 439 return(EROFS); 440 441 hammer2_pfs_memory_wait(ip->pmp); 442 hammer2_trans_init(&trans, ip->pmp, 0); 443 cluster = hammer2_inode_lock_ex(ip); 444 ripdata = &hammer2_cluster_data(cluster)->ipdata; 445 error = 0; 446 447 if (vap->va_flags != VNOVAL) { 448 u_int32_t flags; 449 450 flags = ripdata->uflags; 451 error = vop_helper_setattr_flags(&flags, vap->va_flags, 452 hammer2_to_unix_xid(&ripdata->uid), 453 ap->a_cred); 454 if (error == 0) { 455 
if (ripdata->uflags != flags) { 456 wipdata = hammer2_cluster_modify_ip(&trans, ip, 457 cluster, 0); 458 wipdata->uflags = flags; 459 wipdata->ctime = ctime; 460 kflags |= NOTE_ATTRIB; 461 dosync = 1; 462 ripdata = wipdata; 463 } 464 if (ripdata->uflags & (IMMUTABLE | APPEND)) { 465 error = 0; 466 goto done; 467 } 468 } 469 goto done; 470 } 471 if (ripdata->uflags & (IMMUTABLE | APPEND)) { 472 error = EPERM; 473 goto done; 474 } 475 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 476 mode_t cur_mode = ripdata->mode; 477 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid); 478 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid); 479 uuid_t uuid_uid; 480 uuid_t uuid_gid; 481 482 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 483 ap->a_cred, 484 &cur_uid, &cur_gid, &cur_mode); 485 if (error == 0) { 486 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 487 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 488 if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) || 489 bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) || 490 ripdata->mode != cur_mode 491 ) { 492 wipdata = hammer2_cluster_modify_ip(&trans, ip, 493 cluster, 0); 494 wipdata->uid = uuid_uid; 495 wipdata->gid = uuid_gid; 496 wipdata->mode = cur_mode; 497 wipdata->ctime = ctime; 498 dosync = 1; 499 ripdata = wipdata; 500 } 501 kflags |= NOTE_ATTRIB; 502 } 503 } 504 505 /* 506 * Resize the file 507 */ 508 if (vap->va_size != VNOVAL && ip->size != vap->va_size) { 509 switch(vp->v_type) { 510 case VREG: 511 if (vap->va_size == ip->size) 512 break; 513 hammer2_inode_unlock_ex(ip, cluster); 514 if (vap->va_size < ip->size) { 515 hammer2_truncate_file(ip, vap->va_size); 516 } else { 517 hammer2_extend_file(ip, vap->va_size); 518 } 519 cluster = hammer2_inode_lock_ex(ip); 520 /* RELOAD */ 521 ripdata = &hammer2_cluster_data(cluster)->ipdata; 522 domtime = 1; 523 break; 524 default: 525 error = EINVAL; 526 goto done; 527 } 528 } 529 #if 0 530 /* atime not supported */ 531 if (vap->va_atime.tv_sec != VNOVAL) { 532 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); 533 wipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 534 kflags |= NOTE_ATTRIB; 535 dosync = 1; 536 ripdata = wipdata; 537 } 538 #endif 539 if (vap->va_mtime.tv_sec != VNOVAL) { 540 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); 541 wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 542 kflags |= NOTE_ATTRIB; 543 domtime = 0; 544 dosync = 1; 545 ripdata = wipdata; 546 } 547 if (vap->va_mode != (mode_t)VNOVAL) { 548 mode_t cur_mode = ripdata->mode; 549 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid); 550 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid); 551 552 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 553 cur_uid, cur_gid, &cur_mode); 554 if (error == 0 && ripdata->mode != cur_mode) { 555 wipdata = hammer2_cluster_modify_ip(&trans, ip, 556 cluster, 0); 557 wipdata->mode = cur_mode; 558 wipdata->ctime = ctime; 559 kflags |= NOTE_ATTRIB; 560 dosync = 1; 561 ripdata = wipdata; 562 } 563 } 564 565 /* 566 * If a truncation occurred we must call inode_fsync() now in order 567 * to trim the related data chains, otherwise a later expansion can 568 * cause havoc. 569 */ 570 if (dosync) { 571 hammer2_cluster_modsync(cluster); 572 dosync = 0; 573 } 574 hammer2_inode_fsync(&trans, ip, cluster); 575 576 /* 577 * Cleanup. If domtime is set an additional inode modification 578 * must be flagged. All other modifications will have already 579 * set INODE_MODIFIED and called vsetisdirty(). 
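	 *
	 * That is, a pure file-size change defers its mtime update to the
	 * normal flush (MODIFIED|MTIME are set below), while the other
	 * attribute updates above were written through
	 * hammer2_cluster_modify_ip() and synchronized with
	 * hammer2_cluster_modsync().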
 */
done:
	if (domtime) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
					   HAMMER2_INODE_MTIME);
		vsetisdirty(ip->vp);
	}
	if (dosync)
		hammer2_cluster_modsync(cluster);
	hammer2_inode_unlock_ex(ip, cluster);
	hammer2_trans_done(&trans);
	hammer2_knote(ip->vp, kflags);

	return (error);
}

static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *xcluster;
	hammer2_blockref_t bref;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int dtype;
	int ddflag;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Setup directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	cparent = hammer2_inode_lock_sh(ip);
	ipdata = &hammer2_cluster_data(cparent)->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	cluster = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			hammer2_inode_unlock_sh(ip, cparent);
			xcluster = hammer2_inode_lock_sh(xip);
			cparent = hammer2_inode_lock_sh(ip);
			hammer2_inode_drop(xip);
			ipdata = &hammer2_cluster_data(cparent)->ipdata;
			if (xip == ip->pip) {
				inum = hammer2_cluster_data(xcluster)->
					ipdata.inum & HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock_sh(xip, xcluster);
				break;
			}
			hammer2_inode_unlock_sh(xip, xcluster);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);

	/*
	 * parent is the inode cluster, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
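	 *
	 * The scan below is keyed on the 64-bit directory hash.  The uio
	 * offset doubles as the resume point: lkey is the offset with
	 * DIRHASH_VISIBLE set, each entry's cookie is its bref.key masked
	 * with HAMMER2_DIRHASH_USERMSK, and saveoff is advanced past that
	 * key for the next iteration.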
707 */ 708 if (error) { 709 goto done; 710 } 711 cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey, 712 HAMMER2_LOOKUP_SHARED, &ddflag); 713 if (cluster == NULL) { 714 cluster = hammer2_cluster_lookup(cparent, &key_next, 715 lkey, (hammer2_key_t)-1, 716 HAMMER2_LOOKUP_SHARED, &ddflag); 717 } 718 if (cluster) 719 hammer2_cluster_bref(cluster, &bref); 720 while (cluster) { 721 if (hammer2_debug & 0x0020) 722 kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n", 723 cparent->focus, cluster->focus, 724 bref.key, key_next); 725 726 if (bref.type == HAMMER2_BREF_TYPE_INODE) { 727 ipdata = &hammer2_cluster_data(cluster)->ipdata; 728 dtype = hammer2_get_dtype(ipdata); 729 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; 730 r = vop_write_dirent(&error, uio, 731 ipdata->inum & 732 HAMMER2_DIRHASH_USERMSK, 733 dtype, 734 ipdata->name_len, 735 ipdata->filename); 736 if (r) 737 break; 738 if (cookies) 739 cookies[cookie_index] = saveoff; 740 ++cookie_index; 741 } else { 742 /* XXX chain error */ 743 kprintf("bad chain type readdir %d\n", bref.type); 744 } 745 746 /* 747 * Keys may not be returned in order so once we have a 748 * placemarker (cluster) the scan must allow the full range 749 * or some entries will be missed. 750 */ 751 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 752 key_next, (hammer2_key_t)-1, 753 HAMMER2_LOOKUP_SHARED); 754 if (cluster) { 755 hammer2_cluster_bref(cluster, &bref); 756 saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1; 757 } else { 758 saveoff = (hammer2_key_t)-1; 759 } 760 if (cookie_index == ncookies) 761 break; 762 } 763 if (cluster) 764 hammer2_cluster_unlock(cluster); 765 done: 766 hammer2_inode_unlock_sh(ip, cparent); 767 if (ap->a_eofflag) 768 *ap->a_eofflag = (cluster == NULL); 769 if (hammer2_debug & 0x0020) 770 kprintf("readdir: done at %016jx\n", saveoff); 771 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 772 if (error && cookie_index == 0) { 773 if (cookies) { 774 kfree(cookies, M_TEMP); 775 *ap->a_ncookies = 0; 776 *ap->a_cookies = NULL; 777 } 778 } else { 779 if (cookies) { 780 *ap->a_ncookies = cookie_index; 781 *ap->a_cookies = cookies; 782 } 783 } 784 return (error); 785 } 786 787 /* 788 * hammer2_vop_readlink { vp, uio, cred } 789 */ 790 static 791 int 792 hammer2_vop_readlink(struct vop_readlink_args *ap) 793 { 794 struct vnode *vp; 795 hammer2_inode_t *ip; 796 int error; 797 798 vp = ap->a_vp; 799 if (vp->v_type != VLNK) 800 return (EINVAL); 801 ip = VTOI(vp); 802 803 error = hammer2_read_file(ip, ap->a_uio, 0); 804 return (error); 805 } 806 807 static 808 int 809 hammer2_vop_read(struct vop_read_args *ap) 810 { 811 struct vnode *vp; 812 hammer2_inode_t *ip; 813 struct uio *uio; 814 int error; 815 int seqcount; 816 int bigread; 817 818 /* 819 * Read operations supported on this vnode? 820 */ 821 vp = ap->a_vp; 822 if (vp->v_type != VREG) 823 return (EINVAL); 824 825 /* 826 * Misc 827 */ 828 ip = VTOI(vp); 829 uio = ap->a_uio; 830 error = 0; 831 832 seqcount = ap->a_ioflag >> 16; 833 bigread = (uio->uio_resid > 100 * 1024 * 1024); 834 835 error = hammer2_read_file(ip, uio, seqcount); 836 return (error); 837 } 838 839 static 840 int 841 hammer2_vop_write(struct vop_write_args *ap) 842 { 843 hammer2_inode_t *ip; 844 hammer2_trans_t trans; 845 thread_t td; 846 struct vnode *vp; 847 struct uio *uio; 848 int error; 849 int seqcount; 850 int bigwrite; 851 852 /* 853 * Read operations supported on this vnode? 
854 */ 855 vp = ap->a_vp; 856 if (vp->v_type != VREG) 857 return (EINVAL); 858 859 /* 860 * Misc 861 */ 862 ip = VTOI(vp); 863 uio = ap->a_uio; 864 error = 0; 865 if (ip->pmp->ronly) 866 return (EROFS); 867 868 seqcount = ap->a_ioflag >> 16; 869 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 870 871 /* 872 * Check resource limit 873 */ 874 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 875 uio->uio_offset + uio->uio_resid > 876 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 877 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 878 return (EFBIG); 879 } 880 881 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 882 883 /* 884 * The transaction interlocks against flushes initiations 885 * (note: but will run concurrently with the actual flush). 886 */ 887 hammer2_trans_init(&trans, ip->pmp, 0); 888 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount); 889 hammer2_trans_done(&trans); 890 891 return (error); 892 } 893 894 /* 895 * Perform read operations on a file or symlink given an UNLOCKED 896 * inode and uio. 897 * 898 * The passed ip is not locked. 899 */ 900 static 901 int 902 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 903 { 904 hammer2_off_t size; 905 struct buf *bp; 906 int error; 907 908 error = 0; 909 910 /* 911 * UIO read loop. 912 */ 913 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 914 size = ip->size; 915 ccms_thread_unlock(&ip->topo_cst); 916 917 while (uio->uio_resid > 0 && uio->uio_offset < size) { 918 hammer2_key_t lbase; 919 hammer2_key_t leof; 920 int lblksize; 921 int loff; 922 int n; 923 924 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 925 &lbase, &leof); 926 927 error = cluster_read(ip->vp, leof, lbase, lblksize, 928 uio->uio_resid, seqcount * BKVASIZE, 929 &bp); 930 931 if (error) 932 break; 933 loff = (int)(uio->uio_offset - lbase); 934 n = lblksize - loff; 935 if (n > uio->uio_resid) 936 n = uio->uio_resid; 937 if (n > size - uio->uio_offset) 938 n = (int)(size - uio->uio_offset); 939 bp->b_flags |= B_AGE; 940 uiomove((char *)bp->b_data + loff, n, uio); 941 bqrelse(bp); 942 } 943 return (error); 944 } 945 946 /* 947 * Write to the file represented by the inode via the logical buffer cache. 948 * The inode may represent a regular file or a symlink. 949 * 950 * The inode must not be locked. 951 */ 952 static 953 int 954 hammer2_write_file(hammer2_inode_t *ip, 955 struct uio *uio, int ioflag, int seqcount) 956 { 957 hammer2_key_t old_eof; 958 hammer2_key_t new_eof; 959 struct buf *bp; 960 int kflags; 961 int error; 962 int modified; 963 964 /* 965 * Setup if append 966 */ 967 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 968 if (ioflag & IO_APPEND) 969 uio->uio_offset = ip->size; 970 old_eof = ip->size; 971 ccms_thread_unlock(&ip->topo_cst); 972 973 /* 974 * Extend the file if necessary. If the write fails at some point 975 * we will truncate it back down to cover as much as we were able 976 * to write. 977 * 978 * Doing this now makes it easier to calculate buffer sizes in 979 * the loop. 
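	 *
	 * Within the loop hammer2_calc_logical() returns the logical block
	 * size and base for the current offset.  For example (roughly),
	 * with a 64KB logical block a write starting at offset 70000 would
	 * yield lbase 65536 and loff 4464; n is then clipped to the smaller
	 * of the remaining block and the remaining uio_resid.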
980 */ 981 kflags = 0; 982 error = 0; 983 modified = 0; 984 985 if (uio->uio_offset + uio->uio_resid > old_eof) { 986 new_eof = uio->uio_offset + uio->uio_resid; 987 modified = 1; 988 hammer2_extend_file(ip, new_eof); 989 kflags |= NOTE_EXTEND; 990 } else { 991 new_eof = old_eof; 992 } 993 994 /* 995 * UIO write loop 996 */ 997 while (uio->uio_resid > 0) { 998 hammer2_key_t lbase; 999 int trivial; 1000 int endofblk; 1001 int lblksize; 1002 int loff; 1003 int n; 1004 1005 /* 1006 * Don't allow the buffer build to blow out the buffer 1007 * cache. 1008 */ 1009 if ((ioflag & IO_RECURSE) == 0) 1010 bwillwrite(HAMMER2_PBUFSIZE); 1011 1012 /* 1013 * This nominally tells us how much we can cluster and 1014 * what the logical buffer size needs to be. Currently 1015 * we don't try to cluster the write and just handle one 1016 * block at a time. 1017 */ 1018 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 1019 &lbase, NULL); 1020 loff = (int)(uio->uio_offset - lbase); 1021 1022 KKASSERT(lblksize <= 65536); 1023 1024 /* 1025 * Calculate bytes to copy this transfer and whether the 1026 * copy completely covers the buffer or not. 1027 */ 1028 trivial = 0; 1029 n = lblksize - loff; 1030 if (n > uio->uio_resid) { 1031 n = uio->uio_resid; 1032 if (loff == lbase && uio->uio_offset + n == new_eof) 1033 trivial = 1; 1034 endofblk = 0; 1035 } else { 1036 if (loff == 0) 1037 trivial = 1; 1038 endofblk = 1; 1039 } 1040 1041 /* 1042 * Get the buffer 1043 */ 1044 if (uio->uio_segflg == UIO_NOCOPY) { 1045 /* 1046 * Issuing a write with the same data backing the 1047 * buffer. Instantiate the buffer to collect the 1048 * backing vm pages, then read-in any missing bits. 1049 * 1050 * This case is used by vop_stdputpages(). 1051 */ 1052 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1053 if ((bp->b_flags & B_CACHE) == 0) { 1054 bqrelse(bp); 1055 error = bread(ip->vp, lbase, lblksize, &bp); 1056 } 1057 } else if (trivial) { 1058 /* 1059 * Even though we are entirely overwriting the buffer 1060 * we may still have to zero it out to avoid a 1061 * mmap/write visibility issue. 1062 */ 1063 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1064 if ((bp->b_flags & B_CACHE) == 0) 1065 vfs_bio_clrbuf(bp); 1066 } else { 1067 /* 1068 * Partial overwrite, read in any missing bits then 1069 * replace the portion being written. 1070 * 1071 * (The strategy code will detect zero-fill physical 1072 * blocks for this case). 1073 */ 1074 error = bread(ip->vp, lbase, lblksize, &bp); 1075 if (error == 0) 1076 bheavy(bp); 1077 } 1078 1079 if (error) { 1080 brelse(bp); 1081 break; 1082 } 1083 1084 /* 1085 * Ok, copy the data in 1086 */ 1087 error = uiomove(bp->b_data + loff, n, uio); 1088 kflags |= NOTE_WRITE; 1089 modified = 1; 1090 if (error) { 1091 brelse(bp); 1092 break; 1093 } 1094 1095 /* 1096 * WARNING: Pageout daemon will issue UIO_NOCOPY writes 1097 * with IO_SYNC or IO_ASYNC set. These writes 1098 * must be handled as the pageout daemon expects. 1099 */ 1100 if (ioflag & IO_SYNC) { 1101 bwrite(bp); 1102 } else if ((ioflag & IO_DIRECT) && endofblk) { 1103 bawrite(bp); 1104 } else if (ioflag & IO_ASYNC) { 1105 bawrite(bp); 1106 } else { 1107 bdwrite(bp); 1108 } 1109 } 1110 1111 /* 1112 * Cleanup. If we extended the file EOF but failed to write through 1113 * the entire write is a failure and we have to back-up. 
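	 *
	 * That is, on error the file is truncated back to old_eof below;
	 * otherwise the in-memory mtime is updated under topo_cst and the
	 * MTIME flag is set so the inode meta-data gets flushed later.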
1114 */ 1115 if (error && new_eof != old_eof) { 1116 hammer2_truncate_file(ip, old_eof); 1117 } else if (modified) { 1118 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1119 hammer2_update_time(&ip->mtime); 1120 atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME); 1121 ccms_thread_unlock(&ip->topo_cst); 1122 } 1123 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1124 hammer2_knote(ip->vp, kflags); 1125 vsetisdirty(ip->vp); 1126 1127 return error; 1128 } 1129 1130 /* 1131 * Truncate the size of a file. The inode must not be locked. 1132 * 1133 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1134 */ 1135 static 1136 void 1137 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1138 { 1139 hammer2_key_t lbase; 1140 int nblksize; 1141 1142 if (ip->vp) { 1143 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1144 nvtruncbuf(ip->vp, nsize, 1145 nblksize, (int)nsize & (nblksize - 1), 1146 0); 1147 } 1148 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1149 ip->size = nsize; 1150 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1151 ccms_thread_unlock(&ip->topo_cst); 1152 } 1153 1154 /* 1155 * Extend the size of a file. The inode must not be locked. 1156 * 1157 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1158 */ 1159 static 1160 void 1161 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1162 { 1163 hammer2_key_t lbase; 1164 hammer2_key_t osize; 1165 int oblksize; 1166 int nblksize; 1167 1168 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1169 osize = ip->size; 1170 ip->size = nsize; 1171 ccms_thread_unlock(&ip->topo_cst); 1172 1173 if (ip->vp) { 1174 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL); 1175 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1176 nvextendbuf(ip->vp, 1177 osize, nsize, 1178 oblksize, nblksize, 1179 -1, -1, 0); 1180 } 1181 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1182 } 1183 1184 static 1185 int 1186 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1187 { 1188 hammer2_inode_t *ip; 1189 hammer2_inode_t *dip; 1190 hammer2_cluster_t *cparent; 1191 hammer2_cluster_t *cluster; 1192 const hammer2_inode_data_t *ipdata; 1193 hammer2_key_t key_next; 1194 hammer2_key_t lhc; 1195 struct namecache *ncp; 1196 const uint8_t *name; 1197 size_t name_len; 1198 int error = 0; 1199 int ddflag; 1200 struct vnode *vp; 1201 1202 dip = VTOI(ap->a_dvp); 1203 ncp = ap->a_nch->ncp; 1204 name = ncp->nc_name; 1205 name_len = ncp->nc_nlen; 1206 lhc = hammer2_dirhash(name, name_len); 1207 1208 /* 1209 * Note: In DragonFly the kernel handles '.' and '..'. 1210 */ 1211 cparent = hammer2_inode_lock_sh(dip); 1212 cluster = hammer2_cluster_lookup(cparent, &key_next, 1213 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1214 HAMMER2_LOOKUP_SHARED, &ddflag); 1215 while (cluster) { 1216 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { 1217 ipdata = &hammer2_cluster_data(cluster)->ipdata; 1218 if (ipdata->name_len == name_len && 1219 bcmp(ipdata->filename, name, name_len) == 0) { 1220 break; 1221 } 1222 } 1223 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 1224 key_next, 1225 lhc + HAMMER2_DIRHASH_LOMASK, 1226 HAMMER2_LOOKUP_SHARED); 1227 } 1228 hammer2_inode_unlock_sh(dip, cparent); 1229 1230 /* 1231 * Resolve hardlink entries before acquiring the inode. 
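	 *
	 * A directory entry of type OBJTYPE_HARDLINK only records the
	 * target inode number; hammer2_hardlink_find() is presumably what
	 * locates the real hardlink target (kept in a common parent
	 * directory) and repoints the cluster at it.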
1232 */ 1233 if (cluster) { 1234 ipdata = &hammer2_cluster_data(cluster)->ipdata; 1235 if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1236 hammer2_tid_t inum = ipdata->inum; 1237 error = hammer2_hardlink_find(dip, cluster); 1238 if (error) { 1239 kprintf("hammer2: unable to find hardlink " 1240 "0x%016jx\n", inum); 1241 hammer2_cluster_unlock(cluster); 1242 return error; 1243 } 1244 } 1245 } 1246 1247 /* 1248 * nresolve needs to resolve hardlinks, the original cluster is not 1249 * sufficient. 1250 */ 1251 if (cluster) { 1252 ip = hammer2_inode_get(dip->pmp, dip, cluster); 1253 ipdata = &hammer2_cluster_data(cluster)->ipdata; 1254 if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1255 kprintf("nresolve: fixup hardlink\n"); 1256 hammer2_inode_ref(ip); 1257 hammer2_inode_unlock_ex(ip, NULL); 1258 hammer2_cluster_unlock(cluster); 1259 cluster = hammer2_inode_lock_ex(ip); 1260 ipdata = &hammer2_cluster_data(cluster)->ipdata; 1261 kprintf("nresolve: fixup to type %02x\n", ipdata->type); 1262 } 1263 } else { 1264 ip = NULL; 1265 } 1266 1267 #if 0 1268 /* 1269 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1270 * If an error occurs chain and ip are left alone. 1271 * 1272 * XXX upgrade shared lock? 1273 */ 1274 if (ochain && chain && 1275 chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) { 1276 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1277 chain->data->ipdata.filename); 1278 /* XXX retain shared lock on dip? (currently not held) */ 1279 hammer2_trans_init(&trans, dip->pmp, 0); 1280 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); 1281 hammer2_trans_done(&trans); 1282 } 1283 #endif 1284 1285 /* 1286 * Acquire the related vnode 1287 * 1288 * NOTE: For error processing, only ENOENT resolves the namecache 1289 * entry to NULL, otherwise we just return the error and 1290 * leave the namecache unresolved. 1291 * 1292 * NOTE: multiple hammer2_inode structures can be aliased to the 1293 * same chain element, for example for hardlinks. This 1294 * use case does not 'reattach' inode associations that 1295 * might already exist, but always allocates a new one. 1296 * 1297 * WARNING: inode structure is locked exclusively via inode_get 1298 * but chain was locked shared. inode_unlock_ex() 1299 * will handle it properly. 1300 */ 1301 if (cluster) { 1302 vp = hammer2_igetv(ip, cluster, &error); 1303 if (error == 0) { 1304 vn_unlock(vp); 1305 cache_setvp(ap->a_nch, vp); 1306 } else if (error == ENOENT) { 1307 cache_setvp(ap->a_nch, NULL); 1308 } 1309 hammer2_inode_unlock_ex(ip, cluster); 1310 1311 /* 1312 * The vp should not be released until after we've disposed 1313 * of our locks, because it might cause vop_inactive() to 1314 * be called. 
1315 */ 1316 if (vp) 1317 vrele(vp); 1318 } else { 1319 error = ENOENT; 1320 cache_setvp(ap->a_nch, NULL); 1321 } 1322 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1323 ("resolve error %d/%p ap %p\n", 1324 error, ap->a_nch->ncp->nc_vp, ap)); 1325 return error; 1326 } 1327 1328 static 1329 int 1330 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1331 { 1332 hammer2_inode_t *dip; 1333 hammer2_inode_t *ip; 1334 hammer2_cluster_t *cparent; 1335 int error; 1336 1337 dip = VTOI(ap->a_dvp); 1338 1339 if ((ip = dip->pip) == NULL) { 1340 *ap->a_vpp = NULL; 1341 return ENOENT; 1342 } 1343 cparent = hammer2_inode_lock_ex(ip); 1344 *ap->a_vpp = hammer2_igetv(ip, cparent, &error); 1345 hammer2_inode_unlock_ex(ip, cparent); 1346 1347 return error; 1348 } 1349 1350 static 1351 int 1352 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1353 { 1354 hammer2_inode_t *dip; 1355 hammer2_inode_t *nip; 1356 hammer2_trans_t trans; 1357 hammer2_cluster_t *cluster; 1358 struct namecache *ncp; 1359 const uint8_t *name; 1360 size_t name_len; 1361 int error; 1362 1363 dip = VTOI(ap->a_dvp); 1364 if (dip->pmp->ronly) 1365 return (EROFS); 1366 1367 ncp = ap->a_nch->ncp; 1368 name = ncp->nc_name; 1369 name_len = ncp->nc_nlen; 1370 cluster = NULL; 1371 1372 hammer2_pfs_memory_wait(dip->pmp); 1373 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1374 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1375 name, name_len, &cluster, &error); 1376 cluster->focus->inode_reason = 1; 1377 if (error) { 1378 KKASSERT(nip == NULL); 1379 *ap->a_vpp = NULL; 1380 } else { 1381 *ap->a_vpp = hammer2_igetv(nip, cluster, &error); 1382 hammer2_inode_unlock_ex(nip, cluster); 1383 } 1384 hammer2_trans_done(&trans); 1385 1386 if (error == 0) { 1387 cache_setunresolved(ap->a_nch); 1388 cache_setvp(ap->a_nch, *ap->a_vpp); 1389 } 1390 return error; 1391 } 1392 1393 /* 1394 * Return the largest contiguous physical disk range for the logical 1395 * request, in bytes. 1396 * 1397 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1398 * 1399 * Basically disabled, the logical buffer write thread has to deal with 1400 * buffers one-at-a-time. 1401 */ 1402 static 1403 int 1404 hammer2_vop_bmap(struct vop_bmap_args *ap) 1405 { 1406 *ap->a_doffsetp = NOOFFSET; 1407 if (ap->a_runp) 1408 *ap->a_runp = 0; 1409 if (ap->a_runb) 1410 *ap->a_runb = 0; 1411 return (EOPNOTSUPP); 1412 } 1413 1414 static 1415 int 1416 hammer2_vop_open(struct vop_open_args *ap) 1417 { 1418 return vop_stdopen(ap); 1419 } 1420 1421 /* 1422 * hammer2_vop_advlock { vp, id, op, fl, flags } 1423 */ 1424 static 1425 int 1426 hammer2_vop_advlock(struct vop_advlock_args *ap) 1427 { 1428 hammer2_inode_t *ip = VTOI(ap->a_vp); 1429 const hammer2_inode_data_t *ipdata; 1430 hammer2_cluster_t *cparent; 1431 hammer2_off_t size; 1432 1433 cparent = hammer2_inode_lock_sh(ip); 1434 ipdata = &hammer2_cluster_data(cparent)->ipdata; 1435 size = ipdata->size; 1436 hammer2_inode_unlock_sh(ip, cparent); 1437 return (lf_advlock(ap, &ip->advlock, size)); 1438 } 1439 1440 1441 static 1442 int 1443 hammer2_vop_close(struct vop_close_args *ap) 1444 { 1445 return vop_stdclose(ap); 1446 } 1447 1448 /* 1449 * hammer2_vop_nlink { nch, dvp, vp, cred } 1450 * 1451 * Create a hardlink from (vp) to {dvp, nch}. 
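 *
 * In HAMMER2 this may require consolidating the hardlink target into a
 * directory that is a common parent of both the existing location and the
 * new directory entry, then connecting the new name to it; see the comments
 * in the function body below.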
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *fdip;	/* directory the file being linked resides in */
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *cdip;	/* common parent directory */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *fdcluster;
	hammer2_cluster_t *tdcluster;
	hammer2_cluster_t *cdcluster;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	tdip = VTOI(ap->a_dvp);
	if (tdip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster and ip->pip.  The
	 * returned cluster is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 */
	fdip = ip->pip;
	cdip = hammer2_inode_common_parent(fdip, tdip);
	cdcluster = hammer2_inode_lock_ex(cdip);
	fdcluster = hammer2_inode_lock_ex(fdip);
	tdcluster = hammer2_inode_lock_ex(tdip);
	cluster = hammer2_inode_lock_ex(ip);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 1);
	if (error)
		goto done;

	/*
	 * Create a directory entry connected to the specified cluster.
	 *
	 * WARNING! chain can get moved by the connect (indirectly due to
	 *	    potential indirect block creation).
	 */
	error = hammer2_inode_connect(&trans, &cluster, 1,
				      tdip, tdcluster,
				      name, name_len, 0);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
done:
	hammer2_inode_unlock_ex(ip, cluster);
	hammer2_inode_unlock_ex(tdip, tdcluster);
	hammer2_inode_unlock_ex(fdip, fdcluster);
	hammer2_inode_unlock_ex(cdip, cdcluster);
	hammer2_trans_done(&trans);

	return error;
}

/*
 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and has done all appropriate namespace locking.
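 *
 * hammer2_inode_create() returns the new inode with its cluster locked;
 * we convert it to a vnode via hammer2_igetv() and then unlock.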
1537 */ 1538 static 1539 int 1540 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1541 { 1542 hammer2_inode_t *dip; 1543 hammer2_inode_t *nip; 1544 hammer2_trans_t trans; 1545 hammer2_cluster_t *ncluster; 1546 struct namecache *ncp; 1547 const uint8_t *name; 1548 size_t name_len; 1549 int error; 1550 1551 dip = VTOI(ap->a_dvp); 1552 if (dip->pmp->ronly) 1553 return (EROFS); 1554 1555 ncp = ap->a_nch->ncp; 1556 name = ncp->nc_name; 1557 name_len = ncp->nc_nlen; 1558 hammer2_pfs_memory_wait(dip->pmp); 1559 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1560 ncluster = NULL; 1561 1562 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1563 name, name_len, &ncluster, &error); 1564 ncluster->focus->inode_reason = 2; 1565 if (error) { 1566 KKASSERT(nip == NULL); 1567 *ap->a_vpp = NULL; 1568 } else { 1569 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1570 hammer2_inode_unlock_ex(nip, ncluster); 1571 } 1572 hammer2_trans_done(&trans); 1573 1574 if (error == 0) { 1575 cache_setunresolved(ap->a_nch); 1576 cache_setvp(ap->a_nch, *ap->a_vpp); 1577 } 1578 return error; 1579 } 1580 1581 /* 1582 * Make a device node (typically a fifo) 1583 */ 1584 static 1585 int 1586 hammer2_vop_nmknod(struct vop_nmknod_args *ap) 1587 { 1588 hammer2_inode_t *dip; 1589 hammer2_inode_t *nip; 1590 hammer2_trans_t trans; 1591 hammer2_cluster_t *ncluster; 1592 struct namecache *ncp; 1593 const uint8_t *name; 1594 size_t name_len; 1595 int error; 1596 1597 dip = VTOI(ap->a_dvp); 1598 if (dip->pmp->ronly) 1599 return (EROFS); 1600 1601 ncp = ap->a_nch->ncp; 1602 name = ncp->nc_name; 1603 name_len = ncp->nc_nlen; 1604 hammer2_pfs_memory_wait(dip->pmp); 1605 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1606 ncluster = NULL; 1607 1608 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1609 name, name_len, &ncluster, &error); 1610 ncluster->focus->inode_reason = 3; 1611 if (error) { 1612 KKASSERT(nip == NULL); 1613 *ap->a_vpp = NULL; 1614 } else { 1615 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1616 hammer2_inode_unlock_ex(nip, ncluster); 1617 } 1618 hammer2_trans_done(&trans); 1619 1620 if (error == 0) { 1621 cache_setunresolved(ap->a_nch); 1622 cache_setvp(ap->a_nch, *ap->a_vpp); 1623 } 1624 return error; 1625 } 1626 1627 /* 1628 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1629 */ 1630 static 1631 int 1632 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1633 { 1634 hammer2_inode_t *dip; 1635 hammer2_inode_t *nip; 1636 hammer2_cluster_t *ncparent; 1637 hammer2_trans_t trans; 1638 struct namecache *ncp; 1639 const uint8_t *name; 1640 size_t name_len; 1641 int error; 1642 1643 dip = VTOI(ap->a_dvp); 1644 if (dip->pmp->ronly) 1645 return (EROFS); 1646 1647 ncp = ap->a_nch->ncp; 1648 name = ncp->nc_name; 1649 name_len = ncp->nc_nlen; 1650 hammer2_pfs_memory_wait(dip->pmp); 1651 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1652 ncparent = NULL; 1653 1654 ap->a_vap->va_type = VLNK; /* enforce type */ 1655 1656 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1657 name, name_len, &ncparent, &error); 1658 ncparent->focus->inode_reason = 4; 1659 if (error) { 1660 KKASSERT(nip == NULL); 1661 *ap->a_vpp = NULL; 1662 hammer2_trans_done(&trans); 1663 return error; 1664 } 1665 *ap->a_vpp = hammer2_igetv(nip, ncparent, &error); 1666 1667 /* 1668 * Build the softlink (~like file data) and finalize the namecache. 
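	 *
	 * Short targets (up to HAMMER2_EMBEDDED_BYTES) are copied directly
	 * into the inode's embedded data area; longer targets are written
	 * through the regular file write path using a temporary SYSSPACE uio.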
1669 */ 1670 if (error == 0) { 1671 size_t bytes; 1672 struct uio auio; 1673 struct iovec aiov; 1674 hammer2_inode_data_t *nipdata; 1675 1676 nipdata = &hammer2_cluster_wdata(ncparent)->ipdata; 1677 /* nipdata = &nip->chain->data->ipdata;XXX */ 1678 bytes = strlen(ap->a_target); 1679 1680 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1681 KKASSERT(nipdata->op_flags & 1682 HAMMER2_OPFLAG_DIRECTDATA); 1683 bcopy(ap->a_target, nipdata->u.data, bytes); 1684 nipdata->size = bytes; 1685 nip->size = bytes; 1686 hammer2_cluster_modsync(ncparent); 1687 hammer2_inode_unlock_ex(nip, ncparent); 1688 /* nipdata = NULL; not needed */ 1689 } else { 1690 hammer2_inode_unlock_ex(nip, ncparent); 1691 /* nipdata = NULL; not needed */ 1692 bzero(&auio, sizeof(auio)); 1693 bzero(&aiov, sizeof(aiov)); 1694 auio.uio_iov = &aiov; 1695 auio.uio_segflg = UIO_SYSSPACE; 1696 auio.uio_rw = UIO_WRITE; 1697 auio.uio_resid = bytes; 1698 auio.uio_iovcnt = 1; 1699 auio.uio_td = curthread; 1700 aiov.iov_base = ap->a_target; 1701 aiov.iov_len = bytes; 1702 error = hammer2_write_file(nip, &auio, IO_APPEND, 0); 1703 /* XXX handle error */ 1704 error = 0; 1705 } 1706 } else { 1707 hammer2_inode_unlock_ex(nip, ncparent); 1708 } 1709 hammer2_trans_done(&trans); 1710 1711 /* 1712 * Finalize namecache 1713 */ 1714 if (error == 0) { 1715 cache_setunresolved(ap->a_nch); 1716 cache_setvp(ap->a_nch, *ap->a_vpp); 1717 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1718 } 1719 return error; 1720 } 1721 1722 /* 1723 * hammer2_vop_nremove { nch, dvp, cred } 1724 */ 1725 static 1726 int 1727 hammer2_vop_nremove(struct vop_nremove_args *ap) 1728 { 1729 hammer2_inode_t *dip; 1730 hammer2_trans_t trans; 1731 struct namecache *ncp; 1732 const uint8_t *name; 1733 size_t name_len; 1734 int error; 1735 1736 dip = VTOI(ap->a_dvp); 1737 if (dip->pmp->ronly) 1738 return(EROFS); 1739 1740 ncp = ap->a_nch->ncp; 1741 name = ncp->nc_name; 1742 name_len = ncp->nc_nlen; 1743 1744 hammer2_pfs_memory_wait(dip->pmp); 1745 hammer2_trans_init(&trans, dip->pmp, 0); 1746 error = hammer2_unlink_file(&trans, dip, name, name_len, 1747 0, NULL, ap->a_nch); 1748 hammer2_trans_done(&trans); 1749 if (error == 0) 1750 cache_unlink(ap->a_nch); 1751 return (error); 1752 } 1753 1754 /* 1755 * hammer2_vop_nrmdir { nch, dvp, cred } 1756 */ 1757 static 1758 int 1759 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1760 { 1761 hammer2_inode_t *dip; 1762 hammer2_trans_t trans; 1763 struct namecache *ncp; 1764 const uint8_t *name; 1765 size_t name_len; 1766 int error; 1767 1768 dip = VTOI(ap->a_dvp); 1769 if (dip->pmp->ronly) 1770 return(EROFS); 1771 1772 ncp = ap->a_nch->ncp; 1773 name = ncp->nc_name; 1774 name_len = ncp->nc_nlen; 1775 1776 hammer2_pfs_memory_wait(dip->pmp); 1777 hammer2_trans_init(&trans, dip->pmp, 0); 1778 hammer2_run_unlinkq(&trans, dip->pmp); 1779 error = hammer2_unlink_file(&trans, dip, name, name_len, 1780 1, NULL, ap->a_nch); 1781 hammer2_trans_done(&trans); 1782 if (error == 0) 1783 cache_unlink(ap->a_nch); 1784 return (error); 1785 } 1786 1787 /* 1788 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1789 */ 1790 static 1791 int 1792 hammer2_vop_nrename(struct vop_nrename_args *ap) 1793 { 1794 struct namecache *fncp; 1795 struct namecache *tncp; 1796 hammer2_inode_t *cdip; 1797 hammer2_inode_t *fdip; 1798 hammer2_inode_t *tdip; 1799 hammer2_inode_t *ip; 1800 hammer2_cluster_t *cluster; 1801 hammer2_cluster_t *fdcluster; 1802 hammer2_cluster_t *tdcluster; 1803 hammer2_cluster_t *cdcluster; 1804 hammer2_trans_t trans; 1805 const uint8_t *fname; 1806 size_t 
fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int hlink;

	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return(EROFS);

	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_pfs_memory_wait(tdip->pmp);
	hammer2_trans_init(&trans, tdip->pmp, 0);

	/*
	 * ip is the inode being renamed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);
	cluster = NULL;

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 *
	 * WARNING! fdip may not match ip->pip.  That is, if the source file
	 *	    is already a hardlink then what we are renaming is the
	 *	    hardlink pointer, not the hardlink itself.  The hardlink
	 *	    directory (ip->pip) will already be at a common parent
	 *	    of fdip.
	 *
	 *	    Be sure to use ip->pip when finding the common parent
	 *	    against tdip or we might accidentally move the hardlink
	 *	    target into a subdirectory that makes it inaccessible to
	 *	    other pointers.
	 */
	cdip = hammer2_inode_common_parent(ip->pip, tdip);
	cdcluster = hammer2_inode_lock_ex(cdip);
	fdcluster = hammer2_inode_lock_ex(fdip);
	tdcluster = hammer2_inode_lock_ex(tdip);

	/*
	 * Keep a tight grip on the inode so the temporary unlinking from
	 * the source location prior to linking to the target location
	 * does not cause the cluster to be destroyed.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *	 unlinking elements from their directories.  Locking
	 *	 the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_ref(ip);

	/*
	 * Remove target if it exists
	 */
	error = hammer2_unlink_file(&trans, tdip, tname, tname_len,
				    -1, NULL, ap->a_tnch);
	if (error && error != ENOENT)
		goto done;
	cache_setunresolved(ap->a_tnch);

	/*
	 * When renaming a hardlinked file we may have to re-consolidate
	 * the location of the hardlink target.  Also adjust nlinks by +1
	 * to counteract the unlink below.
	 *
	 * If ip represents a regular file the consolidation code essentially
	 * does nothing other than return the same locked cluster that was
	 * passed in.
	 *
	 * The returned cluster will be locked.
	 *
	 * WARNING! We do not currently have a local copy of ipdata but
	 *	    we do use one later; remember that it must be reloaded
	 *	    on any modification to the inode, including connects.
	 */
	cluster = hammer2_inode_lock_ex(ip);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 1);
	if (error)
		goto done;

	/*
	 * Disconnect (fdip, fname) from the source directory.  This will
	 * disconnect (ip) if it represents a direct file.  If (ip) represents
	 * a hardlink the HARDLINK pointer object will be removed but the
	 * hardlink will stay intact.
1908 * 1909 * Always pass nch as NULL because we intend to reconnect the inode, 1910 * so we don't want hammer2_unlink_file() to rename it to the hidden 1911 * open-but-unlinked directory. 1912 * 1913 * The target cluster may be marked DELETED but will not be destroyed 1914 * since we retain our hold on ip and cluster. 1915 */ 1916 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, 1917 -1, &hlink, NULL); 1918 KKASSERT(error != EAGAIN); 1919 if (error) 1920 goto done; 1921 1922 /* 1923 * Reconnect ip to target directory using cluster. Chains cannot 1924 * actually be moved, so this will duplicate the cluster in the new 1925 * spot and assign it to the ip, replacing the old cluster. 1926 * 1927 * WARNING: Because recursive locks are allowed and we unlinked the 1928 * file that we have a cluster-in-hand for just above, the 1929 * cluster might have been delete-duplicated. We must 1930 * refactor the cluster. 1931 * 1932 * WARNING: Chain locks can lock buffer cache buffers, to avoid 1933 * deadlocks we want to unlock before issuing a cache_*() 1934 * op (that might have to lock a vnode). 1935 */ 1936 hammer2_cluster_refactor(cluster); 1937 error = hammer2_inode_connect(&trans, &cluster, hlink, 1938 tdip, tdcluster, 1939 tname, tname_len, 0); 1940 cluster->focus->inode_reason = 5; 1941 if (error == 0) { 1942 KKASSERT(cluster != NULL); 1943 hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster); 1944 } 1945 done: 1946 hammer2_inode_unlock_ex(ip, cluster); 1947 hammer2_inode_unlock_ex(tdip, tdcluster); 1948 hammer2_inode_unlock_ex(fdip, fdcluster); 1949 hammer2_inode_unlock_ex(cdip, cdcluster); 1950 hammer2_inode_drop(ip); 1951 hammer2_trans_done(&trans); 1952 1953 /* 1954 * Issue the namecache update after unlocking all the internal 1955 * hammer structures, otherwise we might deadlock. 1956 */ 1957 if (error == 0) 1958 cache_rename(ap->a_fnch, ap->a_tnch); 1959 1960 return (error); 1961 } 1962 1963 /* 1964 * Strategy code 1965 * 1966 * WARNING: The strategy code cannot safely use hammer2 transactions 1967 * as this can deadlock against vfs_sync's vfsync() call 1968 * if multiple flushes are queued. 
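 *
 * Reads are resolved with a shared cluster lookup and completed via the
 * async callbacks below; writes are queued to the per-PFS write thread
 * (pmp->wthread_bioq) and throttled via hammer2_lwinprog_wait().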
1969 */ 1970 static int hammer2_strategy_read(struct vop_strategy_args *ap); 1971 static int hammer2_strategy_write(struct vop_strategy_args *ap); 1972 static void hammer2_strategy_read_callback(hammer2_io_t *dio, 1973 hammer2_cluster_t *cluster, 1974 hammer2_chain_t *chain, 1975 void *arg_p, off_t arg_o); 1976 1977 static 1978 int 1979 hammer2_vop_strategy(struct vop_strategy_args *ap) 1980 { 1981 struct bio *biop; 1982 struct buf *bp; 1983 int error; 1984 1985 biop = ap->a_bio; 1986 bp = biop->bio_buf; 1987 1988 switch(bp->b_cmd) { 1989 case BUF_CMD_READ: 1990 error = hammer2_strategy_read(ap); 1991 ++hammer2_iod_file_read; 1992 break; 1993 case BUF_CMD_WRITE: 1994 error = hammer2_strategy_write(ap); 1995 ++hammer2_iod_file_write; 1996 break; 1997 default: 1998 bp->b_error = error = EINVAL; 1999 bp->b_flags |= B_ERROR; 2000 biodone(biop); 2001 break; 2002 } 2003 2004 return (error); 2005 } 2006 2007 static 2008 int 2009 hammer2_strategy_read(struct vop_strategy_args *ap) 2010 { 2011 struct buf *bp; 2012 struct bio *bio; 2013 struct bio *nbio; 2014 hammer2_inode_t *ip; 2015 hammer2_cluster_t *cparent; 2016 hammer2_cluster_t *cluster; 2017 hammer2_key_t key_dummy; 2018 hammer2_key_t lbase; 2019 int ddflag; 2020 uint8_t btype; 2021 2022 bio = ap->a_bio; 2023 bp = bio->bio_buf; 2024 ip = VTOI(ap->a_vp); 2025 nbio = push_bio(bio); 2026 2027 lbase = bio->bio_offset; 2028 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 2029 2030 cparent = hammer2_inode_lock_sh(ip); 2031 cluster = hammer2_cluster_lookup(cparent, &key_dummy, 2032 lbase, lbase, 2033 HAMMER2_LOOKUP_NODATA | 2034 HAMMER2_LOOKUP_SHARED, 2035 &ddflag); 2036 hammer2_inode_unlock_sh(ip, cparent); 2037 2038 /* 2039 * Data is zero-fill if no cluster could be found 2040 * (XXX or EIO on a cluster failure). 2041 */ 2042 if (cluster == NULL) { 2043 bp->b_resid = 0; 2044 bp->b_error = 0; 2045 bzero(bp->b_data, bp->b_bcount); 2046 biodone(nbio); 2047 return(0); 2048 } 2049 2050 /* 2051 * Cluster elements must be type INODE or type DATA, but the 2052 * compression mode (or not) for DATA chains can be different for 2053 * each chain. This will be handled by the callback. 2054 */ 2055 btype = hammer2_cluster_type(cluster); 2056 if (btype != HAMMER2_BREF_TYPE_INODE && 2057 btype != HAMMER2_BREF_TYPE_DATA) { 2058 panic("READ PATH: hammer2_strategy_read: unknown bref type"); 2059 } 2060 hammer2_chain_load_async(cluster, hammer2_strategy_read_callback, nbio); 2061 return(0); 2062 } 2063 2064 /* 2065 * Read callback for block that is not compressed. 2066 */ 2067 static 2068 void 2069 hammer2_strategy_read_callback(hammer2_io_t *dio, 2070 hammer2_cluster_t *cluster, 2071 hammer2_chain_t *chain, 2072 void *arg_p, off_t arg_o) 2073 { 2074 struct bio *bio = arg_p; 2075 struct buf *bp = bio->bio_buf; 2076 char *data; 2077 int i; 2078 2079 /* 2080 * Extract data and handle iteration on I/O failure. arg_o is the 2081 * cluster index for iteration. 
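	 *
	 * If the device buffer for the current chain has B_ERROR set we
	 * retry with the next chain in the cluster (index arg_o + 1) by
	 * issuing another hammer2_io_breadcb(); only when every chain has
	 * failed is the bio completed with the error.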

/*
 * Read callback.  Copies the chain's data into the logical buffer,
 * decompressing it first when the chain was written with LZ4 or ZLIB
 * compression.
 */
static
void
hammer2_strategy_read_callback(hammer2_io_t *dio,
			       hammer2_cluster_t *cluster,
			       hammer2_chain_t *chain,
			       void *arg_p, off_t arg_o)
{
	struct bio *bio = arg_p;
	struct buf *bp = bio->bio_buf;
	char *data;
	int i;

	/*
	 * Extract data and handle iteration on I/O failure.  arg_o is the
	 * cluster index for iteration.
	 */
	if (dio) {
		if (dio->bp->b_flags & B_ERROR) {
			i = (int)arg_o + 1;
			if (i >= cluster->nchains) {
				bp->b_flags |= B_ERROR;
				bp->b_error = dio->bp->b_error;
				biodone(bio);
				hammer2_cluster_unlock(cluster);
			} else {
				chain = cluster->array[i];
				kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
				hammer2_adjreadcounter(&chain->bref,
						       chain->bytes);
				hammer2_io_breadcb(chain->hmp,
						   chain->bref.data_off,
						   chain->bytes,
						   hammer2_strategy_read_callback,
						   cluster, chain,
						   arg_p, (off_t)i);
			}
			return;
		}
		data = hammer2_io_data(dio, chain->bref.data_off);
	} else {
		data = (void *)chain->data;
	}

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
							bio);
			break;
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
							 bio);
			break;
		case HAMMER2_COMP_NONE:
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			}
			bp->b_flags |= B_NOTMETA;
			bp->b_resid = 0;
			bp->b_error = 0;
			break;
		default:
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		}
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
	}
	hammer2_cluster_unlock(cluster);
	biodone(bio);
}

static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	pmp = ip->pmp;

	hammer2_lwinprog_ref(pmp);
	mtx_lock(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
	} else {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
	}
	hammer2_lwinprog_wait(pmp);

	return(0);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	return (error);
}
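
/*
 * NOTE: hammer2_vop_mountctl() below only handles MOUNTCTL_SET_EXPORT
 *	 locally (NFS export configuration via vfs_export()); all other
 *	 mountctl operations fall through to vop_stdmountctl().
 */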

static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfsmount_t *pmp;
	int rc;

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	return (rc);
}

/*
 * This handles unlinked open files after the vnode is finally dereferenced.
 */
void
hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp)
{
	hammer2_inode_unlink_t *ipul;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;

	if (TAILQ_EMPTY(&pmp->unlinkq))
		return;

	spin_lock(&pmp->unlinkq_spin);
	while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
		TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
		spin_unlock(&pmp->unlinkq_spin);
		ip = ipul->ip;
		kfree(ipul, pmp->minode);

		cluster = hammer2_inode_lock_ex(ip);
		KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_UNLINKED);
		kprintf("hammer2: unlink on reclaim: %s\n",
			cluster->focus->data->ipdata.filename);
		hammer2_cluster_delete(trans, cluster, 0);
		hammer2_inode_unlock_ex(ip, cluster);	/* inode lock */
		hammer2_inode_drop(ip);			/* ipul ref */

		spin_lock(&pmp->unlinkq_spin);
	}
	spin_unlock(&pmp->unlinkq_spin);
}

/*
 * KQFILTER
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };

static
int
hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &hammer2read_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &hammer2write_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &hammer2vnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

static void
filt_hammer2detach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}
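
/*
 * NOTE: filt_hammer2read reports in kn_data the number of bytes available
 *	 between the descriptor's current offset and EOF, clamped to
 *	 INTPTR_MAX.  Illustrative userland usage (standard kqueue(2) API,
 *	 shown only to clarify what these filters service; not part of
 *	 this file):
 *
 *		int kq = kqueue();
 *		struct kevent ev;
 *
 *		EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		kevent(kq, &ev, 1, NULL, 0, NULL);    (register the filter)
 *		kevent(kq, NULL, 0, &ev, 1, NULL);    (ev.data = readable bytes)
 */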

static int
filt_hammer2read(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	hammer2_inode_t *ip = VTOI(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}
	off = ip->size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI)
		return(1);
	return (kn->kn_data != 0);
}

static int
filt_hammer2write(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}

static int
filt_hammer2vnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}

/*
 * FIFO VOPS
 */
static
int
hammer2_vop_markatime(struct vop_markatime_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	if (ip->pmp->ronly)
		return(EROFS);
	return(0);
}

static
int
hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	if (error)
		error = hammer2_vop_kqfilter(ap);
	return(error);
}

/*
 * VOPS vector
 */
struct vop_ops hammer2_vnode_vops = {
	.vop_default	= vop_defaultop,
	.vop_fsync	= hammer2_vop_fsync,
	.vop_getpages	= vop_stdgetpages,
	.vop_putpages	= vop_stdputpages,
	.vop_access	= hammer2_vop_access,
	.vop_advlock	= hammer2_vop_advlock,
	.vop_close	= hammer2_vop_close,
	.vop_nlink	= hammer2_vop_nlink,
	.vop_ncreate	= hammer2_vop_ncreate,
	.vop_nsymlink	= hammer2_vop_nsymlink,
	.vop_nremove	= hammer2_vop_nremove,
	.vop_nrmdir	= hammer2_vop_nrmdir,
	.vop_nrename	= hammer2_vop_nrename,
	.vop_getattr	= hammer2_vop_getattr,
	.vop_setattr	= hammer2_vop_setattr,
	.vop_readdir	= hammer2_vop_readdir,
	.vop_readlink	= hammer2_vop_readlink,
	.vop_read	= hammer2_vop_read,
	.vop_write	= hammer2_vop_write,
	.vop_open	= hammer2_vop_open,
	.vop_inactive	= hammer2_vop_inactive,
	.vop_reclaim	= hammer2_vop_reclaim,
	.vop_nresolve	= hammer2_vop_nresolve,
	.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
	.vop_nmkdir	= hammer2_vop_nmkdir,
	.vop_nmknod	= hammer2_vop_nmknod,
	.vop_ioctl	= hammer2_vop_ioctl,
	.vop_mountctl	= hammer2_vop_mountctl,
	.vop_bmap	= hammer2_vop_bmap,
	.vop_strategy	= hammer2_vop_strategy,
	.vop_kqfilter	= hammer2_vop_kqfilter
};

struct vop_ops hammer2_spec_vops = {
	.vop_default	= vop_defaultop,
	.vop_fsync	= hammer2_vop_fsync,
	.vop_read	= vop_stdnoread,
	.vop_write	= vop_stdnowrite,
	.vop_access	= hammer2_vop_access,
	.vop_close	= hammer2_vop_close,
	.vop_markatime	= hammer2_vop_markatime,
	.vop_getattr	= hammer2_vop_getattr,
	.vop_inactive	= hammer2_vop_inactive,
	.vop_reclaim	= hammer2_vop_reclaim,
	.vop_setattr	= hammer2_vop_setattr
};
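
/*
 * NOTE: FIFO vnodes default to fifo_vnoperate so read/write are handled
 *	 by fifofs; hammer2 overrides only the attribute, fsync,
 *	 inactive/reclaim, and kqfilter operations.  The fifo kqfilter
 *	 tries the fifofs filter first and falls back to
 *	 hammer2_vop_kqfilter() on error.
 */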

struct vop_ops hammer2_fifo_vops = {
	.vop_default	= fifo_vnoperate,
	.vop_fsync	= hammer2_vop_fsync,
#if 0
	.vop_read	= hammer2_vop_fiforead,
	.vop_write	= hammer2_vop_fifowrite,
#endif
	.vop_access	= hammer2_vop_access,
#if 0
	.vop_close	= hammer2_vop_fifoclose,
#endif
	.vop_markatime	= hammer2_vop_markatime,
	.vop_getattr	= hammer2_vop_getattr,
	.vop_inactive	= hammer2_vop_inactive,
	.vop_reclaim	= hammer2_vop_reclaim,
	.vop_setattr	= hammer2_vop_setattr,
	.vop_kqfilter	= hammer2_vop_fifokqfilter
};