1 /* 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * Kernel Filesystem interface 37 * 38 * NOTE! local ipdata pointers must be reloaded on any modifying operation 39 * to the inode as its underlying chain may have changed. 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/fcntl.h> 46 #include <sys/buf.h> 47 #include <sys/proc.h> 48 #include <sys/namei.h> 49 #include <sys/mount.h> 50 #include <sys/vnode.h> 51 #include <sys/mountctl.h> 52 #include <sys/dirent.h> 53 #include <sys/uio.h> 54 55 #include "hammer2.h" 56 57 #define ZFOFFSET (-2LL) 58 59 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 60 int seqcount); 61 static int hammer2_write_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 62 hammer2_chain_t **parentp, 63 struct uio *uio, int ioflag, int seqcount); 64 static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, 65 int ioflag); 66 static hammer2_chain_t *hammer2_assign_physical(hammer2_trans_t *trans, 67 hammer2_inode_t *ip, hammer2_chain_t **parentp, 68 hammer2_key_t lbase, int lblksize, 69 int *errorp); 70 static void hammer2_extend_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 71 hammer2_chain_t **parentp, hammer2_key_t nsize); 72 static void hammer2_truncate_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 73 hammer2_chain_t **parentp, hammer2_key_t nsize); 74 75 static __inline 76 void 77 hammer2_knote(struct vnode *vp, int flags) 78 { 79 if (flags) 80 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 81 } 82 83 /* 84 * Last reference to a vnode is going away but it is still cached. 85 */ 86 static 87 int 88 hammer2_vop_inactive(struct vop_inactive_args *ap) 89 { 90 hammer2_inode_t *ip; 91 hammer2_chain_t *parent; 92 struct vnode *vp; 93 94 vp = ap->a_vp; 95 ip = VTOI(vp); 96 97 /* 98 * Degenerate case 99 */ 100 if (ip == NULL) { 101 vrecycle(vp); 102 return (0); 103 } 104 105 /* 106 * Detect updates to the embedded data which may be synchronized by 107 * the strategy code. Simply mark the inode modified so it gets 108 * picked up by our normal flush. 109 */ 110 parent = hammer2_inode_lock_ex(ip); 111 KKASSERT(parent); 112 113 /* 114 * Check for deleted inodes and recycle immediately. 115 */ 116 if (parent->flags & HAMMER2_CHAIN_DELETED) { 117 hammer2_inode_unlock_ex(ip, parent); 118 vrecycle(vp); 119 } else { 120 hammer2_inode_unlock_ex(ip, parent); 121 } 122 return (0); 123 } 124 125 /* 126 * Reclaim a vnode so that it can be reused; after the inode is 127 * disassociated, the filesystem must manage it alone. 128 */ 129 static 130 int 131 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 132 { 133 hammer2_chain_t *chain; 134 hammer2_inode_t *ip; 135 #if 0 136 hammer2_trans_t trans; 137 #endif 138 struct vnode *vp; 139 140 vp = ap->a_vp; 141 ip = VTOI(vp); 142 if (ip == NULL) 143 return(0); 144 145 /* 146 * Set SUBMODIFIED so we can detect and propagate the DESTROYED 147 * bit in the flush code. 148 * 149 * ip->chain might be stale, correct it before checking as older 150 * versions of the chain are likely marked deleted even if the 151 * file hasn't been. XXX ip->chain should never be stale on 152 * reclaim. 153 */ 154 chain = hammer2_inode_lock_ex(ip); 155 #if 0 156 if (chain->next_parent) 157 kprintf("RECLAIM DUPLINKED IP: %p ip->ch=%p ch=%p np=%p\n", 158 ip, ip->chain, chain, chain->next_parent); 159 #endif 160 161 /* 162 * The final close of a deleted file or directory marks it for 163 * destruction. The DESTROYED flag allows the flusher to shortcut 164 * any modified blocks still unflushed (that is, just ignore them). 165 * 166 * HAMMER2 usually does not try to optimize the freemap by returning 167 * deleted blocks to it as it does not usually know how many snapshots 168 * might be referencing portions of the file/dir. XXX TODO. 169 * 170 * XXX TODO - However, any modified file as-of when a snapshot is made 171 * cannot use this optimization as some of the modifications 172 * may wind up being part of the snapshot. 173 */ 174 vp->v_data = NULL; 175 ip->vp = NULL; 176 if (chain->flags & HAMMER2_CHAIN_DELETED) { 177 KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); 178 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED | 179 HAMMER2_CHAIN_SUBMODIFIED); 180 } 181 #if 0 182 /* 183 * XXX chains will be flushed on sync, no need to do it here. 184 */ 185 if (chain->flags & (HAMMER2_CHAIN_MODIFIED | 186 HAMMER2_CHAIN_DELETED | 187 HAMMER2_CHAIN_SUBMODIFIED)) { 188 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 189 hammer2_chain_flush(&trans, chain); 190 hammer2_trans_done(&trans); 191 } 192 #endif 193 hammer2_inode_unlock_ex(ip, chain); /* unlock */ 194 hammer2_inode_drop(ip); /* vp ref */ 195 /* chain no longer referenced */ 196 /* chain = NULL; not needed */ 197 198 /* 199 * XXX handle background sync when ip dirty, kernel will no longer 200 * notify us regarding this inode because there is no longer a 201 * vnode attached to it. 202 */ 203 204 return (0); 205 } 206 207 static 208 int 209 hammer2_vop_fsync(struct vop_fsync_args *ap) 210 { 211 hammer2_inode_t *ip; 212 hammer2_trans_t trans; 213 hammer2_chain_t *chain; 214 struct vnode *vp; 215 216 vp = ap->a_vp; 217 ip = VTOI(vp); 218 219 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 220 chain = hammer2_inode_lock_ex(ip); 221 222 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 223 224 /* 225 * Calling chain_flush here creates a lot of duplicative 226 * COW operations due to non-optimal vnode ordering. 227 * 228 * Only do it for an actual fsync() syscall. The other forms 229 * which call this function will eventually call chain_flush 230 * on the volume root as a catch-all, which is far more optimal. 231 */ 232 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 233 if (ap->a_flags & VOP_FSYNC_SYSCALL) { 234 hammer2_chain_flush(&trans, chain); 235 } 236 hammer2_inode_unlock_ex(ip, chain); 237 hammer2_trans_done(&trans); 238 239 return (0); 240 } 241 242 static 243 int 244 hammer2_vop_access(struct vop_access_args *ap) 245 { 246 hammer2_inode_t *ip = VTOI(ap->a_vp); 247 hammer2_inode_data_t *ipdata; 248 hammer2_chain_t *chain; 249 uid_t uid; 250 gid_t gid; 251 int error; 252 253 chain = hammer2_inode_lock_sh(ip); 254 ipdata = &chain->data->ipdata; 255 uid = hammer2_to_unix_xid(&ipdata->uid); 256 gid = hammer2_to_unix_xid(&ipdata->gid); 257 error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); 258 hammer2_inode_unlock_sh(ip, chain); 259 260 return (error); 261 } 262 263 static 264 int 265 hammer2_vop_getattr(struct vop_getattr_args *ap) 266 { 267 hammer2_inode_data_t *ipdata; 268 hammer2_chain_t *chain; 269 hammer2_pfsmount_t *pmp; 270 hammer2_inode_t *ip; 271 struct vnode *vp; 272 struct vattr *vap; 273 274 vp = ap->a_vp; 275 vap = ap->a_vap; 276 277 ip = VTOI(vp); 278 pmp = ip->pmp; 279 280 chain = hammer2_inode_lock_sh(ip); 281 ipdata = &chain->data->ipdata; 282 283 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 284 vap->va_fileid = ipdata->inum; 285 vap->va_mode = ipdata->mode; 286 vap->va_nlink = ipdata->nlinks; 287 vap->va_uid = hammer2_to_unix_xid(&ipdata->uid); 288 vap->va_gid = hammer2_to_unix_xid(&ipdata->gid); 289 vap->va_rmajor = 0; 290 vap->va_rminor = 0; 291 vap->va_size = ipdata->size; 292 vap->va_blocksize = HAMMER2_PBUFSIZE; 293 vap->va_flags = ipdata->uflags; 294 hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime); 295 hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime); 296 hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); 297 vap->va_gen = 1; 298 vap->va_bytes = vap->va_size; /* XXX */ 299 vap->va_type = hammer2_get_vtype(chain); 300 vap->va_filerev = 0; 301 vap->va_uid_uuid = ipdata->uid; 302 vap->va_gid_uuid = ipdata->gid; 303 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 304 VA_FSID_UUID_VALID; 305 306 hammer2_inode_unlock_sh(ip, chain); 307 308 return (0); 309 } 310 311 static 312 int 313 hammer2_vop_setattr(struct vop_setattr_args *ap) 314 { 315 hammer2_inode_data_t *ipdata; 316 hammer2_inode_t *ip; 317 hammer2_chain_t *chain; 318 hammer2_trans_t trans; 319 struct vnode *vp; 320 struct vattr *vap; 321 int error; 322 int kflags = 0; 323 int domtime = 0; 324 uint64_t ctime; 325 326 vp = ap->a_vp; 327 vap = ap->a_vap; 328 hammer2_update_time(&ctime); 329 330 ip = VTOI(vp); 331 332 if (ip->pmp->ronly) 333 return(EROFS); 334 335 hammer2_trans_init(&trans, ip->pmp, 0); 336 chain = hammer2_inode_lock_ex(ip); 337 ipdata = &chain->data->ipdata; 338 error = 0; 339 340 if (vap->va_flags != VNOVAL) { 341 u_int32_t flags; 342 343 flags = ipdata->uflags; 344 error = vop_helper_setattr_flags(&flags, vap->va_flags, 345 hammer2_to_unix_xid(&ipdata->uid), 346 ap->a_cred); 347 if (error == 0) { 348 if (ipdata->uflags != flags) { 349 ipdata = hammer2_chain_modify_ip(&trans, ip, 350 &chain, 0); 351 ipdata->uflags = flags; 352 ipdata->ctime = ctime; 353 kflags |= NOTE_ATTRIB; 354 } 355 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 356 error = 0; 357 goto done; 358 } 359 } 360 goto done; 361 } 362 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 363 error = EPERM; 364 goto done; 365 } 366 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 367 mode_t cur_mode = ipdata->mode; 368 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 369 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 370 uuid_t uuid_uid; 371 uuid_t uuid_gid; 372 373 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 374 ap->a_cred, 375 &cur_uid, &cur_gid, &cur_mode); 376 if (error == 0) { 377 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 378 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 379 if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) || 380 bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) || 381 ipdata->mode != cur_mode 382 ) { 383 ipdata = hammer2_chain_modify_ip(&trans, ip, 384 &chain, 0); 385 ipdata->uid = uuid_uid; 386 ipdata->gid = uuid_gid; 387 ipdata->mode = cur_mode; 388 ipdata->ctime = ctime; 389 } 390 kflags |= NOTE_ATTRIB; 391 } 392 } 393 394 /* 395 * Resize the file 396 */ 397 if (vap->va_size != VNOVAL && ipdata->size != vap->va_size) { 398 switch(vp->v_type) { 399 case VREG: 400 if (vap->va_size == ipdata->size) 401 break; 402 if (vap->va_size < ipdata->size) { 403 hammer2_truncate_file(&trans, ip, 404 &chain, vap->va_size); 405 } else { 406 hammer2_extend_file(&trans, ip, 407 &chain, vap->va_size); 408 } 409 ipdata = &chain->data->ipdata; /* RELOAD */ 410 domtime = 1; 411 break; 412 default: 413 error = EINVAL; 414 goto done; 415 } 416 } 417 #if 0 418 /* atime not supported */ 419 if (vap->va_atime.tv_sec != VNOVAL) { 420 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 421 ipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 422 kflags |= NOTE_ATTRIB; 423 } 424 #endif 425 if (vap->va_mtime.tv_sec != VNOVAL) { 426 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 427 ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 428 kflags |= NOTE_ATTRIB; 429 } 430 if (vap->va_mode != (mode_t)VNOVAL) { 431 mode_t cur_mode = ipdata->mode; 432 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 433 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 434 435 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 436 cur_uid, cur_gid, &cur_mode); 437 if (error == 0 && ipdata->mode != cur_mode) { 438 ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); 439 ipdata->mode = cur_mode; 440 ipdata->ctime = ctime; 441 kflags |= NOTE_ATTRIB; 442 } 443 } 444 done: 445 hammer2_inode_unlock_ex(ip, chain); 446 hammer2_trans_done(&trans); 447 return (error); 448 } 449 450 static 451 int 452 hammer2_vop_readdir(struct vop_readdir_args *ap) 453 { 454 hammer2_inode_data_t *ipdata; 455 hammer2_inode_t *ip; 456 hammer2_inode_t *xip; 457 hammer2_chain_t *parent; 458 hammer2_chain_t *chain; 459 hammer2_chain_t *xchain; 460 hammer2_tid_t inum; 461 hammer2_key_t lkey; 462 struct uio *uio; 463 off_t *cookies; 464 off_t saveoff; 465 int cookie_index; 466 int ncookies; 467 int error; 468 int dtype; 469 int r; 470 471 ip = VTOI(ap->a_vp); 472 uio = ap->a_uio; 473 saveoff = uio->uio_offset; 474 475 /* 476 * Setup cookies directory entry cookies if requested 477 */ 478 if (ap->a_ncookies) { 479 ncookies = uio->uio_resid / 16 + 1; 480 if (ncookies > 1024) 481 ncookies = 1024; 482 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 483 } else { 484 ncookies = -1; 485 cookies = NULL; 486 } 487 cookie_index = 0; 488 489 parent = hammer2_inode_lock_sh(ip); 490 ipdata = &parent->data->ipdata; 491 492 /* 493 * Handle artificial entries. To ensure that only positive 64 bit 494 * quantities are returned to userland we always strip off bit 63. 495 * The hash code is designed such that codes 0x0000-0x7FFF are not 496 * used, allowing us to use these codes for articial entries. 497 * 498 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not 499 * allow '..' to cross the mount point into (e.g.) the super-root. 500 */ 501 error = 0; 502 chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ 503 504 if (saveoff == 0) { 505 inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; 506 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, "."); 507 if (r) 508 goto done; 509 if (cookies) 510 cookies[cookie_index] = saveoff; 511 ++saveoff; 512 ++cookie_index; 513 if (cookie_index == ncookies) 514 goto done; 515 } 516 517 if (saveoff == 1) { 518 /* 519 * Be careful with lockorder when accessing ".." 520 * 521 * (ip is the current dir. xip is the parent dir). 522 */ 523 inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; 524 while (ip->pip != NULL && ip != ip->pmp->iroot) { 525 xip = ip->pip; 526 hammer2_inode_ref(xip); 527 hammer2_inode_unlock_sh(ip, parent); 528 xchain = hammer2_inode_lock_sh(xip); 529 parent = hammer2_inode_lock_sh(ip); 530 hammer2_inode_drop(xip); 531 if (xip == ip->pip) { 532 inum = xchain->data->ipdata.inum & 533 HAMMER2_DIRHASH_USERMSK; 534 hammer2_inode_unlock_sh(xip, xchain); 535 break; 536 } 537 hammer2_inode_unlock_sh(xip, xchain); 538 } 539 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); 540 if (r) 541 goto done; 542 if (cookies) 543 cookies[cookie_index] = saveoff; 544 ++saveoff; 545 ++cookie_index; 546 if (cookie_index == ncookies) 547 goto done; 548 } 549 550 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE; 551 552 /* 553 * parent is the inode chain, already locked for us. Don't 554 * double lock shared locks as this will screw up upgrades. 555 */ 556 if (error) { 557 goto done; 558 } 559 chain = hammer2_chain_lookup(&parent, lkey, lkey, 560 HAMMER2_LOOKUP_SHARED); 561 if (chain == NULL) { 562 chain = hammer2_chain_lookup(&parent, 563 lkey, (hammer2_key_t)-1, 564 HAMMER2_LOOKUP_SHARED); 565 } 566 while (chain) { 567 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 568 dtype = hammer2_get_dtype(chain); 569 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK; 570 r = vop_write_dirent(&error, uio, 571 chain->data->ipdata.inum & 572 HAMMER2_DIRHASH_USERMSK, 573 dtype, 574 chain->data->ipdata.name_len, 575 chain->data->ipdata.filename); 576 if (r) 577 break; 578 if (cookies) 579 cookies[cookie_index] = saveoff; 580 ++cookie_index; 581 } else { 582 /* XXX chain error */ 583 kprintf("bad chain type readdir %d\n", 584 chain->bref.type); 585 } 586 587 /* 588 * Keys may not be returned in order so once we have a 589 * placemarker (chain) the scan must allow the full range 590 * or some entries will be missed. 591 */ 592 chain = hammer2_chain_next(&parent, chain, 593 HAMMER2_DIRHASH_VISIBLE, 594 (hammer2_key_t)-1, 595 HAMMER2_LOOKUP_SHARED); 596 if (chain) { 597 saveoff = (chain->bref.key & 598 HAMMER2_DIRHASH_USERMSK) + 1; 599 } else { 600 saveoff = (hammer2_key_t)-1; 601 } 602 if (cookie_index == ncookies) 603 break; 604 } 605 if (chain) 606 hammer2_chain_unlock(chain); 607 done: 608 hammer2_inode_unlock_sh(ip, parent); 609 if (ap->a_eofflag) 610 *ap->a_eofflag = (chain == NULL); 611 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 612 if (error && cookie_index == 0) { 613 if (cookies) { 614 kfree(cookies, M_TEMP); 615 *ap->a_ncookies = 0; 616 *ap->a_cookies = NULL; 617 } 618 } else { 619 if (cookies) { 620 *ap->a_ncookies = cookie_index; 621 *ap->a_cookies = cookies; 622 } 623 } 624 return (error); 625 } 626 627 /* 628 * hammer2_vop_readlink { vp, uio, cred } 629 */ 630 static 631 int 632 hammer2_vop_readlink(struct vop_readlink_args *ap) 633 { 634 struct vnode *vp; 635 hammer2_inode_t *ip; 636 int error; 637 638 vp = ap->a_vp; 639 if (vp->v_type != VLNK) 640 return (EINVAL); 641 ip = VTOI(vp); 642 643 error = hammer2_read_file(ip, ap->a_uio, 0); 644 return (error); 645 } 646 647 static 648 int 649 hammer2_vop_read(struct vop_read_args *ap) 650 { 651 struct vnode *vp; 652 hammer2_inode_t *ip; 653 struct uio *uio; 654 int error; 655 int seqcount; 656 int bigread; 657 658 /* 659 * Read operations supported on this vnode? 660 */ 661 vp = ap->a_vp; 662 if (vp->v_type != VREG) 663 return (EINVAL); 664 665 /* 666 * Misc 667 */ 668 ip = VTOI(vp); 669 uio = ap->a_uio; 670 error = 0; 671 672 seqcount = ap->a_ioflag >> 16; 673 bigread = (uio->uio_resid > 100 * 1024 * 1024); 674 675 error = hammer2_read_file(ip, uio, seqcount); 676 return (error); 677 } 678 679 static 680 int 681 hammer2_vop_write(struct vop_write_args *ap) 682 { 683 hammer2_inode_t *ip; 684 hammer2_trans_t trans; 685 hammer2_chain_t *parent; 686 thread_t td; 687 struct vnode *vp; 688 struct uio *uio; 689 int error; 690 int seqcount; 691 int bigwrite; 692 693 /* 694 * Read operations supported on this vnode? 695 */ 696 vp = ap->a_vp; 697 if (vp->v_type != VREG) 698 return (EINVAL); 699 700 /* 701 * Misc 702 */ 703 ip = VTOI(vp); 704 uio = ap->a_uio; 705 error = 0; 706 if (ip->pmp->ronly) 707 return (EROFS); 708 709 seqcount = ap->a_ioflag >> 16; 710 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 711 712 /* 713 * Check resource limit 714 */ 715 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 716 uio->uio_offset + uio->uio_resid > 717 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 718 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 719 return (EFBIG); 720 } 721 722 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 723 724 /* 725 * ip must be locked if extending the file. 726 * ip must be locked to avoid racing a truncation. 727 * 728 * ip must be marked modified, particularly because the write 729 * might wind up being copied into the embedded data area. 730 */ 731 hammer2_trans_init(&trans, ip->pmp, 0); 732 parent = hammer2_inode_lock_ex(ip); 733 error = hammer2_write_file(&trans, ip, &parent, 734 uio, ap->a_ioflag, seqcount); 735 hammer2_inode_unlock_ex(ip, parent); 736 hammer2_trans_done(&trans); 737 738 return (error); 739 } 740 741 /* 742 * Perform read operations on a file or symlink given an UNLOCKED 743 * inode and uio. 744 * 745 * The passed ip is not locked. 746 */ 747 static 748 int 749 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 750 { 751 hammer2_off_t size; 752 hammer2_chain_t *parent; 753 struct buf *bp; 754 int error; 755 756 error = 0; 757 758 /* 759 * UIO read loop. 760 */ 761 parent = hammer2_inode_lock_sh(ip); 762 size = ip->chain->data->ipdata.size; 763 764 while (uio->uio_resid > 0 && uio->uio_offset < size) { 765 hammer2_key_t lbase; 766 hammer2_key_t leof; 767 int lblksize; 768 int loff; 769 int n; 770 771 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 772 &lbase, &leof); 773 774 error = cluster_read(ip->vp, leof, lbase, lblksize, 775 uio->uio_resid, seqcount * BKVASIZE, 776 &bp); 777 778 if (error) 779 break; 780 loff = (int)(uio->uio_offset - lbase); 781 n = lblksize - loff; 782 if (n > uio->uio_resid) 783 n = uio->uio_resid; 784 if (n > size - uio->uio_offset) 785 n = (int)(size - uio->uio_offset); 786 bp->b_flags |= B_AGE; 787 uiomove((char *)bp->b_data + loff, n, uio); 788 bqrelse(bp); 789 } 790 hammer2_inode_unlock_sh(ip, parent); 791 return (error); 792 } 793 794 /* 795 * Called with a locked (ip) to do the underlying write to a file or 796 * to build the symlink target. 797 */ 798 static 799 int 800 hammer2_write_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 801 hammer2_chain_t **parentp, 802 struct uio *uio, int ioflag, int seqcount) 803 { 804 hammer2_inode_data_t *ipdata; 805 hammer2_key_t old_eof; 806 struct buf *bp; 807 int kflags; 808 int error; 809 int modified = 0; 810 811 /* 812 * Setup if append 813 */ 814 ipdata = hammer2_chain_modify_ip(trans, ip, parentp, 0); 815 if (ioflag & IO_APPEND) 816 uio->uio_offset = ipdata->size; 817 kflags = 0; 818 error = 0; 819 820 /* 821 * vfs_sync visibility. Interlocked by the inode ex lock so we 822 * shouldn't have to reassert it multiple times if the ip->chain 823 * is modified/flushed multiple times during the write, except 824 * when we release/reacquire the inode ex lock. 825 */ 826 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 827 828 /* 829 * Extend the file if necessary. If the write fails at some point 830 * we will truncate it back down to cover as much as we were able 831 * to write. 832 * 833 * Doing this now makes it easier to calculate buffer sizes in 834 * the loop. 835 */ 836 KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK); 837 old_eof = ipdata->size; 838 if (uio->uio_offset + uio->uio_resid > ipdata->size) { 839 modified = 1; 840 hammer2_extend_file(trans, ip, parentp, 841 uio->uio_offset + uio->uio_resid); 842 ipdata = &ip->chain->data->ipdata; /* RELOAD */ 843 kflags |= NOTE_EXTEND; 844 } 845 KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK); 846 847 /* 848 * UIO write loop 849 */ 850 while (uio->uio_resid > 0) { 851 hammer2_chain_t *chain; 852 hammer2_key_t lbase; 853 hammer2_key_t leof; 854 int trivial; 855 int lblksize; 856 int loff; 857 int n; 858 859 /* 860 * Don't allow the buffer build to blow out the buffer 861 * cache. 862 */ 863 if ((ioflag & IO_RECURSE) == 0) { 864 /* 865 * XXX should try to leave this unlocked through 866 * the whole loop 867 */ 868 hammer2_inode_unlock_ex(ip, *parentp); 869 bwillwrite(HAMMER2_PBUFSIZE); 870 *parentp = hammer2_inode_lock_ex(ip); 871 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 872 ipdata = &ip->chain->data->ipdata; /* reload */ 873 } 874 875 /* XXX bigwrite & signal check test */ 876 877 /* 878 * This nominally tells us how much we can cluster and 879 * what the logical buffer size needs to be. Currently 880 * we don't try to cluster the write and just handle one 881 * block at a time. 882 */ 883 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 884 &lbase, &leof); 885 loff = (int)(uio->uio_offset - lbase); 886 887 /* 888 * Calculate bytes to copy this transfer and whether the 889 * copy completely covers the buffer or not. 890 */ 891 trivial = 0; 892 n = lblksize - loff; 893 if (n > uio->uio_resid) { 894 n = uio->uio_resid; 895 if (loff == lbase && 896 uio->uio_offset + n == ipdata->size) 897 trivial = 1; 898 } else if (loff == 0) { 899 trivial = 1; 900 } 901 902 /* 903 * Get the buffer 904 */ 905 if (uio->uio_segflg == UIO_NOCOPY) { 906 /* 907 * Issuing a write with the same data backing the 908 * buffer. Instantiate the buffer to collect the 909 * backing vm pages, then read-in any missing bits. 910 * 911 * This case is used by vop_stdputpages(). 912 */ 913 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 914 if ((bp->b_flags & B_CACHE) == 0) { 915 bqrelse(bp); 916 error = bread(ip->vp, lbase, lblksize, &bp); 917 } 918 } else if (trivial) { 919 /* 920 * Even though we are entirely overwriting the buffer 921 * we may still have to zero it out to avoid a 922 * mmap/write visibility issue. 923 */ 924 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 925 if ((bp->b_flags & B_CACHE) == 0) 926 vfs_bio_clrbuf(bp); 927 } else { 928 /* 929 * Partial overwrite, read in any missing bits then 930 * replace the portion being written. 931 * 932 * (The strategy code will detect zero-fill physical 933 * blocks for this case). 934 */ 935 error = bread(ip->vp, lbase, lblksize, &bp); 936 if (error == 0) 937 bheavy(bp); 938 } 939 940 if (error) { 941 brelse(bp); 942 break; 943 } 944 945 /* 946 * Ok, copy the data in 947 */ 948 hammer2_inode_unlock_ex(ip, *parentp); 949 error = uiomove(bp->b_data + loff, n, uio); 950 *parentp = hammer2_inode_lock_ex(ip); 951 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 952 ipdata = &ip->chain->data->ipdata; /* reload */ 953 kflags |= NOTE_WRITE; 954 modified = 1; 955 if (error) { 956 brelse(bp); 957 break; 958 } 959 960 /* 961 * We have to assign physical storage to the buffer we intend 962 * to dirty or write now to avoid deadlocks in the strategy 963 * code later. 964 * 965 * This can return NOOFFSET for inode-embedded data. The 966 * strategy code will take care of it in that case. 967 */ 968 chain = hammer2_assign_physical(trans, ip, parentp, 969 lbase, lblksize, &error); 970 ipdata = &ip->chain->data->ipdata; /* RELOAD */ 971 972 if (error) { 973 KKASSERT(chain == NULL); 974 brelse(bp); 975 break; 976 } 977 978 /* XXX update ip_data.mtime */ 979 980 /* 981 * Once we dirty a buffer any cached offset becomes invalid. 982 * 983 * NOTE: For cluster_write() always use the trailing block 984 * size, which is HAMMER2_PBUFSIZE. lblksize is the 985 * eof-straddling blocksize and is incorrect. 986 */ 987 bp->b_flags |= B_AGE; 988 hammer2_write_bp(chain, bp, ioflag); 989 hammer2_chain_unlock(chain); 990 } 991 992 /* 993 * Cleanup. If we extended the file EOF but failed to write through 994 * the entire write is a failure and we have to back-up. 995 */ 996 if (error && ipdata->size != old_eof) { 997 hammer2_truncate_file(trans, ip, parentp, old_eof); 998 ipdata = &ip->chain->data->ipdata; /* RELOAD */ 999 } else if (modified) { 1000 ipdata = hammer2_chain_modify_ip(trans, ip, parentp, 0); 1001 hammer2_update_time(&ipdata->mtime); 1002 } 1003 hammer2_knote(ip->vp, kflags); 1004 1005 return error; 1006 } 1007 1008 /* 1009 * Write the logical file bp out. 1010 */ 1011 static 1012 void 1013 hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag) 1014 { 1015 hammer2_off_t pbase; 1016 hammer2_off_t pmask; 1017 hammer2_off_t peof; 1018 struct buf *dbp; 1019 size_t boff; 1020 size_t psize; 1021 1022 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); 1023 1024 switch(chain->bref.type) { 1025 case HAMMER2_BREF_TYPE_INODE: 1026 KKASSERT(chain->data->ipdata.op_flags & 1027 HAMMER2_OPFLAG_DIRECTDATA); 1028 KKASSERT(bp->b_loffset == 0); 1029 bcopy(bp->b_data, chain->data->ipdata.u.data, 1030 HAMMER2_EMBEDDED_BYTES); 1031 break; 1032 case HAMMER2_BREF_TYPE_DATA: 1033 psize = hammer2_devblksize(chain->bytes); 1034 pmask = (hammer2_off_t)psize - 1; 1035 pbase = chain->bref.data_off & ~pmask; 1036 boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask); 1037 peof = (pbase + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64; 1038 1039 dbp = getblk(chain->hmp->devvp, pbase, psize, 0, 0); 1040 bcopy(bp->b_data, dbp->b_data + boff, chain->bytes); 1041 1042 if (ioflag & IO_SYNC) { 1043 /* 1044 * Synchronous I/O requested. 1045 */ 1046 bwrite(dbp); 1047 /* 1048 } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) { 1049 bdwrite(dbp); 1050 */ 1051 } else if (ioflag & IO_ASYNC) { 1052 bawrite(dbp); 1053 } else if (hammer2_cluster_enable) { 1054 cluster_write(dbp, peof, HAMMER2_PBUFSIZE, 4/*XXX*/); 1055 } else { 1056 bdwrite(dbp); 1057 } 1058 break; 1059 default: 1060 panic("hammer2_write_bp: bad chain type %d\n", 1061 chain->bref.type); 1062 /* NOT REACHED */ 1063 break; 1064 } 1065 bqrelse(bp); 1066 } 1067 1068 /* 1069 * Assign physical storage to a logical block. This function creates the 1070 * related meta-data chains representing the data blocks and marks them 1071 * MODIFIED. We could mark them MOVED instead but ultimately I need to 1072 * XXX code the flusher to check that the related logical buffer is 1073 * flushed. 1074 * 1075 * NOOFFSET is returned if the data is inode-embedded. In this case the 1076 * strategy code will simply bcopy() the data into the inode. 1077 * 1078 * The inode's delta_dcount is adjusted. 1079 */ 1080 static 1081 hammer2_chain_t * 1082 hammer2_assign_physical(hammer2_trans_t *trans, 1083 hammer2_inode_t *ip, hammer2_chain_t **parentp, 1084 hammer2_key_t lbase, int lblksize, int *errorp) 1085 { 1086 hammer2_chain_t *parent; 1087 hammer2_chain_t *chain; 1088 hammer2_off_t pbase; 1089 1090 /* 1091 * Locate the chain associated with lbase, return a locked chain. 1092 * However, do not instantiate any data reference (which utilizes a 1093 * device buffer) because we will be using direct IO via the 1094 * logical buffer cache buffer. 1095 */ 1096 *errorp = 0; 1097 retry: 1098 parent = *parentp; 1099 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */ 1100 chain = hammer2_chain_lookup(&parent, 1101 lbase, lbase, 1102 HAMMER2_LOOKUP_NODATA); 1103 1104 if (chain == NULL) { 1105 /* 1106 * We found a hole, create a new chain entry. 1107 * 1108 * NOTE: DATA chains are created without device backing 1109 * store (nor do we want any). 1110 */ 1111 *errorp = hammer2_chain_create(trans, &parent, &chain, 1112 lbase, HAMMER2_PBUFRADIX, 1113 HAMMER2_BREF_TYPE_DATA, 1114 lblksize); 1115 if (chain == NULL) { 1116 hammer2_chain_lookup_done(parent); 1117 panic("hammer2_chain_create: par=%p error=%d\n", 1118 parent, *errorp); 1119 goto retry; 1120 } 1121 1122 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1123 /*ip->delta_dcount += lblksize;*/ 1124 } else { 1125 switch (chain->bref.type) { 1126 case HAMMER2_BREF_TYPE_INODE: 1127 /* 1128 * The data is embedded in the inode. The 1129 * caller is responsible for marking the inode 1130 * modified and copying the data to the embedded 1131 * area. 1132 */ 1133 pbase = NOOFFSET; 1134 break; 1135 case HAMMER2_BREF_TYPE_DATA: 1136 if (chain->bytes != lblksize) { 1137 panic("hammer2_assign_physical: " 1138 "size mismatch %d/%d\n", 1139 lblksize, chain->bytes); 1140 } 1141 hammer2_chain_modify(trans, &chain, 1142 HAMMER2_MODIFY_OPTDATA); 1143 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1144 break; 1145 default: 1146 panic("hammer2_assign_physical: bad type"); 1147 /* NOT REACHED */ 1148 pbase = NOOFFSET; 1149 break; 1150 } 1151 } 1152 1153 /* 1154 * Cleanup. If chain wound up being the inode (i.e. DIRECTDATA), 1155 * we might have to replace *parentp. 1156 */ 1157 hammer2_chain_lookup_done(parent); 1158 if (chain) { 1159 if (*parentp != chain && 1160 (*parentp)->core == chain->core) { 1161 parent = *parentp; 1162 *parentp = chain; /* eats lock */ 1163 hammer2_chain_unlock(parent); 1164 hammer2_chain_lock(chain, 0); /* need another */ 1165 } 1166 /* else chain already locked for return */ 1167 } 1168 return (chain); 1169 } 1170 1171 /* 1172 * Truncate the size of a file. 1173 * 1174 * This routine adjusts ipdata->size smaller, destroying any related 1175 * data beyond the new EOF and potentially resizing the block straddling 1176 * the EOF. 1177 * 1178 * The inode must be locked. 1179 */ 1180 static 1181 void 1182 hammer2_truncate_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 1183 hammer2_chain_t **parentp, hammer2_key_t nsize) 1184 { 1185 hammer2_inode_data_t *ipdata; 1186 hammer2_chain_t *parent; 1187 hammer2_chain_t *chain; 1188 hammer2_key_t lbase; 1189 hammer2_key_t leof; 1190 struct buf *bp; 1191 int loff; 1192 int error; 1193 int oblksize; 1194 int nblksize; 1195 1196 bp = NULL; 1197 error = 0; 1198 ipdata = hammer2_chain_modify_ip(trans, ip, parentp, 0); 1199 1200 /* 1201 * Destroy any logical buffer cache buffers beyond the file EOF. 1202 * 1203 * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing 1204 * around with the buffer straddling EOF, because we need to assign 1205 * a new physical offset to it. 1206 */ 1207 if (ip->vp) { 1208 nvtruncbuf(ip->vp, nsize, 1209 HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK, 1210 1); 1211 } 1212 1213 /* 1214 * Setup for lookup/search 1215 */ 1216 parent = hammer2_chain_lookup_init(ip->chain, 0); 1217 1218 /* 1219 * Handle the case where a chain/logical-buffer straddles the new 1220 * EOF. We told nvtruncbuf() above not to mess with the logical 1221 * buffer straddling the EOF because we need to reassign its storage 1222 * and can't let the strategy code do it for us. 1223 */ 1224 loff = (int)nsize & HAMMER2_PBUFMASK; 1225 if (loff && ip->vp) { 1226 oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1227 error = bread(ip->vp, lbase, oblksize, &bp); 1228 KKASSERT(error == 0); 1229 } 1230 ipdata->size = nsize; 1231 nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1232 1233 /* 1234 * Fixup the chain element. If we have a logical buffer in-hand 1235 * we don't want to create a conflicting device buffer. 1236 */ 1237 if (loff && bp) { 1238 chain = hammer2_chain_lookup(&parent, lbase, lbase, 1239 HAMMER2_LOOKUP_NODATA); 1240 if (chain) { 1241 switch(chain->bref.type) { 1242 case HAMMER2_BREF_TYPE_DATA: 1243 hammer2_chain_resize(trans, ip, bp, 1244 parent, &chain, 1245 hammer2_getradix(nblksize), 1246 HAMMER2_MODIFY_OPTDATA); 1247 allocbuf(bp, nblksize); 1248 bzero(bp->b_data + loff, nblksize - loff); 1249 bp->b_bio2.bio_caller_info1.ptr = chain->hmp; 1250 bp->b_bio2.bio_offset = chain->bref.data_off & 1251 HAMMER2_OFF_MASK; 1252 break; 1253 case HAMMER2_BREF_TYPE_INODE: 1254 allocbuf(bp, nblksize); 1255 bzero(bp->b_data + loff, nblksize - loff); 1256 bp->b_bio2.bio_caller_info1.ptr = NULL; 1257 bp->b_bio2.bio_offset = NOOFFSET; 1258 break; 1259 default: 1260 panic("hammer2_truncate_file: bad type"); 1261 break; 1262 } 1263 hammer2_write_bp(chain, bp, 0); 1264 hammer2_chain_unlock(chain); 1265 } else { 1266 /* 1267 * Destroy clean buffer w/ wrong buffer size. Retain 1268 * backing store. 1269 */ 1270 bp->b_flags |= B_RELBUF; 1271 KKASSERT(bp->b_bio2.bio_offset == NOOFFSET); 1272 KKASSERT((bp->b_flags & B_DIRTY) == 0); 1273 bqrelse(bp); 1274 } 1275 } else if (loff) { 1276 /* 1277 * WARNING: This utilizes a device buffer for the data. 1278 * 1279 * This case should not occur because file truncations without 1280 * a vnode (and hence no logical buffer cache) should only 1281 * always truncate to 0-length. 1282 */ 1283 panic("hammer2_truncate_file: non-zero truncation, no-vnode"); 1284 } 1285 1286 /* 1287 * Clean up any fragmentory VM pages now that we have properly 1288 * resized the straddling buffer. These pages are no longer 1289 * part of the buffer. 1290 */ 1291 if (ip->vp) { 1292 nvtruncbuf(ip->vp, nsize, 1293 nblksize, (int)nsize & (nblksize - 1), 1294 1); 1295 } 1296 1297 /* 1298 * Destroy any physical blocks after the new EOF point. 1299 */ 1300 lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64; 1301 chain = hammer2_chain_lookup(&parent, 1302 lbase, (hammer2_key_t)-1, 1303 HAMMER2_LOOKUP_NODATA); 1304 while (chain) { 1305 /* 1306 * Degenerate embedded data case, nothing to loop on. 1307 */ 1308 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 1309 hammer2_chain_unlock(chain); 1310 break; 1311 } 1312 1313 /* 1314 * Delete physical data blocks past the file EOF. 1315 */ 1316 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1317 /*ip->delta_dcount -= chain->bytes;*/ 1318 hammer2_chain_delete(trans, chain, 0); 1319 } 1320 /* XXX check parent if empty indirect block & delete */ 1321 chain = hammer2_chain_next(&parent, chain, 1322 lbase, (hammer2_key_t)-1, 1323 HAMMER2_LOOKUP_NODATA); 1324 } 1325 hammer2_chain_lookup_done(parent); 1326 } 1327 1328 /* 1329 * Extend the size of a file. The inode must be locked. 1330 * 1331 * We may have to resize the block straddling the old EOF. 1332 */ 1333 static 1334 void 1335 hammer2_extend_file(hammer2_trans_t *trans, hammer2_inode_t *ip, 1336 hammer2_chain_t **parentp, hammer2_key_t nsize) 1337 { 1338 hammer2_inode_data_t *ipdata; 1339 hammer2_chain_t *parent; 1340 hammer2_chain_t *chain; 1341 struct buf *bp; 1342 hammer2_key_t osize; 1343 hammer2_key_t obase; 1344 hammer2_key_t nbase; 1345 hammer2_key_t leof; 1346 int oblksize; 1347 int nblksize; 1348 int nradix; 1349 int error; 1350 1351 KKASSERT(ip->vp); 1352 1353 ipdata = hammer2_chain_modify_ip(trans, ip, parentp, 0); 1354 1355 /* 1356 * Nothing to do if the direct-data case is still intact 1357 */ 1358 if ((ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) && 1359 nsize <= HAMMER2_EMBEDDED_BYTES) { 1360 ipdata->size = nsize; 1361 nvextendbuf(ip->vp, 1362 ipdata->size, nsize, 1363 0, HAMMER2_EMBEDDED_BYTES, 1364 0, (int)nsize, 1365 1); 1366 /* ipdata = &ip->chain->data->ipdata; RELOAD */ 1367 return; 1368 } 1369 1370 /* 1371 * Calculate the blocksize at the original EOF and resize the block 1372 * if necessary. Adjust the file size in the inode. 1373 */ 1374 osize = ipdata->size; 1375 oblksize = hammer2_calc_logical(ip, osize, &obase, &leof); 1376 ipdata->size = nsize; 1377 nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof); 1378 1379 /* 1380 * Do all required vnode operations, but do not mess with the 1381 * buffer straddling the orignal EOF. 1382 */ 1383 nvextendbuf(ip->vp, 1384 ipdata->size, nsize, 1385 0, nblksize, 1386 0, (int)nsize & HAMMER2_PBUFMASK, 1387 1); 1388 ipdata = &ip->chain->data->ipdata; 1389 1390 /* 1391 * Early return if we have no more work to do. 1392 */ 1393 if (obase == nbase && oblksize == nblksize && 1394 (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) { 1395 return; 1396 } 1397 1398 /* 1399 * We have work to do, including possibly resizing the buffer 1400 * at the previous EOF point and turning off DIRECTDATA mode. 1401 */ 1402 bp = NULL; 1403 if (((int)osize & HAMMER2_PBUFMASK)) { 1404 error = bread(ip->vp, obase, oblksize, &bp); 1405 KKASSERT(error == 0); 1406 } 1407 1408 /* 1409 * Disable direct-data mode by loading up a buffer cache buffer 1410 * with the data, then converting the inode data area into the 1411 * inode indirect block array area. 1412 */ 1413 if (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1414 ipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA; 1415 bzero(&ipdata->u.blockset, sizeof(ipdata->u.blockset)); 1416 } 1417 1418 /* 1419 * Resize the chain element at the old EOF. 1420 */ 1421 if (((int)osize & HAMMER2_PBUFMASK)) { 1422 retry: 1423 error = 0; 1424 parent = hammer2_chain_lookup_init(ip->chain, 0); 1425 nradix = hammer2_getradix(nblksize); 1426 1427 chain = hammer2_chain_lookup(&parent, 1428 obase, obase, 1429 HAMMER2_LOOKUP_NODATA); 1430 if (chain == NULL) { 1431 error = hammer2_chain_create(trans, &parent, &chain, 1432 obase, nblksize, 1433 HAMMER2_BREF_TYPE_DATA, 1434 nblksize); 1435 if (chain == NULL) { 1436 hammer2_chain_lookup_done(parent); 1437 panic("hammer2_chain_create: par=%p error=%d\n", 1438 parent, error); 1439 goto retry; 1440 } 1441 /*ip->delta_dcount += nblksize;*/ 1442 } else { 1443 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1444 hammer2_chain_resize(trans, ip, bp, 1445 parent, &chain, 1446 nradix, 1447 HAMMER2_MODIFY_OPTDATA); 1448 } 1449 if (obase != nbase) { 1450 if (oblksize != HAMMER2_PBUFSIZE) 1451 allocbuf(bp, HAMMER2_PBUFSIZE); 1452 } else { 1453 if (oblksize != nblksize) 1454 allocbuf(bp, nblksize); 1455 } 1456 hammer2_write_bp(chain, bp, 0); 1457 hammer2_chain_unlock(chain); 1458 hammer2_chain_lookup_done(parent); 1459 } 1460 } 1461 1462 static 1463 int 1464 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1465 { 1466 hammer2_inode_t *ip; 1467 hammer2_inode_t *dip; 1468 hammer2_chain_t *parent; 1469 hammer2_chain_t *chain; 1470 hammer2_chain_t *ochain; 1471 hammer2_trans_t trans; 1472 struct namecache *ncp; 1473 const uint8_t *name; 1474 size_t name_len; 1475 hammer2_key_t lhc; 1476 int error = 0; 1477 struct vnode *vp; 1478 1479 dip = VTOI(ap->a_dvp); 1480 ncp = ap->a_nch->ncp; 1481 name = ncp->nc_name; 1482 name_len = ncp->nc_nlen; 1483 lhc = hammer2_dirhash(name, name_len); 1484 1485 /* 1486 * Note: In DragonFly the kernel handles '.' and '..'. 1487 */ 1488 parent = hammer2_inode_lock_sh(dip); 1489 chain = hammer2_chain_lookup(&parent, 1490 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1491 HAMMER2_LOOKUP_SHARED); 1492 while (chain) { 1493 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 1494 name_len == chain->data->ipdata.name_len && 1495 bcmp(name, chain->data->ipdata.filename, name_len) == 0) { 1496 break; 1497 } 1498 chain = hammer2_chain_next(&parent, chain, 1499 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1500 HAMMER2_LOOKUP_SHARED); 1501 } 1502 hammer2_inode_unlock_sh(dip, parent); 1503 1504 /* 1505 * If the inode represents a forwarding entry for a hardlink we have 1506 * to locate the actual inode. The original ip is saved for possible 1507 * deconsolidation. (ip) will only be set to non-NULL when we have 1508 * to locate the real file via a hardlink. ip will be referenced but 1509 * not locked in that situation. chain is passed in locked and 1510 * returned locked. 1511 * 1512 * XXX what kind of chain lock? 1513 */ 1514 ochain = NULL; 1515 if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { 1516 error = hammer2_hardlink_find(dip, &chain, &ochain); 1517 if (error) { 1518 kprintf("hammer2: unable to find hardlink\n"); 1519 if (chain) { 1520 hammer2_chain_unlock(chain); 1521 chain = NULL; 1522 } 1523 goto failed; 1524 } 1525 } 1526 1527 /* 1528 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1529 * If an error occurs chain and ip are left alone. 1530 * 1531 * XXX upgrade shared lock? 1532 */ 1533 if (ochain && chain && 1534 chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) { 1535 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1536 chain->data->ipdata.filename); 1537 /* XXX retain shared lock on dip? (currently not held) */ 1538 hammer2_trans_init(&trans, dip->pmp, 0); 1539 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); 1540 hammer2_trans_done(&trans); 1541 } 1542 1543 /* 1544 * Acquire the related vnode 1545 * 1546 * NOTE: For error processing, only ENOENT resolves the namecache 1547 * entry to NULL, otherwise we just return the error and 1548 * leave the namecache unresolved. 1549 * 1550 * NOTE: multiple hammer2_inode structures can be aliased to the 1551 * same chain element, for example for hardlinks. This 1552 * use case does not 'reattach' inode associations that 1553 * might already exist, but always allocates a new one. 1554 * 1555 * WARNING: inode structure is locked exclusively via inode_get 1556 * but chain was locked shared. inode_unlock_ex() 1557 * will handle it properly. 1558 */ 1559 if (chain) { 1560 ip = hammer2_inode_get(dip->pmp, dip, chain); 1561 vp = hammer2_igetv(ip, &error); 1562 if (error == 0) { 1563 vn_unlock(vp); 1564 cache_setvp(ap->a_nch, vp); 1565 } else if (error == ENOENT) { 1566 cache_setvp(ap->a_nch, NULL); 1567 } 1568 hammer2_inode_unlock_ex(ip, chain); 1569 1570 /* 1571 * The vp should not be released until after we've disposed 1572 * of our locks, because it might cause vop_inactive() to 1573 * be called. 1574 */ 1575 if (vp) 1576 vrele(vp); 1577 } else { 1578 error = ENOENT; 1579 cache_setvp(ap->a_nch, NULL); 1580 } 1581 failed: 1582 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1583 ("resolve error %d/%p chain %p ap %p\n", 1584 error, ap->a_nch->ncp->nc_vp, chain, ap)); 1585 if (ochain) 1586 hammer2_chain_drop(ochain); 1587 return error; 1588 } 1589 1590 static 1591 int 1592 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1593 { 1594 hammer2_inode_t *dip; 1595 hammer2_inode_t *ip; 1596 hammer2_chain_t *parent; 1597 int error; 1598 1599 dip = VTOI(ap->a_dvp); 1600 1601 if ((ip = dip->pip) == NULL) { 1602 *ap->a_vpp = NULL; 1603 return ENOENT; 1604 } 1605 parent = hammer2_inode_lock_ex(ip); 1606 *ap->a_vpp = hammer2_igetv(ip, &error); 1607 hammer2_inode_unlock_ex(ip, parent); 1608 1609 return error; 1610 } 1611 1612 static 1613 int 1614 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1615 { 1616 hammer2_inode_t *dip; 1617 hammer2_inode_t *nip; 1618 hammer2_trans_t trans; 1619 hammer2_chain_t *chain; 1620 struct namecache *ncp; 1621 const uint8_t *name; 1622 size_t name_len; 1623 int error; 1624 1625 dip = VTOI(ap->a_dvp); 1626 if (dip->pmp->ronly) 1627 return (EROFS); 1628 1629 ncp = ap->a_nch->ncp; 1630 name = ncp->nc_name; 1631 name_len = ncp->nc_nlen; 1632 1633 hammer2_trans_init(&trans, dip->pmp, 0); 1634 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1635 name, name_len, &chain, &error); 1636 if (error) { 1637 KKASSERT(nip == NULL); 1638 *ap->a_vpp = NULL; 1639 } else { 1640 *ap->a_vpp = hammer2_igetv(nip, &error); 1641 hammer2_inode_unlock_ex(nip, chain); 1642 } 1643 hammer2_trans_done(&trans); 1644 1645 if (error == 0) { 1646 cache_setunresolved(ap->a_nch); 1647 cache_setvp(ap->a_nch, *ap->a_vpp); 1648 } 1649 return error; 1650 } 1651 1652 /* 1653 * Return the largest contiguous physical disk range for the logical 1654 * request, in bytes. 1655 * 1656 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1657 */ 1658 static 1659 int 1660 hammer2_vop_bmap(struct vop_bmap_args *ap) 1661 { 1662 *ap->a_doffsetp = NOOFFSET; 1663 if (ap->a_runp) 1664 *ap->a_runp = 0; 1665 if (ap->a_runb) 1666 *ap->a_runb = 0; 1667 return (EOPNOTSUPP); 1668 #if 0 1669 struct vnode *vp; 1670 hammer2_inode_t *ip; 1671 hammer2_chain_t *parent; 1672 hammer2_chain_t *chain; 1673 hammer2_key_t lbeg; 1674 hammer2_key_t lend; 1675 hammer2_off_t pbeg; 1676 hammer2_off_t pbytes; 1677 hammer2_off_t array[HAMMER2_BMAP_COUNT][2]; 1678 int loff; 1679 int ai; 1680 1681 /* 1682 * Only supported on regular files 1683 * 1684 * Only supported for read operations (required for cluster_read). 1685 * The block allocation is delayed for write operations. 1686 */ 1687 vp = ap->a_vp; 1688 if (vp->v_type != VREG) 1689 return (EOPNOTSUPP); 1690 if (ap->a_cmd != BUF_CMD_READ) 1691 return (EOPNOTSUPP); 1692 1693 ip = VTOI(vp); 1694 bzero(array, sizeof(array)); 1695 1696 /* 1697 * Calculate logical range 1698 */ 1699 KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0); 1700 lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI; 1701 lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1; 1702 if (lend < lbeg) 1703 lend = lbeg; 1704 loff = ap->a_loffset & HAMMER2_OFF_MASK_LO; 1705 1706 parent = hammer2_inode_lock_sh(ip); 1707 chain = hammer2_chain_lookup(&parent, 1708 lbeg, lend, 1709 HAMMER2_LOOKUP_NODATA | 1710 HAMMER2_LOOKUP_SHARED); 1711 if (chain == NULL) { 1712 *ap->a_doffsetp = ZFOFFSET; 1713 hammer2_inode_unlock_sh(ip, parent); 1714 return (0); 1715 } 1716 1717 while (chain) { 1718 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1719 ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE; 1720 KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT); 1721 array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK; 1722 array[ai][1] = chain->bytes; 1723 } 1724 chain = hammer2_chain_next(&parent, chain, 1725 lbeg, lend, 1726 HAMMER2_LOOKUP_NODATA | 1727 HAMMER2_LOOKUP_SHARED); 1728 } 1729 hammer2_inode_unlock_sh(ip, parent); 1730 1731 /* 1732 * If the requested loffset is not mappable physically we can't 1733 * bmap. The caller will have to access the file data via a 1734 * device buffer. 1735 */ 1736 if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_MINIOSIZE) { 1737 *ap->a_doffsetp = NOOFFSET; 1738 return (0); 1739 } 1740 1741 /* 1742 * Calculate the physical disk offset range for array[0] 1743 */ 1744 pbeg = array[0][0] + loff; 1745 pbytes = array[0][1] - loff; 1746 1747 for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) { 1748 if (array[ai][0] != pbeg + pbytes) 1749 break; 1750 pbytes += array[ai][1]; 1751 } 1752 1753 *ap->a_doffsetp = pbeg; 1754 if (ap->a_runp) 1755 *ap->a_runp = pbytes; 1756 return (0); 1757 #endif 1758 } 1759 1760 static 1761 int 1762 hammer2_vop_open(struct vop_open_args *ap) 1763 { 1764 return vop_stdopen(ap); 1765 } 1766 1767 /* 1768 * hammer2_vop_advlock { vp, id, op, fl, flags } 1769 */ 1770 static 1771 int 1772 hammer2_vop_advlock(struct vop_advlock_args *ap) 1773 { 1774 hammer2_inode_t *ip = VTOI(ap->a_vp); 1775 hammer2_chain_t *parent; 1776 hammer2_off_t size; 1777 1778 parent = hammer2_inode_lock_sh(ip); 1779 size = parent->data->ipdata.size; 1780 hammer2_inode_unlock_sh(ip, parent); 1781 return (lf_advlock(ap, &ip->advlock, size)); 1782 } 1783 1784 1785 static 1786 int 1787 hammer2_vop_close(struct vop_close_args *ap) 1788 { 1789 return vop_stdclose(ap); 1790 } 1791 1792 /* 1793 * hammer2_vop_nlink { nch, dvp, vp, cred } 1794 * 1795 * Create a hardlink from (vp) to {dvp, nch}. 1796 */ 1797 static 1798 int 1799 hammer2_vop_nlink(struct vop_nlink_args *ap) 1800 { 1801 hammer2_inode_t *dip; /* target directory to create link in */ 1802 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1803 hammer2_chain_t *chain; 1804 hammer2_trans_t trans; 1805 struct namecache *ncp; 1806 const uint8_t *name; 1807 size_t name_len; 1808 int error; 1809 1810 dip = VTOI(ap->a_dvp); 1811 if (dip->pmp->ronly) 1812 return (EROFS); 1813 1814 ncp = ap->a_nch->ncp; 1815 name = ncp->nc_name; 1816 name_len = ncp->nc_nlen; 1817 1818 /* 1819 * ip represents the file being hardlinked. The file could be a 1820 * normal file or a hardlink target if it has already been hardlinked. 1821 * If ip is a hardlinked target then ip->pip represents the location 1822 * of the hardlinked target, NOT the location of the hardlink pointer. 1823 * 1824 * Bump nlinks and potentially also create or move the hardlink 1825 * target in the parent directory common to (ip) and (dip). The 1826 * consolidation code can modify ip->chain and ip->pip. The 1827 * returned chain is locked. 1828 */ 1829 ip = VTOI(ap->a_vp); 1830 hammer2_trans_init(&trans, ip->pmp, 0); 1831 1832 chain = hammer2_inode_lock_ex(ip); 1833 error = hammer2_hardlink_consolidate(&trans, ip, &chain, dip, 1); 1834 if (error) 1835 goto done; 1836 1837 /* 1838 * Create a directory entry connected to the specified chain. 1839 * The hardlink consolidation code has already adjusted ip->pip 1840 * to the common parent directory containing the actual hardlink 1841 * 1842 * (which may be different from dip where we created our hardlink 1843 * entry. ip->chain always represents the actual hardlink and not 1844 * any of the pointers to the actual hardlink). 1845 */ 1846 error = hammer2_inode_connect(&trans, 1, 1847 dip, &chain, 1848 name, name_len); 1849 if (error == 0) { 1850 cache_setunresolved(ap->a_nch); 1851 cache_setvp(ap->a_nch, ap->a_vp); 1852 } 1853 done: 1854 hammer2_inode_unlock_ex(ip, chain); 1855 hammer2_trans_done(&trans); 1856 1857 return error; 1858 } 1859 1860 /* 1861 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1862 * 1863 * The operating system has already ensured that the directory entry 1864 * does not exist and done all appropriate namespace locking. 1865 */ 1866 static 1867 int 1868 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1869 { 1870 hammer2_inode_t *dip; 1871 hammer2_inode_t *nip; 1872 hammer2_trans_t trans; 1873 hammer2_chain_t *nchain; 1874 struct namecache *ncp; 1875 const uint8_t *name; 1876 size_t name_len; 1877 int error; 1878 1879 dip = VTOI(ap->a_dvp); 1880 if (dip->pmp->ronly) 1881 return (EROFS); 1882 1883 ncp = ap->a_nch->ncp; 1884 name = ncp->nc_name; 1885 name_len = ncp->nc_nlen; 1886 hammer2_trans_init(&trans, dip->pmp, 0); 1887 1888 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1889 name, name_len, &nchain, &error); 1890 if (error) { 1891 KKASSERT(nip == NULL); 1892 *ap->a_vpp = NULL; 1893 } else { 1894 *ap->a_vpp = hammer2_igetv(nip, &error); 1895 hammer2_inode_unlock_ex(nip, nchain); 1896 } 1897 hammer2_trans_done(&trans); 1898 1899 if (error == 0) { 1900 cache_setunresolved(ap->a_nch); 1901 cache_setvp(ap->a_nch, *ap->a_vpp); 1902 } 1903 return error; 1904 } 1905 1906 /* 1907 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1908 */ 1909 static 1910 int 1911 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1912 { 1913 hammer2_inode_t *dip; 1914 hammer2_inode_t *nip; 1915 hammer2_chain_t *nparent; 1916 hammer2_trans_t trans; 1917 struct namecache *ncp; 1918 const uint8_t *name; 1919 size_t name_len; 1920 int error; 1921 1922 dip = VTOI(ap->a_dvp); 1923 if (dip->pmp->ronly) 1924 return (EROFS); 1925 1926 ncp = ap->a_nch->ncp; 1927 name = ncp->nc_name; 1928 name_len = ncp->nc_nlen; 1929 hammer2_trans_init(&trans, dip->pmp, 0); 1930 1931 ap->a_vap->va_type = VLNK; /* enforce type */ 1932 1933 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1934 name, name_len, &nparent, &error); 1935 if (error) { 1936 KKASSERT(nip == NULL); 1937 *ap->a_vpp = NULL; 1938 hammer2_trans_done(&trans); 1939 return error; 1940 } 1941 *ap->a_vpp = hammer2_igetv(nip, &error); 1942 1943 /* 1944 * Build the softlink (~like file data) and finalize the namecache. 1945 */ 1946 if (error == 0) { 1947 size_t bytes; 1948 struct uio auio; 1949 struct iovec aiov; 1950 hammer2_inode_data_t *nipdata; 1951 1952 nipdata = &nip->chain->data->ipdata; 1953 bytes = strlen(ap->a_target); 1954 1955 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1956 KKASSERT(nipdata->op_flags & 1957 HAMMER2_OPFLAG_DIRECTDATA); 1958 bcopy(ap->a_target, nipdata->u.data, bytes); 1959 nipdata->size = bytes; 1960 } else { 1961 bzero(&auio, sizeof(auio)); 1962 bzero(&aiov, sizeof(aiov)); 1963 auio.uio_iov = &aiov; 1964 auio.uio_segflg = UIO_SYSSPACE; 1965 auio.uio_rw = UIO_WRITE; 1966 auio.uio_resid = bytes; 1967 auio.uio_iovcnt = 1; 1968 auio.uio_td = curthread; 1969 aiov.iov_base = ap->a_target; 1970 aiov.iov_len = bytes; 1971 error = hammer2_write_file(&trans, nip, &nparent, 1972 &auio, IO_APPEND, 0); 1973 nipdata = &nip->chain->data->ipdata; /* RELOAD */ 1974 /* XXX handle error */ 1975 error = 0; 1976 } 1977 } 1978 hammer2_inode_unlock_ex(nip, nparent); 1979 hammer2_trans_done(&trans); 1980 1981 /* 1982 * Finalize namecache 1983 */ 1984 if (error == 0) { 1985 cache_setunresolved(ap->a_nch); 1986 cache_setvp(ap->a_nch, *ap->a_vpp); 1987 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1988 } 1989 return error; 1990 } 1991 1992 /* 1993 * hammer2_vop_nremove { nch, dvp, cred } 1994 */ 1995 static 1996 int 1997 hammer2_vop_nremove(struct vop_nremove_args *ap) 1998 { 1999 hammer2_inode_t *dip; 2000 hammer2_trans_t trans; 2001 struct namecache *ncp; 2002 const uint8_t *name; 2003 size_t name_len; 2004 int error; 2005 2006 dip = VTOI(ap->a_dvp); 2007 if (dip->pmp->ronly) 2008 return(EROFS); 2009 2010 ncp = ap->a_nch->ncp; 2011 name = ncp->nc_name; 2012 name_len = ncp->nc_nlen; 2013 hammer2_trans_init(&trans, dip->pmp, 0); 2014 error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL); 2015 hammer2_trans_done(&trans); 2016 if (error == 0) { 2017 cache_unlink(ap->a_nch); 2018 } 2019 return (error); 2020 } 2021 2022 /* 2023 * hammer2_vop_nrmdir { nch, dvp, cred } 2024 */ 2025 static 2026 int 2027 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 2028 { 2029 hammer2_inode_t *dip; 2030 hammer2_trans_t trans; 2031 struct namecache *ncp; 2032 const uint8_t *name; 2033 size_t name_len; 2034 int error; 2035 2036 dip = VTOI(ap->a_dvp); 2037 if (dip->pmp->ronly) 2038 return(EROFS); 2039 2040 ncp = ap->a_nch->ncp; 2041 name = ncp->nc_name; 2042 name_len = ncp->nc_nlen; 2043 2044 hammer2_trans_init(&trans, dip->pmp, 0); 2045 error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL); 2046 hammer2_trans_done(&trans); 2047 if (error == 0) { 2048 cache_unlink(ap->a_nch); 2049 } 2050 return (error); 2051 } 2052 2053 /* 2054 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 2055 */ 2056 static 2057 int 2058 hammer2_vop_nrename(struct vop_nrename_args *ap) 2059 { 2060 struct namecache *fncp; 2061 struct namecache *tncp; 2062 hammer2_inode_t *fdip; 2063 hammer2_inode_t *tdip; 2064 hammer2_inode_t *ip; 2065 hammer2_chain_t *chain; 2066 hammer2_trans_t trans; 2067 const uint8_t *fname; 2068 size_t fname_len; 2069 const uint8_t *tname; 2070 size_t tname_len; 2071 int error; 2072 int hlink; 2073 2074 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 2075 return(EXDEV); 2076 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 2077 return(EXDEV); 2078 2079 fdip = VTOI(ap->a_fdvp); /* source directory */ 2080 tdip = VTOI(ap->a_tdvp); /* target directory */ 2081 2082 if (fdip->pmp->ronly) 2083 return(EROFS); 2084 2085 fncp = ap->a_fnch->ncp; /* entry name in source */ 2086 fname = fncp->nc_name; 2087 fname_len = fncp->nc_nlen; 2088 2089 tncp = ap->a_tnch->ncp; /* entry name in target */ 2090 tname = tncp->nc_name; 2091 tname_len = tncp->nc_nlen; 2092 2093 hammer2_trans_init(&trans, tdip->pmp, 0); 2094 2095 /* 2096 * ip is the inode being renamed. If this is a hardlink then 2097 * ip represents the actual file and not the hardlink marker. 2098 */ 2099 ip = VTOI(fncp->nc_vp); 2100 chain = NULL; 2101 2102 /* 2103 * Keep a tight grip on the inode so the temporary unlinking from 2104 * the source location prior to linking to the target location 2105 * does not cause the chain to be destroyed. 2106 * 2107 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 2108 * unlinking elements from their directories. Locking 2109 * the nlinks field does not lock the whole inode. 2110 */ 2111 hammer2_inode_ref(ip); 2112 2113 /* 2114 * Remove target if it exists 2115 */ 2116 error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1, NULL); 2117 if (error && error != ENOENT) 2118 goto done; 2119 cache_setunresolved(ap->a_tnch); 2120 2121 /* 2122 * When renaming a hardlinked file we may have to re-consolidate 2123 * the location of the hardlink target. Since the element is simply 2124 * being moved, nlinks is not modified in this case. 2125 * 2126 * If ip represents a regular file the consolidation code essentially 2127 * does nothing other than return the same locked chain that was 2128 * passed in. 2129 * 2130 * The returned chain will be locked. 2131 * 2132 * WARNING! We do not currently have a local copy of ipdata but 2133 * we do use one later remember that it must be reloaded 2134 * on any modification to the inode, including connects. 2135 */ 2136 chain = hammer2_inode_lock_ex(ip); 2137 error = hammer2_hardlink_consolidate(&trans, ip, &chain, tdip, 0); 2138 if (error) 2139 goto done; 2140 2141 /* 2142 * Disconnect (fdip, fname) from the source directory. This will 2143 * disconnect (ip) if it represents a direct file. If (ip) represents 2144 * a hardlink the HARDLINK pointer object will be removed but the 2145 * hardlink will stay intact. 2146 * 2147 * The target chain may be marked DELETED but will not be destroyed 2148 * since we retain our hold on ip and chain. 2149 */ 2150 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1, &hlink); 2151 KKASSERT(error != EAGAIN); 2152 if (error) 2153 goto done; 2154 2155 /* 2156 * Reconnect ip to target directory using chain. Chains cannot 2157 * actually be moved, so this will duplicate the chain in the new 2158 * spot and assign it to the ip, replacing the old chain. 2159 * 2160 * WARNING: chain locks can lock buffer cache buffers, to avoid 2161 * deadlocks we want to unlock before issuing a cache_*() 2162 * op (that might have to lock a vnode). 2163 */ 2164 error = hammer2_inode_connect(&trans, hlink, 2165 tdip, &chain, 2166 tname, tname_len); 2167 if (error == 0) { 2168 KKASSERT(chain != NULL); 2169 hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), chain); 2170 cache_rename(ap->a_fnch, ap->a_tnch); 2171 } 2172 done: 2173 hammer2_inode_unlock_ex(ip, chain); 2174 hammer2_inode_drop(ip); 2175 hammer2_trans_done(&trans); 2176 2177 return (error); 2178 } 2179 2180 /* 2181 * Strategy code 2182 * 2183 * WARNING: The strategy code cannot safely use hammer2 transactions 2184 * as this can deadlock against vfs_sync's vfsync() call 2185 * if multiple flushes are queued. 2186 */ 2187 static int hammer2_strategy_read(struct vop_strategy_args *ap); 2188 static int hammer2_strategy_write(struct vop_strategy_args *ap); 2189 static void hammer2_strategy_read_callback(hammer2_chain_t *chain, 2190 struct buf *dbp, char *data, void *arg); 2191 2192 static 2193 int 2194 hammer2_vop_strategy(struct vop_strategy_args *ap) 2195 { 2196 struct bio *biop; 2197 struct buf *bp; 2198 int error; 2199 2200 biop = ap->a_bio; 2201 bp = biop->bio_buf; 2202 2203 switch(bp->b_cmd) { 2204 case BUF_CMD_READ: 2205 error = hammer2_strategy_read(ap); 2206 ++hammer2_iod_file_read; 2207 break; 2208 case BUF_CMD_WRITE: 2209 error = hammer2_strategy_write(ap); 2210 ++hammer2_iod_file_write; 2211 break; 2212 default: 2213 bp->b_error = error = EINVAL; 2214 bp->b_flags |= B_ERROR; 2215 biodone(biop); 2216 break; 2217 } 2218 2219 return (error); 2220 } 2221 2222 static 2223 int 2224 hammer2_strategy_read(struct vop_strategy_args *ap) 2225 { 2226 struct buf *bp; 2227 struct bio *bio; 2228 struct bio *nbio; 2229 hammer2_inode_t *ip; 2230 hammer2_chain_t *parent; 2231 hammer2_chain_t *chain; 2232 hammer2_key_t lbase; 2233 2234 bio = ap->a_bio; 2235 bp = bio->bio_buf; 2236 ip = VTOI(ap->a_vp); 2237 nbio = push_bio(bio); 2238 2239 lbase = bio->bio_offset; 2240 chain = NULL; 2241 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 2242 2243 #if 0 2244 kprintf("read lbase %jd cached %016jx\n", 2245 lbase, nbio->bio_offset); 2246 #endif 2247 2248 parent = hammer2_inode_lock_sh(ip); 2249 chain = hammer2_chain_lookup(&parent, lbase, lbase, 2250 HAMMER2_LOOKUP_NODATA | 2251 HAMMER2_LOOKUP_SHARED); 2252 2253 if (chain == NULL) { 2254 /* 2255 * Data is zero-fill 2256 */ 2257 bp->b_resid = 0; 2258 bp->b_error = 0; 2259 bzero(bp->b_data, bp->b_bcount); 2260 biodone(nbio); 2261 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2262 /* 2263 * Data is embedded in the inode (copy from inode). 2264 */ 2265 hammer2_chain_load_async(chain, hammer2_strategy_read_callback, 2266 nbio); 2267 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2268 /* 2269 * Data is on-media, issue device I/O and copy. 2270 * 2271 * XXX direct-IO shortcut could go here XXX. 2272 */ 2273 hammer2_chain_load_async(chain, hammer2_strategy_read_callback, 2274 nbio); 2275 } else { 2276 panic("hammer2_strategy_read: unknown bref type"); 2277 chain = NULL; 2278 } 2279 hammer2_inode_unlock_sh(ip, parent); 2280 return (0); 2281 } 2282 2283 static 2284 void 2285 hammer2_strategy_read_callback(hammer2_chain_t *chain, struct buf *dbp, 2286 char *data, void *arg) 2287 { 2288 struct bio *nbio = arg; 2289 struct buf *bp = nbio->bio_buf; 2290 2291 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2292 /* 2293 * Data is embedded in the inode (copy from inode). 2294 */ 2295 bcopy(((hammer2_inode_data_t *)data)->u.data, 2296 bp->b_data, HAMMER2_EMBEDDED_BYTES); 2297 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES, 2298 bp->b_bcount - HAMMER2_EMBEDDED_BYTES); 2299 bp->b_resid = 0; 2300 bp->b_error = 0; 2301 hammer2_chain_unlock(chain); 2302 biodone(nbio); 2303 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2304 /* 2305 * Data is on-media, issue device I/O and copy. 2306 * 2307 * XXX direct-IO shortcut could go here XXX. 2308 */ 2309 bcopy(data, bp->b_data, bp->b_bcount); 2310 bp->b_flags |= B_NOTMETA; 2311 bp->b_resid = 0; 2312 bp->b_error = 0; 2313 hammer2_chain_unlock(chain); 2314 biodone(nbio); 2315 } else { 2316 if (dbp) 2317 bqrelse(dbp); 2318 panic("hammer2_strategy_read: unknown bref type"); 2319 chain = NULL; 2320 } 2321 } 2322 2323 static 2324 int 2325 hammer2_strategy_write(struct vop_strategy_args *ap) 2326 { 2327 KKASSERT(0); 2328 #if 0 2329 struct buf *bp; 2330 struct bio *bio; 2331 struct bio *nbio; 2332 hammer2_chain_t *chain; 2333 hammer2_mount_t *hmp; 2334 hammer2_inode_t *ip; 2335 2336 bio = ap->a_bio; 2337 bp = bio->bio_buf; 2338 ip = VTOI(ap->a_vp); 2339 nbio = push_bio(bio); 2340 2341 KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0); 2342 KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET); 2343 2344 if (nbio->bio_offset == NOOFFSET) { 2345 /* 2346 * The data is embedded in the inode. Note that strategy 2347 * calls for embedded data are synchronous in order to 2348 * ensure that ip->chain is stable. Chain modification 2349 * status is handled by the caller. 2350 */ 2351 KKASSERT(ip->chain->flags & HAMMER2_CHAIN_MODIFIED); 2352 KKASSERT(bio->bio_offset == 0); 2353 KKASSERT(ip->chain && ip->chain->data); 2354 chain = ip->chain; 2355 bcopy(bp->b_data, chain->data->ipdata.u.data, 2356 HAMMER2_EMBEDDED_BYTES); 2357 bp->b_resid = 0; 2358 bp->b_error = 0; 2359 biodone(nbio); 2360 } else { 2361 /* 2362 * Forward direct IO to the device 2363 */ 2364 hmp = nbio->bio_caller_info1.ptr; 2365 KKASSERT(hmp); 2366 vn_strategy(hmp->devvp, nbio); 2367 } 2368 return (0); 2369 #endif 2370 } 2371 2372 /* 2373 * hammer2_vop_ioctl { vp, command, data, fflag, cred } 2374 */ 2375 static 2376 int 2377 hammer2_vop_ioctl(struct vop_ioctl_args *ap) 2378 { 2379 hammer2_inode_t *ip; 2380 int error; 2381 2382 ip = VTOI(ap->a_vp); 2383 2384 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data, 2385 ap->a_fflag, ap->a_cred); 2386 return (error); 2387 } 2388 2389 static 2390 int 2391 hammer2_vop_mountctl(struct vop_mountctl_args *ap) 2392 { 2393 struct mount *mp; 2394 hammer2_pfsmount_t *pmp; 2395 int rc; 2396 2397 switch (ap->a_op) { 2398 case (MOUNTCTL_SET_EXPORT): 2399 mp = ap->a_head.a_ops->head.vv_mount; 2400 pmp = MPTOPMP(mp); 2401 2402 if (ap->a_ctllen != sizeof(struct export_args)) 2403 rc = (EINVAL); 2404 else 2405 rc = vfs_export(mp, &pmp->export, 2406 (const struct export_args *)ap->a_ctl); 2407 break; 2408 default: 2409 rc = vop_stdmountctl(ap); 2410 break; 2411 } 2412 return (rc); 2413 } 2414 2415 struct vop_ops hammer2_vnode_vops = { 2416 .vop_default = vop_defaultop, 2417 .vop_fsync = hammer2_vop_fsync, 2418 .vop_getpages = vop_stdgetpages, 2419 .vop_putpages = vop_stdputpages, 2420 .vop_access = hammer2_vop_access, 2421 .vop_advlock = hammer2_vop_advlock, 2422 .vop_close = hammer2_vop_close, 2423 .vop_nlink = hammer2_vop_nlink, 2424 .vop_ncreate = hammer2_vop_ncreate, 2425 .vop_nsymlink = hammer2_vop_nsymlink, 2426 .vop_nremove = hammer2_vop_nremove, 2427 .vop_nrmdir = hammer2_vop_nrmdir, 2428 .vop_nrename = hammer2_vop_nrename, 2429 .vop_getattr = hammer2_vop_getattr, 2430 .vop_setattr = hammer2_vop_setattr, 2431 .vop_readdir = hammer2_vop_readdir, 2432 .vop_readlink = hammer2_vop_readlink, 2433 .vop_getpages = vop_stdgetpages, 2434 .vop_putpages = vop_stdputpages, 2435 .vop_read = hammer2_vop_read, 2436 .vop_write = hammer2_vop_write, 2437 .vop_open = hammer2_vop_open, 2438 .vop_inactive = hammer2_vop_inactive, 2439 .vop_reclaim = hammer2_vop_reclaim, 2440 .vop_nresolve = hammer2_vop_nresolve, 2441 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2442 .vop_nmkdir = hammer2_vop_nmkdir, 2443 .vop_ioctl = hammer2_vop_ioctl, 2444 .vop_mountctl = hammer2_vop_mountctl, 2445 .vop_bmap = hammer2_vop_bmap, 2446 .vop_strategy = hammer2_vop_strategy, 2447 }; 2448 2449 struct vop_ops hammer2_spec_vops = { 2450 2451 }; 2452 2453 struct vop_ops hammer2_fifo_vops = { 2454 2455 }; 2456