1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2017 Nexenta Systems, Inc. 
27 */ 28 29 /* Portions Copyright 2007 Jeremy Teo */ 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/resource.h> 37 #include <security/mac/mac_framework.h> 38 #include <sys/vfs.h> 39 #include <sys/endian.h> 40 #include <sys/vm.h> 41 #include <sys/vnode.h> 42 #if __FreeBSD_version >= 1300102 43 #include <sys/smr.h> 44 #endif 45 #include <sys/dirent.h> 46 #include <sys/file.h> 47 #include <sys/stat.h> 48 #include <sys/kmem.h> 49 #include <sys/taskq.h> 50 #include <sys/uio.h> 51 #include <sys/atomic.h> 52 #include <sys/namei.h> 53 #include <sys/mman.h> 54 #include <sys/cmn_err.h> 55 #include <sys/kdb.h> 56 #include <sys/sysproto.h> 57 #include <sys/errno.h> 58 #include <sys/unistd.h> 59 #include <sys/zfs_dir.h> 60 #include <sys/zfs_ioctl.h> 61 #include <sys/fs/zfs.h> 62 #include <sys/dmu.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/spa.h> 65 #include <sys/txg.h> 66 #include <sys/dbuf.h> 67 #include <sys/zap.h> 68 #include <sys/sa.h> 69 #include <sys/policy.h> 70 #include <sys/sunddi.h> 71 #include <sys/filio.h> 72 #include <sys/sid.h> 73 #include <sys/zfs_ctldir.h> 74 #include <sys/zfs_fuid.h> 75 #include <sys/zfs_quota.h> 76 #include <sys/zfs_sa.h> 77 #include <sys/zfs_rlock.h> 78 #include <sys/bio.h> 79 #include <sys/buf.h> 80 #include <sys/sched.h> 81 #include <sys/acl.h> 82 #include <sys/vmmeter.h> 83 #include <vm/vm_param.h> 84 #include <sys/zil.h> 85 #include <sys/zfs_vnops.h> 86 #include <sys/module.h> 87 #include <sys/sysent.h> 88 #include <sys/dmu_impl.h> 89 #include <sys/brt.h> 90 #include <sys/zfeature.h> 91 92 #include <vm/vm_object.h> 93 94 #include <sys/extattr.h> 95 #include <sys/priv.h> 96 97 #ifndef VN_OPEN_INVFS 98 #define VN_OPEN_INVFS 0x0 99 #endif 100 101 VFS_SMR_DECLARE; 102 103 #if __FreeBSD_version < 1300103 104 #define NDFREE_PNBUF(ndp) NDFREE((ndp), NDF_ONLY_PNBUF) 105 #endif 106 107 #if __FreeBSD_version >= 
1300047 108 #define vm_page_wire_lock(pp) 109 #define vm_page_wire_unlock(pp) 110 #else 111 #define vm_page_wire_lock(pp) vm_page_lock(pp) 112 #define vm_page_wire_unlock(pp) vm_page_unlock(pp) 113 #endif 114 115 #ifdef DEBUG_VFS_LOCKS 116 #define VNCHECKREF(vp) \ 117 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \ 118 ("%s: wrong ref counts", __func__)); 119 #else 120 #define VNCHECKREF(vp) 121 #endif 122 123 #if __FreeBSD_version >= 1400045 124 typedef uint64_t cookie_t; 125 #else 126 typedef ulong_t cookie_t; 127 #endif 128 129 /* 130 * Programming rules. 131 * 132 * Each vnode op performs some logical unit of work. To do this, the ZPL must 133 * properly lock its in-core state, create a DMU transaction, do the work, 134 * record this work in the intent log (ZIL), commit the DMU transaction, 135 * and wait for the intent log to commit if it is a synchronous operation. 136 * Moreover, the vnode ops must work in both normal and log replay context. 137 * The ordering of events is important to avoid deadlocks and references 138 * to freed memory. The example below illustrates the following Big Rules: 139 * 140 * (1) A check must be made in each zfs thread for a mounted file system. 141 * This is done avoiding races using zfs_enter(zfsvfs). 142 * A zfs_exit(zfsvfs) is needed before all returns. Any znodes 143 * must be checked with zfs_verify_zp(zp). Both of these macros 144 * can return EIO from the calling function. 145 * 146 * (2) VN_RELE() should always be the last thing except for zil_commit() 147 * (if necessary) and zfs_exit(). This is for 3 reasons: 148 * First, if it's the last reference, the vnode/znode 149 * can be freed, so the zp may point to freed memory. Second, the last 150 * reference will call zfs_zinactive(), which may induce a lot of work -- 151 * pushing cached pages (which acquires range locks) and syncing out 152 * cached atime changes. 
Third, zfs_zinactive() may require a new tx, 153 * which could deadlock the system if you were already holding one. 154 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 155 * 156 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 157 * as they can span dmu_tx_assign() calls. 158 * 159 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 160 * dmu_tx_assign(). This is critical because we don't want to block 161 * while holding locks. 162 * 163 * If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT. This 164 * reduces lock contention and CPU usage when we must wait (note that if 165 * throughput is constrained by the storage, nearly every transaction 166 * must wait). 167 * 168 * Note, in particular, that if a lock is sometimes acquired before 169 * the tx assigns, and sometimes after (e.g. z_lock), then failing 170 * to use a non-blocking assign can deadlock the system. The scenario: 171 * 172 * Thread A has grabbed a lock before calling dmu_tx_assign(). 173 * Thread B is in an already-assigned tx, and blocks for this lock. 174 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 175 * forever, because the previous txg can't quiesce until B's tx commits. 176 * 177 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 178 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 179 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 180 * to indicate that this operation has already called dmu_tx_wait(). 181 * This will ensure that we don't retry forever, waiting a short bit 182 * each time. 183 * 184 * (5) If the operation succeeded, generate the intent log entry for it 185 * before dropping locks. This ensures that the ordering of events 186 * in the intent log matches the order in which they actually occurred. 
 * During ZIL replay the zfs_log_* functions will update the sequence
 * number to indicate the zil transaction has replayed.
 *
 * (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 * (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	zfs_enter(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		zfs_exit(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	zfs_exit(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */

/*
 * Validate an open of the file: deny write opens of append-only files
 * unless FAPPEND was supplied, and account synchronous (O_SYNC) opens
 * in the znode so writes know to be pushed out synchronously.
 */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	(void) cr;
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/* Write opens of append-only files must themselves be append-only. */
	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & O_SYNC)
		atomic_inc_32(&zp->z_sync_cnt);

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

/*
 * Undo the bookkeeping done in zfs_open(): on the last close of an
 * O_SYNC open, drop the znode's synchronous-open count.
 */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	(void) offset, (void) cr;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/* Decrement the synchronous opens in the znode */
	if ((flag & O_SYNC) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

/*
 * Handle the ioctls ZFS implements itself: a few legacy commands that are
 * deliberate no-ops, and F_SEEK_DATA/F_SEEK_HOLE, which are serviced by
 * zfs_holey().  For the seek commands the offset pointed to by "data" is
 * both input and output.  Unknown commands return ENOTTY.
 */
static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	(void) flag, (void) cred, (void) rvalp;
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	}
	return (SET_ERROR(ENOTTY));
}

/*
 * Shared-busy the fully-valid resident page at offset "start" of the
 * vnode's VM object, if any, and prepare it for an update of
 * [off, off + nbytes): write-protect the mappings and mark the
 * DEV_BSIZE-aligned subrange clean.  Returns NULL when no valid page is
 * resident at that index.  A paging-in-progress reference is taken on the
 * object; page_unbusy() drops it.
 */
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
	 * aligned boundaries, if the range is not aligned.  As a result a
	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked_12(obj);
#if __FreeBSD_version < 1300050
	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}
			vm_page_sbusy(pp);
		} else if (pp != NULL) {
			ASSERT(!pp->valid);
			pp = NULL;
		}
		if (pp != NULL) {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_object_pip_add(obj, 1);
			pmap_remove_write(pp);
			if (nbytes != 0)
				vm_page_clear_dirty(pp, off, nbytes);
		}
		break;
	}
#else
	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
	    VM_ALLOC_IGN_SBUSY);
	if (pp != NULL) {
		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
		vm_object_pip_add(obj, 1);
		pmap_remove_write(pp);
		if (nbytes != 0)
			vm_page_clear_dirty(pp, off, nbytes);
	}
#endif
	return (pp);
}

/*
 * Drop the shared busy taken by page_busy() and release the
 * paging-in-progress reference on the owning object.
 */
static void
page_unbusy(vm_page_t pp)
{

	vm_page_sunbusy(pp);
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(pp->object);
#else
	vm_object_pip_subtract(pp->object, 1);
#endif
}

#if __FreeBSD_version > 1300051
/*
 * Wire the fully-valid resident page at offset "start" of the vnode's
 * VM object, if any, so it cannot be reclaimed while we copy from it.
 * Returns NULL when no such page is resident; page_unhold() releases it.
 */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t m;

	obj = vp->v_object;
	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
	    VM_ALLOC_NOBUSY);
	return (m);
}
#else
/* Pre-1300051 variant: hold (rather than wire) the page under the object lock. */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}

			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_wire_lock(pp);
			vm_page_hold(pp);
			vm_page_wire_unlock(pp);

		} else
			pp = NULL;
		break;
	}
	return (pp);
}
#endif

/* Release the wiring (or hold, pre-1300035) taken by page_hold(). */
static void
page_unhold(vm_page_t pp)
{

	vm_page_wire_lock(pp);
#if __FreeBSD_version >= 1300035
	vm_page_unwire(pp, PQ_ACTIVE);
#else
	vm_page_unhold(pp);
#endif
	vm_page_wire_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 *	On Write:	If we find a memory mapped page, we write to *both*
 *			the page and the dmu buffer.
 */
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
	vm_object_t obj;
	struct sf_buf *sf;
	vnode_t *vp = ZTOV(zp);
	caddr_t va;
	int off;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300041
	/* Hold a pip reference across the whole loop on newer kernels. */
	vm_object_pip_add(obj, 1);
#endif
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			/*
			 * A resident page covers this range: copy the new
			 * data from the DMU into it.  The object lock is
			 * dropped while the page is (s)busied.
			 */
			zfs_vmobject_wunlock_12(obj);

			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, zp->z_id, start + off, nbytes,
			    va + off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);

			zfs_vmobject_wlock_12(obj);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;	/* only the first page can start unaligned */
	}
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(obj);
#else
	vm_object_pip_wakeupn(obj, 0);
#endif
	zfs_vmobject_wunlock_12(obj);
}

/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	/* sendfile requests are always page aligned. */
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/*
			 * Freshly grabbed (or invalid) page: populate it
			 * from the DMU and zero any tail past EOF.
			 */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			if (bytes != PAGESIZE && error == 0)
				memset(va + bytes, 0, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				/*
				 * On error, free the page if nothing else
				 * references it; otherwise just drop the
				 * shared busy.
				 */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			/* Page already valid: nothing to read. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * When a file is memory mapped, we must keep the IO
 * data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 *	On Read:	We "read" preferentially from memory mapped pages,
 *			else we default from the dmu buffer.
 *
 * NOTE:	We will always "break up" the IO into PAGESIZE uiomoves when
 *		the file is memory mapped.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	start = zfs_uio_offset(uio);
	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			/* Resident page: copy directly from it. */
			struct sf_buf *sf;
			caddr_t va;

			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
			page_unhold(pp);
		} else {
			/* No resident page: fall back to the DMU buffer. */
			zfs_vmobject_wunlock_12(obj);
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
			zfs_vmobject_wlock_12(obj);
		}
		len -= bytes;
		off = 0;	/* only the first page can start unaligned */
		if (error)
			break;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * Synchronously write "len" bytes from "data" at offset "pos" of the file.
 * When "presid" is non-NULL the unwritten residual is returned through it;
 * otherwise a short write is reported as EIO.
 */
int
zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
{
	int error = 0;
	ssize_t resid;

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);

	if (error) {
		return (SET_ERROR(error));
	} else if (presid == NULL) {
		if (resid != 0) {
			error = SET_ERROR(EIO);
		}
	} else {
		*presid = resid;
	}
	return (error);
}

/*
 * Release the znode's vnode asynchronously on the pool's zrele taskq, for
 * contexts where dropping the last reference inline could induce heavy
 * work or deadlock (see the Programming Rules at the top of this file).
 */
void
zfs_zrele_async(znode_t *zp)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = ITOZSB(vp)->z_os;

	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
}

/*
 * vn_vget_ino_gen() callback used for ".." lookups: lock the vnode passed
 * via "arg" with the requested lock flags, dropping its reference on
 * failure.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

/*
 * Lock the vnode "vp" produced by looking up "name" in directory "dvp",
 * handling the "." (relock dvp itself) and ".." (parent lookup via
 * vn_vget_ino_gen()) special cases to preserve lock ordering.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t	*zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:	dvp	- vnode of directory to search.
 *	nm	- name of entry to lookup.
 *	pnp	- full pathname to lookup [UNUSED].
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	rdir	- root directory vnode [UNUSED].
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	NA
 */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if __FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int	error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
		return (error);

#if __FreeBSD_version > 1300124
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes.
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
		if (error) {
			vrele(ZTOV(zp));
		}

		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
		    NULL))) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			zfs_exit(zfsvfs, FTAG);
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		zfs_exit(zfsvfs, FTAG);
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		zfs_exit(zfsvfs, FTAG);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		/* Re-check that zp is still the parent of zdp. */
		if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
			vput(ZTOV(zp));
			*vpp = NULL;
			return (error);
		}
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			zfs_exit(zfsvfs, FTAG);
			vput(ZTOV(zp));
			break;
		}
		if (zp->z_id == parent) {
			zfs_exit(zfsvfs, FTAG);
			break;
		}
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
#if __FreeBSD_version < 1400068
				cnp->cn_flags |= SAVENAME;
#endif
				break;
			}
			zfs_fallthrough;
		case DELETE:
#if __FreeBSD_version < 1400068
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
#endif
			break;
		}
	}

#if __FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races. In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 * IN:	dvp	- vnode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- large file flag [UNUSED].
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *	mnt_ns	- Unused on FreeBSD
 *
 * OUT:	vpp	- vnode of created or trunc'd entry.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
{
	(void) excl, (void) mode, (void) flag;
	znode_t		*zp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	dmu_tx_t	*tx;
	int		error;
	uid_t		uid = crgetuid(cr);
	gid_t		gid = crgetgid(cr);
	uint64_t	projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype;
#ifdef DEBUG_VFS_LOCKS
	/*
	 * dvp exists only under DEBUG_VFS_LOCKS; the VNCHECKREF() use at
	 * "out:" below must therefore compile away when this is not defined.
	 */
	vnode_t	*dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* Reject names that are not valid UTF-8 when the fs demands it. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	*zpp = NULL;

	/* Strip the sticky bit if the caller lacks the privilege to set it. */
	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	/*
	 * ZNEW: fail with EEXIST if the name is already present.  On success
	 * zp comes back NULL (asserted below) and the entry slot is ours.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
		goto out;
	}

	/*
	 * We only support the creation of regular files in
	 * extended attribute directories.
	 */

	if ((dzp->z_pflags & ZFS_XATTR) &&
	    (vap->va_type != VREG)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids, NULL)) != 0)
		goto out;

	/* Only regular files and directories participate in project quotas. */
	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	/*
	 * Reserve a vnode before opening the transaction so that vnode
	 * allocation cannot block (and presumably recurse into the
	 * filesystem via vnode reclamation) while the tx is assigned --
	 * NOTE(review): confirm against getnewvnode_reserve(9) semantics.
	 */
	getnewvnode_reserve_();

	tx = dmu_tx_create(os);

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	/* A large ACL that does not fit in the SA needs its own spill write. */
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Unwind everything acquired since "out:" cannot free
		 * acl_ids or drop the vnode reservation; return directly.
		 */
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
	    vsecp, acl_ids.z_fuidp, vap);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

out:
	VNCHECKREF(dvp);
	if (error == 0) {
		*zpp = zp;
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 * IN:	dvp	- vnode of directory to remove entry from.
 *	name	- name of entry to remove.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
static int
zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	znode_t		*xzp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	xattr_obj;
	uint64_t	obj = 0;
	dmu_tx_t	*tx;
	boolean_t	unlinked;
	uint64_t	txtype;
	int		error;


	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	zp = VTOZ(vp);
	if ((error = zfs_verify_zp(zp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zilog = zfsvfs->z_log;

	xattr_obj = 0;
	xzp = NULL;

	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	/*
	 * NOTE(review): `ct` is not declared in this function, so
	 * vnevent_remove() is presumably a no-op compat macro that never
	 * evaluates its arguments on FreeBSD -- confirm in the compat headers.
	 */
	vnevent_remove(vp, dvp, name, ct);

	obj = zp->z_id;

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
	}

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);

	if (xzp) {
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/* Nothing to unwind beyond the tx itself; xzp is freed at out. */
		dmu_tx_abort(tx);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Last link went away: queue the object for deferred deletion
		 * and stop syncing its dirty pages (the data is doomed).
		 */
		zfs_unlinked_add(zp, tx);
		vp->v_vflag |= VV_NOSYNC;
	}
	/* XXX check changes to linux vnops */
	txtype = TX_REMOVE;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);

	dmu_tx_commit(tx);
out:

	if (xzp)
		vrele(ZTOV(xzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);


	zfs_exit(zfsvfs, FTAG);
	return (error);
}


/*
 * Look up `name` under dzp the way the VFS would, building a synthetic
 * componentname.  Uses the name cache fast path (vfs_cache_lookup) when
 * enabled and not replaying the ZIL, otherwise falls through to zfs_lookup().
 * Returns the locked vnode in *vpp on success.
 */
static int
zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
    struct componentname *cnp, int nameiop)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	int	error;

	cnp->cn_nameptr = __DECONST(char *, name);
	cnp->cn_namelen = strlen(name);
	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = ISLASTCN;
#if __FreeBSD_version < 1400068
	cnp->cn_flags |= SAVENAME;
#endif
	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
	cnp->cn_cred = kcred;
#if __FreeBSD_version < 1400037
	cnp->cn_thread = curthread;
#endif

	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
		struct vop_lookup_args a;

		a.a_gen.a_desc = &vop_lookup_desc;
		a.a_dvp = ZTOV(dzp);
		a.a_vpp = vpp;
		a.a_cnp = cnp;
		error = vfs_cache_lookup(&a);
	} else {
		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
		    B_FALSE);
	}
#ifdef ZFS_DEBUG
	if (error) {
		printf("got error %d on name %s on op %d\n", error, name,
		    nameiop);
		kdb_backtrace();
	}
#endif
	return (error);
}

/*
 * Remove the entry `name` from directory dzp.  `flags` is accepted for
 * interface compatibility but unused here.
 */
int
zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
{
	vnode_t	*vp;
	int	error;
	struct componentname cn;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}
/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dvp	- vnode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *	vsecp	- ACL to be set
 *	mnt_ns	- Unused on FreeBSD
 *
 * OUT:	vpp	- vnode of created directory.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
    cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
{
	(void) flags, (void) vsecp;
	znode_t		*zp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	uid_t		uid = crgetuid(cr);
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT3U(vap->va_type, ==, VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    ((vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	zilog = zfsvfs->z_log;

	/* Directories may not be created inside extended attribute dirs. */
	if (dzp->z_pflags & ZFS_XATTR) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/* Reject names that are not valid UTF-8 when the fs demands it. */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    NULL, &acl_ids, NULL)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
	*zpp = NULL;

	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
	    mnt_ns))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 *
	 * Reserve the vnode before the transaction so allocation cannot
	 * block while the tx is assigned (see zfs_create()).
	 */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	/* A large ACL that does not fit in the SA needs its own spill write. */
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);

	*zpp = zp;

	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

#if __FreeBSD_version < 1300124
/*
 * Compat shim: older FreeBSD lacks cache_vop_rmdir(), so purge both the
 * parent and the removed directory from the name cache by hand.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{

	cache_purge(dvp);
	cache_purge(vp);
}
#endif

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- vnode of current working directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
static int
zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;

	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	if ((error = zfs_verify_zp(zp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zilog = zfsvfs->z_log;


	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/*
	 * NOTE(review): `ct` is not declared in this function, so
	 * vnevent_rmdir() is presumably a no-op compat macro that never
	 * evaluates its arguments on FreeBSD -- confirm in the compat headers.
	 */
	vnevent_rmdir(vp, dvp, name, ct);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	/* Removing a directory typically frees space on balance. */
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name,
		    ZFS_NO_OBJECT, B_FALSE);
	}

	dmu_tx_commit(tx);

	/* Drop any now-stale name cache entries for both vnodes. */
	if (zfsvfs->z_use_namecache)
		cache_vop_rmdir(dvp, vp);
out:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * Remove the directory entry `name` from dzp.  `cwd` and `flags` are
 * accepted for interface compatibility but unused here.
 */
int
zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
{
	struct componentname cn;
	vnode_t	*vp;
	int	error;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 * IN:	vp	- vnode of directory to read.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *	eofp	- set to true if end-of-file detected.
 *	ncookies- number of entries in cookies
 *	cookies	- offsets to directory entries
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
static int
zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, cookie_t **cookies)
{
	znode_t		*zp = VTOZ(vp);
	iovec_t		*iovp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t		outbuf;
	size_t		bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t		bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	uint64_t	parent;
	int		local_eof;
	int		outcount;
	int		error;
	uint8_t		prefetch;
	uint8_t		type;
	int		ncooks;
	cookie_t	*cooks = NULL;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/* Parent object id is needed to synthesize the ".." entry. */
	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = zfs_uio_offset(uio);
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 *
	 * Offsets 0-3 are the synthetic entries ('.', '..', '.zfs'); real
	 * ZAP cursors always serialize to larger values (low 4 bits zero,
	 * see the block comment above).
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 * Only a single-iovec system-space uio can be filled in place;
	 * anything else goes through a bounce buffer + zfs_uiomove().
	 */
	iovp = GET_UIO_STRUCT(uio)->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		outbuf = NULL;
		odp = (struct dirent64 *)iovp->iov_base;
	}

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
		    sizeof (((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next = NULL;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if ((error = zap_cursor_retrieve(&zc, &zap))) {
				/* ENOENT from the cursor means end of dir. */
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = SET_ERROR(ENXIO);
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
		}

		reclen = DIRENT64_RECLEN(strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = SET_ERROR(EINVAL);
				goto update;
			}
			break;
		}
		/*
		 * Add normal entry:
		 */
		odp->d_ino = objnum;
		odp->d_reclen = reclen;
		odp->d_namlen = strlen(zap.za_name);
		/* NOTE: d_off is the offset for the *next* entry. */
		next = &odp->d_off;
		strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
		odp->d_type = type;
		dirent_terminate(odp);
		odp = (dirent64_t *)((intptr_t)odp + reclen);

		outcount += reclen;

		ASSERT3S(outcount, <=, bufsize);

		if (prefetch)
			dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);

		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			offset += 1;
		}

		/* Fill the offset right after advancing the cursor. */
		if (next != NULL)
			*next = offset;
		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
		/* Filled in place: advance the iovec/resid by hand. */
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		zfs_uio_resid(uio) -= outcount;
	} else if ((error =
	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
		/*
		 * Reset the pointer.
		 */
		offset = zfs_uio_offset(uio);
	}

update:
	zap_cursor_fini(&zc);
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_uio_setoffset(uio, offset);
	zfs_exit(zfsvfs, FTAG);
	/* On failure the caller must not see a partially filled cookie list. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 * IN:	vp	- vnode of file.
1937 * vap - va_mask identifies requested attributes. 1938 * If AT_XVATTR set, then optional attrs are requested 1939 * flags - ATTR_NOACLCHECK (CIFS server context) 1940 * cr - credentials of caller. 1941 * 1942 * OUT: vap - attribute values. 1943 * 1944 * RETURN: 0 (always succeeds). 1945 */ 1946 static int 1947 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 1948 { 1949 znode_t *zp = VTOZ(vp); 1950 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1951 int error = 0; 1952 uint32_t blksize; 1953 u_longlong_t nblocks; 1954 uint64_t mtime[2], ctime[2], crtime[2], rdev; 1955 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 1956 xoptattr_t *xoap = NULL; 1957 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 1958 sa_bulk_attr_t bulk[4]; 1959 int count = 0; 1960 1961 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 1962 return (error); 1963 1964 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 1965 1966 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 1967 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 1968 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 1969 if (vp->v_type == VBLK || vp->v_type == VCHR) 1970 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 1971 &rdev, 8); 1972 1973 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 1974 zfs_exit(zfsvfs, FTAG); 1975 return (error); 1976 } 1977 1978 /* 1979 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 1980 * Also, if we are the owner don't bother, since owner should 1981 * always be allowed to read basic attributes of file. 1982 */ 1983 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 1984 (vap->va_uid != crgetuid(cr))) { 1985 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 1986 skipaclchk, cr, NULL))) { 1987 zfs_exit(zfsvfs, FTAG); 1988 return (error); 1989 } 1990 } 1991 1992 /* 1993 * Return all attributes. 
It's cheaper to provide the answer 1994 * than to determine whether we were asked the question. 1995 */ 1996 1997 vap->va_type = IFTOVT(zp->z_mode); 1998 vap->va_mode = zp->z_mode & ~S_IFMT; 1999 vn_fsid(vp, vap); 2000 vap->va_nodeid = zp->z_id; 2001 vap->va_nlink = zp->z_links; 2002 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) && 2003 zp->z_links < ZFS_LINK_MAX) 2004 vap->va_nlink++; 2005 vap->va_size = zp->z_size; 2006 if (vp->v_type == VBLK || vp->v_type == VCHR) 2007 vap->va_rdev = zfs_cmpldev(rdev); 2008 vap->va_gen = zp->z_gen; 2009 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2010 vap->va_filerev = zp->z_seq; 2011 2012 /* 2013 * Add in any requested optional attributes and the create time. 2014 * Also set the corresponding bits in the returned attribute bitmap. 2015 */ 2016 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2017 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2018 xoap->xoa_archive = 2019 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2020 XVA_SET_RTN(xvap, XAT_ARCHIVE); 2021 } 2022 2023 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2024 xoap->xoa_readonly = 2025 ((zp->z_pflags & ZFS_READONLY) != 0); 2026 XVA_SET_RTN(xvap, XAT_READONLY); 2027 } 2028 2029 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2030 xoap->xoa_system = 2031 ((zp->z_pflags & ZFS_SYSTEM) != 0); 2032 XVA_SET_RTN(xvap, XAT_SYSTEM); 2033 } 2034 2035 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2036 xoap->xoa_hidden = 2037 ((zp->z_pflags & ZFS_HIDDEN) != 0); 2038 XVA_SET_RTN(xvap, XAT_HIDDEN); 2039 } 2040 2041 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2042 xoap->xoa_nounlink = 2043 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2044 XVA_SET_RTN(xvap, XAT_NOUNLINK); 2045 } 2046 2047 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2048 xoap->xoa_immutable = 2049 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2050 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2051 } 2052 2053 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2054 xoap->xoa_appendonly = 2055 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2056 XVA_SET_RTN(xvap, 
XAT_APPENDONLY); 2057 } 2058 2059 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2060 xoap->xoa_nodump = 2061 ((zp->z_pflags & ZFS_NODUMP) != 0); 2062 XVA_SET_RTN(xvap, XAT_NODUMP); 2063 } 2064 2065 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2066 xoap->xoa_opaque = 2067 ((zp->z_pflags & ZFS_OPAQUE) != 0); 2068 XVA_SET_RTN(xvap, XAT_OPAQUE); 2069 } 2070 2071 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2072 xoap->xoa_av_quarantined = 2073 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2074 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2075 } 2076 2077 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2078 xoap->xoa_av_modified = 2079 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2080 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2081 } 2082 2083 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2084 vp->v_type == VREG) { 2085 zfs_sa_get_scanstamp(zp, xvap); 2086 } 2087 2088 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2089 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2090 XVA_SET_RTN(xvap, XAT_REPARSE); 2091 } 2092 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2093 xoap->xoa_generation = zp->z_gen; 2094 XVA_SET_RTN(xvap, XAT_GEN); 2095 } 2096 2097 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2098 xoap->xoa_offline = 2099 ((zp->z_pflags & ZFS_OFFLINE) != 0); 2100 XVA_SET_RTN(xvap, XAT_OFFLINE); 2101 } 2102 2103 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2104 xoap->xoa_sparse = 2105 ((zp->z_pflags & ZFS_SPARSE) != 0); 2106 XVA_SET_RTN(xvap, XAT_SPARSE); 2107 } 2108 2109 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2110 xoap->xoa_projinherit = 2111 ((zp->z_pflags & ZFS_PROJINHERIT) != 0); 2112 XVA_SET_RTN(xvap, XAT_PROJINHERIT); 2113 } 2114 2115 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2116 xoap->xoa_projid = zp->z_projid; 2117 XVA_SET_RTN(xvap, XAT_PROJID); 2118 } 2119 } 2120 2121 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2122 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2123 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2124 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2125 2126 2127 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 
2128 vap->va_blksize = blksize; 2129 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2130 2131 if (zp->z_blksz == 0) { 2132 /* 2133 * Block size hasn't been set; suggest maximal I/O transfers. 2134 */ 2135 vap->va_blksize = zfsvfs->z_max_blksz; 2136 } 2137 2138 zfs_exit(zfsvfs, FTAG); 2139 return (0); 2140 } 2141 2142 /* 2143 * Set the file attributes to the values contained in the 2144 * vattr structure. 2145 * 2146 * IN: zp - znode of file to be modified. 2147 * vap - new attribute values. 2148 * If AT_XVATTR set, then optional attrs are being set 2149 * flags - ATTR_UTIME set if non-default time values provided. 2150 * - ATTR_NOACLCHECK (CIFS context only). 2151 * cr - credentials of caller. 2152 * mnt_ns - Unused on FreeBSD 2153 * 2154 * RETURN: 0 on success, error code on failure. 2155 * 2156 * Timestamps: 2157 * vp - ctime updated, mtime updated if size changed. 2158 */ 2159 int 2160 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) 2161 { 2162 vnode_t *vp = ZTOV(zp); 2163 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2164 objset_t *os; 2165 zilog_t *zilog; 2166 dmu_tx_t *tx; 2167 vattr_t oldva; 2168 xvattr_t tmpxvattr; 2169 uint_t mask = vap->va_mask; 2170 uint_t saved_mask = 0; 2171 uint64_t saved_mode; 2172 int trim_mask = 0; 2173 uint64_t new_mode; 2174 uint64_t new_uid, new_gid; 2175 uint64_t xattr_obj; 2176 uint64_t mtime[2], ctime[2]; 2177 uint64_t projid = ZFS_INVALID_PROJID; 2178 znode_t *attrzp; 2179 int need_policy = FALSE; 2180 int err, err2; 2181 zfs_fuid_info_t *fuidp = NULL; 2182 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2183 xoptattr_t *xoap; 2184 zfs_acl_t *aclp; 2185 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2186 boolean_t fuid_dirtied = B_FALSE; 2187 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2188 int count = 0, xattr_count = 0; 2189 2190 if (mask == 0) 2191 return (0); 2192 2193 if (mask & AT_NOSET) 2194 return (SET_ERROR(EINVAL)); 2195 2196 if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 2197 return (err); 2198 2199 os = zfsvfs->z_os; 2200 zilog = zfsvfs->z_log; 2201 2202 /* 2203 * Make sure that if we have ephemeral uid/gid or xvattr specified 2204 * that file system is at proper version level 2205 */ 2206 2207 if (zfsvfs->z_use_fuids == B_FALSE && 2208 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2209 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2210 (mask & AT_XVATTR))) { 2211 zfs_exit(zfsvfs, FTAG); 2212 return (SET_ERROR(EINVAL)); 2213 } 2214 2215 if (mask & AT_SIZE && vp->v_type == VDIR) { 2216 zfs_exit(zfsvfs, FTAG); 2217 return (SET_ERROR(EISDIR)); 2218 } 2219 2220 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2221 zfs_exit(zfsvfs, FTAG); 2222 return (SET_ERROR(EINVAL)); 2223 } 2224 2225 /* 2226 * If this is an xvattr_t, then get a pointer to the structure of 2227 * optional attributes. If this is NULL, then we have a vattr_t. 2228 */ 2229 xoap = xva_getxoptattr(xvap); 2230 2231 xva_init(&tmpxvattr); 2232 2233 /* 2234 * Immutable files can only alter immutable bit and atime 2235 */ 2236 if ((zp->z_pflags & ZFS_IMMUTABLE) && 2237 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2238 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2239 zfs_exit(zfsvfs, FTAG); 2240 return (SET_ERROR(EPERM)); 2241 } 2242 2243 /* 2244 * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2245 */ 2246 2247 /* 2248 * Verify timestamps doesn't overflow 32 bits. 2249 * ZFS can handle large timestamps, but 32bit syscalls can't 2250 * handle times greater than 2039. This check should be removed 2251 * once large timestamps are fully supported. 
2252 */ 2253 if (mask & (AT_ATIME | AT_MTIME)) { 2254 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2255 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2256 zfs_exit(zfsvfs, FTAG); 2257 return (SET_ERROR(EOVERFLOW)); 2258 } 2259 } 2260 if (xoap != NULL && (mask & AT_XVATTR)) { 2261 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2262 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2263 zfs_exit(zfsvfs, FTAG); 2264 return (SET_ERROR(EOVERFLOW)); 2265 } 2266 2267 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2268 if (!dmu_objset_projectquota_enabled(os) || 2269 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { 2270 zfs_exit(zfsvfs, FTAG); 2271 return (SET_ERROR(EOPNOTSUPP)); 2272 } 2273 2274 projid = xoap->xoa_projid; 2275 if (unlikely(projid == ZFS_INVALID_PROJID)) { 2276 zfs_exit(zfsvfs, FTAG); 2277 return (SET_ERROR(EINVAL)); 2278 } 2279 2280 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) 2281 projid = ZFS_INVALID_PROJID; 2282 else 2283 need_policy = TRUE; 2284 } 2285 2286 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && 2287 (xoap->xoa_projinherit != 2288 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && 2289 (!dmu_objset_projectquota_enabled(os) || 2290 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) { 2291 zfs_exit(zfsvfs, FTAG); 2292 return (SET_ERROR(EOPNOTSUPP)); 2293 } 2294 } 2295 2296 attrzp = NULL; 2297 aclp = NULL; 2298 2299 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2300 zfs_exit(zfsvfs, FTAG); 2301 return (SET_ERROR(EROFS)); 2302 } 2303 2304 /* 2305 * First validate permissions 2306 */ 2307 2308 if (mask & AT_SIZE) { 2309 /* 2310 * XXX - Note, we are not providing any open 2311 * mode flags here (like FNDELAY), so we may 2312 * block if there are locks present... this 2313 * should be addressed in openat(). 2314 */ 2315 /* XXX - would it be OK to generate a log record here? 
*/ 2316 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2317 if (err) { 2318 zfs_exit(zfsvfs, FTAG); 2319 return (err); 2320 } 2321 } 2322 2323 if (mask & (AT_ATIME|AT_MTIME) || 2324 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2325 XVA_ISSET_REQ(xvap, XAT_READONLY) || 2326 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2327 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2328 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2329 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2330 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2331 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2332 skipaclchk, cr, mnt_ns); 2333 } 2334 2335 if (mask & (AT_UID|AT_GID)) { 2336 int idmask = (mask & (AT_UID|AT_GID)); 2337 int take_owner; 2338 int take_group; 2339 2340 /* 2341 * NOTE: even if a new mode is being set, 2342 * we may clear S_ISUID/S_ISGID bits. 2343 */ 2344 2345 if (!(mask & AT_MODE)) 2346 vap->va_mode = zp->z_mode; 2347 2348 /* 2349 * Take ownership or chgrp to group we are a member of 2350 */ 2351 2352 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2353 take_group = (mask & AT_GID) && 2354 zfs_groupmember(zfsvfs, vap->va_gid, cr); 2355 2356 /* 2357 * If both AT_UID and AT_GID are set then take_owner and 2358 * take_group must both be set in order to allow taking 2359 * ownership. 
2360 * 2361 * Otherwise, send the check through secpolicy_vnode_setattr() 2362 * 2363 */ 2364 2365 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2366 ((idmask == AT_UID) && take_owner) || 2367 ((idmask == AT_GID) && take_group)) { 2368 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2369 skipaclchk, cr, mnt_ns) == 0) { 2370 /* 2371 * Remove setuid/setgid for non-privileged users 2372 */ 2373 secpolicy_setid_clear(vap, vp, cr); 2374 trim_mask = (mask & (AT_UID|AT_GID)); 2375 } else { 2376 need_policy = TRUE; 2377 } 2378 } else { 2379 need_policy = TRUE; 2380 } 2381 } 2382 2383 oldva.va_mode = zp->z_mode; 2384 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2385 if (mask & AT_XVATTR) { 2386 /* 2387 * Update xvattr mask to include only those attributes 2388 * that are actually changing. 2389 * 2390 * the bits will be restored prior to actually setting 2391 * the attributes so the caller thinks they were set. 2392 */ 2393 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2394 if (xoap->xoa_appendonly != 2395 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2396 need_policy = TRUE; 2397 } else { 2398 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2399 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2400 } 2401 } 2402 2403 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2404 if (xoap->xoa_projinherit != 2405 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { 2406 need_policy = TRUE; 2407 } else { 2408 XVA_CLR_REQ(xvap, XAT_PROJINHERIT); 2409 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT); 2410 } 2411 } 2412 2413 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2414 if (xoap->xoa_nounlink != 2415 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2416 need_policy = TRUE; 2417 } else { 2418 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2419 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2420 } 2421 } 2422 2423 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2424 if (xoap->xoa_immutable != 2425 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2426 need_policy = TRUE; 2427 } else { 2428 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2429 XVA_SET_REQ(&tmpxvattr, 
XAT_IMMUTABLE); 2430 } 2431 } 2432 2433 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2434 if (xoap->xoa_nodump != 2435 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2436 need_policy = TRUE; 2437 } else { 2438 XVA_CLR_REQ(xvap, XAT_NODUMP); 2439 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2440 } 2441 } 2442 2443 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2444 if (xoap->xoa_av_modified != 2445 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2446 need_policy = TRUE; 2447 } else { 2448 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2449 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2450 } 2451 } 2452 2453 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2454 if ((vp->v_type != VREG && 2455 xoap->xoa_av_quarantined) || 2456 xoap->xoa_av_quarantined != 2457 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2458 need_policy = TRUE; 2459 } else { 2460 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2461 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2462 } 2463 } 2464 2465 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2466 zfs_exit(zfsvfs, FTAG); 2467 return (SET_ERROR(EPERM)); 2468 } 2469 2470 if (need_policy == FALSE && 2471 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2472 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2473 need_policy = TRUE; 2474 } 2475 } 2476 2477 if (mask & AT_MODE) { 2478 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr, 2479 mnt_ns) == 0) { 2480 err = secpolicy_setid_setsticky_clear(vp, vap, 2481 &oldva, cr); 2482 if (err) { 2483 zfs_exit(zfsvfs, FTAG); 2484 return (err); 2485 } 2486 trim_mask |= AT_MODE; 2487 } else { 2488 need_policy = TRUE; 2489 } 2490 } 2491 2492 if (need_policy) { 2493 /* 2494 * If trim_mask is set then take ownership 2495 * has been granted or write_acl is present and user 2496 * has the ability to modify mode. In that case remove 2497 * UID|GID and or MODE from mask so that 2498 * secpolicy_vnode_setattr() doesn't revoke it. 
2499 */ 2500 2501 if (trim_mask) { 2502 saved_mask = vap->va_mask; 2503 vap->va_mask &= ~trim_mask; 2504 if (trim_mask & AT_MODE) { 2505 /* 2506 * Save the mode, as secpolicy_vnode_setattr() 2507 * will overwrite it with ova.va_mode. 2508 */ 2509 saved_mode = vap->va_mode; 2510 } 2511 } 2512 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2513 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2514 if (err) { 2515 zfs_exit(zfsvfs, FTAG); 2516 return (err); 2517 } 2518 2519 if (trim_mask) { 2520 vap->va_mask |= saved_mask; 2521 if (trim_mask & AT_MODE) { 2522 /* 2523 * Recover the mode after 2524 * secpolicy_vnode_setattr(). 2525 */ 2526 vap->va_mode = saved_mode; 2527 } 2528 } 2529 } 2530 2531 /* 2532 * secpolicy_vnode_setattr, or take ownership may have 2533 * changed va_mask 2534 */ 2535 mask = vap->va_mask; 2536 2537 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) { 2538 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2539 &xattr_obj, sizeof (xattr_obj)); 2540 2541 if (err == 0 && xattr_obj) { 2542 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 2543 if (err == 0) { 2544 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 2545 if (err != 0) 2546 vrele(ZTOV(attrzp)); 2547 } 2548 if (err) 2549 goto out2; 2550 } 2551 if (mask & AT_UID) { 2552 new_uid = zfs_fuid_create(zfsvfs, 2553 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2554 if (new_uid != zp->z_uid && 2555 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, 2556 new_uid)) { 2557 if (attrzp) 2558 vput(ZTOV(attrzp)); 2559 err = SET_ERROR(EDQUOT); 2560 goto out2; 2561 } 2562 } 2563 2564 if (mask & AT_GID) { 2565 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2566 cr, ZFS_GROUP, &fuidp); 2567 if (new_gid != zp->z_gid && 2568 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, 2569 new_gid)) { 2570 if (attrzp) 2571 vput(ZTOV(attrzp)); 2572 err = SET_ERROR(EDQUOT); 2573 goto out2; 2574 } 2575 } 2576 2577 if (projid != ZFS_INVALID_PROJID && 2578 zfs_id_overquota(zfsvfs, 
DMU_PROJECTUSED_OBJECT, projid)) { 2579 if (attrzp) 2580 vput(ZTOV(attrzp)); 2581 err = SET_ERROR(EDQUOT); 2582 goto out2; 2583 } 2584 } 2585 tx = dmu_tx_create(os); 2586 2587 if (mask & AT_MODE) { 2588 uint64_t pmode = zp->z_mode; 2589 uint64_t acl_obj; 2590 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2591 2592 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 2593 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2594 err = SET_ERROR(EPERM); 2595 goto out; 2596 } 2597 2598 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) 2599 goto out; 2600 2601 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 2602 /* 2603 * Are we upgrading ACL from old V0 format 2604 * to V1 format? 2605 */ 2606 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 2607 zfs_znode_acl_version(zp) == 2608 ZFS_ACL_VERSION_INITIAL) { 2609 dmu_tx_hold_free(tx, acl_obj, 0, 2610 DMU_OBJECT_END); 2611 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2612 0, aclp->z_acl_bytes); 2613 } else { 2614 dmu_tx_hold_write(tx, acl_obj, 0, 2615 aclp->z_acl_bytes); 2616 } 2617 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2618 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2619 0, aclp->z_acl_bytes); 2620 } 2621 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2622 } else { 2623 if (((mask & AT_XVATTR) && 2624 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2625 (projid != ZFS_INVALID_PROJID && 2626 !(zp->z_pflags & ZFS_PROJID))) 2627 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2628 else 2629 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2630 } 2631 2632 if (attrzp) { 2633 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2634 } 2635 2636 fuid_dirtied = zfsvfs->z_fuid_dirty; 2637 if (fuid_dirtied) 2638 zfs_fuid_txhold(zfsvfs, tx); 2639 2640 zfs_sa_upgrade_txholds(tx, zp); 2641 2642 err = dmu_tx_assign(tx, TXG_WAIT); 2643 if (err) 2644 goto out; 2645 2646 count = 0; 2647 /* 2648 * Set each attribute requested. 2649 * We group settings according to the locks they need to acquire. 
2650 * 2651 * Note: you cannot set ctime directly, although it will be 2652 * updated as a side-effect of calling this function. 2653 */ 2654 2655 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { 2656 /* 2657 * For the existed object that is upgraded from old system, 2658 * its on-disk layout has no slot for the project ID attribute. 2659 * But quota accounting logic needs to access related slots by 2660 * offset directly. So we need to adjust old objects' layout 2661 * to make the project ID to some unified and fixed offset. 2662 */ 2663 if (attrzp) 2664 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); 2665 if (err == 0) 2666 err = sa_add_projid(zp->z_sa_hdl, tx, projid); 2667 2668 if (unlikely(err == EEXIST)) 2669 err = 0; 2670 else if (err != 0) 2671 goto out; 2672 else 2673 projid = ZFS_INVALID_PROJID; 2674 } 2675 2676 if (mask & (AT_UID|AT_GID|AT_MODE)) 2677 mutex_enter(&zp->z_acl_lock); 2678 2679 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 2680 &zp->z_pflags, sizeof (zp->z_pflags)); 2681 2682 if (attrzp) { 2683 if (mask & (AT_UID|AT_GID|AT_MODE)) 2684 mutex_enter(&attrzp->z_acl_lock); 2685 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2686 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 2687 sizeof (attrzp->z_pflags)); 2688 if (projid != ZFS_INVALID_PROJID) { 2689 attrzp->z_projid = projid; 2690 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2691 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, 2692 sizeof (attrzp->z_projid)); 2693 } 2694 } 2695 2696 if (mask & (AT_UID|AT_GID)) { 2697 2698 if (mask & AT_UID) { 2699 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 2700 &new_uid, sizeof (new_uid)); 2701 zp->z_uid = new_uid; 2702 if (attrzp) { 2703 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2704 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 2705 sizeof (new_uid)); 2706 attrzp->z_uid = new_uid; 2707 } 2708 } 2709 2710 if (mask & AT_GID) { 2711 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 2712 NULL, &new_gid, sizeof (new_gid)); 2713 
zp->z_gid = new_gid; 2714 if (attrzp) { 2715 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2716 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 2717 sizeof (new_gid)); 2718 attrzp->z_gid = new_gid; 2719 } 2720 } 2721 if (!(mask & AT_MODE)) { 2722 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 2723 NULL, &new_mode, sizeof (new_mode)); 2724 new_mode = zp->z_mode; 2725 } 2726 err = zfs_acl_chown_setattr(zp); 2727 ASSERT0(err); 2728 if (attrzp) { 2729 vn_seqc_write_begin(ZTOV(attrzp)); 2730 err = zfs_acl_chown_setattr(attrzp); 2731 vn_seqc_write_end(ZTOV(attrzp)); 2732 ASSERT0(err); 2733 } 2734 } 2735 2736 if (mask & AT_MODE) { 2737 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 2738 &new_mode, sizeof (new_mode)); 2739 zp->z_mode = new_mode; 2740 ASSERT3P(aclp, !=, NULL); 2741 err = zfs_aclset_common(zp, aclp, cr, tx); 2742 ASSERT0(err); 2743 if (zp->z_acl_cached) 2744 zfs_acl_free(zp->z_acl_cached); 2745 zp->z_acl_cached = aclp; 2746 aclp = NULL; 2747 } 2748 2749 2750 if (mask & AT_ATIME) { 2751 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 2752 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 2753 &zp->z_atime, sizeof (zp->z_atime)); 2754 } 2755 2756 if (mask & AT_MTIME) { 2757 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 2758 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 2759 mtime, sizeof (mtime)); 2760 } 2761 2762 if (projid != ZFS_INVALID_PROJID) { 2763 zp->z_projid = projid; 2764 SA_ADD_BULK_ATTR(bulk, count, 2765 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, 2766 sizeof (zp->z_projid)); 2767 } 2768 2769 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2770 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 2771 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 2772 NULL, mtime, sizeof (mtime)); 2773 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2774 &ctime, sizeof (ctime)); 2775 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 2776 } else if (mask != 0) { 2777 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2778 &ctime, sizeof (ctime)); 2779 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); 2780 if (attrzp) { 2781 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2782 SA_ZPL_CTIME(zfsvfs), NULL, 2783 &ctime, sizeof (ctime)); 2784 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 2785 mtime, ctime); 2786 } 2787 } 2788 2789 /* 2790 * Do this after setting timestamps to prevent timestamp 2791 * update from toggling bit 2792 */ 2793 2794 if (xoap && (mask & AT_XVATTR)) { 2795 2796 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 2797 xoap->xoa_createtime = vap->va_birthtime; 2798 /* 2799 * restore trimmed off masks 2800 * so that return masks can be set for caller. 
2801 */ 2802 2803 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2804 XVA_SET_REQ(xvap, XAT_APPENDONLY); 2805 } 2806 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2807 XVA_SET_REQ(xvap, XAT_NOUNLINK); 2808 } 2809 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2810 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2811 } 2812 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2813 XVA_SET_REQ(xvap, XAT_NODUMP); 2814 } 2815 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2816 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2817 } 2818 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2819 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2820 } 2821 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) { 2822 XVA_SET_REQ(xvap, XAT_PROJINHERIT); 2823 } 2824 2825 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 2826 ASSERT3S(vp->v_type, ==, VREG); 2827 2828 zfs_xvattr_set(zp, xvap, tx); 2829 } 2830 2831 if (fuid_dirtied) 2832 zfs_fuid_sync(zfsvfs, tx); 2833 2834 if (mask != 0) 2835 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2836 2837 if (mask & (AT_UID|AT_GID|AT_MODE)) 2838 mutex_exit(&zp->z_acl_lock); 2839 2840 if (attrzp) { 2841 if (mask & (AT_UID|AT_GID|AT_MODE)) 2842 mutex_exit(&attrzp->z_acl_lock); 2843 } 2844 out: 2845 if (err == 0 && attrzp) { 2846 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 2847 xattr_count, tx); 2848 ASSERT0(err2); 2849 } 2850 2851 if (attrzp) 2852 vput(ZTOV(attrzp)); 2853 2854 if (aclp) 2855 zfs_acl_free(aclp); 2856 2857 if (fuidp) { 2858 zfs_fuid_info_free(fuidp); 2859 fuidp = NULL; 2860 } 2861 2862 if (err) { 2863 dmu_tx_abort(tx); 2864 } else { 2865 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 2866 dmu_tx_commit(tx); 2867 } 2868 2869 out2: 2870 if (os->os_sync == ZFS_SYNC_ALWAYS) 2871 zil_commit(zilog, 0); 2872 2873 zfs_exit(zfsvfs, FTAG); 2874 return (err); 2875 } 2876 2877 /* 2878 * Look up the directory entries corresponding to the source and target 2879 * directory/name pairs. 
 */
static int
zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
    znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
    znode_t **tzpp)
{
	zfsvfs_t *zfsvfs;
	znode_t *szp, *tzp;
	int error;

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about.  One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed).  The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
		return (error);
	if ((error = zfs_verify_zp(tdzp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	*szpp = szp;

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
	if (error != 0) {
		vrele(ZTOV(szp));
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	*tzpp = tzp;
out:
	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	struct vnode *nvp, *svp, *tvp;
	znode_t *sdzp, *tdzp, *szp, *tzp;
	int error;

	/* Caller holds tdvp (and possibly *tvpp) locked; start clean. */
	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

relock:
	/* Source directory lock is taken blocking; the rest non-blocking. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/* Wait for the contended lock, then drop it and restart. */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);

	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		goto out;
	}
	svp = ZTOV(szp);
	tvp = tzp != NULL ? ZTOV(tzp) : NULL;

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Replace the caller's source vnode with the re-resolved one. */
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	/* Success: sdvp, tdvp, *svpp and (if present) *tvpp are all locked. */
	return (0);

out:
	return (error);
}

/*
 * Note that we must use VRELE_ASYNC in this function as it walks
 * up the directory tree and vrele may need to acquire an exclusive
 * lock if a last reference to a vnode is dropped.
 */
static int
zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
{
	zfsvfs_t *zfsvfs;
	znode_t *zp, *zp1;
	uint64_t parent;
	int error;

	zfsvfs = tdzp->z_zfsvfs;
	if (tdzp == szp)
		return (SET_ERROR(EINVAL));
	if (tdzp == sdzp)
		return (0);
	if (tdzp->z_id == zfsvfs->z_root)
		return (0);
	/* Walk up from the target directory looking for szp as an ancestor. */
	zp = tdzp;
	for (;;) {
		ASSERT(!zp->z_unlinked);
		if ((error = sa_lookup(zp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			break;

		if (parent == szp->z_id) {
			/* szp is an ancestor of tdzp: would create a cycle. */
			error = SET_ERROR(EINVAL);
			break;
		}
		if (parent == zfsvfs->z_root)
			break;
		if (parent == sdzp->z_id)
			break;

		error = zfs_zget(zfsvfs, parent, &zp1);
		if (error != 0)
			break;

		if (zp != tdzp)
			VN_RELE_ASYNC(ZTOV(zp),
			    dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)));
		zp = zp1;
	}

	if (error == ENOTDIR)
		panic("checkpath: .. not a directory\n");
	if (zp != tdzp)
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	return (error);
}

/*
 * Compatibility shim: cache_vop_rename() was added to the FreeBSD name
 * cache API in 1300124; emulate it with purges on older kernels.
 */
#if __FreeBSD_version < 1300124
static void
cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
    struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
{

	cache_purge(fvp);
	if (tvp != NULL)
		cache_purge(tvp);
	cache_purge_negative(tdvp);
}
#endif

static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr);

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 * IN:	sdvp	- Source directory containing the "old entry".
 *	scnp	- Old entry name.
 *	tdvp	- Target directory to contain the "new entry".
 *	tcnp	- New entry name.
 *	cr	- credentials of caller.
 * INOUT:	svpp	- Source file
 *		tvpp	- Target file, may point to NULL initially
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
static int
zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	int error;

	ASSERT_VOP_ELOCKED(tdvp, __func__);
	if (*tvpp != NULL)
		ASSERT_VOP_ELOCKED(*tvpp, __func__);

	/* Reject renames across filesystems. */
	if ((*svpp)->v_mount != tdvp->v_mount ||
	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/* Renaming into the .zfs control directory is not allowed. */
	if (zfsctl_is_node(tdvp)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Lock all four vnodes to ensure safety and semantics of renaming.
	 */
	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
	if (error != 0) {
		/* no vnodes are locked in the case of error here */
		return (error);
	}

	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
	VOP_UNLOCK1(sdvp);
	VOP_UNLOCK1(*svpp);
out:
	/* tdvp and *tvpp may alias; unlock each exactly once. */
	if (*tvpp != NULL)
		VOP_UNLOCK1(*tvpp);
	if (tdvp != *tvpp)
		VOP_UNLOCK1(tdvp);

	return (error);
}

/*
 * Rename worker: entered with sdvp, tdvp, *svpp and (if present) *tvpp
 * all exclusively locked by zfs_do_rename(); the caller unlocks them.
 */
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs;
	zilog_t *zilog;
	znode_t *tdzp, *sdzp, *tzp, *szp;
	const char *snm = scnp->cn_nameptr;
	const char *tnm = tcnp->cn_nameptr;
	int error;

	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);
	zfsvfs = tdzp->z_zfsvfs;

	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
		return (error);
	if ((error = zfs_verify_zp(sdzp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		error = SET_ERROR(EILSEQ);
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if ((*svpp) == (*tvpp)) {
		error = 0;
		goto out;
	}

	/* Refuse to move a mount point or clobber one. */
	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
	    (*tvpp)->v_mountedhere != NULL)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	szp = VTOZ(*svpp);
	if ((error = zfs_verify_zp(szp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
	if (tzp != NULL) {
		if ((error = zfs_verify_zp(tzp)) != 0) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow renames into our tree when the project
	 * IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */
	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
		goto out;

	if ((*svpp)->v_type == VDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
		    sdzp == szp ||
		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
			error = EINVAL;
			goto out;
		}

		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if ((*svpp)->v_type == VDIR) {
			if ((*tvpp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			} else {
				cache_purge(tdvp);
				if (sdvp != tdvp)
					cache_purge(sdvp);
			}
		} else {
			if ((*tvpp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
	}

	vn_seqc_write_begin(*svpp);
	vn_seqc_write_begin(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_begin(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_begin(tdvp);

	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
	if (tzp)
		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out_seq;
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);

	if (error == 0) {
		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
			    NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
				    snm, tdzp, tnm, szp);
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
				    ZRENAMING, NULL));
			}
		}
		if (error == 0) {
			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
		}
	}

	dmu_tx_commit(tx);

out_seq:
	vn_seqc_write_end(*svpp);
	vn_seqc_write_end(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_end(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_end(tdvp);

out:
	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);
	zfs_exit(zfsvfs, FTAG);

	return (error);
}

/*
 * Public rename entry point: resolve both names to vnodes, then hand
 * off to zfs_do_rename().  rflags/wo_vap (RENAME_* extensions) are not
 * supported on FreeBSD and must be zero/NULL.
 */
int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
    cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
{
	struct componentname scn, tcn;
	vnode_t *sdvp, *tdvp;
	vnode_t *svp, *tvp;
	int error;
	svp = tvp = NULL;

	if (rflags != 0 || wo_vap != NULL)
		return (SET_ERROR(EINVAL));

	sdvp = ZTOV(sdzp);
	tdvp = ZTOV(tdzp);
	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
		VOP_UNLOCK1(sdvp);
	if (error != 0)
		goto fail;
	VOP_UNLOCK1(svp);

	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
	/* EJUSTRETURN means the target name does not exist yet. */
	if (error == EJUSTRETURN)
		tvp = NULL;
	else if (error != 0) {
		VOP_UNLOCK1(tdvp);
		goto fail;
	}

	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
fail:
	if (svp != NULL)
		vrele(svp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dvp	- Directory to contain new symbolic link.
 *	link	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *	mnt_ns	- Unused on FreeBSD
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
{
	(void) flags;
	znode_t *zp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	uint64_t len = strlen(link);
	int error;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	zilog = zfsvfs->z_log;

	/* Reject names that are not valid UTF-8 when the fs demands it. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	if (len > MAXPATHLEN) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(ENAMETOOLONG));
	}

	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids, NULL)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
	    0 /* projid */)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	/* Paired with getnewvnode_drop_reserve() on every exit below. */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	*zpp = zp;

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 * IN:	vp	- vnode of symbolic link.
 *	uio	- structure to contain the link path.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- structure containing the link path.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 */
static int
zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
{
	(void) cr, (void) ct;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/* SA-based symlinks store the target as an attribute. */
	if (zp->z_is_sa)
		error = sa_lookup_uio(zp->z_sa_hdl,
		    SA_ZPL_SYMLINK(zfsvfs), uio);
	else
		error = zfs_sa_readlink(zp, uio);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * Insert a new entry into directory tdvp referencing svp.
 *
 * IN:	tdvp	- Directory to contain new entry.
 *	svp	- vnode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	svp - ctime updated
 */
int
zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
    int flags)
{
	(void) flags;
	znode_t *tzp;
	zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
	zilog_t *zilog;
	dmu_tx_t *tx;
	int error;
	uint64_t parent;
	uid_t owner;

	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);

	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
		return (error);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (ZTOV(szp)->v_type == VDIR) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_verify_zp(szp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EXDEV));
	}

	if (szp->z_pflags & (ZFS_APPENDONLY |
	    ZFS_IMMUTABLE | ZFS_READONLY)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}


	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
	if (error) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, tdzp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	error = zfs_link_create(tdzp, name, szp, tx, 0);

	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
	}

	dmu_tx_commit(tx);

	if (error == 0) {
		/*
		 * NOTE(review): 'ct' is not declared in this function;
		 * vnevent_link() is presumably a FreeBSD compat macro that
		 * ignores its second argument — confirm in the spl headers.
		 */
		vnevent_link(ZTOV(szp), ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 * IN:	ip	- inode of file to free data in.
 *	cmd	- action to take (only F_FREESP supported).
 *	bfp	- section of file to free/alloc.
 *	flag	- current file open mode flags.
 *	offset	- current file offset.
 *	cr	- credentials of caller.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	ip - ctime|mtime updated
 */
int
zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr)
{
	(void) offset;
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	uint64_t off, len;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	if (cmd != F_FREESP) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EROFS));
	}

	/* A negative length is never valid. */
	if (bfp->l_len < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Permissions aren't checked on Solaris because on this OS
	 * zfs_space() can only be called with an opened file handle.
	 * On Linux we can get here through truncate_range() which
	 * operates directly on inodes, so we need to check access rights.
	 */
	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	off = bfp->l_start;
	len = bfp->l_len; /* 0 means from off to end of file */

	error = zfs_freesp(zp, off, len, flag, TRUE);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}

/*
 * VOP_INACTIVE helper: flush a dirty atime to the SA before the vnode
 * is recycled; bail out fast when the fs is torn down or the file was
 * unlinked.
 */
static void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	(void) cr, (void) ct;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
3868 */ 3869 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3870 vrecycle(vp); 3871 return; 3872 } 3873 3874 if (zp->z_unlinked) { 3875 /* 3876 * Fast path to recycle a vnode of a removed file. 3877 */ 3878 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3879 vrecycle(vp); 3880 return; 3881 } 3882 3883 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3884 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3885 3886 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3887 zfs_sa_upgrade_txholds(tx, zp); 3888 error = dmu_tx_assign(tx, TXG_WAIT); 3889 if (error) { 3890 dmu_tx_abort(tx); 3891 } else { 3892 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 3893 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 3894 zp->z_atime_dirty = 0; 3895 dmu_tx_commit(tx); 3896 } 3897 } 3898 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3899 } 3900 3901 3902 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid), 3903 "struct zfid_short bigger than struct fid"); 3904 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid), 3905 "struct zfid_long bigger than struct fid"); 3906 3907 static int 3908 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3909 { 3910 (void) ct; 3911 znode_t *zp = VTOZ(vp); 3912 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3913 uint32_t gen; 3914 uint64_t gen64; 3915 uint64_t object = zp->z_id; 3916 zfid_short_t *zfid; 3917 int size, i, error; 3918 3919 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 3920 return (error); 3921 3922 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 3923 &gen64, sizeof (uint64_t))) != 0) { 3924 zfs_exit(zfsvfs, FTAG); 3925 return (error); 3926 } 3927 3928 gen = (uint32_t)gen64; 3929 3930 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3931 fidp->fid_len = size; 3932 3933 zfid = (zfid_short_t *)fidp; 3934 3935 zfid->zf_len = size; 3936 3937 for (i = 0; i < sizeof (zfid->zf_object); i++) 3938 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3939 3940 /* Must have a non-zero generation number to distinguish from .zfs */ 3941 if (gen == 0) 3942 gen = 1; 3943 for (i = 0; i < sizeof (zfid->zf_gen); i++) 3944 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3945 3946 if (size == LONG_FID_LEN) { 3947 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3948 zfid_long_t *zlfid; 3949 3950 zlfid = (zfid_long_t *)fidp; 3951 3952 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3953 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3954 3955 /* XXX - this should be the generation number for the objset */ 3956 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3957 zlfid->zf_setgen[i] = 0; 3958 } 3959 3960 zfs_exit(zfsvfs, FTAG); 3961 return (0); 3962 } 3963 3964 static int 3965 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 3966 caller_context_t *ct) 3967 { 3968 znode_t *zp; 3969 zfsvfs_t *zfsvfs; 3970 int error; 3971 3972 switch (cmd) { 3973 case _PC_LINK_MAX: 3974 *valp = MIN(LONG_MAX, ZFS_LINK_MAX); 3975 return (0); 3976 3977 case _PC_FILESIZEBITS: 3978 *valp = 64; 3979 return (0); 3980 case _PC_MIN_HOLE_SIZE: 3981 *valp = (int)SPA_MINBLOCKSIZE; 3982 return (0); 3983 case _PC_ACL_EXTENDED: 3984 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ 3985 zp = VTOZ(vp); 3986 zfsvfs = zp->z_zfsvfs; 3987 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 3988 return (error); 3989 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0; 3990 zfs_exit(zfsvfs, FTAG); 3991 #else 3992 *valp = 0; 3993 #endif 3994 return (0); 3995 3996 case _PC_ACL_NFS4: 3997 zp = VTOZ(vp); 3998 zfsvfs = zp->z_zfsvfs; 3999 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 4000 return (error); 4001 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 
		    1 : 0;
		zfs_exit(zfsvfs, FTAG);
		return (0);

	case _PC_ACL_PATH_MAX:
		*valp = ACL_MAX_ENTRIES;
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

/*
 * Pager "get pages" backend: fill the busy pages in ma[] (plus up to
 * *rbehind/*rahead optional neighbors) from the DMU under a read range
 * lock.  Returns a zfs_vm_pagerret_* code.
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	vm_object_t object;
	off_t start, end, obj_size;
	uint_t blksz;
	int pgsin_b, pgsin_a;
	int error;

	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
		return (zfs_vm_pagerret_error);

	start = IDX_TO_OFF(ma[0]->pindex);
	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);

	/*
	 * Lock a range covering all required and optional pages.
	 * Note that we need to handle the case of the block size growing.
	 */
	for (;;) {
		blksz = zp->z_blksz;
		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
		    rounddown(start, blksz),
		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
		if (lr == NULL) {
			/*
			 * Could not take the lock without blocking: give up
			 * on the optional pages and proceed unlocked.
			 */
			if (rahead != NULL) {
				*rahead = 0;
				rahead = NULL;
			}
			if (rbehind != NULL) {
				*rbehind = 0;
				rbehind = NULL;
			}
			break;
		}
		if (blksz == zp->z_blksz)
			break;
		/* Block size changed while we waited; retry with new size. */
		zfs_rangelock_exit(lr);
	}

	object = ma[0]->object;
	zfs_vmobject_wlock(object);
	obj_size = object->un_pager.vnp.vnp_size;
	zfs_vmobject_wunlock(object);
	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
		if (lr != NULL)
			zfs_rangelock_exit(lr);
		zfs_exit(zfsvfs, FTAG);
		return (zfs_vm_pagerret_bad);
	}

	/* Clamp read-behind/read-ahead to the locked block boundaries. */
	pgsin_b = 0;
	if (rbehind != NULL) {
		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
		pgsin_b = MIN(*rbehind, pgsin_b);
	}

	pgsin_a = 0;
	if (rahead != NULL) {
		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
			pgsin_a =
			    OFF_TO_IDX(round_page(obj_size) - end);
		pgsin_a = MIN(*rahead, pgsin_a);
	}

	/*
	 * NB: we need to pass the exact byte size of the data that we expect
	 * to read after accounting for the file size. This is required because
	 * ZFS will panic if we request DMU to read beyond the end of the last
	 * allocated block.
	 */
	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));

	if (lr != NULL)
		zfs_rangelock_exit(lr);
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);

	zfs_exit(zfsvfs, FTAG);

	if (error != 0)
		return (zfs_vm_pagerret_error);

	VM_CNT_INC(v_vnodein);
	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
	if (rbehind != NULL)
		*rbehind = pgsin_b;
	if (rahead != NULL)
		*rahead = pgsin_a;
	return (zfs_vm_pagerret_ok);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int *a_rbehind;
	int *a_rahead;
};
#endif

/* VOP_GETPAGES entry point: unpack args and call zfs_getpages(). */
static int
zfs_freebsd_getpages(struct vop_getpages_args *ap)
{

	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead));
}

/*
 * Pager "put pages" backend: write the dirty pages in ma[] back to the
 * DMU in one transaction under a write range lock, trimming the request
 * to the current object (file) size.  rtvals[] receives a
 * zfs_vm_pagerret_* code per page.
 */
static int
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
    int *rtvals)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	dmu_tx_t *tx;
	struct sf_buf *sf;
	vm_object_t object;
	vm_page_t m;
	caddr_t va;
	size_t tocopy;
	size_t lo_len;
	vm_ooffset_t lo_off;
	vm_ooffset_t off;
	uint_t blksz;
	int ncount;
	int pcount;
	int err;
	int i;

	object = vp->v_object;
	KASSERT(ma[0]->object == object, ("mismatching object"));
	KASSERT(len > 0 && (len & PAGE_MASK) == 0,
	    ("unexpected length"));

	pcount = btoc(len);
	ncount = pcount;
	for (i = 0; i < pcount; i++)
		rtvals[i] = zfs_vm_pagerret_error;

	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
		return (zfs_vm_pagerret_error);

	off = IDX_TO_OFF(ma[0]->pindex);
	blksz = zp->z_blksz;
	lo_off = rounddown(off, blksz);
	lo_len = roundup(len + (off - lo_off), blksz);
	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);

	zfs_vmobject_wlock(object);
	if (len + off > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > off) {
			int pgoff;

			/* Trim the request to the end of the file. */
			len = object->un_pager.vnp.vnp_size - off;
			ncount = btoc(len);
			if ((pgoff = (int)len & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
				    ("zfs_putpages: page %p is not read-only",
				    m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			/* The whole request is past EOF. */
			len = 0;
			ncount = 0;
		}
		if (ncount < pcount) {
			for (i = ncount; i < pcount; i++) {
				rtvals[i] = zfs_vm_pagerret_bad;
			}
		}
	}
	zfs_vmobject_wunlock(object);

	if (ncount == 0)
		goto out;

	/* Silently drop the write if any relevant quota is exhausted. */
	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
	    zp->z_projid))) {
		goto out;
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, zp->z_id, off, len);

	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		goto out;
	}

	if (zp->z_blksz < PAGE_SIZE) {
		/* Sub-page block size: copy page by page through sf_bufs. */
		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
			va = zfs_map_page(ma[i], &sf);
			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
			zfs_unmap_page(sf);
		}
	} else {
		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
	}

	if (err == 0) {
		uint64_t mtime[2], ctime[2];
		sa_bulk_attr_t bulk[3];
		int count = 0;

		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    &mtime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
		    &zp->z_pflags, 8);
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		ASSERT0(err);
		/*
		 * XXX we should be passing a callback to undirty
		 * but that would make the locking messier
		 */
		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
		    len, 0, NULL, NULL);

		zfs_vmobject_wlock(object);
		for (i = 0; i < ncount; i++) {
			rtvals[i] = zfs_vm_pagerret_ok;
			vm_page_undirty(ma[i]);
		}
		zfs_vmobject_wunlock(object);
		VM_CNT_INC(v_vnodeout);
		VM_CNT_ADD(v_vnodepgsout, ncount);
	}
	dmu_tx_commit(tx);

out:
	zfs_rangelock_exit(lr);
	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zfsvfs->z_log, zp->z_id);

	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);

	zfs_exit(zfsvfs, FTAG);
	return (rtvals[0]);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_putpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_sync;
	int *a_rtvals;
};
#endif

/* VOP_PUTPAGES entry point: unpack args and call zfs_putpages(). */
static int
zfs_freebsd_putpages(struct vop_putpages_args *ap)
{

	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
	    ap->a_rtvals));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_bmap_args {
	struct vnode *a_vp;
	daddr_t a_bn;
	struct bufobj **a_bop;
	daddr_t *a_bnp;
	int *a_runp;
	int *a_runb;
};
#endif

/*
 * VOP_BMAP: report an identity logical->"physical" mapping with no
 * read-ahead/read-behind runs; ZFS does its own block management.
 */
static int
zfs_freebsd_bmap(struct vop_bmap_args *ap)
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_open_args {
	struct vnode *a_vp;
	int a_mode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_OPEN: delegate to zfs_open() and, on success, make sure the
 * vnode has a VM object sized to the file.
 */
static int
zfs_freebsd_open(struct vop_open_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	int error;

	error = zfs_open(&vp, ap->a_mode, ap->a_cred);
	if (error == 0)
		vnode_create_vobject(vp, zp->z_size, ap->a_td);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/* VOP_CLOSE: forward to zfs_close() with fixed count/offset args. */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{

	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_ioctl_args {
	struct vnode *a_vp;
	ulong_t a_command;
	caddr_t a_data;
	int a_fflag;
	struct ucred *cred;
	struct thread *td;
};
#endif

/* VOP_IOCTL: forward to zfs_ioctl(). */
static int
zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
{

	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
	    ap->a_fflag, ap->a_cred, NULL));
}

/* Translate VOP IO_* ioflag bits into the O_* flags zfs_read/write use. */
static int
ioflags(int ioflags)
{
	int flags = 0;

	if (ioflags & IO_APPEND)
		flags |= O_APPEND;
	if (ioflags & IO_NDELAY)
		flags |= O_NONBLOCK;
	if (ioflags & IO_SYNC)
		flags |= O_SYNC;

	return (flags);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_read_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	int a_ioflag;
	struct ucred *a_cred;
};
#endif

/* VOP_READ: wrap the uio and forward to zfs_read(). */
static int
zfs_freebsd_read(struct vop_read_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_write_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	int a_ioflag;
	struct ucred *a_cred;
};
#endif

/* VOP_WRITE: wrap the uio and forward to zfs_write(). */
static int
zfs_freebsd_write(struct vop_write_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred));
}

#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	vnode_t *vp;
	znode_t *zp;
	uint64_t pflags;

	vp = v->a_vp;
	/* Lockless (SMR) znode access; any ambiguity falls back to EAGAIN. */
	zp = VTOZ_SMR(vp);
	if (__predict_false(zp == NULL))
		return (EAGAIN);
	pflags = atomic_load_64(&zp->z_pflags);
	if (pflags & ZFS_AV_QUARANTINED)
		return (EAGAIN);
	if (pflags & ZFS_XATTR)
		return (EAGAIN);
	if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
		return (EAGAIN);
	return (0);
}
#endif

#if __FreeBSD_version >= 1300139
/*
 * Lockless symlink resolution for the fast-path lookup: only succeeds
 * when a cached target string is already attached to the znode.
 */
static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	vnode_t *vp;
	znode_t *zp;
	char *target;

	vp = v->a_vp;
	zp = VTOZ_SMR(vp);
	if (__predict_false(zp == NULL)) {
		return (EAGAIN);
	}

	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
	if (target == NULL) {
		return (EAGAIN);
	}
	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct vop_access_args {
	struct vnode *a_vp;
	accmode_t a_accmode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_ACCESS: let ZFS check VREAD/VWRITE/VEXEC/VAPPEND, hand the
 * remaining bits (e.g. VADMIN) to the generic vaccess().
 */
static int
zfs_freebsd_access(struct vop_access_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	accmode_t accmode;
	int error = 0;


	/* Cheap execute-only fast path. */
	if (ap->a_accmode == VEXEC) {
		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
			return (0);
	}

	/*
	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
	 */
	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
	if (accmode != 0)
		error = zfs_access(zp, accmode, 0, ap->a_cred);

	/*
	 * VADMIN has to be handled by vaccess().
	 */
	if (error == 0) {
		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
		if (accmode != 0) {
#if __FreeBSD_version >= 1300105
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred);
#else
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred, NULL);
#endif
		}
	}

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories.
	 */
	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
		error = EACCES;
	}

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LOOKUP backend: copy the component name into a NUL-terminated
 * buffer and forward to zfs_lookup().
 */
static int
zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
{
	struct componentname *cnp = ap->a_cnp;
	char nm[NAME_MAX + 1];

	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));

	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
	    cnp->cn_cred, 0, cached));
}

/* VOP_CACHEDLOOKUP: lookup with namecache participation enabled. */
static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
{

	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LOOKUP: go through the namecache when the fs allows it,
 * otherwise do an uncached ZFS lookup.
 */
static int
zfs_cache_lookup(struct vop_lookup_args *ap)
{
	zfsvfs_t *zfsvfs;

	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	if (zfsvfs->z_use_namecache)
		return (vfs_cache_lookup(ap));
	else
		return (zfs_freebsd_lookup(ap, B_FALSE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_create_args {
	struct vnode
	    *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/*
 * VOP_CREATE: forward to zfs_create() and enter the new vnode into the
 * namecache when requested.
 */
static int
zfs_freebsd_create(struct vop_create_args *ap)
{
	zfsvfs_t *zfsvfs;
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc, mode;

#if __FreeBSD_version < 1400068
	ASSERT(cnp->cn_flags & SAVENAME);
#endif

	vattr_init_mask(vap);
	mode = vap->va_mode & ALLPERMS;
	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	*ap->a_vpp = NULL;

	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	if (zfsvfs->z_use_namecache &&
	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);

	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_remove_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_REMOVE: forward to zfs_remove_(). */
static int
zfs_freebsd_remove(struct vop_remove_args *ap)
{

#if __FreeBSD_version < 1400068
	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
#endif

	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
	    ap->a_cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_mkdir_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/* VOP_MKDIR: forward to zfs_mkdir() and return the new vnode. */
static int
zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
{
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc;

#if __FreeBSD_version < 1400068
	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
#endif

	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
	    ap->a_cnp->cn_cred, 0, NULL, NULL);

	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_rmdir_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_RMDIR: forward to zfs_rmdir_(). */
static int
zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

#if __FreeBSD_version < 1400068
	ASSERT(cnp->cn_flags & SAVENAME);
#endif

	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readdir_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
	int *a_eofflag;
	int *a_ncookies;
	cookie_t **a_cookies;
};
#endif

/* VOP_READDIR: wrap the uio and forward to zfs_readdir(). */
static int
zfs_freebsd_readdir(struct vop_readdir_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
	    ap->a_ncookies, ap->a_cookies));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_fsync_args {
	struct vnode *a_vp;
	int a_waitfor;
	struct thread *a_td;
};
#endif

/* VOP_FSYNC: forward to zfs_fsync() with the calling thread's cred. */
static int
zfs_freebsd_fsync(struct vop_fsync_args *ap)
{

	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_GETATTR: fetch attributes via zfs_getattr() using an xvattr so
 * that ZFS extended flags can be mapped to FreeBSD chflags bits.
 */
static int
zfs_freebsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	ulong_t fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE?. */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	XVA_SET_REQ(&xvap, XAT_READONLY);
	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
	XVA_SET_REQ(&xvap, XAT_SYSTEM);
	XVA_SET_REQ(&xvap, XAT_HIDDEN);
	XVA_SET_REQ(&xvap, XAT_REPARSE);
	XVA_SET_REQ(&xvap, XAT_OFFLINE);
	XVA_SET_REQ(&xvap, XAT_SPARSE);

	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
	    xvap.xva_xoptattrs.xoa_archive);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
	FLAG_CHECK(UF_READONLY, XAT_READONLY,
	    xvap.xva_xoptattrs.xoa_readonly);
	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
	    xvap.xva_xoptattrs.xoa_system);
	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
	    xvap.xva_xoptattrs.xoa_hidden);
	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
	    xvap.xva_xoptattrs.xoa_reparse);
	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
	    xvap.xva_xoptattrs.xoa_offline);
	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
	    xvap.xva_xoptattrs.xoa_sparse);

#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_setattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

static int
zfs_freebsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t *vp =
ap->a_vp; 4817 vattr_t *vap = ap->a_vap; 4818 cred_t *cred = ap->a_cred; 4819 xvattr_t xvap; 4820 ulong_t fflags; 4821 uint64_t zflags; 4822 4823 vattr_init_mask(vap); 4824 vap->va_mask &= ~AT_NOSET; 4825 4826 xva_init(&xvap); 4827 xvap.xva_vattr = *vap; 4828 4829 zflags = VTOZ(vp)->z_pflags; 4830 4831 if (vap->va_flags != VNOVAL) { 4832 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 4833 int error; 4834 4835 if (zfsvfs->z_use_fuids == B_FALSE) 4836 return (EOPNOTSUPP); 4837 4838 fflags = vap->va_flags; 4839 /* 4840 * XXX KDM 4841 * We need to figure out whether it makes sense to allow 4842 * UF_REPARSE through, since we don't really have other 4843 * facilities to handle reparse points and zfs_setattr() 4844 * doesn't currently allow setting that attribute anyway. 4845 */ 4846 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 4847 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 4848 UF_OFFLINE|UF_SPARSE)) != 0) 4849 return (EOPNOTSUPP); 4850 /* 4851 * Unprivileged processes are not permitted to unset system 4852 * flags, or modify flags if any system flags are set. 4853 * Privileged non-jail processes may not modify system flags 4854 * if securelevel > 0 and any existing system flags are set. 4855 * Privileged jail processes behave like privileged non-jail 4856 * processes if the PR_ALLOW_CHFLAGS permission bit is set; 4857 * otherwise, they behave like unprivileged processes. 4858 */ 4859 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 4860 spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) { 4861 if (zflags & 4862 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4863 error = securelevel_gt(cred, 0); 4864 if (error != 0) 4865 return (error); 4866 } 4867 } else { 4868 /* 4869 * Callers may only modify the file flags on 4870 * objects they have VADMIN rights for. 
4871 */ 4872 if ((error = VOP_ACCESS(vp, VADMIN, cred, 4873 curthread)) != 0) 4874 return (error); 4875 if (zflags & 4876 (ZFS_IMMUTABLE | ZFS_APPENDONLY | 4877 ZFS_NOUNLINK)) { 4878 return (EPERM); 4879 } 4880 if (fflags & 4881 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 4882 return (EPERM); 4883 } 4884 } 4885 4886 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 4887 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 4888 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 4889 XVA_SET_REQ(&xvap, (xflag)); \ 4890 (xfield) = ((fflags & (fflag)) != 0); \ 4891 } \ 4892 } while (0) 4893 /* Convert chflags into ZFS-type flags. */ 4894 /* XXX: what about SF_SETTABLE?. */ 4895 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 4896 xvap.xva_xoptattrs.xoa_immutable); 4897 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 4898 xvap.xva_xoptattrs.xoa_appendonly); 4899 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 4900 xvap.xva_xoptattrs.xoa_nounlink); 4901 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 4902 xvap.xva_xoptattrs.xoa_archive); 4903 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 4904 xvap.xva_xoptattrs.xoa_nodump); 4905 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 4906 xvap.xva_xoptattrs.xoa_readonly); 4907 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 4908 xvap.xva_xoptattrs.xoa_system); 4909 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 4910 xvap.xva_xoptattrs.xoa_hidden); 4911 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 4912 xvap.xva_xoptattrs.xoa_reparse); 4913 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 4914 xvap.xva_xoptattrs.xoa_offline); 4915 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 4916 xvap.xva_xoptattrs.xoa_sparse); 4917 #undef FLAG_CHANGE 4918 } 4919 if (vap->va_birthtime.tv_sec != VNOVAL) { 4920 xvap.xva_vattr.va_mask |= AT_XVATTR; 4921 XVA_SET_REQ(&xvap, XAT_CREATETIME); 4922 } 4923 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL)); 4924 } 4925 4926 #ifndef _SYS_SYSPROTO_H_ 4927 
struct vop_rename_args {
	struct vnode *a_fdvp;
	struct vnode *a_fvp;
	struct componentname *a_fcnp;
	struct vnode *a_tdvp;
	struct vnode *a_tvp;
	struct componentname *a_tcnp;
};
#endif

/*
 * VOP_RENAME: delegate to zfs_do_rename() and then drop the references
 * the VFS rename protocol handed us on all four vnodes (tvp may be NULL
 * when the target name does not yet exist).
 */
static int
zfs_freebsd_rename(struct vop_rename_args *ap)
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

#if __FreeBSD_version < 1400068
	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
#endif

	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred);

	vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_symlink_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
	char *a_target;
};
#endif

/*
 * VOP_SYMLINK: create a symlink via zfs_symlink() and, on newer FreeBSD,
 * pre-populate the namecache's cached-symlink-target buffer so later
 * readlink calls can be served without hitting ZFS.
 */
static int
zfs_freebsd_symlink(struct vop_symlink_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
#if __FreeBSD_version >= 1300139
	char *symlink;
	size_t symlink_len;
#endif
	int rc;

#if __FreeBSD_version < 1400068
	ASSERT(cnp->cn_flags & SAVENAME);
#endif

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
	if (rc == 0) {
		*ap->a_vpp = ZTOV(zp);
		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
#if __FreeBSD_version >= 1300139
		MPASS(zp->z_cached_symlink == NULL);
		symlink_len = strlen(ap->a_target);
		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
		if (symlink != NULL) {
			memcpy(symlink, ap->a_target, symlink_len);
			symlink[symlink_len] = '\0';
			/* Release-publish so readers see a terminated buffer. */
			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
			    (uintptr_t)symlink);
		}
#endif
	}
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readlink_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_READLINK: read the symlink target via zfs_readlink(); on newer
 * FreeBSD, opportunistically install the just-read target into the
 * namecache symlink cache (only for single-iovec kernel-space reads,
 * where the target ended up contiguous in the caller's buffer).
 */
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
	zfs_uio_t uio;
	int error;
#if __FreeBSD_version >= 1300139
	znode_t *zp = VTOZ(ap->a_vp);
	char *symlink, *base;
	size_t symlink_len;
	bool trycache;
#endif

	zfs_uio_init(&uio, ap->a_uio);
#if __FreeBSD_version >= 1300139
	trycache = false;
	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
	    zfs_uio_iovcnt(&uio) == 1) {
		base = zfs_uio_iovbase(&uio, 0);
		symlink_len = zfs_uio_iovlen(&uio, 0);
		trycache = true;
	}
#endif
	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
#if __FreeBSD_version >= 1300139
	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
	    error != 0 || !trycache) {
		return (error);
	}
	/* Bytes actually read = original iovec length minus residual. */
	symlink_len -= zfs_uio_resid(&uio);
	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
	if (symlink != NULL) {
		memcpy(symlink, base, symlink_len);
		symlink[symlink_len] = '\0';
		/* Only one racing reader may install the cached copy. */
		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL, (uintptr_t)symlink)) {
			cache_symlink_free(symlink, symlink_len + 1);
		}
	}
#endif
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_link_args {
	struct vnode *a_tdvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_LINK: reject cross-mount links, then delegate to zfs_link(). */
static int
zfs_freebsd_link(struct vop_link_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vnode_t *vp = ap->a_vp;
	vnode_t *tdvp = ap->a_tdvp;

	if (tdvp->v_mount != vp->v_mount)
		return (EXDEV);

#if __FreeBSD_version < 1400068
	ASSERT(cnp->cn_flags & SAVENAME);
#endif

	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
	    cnp->cn_nameptr, cnp->cn_cred, 0));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/* VOP_INACTIVE: delegate to zfs_inactive(); a_td went away in newer VFS. */
static int
zfs_freebsd_inactive(struct vop_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;

#if __FreeBSD_version >= 1300123
	zfs_inactive(vp, curthread->td_ucred, NULL);
#else
	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
#endif
	return (0);
}

#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_NEED_INACTIVE: cheap check whether VOP_INACTIVE is required.
 * Conservatively answers "yes" when pages need flushing or when the
 * teardown lock cannot be acquired without sleeping.
 */
static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int need;

	if (vn_need_pageq_flush(vp))
		return (1);

	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
		return (1);
	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	return (need);
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct vop_reclaim_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_RECLAIM: detach the znode from the vnode, freeing it outright if
 * its SA handle is already gone, else via zfs_zinactive().
 */
static int
zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT3P(zp, !=, NULL);

#if __FreeBSD_version < 1300042
	/* Destroy the vm object and flush associated pages. */
	vnode_destroy_vobject(vp);
#endif
	/*
	 * z_teardown_inactive_lock protects from a race with
	 * zfs_znode_dmu_fini in zfsvfs_teardown during
	 * force unmount.
	 */
	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL)
		zfs_znode_free(zp);
	else
		zfs_zinactive(zp);
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	vp->v_data = NULL;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_fid_args {
	struct vnode *a_vp;
	struct fid *a_fid;
};
#endif

/* VOP_FID: thin wrapper delegating to zfs_fid(). */
static int
zfs_freebsd_fid(struct vop_fid_args *ap)
{

	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
}


#ifndef _SYS_SYSPROTO_H_
struct vop_pathconf_args {
	struct vnode *a_vp;
	int a_name;
	register_t *a_retval;
} *ap;
#endif

/*
 * VOP_PATHCONF: ask zfs_pathconf() first; for names it does not know
 * (EOPNOTSUPP) supply FreeBSD-specific answers, falling back to
 * vop_stdpathconf() for everything else.
 */
static int
zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
{
	ulong_t val;
	int error;

	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
	    curthread->td_ucred, NULL);
	if (error == 0) {
		*ap->a_retval = val;
		return (error);
	}
	if (error != EOPNOTSUPP)
		return (error);

	switch (ap->a_name) {
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
#if __FreeBSD_version >= 1400032
	case _PC_DEALLOC_PRESENT:
		*ap->a_retval = 1;
		return (0);
#endif
	case _PC_PIPE_BUF:
		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
			*ap->a_retval = PIPE_BUF;
			return (0);
		}
		return (EINVAL);
	default:
		return (vop_stdpathconf(ap));
	}
}

/*
 * Controls the on-disk naming scheme for user-namespace extended
 * attributes; see the table above zfs_create_attrname().
 */
static int zfs_xattr_compat = 1;

/* Validate an extattr name: no '/' and no reserved namespace prefix. */
static int
zfs_check_attrname(const char *name)
{
	/* We don't allow '/' character in attribute name. */
	if (strchr(name, '/') != NULL)
		return (SET_ERROR(EINVAL));
	/* We don't allow attribute names that start with a namespace prefix. */
	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
		return (SET_ERROR(EINVAL));
	return (0);
}

/*
 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
 * extended attribute name:
 *
 * NAMESPACE	XATTR_COMPAT	PREFIX
 * system	*		freebsd:system:
 * user		1		(none, can be used to access ZFS
 *				fsattr(5) attributes created on Solaris)
 * user		0		user.
 */
static int
zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
    size_t size, boolean_t compat)
{
	const char *namespace, *prefix, *suffix;

	memset(attrname, 0, size);

	switch (attrnamespace) {
	case EXTATTR_NAMESPACE_USER:
		if (compat) {
			/*
			 * This is the default namespace by which we can access
			 * all attributes created on Solaris.
			 */
			prefix = namespace = suffix = "";
		} else {
			/*
			 * This is compatible with the user namespace encoding
			 * on Linux prior to xattr_compat, but nothing
			 * else.
			 */
			prefix = "";
			namespace = "user";
			suffix = ".";
		}
		break;
	case EXTATTR_NAMESPACE_SYSTEM:
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
		suffix = ":";
		break;
	case EXTATTR_NAMESPACE_EMPTY:
	default:
		return (SET_ERROR(EINVAL));
	}
	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
	    name) >= size) {
		return (SET_ERROR(ENAMETOOLONG));
	}
	return (0);
}

/*
 * Make sure zp->z_xattr_cached (the SA xattr nvlist) is loaded, given
 * that z_xattr_lock is held at least as reader.  If only the read lock
 * is held, temporarily upgrade (or drop and re-take as writer) to load
 * the cache, then downgrade, so the caller always exits with the same
 * (read) lock mode it entered with.
 */
static int
zfs_ensure_xattr_cached(znode_t *zp)
{
	int error = 0;

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));

	if (zp->z_xattr_cached != NULL)
		return (0);

	if (rw_write_held(&zp->z_xattr_lock))
		return (zfs_sa_get_xattr(zp));

	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
		rw_exit(&zp->z_xattr_lock);
		rw_enter(&zp->z_xattr_lock, RW_WRITER);
	}
	/* Re-check: another thread may have filled the cache meanwhile. */
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	rw_downgrade(&zp->z_xattr_lock);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif

/*
 * Read one extattr stored as a file in the hidden xattr directory:
 * look up the xattr dir, open the named file, and either report its
 * size (a_size) or copy its contents out (a_uio).
 */
static int
zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	flags = FREAD;
#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
	    xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
#endif
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
	if (error != 0)
		return (SET_ERROR(error));
	vp = nd.ni_vp;
	NDFREE_PNBUF(&nd);

	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);

	VOP_UNLOCK1(vp);
	vn_close(vp, flags, ap->a_cred, td);
	return (error);
}

/*
 * Read one extattr stored in the znode's SA xattr nvlist (System
 * Attribute storage); report its size or copy the value out.
 */
static int
zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	uchar_t *nv_value;
	uint_t nv_size;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
	    &nv_value, &nv_size);
	if (error != 0)
		return (SET_ERROR(error));

	if (ap->a_size != NULL)
		*ap->a_size = nv_size;
	else if (ap->a_uio != NULL)
		error = uiomove(nv_value, nv_size, ap->a_uio);
	if (error != 0)
		return (SET_ERROR(error));

	return (0);
}

/*
 * Try SA storage first (when enabled), then fall back to the xattr
 * directory; ENOENT from the SA path triggers the directory lookup.
 */
static int
zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrname[EXTATTR_MAXNAMELEN+1];
	int error;

	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
	    sizeof (attrname), compat);
	if (error != 0)
		return (error);

	error = ENOENT;
	if (zfsvfs->z_use_sa && zp->z_is_sa)
		error = zfs_getextattr_sa(ap, attrname);
	if (error == ENOENT)
		error = zfs_getextattr_dir(ap, attrname);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
5425 */ 5426 static int 5427 zfs_getextattr(struct vop_getextattr_args *ap) 5428 { 5429 znode_t *zp = VTOZ(ap->a_vp); 5430 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5431 int error; 5432 5433 /* 5434 * If the xattr property is off, refuse the request. 5435 */ 5436 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5437 return (SET_ERROR(EOPNOTSUPP)); 5438 5439 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5440 ap->a_cred, ap->a_td, VREAD); 5441 if (error != 0) 5442 return (SET_ERROR(error)); 5443 5444 error = zfs_check_attrname(ap->a_name); 5445 if (error != 0) 5446 return (error); 5447 5448 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 5449 return (error); 5450 error = ENOENT; 5451 rw_enter(&zp->z_xattr_lock, RW_READER); 5452 5453 error = zfs_getextattr_impl(ap, zfs_xattr_compat); 5454 if ((error == ENOENT || error == ENOATTR) && 5455 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5456 /* 5457 * Fall back to the alternate namespace format if we failed to 5458 * find a user xattr. 5459 */ 5460 error = zfs_getextattr_impl(ap, !zfs_xattr_compat); 5461 } 5462 5463 rw_exit(&zp->z_xattr_lock); 5464 zfs_exit(zfsvfs, FTAG); 5465 if (error == ENOENT) 5466 error = SET_ERROR(ENOATTR); 5467 return (error); 5468 } 5469 5470 #ifndef _SYS_SYSPROTO_H_ 5471 struct vop_deleteextattr { 5472 IN struct vnode *a_vp; 5473 IN int a_attrnamespace; 5474 IN const char *a_name; 5475 IN struct ucred *a_cred; 5476 IN struct thread *a_td; 5477 }; 5478 #endif 5479 5480 static int 5481 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) 5482 { 5483 struct nameidata nd; 5484 vnode_t *xvp = NULL, *vp; 5485 int error; 5486 5487 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5488 LOOKUP_XATTR, B_FALSE); 5489 if (error != 0) 5490 return (error); 5491 5492 #if __FreeBSD_version < 1400043 5493 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5494 UIO_SYSSPACE, attrname, xvp, ap->a_td); 5495 #else 5496 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | 
LOCKLEAF, 5497 UIO_SYSSPACE, attrname, xvp); 5498 #endif 5499 error = namei(&nd); 5500 if (error != 0) 5501 return (SET_ERROR(error)); 5502 5503 vp = nd.ni_vp; 5504 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5505 NDFREE_PNBUF(&nd); 5506 5507 vput(nd.ni_dvp); 5508 if (vp == nd.ni_dvp) 5509 vrele(vp); 5510 else 5511 vput(vp); 5512 5513 return (error); 5514 } 5515 5516 static int 5517 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname) 5518 { 5519 znode_t *zp = VTOZ(ap->a_vp); 5520 nvlist_t *nvl; 5521 int error; 5522 5523 error = zfs_ensure_xattr_cached(zp); 5524 if (error != 0) 5525 return (error); 5526 5527 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5528 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5529 5530 nvl = zp->z_xattr_cached; 5531 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY); 5532 if (error != 0) 5533 error = SET_ERROR(error); 5534 else 5535 error = zfs_sa_set_xattr(zp, attrname, NULL, 0); 5536 if (error != 0) { 5537 zp->z_xattr_cached = NULL; 5538 nvlist_free(nvl); 5539 } 5540 return (error); 5541 } 5542 5543 static int 5544 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat) 5545 { 5546 znode_t *zp = VTOZ(ap->a_vp); 5547 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5548 char attrname[EXTATTR_MAXNAMELEN+1]; 5549 int error; 5550 5551 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5552 sizeof (attrname), compat); 5553 if (error != 0) 5554 return (error); 5555 5556 error = ENOENT; 5557 if (zfsvfs->z_use_sa && zp->z_is_sa) 5558 error = zfs_deleteextattr_sa(ap, attrname); 5559 if (error == ENOENT) 5560 error = zfs_deleteextattr_dir(ap, attrname); 5561 return (error); 5562 } 5563 5564 /* 5565 * Vnode operation to remove a named attribute. 5566 */ 5567 static int 5568 zfs_deleteextattr(struct vop_deleteextattr_args *ap) 5569 { 5570 znode_t *zp = VTOZ(ap->a_vp); 5571 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5572 int error; 5573 5574 /* 5575 * If the xattr property is off, refuse the request. 
5576 */ 5577 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5578 return (SET_ERROR(EOPNOTSUPP)); 5579 5580 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5581 ap->a_cred, ap->a_td, VWRITE); 5582 if (error != 0) 5583 return (SET_ERROR(error)); 5584 5585 error = zfs_check_attrname(ap->a_name); 5586 if (error != 0) 5587 return (error); 5588 5589 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 5590 return (error); 5591 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5592 5593 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat); 5594 if ((error == ENOENT || error == ENOATTR) && 5595 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5596 /* 5597 * Fall back to the alternate namespace format if we failed to 5598 * find a user xattr. 5599 */ 5600 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat); 5601 } 5602 5603 rw_exit(&zp->z_xattr_lock); 5604 zfs_exit(zfsvfs, FTAG); 5605 if (error == ENOENT) 5606 error = SET_ERROR(ENOATTR); 5607 return (error); 5608 } 5609 5610 #ifndef _SYS_SYSPROTO_H_ 5611 struct vop_setextattr { 5612 IN struct vnode *a_vp; 5613 IN int a_attrnamespace; 5614 IN const char *a_name; 5615 INOUT struct uio *a_uio; 5616 IN struct ucred *a_cred; 5617 IN struct thread *a_td; 5618 }; 5619 #endif 5620 5621 static int 5622 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) 5623 { 5624 struct thread *td = ap->a_td; 5625 struct nameidata nd; 5626 struct vattr va; 5627 vnode_t *xvp = NULL, *vp; 5628 int error, flags; 5629 5630 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5631 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5632 if (error != 0) 5633 return (error); 5634 5635 flags = FFLAGS(O_WRONLY | O_CREAT); 5636 #if __FreeBSD_version < 1400043 5637 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); 5638 #else 5639 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5640 #endif 5641 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, 5642 NULL); 5643 if (error != 0) 5644 return 
(SET_ERROR(error)); 5645 vp = nd.ni_vp; 5646 NDFREE_PNBUF(&nd); 5647 5648 VATTR_NULL(&va); 5649 va.va_size = 0; 5650 error = VOP_SETATTR(vp, &va, ap->a_cred); 5651 if (error == 0) 5652 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5653 5654 VOP_UNLOCK1(vp); 5655 vn_close(vp, flags, ap->a_cred, td); 5656 return (error); 5657 } 5658 5659 static int 5660 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) 5661 { 5662 znode_t *zp = VTOZ(ap->a_vp); 5663 nvlist_t *nvl; 5664 size_t sa_size; 5665 int error; 5666 5667 error = zfs_ensure_xattr_cached(zp); 5668 if (error != 0) 5669 return (error); 5670 5671 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5672 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5673 5674 nvl = zp->z_xattr_cached; 5675 size_t entry_size = ap->a_uio->uio_resid; 5676 if (entry_size > DXATTR_MAX_ENTRY_SIZE) 5677 return (SET_ERROR(EFBIG)); 5678 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); 5679 if (error != 0) 5680 return (SET_ERROR(error)); 5681 if (sa_size > DXATTR_MAX_SA_SIZE) 5682 return (SET_ERROR(EFBIG)); 5683 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP); 5684 error = uiomove(buf, entry_size, ap->a_uio); 5685 if (error != 0) { 5686 error = SET_ERROR(error); 5687 } else { 5688 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); 5689 if (error != 0) 5690 error = SET_ERROR(error); 5691 } 5692 if (error == 0) 5693 error = zfs_sa_set_xattr(zp, attrname, buf, entry_size); 5694 kmem_free(buf, entry_size); 5695 if (error != 0) { 5696 zp->z_xattr_cached = NULL; 5697 nvlist_free(nvl); 5698 } 5699 return (error); 5700 } 5701 5702 static int 5703 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat) 5704 { 5705 znode_t *zp = VTOZ(ap->a_vp); 5706 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5707 char attrname[EXTATTR_MAXNAMELEN+1]; 5708 int error; 5709 5710 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5711 sizeof (attrname), compat); 5712 if (error != 0) 5713 return (error); 5714 5715 struct 
vop_deleteextattr_args vda = { 5716 .a_vp = ap->a_vp, 5717 .a_attrnamespace = ap->a_attrnamespace, 5718 .a_name = ap->a_name, 5719 .a_cred = ap->a_cred, 5720 .a_td = ap->a_td, 5721 }; 5722 error = ENOENT; 5723 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) { 5724 error = zfs_setextattr_sa(ap, attrname); 5725 if (error == 0) { 5726 /* 5727 * Successfully put into SA, we need to clear the one 5728 * in dir if present. 5729 */ 5730 zfs_deleteextattr_dir(&vda, attrname); 5731 } 5732 } 5733 if (error != 0) { 5734 error = zfs_setextattr_dir(ap, attrname); 5735 if (error == 0 && zp->z_is_sa) { 5736 /* 5737 * Successfully put into dir, we need to clear the one 5738 * in SA if present. 5739 */ 5740 zfs_deleteextattr_sa(&vda, attrname); 5741 } 5742 } 5743 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5744 /* 5745 * Also clear all versions of the alternate compat name. 5746 */ 5747 zfs_deleteextattr_impl(&vda, !compat); 5748 } 5749 return (error); 5750 } 5751 5752 /* 5753 * Vnode operation to set a named attribute. 5754 */ 5755 static int 5756 zfs_setextattr(struct vop_setextattr_args *ap) 5757 { 5758 znode_t *zp = VTOZ(ap->a_vp); 5759 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5760 int error; 5761 5762 /* 5763 * If the xattr property is off, refuse the request. 
5764 */ 5765 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5766 return (SET_ERROR(EOPNOTSUPP)); 5767 5768 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5769 ap->a_cred, ap->a_td, VWRITE); 5770 if (error != 0) 5771 return (SET_ERROR(error)); 5772 5773 error = zfs_check_attrname(ap->a_name); 5774 if (error != 0) 5775 return (error); 5776 5777 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 5778 return (error); 5779 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5780 5781 error = zfs_setextattr_impl(ap, zfs_xattr_compat); 5782 5783 rw_exit(&zp->z_xattr_lock); 5784 zfs_exit(zfsvfs, FTAG); 5785 return (error); 5786 } 5787 5788 #ifndef _SYS_SYSPROTO_H_ 5789 struct vop_listextattr { 5790 IN struct vnode *a_vp; 5791 IN int a_attrnamespace; 5792 INOUT struct uio *a_uio; 5793 OUT size_t *a_size; 5794 IN struct ucred *a_cred; 5795 IN struct thread *a_td; 5796 }; 5797 #endif 5798 5799 static int 5800 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) 5801 { 5802 struct thread *td = ap->a_td; 5803 struct nameidata nd; 5804 uint8_t dirbuf[sizeof (struct dirent)]; 5805 struct iovec aiov; 5806 struct uio auio; 5807 vnode_t *xvp = NULL, *vp; 5808 int error, eof; 5809 5810 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5811 LOOKUP_XATTR, B_FALSE); 5812 if (error != 0) { 5813 /* 5814 * ENOATTR means that the EA directory does not yet exist, 5815 * i.e. there are no extended attributes there. 
5816 */ 5817 if (error == ENOATTR) 5818 error = 0; 5819 return (error); 5820 } 5821 5822 #if __FreeBSD_version < 1400043 5823 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5824 UIO_SYSSPACE, ".", xvp, td); 5825 #else 5826 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5827 UIO_SYSSPACE, ".", xvp); 5828 #endif 5829 error = namei(&nd); 5830 if (error != 0) 5831 return (SET_ERROR(error)); 5832 vp = nd.ni_vp; 5833 NDFREE_PNBUF(&nd); 5834 5835 auio.uio_iov = &aiov; 5836 auio.uio_iovcnt = 1; 5837 auio.uio_segflg = UIO_SYSSPACE; 5838 auio.uio_td = td; 5839 auio.uio_rw = UIO_READ; 5840 auio.uio_offset = 0; 5841 5842 size_t plen = strlen(attrprefix); 5843 5844 do { 5845 aiov.iov_base = (void *)dirbuf; 5846 aiov.iov_len = sizeof (dirbuf); 5847 auio.uio_resid = sizeof (dirbuf); 5848 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5849 if (error != 0) 5850 break; 5851 int done = sizeof (dirbuf) - auio.uio_resid; 5852 for (int pos = 0; pos < done; ) { 5853 struct dirent *dp = (struct dirent *)(dirbuf + pos); 5854 pos += dp->d_reclen; 5855 /* 5856 * XXX: Temporarily we also accept DT_UNKNOWN, as this 5857 * is what we get when attribute was created on Solaris. 5858 */ 5859 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5860 continue; 5861 else if (plen == 0 && 5862 ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name)) 5863 continue; 5864 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5865 continue; 5866 uint8_t nlen = dp->d_namlen - plen; 5867 if (ap->a_size != NULL) { 5868 *ap->a_size += 1 + nlen; 5869 } else if (ap->a_uio != NULL) { 5870 /* 5871 * Format of extattr name entry is one byte for 5872 * length and the rest for name. 
5873 */ 5874 error = uiomove(&nlen, 1, ap->a_uio); 5875 if (error == 0) { 5876 char *namep = dp->d_name + plen; 5877 error = uiomove(namep, nlen, ap->a_uio); 5878 } 5879 if (error != 0) { 5880 error = SET_ERROR(error); 5881 break; 5882 } 5883 } 5884 } 5885 } while (!eof && error == 0); 5886 5887 vput(vp); 5888 return (error); 5889 } 5890 5891 static int 5892 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix) 5893 { 5894 znode_t *zp = VTOZ(ap->a_vp); 5895 int error; 5896 5897 error = zfs_ensure_xattr_cached(zp); 5898 if (error != 0) 5899 return (error); 5900 5901 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5902 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5903 5904 size_t plen = strlen(attrprefix); 5905 nvpair_t *nvp = NULL; 5906 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { 5907 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); 5908 5909 const char *name = nvpair_name(nvp); 5910 if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name)) 5911 continue; 5912 else if (strncmp(name, attrprefix, plen) != 0) 5913 continue; 5914 uint8_t nlen = strlen(name) - plen; 5915 if (ap->a_size != NULL) { 5916 *ap->a_size += 1 + nlen; 5917 } else if (ap->a_uio != NULL) { 5918 /* 5919 * Format of extattr name entry is one byte for 5920 * length and the rest for name. 
5921 */ 5922 error = uiomove(&nlen, 1, ap->a_uio); 5923 if (error == 0) { 5924 char *namep = __DECONST(char *, name) + plen; 5925 error = uiomove(namep, nlen, ap->a_uio); 5926 } 5927 if (error != 0) { 5928 error = SET_ERROR(error); 5929 break; 5930 } 5931 } 5932 } 5933 5934 return (error); 5935 } 5936 5937 static int 5938 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat) 5939 { 5940 znode_t *zp = VTOZ(ap->a_vp); 5941 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5942 char attrprefix[16]; 5943 int error; 5944 5945 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5946 sizeof (attrprefix), compat); 5947 if (error != 0) 5948 return (error); 5949 5950 if (zfsvfs->z_use_sa && zp->z_is_sa) 5951 error = zfs_listextattr_sa(ap, attrprefix); 5952 if (error == 0) 5953 error = zfs_listextattr_dir(ap, attrprefix); 5954 return (error); 5955 } 5956 5957 /* 5958 * Vnode operation to retrieve extended attributes on a vnode. 5959 */ 5960 static int 5961 zfs_listextattr(struct vop_listextattr_args *ap) 5962 { 5963 znode_t *zp = VTOZ(ap->a_vp); 5964 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5965 int error; 5966 5967 if (ap->a_size != NULL) 5968 *ap->a_size = 0; 5969 5970 /* 5971 * If the xattr property is off, refuse the request. 5972 */ 5973 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5974 return (SET_ERROR(EOPNOTSUPP)); 5975 5976 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5977 ap->a_cred, ap->a_td, VREAD); 5978 if (error != 0) 5979 return (SET_ERROR(error)); 5980 5981 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 5982 return (error); 5983 rw_enter(&zp->z_xattr_lock, RW_READER); 5984 5985 error = zfs_listextattr_impl(ap, zfs_xattr_compat); 5986 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5987 /* Also list user xattrs with the alternate format. 
*/ 5988 error = zfs_listextattr_impl(ap, !zfs_xattr_compat); 5989 } 5990 5991 rw_exit(&zp->z_xattr_lock); 5992 zfs_exit(zfsvfs, FTAG); 5993 return (error); 5994 } 5995 5996 #ifndef _SYS_SYSPROTO_H_ 5997 struct vop_getacl_args { 5998 struct vnode *vp; 5999 acl_type_t type; 6000 struct acl *aclp; 6001 struct ucred *cred; 6002 struct thread *td; 6003 }; 6004 #endif 6005 6006 static int 6007 zfs_freebsd_getacl(struct vop_getacl_args *ap) 6008 { 6009 int error; 6010 vsecattr_t vsecattr; 6011 6012 if (ap->a_type != ACL_TYPE_NFS4) 6013 return (EINVAL); 6014 6015 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 6016 if ((error = zfs_getsecattr(VTOZ(ap->a_vp), 6017 &vsecattr, 0, ap->a_cred))) 6018 return (error); 6019 6020 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, 6021 vsecattr.vsa_aclcnt); 6022 if (vsecattr.vsa_aclentp != NULL) 6023 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 6024 6025 return (error); 6026 } 6027 6028 #ifndef _SYS_SYSPROTO_H_ 6029 struct vop_setacl_args { 6030 struct vnode *vp; 6031 acl_type_t type; 6032 struct acl *aclp; 6033 struct ucred *cred; 6034 struct thread *td; 6035 }; 6036 #endif 6037 6038 static int 6039 zfs_freebsd_setacl(struct vop_setacl_args *ap) 6040 { 6041 int error; 6042 vsecattr_t vsecattr; 6043 int aclbsize; /* size of acl list in bytes */ 6044 aclent_t *aaclp; 6045 6046 if (ap->a_type != ACL_TYPE_NFS4) 6047 return (EINVAL); 6048 6049 if (ap->a_aclp == NULL) 6050 return (EINVAL); 6051 6052 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 6053 return (EINVAL); 6054 6055 /* 6056 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 6057 * splitting every entry into two and appending "canonical six" 6058 * entries at the end. Don't allow for setting an ACL that would 6059 * cause chmod(2) to run out of ACL entries. 
6060 */ 6061 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 6062 return (ENOSPC); 6063 6064 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 6065 if (error != 0) 6066 return (error); 6067 6068 vsecattr.vsa_mask = VSA_ACE; 6069 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t); 6070 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 6071 aaclp = vsecattr.vsa_aclentp; 6072 vsecattr.vsa_aclentsz = aclbsize; 6073 6074 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 6075 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred); 6076 kmem_free(aaclp, aclbsize); 6077 6078 return (error); 6079 } 6080 6081 #ifndef _SYS_SYSPROTO_H_ 6082 struct vop_aclcheck_args { 6083 struct vnode *vp; 6084 acl_type_t type; 6085 struct acl *aclp; 6086 struct ucred *cred; 6087 struct thread *td; 6088 }; 6089 #endif 6090 6091 static int 6092 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) 6093 { 6094 6095 return (EOPNOTSUPP); 6096 } 6097 6098 static int 6099 zfs_vptocnp(struct vop_vptocnp_args *ap) 6100 { 6101 vnode_t *covered_vp; 6102 vnode_t *vp = ap->a_vp; 6103 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 6104 znode_t *zp = VTOZ(vp); 6105 int ltype; 6106 int error; 6107 6108 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 6109 return (error); 6110 6111 /* 6112 * If we are a snapshot mounted under .zfs, run the operation 6113 * on the covered vnode. 
6114 */ 6115 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 6116 char name[MAXNAMLEN + 1]; 6117 znode_t *dzp; 6118 size_t len; 6119 6120 error = zfs_znode_parent_and_name(zp, &dzp, name); 6121 if (error == 0) { 6122 len = strlen(name); 6123 if (*ap->a_buflen < len) 6124 error = SET_ERROR(ENOMEM); 6125 } 6126 if (error == 0) { 6127 *ap->a_buflen -= len; 6128 memcpy(ap->a_buf + *ap->a_buflen, name, len); 6129 *ap->a_vpp = ZTOV(dzp); 6130 } 6131 zfs_exit(zfsvfs, FTAG); 6132 return (error); 6133 } 6134 zfs_exit(zfsvfs, FTAG); 6135 6136 covered_vp = vp->v_mount->mnt_vnodecovered; 6137 #if __FreeBSD_version >= 1300045 6138 enum vgetstate vs = vget_prep(covered_vp); 6139 #else 6140 vhold(covered_vp); 6141 #endif 6142 ltype = VOP_ISLOCKED(vp); 6143 VOP_UNLOCK1(vp); 6144 #if __FreeBSD_version >= 1300045 6145 error = vget_finish(covered_vp, LK_SHARED, vs); 6146 #else 6147 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 6148 #endif 6149 if (error == 0) { 6150 #if __FreeBSD_version >= 1300123 6151 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf, 6152 ap->a_buflen); 6153 #else 6154 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 6155 ap->a_buf, ap->a_buflen); 6156 #endif 6157 vput(covered_vp); 6158 } 6159 vn_lock(vp, ltype | LK_RETRY); 6160 if (VN_IS_DOOMED(vp)) 6161 error = SET_ERROR(ENOENT); 6162 return (error); 6163 } 6164 6165 #if __FreeBSD_version >= 1400032 6166 static int 6167 zfs_deallocate(struct vop_deallocate_args *ap) 6168 { 6169 znode_t *zp = VTOZ(ap->a_vp); 6170 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6171 zilog_t *zilog; 6172 off_t off, len, file_sz; 6173 int error; 6174 6175 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) 6176 return (error); 6177 6178 /* 6179 * Callers might not be able to detect properly that we are read-only, 6180 * so check it explicitly here. 
6181 */ 6182 if (zfs_is_readonly(zfsvfs)) { 6183 zfs_exit(zfsvfs, FTAG); 6184 return (SET_ERROR(EROFS)); 6185 } 6186 6187 zilog = zfsvfs->z_log; 6188 off = *ap->a_offset; 6189 len = *ap->a_len; 6190 file_sz = zp->z_size; 6191 if (off + len > file_sz) 6192 len = file_sz - off; 6193 /* Fast path for out-of-range request. */ 6194 if (len <= 0) { 6195 *ap->a_len = 0; 6196 zfs_exit(zfsvfs, FTAG); 6197 return (0); 6198 } 6199 6200 error = zfs_freesp(zp, off, len, O_RDWR, TRUE); 6201 if (error == 0) { 6202 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS || 6203 (ap->a_ioflag & IO_SYNC) != 0) 6204 zil_commit(zilog, zp->z_id); 6205 *ap->a_offset = off + len; 6206 *ap->a_len = 0; 6207 } 6208 6209 zfs_exit(zfsvfs, FTAG); 6210 return (error); 6211 } 6212 #endif 6213 6214 #ifndef _SYS_SYSPROTO_H_ 6215 struct vop_copy_file_range_args { 6216 struct vnode *a_invp; 6217 off_t *a_inoffp; 6218 struct vnode *a_outvp; 6219 off_t *a_outoffp; 6220 size_t *a_lenp; 6221 unsigned int a_flags; 6222 struct ucred *a_incred; 6223 struct ucred *a_outcred; 6224 struct thread *a_fsizetd; 6225 } 6226 #endif 6227 /* 6228 * TODO: FreeBSD will only call file system-specific copy_file_range() if both 6229 * files resides under the same mountpoint. In case of ZFS we want to be called 6230 * even is files are in different datasets (but on the same pools, but we need 6231 * to check that ourselves). 6232 */ 6233 static int 6234 zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) 6235 { 6236 zfsvfs_t *outzfsvfs; 6237 struct vnode *invp = ap->a_invp; 6238 struct vnode *outvp = ap->a_outvp; 6239 struct mount *mp; 6240 struct uio io; 6241 int error; 6242 uint64_t len = *ap->a_lenp; 6243 6244 if (!zfs_bclone_enabled) { 6245 mp = NULL; 6246 goto bad_write_fallback; 6247 } 6248 6249 /* 6250 * TODO: If offset/length is not aligned to recordsize, use 6251 * vn_generic_copy_file_range() on this fragment. 
6252 * It would be better to do this after we lock the vnodes, but then we 6253 * need something else than vn_generic_copy_file_range(). 6254 */ 6255 6256 vn_start_write(outvp, &mp, V_WAIT); 6257 if (__predict_true(mp == outvp->v_mount)) { 6258 outzfsvfs = (zfsvfs_t *)mp->mnt_data; 6259 if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os), 6260 SPA_FEATURE_BLOCK_CLONING)) { 6261 goto bad_write_fallback; 6262 } 6263 } 6264 if (invp == outvp) { 6265 if (vn_lock(outvp, LK_EXCLUSIVE) != 0) { 6266 goto bad_write_fallback; 6267 } 6268 } else { 6269 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \ 6270 __FreeBSD_version >= 1400086 6271 vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false, 6272 LK_EXCLUSIVE); 6273 #else 6274 vn_lock_pair(invp, false, outvp, false); 6275 #endif 6276 if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) { 6277 goto bad_locked_fallback; 6278 } 6279 } 6280 6281 #ifdef MAC 6282 error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred, 6283 outvp); 6284 if (error != 0) 6285 goto out_locked; 6286 #endif 6287 6288 io.uio_offset = *ap->a_outoffp; 6289 io.uio_resid = *ap->a_lenp; 6290 error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); 6291 if (error != 0) 6292 goto out_locked; 6293 6294 error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp), 6295 ap->a_outoffp, &len, ap->a_outcred); 6296 if (error == EXDEV || error == EAGAIN || error == EINVAL || 6297 error == EOPNOTSUPP) 6298 goto bad_locked_fallback; 6299 *ap->a_lenp = (size_t)len; 6300 out_locked: 6301 if (invp != outvp) 6302 VOP_UNLOCK(invp); 6303 VOP_UNLOCK(outvp); 6304 if (mp != NULL) 6305 vn_finished_write(mp); 6306 return (error); 6307 6308 bad_locked_fallback: 6309 if (invp != outvp) 6310 VOP_UNLOCK(invp); 6311 VOP_UNLOCK(outvp); 6312 bad_write_fallback: 6313 if (mp != NULL) 6314 vn_finished_write(mp); 6315 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 6316 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 6317 ap->a_incred, 
ap->a_outcred, ap->a_fsizetd); 6318 return (error); 6319 } 6320 6321 struct vop_vector zfs_vnodeops; 6322 struct vop_vector zfs_fifoops; 6323 struct vop_vector zfs_shareops; 6324 6325 struct vop_vector zfs_vnodeops = { 6326 .vop_default = &default_vnodeops, 6327 .vop_inactive = zfs_freebsd_inactive, 6328 #if __FreeBSD_version >= 1300042 6329 .vop_need_inactive = zfs_freebsd_need_inactive, 6330 #endif 6331 .vop_reclaim = zfs_freebsd_reclaim, 6332 #if __FreeBSD_version >= 1300102 6333 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6334 #endif 6335 #if __FreeBSD_version >= 1300139 6336 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6337 #endif 6338 .vop_access = zfs_freebsd_access, 6339 .vop_allocate = VOP_EINVAL, 6340 #if __FreeBSD_version >= 1400032 6341 .vop_deallocate = zfs_deallocate, 6342 #endif 6343 .vop_lookup = zfs_cache_lookup, 6344 .vop_cachedlookup = zfs_freebsd_cachedlookup, 6345 .vop_getattr = zfs_freebsd_getattr, 6346 .vop_setattr = zfs_freebsd_setattr, 6347 .vop_create = zfs_freebsd_create, 6348 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create, 6349 .vop_mkdir = zfs_freebsd_mkdir, 6350 .vop_readdir = zfs_freebsd_readdir, 6351 .vop_fsync = zfs_freebsd_fsync, 6352 .vop_open = zfs_freebsd_open, 6353 .vop_close = zfs_freebsd_close, 6354 .vop_rmdir = zfs_freebsd_rmdir, 6355 .vop_ioctl = zfs_freebsd_ioctl, 6356 .vop_link = zfs_freebsd_link, 6357 .vop_symlink = zfs_freebsd_symlink, 6358 .vop_readlink = zfs_freebsd_readlink, 6359 .vop_read = zfs_freebsd_read, 6360 .vop_write = zfs_freebsd_write, 6361 .vop_remove = zfs_freebsd_remove, 6362 .vop_rename = zfs_freebsd_rename, 6363 .vop_pathconf = zfs_freebsd_pathconf, 6364 .vop_bmap = zfs_freebsd_bmap, 6365 .vop_fid = zfs_freebsd_fid, 6366 .vop_getextattr = zfs_getextattr, 6367 .vop_deleteextattr = zfs_deleteextattr, 6368 .vop_setextattr = zfs_setextattr, 6369 .vop_listextattr = zfs_listextattr, 6370 .vop_getacl = zfs_freebsd_getacl, 6371 .vop_setacl = zfs_freebsd_setacl, 6372 .vop_aclcheck = 
zfs_freebsd_aclcheck, 6373 .vop_getpages = zfs_freebsd_getpages, 6374 .vop_putpages = zfs_freebsd_putpages, 6375 .vop_vptocnp = zfs_vptocnp, 6376 #if __FreeBSD_version >= 1300064 6377 .vop_lock1 = vop_lock, 6378 .vop_unlock = vop_unlock, 6379 .vop_islocked = vop_islocked, 6380 #endif 6381 #if __FreeBSD_version >= 1400043 6382 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6383 #endif 6384 .vop_copy_file_range = zfs_freebsd_copy_file_range, 6385 }; 6386 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 6387 6388 struct vop_vector zfs_fifoops = { 6389 .vop_default = &fifo_specops, 6390 .vop_fsync = zfs_freebsd_fsync, 6391 #if __FreeBSD_version >= 1300102 6392 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6393 #endif 6394 #if __FreeBSD_version >= 1300139 6395 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6396 #endif 6397 .vop_access = zfs_freebsd_access, 6398 .vop_getattr = zfs_freebsd_getattr, 6399 .vop_inactive = zfs_freebsd_inactive, 6400 .vop_read = VOP_PANIC, 6401 .vop_reclaim = zfs_freebsd_reclaim, 6402 .vop_setattr = zfs_freebsd_setattr, 6403 .vop_write = VOP_PANIC, 6404 .vop_pathconf = zfs_freebsd_pathconf, 6405 .vop_fid = zfs_freebsd_fid, 6406 .vop_getacl = zfs_freebsd_getacl, 6407 .vop_setacl = zfs_freebsd_setacl, 6408 .vop_aclcheck = zfs_freebsd_aclcheck, 6409 #if __FreeBSD_version >= 1400043 6410 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6411 #endif 6412 }; 6413 VFS_VOP_VECTOR_REGISTER(zfs_fifoops); 6414 6415 /* 6416 * special share hidden files vnode operations template 6417 */ 6418 struct vop_vector zfs_shareops = { 6419 .vop_default = &default_vnodeops, 6420 #if __FreeBSD_version >= 1300121 6421 .vop_fplookup_vexec = VOP_EAGAIN, 6422 #endif 6423 #if __FreeBSD_version >= 1300139 6424 .vop_fplookup_symlink = VOP_EAGAIN, 6425 #endif 6426 .vop_access = zfs_freebsd_access, 6427 .vop_inactive = zfs_freebsd_inactive, 6428 .vop_reclaim = zfs_freebsd_reclaim, 6429 .vop_fid = zfs_freebsd_fid, 6430 .vop_pathconf = zfs_freebsd_pathconf, 6431 #if 
__FreeBSD_version >= 1400043 6432 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6433 #endif 6434 }; 6435 VFS_VOP_VECTOR_REGISTER(zfs_shareops); 6436 6437 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW, 6438 "Use legacy ZFS xattr naming for writing new user namespace xattrs"); 6439