/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 Nexenta Systems, Inc.
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */


#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/endian.h>
#include <sys/vm.h>
#include <sys/vnode.h>
#if __FreeBSD_version >= 1300102
#include <sys/smr.h>
#endif
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/kdb.h>
#include <sys/sysproto.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_quota.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sched.h>
#include <sys/acl.h>
#include <sys/vmmeter.h>
#include <vm/vm_param.h>
#include <sys/zil.h>
#include <sys/zfs_vnops.h>

#include <vm/vm_object.h>

#include <sys/extattr.h>
#include <sys/priv.h>

#ifndef VN_OPEN_INVFS
#define	VN_OPEN_INVFS	0x0
#endif

VFS_SMR_DECLARE;

#if __FreeBSD_version >= 1300047
#define	vm_page_wire_lock(pp)
#define	vm_page_wire_unlock(pp)
#else
#define	vm_page_wire_lock(pp) vm_page_lock(pp)
#define	vm_page_wire_unlock(pp) vm_page_unlock(pp)
#endif

#ifdef DEBUG_VFS_LOCKS
#define	VNCHECKREF(vp)				\
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__));
#else
#define	VNCHECKREF(vp)
#endif

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.
 * To do this, the ZPL must properly lock its in-core state, create a DMU
 * transaction, do the work, record this work in the intent log (ZIL),
 * commit the DMU transaction, and wait for the intent log to commit if it
 * is a synchronous operation.  Moreover, the vnode ops must work in both
 * normal and log replay context.  The ordering of events is important to
 * avoid deadlocks and references to freed memory.  The example below
 * illustrates the following Big Rules:
 *
 * (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done while avoiding races, using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 * (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 * (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 * (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 * (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 * (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 * (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		ZFS_EXIT(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	ZFS_EXIT(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */

/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	}
	return (SET_ERROR(ENOTTY));
}

static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to
	 * DEV_BSIZE aligned boundaries, if the range is not aligned.  As a
	 * result a DEV_BSIZE subrange with partially dirty data may get
	 * marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked_12(obj);
#if __FreeBSD_version < 1300050
	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}
			vm_page_sbusy(pp);
		} else if (pp != NULL) {
			ASSERT(!pp->valid);
			pp = NULL;
		}
		if (pp != NULL) {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_object_pip_add(obj, 1);
			pmap_remove_write(pp);
			if (nbytes != 0)
				vm_page_clear_dirty(pp, off, nbytes);
		}
		break;
	}
#else
	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
	    VM_ALLOC_IGN_SBUSY);
	if (pp != NULL) {
		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
		vm_object_pip_add(obj, 1);
		pmap_remove_write(pp);
		if (nbytes != 0)
			vm_page_clear_dirty(pp, off, nbytes);
	}
#endif
	return (pp);
}

static void
page_unbusy(vm_page_t pp)
{

	vm_page_sunbusy(pp);
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(pp->object);
#else
	vm_object_pip_subtract(pp->object, 1);
#endif
}

#if __FreeBSD_version > 1300051
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t m;

	obj = vp->v_object;
	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
	    VM_ALLOC_NOBUSY);
	return (m);
}
#else
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}

			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_wire_lock(pp);
			vm_page_hold(pp);
			vm_page_wire_unlock(pp);

		} else
			pp = NULL;
		break;
	}
	return (pp);
}
#endif

static void
page_unhold(vm_page_t pp)
{

	vm_page_wire_lock(pp);
#if __FreeBSD_version >= 1300035
	vm_page_unwire(pp, PQ_ACTIVE);
#else
	vm_page_unhold(pp);
#endif
	vm_page_wire_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
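 *
 * On FreeBSD this is done after the data has gone into the DMU:
 * update_pages() below copies the affected range back out of the DMU into
 * any resident, busied pages so that the two copies stay coherent.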
 */
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
	vm_object_t obj;
	struct sf_buf *sf;
	vnode_t *vp = ZTOV(zp);
	caddr_t va;
	int off;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300041
	vm_object_pip_add(obj, 1);
#endif
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			zfs_vmobject_wunlock_12(obj);

			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, zp->z_id, start + off, nbytes,
			    va + off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);

			zfs_vmobject_wlock_12(obj);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;
	}
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(obj);
#else
	vm_object_pip_wakeupn(obj, 0);
#endif
	zfs_vmobject_wunlock_12(obj);
}

/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.
 * What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		otherwise we fall back to the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	 the file is memory mapped.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	start = zfs_uio_offset(uio);
	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			struct sf_buf *sf;
			caddr_t va;

			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
			page_unhold(pp);
		} else {
			zfs_vmobject_wunlock_12(obj);
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
			zfs_vmobject_wlock_12(obj);
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

int
zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
{
	int error = 0;
	ssize_t resid;

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);

	if (error) {
		return (SET_ERROR(error));
	} else if (presid == NULL) {
		if (resid != 0) {
			error = SET_ERROR(EIO);
		}
	} else {
		*presid = resid;
	}
	return (error);
}

void
zfs_zrele_async(znode_t *zp)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = ITOZSB(vp)->z_os;

	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
}

static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
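		 * (vn_vget_ino_gen() uses zfs_dd_callback() above to lock the
		 * parent vnode and keeps the mount referenced while dvp is
		 * temporarily unlocked.)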
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 *	IN:	dvp	- vnode of directory to search.
 *		nm	- name of entry to lookup.
 *		pnp	- full pathname to lookup [UNUSED].
 *		flags	- LOOKUP_XATTR set if looking for an attribute.
 *		rdir	- root directory vnode [UNUSED].
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if __FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int error = 0;

	/*
	 * Fast path lookup; however, we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

#if __FreeBSD_version > 1300124
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes...
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
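		 * The attribute directory has its own znode, so the usual
		 * ACE_EXECUTE (search) check is applied to it just as it
		 * would be for any other directory.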
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
		if (error) {
			vrele(ZTOV(zp));
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			ZFS_EXIT(zfsvfs);
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		ZFS_EXIT(zfsvfs);
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		ZFS_EXIT(zfsvfs);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		ZFS_ENTER(zfsvfs);
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			ZFS_EXIT(zfsvfs);
			vput(ZTOV(zp));
			break;
		}
		if (zp->z_id == parent) {
			ZFS_EXIT(zfsvfs);
			break;
		}
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			fallthrough;
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}

#if __FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races.
		 * In particular, different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding a different result may be acceptable in the
		 * face of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 *	IN:	dvp	- vnode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode.
 *		mode	- mode to open file with.
 *		cr	- credentials of caller.
 *		flag	- large file flag [UNUSED].
 *		ct	- caller context
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created or trunc'd entry.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	ksid_t *ksid;
	uid_t uid;
	gid_t gid = crgetgid(cr);
	uint64_t projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype;
#ifdef DEBUG_VFS_LOCKS
	vnode_t *dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	*zpp = NULL;

	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		goto out;
	}

	/*
	 * We only support the creation of regular files in
	 * extended attribute directories.
	 */

	if ((dzp->z_pflags & ZFS_XATTR) &&
	    (vap->va_type != VREG)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids)) != 0)
		goto out;

	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	getnewvnode_reserve_();

	tx = dmu_tx_create(os);

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
	    vsecp, acl_ids.z_fuidp, vap);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

out:
	VNCHECKREF(dvp);
	if (error == 0) {
		*zpp = zp;
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */

/*ARGSUSED*/
static int
zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	znode_t		*xzp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	xattr_obj;
	uint64_t	obj = 0;
	dmu_tx_t	*tx;
	boolean_t	unlinked;
	uint64_t	txtype;
	int		error;


	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zp = VTOZ(vp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;

	xattr_obj = 0;
	xzp = NULL;

	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	obj = zp->z_id;

	/*
	 * Are there any extended attributes?
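	 * If so, also bring the xattr directory's znode into the transaction,
	 * since removing the file may require updating or freeing it as well.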
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
	}

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);

	if (xzp) {
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		zfs_unlinked_add(zp, tx);
		vp->v_vflag |= VV_NOSYNC;
	}
	/* XXX check changes to linux vnops */
	txtype = TX_REMOVE;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);

	dmu_tx_commit(tx);
out:

	if (xzp)
		vrele(ZTOV(xzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);


	ZFS_EXIT(zfsvfs);
	return (error);
}


static int
zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
    struct componentname *cnp, int nameiop)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	int error;

	cnp->cn_nameptr = __DECONST(char *, name);
	cnp->cn_namelen = strlen(name);
	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = ISLASTCN | SAVENAME;
	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
	cnp->cn_cred = kcred;
#if __FreeBSD_version < 1400037
	cnp->cn_thread = curthread;
#endif

	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
		struct vop_lookup_args a;

		a.a_gen.a_desc = &vop_lookup_desc;
		a.a_dvp = ZTOV(dzp);
		a.a_vpp = vpp;
		a.a_cnp = cnp;
		error = vfs_cache_lookup(&a);
	} else {
		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
		    B_FALSE);
	}
#ifdef ZFS_DEBUG
	if (error) {
		printf("got error %d on name %s on op %d\n", error, name,
		    nameiop);
		kdb_backtrace();
	}
#endif
	return (error);
}

int
zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
{
	vnode_t *vp;
	int error;
	struct componentname cn;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dvp	- vnode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created directory.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
    cred_t *cr, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT3U(vap->va_type, ==, VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    ((vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
	*zpp = NULL;

	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
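	 * A vnode is reserved up front (getnewvnode_reserve_()) so that
	 * zfs_mknode() does not have to allocate one while the DMU
	 * transaction is assigned.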
1480 */ 1481 getnewvnode_reserve_(); 1482 tx = dmu_tx_create(zfsvfs->z_os); 1483 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1484 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 1485 fuid_dirtied = zfsvfs->z_fuid_dirty; 1486 if (fuid_dirtied) 1487 zfs_fuid_txhold(zfsvfs, tx); 1488 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1489 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1490 acl_ids.z_aclp->z_acl_bytes); 1491 } 1492 1493 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1494 ZFS_SA_BASE_ATTR_SIZE); 1495 1496 error = dmu_tx_assign(tx, TXG_WAIT); 1497 if (error) { 1498 zfs_acl_ids_free(&acl_ids); 1499 dmu_tx_abort(tx); 1500 getnewvnode_drop_reserve(); 1501 ZFS_EXIT(zfsvfs); 1502 return (error); 1503 } 1504 1505 /* 1506 * Create new node. 1507 */ 1508 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1509 1510 if (fuid_dirtied) 1511 zfs_fuid_sync(zfsvfs, tx); 1512 1513 /* 1514 * Now put new name in parent dir. 1515 */ 1516 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 1517 1518 *zpp = zp; 1519 1520 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 1521 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 1522 acl_ids.z_fuidp, vap); 1523 1524 zfs_acl_ids_free(&acl_ids); 1525 1526 dmu_tx_commit(tx); 1527 1528 getnewvnode_drop_reserve(); 1529 1530 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1531 zil_commit(zilog, 0); 1532 1533 ZFS_EXIT(zfsvfs); 1534 return (0); 1535 } 1536 1537 #if __FreeBSD_version < 1300124 1538 static void 1539 cache_vop_rmdir(struct vnode *dvp, struct vnode *vp) 1540 { 1541 1542 cache_purge(dvp); 1543 cache_purge(vp); 1544 } 1545 #endif 1546 1547 /* 1548 * Remove a directory subdir entry. If the current working 1549 * directory is the same as the subdir to be removed, the 1550 * remove will fail. 1551 * 1552 * IN: dvp - vnode of directory to remove from. 1553 * name - name of directory to be removed. 1554 * cwd - vnode of current working directory. 1555 * cr - credentials of caller. 1556 * ct - caller context 1557 * flags - case flags 1558 * 1559 * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;


	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name,
		    ZFS_NO_OBJECT, B_FALSE);
	}

	dmu_tx_commit(tx);

	cache_vop_rmdir(dvp, vp);
out:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

int
zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
{
	struct componentname cn;
	vnode_t *vp;
	int error;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 *	IN:	vp	- vnode of directory to read.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *		eofp	- set to true if end-of-file detected.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap are always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
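 * Offsets past the special entries are serialized ZAP cursor positions;
 * they are handed back to zap_cursor_init_serialized() on a later call to
 * resume the walk where it left off.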
 */
/* ARGSUSED */
static int
zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, uint64_t **cookies)
{
	znode_t		*zp = VTOZ(vp);
	iovec_t		*iovp;
	edirent_t	*eodp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t		outbuf;
	size_t		bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t		bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	uint64_t	parent;
	int		local_eof;
	int		outcount;
	int		error;
	uint8_t		prefetch;
	boolean_t	check_sysattrs;
	uint8_t		type;
	int		ncooks;
	uint64_t	*cooks = NULL;
	int		flags = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = zfs_uio_offset(uio);
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 */
	iovp = GET_UIO_STRUCT(uio)->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		outbuf = NULL;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
		    sizeof (((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}
	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
#ifdef TODO
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);
#else
	check_sysattrs = 0;
#endif

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next = NULL;

		/*
		 * Special case `.', `..', and `.zfs'.
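		 * These entries are synthesized here; everything else is
		 * fetched from the ZAP cursor below.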
1796 */ 1797 if (offset == 0) { 1798 (void) strcpy(zap.za_name, "."); 1799 zap.za_normalization_conflict = 0; 1800 objnum = zp->z_id; 1801 type = DT_DIR; 1802 } else if (offset == 1) { 1803 (void) strcpy(zap.za_name, ".."); 1804 zap.za_normalization_conflict = 0; 1805 objnum = parent; 1806 type = DT_DIR; 1807 } else if (offset == 2 && zfs_show_ctldir(zp)) { 1808 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 1809 zap.za_normalization_conflict = 0; 1810 objnum = ZFSCTL_INO_ROOT; 1811 type = DT_DIR; 1812 } else { 1813 /* 1814 * Grab next entry. 1815 */ 1816 if ((error = zap_cursor_retrieve(&zc, &zap))) { 1817 if ((*eofp = (error == ENOENT)) != 0) 1818 break; 1819 else 1820 goto update; 1821 } 1822 1823 if (zap.za_integer_length != 8 || 1824 zap.za_num_integers != 1) { 1825 cmn_err(CE_WARN, "zap_readdir: bad directory " 1826 "entry, obj = %lld, offset = %lld\n", 1827 (u_longlong_t)zp->z_id, 1828 (u_longlong_t)offset); 1829 error = SET_ERROR(ENXIO); 1830 goto update; 1831 } 1832 1833 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 1834 /* 1835 * MacOS X can extract the object type here such as: 1836 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1837 */ 1838 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1839 1840 if (check_sysattrs && !zap.za_normalization_conflict) { 1841 #ifdef TODO 1842 zap.za_normalization_conflict = 1843 xattr_sysattr_casechk(zap.za_name); 1844 #else 1845 panic("%s:%u: TODO", __func__, __LINE__); 1846 #endif 1847 } 1848 } 1849 1850 if (flags & V_RDDIR_ACCFILTER) { 1851 /* 1852 * If we have no access at all, don't include 1853 * this entry in the returned information 1854 */ 1855 znode_t *ezp; 1856 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 1857 goto skip_entry; 1858 if (!zfs_has_access(ezp, cr)) { 1859 vrele(ZTOV(ezp)); 1860 goto skip_entry; 1861 } 1862 vrele(ZTOV(ezp)); 1863 } 1864 1865 if (flags & V_RDDIR_ENTFLAGS) 1866 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 1867 else 1868 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 1869 1870 /* 1871 * Will this entry fit in the buffer? 1872 */ 1873 if (outcount + reclen > bufsize) { 1874 /* 1875 * Did we manage to fit anything in the buffer? 1876 */ 1877 if (!outcount) { 1878 error = SET_ERROR(EINVAL); 1879 goto update; 1880 } 1881 break; 1882 } 1883 if (flags & V_RDDIR_ENTFLAGS) { 1884 /* 1885 * Add extended flag entry: 1886 */ 1887 eodp->ed_ino = objnum; 1888 eodp->ed_reclen = reclen; 1889 /* NOTE: ed_off is the offset for the *next* entry */ 1890 next = &(eodp->ed_off); 1891 eodp->ed_eflags = zap.za_normalization_conflict ? 1892 ED_CASE_CONFLICT : 0; 1893 (void) strncpy(eodp->ed_name, zap.za_name, 1894 EDIRENT_NAMELEN(reclen)); 1895 eodp = (edirent_t *)((intptr_t)eodp + reclen); 1896 } else { 1897 /* 1898 * Add normal entry: 1899 */ 1900 odp->d_ino = objnum; 1901 odp->d_reclen = reclen; 1902 odp->d_namlen = strlen(zap.za_name); 1903 /* NOTE: d_off is the offset for the *next* entry. */ 1904 next = &odp->d_off; 1905 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 1906 odp->d_type = type; 1907 dirent_terminate(odp); 1908 odp = (dirent64_t *)((intptr_t)odp + reclen); 1909 } 1910 outcount += reclen; 1911 1912 ASSERT3S(outcount, <=, bufsize); 1913 1914 /* Prefetch znode */ 1915 if (prefetch) 1916 dmu_prefetch(os, objnum, 0, 0, 0, 1917 ZIO_PRIORITY_SYNC_READ); 1918 1919 skip_entry: 1920 /* 1921 * Move to the next entry, fill in the previous offset. 
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			offset += 1;
		}

		/* Fill the offset right after advancing the cursor. */
		if (next != NULL)
			*next = offset;
		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		zfs_uio_resid(uio) -= outcount;
	} else if ((error =
	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
		/*
		 * Reset the pointer.
		 */
		offset = zfs_uio_offset(uio);
	}

update:
	zap_cursor_fini(&zc);
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_uio_setoffset(uio, offset);
	ZFS_EXIT(zfsvfs);
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 *	IN:	vp	- vnode of file.
 *		vap	- va_mask identifies requested attributes.
 *			  If AT_XVATTR set, then optional attrs are requested
 *		flags	- ATTR_NOACLCHECK (CIFS server context)
 *		cr	- credentials of caller.
 *
 *	OUT:	vap	- attribute values.
 *
 *	RETURN:	0 (always succeeds).
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	sa_bulk_attr_t bulk[4];
	int count = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);

	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
	    (vap->va_uid != crgetuid(cr))) {
		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	vap->va_type = IFTOVT(zp->z_mode);
	vap->va_mode = zp->z_mode & ~S_IFMT;
	vn_fsid(vp, vap);
	vap->va_nodeid = zp->z_id;
	vap->va_nlink = zp->z_links;
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
	    zp->z_links < ZFS_LINK_MAX)
		vap->va_nlink++;
	vap->va_size = zp->z_size;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		vap->va_rdev = zfs_cmpldev(rdev);
	vap->va_seq = zp->z_seq;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_filerev = zp->z_seq;

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((zp->z_pflags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((zp->z_pflags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG) {
			zfs_sa_get_scanstamp(zp, xvap);
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_REPARSE);
		}
		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
			xoap->xoa_generation = zp->z_gen;
			XVA_SET_RTN(xvap, XAT_GEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
			xoap->xoa_offline =
			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
			XVA_SET_RTN(xvap, XAT_OFFLINE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
			xoap->xoa_sparse =
			    ((zp->z_pflags & ZFS_SPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_SPARSE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			xoap->xoa_projinherit =
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
xoap->xoa_projid = zp->z_projid; 2163 XVA_SET_RTN(xvap, XAT_PROJID); 2164 } 2165 } 2166 2167 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2168 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2169 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2170 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2171 2172 2173 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2174 vap->va_blksize = blksize; 2175 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2176 2177 if (zp->z_blksz == 0) { 2178 /* 2179 * Block size hasn't been set; suggest maximal I/O transfers. 2180 */ 2181 vap->va_blksize = zfsvfs->z_max_blksz; 2182 } 2183 2184 ZFS_EXIT(zfsvfs); 2185 return (0); 2186 } 2187 2188 /* 2189 * Set the file attributes to the values contained in the 2190 * vattr structure. 2191 * 2192 * IN: zp - znode of file to be modified. 2193 * vap - new attribute values. 2194 * If AT_XVATTR set, then optional attrs are being set 2195 * flags - ATTR_UTIME set if non-default time values provided. 2196 * - ATTR_NOACLCHECK (CIFS context only). 2197 * cr - credentials of caller. 2198 * ct - caller context 2199 * 2200 * RETURN: 0 on success, error code on failure. 2201 * 2202 * Timestamps: 2203 * vp - ctime updated, mtime updated if size changed. 2204 */ 2205 /* ARGSUSED */ 2206 int 2207 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) 2208 { 2209 vnode_t *vp = ZTOV(zp); 2210 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2211 objset_t *os; 2212 zilog_t *zilog; 2213 dmu_tx_t *tx; 2214 vattr_t oldva; 2215 xvattr_t tmpxvattr; 2216 uint_t mask = vap->va_mask; 2217 uint_t saved_mask = 0; 2218 uint64_t saved_mode; 2219 int trim_mask = 0; 2220 uint64_t new_mode; 2221 uint64_t new_uid, new_gid; 2222 uint64_t xattr_obj; 2223 uint64_t mtime[2], ctime[2]; 2224 uint64_t projid = ZFS_INVALID_PROJID; 2225 znode_t *attrzp; 2226 int need_policy = FALSE; 2227 int err, err2; 2228 zfs_fuid_info_t *fuidp = NULL; 2229 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2230 xoptattr_t *xoap; 2231 zfs_acl_t *aclp; 2232 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2233 boolean_t fuid_dirtied = B_FALSE; 2234 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2235 int count = 0, xattr_count = 0; 2236 2237 if (mask == 0) 2238 return (0); 2239 2240 if (mask & AT_NOSET) 2241 return (SET_ERROR(EINVAL)); 2242 2243 ZFS_ENTER(zfsvfs); 2244 ZFS_VERIFY_ZP(zp); 2245 2246 os = zfsvfs->z_os; 2247 zilog = zfsvfs->z_log; 2248 2249 /* 2250 * Make sure that if we have ephemeral uid/gid or xvattr specified 2251 * that file system is at proper version level 2252 */ 2253 2254 if (zfsvfs->z_use_fuids == B_FALSE && 2255 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2256 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2257 (mask & AT_XVATTR))) { 2258 ZFS_EXIT(zfsvfs); 2259 return (SET_ERROR(EINVAL)); 2260 } 2261 2262 if (mask & AT_SIZE && vp->v_type == VDIR) { 2263 ZFS_EXIT(zfsvfs); 2264 return (SET_ERROR(EISDIR)); 2265 } 2266 2267 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2268 ZFS_EXIT(zfsvfs); 2269 return (SET_ERROR(EINVAL)); 2270 } 2271 2272 /* 2273 * If this is an xvattr_t, then get a pointer to the structure of 2274 * optional attributes. If this is NULL, then we have a vattr_t. 
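 *
 * As an illustrative sketch only (not code from this file), a caller
 * that wants to flip a single optional attribute would typically
 * build an xvattr_t with the same xva_init()/XVA_SET_REQ() helpers
 * used below, roughly:
 *
 *	xvattr_t xva;
 *
 *	xva_init(&xva);
 *	XVA_SET_REQ(&xva, XAT_IMMUTABLE);
 *	xva.xva_xoptattrs.xoa_immutable = B_TRUE;
 *	xva.xva_vattr.va_mask |= AT_XVATTR;
 *	(void) zfs_setattr(zp, (vattr_t *)&xva, 0, cr);
 *
 * The xva_vattr/xva_xoptattrs member names are assumptions here,
 * inferred from how xva_getxoptattr() and the (xvattr_t *)vap cast
 * are used in this function.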
2275 */ 2276 xoap = xva_getxoptattr(xvap); 2277 2278 xva_init(&tmpxvattr); 2279 2280 /* 2281 * For immutable files, only the immutable bit and atime may be altered. 2282 */ 2283 if ((zp->z_pflags & ZFS_IMMUTABLE) && 2284 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2285 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2286 ZFS_EXIT(zfsvfs); 2287 return (SET_ERROR(EPERM)); 2288 } 2289 2290 /* 2291 * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2292 */ 2293 2294 /* 2295 * Verify that the timestamps don't overflow 32 bits. 2296 * ZFS can handle large timestamps, but 32-bit syscalls can't 2297 * handle times past January 2038. This check should be removed 2298 * once large timestamps are fully supported. 2299 */ 2300 if (mask & (AT_ATIME | AT_MTIME)) { 2301 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2302 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2303 ZFS_EXIT(zfsvfs); 2304 return (SET_ERROR(EOVERFLOW)); 2305 } 2306 } 2307 if (xoap != NULL && (mask & AT_XVATTR)) { 2308 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2309 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2310 ZFS_EXIT(zfsvfs); 2311 return (SET_ERROR(EOVERFLOW)); 2312 } 2313 2314 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2315 if (!dmu_objset_projectquota_enabled(os) || 2316 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { 2317 ZFS_EXIT(zfsvfs); 2318 return (SET_ERROR(EOPNOTSUPP)); 2319 } 2320 2321 projid = xoap->xoa_projid; 2322 if (unlikely(projid == ZFS_INVALID_PROJID)) { 2323 ZFS_EXIT(zfsvfs); 2324 return (SET_ERROR(EINVAL)); 2325 } 2326 2327 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) 2328 projid = ZFS_INVALID_PROJID; 2329 else 2330 need_policy = TRUE; 2331 } 2332 2333 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && 2334 (xoap->xoa_projinherit != 2335 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && 2336 (!dmu_objset_projectquota_enabled(os) || 2337 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) { 2338 ZFS_EXIT(zfsvfs); 2339 return (SET_ERROR(EOPNOTSUPP)); 2340 } 2341 } 2342 2343 attrzp = NULL; 2344 aclp = NULL; 2345 2346 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2347 ZFS_EXIT(zfsvfs); 2348 return (SET_ERROR(EROFS)); 2349 } 2350 2351 /* 2352 * First validate permissions 2353 */ 2354 2355 if (mask & AT_SIZE) { 2356 /* 2357 * XXX - Note, we are not providing any open 2358 * mode flags here (like FNDELAY), so we may 2359 * block if there are locks present... this 2360 * should be addressed in openat(). 2361 */ 2362 /* XXX - would it be OK to generate a log record here? */ 2363 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2364 if (err) { 2365 ZFS_EXIT(zfsvfs); 2366 return (err); 2367 } 2368 } 2369 2370 if (mask & (AT_ATIME|AT_MTIME) || 2371 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2372 XVA_ISSET_REQ(xvap, XAT_READONLY) || 2373 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2374 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2375 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2376 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2377 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2378 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2379 skipaclchk, cr); 2380 } 2381 2382 if (mask & (AT_UID|AT_GID)) { 2383 int idmask = (mask & (AT_UID|AT_GID)); 2384 int take_owner; 2385 int take_group; 2386 2387 /* 2388 * NOTE: even if a new mode is being set, 2389 * we may clear S_ISUID/S_ISGID bits.
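 *
 * (The clearing itself is done by the secpolicy_setid_clear() call
 * below once take-ownership permission has been established,
 * matching the usual chown(2) behavior of dropping setuid/setgid
 * for unprivileged callers.)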
2390 */ 2391 2392 if (!(mask & AT_MODE)) 2393 vap->va_mode = zp->z_mode; 2394 2395 /* 2396 * Take ownership or chgrp to group we are a member of 2397 */ 2398 2399 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2400 take_group = (mask & AT_GID) && 2401 zfs_groupmember(zfsvfs, vap->va_gid, cr); 2402 2403 /* 2404 * If both AT_UID and AT_GID are set then take_owner and 2405 * take_group must both be set in order to allow taking 2406 * ownership. 2407 * 2408 * Otherwise, send the check through secpolicy_vnode_setattr() 2409 * 2410 */ 2411 2412 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2413 ((idmask == AT_UID) && take_owner) || 2414 ((idmask == AT_GID) && take_group)) { 2415 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2416 skipaclchk, cr) == 0) { 2417 /* 2418 * Remove setuid/setgid for non-privileged users 2419 */ 2420 secpolicy_setid_clear(vap, vp, cr); 2421 trim_mask = (mask & (AT_UID|AT_GID)); 2422 } else { 2423 need_policy = TRUE; 2424 } 2425 } else { 2426 need_policy = TRUE; 2427 } 2428 } 2429 2430 oldva.va_mode = zp->z_mode; 2431 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2432 if (mask & AT_XVATTR) { 2433 /* 2434 * Update xvattr mask to include only those attributes 2435 * that are actually changing. 2436 * 2437 * the bits will be restored prior to actually setting 2438 * the attributes so the caller thinks they were set. 2439 */ 2440 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2441 if (xoap->xoa_appendonly != 2442 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2443 need_policy = TRUE; 2444 } else { 2445 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2446 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2447 } 2448 } 2449 2450 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2451 if (xoap->xoa_projinherit != 2452 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { 2453 need_policy = TRUE; 2454 } else { 2455 XVA_CLR_REQ(xvap, XAT_PROJINHERIT); 2456 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT); 2457 } 2458 } 2459 2460 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2461 if (xoap->xoa_nounlink != 2462 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2463 need_policy = TRUE; 2464 } else { 2465 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2466 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2467 } 2468 } 2469 2470 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2471 if (xoap->xoa_immutable != 2472 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2473 need_policy = TRUE; 2474 } else { 2475 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2476 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2477 } 2478 } 2479 2480 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2481 if (xoap->xoa_nodump != 2482 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2483 need_policy = TRUE; 2484 } else { 2485 XVA_CLR_REQ(xvap, XAT_NODUMP); 2486 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2487 } 2488 } 2489 2490 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2491 if (xoap->xoa_av_modified != 2492 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2493 need_policy = TRUE; 2494 } else { 2495 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2496 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2497 } 2498 } 2499 2500 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2501 if ((vp->v_type != VREG && 2502 xoap->xoa_av_quarantined) || 2503 xoap->xoa_av_quarantined != 2504 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2505 need_policy = TRUE; 2506 } else { 2507 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2508 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2509 } 2510 } 2511 2512 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2513 ZFS_EXIT(zfsvfs); 2514 return (SET_ERROR(EPERM)); 2515 } 2516 2517 if (need_policy == FALSE && 2518 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 
2519 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2520 need_policy = TRUE; 2521 } 2522 } 2523 2524 if (mask & AT_MODE) { 2525 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2526 err = secpolicy_setid_setsticky_clear(vp, vap, 2527 &oldva, cr); 2528 if (err) { 2529 ZFS_EXIT(zfsvfs); 2530 return (err); 2531 } 2532 trim_mask |= AT_MODE; 2533 } else { 2534 need_policy = TRUE; 2535 } 2536 } 2537 2538 if (need_policy) { 2539 /* 2540 * If trim_mask is set then take ownership 2541 * has been granted or write_acl is present and user 2542 * has the ability to modify mode. In that case remove 2543 * UID|GID and or MODE from mask so that 2544 * secpolicy_vnode_setattr() doesn't revoke it. 2545 */ 2546 2547 if (trim_mask) { 2548 saved_mask = vap->va_mask; 2549 vap->va_mask &= ~trim_mask; 2550 if (trim_mask & AT_MODE) { 2551 /* 2552 * Save the mode, as secpolicy_vnode_setattr() 2553 * will overwrite it with ova.va_mode. 2554 */ 2555 saved_mode = vap->va_mode; 2556 } 2557 } 2558 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2559 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2560 if (err) { 2561 ZFS_EXIT(zfsvfs); 2562 return (err); 2563 } 2564 2565 if (trim_mask) { 2566 vap->va_mask |= saved_mask; 2567 if (trim_mask & AT_MODE) { 2568 /* 2569 * Recover the mode after 2570 * secpolicy_vnode_setattr(). 2571 */ 2572 vap->va_mode = saved_mode; 2573 } 2574 } 2575 } 2576 2577 /* 2578 * secpolicy_vnode_setattr, or take ownership may have 2579 * changed va_mask 2580 */ 2581 mask = vap->va_mask; 2582 2583 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) { 2584 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2585 &xattr_obj, sizeof (xattr_obj)); 2586 2587 if (err == 0 && xattr_obj) { 2588 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 2589 if (err == 0) { 2590 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 2591 if (err != 0) 2592 vrele(ZTOV(attrzp)); 2593 } 2594 if (err) 2595 goto out2; 2596 } 2597 if (mask & AT_UID) { 2598 new_uid = zfs_fuid_create(zfsvfs, 2599 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2600 if (new_uid != zp->z_uid && 2601 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, 2602 new_uid)) { 2603 if (attrzp) 2604 vput(ZTOV(attrzp)); 2605 err = SET_ERROR(EDQUOT); 2606 goto out2; 2607 } 2608 } 2609 2610 if (mask & AT_GID) { 2611 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2612 cr, ZFS_GROUP, &fuidp); 2613 if (new_gid != zp->z_gid && 2614 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, 2615 new_gid)) { 2616 if (attrzp) 2617 vput(ZTOV(attrzp)); 2618 err = SET_ERROR(EDQUOT); 2619 goto out2; 2620 } 2621 } 2622 2623 if (projid != ZFS_INVALID_PROJID && 2624 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) { 2625 if (attrzp) 2626 vput(ZTOV(attrzp)); 2627 err = SET_ERROR(EDQUOT); 2628 goto out2; 2629 } 2630 } 2631 tx = dmu_tx_create(os); 2632 2633 if (mask & AT_MODE) { 2634 uint64_t pmode = zp->z_mode; 2635 uint64_t acl_obj; 2636 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2637 2638 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 2639 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2640 err = SET_ERROR(EPERM); 2641 goto out; 2642 } 2643 2644 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) 2645 goto out; 2646 2647 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 2648 /* 2649 * Are we upgrading ACL from old V0 format 2650 * to V1 format? 
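 * If so, the old external ACL object is freed and the new ACL is
 * written to a freshly allocated object, hence the paired
 * dmu_tx_hold_free()/dmu_tx_hold_write() below; otherwise the
 * existing ACL object is overwritten in place.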
2651 */ 2652 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 2653 zfs_znode_acl_version(zp) == 2654 ZFS_ACL_VERSION_INITIAL) { 2655 dmu_tx_hold_free(tx, acl_obj, 0, 2656 DMU_OBJECT_END); 2657 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2658 0, aclp->z_acl_bytes); 2659 } else { 2660 dmu_tx_hold_write(tx, acl_obj, 0, 2661 aclp->z_acl_bytes); 2662 } 2663 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2664 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2665 0, aclp->z_acl_bytes); 2666 } 2667 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2668 } else { 2669 if (((mask & AT_XVATTR) && 2670 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2671 (projid != ZFS_INVALID_PROJID && 2672 !(zp->z_pflags & ZFS_PROJID))) 2673 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2674 else 2675 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2676 } 2677 2678 if (attrzp) { 2679 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2680 } 2681 2682 fuid_dirtied = zfsvfs->z_fuid_dirty; 2683 if (fuid_dirtied) 2684 zfs_fuid_txhold(zfsvfs, tx); 2685 2686 zfs_sa_upgrade_txholds(tx, zp); 2687 2688 err = dmu_tx_assign(tx, TXG_WAIT); 2689 if (err) 2690 goto out; 2691 2692 count = 0; 2693 /* 2694 * Set each attribute requested. 2695 * We group settings according to the locks they need to acquire. 2696 * 2697 * Note: you cannot set ctime directly, although it will be 2698 * updated as a side-effect of calling this function. 2699 */ 2700 2701 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { 2702 /* 2703 * For an existing object that was upgraded from an older system, 2704 * the on-disk layout has no slot for the project ID attribute, yet 2705 * the quota accounting logic needs to access that slot directly by 2706 * offset. Adjust such old objects' layout so that the project ID 2707 * lands at a unified, fixed offset.
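 *
 * Note the error handling that follows: when sa_add_projid()
 * succeeds it has already stored the ID, so projid is reset to
 * ZFS_INVALID_PROJID and the later bulk SA update is skipped, while
 * EEXIST (presumably the slot is already in place) is treated as
 * success and falls through to the normal update path.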
2708 */ 2709 if (attrzp) 2710 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); 2711 if (err == 0) 2712 err = sa_add_projid(zp->z_sa_hdl, tx, projid); 2713 2714 if (unlikely(err == EEXIST)) 2715 err = 0; 2716 else if (err != 0) 2717 goto out; 2718 else 2719 projid = ZFS_INVALID_PROJID; 2720 } 2721 2722 if (mask & (AT_UID|AT_GID|AT_MODE)) 2723 mutex_enter(&zp->z_acl_lock); 2724 2725 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 2726 &zp->z_pflags, sizeof (zp->z_pflags)); 2727 2728 if (attrzp) { 2729 if (mask & (AT_UID|AT_GID|AT_MODE)) 2730 mutex_enter(&attrzp->z_acl_lock); 2731 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2732 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 2733 sizeof (attrzp->z_pflags)); 2734 if (projid != ZFS_INVALID_PROJID) { 2735 attrzp->z_projid = projid; 2736 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2737 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, 2738 sizeof (attrzp->z_projid)); 2739 } 2740 } 2741 2742 if (mask & (AT_UID|AT_GID)) { 2743 2744 if (mask & AT_UID) { 2745 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 2746 &new_uid, sizeof (new_uid)); 2747 zp->z_uid = new_uid; 2748 if (attrzp) { 2749 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2750 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 2751 sizeof (new_uid)); 2752 attrzp->z_uid = new_uid; 2753 } 2754 } 2755 2756 if (mask & AT_GID) { 2757 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 2758 NULL, &new_gid, sizeof (new_gid)); 2759 zp->z_gid = new_gid; 2760 if (attrzp) { 2761 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2762 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 2763 sizeof (new_gid)); 2764 attrzp->z_gid = new_gid; 2765 } 2766 } 2767 if (!(mask & AT_MODE)) { 2768 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 2769 NULL, &new_mode, sizeof (new_mode)); 2770 new_mode = zp->z_mode; 2771 } 2772 err = zfs_acl_chown_setattr(zp); 2773 ASSERT0(err); 2774 if (attrzp) { 2775 vn_seqc_write_begin(ZTOV(attrzp)); 2776 err = zfs_acl_chown_setattr(attrzp); 2777 vn_seqc_write_end(ZTOV(attrzp)); 2778 ASSERT0(err); 2779 } 2780 } 2781 2782 if (mask & AT_MODE) { 2783 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 2784 &new_mode, sizeof (new_mode)); 2785 zp->z_mode = new_mode; 2786 ASSERT3P(aclp, !=, NULL); 2787 err = zfs_aclset_common(zp, aclp, cr, tx); 2788 ASSERT0(err); 2789 if (zp->z_acl_cached) 2790 zfs_acl_free(zp->z_acl_cached); 2791 zp->z_acl_cached = aclp; 2792 aclp = NULL; 2793 } 2794 2795 2796 if (mask & AT_ATIME) { 2797 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 2798 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 2799 &zp->z_atime, sizeof (zp->z_atime)); 2800 } 2801 2802 if (mask & AT_MTIME) { 2803 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 2804 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 2805 mtime, sizeof (mtime)); 2806 } 2807 2808 if (projid != ZFS_INVALID_PROJID) { 2809 zp->z_projid = projid; 2810 SA_ADD_BULK_ATTR(bulk, count, 2811 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, 2812 sizeof (zp->z_projid)); 2813 } 2814 2815 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
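 * (As the code stands, a size change without an explicit AT_MTIME
 * refreshes both mtime and ctime via CONTENT_MODIFIED below, while
 * any other change only records a ctime update via STATE_CHANGED.)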
*/ 2816 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 2817 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 2818 NULL, mtime, sizeof (mtime)); 2819 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2820 &ctime, sizeof (ctime)); 2821 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 2822 } else if (mask != 0) { 2823 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2824 &ctime, sizeof (ctime)); 2825 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); 2826 if (attrzp) { 2827 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2828 SA_ZPL_CTIME(zfsvfs), NULL, 2829 &ctime, sizeof (ctime)); 2830 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 2831 mtime, ctime); 2832 } 2833 } 2834 2835 /* 2836 * Do this after setting timestamps to prevent timestamp 2837 * update from toggling bit 2838 */ 2839 2840 if (xoap && (mask & AT_XVATTR)) { 2841 2842 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 2843 xoap->xoa_createtime = vap->va_birthtime; 2844 /* 2845 * restore trimmed off masks 2846 * so that return masks can be set for caller. 2847 */ 2848 2849 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2850 XVA_SET_REQ(xvap, XAT_APPENDONLY); 2851 } 2852 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2853 XVA_SET_REQ(xvap, XAT_NOUNLINK); 2854 } 2855 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2856 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2857 } 2858 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2859 XVA_SET_REQ(xvap, XAT_NODUMP); 2860 } 2861 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2862 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2863 } 2864 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2865 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2866 } 2867 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) { 2868 XVA_SET_REQ(xvap, XAT_PROJINHERIT); 2869 } 2870 2871 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 2872 ASSERT3S(vp->v_type, ==, VREG); 2873 2874 zfs_xvattr_set(zp, xvap, tx); 2875 } 2876 2877 if (fuid_dirtied) 2878 zfs_fuid_sync(zfsvfs, tx); 2879 2880 if (mask != 0) 2881 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2882 2883 if (mask & (AT_UID|AT_GID|AT_MODE)) 2884 mutex_exit(&zp->z_acl_lock); 2885 2886 if (attrzp) { 2887 if (mask & (AT_UID|AT_GID|AT_MODE)) 2888 mutex_exit(&attrzp->z_acl_lock); 2889 } 2890 out: 2891 if (err == 0 && attrzp) { 2892 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 2893 xattr_count, tx); 2894 ASSERT0(err2); 2895 } 2896 2897 if (attrzp) 2898 vput(ZTOV(attrzp)); 2899 2900 if (aclp) 2901 zfs_acl_free(aclp); 2902 2903 if (fuidp) { 2904 zfs_fuid_info_free(fuidp); 2905 fuidp = NULL; 2906 } 2907 2908 if (err) { 2909 dmu_tx_abort(tx); 2910 } else { 2911 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 2912 dmu_tx_commit(tx); 2913 } 2914 2915 out2: 2916 if (os->os_sync == ZFS_SYNC_ALWAYS) 2917 zil_commit(zilog, 0); 2918 2919 ZFS_EXIT(zfsvfs); 2920 return (err); 2921 } 2922 2923 /* 2924 * Look up the directory entries corresponding to the source and target 2925 * directory/name pairs. 2926 */ 2927 static int 2928 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp, 2929 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp, 2930 znode_t **tzpp) 2931 { 2932 zfsvfs_t *zfsvfs; 2933 znode_t *szp, *tzp; 2934 int error; 2935 2936 /* 2937 * Before using sdzp and tdzp we must ensure that they are live. 2938 * As a porting legacy from illumos we have two things to worry 2939 * about. One is typical for FreeBSD and it is that the vnode is 2940 * not reclaimed (doomed). The other is that the znode is live. 
2941 * The current code can invalidate the znode without acquiring the 2942 * corresponding vnode lock if the object represented by the znode 2943 * and vnode is no longer valid after a rollback or receive operation. 2944 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 2945 * that protects the znodes from the invalidation. 2946 */ 2947 zfsvfs = sdzp->z_zfsvfs; 2948 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 2949 ZFS_ENTER(zfsvfs); 2950 ZFS_VERIFY_ZP(sdzp); 2951 ZFS_VERIFY_ZP(tdzp); 2952 2953 /* 2954 * Re-resolve svp to be certain it still exists and fetch the 2955 * correct vnode. 2956 */ 2957 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS); 2958 if (error != 0) { 2959 /* Source entry invalid or not there. */ 2960 if ((scnp->cn_flags & ISDOTDOT) != 0 || 2961 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 2962 error = SET_ERROR(EINVAL); 2963 goto out; 2964 } 2965 *szpp = szp; 2966 2967 /* 2968 * Re-resolve tvp, if it disappeared we just carry on. 2969 */ 2970 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0); 2971 if (error != 0) { 2972 vrele(ZTOV(szp)); 2973 if ((tcnp->cn_flags & ISDOTDOT) != 0) 2974 error = SET_ERROR(EINVAL); 2975 goto out; 2976 } 2977 *tzpp = tzp; 2978 out: 2979 ZFS_EXIT(zfsvfs); 2980 return (error); 2981 } 2982 2983 /* 2984 * We acquire all but fdvp locks using non-blocking acquisitions. If we 2985 * fail to acquire any lock in the path we will drop all held locks, 2986 * acquire the new lock in a blocking fashion, and then release it and 2987 * restart the rename. This acquire/release step ensures that we do not 2988 * spin on a lock waiting for release. On error release all vnode locks 2989 * and decrement references the way tmpfs_rename() would do. 2990 */ 2991 static int 2992 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 2993 struct vnode *tdvp, struct vnode **tvpp, 2994 const struct componentname *scnp, const struct componentname *tcnp) 2995 { 2996 struct vnode *nvp, *svp, *tvp; 2997 znode_t *sdzp, *tdzp, *szp, *tzp; 2998 int error; 2999 3000 VOP_UNLOCK1(tdvp); 3001 if (*tvpp != NULL && *tvpp != tdvp) 3002 VOP_UNLOCK1(*tvpp); 3003 3004 relock: 3005 error = vn_lock(sdvp, LK_EXCLUSIVE); 3006 if (error) 3007 goto out; 3008 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3009 if (error != 0) { 3010 VOP_UNLOCK1(sdvp); 3011 if (error != EBUSY) 3012 goto out; 3013 error = vn_lock(tdvp, LK_EXCLUSIVE); 3014 if (error) 3015 goto out; 3016 VOP_UNLOCK1(tdvp); 3017 goto relock; 3018 } 3019 tdzp = VTOZ(tdvp); 3020 sdzp = VTOZ(sdvp); 3021 3022 error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp); 3023 if (error != 0) { 3024 VOP_UNLOCK1(sdvp); 3025 VOP_UNLOCK1(tdvp); 3026 goto out; 3027 } 3028 svp = ZTOV(szp); 3029 tvp = tzp != NULL ? ZTOV(tzp) : NULL; 3030 3031 /* 3032 * Now try acquire locks on svp and tvp. 3033 */ 3034 nvp = svp; 3035 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3036 if (error != 0) { 3037 VOP_UNLOCK1(sdvp); 3038 VOP_UNLOCK1(tdvp); 3039 if (tvp != NULL) 3040 vrele(tvp); 3041 if (error != EBUSY) { 3042 vrele(nvp); 3043 goto out; 3044 } 3045 error = vn_lock(nvp, LK_EXCLUSIVE); 3046 if (error != 0) { 3047 vrele(nvp); 3048 goto out; 3049 } 3050 VOP_UNLOCK1(nvp); 3051 /* 3052 * Concurrent rename race. 3053 * XXX ? 
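 * Presumably: if the source vnode we just re-looked up turns out to
 * be the target directory itself, a concurrent rename has reshuffled
 * the tree, so return EINVAL rather than trying to lock tdvp against
 * itself.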
3054 */ 3055 if (nvp == tdvp) { 3056 vrele(nvp); 3057 error = SET_ERROR(EINVAL); 3058 goto out; 3059 } 3060 vrele(*svpp); 3061 *svpp = nvp; 3062 goto relock; 3063 } 3064 vrele(*svpp); 3065 *svpp = nvp; 3066 3067 if (*tvpp != NULL) 3068 vrele(*tvpp); 3069 *tvpp = NULL; 3070 if (tvp != NULL) { 3071 nvp = tvp; 3072 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3073 if (error != 0) { 3074 VOP_UNLOCK1(sdvp); 3075 VOP_UNLOCK1(tdvp); 3076 VOP_UNLOCK1(*svpp); 3077 if (error != EBUSY) { 3078 vrele(nvp); 3079 goto out; 3080 } 3081 error = vn_lock(nvp, LK_EXCLUSIVE); 3082 if (error != 0) { 3083 vrele(nvp); 3084 goto out; 3085 } 3086 vput(nvp); 3087 goto relock; 3088 } 3089 *tvpp = nvp; 3090 } 3091 3092 return (0); 3093 3094 out: 3095 return (error); 3096 } 3097 3098 /* 3099 * Note that we must use VRELE_ASYNC in this function as it walks 3100 * up the directory tree and vrele may need to acquire an exclusive 3101 * lock if a last reference to a vnode is dropped. 3102 */ 3103 static int 3104 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3105 { 3106 zfsvfs_t *zfsvfs; 3107 znode_t *zp, *zp1; 3108 uint64_t parent; 3109 int error; 3110 3111 zfsvfs = tdzp->z_zfsvfs; 3112 if (tdzp == szp) 3113 return (SET_ERROR(EINVAL)); 3114 if (tdzp == sdzp) 3115 return (0); 3116 if (tdzp->z_id == zfsvfs->z_root) 3117 return (0); 3118 zp = tdzp; 3119 for (;;) { 3120 ASSERT(!zp->z_unlinked); 3121 if ((error = sa_lookup(zp->z_sa_hdl, 3122 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3123 break; 3124 3125 if (parent == szp->z_id) { 3126 error = SET_ERROR(EINVAL); 3127 break; 3128 } 3129 if (parent == zfsvfs->z_root) 3130 break; 3131 if (parent == sdzp->z_id) 3132 break; 3133 3134 error = zfs_zget(zfsvfs, parent, &zp1); 3135 if (error != 0) 3136 break; 3137 3138 if (zp != tdzp) 3139 VN_RELE_ASYNC(ZTOV(zp), 3140 dsl_pool_zrele_taskq( 3141 dmu_objset_pool(zfsvfs->z_os))); 3142 zp = zp1; 3143 } 3144 3145 if (error == ENOTDIR) 3146 panic("checkpath: .. not a directory\n"); 3147 if (zp != tdzp) 3148 VN_RELE_ASYNC(ZTOV(zp), 3149 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3150 return (error); 3151 } 3152 3153 #if __FreeBSD_version < 1300124 3154 static void 3155 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, 3156 struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp) 3157 { 3158 3159 cache_purge(fvp); 3160 if (tvp != NULL) 3161 cache_purge(tvp); 3162 cache_purge_negative(tdvp); 3163 } 3164 #endif 3165 3166 static int 3167 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3168 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3169 cred_t *cr); 3170 3171 /* 3172 * Move an entry from the provided source directory to the target 3173 * directory. Change the entry name as indicated. 3174 * 3175 * IN: sdvp - Source directory containing the "old entry". 3176 * scnp - Old entry name. 3177 * tdvp - Target directory to contain the "new entry". 3178 * tcnp - New entry name. 3179 * cr - credentials of caller. 3180 * INOUT: svpp - Source file 3181 * tvpp - Target file, may point to NULL initially 3182 * 3183 * RETURN: 0 on success, error code on failure. 
3184 * 3185 * Timestamps: 3186 * sdvp,tdvp - ctime|mtime updated 3187 */ 3188 /*ARGSUSED*/ 3189 static int 3190 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3191 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3192 cred_t *cr) 3193 { 3194 int error; 3195 3196 ASSERT_VOP_ELOCKED(tdvp, __func__); 3197 if (*tvpp != NULL) 3198 ASSERT_VOP_ELOCKED(*tvpp, __func__); 3199 3200 /* Reject renames across filesystems. */ 3201 if ((*svpp)->v_mount != tdvp->v_mount || 3202 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3203 error = SET_ERROR(EXDEV); 3204 goto out; 3205 } 3206 3207 if (zfsctl_is_node(tdvp)) { 3208 error = SET_ERROR(EXDEV); 3209 goto out; 3210 } 3211 3212 /* 3213 * Lock all four vnodes to ensure safety and semantics of renaming. 3214 */ 3215 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3216 if (error != 0) { 3217 /* no vnodes are locked in the case of error here */ 3218 return (error); 3219 } 3220 3221 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr); 3222 VOP_UNLOCK1(sdvp); 3223 VOP_UNLOCK1(*svpp); 3224 out: 3225 if (*tvpp != NULL) 3226 VOP_UNLOCK1(*tvpp); 3227 if (tdvp != *tvpp) 3228 VOP_UNLOCK1(tdvp); 3229 3230 return (error); 3231 } 3232 3233 static int 3234 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3235 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3236 cred_t *cr) 3237 { 3238 dmu_tx_t *tx; 3239 zfsvfs_t *zfsvfs; 3240 zilog_t *zilog; 3241 znode_t *tdzp, *sdzp, *tzp, *szp; 3242 const char *snm = scnp->cn_nameptr; 3243 const char *tnm = tcnp->cn_nameptr; 3244 int error; 3245 3246 tdzp = VTOZ(tdvp); 3247 sdzp = VTOZ(sdvp); 3248 zfsvfs = tdzp->z_zfsvfs; 3249 3250 ZFS_ENTER(zfsvfs); 3251 ZFS_VERIFY_ZP(tdzp); 3252 ZFS_VERIFY_ZP(sdzp); 3253 zilog = zfsvfs->z_log; 3254 3255 if (zfsvfs->z_utf8 && u8_validate(tnm, 3256 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3257 error = SET_ERROR(EILSEQ); 3258 goto out; 3259 } 3260 3261 /* If source and target are the same file, there is nothing to do. */ 3262 if ((*svpp) == (*tvpp)) { 3263 error = 0; 3264 goto out; 3265 } 3266 3267 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3268 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3269 (*tvpp)->v_mountedhere != NULL)) { 3270 error = SET_ERROR(EXDEV); 3271 goto out; 3272 } 3273 3274 szp = VTOZ(*svpp); 3275 ZFS_VERIFY_ZP(szp); 3276 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 3277 if (tzp != NULL) 3278 ZFS_VERIFY_ZP(tzp); 3279 3280 /* 3281 * This is to prevent the creation of links into attribute space 3282 * by renaming a linked file into/outof an attribute directory. 3283 * See the comment in zfs_link() for why this is considered bad. 3284 */ 3285 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3286 error = SET_ERROR(EINVAL); 3287 goto out; 3288 } 3289 3290 /* 3291 * If we are using project inheritance, means if the directory has 3292 * ZFS_PROJINHERIT set, then its descendant directories will inherit 3293 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under 3294 * such case, we only allow renames into our tree when the project 3295 * IDs are the same. 3296 */ 3297 if (tdzp->z_pflags & ZFS_PROJINHERIT && 3298 tdzp->z_projid != szp->z_projid) { 3299 error = SET_ERROR(EXDEV); 3300 goto out; 3301 } 3302 3303 /* 3304 * Must have write access at the source to remove the old entry 3305 * and write access at the target to create the new entry. 
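 * Both checks are performed by the zfs_zaccess_rename() call below.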
3306 * Note that if target and source are the same, this can be 3307 * done in a single check. 3308 */ 3309 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) 3310 goto out; 3311 3312 if ((*svpp)->v_type == VDIR) { 3313 /* 3314 * Avoid ".", "..", and aliases of "." for obvious reasons. 3315 */ 3316 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3317 sdzp == szp || 3318 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3319 error = EINVAL; 3320 goto out; 3321 } 3322 3323 /* 3324 * Check to make sure rename is valid. 3325 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3326 */ 3327 if ((error = zfs_rename_check(szp, sdzp, tdzp))) 3328 goto out; 3329 } 3330 3331 /* 3332 * Does target exist? 3333 */ 3334 if (tzp) { 3335 /* 3336 * Source and target must be the same type. 3337 */ 3338 if ((*svpp)->v_type == VDIR) { 3339 if ((*tvpp)->v_type != VDIR) { 3340 error = SET_ERROR(ENOTDIR); 3341 goto out; 3342 } else { 3343 cache_purge(tdvp); 3344 if (sdvp != tdvp) 3345 cache_purge(sdvp); 3346 } 3347 } else { 3348 if ((*tvpp)->v_type == VDIR) { 3349 error = SET_ERROR(EISDIR); 3350 goto out; 3351 } 3352 } 3353 } 3354 3355 vn_seqc_write_begin(*svpp); 3356 vn_seqc_write_begin(sdvp); 3357 if (*tvpp != NULL) 3358 vn_seqc_write_begin(*tvpp); 3359 if (tdvp != *tvpp) 3360 vn_seqc_write_begin(tdvp); 3361 3362 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3363 if (tzp) 3364 vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3365 3366 /* 3367 * notify the target directory if it is not the same 3368 * as source directory. 3369 */ 3370 if (tdvp != sdvp) { 3371 vnevent_rename_dest_dir(tdvp, ct); 3372 } 3373 3374 tx = dmu_tx_create(zfsvfs->z_os); 3375 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3376 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3377 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3378 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3379 if (sdzp != tdzp) { 3380 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3381 zfs_sa_upgrade_txholds(tx, tdzp); 3382 } 3383 if (tzp) { 3384 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3385 zfs_sa_upgrade_txholds(tx, tzp); 3386 } 3387 3388 zfs_sa_upgrade_txholds(tx, szp); 3389 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3390 error = dmu_tx_assign(tx, TXG_WAIT); 3391 if (error) { 3392 dmu_tx_abort(tx); 3393 goto out_seq; 3394 } 3395 3396 if (tzp) /* Attempt to remove the existing target */ 3397 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 3398 3399 if (error == 0) { 3400 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3401 if (error == 0) { 3402 szp->z_pflags |= ZFS_AV_MODIFIED; 3403 3404 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3405 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3406 ASSERT0(error); 3407 3408 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3409 NULL); 3410 if (error == 0) { 3411 zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3412 snm, tdzp, tnm, szp); 3413 3414 /* 3415 * Update path information for the target vnode 3416 */ 3417 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3418 } else { 3419 /* 3420 * At this point, we have successfully created 3421 * the target name, but have failed to remove 3422 * the source name. Since the create was done 3423 * with the ZRENAMING flag, there are 3424 * complications; for one, the link count is 3425 * wrong. The easiest way to deal with this 3426 * is to remove the newly created target, and 3427 * return the original error. This must 3428 * succeed; fortunately, it is very unlikely to 3429 * fail, since we just created it. 
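 * Hence the VERIFY0() below instead of ordinary error handling.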
3430 */ 3431 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx, 3432 ZRENAMING, NULL)); 3433 } 3434 } 3435 if (error == 0) { 3436 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp); 3437 } 3438 } 3439 3440 dmu_tx_commit(tx); 3441 3442 out_seq: 3443 vn_seqc_write_end(*svpp); 3444 vn_seqc_write_end(sdvp); 3445 if (*tvpp != NULL) 3446 vn_seqc_write_end(*tvpp); 3447 if (tdvp != *tvpp) 3448 vn_seqc_write_end(tdvp); 3449 3450 out: 3451 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3452 zil_commit(zilog, 0); 3453 ZFS_EXIT(zfsvfs); 3454 3455 return (error); 3456 } 3457 3458 int 3459 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, 3460 cred_t *cr, int flags) 3461 { 3462 struct componentname scn, tcn; 3463 vnode_t *sdvp, *tdvp; 3464 vnode_t *svp, *tvp; 3465 int error; 3466 svp = tvp = NULL; 3467 3468 sdvp = ZTOV(sdzp); 3469 tdvp = ZTOV(tdzp); 3470 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE); 3471 if (sdzp->z_zfsvfs->z_replay == B_FALSE) 3472 VOP_UNLOCK1(sdvp); 3473 if (error != 0) 3474 goto fail; 3475 VOP_UNLOCK1(svp); 3476 3477 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 3478 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME); 3479 if (error == EJUSTRETURN) 3480 tvp = NULL; 3481 else if (error != 0) { 3482 VOP_UNLOCK1(tdvp); 3483 goto fail; 3484 } 3485 3486 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr); 3487 fail: 3488 if (svp != NULL) 3489 vrele(svp); 3490 if (tvp != NULL) 3491 vrele(tvp); 3492 3493 return (error); 3494 } 3495 3496 /* 3497 * Insert the indicated symbolic reference entry into the directory. 3498 * 3499 * IN: dvp - Directory to contain new symbolic link. 3500 * link - Name for new symlink entry. 3501 * vap - Attributes of new entry. 3502 * cr - credentials of caller. 3503 * ct - caller context 3504 * flags - case flags 3505 * 3506 * RETURN: 0 on success, error code on failure. 3507 * 3508 * Timestamps: 3509 * dvp - ctime|mtime updated 3510 */ 3511 /*ARGSUSED*/ 3512 int 3513 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, 3514 const char *link, znode_t **zpp, cred_t *cr, int flags) 3515 { 3516 znode_t *zp; 3517 dmu_tx_t *tx; 3518 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3519 zilog_t *zilog; 3520 uint64_t len = strlen(link); 3521 int error; 3522 zfs_acl_ids_t acl_ids; 3523 boolean_t fuid_dirtied; 3524 uint64_t txtype = TX_SYMLINK; 3525 3526 ASSERT3S(vap->va_type, ==, VLNK); 3527 3528 ZFS_ENTER(zfsvfs); 3529 ZFS_VERIFY_ZP(dzp); 3530 zilog = zfsvfs->z_log; 3531 3532 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3533 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3534 ZFS_EXIT(zfsvfs); 3535 return (SET_ERROR(EILSEQ)); 3536 } 3537 3538 if (len > MAXPATHLEN) { 3539 ZFS_EXIT(zfsvfs); 3540 return (SET_ERROR(ENAMETOOLONG)); 3541 } 3542 3543 if ((error = zfs_acl_ids_create(dzp, 0, 3544 vap, cr, NULL, &acl_ids)) != 0) { 3545 ZFS_EXIT(zfsvfs); 3546 return (error); 3547 } 3548 3549 /* 3550 * Attempt to lock directory; fail if entry already exists. 
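 * (ZNEW asks zfs_dirent_lookup() to fail with EEXIST if the name is
 * already present instead of returning the existing znode.)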
3551 */ 3552 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 3553 if (error) { 3554 zfs_acl_ids_free(&acl_ids); 3555 ZFS_EXIT(zfsvfs); 3556 return (error); 3557 } 3558 3559 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { 3560 zfs_acl_ids_free(&acl_ids); 3561 ZFS_EXIT(zfsvfs); 3562 return (error); 3563 } 3564 3565 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, 3566 0 /* projid */)) { 3567 zfs_acl_ids_free(&acl_ids); 3568 ZFS_EXIT(zfsvfs); 3569 return (SET_ERROR(EDQUOT)); 3570 } 3571 3572 getnewvnode_reserve_(); 3573 tx = dmu_tx_create(zfsvfs->z_os); 3574 fuid_dirtied = zfsvfs->z_fuid_dirty; 3575 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3576 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 3577 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 3578 ZFS_SA_BASE_ATTR_SIZE + len); 3579 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 3580 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3581 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 3582 acl_ids.z_aclp->z_acl_bytes); 3583 } 3584 if (fuid_dirtied) 3585 zfs_fuid_txhold(zfsvfs, tx); 3586 error = dmu_tx_assign(tx, TXG_WAIT); 3587 if (error) { 3588 zfs_acl_ids_free(&acl_ids); 3589 dmu_tx_abort(tx); 3590 getnewvnode_drop_reserve(); 3591 ZFS_EXIT(zfsvfs); 3592 return (error); 3593 } 3594 3595 /* 3596 * Create a new object for the symlink. 3597 * for version 4 ZPL datasets the symlink will be an SA attribute 3598 */ 3599 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3600 3601 if (fuid_dirtied) 3602 zfs_fuid_sync(zfsvfs, tx); 3603 3604 if (zp->z_is_sa) 3605 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 3606 __DECONST(void *, link), len, tx); 3607 else 3608 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx); 3609 3610 zp->z_size = len; 3611 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 3612 &zp->z_size, sizeof (zp->z_size), tx); 3613 /* 3614 * Insert the new object into the directory. 3615 */ 3616 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 3617 3618 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 3619 *zpp = zp; 3620 3621 zfs_acl_ids_free(&acl_ids); 3622 3623 dmu_tx_commit(tx); 3624 3625 getnewvnode_drop_reserve(); 3626 3627 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3628 zil_commit(zilog, 0); 3629 3630 ZFS_EXIT(zfsvfs); 3631 return (error); 3632 } 3633 3634 /* 3635 * Return, in the buffer contained in the provided uio structure, 3636 * the symbolic path referred to by vp. 3637 * 3638 * IN: vp - vnode of symbolic link. 3639 * uio - structure to contain the link path. 3640 * cr - credentials of caller. 3641 * ct - caller context 3642 * 3643 * OUT: uio - structure containing the link path. 3644 * 3645 * RETURN: 0 on success, error code on failure. 3646 * 3647 * Timestamps: 3648 * vp - atime updated 3649 */ 3650 /* ARGSUSED */ 3651 static int 3652 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct) 3653 { 3654 znode_t *zp = VTOZ(vp); 3655 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3656 int error; 3657 3658 ZFS_ENTER(zfsvfs); 3659 ZFS_VERIFY_ZP(zp); 3660 3661 if (zp->z_is_sa) 3662 error = sa_lookup_uio(zp->z_sa_hdl, 3663 SA_ZPL_SYMLINK(zfsvfs), uio); 3664 else 3665 error = zfs_sa_readlink(zp, uio); 3666 3667 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3668 3669 ZFS_EXIT(zfsvfs); 3670 return (error); 3671 } 3672 3673 /* 3674 * Insert a new entry into directory tdvp referencing svp. 3675 * 3676 * IN: tdvp - Directory to contain new entry. 3677 * svp - vnode of new entry. 3678 * name - name of new entry. 3679 * cr - credentials of caller. 
3680 * 3681 * RETURN: 0 on success, error code on failure. 3682 * 3683 * Timestamps: 3684 * tdvp - ctime|mtime updated 3685 * svp - ctime updated 3686 */ 3687 /* ARGSUSED */ 3688 int 3689 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr, 3690 int flags) 3691 { 3692 znode_t *tzp; 3693 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs; 3694 zilog_t *zilog; 3695 dmu_tx_t *tx; 3696 int error; 3697 uint64_t parent; 3698 uid_t owner; 3699 3700 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR); 3701 3702 ZFS_ENTER(zfsvfs); 3703 ZFS_VERIFY_ZP(tdzp); 3704 zilog = zfsvfs->z_log; 3705 3706 /* 3707 * POSIX dictates that we return EPERM here. 3708 * Better choices include ENOTSUP or EISDIR. 3709 */ 3710 if (ZTOV(szp)->v_type == VDIR) { 3711 ZFS_EXIT(zfsvfs); 3712 return (SET_ERROR(EPERM)); 3713 } 3714 3715 ZFS_VERIFY_ZP(szp); 3716 3717 /* 3718 * If we are using project inheritance, means if the directory has 3719 * ZFS_PROJINHERIT set, then its descendant directories will inherit 3720 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under 3721 * such case, we only allow hard link creation in our tree when the 3722 * project IDs are the same. 3723 */ 3724 if (tdzp->z_pflags & ZFS_PROJINHERIT && 3725 tdzp->z_projid != szp->z_projid) { 3726 ZFS_EXIT(zfsvfs); 3727 return (SET_ERROR(EXDEV)); 3728 } 3729 3730 if (szp->z_pflags & (ZFS_APPENDONLY | 3731 ZFS_IMMUTABLE | ZFS_READONLY)) { 3732 ZFS_EXIT(zfsvfs); 3733 return (SET_ERROR(EPERM)); 3734 } 3735 3736 /* Prevent links to .zfs/shares files */ 3737 3738 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 3739 &parent, sizeof (uint64_t))) != 0) { 3740 ZFS_EXIT(zfsvfs); 3741 return (error); 3742 } 3743 if (parent == zfsvfs->z_shares_dir) { 3744 ZFS_EXIT(zfsvfs); 3745 return (SET_ERROR(EPERM)); 3746 } 3747 3748 if (zfsvfs->z_utf8 && u8_validate(name, 3749 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3750 ZFS_EXIT(zfsvfs); 3751 return (SET_ERROR(EILSEQ)); 3752 } 3753 3754 /* 3755 * We do not support links between attributes and non-attributes 3756 * because of the potential security risk of creating links 3757 * into "normal" file space in order to circumvent restrictions 3758 * imposed in attribute space. 3759 */ 3760 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) { 3761 ZFS_EXIT(zfsvfs); 3762 return (SET_ERROR(EINVAL)); 3763 } 3764 3765 3766 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 3767 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) { 3768 ZFS_EXIT(zfsvfs); 3769 return (SET_ERROR(EPERM)); 3770 } 3771 3772 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { 3773 ZFS_EXIT(zfsvfs); 3774 return (error); 3775 } 3776 3777 /* 3778 * Attempt to lock directory; fail if entry already exists. 
3779 */ 3780 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW); 3781 if (error) { 3782 ZFS_EXIT(zfsvfs); 3783 return (error); 3784 } 3785 3786 tx = dmu_tx_create(zfsvfs->z_os); 3787 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3788 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name); 3789 zfs_sa_upgrade_txholds(tx, szp); 3790 zfs_sa_upgrade_txholds(tx, tdzp); 3791 error = dmu_tx_assign(tx, TXG_WAIT); 3792 if (error) { 3793 dmu_tx_abort(tx); 3794 ZFS_EXIT(zfsvfs); 3795 return (error); 3796 } 3797 3798 error = zfs_link_create(tdzp, name, szp, tx, 0); 3799 3800 if (error == 0) { 3801 uint64_t txtype = TX_LINK; 3802 zfs_log_link(zilog, tx, txtype, tdzp, szp, name); 3803 } 3804 3805 dmu_tx_commit(tx); 3806 3807 if (error == 0) { 3808 vnevent_link(ZTOV(szp), ct); 3809 } 3810 3811 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3812 zil_commit(zilog, 0); 3813 3814 ZFS_EXIT(zfsvfs); 3815 return (error); 3816 } 3817 3818 /* 3819 * Free or allocate space in a file. Currently, this function only 3820 * supports the `F_FREESP' command. However, this command is somewhat 3821 * misnamed, as its functionality includes the ability to allocate as 3822 * well as free space. 3823 * 3824 * IN: ip - inode of file to free data in. 3825 * cmd - action to take (only F_FREESP supported). 3826 * bfp - section of file to free/alloc. 3827 * flag - current file open mode flags. 3828 * offset - current file offset. 3829 * cr - credentials of caller. 3830 * 3831 * RETURN: 0 on success, error code on failure. 3832 * 3833 * Timestamps: 3834 * ip - ctime|mtime updated 3835 */ 3836 /* ARGSUSED */ 3837 int 3838 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag, 3839 offset_t offset, cred_t *cr) 3840 { 3841 zfsvfs_t *zfsvfs = ZTOZSB(zp); 3842 uint64_t off, len; 3843 int error; 3844 3845 ZFS_ENTER(zfsvfs); 3846 ZFS_VERIFY_ZP(zp); 3847 3848 if (cmd != F_FREESP) { 3849 ZFS_EXIT(zfsvfs); 3850 return (SET_ERROR(EINVAL)); 3851 } 3852 3853 /* 3854 * Callers might not be able to detect properly that we are read-only, 3855 * so check it explicitly here. 3856 */ 3857 if (zfs_is_readonly(zfsvfs)) { 3858 ZFS_EXIT(zfsvfs); 3859 return (SET_ERROR(EROFS)); 3860 } 3861 3862 if (bfp->l_len < 0) { 3863 ZFS_EXIT(zfsvfs); 3864 return (SET_ERROR(EINVAL)); 3865 } 3866 3867 /* 3868 * Permissions aren't checked on Solaris because on this OS 3869 * zfs_space() can only be called with an opened file handle. 3870 * On Linux we can get here through truncate_range() which 3871 * operates directly on inodes, so we need to check access rights. 3872 */ 3873 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { 3874 ZFS_EXIT(zfsvfs); 3875 return (error); 3876 } 3877 3878 off = bfp->l_start; 3879 len = bfp->l_len; /* 0 means from off to end of file */ 3880 3881 error = zfs_freesp(zp, off, len, flag, TRUE); 3882 3883 ZFS_EXIT(zfsvfs); 3884 return (error); 3885 } 3886 3887 /*ARGSUSED*/ 3888 static void 3889 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3890 { 3891 znode_t *zp = VTOZ(vp); 3892 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3893 int error; 3894 3895 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs); 3896 if (zp->z_sa_hdl == NULL) { 3897 /* 3898 * The fs has been unmounted, or we did a 3899 * suspend/resume and this file no longer exists. 3900 */ 3901 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3902 vrecycle(vp); 3903 return; 3904 } 3905 3906 if (zp->z_unlinked) { 3907 /* 3908 * Fast path to recycle a vnode of a removed file. 
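 * vrecycle() only triggers reclamation of the vnode here; the actual
 * cleanup of the unlinked znode's on-disk state is assumed to happen
 * later, during reclaim, rather than in this function.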
3909 */ 3910 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3911 vrecycle(vp); 3912 return; 3913 } 3914 3915 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3916 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3917 3918 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3919 zfs_sa_upgrade_txholds(tx, zp); 3920 error = dmu_tx_assign(tx, TXG_WAIT); 3921 if (error) { 3922 dmu_tx_abort(tx); 3923 } else { 3924 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 3925 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 3926 zp->z_atime_dirty = 0; 3927 dmu_tx_commit(tx); 3928 } 3929 } 3930 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3931 } 3932 3933 3934 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid)); 3935 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid)); 3936 3937 /*ARGSUSED*/ 3938 static int 3939 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3940 { 3941 znode_t *zp = VTOZ(vp); 3942 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3943 uint32_t gen; 3944 uint64_t gen64; 3945 uint64_t object = zp->z_id; 3946 zfid_short_t *zfid; 3947 int size, i, error; 3948 3949 ZFS_ENTER(zfsvfs); 3950 ZFS_VERIFY_ZP(zp); 3951 3952 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 3953 &gen64, sizeof (uint64_t))) != 0) { 3954 ZFS_EXIT(zfsvfs); 3955 return (error); 3956 } 3957 3958 gen = (uint32_t)gen64; 3959 3960 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 3961 fidp->fid_len = size; 3962 3963 zfid = (zfid_short_t *)fidp; 3964 3965 zfid->zf_len = size; 3966 3967 for (i = 0; i < sizeof (zfid->zf_object); i++) 3968 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3969 3970 /* Must have a non-zero generation number to distinguish from .zfs */ 3971 if (gen == 0) 3972 gen = 1; 3973 for (i = 0; i < sizeof (zfid->zf_gen); i++) 3974 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3975 3976 if (size == LONG_FID_LEN) { 3977 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3978 zfid_long_t *zlfid; 3979 3980 zlfid = (zfid_long_t *)fidp; 3981 3982 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3983 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3984 3985 /* XXX - this should be the generation number for the objset */ 3986 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3987 zlfid->zf_setgen[i] = 0; 3988 } 3989 3990 ZFS_EXIT(zfsvfs); 3991 return (0); 3992 } 3993 3994 static int 3995 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 3996 caller_context_t *ct) 3997 { 3998 znode_t *zp; 3999 zfsvfs_t *zfsvfs; 4000 4001 switch (cmd) { 4002 case _PC_LINK_MAX: 4003 *valp = MIN(LONG_MAX, ZFS_LINK_MAX); 4004 return (0); 4005 4006 case _PC_FILESIZEBITS: 4007 *valp = 64; 4008 return (0); 4009 case _PC_MIN_HOLE_SIZE: 4010 *valp = (int)SPA_MINBLOCKSIZE; 4011 return (0); 4012 case _PC_ACL_EXTENDED: 4013 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ 4014 zp = VTOZ(vp); 4015 zfsvfs = zp->z_zfsvfs; 4016 ZFS_ENTER(zfsvfs); 4017 ZFS_VERIFY_ZP(zp); 4018 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0; 4019 ZFS_EXIT(zfsvfs); 4020 #else 4021 *valp = 0; 4022 #endif 4023 return (0); 4024 4025 case _PC_ACL_NFS4: 4026 zp = VTOZ(vp); 4027 zfsvfs = zp->z_zfsvfs; 4028 ZFS_ENTER(zfsvfs); 4029 ZFS_VERIFY_ZP(zp); 4030 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 
1 : 0; 4031 ZFS_EXIT(zfsvfs); 4032 return (0); 4033 4034 case _PC_ACL_PATH_MAX: 4035 *valp = ACL_MAX_ENTRIES; 4036 return (0); 4037 4038 default: 4039 return (EOPNOTSUPP); 4040 } 4041 } 4042 4043 static int 4044 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4045 int *rahead) 4046 { 4047 znode_t *zp = VTOZ(vp); 4048 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4049 zfs_locked_range_t *lr; 4050 vm_object_t object; 4051 off_t start, end, obj_size; 4052 uint_t blksz; 4053 int pgsin_b, pgsin_a; 4054 int error; 4055 4056 ZFS_ENTER(zfsvfs); 4057 ZFS_VERIFY_ZP(zp); 4058 4059 start = IDX_TO_OFF(ma[0]->pindex); 4060 end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4061 4062 /* 4063 * Lock a range covering all required and optional pages. 4064 * Note that we need to handle the case of the block size growing. 4065 */ 4066 for (;;) { 4067 blksz = zp->z_blksz; 4068 lr = zfs_rangelock_tryenter(&zp->z_rangelock, 4069 rounddown(start, blksz), 4070 roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4071 if (lr == NULL) { 4072 if (rahead != NULL) { 4073 *rahead = 0; 4074 rahead = NULL; 4075 } 4076 if (rbehind != NULL) { 4077 *rbehind = 0; 4078 rbehind = NULL; 4079 } 4080 break; 4081 } 4082 if (blksz == zp->z_blksz) 4083 break; 4084 zfs_rangelock_exit(lr); 4085 } 4086 4087 object = ma[0]->object; 4088 zfs_vmobject_wlock(object); 4089 obj_size = object->un_pager.vnp.vnp_size; 4090 zfs_vmobject_wunlock(object); 4091 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4092 if (lr != NULL) 4093 zfs_rangelock_exit(lr); 4094 ZFS_EXIT(zfsvfs); 4095 return (zfs_vm_pagerret_bad); 4096 } 4097 4098 pgsin_b = 0; 4099 if (rbehind != NULL) { 4100 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4101 pgsin_b = MIN(*rbehind, pgsin_b); 4102 } 4103 4104 pgsin_a = 0; 4105 if (rahead != NULL) { 4106 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4107 if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4108 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); 4109 pgsin_a = MIN(*rahead, pgsin_a); 4110 } 4111 4112 /* 4113 * NB: we need to pass the exact byte size of the data that we expect 4114 * to read after accounting for the file size. This is required because 4115 * ZFS will panic if we request DMU to read beyond the end of the last 4116 * allocated block. 
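 * The last argument below, MIN(end, obj_size) - (end - PAGE_SIZE),
 * is therefore the number of bytes of the final requested page that
 * are actually backed by the file.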
4117 */ 4118 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b, 4119 &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE)); 4120 4121 if (lr != NULL) 4122 zfs_rangelock_exit(lr); 4123 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4124 ZFS_EXIT(zfsvfs); 4125 4126 if (error != 0) 4127 return (zfs_vm_pagerret_error); 4128 4129 VM_CNT_INC(v_vnodein); 4130 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a); 4131 if (rbehind != NULL) 4132 *rbehind = pgsin_b; 4133 if (rahead != NULL) 4134 *rahead = pgsin_a; 4135 return (zfs_vm_pagerret_ok); 4136 } 4137 4138 #ifndef _SYS_SYSPROTO_H_ 4139 struct vop_getpages_args { 4140 struct vnode *a_vp; 4141 vm_page_t *a_m; 4142 int a_count; 4143 int *a_rbehind; 4144 int *a_rahead; 4145 }; 4146 #endif 4147 4148 static int 4149 zfs_freebsd_getpages(struct vop_getpages_args *ap) 4150 { 4151 4152 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4153 ap->a_rahead)); 4154 } 4155 4156 static int 4157 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4158 int *rtvals) 4159 { 4160 znode_t *zp = VTOZ(vp); 4161 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4162 zfs_locked_range_t *lr; 4163 dmu_tx_t *tx; 4164 struct sf_buf *sf; 4165 vm_object_t object; 4166 vm_page_t m; 4167 caddr_t va; 4168 size_t tocopy; 4169 size_t lo_len; 4170 vm_ooffset_t lo_off; 4171 vm_ooffset_t off; 4172 uint_t blksz; 4173 int ncount; 4174 int pcount; 4175 int err; 4176 int i; 4177 4178 ZFS_ENTER(zfsvfs); 4179 ZFS_VERIFY_ZP(zp); 4180 4181 object = vp->v_object; 4182 pcount = btoc(len); 4183 ncount = pcount; 4184 4185 KASSERT(ma[0]->object == object, ("mismatching object")); 4186 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4187 4188 for (i = 0; i < pcount; i++) 4189 rtvals[i] = zfs_vm_pagerret_error; 4190 4191 off = IDX_TO_OFF(ma[0]->pindex); 4192 blksz = zp->z_blksz; 4193 lo_off = rounddown(off, blksz); 4194 lo_len = roundup(len + (off - lo_off), blksz); 4195 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER); 4196 4197 zfs_vmobject_wlock(object); 4198 if (len + off > object->un_pager.vnp.vnp_size) { 4199 if (object->un_pager.vnp.vnp_size > off) { 4200 int pgoff; 4201 4202 len = object->un_pager.vnp.vnp_size - off; 4203 ncount = btoc(len); 4204 if ((pgoff = (int)len & PAGE_MASK) != 0) { 4205 /* 4206 * If the object is locked and the following 4207 * conditions hold, then the page's dirty 4208 * field cannot be concurrently changed by a 4209 * pmap operation. 
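 * That guarantee is what lets the vm_page_clear_dirty() call below
 * trim the dirty bits of the partial page beyond EOF without any
 * additional synchronization.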
4210 */ 4211 m = ma[ncount - 1]; 4212 vm_page_assert_sbusied(m); 4213 KASSERT(!pmap_page_is_write_mapped(m), 4214 ("zfs_putpages: page %p is not read-only", 4215 m)); 4216 vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4217 pgoff); 4218 } 4219 } else { 4220 len = 0; 4221 ncount = 0; 4222 } 4223 if (ncount < pcount) { 4224 for (i = ncount; i < pcount; i++) { 4225 rtvals[i] = zfs_vm_pagerret_bad; 4226 } 4227 } 4228 } 4229 zfs_vmobject_wunlock(object); 4230 4231 if (ncount == 0) 4232 goto out; 4233 4234 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) || 4235 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) || 4236 (zp->z_projid != ZFS_DEFAULT_PROJID && 4237 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 4238 zp->z_projid))) { 4239 goto out; 4240 } 4241 4242 tx = dmu_tx_create(zfsvfs->z_os); 4243 dmu_tx_hold_write(tx, zp->z_id, off, len); 4244 4245 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4246 zfs_sa_upgrade_txholds(tx, zp); 4247 err = dmu_tx_assign(tx, TXG_WAIT); 4248 if (err != 0) { 4249 dmu_tx_abort(tx); 4250 goto out; 4251 } 4252 4253 if (zp->z_blksz < PAGE_SIZE) { 4254 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4255 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len; 4256 va = zfs_map_page(ma[i], &sf); 4257 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4258 zfs_unmap_page(sf); 4259 } 4260 } else { 4261 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4262 } 4263 4264 if (err == 0) { 4265 uint64_t mtime[2], ctime[2]; 4266 sa_bulk_attr_t bulk[3]; 4267 int count = 0; 4268 4269 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4270 &mtime, 16); 4271 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4272 &ctime, 16); 4273 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4274 &zp->z_pflags, 8); 4275 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 4276 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4277 ASSERT0(err); 4278 /* 4279 * XXX we should be passing a callback to undirty 4280 * but that would make the locking messier 4281 */ 4282 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, 4283 len, 0, NULL, NULL); 4284 4285 zfs_vmobject_wlock(object); 4286 for (i = 0; i < ncount; i++) { 4287 rtvals[i] = zfs_vm_pagerret_ok; 4288 vm_page_undirty(ma[i]); 4289 } 4290 zfs_vmobject_wunlock(object); 4291 VM_CNT_INC(v_vnodeout); 4292 VM_CNT_ADD(v_vnodepgsout, ncount); 4293 } 4294 dmu_tx_commit(tx); 4295 4296 out: 4297 zfs_rangelock_exit(lr); 4298 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4299 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4300 zil_commit(zfsvfs->z_log, zp->z_id); 4301 ZFS_EXIT(zfsvfs); 4302 return (rtvals[0]); 4303 } 4304 4305 #ifndef _SYS_SYSPROTO_H_ 4306 struct vop_putpages_args { 4307 struct vnode *a_vp; 4308 vm_page_t *a_m; 4309 int a_count; 4310 int a_sync; 4311 int *a_rtvals; 4312 }; 4313 #endif 4314 4315 static int 4316 zfs_freebsd_putpages(struct vop_putpages_args *ap) 4317 { 4318 4319 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4320 ap->a_rtvals)); 4321 } 4322 4323 #ifndef _SYS_SYSPROTO_H_ 4324 struct vop_bmap_args { 4325 struct vnode *a_vp; 4326 daddr_t a_bn; 4327 struct bufobj **a_bop; 4328 daddr_t *a_bnp; 4329 int *a_runp; 4330 int *a_runb; 4331 }; 4332 #endif 4333 4334 static int 4335 zfs_freebsd_bmap(struct vop_bmap_args *ap) 4336 { 4337 4338 if (ap->a_bop != NULL) 4339 *ap->a_bop = &ap->a_vp->v_bufobj; 4340 if (ap->a_bnp != NULL) 4341 *ap->a_bnp = ap->a_bn; 4342 if (ap->a_runp != NULL) 4343 *ap->a_runp = 0; 4344 if 
(ap->a_runb != NULL) 4345 *ap->a_runb = 0; 4346 4347 return (0); 4348 } 4349 4350 #ifndef _SYS_SYSPROTO_H_ 4351 struct vop_open_args { 4352 struct vnode *a_vp; 4353 int a_mode; 4354 struct ucred *a_cred; 4355 struct thread *a_td; 4356 }; 4357 #endif 4358 4359 static int 4360 zfs_freebsd_open(struct vop_open_args *ap) 4361 { 4362 vnode_t *vp = ap->a_vp; 4363 znode_t *zp = VTOZ(vp); 4364 int error; 4365 4366 error = zfs_open(&vp, ap->a_mode, ap->a_cred); 4367 if (error == 0) 4368 vnode_create_vobject(vp, zp->z_size, ap->a_td); 4369 return (error); 4370 } 4371 4372 #ifndef _SYS_SYSPROTO_H_ 4373 struct vop_close_args { 4374 struct vnode *a_vp; 4375 int a_fflag; 4376 struct ucred *a_cred; 4377 struct thread *a_td; 4378 }; 4379 #endif 4380 4381 static int 4382 zfs_freebsd_close(struct vop_close_args *ap) 4383 { 4384 4385 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred)); 4386 } 4387 4388 #ifndef _SYS_SYSPROTO_H_ 4389 struct vop_ioctl_args { 4390 struct vnode *a_vp; 4391 ulong_t a_command; 4392 caddr_t a_data; 4393 int a_fflag; 4394 struct ucred *a_cred; 4395 struct thread *a_td; 4396 }; 4397 #endif 4398 4399 static int 4400 zfs_freebsd_ioctl(struct vop_ioctl_args *ap) 4401 { 4402 4403 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4404 ap->a_fflag, ap->a_cred, NULL)); 4405 } 4406 4407 static int 4408 ioflags(int ioflags) 4409 { 4410 int flags = 0; 4411 4412 if (ioflags & IO_APPEND) 4413 flags |= FAPPEND; 4414 if (ioflags & IO_NDELAY) 4415 flags |= FNONBLOCK; 4416 if (ioflags & IO_SYNC) 4417 flags |= (FSYNC | FDSYNC | FRSYNC); 4418 4419 return (flags); 4420 } 4421 4422 #ifndef _SYS_SYSPROTO_H_ 4423 struct vop_read_args { 4424 struct vnode *a_vp; 4425 struct uio *a_uio; 4426 int a_ioflag; 4427 struct ucred *a_cred; 4428 }; 4429 #endif 4430 4431 static int 4432 zfs_freebsd_read(struct vop_read_args *ap) 4433 { 4434 zfs_uio_t uio; 4435 zfs_uio_init(&uio, ap->a_uio); 4436 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4437 ap->a_cred)); 4438 } 4439 4440 #ifndef _SYS_SYSPROTO_H_ 4441 struct vop_write_args { 4442 struct vnode *a_vp; 4443 struct uio *a_uio; 4444 int a_ioflag; 4445 struct ucred *a_cred; 4446 }; 4447 #endif 4448 4449 static int 4450 zfs_freebsd_write(struct vop_write_args *ap) 4451 { 4452 zfs_uio_t uio; 4453 zfs_uio_init(&uio, ap->a_uio); 4454 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4455 ap->a_cred)); 4456 } 4457 4458 #if __FreeBSD_version >= 1300102 4459 /* 4460 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see 4461 * the comment above cache_fplookup for details.
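 * Here returning EAGAIN means "fall back to the locked lookup path":
 * it is returned when no znode is attached (e.g. the vnode is being
 * torn down), when the file is quarantined or is an extended attribute
 * object, or when execute denials cannot be ruled out from the cached
 * z_pflags alone.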
4462 */ 4463 static int 4464 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v) 4465 { 4466 vnode_t *vp; 4467 znode_t *zp; 4468 uint64_t pflags; 4469 4470 vp = v->a_vp; 4471 zp = VTOZ_SMR(vp); 4472 if (__predict_false(zp == NULL)) 4473 return (EAGAIN); 4474 pflags = atomic_load_64(&zp->z_pflags); 4475 if (pflags & ZFS_AV_QUARANTINED) 4476 return (EAGAIN); 4477 if (pflags & ZFS_XATTR) 4478 return (EAGAIN); 4479 if ((pflags & ZFS_NO_EXECS_DENIED) == 0) 4480 return (EAGAIN); 4481 return (0); 4482 } 4483 #endif 4484 4485 #if __FreeBSD_version >= 1300139 4486 static int 4487 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) 4488 { 4489 vnode_t *vp; 4490 znode_t *zp; 4491 char *target; 4492 4493 vp = v->a_vp; 4494 zp = VTOZ_SMR(vp); 4495 if (__predict_false(zp == NULL)) { 4496 return (EAGAIN); 4497 } 4498 4499 target = atomic_load_consume_ptr(&zp->z_cached_symlink); 4500 if (target == NULL) { 4501 return (EAGAIN); 4502 } 4503 return (cache_symlink_resolve(v->a_fpl, target, strlen(target))); 4504 } 4505 #endif 4506 4507 #ifndef _SYS_SYSPROTO_H_ 4508 struct vop_access_args { 4509 struct vnode *a_vp; 4510 accmode_t a_accmode; 4511 struct ucred *a_cred; 4512 struct thread *a_td; 4513 }; 4514 #endif 4515 4516 static int 4517 zfs_freebsd_access(struct vop_access_args *ap) 4518 { 4519 vnode_t *vp = ap->a_vp; 4520 znode_t *zp = VTOZ(vp); 4521 accmode_t accmode; 4522 int error = 0; 4523 4524 4525 if (ap->a_accmode == VEXEC) { 4526 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0) 4527 return (0); 4528 } 4529 4530 /* 4531 * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND. 4532 */ 4533 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4534 if (accmode != 0) 4535 error = zfs_access(zp, accmode, 0, ap->a_cred); 4536 4537 /* 4538 * VADMIN has to be handled by vaccess(). 4539 */ 4540 if (error == 0) { 4541 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4542 if (accmode != 0) { 4543 #if __FreeBSD_version >= 1300105 4544 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4545 zp->z_gid, accmode, ap->a_cred); 4546 #else 4547 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4548 zp->z_gid, accmode, ap->a_cred, NULL); 4549 #endif 4550 } 4551 } 4552 4553 /* 4554 * For VEXEC, ensure that at least one execute bit is set for 4555 * non-directories.
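 * Without this check a sufficiently privileged caller could otherwise
 * be granted execute on a regular file whose mode has no execute bits
 * set at all.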
4556 */ 4557 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4558 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4559 error = EACCES; 4560 } 4561 4562 return (error); 4563 } 4564 4565 #ifndef _SYS_SYSPROTO_H_ 4566 struct vop_lookup_args { 4567 struct vnode *a_dvp; 4568 struct vnode **a_vpp; 4569 struct componentname *a_cnp; 4570 }; 4571 #endif 4572 4573 static int 4574 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached) 4575 { 4576 struct componentname *cnp = ap->a_cnp; 4577 char nm[NAME_MAX + 1]; 4578 4579 ASSERT3U(cnp->cn_namelen, <, sizeof (nm)); 4580 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm))); 4581 4582 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4583 cnp->cn_cred, 0, cached)); 4584 } 4585 4586 static int 4587 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap) 4588 { 4589 4590 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE)); 4591 } 4592 4593 #ifndef _SYS_SYSPROTO_H_ 4594 struct vop_lookup_args { 4595 struct vnode *a_dvp; 4596 struct vnode **a_vpp; 4597 struct componentname *a_cnp; 4598 }; 4599 #endif 4600 4601 static int 4602 zfs_cache_lookup(struct vop_lookup_args *ap) 4603 { 4604 zfsvfs_t *zfsvfs; 4605 4606 zfsvfs = ap->a_dvp->v_mount->mnt_data; 4607 if (zfsvfs->z_use_namecache) 4608 return (vfs_cache_lookup(ap)); 4609 else 4610 return (zfs_freebsd_lookup(ap, B_FALSE)); 4611 } 4612 4613 #ifndef _SYS_SYSPROTO_H_ 4614 struct vop_create_args { 4615 struct vnode *a_dvp; 4616 struct vnode **a_vpp; 4617 struct componentname *a_cnp; 4618 struct vattr *a_vap; 4619 }; 4620 #endif 4621 4622 static int 4623 zfs_freebsd_create(struct vop_create_args *ap) 4624 { 4625 zfsvfs_t *zfsvfs; 4626 struct componentname *cnp = ap->a_cnp; 4627 vattr_t *vap = ap->a_vap; 4628 znode_t *zp = NULL; 4629 int rc, mode; 4630 4631 ASSERT(cnp->cn_flags & SAVENAME); 4632 4633 vattr_init_mask(vap); 4634 mode = vap->va_mode & ALLPERMS; 4635 zfsvfs = ap->a_dvp->v_mount->mnt_data; 4636 *ap->a_vpp = NULL; 4637 4638 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode, 4639 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */); 4640 if (rc == 0) 4641 *ap->a_vpp = ZTOV(zp); 4642 if (zfsvfs->z_use_namecache && 4643 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 4644 cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 4645 4646 return (rc); 4647 } 4648 4649 #ifndef _SYS_SYSPROTO_H_ 4650 struct vop_remove_args { 4651 struct vnode *a_dvp; 4652 struct vnode *a_vp; 4653 struct componentname *a_cnp; 4654 }; 4655 #endif 4656 4657 static int 4658 zfs_freebsd_remove(struct vop_remove_args *ap) 4659 { 4660 4661 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4662 4663 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 4664 ap->a_cnp->cn_cred)); 4665 } 4666 4667 #ifndef _SYS_SYSPROTO_H_ 4668 struct vop_mkdir_args { 4669 struct vnode *a_dvp; 4670 struct vnode **a_vpp; 4671 struct componentname *a_cnp; 4672 struct vattr *a_vap; 4673 }; 4674 #endif 4675 4676 static int 4677 zfs_freebsd_mkdir(struct vop_mkdir_args *ap) 4678 { 4679 vattr_t *vap = ap->a_vap; 4680 znode_t *zp = NULL; 4681 int rc; 4682 4683 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4684 4685 vattr_init_mask(vap); 4686 *ap->a_vpp = NULL; 4687 4688 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp, 4689 ap->a_cnp->cn_cred, 0, NULL); 4690 4691 if (rc == 0) 4692 *ap->a_vpp = ZTOV(zp); 4693 return (rc); 4694 } 4695 4696 #ifndef _SYS_SYSPROTO_H_ 4697 struct vop_rmdir_args { 4698 struct vnode *a_dvp; 4699 struct vnode *a_vp; 4700 struct componentname 
*a_cnp; 4701 }; 4702 #endif 4703 4704 static int 4705 zfs_freebsd_rmdir(struct vop_rmdir_args *ap) 4706 { 4707 struct componentname *cnp = ap->a_cnp; 4708 4709 ASSERT(cnp->cn_flags & SAVENAME); 4710 4711 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 4712 } 4713 4714 #ifndef _SYS_SYSPROTO_H_ 4715 struct vop_readdir_args { 4716 struct vnode *a_vp; 4717 struct uio *a_uio; 4718 struct ucred *a_cred; 4719 int *a_eofflag; 4720 int *a_ncookies; 4721 uint64_t **a_cookies; 4722 }; 4723 #endif 4724 4725 static int 4726 zfs_freebsd_readdir(struct vop_readdir_args *ap) 4727 { 4728 zfs_uio_t uio; 4729 zfs_uio_init(&uio, ap->a_uio); 4730 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag, 4731 ap->a_ncookies, ap->a_cookies)); 4732 } 4733 4734 #ifndef _SYS_SYSPROTO_H_ 4735 struct vop_fsync_args { 4736 struct vnode *a_vp; 4737 int a_waitfor; 4738 struct thread *a_td; 4739 }; 4740 #endif 4741 4742 static int 4743 zfs_freebsd_fsync(struct vop_fsync_args *ap) 4744 { 4745 4746 vop_stdfsync(ap); 4747 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred)); 4748 } 4749 4750 #ifndef _SYS_SYSPROTO_H_ 4751 struct vop_getattr_args { 4752 struct vnode *a_vp; 4753 struct vattr *a_vap; 4754 struct ucred *a_cred; 4755 }; 4756 #endif 4757 4758 static int 4759 zfs_freebsd_getattr(struct vop_getattr_args *ap) 4760 { 4761 vattr_t *vap = ap->a_vap; 4762 xvattr_t xvap; 4763 ulong_t fflags = 0; 4764 int error; 4765 4766 xva_init(&xvap); 4767 xvap.xva_vattr = *vap; 4768 xvap.xva_vattr.va_mask |= AT_XVATTR; 4769 4770 /* Convert chflags into ZFS-type flags. */ 4771 /* XXX: what about SF_SETTABLE?. */ 4772 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 4773 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 4774 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 4775 XVA_SET_REQ(&xvap, XAT_NODUMP); 4776 XVA_SET_REQ(&xvap, XAT_READONLY); 4777 XVA_SET_REQ(&xvap, XAT_ARCHIVE); 4778 XVA_SET_REQ(&xvap, XAT_SYSTEM); 4779 XVA_SET_REQ(&xvap, XAT_HIDDEN); 4780 XVA_SET_REQ(&xvap, XAT_REPARSE); 4781 XVA_SET_REQ(&xvap, XAT_OFFLINE); 4782 XVA_SET_REQ(&xvap, XAT_SPARSE); 4783 4784 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred); 4785 if (error != 0) 4786 return (error); 4787 4788 /* Convert ZFS xattr into chflags. 
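 * For example, a file with the ZFS immutable attribute set is reported
 * to userland with SF_IMMUTABLE in its BSD file flags.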
*/ 4789 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 4790 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 4791 fflags |= (fflag); \ 4792 } while (0) 4793 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 4794 xvap.xva_xoptattrs.xoa_immutable); 4795 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 4796 xvap.xva_xoptattrs.xoa_appendonly); 4797 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 4798 xvap.xva_xoptattrs.xoa_nounlink); 4799 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 4800 xvap.xva_xoptattrs.xoa_archive); 4801 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 4802 xvap.xva_xoptattrs.xoa_nodump); 4803 FLAG_CHECK(UF_READONLY, XAT_READONLY, 4804 xvap.xva_xoptattrs.xoa_readonly); 4805 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 4806 xvap.xva_xoptattrs.xoa_system); 4807 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 4808 xvap.xva_xoptattrs.xoa_hidden); 4809 FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 4810 xvap.xva_xoptattrs.xoa_reparse); 4811 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 4812 xvap.xva_xoptattrs.xoa_offline); 4813 FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 4814 xvap.xva_xoptattrs.xoa_sparse); 4815 4816 #undef FLAG_CHECK 4817 *vap = xvap.xva_vattr; 4818 vap->va_flags = fflags; 4819 return (0); 4820 } 4821 4822 #ifndef _SYS_SYSPROTO_H_ 4823 struct vop_setattr_args { 4824 struct vnode *a_vp; 4825 struct vattr *a_vap; 4826 struct ucred *a_cred; 4827 }; 4828 #endif 4829 4830 static int 4831 zfs_freebsd_setattr(struct vop_setattr_args *ap) 4832 { 4833 vnode_t *vp = ap->a_vp; 4834 vattr_t *vap = ap->a_vap; 4835 cred_t *cred = ap->a_cred; 4836 xvattr_t xvap; 4837 ulong_t fflags; 4838 uint64_t zflags; 4839 4840 vattr_init_mask(vap); 4841 vap->va_mask &= ~AT_NOSET; 4842 4843 xva_init(&xvap); 4844 xvap.xva_vattr = *vap; 4845 4846 zflags = VTOZ(vp)->z_pflags; 4847 4848 if (vap->va_flags != VNOVAL) { 4849 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 4850 int error; 4851 4852 if (zfsvfs->z_use_fuids == B_FALSE) 4853 return (EOPNOTSUPP); 4854 4855 fflags = vap->va_flags; 4856 /* 4857 * XXX KDM 4858 * We need to figure out whether it makes sense to allow 4859 * UF_REPARSE through, since we don't really have other 4860 * facilities to handle reparse points and zfs_setattr() 4861 * doesn't currently allow setting that attribute anyway. 4862 */ 4863 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE| 4864 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE| 4865 UF_OFFLINE|UF_SPARSE)) != 0) 4866 return (EOPNOTSUPP); 4867 /* 4868 * Unprivileged processes are not permitted to unset system 4869 * flags, or modify flags if any system flags are set. 4870 * Privileged non-jail processes may not modify system flags 4871 * if securelevel > 0 and any existing system flags are set. 4872 * Privileged jail processes behave like privileged non-jail 4873 * processes if the PR_ALLOW_CHFLAGS permission bit is set; 4874 * otherwise, they behave like unprivileged processes. 4875 */ 4876 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 || 4877 spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) { 4878 if (zflags & 4879 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) { 4880 error = securelevel_gt(cred, 0); 4881 if (error != 0) 4882 return (error); 4883 } 4884 } else { 4885 /* 4886 * Callers may only modify the file flags on 4887 * objects they have VADMIN rights for. 
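 * (VOP_ACCESS() grants VADMIN to the file's owner, or to callers
 * holding the appropriate privilege.)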
4888 */ 4889 if ((error = VOP_ACCESS(vp, VADMIN, cred, 4890 curthread)) != 0) 4891 return (error); 4892 if (zflags & 4893 (ZFS_IMMUTABLE | ZFS_APPENDONLY | 4894 ZFS_NOUNLINK)) { 4895 return (EPERM); 4896 } 4897 if (fflags & 4898 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) { 4899 return (EPERM); 4900 } 4901 } 4902 4903 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 4904 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 4905 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 4906 XVA_SET_REQ(&xvap, (xflag)); \ 4907 (xfield) = ((fflags & (fflag)) != 0); \ 4908 } \ 4909 } while (0) 4910 /* Convert chflags into ZFS-type flags. */ 4911 /* XXX: what about SF_SETTABLE?. */ 4912 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 4913 xvap.xva_xoptattrs.xoa_immutable); 4914 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 4915 xvap.xva_xoptattrs.xoa_appendonly); 4916 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 4917 xvap.xva_xoptattrs.xoa_nounlink); 4918 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE, 4919 xvap.xva_xoptattrs.xoa_archive); 4920 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 4921 xvap.xva_xoptattrs.xoa_nodump); 4922 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY, 4923 xvap.xva_xoptattrs.xoa_readonly); 4924 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM, 4925 xvap.xva_xoptattrs.xoa_system); 4926 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, 4927 xvap.xva_xoptattrs.xoa_hidden); 4928 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, 4929 xvap.xva_xoptattrs.xoa_reparse); 4930 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, 4931 xvap.xva_xoptattrs.xoa_offline); 4932 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, 4933 xvap.xva_xoptattrs.xoa_sparse); 4934 #undef FLAG_CHANGE 4935 } 4936 if (vap->va_birthtime.tv_sec != VNOVAL) { 4937 xvap.xva_vattr.va_mask |= AT_XVATTR; 4938 XVA_SET_REQ(&xvap, XAT_CREATETIME); 4939 } 4940 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred)); 4941 } 4942 4943 #ifndef _SYS_SYSPROTO_H_ 4944 struct vop_rename_args { 4945 struct vnode *a_fdvp; 4946 struct vnode *a_fvp; 4947 struct componentname *a_fcnp; 4948 struct vnode *a_tdvp; 4949 struct vnode *a_tvp; 4950 struct componentname *a_tcnp; 4951 }; 4952 #endif 4953 4954 static int 4955 zfs_freebsd_rename(struct vop_rename_args *ap) 4956 { 4957 vnode_t *fdvp = ap->a_fdvp; 4958 vnode_t *fvp = ap->a_fvp; 4959 vnode_t *tdvp = ap->a_tdvp; 4960 vnode_t *tvp = ap->a_tvp; 4961 int error; 4962 4963 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART)); 4964 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART)); 4965 4966 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp, 4967 ap->a_tcnp, ap->a_fcnp->cn_cred); 4968 4969 vrele(fdvp); 4970 vrele(fvp); 4971 vrele(tdvp); 4972 if (tvp != NULL) 4973 vrele(tvp); 4974 4975 return (error); 4976 } 4977 4978 #ifndef _SYS_SYSPROTO_H_ 4979 struct vop_symlink_args { 4980 struct vnode *a_dvp; 4981 struct vnode **a_vpp; 4982 struct componentname *a_cnp; 4983 struct vattr *a_vap; 4984 char *a_target; 4985 }; 4986 #endif 4987 4988 static int 4989 zfs_freebsd_symlink(struct vop_symlink_args *ap) 4990 { 4991 struct componentname *cnp = ap->a_cnp; 4992 vattr_t *vap = ap->a_vap; 4993 znode_t *zp = NULL; 4994 #if __FreeBSD_version >= 1300139 4995 char *symlink; 4996 size_t symlink_len; 4997 #endif 4998 int rc; 4999 5000 ASSERT(cnp->cn_flags & SAVENAME); 5001 5002 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. 
*/ 5003 vattr_init_mask(vap); 5004 *ap->a_vpp = NULL; 5005 5006 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 5007 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */); 5008 if (rc == 0) { 5009 *ap->a_vpp = ZTOV(zp); 5010 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 5011 #if __FreeBSD_version >= 1300139 5012 MPASS(zp->z_cached_symlink == NULL); 5013 symlink_len = strlen(ap->a_target); 5014 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); 5015 if (symlink != NULL) { 5016 memcpy(symlink, ap->a_target, symlink_len); 5017 symlink[symlink_len] = '\0'; 5018 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, 5019 (uintptr_t)symlink); 5020 } 5021 #endif 5022 } 5023 return (rc); 5024 } 5025 5026 #ifndef _SYS_SYSPROTO_H_ 5027 struct vop_readlink_args { 5028 struct vnode *a_vp; 5029 struct uio *a_uio; 5030 struct ucred *a_cred; 5031 }; 5032 #endif 5033 5034 static int 5035 zfs_freebsd_readlink(struct vop_readlink_args *ap) 5036 { 5037 zfs_uio_t uio; 5038 int error; 5039 #if __FreeBSD_version >= 1300139 5040 znode_t *zp = VTOZ(ap->a_vp); 5041 char *symlink, *base; 5042 size_t symlink_len; 5043 bool trycache; 5044 #endif 5045 5046 zfs_uio_init(&uio, ap->a_uio); 5047 #if __FreeBSD_version >= 1300139 5048 trycache = false; 5049 if (zfs_uio_segflg(&uio) == UIO_SYSSPACE && 5050 zfs_uio_iovcnt(&uio) == 1) { 5051 base = zfs_uio_iovbase(&uio, 0); 5052 symlink_len = zfs_uio_iovlen(&uio, 0); 5053 trycache = true; 5054 } 5055 #endif 5056 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL); 5057 #if __FreeBSD_version >= 1300139 5058 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL || 5059 error != 0 || !trycache) { 5060 return (error); 5061 } 5062 symlink_len -= zfs_uio_resid(&uio); 5063 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); 5064 if (symlink != NULL) { 5065 memcpy(symlink, base, symlink_len); 5066 symlink[symlink_len] = '\0'; 5067 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink, 5068 (uintptr_t)NULL, (uintptr_t)symlink)) { 5069 cache_symlink_free(symlink, symlink_len + 1); 5070 } 5071 } 5072 #endif 5073 return (error); 5074 } 5075 5076 #ifndef _SYS_SYSPROTO_H_ 5077 struct vop_link_args { 5078 struct vnode *a_tdvp; 5079 struct vnode *a_vp; 5080 struct componentname *a_cnp; 5081 }; 5082 #endif 5083 5084 static int 5085 zfs_freebsd_link(struct vop_link_args *ap) 5086 { 5087 struct componentname *cnp = ap->a_cnp; 5088 vnode_t *vp = ap->a_vp; 5089 vnode_t *tdvp = ap->a_tdvp; 5090 5091 if (tdvp->v_mount != vp->v_mount) 5092 return (EXDEV); 5093 5094 ASSERT(cnp->cn_flags & SAVENAME); 5095 5096 return (zfs_link(VTOZ(tdvp), VTOZ(vp), 5097 cnp->cn_nameptr, cnp->cn_cred, 0)); 5098 } 5099 5100 #ifndef _SYS_SYSPROTO_H_ 5101 struct vop_inactive_args { 5102 struct vnode *a_vp; 5103 struct thread *a_td; 5104 }; 5105 #endif 5106 5107 static int 5108 zfs_freebsd_inactive(struct vop_inactive_args *ap) 5109 { 5110 vnode_t *vp = ap->a_vp; 5111 5112 #if __FreeBSD_version >= 1300123 5113 zfs_inactive(vp, curthread->td_ucred, NULL); 5114 #else 5115 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5116 #endif 5117 return (0); 5118 } 5119 5120 #if __FreeBSD_version >= 1300042 5121 #ifndef _SYS_SYSPROTO_H_ 5122 struct vop_need_inactive_args { 5123 struct vnode *a_vp; 5124 struct thread *a_td; 5125 }; 5126 #endif 5127 5128 static int 5129 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap) 5130 { 5131 vnode_t *vp = ap->a_vp; 5132 znode_t *zp = VTOZ(vp); 5133 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5134 int need; 5135 5136 if (vn_need_pageq_flush(vp)) 5137 return (1); 5138 5139 if 
(!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)) 5140 return (1); 5141 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty); 5142 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5143 5144 return (need); 5145 } 5146 #endif 5147 5148 #ifndef _SYS_SYSPROTO_H_ 5149 struct vop_reclaim_args { 5150 struct vnode *a_vp; 5151 struct thread *a_td; 5152 }; 5153 #endif 5154 5155 static int 5156 zfs_freebsd_reclaim(struct vop_reclaim_args *ap) 5157 { 5158 vnode_t *vp = ap->a_vp; 5159 znode_t *zp = VTOZ(vp); 5160 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5161 5162 ASSERT3P(zp, !=, NULL); 5163 5164 #if __FreeBSD_version < 1300042 5165 /* Destroy the vm object and flush associated pages. */ 5166 vnode_destroy_vobject(vp); 5167 #endif 5168 /* 5169 * z_teardown_inactive_lock protects from a race with 5170 * zfs_znode_dmu_fini in zfsvfs_teardown during 5171 * force unmount. 5172 */ 5173 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs); 5174 if (zp->z_sa_hdl == NULL) 5175 zfs_znode_free(zp); 5176 else 5177 zfs_zinactive(zp); 5178 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5179 5180 vp->v_data = NULL; 5181 return (0); 5182 } 5183 5184 #ifndef _SYS_SYSPROTO_H_ 5185 struct vop_fid_args { 5186 struct vnode *a_vp; 5187 struct fid *a_fid; 5188 }; 5189 #endif 5190 5191 static int 5192 zfs_freebsd_fid(struct vop_fid_args *ap) 5193 { 5194 5195 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5196 } 5197 5198 5199 #ifndef _SYS_SYSPROTO_H_ 5200 struct vop_pathconf_args { 5201 struct vnode *a_vp; 5202 int a_name; 5203 register_t *a_retval; 5204 } *ap; 5205 #endif 5206 5207 static int 5208 zfs_freebsd_pathconf(struct vop_pathconf_args *ap) 5209 { 5210 ulong_t val; 5211 int error; 5212 5213 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, 5214 curthread->td_ucred, NULL); 5215 if (error == 0) { 5216 *ap->a_retval = val; 5217 return (error); 5218 } 5219 if (error != EOPNOTSUPP) 5220 return (error); 5221 5222 switch (ap->a_name) { 5223 case _PC_NAME_MAX: 5224 *ap->a_retval = NAME_MAX; 5225 return (0); 5226 #if __FreeBSD_version >= 1400032 5227 case _PC_DEALLOC_PRESENT: 5228 *ap->a_retval = 1; 5229 return (0); 5230 #endif 5231 case _PC_PIPE_BUF: 5232 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { 5233 *ap->a_retval = PIPE_BUF; 5234 return (0); 5235 } 5236 return (EINVAL); 5237 default: 5238 return (vop_stdpathconf(ap)); 5239 } 5240 } 5241 5242 /* 5243 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5244 * extended attribute name: 5245 * 5246 * NAMESPACE PREFIX 5247 * system freebsd:system: 5248 * user (none, can be used to access ZFS fsattr(5) attributes 5249 * created on Solaris) 5250 */ 5251 static int 5252 zfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5253 size_t size) 5254 { 5255 const char *namespace, *prefix, *suffix; 5256 5257 /* We don't allow '/' character in attribute name. */ 5258 if (strchr(name, '/') != NULL) 5259 return (SET_ERROR(EINVAL)); 5260 /* We don't allow attribute names that start with "freebsd:" string. */ 5261 if (strncmp(name, "freebsd:", 8) == 0) 5262 return (SET_ERROR(EINVAL)); 5263 5264 bzero(attrname, size); 5265 5266 switch (attrnamespace) { 5267 case EXTATTR_NAMESPACE_USER: 5268 #if 0 5269 prefix = "freebsd:"; 5270 namespace = EXTATTR_NAMESPACE_USER_STRING; 5271 suffix = ":"; 5272 #else 5273 /* 5274 * This is the default namespace by which we can access all 5275 * attributes created on Solaris. 
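 * For example (illustrative only), a user-namespace attribute "foo"
 * maps to the ZFS attribute name "foo", while a system-namespace
 * attribute "foo" maps to "freebsd:system:foo".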
5276 */ 5277 prefix = namespace = suffix = ""; 5278 #endif 5279 break; 5280 case EXTATTR_NAMESPACE_SYSTEM: 5281 prefix = "freebsd:"; 5282 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5283 suffix = ":"; 5284 break; 5285 case EXTATTR_NAMESPACE_EMPTY: 5286 default: 5287 return (SET_ERROR(EINVAL)); 5288 } 5289 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5290 name) >= size) { 5291 return (SET_ERROR(ENAMETOOLONG)); 5292 } 5293 return (0); 5294 } 5295 5296 static int 5297 zfs_ensure_xattr_cached(znode_t *zp) 5298 { 5299 int error = 0; 5300 5301 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5302 5303 if (zp->z_xattr_cached != NULL) 5304 return (0); 5305 5306 if (rw_write_held(&zp->z_xattr_lock)) 5307 return (zfs_sa_get_xattr(zp)); 5308 5309 if (!rw_tryupgrade(&zp->z_xattr_lock)) { 5310 rw_exit(&zp->z_xattr_lock); 5311 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5312 } 5313 if (zp->z_xattr_cached == NULL) 5314 error = zfs_sa_get_xattr(zp); 5315 rw_downgrade(&zp->z_xattr_lock); 5316 return (error); 5317 } 5318 5319 #ifndef _SYS_SYSPROTO_H_ 5320 struct vop_getextattr { 5321 IN struct vnode *a_vp; 5322 IN int a_attrnamespace; 5323 IN const char *a_name; 5324 INOUT struct uio *a_uio; 5325 OUT size_t *a_size; 5326 IN struct ucred *a_cred; 5327 IN struct thread *a_td; 5328 }; 5329 #endif 5330 5331 static int 5332 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname) 5333 { 5334 struct thread *td = ap->a_td; 5335 struct nameidata nd; 5336 struct vattr va; 5337 vnode_t *xvp = NULL, *vp; 5338 int error, flags; 5339 5340 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5341 LOOKUP_XATTR, B_FALSE); 5342 if (error != 0) 5343 return (error); 5344 5345 flags = FREAD; 5346 #if __FreeBSD_version < 1400043 5347 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, 5348 xvp, td); 5349 #else 5350 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5351 #endif 5352 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL); 5353 vp = nd.ni_vp; 5354 NDFREE(&nd, NDF_ONLY_PNBUF); 5355 if (error != 0) 5356 return (SET_ERROR(error)); 5357 5358 if (ap->a_size != NULL) { 5359 error = VOP_GETATTR(vp, &va, ap->a_cred); 5360 if (error == 0) 5361 *ap->a_size = (size_t)va.va_size; 5362 } else if (ap->a_uio != NULL) 5363 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5364 5365 VOP_UNLOCK1(vp); 5366 vn_close(vp, flags, ap->a_cred, td); 5367 return (error); 5368 } 5369 5370 static int 5371 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname) 5372 { 5373 znode_t *zp = VTOZ(ap->a_vp); 5374 uchar_t *nv_value; 5375 uint_t nv_size; 5376 int error; 5377 5378 error = zfs_ensure_xattr_cached(zp); 5379 if (error != 0) 5380 return (error); 5381 5382 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5383 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5384 5385 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname, 5386 &nv_value, &nv_size); 5387 if (error != 0) 5388 return (SET_ERROR(error)); 5389 5390 if (ap->a_size != NULL) 5391 *ap->a_size = nv_size; 5392 else if (ap->a_uio != NULL) 5393 error = uiomove(nv_value, nv_size, ap->a_uio); 5394 if (error != 0) 5395 return (SET_ERROR(error)); 5396 5397 return (0); 5398 } 5399 5400 /* 5401 * Vnode operation to retrieve a named extended attribute. 
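 * The attribute is looked up first in the SA (system attribute) based
 * xattr store when the dataset supports it, and then in the hidden
 * xattr directory; ENOATTR is returned if neither location has it.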
5402 */ 5403 static int 5404 zfs_getextattr(struct vop_getextattr_args *ap) 5405 { 5406 znode_t *zp = VTOZ(ap->a_vp); 5407 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5408 char attrname[EXTATTR_MAXNAMELEN+1]; 5409 int error; 5410 5411 /* 5412 * If the xattr property is off, refuse the request. 5413 */ 5414 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5415 return (SET_ERROR(EOPNOTSUPP)); 5416 5417 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5418 ap->a_cred, ap->a_td, VREAD); 5419 if (error != 0) 5420 return (SET_ERROR(error)); 5421 5422 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5423 sizeof (attrname)); 5424 if (error != 0) 5425 return (error); 5426 5427 error = ENOENT; 5428 ZFS_ENTER(zfsvfs); 5429 ZFS_VERIFY_ZP(zp) 5430 rw_enter(&zp->z_xattr_lock, RW_READER); 5431 if (zfsvfs->z_use_sa && zp->z_is_sa) 5432 error = zfs_getextattr_sa(ap, attrname); 5433 if (error == ENOENT) 5434 error = zfs_getextattr_dir(ap, attrname); 5435 rw_exit(&zp->z_xattr_lock); 5436 ZFS_EXIT(zfsvfs); 5437 if (error == ENOENT) 5438 error = SET_ERROR(ENOATTR); 5439 return (error); 5440 } 5441 5442 #ifndef _SYS_SYSPROTO_H_ 5443 struct vop_deleteextattr { 5444 IN struct vnode *a_vp; 5445 IN int a_attrnamespace; 5446 IN const char *a_name; 5447 IN struct ucred *a_cred; 5448 IN struct thread *a_td; 5449 }; 5450 #endif 5451 5452 static int 5453 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) 5454 { 5455 struct nameidata nd; 5456 vnode_t *xvp = NULL, *vp; 5457 int error; 5458 5459 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5460 LOOKUP_XATTR, B_FALSE); 5461 if (error != 0) 5462 return (error); 5463 5464 #if __FreeBSD_version < 1400043 5465 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5466 UIO_SYSSPACE, attrname, xvp, ap->a_td); 5467 #else 5468 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5469 UIO_SYSSPACE, attrname, xvp); 5470 #endif 5471 error = namei(&nd); 5472 vp = nd.ni_vp; 5473 if (error != 0) { 5474 NDFREE(&nd, NDF_ONLY_PNBUF); 5475 return (SET_ERROR(error)); 5476 } 5477 5478 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5479 NDFREE(&nd, NDF_ONLY_PNBUF); 5480 5481 vput(nd.ni_dvp); 5482 if (vp == nd.ni_dvp) 5483 vrele(vp); 5484 else 5485 vput(vp); 5486 5487 return (error); 5488 } 5489 5490 static int 5491 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname) 5492 { 5493 znode_t *zp = VTOZ(ap->a_vp); 5494 nvlist_t *nvl; 5495 int error; 5496 5497 error = zfs_ensure_xattr_cached(zp); 5498 if (error != 0) 5499 return (error); 5500 5501 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5502 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5503 5504 nvl = zp->z_xattr_cached; 5505 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY); 5506 if (error != 0) 5507 error = SET_ERROR(error); 5508 else 5509 error = zfs_sa_set_xattr(zp); 5510 if (error != 0) { 5511 zp->z_xattr_cached = NULL; 5512 nvlist_free(nvl); 5513 } 5514 return (error); 5515 } 5516 5517 /* 5518 * Vnode operation to remove a named attribute. 5519 */ 5520 static int 5521 zfs_deleteextattr(struct vop_deleteextattr_args *ap) 5522 { 5523 znode_t *zp = VTOZ(ap->a_vp); 5524 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5525 char attrname[EXTATTR_MAXNAMELEN+1]; 5526 int error; 5527 5528 /* 5529 * If the xattr property is off, refuse the request. 
5530 */ 5531 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5532 return (SET_ERROR(EOPNOTSUPP)); 5533 5534 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5535 ap->a_cred, ap->a_td, VWRITE); 5536 if (error != 0) 5537 return (SET_ERROR(error)); 5538 5539 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5540 sizeof (attrname)); 5541 if (error != 0) 5542 return (error); 5543 5544 size_t size = 0; 5545 struct vop_getextattr_args vga = { 5546 .a_vp = ap->a_vp, 5547 .a_size = &size, 5548 .a_cred = ap->a_cred, 5549 .a_td = ap->a_td, 5550 }; 5551 error = ENOENT; 5552 ZFS_ENTER(zfsvfs); 5553 ZFS_VERIFY_ZP(zp); 5554 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5555 if (zfsvfs->z_use_sa && zp->z_is_sa) { 5556 error = zfs_getextattr_sa(&vga, attrname); 5557 if (error == 0) 5558 error = zfs_deleteextattr_sa(ap, attrname); 5559 } 5560 if (error == ENOENT) { 5561 error = zfs_getextattr_dir(&vga, attrname); 5562 if (error == 0) 5563 error = zfs_deleteextattr_dir(ap, attrname); 5564 } 5565 rw_exit(&zp->z_xattr_lock); 5566 ZFS_EXIT(zfsvfs); 5567 if (error == ENOENT) 5568 error = SET_ERROR(ENOATTR); 5569 return (error); 5570 } 5571 5572 #ifndef _SYS_SYSPROTO_H_ 5573 struct vop_setextattr { 5574 IN struct vnode *a_vp; 5575 IN int a_attrnamespace; 5576 IN const char *a_name; 5577 INOUT struct uio *a_uio; 5578 IN struct ucred *a_cred; 5579 IN struct thread *a_td; 5580 }; 5581 #endif 5582 5583 static int 5584 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) 5585 { 5586 struct thread *td = ap->a_td; 5587 struct nameidata nd; 5588 struct vattr va; 5589 vnode_t *xvp = NULL, *vp; 5590 int error, flags; 5591 5592 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5593 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5594 if (error != 0) 5595 return (error); 5596 5597 flags = FFLAGS(O_WRONLY | O_CREAT); 5598 #if __FreeBSD_version < 1400043 5599 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); 5600 #else 5601 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5602 #endif 5603 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, 5604 NULL); 5605 vp = nd.ni_vp; 5606 NDFREE(&nd, NDF_ONLY_PNBUF); 5607 if (error != 0) 5608 return (SET_ERROR(error)); 5609 5610 VATTR_NULL(&va); 5611 va.va_size = 0; 5612 error = VOP_SETATTR(vp, &va, ap->a_cred); 5613 if (error == 0) 5614 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5615 5616 VOP_UNLOCK1(vp); 5617 vn_close(vp, flags, ap->a_cred, td); 5618 return (error); 5619 } 5620 5621 static int 5622 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) 5623 { 5624 znode_t *zp = VTOZ(ap->a_vp); 5625 nvlist_t *nvl; 5626 size_t sa_size; 5627 int error; 5628 5629 error = zfs_ensure_xattr_cached(zp); 5630 if (error != 0) 5631 return (error); 5632 5633 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5634 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5635 5636 nvl = zp->z_xattr_cached; 5637 size_t entry_size = ap->a_uio->uio_resid; 5638 if (entry_size > DXATTR_MAX_ENTRY_SIZE) 5639 return (SET_ERROR(EFBIG)); 5640 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); 5641 if (error != 0) 5642 return (SET_ERROR(error)); 5643 if (sa_size > DXATTR_MAX_SA_SIZE) 5644 return (SET_ERROR(EFBIG)); 5645 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP); 5646 error = uiomove(buf, entry_size, ap->a_uio); 5647 if (error != 0) { 5648 error = SET_ERROR(error); 5649 } else { 5650 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); 5651 if (error != 0) 5652 error = SET_ERROR(error); 5653 } 5654 kmem_free(buf, 
entry_size); 5655 if (error == 0) 5656 error = zfs_sa_set_xattr(zp); 5657 if (error != 0) { 5658 zp->z_xattr_cached = NULL; 5659 nvlist_free(nvl); 5660 } 5661 return (error); 5662 } 5663 5664 /* 5665 * Vnode operation to set a named attribute. 5666 */ 5667 static int 5668 zfs_setextattr(struct vop_setextattr_args *ap) 5669 { 5670 znode_t *zp = VTOZ(ap->a_vp); 5671 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5672 char attrname[EXTATTR_MAXNAMELEN+1]; 5673 int error; 5674 5675 /* 5676 * If the xattr property is off, refuse the request. 5677 */ 5678 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5679 return (SET_ERROR(EOPNOTSUPP)); 5680 5681 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5682 ap->a_cred, ap->a_td, VWRITE); 5683 if (error != 0) 5684 return (SET_ERROR(error)); 5685 5686 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5687 sizeof (attrname)); 5688 if (error != 0) 5689 return (error); 5690 5691 struct vop_deleteextattr_args vda = { 5692 .a_vp = ap->a_vp, 5693 .a_cred = ap->a_cred, 5694 .a_td = ap->a_td, 5695 }; 5696 error = ENOENT; 5697 ZFS_ENTER(zfsvfs); 5698 ZFS_VERIFY_ZP(zp); 5699 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5700 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) { 5701 error = zfs_setextattr_sa(ap, attrname); 5702 if (error == 0) 5703 /* 5704 * Successfully put into SA, we need to clear the one 5705 * in dir if present. 5706 */ 5707 zfs_deleteextattr_dir(&vda, attrname); 5708 } 5709 if (error) { 5710 error = zfs_setextattr_dir(ap, attrname); 5711 if (error == 0 && zp->z_is_sa) 5712 /* 5713 * Successfully put into dir, we need to clear the one 5714 * in SA if present. 5715 */ 5716 zfs_deleteextattr_sa(&vda, attrname); 5717 } 5718 rw_exit(&zp->z_xattr_lock); 5719 ZFS_EXIT(zfsvfs); 5720 return (error); 5721 } 5722 5723 #ifndef _SYS_SYSPROTO_H_ 5724 struct vop_listextattr { 5725 IN struct vnode *a_vp; 5726 IN int a_attrnamespace; 5727 INOUT struct uio *a_uio; 5728 OUT size_t *a_size; 5729 IN struct ucred *a_cred; 5730 IN struct thread *a_td; 5731 }; 5732 #endif 5733 5734 static int 5735 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) 5736 { 5737 struct thread *td = ap->a_td; 5738 struct nameidata nd; 5739 uint8_t dirbuf[sizeof (struct dirent)]; 5740 struct iovec aiov; 5741 struct uio auio; 5742 vnode_t *xvp = NULL, *vp; 5743 int error, eof; 5744 5745 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5746 LOOKUP_XATTR, B_FALSE); 5747 if (error != 0) { 5748 /* 5749 * ENOATTR means that the EA directory does not yet exist, 5750 * i.e. there are no extended attributes there. 
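 * Treat that case as an empty listing rather than an error.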
5751 */ 5752 if (error == ENOATTR) 5753 error = 0; 5754 return (error); 5755 } 5756 5757 #if __FreeBSD_version < 1400043 5758 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5759 UIO_SYSSPACE, ".", xvp, td); 5760 #else 5761 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5762 UIO_SYSSPACE, ".", xvp); 5763 #endif 5764 error = namei(&nd); 5765 vp = nd.ni_vp; 5766 NDFREE(&nd, NDF_ONLY_PNBUF); 5767 if (error != 0) 5768 return (SET_ERROR(error)); 5769 5770 auio.uio_iov = &aiov; 5771 auio.uio_iovcnt = 1; 5772 auio.uio_segflg = UIO_SYSSPACE; 5773 auio.uio_td = td; 5774 auio.uio_rw = UIO_READ; 5775 auio.uio_offset = 0; 5776 5777 size_t plen = strlen(attrprefix); 5778 5779 do { 5780 aiov.iov_base = (void *)dirbuf; 5781 aiov.iov_len = sizeof (dirbuf); 5782 auio.uio_resid = sizeof (dirbuf); 5783 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5784 if (error != 0) 5785 break; 5786 int done = sizeof (dirbuf) - auio.uio_resid; 5787 for (int pos = 0; pos < done; ) { 5788 struct dirent *dp = (struct dirent *)(dirbuf + pos); 5789 pos += dp->d_reclen; 5790 /* 5791 * XXX: Temporarily we also accept DT_UNKNOWN, as this 5792 * is what we get when attribute was created on Solaris. 5793 */ 5794 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5795 continue; 5796 else if (plen == 0 && 5797 strncmp(dp->d_name, "freebsd:", 8) == 0) 5798 continue; 5799 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5800 continue; 5801 uint8_t nlen = dp->d_namlen - plen; 5802 if (ap->a_size != NULL) { 5803 *ap->a_size += 1 + nlen; 5804 } else if (ap->a_uio != NULL) { 5805 /* 5806 * Format of extattr name entry is one byte for 5807 * length and the rest for name. 5808 */ 5809 error = uiomove(&nlen, 1, ap->a_uio); 5810 if (error == 0) { 5811 char *namep = dp->d_name + plen; 5812 error = uiomove(namep, nlen, ap->a_uio); 5813 } 5814 if (error != 0) { 5815 error = SET_ERROR(error); 5816 break; 5817 } 5818 } 5819 } 5820 } while (!eof && error == 0); 5821 5822 vput(vp); 5823 return (error); 5824 } 5825 5826 static int 5827 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix) 5828 { 5829 znode_t *zp = VTOZ(ap->a_vp); 5830 int error; 5831 5832 error = zfs_ensure_xattr_cached(zp); 5833 if (error != 0) 5834 return (error); 5835 5836 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5837 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5838 5839 size_t plen = strlen(attrprefix); 5840 nvpair_t *nvp = NULL; 5841 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { 5842 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); 5843 5844 const char *name = nvpair_name(nvp); 5845 if (plen == 0 && strncmp(name, "freebsd:", 8) == 0) 5846 continue; 5847 else if (strncmp(name, attrprefix, plen) != 0) 5848 continue; 5849 uint8_t nlen = strlen(name) - plen; 5850 if (ap->a_size != NULL) { 5851 *ap->a_size += 1 + nlen; 5852 } else if (ap->a_uio != NULL) { 5853 /* 5854 * Format of extattr name entry is one byte for 5855 * length and the rest for name. 5856 */ 5857 error = uiomove(&nlen, 1, ap->a_uio); 5858 if (error == 0) { 5859 char *namep = __DECONST(char *, name) + plen; 5860 error = uiomove(namep, nlen, ap->a_uio); 5861 } 5862 if (error != 0) { 5863 error = SET_ERROR(error); 5864 break; 5865 } 5866 } 5867 } 5868 5869 return (error); 5870 } 5871 5872 /* 5873 * Vnode operation to retrieve extended attributes on a vnode. 
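 * Names are returned in the usual extattr list format: a one-byte
 * length followed by the name itself (no terminating NUL), so, for
 * example, attributes "foo" and "quux" come back as the byte sequence
 * { 3, 'f', 'o', 'o', 4, 'q', 'u', 'u', 'x' }.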
5874 */ 5875 static int 5876 zfs_listextattr(struct vop_listextattr_args *ap) 5877 { 5878 znode_t *zp = VTOZ(ap->a_vp); 5879 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5880 char attrprefix[16]; 5881 int error; 5882 5883 if (ap->a_size != NULL) 5884 *ap->a_size = 0; 5885 5886 /* 5887 * If the xattr property is off, refuse the request. 5888 */ 5889 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5890 return (SET_ERROR(EOPNOTSUPP)); 5891 5892 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5893 ap->a_cred, ap->a_td, VREAD); 5894 if (error != 0) 5895 return (SET_ERROR(error)); 5896 5897 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5898 sizeof (attrprefix)); 5899 if (error != 0) 5900 return (error); 5901 5902 ZFS_ENTER(zfsvfs); 5903 ZFS_VERIFY_ZP(zp); 5904 rw_enter(&zp->z_xattr_lock, RW_READER); 5905 if (zfsvfs->z_use_sa && zp->z_is_sa) 5906 error = zfs_listextattr_sa(ap, attrprefix); 5907 if (error == 0) 5908 error = zfs_listextattr_dir(ap, attrprefix); 5909 rw_exit(&zp->z_xattr_lock); 5910 ZFS_EXIT(zfsvfs); 5911 return (error); 5912 } 5913 5914 #ifndef _SYS_SYSPROTO_H_ 5915 struct vop_getacl_args { 5916 struct vnode *vp; 5917 acl_type_t type; 5918 struct acl *aclp; 5919 struct ucred *cred; 5920 struct thread *td; 5921 }; 5922 #endif 5923 5924 static int 5925 zfs_freebsd_getacl(struct vop_getacl_args *ap) 5926 { 5927 int error; 5928 vsecattr_t vsecattr; 5929 5930 if (ap->a_type != ACL_TYPE_NFS4) 5931 return (EINVAL); 5932 5933 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5934 if ((error = zfs_getsecattr(VTOZ(ap->a_vp), 5935 &vsecattr, 0, ap->a_cred))) 5936 return (error); 5937 5938 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, 5939 vsecattr.vsa_aclcnt); 5940 if (vsecattr.vsa_aclentp != NULL) 5941 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 5942 5943 return (error); 5944 } 5945 5946 #ifndef _SYS_SYSPROTO_H_ 5947 struct vop_setacl_args { 5948 struct vnode *vp; 5949 acl_type_t type; 5950 struct acl *aclp; 5951 struct ucred *cred; 5952 struct thread *td; 5953 }; 5954 #endif 5955 5956 static int 5957 zfs_freebsd_setacl(struct vop_setacl_args *ap) 5958 { 5959 int error; 5960 vsecattr_t vsecattr; 5961 int aclbsize; /* size of acl list in bytes */ 5962 aclent_t *aaclp; 5963 5964 if (ap->a_type != ACL_TYPE_NFS4) 5965 return (EINVAL); 5966 5967 if (ap->a_aclp == NULL) 5968 return (EINVAL); 5969 5970 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5971 return (EINVAL); 5972 5973 /* 5974 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5975 * splitting every entry into two and appending "canonical six" 5976 * entries at the end. Don't allow for setting an ACL that would 5977 * cause chmod(2) to run out of ACL entries. 
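 * For example, assuming FreeBSD's ACL_MAX_ENTRIES of 254, the largest
 * ACL accepted here has (254 - 6) / 2 = 124 entries.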
5978 */ 5979 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5980 return (ENOSPC); 5981 5982 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5983 if (error != 0) 5984 return (error); 5985 5986 vsecattr.vsa_mask = VSA_ACE; 5987 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t); 5988 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5989 aaclp = vsecattr.vsa_aclentp; 5990 vsecattr.vsa_aclentsz = aclbsize; 5991 5992 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5993 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred); 5994 kmem_free(aaclp, aclbsize); 5995 5996 return (error); 5997 } 5998 5999 #ifndef _SYS_SYSPROTO_H_ 6000 struct vop_aclcheck_args { 6001 struct vnode *vp; 6002 acl_type_t type; 6003 struct acl *aclp; 6004 struct ucred *cred; 6005 struct thread *td; 6006 }; 6007 #endif 6008 6009 static int 6010 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) 6011 { 6012 6013 return (EOPNOTSUPP); 6014 } 6015 6016 static int 6017 zfs_vptocnp(struct vop_vptocnp_args *ap) 6018 { 6019 vnode_t *covered_vp; 6020 vnode_t *vp = ap->a_vp; 6021 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 6022 znode_t *zp = VTOZ(vp); 6023 int ltype; 6024 int error; 6025 6026 ZFS_ENTER(zfsvfs); 6027 ZFS_VERIFY_ZP(zp); 6028 6029 /* 6030 * If we are a snapshot mounted under .zfs, run the operation 6031 * on the covered vnode. 6032 */ 6033 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 6034 char name[MAXNAMLEN + 1]; 6035 znode_t *dzp; 6036 size_t len; 6037 6038 error = zfs_znode_parent_and_name(zp, &dzp, name); 6039 if (error == 0) { 6040 len = strlen(name); 6041 if (*ap->a_buflen < len) 6042 error = SET_ERROR(ENOMEM); 6043 } 6044 if (error == 0) { 6045 *ap->a_buflen -= len; 6046 bcopy(name, ap->a_buf + *ap->a_buflen, len); 6047 *ap->a_vpp = ZTOV(dzp); 6048 } 6049 ZFS_EXIT(zfsvfs); 6050 return (error); 6051 } 6052 ZFS_EXIT(zfsvfs); 6053 6054 covered_vp = vp->v_mount->mnt_vnodecovered; 6055 #if __FreeBSD_version >= 1300045 6056 enum vgetstate vs = vget_prep(covered_vp); 6057 #else 6058 vhold(covered_vp); 6059 #endif 6060 ltype = VOP_ISLOCKED(vp); 6061 VOP_UNLOCK1(vp); 6062 #if __FreeBSD_version >= 1300045 6063 error = vget_finish(covered_vp, LK_SHARED, vs); 6064 #else 6065 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 6066 #endif 6067 if (error == 0) { 6068 #if __FreeBSD_version >= 1300123 6069 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf, 6070 ap->a_buflen); 6071 #else 6072 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 6073 ap->a_buf, ap->a_buflen); 6074 #endif 6075 vput(covered_vp); 6076 } 6077 vn_lock(vp, ltype | LK_RETRY); 6078 if (VN_IS_DOOMED(vp)) 6079 error = SET_ERROR(ENOENT); 6080 return (error); 6081 } 6082 6083 #if __FreeBSD_version >= 1400032 6084 static int 6085 zfs_deallocate(struct vop_deallocate_args *ap) 6086 { 6087 znode_t *zp = VTOZ(ap->a_vp); 6088 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6089 zilog_t *zilog; 6090 off_t off, len, file_sz; 6091 int error; 6092 6093 ZFS_ENTER(zfsvfs); 6094 ZFS_VERIFY_ZP(zp); 6095 6096 /* 6097 * Callers might not be able to detect properly that we are read-only, 6098 * so check it explicitly here. 6099 */ 6100 if (zfs_is_readonly(zfsvfs)) { 6101 ZFS_EXIT(zfsvfs); 6102 return (SET_ERROR(EROFS)); 6103 } 6104 6105 zilog = zfsvfs->z_log; 6106 off = *ap->a_offset; 6107 len = *ap->a_len; 6108 file_sz = zp->z_size; 6109 if (off + len > file_sz) 6110 len = file_sz - off; 6111 /* Fast path for out-of-range request. 
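 * i.e. after clamping to the file size there is nothing left to free.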
*/ 6112 if (len <= 0) { 6113 *ap->a_len = 0; 6114 ZFS_EXIT(zfsvfs); 6115 return (0); 6116 } 6117 6118 error = zfs_freesp(zp, off, len, O_RDWR, TRUE); 6119 if (error == 0) { 6120 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS || 6121 (ap->a_ioflag & IO_SYNC) != 0) 6122 zil_commit(zilog, zp->z_id); 6123 *ap->a_offset = off + len; 6124 *ap->a_len = 0; 6125 } 6126 6127 ZFS_EXIT(zfsvfs); 6128 return (error); 6129 } 6130 #endif 6131 6132 struct vop_vector zfs_vnodeops; 6133 struct vop_vector zfs_fifoops; 6134 struct vop_vector zfs_shareops; 6135 6136 struct vop_vector zfs_vnodeops = { 6137 .vop_default = &default_vnodeops, 6138 .vop_inactive = zfs_freebsd_inactive, 6139 #if __FreeBSD_version >= 1300042 6140 .vop_need_inactive = zfs_freebsd_need_inactive, 6141 #endif 6142 .vop_reclaim = zfs_freebsd_reclaim, 6143 #if __FreeBSD_version >= 1300102 6144 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6145 #endif 6146 #if __FreeBSD_version >= 1300139 6147 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6148 #endif 6149 .vop_access = zfs_freebsd_access, 6150 .vop_allocate = VOP_EINVAL, 6151 #if __FreeBSD_version >= 1400032 6152 .vop_deallocate = zfs_deallocate, 6153 #endif 6154 .vop_lookup = zfs_cache_lookup, 6155 .vop_cachedlookup = zfs_freebsd_cachedlookup, 6156 .vop_getattr = zfs_freebsd_getattr, 6157 .vop_setattr = zfs_freebsd_setattr, 6158 .vop_create = zfs_freebsd_create, 6159 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create, 6160 .vop_mkdir = zfs_freebsd_mkdir, 6161 .vop_readdir = zfs_freebsd_readdir, 6162 .vop_fsync = zfs_freebsd_fsync, 6163 .vop_open = zfs_freebsd_open, 6164 .vop_close = zfs_freebsd_close, 6165 .vop_rmdir = zfs_freebsd_rmdir, 6166 .vop_ioctl = zfs_freebsd_ioctl, 6167 .vop_link = zfs_freebsd_link, 6168 .vop_symlink = zfs_freebsd_symlink, 6169 .vop_readlink = zfs_freebsd_readlink, 6170 .vop_read = zfs_freebsd_read, 6171 .vop_write = zfs_freebsd_write, 6172 .vop_remove = zfs_freebsd_remove, 6173 .vop_rename = zfs_freebsd_rename, 6174 .vop_pathconf = zfs_freebsd_pathconf, 6175 .vop_bmap = zfs_freebsd_bmap, 6176 .vop_fid = zfs_freebsd_fid, 6177 .vop_getextattr = zfs_getextattr, 6178 .vop_deleteextattr = zfs_deleteextattr, 6179 .vop_setextattr = zfs_setextattr, 6180 .vop_listextattr = zfs_listextattr, 6181 .vop_getacl = zfs_freebsd_getacl, 6182 .vop_setacl = zfs_freebsd_setacl, 6183 .vop_aclcheck = zfs_freebsd_aclcheck, 6184 .vop_getpages = zfs_freebsd_getpages, 6185 .vop_putpages = zfs_freebsd_putpages, 6186 .vop_vptocnp = zfs_vptocnp, 6187 #if __FreeBSD_version >= 1300064 6188 .vop_lock1 = vop_lock, 6189 .vop_unlock = vop_unlock, 6190 .vop_islocked = vop_islocked, 6191 #endif 6192 #if __FreeBSD_version >= 1400043 6193 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6194 #endif 6195 }; 6196 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 6197 6198 struct vop_vector zfs_fifoops = { 6199 .vop_default = &fifo_specops, 6200 .vop_fsync = zfs_freebsd_fsync, 6201 #if __FreeBSD_version >= 1300102 6202 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6203 #endif 6204 #if __FreeBSD_version >= 1300139 6205 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6206 #endif 6207 .vop_access = zfs_freebsd_access, 6208 .vop_getattr = zfs_freebsd_getattr, 6209 .vop_inactive = zfs_freebsd_inactive, 6210 .vop_read = VOP_PANIC, 6211 .vop_reclaim = zfs_freebsd_reclaim, 6212 .vop_setattr = zfs_freebsd_setattr, 6213 .vop_write = VOP_PANIC, 6214 .vop_pathconf = zfs_freebsd_pathconf, 6215 .vop_fid = zfs_freebsd_fid, 6216 .vop_getacl = zfs_freebsd_getacl, 6217 .vop_setacl = zfs_freebsd_setacl, 6218 
.vop_aclcheck = zfs_freebsd_aclcheck, 6219 #if __FreeBSD_version >= 1400043 6220 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6221 #endif 6222 }; 6223 VFS_VOP_VECTOR_REGISTER(zfs_fifoops); 6224 6225 /* 6226 * Vnode operations template for the special hidden share files. 6227 */ 6228 struct vop_vector zfs_shareops = { 6229 .vop_default = &default_vnodeops, 6230 #if __FreeBSD_version >= 1300121 6231 .vop_fplookup_vexec = VOP_EAGAIN, 6232 #endif 6233 #if __FreeBSD_version >= 1300139 6234 .vop_fplookup_symlink = VOP_EAGAIN, 6235 #endif 6236 .vop_access = zfs_freebsd_access, 6237 .vop_inactive = zfs_freebsd_inactive, 6238 .vop_reclaim = zfs_freebsd_reclaim, 6239 .vop_fid = zfs_freebsd_fid, 6240 .vop_pathconf = zfs_freebsd_pathconf, 6241 #if __FreeBSD_version >= 1400043 6242 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6243 #endif 6244 }; 6245 VFS_VOP_VECTOR_REGISTER(zfs_shareops); 6246