/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/buf.h>
#include <sys/sched.h>
#include <sys/acl.h>
#include <sys/extdirent.h>

#ifdef __FreeBSD__
#include <sys/kidmap.h>
#include <sys/bio.h>
#include <vm/vm_param.h>
#endif

#ifdef __NetBSD__
#include <dev/mm.h>
#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
#include <uvm/uvm_extern.h>
#include <sys/fstrans.h>
#include <sys/malloc.h>

uint_t zfs_putpage_key;
#endif

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1) A check must be made in each zfs thread for a mounted file system.
 *      This is done avoiding races using ZFS_ENTER(zfsvfs).
 *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *      can return EIO from the calling function.
 *
 *  (2) VN_RELE() should always be the last thing except for zil_commit()
 *      (if necessary) and ZFS_EXIT().  This is for 3 reasons:
 *      First, if it's the last reference, the vnode/znode
 *      can be freed, so the zp may point to freed memory.  Second, the last
 *      reference will call zfs_zinactive(), which may induce a lot of work --
 *      pushing cached pages (which acquires range locks) and syncing out
 *      cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *      which could deadlock the system if you were already holding one.
 *      If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3) All range locks must be grabbed before calling dmu_tx_assign(),
 *      as they can span dmu_tx_assign() calls.
 *
 *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *      dmu_tx_assign().  This is critical because we don't want to block
 *      while holding locks.
 *
 *      If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *      reduces lock contention and CPU usage when we must wait (note that if
 *      throughput is constrained by the storage, nearly every transaction
 *      must wait).
 *
 *      Note, in particular, that if a lock is sometimes acquired before
 *      the tx assigns, and sometimes after (e.g. z_lock), then failing
 *      to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *      Thread A has grabbed a lock before calling dmu_tx_assign().
 *      Thread B is in an already-assigned tx, and blocks for this lock.
 *      Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *      forever, because the previous txg can't quiesce until B's tx commits.
 *
 *      If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *      then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *      calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 *      to indicate that this operation has already called dmu_tx_wait().
 *      This will ensure that we don't retry forever, waiting a short bit
 *      each time.
 *
 *  (5) If the operation succeeded, generate the intent log entry for it
 *      before dropping locks.  This ensures that the ordering of events
 *      in the intent log matches the order in which they actually occurred.
 *      During ZIL replay the zfs_log_* functions will update the sequence
 *      number to indicate the zil transaction has replayed.
 *
 *  (6) At the end of each vnode op, the DMU tx must always commit,
 *      regardless of whether there were any errors.
 *
 *  (7) After dropping all locks, invoke zil_commit(zilog, foid)
 *      to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *      ZFS_ENTER(zfsvfs);              // exit if unmounted
 * top:
 *      zfs_dirent_lookup(&dl, ...)     // lock directory entry (may VN_HOLD())
 *      rw_enter(...);                  // grab any other locks you need
 *      tx = dmu_tx_create(...);        // get DMU tx
 *      dmu_tx_hold_*();                // hold each object you might modify
 *      error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 *      if (error) {
 *              rw_exit(...);           // drop locks
 *              zfs_dirent_unlock(dl);  // unlock directory entry
 *              VN_RELE(...);           // release held vnodes
 *              if (error == ERESTART) {
 *                      waited = B_TRUE;
 *                      dmu_tx_wait(tx);
 *                      dmu_tx_abort(tx);
 *                      goto top;
 *              }
 *              dmu_tx_abort(tx);       // abort DMU tx
 *              ZFS_EXIT(zfsvfs);       // finished in zfs
 *              return (error);         // really out of space
 *      }
 *      error = do_real_work();         // do whatever this VOP does
 *      if (error == 0)
 *              zfs_log_*(...);         // on success, make ZIL entry
 *      dmu_tx_commit(tx);              // commit DMU tx -- error or not
 *      rw_exit(...);                   // drop locks
 *      zfs_dirent_unlock(dl);          // unlock directory entry
 *      VN_RELE(...);                   // release held vnodes
 *      zil_commit(zilog, foid);        // synchronous when necessary
 *      ZFS_EXIT(zfsvfs);               // finished in zfs
 *      return (error);                 // done, report error
 */

/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
    znode_t *zp = VTOZ(*vpp);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zp);

    if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
        ((flag & FAPPEND) == 0)) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EPERM));
    }

    if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
        ZTOV(zp)->v_type == VREG &&
        !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
        if (fs_vscan(*vpp, cr, 0) != 0) {
            ZFS_EXIT(zfsvfs);
            return (SET_ERROR(EACCES));
        }
    }

    /* Keep a count of the synchronous opens in the znode */
    if (flag & (FSYNC | FDSYNC))
        atomic_inc_32(&zp->z_sync_cnt);

    ZFS_EXIT(zfsvfs);
    return (0);
}

/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
    znode_t *zp = VTOZ(vp);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;

    /*
     * Clean up any locks held by this process on the vp.
     */
    cleanlocks(vp, ddi_get_pid(), 0);
    cleanshares(vp, ddi_get_pid());

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zp);

    /* Decrement the synchronous opens in the znode */
    if ((flag & (FSYNC | FDSYNC)) && (count == 1))
        atomic_dec_32(&zp->z_sync_cnt);

    if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
        ZTOV(zp)->v_type == VREG &&
        !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
        VERIFY(fs_vscan(vp, cr, 1) == 0);

    ZFS_EXIT(zfsvfs);
    return (0);
}

/*
 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
 * data (cmd == _FIO_SEEK_DATA).  "off" is an in/out parameter.
 */
static int
zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
{
    znode_t *zp = VTOZ(vp);
    uint64_t noff = (uint64_t)*off; /* new offset */
    uint64_t file_sz;
    int error;
    boolean_t hole;

    file_sz = zp->z_size;
    if (noff >= file_sz) {
        return (SET_ERROR(ENXIO));
    }

    if (cmd == _FIO_SEEK_HOLE)
        hole = B_TRUE;
    else
        hole = B_FALSE;

    error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);

    if (error == ESRCH)
        return (SET_ERROR(ENXIO));

    /*
     * We could find a hole that begins after the logical end-of-file,
     * because dmu_offset_next() only works on whole blocks.  If the
     * EOF falls mid-block, then indicate that the "virtual hole"
     * at the end of the file begins at the logical EOF, rather than
     * at the end of the last block.
     */
    if (noff > file_sz) {
        ASSERT(hole);
        noff = file_sz;
    }

    if (noff < *off)
        return (error);
    *off = noff;
    return (error);
}
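/*
 * Illustrative userland view of the interface above (a sketch, not part
 * of this file): the _FIO_SEEK_HOLE/_FIO_SEEK_DATA paths back the
 * SEEK_HOLE/SEEK_DATA lseek(2) whence values, so a sparse-file copier
 * might walk a file like this (assumed POSIX-style usage):
 *
 *      off_t data = 0, hole;
 *      while ((data = lseek(fd, data, SEEK_DATA)) != -1) {
 *              hole = lseek(fd, data, SEEK_HOLE);  // end of this extent
 *              copy_range(fd, data, hole - data);  // hypothetical helper
 *              data = hole;
 *      }
 *      // lseek() fails with ENXIO at or past EOF, matching zfs_holey()
 */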
/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
    offset_t off;
    offset_t ndata;
    dmu_object_info_t doi;
    int error;
    zfsvfs_t *zfsvfs;
    znode_t *zp;

    switch (com) {
    case _FIOFFS:
    {
        return (0);

        /*
         * The following two ioctls are used by bfu.  Faking them out
         * is necessary to avoid bfu errors.
         */
    }
    case _FIOGDIO:
    case _FIOSDIO:
    {
        return (0);
    }

    case _FIO_SEEK_DATA:
    case _FIO_SEEK_HOLE:
    {
#ifdef illumos
        if (ddi_copyin((void *)data, &off, sizeof (off), flag))
            return (SET_ERROR(EFAULT));
#else
        off = *(offset_t *)data;
#endif
        zp = VTOZ(vp);
        zfsvfs = zp->z_zfsvfs;
        ZFS_ENTER(zfsvfs);
        ZFS_VERIFY_ZP(zp);

        /* offset parameter is in/out */
        error = zfs_holey(vp, com, &off);
        ZFS_EXIT(zfsvfs);
        if (error)
            return (error);
#ifdef illumos
        if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
            return (SET_ERROR(EFAULT));
#else
        *(offset_t *)data = off;
#endif
        return (0);
    }
#ifdef illumos
    case _FIO_COUNT_FILLED:
    {
        /*
         * _FIO_COUNT_FILLED adds a new ioctl command which
         * exposes the number of filled blocks in a
         * ZFS object.
         */
        zp = VTOZ(vp);
        zfsvfs = zp->z_zfsvfs;
        ZFS_ENTER(zfsvfs);
        ZFS_VERIFY_ZP(zp);

        /*
         * Wait for all dirty blocks for this object
         * to get synced out to disk, and the DMU info
         * updated.
         */
        error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
        if (error) {
            ZFS_EXIT(zfsvfs);
            return (error);
        }

        /*
         * Retrieve fill count from DMU object.
         */
        error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
        if (error) {
            ZFS_EXIT(zfsvfs);
            return (error);
        }

        ndata = doi.doi_fill_count;

        ZFS_EXIT(zfsvfs);
        if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
            return (SET_ERROR(EFAULT));
        return (0);
    }
#endif
    }
    return (SET_ERROR(ENOTTY));
}

#ifdef __FreeBSD__
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
    vm_object_t obj;
    vm_page_t pp;
    int64_t end;

    /*
     * At present vm_page_clear_dirty extends the cleared range to
     * DEV_BSIZE aligned boundaries, if the range is not aligned.  As a
     * result a DEV_BSIZE subrange with partially dirty data may get
     * marked as clean.  It may happen that all DEV_BSIZE subranges are
     * marked clean and thus the whole page would be considered clean
     * despite having some dirty data.  For this reason we should shrink
     * the range to DEV_BSIZE aligned boundaries before calling
     * vm_page_clear_dirty.
     */
    end = rounddown2(off + nbytes, DEV_BSIZE);
    off = roundup2(off, DEV_BSIZE);
    nbytes = end - off;

    obj = vp->v_object;
    zfs_vmobject_assert_wlocked(obj);

    for (;;) {
        if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
            pp->valid) {
            if (vm_page_xbusied(pp)) {
                /*
                 * Reference the page before unlocking and
                 * sleeping so that the page daemon is less
                 * likely to reclaim it.
                 */
                vm_page_reference(pp);
                vm_page_lock(pp);
                zfs_vmobject_wunlock(obj);
                vm_page_busy_sleep(pp, "zfsmwb", true);
                zfs_vmobject_wlock(obj);
                continue;
            }
            vm_page_sbusy(pp);
        } else if (pp != NULL) {
            ASSERT(!pp->valid);
            pp = NULL;
        }

        if (pp != NULL) {
            ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
            vm_object_pip_add(obj, 1);
            pmap_remove_write(pp);
            if (nbytes != 0)
                vm_page_clear_dirty(pp, off, nbytes);
        }
        break;
    }
    return (pp);
}
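/*
 * Worked example (illustrative numbers) of the DEV_BSIZE shrinking done
 * in page_busy() above, assuming DEV_BSIZE == 512: for off = 100 and
 * nbytes = 1000, end = rounddown2(1100, 512) = 1024 and
 * off = roundup2(100, 512) = 512, leaving nbytes = 512.  Only the fully
 * covered subrange [512, 1024) is cleared; the partially written
 * subranges on either side stay dirty.
 */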
static void
page_unbusy(vm_page_t pp)
{

    vm_page_sunbusy(pp);
    vm_object_pip_subtract(pp->object, 1);
}

static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
    vm_object_t obj;
    vm_page_t pp;

    obj = vp->v_object;
    zfs_vmobject_assert_wlocked(obj);

    for (;;) {
        if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
            pp->valid) {
            if (vm_page_xbusied(pp)) {
                /*
                 * Reference the page before unlocking and
                 * sleeping so that the page daemon is less
                 * likely to reclaim it.
                 */
                vm_page_reference(pp);
                vm_page_lock(pp);
                zfs_vmobject_wunlock(obj);
                vm_page_busy_sleep(pp, "zfsmwb", true);
                zfs_vmobject_wlock(obj);
                continue;
            }

            ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
            vm_page_lock(pp);
            vm_page_hold(pp);
            vm_page_unlock(pp);

        } else
            pp = NULL;
        break;
    }
    return (pp);
}

static void
page_unhold(vm_page_t pp)
{

    vm_page_lock(pp);
    vm_page_unhold(pp);
    vm_page_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:    If we find a memory mapped page, we write to *both*
 *              the page and the dmu buffer.
 */
static void
update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    int segflg, dmu_tx_t *tx)
{
    vm_object_t obj;
    struct sf_buf *sf;
    caddr_t va;
    int off;

    ASSERT(segflg != UIO_NOCOPY);
    ASSERT(vp->v_mount != NULL);
    obj = vp->v_object;
    ASSERT(obj != NULL);

    off = start & PAGEOFFSET;
    zfs_vmobject_wlock(obj);
    for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
        vm_page_t pp;
        int nbytes = imin(PAGESIZE - off, len);

        if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
            zfs_vmobject_wunlock(obj);

            va = zfs_map_page(pp, &sf);
            (void) dmu_read(os, oid, start+off, nbytes,
                va+off, DMU_READ_PREFETCH);
            zfs_unmap_page(sf);

            zfs_vmobject_wlock(obj);
            page_unbusy(pp);
        }
        len -= nbytes;
        off = 0;
    }
    vm_object_pip_wakeupn(obj, 0);
    zfs_vmobject_wunlock(obj);
}
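/*
 * Illustrative ordering (a sketch, not additional code): on the write
 * path, zfs_write() below first pushes the data into the DMU under the
 * tx, then calls update_pages(), which copies the just-written bytes
 * back out of the DMU (the dmu_read() above) into any cached pages, so
 * mmap(2) readers observe the new contents:
 *
 *      error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 *          uio, nbytes, tx);                       // data -> DMU
 *      if (tx_bytes && vn_has_cached_data(vp))
 *              update_pages(vp, woff, tx_bytes, ...);  // DMU -> page cache
 */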
/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
static int
mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
{
    znode_t *zp = VTOZ(vp);
    objset_t *os = zp->z_zfsvfs->z_os;
    struct sf_buf *sf;
    vm_object_t obj;
    vm_page_t pp;
    int64_t start;
    caddr_t va;
    int len = nbytes;
    int off;
    int error = 0;

    ASSERT(uio->uio_segflg == UIO_NOCOPY);
    ASSERT(vp->v_mount != NULL);
    obj = vp->v_object;
    ASSERT(obj != NULL);
    ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);

    zfs_vmobject_wlock(obj);
    for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
        int bytes = MIN(PAGESIZE, len);

        pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
            VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
        if (pp->valid == 0) {
            zfs_vmobject_wunlock(obj);
            va = zfs_map_page(pp, &sf);
            error = dmu_read(os, zp->z_id, start, bytes, va,
                DMU_READ_PREFETCH);
            if (bytes != PAGESIZE && error == 0)
                bzero(va + bytes, PAGESIZE - bytes);
            zfs_unmap_page(sf);
            zfs_vmobject_wlock(obj);
            vm_page_sunbusy(pp);
            vm_page_lock(pp);
            if (error) {
                if (pp->wire_count == 0 && pp->valid == 0 &&
                    !vm_page_busied(pp))
                    vm_page_free(pp);
            } else {
                pp->valid = VM_PAGE_BITS_ALL;
                vm_page_activate(pp);
            }
            vm_page_unlock(pp);
        } else {
            ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
            vm_page_sunbusy(pp);
        }
        if (error)
            break;
        uio->uio_resid -= bytes;
        uio->uio_offset += bytes;
        len -= bytes;
    }
    zfs_vmobject_wunlock(obj);
    return (error);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:     We "read" preferentially from memory mapped pages,
 *              else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *       the file is memory mapped.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
    znode_t *zp = VTOZ(vp);
    vm_object_t obj;
    int64_t start;
    caddr_t va;
    int len = nbytes;
    int off;
    int error = 0;

    ASSERT(vp->v_mount != NULL);
    obj = vp->v_object;
    ASSERT(obj != NULL);

    start = uio->uio_loffset;
    off = start & PAGEOFFSET;
    zfs_vmobject_wlock(obj);
    for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
        vm_page_t pp;
        uint64_t bytes = MIN(PAGESIZE - off, len);

        if (pp = page_hold(vp, start)) {
            struct sf_buf *sf;
            caddr_t va;

            zfs_vmobject_wunlock(obj);
            va = zfs_map_page(pp, &sf);
#ifdef illumos
            error = uiomove(va + off, bytes, UIO_READ, uio);
#else
            error = vn_io_fault_uiomove(va + off, bytes, uio);
#endif
            zfs_unmap_page(sf);
            zfs_vmobject_wlock(obj);
            page_unhold(pp);
        } else {
            zfs_vmobject_wunlock(obj);
            error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
                uio, bytes);
            zfs_vmobject_wlock(obj);
        }
        len -= bytes;
        off = 0;
        if (error)
            break;
    }
    zfs_vmobject_wunlock(obj);
    return (error);
}
#endif /* __FreeBSD__ */

#ifdef __NetBSD__

caddr_t
zfs_map_page(page_t *pp, enum seg_rw rw)
{
    vaddr_t va;
    int flags;

#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
    if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va))
        return (caddr_t)va;
#endif

    flags = UVMPAGER_MAPIN_WAITOK |
        (rw == S_READ ? UVMPAGER_MAPIN_WRITE : UVMPAGER_MAPIN_READ);
    va = uvm_pagermapin(&pp, 1, flags);
    return (caddr_t)va;
}

void
zfs_unmap_page(page_t *pp, caddr_t addr)
{

#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
    vaddr_t va;

    if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va))
        return;
#endif
    uvm_pagermapout((vaddr_t)addr, 1);
}

static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
    znode_t *zp = VTOZ(vp);
    struct uvm_object *uobj = &vp->v_uobj;
    kmutex_t *mtx = uobj->vmobjlock;
    int64_t start;
    caddr_t va;
    size_t len = nbytes;
    int off;
    int error = 0;
    int npages, found;

    start = uio->uio_loffset;
    off = start & PAGEOFFSET;

    for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
        page_t *pp;
        uint64_t bytes = MIN(PAGESIZE - off, len);

        pp = NULL;
        npages = 1;
        mutex_enter(mtx);
        found = uvn_findpages(uobj, start, &npages, &pp, UFP_NOALLOC);
        mutex_exit(mtx);

        /* XXXNETBSD shouldn't access userspace with the page busy */
        if (found) {
            va = zfs_map_page(pp, S_READ);
            error = uiomove(va + off, bytes, UIO_READ, uio);
            zfs_unmap_page(pp, va);
        } else {
            error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
                uio, bytes);
        }

        mutex_enter(mtx);
        uvm_page_unbusy(&pp, 1);
        mutex_exit(mtx);

        len -= bytes;
        off = 0;
        if (error)
            break;
    }
    return (error);
}

static void
update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    int segflg, dmu_tx_t *tx)
{
    struct uvm_object *uobj = &vp->v_uobj;
    kmutex_t *mtx = uobj->vmobjlock;
    caddr_t va;
    int off;

    ASSERT(vp->v_mount != NULL);

    mutex_enter(mtx);

    off = start & PAGEOFFSET;
    for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
        page_t *pp;
        int nbytes = MIN(PAGESIZE - off, len);
        int npages, found;

        pp = NULL;
        npages = 1;
        found = uvn_findpages(uobj, start, &npages, &pp, UFP_NOALLOC);
        if (found) {
            mutex_exit(mtx);

            va = zfs_map_page(pp, S_WRITE);
            (void) dmu_read(os, oid, start + off, nbytes,
                va + off, DMU_READ_PREFETCH);
            zfs_unmap_page(pp, va);

            mutex_enter(mtx);
            uvm_page_unbusy(&pp, 1);
        }
        len -= nbytes;
        off = 0;
    }
    mutex_exit(mtx);
}
#endif /* __NetBSD__ */

offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */

/*
 * Read bytes from specified file into supplied buffer.
 *
 * IN:  vp      - vnode of file to be read from.
 *      uio     - structure supplying read location, range info,
 *                and return buffer.
 *      ioflag  - SYNC flags; used to provide FRSYNC semantics.
 *      cr      - credentials of caller.
 *      ct      - caller context
 *
 * OUT: uio     - updated offset and range, buffer filled.
 *
 * RETURN:      0 on success, error code on failure.
 *
 * Side Effects:
 *      vp - atime updated if byte count > 0
 */
/* ARGSUSED */
static int
zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
    znode_t *zp = VTOZ(vp);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    ssize_t n, nbytes;
    int error = 0;
    rl_t *rl;
    xuio_t *xuio = NULL;

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zp);

    if (zp->z_pflags & ZFS_AV_QUARANTINED) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EACCES));
    }

    /*
     * Validate file offset
     */
    if (uio->uio_loffset < (offset_t)0) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EINVAL));
    }

    /*
     * Fasttrack empty reads
     */
    if (uio->uio_resid == 0) {
        ZFS_EXIT(zfsvfs);
        return (0);
    }

    /*
     * Check for mandatory locks
     */
    if (MANDMODE(zp->z_mode)) {
        if (error = chklock(vp, FREAD,
            uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
            ZFS_EXIT(zfsvfs);
            return (error);
        }
    }

    /*
     * If we're in FRSYNC mode, sync out this znode before reading it.
     */
    if (zfsvfs->z_log &&
        (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
        zil_commit(zfsvfs->z_log, zp->z_id);

    /*
     * Lock the range against changes.
     */
    rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);

    /*
     * If we are reading past end-of-file we can skip
     * to the end; but we might still need to set atime.
     */
    if (uio->uio_loffset >= zp->z_size) {
        error = 0;
        goto out;
    }

    ASSERT(uio->uio_loffset < zp->z_size);
    n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);

#ifdef illumos
    if ((uio->uio_extflg == UIO_XUIO) &&
        (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
        int nblk;
        int blksz = zp->z_blksz;
        uint64_t offset = uio->uio_loffset;

        xuio = (xuio_t *)uio;
        if ((ISP2(blksz))) {
            nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
                blksz)) / blksz;
        } else {
            ASSERT(offset + n <= blksz);
            nblk = 1;
        }
        (void) dmu_xuio_init(xuio, nblk);

        if (vn_has_cached_data(vp)) {
            /*
             * For simplicity, we always allocate a full buffer
             * even if we only expect to read a portion of a block.
             */
            while (--nblk >= 0) {
                (void) dmu_xuio_add(xuio,
                    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
                    blksz), 0, blksz);
            }
        }
    }
#endif /* illumos */

    while (n > 0) {
        nbytes = MIN(n, zfs_read_chunk_size -
            P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

#ifdef __FreeBSD__
        if (uio->uio_segflg == UIO_NOCOPY)
            error = mappedread_sf(vp, nbytes, uio);
        else
#endif /* __FreeBSD__ */
        if (vn_has_cached_data(vp)) {
            error = mappedread(vp, nbytes, uio);
        } else {
            error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
                uio, nbytes);
        }
        if (error) {
            /* convert checksum errors into IO errors */
            if (error == ECKSUM)
                error = SET_ERROR(EIO);
            break;
        }

        n -= nbytes;
    }
out:
    zfs_range_unlock(rl);

    ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
    ZFS_EXIT(zfsvfs);
    return (error);
}
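/*
 * Worked example (illustrative numbers) of the chunking above: with
 * zfs_read_chunk_size = 1 MB, a 3 MB read starting at loffset = 1.5 MB
 * first issues nbytes = 1 MB - P2PHASE(1.5 MB, 1 MB) = 512 KB to reach
 * a chunk boundary, then two aligned 1 MB chunks, then a final 512 KB.
 * Each chunk is a separate mappedread()/dmu_read_uio_dbuf() call, all
 * under the single range lock taken before the loop.
 */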
/*
 * Write the bytes to a file.
 *
 * IN:  vp      - vnode of file to be written to.
 *      uio     - structure supplying write location, range info,
 *                and data buffer.
 *      ioflag  - FAPPEND, FSYNC, and/or FDSYNC.  FAPPEND is
 *                set if in append mode.
 *      cr      - credentials of caller.
 *      ct      - caller context (NFS/CIFS fem monitor only)
 *
 * OUT: uio     - updated offset and range.
 *
 * RETURN:      0 on success, error code on failure.
 *
 * Timestamps:
 *      vp - ctime|mtime updated if byte count > 0
 */

/* ARGSUSED */
static int
zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
    znode_t *zp = VTOZ(vp);
    rlim64_t limit = MAXOFFSET_T;
    ssize_t start_resid = uio->uio_resid;
    ssize_t tx_bytes;
    uint64_t end_size;
    dmu_tx_t *tx;
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    zilog_t *zilog;
    offset_t woff;
    ssize_t n, nbytes;
    rl_t *rl;
    int max_blksz = zfsvfs->z_max_blksz;
    int error = 0;
    arc_buf_t *abuf;
    iovec_t *aiov = NULL;
    xuio_t *xuio = NULL;
    int i_iov = 0;
    int iovcnt = uio->uio_iovcnt;
    iovec_t *iovp = uio->uio_iov;
    int write_eof;
    int count = 0;
    sa_bulk_attr_t bulk[4];
    uint64_t mtime[2], ctime[2];
    int segflg;

#ifdef __NetBSD__
    segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ?
        UIO_SYSSPACE : UIO_USERSPACE;
#else
    segflg = uio->uio_segflg;
#endif

    /*
     * Fasttrack empty write
     */
    n = start_resid;
    if (n == 0)
        return (0);

    if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
        limit = MAXOFFSET_T;

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zp);

    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
        &zp->z_size, 8);
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
        &zp->z_pflags, 8);

    /*
     * If vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. for snapshots), our
     * callers might not be able to detect properly that we are read-only,
     * so check it explicitly here.
     */
    if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EROFS));
    }

    /*
     * If immutable or not appending then return EPERM
     */
    if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
        ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
        (uio->uio_loffset < zp->z_size))) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EPERM));
    }

    zilog = zfsvfs->z_log;

    /*
     * Validate file offset
     */
    woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
    if (woff < 0) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EINVAL));
    }

    /*
     * Check for mandatory locks before calling zfs_range_lock()
     * in order to prevent a deadlock with locks set via fcntl().
     */
    if (MANDMODE((mode_t)zp->z_mode) &&
        (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
        ZFS_EXIT(zfsvfs);
        return (error);
    }

#ifdef illumos
    /*
     * Pre-fault the pages to ensure slow (eg NFS) pages
     * don't hold up txg.
     * Skip this if uio contains loaned arc_buf.
     */
    if ((uio->uio_extflg == UIO_XUIO) &&
        (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
        xuio = (xuio_t *)uio;
    else
        uio_prefaultpages(MIN(n, max_blksz), uio);
#endif

    /*
     * If in append mode, set the io offset pointer to eof.
     */
    if (ioflag & FAPPEND) {
        /*
         * Obtain an appending range lock to guarantee file append
         * semantics.  We reset the write offset once we have the lock.
         */
        rl = zfs_range_lock(zp, 0, n, RL_APPEND);
        woff = rl->r_off;
        if (rl->r_len == UINT64_MAX) {
            /*
             * We overlocked the file because this write will cause
             * the file block size to increase.
             * Note that zp_size cannot change with this lock held.
             */
            woff = zp->z_size;
        }
        uio->uio_loffset = woff;
    } else {
        /*
         * Note that if the file block size will change as a result of
         * this write, then this range lock will lock the entire file
         * so that we can re-write the block safely.
         */
        rl = zfs_range_lock(zp, woff, n, RL_WRITER);
    }

#ifdef illumos
    if (woff >= limit) {
        zfs_range_unlock(rl);
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EFBIG));
    }

#endif
#ifdef __FreeBSD__
    if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
        zfs_range_unlock(rl);
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EFBIG));
    }
#endif
#ifdef __NetBSD__
    /* XXXNETBSD we might need vn_rlimit_fsize() too here eventually */
#endif

    if ((woff + n) > limit || woff > (limit - n))
        n = limit - woff;

    /* Will this write extend the file length? */
    write_eof = (woff + n > zp->z_size);

    end_size = MAX(zp->z_size, woff + n);

    /*
     * Write the file in reasonable size chunks.  Each chunk is written
     * in a separate transaction; this keeps the intent log records small
     * and allows us to do more fine-grained space accounting.
     */
    while (n > 0) {
        abuf = NULL;
        woff = uio->uio_loffset;
        if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
            zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
            if (abuf != NULL)
                dmu_return_arcbuf(abuf);
            error = SET_ERROR(EDQUOT);
            break;
        }

        if (xuio && abuf == NULL) {
            ASSERT(i_iov < iovcnt);
            aiov = &iovp[i_iov];
            abuf = dmu_xuio_arcbuf(xuio, i_iov);
            dmu_xuio_clear(xuio, i_iov);
            DTRACE_PROBE3(zfs_cp_write, int, i_iov,
                iovec_t *, aiov, arc_buf_t *, abuf);
            ASSERT((aiov->iov_base == abuf->b_data) ||
                ((char *)aiov->iov_base - (char *)abuf->b_data +
                aiov->iov_len == arc_buf_size(abuf)));
            i_iov++;
        } else if (abuf == NULL && n >= max_blksz &&
            woff >= zp->z_size &&
            P2PHASE(woff, max_blksz) == 0 &&
            zp->z_blksz == max_blksz) {
            /*
             * This write covers a full block.  "Borrow" a buffer
             * from the dmu so that we can fill it before we enter
             * a transaction.  This avoids the possibility of
             * holding up the transaction if the data copy hangs
             * up on a pagefault (e.g., from an NFS server mapping).
             */
#if defined(illumos) || defined(__NetBSD__)
            size_t cbytes;
#endif

            abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
                max_blksz);
            ASSERT(abuf != NULL);
            ASSERT(arc_buf_size(abuf) == max_blksz);
#if defined(illumos) || defined(__NetBSD__)
            if (error = uiocopy(abuf->b_data, max_blksz,
                UIO_WRITE, uio, &cbytes)) {
                dmu_return_arcbuf(abuf);
                break;
            }
            ASSERT(cbytes == max_blksz);
#endif
#ifdef __FreeBSD__
            ssize_t resid = uio->uio_resid;

            error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
            if (error != 0) {
                uio->uio_offset -= resid - uio->uio_resid;
                uio->uio_resid = resid;
                dmu_return_arcbuf(abuf);
                break;
            }
#endif
        }
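        /*
         * Worked example (illustrative numbers) for the "borrow" fast
         * path above: with max_blksz = 128K, a write of n = 256K at
         * woff = 384K into a file whose z_size <= 384K and whose
         * z_blksz is already 128K satisfies all four tests
         * (n >= max_blksz, woff >= z_size, P2PHASE(woff, 128K) == 0,
         * z_blksz == max_blksz), so the first 128K is copied into a
         * loaned arc buffer before any tx is assigned, keeping a slow
         * copyin (e.g. an NFS pagefault) out of the open txg.
         */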
        /*
         * Start a transaction.
         */
        tx = dmu_tx_create(zfsvfs->z_os);
        dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
        dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
        zfs_sa_upgrade_txholds(tx, zp);
        error = dmu_tx_assign(tx, TXG_WAIT);
        if (error) {
            dmu_tx_abort(tx);
            if (abuf != NULL)
                dmu_return_arcbuf(abuf);
            break;
        }

        /*
         * If zfs_range_lock() over-locked we grow the blocksize
         * and then reduce the lock range.  This will only happen
         * on the first iteration since zfs_range_reduce() will
         * shrink down r_len to the appropriate size.
         */
        if (rl->r_len == UINT64_MAX) {
            uint64_t new_blksz;

            if (zp->z_blksz > max_blksz) {
                /*
                 * File's blocksize is already larger than the
                 * "recordsize" property.  Only let it grow to
                 * the next power of 2.
                 */
                ASSERT(!ISP2(zp->z_blksz));
                new_blksz = MIN(end_size,
                    1 << highbit64(zp->z_blksz));
            } else {
                new_blksz = MIN(end_size, max_blksz);
            }
            zfs_grow_blocksize(zp, new_blksz, tx);
            zfs_range_reduce(rl, woff, n);
        }

        /*
         * XXX - should we really limit each write to z_max_blksz?
         * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
         */
        nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

        if (woff + nbytes > zp->z_size)
            vnode_pager_setsize(vp, woff + nbytes);

        if (abuf == NULL) {
            tx_bytes = uio->uio_resid;
            error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
                uio, nbytes, tx);
            tx_bytes -= uio->uio_resid;
        } else {
            tx_bytes = nbytes;
            ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
            /*
             * If this is not a full block write, but we are
             * extending the file past EOF and this data starts
             * block-aligned, use assign_arcbuf().  Otherwise,
             * write via dmu_write().
             */
            if (tx_bytes < max_blksz && (!write_eof ||
                aiov->iov_base != abuf->b_data)) {
                ASSERT(xuio);
                dmu_write(zfsvfs->z_os, zp->z_id, woff,
                    aiov->iov_len, aiov->iov_base, tx);
                dmu_return_arcbuf(abuf);
                xuio_stat_wbuf_copied();
            } else {
                ASSERT(xuio || tx_bytes == max_blksz);
                dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
                    woff, abuf, tx);
            }
#if defined(illumos) || defined(__NetBSD__)
            ASSERT(tx_bytes <= uio->uio_resid);
            uioskip(uio, tx_bytes);
#endif
        }
        if (tx_bytes && vn_has_cached_data(vp)) {
            update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
                zp->z_id, segflg, tx);
        }

        /*
         * If we made no progress, we're done.  If we made even
         * partial progress, update the znode and ZIL accordingly.
         */
        if (tx_bytes == 0) {
            (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
                (void *)&zp->z_size, sizeof (uint64_t), tx);
            dmu_tx_commit(tx);
            ASSERT(error != 0);
            break;
        }

        /*
         * Clear Set-UID/Set-GID bits on successful write if not
         * privileged and at least one of the execute bits is set.
         *
         * It would be nice to do this after all writes have
         * been done, but that would still expose the ISUID/ISGID
         * to another app after the partial write is committed.
         *
         * Note: we don't call zfs_fuid_map_id() here because
         * user 0 is not an ephemeral uid.
         */
        mutex_enter(&zp->z_acl_lock);
        if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
            (S_IXUSR >> 6))) != 0 &&
            (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
            secpolicy_vnode_setid_retain(vp, cr,
            (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
            uint64_t newmode;
            zp->z_mode &= ~(S_ISUID | S_ISGID);
            newmode = zp->z_mode;
            (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
                (void *)&newmode, sizeof (uint64_t), tx);
        }
        mutex_exit(&zp->z_acl_lock);

        zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
            B_TRUE);

        /*
         * Update the file size (zp_size) if it has changed;
         * account for possible concurrent updates.
         */
        while ((end_size = zp->z_size) < uio->uio_loffset) {
            (void) atomic_cas_64(&zp->z_size, end_size,
                uio->uio_loffset);
#ifdef illumos
            ASSERT(error == 0);
#else
            ASSERT(error == 0 || error == EFAULT);
#endif
        }
        /*
         * If we are replaying and eof is non zero then force
         * the file size to the specified eof.  Note, there's no
         * concurrency during replay.
         */
        if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
            zp->z_size = zfsvfs->z_replay_eof;

        if (error == 0)
            error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
        else
            (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

        zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
        dmu_tx_commit(tx);

        if (error != 0)
            break;
        ASSERT(tx_bytes == nbytes);
        n -= nbytes;

#ifdef illumos
        if (!xuio && n > 0)
            uio_prefaultpages(MIN(n, max_blksz), uio);
#endif
    }

    zfs_range_unlock(rl);

    /*
     * If we're in replay mode, or we made no progress, return error.
     * Otherwise, it's at least a partial write, so it's successful.
     */
    if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
        ZFS_EXIT(zfsvfs);
        return (error);
    }

#ifdef __FreeBSD__
    /*
     * EFAULT means that at least one page of the source buffer was not
     * available.  VFS will re-try remaining I/O upon this error.
     */
    if (error == EFAULT) {
        ZFS_EXIT(zfsvfs);
        return (error);
    }
#endif

    if (ioflag & (FSYNC | FDSYNC) ||
        zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
        zil_commit(zilog, zp->z_id);

    ZFS_EXIT(zfsvfs);
    return (0);
}
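/*
 * Illustrative userland consequence (a sketch, not part of this file)
 * of the final zil_commit() above: with O_SYNC/O_DSYNC (FSYNC/FDSYNC
 * here), or with the dataset property sync=always, write(2) does not
 * return until the intent log records for this file reach stable
 * storage:
 *
 *      fd = open(path, O_WRONLY | O_DSYNC);    // assumed POSIX usage
 *      write(fd, buf, len);                    // durable when it returns
 */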
void
zfs_get_done(zgd_t *zgd, int error)
{
    znode_t *zp = zgd->zgd_private;
    objset_t *os = zp->z_zfsvfs->z_os;

    if (zgd->zgd_db)
        dmu_buf_rele(zgd->zgd_db, zgd);

    zfs_range_unlock(zgd->zgd_rl);

    /*
     * Release the vnode asynchronously as we currently have the
     * txg stopped from syncing.
     */
    VN_RELE_CLEANER(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));

    if (error == 0 && zgd->zgd_bp)
        zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

    kmem_free(zgd, sizeof (zgd_t));
}

#ifdef DEBUG
static int zil_fault_io = 0;
#endif

/*
 * Get data to generate a TX_WRITE intent log record.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
    zfsvfs_t *zfsvfs = arg;
    objset_t *os = zfsvfs->z_os;
    znode_t *zp;
    uint64_t object = lr->lr_foid;
    uint64_t offset = lr->lr_offset;
    uint64_t size = lr->lr_length;
    blkptr_t *bp = &lr->lr_blkptr;
    dmu_buf_t *db;
    zgd_t *zgd;
    int error = 0;

    ASSERT(zio != NULL);
    ASSERT(size != 0);

    /*
     * Nothing to do if the file has been removed
     */
    if (zfs_zget_cleaner(zfsvfs, object, &zp) != 0)
        return (SET_ERROR(ENOENT));
    if (zp->z_unlinked) {
        /*
         * Release the vnode asynchronously as we currently have the
         * txg stopped from syncing.
         */
        VN_RELE_CLEANER(ZTOV(zp),
            dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
        return (SET_ERROR(ENOENT));
    }

    zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
    zgd->zgd_zilog = zfsvfs->z_log;
    zgd->zgd_private = zp;

    /*
     * Write records come in two flavors: immediate and indirect.
     * For small writes it's cheaper to store the data with the
     * log record (immediate); for large writes it's cheaper to
     * sync the data and get a pointer to it (indirect) so that
     * we don't have to write the data twice.
     */
    if (buf != NULL) { /* immediate write */
        zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
        /* test for truncation needs to be done while range locked */
        if (offset >= zp->z_size) {
            error = SET_ERROR(ENOENT);
        } else {
            error = dmu_read(os, object, offset, size, buf,
                DMU_READ_NO_PREFETCH);
        }
        ASSERT(error == 0 || error == ENOENT);
    } else { /* indirect write */
        /*
         * Have to lock the whole block to ensure when it's
         * written out and its checksum is being calculated
         * that no one can change the data.  We need to re-check
         * blocksize after we get the lock in case it's changed!
         */
        for (;;) {
            uint64_t blkoff;
            size = zp->z_blksz;
            blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
            offset -= blkoff;
            zgd->zgd_rl = zfs_range_lock(zp, offset, size,
                RL_READER);
            if (zp->z_blksz == size)
                break;
            offset += blkoff;
            zfs_range_unlock(zgd->zgd_rl);
        }
        /* test for truncation needs to be done while range locked */
        if (lr->lr_offset >= zp->z_size)
            error = SET_ERROR(ENOENT);
#ifdef DEBUG
        if (zil_fault_io) {
            error = SET_ERROR(EIO);
            zil_fault_io = 0;
        }
#endif
        if (error == 0)
            error = dmu_buf_hold(os, object, offset, zgd, &db,
                DMU_READ_NO_PREFETCH);

        if (error == 0) {
            blkptr_t *obp = dmu_buf_get_blkptr(db);
            if (obp) {
                ASSERT(BP_IS_HOLE(bp));
                *bp = *obp;
            }

            zgd->zgd_db = db;
            zgd->zgd_bp = bp;

            ASSERT(db->db_offset == offset);
            ASSERT(db->db_size == size);

            error = dmu_sync(zio, lr->lr_common.lrc_txg,
                zfs_get_done, zgd);
            ASSERT(error || lr->lr_length <= zp->z_blksz);

            /*
             * On success, we need to wait for the write I/O
             * initiated by dmu_sync() to complete before we can
             * release this dbuf.  We will finish everything up
             * in the zfs_get_done() callback.
             */
            if (error == 0)
                return (0);

            if (error == EALREADY) {
                lr->lr_common.lrc_txtype = TX_WRITE2;
                error = 0;
            }
        }
    }

    zfs_get_done(zgd, error);

    return (error);
}
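/*
 * Worked example (illustrative numbers) for the indirect-write locking
 * loop above: with z_blksz = 64K and lr_offset = 70000, blkoff =
 * P2PHASE(70000, 65536) = 4464, so the range lock covers the whole
 * block [65536, 131072).  If another thread grew the block while we
 * slept on the lock, z_blksz != size, and the loop drops the lock and
 * recomputes against the new blocksize.
 */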
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
    znode_t *zp = VTOZ(vp);
    zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    int error;

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zp);

    if (flag & V_ACE_MASK)
        error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
    else
        error = zfs_zaccess_rwx(zp, mode, flag, cr);

    ZFS_EXIT(zfsvfs);
    return (error);
}

#ifdef __FreeBSD__
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
    int error;

    *vpp = arg;
    error = vn_lock(*vpp, lkflags);
    if (error != 0)
        vrele(*vpp);
    return (error);
}

static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
    znode_t *zdp = VTOZ(dvp);
    zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
    int error;
    int ltype;

    ASSERT_VOP_LOCKED(dvp, __func__);
#ifdef DIAGNOSTIC
    if ((zdp->z_pflags & ZFS_XATTR) == 0)
        VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock));
#endif

    if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
        ASSERT3P(dvp, ==, vp);
        vref(dvp);
        ltype = lkflags & LK_TYPE_MASK;
        if (ltype != VOP_ISLOCKED(dvp)) {
            if (ltype == LK_EXCLUSIVE)
                vn_lock(dvp, LK_UPGRADE | LK_RETRY);
            else /* if (ltype == LK_SHARED) */
                vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

            /*
             * Relock for the "." case could leave us with
             * reclaimed vnode.
             */
            if (dvp->v_iflag & VI_DOOMED) {
                vrele(dvp);
                return (SET_ERROR(ENOENT));
            }
        }
        return (0);
    } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
        /*
         * Note that in this case, dvp is the child vnode, and we
         * are looking up the parent vnode - exactly reverse from
         * normal operation.  Unlocking dvp requires some rather
         * tricky unlock/relock dance to prevent mp from being freed;
         * use vn_vget_ino_gen() which takes care of all that.
         *
         * XXX Note that there is a time window when both vnodes are
         * unlocked.  It is possible, although highly unlikely, that
         * during that window the parent-child relationship between
         * the vnodes may change, for example, get reversed.
         * In that case we would have a wrong lock order for the vnodes.
         * All other filesystems seem to ignore this problem, so we
         * do the same here.
         * A potential solution could be implemented as follows:
         * - using LK_NOWAIT when locking the second vnode and retrying
         *   if necessary
         * - checking that the parent-child relationship still holds
         *   after locking both vnodes and retrying if it doesn't
         */
        error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
        return (error);
    } else {
        error = vn_lock(vp, lkflags);
        if (error != 0)
            vrele(vp);
        return (error);
    }
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:  dvp     - vnode of directory to search.
 *      nm      - name of entry to lookup.
 *      pnp     - full pathname to lookup [UNUSED].
 *      flags   - LOOKUP_XATTR set if looking for an attribute.
 *      rdir    - root directory vnode [UNUSED].
 *      cr      - credentials of caller.
 *      ct      - caller context
 *
 * OUT: vpp     - vnode of located entry, NULL if not found.
 *
 * RETURN:      0 on success, error code on failure.
 *
 * Timestamps:
 *      NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, kthread_t *td, int flags)
{
    znode_t *zdp = VTOZ(dvp);
    znode_t *zp;
    zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
    int error = 0;

    /* fast path (should be redundant with vfs namecache) */
    if (!(flags & LOOKUP_XATTR)) {
        if (dvp->v_type != VDIR) {
            return (SET_ERROR(ENOTDIR));
        } else if (zdp->z_sa_hdl == NULL) {
            return (SET_ERROR(EIO));
        }
    }

    DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zdp);

    *vpp = NULL;

    if (flags & LOOKUP_XATTR) {
#ifdef TODO
        /*
         * If the xattr property is off, refuse the lookup request.
         */
        if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
            ZFS_EXIT(zfsvfs);
            return (SET_ERROR(EINVAL));
        }
#endif

        /*
         * We don't allow recursive attributes...
         * Maybe someday we will.
         */
        if (zdp->z_pflags & ZFS_XATTR) {
            ZFS_EXIT(zfsvfs);
            return (SET_ERROR(EINVAL));
        }

        if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
            ZFS_EXIT(zfsvfs);
            return (error);
        }

        /*
         * Do we have permission to get into attribute directory?
         */
        if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
            B_FALSE, cr)) {
            vrele(*vpp);
            *vpp = NULL;
        }

        ZFS_EXIT(zfsvfs);
        return (error);
    }

    /*
     * Check accessibility of directory.
     */
    if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
        ZFS_EXIT(zfsvfs);
        return (error);
    }

    if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
        NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
        ZFS_EXIT(zfsvfs);
        return (SET_ERROR(EILSEQ));
    }


    /*
     * First handle the special cases.
     */
    if ((cnp->cn_flags & ISDOTDOT) != 0) {
        /*
         * If we are a snapshot mounted under .zfs, return
         * the vp for the snapshot directory.
         */
        if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
            struct componentname cn;
            vnode_t *zfsctl_vp;
            int ltype;

            ZFS_EXIT(zfsvfs);
            ltype = VOP_ISLOCKED(dvp);
            VOP_UNLOCK(dvp, 0);
            error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
                &zfsctl_vp);
            if (error == 0) {
                cn.cn_nameptr = "snapshot";
                cn.cn_namelen = strlen(cn.cn_nameptr);
                cn.cn_nameiop = cnp->cn_nameiop;
                cn.cn_flags = cnp->cn_flags;
                cn.cn_lkflags = cnp->cn_lkflags;
                error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
                vput(zfsctl_vp);
            }
            vn_lock(dvp, ltype | LK_RETRY);
            return (error);
        }
    }
    if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
        ZFS_EXIT(zfsvfs);
        if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
            return (SET_ERROR(ENOTSUP));
        error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
        return (error);
    }

    /*
     * The loop retries the lookup if the parent-child relationship
     * changes during the dot-dot locking complexities.
     */
    for (;;) {
        uint64_t parent;

        error = zfs_dirlook(zdp, nm, &zp);
        if (error == 0)
            *vpp = ZTOV(zp);

        ZFS_EXIT(zfsvfs);
        if (error != 0)
            break;

        error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
        if (error != 0) {
            /*
             * If we've got a locking error, then the vnode
             * got reclaimed because of a force unmount.
             * We never enter doomed vnodes into the name cache.
             */
            *vpp = NULL;
            return (error);
        }

        if ((cnp->cn_flags & ISDOTDOT) == 0)
            break;

        ZFS_ENTER(zfsvfs);
        if (zdp->z_sa_hdl == NULL) {
            error = SET_ERROR(EIO);
        } else {
            error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
                &parent, sizeof (parent));
        }
        if (error != 0) {
            ZFS_EXIT(zfsvfs);
            vput(ZTOV(zp));
            break;
        }
        if (zp->z_id == parent) {
            ZFS_EXIT(zfsvfs);
            break;
        }
        vput(ZTOV(zp));
    }

out:
    if (error != 0)
        *vpp = NULL;

    /* Translate errors and add SAVENAME when needed. */
    if (cnp->cn_flags & ISLASTCN) {
        switch (nameiop) {
        case CREATE:
        case RENAME:
            if (error == ENOENT) {
                error = EJUSTRETURN;
                cnp->cn_flags |= SAVENAME;
                break;
            }
            /* FALLTHROUGH */
        case DELETE:
            if (error == 0)
                cnp->cn_flags |= SAVENAME;
            break;
        }
    }
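    /*
     * Example of the translation above (standard FreeBSD namei
     * convention): an open(2) with O_CREAT for a missing last component
     * arrives here with nameiop == CREATE and ISLASTCN set.  The
     * underlying zfs_dirlook() reports ENOENT, which is rewritten to
     * EJUSTRETURN so namei() treats the miss as "ready to create", and
     * SAVENAME keeps the component name valid for the later VOP_CREATE().
     */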
    /* Insert name into cache (as non-existent) if appropriate. */
    if (zfsvfs->z_use_namecache &&
        error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
        cache_enter(dvp, NULL, cnp);

    /* Insert name into cache if appropriate. */
    if (zfsvfs->z_use_namecache &&
        error == 0 && (cnp->cn_flags & MAKEENTRY)) {
        if (!(cnp->cn_flags & ISLASTCN) ||
            (nameiop != DELETE && nameiop != RENAME)) {
            cache_enter(dvp, *vpp, cnp);
        }
    }

    return (error);
}
#endif /* __FreeBSD__ */

#ifdef __NetBSD__
/*
 * If vnode is for a device return a specfs vnode instead.
 */
static int
specvp_check(vnode_t **vpp, cred_t *cr)
{
    int error = 0;

    if (IS_DEVVP(*vpp)) {
        struct vnode *svp;

        svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
        VN_RELE(*vpp);
        if (svp == NULL)
            error = ENOSYS;
        *vpp = svp;
    }
    return (error);
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:  dvp     - vnode of directory to search.
 *      nm      - name of entry to lookup.
 *      pnp     - full pathname to lookup [UNUSED].
 *      flags   - LOOKUP_XATTR set if looking for an attribute.
 *      rdir    - root directory vnode [UNUSED].
 *      cr      - credentials of caller.
 *      ct      - caller context
 *      direntflags - directory lookup flags
 *      realpnp - returned pathname.
 *
 * OUT: vpp     - vnode of located entry, NULL if not found.
 *
 * RETURN:      0 if success
 *              error code if failure
 *
 * Timestamps:
 *      NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, int flags,
    struct componentname *cnp, int nameiop, cred_t *cr)
{
    znode_t *zdp = VTOZ(dvp);
    znode_t *zp;
    zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
    int error = 0;

    /* fast path */
    if (!(flags & LOOKUP_XATTR)) {
        if (dvp->v_type != VDIR) {
            return (ENOTDIR);
        } else if (zdp->z_sa_hdl == NULL) {
            return (SET_ERROR(EIO));
        }

        if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
            error = zfs_fastaccesschk_execute(zdp, cr);
            if (!error) {
                *vpp = dvp;
                VN_HOLD(*vpp);
                return (0);
            }
            return (error);
        } else {
            vnode_t *tvp = dnlc_lookup(dvp, nm);

            if (tvp) {
                error = zfs_fastaccesschk_execute(zdp, cr);
                if (error) {
                    VN_RELE(tvp);
                    return (error);
                }
                if (tvp == DNLC_NO_VNODE) {
                    VN_RELE(tvp);
                    return (ENOENT);
                } else {
                    *vpp = tvp;
                    return (specvp_check(vpp, cr));
                }
            }
        }
    }

    DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(zdp);

    *vpp = NULL;

    if (flags & LOOKUP_XATTR) {
#ifdef TODO
        /*
         * If the xattr property is off, refuse the lookup request.
         */
        if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
            ZFS_EXIT(zfsvfs);
            return (EINVAL);
        }
#endif

        /*
         * We don't allow recursive attributes...
         * Maybe someday we will.
         */
        if (zdp->z_pflags & ZFS_XATTR) {
            ZFS_EXIT(zfsvfs);
            return (EINVAL);
        }

        if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
            ZFS_EXIT(zfsvfs);
            return (error);
        }

        /*
         * Do we have permission to get into attribute directory?
         */
        if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
            B_FALSE, cr)) {
            VN_RELE(*vpp);
            *vpp = NULL;
        }

        ZFS_EXIT(zfsvfs);
        return (error);
    }

    if (dvp->v_type != VDIR) {
        ZFS_EXIT(zfsvfs);
        return (ENOTDIR);
    }

    /*
     * Check accessibility of directory.
     */
    if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
        ZFS_EXIT(zfsvfs);
        return (error);
    }

    if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
        NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
        ZFS_EXIT(zfsvfs);
        return (EILSEQ);
    }

    /*
     * First handle the special cases.
     */
    if ((cnp->cn_flags & ISDOTDOT) != 0) {
        /*
         * If we are a snapshot mounted under .zfs, return
         * the vp for the snapshot directory.
         */
        if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
            ZFS_EXIT(zfsvfs);
            error = zfsctl_snapshot(zfsvfs->z_parent, vpp);
            return (error);
        }
    }
    if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
        ZFS_EXIT(zfsvfs);
        if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
            return (SET_ERROR(ENOTSUP));
        error = zfsctl_root(zfsvfs, vpp);
        return (error);
    }

    error = zfs_dirlook(zdp, nm, &zp);
    if (error == 0) {
        *vpp = ZTOV(zp);
        error = specvp_check(vpp, cr);
    }

    ZFS_EXIT(zfsvfs);
    return (error);
}
#endif
If the entry 2078 * already exists, truncate the file if permissible, else return 2079 * an error. Return the vp of the created or trunc'd file. 2080 * 2081 * IN: dvp - vnode of directory to put new file entry in. 2082 * name - name of new file entry. 2083 * vap - attributes of new file. 2084 * excl - flag indicating exclusive or non-exclusive mode. 2085 * mode - mode to open file with. 2086 * cr - credentials of caller. 2087 * flag - large file flag [UNUSED]. 2088 * ct - caller context 2089 * vsecp - ACL to be set 2090 * 2091 * OUT: vpp - vnode of created or trunc'd entry. 2092 * 2093 * RETURN: 0 on success, error code on failure. 2094 * 2095 * Timestamps: 2096 * dvp - ctime|mtime updated if new entry created 2097 * vp - ctime|mtime always, atime if new 2098 */ 2099 2100 /* ARGSUSED */ 2101 static int 2102 zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 2103 vnode_t **vpp, cred_t *cr, kthread_t *td) 2104 { 2105 znode_t *zp, *dzp = VTOZ(dvp); 2106 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2107 zilog_t *zilog; 2108 objset_t *os; 2109 dmu_tx_t *tx; 2110 int error; 2111 ksid_t *ksid; 2112 uid_t uid; 2113 gid_t gid = crgetgid(cr); 2114 zfs_acl_ids_t acl_ids; 2115 boolean_t fuid_dirtied; 2116 void *vsecp = NULL; 2117 int flag = 0; 2118 uint64_t txtype; 2119 2120 /* 2121 * If we have an ephemeral id, ACL, or XVATTR then 2122 * make sure file system is at proper version 2123 */ 2124 2125 ksid = crgetsid(cr, KSID_OWNER); 2126 if (ksid) 2127 uid = ksid_getid(ksid); 2128 else 2129 uid = crgetuid(cr); 2130 2131 if (zfsvfs->z_use_fuids == B_FALSE && 2132 (vsecp || (vap->va_mask & AT_XVATTR) || 2133 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2134 return (SET_ERROR(EINVAL)); 2135 2136 ZFS_ENTER(zfsvfs); 2137 ZFS_VERIFY_ZP(dzp); 2138 os = zfsvfs->z_os; 2139 zilog = zfsvfs->z_log; 2140 2141 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 2142 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2143 ZFS_EXIT(zfsvfs); 2144 return (SET_ERROR(EILSEQ)); 2145 } 2146 2147 if (vap->va_mask & AT_XVATTR) { 2148 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2149 crgetuid(cr), cr, vap->va_type)) != 0) { 2150 ZFS_EXIT(zfsvfs); 2151 return (error); 2152 } 2153 } 2154 2155 *vpp = NULL; 2156 2157 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 2158 vap->va_mode &= ~S_ISVTX; 2159 2160 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 2161 if (error) { 2162 ZFS_EXIT(zfsvfs); 2163 return (error); 2164 } 2165 ASSERT3P(zp, ==, NULL); 2166 2167 /* 2168 * Create a new file object and update the directory 2169 * to reference it. 2170 */ 2171 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 2172 goto out; 2173 } 2174 2175 /* 2176 * We only support the creation of regular files in 2177 * extended attribute directories. 
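 * (So, for example, an attempt to create a fifo or device node inside
 * an attribute directory is rejected with EINVAL just below; only
 * VREG is allowed through.)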
2178 */ 2179 2180 if ((dzp->z_pflags & ZFS_XATTR) && 2181 (vap->va_type != VREG)) { 2182 error = SET_ERROR(EINVAL); 2183 goto out; 2184 } 2185 2186 if ((error = zfs_acl_ids_create(dzp, 0, vap, 2187 cr, vsecp, &acl_ids)) != 0) 2188 goto out; 2189 2190 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2191 zfs_acl_ids_free(&acl_ids); 2192 error = SET_ERROR(EDQUOT); 2193 goto out; 2194 } 2195 2196 getnewvnode_reserve(1); 2197 2198 tx = dmu_tx_create(os); 2199 2200 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2201 ZFS_SA_BASE_ATTR_SIZE); 2202 2203 fuid_dirtied = zfsvfs->z_fuid_dirty; 2204 if (fuid_dirtied) 2205 zfs_fuid_txhold(zfsvfs, tx); 2206 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2207 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 2208 if (!zfsvfs->z_use_sa && 2209 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2210 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2211 0, acl_ids.z_aclp->z_acl_bytes); 2212 } 2213 error = dmu_tx_assign(tx, TXG_WAIT); 2214 if (error) { 2215 zfs_acl_ids_free(&acl_ids); 2216 dmu_tx_abort(tx); 2217 getnewvnode_drop_reserve(); 2218 ZFS_EXIT(zfsvfs); 2219 return (error); 2220 } 2221 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2222 2223 if (fuid_dirtied) 2224 zfs_fuid_sync(zfsvfs, tx); 2225 2226 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 2227 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 2228 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 2229 vsecp, acl_ids.z_fuidp, vap); 2230 zfs_acl_ids_free(&acl_ids); 2231 dmu_tx_commit(tx); 2232 2233 getnewvnode_drop_reserve(); 2234 2235 out: 2236 if (error == 0) { 2237 *vpp = ZTOV(zp); 2238 } 2239 2240 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2241 zil_commit(zilog, 0); 2242 2243 ZFS_EXIT(zfsvfs); 2244 return (error); 2245 } 2246 2247 /* 2248 * Remove an entry from a directory. 2249 * 2250 * IN: dvp - vnode of directory to remove entry from. 2251 * name - name of entry to remove. 2252 * cr - credentials of caller. 2253 * ct - caller context 2254 * flags - case flags 2255 * 2256 * RETURN: 0 on success, error code on failure. 2257 * 2258 * Timestamps: 2259 * dvp - ctime|mtime 2260 * vp - ctime (if nlink > 0) 2261 */ 2262 2263 /*ARGSUSED*/ 2264 static int 2265 zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2266 { 2267 znode_t *dzp = VTOZ(dvp); 2268 znode_t *zp = VTOZ(vp); 2269 znode_t *xzp; 2270 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2271 zilog_t *zilog; 2272 uint64_t acl_obj, xattr_obj; 2273 uint64_t obj = 0; 2274 dmu_tx_t *tx; 2275 boolean_t unlinked, toobig = FALSE; 2276 uint64_t txtype; 2277 int error; 2278 2279 ZFS_ENTER(zfsvfs); 2280 ZFS_VERIFY_ZP(dzp); 2281 ZFS_VERIFY_ZP(zp); 2282 zilog = zfsvfs->z_log; 2283 zp = VTOZ(vp); 2284 2285 xattr_obj = 0; 2286 xzp = NULL; 2287 2288 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2289 goto out; 2290 } 2291 2292 /* 2293 * Need to use rmdir for removing directories. 2294 */ 2295 if (vp->v_type == VDIR) { 2296 error = SET_ERROR(EPERM); 2297 goto out; 2298 } 2299 2300 vnevent_remove(vp, dvp, name, ct); 2301 2302 obj = zp->z_id; 2303 2304 /* are there any extended attributes? */ 2305 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2306 &xattr_obj, sizeof (xattr_obj)); 2307 if (error == 0 && xattr_obj) { 2308 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 2309 ASSERT0(error); 2310 } 2311 2312 /* 2313 * We may delete the znode now, or we may put it in the unlinked set; 2314 * it depends on whether we're the last link, and on whether there are 2315 * other holds on the vnode. So we dmu_tx_hold() the right things to 2316 * allow for either case. 
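 *
 * Roughly (an illustrative sketch, not exhaustive):
 *
 *	other links remain -> only this directory entry goes away
 *	last link          -> zfs_link_destroy() reports "unlinked" and
 *	                      zfs_unlinked_add() queues the znode, which
 *	                      is freed once the last hold is dropped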
2317 */ 2318 tx = dmu_tx_create(zfsvfs->z_os); 2319 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2320 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2321 zfs_sa_upgrade_txholds(tx, zp); 2322 zfs_sa_upgrade_txholds(tx, dzp); 2323 2324 if (xzp) { 2325 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2326 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 2327 } 2328 2329 /* charge as an update -- would be nice not to charge at all */ 2330 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2331 2332 /* 2333 * Mark this transaction as typically resulting in a net free of space 2334 */ 2335 dmu_tx_mark_netfree(tx); 2336 2337 error = dmu_tx_assign(tx, TXG_WAIT); 2338 if (error) { 2339 dmu_tx_abort(tx); 2340 ZFS_EXIT(zfsvfs); 2341 return (error); 2342 } 2343 2344 /* 2345 * Remove the directory entry. 2346 */ 2347 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2348 2349 if (error) { 2350 dmu_tx_commit(tx); 2351 goto out; 2352 } 2353 2354 if (unlinked) { 2355 zfs_unlinked_add(zp, tx); 2356 vp->v_vflag |= VV_NOSYNC; 2357 } 2358 2359 txtype = TX_REMOVE; 2360 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2361 2362 dmu_tx_commit(tx); 2363 out: 2364 2365 if (xzp) 2366 vrele(ZTOV(xzp)); 2367 2368 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2369 zil_commit(zilog, 0); 2370 2371 ZFS_EXIT(zfsvfs); 2372 return (error); 2373 } 2374 2375 /* 2376 * Create a new directory and insert it into dvp using the name 2377 * provided. Return a pointer to the inserted directory. 2378 * 2379 * IN: dvp - vnode of directory to add subdir to. 2380 * dirname - name of new directory. 2381 * vap - attributes of new directory. 2382 * cr - credentials of caller. 2383 * ct - caller context 2384 * flags - case flags 2385 * vsecp - ACL to be set 2386 * 2387 * OUT: vpp - vnode of created directory. 2388 * 2389 * RETURN: 0 on success, error code on failure. 
2390 * 2391 * Timestamps: 2392 * dvp - ctime|mtime updated 2393 * vp - ctime|mtime|atime updated 2394 */ 2395 /*ARGSUSED*/ 2396 static int 2397 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2398 { 2399 znode_t *zp, *dzp = VTOZ(dvp); 2400 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2401 zilog_t *zilog; 2402 uint64_t txtype; 2403 dmu_tx_t *tx; 2404 int error; 2405 ksid_t *ksid; 2406 uid_t uid; 2407 gid_t gid = crgetgid(cr); 2408 zfs_acl_ids_t acl_ids; 2409 boolean_t fuid_dirtied; 2410 2411 ASSERT(vap->va_type == VDIR); 2412 2413 /* 2414 * If we have an ephemeral id, ACL, or XVATTR then 2415 * make sure file system is at proper version 2416 */ 2417 2418 ksid = crgetsid(cr, KSID_OWNER); 2419 if (ksid) 2420 uid = ksid_getid(ksid); 2421 else 2422 uid = crgetuid(cr); 2423 if (zfsvfs->z_use_fuids == B_FALSE && 2424 ((vap->va_mask & AT_XVATTR) || 2425 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2426 return (SET_ERROR(EINVAL)); 2427 2428 ZFS_ENTER(zfsvfs); 2429 ZFS_VERIFY_ZP(dzp); 2430 zilog = zfsvfs->z_log; 2431 2432 if (dzp->z_pflags & ZFS_XATTR) { 2433 ZFS_EXIT(zfsvfs); 2434 return (SET_ERROR(EINVAL)); 2435 } 2436 2437 if (zfsvfs->z_utf8 && u8_validate(dirname, 2438 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2439 ZFS_EXIT(zfsvfs); 2440 return (SET_ERROR(EILSEQ)); 2441 } 2442 2443 if (vap->va_mask & AT_XVATTR) { 2444 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2445 crgetuid(cr), cr, vap->va_type)) != 0) { 2446 ZFS_EXIT(zfsvfs); 2447 return (error); 2448 } 2449 } 2450 2451 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2452 NULL, &acl_ids)) != 0) { 2453 ZFS_EXIT(zfsvfs); 2454 return (error); 2455 } 2456 2457 /* 2458 * First make sure the new directory doesn't exist. 2459 * 2460 * Existence is checked first to make sure we don't return 2461 * EACCES instead of EEXIST which can cause some applications 2462 * to fail. 2463 */ 2464 *vpp = NULL; 2465 2466 if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2467 zfs_acl_ids_free(&acl_ids); 2468 ZFS_EXIT(zfsvfs); 2469 return (error); 2470 } 2471 ASSERT3P(zp, ==, NULL); 2472 2473 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2474 zfs_acl_ids_free(&acl_ids); 2475 ZFS_EXIT(zfsvfs); 2476 return (error); 2477 } 2478 2479 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2480 zfs_acl_ids_free(&acl_ids); 2481 ZFS_EXIT(zfsvfs); 2482 return (SET_ERROR(EDQUOT)); 2483 } 2484 2485 /* 2486 * Add a new entry to the directory. 2487 */ 2488 getnewvnode_reserve(1); 2489 tx = dmu_tx_create(zfsvfs->z_os); 2490 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2491 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2492 fuid_dirtied = zfsvfs->z_fuid_dirty; 2493 if (fuid_dirtied) 2494 zfs_fuid_txhold(zfsvfs, tx); 2495 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2496 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2497 acl_ids.z_aclp->z_acl_bytes); 2498 } 2499 2500 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2501 ZFS_SA_BASE_ATTR_SIZE); 2502 2503 error = dmu_tx_assign(tx, TXG_WAIT); 2504 if (error) { 2505 zfs_acl_ids_free(&acl_ids); 2506 dmu_tx_abort(tx); 2507 getnewvnode_drop_reserve(); 2508 ZFS_EXIT(zfsvfs); 2509 return (error); 2510 } 2511 2512 /* 2513 * Create new node. 2514 */ 2515 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2516 2517 if (fuid_dirtied) 2518 zfs_fuid_sync(zfsvfs, tx); 2519 2520 /* 2521 * Now put new name in parent dir. 
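 * (The return value is deliberately ignored below: the entry was
 * verified absent above and the directory is still locked, so the
 * link create is not expected to fail here.)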
 */
	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Remove a subdirectory entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	vp	- vnode of the subdirectory to be removed.
 *	name	- name of directory to be removed.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	cache_purge(dvp);

	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
	}

	dmu_tx_commit(tx);

	cache_purge(vp);
out:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 * IN:	vp	- vnode of directory to read.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *	eofp	- set to true if end-of-file detected.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap are always zero.
 * This allows us to use the low range for "special" directory entries:
 * we use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use offset 2 for the '.zfs' directory.
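 *
 * A sketch of the resulting cursor space (illustrative only):
 *
 *	0	'.'
 *	1	'..'
 *	2	'.zfs'	(shown only at the filesystem root)
 *	> 3	serialized ZAP cursor positions, resumed below via
 *		zap_cursor_init_serialized()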
2647 */ 2648 /* ARGSUSED */ 2649 static int 2650 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, off_t **cookies) 2651 { 2652 znode_t *zp = VTOZ(vp); 2653 iovec_t *iovp; 2654 edirent_t *eodp; 2655 dirent64_t *odp; 2656 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2657 objset_t *os; 2658 caddr_t outbuf; 2659 size_t bufsize; 2660 zap_cursor_t zc; 2661 zap_attribute_t zap; 2662 uint_t bytes_wanted; 2663 uint64_t offset; /* must be unsigned; checks for < 1 */ 2664 uint64_t parent; 2665 int local_eof; 2666 int outcount; 2667 int error; 2668 uint8_t prefetch; 2669 boolean_t check_sysattrs; 2670 uint8_t type; 2671 int ncooks = 0; 2672 off_t *cooks = NULL; 2673 int flags = 0; 2674 #ifdef __FreeBSD__ 2675 boolean_t user = uio->uio_segflg != UIO_SYSSPACE; 2676 #endif 2677 #ifdef __NetBSD__ 2678 boolean_t user = !VMSPACE_IS_KERNEL_P(uio->uio_vmspace); 2679 #endif 2680 2681 ZFS_ENTER(zfsvfs); 2682 ZFS_VERIFY_ZP(zp); 2683 2684 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2685 &parent, sizeof (parent))) != 0) { 2686 ZFS_EXIT(zfsvfs); 2687 return (error); 2688 } 2689 2690 /* 2691 * If we are not given an eof variable, 2692 * use a local one. 2693 */ 2694 if (eofp == NULL) 2695 eofp = &local_eof; 2696 2697 /* 2698 * Check for valid iov_len. 2699 */ 2700 if (uio->uio_iov->iov_len <= 0) { 2701 ZFS_EXIT(zfsvfs); 2702 return (SET_ERROR(EINVAL)); 2703 } 2704 2705 /* 2706 * Quit if directory has been removed (posix) 2707 */ 2708 if ((*eofp = zp->z_unlinked) != 0) { 2709 ZFS_EXIT(zfsvfs); 2710 return (0); 2711 } 2712 2713 error = 0; 2714 os = zfsvfs->z_os; 2715 offset = uio->uio_loffset; 2716 prefetch = zp->z_zn_prefetch; 2717 2718 /* 2719 * Initialize the iterator cursor. 2720 */ 2721 if (offset <= 3) { 2722 /* 2723 * Start iteration from the beginning of the directory. 2724 */ 2725 zap_cursor_init(&zc, os, zp->z_id); 2726 } else { 2727 /* 2728 * The offset is a serialized cursor. 2729 */ 2730 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2731 } 2732 2733 /* 2734 * Get space to change directory entries into fs independent format. 2735 */ 2736 iovp = uio->uio_iov; 2737 bytes_wanted = iovp->iov_len; 2738 if (user || uio->uio_iovcnt != 1) { 2739 bufsize = bytes_wanted; 2740 outbuf = kmem_alloc(bufsize, KM_SLEEP); 2741 odp = (struct dirent64 *)outbuf; 2742 } else { 2743 bufsize = bytes_wanted; 2744 outbuf = NULL; 2745 odp = (struct dirent64 *)iovp->iov_base; 2746 } 2747 eodp = (struct edirent *)odp; 2748 2749 if (ncookies != NULL) { 2750 /* 2751 * Minimum entry size is dirent size and 1 byte for a file name. 2752 */ 2753 #ifdef __FreeBSD__ 2754 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2755 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2756 #endif 2757 #ifdef __NetBSD__ 2758 ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp); 2759 cooks = malloc(ncooks * sizeof(off_t), M_TEMP, M_WAITOK); 2760 #endif 2761 *cookies = cooks; 2762 *ncookies = ncooks; 2763 } 2764 2765 /* 2766 * If this VFS supports the system attribute view interface; and 2767 * we're looking at an extended attribute directory; and we care 2768 * about normalization conflicts on this vfs; then we must check 2769 * for normalization conflicts with the sysattr name space. 
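 * (This port does not implement that check yet; check_sysattrs is
 * forced to 0 below.)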
2770 */ 2771 #ifdef TODO 2772 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2773 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2774 (flags & V_RDDIR_ENTFLAGS); 2775 #else 2776 check_sysattrs = 0; 2777 #endif 2778 2779 /* 2780 * Transform to file-system independent format 2781 */ 2782 outcount = 0; 2783 while (outcount < bytes_wanted) { 2784 ino64_t objnum; 2785 ushort_t reclen; 2786 off64_t *next = NULL; 2787 2788 /* 2789 * Special case `.', `..', and `.zfs'. 2790 */ 2791 if (offset == 0) { 2792 (void) strcpy(zap.za_name, "."); 2793 zap.za_normalization_conflict = 0; 2794 objnum = zp->z_id; 2795 type = DT_DIR; 2796 } else if (offset == 1) { 2797 (void) strcpy(zap.za_name, ".."); 2798 zap.za_normalization_conflict = 0; 2799 objnum = parent; 2800 type = DT_DIR; 2801 } else if (offset == 2 && zfs_show_ctldir(zp)) { 2802 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2803 zap.za_normalization_conflict = 0; 2804 objnum = ZFSCTL_INO_ROOT; 2805 type = DT_DIR; 2806 } else { 2807 /* 2808 * Grab next entry. 2809 */ 2810 if (error = zap_cursor_retrieve(&zc, &zap)) { 2811 if ((*eofp = (error == ENOENT)) != 0) 2812 break; 2813 else 2814 goto update; 2815 } 2816 2817 if (zap.za_integer_length != 8 || 2818 zap.za_num_integers != 1) { 2819 cmn_err(CE_WARN, "zap_readdir: bad directory " 2820 "entry, obj = %lld, offset = %lld\n", 2821 (u_longlong_t)zp->z_id, 2822 (u_longlong_t)offset); 2823 error = SET_ERROR(ENXIO); 2824 goto update; 2825 } 2826 2827 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2828 /* 2829 * MacOS X can extract the object type here such as: 2830 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2831 */ 2832 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2833 2834 if (check_sysattrs && !zap.za_normalization_conflict) { 2835 #ifdef TODO 2836 zap.za_normalization_conflict = 2837 xattr_sysattr_casechk(zap.za_name); 2838 #else 2839 panic("%s:%u: TODO", __func__, __LINE__); 2840 #endif 2841 } 2842 } 2843 2844 if (flags & V_RDDIR_ACCFILTER) { 2845 /* 2846 * If we have no access at all, don't include 2847 * this entry in the returned information 2848 */ 2849 znode_t *ezp; 2850 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2851 goto skip_entry; 2852 if (!zfs_has_access(ezp, cr)) { 2853 vrele(ZTOV(ezp)); 2854 goto skip_entry; 2855 } 2856 vrele(ZTOV(ezp)); 2857 } 2858 2859 if (flags & V_RDDIR_ENTFLAGS) 2860 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2861 else 2862 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2863 2864 /* 2865 * Will this entry fit in the buffer? 2866 */ 2867 if (outcount + reclen > bufsize) { 2868 /* 2869 * Did we manage to fit anything in the buffer? 2870 */ 2871 if (!outcount) { 2872 error = SET_ERROR(EINVAL); 2873 goto update; 2874 } 2875 break; 2876 } 2877 if (flags & V_RDDIR_ENTFLAGS) { 2878 /* 2879 * Add extended flag entry: 2880 */ 2881 eodp->ed_ino = objnum; 2882 eodp->ed_reclen = reclen; 2883 /* NOTE: ed_off is the offset for the *next* entry */ 2884 next = &(eodp->ed_off); 2885 eodp->ed_eflags = zap.za_normalization_conflict ? 
2886 ED_CASE_CONFLICT : 0; 2887 (void) strncpy(eodp->ed_name, zap.za_name, 2888 EDIRENT_NAMELEN(reclen)); 2889 eodp = (edirent_t *)((intptr_t)eodp + reclen); 2890 } else { 2891 /* 2892 * Add normal entry: 2893 */ 2894 odp->d_ino = objnum; 2895 odp->d_reclen = reclen; 2896 odp->d_namlen = strlen(zap.za_name); 2897 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2898 odp->d_type = type; 2899 odp = (dirent64_t *)((intptr_t)odp + reclen); 2900 } 2901 outcount += reclen; 2902 2903 ASSERT(outcount <= bufsize); 2904 2905 /* Prefetch znode */ 2906 if (prefetch) 2907 dmu_prefetch(os, objnum, 0, 0, 0, 2908 ZIO_PRIORITY_SYNC_READ); 2909 2910 skip_entry: 2911 /* 2912 * Move to the next entry, fill in the previous offset. 2913 */ 2914 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2915 zap_cursor_advance(&zc); 2916 offset = zap_cursor_serialize(&zc); 2917 } else { 2918 offset += 1; 2919 } 2920 2921 if (cooks != NULL) { 2922 *cooks++ = offset; 2923 ncooks--; 2924 #ifdef __FreeBSD__ 2925 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2926 #endif 2927 #ifdef __NetBSD__ 2928 KASSERTMSG(ncooks >= 0, "ncooks=%d", ncooks); 2929 #endif 2930 } 2931 } 2932 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2933 2934 /* Subtract unused cookies */ 2935 if (ncookies != NULL) 2936 *ncookies -= ncooks; 2937 2938 if (!user && uio->uio_iovcnt == 1) { 2939 iovp->iov_base += outcount; 2940 iovp->iov_len -= outcount; 2941 uio->uio_resid -= outcount; 2942 } else if (error = uiomove(outbuf, (size_t)outcount, UIO_READ, uio)) { 2943 /* 2944 * Reset the pointer. 2945 */ 2946 offset = uio->uio_loffset; 2947 } 2948 2949 update: 2950 zap_cursor_fini(&zc); 2951 if (user || uio->uio_iovcnt != 1) 2952 kmem_free(outbuf, bufsize); 2953 2954 if (error == ENOENT) 2955 error = 0; 2956 2957 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2958 2959 uio->uio_loffset = offset; 2960 ZFS_EXIT(zfsvfs); 2961 if (error != 0 && cookies != NULL) { 2962 #ifdef __FreeBSD__ 2963 free(*cookies, M_TEMP); 2964 #endif 2965 #ifdef __NetBSD__ 2966 kmem_free(*cookies, ncooks * sizeof(off_t)); 2967 #endif 2968 *cookies = NULL; 2969 *ncookies = 0; 2970 } 2971 return (error); 2972 } 2973 2974 ulong_t zfs_fsync_sync_cnt = 4; 2975 2976 static int 2977 zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2978 { 2979 znode_t *zp = VTOZ(vp); 2980 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2981 2982 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2983 2984 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2985 ZFS_ENTER(zfsvfs); 2986 ZFS_VERIFY_ZP(zp); 2987 2988 #ifdef __NetBSD__ 2989 if (!zp->z_unlinked) 2990 #endif 2991 zil_commit(zfsvfs->z_log, zp->z_id); 2992 ZFS_EXIT(zfsvfs); 2993 } 2994 return (0); 2995 } 2996 2997 2998 /* 2999 * Get the requested file attributes and place them in the provided 3000 * vattr structure. 3001 * 3002 * IN: vp - vnode of file. 3003 * vap - va_mask identifies requested attributes. 3004 * If AT_XVATTR set, then optional attrs are requested 3005 * flags - ATTR_NOACLCHECK (CIFS server context) 3006 * cr - credentials of caller. 3007 * ct - caller context 3008 * 3009 * OUT: vap - attribute values. 3010 * 3011 * RETURN: 0 (always succeeds). 
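 *	(strictly, the SA bulk lookup and the ACE_READ_ATTRIBUTES
 *	permission check below can still fail, and that error is
 *	returned)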
3012 */ 3013 /* ARGSUSED */ 3014 static int 3015 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3016 caller_context_t *ct) 3017 { 3018 znode_t *zp = VTOZ(vp); 3019 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3020 int error = 0; 3021 uint32_t blksize; 3022 u_longlong_t nblocks; 3023 uint64_t links; 3024 uint64_t mtime[2], ctime[2], crtime[2], rdev; 3025 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3026 xoptattr_t *xoap = NULL; 3027 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3028 sa_bulk_attr_t bulk[4]; 3029 int count = 0; 3030 3031 ZFS_ENTER(zfsvfs); 3032 ZFS_VERIFY_ZP(zp); 3033 3034 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 3035 3036 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 3037 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 3038 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 3039 if (vp->v_type == VBLK || vp->v_type == VCHR) 3040 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 3041 &rdev, 8); 3042 3043 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 3044 ZFS_EXIT(zfsvfs); 3045 return (error); 3046 } 3047 3048 /* 3049 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 3050 * Also, if we are the owner don't bother, since owner should 3051 * always be allowed to read basic attributes of file. 3052 */ 3053 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 3054 (vap->va_uid != crgetuid(cr))) { 3055 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 3056 skipaclchk, cr)) { 3057 ZFS_EXIT(zfsvfs); 3058 return (error); 3059 } 3060 } 3061 3062 /* 3063 * Return all attributes. It's cheaper to provide the answer 3064 * than to determine whether we were asked the question. 3065 */ 3066 3067 vap->va_type = IFTOVT(zp->z_mode); 3068 vap->va_mode = zp->z_mode & ~S_IFMT; 3069 #ifdef illumos 3070 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 3071 #endif 3072 #ifdef __FreeBSD__ 3073 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 3074 vap->va_nodeid = zp->z_id; 3075 #endif 3076 #ifdef __NetBSD__ 3077 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid; 3078 vap->va_nodeid = zp->z_id; 3079 /* 3080 * If we are a snapshot mounted under .zfs, return 3081 * the object id of the snapshot to make getcwd happy. 3082 */ 3083 if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 3084 vnode_t *cvp = vp->v_mount->mnt_vnodecovered; 3085 3086 if (cvp && zfsctl_is_node(cvp)) 3087 vap->va_nodeid = dmu_objset_id(zfsvfs->z_os); 3088 } 3089 #endif 3090 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 3091 links = zp->z_links + 1; 3092 else 3093 links = zp->z_links; 3094 vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ 3095 vap->va_size = zp->z_size; 3096 #ifdef illumos 3097 vap->va_rdev = vp->v_rdev; 3098 #else 3099 if (vp->v_type == VBLK || vp->v_type == VCHR) 3100 vap->va_rdev = zfs_cmpldev(rdev); 3101 #endif 3102 vap->va_seq = zp->z_seq; 3103 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 3104 vap->va_filerev = zp->z_seq; 3105 3106 /* 3107 * Add in any requested optional attributes and the create time. 3108 * Also set the corresponding bits in the returned attribute bitmap. 
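 *
 * Each optional attribute below follows the same sketch, where
 * XAT_FOO/ZFS_FOO stand in for the real flag names:
 *
 *	if (XVA_ISSET_REQ(xvap, XAT_FOO)) {
 *		xoap->xoa_foo = ((zp->z_pflags & ZFS_FOO) != 0);
 *		XVA_SET_RTN(xvap, XAT_FOO);
 *	}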
3109 */ 3110 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 3111 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 3112 xoap->xoa_archive = 3113 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 3114 XVA_SET_RTN(xvap, XAT_ARCHIVE); 3115 } 3116 3117 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 3118 xoap->xoa_readonly = 3119 ((zp->z_pflags & ZFS_READONLY) != 0); 3120 XVA_SET_RTN(xvap, XAT_READONLY); 3121 } 3122 3123 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 3124 xoap->xoa_system = 3125 ((zp->z_pflags & ZFS_SYSTEM) != 0); 3126 XVA_SET_RTN(xvap, XAT_SYSTEM); 3127 } 3128 3129 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 3130 xoap->xoa_hidden = 3131 ((zp->z_pflags & ZFS_HIDDEN) != 0); 3132 XVA_SET_RTN(xvap, XAT_HIDDEN); 3133 } 3134 3135 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3136 xoap->xoa_nounlink = 3137 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 3138 XVA_SET_RTN(xvap, XAT_NOUNLINK); 3139 } 3140 3141 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3142 xoap->xoa_immutable = 3143 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 3144 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 3145 } 3146 3147 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3148 xoap->xoa_appendonly = 3149 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 3150 XVA_SET_RTN(xvap, XAT_APPENDONLY); 3151 } 3152 3153 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3154 xoap->xoa_nodump = 3155 ((zp->z_pflags & ZFS_NODUMP) != 0); 3156 XVA_SET_RTN(xvap, XAT_NODUMP); 3157 } 3158 3159 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 3160 xoap->xoa_opaque = 3161 ((zp->z_pflags & ZFS_OPAQUE) != 0); 3162 XVA_SET_RTN(xvap, XAT_OPAQUE); 3163 } 3164 3165 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3166 xoap->xoa_av_quarantined = 3167 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 3168 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 3169 } 3170 3171 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3172 xoap->xoa_av_modified = 3173 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 3174 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 3175 } 3176 3177 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 3178 vp->v_type == VREG) { 3179 zfs_sa_get_scanstamp(zp, xvap); 3180 } 3181 3182 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3183 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 3184 XVA_SET_RTN(xvap, XAT_REPARSE); 3185 } 3186 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 3187 xoap->xoa_generation = zp->z_gen; 3188 XVA_SET_RTN(xvap, XAT_GEN); 3189 } 3190 3191 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 3192 xoap->xoa_offline = 3193 ((zp->z_pflags & ZFS_OFFLINE) != 0); 3194 XVA_SET_RTN(xvap, XAT_OFFLINE); 3195 } 3196 3197 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 3198 xoap->xoa_sparse = 3199 ((zp->z_pflags & ZFS_SPARSE) != 0); 3200 XVA_SET_RTN(xvap, XAT_SPARSE); 3201 } 3202 } 3203 3204 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 3205 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 3206 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 3207 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 3208 3209 3210 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 3211 vap->va_blksize = blksize; 3212 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 3213 3214 if (zp->z_blksz == 0) { 3215 /* 3216 * Block size hasn't been set; suggest maximal I/O transfers. 3217 */ 3218 vap->va_blksize = zfsvfs->z_max_blksz; 3219 } 3220 3221 ZFS_EXIT(zfsvfs); 3222 return (0); 3223 } 3224 3225 /* 3226 * Set the file attributes to the values contained in the 3227 * vattr structure. 3228 * 3229 * IN: vp - vnode of file to be modified. 3230 * vap - new attribute values. 3231 * If AT_XVATTR set, then optional attrs are being set 3232 * flags - ATTR_UTIME set if non-default time values provided. 
 *		- ATTR_NOACLCHECK (CIFS context only).
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
/* ARGSUSED */
static int
zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	vattr_t		oldva;
	xvattr_t	tmpxvattr;
	uint_t		mask = vap->va_mask;
	uint_t		saved_mask = 0;
	uint64_t	saved_mode;
	int		trim_mask = 0;
	uint64_t	new_mode;
	uint64_t	new_uid, new_gid;
	uint64_t	xattr_obj;
	uint64_t	mtime[2], ctime[2];
	znode_t		*attrzp;
	int		need_policy = FALSE;
	int		err, err2;
	zfs_fuid_info_t *fuidp = NULL;
	xvattr_t	*xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t	*xoap;
	zfs_acl_t	*aclp;
	boolean_t	skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t	fuid_dirtied = B_FALSE;
	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
	int		count = 0, xattr_count = 0;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have an ephemeral uid/gid or an xvattr
	 * specified, the file system is at the proper version level.
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EISDIR));
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files allow only the immutable bit and atime to be
	 * altered.
	 */
	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/*
	 * Verify that the timestamps don't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32-bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOVERFLOW));
		}
	}
	if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
	    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EOVERFLOW));
	}

	attrzp = NULL;
	aclp = NULL;

	/* Can this be moved to before the top label?
*/ 3353 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3354 ZFS_EXIT(zfsvfs); 3355 return (SET_ERROR(EROFS)); 3356 } 3357 3358 /* 3359 * First validate permissions 3360 */ 3361 3362 if (mask & AT_SIZE) { 3363 /* 3364 * XXX - Note, we are not providing any open 3365 * mode flags here (like FNDELAY), so we may 3366 * block if there are locks present... this 3367 * should be addressed in openat(). 3368 */ 3369 /* XXX - would it be OK to generate a log record here? */ 3370 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3371 if (err) { 3372 ZFS_EXIT(zfsvfs); 3373 return (err); 3374 } 3375 } 3376 3377 if (mask & (AT_ATIME|AT_MTIME) || 3378 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3379 XVA_ISSET_REQ(xvap, XAT_READONLY) || 3380 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3381 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3382 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3383 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3384 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3385 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3386 skipaclchk, cr); 3387 } 3388 3389 if (mask & (AT_UID|AT_GID)) { 3390 int idmask = (mask & (AT_UID|AT_GID)); 3391 int take_owner; 3392 int take_group; 3393 3394 /* 3395 * NOTE: even if a new mode is being set, 3396 * we may clear S_ISUID/S_ISGID bits. 3397 */ 3398 3399 if (!(mask & AT_MODE)) 3400 vap->va_mode = zp->z_mode; 3401 3402 /* 3403 * Take ownership or chgrp to group we are a member of 3404 */ 3405 3406 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3407 take_group = (mask & AT_GID) && 3408 zfs_groupmember(zfsvfs, vap->va_gid, cr); 3409 3410 /* 3411 * If both AT_UID and AT_GID are set then take_owner and 3412 * take_group must both be set in order to allow taking 3413 * ownership. 3414 * 3415 * Otherwise, send the check through secpolicy_vnode_setattr() 3416 * 3417 */ 3418 3419 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3420 ((idmask == AT_UID) && take_owner) || 3421 ((idmask == AT_GID) && take_group)) { 3422 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3423 skipaclchk, cr) == 0) { 3424 /* 3425 * Remove setuid/setgid for non-privileged users 3426 */ 3427 secpolicy_setid_clear(vap, vp, cr); 3428 trim_mask = (mask & (AT_UID|AT_GID)); 3429 } else { 3430 need_policy = TRUE; 3431 } 3432 } else { 3433 need_policy = TRUE; 3434 } 3435 } 3436 3437 oldva.va_mode = zp->z_mode; 3438 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3439 if (mask & AT_XVATTR) { 3440 /* 3441 * Update xvattr mask to include only those attributes 3442 * that are actually changing. 3443 * 3444 * the bits will be restored prior to actually setting 3445 * the attributes so the caller thinks they were set. 
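 *
 * The per-flag dance below, sketched with a placeholder XAT_FOO:
 *
 *	requested != current  -> need_policy = TRUE (let policy decide)
 *	requested == current  -> XVA_CLR_REQ(xvap, XAT_FOO);
 *				 XVA_SET_REQ(&tmpxvattr, XAT_FOO);
 *
 * tmpxvattr records what was cleared so those request bits can be
 * re-set before returning to the caller.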
3446 */ 3447 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3448 if (xoap->xoa_appendonly != 3449 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3450 need_policy = TRUE; 3451 } else { 3452 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3453 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3454 } 3455 } 3456 3457 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3458 if (xoap->xoa_nounlink != 3459 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3460 need_policy = TRUE; 3461 } else { 3462 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3463 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3464 } 3465 } 3466 3467 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3468 if (xoap->xoa_immutable != 3469 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3470 need_policy = TRUE; 3471 } else { 3472 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3473 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3474 } 3475 } 3476 3477 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3478 if (xoap->xoa_nodump != 3479 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3480 need_policy = TRUE; 3481 } else { 3482 XVA_CLR_REQ(xvap, XAT_NODUMP); 3483 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3484 } 3485 } 3486 3487 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3488 if (xoap->xoa_av_modified != 3489 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3490 need_policy = TRUE; 3491 } else { 3492 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3493 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3494 } 3495 } 3496 3497 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3498 if ((vp->v_type != VREG && 3499 xoap->xoa_av_quarantined) || 3500 xoap->xoa_av_quarantined != 3501 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3502 need_policy = TRUE; 3503 } else { 3504 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3505 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3506 } 3507 } 3508 3509 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3510 ZFS_EXIT(zfsvfs); 3511 return (SET_ERROR(EPERM)); 3512 } 3513 3514 if (need_policy == FALSE && 3515 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3516 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3517 need_policy = TRUE; 3518 } 3519 } 3520 3521 if (mask & AT_MODE) { 3522 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3523 err = secpolicy_setid_setsticky_clear(vp, vap, 3524 &oldva, cr); 3525 if (err) { 3526 ZFS_EXIT(zfsvfs); 3527 return (err); 3528 } 3529 trim_mask |= AT_MODE; 3530 } else { 3531 need_policy = TRUE; 3532 } 3533 } 3534 3535 if (need_policy) { 3536 /* 3537 * If trim_mask is set then take ownership 3538 * has been granted or write_acl is present and user 3539 * has the ability to modify mode. In that case remove 3540 * UID|GID and or MODE from mask so that 3541 * secpolicy_vnode_setattr() doesn't revoke it. 3542 */ 3543 3544 if (trim_mask) { 3545 saved_mask = vap->va_mask; 3546 vap->va_mask &= ~trim_mask; 3547 if (trim_mask & AT_MODE) { 3548 /* 3549 * Save the mode, as secpolicy_vnode_setattr() 3550 * will overwrite it with ova.va_mode. 3551 */ 3552 saved_mode = vap->va_mode; 3553 } 3554 } 3555 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3556 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3557 if (err) { 3558 ZFS_EXIT(zfsvfs); 3559 return (err); 3560 } 3561 3562 if (trim_mask) { 3563 vap->va_mask |= saved_mask; 3564 if (trim_mask & AT_MODE) { 3565 /* 3566 * Recover the mode after 3567 * secpolicy_vnode_setattr(). 
3568 */ 3569 vap->va_mode = saved_mode; 3570 } 3571 } 3572 } 3573 3574 /* 3575 * secpolicy_vnode_setattr, or take ownership may have 3576 * changed va_mask 3577 */ 3578 mask = vap->va_mask; 3579 3580 if ((mask & (AT_UID | AT_GID))) { 3581 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3582 &xattr_obj, sizeof (xattr_obj)); 3583 3584 if (err == 0 && xattr_obj) { 3585 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3586 if (err == 0) { 3587 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3588 if (err != 0) 3589 vrele(ZTOV(attrzp)); 3590 } 3591 if (err) 3592 goto out2; 3593 } 3594 if (mask & AT_UID) { 3595 new_uid = zfs_fuid_create(zfsvfs, 3596 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3597 if (new_uid != zp->z_uid && 3598 zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3599 if (attrzp) 3600 vput(ZTOV(attrzp)); 3601 err = SET_ERROR(EDQUOT); 3602 goto out2; 3603 } 3604 } 3605 3606 if (mask & AT_GID) { 3607 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3608 cr, ZFS_GROUP, &fuidp); 3609 if (new_gid != zp->z_gid && 3610 zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3611 if (attrzp) 3612 vput(ZTOV(attrzp)); 3613 err = SET_ERROR(EDQUOT); 3614 goto out2; 3615 } 3616 } 3617 } 3618 tx = dmu_tx_create(zfsvfs->z_os); 3619 3620 if (mask & AT_MODE) { 3621 uint64_t pmode = zp->z_mode; 3622 uint64_t acl_obj; 3623 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3624 3625 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3626 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3627 err = SET_ERROR(EPERM); 3628 goto out; 3629 } 3630 3631 if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3632 goto out; 3633 3634 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3635 /* 3636 * Are we upgrading ACL from old V0 format 3637 * to V1 format? 3638 */ 3639 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3640 zfs_znode_acl_version(zp) == 3641 ZFS_ACL_VERSION_INITIAL) { 3642 dmu_tx_hold_free(tx, acl_obj, 0, 3643 DMU_OBJECT_END); 3644 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3645 0, aclp->z_acl_bytes); 3646 } else { 3647 dmu_tx_hold_write(tx, acl_obj, 0, 3648 aclp->z_acl_bytes); 3649 } 3650 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3651 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3652 0, aclp->z_acl_bytes); 3653 } 3654 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3655 } else { 3656 if ((mask & AT_XVATTR) && 3657 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3658 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3659 else 3660 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3661 } 3662 3663 if (attrzp) { 3664 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3665 } 3666 3667 fuid_dirtied = zfsvfs->z_fuid_dirty; 3668 if (fuid_dirtied) 3669 zfs_fuid_txhold(zfsvfs, tx); 3670 3671 zfs_sa_upgrade_txholds(tx, zp); 3672 3673 err = dmu_tx_assign(tx, TXG_WAIT); 3674 if (err) 3675 goto out; 3676 3677 count = 0; 3678 /* 3679 * Set each attribute requested. 3680 * We group settings according to the locks they need to acquire. 3681 * 3682 * Note: you cannot set ctime directly, although it will be 3683 * updated as a side-effect of calling this function. 
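 *
 * (The new values are staged into bulk[] and xattr_bulk[] here and
 * written out by the sa_bulk_update() calls near the end of this
 * function.)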
3684 */ 3685 3686 if (mask & (AT_UID|AT_GID|AT_MODE)) 3687 mutex_enter(&zp->z_acl_lock); 3688 3689 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3690 &zp->z_pflags, sizeof (zp->z_pflags)); 3691 3692 if (attrzp) { 3693 if (mask & (AT_UID|AT_GID|AT_MODE)) 3694 mutex_enter(&attrzp->z_acl_lock); 3695 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3696 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3697 sizeof (attrzp->z_pflags)); 3698 } 3699 3700 if (mask & (AT_UID|AT_GID)) { 3701 3702 if (mask & AT_UID) { 3703 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3704 &new_uid, sizeof (new_uid)); 3705 zp->z_uid = new_uid; 3706 if (attrzp) { 3707 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3708 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3709 sizeof (new_uid)); 3710 attrzp->z_uid = new_uid; 3711 } 3712 } 3713 3714 if (mask & AT_GID) { 3715 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3716 NULL, &new_gid, sizeof (new_gid)); 3717 zp->z_gid = new_gid; 3718 if (attrzp) { 3719 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3720 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3721 sizeof (new_gid)); 3722 attrzp->z_gid = new_gid; 3723 } 3724 } 3725 if (!(mask & AT_MODE)) { 3726 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3727 NULL, &new_mode, sizeof (new_mode)); 3728 new_mode = zp->z_mode; 3729 } 3730 err = zfs_acl_chown_setattr(zp); 3731 ASSERT(err == 0); 3732 if (attrzp) { 3733 err = zfs_acl_chown_setattr(attrzp); 3734 ASSERT(err == 0); 3735 } 3736 } 3737 3738 if (mask & AT_MODE) { 3739 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3740 &new_mode, sizeof (new_mode)); 3741 zp->z_mode = new_mode; 3742 ASSERT3U((uintptr_t)aclp, !=, 0); 3743 err = zfs_aclset_common(zp, aclp, cr, tx); 3744 ASSERT0(err); 3745 if (zp->z_acl_cached) 3746 zfs_acl_free(zp->z_acl_cached); 3747 zp->z_acl_cached = aclp; 3748 aclp = NULL; 3749 } 3750 3751 3752 if (mask & AT_ATIME) { 3753 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3754 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3755 &zp->z_atime, sizeof (zp->z_atime)); 3756 } 3757 3758 if (mask & AT_MTIME) { 3759 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3760 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3761 mtime, sizeof (mtime)); 3762 } 3763 3764 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3765 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3766 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3767 NULL, mtime, sizeof (mtime)); 3768 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3769 &ctime, sizeof (ctime)); 3770 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3771 B_TRUE); 3772 } else if (mask != 0) { 3773 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3774 &ctime, sizeof (ctime)); 3775 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3776 B_TRUE); 3777 if (attrzp) { 3778 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3779 SA_ZPL_CTIME(zfsvfs), NULL, 3780 &ctime, sizeof (ctime)); 3781 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3782 mtime, ctime, B_TRUE); 3783 } 3784 } 3785 /* 3786 * Do this after setting timestamps to prevent timestamp 3787 * update from toggling bit 3788 */ 3789 3790 if (xoap && (mask & AT_XVATTR)) { 3791 3792 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3793 xoap->xoa_createtime = vap->va_birthtime; 3794 /* 3795 * restore trimmed off masks 3796 * so that return masks can be set for caller. 
3797 */ 3798 3799 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3800 XVA_SET_REQ(xvap, XAT_APPENDONLY); 3801 } 3802 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3803 XVA_SET_REQ(xvap, XAT_NOUNLINK); 3804 } 3805 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3806 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3807 } 3808 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3809 XVA_SET_REQ(xvap, XAT_NODUMP); 3810 } 3811 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3812 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3813 } 3814 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3815 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3816 } 3817 3818 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3819 ASSERT(vp->v_type == VREG); 3820 3821 zfs_xvattr_set(zp, xvap, tx); 3822 } 3823 3824 if (fuid_dirtied) 3825 zfs_fuid_sync(zfsvfs, tx); 3826 3827 if (mask != 0) 3828 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3829 3830 if (mask & (AT_UID|AT_GID|AT_MODE)) 3831 mutex_exit(&zp->z_acl_lock); 3832 3833 if (attrzp) { 3834 if (mask & (AT_UID|AT_GID|AT_MODE)) 3835 mutex_exit(&attrzp->z_acl_lock); 3836 } 3837 out: 3838 if (err == 0 && attrzp) { 3839 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3840 xattr_count, tx); 3841 ASSERT(err2 == 0); 3842 } 3843 3844 if (attrzp) 3845 vput(ZTOV(attrzp)); 3846 3847 if (aclp) 3848 zfs_acl_free(aclp); 3849 3850 if (fuidp) { 3851 zfs_fuid_info_free(fuidp); 3852 fuidp = NULL; 3853 } 3854 3855 if (err) { 3856 dmu_tx_abort(tx); 3857 } else { 3858 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3859 dmu_tx_commit(tx); 3860 } 3861 3862 out2: 3863 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3864 zil_commit(zilog, 0); 3865 3866 ZFS_EXIT(zfsvfs); 3867 return (err); 3868 } 3869 3870 /* 3871 * We acquire all but fdvp locks using non-blocking acquisitions. If we 3872 * fail to acquire any lock in the path we will drop all held locks, 3873 * acquire the new lock in a blocking fashion, and then release it and 3874 * restart the rename. This acquire/release step ensures that we do not 3875 * spin on a lock waiting for release. On error release all vnode locks 3876 * and decrement references the way tmpfs_rename() would do. 3877 */ 3878 static int 3879 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3880 struct vnode *tdvp, struct vnode **tvpp, 3881 const struct componentname *scnp, const struct componentname *tcnp) 3882 { 3883 zfsvfs_t *zfsvfs; 3884 struct vnode *nvp, *svp, *tvp; 3885 znode_t *sdzp, *tdzp, *szp, *tzp; 3886 #ifdef __FreeBSD__ 3887 const char *snm = scnp->cn_nameptr; 3888 const char *tnm = tcnp->cn_nameptr; 3889 #endif 3890 #ifdef __NetBSD__ 3891 char *snm, *tnm; 3892 #endif 3893 int error; 3894 3895 #ifdef __FreeBSD__ 3896 VOP_UNLOCK(tdvp, 0); 3897 if (*tvpp != NULL && *tvpp != tdvp) 3898 VOP_UNLOCK(*tvpp, 0); 3899 #endif 3900 3901 relock: 3902 error = vn_lock(sdvp, LK_EXCLUSIVE); 3903 if (error) 3904 goto out; 3905 sdzp = VTOZ(sdvp); 3906 3907 #ifdef __NetBSD__ 3908 if (tdvp == sdvp) { 3909 } else { 3910 #endif 3911 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3912 if (error != 0) { 3913 VOP_UNLOCK(sdvp, 0); 3914 if (error != EBUSY) 3915 goto out; 3916 error = vn_lock(tdvp, LK_EXCLUSIVE); 3917 if (error) 3918 goto out; 3919 VOP_UNLOCK(tdvp, 0); 3920 goto relock; 3921 } 3922 #ifdef __NetBSD__ 3923 } /* end if (tdvp == sdvp) */ 3924 #endif 3925 3926 tdzp = VTOZ(tdvp); 3927 3928 /* 3929 * Before using sdzp and tdzp we must ensure that they are live. 3930 * As a porting legacy from illumos we have two things to worry 3931 * about. 
One is typical for FreeBSD and it is that the vnode is 3932 * not reclaimed (doomed). The other is that the znode is live. 3933 * The current code can invalidate the znode without acquiring the 3934 * corresponding vnode lock if the object represented by the znode 3935 * and vnode is no longer valid after a rollback or receive operation. 3936 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3937 * that protects the znodes from the invalidation. 3938 */ 3939 zfsvfs = sdzp->z_zfsvfs; 3940 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3941 ZFS_ENTER(zfsvfs); 3942 3943 /* 3944 * We can not use ZFS_VERIFY_ZP() here because it could directly return 3945 * bypassing the cleanup code in the case of an error. 3946 */ 3947 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3948 ZFS_EXIT(zfsvfs); 3949 VOP_UNLOCK(sdvp, 0); 3950 #ifdef __NetBSD__ 3951 if (tdvp != sdvp) 3952 #endif 3953 VOP_UNLOCK(tdvp, 0); 3954 error = SET_ERROR(EIO); 3955 goto out; 3956 } 3957 3958 /* 3959 * Re-resolve svp to be certain it still exists and fetch the 3960 * correct vnode. 3961 */ 3962 #ifdef __NetBSD__ 3963 /* ZFS wants a null-terminated name. */ 3964 snm = PNBUF_GET(); 3965 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 3966 #endif 3967 error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 3968 #ifdef __NetBSD__ 3969 PNBUF_PUT(snm); 3970 #endif 3971 if (error != 0) { 3972 /* Source entry invalid or not there. */ 3973 ZFS_EXIT(zfsvfs); 3974 VOP_UNLOCK(sdvp, 0); 3975 #ifdef __NetBSD__ 3976 if (tdvp != sdvp) 3977 #endif 3978 VOP_UNLOCK(tdvp, 0); 3979 if ((scnp->cn_flags & ISDOTDOT) != 0 || 3980 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 3981 error = SET_ERROR(EINVAL); 3982 goto out; 3983 } 3984 svp = ZTOV(szp); 3985 3986 /* 3987 * Re-resolve tvp, if it disappeared we just carry on. 3988 */ 3989 #ifdef __NetBSD__ 3990 /* ZFS wants a null-terminated name. */ 3991 tnm = PNBUF_GET(); 3992 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 3993 #endif 3994 error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 3995 #ifdef __NetBSD__ 3996 PNBUF_PUT(tnm); 3997 #endif 3998 if (error != 0) { 3999 ZFS_EXIT(zfsvfs); 4000 VOP_UNLOCK(sdvp, 0); 4001 #ifdef __NetBSD__ 4002 if (tdvp != sdvp) 4003 #endif 4004 VOP_UNLOCK(tdvp, 0); 4005 vrele(svp); 4006 if ((tcnp->cn_flags & ISDOTDOT) != 0) 4007 error = SET_ERROR(EINVAL); 4008 goto out; 4009 } 4010 if (tzp != NULL) 4011 tvp = ZTOV(tzp); 4012 else 4013 tvp = NULL; 4014 4015 /* 4016 * At present the vnode locks must be acquired before z_teardown_lock, 4017 * although it would be more logical to use the opposite order. 4018 */ 4019 ZFS_EXIT(zfsvfs); 4020 4021 /* 4022 * Now try acquire locks on svp and tvp. 4023 */ 4024 nvp = svp; 4025 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4026 if (error != 0) { 4027 VOP_UNLOCK(sdvp, 0); 4028 #ifdef __NetBSD__ 4029 if (tdvp != sdvp) 4030 #endif 4031 VOP_UNLOCK(tdvp, 0); 4032 if (tvp != NULL) 4033 vrele(tvp); 4034 if (error != EBUSY) { 4035 vrele(nvp); 4036 goto out; 4037 } 4038 error = vn_lock(nvp, LK_EXCLUSIVE); 4039 if (error != 0) { 4040 vrele(nvp); 4041 goto out; 4042 } 4043 VOP_UNLOCK(nvp, 0); 4044 /* 4045 * Concurrent rename race. 4046 * XXX ? 
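 *
 * (Presumably: the re-looked-up source now resolves to the target
 * directory itself, so another rename must have intervened; bail out
 * with EINVAL rather than chase a moving target.)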
4047 */ 4048 if (nvp == tdvp) { 4049 vrele(nvp); 4050 error = SET_ERROR(EINVAL); 4051 goto out; 4052 } 4053 #ifdef __NetBSD__ 4054 if (*svpp != NULL) 4055 #endif 4056 vrele(*svpp); 4057 *svpp = nvp; 4058 goto relock; 4059 } 4060 #ifdef __NetBSD__ 4061 if (*svpp != NULL) 4062 #endif 4063 vrele(*svpp); 4064 *svpp = nvp; 4065 4066 if (*tvpp != NULL) 4067 vrele(*tvpp); 4068 *tvpp = NULL; 4069 if (tvp != NULL) { 4070 nvp = tvp; 4071 4072 #ifdef __NetBSD__ 4073 if (tvp == svp || tvp == sdvp) { 4074 } else { 4075 #endif 4076 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4077 if (error != 0) { 4078 VOP_UNLOCK(sdvp, 0); 4079 #ifdef __NetBSD__ 4080 if (tdvp != sdvp) 4081 #endif 4082 VOP_UNLOCK(tdvp, 0); 4083 #ifdef __NetBSD__ 4084 if (*svpp != tdvp) 4085 #endif 4086 VOP_UNLOCK(*svpp, 0); 4087 if (error != EBUSY) { 4088 vrele(nvp); 4089 goto out; 4090 } 4091 error = vn_lock(nvp, LK_EXCLUSIVE); 4092 if (error != 0) { 4093 vrele(nvp); 4094 goto out; 4095 } 4096 vput(nvp); 4097 goto relock; 4098 } 4099 #ifdef __NetBSD__ 4100 } /* end if (tvp == svp || tvp == sdvp) */ 4101 #endif 4102 4103 *tvpp = nvp; 4104 } 4105 4106 KASSERT(VOP_ISLOCKED(sdvp) == LK_EXCLUSIVE); 4107 KASSERT(VOP_ISLOCKED(*svpp) == LK_EXCLUSIVE); 4108 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4109 KASSERT(*tvpp == NULL || VOP_ISLOCKED(*tvpp) == LK_EXCLUSIVE); 4110 4111 return (0); 4112 4113 out: 4114 return (error); 4115 } 4116 4117 /* 4118 * Note that we must use VRELE_ASYNC in this function as it walks 4119 * up the directory tree and vrele may need to acquire an exclusive 4120 * lock if a last reference to a vnode is dropped. 4121 */ 4122 static int 4123 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 4124 { 4125 zfsvfs_t *zfsvfs; 4126 znode_t *zp, *zp1; 4127 uint64_t parent; 4128 int error; 4129 4130 zfsvfs = tdzp->z_zfsvfs; 4131 if (tdzp == szp) 4132 return (SET_ERROR(EINVAL)); 4133 if (tdzp == sdzp) 4134 return (0); 4135 if (tdzp->z_id == zfsvfs->z_root) 4136 return (0); 4137 zp = tdzp; 4138 for (;;) { 4139 ASSERT(!zp->z_unlinked); 4140 if ((error = sa_lookup(zp->z_sa_hdl, 4141 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 4142 break; 4143 4144 if (parent == szp->z_id) { 4145 error = SET_ERROR(EINVAL); 4146 break; 4147 } 4148 if (parent == zfsvfs->z_root) 4149 break; 4150 if (parent == sdzp->z_id) 4151 break; 4152 4153 error = zfs_zget(zfsvfs, parent, &zp1); 4154 if (error != 0) 4155 break; 4156 4157 if (zp != tdzp) 4158 VN_RELE_ASYNC(ZTOV(zp), 4159 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4160 zp = zp1; 4161 } 4162 4163 if (error == ENOTDIR) 4164 panic("checkpath: .. not a directory\n"); 4165 if (zp != tdzp) 4166 VN_RELE_ASYNC(ZTOV(zp), 4167 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4168 return (error); 4169 } 4170 4171 /* 4172 * Move an entry from the provided source directory to the target 4173 * directory. Change the entry name as indicated. 4174 * 4175 * IN: sdvp - Source directory containing the "old entry". 4176 * snm - Old entry name. 4177 * tdvp - Target directory to contain the "new entry". 4178 * tnm - New entry name. 4179 * cr - credentials of caller. 4180 * ct - caller context 4181 * flags - case flags 4182 * 4183 * RETURN: 0 on success, error code on failure. 
4184 * 4185 * Timestamps: 4186 * sdvp,tdvp - ctime|mtime updated 4187 */ 4188 /*ARGSUSED*/ 4189 static int 4190 zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 4191 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 4192 cred_t *cr) 4193 { 4194 zfsvfs_t *zfsvfs; 4195 znode_t *sdzp, *tdzp, *szp, *tzp; 4196 zilog_t *zilog = NULL; 4197 dmu_tx_t *tx; 4198 #ifdef __FreeBSD__ 4199 char *snm = __UNCONST(scnp->cn_nameptr); 4200 char *tnm = __UNCONST(tcnp->cn_nameptr); 4201 #endif 4202 #ifdef __NetBSD__ 4203 char *snm, *tnm; 4204 #endif 4205 int error = 0; 4206 4207 /* Reject renames across filesystems. */ 4208 if (((*svpp) != NULL && (*svpp)->v_mount != tdvp->v_mount) || 4209 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 4210 error = SET_ERROR(EXDEV); 4211 goto out; 4212 } 4213 4214 if (zfsctl_is_node(tdvp)) { 4215 error = SET_ERROR(EXDEV); 4216 goto out; 4217 } 4218 4219 /* 4220 * Lock all four vnodes to ensure safety and semantics of renaming. 4221 */ 4222 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 4223 if (error != 0) { 4224 /* no vnodes are locked in the case of error here */ 4225 return (error); 4226 } 4227 4228 tdzp = VTOZ(tdvp); 4229 sdzp = VTOZ(sdvp); 4230 zfsvfs = tdzp->z_zfsvfs; 4231 zilog = zfsvfs->z_log; 4232 #ifdef __NetBSD__ 4233 /* ZFS wants a null-terminated name. */ 4234 snm = PNBUF_GET(); 4235 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4236 tnm = PNBUF_GET(); 4237 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4238 #endif 4239 4240 /* 4241 * After we re-enter ZFS_ENTER() we will have to revalidate all 4242 * znodes involved. 4243 */ 4244 ZFS_ENTER(zfsvfs); 4245 4246 if (zfsvfs->z_utf8 && u8_validate(tnm, 4247 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4248 error = SET_ERROR(EILSEQ); 4249 goto unlockout; 4250 } 4251 4252 #ifndef __NetBSD__ 4253 /* If source and target are the same file, there is nothing to do. */ 4254 if ((*svpp) == (*tvpp)) { 4255 error = 0; 4256 goto unlockout; 4257 } 4258 #endif 4259 4260 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 4261 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 4262 (*tvpp)->v_mountedhere != NULL)) { 4263 error = SET_ERROR(EXDEV); 4264 goto unlockout; 4265 } 4266 4267 /* 4268 * We can not use ZFS_VERIFY_ZP() here because it could directly return 4269 * bypassing the cleanup code in the case of an error. 4270 */ 4271 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 4272 error = SET_ERROR(EIO); 4273 goto unlockout; 4274 } 4275 4276 szp = VTOZ(*svpp); 4277 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 4278 if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 4279 error = SET_ERROR(EIO); 4280 goto unlockout; 4281 } 4282 4283 /* 4284 * This is to prevent the creation of links into attribute space 4285 * by renaming a linked file into/outof an attribute directory. 4286 * See the comment in zfs_link() for why this is considered bad. 4287 */ 4288 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 4289 error = SET_ERROR(EINVAL); 4290 goto unlockout; 4291 } 4292 4293 /* 4294 * Must have write access at the source to remove the old entry 4295 * and write access at the target to create the new entry. 4296 * Note that if target and source are the same, this can be 4297 * done in a single check. 4298 */ 4299 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 4300 goto unlockout; 4301 4302 if ((*svpp)->v_type == VDIR) { 4303 /* 4304 * Avoid ".", "..", and aliases of "." for obvious reasons. 
4305 */ 4306 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 4307 sdzp == szp || 4308 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 4309 error = SET_ERROR(EINVAL); 4310 goto unlockout; 4311 } 4312 4313 /* 4314 * Check to make sure rename is valid. 4315 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 4316 */ 4317 if (error = zfs_rename_check(szp, sdzp, tdzp)) 4318 goto unlockout; 4319 } 4320 4321 /* 4322 * Does target exist? 4323 */ 4324 if (tzp) { 4325 /* 4326 * Source and target must be the same type. 4327 */ 4328 if ((*svpp)->v_type == VDIR) { 4329 if ((*tvpp)->v_type != VDIR) { 4330 error = SET_ERROR(ENOTDIR); 4331 goto unlockout; 4332 } else { 4333 cache_purge(tdvp); 4334 if (sdvp != tdvp) 4335 cache_purge(sdvp); 4336 } 4337 } else { 4338 if ((*tvpp)->v_type == VDIR) { 4339 error = SET_ERROR(EISDIR); 4340 goto unlockout; 4341 } 4342 } 4343 4344 /* 4345 * POSIX dictates that when the source and target 4346 * entries refer to the same file object, rename 4347 * must do nothing and exit without error. 4348 */ 4349 #ifndef __NetBSD__ 4350 /* 4351 * But on NetBSD we have a different system call to do 4352 * this, posix_rename, which sorta kinda handles this 4353 * case (modulo races), and our tests expect BSD 4354 * semantics for rename, so we'll do that until we can 4355 * push the choice between BSD and POSIX semantics into 4356 * the VOP_RENAME protocol as a flag. 4357 */ 4358 if (szp->z_id == tzp->z_id) { 4359 error = 0; 4360 goto unlockout; 4361 } 4362 #endif 4363 } 4364 4365 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 4366 if (tzp) 4367 vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 4368 4369 /* 4370 * notify the target directory if it is not the same 4371 * as source directory. 4372 */ 4373 if (tdvp != sdvp) { 4374 vnevent_rename_dest_dir(tdvp, ct); 4375 } 4376 4377 tx = dmu_tx_create(zfsvfs->z_os); 4378 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4379 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 4380 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 4381 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 4382 if (sdzp != tdzp) { 4383 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 4384 zfs_sa_upgrade_txholds(tx, tdzp); 4385 } 4386 if (tzp) { 4387 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 4388 zfs_sa_upgrade_txholds(tx, tzp); 4389 } 4390 4391 zfs_sa_upgrade_txholds(tx, szp); 4392 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 4393 error = dmu_tx_assign(tx, TXG_WAIT); 4394 if (error) { 4395 dmu_tx_abort(tx); 4396 goto unlockout; 4397 } 4398 4399 4400 if (tzp && (tzp->z_id != szp->z_id)) 4401 /* Attempt to remove the existing target */ 4402 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 4403 4404 if (error == 0) { 4405 if (!tzp || (tzp->z_id != szp->z_id)) 4406 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 4407 if (error == 0) { 4408 szp->z_pflags |= ZFS_AV_MODIFIED; 4409 4410 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4411 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4412 ASSERT0(error); 4413 4414 error = zfs_link_destroy(sdzp, snm, szp, tx, 4415 /* Kludge for BSD rename semantics. */ 4416 tzp && tzp->z_id == szp->z_id ? 0: ZRENAMING, NULL); 4417 if (error == 0) { 4418 zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 4419 snm, tdzp, tnm, szp); 4420 4421 /* 4422 * Update path information for the target vnode 4423 */ 4424 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 4425 } else { 4426 /* 4427 * At this point, we have successfully created 4428 * the target name, but have failed to remove 4429 * the source name. 
Since the create was done 4430 * with the ZRENAMING flag, there are 4431 * complications; for one, the link count is 4432 * wrong. The easiest way to deal with this 4433 * is to remove the newly created target, and 4434 * return the original error. This must 4435 * succeed; fortunately, it is very unlikely to 4436 * fail, since we just created it. 4437 */ 4438 VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 4439 ZRENAMING, NULL), ==, 0); 4440 } 4441 } 4442 if (error == 0) { 4443 cache_purge(*svpp); 4444 if (*tvpp != NULL) 4445 cache_purge(*tvpp); 4446 cache_purge_negative(tdvp); 4447 } 4448 } 4449 4450 dmu_tx_commit(tx); 4451 4452 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4453 zil_commit(zilog, 0); 4454 4455 unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 4456 ZFS_EXIT(zfsvfs); 4457 4458 VOP_UNLOCK(*svpp, 0); 4459 VOP_UNLOCK(sdvp, 0); 4460 #ifdef __NetBSD__ 4461 PNBUF_PUT(snm); 4462 PNBUF_PUT(tnm); 4463 #endif 4464 4465 if (*tvpp != sdvp && *tvpp != *svpp) 4466 if (*tvpp != NULL) 4467 VOP_UNLOCK(*tvpp, 0); 4468 if (tdvp != sdvp && tdvp != *svpp) 4469 if (tdvp != *tvpp) 4470 VOP_UNLOCK(tdvp, 0); 4471 4472 out: 4473 return (error); 4474 } 4475 4476 /* 4477 * Insert the indicated symbolic reference entry into the directory. 4478 * 4479 * IN: dvp - Directory to contain new symbolic link. 4480 * link - Name for new symlink entry. 4481 * vap - Attributes of new entry. 4482 * cr - credentials of caller. 4483 * ct - caller context 4484 * flags - case flags 4485 * 4486 * RETURN: 0 on success, error code on failure. 4487 * 4488 * Timestamps: 4489 * dvp - ctime|mtime updated 4490 */ 4491 /*ARGSUSED*/ 4492 static int 4493 zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4494 cred_t *cr, kthread_t *td) 4495 { 4496 znode_t *zp, *dzp = VTOZ(dvp); 4497 dmu_tx_t *tx; 4498 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4499 zilog_t *zilog; 4500 uint64_t len = strlen(link); 4501 int error; 4502 zfs_acl_ids_t acl_ids; 4503 boolean_t fuid_dirtied; 4504 uint64_t txtype = TX_SYMLINK; 4505 int flags = 0; 4506 4507 ASSERT(vap->va_type == VLNK); 4508 4509 ZFS_ENTER(zfsvfs); 4510 ZFS_VERIFY_ZP(dzp); 4511 zilog = zfsvfs->z_log; 4512 4513 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4514 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4515 ZFS_EXIT(zfsvfs); 4516 return (SET_ERROR(EILSEQ)); 4517 } 4518 4519 if (len > MAXPATHLEN) { 4520 ZFS_EXIT(zfsvfs); 4521 return (SET_ERROR(ENAMETOOLONG)); 4522 } 4523 4524 if ((error = zfs_acl_ids_create(dzp, 0, 4525 vap, cr, NULL, &acl_ids)) != 0) { 4526 ZFS_EXIT(zfsvfs); 4527 return (error); 4528 } 4529 4530 /* 4531 * Attempt to lock directory; fail if entry already exists. 
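 * The last argument encodes the expectation, as these flags are used
 * throughout this file: ZNEW fails with EEXIST if the entry is
 * already present, while ZEXISTS (see the rename code above) fails
 * with ENOENT if it is not:
 *
 *	zfs_dirent_lookup(dzp, name, &zp, ZNEW);	// create paths
 *	zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);	// entry must exist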
4532 */ 4533 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4534 if (error) { 4535 zfs_acl_ids_free(&acl_ids); 4536 ZFS_EXIT(zfsvfs); 4537 return (error); 4538 } 4539 4540 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4541 zfs_acl_ids_free(&acl_ids); 4542 ZFS_EXIT(zfsvfs); 4543 return (error); 4544 } 4545 4546 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4547 zfs_acl_ids_free(&acl_ids); 4548 ZFS_EXIT(zfsvfs); 4549 return (SET_ERROR(EDQUOT)); 4550 } 4551 4552 getnewvnode_reserve(1); 4553 tx = dmu_tx_create(zfsvfs->z_os); 4554 fuid_dirtied = zfsvfs->z_fuid_dirty; 4555 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4556 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4557 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4558 ZFS_SA_BASE_ATTR_SIZE + len); 4559 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4560 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4561 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4562 acl_ids.z_aclp->z_acl_bytes); 4563 } 4564 if (fuid_dirtied) 4565 zfs_fuid_txhold(zfsvfs, tx); 4566 error = dmu_tx_assign(tx, TXG_WAIT); 4567 if (error) { 4568 zfs_acl_ids_free(&acl_ids); 4569 dmu_tx_abort(tx); 4570 getnewvnode_drop_reserve(); 4571 ZFS_EXIT(zfsvfs); 4572 return (error); 4573 } 4574 4575 /* 4576 * Create a new object for the symlink. 4577 * for version 4 ZPL datsets the symlink will be an SA attribute 4578 */ 4579 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4580 4581 if (fuid_dirtied) 4582 zfs_fuid_sync(zfsvfs, tx); 4583 4584 if (zp->z_is_sa) 4585 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4586 link, len, tx); 4587 else 4588 zfs_sa_symlink(zp, link, len, tx); 4589 4590 zp->z_size = len; 4591 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4592 &zp->z_size, sizeof (zp->z_size), tx); 4593 /* 4594 * Insert the new object into the directory. 4595 */ 4596 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4597 4598 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4599 *vpp = ZTOV(zp); 4600 4601 zfs_acl_ids_free(&acl_ids); 4602 4603 dmu_tx_commit(tx); 4604 4605 getnewvnode_drop_reserve(); 4606 4607 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4608 zil_commit(zilog, 0); 4609 4610 ZFS_EXIT(zfsvfs); 4611 return (error); 4612 } 4613 4614 /* 4615 * Return, in the buffer contained in the provided uio structure, 4616 * the symbolic path referred to by vp. 4617 * 4618 * IN: vp - vnode of symbolic link. 4619 * uio - structure to contain the link path. 4620 * cr - credentials of caller. 4621 * ct - caller context 4622 * 4623 * OUT: uio - structure containing the link path. 4624 * 4625 * RETURN: 0 on success, error code on failure. 4626 * 4627 * Timestamps: 4628 * vp - atime updated 4629 */ 4630 /* ARGSUSED */ 4631 static int 4632 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4633 { 4634 znode_t *zp = VTOZ(vp); 4635 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4636 int error; 4637 4638 ZFS_ENTER(zfsvfs); 4639 ZFS_VERIFY_ZP(zp); 4640 4641 if (zp->z_is_sa) 4642 error = sa_lookup_uio(zp->z_sa_hdl, 4643 SA_ZPL_SYMLINK(zfsvfs), uio); 4644 else 4645 error = zfs_sa_readlink(zp, uio); 4646 4647 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4648 4649 ZFS_EXIT(zfsvfs); 4650 return (error); 4651 } 4652 4653 /* 4654 * Insert a new entry into directory tdvp referencing svp. 4655 * 4656 * IN: tdvp - Directory to contain new entry. 4657 * svp - vnode of new entry. 4658 * name - name of new entry. 4659 * cr - credentials of caller. 4660 * ct - caller context 4661 * 4662 * RETURN: 0 on success, error code on failure. 
4663 * 4664 * Timestamps: 4665 * tdvp - ctime|mtime updated 4666 * svp - ctime updated 4667 */ 4668 /* ARGSUSED */ 4669 static int 4670 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4671 caller_context_t *ct, int flags) 4672 { 4673 znode_t *dzp = VTOZ(tdvp); 4674 znode_t *tzp, *szp; 4675 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4676 zilog_t *zilog; 4677 dmu_tx_t *tx; 4678 int error; 4679 uint64_t parent; 4680 uid_t owner; 4681 4682 ASSERT(tdvp->v_type == VDIR); 4683 4684 ZFS_ENTER(zfsvfs); 4685 ZFS_VERIFY_ZP(dzp); 4686 zilog = zfsvfs->z_log; 4687 4688 /* 4689 * POSIX dictates that we return EPERM here. 4690 * Better choices include ENOTSUP or EISDIR. 4691 */ 4692 if (svp->v_type == VDIR) { 4693 ZFS_EXIT(zfsvfs); 4694 return (SET_ERROR(EPERM)); 4695 } 4696 4697 szp = VTOZ(svp); 4698 ZFS_VERIFY_ZP(szp); 4699 4700 if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4701 ZFS_EXIT(zfsvfs); 4702 return (SET_ERROR(EPERM)); 4703 } 4704 4705 /* Prevent links to .zfs/shares files */ 4706 4707 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4708 &parent, sizeof (uint64_t))) != 0) { 4709 ZFS_EXIT(zfsvfs); 4710 return (error); 4711 } 4712 if (parent == zfsvfs->z_shares_dir) { 4713 ZFS_EXIT(zfsvfs); 4714 return (SET_ERROR(EPERM)); 4715 } 4716 4717 if (zfsvfs->z_utf8 && u8_validate(name, 4718 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4719 ZFS_EXIT(zfsvfs); 4720 return (SET_ERROR(EILSEQ)); 4721 } 4722 4723 /* 4724 * We do not support links between attributes and non-attributes 4725 * because of the potential security risk of creating links 4726 * into "normal" file space in order to circumvent restrictions 4727 * imposed in attribute space. 4728 */ 4729 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4730 ZFS_EXIT(zfsvfs); 4731 return (SET_ERROR(EINVAL)); 4732 } 4733 4734 4735 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4736 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4737 ZFS_EXIT(zfsvfs); 4738 return (SET_ERROR(EPERM)); 4739 } 4740 4741 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4742 ZFS_EXIT(zfsvfs); 4743 return (error); 4744 } 4745 4746 /* 4747 * Attempt to lock directory; fail if entry already exists. 4748 */ 4749 error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4750 if (error) { 4751 ZFS_EXIT(zfsvfs); 4752 return (error); 4753 } 4754 4755 tx = dmu_tx_create(zfsvfs->z_os); 4756 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4757 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4758 zfs_sa_upgrade_txholds(tx, szp); 4759 zfs_sa_upgrade_txholds(tx, dzp); 4760 error = dmu_tx_assign(tx, TXG_WAIT); 4761 if (error) { 4762 dmu_tx_abort(tx); 4763 ZFS_EXIT(zfsvfs); 4764 return (error); 4765 } 4766 4767 error = zfs_link_create(dzp, name, szp, tx, 0); 4768 4769 if (error == 0) { 4770 uint64_t txtype = TX_LINK; 4771 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4772 } 4773 4774 dmu_tx_commit(tx); 4775 4776 if (error == 0) { 4777 vnevent_link(svp, ct); 4778 } 4779 4780 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4781 zil_commit(zilog, 0); 4782 4783 ZFS_EXIT(zfsvfs); 4784 return (error); 4785 } 4786 4787 4788 /*ARGSUSED*/ 4789 void 4790 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4791 { 4792 znode_t *zp = VTOZ(vp); 4793 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4794 int error; 4795 4796 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4797 if (zp->z_sa_hdl == NULL) { 4798 /* 4799 * The fs has been unmounted, or we did a 4800 * suspend/resume and this file no longer exists. 
4801 */ 4802 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4803 vrecycle(vp); 4804 return; 4805 } 4806 4807 if (zp->z_unlinked) { 4808 /* 4809 * Fast path to recycle a vnode of a removed file. 4810 */ 4811 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4812 vrecycle(vp); 4813 return; 4814 } 4815 4816 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4817 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4818 4819 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4820 zfs_sa_upgrade_txholds(tx, zp); 4821 error = dmu_tx_assign(tx, TXG_WAIT); 4822 if (error) { 4823 dmu_tx_abort(tx); 4824 } else { 4825 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4826 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4827 zp->z_atime_dirty = 0; 4828 dmu_tx_commit(tx); 4829 } 4830 } 4831 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4832 } 4833 4834 4835 #ifdef __FreeBSD__ 4836 CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4837 CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4838 #endif 4839 4840 /*ARGSUSED*/ 4841 static int 4842 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4843 { 4844 znode_t *zp = VTOZ(vp); 4845 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4846 uint32_t gen; 4847 uint64_t gen64; 4848 uint64_t object = zp->z_id; 4849 zfid_short_t *zfid; 4850 int size, i, error; 4851 4852 ZFS_ENTER(zfsvfs); 4853 ZFS_VERIFY_ZP(zp); 4854 4855 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4856 &gen64, sizeof (uint64_t))) != 0) { 4857 ZFS_EXIT(zfsvfs); 4858 return (error); 4859 } 4860 4861 gen = (uint32_t)gen64; 4862 4863 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4864 4865 #ifdef illumos 4866 if (fidp->fid_len < size) { 4867 fidp->fid_len = size; 4868 ZFS_EXIT(zfsvfs); 4869 return (SET_ERROR(ENOSPC)); 4870 } 4871 #else 4872 fidp->fid_len = size; 4873 #endif 4874 4875 zfid = (zfid_short_t *)fidp; 4876 4877 zfid->zf_len = size; 4878 4879 for (i = 0; i < sizeof (zfid->zf_object); i++) 4880 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4881 4882 /* Must have a non-zero generation number to distinguish from .zfs */ 4883 if (gen == 0) 4884 gen = 1; 4885 for (i = 0; i < sizeof (zfid->zf_gen); i++) 4886 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4887 4888 if (size == LONG_FID_LEN) { 4889 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4890 zfid_long_t *zlfid; 4891 4892 zlfid = (zfid_long_t *)fidp; 4893 4894 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4895 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4896 4897 /* XXX - this should be the generation number for the objset */ 4898 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4899 zlfid->zf_setgen[i] = 0; 4900 } 4901 4902 ZFS_EXIT(zfsvfs); 4903 return (0); 4904 } 4905 4906 static int 4907 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4908 caller_context_t *ct) 4909 { 4910 znode_t *zp, *xzp; 4911 zfsvfs_t *zfsvfs; 4912 int error; 4913 4914 switch (cmd) { 4915 case _PC_LINK_MAX: 4916 *valp = INT_MAX; 4917 return (0); 4918 4919 case _PC_FILESIZEBITS: 4920 *valp = 64; 4921 return (0); 4922 #ifdef illumos 4923 case _PC_XATTR_EXISTS: 4924 zp = VTOZ(vp); 4925 zfsvfs = zp->z_zfsvfs; 4926 ZFS_ENTER(zfsvfs); 4927 ZFS_VERIFY_ZP(zp); 4928 *valp = 0; 4929 error = zfs_dirent_lookup(zp, "", &xzp, 4930 ZXATTR | ZEXISTS | ZSHARED); 4931 if (error == 0) { 4932 if (!zfs_dirempty(xzp)) 4933 *valp = 1; 4934 vrele(ZTOV(xzp)); 4935 } else if (error == ENOENT) { 4936 /* 4937 * If there aren't extended attributes, it's the 4938 * same as having zero of them. 
4939 */ 4940 error = 0; 4941 } 4942 ZFS_EXIT(zfsvfs); 4943 return (error); 4944 4945 case _PC_SATTR_ENABLED: 4946 case _PC_SATTR_EXISTS: 4947 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4948 (vp->v_type == VREG || vp->v_type == VDIR); 4949 return (0); 4950 4951 case _PC_ACCESS_FILTERING: 4952 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4953 vp->v_type == VDIR; 4954 return (0); 4955 4956 case _PC_ACL_ENABLED: 4957 *valp = _ACL_ACE_ENABLED; 4958 return (0); 4959 #endif /* illumos */ 4960 case _PC_MIN_HOLE_SIZE: 4961 *valp = (int)SPA_MINBLOCKSIZE; 4962 return (0); 4963 #ifdef illumos 4964 case _PC_TIMESTAMP_RESOLUTION: 4965 /* nanosecond timestamp resolution */ 4966 *valp = 1L; 4967 return (0); 4968 #endif 4969 case _PC_ACL_EXTENDED: 4970 *valp = 0; 4971 return (0); 4972 4973 #ifndef __NetBSD__ 4974 case _PC_ACL_NFS4: 4975 *valp = 1; 4976 return (0); 4977 4978 case _PC_ACL_PATH_MAX: 4979 *valp = ACL_MAX_ENTRIES; 4980 return (0); 4981 #endif 4982 4983 default: 4984 return (EOPNOTSUPP); 4985 } 4986 } 4987 4988 /*ARGSUSED*/ 4989 static int 4990 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4991 caller_context_t *ct) 4992 { 4993 znode_t *zp = VTOZ(vp); 4994 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4995 int error; 4996 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4997 4998 ZFS_ENTER(zfsvfs); 4999 ZFS_VERIFY_ZP(zp); 5000 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5001 ZFS_EXIT(zfsvfs); 5002 5003 return (error); 5004 } 5005 5006 /*ARGSUSED*/ 5007 int 5008 zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5009 caller_context_t *ct) 5010 { 5011 znode_t *zp = VTOZ(vp); 5012 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5013 int error; 5014 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5015 zilog_t *zilog = zfsvfs->z_log; 5016 5017 ZFS_ENTER(zfsvfs); 5018 ZFS_VERIFY_ZP(zp); 5019 5020 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5021 5022 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5023 zil_commit(zilog, 0); 5024 5025 ZFS_EXIT(zfsvfs); 5026 return (error); 5027 } 5028 5029 static int 5030 ioflags(int ioflags) 5031 { 5032 int flags = 0; 5033 5034 if (ioflags & IO_APPEND) 5035 flags |= FAPPEND; 5036 if (ioflags & IO_NDELAY) 5037 flags |= FNONBLOCK; 5038 if (ioflags & IO_SYNC) 5039 flags |= (FSYNC | FDSYNC | FRSYNC); 5040 5041 return (flags); 5042 } 5043 5044 #ifdef __NetBSD__ 5045 5046 static int 5047 zfs_netbsd_open(void *v) 5048 { 5049 struct vop_open_args *ap = v; 5050 5051 return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL)); 5052 } 5053 5054 static int 5055 zfs_netbsd_close(void *v) 5056 { 5057 struct vop_close_args *ap = v; 5058 5059 return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 5060 } 5061 5062 static int 5063 zfs_netbsd_ioctl(void *v) 5064 { 5065 struct vop_ioctl_args *ap = v; 5066 5067 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5068 ap->a_fflag, ap->a_cred, NULL, NULL)); 5069 } 5070 5071 5072 static int 5073 zfs_netbsd_read(void *v) 5074 { 5075 struct vop_read_args *ap = v; 5076 vnode_t *vp = ap->a_vp; 5077 znode_t *zp = VTOZ(vp); 5078 5079 switch (vp->v_type) { 5080 case VBLK: 5081 case VCHR: 5082 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5083 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); 5084 case VFIFO: 5085 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5086 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); 5087 } 5088 5089 return (zfs_read(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); 5090 } 5091 5092 static int 5093 zfs_netbsd_write(void *v) 5094 { 5095 struct vop_write_args *ap = v; 5096 vnode_t *vp = ap->a_vp; 5097 5098 switch (vp->v_type) { 5099 case VBLK: 5100 case VCHR: 5101 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5102 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); 5103 case VFIFO: 5104 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5105 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); 5106 } 5107 5108 return (zfs_write(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); 5109 } 5110 5111 static int 5112 zfs_netbsd_access(void *v) 5113 { 5114 struct vop_access_args /* { 5115 struct vnode *a_vp; 5116 int a_mode; 5117 kauth_cred_t a_cred; 5118 } */ *ap = v; 5119 struct vnode *vp = ap->a_vp; 5120 int mode = ap->a_mode; 5121 mode_t zfs_mode = 0; 5122 kauth_cred_t cred = ap->a_cred; 5123 int error; 5124 5125 /* 5126 * XXX This is really random, especially the left shift by six, 5127 * and it exists only because of randomness in zfs_unix_to_v4 5128 * and zfs_zaccess_rwx in zfs_acl.c. 5129 */ 5130 if (mode & VREAD) 5131 zfs_mode |= S_IROTH; 5132 if (mode & VWRITE) 5133 zfs_mode |= S_IWOTH; 5134 if (mode & VEXEC) 5135 zfs_mode |= S_IXOTH; 5136 zfs_mode <<= 6; 5137 5138 KASSERT(VOP_ISLOCKED(vp)); 5139 error = zfs_access(vp, zfs_mode, 0, cred, NULL); 5140 5141 /* We expect EACCES as common error. 
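 * zfs_access() can fail a permission check with EPERM, but NetBSD
 * callers expect EACCES for an ordinary denial, so translate.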
*/ 5142 if (error == EPERM) 5143 error = EACCES; 5144 5145 return (error); 5146 } 5147 5148 static int 5149 zfs_netbsd_lookup(void *v) 5150 { 5151 struct vop_lookup_v2_args /* { 5152 struct vnode *a_dvp; 5153 struct vnode **a_vpp; 5154 struct componentname *a_cnp; 5155 } */ *ap = v; 5156 struct vnode *dvp = ap->a_dvp; 5157 struct vnode **vpp = ap->a_vpp; 5158 struct componentname *cnp = ap->a_cnp; 5159 char *nm, short_nm[31]; 5160 int error; 5161 int iswhiteout; 5162 5163 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5164 5165 *vpp = NULL; 5166 5167 /* 5168 * Do an access check before the cache lookup. zfs_lookup does 5169 * an access check too, but it's too scary to contemplate 5170 * injecting our namecache stuff into zfs internals. 5171 * 5172 * XXX Is this the correct access check? 5173 */ 5174 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 5175 goto out; 5176 5177 /* 5178 * Check the namecache before entering zfs_lookup. 5179 * cache_lookup does the locking dance for us. 5180 */ 5181 if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 5182 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { 5183 if (iswhiteout) { 5184 cnp->cn_flags |= ISWHITEOUT; 5185 } 5186 return *vpp == NULL ? ENOENT : 0; 5187 } 5188 5189 /* 5190 * zfs_lookup wants a null-terminated component name, but namei 5191 * gives us a pointer into the full pathname. 5192 */ 5193 ASSERT(cnp->cn_namelen < PATH_MAX - 1); 5194 if (cnp->cn_namelen + 1 > sizeof(short_nm)) 5195 nm = PNBUF_GET(); 5196 else 5197 nm = short_nm; 5198 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5199 5200 error = zfs_lookup(dvp, nm, vpp, 0, cnp, cnp->cn_nameiop, cnp->cn_cred); 5201 5202 if (nm != short_nm) 5203 PNBUF_PUT(nm); 5204 5205 /* 5206 * Translate errors to match our namei insanity. Also, if the 5207 * caller wants to create an entry here, it's apparently our 5208 * responsibility as lookup to make sure that's permissible. 5209 * Go figure. 5210 */ 5211 if (cnp->cn_flags & ISLASTCN) { 5212 switch (cnp->cn_nameiop) { 5213 case CREATE: 5214 case RENAME: 5215 if (error == ENOENT) { 5216 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5217 if (error) 5218 break; 5219 error = EJUSTRETURN; 5220 break; 5221 } 5222 break; 5223 case DELETE: 5224 if (error == 0) { 5225 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5226 if (error) { 5227 VN_RELE(*vpp); 5228 *vpp = NULL; 5229 } 5230 } 5231 break; 5232 } 5233 } 5234 5235 if (error) { 5236 KASSERT(*vpp == NULL); 5237 goto out; 5238 } 5239 KASSERT(*vpp != NULL); 5240 5241 if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { 5242 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5243 KASSERT(dvp == *vpp); 5244 } else if ((cnp->cn_namelen == 2) && 5245 (cnp->cn_nameptr[0] == '.') && 5246 (cnp->cn_nameptr[1] == '.')) { 5247 KASSERT(cnp->cn_flags & ISDOTDOT); 5248 } else { 5249 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5250 } 5251 5252 out: 5253 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5254 5255 /* 5256 * Insert name into cache if appropriate. 
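 * Positive results are always cached; negative results (ENOENT) only
 * when the lookup was not for CREATE, since a subsequent create
 * would make the cached answer stale.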
5257 */ 5258 5259 if (error == 0 || (error == ENOENT && cnp->cn_nameiop != CREATE)) 5260 cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, 5261 cnp->cn_flags); 5262 5263 return (error); 5264 } 5265 5266 static int 5267 zfs_netbsd_create(void *v) 5268 { 5269 struct vop_create_v3_args /* { 5270 struct vnode *a_dvp; 5271 struct vnode **a_vpp; 5272 struct componentname *a_cnp; 5273 struct vattr *a_vap; 5274 } */ *ap = v; 5275 struct vnode *dvp = ap->a_dvp; 5276 struct vnode **vpp = ap->a_vpp; 5277 struct componentname *cnp = ap->a_cnp; 5278 struct vattr *vap = ap->a_vap; 5279 char *nm; 5280 int mode; 5281 int error; 5282 5283 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5284 5285 vattr_init_mask(vap); 5286 mode = vap->va_mode & ALLPERMS; 5287 5288 /* ZFS wants a null-terminated name. */ 5289 nm = PNBUF_GET(); 5290 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5291 5292 /* XXX !EXCL is wrong here... */ 5293 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5294 5295 PNBUF_PUT(nm); 5296 5297 KASSERT((error == 0) == (*vpp != NULL)); 5298 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5299 VOP_UNLOCK(*vpp, 0); 5300 5301 return (error); 5302 } 5303 5304 static int 5305 zfs_netbsd_mknod(void *v) 5306 { 5307 struct vop_mknod_v3_args /* { 5308 struct vnode *a_dvp; 5309 struct vnode **a_vpp; 5310 struct componentname *a_cnp; 5311 struct vattr *a_vap; 5312 } */ *ap = v; 5313 struct vnode *dvp = ap->a_dvp; 5314 struct vnode **vpp = ap->a_vpp; 5315 struct componentname *cnp = ap->a_cnp; 5316 struct vattr *vap = ap->a_vap; 5317 char *nm; 5318 int mode; 5319 int error; 5320 5321 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5322 5323 vattr_init_mask(vap); 5324 mode = vap->va_mode & ALLPERMS; 5325 5326 /* ZFS wants a null-terminated name. */ 5327 nm = PNBUF_GET(); 5328 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5329 5330 /* XXX !EXCL is wrong here... */ 5331 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5332 5333 PNBUF_PUT(nm); 5334 5335 KASSERT((error == 0) == (*vpp != NULL)); 5336 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5337 VOP_UNLOCK(*vpp, 0); 5338 5339 return (error); 5340 } 5341 5342 static int 5343 zfs_netbsd_remove(void *v) 5344 { 5345 struct vop_remove_v2_args /* { 5346 struct vnode *a_dvp; 5347 struct vnode *a_vp; 5348 struct componentname *a_cnp; 5349 } */ *ap = v; 5350 struct vnode *dvp = ap->a_dvp; 5351 struct vnode *vp = ap->a_vp; 5352 struct componentname *cnp = ap->a_cnp; 5353 char *nm; 5354 int error; 5355 5356 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5357 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5358 5359 /* ZFS wants a null-terminated name. */ 5360 nm = PNBUF_GET(); 5361 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5362 5363 error = zfs_remove(dvp, vp, nm, cnp->cn_cred); 5364 5365 PNBUF_PUT(nm); 5366 vput(vp); 5367 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5368 return (error); 5369 } 5370 5371 static int 5372 zfs_netbsd_mkdir(void *v) 5373 { 5374 struct vop_mkdir_v3_args /* { 5375 struct vnode *a_dvp; 5376 struct vnode **a_vpp; 5377 struct componentname *a_cnp; 5378 struct vattr *a_vap; 5379 } */ *ap = v; 5380 struct vnode *dvp = ap->a_dvp; 5381 struct vnode **vpp = ap->a_vpp; 5382 struct componentname *cnp = ap->a_cnp; 5383 struct vattr *vap = ap->a_vap; 5384 char *nm; 5385 int error; 5386 5387 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5388 5389 vattr_init_mask(vap); 5390 5391 /* ZFS wants a null-terminated name. 
*/ 5392 nm = PNBUF_GET(); 5393 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5394 5395 error = zfs_mkdir(dvp, nm, vap, vpp, cnp->cn_cred); 5396 5397 PNBUF_PUT(nm); 5398 5399 KASSERT((error == 0) == (*vpp != NULL)); 5400 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5401 VOP_UNLOCK(*vpp, 0); 5402 5403 return (error); 5404 } 5405 5406 static int 5407 zfs_netbsd_rmdir(void *v) 5408 { 5409 struct vop_rmdir_v2_args /* { 5410 struct vnode *a_dvp; 5411 struct vnode *a_vp; 5412 struct componentname *a_cnp; 5413 } */ *ap = v; 5414 struct vnode *dvp = ap->a_dvp; 5415 struct vnode *vp = ap->a_vp; 5416 struct componentname *cnp = ap->a_cnp; 5417 char *nm; 5418 int error; 5419 5420 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5421 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5422 5423 /* ZFS wants a null-terminated name. */ 5424 nm = PNBUF_GET(); 5425 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5426 5427 error = zfs_rmdir(dvp, vp, nm, cnp->cn_cred); 5428 5429 PNBUF_PUT(nm); 5430 vput(vp); 5431 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5432 return error; 5433 } 5434 5435 static int 5436 zfs_netbsd_readdir(void *v) 5437 { 5438 struct vop_readdir_args *ap = v; 5439 5440 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5441 ap->a_ncookies, ap->a_cookies)); 5442 } 5443 5444 static int 5445 zfs_netbsd_fsync(void *v) 5446 { 5447 struct vop_fsync_args *ap = v; 5448 5449 return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5450 } 5451 5452 static int 5453 zfs_netbsd_getattr(void *v) 5454 { 5455 struct vop_getattr_args *ap = v; 5456 vattr_t *vap = ap->a_vap; 5457 xvattr_t xvap; 5458 u_long fflags = 0; 5459 int error; 5460 5461 xva_init(&xvap); 5462 xvap.xva_vattr = *vap; 5463 xvap.xva_vattr.va_mask |= AT_XVATTR; 5464 5465 /* Convert chflags into ZFS-type flags. */ 5466 /* XXX: what about SF_SETTABLE?. */ 5467 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5468 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5469 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5470 XVA_SET_REQ(&xvap, XAT_NODUMP); 5471 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5472 if (error != 0) 5473 return (error); 5474 5475 /* Convert ZFS xattr into chflags. 
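 * e.g. a file with xoa_immutable set reports SF_IMMUTABLE in
 * va_flags, which chflags(1) displays as "schg".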
*/ 5476 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 5477 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5478 fflags |= (fflag); \ 5479 } while (0) 5480 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5481 xvap.xva_xoptattrs.xoa_immutable); 5482 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5483 xvap.xva_xoptattrs.xoa_appendonly); 5484 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5485 xvap.xva_xoptattrs.xoa_nounlink); 5486 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5487 xvap.xva_xoptattrs.xoa_nodump); 5488 #undef FLAG_CHECK 5489 *vap = xvap.xva_vattr; 5490 vap->va_flags = fflags; 5491 return (0); 5492 } 5493 5494 static int 5495 zfs_netbsd_setattr(void *v) 5496 { 5497 struct vop_setattr_args *ap = v; 5498 vnode_t *vp = ap->a_vp; 5499 vattr_t *vap = ap->a_vap; 5500 cred_t *cred = ap->a_cred; 5501 znode_t *zp = VTOZ(vp); 5502 xvattr_t xvap; 5503 kauth_action_t action; 5504 u_long fflags, sfflags = 0; 5505 uint64_t zflags; 5506 int error, flags = 0; 5507 bool changing_sysflags; 5508 5509 vattr_init_mask(vap); 5510 vap->va_mask &= ~AT_NOSET; 5511 if (ISSET(vap->va_vaflags, VA_UTIMES_NULL)) 5512 flags |= ATTR_UTIME; 5513 5514 xva_init(&xvap); 5515 xvap.xva_vattr = *vap; 5516 5517 zflags = VTOZ(vp)->z_pflags; 5518 5519 if (vap->va_flags != VNOVAL) { 5520 int error; 5521 5522 fflags = vap->va_flags; 5523 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 5524 return (EOPNOTSUPP); 5525 5526 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5527 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5528 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5529 XVA_SET_REQ(&xvap, (xflag)); \ 5530 (xfield) = ((fflags & (fflag)) != 0); \ 5531 if (((fflag) & SF_SETTABLE) != 0) \ 5532 sfflags |= (fflag); \ 5533 } \ 5534 } while (0) 5535 /* Convert chflags into ZFS-type flags. */ 5536 /* XXX: what about SF_SETTABLE?. 
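 * FLAG_CHANGE requests an attribute update only when the new fflags
 * bit differs from the current z_pflags bit, and collects any SF_
 * flag into sfflags so that KAUTH_VNODE_WRITE_SYSFLAGS authorization
 * is demanded below.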
*/ 5537 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5538 xvap.xva_xoptattrs.xoa_immutable); 5539 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5540 xvap.xva_xoptattrs.xoa_appendonly); 5541 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5542 xvap.xva_xoptattrs.xoa_nounlink); 5543 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5544 xvap.xva_xoptattrs.xoa_nodump); 5545 #undef FLAG_CHANGE 5546 5547 action = KAUTH_VNODE_WRITE_FLAGS; 5548 changing_sysflags = false; 5549 5550 if (zflags & (ZFS_IMMUTABLE|ZFS_APPENDONLY|ZFS_NOUNLINK)) { 5551 action |= KAUTH_VNODE_HAS_SYSFLAGS; 5552 } 5553 if (sfflags != 0) { 5554 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 5555 changing_sysflags = true; 5556 } 5557 5558 error = kauth_authorize_vnode(cred, action, vp, NULL, 5559 genfs_can_chflags(cred, vp->v_type, zp->z_uid, 5560 changing_sysflags)); 5561 if (error) 5562 return error; 5563 } 5564 5565 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 5566 vap->va_birthtime.tv_sec != VNOVAL) { 5567 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, 5568 NULL, genfs_can_chtimes(vp, vap->va_vaflags, zp->z_uid, 5569 cred)); 5570 if (error) 5571 return error; 5572 } 5573 5574 return (zfs_setattr(vp, (vattr_t *)&xvap, flags, cred, NULL)); 5575 } 5576 5577 static int 5578 zfs_netbsd_rename(void *v) 5579 { 5580 struct vop_rename_args /* { 5581 struct vnode *a_fdvp; 5582 struct vnode *a_fvp; 5583 struct componentname *a_fcnp; 5584 struct vnode *a_tdvp; 5585 struct vnode *a_tvp; 5586 struct componentname *a_tcnp; 5587 } */ *ap = v; 5588 vnode_t *fdvp = ap->a_fdvp; 5589 vnode_t *fvp = ap->a_fvp; 5590 struct componentname *fcnp = ap->a_fcnp; 5591 vnode_t *tdvp = ap->a_tdvp; 5592 vnode_t *tvp = ap->a_tvp; 5593 struct componentname *tcnp = ap->a_tcnp; 5594 kauth_cred_t cred; 5595 int error; 5596 5597 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 5598 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 5599 KASSERT(fdvp->v_type == VDIR); 5600 KASSERT(tdvp->v_type == VDIR); 5601 5602 cred = fcnp->cn_cred; 5603 5604 /* 5605 * XXX Want a better equality test. `tcnp->cn_cred == cred' 5606 * hoses p2k because puffs transmits the creds separately and 5607 * allocates distinct but equivalent structures for them. 5608 */ 5609 KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred)); 5610 5611 /* 5612 * Drop the insane locks. 5613 */ 5614 VOP_UNLOCK(tdvp, 0); 5615 if (tvp != NULL && tvp != tdvp) 5616 VOP_UNLOCK(tvp, 0); 5617 5618 /* 5619 * Release the source and target nodes; zfs_rename will look 5620 * them up again once the locking situation is sane. 5621 */ 5622 VN_RELE(fvp); 5623 if (tvp != NULL) 5624 VN_RELE(tvp); 5625 fvp = NULL; 5626 tvp = NULL; 5627 5628 /* 5629 * Do the rename ZFSly. 5630 */ 5631 error = zfs_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp, cred); 5632 5633 /* 5634 * Release the directories now too, because the VOP_RENAME 5635 * protocol is insane. 
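 * zfs_rename() handed back fresh references in fvp/tvp if it
 * re-resolved the names, so drop those too, along with the directory
 * references the caller gave us.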
5636 */ 5637 5638 VN_RELE(fdvp); 5639 VN_RELE(tdvp); 5640 if (fvp != NULL) 5641 VN_RELE(fvp); 5642 if (tvp != NULL) 5643 VN_RELE(tvp); 5644 5645 return (error); 5646 } 5647 5648 static int 5649 zfs_netbsd_symlink(void *v) 5650 { 5651 struct vop_symlink_v3_args /* { 5652 struct vnode *a_dvp; 5653 struct vnode **a_vpp; 5654 struct componentname *a_cnp; 5655 struct vattr *a_vap; 5656 char *a_target; 5657 } */ *ap = v; 5658 struct vnode *dvp = ap->a_dvp; 5659 struct vnode **vpp = ap->a_vpp; 5660 struct componentname *cnp = ap->a_cnp; 5661 struct vattr *vap = ap->a_vap; 5662 char *target = ap->a_target; 5663 char *nm; 5664 int error; 5665 5666 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5667 5668 vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */ 5669 vattr_init_mask(vap); 5670 5671 /* ZFS wants a null-terminated name. */ 5672 nm = PNBUF_GET(); 5673 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5674 5675 error = zfs_symlink(dvp, vpp, nm, vap, target, cnp->cn_cred, 0); 5676 5677 PNBUF_PUT(nm); 5678 5679 KASSERT((error == 0) == (*vpp != NULL)); 5680 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5681 VOP_UNLOCK(*vpp, 0); 5682 5683 return (error); 5684 } 5685 5686 static int 5687 zfs_netbsd_readlink(void *v) 5688 { 5689 struct vop_readlink_args *ap = v; 5690 5691 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5692 } 5693 5694 static int 5695 zfs_netbsd_link(void *v) 5696 { 5697 struct vop_link_v2_args /* { 5698 struct vnode *a_dvp; 5699 struct vnode *a_vp; 5700 struct componentname *a_cnp; 5701 } */ *ap = v; 5702 struct vnode *dvp = ap->a_dvp; 5703 struct vnode *vp = ap->a_vp; 5704 struct componentname *cnp = ap->a_cnp; 5705 char *nm; 5706 int error; 5707 5708 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5709 5710 /* ZFS wants a null-terminated name. */ 5711 nm = PNBUF_GET(); 5712 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5713 5714 vn_lock(vp, LK_EXCLUSIVE); 5715 error = zfs_link(dvp, vp, nm, cnp->cn_cred, 5716 NULL, 0); 5717 5718 PNBUF_PUT(nm); 5719 VOP_UNLOCK(vp, 0); 5720 return error; 5721 } 5722 5723 static int 5724 zfs_netbsd_inactive(void *v) 5725 { 5726 struct vop_inactive_v2_args *ap = v; 5727 vnode_t *vp = ap->a_vp; 5728 znode_t *zp = VTOZ(vp); 5729 5730 /* 5731 * NetBSD: nothing to do here, other than indicate if the 5732 * vnode should be reclaimed. No need to lock, if we race 5733 * vrele() will call us again. 5734 */ 5735 *ap->a_recycle = (zp->z_unlinked != 0); 5736 5737 return (0); 5738 } 5739 5740 static int 5741 zfs_netbsd_reclaim(void *v) 5742 { 5743 struct vop_reclaim_v2_args /* { 5744 struct vnode *a_vp; 5745 } */ *ap = v; 5746 struct vnode *vp = ap->a_vp; 5747 znode_t *zp; 5748 zfsvfs_t *zfsvfs; 5749 int error; 5750 5751 VOP_UNLOCK(vp, 0); 5752 zp = VTOZ(vp); 5753 zfsvfs = zp->z_zfsvfs; 5754 5755 KASSERTMSG(!vn_has_cached_data(vp), "vp %p", vp); 5756 5757 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5758 5759 /* 5760 * Process a deferred atime update. 
5761 */ 5762 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 5763 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 5764 5765 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5766 zfs_sa_upgrade_txholds(tx, zp); 5767 error = dmu_tx_assign(tx, TXG_WAIT); 5768 if (error) { 5769 dmu_tx_abort(tx); 5770 } else { 5771 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 5772 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 5773 zp->z_atime_dirty = 0; 5774 dmu_tx_commit(tx); 5775 } 5776 } 5777 5778 if (zfsvfs->z_log) 5779 zil_commit(zfsvfs->z_log, zp->z_id); 5780 5781 if (zp->z_sa_hdl == NULL) 5782 zfs_znode_free(zp); 5783 else 5784 zfs_zinactive(zp); 5785 rw_exit(&zfsvfs->z_teardown_inactive_lock); 5786 return 0; 5787 } 5788 5789 static int 5790 zfs_netbsd_fid(void *v) 5791 { 5792 struct vop_fid_args *ap = v; 5793 5794 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5795 } 5796 5797 static int 5798 zfs_netbsd_pathconf(void *v) 5799 { 5800 struct vop_pathconf_args *ap = v; 5801 ulong_t val; 5802 int error; 5803 5804 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL); 5805 if (error == 0) 5806 *ap->a_retval = val; 5807 else if (error == EOPNOTSUPP) { 5808 switch (ap->a_name) { 5809 case _PC_NAME_MAX: 5810 *ap->a_retval = NAME_MAX; 5811 return (0); 5812 case _PC_PATH_MAX: 5813 *ap->a_retval = PATH_MAX; 5814 return (0); 5815 case _PC_LINK_MAX: 5816 *ap->a_retval = LINK_MAX; 5817 return (0); 5818 case _PC_MAX_CANON: 5819 *ap->a_retval = MAX_CANON; 5820 return (0); 5821 case _PC_MAX_INPUT: 5822 *ap->a_retval = MAX_INPUT; 5823 return (0); 5824 case _PC_PIPE_BUF: 5825 *ap->a_retval = PIPE_BUF; 5826 return (0); 5827 case _PC_CHOWN_RESTRICTED: 5828 *ap->a_retval = 1; 5829 return (0); 5830 case _PC_NO_TRUNC: 5831 *ap->a_retval = 1; 5832 return (0); 5833 case _PC_VDISABLE: 5834 *ap->a_retval = _POSIX_VDISABLE; 5835 return (0); 5836 default: 5837 return (EINVAL); 5838 } 5839 /* NOTREACHED */ 5840 } 5841 return (error); 5842 } 5843 5844 static int 5845 zfs_netbsd_advlock(void *v) 5846 { 5847 struct vop_advlock_args /* { 5848 struct vnode *a_vp; 5849 void *a_id; 5850 int a_op; 5851 struct flock *a_fl; 5852 int a_flags; 5853 } */ *ap = v; 5854 struct vnode *vp; 5855 struct znode *zp; 5856 struct zfsvfs *zfsvfs; 5857 int error; 5858 5859 vp = ap->a_vp; 5860 zp = VTOZ(vp); 5861 zfsvfs = zp->z_zfsvfs; 5862 5863 ZFS_ENTER(zfsvfs); 5864 ZFS_VERIFY_ZP(zp); 5865 error = lf_advlock(ap, &zp->z_lockf, zp->z_size); 5866 ZFS_EXIT(zfsvfs); 5867 5868 return error; 5869 } 5870 5871 static int 5872 zfs_netbsd_getpages(void *v) 5873 { 5874 struct vop_getpages_args /* { 5875 struct vnode *a_vp; 5876 voff_t a_offset; 5877 struct vm_page **a_m; 5878 int *a_count; 5879 int a_centeridx; 5880 vm_prot_t a_access_type; 5881 int a_advice; 5882 int a_flags; 5883 } */ * const ap = v; 5884 5885 vnode_t *const vp = ap->a_vp; 5886 off_t offset = ap->a_offset + (ap->a_centeridx << PAGE_SHIFT); 5887 const int flags = ap->a_flags; 5888 const bool async = (flags & PGO_SYNCIO) == 0; 5889 const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0; 5890 5891 struct uvm_object * const uobj = &vp->v_uobj; 5892 kmutex_t * const mtx = uobj->vmobjlock; 5893 znode_t *zp = VTOZ(vp); 5894 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5895 struct vm_page *pg; 5896 caddr_t va; 5897 int npages, found, err = 0; 5898 5899 if (flags & PGO_LOCKED) { 5900 *ap->a_count = 0; 5901 ap->a_m[ap->a_centeridx] = NULL; 5902 return EBUSY; 5903 } 5904 mutex_exit(mtx); 5905 5906 if (async) { 5907 return 0; 5908 } 5909 if (*ap->a_count != 1) { 5910 return EBUSY; 5911 } 
5912 5913 ZFS_ENTER(zfsvfs); 5914 ZFS_VERIFY_ZP(zp); 5915 5916 mutex_enter(mtx); 5917 npages = 1; 5918 pg = NULL; 5919 uvn_findpages(uobj, offset, &npages, &pg, UFP_ALL); 5920 5921 if (pg->flags & PG_FAKE) { 5922 mutex_exit(mtx); 5923 5924 va = zfs_map_page(pg, S_WRITE); 5925 err = dmu_read(zfsvfs->z_os, zp->z_id, offset, PAGE_SIZE, 5926 va, DMU_READ_PREFETCH); 5927 zfs_unmap_page(pg, va); 5928 5929 mutex_enter(mtx); 5930 pg->flags &= ~(PG_FAKE); 5931 pmap_clear_modify(pg); 5932 } 5933 5934 if (memwrite) { 5935 if ((vp->v_iflag & VI_ONWORKLST) == 0) { 5936 vn_syncer_add_to_worklist(vp, filedelay); 5937 } 5938 if ((vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP) { 5939 vp->v_iflag |= VI_WRMAPDIRTY; 5940 } 5941 } 5942 mutex_exit(mtx); 5943 ap->a_m[ap->a_centeridx] = pg; 5944 5945 ZFS_EXIT(zfsvfs); 5946 5947 return (err); 5948 } 5949 5950 static int 5951 zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags) 5952 { 5953 znode_t *zp = VTOZ(vp); 5954 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5955 dmu_tx_t *tx; 5956 voff_t off, koff; 5957 voff_t len, klen; 5958 int err; 5959 5960 bool async = (flags & PGO_SYNCIO) == 0; 5961 bool *cleanedp; 5962 struct uvm_object *uobj = &vp->v_uobj; 5963 kmutex_t *mtx = uobj->vmobjlock; 5964 5965 if (zp->z_sa_hdl == NULL) { 5966 err = 0; 5967 goto out_unbusy; 5968 } 5969 5970 off = pp[0]->offset; 5971 len = count * PAGESIZE; 5972 KASSERT(off + len <= round_page(zp->z_size)); 5973 5974 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 5975 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 5976 err = SET_ERROR(EDQUOT); 5977 goto out; 5978 } 5979 tx = dmu_tx_create(zfsvfs->z_os); 5980 dmu_tx_hold_write(tx, zp->z_id, off, len); 5981 5982 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5983 zfs_sa_upgrade_txholds(tx, zp); 5984 err = dmu_tx_assign(tx, TXG_WAIT); 5985 if (err != 0) { 5986 dmu_tx_abort(tx); 5987 goto out; 5988 } 5989 5990 if (zp->z_blksz <= PAGESIZE) { 5991 KASSERTMSG(count == 1, "vp %p pp %p count %d", vp, pp, count); 5992 caddr_t va = zfs_map_page(*pp, S_READ); 5993 ASSERT3U(len, <=, PAGESIZE); 5994 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 5995 zfs_unmap_page(*pp, va); 5996 } else { 5997 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 5998 } 5999 cleanedp = tsd_get(zfs_putpage_key); 6000 *cleanedp = true; 6001 6002 if (err == 0) { 6003 uint64_t mtime[2], ctime[2]; 6004 sa_bulk_attr_t bulk[3]; 6005 int count = 0; 6006 6007 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 6008 &mtime, 16); 6009 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 6010 &ctime, 16); 6011 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6012 &zp->z_pflags, 8); 6013 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 6014 B_TRUE); 6015 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 6016 ASSERT0(err); 6017 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 6018 } 6019 dmu_tx_commit(tx); 6020 6021 out_unbusy: 6022 mutex_enter(mtx); 6023 mutex_enter(&uvm_pageqlock); 6024 uvm_page_unbusy(pp, count); 6025 mutex_exit(&uvm_pageqlock); 6026 mutex_exit(mtx); 6027 6028 out: 6029 return (err); 6030 } 6031 6032 static void 6033 zfs_netbsd_gop_markupdate(vnode_t *vp, int flags) 6034 { 6035 znode_t *zp = VTOZ(vp); 6036 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6037 dmu_tx_t *tx; 6038 sa_bulk_attr_t bulk[2]; 6039 uint64_t mtime[2], ctime[2]; 6040 int count = 0, err; 6041 6042 KASSERT(flags == GOP_UPDATE_MODIFIED); 6043 6044 tx = dmu_tx_create(zfsvfs->z_os); 6045 err = dmu_tx_assign(tx, TXG_WAIT); 6046 if (err != 0) { 6047 
dmu_tx_abort(tx); 6048 return; 6049 } 6050 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6051 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6052 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 6053 dmu_tx_commit(tx); 6054 } 6055 6056 static int 6057 zfs_netbsd_putpages(void *v) 6058 { 6059 struct vop_putpages_args /* { 6060 struct vnode *a_vp; 6061 voff_t a_offlo; 6062 voff_t a_offhi; 6063 int a_flags; 6064 } */ * const ap = v; 6065 6066 struct vnode *vp = ap->a_vp; 6067 voff_t offlo = ap->a_offlo; 6068 voff_t offhi = ap->a_offhi; 6069 int flags = ap->a_flags; 6070 6071 znode_t *zp = VTOZ(vp); 6072 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6073 rl_t *rl = NULL; 6074 uint64_t len; 6075 int error; 6076 bool cleaned = false; 6077 6078 bool async = (flags & PGO_SYNCIO) == 0; 6079 bool cleaning = (flags & PGO_CLEANIT) != 0; 6080 6081 if (cleaning) { 6082 ASSERT((offlo & PAGE_MASK) == 0 && (offhi & PAGE_MASK) == 0); 6083 ASSERT(offlo < offhi || offhi == 0); 6084 if (offhi == 0) 6085 len = UINT64_MAX; 6086 else 6087 len = offhi - offlo; 6088 mutex_exit(vp->v_interlock); 6089 if (curlwp == uvm.pagedaemon_lwp) { 6090 error = fstrans_start_nowait(vp->v_mount); 6091 if (error) 6092 return error; 6093 } else { 6094 vfs_t *mp = vp->v_mount; 6095 fstrans_start(mp); 6096 if (vp->v_mount != mp) { 6097 fstrans_done(mp); 6098 ASSERT(!vn_has_cached_data(vp)); 6099 return 0; 6100 } 6101 } 6102 /* 6103 * Cannot use ZFS_ENTER() here as it returns with error 6104 * if z_unmounted. The next statement is equivalent. 6105 */ 6106 rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 6107 6108 rl = zfs_range_lock(zp, offlo, len, RL_WRITER); 6109 mutex_enter(vp->v_interlock); 6110 tsd_set(zfs_putpage_key, &cleaned); 6111 } 6112 error = genfs_putpages(v); 6113 if (cleaning) { 6114 tsd_set(zfs_putpage_key, NULL); 6115 zfs_range_unlock(rl); 6116 6117 /* 6118 * Only zil_commit() if we cleaned something. This avoids 6119 * deadlock if we're called from zfs_netbsd_setsize(). 6120 */ 6121 6122 if (cleaned) 6123 if (!async || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 6124 zil_commit(zfsvfs->z_log, zp->z_id); 6125 ZFS_EXIT(zfsvfs); 6126 fstrans_done(vp->v_mount); 6127 } 6128 return error; 6129 } 6130 6131 /* 6132 * Restrict the putpages range to the ZFS block containing the offset. 6133 */ 6134 static void 6135 zfs_netbsd_gop_putrange(struct vnode *vp, off_t off, off_t *lop, off_t *hip) 6136 { 6137 znode_t *zp = VTOZ(vp); 6138 6139 *lop = trunc_page(rounddown2(off, zp->z_blksz)); 6140 *hip = round_page(*lop + zp->z_blksz); 6141 } 6142 6143 void 6144 zfs_netbsd_setsize(vnode_t *vp, off_t size) 6145 { 6146 struct uvm_object *uobj = &vp->v_uobj; 6147 kmutex_t *mtx = uobj->vmobjlock; 6148 page_t *pg; 6149 int count, pgoff; 6150 caddr_t va; 6151 off_t tsize; 6152 6153 uvm_vnp_setsize(vp, size); 6154 if (!vn_has_cached_data(vp)) 6155 return; 6156 6157 tsize = trunc_page(size); 6158 if (tsize == size) 6159 return; 6160 6161 /* 6162 * If there's a partial page, we need to zero the tail. 
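 * Otherwise the stale bytes between the new EOF and the end of the
 * page could leak back into the file if it is later extended or
 * mmapped.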
6163 */ 6164 6165 mutex_enter(mtx); 6166 count = 1; 6167 pg = NULL; 6168 if (uvn_findpages(uobj, tsize, &count, &pg, UFP_NOALLOC)) { 6169 va = zfs_map_page(pg, S_WRITE); 6170 pgoff = size - tsize; 6171 memset(va + pgoff, 0, PAGESIZE - pgoff); 6172 zfs_unmap_page(pg, va); 6173 uvm_page_unbusy(&pg, 1); 6174 } 6175 6176 mutex_exit(mtx); 6177 } 6178 6179 static int 6180 zfs_netbsd_print(void *v) 6181 { 6182 struct vop_print_args /* { 6183 struct vnode *a_vp; 6184 } */ *ap = v; 6185 vnode_t *vp; 6186 znode_t *zp; 6187 6188 vp = ap->a_vp; 6189 zp = VTOZ(vp); 6190 6191 printf("\tino %" PRIu64 " size %" PRIu64 "\n", 6192 zp->z_id, zp->z_size); 6193 return 0; 6194 } 6195 6196 const struct genfs_ops zfs_genfsops = { 6197 .gop_write = zfs_putapage, 6198 .gop_markupdate = zfs_netbsd_gop_markupdate, 6199 .gop_putrange = zfs_netbsd_gop_putrange, 6200 }; 6201 6202 #define zfs_netbsd_lock genfs_lock 6203 #define zfs_netbsd_unlock genfs_unlock 6204 #define zfs_netbsd_islocked genfs_islocked 6205 #define zfs_netbsd_seek genfs_seek 6206 #define zfs_netbsd_mmap genfs_mmap 6207 #define zfs_netbsd_fcntl genfs_fcntl 6208 6209 int (**zfs_vnodeop_p)(void *); 6210 const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = { 6211 { &vop_default_desc, vn_default_error }, 6212 { &vop_lookup_desc, zfs_netbsd_lookup }, 6213 { &vop_create_desc, zfs_netbsd_create }, 6214 { &vop_mknod_desc, zfs_netbsd_mknod }, 6215 { &vop_open_desc, zfs_netbsd_open }, 6216 { &vop_close_desc, zfs_netbsd_close }, 6217 { &vop_access_desc, zfs_netbsd_access }, 6218 { &vop_getattr_desc, zfs_netbsd_getattr }, 6219 { &vop_setattr_desc, zfs_netbsd_setattr }, 6220 { &vop_read_desc, zfs_netbsd_read }, 6221 { &vop_write_desc, zfs_netbsd_write }, 6222 { &vop_ioctl_desc, zfs_netbsd_ioctl }, 6223 { &vop_fsync_desc, zfs_netbsd_fsync }, 6224 { &vop_remove_desc, zfs_netbsd_remove }, 6225 { &vop_link_desc, zfs_netbsd_link }, 6226 { &vop_lock_desc, zfs_netbsd_lock }, 6227 { &vop_unlock_desc, zfs_netbsd_unlock }, 6228 { &vop_rename_desc, zfs_netbsd_rename }, 6229 { &vop_mkdir_desc, zfs_netbsd_mkdir }, 6230 { &vop_rmdir_desc, zfs_netbsd_rmdir }, 6231 { &vop_symlink_desc, zfs_netbsd_symlink }, 6232 { &vop_readdir_desc, zfs_netbsd_readdir }, 6233 { &vop_readlink_desc, zfs_netbsd_readlink }, 6234 { &vop_inactive_desc, zfs_netbsd_inactive }, 6235 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6236 { &vop_pathconf_desc, zfs_netbsd_pathconf }, 6237 { &vop_seek_desc, zfs_netbsd_seek }, 6238 { &vop_getpages_desc, zfs_netbsd_getpages }, 6239 { &vop_putpages_desc, zfs_netbsd_putpages }, 6240 { &vop_mmap_desc, zfs_netbsd_mmap }, 6241 { &vop_islocked_desc, zfs_netbsd_islocked }, 6242 { &vop_advlock_desc, zfs_netbsd_advlock }, 6243 { &vop_print_desc, zfs_netbsd_print }, 6244 { &vop_fcntl_desc, zfs_netbsd_fcntl }, 6245 { NULL, NULL } 6246 }; 6247 6248 const struct vnodeopv_desc zfs_vnodeop_opv_desc = 6249 { &zfs_vnodeop_p, zfs_vnodeop_entries }; 6250 6251 int (**zfs_specop_p)(void *); 6252 const struct vnodeopv_entry_desc zfs_specop_entries[] = { 6253 { &vop_default_desc, vn_default_error }, 6254 { &vop_lookup_desc, spec_lookup }, 6255 { &vop_create_desc, spec_create }, 6256 { &vop_mknod_desc, spec_mknod }, 6257 { &vop_open_desc, spec_open }, 6258 { &vop_close_desc, spec_close }, 6259 { &vop_access_desc, zfs_netbsd_access }, 6260 { &vop_getattr_desc, zfs_netbsd_getattr }, 6261 { &vop_setattr_desc, zfs_netbsd_setattr }, 6262 { &vop_read_desc, /**/zfs_netbsd_read }, 6263 { &vop_write_desc, /**/zfs_netbsd_write }, 6264 { &vop_ioctl_desc, spec_ioctl }, 6265 { &vop_fsync_desc, 
zfs_netbsd_fsync }, 6266 { &vop_remove_desc, spec_remove }, 6267 { &vop_link_desc, spec_link }, 6268 { &vop_lock_desc, zfs_netbsd_lock }, 6269 { &vop_unlock_desc, zfs_netbsd_unlock }, 6270 { &vop_rename_desc, spec_rename }, 6271 { &vop_mkdir_desc, spec_mkdir }, 6272 { &vop_rmdir_desc, spec_rmdir }, 6273 { &vop_symlink_desc, spec_symlink }, 6274 { &vop_readdir_desc, spec_readdir }, 6275 { &vop_readlink_desc, spec_readlink }, 6276 { &vop_inactive_desc, zfs_netbsd_inactive }, 6277 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6278 { &vop_pathconf_desc, spec_pathconf }, 6279 { &vop_seek_desc, spec_seek }, 6280 { &vop_getpages_desc, spec_getpages }, 6281 { &vop_putpages_desc, spec_putpages }, 6282 { &vop_mmap_desc, spec_mmap }, 6283 { &vop_islocked_desc, zfs_netbsd_islocked }, 6284 { &vop_advlock_desc, spec_advlock }, 6285 { &vop_print_desc, zfs_netbsd_print }, 6286 { &vop_fcntl_desc, zfs_netbsd_fcntl }, 6287 { NULL, NULL } 6288 }; 6289 6290 const struct vnodeopv_desc zfs_specop_opv_desc = 6291 { &zfs_specop_p, zfs_specop_entries }; 6292 6293 int (**zfs_fifoop_p)(void *); 6294 const struct vnodeopv_entry_desc zfs_fifoop_entries[] = { 6295 { &vop_default_desc, vn_default_error }, 6296 { &vop_lookup_desc, vn_fifo_bypass }, 6297 { &vop_create_desc, vn_fifo_bypass }, 6298 { &vop_mknod_desc, vn_fifo_bypass }, 6299 { &vop_open_desc, vn_fifo_bypass }, 6300 { &vop_close_desc, vn_fifo_bypass }, 6301 { &vop_access_desc, zfs_netbsd_access }, 6302 { &vop_getattr_desc, zfs_netbsd_getattr }, 6303 { &vop_setattr_desc, zfs_netbsd_setattr }, 6304 { &vop_read_desc, /**/zfs_netbsd_read }, 6305 { &vop_write_desc, /**/zfs_netbsd_write }, 6306 { &vop_ioctl_desc, vn_fifo_bypass }, 6307 { &vop_fsync_desc, zfs_netbsd_fsync }, 6308 { &vop_remove_desc, vn_fifo_bypass }, 6309 { &vop_link_desc, vn_fifo_bypass }, 6310 { &vop_lock_desc, zfs_netbsd_lock }, 6311 { &vop_unlock_desc, zfs_netbsd_unlock }, 6312 { &vop_rename_desc, vn_fifo_bypass }, 6313 { &vop_mkdir_desc, vn_fifo_bypass }, 6314 { &vop_rmdir_desc, vn_fifo_bypass }, 6315 { &vop_symlink_desc, vn_fifo_bypass }, 6316 { &vop_readdir_desc, vn_fifo_bypass }, 6317 { &vop_readlink_desc, vn_fifo_bypass }, 6318 { &vop_inactive_desc, zfs_netbsd_inactive }, 6319 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6320 { &vop_pathconf_desc, vn_fifo_bypass }, 6321 { &vop_seek_desc, vn_fifo_bypass }, 6322 { &vop_putpages_desc, vn_fifo_bypass }, 6323 { &vop_mmap_desc, vn_fifo_bypass }, 6324 { &vop_islocked_desc, zfs_netbsd_islocked }, 6325 { &vop_advlock_desc, vn_fifo_bypass }, 6326 { &vop_print_desc, zfs_netbsd_print }, 6327 { &vop_fcntl_desc, zfs_netbsd_fcntl }, 6328 { NULL, NULL } 6329 }; 6330 6331 const struct vnodeopv_desc zfs_fifoop_opv_desc = 6332 { &zfs_fifoop_p, zfs_fifoop_entries }; 6333 6334 #endif /* __NetBSD__ */ 6335