1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 */ 27 28 /* Portions Copyright 2007 Jeremy Teo */ 29 /* Portions Copyright 2010 Robert Milkowski */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/resource.h> 37 #include <sys/vfs.h> 38 #include <sys/vm.h> 39 #include <sys/vnode.h> 40 #include <sys/file.h> 41 #include <sys/stat.h> 42 #include <sys/kmem.h> 43 #include <sys/taskq.h> 44 #include <sys/uio.h> 45 #include <sys/atomic.h> 46 #include <sys/namei.h> 47 #include <sys/mman.h> 48 #include <sys/cmn_err.h> 49 #include <sys/errno.h> 50 #include <sys/unistd.h> 51 #include <sys/zfs_dir.h> 52 #include <sys/zfs_ioctl.h> 53 #include <sys/fs/zfs.h> 54 #include <sys/dmu.h> 55 #include <sys/dmu_objset.h> 56 #include <sys/spa.h> 57 #include <sys/txg.h> 58 #include <sys/dbuf.h> 59 #include <sys/zap.h> 60 #include <sys/sa.h> 61 #include <sys/dirent.h> 62 #include <sys/policy.h> 63 #include <sys/sunddi.h> 64 #include <sys/filio.h> 65 #include <sys/sid.h> 66 #include <sys/zfs_ctldir.h> 67 #include <sys/zfs_fuid.h> 68 #include <sys/zfs_sa.h> 69 #include <sys/dnlc.h> 70 #include <sys/zfs_rlock.h> 71 #include <sys/buf.h> 72 #include <sys/sched.h> 73 #include <sys/acl.h> 74 #include <sys/extdirent.h> 75 76 #ifdef __FreeBSD__ 77 #include <sys/kidmap.h> 78 #include <sys/bio.h> 79 #include <vm/vm_param.h> 80 #endif 81 82 #ifdef __NetBSD__ 83 #include <dev/mm.h> 84 #include <miscfs/fifofs/fifo.h> 85 #include <miscfs/genfs/genfs.h> 86 #include <miscfs/genfs/genfs_node.h> 87 #include <uvm/uvm_extern.h> 88 #include <sys/fstrans.h> 89 #include <sys/malloc.h> 90 91 uint_t zfs_putpage_key; 92 #endif 93 94 /* 95 * Programming rules. 96 * 97 * Each vnode op performs some logical unit of work. To do this, the ZPL must 98 * properly lock its in-core state, create a DMU transaction, do the work, 99 * record this work in the intent log (ZIL), commit the DMU transaction, 100 * and wait for the intent log to commit if it is a synchronous operation. 101 * Moreover, the vnode ops must work in both normal and log replay context. 102 * The ordering of events is important to avoid deadlocks and references 103 * to freed memory. The example below illustrates the following Big Rules: 104 * 105 * (1) A check must be made in each zfs thread for a mounted file system. 106 * This is done avoiding races using ZFS_ENTER(zfsvfs). 107 * A ZFS_EXIT(zfsvfs) is needed before all returns. 
Any znodes 108 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 109 * can return EIO from the calling function. 110 * 111 * (2) VN_RELE() should always be the last thing except for zil_commit() 112 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 113 * First, if it's the last reference, the vnode/znode 114 * can be freed, so the zp may point to freed memory. Second, the last 115 * reference will call zfs_zinactive(), which may induce a lot of work -- 116 * pushing cached pages (which acquires range locks) and syncing out 117 * cached atime changes. Third, zfs_zinactive() may require a new tx, 118 * which could deadlock the system if you were already holding one. 119 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 120 * 121 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 122 * as they can span dmu_tx_assign() calls. 123 * 124 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 125 * dmu_tx_assign(). This is critical because we don't want to block 126 * while holding locks. 127 * 128 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 129 * reduces lock contention and CPU usage when we must wait (note that if 130 * throughput is constrained by the storage, nearly every transaction 131 * must wait). 132 * 133 * Note, in particular, that if a lock is sometimes acquired before 134 * the tx assigns, and sometimes after (e.g. z_lock), then failing 135 * to use a non-blocking assign can deadlock the system. The scenario: 136 * 137 * Thread A has grabbed a lock before calling dmu_tx_assign(). 138 * Thread B is in an already-assigned tx, and blocks for this lock. 139 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 140 * forever, because the previous txg can't quiesce until B's tx commits. 141 * 142 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 143 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 144 * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, 145 * to indicate that this operation has already called dmu_tx_wait(). 146 * This will ensure that we don't retry forever, waiting a short bit 147 * each time. 148 * 149 * (5) If the operation succeeded, generate the intent log entry for it 150 * before dropping locks. This ensures that the ordering of events 151 * in the intent log matches the order in which they actually occurred. 152 * During ZIL replay the zfs_log_* functions will update the sequence 153 * number to indicate the zil transaction has replayed. 154 * 155 * (6) At the end of each vnode op, the DMU tx must always commit, 156 * regardless of whether there were any errors. 157 * 158 * (7) After dropping all locks, invoke zil_commit(zilog, foid) 159 * to ensure that synchronous semantics are provided when necessary. 160 * 161 * In general, this is how things should be ordered in each vnode op: 162 * 163 * ZFS_ENTER(zfsvfs); // exit if unmounted 164 * top: 165 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 166 * rw_enter(...); // grab any other locks you need 167 * tx = dmu_tx_create(...); // get DMU tx 168 * dmu_tx_hold_*(); // hold each object you might modify 169 * error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 170 * if (error) { 171 * rw_exit(...); // drop locks 172 * zfs_dirent_unlock(dl); // unlock directory entry 173 * VN_RELE(...); // release held vnodes 174 * if (error == ERESTART) { 175 * waited = B_TRUE; 176 * dmu_tx_wait(tx); 177 * dmu_tx_abort(tx); 178 * goto top; 179 * } 180 * dmu_tx_abort(tx); // abort DMU tx 181 * ZFS_EXIT(zfsvfs); // finished in zfs 182 * return (error); // really out of space 183 * } 184 * error = do_real_work(); // do whatever this VOP does 185 * if (error == 0) 186 * zfs_log_*(...); // on success, make ZIL entry 187 * dmu_tx_commit(tx); // commit DMU tx -- error or not 188 * rw_exit(...); // drop locks 189 * zfs_dirent_unlock(dl); // unlock directory entry 190 * VN_RELE(...); // release held vnodes 191 * zil_commit(zilog, foid); // synchronous when necessary 192 * ZFS_EXIT(zfsvfs); // finished in zfs 193 * return (error); // done, report error 194 */ 195 196 /* ARGSUSED */ 197 static int 198 zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 199 { 200 znode_t *zp = VTOZ(*vpp); 201 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 202 203 ZFS_ENTER(zfsvfs); 204 ZFS_VERIFY_ZP(zp); 205 206 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 207 ((flag & FAPPEND) == 0)) { 208 ZFS_EXIT(zfsvfs); 209 return (SET_ERROR(EPERM)); 210 } 211 212 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 213 ZTOV(zp)->v_type == VREG && 214 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 215 if (fs_vscan(*vpp, cr, 0) != 0) { 216 ZFS_EXIT(zfsvfs); 217 return (SET_ERROR(EACCES)); 218 } 219 } 220 221 /* Keep a count of the synchronous opens in the znode */ 222 if (flag & (FSYNC | FDSYNC)) 223 atomic_inc_32(&zp->z_sync_cnt); 224 225 ZFS_EXIT(zfsvfs); 226 return (0); 227 } 228 229 /* ARGSUSED */ 230 static int 231 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 232 caller_context_t *ct) 233 { 234 znode_t *zp = VTOZ(vp); 235 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 236 237 /* 238 * Clean up any locks held by this process on the vp. 239 */ 240 cleanlocks(vp, ddi_get_pid(), 0); 241 cleanshares(vp, ddi_get_pid()); 242 243 ZFS_ENTER(zfsvfs); 244 ZFS_VERIFY_ZP(zp); 245 246 /* Decrement the synchronous opens in the znode */ 247 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 248 atomic_dec_32(&zp->z_sync_cnt); 249 250 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 251 ZTOV(zp)->v_type == VREG && 252 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 253 VERIFY(fs_vscan(vp, cr, 1) == 0); 254 255 ZFS_EXIT(zfsvfs); 256 return (0); 257 } 258 259 /* 260 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 261 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 262 */ 263 static int 264 zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) 265 { 266 znode_t *zp = VTOZ(vp); 267 uint64_t noff = (uint64_t)*off; /* new offset */ 268 uint64_t file_sz; 269 int error; 270 boolean_t hole; 271 272 file_sz = zp->z_size; 273 if (noff >= file_sz) { 274 return (SET_ERROR(ENXIO)); 275 } 276 277 if (cmd == _FIO_SEEK_HOLE) 278 hole = B_TRUE; 279 else 280 hole = B_FALSE; 281 282 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 283 284 if (error == ESRCH) 285 return (SET_ERROR(ENXIO)); 286 287 /* 288 * We could find a hole that begins after the logical end-of-file, 289 * because dmu_offset_next() only works on whole blocks. If the 290 * EOF falls mid-block, then indicate that the "virtual hole" 291 * at the end of the file begins at the logical EOF, rather than 292 * at the end of the last block. 
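 *
 * For illustration (the numbers here are hypothetical, not taken from
 * this code): with a single 128K block backing a file whose logical
 * size is 100K, dmu_offset_next() reports the hole at 128K, the end of
 * the block. The clamp below moves noff back to the 100K logical EOF,
 * so a caller of the _FIO_SEEK_HOLE ioctl would see:
 *
 *	off = 0;
 *	error = zfs_holey(vp, _FIO_SEEK_HOLE, &off);
 *	// off == 100K (file_sz), not 128K (end of last block)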
293 */ 294 if (noff > file_sz) { 295 ASSERT(hole); 296 noff = file_sz; 297 } 298 299 if (noff < *off) 300 return (error); 301 *off = noff; 302 return (error); 303 } 304 305 /* ARGSUSED */ 306 static int 307 zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, 308 int *rvalp, caller_context_t *ct) 309 { 310 offset_t off; 311 offset_t ndata; 312 dmu_object_info_t doi; 313 int error; 314 zfsvfs_t *zfsvfs; 315 znode_t *zp; 316 317 switch (com) { 318 case _FIOFFS: 319 { 320 return (0); 321 322 /* 323 * The following two ioctls are used by bfu. Faking them out is 324 * necessary to avoid bfu errors. 325 */ 326 } 327 case _FIOGDIO: 328 case _FIOSDIO: 329 { 330 return (0); 331 } 332 333 case _FIO_SEEK_DATA: 334 case _FIO_SEEK_HOLE: 335 { 336 #ifdef illumos 337 if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 338 return (SET_ERROR(EFAULT)); 339 #else 340 off = *(offset_t *)data; 341 #endif 342 zp = VTOZ(vp); 343 zfsvfs = zp->z_zfsvfs; 344 ZFS_ENTER(zfsvfs); 345 ZFS_VERIFY_ZP(zp); 346 347 /* offset parameter is in/out */ 348 error = zfs_holey(vp, com, &off); 349 ZFS_EXIT(zfsvfs); 350 if (error) 351 return (error); 352 #ifdef illumos 353 if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 354 return (SET_ERROR(EFAULT)); 355 #else 356 *(offset_t *)data = off; 357 #endif 358 return (0); 359 } 360 #ifdef illumos 361 case _FIO_COUNT_FILLED: 362 { 363 /* 364 * _FIO_COUNT_FILLED adds a new ioctl command which 365 * exposes the number of filled blocks in a 366 * ZFS object. 367 */ 368 zp = VTOZ(vp); 369 zfsvfs = zp->z_zfsvfs; 370 ZFS_ENTER(zfsvfs); 371 ZFS_VERIFY_ZP(zp); 372 373 /* 374 * Wait for all dirty blocks for this object 375 * to get synced out to disk, and the DMU info 376 * updated. 377 */ 378 error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 379 if (error) { 380 ZFS_EXIT(zfsvfs); 381 return (error); 382 } 383 384 /* 385 * Retrieve fill count from DMU object. 386 */ 387 error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 388 if (error) { 389 ZFS_EXIT(zfsvfs); 390 return (error); 391 } 392 393 ndata = doi.doi_fill_count; 394 395 ZFS_EXIT(zfsvfs); 396 if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 397 return (SET_ERROR(EFAULT)); 398 return (0); 399 } 400 #endif 401 } 402 return (SET_ERROR(ENOTTY)); 403 } 404 405 #ifdef __FreeBSD__ 406 static vm_page_t 407 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 408 { 409 vm_object_t obj; 410 vm_page_t pp; 411 int64_t end; 412 413 /* 414 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 415 * aligned boundaries, if the range is not aligned. As a result a 416 * DEV_BSIZE subrange with partially dirty data may get marked as clean. 417 * It may happen that all DEV_BSIZE subranges are marked clean and thus 418 * the whole page would be considered clean despite having some dirty data. 419 * For this reason we should shrink the range to DEV_BSIZE aligned 420 * boundaries before calling vm_page_clear_dirty. 421 */ 422 end = rounddown2(off + nbytes, DEV_BSIZE); 423 off = roundup2(off, DEV_BSIZE); 424 nbytes = end - off; 425 426 obj = vp->v_object; 427 zfs_vmobject_assert_wlocked(obj); 428 429 for (;;) { 430 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 431 pp->valid) { 432 if (vm_page_xbusied(pp)) { 433 /* 434 * Reference the page before unlocking and 435 * sleeping so that the page daemon is less 436 * likely to reclaim it. 
437 */ 438 vm_page_reference(pp); 439 vm_page_lock(pp); 440 zfs_vmobject_wunlock(obj); 441 vm_page_busy_sleep(pp, "zfsmwb", true); 442 zfs_vmobject_wlock(obj); 443 continue; 444 } 445 vm_page_sbusy(pp); 446 } else if (pp != NULL) { 447 ASSERT(!pp->valid); 448 pp = NULL; 449 } 450 451 if (pp != NULL) { 452 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 453 vm_object_pip_add(obj, 1); 454 pmap_remove_write(pp); 455 if (nbytes != 0) 456 vm_page_clear_dirty(pp, off, nbytes); 457 } 458 break; 459 } 460 return (pp); 461 } 462 463 static void 464 page_unbusy(vm_page_t pp) 465 { 466 467 vm_page_sunbusy(pp); 468 vm_object_pip_subtract(pp->object, 1); 469 } 470 471 static vm_page_t 472 page_hold(vnode_t *vp, int64_t start) 473 { 474 vm_object_t obj; 475 vm_page_t pp; 476 477 obj = vp->v_object; 478 zfs_vmobject_assert_wlocked(obj); 479 480 for (;;) { 481 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 482 pp->valid) { 483 if (vm_page_xbusied(pp)) { 484 /* 485 * Reference the page before unlocking and 486 * sleeping so that the page daemon is less 487 * likely to reclaim it. 488 */ 489 vm_page_reference(pp); 490 vm_page_lock(pp); 491 zfs_vmobject_wunlock(obj); 492 vm_page_busy_sleep(pp, "zfsmwb", true); 493 zfs_vmobject_wlock(obj); 494 continue; 495 } 496 497 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 498 vm_page_lock(pp); 499 vm_page_hold(pp); 500 vm_page_unlock(pp); 501 502 } else 503 pp = NULL; 504 break; 505 } 506 return (pp); 507 } 508 509 static void 510 page_unhold(vm_page_t pp) 511 { 512 513 vm_page_lock(pp); 514 vm_page_unhold(pp); 515 vm_page_unlock(pp); 516 } 517 518 /* 519 * When a file is memory mapped, we must keep the IO data synchronized 520 * between the DMU cache and the memory mapped pages. What this means: 521 * 522 * On Write: If we find a memory mapped page, we write to *both* 523 * the page and the dmu buffer. 524 */ 525 static void 526 update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 527 int segflg, dmu_tx_t *tx) 528 { 529 vm_object_t obj; 530 struct sf_buf *sf; 531 caddr_t va; 532 int off; 533 534 ASSERT(segflg != UIO_NOCOPY); 535 ASSERT(vp->v_mount != NULL); 536 obj = vp->v_object; 537 ASSERT(obj != NULL); 538 539 off = start & PAGEOFFSET; 540 zfs_vmobject_wlock(obj); 541 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 542 vm_page_t pp; 543 int nbytes = imin(PAGESIZE - off, len); 544 545 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 546 zfs_vmobject_wunlock(obj); 547 548 va = zfs_map_page(pp, &sf); 549 (void) dmu_read(os, oid, start+off, nbytes, 550 va+off, DMU_READ_PREFETCH); 551 zfs_unmap_page(sf); 552 553 zfs_vmobject_wlock(obj); 554 page_unbusy(pp); 555 } 556 len -= nbytes; 557 off = 0; 558 } 559 vm_object_pip_wakeupn(obj, 0); 560 zfs_vmobject_wunlock(obj); 561 } 562 563 /* 564 * Read with UIO_NOCOPY flag means that sendfile(2) requests 565 * ZFS to populate a range of page cache pages with data. 566 * 567 * NOTE: this function could be optimized to pre-allocate 568 * all pages in advance, drain exclusive busy on all of them, 569 * map them into a contiguous KVA region and populate them 570 * in one single dmu_read() call. 
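 *
 * A rough sketch of that optimization (untested pseudocode under the
 * assumptions above, not the current implementation):
 *
 *	grab all npages pages exclusive-busy up front;
 *	va = map the pages into one contiguous KVA range;
 *	error = dmu_read(os, zp->z_id, start, npages * PAGESIZE, va,
 *	    DMU_READ_PREFETCH);
 *	unmap the range, mark the pages valid, unbusy them;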
571 */ 572 static int 573 mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) 574 { 575 znode_t *zp = VTOZ(vp); 576 objset_t *os = zp->z_zfsvfs->z_os; 577 struct sf_buf *sf; 578 vm_object_t obj; 579 vm_page_t pp; 580 int64_t start; 581 caddr_t va; 582 int len = nbytes; 583 int off; 584 int error = 0; 585 586 ASSERT(uio->uio_segflg == UIO_NOCOPY); 587 ASSERT(vp->v_mount != NULL); 588 obj = vp->v_object; 589 ASSERT(obj != NULL); 590 ASSERT((uio->uio_loffset & PAGEOFFSET) == 0); 591 592 zfs_vmobject_wlock(obj); 593 for (start = uio->uio_loffset; len > 0; start += PAGESIZE) { 594 int bytes = MIN(PAGESIZE, len); 595 596 pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | 597 VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); 598 if (pp->valid == 0) { 599 zfs_vmobject_wunlock(obj); 600 va = zfs_map_page(pp, &sf); 601 error = dmu_read(os, zp->z_id, start, bytes, va, 602 DMU_READ_PREFETCH); 603 if (bytes != PAGESIZE && error == 0) 604 bzero(va + bytes, PAGESIZE - bytes); 605 zfs_unmap_page(sf); 606 zfs_vmobject_wlock(obj); 607 vm_page_sunbusy(pp); 608 vm_page_lock(pp); 609 if (error) { 610 if (pp->wire_count == 0 && pp->valid == 0 && 611 !vm_page_busied(pp)) 612 vm_page_free(pp); 613 } else { 614 pp->valid = VM_PAGE_BITS_ALL; 615 vm_page_activate(pp); 616 } 617 vm_page_unlock(pp); 618 } else { 619 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 620 vm_page_sunbusy(pp); 621 } 622 if (error) 623 break; 624 uio->uio_resid -= bytes; 625 uio->uio_offset += bytes; 626 len -= bytes; 627 } 628 zfs_vmobject_wunlock(obj); 629 return (error); 630 } 631 632 /* 633 * When a file is memory mapped, we must keep the IO data synchronized 634 * between the DMU cache and the memory mapped pages. What this means: 635 * 636 * On Read: We "read" preferentially from memory mapped pages, 637 * else we default from the dmu buffer. 638 * 639 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 640 * the file is memory mapped. 641 */ 642 static int 643 mappedread(vnode_t *vp, int nbytes, uio_t *uio) 644 { 645 znode_t *zp = VTOZ(vp); 646 vm_object_t obj; 647 int64_t start; 648 caddr_t va; 649 int len = nbytes; 650 int off; 651 int error = 0; 652 653 ASSERT(vp->v_mount != NULL); 654 obj = vp->v_object; 655 ASSERT(obj != NULL); 656 657 start = uio->uio_loffset; 658 off = start & PAGEOFFSET; 659 zfs_vmobject_wlock(obj); 660 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 661 vm_page_t pp; 662 uint64_t bytes = MIN(PAGESIZE - off, len); 663 664 if (pp = page_hold(vp, start)) { 665 struct sf_buf *sf; 666 caddr_t va; 667 668 zfs_vmobject_wunlock(obj); 669 va = zfs_map_page(pp, &sf); 670 #ifdef illumos 671 error = uiomove(va + off, bytes, UIO_READ, uio); 672 #else 673 error = vn_io_fault_uiomove(va + off, bytes, uio); 674 #endif 675 zfs_unmap_page(sf); 676 zfs_vmobject_wlock(obj); 677 page_unhold(pp); 678 } else { 679 zfs_vmobject_wunlock(obj); 680 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 681 uio, bytes); 682 zfs_vmobject_wlock(obj); 683 } 684 len -= bytes; 685 off = 0; 686 if (error) 687 break; 688 } 689 zfs_vmobject_wunlock(obj); 690 return (error); 691 } 692 #endif /* __FreeBSD__ */ 693 694 #ifdef __NetBSD__ 695 696 caddr_t 697 zfs_map_page(page_t *pp, enum seg_rw rw) 698 { 699 vaddr_t va; 700 int flags; 701 702 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 703 if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 704 return (caddr_t)va; 705 #endif 706 707 flags = UVMPAGER_MAPIN_WAITOK | 708 (rw == S_READ ? 
UVMPAGER_MAPIN_WRITE : UVMPAGER_MAPIN_READ); 709 va = uvm_pagermapin(&pp, 1, flags); 710 return (caddr_t)va; 711 } 712 713 void 714 zfs_unmap_page(page_t *pp, caddr_t addr) 715 { 716 717 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS 718 vaddr_t va; 719 720 if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va)) 721 return; 722 #endif 723 uvm_pagermapout((vaddr_t)addr, 1); 724 } 725 726 static int 727 mappedread(vnode_t *vp, int nbytes, uio_t *uio) 728 { 729 znode_t *zp = VTOZ(vp); 730 struct uvm_object *uobj = &vp->v_uobj; 731 krwlock_t *rw = uobj->vmobjlock; 732 int64_t start; 733 caddr_t va; 734 size_t len = nbytes; 735 int off; 736 int error = 0; 737 int npages, found; 738 739 start = uio->uio_loffset; 740 off = start & PAGEOFFSET; 741 742 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 743 page_t *pp; 744 uint64_t bytes = MIN(PAGESIZE - off, len); 745 746 pp = NULL; 747 npages = 1; 748 rw_enter(rw, RW_WRITER); 749 found = uvn_findpages(uobj, start, &npages, &pp, NULL, 750 UFP_NOALLOC); 751 rw_exit(rw); 752 753 /* XXXNETBSD shouldn't access userspace with the page busy */ 754 if (found) { 755 va = zfs_map_page(pp, S_READ); 756 error = uiomove(va + off, bytes, UIO_READ, uio); 757 zfs_unmap_page(pp, va); 758 rw_enter(rw, RW_WRITER); 759 uvm_page_unbusy(&pp, 1); 760 rw_exit(rw); 761 } else { 762 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 763 uio, bytes); 764 } 765 766 len -= bytes; 767 off = 0; 768 if (error) 769 break; 770 } 771 return (error); 772 } 773 774 static void 775 update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, 776 int segflg, dmu_tx_t *tx) 777 { 778 struct uvm_object *uobj = &vp->v_uobj; 779 krwlock_t *rw = uobj->vmobjlock; 780 caddr_t va; 781 int off, status; 782 783 ASSERT(vp->v_mount != NULL); 784 785 rw_enter(rw, RW_WRITER); 786 787 off = start & PAGEOFFSET; 788 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 789 page_t *pp; 790 int nbytes = MIN(PAGESIZE - off, len); 791 int npages, found; 792 793 pp = NULL; 794 npages = 1; 795 found = uvn_findpages(uobj, start, &npages, &pp, NULL, 796 UFP_NOALLOC); 797 if (found) { 798 /* 799 * We're about to zap the page's contents and don't 800 * care about any existing modifications. We must 801 * keep track of any new modifications past this 802 * point. Clear the modified bit in the pmap, and 803 * if the page is marked dirty revert to tracking 804 * the modified bit. 805 */ 806 switch (uvm_pagegetdirty(pp)) { 807 case UVM_PAGE_STATUS_DIRTY: 808 /* Does pmap_clear_modify(). */ 809 uvm_pagemarkdirty(pp, UVM_PAGE_STATUS_UNKNOWN); 810 break; 811 case UVM_PAGE_STATUS_UNKNOWN: 812 pmap_clear_modify(pp); 813 break; 814 case UVM_PAGE_STATUS_CLEAN: 815 /* Nothing to do. */ 816 break; 817 } 818 rw_exit(rw); 819 820 va = zfs_map_page(pp, S_WRITE); 821 (void) dmu_read(os, oid, start + off, nbytes, 822 va + off, DMU_READ_PREFETCH); 823 zfs_unmap_page(pp, va); 824 825 rw_enter(rw, RW_WRITER); 826 uvm_page_unbusy(&pp, 1); 827 } 828 len -= nbytes; 829 off = 0; 830 } 831 rw_exit(rw); 832 } 833 #endif /* __NetBSD__ */ 834 835 offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 836 837 /* 838 * Read bytes from specified file into supplied buffer. 839 * 840 * IN: vp - vnode of file to be read from. 841 * uio - structure supplying read location, range info, 842 * and return buffer. 843 * ioflag - SYNC flags; used to provide FRSYNC semantics. 844 * cr - credentials of caller. 845 * ct - caller context 846 * 847 * OUT: uio - updated offset and range, buffer filled. 
848 * 849 * RETURN: 0 on success, error code on failure. 850 * 851 * Side Effects: 852 * vp - atime updated if byte count > 0 853 */ 854 /* ARGSUSED */ 855 static int 856 zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 857 { 858 znode_t *zp = VTOZ(vp); 859 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 860 ssize_t n, nbytes; 861 int error = 0; 862 rl_t *rl; 863 xuio_t *xuio = NULL; 864 865 ZFS_ENTER(zfsvfs); 866 ZFS_VERIFY_ZP(zp); 867 868 if (zp->z_pflags & ZFS_AV_QUARANTINED) { 869 ZFS_EXIT(zfsvfs); 870 return (SET_ERROR(EACCES)); 871 } 872 873 /* 874 * Validate file offset 875 */ 876 if (uio->uio_loffset < (offset_t)0) { 877 ZFS_EXIT(zfsvfs); 878 return (SET_ERROR(EINVAL)); 879 } 880 881 /* 882 * Fasttrack empty reads 883 */ 884 if (uio->uio_resid == 0) { 885 ZFS_EXIT(zfsvfs); 886 return (0); 887 } 888 889 /* 890 * Check for mandatory locks 891 */ 892 if (MANDMODE(zp->z_mode)) { 893 if (error = chklock(vp, FREAD, 894 uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 895 ZFS_EXIT(zfsvfs); 896 return (error); 897 } 898 } 899 900 /* 901 * If we're in FRSYNC mode, sync out this znode before reading it. 902 */ 903 if (zfsvfs->z_log && 904 (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) 905 zil_commit(zfsvfs->z_log, zp->z_id); 906 907 /* 908 * Lock the range against changes. 909 */ 910 rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 911 912 /* 913 * If we are reading past end-of-file we can skip 914 * to the end; but we might still need to set atime. 915 */ 916 if (uio->uio_loffset >= zp->z_size) { 917 error = 0; 918 goto out; 919 } 920 921 ASSERT(uio->uio_loffset < zp->z_size); 922 n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 923 924 #ifdef illumos 925 if ((uio->uio_extflg == UIO_XUIO) && 926 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 927 int nblk; 928 int blksz = zp->z_blksz; 929 uint64_t offset = uio->uio_loffset; 930 931 xuio = (xuio_t *)uio; 932 if ((ISP2(blksz))) { 933 nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 934 blksz)) / blksz; 935 } else { 936 ASSERT(offset + n <= blksz); 937 nblk = 1; 938 } 939 (void) dmu_xuio_init(xuio, nblk); 940 941 if (vn_has_cached_data(vp)) { 942 /* 943 * For simplicity, we always allocate a full buffer 944 * even if we only expect to read a portion of a block. 945 */ 946 while (--nblk >= 0) { 947 (void) dmu_xuio_add(xuio, 948 dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 949 blksz), 0, blksz); 950 } 951 } 952 } 953 #endif /* illumos */ 954 955 while (n > 0) { 956 nbytes = MIN(n, zfs_read_chunk_size - 957 P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 958 959 #ifdef __FreeBSD__ 960 if (uio->uio_segflg == UIO_NOCOPY) 961 error = mappedread_sf(vp, nbytes, uio); 962 else 963 #endif /* __FreeBSD__ */ 964 if (vn_has_cached_data(vp)) { 965 error = mappedread(vp, nbytes, uio); 966 } else { 967 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 968 uio, nbytes); 969 } 970 if (error) { 971 /* convert checksum errors into IO errors */ 972 if (error == ECKSUM) 973 error = SET_ERROR(EIO); 974 break; 975 } 976 977 n -= nbytes; 978 } 979 out: 980 zfs_range_unlock(rl); 981 982 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 983 ZFS_EXIT(zfsvfs); 984 return (error); 985 } 986 987 /* 988 * Write the bytes to a file. 989 * 990 * IN: vp - vnode of file to be written to. 991 * uio - structure supplying write location, range info, 992 * and data buffer. 993 * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 994 * set if in append mode. 995 * cr - credentials of caller. 
996 * ct - caller context (NFS/CIFS fem monitor only) 997 * 998 * OUT: uio - updated offset and range. 999 * 1000 * RETURN: 0 on success, error code on failure. 1001 * 1002 * Timestamps: 1003 * vp - ctime|mtime updated if byte count > 0 1004 */ 1005 1006 /* ARGSUSED */ 1007 static int 1008 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 1009 { 1010 znode_t *zp = VTOZ(vp); 1011 rlim64_t limit = MAXOFFSET_T; 1012 ssize_t start_resid = uio->uio_resid; 1013 ssize_t tx_bytes; 1014 uint64_t end_size; 1015 dmu_tx_t *tx; 1016 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1017 zilog_t *zilog; 1018 offset_t woff; 1019 ssize_t n, nbytes; 1020 rl_t *rl; 1021 int max_blksz = zfsvfs->z_max_blksz; 1022 int error = 0; 1023 arc_buf_t *abuf; 1024 iovec_t *aiov = NULL; 1025 xuio_t *xuio = NULL; 1026 int i_iov = 0; 1027 int iovcnt = uio->uio_iovcnt; 1028 iovec_t *iovp = uio->uio_iov; 1029 int write_eof; 1030 int count = 0; 1031 sa_bulk_attr_t bulk[4]; 1032 uint64_t mtime[2], ctime[2]; 1033 int segflg; 1034 1035 #ifdef __NetBSD__ 1036 segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ? 1037 UIO_SYSSPACE : UIO_USERSPACE; 1038 #else 1039 segflg = uio->uio_segflg; 1040 #endif 1041 1042 /* 1043 * Fasttrack empty write 1044 */ 1045 n = start_resid; 1046 if (n == 0) 1047 return (0); 1048 1049 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 1050 limit = MAXOFFSET_T; 1051 1052 ZFS_ENTER(zfsvfs); 1053 ZFS_VERIFY_ZP(zp); 1054 1055 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 1056 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 1057 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 1058 &zp->z_size, 8); 1059 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 1060 &zp->z_pflags, 8); 1061 1062 /* 1063 * In case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots), our 1064 * callers might not be able to properly detect that we are read-only, 1065 * so check it explicitly here. 1066 */ 1067 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 1068 ZFS_EXIT(zfsvfs); 1069 return (SET_ERROR(EROFS)); 1070 } 1071 1072 /* 1073 * If immutable or not appending then return EPERM 1074 */ 1075 if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 1076 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 1077 (uio->uio_loffset < zp->z_size))) { 1078 ZFS_EXIT(zfsvfs); 1079 return (SET_ERROR(EPERM)); 1080 } 1081 1082 zilog = zfsvfs->z_log; 1083 1084 /* 1085 * Validate file offset 1086 */ 1087 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 1088 if (woff < 0) { 1089 ZFS_EXIT(zfsvfs); 1090 return (SET_ERROR(EINVAL)); 1091 } 1092 1093 /* 1094 * Check for mandatory locks before calling zfs_range_lock() 1095 * in order to prevent a deadlock with locks set via fcntl(). 1096 */ 1097 if (MANDMODE((mode_t)zp->z_mode) && 1098 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 1099 ZFS_EXIT(zfsvfs); 1100 return (error); 1101 } 1102 1103 #ifdef illumos 1104 /* 1105 * Pre-fault the pages to ensure slow (eg NFS) pages 1106 * don't hold up txg. 1107 * Skip this if uio contains loaned arc_buf. 1108 */ 1109 if ((uio->uio_extflg == UIO_XUIO) && 1110 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 1111 xuio = (xuio_t *)uio; 1112 else 1113 uio_prefaultpages(MIN(n, max_blksz), uio); 1114 #endif 1115 1116 /* 1117 * If in append mode, set the io offset pointer to eof. 1118 */ 1119 if (ioflag & FAPPEND) { 1120 /* 1121 * Obtain an appending range lock to guarantee file append 1122 * semantics. We reset the write offset once we have the lock. 
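 *
 * For example, with two threads appending concurrently, each
 * RL_APPEND lock resolves to the EOF as it stands when that lock
 * is granted, so both writes land at distinct offsets; neither
 * thread can act on a stale woff once it holds the lock.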
1123 */ 1124 rl = zfs_range_lock(zp, 0, n, RL_APPEND); 1125 woff = rl->r_off; 1126 if (rl->r_len == UINT64_MAX) { 1127 /* 1128 * We overlocked the file because this write will cause 1129 * the file block size to increase. 1130 * Note that zp_size cannot change with this lock held. 1131 */ 1132 woff = zp->z_size; 1133 } 1134 uio->uio_loffset = woff; 1135 } else { 1136 /* 1137 * Note that if the file block size will change as a result of 1138 * this write, then this range lock will lock the entire file 1139 * so that we can re-write the block safely. 1140 */ 1141 rl = zfs_range_lock(zp, woff, n, RL_WRITER); 1142 } 1143 1144 #ifdef illumos 1145 if (woff >= limit) { 1146 zfs_range_unlock(rl); 1147 ZFS_EXIT(zfsvfs); 1148 return (SET_ERROR(EFBIG)); 1149 } 1150 1151 #endif 1152 #ifdef __FreeBSD__ 1153 if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { 1154 zfs_range_unlock(rl); 1155 ZFS_EXIT(zfsvfs); 1156 return (SET_ERROR(EFBIG)); 1157 } 1158 #endif 1159 #ifdef __NetBSD__ 1160 /* XXXNETBSD we might need vn_rlimit_fsize() too here eventually */ 1161 #endif 1162 1163 if ((woff + n) > limit || woff > (limit - n)) 1164 n = limit - woff; 1165 1166 /* Will this write extend the file length? */ 1167 write_eof = (woff + n > zp->z_size); 1168 1169 end_size = MAX(zp->z_size, woff + n); 1170 1171 /* 1172 * Write the file in reasonable size chunks. Each chunk is written 1173 * in a separate transaction; this keeps the intent log records small 1174 * and allows us to do more fine-grained space accounting. 1175 */ 1176 while (n > 0) { 1177 abuf = NULL; 1178 woff = uio->uio_loffset; 1179 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 1180 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 1181 if (abuf != NULL) 1182 dmu_return_arcbuf(abuf); 1183 error = SET_ERROR(EDQUOT); 1184 break; 1185 } 1186 1187 if (xuio && abuf == NULL) { 1188 ASSERT(i_iov < iovcnt); 1189 aiov = &iovp[i_iov]; 1190 abuf = dmu_xuio_arcbuf(xuio, i_iov); 1191 dmu_xuio_clear(xuio, i_iov); 1192 DTRACE_PROBE3(zfs_cp_write, int, i_iov, 1193 iovec_t *, aiov, arc_buf_t *, abuf); 1194 ASSERT((aiov->iov_base == abuf->b_data) || 1195 ((char *)aiov->iov_base - (char *)abuf->b_data + 1196 aiov->iov_len == arc_buf_size(abuf))); 1197 i_iov++; 1198 } else if (abuf == NULL && n >= max_blksz && 1199 woff >= zp->z_size && 1200 P2PHASE(woff, max_blksz) == 0 && 1201 zp->z_blksz == max_blksz) { 1202 /* 1203 * This write covers a full block. "Borrow" a buffer 1204 * from the dmu so that we can fill it before we enter 1205 * a transaction. This avoids the possibility of 1206 * holding up the transaction if the data copy hangs 1207 * up on a pagefault (e.g., from an NFS server mapping). 1208 */ 1209 #if defined(illumos) || defined(__NetBSD__) 1210 size_t cbytes; 1211 #endif 1212 1213 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 1214 max_blksz); 1215 ASSERT(abuf != NULL); 1216 ASSERT(arc_buf_size(abuf) == max_blksz); 1217 #if defined(illumos) || defined(__NetBSD__) 1218 if (error = uiocopy(abuf->b_data, max_blksz, 1219 UIO_WRITE, uio, &cbytes)) { 1220 dmu_return_arcbuf(abuf); 1221 break; 1222 } 1223 ASSERT(cbytes == max_blksz); 1224 #endif 1225 #ifdef __FreeBSD__ 1226 ssize_t resid = uio->uio_resid; 1227 1228 error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); 1229 if (error != 0) { 1230 uio->uio_offset -= resid - uio->uio_resid; 1231 uio->uio_resid = resid; 1232 dmu_return_arcbuf(abuf); 1233 break; 1234 } 1235 #endif 1236 } 1237 1238 /* 1239 * Start a transaction. 
1240 */ 1241 tx = dmu_tx_create(zfsvfs->z_os); 1242 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1243 dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 1244 zfs_sa_upgrade_txholds(tx, zp); 1245 error = dmu_tx_assign(tx, TXG_WAIT); 1246 if (error) { 1247 dmu_tx_abort(tx); 1248 if (abuf != NULL) 1249 dmu_return_arcbuf(abuf); 1250 break; 1251 } 1252 1253 /* 1254 * If zfs_range_lock() over-locked we grow the blocksize 1255 * and then reduce the lock range. This will only happen 1256 * on the first iteration since zfs_range_reduce() will 1257 * shrink down r_len to the appropriate size. 1258 */ 1259 if (rl->r_len == UINT64_MAX) { 1260 uint64_t new_blksz; 1261 1262 if (zp->z_blksz > max_blksz) { 1263 /* 1264 * File's blocksize is already larger than the 1265 * "recordsize" property. Only let it grow to 1266 * the next power of 2. 1267 */ 1268 ASSERT(!ISP2(zp->z_blksz)); 1269 new_blksz = MIN(end_size, 1270 1 << highbit64(zp->z_blksz)); 1271 } else { 1272 new_blksz = MIN(end_size, max_blksz); 1273 } 1274 zfs_grow_blocksize(zp, new_blksz, tx); 1275 zfs_range_reduce(rl, woff, n); 1276 } 1277 1278 /* 1279 * XXX - should we really limit each write to z_max_blksz? 1280 * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 1281 */ 1282 nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 1283 1284 if (woff + nbytes > zp->z_size) 1285 vnode_pager_setsize(vp, woff + nbytes); 1286 1287 if (abuf == NULL) { 1288 tx_bytes = uio->uio_resid; 1289 error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 1290 uio, nbytes, tx); 1291 tx_bytes -= uio->uio_resid; 1292 } else { 1293 tx_bytes = nbytes; 1294 ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 1295 /* 1296 * If this is not a full block write, but we are 1297 * extending the file past EOF and this data starts 1298 * block-aligned, use assign_arcbuf(). Otherwise, 1299 * write via dmu_write(). 1300 */ 1301 if (tx_bytes < max_blksz && (!write_eof || 1302 aiov->iov_base != abuf->b_data)) { 1303 ASSERT(xuio); 1304 dmu_write(zfsvfs->z_os, zp->z_id, woff, 1305 aiov->iov_len, aiov->iov_base, tx); 1306 dmu_return_arcbuf(abuf); 1307 xuio_stat_wbuf_copied(); 1308 } else { 1309 ASSERT(xuio || tx_bytes == max_blksz); 1310 dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 1311 woff, abuf, tx); 1312 } 1313 #if defined(illumos) || defined(__NetBSD__) 1314 ASSERT(tx_bytes <= uio->uio_resid); 1315 uioskip(uio, tx_bytes); 1316 #endif 1317 } 1318 if (tx_bytes && vn_has_cached_data(vp)) { 1319 update_pages(vp, woff, tx_bytes, zfsvfs->z_os, 1320 zp->z_id, segflg, tx); 1321 } 1322 1323 /* 1324 * If we made no progress, we're done. If we made even 1325 * partial progress, update the znode and ZIL accordingly. 1326 */ 1327 if (tx_bytes == 0) { 1328 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 1329 (void *)&zp->z_size, sizeof (uint64_t), tx); 1330 dmu_tx_commit(tx); 1331 ASSERT(error != 0); 1332 break; 1333 } 1334 1335 /* 1336 * Clear Set-UID/Set-GID bits on successful write if not 1337 * privileged and at least one of the execute bits is set. 1338 * 1339 * It would be nice to do this after all writes have 1340 * been done, but that would still expose the ISUID/ISGID 1341 * to another app after the partial write is committed. 1342 * 1343 * Note: we don't call zfs_fuid_map_id() here because 1344 * user 0 is not an ephemeral uid. 
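 *
 * As a concrete (hypothetical) example: an unprivileged user
 * writing to a file with mode 04755 leaves it with mode 0755
 * once the first chunk commits, per the usual UNIX rule that
 * an unprivileged write clears the set-id bits.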
1345 */ 1346 mutex_enter(&zp->z_acl_lock); 1347 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 1348 (S_IXUSR >> 6))) != 0 && 1349 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 1350 secpolicy_vnode_setid_retain(vp, cr, 1351 (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 1352 uint64_t newmode; 1353 zp->z_mode &= ~(S_ISUID | S_ISGID); 1354 newmode = zp->z_mode; 1355 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 1356 (void *)&newmode, sizeof (uint64_t), tx); 1357 #ifdef __NetBSD__ 1358 cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, 1359 true); 1360 #endif 1361 } 1362 mutex_exit(&zp->z_acl_lock); 1363 1364 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 1365 B_TRUE); 1366 1367 /* 1368 * Update the file size (zp_size) if it has changed; 1369 * account for possible concurrent updates. 1370 */ 1371 while ((end_size = zp->z_size) < uio->uio_loffset) { 1372 (void) atomic_cas_64(&zp->z_size, end_size, 1373 uio->uio_loffset); 1374 #ifdef illumos 1375 ASSERT(error == 0); 1376 #else 1377 ASSERT(error == 0 || error == EFAULT); 1378 #endif 1379 } 1380 /* 1381 * If we are replaying and eof is non zero then force 1382 * the file size to the specified eof. Note, there's no 1383 * concurrency during replay. 1384 */ 1385 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1386 zp->z_size = zfsvfs->z_replay_eof; 1387 1388 if (error == 0) 1389 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1390 else 1391 (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1392 1393 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1394 dmu_tx_commit(tx); 1395 1396 if (error != 0) 1397 break; 1398 ASSERT(tx_bytes == nbytes); 1399 n -= nbytes; 1400 1401 #ifdef illumos 1402 if (!xuio && n > 0) 1403 uio_prefaultpages(MIN(n, max_blksz), uio); 1404 #endif 1405 } 1406 1407 zfs_range_unlock(rl); 1408 1409 /* 1410 * If we're in replay mode, or we made no progress, return error. 1411 * Otherwise, it's at least a partial write, so it's successful. 1412 */ 1413 if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1414 ZFS_EXIT(zfsvfs); 1415 return (error); 1416 } 1417 1418 #ifdef __FreeBSD__ 1419 /* 1420 * EFAULT means that at least one page of the source buffer was not 1421 * available. VFS will re-try remaining I/O upon this error. 1422 */ 1423 if (error == EFAULT) { 1424 ZFS_EXIT(zfsvfs); 1425 return (error); 1426 } 1427 #endif 1428 1429 if (ioflag & (FSYNC | FDSYNC) || 1430 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1431 zil_commit(zilog, zp->z_id); 1432 1433 ZFS_EXIT(zfsvfs); 1434 return (0); 1435 } 1436 1437 void 1438 zfs_get_done(zgd_t *zgd, int error) 1439 { 1440 znode_t *zp = zgd->zgd_private; 1441 objset_t *os = zp->z_zfsvfs->z_os; 1442 1443 if (zgd->zgd_db) 1444 dmu_buf_rele(zgd->zgd_db, zgd); 1445 1446 zfs_range_unlock(zgd->zgd_rl); 1447 1448 /* 1449 * Release the vnode asynchronously as we currently have the 1450 * txg stopped from syncing. 1451 */ 1452 VN_RELE_CLEANER(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1453 1454 if (error == 0 && zgd->zgd_bp) 1455 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1456 1457 kmem_free(zgd, sizeof (zgd_t)); 1458 } 1459 1460 #ifdef DEBUG 1461 static int zil_fault_io = 0; 1462 #endif 1463 1464 /* 1465 * Get data to generate a TX_WRITE intent log record. 
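 *
 * This is the get-data callback handed to the ZIL; it runs at
 * zil_commit() time for TX_WRITE records whose data was not copied
 * into the log record when the write was logged (the indirect and
 * need-copy cases produced by zfs_log_write()).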
1466 */ 1467 int 1468 zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1469 { 1470 zfsvfs_t *zfsvfs = arg; 1471 objset_t *os = zfsvfs->z_os; 1472 znode_t *zp; 1473 uint64_t object = lr->lr_foid; 1474 uint64_t offset = lr->lr_offset; 1475 uint64_t size = lr->lr_length; 1476 blkptr_t *bp = &lr->lr_blkptr; 1477 dmu_buf_t *db; 1478 zgd_t *zgd; 1479 int error = 0; 1480 1481 ASSERT(zio != NULL); 1482 ASSERT(size != 0); 1483 1484 /* 1485 * Nothing to do if the file has been removed 1486 */ 1487 if (zfs_zget_cleaner(zfsvfs, object, &zp) != 0) 1488 return (SET_ERROR(ENOENT)); 1489 if (zp->z_unlinked) { 1490 /* 1491 * Release the vnode asynchronously as we currently have the 1492 * txg stopped from syncing. 1493 */ 1494 VN_RELE_CLEANER(ZTOV(zp), 1495 dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1496 return (SET_ERROR(ENOENT)); 1497 } 1498 1499 zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1500 zgd->zgd_zilog = zfsvfs->z_log; 1501 zgd->zgd_private = zp; 1502 1503 /* 1504 * Write records come in two flavors: immediate and indirect. 1505 * For small writes it's cheaper to store the data with the 1506 * log record (immediate); for large writes it's cheaper to 1507 * sync the data and get a pointer to it (indirect) so that 1508 * we don't have to write the data twice. 1509 */ 1510 if (buf != NULL) { /* immediate write */ 1511 zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1512 /* test for truncation needs to be done while range locked */ 1513 if (offset >= zp->z_size) { 1514 error = SET_ERROR(ENOENT); 1515 } else { 1516 error = dmu_read(os, object, offset, size, buf, 1517 DMU_READ_NO_PREFETCH); 1518 } 1519 ASSERT(error == 0 || error == ENOENT); 1520 } else { /* indirect write */ 1521 /* 1522 * Have to lock the whole block to ensure when it's 1523 * written out and its checksum is being calculated 1524 * that no one can change the data. We need to re-check 1525 * blocksize after we get the lock in case it's changed! 1526 */ 1527 for (;;) { 1528 uint64_t blkoff; 1529 size = zp->z_blksz; 1530 blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1531 offset -= blkoff; 1532 zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1533 RL_READER); 1534 if (zp->z_blksz == size) 1535 break; 1536 offset += blkoff; 1537 zfs_range_unlock(zgd->zgd_rl); 1538 } 1539 /* test for truncation needs to be done while range locked */ 1540 if (lr->lr_offset >= zp->z_size) 1541 error = SET_ERROR(ENOENT); 1542 #ifdef DEBUG 1543 if (zil_fault_io) { 1544 error = SET_ERROR(EIO); 1545 zil_fault_io = 0; 1546 } 1547 #endif 1548 if (error == 0) 1549 error = dmu_buf_hold(os, object, offset, zgd, &db, 1550 DMU_READ_NO_PREFETCH); 1551 1552 if (error == 0) { 1553 blkptr_t *obp = dmu_buf_get_blkptr(db); 1554 if (obp) { 1555 ASSERT(BP_IS_HOLE(bp)); 1556 *bp = *obp; 1557 } 1558 1559 zgd->zgd_db = db; 1560 zgd->zgd_bp = bp; 1561 1562 ASSERT(db->db_offset == offset); 1563 ASSERT(db->db_size == size); 1564 1565 error = dmu_sync(zio, lr->lr_common.lrc_txg, 1566 zfs_get_done, zgd); 1567 ASSERT(error || lr->lr_length <= zp->z_blksz); 1568 1569 /* 1570 * On success, we need to wait for the write I/O 1571 * initiated by dmu_sync() to complete before we can 1572 * release this dbuf. We will finish everything up 1573 * in the zfs_get_done() callback. 
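 *
 * If dmu_sync() instead returns EALREADY, the block has already
 * been written out by the syncing context, so below we convert
 * the record to TX_WRITE2, which on replay simply references the
 * existing block rather than writing the data again.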
1574 */ 1575 if (error == 0) 1576 return (0); 1577 1578 if (error == EALREADY) { 1579 lr->lr_common.lrc_txtype = TX_WRITE2; 1580 error = 0; 1581 } 1582 } 1583 } 1584 1585 zfs_get_done(zgd, error); 1586 1587 return (error); 1588 } 1589 1590 /*ARGSUSED*/ 1591 static int 1592 zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1593 caller_context_t *ct) 1594 { 1595 znode_t *zp = VTOZ(vp); 1596 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1597 int error; 1598 1599 ZFS_ENTER(zfsvfs); 1600 ZFS_VERIFY_ZP(zp); 1601 1602 if (flag & V_ACE_MASK) 1603 error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1604 else 1605 error = zfs_zaccess_rwx(zp, mode, flag, cr); 1606 1607 ZFS_EXIT(zfsvfs); 1608 return (error); 1609 } 1610 1611 #ifdef __FreeBSD__ 1612 static int 1613 zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) 1614 { 1615 int error; 1616 1617 *vpp = arg; 1618 error = vn_lock(*vpp, lkflags); 1619 if (error != 0) 1620 vrele(*vpp); 1621 return (error); 1622 } 1623 1624 static int 1625 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags) 1626 { 1627 znode_t *zdp = VTOZ(dvp); 1628 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1629 int error; 1630 int ltype; 1631 1632 ASSERT_VOP_LOCKED(dvp, __func__); 1633 #ifdef DIAGNOSTIC 1634 if ((zdp->z_pflags & ZFS_XATTR) == 0) 1635 VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock)); 1636 #endif 1637 1638 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 1639 ASSERT3P(dvp, ==, vp); 1640 vref(dvp); 1641 ltype = lkflags & LK_TYPE_MASK; 1642 if (ltype != VOP_ISLOCKED(dvp)) { 1643 if (ltype == LK_EXCLUSIVE) 1644 vn_lock(dvp, LK_UPGRADE | LK_RETRY); 1645 else /* if (ltype == LK_SHARED) */ 1646 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 1647 1648 /* 1649 * Relocking for the "." case could leave us with a 1650 * reclaimed vnode. 1651 */ 1652 if (dvp->v_iflag & VI_DOOMED) { 1653 vrele(dvp); 1654 return (SET_ERROR(ENOENT)); 1655 } 1656 } 1657 return (0); 1658 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 1659 /* 1660 * Note that in this case, dvp is the child vnode, and we 1661 * are looking up the parent vnode - exactly reverse from 1662 * normal operation. Unlocking dvp requires some rather 1663 * tricky unlock/relock dance to prevent mp from being freed; 1664 * use vn_vget_ino_gen() which takes care of all that. 1665 * 1666 * XXX Note that there is a time window when both vnodes are 1667 * unlocked. It is possible, although highly unlikely, that 1668 * during that window the parent-child relationship between 1669 * the vnodes may change, for example, get reversed. 1670 * In that case we would have a wrong lock order for the vnodes. 1671 * All other filesystems seem to ignore this problem, so we 1672 * do the same here. 1673 * A potential solution could be implemented as follows: 1674 * - using LK_NOWAIT when locking the second vnode and retrying 1675 * if necessary 1676 * - checking that the parent-child relationship still holds 1677 * after locking both vnodes and retrying if it doesn't 1678 */ 1679 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp); 1680 return (error); 1681 } else { 1682 error = vn_lock(vp, lkflags); 1683 if (error != 0) 1684 vrele(vp); 1685 return (error); 1686 } 1687 } 1688 1689 /* 1690 * Lookup an entry in a directory, or an extended attribute directory. 1691 * If it exists, return a held vnode reference for it. 1692 * 1693 * IN: dvp - vnode of directory to search. 1694 * nm - name of entry to lookup. 1695 * pnp - full pathname to lookup [UNUSED]. 
1696 * flags - LOOKUP_XATTR set if looking for an attribute. 1697 * rdir - root directory vnode [UNUSED]. 1698 * cr - credentials of caller. 1699 * ct - caller context 1700 * 1701 * OUT: vpp - vnode of located entry, NULL if not found. 1702 * 1703 * RETURN: 0 on success, error code on failure. 1704 * 1705 * Timestamps: 1706 * NA 1707 */ 1708 /* ARGSUSED */ 1709 static int 1710 zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, 1711 int nameiop, cred_t *cr, kthread_t *td, int flags) 1712 { 1713 znode_t *zdp = VTOZ(dvp); 1714 znode_t *zp; 1715 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1716 int error = 0; 1717 1718 /* fast path (should be redundant with vfs namecache) */ 1719 if (!(flags & LOOKUP_XATTR)) { 1720 if (dvp->v_type != VDIR) { 1721 return (SET_ERROR(ENOTDIR)); 1722 } else if (zdp->z_sa_hdl == NULL) { 1723 return (SET_ERROR(EIO)); 1724 } 1725 } 1726 1727 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1728 1729 ZFS_ENTER(zfsvfs); 1730 ZFS_VERIFY_ZP(zdp); 1731 1732 *vpp = NULL; 1733 1734 if (flags & LOOKUP_XATTR) { 1735 #ifdef TODO 1736 /* 1737 * If the xattr property is off, refuse the lookup request. 1738 */ 1739 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1740 ZFS_EXIT(zfsvfs); 1741 return (SET_ERROR(EINVAL)); 1742 } 1743 #endif 1744 1745 /* 1746 * We don't allow recursive attributes.. 1747 * Maybe someday we will. 1748 */ 1749 if (zdp->z_pflags & ZFS_XATTR) { 1750 ZFS_EXIT(zfsvfs); 1751 return (SET_ERROR(EINVAL)); 1752 } 1753 1754 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1755 ZFS_EXIT(zfsvfs); 1756 return (error); 1757 } 1758 1759 /* 1760 * Do we have permission to get into attribute directory? 1761 */ 1762 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1763 B_FALSE, cr)) { 1764 vrele(*vpp); 1765 *vpp = NULL; 1766 } 1767 1768 ZFS_EXIT(zfsvfs); 1769 return (error); 1770 } 1771 1772 /* 1773 * Check accessibility of directory. 1774 */ 1775 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1776 ZFS_EXIT(zfsvfs); 1777 return (error); 1778 } 1779 1780 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1781 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1782 ZFS_EXIT(zfsvfs); 1783 return (SET_ERROR(EILSEQ)); 1784 } 1785 1786 1787 /* 1788 * First handle the special cases. 1789 */ 1790 if ((cnp->cn_flags & ISDOTDOT) != 0) { 1791 /* 1792 * If we are a snapshot mounted under .zfs, return 1793 * the vp for the snapshot directory. 1794 */ 1795 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 1796 struct componentname cn; 1797 vnode_t *zfsctl_vp; 1798 int ltype; 1799 1800 ZFS_EXIT(zfsvfs); 1801 ltype = VOP_ISLOCKED(dvp); 1802 VOP_UNLOCK(dvp, 0); 1803 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED, 1804 &zfsctl_vp); 1805 if (error == 0) { 1806 cn.cn_nameptr = "snapshot"; 1807 cn.cn_namelen = strlen(cn.cn_nameptr); 1808 cn.cn_nameiop = cnp->cn_nameiop; 1809 cn.cn_flags = cnp->cn_flags; 1810 cn.cn_lkflags = cnp->cn_lkflags; 1811 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn); 1812 vput(zfsctl_vp); 1813 } 1814 vn_lock(dvp, ltype | LK_RETRY); 1815 return (error); 1816 } 1817 } 1818 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 1819 ZFS_EXIT(zfsvfs); 1820 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 1821 return (SET_ERROR(ENOTSUP)); 1822 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp); 1823 return (error); 1824 } 1825 1826 /* 1827 * The loop retries the lookup if the parent-child relationship 1828 * changes during the dot-dot locking complexities. 
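 *
 * For example (a hypothetical interleaving): during the window in
 * which the dot-dot dance has both vnodes unlocked, a concurrent
 * rename may move zdp so that zp is no longer its parent; the
 * SA_ZPL_PARENT comparison below detects this and the lookup is
 * redone.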
1829 */ 1830 for (;;) { 1831 uint64_t parent; 1832 1833 error = zfs_dirlook(zdp, nm, &zp); 1834 if (error == 0) 1835 *vpp = ZTOV(zp); 1836 1837 ZFS_EXIT(zfsvfs); 1838 if (error != 0) 1839 break; 1840 1841 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags); 1842 if (error != 0) { 1843 /* 1844 * If we've got a locking error, then the vnode 1845 * got reclaimed because of a force unmount. 1846 * We never enter doomed vnodes into the name cache. 1847 */ 1848 *vpp = NULL; 1849 return (error); 1850 } 1851 1852 if ((cnp->cn_flags & ISDOTDOT) == 0) 1853 break; 1854 1855 ZFS_ENTER(zfsvfs); 1856 if (zdp->z_sa_hdl == NULL) { 1857 error = SET_ERROR(EIO); 1858 } else { 1859 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1860 &parent, sizeof (parent)); 1861 } 1862 if (error != 0) { 1863 ZFS_EXIT(zfsvfs); 1864 vput(ZTOV(zp)); 1865 break; 1866 } 1867 if (zp->z_id == parent) { 1868 ZFS_EXIT(zfsvfs); 1869 break; 1870 } 1871 vput(ZTOV(zp)); 1872 } 1873 1874 out: 1875 if (error != 0) 1876 *vpp = NULL; 1877 1878 /* Translate errors and add SAVENAME when needed. */ 1879 if (cnp->cn_flags & ISLASTCN) { 1880 switch (nameiop) { 1881 case CREATE: 1882 case RENAME: 1883 if (error == ENOENT) { 1884 error = EJUSTRETURN; 1885 cnp->cn_flags |= SAVENAME; 1886 break; 1887 } 1888 /* FALLTHROUGH */ 1889 case DELETE: 1890 if (error == 0) 1891 cnp->cn_flags |= SAVENAME; 1892 break; 1893 } 1894 } 1895 1896 /* Insert name into cache (as non-existent) if appropriate. */ 1897 if (zfsvfs->z_use_namecache && 1898 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0) 1899 cache_enter(dvp, NULL, cnp); 1900 1901 /* Insert name into cache if appropriate. */ 1902 if (zfsvfs->z_use_namecache && 1903 error == 0 && (cnp->cn_flags & MAKEENTRY)) { 1904 if (!(cnp->cn_flags & ISLASTCN) || 1905 (nameiop != DELETE && nameiop != RENAME)) { 1906 cache_enter(dvp, *vpp, cnp); 1907 } 1908 } 1909 1910 return (error); 1911 } 1912 #endif /* __FreeBSD__ */ 1913 1914 #ifdef __NetBSD__ 1915 /* 1916 * If vnode is for a device return a specfs vnode instead. 1917 */ 1918 static int 1919 specvp_check(vnode_t **vpp, cred_t *cr) 1920 { 1921 int error = 0; 1922 1923 if (IS_DEVVP(*vpp)) { 1924 struct vnode *svp; 1925 1926 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1927 VN_RELE(*vpp); 1928 if (svp == NULL) 1929 error = ENOSYS; 1930 *vpp = svp; 1931 } 1932 return (error); 1933 } 1934 1935 /* 1936 * Lookup an entry in a directory, or an extended attribute directory. 1937 * If it exists, return a held vnode reference for it. 1938 * 1939 * IN: dvp - vnode of directory to search. 1940 * nm - name of entry to lookup. 1941 * pnp - full pathname to lookup [UNUSED]. 1942 * flags - LOOKUP_XATTR set if looking for an attribute. 1943 * rdir - root directory vnode [UNUSED]. 1944 * cr - credentials of caller. 1945 * ct - caller context 1946 * direntflags - directory lookup flags 1947 * realpnp - returned pathname. 1948 * 1949 * OUT: vpp - vnode of located entry, NULL if not found. 
1950 * 1951 * RETURN: 0 if success 1952 * error code if failure 1953 * 1954 * Timestamps: 1955 * NA 1956 */ 1957 /* ARGSUSED */ 1958 static int 1959 zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, int flags, 1960 struct componentname *cnp, int nameiop, cred_t *cr) 1961 { 1962 znode_t *zdp = VTOZ(dvp); 1963 znode_t *zp; 1964 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1965 int error = 0; 1966 1967 /* fast path */ 1968 if (!(flags & LOOKUP_XATTR)) { 1969 if (dvp->v_type != VDIR) { 1970 return (ENOTDIR); 1971 } else if (zdp->z_sa_hdl == NULL) { 1972 return (SET_ERROR(EIO)); 1973 } 1974 1975 if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1976 error = zfs_fastaccesschk_execute(zdp, cr); 1977 if (!error) { 1978 *vpp = dvp; 1979 VN_HOLD(*vpp); 1980 return (0); 1981 } 1982 return (error); 1983 } else { 1984 vnode_t *tvp = dnlc_lookup(dvp, nm); 1985 1986 if (tvp) { 1987 error = zfs_fastaccesschk_execute(zdp, cr); 1988 if (error) { 1989 VN_RELE(tvp); 1990 return (error); 1991 } 1992 if (tvp == DNLC_NO_VNODE) { 1993 VN_RELE(tvp); 1994 return (ENOENT); 1995 } else { 1996 *vpp = tvp; 1997 return (specvp_check(vpp, cr)); 1998 } 1999 } 2000 } 2001 } 2002 2003 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 2004 2005 ZFS_ENTER(zfsvfs); 2006 ZFS_VERIFY_ZP(zdp); 2007 2008 *vpp = NULL; 2009 2010 if (flags & LOOKUP_XATTR) { 2011 #ifdef TODO 2012 /* 2013 * If the xattr property is off, refuse the lookup request. 2014 */ 2015 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 2016 ZFS_EXIT(zfsvfs); 2017 return (EINVAL); 2018 } 2019 #endif 2020 2021 /* 2022 * We don't allow recursive attributes.. 2023 * Maybe someday we will. 2024 */ 2025 if (zdp->z_pflags & ZFS_XATTR) { 2026 ZFS_EXIT(zfsvfs); 2027 return (EINVAL); 2028 } 2029 2030 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 2031 ZFS_EXIT(zfsvfs); 2032 return (error); 2033 } 2034 2035 /* 2036 * Do we have permission to get into attribute directory? 2037 */ 2038 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 2039 B_FALSE, cr)) { 2040 VN_RELE(*vpp); 2041 *vpp = NULL; 2042 } 2043 2044 ZFS_EXIT(zfsvfs); 2045 return (error); 2046 } 2047 2048 if (dvp->v_type != VDIR) { 2049 ZFS_EXIT(zfsvfs); 2050 return (ENOTDIR); 2051 } 2052 2053 /* 2054 * Check accessibility of directory. 2055 */ 2056 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 2057 ZFS_EXIT(zfsvfs); 2058 return (error); 2059 } 2060 2061 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 2062 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2063 ZFS_EXIT(zfsvfs); 2064 return (EILSEQ); 2065 } 2066 2067 /* 2068 * First handle the special cases. 2069 */ 2070 if ((cnp->cn_flags & ISDOTDOT) != 0) { 2071 /* 2072 * If we are a snapshot mounted under .zfs, return 2073 * the vp for the snapshot directory. 2074 */ 2075 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 2076 ZFS_EXIT(zfsvfs); 2077 error = zfsctl_snapshot(zfsvfs->z_parent, vpp); 2078 2079 return (error); 2080 } 2081 } 2082 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) { 2083 ZFS_EXIT(zfsvfs); 2084 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP) 2085 return (SET_ERROR(ENOTSUP)); 2086 error = zfsctl_root(zfsvfs, vpp); 2087 return (error); 2088 } 2089 2090 error = zfs_dirlook(zdp, nm, &zp); 2091 if (error == 0) { 2092 *vpp = ZTOV(zp); 2093 error = specvp_check(vpp, cr); 2094 } 2095 2096 ZFS_EXIT(zfsvfs); 2097 return (error); 2098 } 2099 #endif 2100 2101 /* 2102 * Attempt to create a new entry in a directory. 
If the entry 2103 * already exists, truncate the file if permissible, else return 2104 * an error. Return the vp of the created or trunc'd file. 2105 * 2106 * IN: dvp - vnode of directory to put new file entry in. 2107 * name - name of new file entry. 2108 * vap - attributes of new file. 2109 * excl - flag indicating exclusive or non-exclusive mode. 2110 * mode - mode to open file with. 2111 * cr - credentials of caller. 2112 * flag - large file flag [UNUSED]. 2113 * ct - caller context 2114 * vsecp - ACL to be set 2115 * 2116 * OUT: vpp - vnode of created or trunc'd entry. 2117 * 2118 * RETURN: 0 on success, error code on failure. 2119 * 2120 * Timestamps: 2121 * dvp - ctime|mtime updated if new entry created 2122 * vp - ctime|mtime always, atime if new 2123 */ 2124 2125 /* ARGSUSED */ 2126 static int 2127 zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, 2128 vnode_t **vpp, cred_t *cr, kthread_t *td) 2129 { 2130 znode_t *zp, *dzp = VTOZ(dvp); 2131 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2132 zilog_t *zilog; 2133 objset_t *os; 2134 dmu_tx_t *tx; 2135 int error; 2136 ksid_t *ksid; 2137 uid_t uid; 2138 gid_t gid = crgetgid(cr); 2139 zfs_acl_ids_t acl_ids; 2140 boolean_t fuid_dirtied; 2141 void *vsecp = NULL; 2142 int flag = 0; 2143 uint64_t txtype; 2144 2145 /* 2146 * If we have an ephemeral id, ACL, or XVATTR then 2147 * make sure file system is at proper version 2148 */ 2149 2150 ksid = crgetsid(cr, KSID_OWNER); 2151 if (ksid) 2152 uid = ksid_getid(ksid); 2153 else 2154 uid = crgetuid(cr); 2155 2156 if (zfsvfs->z_use_fuids == B_FALSE && 2157 (vsecp || (vap->va_mask & AT_XVATTR) || 2158 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2159 return (SET_ERROR(EINVAL)); 2160 2161 ZFS_ENTER(zfsvfs); 2162 ZFS_VERIFY_ZP(dzp); 2163 os = zfsvfs->z_os; 2164 zilog = zfsvfs->z_log; 2165 2166 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 2167 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2168 ZFS_EXIT(zfsvfs); 2169 return (SET_ERROR(EILSEQ)); 2170 } 2171 2172 if (vap->va_mask & AT_XVATTR) { 2173 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2174 crgetuid(cr), cr, vap->va_type)) != 0) { 2175 ZFS_EXIT(zfsvfs); 2176 return (error); 2177 } 2178 } 2179 2180 *vpp = NULL; 2181 2182 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 2183 vap->va_mode &= ~S_ISVTX; 2184 2185 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 2186 if (error) { 2187 ZFS_EXIT(zfsvfs); 2188 return (error); 2189 } 2190 ASSERT3P(zp, ==, NULL); 2191 2192 /* 2193 * Create a new file object and update the directory 2194 * to reference it. 2195 */ 2196 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 2197 goto out; 2198 } 2199 2200 /* 2201 * We only support the creation of regular files in 2202 * extended attribute directories. 
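 * An attempt to create any other type of object here is rejected
 * with EINVAL by the va_type check below.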
2203 */ 2204 2205 if ((dzp->z_pflags & ZFS_XATTR) && 2206 (vap->va_type != VREG)) { 2207 error = SET_ERROR(EINVAL); 2208 goto out; 2209 } 2210 2211 if ((error = zfs_acl_ids_create(dzp, 0, vap, 2212 cr, vsecp, &acl_ids)) != 0) 2213 goto out; 2214 2215 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2216 zfs_acl_ids_free(&acl_ids); 2217 error = SET_ERROR(EDQUOT); 2218 goto out; 2219 } 2220 2221 getnewvnode_reserve(1); 2222 2223 tx = dmu_tx_create(os); 2224 2225 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2226 ZFS_SA_BASE_ATTR_SIZE); 2227 2228 fuid_dirtied = zfsvfs->z_fuid_dirty; 2229 if (fuid_dirtied) 2230 zfs_fuid_txhold(zfsvfs, tx); 2231 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 2232 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 2233 if (!zfsvfs->z_use_sa && 2234 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2235 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2236 0, acl_ids.z_aclp->z_acl_bytes); 2237 } 2238 error = dmu_tx_assign(tx, TXG_WAIT); 2239 if (error) { 2240 zfs_acl_ids_free(&acl_ids); 2241 dmu_tx_abort(tx); 2242 getnewvnode_drop_reserve(); 2243 ZFS_EXIT(zfsvfs); 2244 return (error); 2245 } 2246 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2247 2248 if (fuid_dirtied) 2249 zfs_fuid_sync(zfsvfs, tx); 2250 2251 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 2252 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 2253 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 2254 vsecp, acl_ids.z_fuidp, vap); 2255 zfs_acl_ids_free(&acl_ids); 2256 dmu_tx_commit(tx); 2257 2258 getnewvnode_drop_reserve(); 2259 2260 out: 2261 if (error == 0) { 2262 *vpp = ZTOV(zp); 2263 } 2264 2265 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2266 zil_commit(zilog, 0); 2267 2268 ZFS_EXIT(zfsvfs); 2269 return (error); 2270 } 2271 2272 /* 2273 * Remove an entry from a directory. 2274 * 2275 * IN: dvp - vnode of directory to remove entry from. 2276 * name - name of entry to remove. 2277 * cr - credentials of caller. 2278 * ct - caller context 2279 * flags - case flags 2280 * 2281 * RETURN: 0 on success, error code on failure. 2282 * 2283 * Timestamps: 2284 * dvp - ctime|mtime 2285 * vp - ctime (if nlink > 0) 2286 */ 2287 2288 /*ARGSUSED*/ 2289 static int 2290 zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2291 { 2292 znode_t *dzp = VTOZ(dvp); 2293 znode_t *zp = VTOZ(vp); 2294 znode_t *xzp; 2295 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2296 zilog_t *zilog; 2297 uint64_t acl_obj, xattr_obj; 2298 uint64_t obj = 0; 2299 dmu_tx_t *tx; 2300 boolean_t unlinked, toobig = FALSE; 2301 uint64_t txtype; 2302 int error; 2303 2304 ZFS_ENTER(zfsvfs); 2305 ZFS_VERIFY_ZP(dzp); 2306 ZFS_VERIFY_ZP(zp); 2307 zilog = zfsvfs->z_log; 2308 zp = VTOZ(vp); 2309 2310 xattr_obj = 0; 2311 xzp = NULL; 2312 2313 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2314 goto out; 2315 } 2316 2317 /* 2318 * Need to use rmdir for removing directories. 2319 */ 2320 if (vp->v_type == VDIR) { 2321 error = SET_ERROR(EPERM); 2322 goto out; 2323 } 2324 2325 vnevent_remove(vp, dvp, name, ct); 2326 2327 obj = zp->z_id; 2328 2329 /* are there any extended attributes? */ 2330 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2331 &xattr_obj, sizeof (xattr_obj)); 2332 if (error == 0 && xattr_obj) { 2333 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 2334 ASSERT0(error); 2335 } 2336 2337 /* 2338 * We may delete the znode now, or we may put it in the unlinked set; 2339 * it depends on whether we're the last link, and on whether there are 2340 * other holds on the vnode. So we dmu_tx_hold() the right things to 2341 * allow for either case. 
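 *
 * A sketch of the holds taken below (the xattr-related holds are
 * only added when an xattr directory was found above):
 *
 *	dmu_tx_hold_zap(tx, dzp->z_id, ...)		directory entry
 *	dmu_tx_hold_sa(tx, zp->z_sa_hdl, ...)		znode attributes
 *	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, ...)	unlinked set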
2342 */ 2343 tx = dmu_tx_create(zfsvfs->z_os); 2344 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2345 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2346 zfs_sa_upgrade_txholds(tx, zp); 2347 zfs_sa_upgrade_txholds(tx, dzp); 2348 2349 if (xzp) { 2350 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2351 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 2352 } 2353 2354 /* charge as an update -- would be nice not to charge at all */ 2355 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2356 2357 /* 2358 * Mark this transaction as typically resulting in a net free of space 2359 */ 2360 dmu_tx_mark_netfree(tx); 2361 2362 error = dmu_tx_assign(tx, TXG_WAIT); 2363 if (error) { 2364 dmu_tx_abort(tx); 2365 ZFS_EXIT(zfsvfs); 2366 return (error); 2367 } 2368 2369 /* 2370 * Remove the directory entry. 2371 */ 2372 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 2373 2374 if (error) { 2375 dmu_tx_commit(tx); 2376 goto out; 2377 } 2378 2379 if (unlinked) { 2380 zfs_unlinked_add(zp, tx); 2381 vp->v_vflag |= VV_NOSYNC; 2382 } 2383 2384 txtype = TX_REMOVE; 2385 zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 2386 2387 dmu_tx_commit(tx); 2388 out: 2389 2390 if (xzp) 2391 vrele(ZTOV(xzp)); 2392 2393 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2394 zil_commit(zilog, 0); 2395 2396 ZFS_EXIT(zfsvfs); 2397 return (error); 2398 } 2399 2400 /* 2401 * Create a new directory and insert it into dvp using the name 2402 * provided. Return a pointer to the inserted directory. 2403 * 2404 * IN: dvp - vnode of directory to add subdir to. 2405 * dirname - name of new directory. 2406 * vap - attributes of new directory. 2407 * cr - credentials of caller. 2408 * ct - caller context 2409 * flags - case flags 2410 * vsecp - ACL to be set 2411 * 2412 * OUT: vpp - vnode of created directory. 2413 * 2414 * RETURN: 0 on success, error code on failure. 
2415 * 2416 * Timestamps: 2417 * dvp - ctime|mtime updated 2418 * vp - ctime|mtime|atime updated 2419 */ 2420 /*ARGSUSED*/ 2421 static int 2422 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr) 2423 { 2424 znode_t *zp, *dzp = VTOZ(dvp); 2425 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2426 zilog_t *zilog; 2427 uint64_t txtype; 2428 dmu_tx_t *tx; 2429 int error; 2430 ksid_t *ksid; 2431 uid_t uid; 2432 gid_t gid = crgetgid(cr); 2433 zfs_acl_ids_t acl_ids; 2434 boolean_t fuid_dirtied; 2435 2436 ASSERT(vap->va_type == VDIR); 2437 2438 /* 2439 * If we have an ephemeral id, ACL, or XVATTR then 2440 * make sure file system is at proper version 2441 */ 2442 2443 ksid = crgetsid(cr, KSID_OWNER); 2444 if (ksid) 2445 uid = ksid_getid(ksid); 2446 else 2447 uid = crgetuid(cr); 2448 if (zfsvfs->z_use_fuids == B_FALSE && 2449 ((vap->va_mask & AT_XVATTR) || 2450 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 2451 return (SET_ERROR(EINVAL)); 2452 2453 ZFS_ENTER(zfsvfs); 2454 ZFS_VERIFY_ZP(dzp); 2455 zilog = zfsvfs->z_log; 2456 2457 if (dzp->z_pflags & ZFS_XATTR) { 2458 ZFS_EXIT(zfsvfs); 2459 return (SET_ERROR(EINVAL)); 2460 } 2461 2462 if (zfsvfs->z_utf8 && u8_validate(dirname, 2463 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 2464 ZFS_EXIT(zfsvfs); 2465 return (SET_ERROR(EILSEQ)); 2466 } 2467 2468 if (vap->va_mask & AT_XVATTR) { 2469 if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap, 2470 crgetuid(cr), cr, vap->va_type)) != 0) { 2471 ZFS_EXIT(zfsvfs); 2472 return (error); 2473 } 2474 } 2475 2476 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 2477 NULL, &acl_ids)) != 0) { 2478 ZFS_EXIT(zfsvfs); 2479 return (error); 2480 } 2481 2482 /* 2483 * First make sure the new directory doesn't exist. 2484 * 2485 * Existence is checked first to make sure we don't return 2486 * EACCES instead of EEXIST which can cause some applications 2487 * to fail. 2488 */ 2489 *vpp = NULL; 2490 2491 if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) { 2492 zfs_acl_ids_free(&acl_ids); 2493 ZFS_EXIT(zfsvfs); 2494 return (error); 2495 } 2496 ASSERT3P(zp, ==, NULL); 2497 2498 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 2499 zfs_acl_ids_free(&acl_ids); 2500 ZFS_EXIT(zfsvfs); 2501 return (error); 2502 } 2503 2504 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 2505 zfs_acl_ids_free(&acl_ids); 2506 ZFS_EXIT(zfsvfs); 2507 return (SET_ERROR(EDQUOT)); 2508 } 2509 2510 /* 2511 * Add a new entry to the directory. 2512 */ 2513 getnewvnode_reserve(1); 2514 tx = dmu_tx_create(zfsvfs->z_os); 2515 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2516 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2517 fuid_dirtied = zfsvfs->z_fuid_dirty; 2518 if (fuid_dirtied) 2519 zfs_fuid_txhold(zfsvfs, tx); 2520 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2521 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2522 acl_ids.z_aclp->z_acl_bytes); 2523 } 2524 2525 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 2526 ZFS_SA_BASE_ATTR_SIZE); 2527 2528 error = dmu_tx_assign(tx, TXG_WAIT); 2529 if (error) { 2530 zfs_acl_ids_free(&acl_ids); 2531 dmu_tx_abort(tx); 2532 getnewvnode_drop_reserve(); 2533 ZFS_EXIT(zfsvfs); 2534 return (error); 2535 } 2536 2537 /* 2538 * Create new node. 2539 */ 2540 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2541 2542 if (fuid_dirtied) 2543 zfs_fuid_sync(zfsvfs, tx); 2544 2545 /* 2546 * Now put new name in parent dir. 
2547 */ 2548 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 2549 2550 *vpp = ZTOV(zp); 2551 2552 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 2553 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 2554 acl_ids.z_fuidp, vap); 2555 2556 zfs_acl_ids_free(&acl_ids); 2557 2558 dmu_tx_commit(tx); 2559 2560 getnewvnode_drop_reserve(); 2561 2562 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2563 zil_commit(zilog, 0); 2564 2565 ZFS_EXIT(zfsvfs); 2566 return (0); 2567 } 2568 2569 /* 2570 * Remove a directory subdir entry. If the current working 2571 * directory is the same as the subdir to be removed, the 2572 * remove will fail. 2573 * 2574 * IN: dvp - vnode of directory to remove from. 2575 * name - name of directory to be removed. 2576 * cwd - vnode of current working directory. 2577 * cr - credentials of caller. 2578 * ct - caller context 2579 * flags - case flags 2580 * 2581 * RETURN: 0 on success, error code on failure. 2582 * 2583 * Timestamps: 2584 * dvp - ctime|mtime updated 2585 */ 2586 /*ARGSUSED*/ 2587 static int 2588 zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr) 2589 { 2590 znode_t *dzp = VTOZ(dvp); 2591 znode_t *zp = VTOZ(vp); 2592 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2593 zilog_t *zilog; 2594 dmu_tx_t *tx; 2595 int error; 2596 2597 ZFS_ENTER(zfsvfs); 2598 ZFS_VERIFY_ZP(dzp); 2599 ZFS_VERIFY_ZP(zp); 2600 zilog = zfsvfs->z_log; 2601 2602 2603 if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2604 goto out; 2605 } 2606 2607 if (vp->v_type != VDIR) { 2608 error = SET_ERROR(ENOTDIR); 2609 goto out; 2610 } 2611 2612 vnevent_rmdir(vp, dvp, name, ct); 2613 2614 tx = dmu_tx_create(zfsvfs->z_os); 2615 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 2616 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2617 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 2618 zfs_sa_upgrade_txholds(tx, zp); 2619 zfs_sa_upgrade_txholds(tx, dzp); 2620 dmu_tx_mark_netfree(tx); 2621 error = dmu_tx_assign(tx, TXG_WAIT); 2622 if (error) { 2623 dmu_tx_abort(tx); 2624 ZFS_EXIT(zfsvfs); 2625 return (error); 2626 } 2627 2628 cache_purge(dvp); 2629 2630 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 2631 2632 if (error == 0) { 2633 uint64_t txtype = TX_RMDIR; 2634 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2635 } 2636 2637 dmu_tx_commit(tx); 2638 2639 cache_purge(vp); 2640 out: 2641 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2642 zil_commit(zilog, 0); 2643 2644 ZFS_EXIT(zfsvfs); 2645 return (error); 2646 } 2647 2648 /* 2649 * Read as many directory entries as will fit into the provided 2650 * buffer from the given directory cursor position (specified in 2651 * the uio structure). 2652 * 2653 * IN: vp - vnode of directory to read. 2654 * uio - structure supplying read location, range info, 2655 * and return buffer. 2656 * cr - credentials of caller. 2657 * ct - caller context 2658 * flags - case flags 2659 * 2660 * OUT: uio - updated offset and range, buffer filled. 2661 * eofp - set to true if end-of-file detected. 2662 * 2663 * RETURN: 0 on success, error code on failure. 2664 * 2665 * Timestamps: 2666 * vp - atime updated 2667 * 2668 * Note that the low 4 bits of the cookie returned by zap is always zero. 2669 * This allows us to use the low range for "special" directory entries: 2670 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2671 * we use the offset 2 for the '.zfs' directory. 
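 *
 * So the cookie space looks roughly like this:
 *
 *	0	'.'
 *	1	'..'
 *	2	'.zfs'	(root of the filesystem only, when visible)
 *	> 3	a serialized ZAP cursor (low 4 bits zero)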
2672 */ 2673 /* ARGSUSED */ 2674 static int 2675 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, off_t **cookies) 2676 { 2677 znode_t *zp = VTOZ(vp); 2678 iovec_t *iovp; 2679 edirent_t *eodp; 2680 dirent64_t *odp; 2681 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2682 objset_t *os; 2683 caddr_t outbuf; 2684 size_t bufsize; 2685 zap_cursor_t zc; 2686 zap_attribute_t zap; 2687 uint_t bytes_wanted; 2688 uint64_t offset; /* must be unsigned; checks for < 1 */ 2689 uint64_t parent; 2690 int local_eof; 2691 int outcount; 2692 int error; 2693 uint8_t prefetch; 2694 boolean_t check_sysattrs; 2695 uint8_t type; 2696 int ncooks = 0; 2697 off_t *cooks = NULL; 2698 int flags = 0; 2699 #ifdef __FreeBSD__ 2700 boolean_t user = uio->uio_segflg != UIO_SYSSPACE; 2701 #endif 2702 #ifdef __NetBSD__ 2703 boolean_t user = !VMSPACE_IS_KERNEL_P(uio->uio_vmspace); 2704 #endif 2705 2706 ZFS_ENTER(zfsvfs); 2707 ZFS_VERIFY_ZP(zp); 2708 2709 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 2710 &parent, sizeof (parent))) != 0) { 2711 ZFS_EXIT(zfsvfs); 2712 return (error); 2713 } 2714 2715 /* 2716 * If we are not given an eof variable, 2717 * use a local one. 2718 */ 2719 if (eofp == NULL) 2720 eofp = &local_eof; 2721 2722 /* 2723 * Check for valid iov_len. 2724 */ 2725 if (uio->uio_iov->iov_len <= 0) { 2726 ZFS_EXIT(zfsvfs); 2727 return (SET_ERROR(EINVAL)); 2728 } 2729 2730 /* 2731 * Quit if directory has been removed (posix) 2732 */ 2733 if ((*eofp = zp->z_unlinked) != 0) { 2734 ZFS_EXIT(zfsvfs); 2735 return (0); 2736 } 2737 2738 error = 0; 2739 os = zfsvfs->z_os; 2740 offset = uio->uio_loffset; 2741 prefetch = zp->z_zn_prefetch; 2742 2743 /* 2744 * Initialize the iterator cursor. 2745 */ 2746 if (offset <= 3) { 2747 /* 2748 * Start iteration from the beginning of the directory. 2749 */ 2750 zap_cursor_init(&zc, os, zp->z_id); 2751 } else { 2752 /* 2753 * The offset is a serialized cursor. 2754 */ 2755 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2756 } 2757 2758 /* 2759 * Get space to change directory entries into fs independent format. 2760 */ 2761 iovp = uio->uio_iov; 2762 bytes_wanted = iovp->iov_len; 2763 if (user || uio->uio_iovcnt != 1) { 2764 bufsize = bytes_wanted; 2765 outbuf = kmem_alloc(bufsize, KM_SLEEP); 2766 odp = (struct dirent64 *)outbuf; 2767 } else { 2768 bufsize = bytes_wanted; 2769 outbuf = NULL; 2770 odp = (struct dirent64 *)iovp->iov_base; 2771 } 2772 eodp = (struct edirent *)odp; 2773 2774 if (ncookies != NULL) { 2775 /* 2776 * Minimum entry size is dirent size and 1 byte for a file name. 2777 */ 2778 #ifdef __FreeBSD__ 2779 ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1); 2780 cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK); 2781 #endif 2782 #ifdef __NetBSD__ 2783 ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp); 2784 cooks = malloc(ncooks * sizeof(off_t), M_TEMP, M_WAITOK); 2785 #endif 2786 *cookies = cooks; 2787 *ncookies = ncooks; 2788 } 2789 2790 /* 2791 * If this VFS supports the system attribute view interface; and 2792 * we're looking at an extended attribute directory; and we care 2793 * about normalization conflicts on this vfs; then we must check 2794 * for normalization conflicts with the sysattr name space. 
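 * (On this port the check is compiled out below and check_sysattrs
 * is forced to 0 until the TODO is resolved.)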
2795 */ 2796 #ifdef TODO 2797 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2798 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2799 (flags & V_RDDIR_ENTFLAGS); 2800 #else 2801 check_sysattrs = 0; 2802 #endif 2803 2804 /* 2805 * Transform to file-system independent format 2806 */ 2807 outcount = 0; 2808 while (outcount < bytes_wanted) { 2809 ino64_t objnum; 2810 ushort_t reclen; 2811 off64_t *next = NULL; 2812 2813 /* 2814 * Special case `.', `..', and `.zfs'. 2815 */ 2816 if (offset == 0) { 2817 (void) strcpy(zap.za_name, "."); 2818 zap.za_normalization_conflict = 0; 2819 objnum = zp->z_id; 2820 type = DT_DIR; 2821 } else if (offset == 1) { 2822 (void) strcpy(zap.za_name, ".."); 2823 zap.za_normalization_conflict = 0; 2824 objnum = parent; 2825 type = DT_DIR; 2826 } else if (offset == 2 && zfs_show_ctldir(zp)) { 2827 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2828 zap.za_normalization_conflict = 0; 2829 objnum = ZFSCTL_INO_ROOT; 2830 type = DT_DIR; 2831 } else { 2832 /* 2833 * Grab next entry. 2834 */ 2835 if (error = zap_cursor_retrieve(&zc, &zap)) { 2836 if ((*eofp = (error == ENOENT)) != 0) 2837 break; 2838 else 2839 goto update; 2840 } 2841 2842 if (zap.za_integer_length != 8 || 2843 zap.za_num_integers != 1) { 2844 cmn_err(CE_WARN, "zap_readdir: bad directory " 2845 "entry, obj = %lld, offset = %lld\n", 2846 (u_longlong_t)zp->z_id, 2847 (u_longlong_t)offset); 2848 error = SET_ERROR(ENXIO); 2849 goto update; 2850 } 2851 2852 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2853 /* 2854 * MacOS X can extract the object type here such as: 2855 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2856 */ 2857 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2858 2859 if (check_sysattrs && !zap.za_normalization_conflict) { 2860 #ifdef TODO 2861 zap.za_normalization_conflict = 2862 xattr_sysattr_casechk(zap.za_name); 2863 #else 2864 panic("%s:%u: TODO", __func__, __LINE__); 2865 #endif 2866 } 2867 } 2868 2869 if (flags & V_RDDIR_ACCFILTER) { 2870 /* 2871 * If we have no access at all, don't include 2872 * this entry in the returned information 2873 */ 2874 znode_t *ezp; 2875 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2876 goto skip_entry; 2877 if (!zfs_has_access(ezp, cr)) { 2878 vrele(ZTOV(ezp)); 2879 goto skip_entry; 2880 } 2881 vrele(ZTOV(ezp)); 2882 } 2883 2884 if (flags & V_RDDIR_ENTFLAGS) 2885 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2886 else 2887 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2888 2889 /* 2890 * Will this entry fit in the buffer? 2891 */ 2892 if (outcount + reclen > bufsize) { 2893 /* 2894 * Did we manage to fit anything in the buffer? 2895 */ 2896 if (!outcount) { 2897 error = SET_ERROR(EINVAL); 2898 goto update; 2899 } 2900 break; 2901 } 2902 if (flags & V_RDDIR_ENTFLAGS) { 2903 /* 2904 * Add extended flag entry: 2905 */ 2906 eodp->ed_ino = objnum; 2907 eodp->ed_reclen = reclen; 2908 /* NOTE: ed_off is the offset for the *next* entry */ 2909 next = &(eodp->ed_off); 2910 eodp->ed_eflags = zap.za_normalization_conflict ? 
2911 ED_CASE_CONFLICT : 0; 2912 (void) strncpy(eodp->ed_name, zap.za_name, 2913 EDIRENT_NAMELEN(reclen)); 2914 eodp = (edirent_t *)((intptr_t)eodp + reclen); 2915 } else { 2916 /* 2917 * Add normal entry: 2918 */ 2919 odp->d_ino = objnum; 2920 odp->d_reclen = reclen; 2921 odp->d_namlen = strlen(zap.za_name); 2922 (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 2923 odp->d_type = type; 2924 odp = (dirent64_t *)((intptr_t)odp + reclen); 2925 } 2926 outcount += reclen; 2927 2928 ASSERT(outcount <= bufsize); 2929 2930 /* Prefetch znode */ 2931 if (prefetch) 2932 dmu_prefetch(os, objnum, 0, 0, 0, 2933 ZIO_PRIORITY_SYNC_READ); 2934 2935 skip_entry: 2936 /* 2937 * Move to the next entry, fill in the previous offset. 2938 */ 2939 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2940 zap_cursor_advance(&zc); 2941 offset = zap_cursor_serialize(&zc); 2942 } else { 2943 offset += 1; 2944 } 2945 2946 if (cooks != NULL) { 2947 *cooks++ = offset; 2948 ncooks--; 2949 #ifdef __FreeBSD__ 2950 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 2951 #endif 2952 #ifdef __NetBSD__ 2953 KASSERTMSG(ncooks >= 0, "ncooks=%d", ncooks); 2954 #endif 2955 } 2956 } 2957 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2958 2959 /* Subtract unused cookies */ 2960 if (ncookies != NULL) 2961 *ncookies -= ncooks; 2962 2963 if (!user && uio->uio_iovcnt == 1) { 2964 iovp->iov_base += outcount; 2965 iovp->iov_len -= outcount; 2966 uio->uio_resid -= outcount; 2967 } else if (error = uiomove(outbuf, (size_t)outcount, UIO_READ, uio)) { 2968 /* 2969 * Reset the pointer. 2970 */ 2971 offset = uio->uio_loffset; 2972 } 2973 2974 update: 2975 zap_cursor_fini(&zc); 2976 if (user || uio->uio_iovcnt != 1) 2977 kmem_free(outbuf, bufsize); 2978 2979 if (error == ENOENT) 2980 error = 0; 2981 2982 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2983 2984 uio->uio_loffset = offset; 2985 ZFS_EXIT(zfsvfs); 2986 if (error != 0 && cookies != NULL) { 2987 #ifdef __FreeBSD__ 2988 free(*cookies, M_TEMP); 2989 #endif 2990 #ifdef __NetBSD__ 2991 kmem_free(*cookies, ncooks * sizeof(off_t)); 2992 #endif 2993 *cookies = NULL; 2994 *ncookies = 0; 2995 } 2996 return (error); 2997 } 2998 2999 ulong_t zfs_fsync_sync_cnt = 4; 3000 3001 static int 3002 zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 3003 { 3004 znode_t *zp = VTOZ(vp); 3005 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3006 3007 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 3008 3009 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 3010 ZFS_ENTER(zfsvfs); 3011 ZFS_VERIFY_ZP(zp); 3012 3013 #ifdef __NetBSD__ 3014 if (!zp->z_unlinked) 3015 #endif 3016 zil_commit(zfsvfs->z_log, zp->z_id); 3017 ZFS_EXIT(zfsvfs); 3018 } 3019 return (0); 3020 } 3021 3022 3023 /* 3024 * Get the requested file attributes and place them in the provided 3025 * vattr structure. 3026 * 3027 * IN: vp - vnode of file. 3028 * vap - va_mask identifies requested attributes. 3029 * If AT_XVATTR set, then optional attrs are requested 3030 * flags - ATTR_NOACLCHECK (CIFS server context) 3031 * cr - credentials of caller. 3032 * ct - caller context 3033 * 3034 * OUT: vap - attribute values. 3035 * 3036 * RETURN: 0 (always succeeds). 
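 *	(in practice an error from sa_bulk_lookup() or from the
 *	ACE_READ_ATTRIBUTES access check below is returned as-is)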
3037 */ 3038 /* ARGSUSED */ 3039 static int 3040 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 3041 caller_context_t *ct) 3042 { 3043 znode_t *zp = VTOZ(vp); 3044 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3045 int error = 0; 3046 uint32_t blksize; 3047 u_longlong_t nblocks; 3048 uint64_t links; 3049 uint64_t mtime[2], ctime[2], crtime[2], rdev; 3050 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 3051 xoptattr_t *xoap = NULL; 3052 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 3053 sa_bulk_attr_t bulk[4]; 3054 int count = 0; 3055 3056 ZFS_ENTER(zfsvfs); 3057 ZFS_VERIFY_ZP(zp); 3058 3059 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 3060 3061 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 3062 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 3063 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 3064 if (vp->v_type == VBLK || vp->v_type == VCHR) 3065 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 3066 &rdev, 8); 3067 3068 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 3069 ZFS_EXIT(zfsvfs); 3070 return (error); 3071 } 3072 3073 /* 3074 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 3075 * Also, if we are the owner don't bother, since owner should 3076 * always be allowed to read basic attributes of file. 3077 */ 3078 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 3079 (vap->va_uid != crgetuid(cr))) { 3080 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 3081 skipaclchk, cr)) { 3082 ZFS_EXIT(zfsvfs); 3083 return (error); 3084 } 3085 } 3086 3087 /* 3088 * Return all attributes. It's cheaper to provide the answer 3089 * than to determine whether we were asked the question. 3090 */ 3091 3092 vap->va_type = IFTOVT(zp->z_mode); 3093 vap->va_mode = zp->z_mode & ~S_IFMT; 3094 #ifdef illumos 3095 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 3096 #endif 3097 #ifdef __FreeBSD__ 3098 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 3099 vap->va_nodeid = zp->z_id; 3100 #endif 3101 #ifdef __NetBSD__ 3102 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid; 3103 vap->va_nodeid = zp->z_id; 3104 /* 3105 * If we are a snapshot mounted under .zfs, return 3106 * the object id of the snapshot to make getcwd happy. 3107 */ 3108 if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) { 3109 vnode_t *cvp = vp->v_mount->mnt_vnodecovered; 3110 3111 if (cvp && zfsctl_is_node(cvp)) 3112 vap->va_nodeid = dmu_objset_id(zfsvfs->z_os); 3113 } 3114 #endif 3115 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 3116 links = zp->z_links + 1; 3117 else 3118 links = zp->z_links; 3119 /* XXX NetBSD: use LINK_MAX when that value matches 32-bit nlink_t */ 3120 vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 3121 vap->va_size = zp->z_size; 3122 #ifdef illumos 3123 vap->va_rdev = vp->v_rdev; 3124 #else 3125 if (vp->v_type == VBLK || vp->v_type == VCHR) 3126 vap->va_rdev = zfs_cmpldev(rdev); 3127 #endif 3128 vap->va_seq = zp->z_seq; 3129 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 3130 vap->va_filerev = zp->z_seq; 3131 3132 /* 3133 * Add in any requested optional attributes and the create time. 3134 * Also set the corresponding bits in the returned attribute bitmap. 
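 *
 * Every optional attribute below follows the same pattern; for a
 * hypothetical XAT_FOO / ZFS_FOO pair it would read:
 *
 *	if (XVA_ISSET_REQ(xvap, XAT_FOO)) {
 *		xoap->xoa_foo = ((zp->z_pflags & ZFS_FOO) != 0);
 *		XVA_SET_RTN(xvap, XAT_FOO);
 *	}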
3135 */ 3136 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 3137 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 3138 xoap->xoa_archive = 3139 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 3140 XVA_SET_RTN(xvap, XAT_ARCHIVE); 3141 } 3142 3143 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 3144 xoap->xoa_readonly = 3145 ((zp->z_pflags & ZFS_READONLY) != 0); 3146 XVA_SET_RTN(xvap, XAT_READONLY); 3147 } 3148 3149 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 3150 xoap->xoa_system = 3151 ((zp->z_pflags & ZFS_SYSTEM) != 0); 3152 XVA_SET_RTN(xvap, XAT_SYSTEM); 3153 } 3154 3155 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 3156 xoap->xoa_hidden = 3157 ((zp->z_pflags & ZFS_HIDDEN) != 0); 3158 XVA_SET_RTN(xvap, XAT_HIDDEN); 3159 } 3160 3161 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3162 xoap->xoa_nounlink = 3163 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 3164 XVA_SET_RTN(xvap, XAT_NOUNLINK); 3165 } 3166 3167 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3168 xoap->xoa_immutable = 3169 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 3170 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 3171 } 3172 3173 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3174 xoap->xoa_appendonly = 3175 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 3176 XVA_SET_RTN(xvap, XAT_APPENDONLY); 3177 } 3178 3179 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3180 xoap->xoa_nodump = 3181 ((zp->z_pflags & ZFS_NODUMP) != 0); 3182 XVA_SET_RTN(xvap, XAT_NODUMP); 3183 } 3184 3185 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 3186 xoap->xoa_opaque = 3187 ((zp->z_pflags & ZFS_OPAQUE) != 0); 3188 XVA_SET_RTN(xvap, XAT_OPAQUE); 3189 } 3190 3191 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3192 xoap->xoa_av_quarantined = 3193 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 3194 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 3195 } 3196 3197 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3198 xoap->xoa_av_modified = 3199 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 3200 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 3201 } 3202 3203 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 3204 vp->v_type == VREG) { 3205 zfs_sa_get_scanstamp(zp, xvap); 3206 } 3207 3208 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3209 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 3210 XVA_SET_RTN(xvap, XAT_REPARSE); 3211 } 3212 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 3213 xoap->xoa_generation = zp->z_gen; 3214 XVA_SET_RTN(xvap, XAT_GEN); 3215 } 3216 3217 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 3218 xoap->xoa_offline = 3219 ((zp->z_pflags & ZFS_OFFLINE) != 0); 3220 XVA_SET_RTN(xvap, XAT_OFFLINE); 3221 } 3222 3223 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 3224 xoap->xoa_sparse = 3225 ((zp->z_pflags & ZFS_SPARSE) != 0); 3226 XVA_SET_RTN(xvap, XAT_SPARSE); 3227 } 3228 } 3229 3230 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 3231 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 3232 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 3233 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 3234 3235 3236 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 3237 vap->va_blksize = blksize; 3238 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 3239 3240 if (zp->z_blksz == 0) { 3241 /* 3242 * Block size hasn't been set; suggest maximal I/O transfers. 3243 */ 3244 vap->va_blksize = zfsvfs->z_max_blksz; 3245 } 3246 3247 ZFS_EXIT(zfsvfs); 3248 return (0); 3249 } 3250 3251 /* 3252 * Set the file attributes to the values contained in the 3253 * vattr structure. 3254 * 3255 * IN: vp - vnode of file to be modified. 3256 * vap - new attribute values. 3257 * If AT_XVATTR set, then optional attrs are being set 3258 * flags - ATTR_UTIME set if non-default time values provided. 
3259 * - ATTR_NOACLCHECK (CIFS context only).
3260 * cr - credentials of caller.
3261 * ct - caller context
3262 *
3263 * RETURN: 0 on success, error code on failure.
3264 *
3265 * Timestamps:
3266 * vp - ctime updated, mtime updated if size changed.
3267 */
3268 /* ARGSUSED */
3269 static int
3270 zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
3271 caller_context_t *ct)
3272 {
3273 znode_t *zp = VTOZ(vp);
3274 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3275 zilog_t *zilog;
3276 dmu_tx_t *tx;
3277 vattr_t oldva;
3278 xvattr_t tmpxvattr;
3279 uint_t mask = vap->va_mask;
3280 uint_t saved_mask = 0;
3281 uint64_t saved_mode;
3282 int trim_mask = 0;
3283 uint64_t new_mode;
3284 uint64_t new_uid, new_gid;
3285 uint64_t xattr_obj;
3286 uint64_t mtime[2], ctime[2];
3287 znode_t *attrzp;
3288 int need_policy = FALSE;
3289 int err, err2;
3290 zfs_fuid_info_t *fuidp = NULL;
3291 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
3292 xoptattr_t *xoap;
3293 zfs_acl_t *aclp;
3294 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
3295 boolean_t fuid_dirtied = B_FALSE;
3296 sa_bulk_attr_t bulk[7], xattr_bulk[7];
3297 int count = 0, xattr_count = 0;
3298
3299 if (mask == 0)
3300 return (0);
3301
3302 if (mask & AT_NOSET)
3303 return (SET_ERROR(EINVAL));
3304
3305 ZFS_ENTER(zfsvfs);
3306 ZFS_VERIFY_ZP(zp);
3307
3308 zilog = zfsvfs->z_log;
3309
3310 /*
3311 * Make sure that if an ephemeral uid/gid or an xvattr is specified,
3312 * the file system is at the proper version level.
3313 */
3314
3315 if (zfsvfs->z_use_fuids == B_FALSE &&
3316 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
3317 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
3318 (mask & AT_XVATTR))) {
3319 ZFS_EXIT(zfsvfs);
3320 return (SET_ERROR(EINVAL));
3321 }
3322
3323 if (mask & AT_SIZE && vp->v_type == VDIR) {
3324 ZFS_EXIT(zfsvfs);
3325 return (SET_ERROR(EISDIR));
3326 }
3327
3328 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
3329 ZFS_EXIT(zfsvfs);
3330 return (SET_ERROR(EINVAL));
3331 }
3332
3333 /*
3334 * If this is an xvattr_t, then get a pointer to the structure of
3335 * optional attributes. If this is NULL, then we have a vattr_t.
3336 */
3337 xoap = xva_getxoptattr(xvap);
3338
3339 xva_init(&tmpxvattr);
3340
3341 /*
3342 * For immutable files, only the immutable bit and atime may be altered.
3343 */
3344 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
3345 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
3346 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
3347 ZFS_EXIT(zfsvfs);
3348 return (SET_ERROR(EPERM));
3349 }
3350
3351 if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
3352 ZFS_EXIT(zfsvfs);
3353 return (SET_ERROR(EPERM));
3354 }
3355
3356 /*
3357 * Verify the timestamps don't overflow 32 bits.
3358 * ZFS can handle large timestamps, but 32-bit syscalls can't
3359 * handle times greater than 2039. This check should be removed
3360 * once large timestamps are fully supported.
3361 */
3362 if (mask & (AT_ATIME | AT_MTIME)) {
3363 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
3364 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
3365 ZFS_EXIT(zfsvfs);
3366 return (SET_ERROR(EOVERFLOW));
3367 }
3368 }
3369 if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
3370 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
3371 ZFS_EXIT(zfsvfs);
3372 return (SET_ERROR(EOVERFLOW));
3373 }
3374
3375 attrzp = NULL;
3376 aclp = NULL;
3377
3378 /* Can this be moved to before the top label?
*/ 3379 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 3380 ZFS_EXIT(zfsvfs); 3381 return (SET_ERROR(EROFS)); 3382 } 3383 3384 /* 3385 * First validate permissions 3386 */ 3387 3388 if (mask & AT_SIZE) { 3389 /* 3390 * XXX - Note, we are not providing any open 3391 * mode flags here (like FNDELAY), so we may 3392 * block if there are locks present... this 3393 * should be addressed in openat(). 3394 */ 3395 /* XXX - would it be OK to generate a log record here? */ 3396 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 3397 if (err) { 3398 ZFS_EXIT(zfsvfs); 3399 return (err); 3400 } 3401 } 3402 3403 if (mask & (AT_ATIME|AT_MTIME) || 3404 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 3405 XVA_ISSET_REQ(xvap, XAT_READONLY) || 3406 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 3407 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 3408 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 3409 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 3410 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 3411 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 3412 skipaclchk, cr); 3413 } 3414 3415 if (mask & (AT_UID|AT_GID)) { 3416 int idmask = (mask & (AT_UID|AT_GID)); 3417 int take_owner; 3418 int take_group; 3419 3420 /* 3421 * NOTE: even if a new mode is being set, 3422 * we may clear S_ISUID/S_ISGID bits. 3423 */ 3424 3425 if (!(mask & AT_MODE)) 3426 vap->va_mode = zp->z_mode; 3427 3428 /* 3429 * Take ownership or chgrp to group we are a member of 3430 */ 3431 3432 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 3433 take_group = (mask & AT_GID) && 3434 zfs_groupmember(zfsvfs, vap->va_gid, cr); 3435 3436 /* 3437 * If both AT_UID and AT_GID are set then take_owner and 3438 * take_group must both be set in order to allow taking 3439 * ownership. 3440 * 3441 * Otherwise, send the check through secpolicy_vnode_setattr() 3442 * 3443 */ 3444 3445 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 3446 ((idmask == AT_UID) && take_owner) || 3447 ((idmask == AT_GID) && take_group)) { 3448 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 3449 skipaclchk, cr) == 0) { 3450 /* 3451 * Remove setuid/setgid for non-privileged users 3452 */ 3453 secpolicy_setid_clear(vap, vp, cr); 3454 trim_mask = (mask & (AT_UID|AT_GID)); 3455 } else { 3456 need_policy = TRUE; 3457 } 3458 } else { 3459 need_policy = TRUE; 3460 } 3461 } 3462 3463 oldva.va_mode = zp->z_mode; 3464 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 3465 if (mask & AT_XVATTR) { 3466 /* 3467 * Update xvattr mask to include only those attributes 3468 * that are actually changing. 3469 * 3470 * the bits will be restored prior to actually setting 3471 * the attributes so the caller thinks they were set. 
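 *
 * The test repeated below is, for a hypothetical XAT_FOO / ZFS_FOO
 * pair:
 *
 *	if (requested xoa_foo == current ZFS_FOO state) {
 *		XVA_CLR_REQ(xvap, XAT_FOO);	  nothing is changing
 *		XVA_SET_REQ(&tmpxvattr, XAT_FOO); restore the bit later
 *	} else {
 *		need_policy = TRUE;
 *	}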
3472 */ 3473 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 3474 if (xoap->xoa_appendonly != 3475 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 3476 need_policy = TRUE; 3477 } else { 3478 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 3479 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 3480 } 3481 } 3482 3483 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 3484 if (xoap->xoa_nounlink != 3485 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 3486 need_policy = TRUE; 3487 } else { 3488 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 3489 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 3490 } 3491 } 3492 3493 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 3494 if (xoap->xoa_immutable != 3495 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 3496 need_policy = TRUE; 3497 } else { 3498 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 3499 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 3500 } 3501 } 3502 3503 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 3504 if (xoap->xoa_nodump != 3505 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 3506 need_policy = TRUE; 3507 } else { 3508 XVA_CLR_REQ(xvap, XAT_NODUMP); 3509 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 3510 } 3511 } 3512 3513 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 3514 if (xoap->xoa_av_modified != 3515 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 3516 need_policy = TRUE; 3517 } else { 3518 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 3519 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3520 } 3521 } 3522 3523 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3524 if ((vp->v_type != VREG && 3525 xoap->xoa_av_quarantined) || 3526 xoap->xoa_av_quarantined != 3527 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3528 need_policy = TRUE; 3529 } else { 3530 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3531 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3532 } 3533 } 3534 3535 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 3536 ZFS_EXIT(zfsvfs); 3537 return (SET_ERROR(EPERM)); 3538 } 3539 3540 if (need_policy == FALSE && 3541 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3542 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3543 need_policy = TRUE; 3544 } 3545 } 3546 3547 if (mask & AT_MODE) { 3548 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3549 err = secpolicy_setid_setsticky_clear(vp, vap, 3550 &oldva, cr); 3551 if (err) { 3552 ZFS_EXIT(zfsvfs); 3553 return (err); 3554 } 3555 trim_mask |= AT_MODE; 3556 } else { 3557 need_policy = TRUE; 3558 } 3559 } 3560 3561 if (need_policy) { 3562 /* 3563 * If trim_mask is set then take ownership 3564 * has been granted or write_acl is present and user 3565 * has the ability to modify mode. In that case remove 3566 * UID|GID and or MODE from mask so that 3567 * secpolicy_vnode_setattr() doesn't revoke it. 3568 */ 3569 3570 if (trim_mask) { 3571 saved_mask = vap->va_mask; 3572 vap->va_mask &= ~trim_mask; 3573 if (trim_mask & AT_MODE) { 3574 /* 3575 * Save the mode, as secpolicy_vnode_setattr() 3576 * will overwrite it with ova.va_mode. 3577 */ 3578 saved_mode = vap->va_mode; 3579 } 3580 } 3581 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3582 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3583 if (err) { 3584 ZFS_EXIT(zfsvfs); 3585 return (err); 3586 } 3587 3588 if (trim_mask) { 3589 vap->va_mask |= saved_mask; 3590 if (trim_mask & AT_MODE) { 3591 /* 3592 * Recover the mode after 3593 * secpolicy_vnode_setattr(). 
3594 */ 3595 vap->va_mode = saved_mode; 3596 } 3597 } 3598 } 3599 3600 /* 3601 * secpolicy_vnode_setattr, or take ownership may have 3602 * changed va_mask 3603 */ 3604 mask = vap->va_mask; 3605 3606 if ((mask & (AT_UID | AT_GID))) { 3607 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 3608 &xattr_obj, sizeof (xattr_obj)); 3609 3610 if (err == 0 && xattr_obj) { 3611 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 3612 if (err == 0) { 3613 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 3614 if (err != 0) 3615 vrele(ZTOV(attrzp)); 3616 } 3617 if (err) 3618 goto out2; 3619 } 3620 if (mask & AT_UID) { 3621 new_uid = zfs_fuid_create(zfsvfs, 3622 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3623 if (new_uid != zp->z_uid && 3624 zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 3625 if (attrzp) 3626 vput(ZTOV(attrzp)); 3627 err = SET_ERROR(EDQUOT); 3628 goto out2; 3629 } 3630 } 3631 3632 if (mask & AT_GID) { 3633 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 3634 cr, ZFS_GROUP, &fuidp); 3635 if (new_gid != zp->z_gid && 3636 zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 3637 if (attrzp) 3638 vput(ZTOV(attrzp)); 3639 err = SET_ERROR(EDQUOT); 3640 goto out2; 3641 } 3642 } 3643 } 3644 tx = dmu_tx_create(zfsvfs->z_os); 3645 3646 if (mask & AT_MODE) { 3647 uint64_t pmode = zp->z_mode; 3648 uint64_t acl_obj; 3649 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3650 3651 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 3652 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3653 err = SET_ERROR(EPERM); 3654 goto out; 3655 } 3656 3657 if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3658 goto out; 3659 3660 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 3661 /* 3662 * Are we upgrading ACL from old V0 format 3663 * to V1 format? 3664 */ 3665 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 3666 zfs_znode_acl_version(zp) == 3667 ZFS_ACL_VERSION_INITIAL) { 3668 dmu_tx_hold_free(tx, acl_obj, 0, 3669 DMU_OBJECT_END); 3670 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3671 0, aclp->z_acl_bytes); 3672 } else { 3673 dmu_tx_hold_write(tx, acl_obj, 0, 3674 aclp->z_acl_bytes); 3675 } 3676 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 3677 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 3678 0, aclp->z_acl_bytes); 3679 } 3680 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3681 } else { 3682 if ((mask & AT_XVATTR) && 3683 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3684 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 3685 else 3686 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3687 } 3688 3689 if (attrzp) { 3690 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3691 } 3692 3693 fuid_dirtied = zfsvfs->z_fuid_dirty; 3694 if (fuid_dirtied) 3695 zfs_fuid_txhold(zfsvfs, tx); 3696 3697 zfs_sa_upgrade_txholds(tx, zp); 3698 3699 err = dmu_tx_assign(tx, TXG_WAIT); 3700 if (err) 3701 goto out; 3702 3703 count = 0; 3704 /* 3705 * Set each attribute requested. 3706 * We group settings according to the locks they need to acquire. 3707 * 3708 * Note: you cannot set ctime directly, although it will be 3709 * updated as a side-effect of calling this function. 
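 *
 * Each attribute is staged into a bulk SA update rather than being
 * written individually, e.g.:
 *
 *	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 *	    &new_uid, sizeof (new_uid));
 *
 * and the whole batch is applied by a single sa_bulk_update() call
 * once everything has been staged.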
3710 */ 3711 3712 if (mask & (AT_UID|AT_GID|AT_MODE)) 3713 mutex_enter(&zp->z_acl_lock); 3714 3715 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3716 &zp->z_pflags, sizeof (zp->z_pflags)); 3717 3718 if (attrzp) { 3719 if (mask & (AT_UID|AT_GID|AT_MODE)) 3720 mutex_enter(&attrzp->z_acl_lock); 3721 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3722 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3723 sizeof (attrzp->z_pflags)); 3724 } 3725 3726 if (mask & (AT_UID|AT_GID)) { 3727 3728 if (mask & AT_UID) { 3729 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 3730 &new_uid, sizeof (new_uid)); 3731 zp->z_uid = new_uid; 3732 if (attrzp) { 3733 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3734 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 3735 sizeof (new_uid)); 3736 attrzp->z_uid = new_uid; 3737 } 3738 } 3739 3740 if (mask & AT_GID) { 3741 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 3742 NULL, &new_gid, sizeof (new_gid)); 3743 zp->z_gid = new_gid; 3744 if (attrzp) { 3745 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3746 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 3747 sizeof (new_gid)); 3748 attrzp->z_gid = new_gid; 3749 } 3750 } 3751 if (!(mask & AT_MODE)) { 3752 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 3753 NULL, &new_mode, sizeof (new_mode)); 3754 new_mode = zp->z_mode; 3755 } 3756 err = zfs_acl_chown_setattr(zp); 3757 ASSERT(err == 0); 3758 if (attrzp) { 3759 err = zfs_acl_chown_setattr(attrzp); 3760 ASSERT(err == 0); 3761 } 3762 } 3763 3764 if (mask & AT_MODE) { 3765 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 3766 &new_mode, sizeof (new_mode)); 3767 zp->z_mode = new_mode; 3768 ASSERT3U((uintptr_t)aclp, !=, 0); 3769 err = zfs_aclset_common(zp, aclp, cr, tx); 3770 ASSERT0(err); 3771 if (zp->z_acl_cached) 3772 zfs_acl_free(zp->z_acl_cached); 3773 zp->z_acl_cached = aclp; 3774 aclp = NULL; 3775 } 3776 3777 3778 if (mask & AT_ATIME) { 3779 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 3780 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 3781 &zp->z_atime, sizeof (zp->z_atime)); 3782 } 3783 3784 if (mask & AT_MTIME) { 3785 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 3786 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 3787 mtime, sizeof (mtime)); 3788 } 3789 3790 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 3791 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3792 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3793 NULL, mtime, sizeof (mtime)); 3794 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3795 &ctime, sizeof (ctime)); 3796 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 3797 B_TRUE); 3798 } else if (mask != 0) { 3799 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 3800 &ctime, sizeof (ctime)); 3801 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 3802 B_TRUE); 3803 if (attrzp) { 3804 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3805 SA_ZPL_CTIME(zfsvfs), NULL, 3806 &ctime, sizeof (ctime)); 3807 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 3808 mtime, ctime, B_TRUE); 3809 } 3810 } 3811 /* 3812 * Do this after setting timestamps to prevent timestamp 3813 * update from toggling bit 3814 */ 3815 3816 if (xoap && (mask & AT_XVATTR)) { 3817 3818 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 3819 xoap->xoa_createtime = vap->va_birthtime; 3820 /* 3821 * restore trimmed off masks 3822 * so that return masks can be set for caller. 
3823 */ 3824 3825 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3826 XVA_SET_REQ(xvap, XAT_APPENDONLY); 3827 } 3828 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3829 XVA_SET_REQ(xvap, XAT_NOUNLINK); 3830 } 3831 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3832 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3833 } 3834 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3835 XVA_SET_REQ(xvap, XAT_NODUMP); 3836 } 3837 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3838 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3839 } 3840 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3841 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3842 } 3843 3844 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3845 ASSERT(vp->v_type == VREG); 3846 3847 zfs_xvattr_set(zp, xvap, tx); 3848 } 3849 3850 if (fuid_dirtied) 3851 zfs_fuid_sync(zfsvfs, tx); 3852 3853 if (mask != 0) 3854 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3855 3856 if (mask & (AT_UID|AT_GID|AT_MODE)) 3857 mutex_exit(&zp->z_acl_lock); 3858 3859 if (attrzp) { 3860 if (mask & (AT_UID|AT_GID|AT_MODE)) 3861 mutex_exit(&attrzp->z_acl_lock); 3862 } 3863 out: 3864 if (err == 0 && attrzp) { 3865 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 3866 xattr_count, tx); 3867 ASSERT(err2 == 0); 3868 } 3869 3870 if (attrzp) 3871 vput(ZTOV(attrzp)); 3872 3873 if (aclp) 3874 zfs_acl_free(aclp); 3875 3876 if (fuidp) { 3877 zfs_fuid_info_free(fuidp); 3878 fuidp = NULL; 3879 } 3880 3881 if (err) { 3882 dmu_tx_abort(tx); 3883 } else { 3884 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3885 dmu_tx_commit(tx); 3886 } 3887 3888 out2: 3889 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3890 zil_commit(zilog, 0); 3891 3892 ZFS_EXIT(zfsvfs); 3893 return (err); 3894 } 3895 3896 /* 3897 * We acquire all but fdvp locks using non-blocking acquisitions. If we 3898 * fail to acquire any lock in the path we will drop all held locks, 3899 * acquire the new lock in a blocking fashion, and then release it and 3900 * restart the rename. This acquire/release step ensures that we do not 3901 * spin on a lock waiting for release. On error release all vnode locks 3902 * and decrement references the way tmpfs_rename() would do. 3903 */ 3904 static int 3905 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 3906 struct vnode *tdvp, struct vnode **tvpp, 3907 const struct componentname *scnp, const struct componentname *tcnp) 3908 { 3909 zfsvfs_t *zfsvfs; 3910 struct vnode *nvp, *svp, *tvp; 3911 znode_t *sdzp, *tdzp, *szp, *tzp; 3912 #ifdef __FreeBSD__ 3913 const char *snm = scnp->cn_nameptr; 3914 const char *tnm = tcnp->cn_nameptr; 3915 #endif 3916 #ifdef __NetBSD__ 3917 char *snm, *tnm; 3918 #endif 3919 int error; 3920 3921 #ifdef __FreeBSD__ 3922 VOP_UNLOCK(tdvp, 0); 3923 if (*tvpp != NULL && *tvpp != tdvp) 3924 VOP_UNLOCK(*tvpp, 0); 3925 #endif 3926 3927 relock: 3928 error = vn_lock(sdvp, LK_EXCLUSIVE); 3929 if (error) 3930 goto out; 3931 sdzp = VTOZ(sdvp); 3932 3933 #ifdef __NetBSD__ 3934 if (tdvp == sdvp) { 3935 } else { 3936 #endif 3937 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 3938 if (error != 0) { 3939 VOP_UNLOCK(sdvp, 0); 3940 if (error != EBUSY) 3941 goto out; 3942 error = vn_lock(tdvp, LK_EXCLUSIVE); 3943 if (error) 3944 goto out; 3945 VOP_UNLOCK(tdvp, 0); 3946 goto relock; 3947 } 3948 #ifdef __NetBSD__ 3949 } /* end if (tdvp == sdvp) */ 3950 #endif 3951 3952 tdzp = VTOZ(tdvp); 3953 3954 /* 3955 * Before using sdzp and tdzp we must ensure that they are live. 3956 * As a porting legacy from illumos we have two things to worry 3957 * about. 
One is typical for FreeBSD and it is that the vnode is 3958 * not reclaimed (doomed). The other is that the znode is live. 3959 * The current code can invalidate the znode without acquiring the 3960 * corresponding vnode lock if the object represented by the znode 3961 * and vnode is no longer valid after a rollback or receive operation. 3962 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 3963 * that protects the znodes from the invalidation. 3964 */ 3965 zfsvfs = sdzp->z_zfsvfs; 3966 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 3967 ZFS_ENTER(zfsvfs); 3968 3969 /* 3970 * We can not use ZFS_VERIFY_ZP() here because it could directly return 3971 * bypassing the cleanup code in the case of an error. 3972 */ 3973 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3974 ZFS_EXIT(zfsvfs); 3975 VOP_UNLOCK(sdvp, 0); 3976 #ifdef __NetBSD__ 3977 if (tdvp != sdvp) 3978 #endif 3979 VOP_UNLOCK(tdvp, 0); 3980 error = SET_ERROR(EIO); 3981 goto out; 3982 } 3983 3984 /* 3985 * Re-resolve svp to be certain it still exists and fetch the 3986 * correct vnode. 3987 */ 3988 #ifdef __NetBSD__ 3989 /* ZFS wants a null-terminated name. */ 3990 snm = PNBUF_GET(); 3991 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 3992 #endif 3993 error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 3994 #ifdef __NetBSD__ 3995 PNBUF_PUT(snm); 3996 #endif 3997 if (error != 0) { 3998 /* Source entry invalid or not there. */ 3999 ZFS_EXIT(zfsvfs); 4000 VOP_UNLOCK(sdvp, 0); 4001 #ifdef __NetBSD__ 4002 if (tdvp != sdvp) 4003 #endif 4004 VOP_UNLOCK(tdvp, 0); 4005 if ((scnp->cn_flags & ISDOTDOT) != 0 || 4006 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 4007 error = SET_ERROR(EINVAL); 4008 goto out; 4009 } 4010 svp = ZTOV(szp); 4011 4012 /* 4013 * Re-resolve tvp, if it disappeared we just carry on. 4014 */ 4015 #ifdef __NetBSD__ 4016 /* ZFS wants a null-terminated name. */ 4017 tnm = PNBUF_GET(); 4018 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4019 #endif 4020 error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 4021 #ifdef __NetBSD__ 4022 PNBUF_PUT(tnm); 4023 #endif 4024 if (error != 0) { 4025 ZFS_EXIT(zfsvfs); 4026 VOP_UNLOCK(sdvp, 0); 4027 #ifdef __NetBSD__ 4028 if (tdvp != sdvp) 4029 #endif 4030 VOP_UNLOCK(tdvp, 0); 4031 vrele(svp); 4032 if ((tcnp->cn_flags & ISDOTDOT) != 0) 4033 error = SET_ERROR(EINVAL); 4034 goto out; 4035 } 4036 if (tzp != NULL) 4037 tvp = ZTOV(tzp); 4038 else 4039 tvp = NULL; 4040 4041 /* 4042 * At present the vnode locks must be acquired before z_teardown_lock, 4043 * although it would be more logical to use the opposite order. 4044 */ 4045 ZFS_EXIT(zfsvfs); 4046 4047 /* 4048 * Now try acquire locks on svp and tvp. 4049 */ 4050 nvp = svp; 4051 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4052 if (error != 0) { 4053 VOP_UNLOCK(sdvp, 0); 4054 #ifdef __NetBSD__ 4055 if (tdvp != sdvp) 4056 #endif 4057 VOP_UNLOCK(tdvp, 0); 4058 if (tvp != NULL) 4059 vrele(tvp); 4060 if (error != EBUSY) { 4061 vrele(nvp); 4062 goto out; 4063 } 4064 error = vn_lock(nvp, LK_EXCLUSIVE); 4065 if (error != 0) { 4066 vrele(nvp); 4067 goto out; 4068 } 4069 VOP_UNLOCK(nvp, 0); 4070 /* 4071 * Concurrent rename race. 4072 * XXX ? 
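 * The source name now resolves to what we were given as the target
 * directory, which presumably can only happen if a concurrent rename
 * shuffled the tree under us, so give up with EINVAL rather than
 * trying to lock against ourselves.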
4073 */ 4074 if (nvp == tdvp) { 4075 vrele(nvp); 4076 error = SET_ERROR(EINVAL); 4077 goto out; 4078 } 4079 #ifdef __NetBSD__ 4080 if (*svpp != NULL) 4081 #endif 4082 vrele(*svpp); 4083 *svpp = nvp; 4084 goto relock; 4085 } 4086 #ifdef __NetBSD__ 4087 if (*svpp != NULL) 4088 #endif 4089 vrele(*svpp); 4090 *svpp = nvp; 4091 4092 if (*tvpp != NULL) 4093 vrele(*tvpp); 4094 *tvpp = NULL; 4095 if (tvp != NULL) { 4096 nvp = tvp; 4097 4098 #ifdef __NetBSD__ 4099 if (tvp == svp || tvp == sdvp) { 4100 } else { 4101 #endif 4102 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 4103 if (error != 0) { 4104 VOP_UNLOCK(sdvp, 0); 4105 #ifdef __NetBSD__ 4106 if (tdvp != sdvp) 4107 #endif 4108 VOP_UNLOCK(tdvp, 0); 4109 #ifdef __NetBSD__ 4110 if (*svpp != tdvp) 4111 #endif 4112 VOP_UNLOCK(*svpp, 0); 4113 if (error != EBUSY) { 4114 vrele(nvp); 4115 goto out; 4116 } 4117 error = vn_lock(nvp, LK_EXCLUSIVE); 4118 if (error != 0) { 4119 vrele(nvp); 4120 goto out; 4121 } 4122 vput(nvp); 4123 goto relock; 4124 } 4125 #ifdef __NetBSD__ 4126 } /* end if (tvp == svp || tvp == sdvp) */ 4127 #endif 4128 4129 *tvpp = nvp; 4130 } 4131 4132 KASSERT(VOP_ISLOCKED(sdvp) == LK_EXCLUSIVE); 4133 KASSERT(VOP_ISLOCKED(*svpp) == LK_EXCLUSIVE); 4134 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4135 KASSERT(*tvpp == NULL || VOP_ISLOCKED(*tvpp) == LK_EXCLUSIVE); 4136 4137 return (0); 4138 4139 out: 4140 return (error); 4141 } 4142 4143 /* 4144 * Note that we must use VRELE_ASYNC in this function as it walks 4145 * up the directory tree and vrele may need to acquire an exclusive 4146 * lock if a last reference to a vnode is dropped. 4147 */ 4148 static int 4149 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 4150 { 4151 zfsvfs_t *zfsvfs; 4152 znode_t *zp, *zp1; 4153 uint64_t parent; 4154 int error; 4155 4156 zfsvfs = tdzp->z_zfsvfs; 4157 if (tdzp == szp) 4158 return (SET_ERROR(EINVAL)); 4159 if (tdzp == sdzp) 4160 return (0); 4161 if (tdzp->z_id == zfsvfs->z_root) 4162 return (0); 4163 zp = tdzp; 4164 for (;;) { 4165 ASSERT(!zp->z_unlinked); 4166 if ((error = sa_lookup(zp->z_sa_hdl, 4167 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 4168 break; 4169 4170 if (parent == szp->z_id) { 4171 error = SET_ERROR(EINVAL); 4172 break; 4173 } 4174 if (parent == zfsvfs->z_root) 4175 break; 4176 if (parent == sdzp->z_id) 4177 break; 4178 4179 error = zfs_zget(zfsvfs, parent, &zp1); 4180 if (error != 0) 4181 break; 4182 4183 if (zp != tdzp) 4184 VN_RELE_ASYNC(ZTOV(zp), 4185 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4186 zp = zp1; 4187 } 4188 4189 if (error == ENOTDIR) 4190 panic("checkpath: .. not a directory\n"); 4191 if (zp != tdzp) 4192 VN_RELE_ASYNC(ZTOV(zp), 4193 dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 4194 return (error); 4195 } 4196 4197 /* 4198 * Move an entry from the provided source directory to the target 4199 * directory. Change the entry name as indicated. 4200 * 4201 * IN: sdvp - Source directory containing the "old entry". 4202 * snm - Old entry name. 4203 * tdvp - Target directory to contain the "new entry". 4204 * tnm - New entry name. 4205 * cr - credentials of caller. 4206 * ct - caller context 4207 * flags - case flags 4208 * 4209 * RETURN: 0 on success, error code on failure. 
* 4211 * Timestamps: 4212 * sdvp,tdvp - ctime|mtime updated 4213 */ 4214 /*ARGSUSED*/ 4215 static int 4216 zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 4217 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 4218 cred_t *cr) 4219 { 4220 zfsvfs_t *zfsvfs; 4221 znode_t *sdzp, *tdzp, *szp, *tzp; 4222 zilog_t *zilog = NULL; 4223 dmu_tx_t *tx; 4224 #ifdef __FreeBSD__ 4225 char *snm = __UNCONST(scnp->cn_nameptr); 4226 char *tnm = __UNCONST(tcnp->cn_nameptr); 4227 #endif 4228 #ifdef __NetBSD__ 4229 char *snm, *tnm; 4230 #endif 4231 int error = 0; 4232 4233 /* Reject renames across filesystems. */ 4234 if (((*svpp) != NULL && (*svpp)->v_mount != tdvp->v_mount) || 4235 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 4236 error = SET_ERROR(EXDEV); 4237 goto out; 4238 } 4239 4240 if (zfsctl_is_node(tdvp)) { 4241 error = SET_ERROR(EXDEV); 4242 goto out; 4243 } 4244 4245 /* 4246 * Lock all four vnodes to ensure safety and semantics of renaming. 4247 */ 4248 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 4249 if (error != 0) { 4250 /* no vnodes are locked in the case of error here */ 4251 return (error); 4252 } 4253 4254 tdzp = VTOZ(tdvp); 4255 sdzp = VTOZ(sdvp); 4256 zfsvfs = tdzp->z_zfsvfs; 4257 zilog = zfsvfs->z_log; 4258 #ifdef __NetBSD__ 4259 /* ZFS wants a null-terminated name. */ 4260 snm = PNBUF_GET(); 4261 strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1); 4262 tnm = PNBUF_GET(); 4263 strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1); 4264 #endif 4265 4266 /* 4267 * After we call ZFS_ENTER() again we will have to revalidate all 4268 * znodes involved. 4269 */ 4270 ZFS_ENTER(zfsvfs); 4271 4272 if (zfsvfs->z_utf8 && u8_validate(tnm, 4273 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4274 error = SET_ERROR(EILSEQ); 4275 goto unlockout; 4276 } 4277 4278 #ifndef __NetBSD__ 4279 /* If source and target are the same file, there is nothing to do. */ 4280 if ((*svpp) == (*tvpp)) { 4281 error = 0; 4282 goto unlockout; 4283 } 4284 #endif 4285 4286 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 4287 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 4288 (*tvpp)->v_mountedhere != NULL)) { 4289 error = SET_ERROR(EXDEV); 4290 goto unlockout; 4291 } 4292 4293 /* 4294 * We cannot use ZFS_VERIFY_ZP() here because it could return directly, 4295 * bypassing the cleanup code, in the case of an error. 4296 */ 4297 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 4298 error = SET_ERROR(EIO); 4299 goto unlockout; 4300 } 4301 4302 szp = VTOZ(*svpp); 4303 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 4304 if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 4305 error = SET_ERROR(EIO); 4306 goto unlockout; 4307 } 4308 4309 /* 4310 * This is to prevent the creation of links into attribute space 4311 * by renaming a linked file into/out of an attribute directory. 4312 * See the comment in zfs_link() for why this is considered bad. 4313 */ 4314 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 4315 error = SET_ERROR(EINVAL); 4316 goto unlockout; 4317 } 4318 4319 /* 4320 * Must have write access at the source to remove the old entry 4321 * and write access at the target to create the new entry. 4322 * Note that if target and source are the same, this can be 4323 * done in a single check. 4324 */ 4325 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 4326 goto unlockout; 4327 4328 if ((*svpp)->v_type == VDIR) { 4329 /* 4330 * Avoid ".", "..", and aliases of "." for obvious reasons.
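* A lookup of "." yields the directory itself, so renaming
* "d/." would leave szp equal to sdzp and try to move a
* directory out from under its own entry, and ".." names an
* entry that sdzp does not even contain.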
*/ 4332 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 4333 sdzp == szp || 4334 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 4335 error = SET_ERROR(EINVAL); 4336 goto unlockout; 4337 } 4338 4339 /* 4340 * Check to make sure rename is valid. 4341 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 4342 */ 4343 if (error = zfs_rename_check(szp, sdzp, tdzp)) 4344 goto unlockout; 4345 } 4346 4347 /* 4348 * Does target exist? 4349 */ 4350 if (tzp) { 4351 /* 4352 * Source and target must be the same type. 4353 */ 4354 if ((*svpp)->v_type == VDIR) { 4355 if ((*tvpp)->v_type != VDIR) { 4356 error = SET_ERROR(ENOTDIR); 4357 goto unlockout; 4358 } else { 4359 cache_purge(tdvp); 4360 if (sdvp != tdvp) 4361 cache_purge(sdvp); 4362 } 4363 } else { 4364 if ((*tvpp)->v_type == VDIR) { 4365 error = SET_ERROR(EISDIR); 4366 goto unlockout; 4367 } 4368 } 4369 4370 /* 4371 * POSIX dictates that when the source and target 4372 * entries refer to the same file object, rename 4373 * must do nothing and exit without error. 4374 */ 4375 #ifndef __NetBSD__ 4376 /* 4377 * But on NetBSD we have a different system call to do 4378 * this, posix_rename, which sorta kinda handles this 4379 * case (modulo races), and our tests expect BSD 4380 * semantics for rename, so we'll do that until we can 4381 * push the choice between BSD and POSIX semantics into 4382 * the VOP_RENAME protocol as a flag. 4383 */ 4384 if (szp->z_id == tzp->z_id) { 4385 error = 0; 4386 goto unlockout; 4387 } 4388 #endif 4389 } 4390 4391 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 4392 if (tzp) 4393 vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 4394 4395 /* 4396 * Notify the target directory if it is not the same 4397 * as the source directory. 4398 */ 4399 if (tdvp != sdvp) { 4400 vnevent_rename_dest_dir(tdvp, ct); 4401 } 4402 4403 tx = dmu_tx_create(zfsvfs->z_os); 4404 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4405 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 4406 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 4407 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 4408 if (sdzp != tdzp) { 4409 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 4410 zfs_sa_upgrade_txholds(tx, tdzp); 4411 } 4412 if (tzp) { 4413 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 4414 zfs_sa_upgrade_txholds(tx, tzp); 4415 } 4416 4417 zfs_sa_upgrade_txholds(tx, szp); 4418 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 4419 error = dmu_tx_assign(tx, TXG_WAIT); 4420 if (error) { 4421 dmu_tx_abort(tx); 4422 goto unlockout; 4423 } 4424 4425 4426 if (tzp && (tzp->z_id != szp->z_id)) 4427 /* Attempt to remove the existing target */ 4428 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 4429 4430 if (error == 0) { 4431 if (!tzp || (tzp->z_id != szp->z_id)) 4432 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 4433 if (error == 0) { 4434 szp->z_pflags |= ZFS_AV_MODIFIED; 4435 4436 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 4437 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 4438 ASSERT0(error); 4439 4440 error = zfs_link_destroy(sdzp, snm, szp, tx, 4441 /* Kludge for BSD rename semantics. */ 4442 tzp && tzp->z_id == szp->z_id ? 0 : ZRENAMING, NULL); 4443 if (error == 0) { 4444 zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 4445 snm, tdzp, tnm, szp); 4446 4447 /* 4448 * Update path information for the target vnode 4449 */ 4450 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 4451 } else { 4452 /* 4453 * At this point, we have successfully created 4454 * the target name, but have failed to remove 4455 * the source name.
Since the create was done 4456 * with the ZRENAMING flag, there are 4457 * complications; for one, the link count is 4458 * wrong. The easiest way to deal with this 4459 * is to remove the newly created target, and 4460 * return the original error. This must 4461 * succeed; fortunately, it is very unlikely to 4462 * fail, since we just created it. 4463 */ 4464 VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx, 4465 ZRENAMING, NULL), ==, 0); 4466 } 4467 } 4468 if (error == 0) { 4469 cache_purge(*svpp); 4470 if (*tvpp != NULL) 4471 cache_purge(*tvpp); 4472 cache_purge_negative(tdvp); 4473 #ifdef __NetBSD__ 4474 if (*svpp == *tvpp) { 4475 VN_KNOTE(sdvp, NOTE_WRITE); 4476 VN_KNOTE(*svpp, (szp->z_links == 0 ? 4477 NOTE_DELETE : NOTE_LINK)); 4478 } else { 4479 genfs_rename_knote(sdvp, *svpp, tdvp, *tvpp, 4480 tzp != NULL ? tzp->z_links : 0); 4481 } 4482 #endif 4483 } 4484 } 4485 4486 dmu_tx_commit(tx); 4487 4488 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4489 zil_commit(zilog, 0); 4490 4491 unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 4492 ZFS_EXIT(zfsvfs); 4493 4494 VOP_UNLOCK(*svpp, 0); 4495 VOP_UNLOCK(sdvp, 0); 4496 #ifdef __NetBSD__ 4497 PNBUF_PUT(snm); 4498 PNBUF_PUT(tnm); 4499 #endif 4500 4501 if (*tvpp != sdvp && *tvpp != *svpp) 4502 if (*tvpp != NULL) 4503 VOP_UNLOCK(*tvpp, 0); 4504 if (tdvp != sdvp && tdvp != *svpp) 4505 if (tdvp != *tvpp) 4506 VOP_UNLOCK(tdvp, 0); 4507 4508 out: 4509 return (error); 4510 } 4511 4512 /* 4513 * Insert the indicated symbolic reference entry into the directory. 4514 * 4515 * IN: dvp - Directory to contain new symbolic link. 4516 * link - Name for new symlink entry. 4517 * vap - Attributes of new entry. 4518 * cr - credentials of caller. 4519 * ct - caller context 4520 * flags - case flags 4521 * 4522 * RETURN: 0 on success, error code on failure. 4523 * 4524 * Timestamps: 4525 * dvp - ctime|mtime updated 4526 */ 4527 /*ARGSUSED*/ 4528 static int 4529 zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, 4530 cred_t *cr, kthread_t *td) 4531 { 4532 znode_t *zp, *dzp = VTOZ(dvp); 4533 dmu_tx_t *tx; 4534 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4535 zilog_t *zilog; 4536 uint64_t len = strlen(link); 4537 int error; 4538 zfs_acl_ids_t acl_ids; 4539 boolean_t fuid_dirtied; 4540 uint64_t txtype = TX_SYMLINK; 4541 int flags = 0; 4542 4543 ASSERT(vap->va_type == VLNK); 4544 4545 ZFS_ENTER(zfsvfs); 4546 ZFS_VERIFY_ZP(dzp); 4547 zilog = zfsvfs->z_log; 4548 4549 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 4550 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4551 ZFS_EXIT(zfsvfs); 4552 return (SET_ERROR(EILSEQ)); 4553 } 4554 4555 if (len > MAXPATHLEN) { 4556 ZFS_EXIT(zfsvfs); 4557 return (SET_ERROR(ENAMETOOLONG)); 4558 } 4559 4560 if ((error = zfs_acl_ids_create(dzp, 0, 4561 vap, cr, NULL, &acl_ids)) != 0) { 4562 ZFS_EXIT(zfsvfs); 4563 return (error); 4564 } 4565 4566 /* 4567 * Attempt to lock directory; fail if entry already exists. 
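* ZNEW means the lookup succeeds only if the name is free:
* zfs_dirent_lookup() returns 0 and leaves the znode pointer
* NULL when no entry exists, and EEXIST when one does.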
4568 */ 4569 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 4570 if (error) { 4571 zfs_acl_ids_free(&acl_ids); 4572 ZFS_EXIT(zfsvfs); 4573 return (error); 4574 } 4575 4576 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4577 zfs_acl_ids_free(&acl_ids); 4578 ZFS_EXIT(zfsvfs); 4579 return (error); 4580 } 4581 4582 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 4583 zfs_acl_ids_free(&acl_ids); 4584 ZFS_EXIT(zfsvfs); 4585 return (SET_ERROR(EDQUOT)); 4586 } 4587 4588 getnewvnode_reserve(1); 4589 tx = dmu_tx_create(zfsvfs->z_os); 4590 fuid_dirtied = zfsvfs->z_fuid_dirty; 4591 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 4592 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4593 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 4594 ZFS_SA_BASE_ATTR_SIZE + len); 4595 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 4596 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 4597 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 4598 acl_ids.z_aclp->z_acl_bytes); 4599 } 4600 if (fuid_dirtied) 4601 zfs_fuid_txhold(zfsvfs, tx); 4602 error = dmu_tx_assign(tx, TXG_WAIT); 4603 if (error) { 4604 zfs_acl_ids_free(&acl_ids); 4605 dmu_tx_abort(tx); 4606 getnewvnode_drop_reserve(); 4607 ZFS_EXIT(zfsvfs); 4608 return (error); 4609 } 4610 4611 /* 4612 * Create a new object for the symlink. 4613 * for version 4 ZPL datsets the symlink will be an SA attribute 4614 */ 4615 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 4616 4617 if (fuid_dirtied) 4618 zfs_fuid_sync(zfsvfs, tx); 4619 4620 if (zp->z_is_sa) 4621 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 4622 link, len, tx); 4623 else 4624 zfs_sa_symlink(zp, link, len, tx); 4625 4626 zp->z_size = len; 4627 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 4628 &zp->z_size, sizeof (zp->z_size), tx); 4629 /* 4630 * Insert the new object into the directory. 4631 */ 4632 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 4633 4634 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 4635 *vpp = ZTOV(zp); 4636 4637 zfs_acl_ids_free(&acl_ids); 4638 4639 dmu_tx_commit(tx); 4640 4641 getnewvnode_drop_reserve(); 4642 4643 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4644 zil_commit(zilog, 0); 4645 4646 ZFS_EXIT(zfsvfs); 4647 return (error); 4648 } 4649 4650 /* 4651 * Return, in the buffer contained in the provided uio structure, 4652 * the symbolic path referred to by vp. 4653 * 4654 * IN: vp - vnode of symbolic link. 4655 * uio - structure to contain the link path. 4656 * cr - credentials of caller. 4657 * ct - caller context 4658 * 4659 * OUT: uio - structure containing the link path. 4660 * 4661 * RETURN: 0 on success, error code on failure. 4662 * 4663 * Timestamps: 4664 * vp - atime updated 4665 */ 4666 /* ARGSUSED */ 4667 static int 4668 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 4669 { 4670 znode_t *zp = VTOZ(vp); 4671 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4672 int error; 4673 4674 ZFS_ENTER(zfsvfs); 4675 ZFS_VERIFY_ZP(zp); 4676 4677 if (zp->z_is_sa) 4678 error = sa_lookup_uio(zp->z_sa_hdl, 4679 SA_ZPL_SYMLINK(zfsvfs), uio); 4680 else 4681 error = zfs_sa_readlink(zp, uio); 4682 4683 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4684 4685 ZFS_EXIT(zfsvfs); 4686 return (error); 4687 } 4688 4689 /* 4690 * Insert a new entry into directory tdvp referencing svp. 4691 * 4692 * IN: tdvp - Directory to contain new entry. 4693 * svp - vnode of new entry. 4694 * name - name of new entry. 4695 * cr - credentials of caller. 4696 * ct - caller context 4697 * 4698 * RETURN: 0 on success, error code on failure. 
4699 * 4700 * Timestamps: 4701 * tdvp - ctime|mtime updated 4702 * svp - ctime updated 4703 */ 4704 /* ARGSUSED */ 4705 static int 4706 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4707 caller_context_t *ct, int flags) 4708 { 4709 znode_t *dzp = VTOZ(tdvp); 4710 znode_t *tzp, *szp; 4711 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4712 zilog_t *zilog; 4713 dmu_tx_t *tx; 4714 int error; 4715 uint64_t parent; 4716 uid_t owner; 4717 4718 ASSERT(tdvp->v_type == VDIR); 4719 4720 ZFS_ENTER(zfsvfs); 4721 ZFS_VERIFY_ZP(dzp); 4722 zilog = zfsvfs->z_log; 4723 4724 /* 4725 * POSIX dictates that we return EPERM here. 4726 * Better choices include ENOTSUP or EISDIR. 4727 */ 4728 if (svp->v_type == VDIR) { 4729 ZFS_EXIT(zfsvfs); 4730 return (SET_ERROR(EPERM)); 4731 } 4732 4733 szp = VTOZ(svp); 4734 ZFS_VERIFY_ZP(szp); 4735 4736 if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) { 4737 ZFS_EXIT(zfsvfs); 4738 return (SET_ERROR(EPERM)); 4739 } 4740 4741 /* Prevent links to .zfs/shares files */ 4742 4743 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4744 &parent, sizeof (uint64_t))) != 0) { 4745 ZFS_EXIT(zfsvfs); 4746 return (error); 4747 } 4748 if (parent == zfsvfs->z_shares_dir) { 4749 ZFS_EXIT(zfsvfs); 4750 return (SET_ERROR(EPERM)); 4751 } 4752 4753 if (zfsvfs->z_utf8 && u8_validate(name, 4754 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4755 ZFS_EXIT(zfsvfs); 4756 return (SET_ERROR(EILSEQ)); 4757 } 4758 4759 /* 4760 * We do not support links between attributes and non-attributes 4761 * because of the potential security risk of creating links 4762 * into "normal" file space in order to circumvent restrictions 4763 * imposed in attribute space. 4764 */ 4765 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4766 ZFS_EXIT(zfsvfs); 4767 return (SET_ERROR(EINVAL)); 4768 } 4769 4770 4771 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4772 if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { 4773 ZFS_EXIT(zfsvfs); 4774 return (SET_ERROR(EPERM)); 4775 } 4776 4777 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4778 ZFS_EXIT(zfsvfs); 4779 return (error); 4780 } 4781 4782 /* 4783 * Attempt to lock directory; fail if entry already exists. 4784 */ 4785 error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW); 4786 if (error) { 4787 ZFS_EXIT(zfsvfs); 4788 return (error); 4789 } 4790 4791 tx = dmu_tx_create(zfsvfs->z_os); 4792 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4793 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 4794 zfs_sa_upgrade_txholds(tx, szp); 4795 zfs_sa_upgrade_txholds(tx, dzp); 4796 error = dmu_tx_assign(tx, TXG_WAIT); 4797 if (error) { 4798 dmu_tx_abort(tx); 4799 ZFS_EXIT(zfsvfs); 4800 return (error); 4801 } 4802 4803 error = zfs_link_create(dzp, name, szp, tx, 0); 4804 4805 if (error == 0) { 4806 uint64_t txtype = TX_LINK; 4807 zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4808 } 4809 4810 dmu_tx_commit(tx); 4811 4812 if (error == 0) { 4813 vnevent_link(svp, ct); 4814 } 4815 4816 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4817 zil_commit(zilog, 0); 4818 4819 ZFS_EXIT(zfsvfs); 4820 return (error); 4821 } 4822 4823 4824 /*ARGSUSED*/ 4825 void 4826 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4827 { 4828 znode_t *zp = VTOZ(vp); 4829 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4830 int error; 4831 4832 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 4833 if (zp->z_sa_hdl == NULL) { 4834 /* 4835 * The fs has been unmounted, or we did a 4836 * suspend/resume and this file no longer exists. 
4837 */ 4838 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4839 vrecycle(vp); 4840 return; 4841 } 4842 4843 if (zp->z_unlinked) { 4844 /* 4845 * Fast path to recycle a vnode of a removed file. 4846 */ 4847 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4848 vrecycle(vp); 4849 return; 4850 } 4851 4852 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4853 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4854 4855 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4856 zfs_sa_upgrade_txholds(tx, zp); 4857 error = dmu_tx_assign(tx, TXG_WAIT); 4858 if (error) { 4859 dmu_tx_abort(tx); 4860 } else { 4861 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 4862 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4863 zp->z_atime_dirty = 0; 4864 dmu_tx_commit(tx); 4865 } 4866 } 4867 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4868 } 4869 4870 4871 #ifdef __FreeBSD__ 4872 CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); 4873 CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); 4874 #endif 4875 4876 /*ARGSUSED*/ 4877 static int 4878 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4879 { 4880 znode_t *zp = VTOZ(vp); 4881 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4882 uint32_t gen; 4883 uint64_t gen64; 4884 uint64_t object = zp->z_id; 4885 zfid_short_t *zfid; 4886 int size, i, error; 4887 4888 ZFS_ENTER(zfsvfs); 4889 ZFS_VERIFY_ZP(zp); 4890 4891 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4892 &gen64, sizeof (uint64_t))) != 0) { 4893 ZFS_EXIT(zfsvfs); 4894 return (error); 4895 } 4896 4897 gen = (uint32_t)gen64; 4898 4899 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4900 4901 #ifdef illumos 4902 if (fidp->fid_len < size) { 4903 fidp->fid_len = size; 4904 ZFS_EXIT(zfsvfs); 4905 return (SET_ERROR(ENOSPC)); 4906 } 4907 #else 4908 fidp->fid_len = size; 4909 #endif 4910 4911 zfid = (zfid_short_t *)fidp; 4912 4913 zfid->zf_len = size; 4914 4915 for (i = 0; i < sizeof (zfid->zf_object); i++) 4916 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4917 4918 /* Must have a non-zero generation number to distinguish from .zfs */ 4919 if (gen == 0) 4920 gen = 1; 4921 for (i = 0; i < sizeof (zfid->zf_gen); i++) 4922 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4923 4924 if (size == LONG_FID_LEN) { 4925 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4926 zfid_long_t *zlfid; 4927 4928 zlfid = (zfid_long_t *)fidp; 4929 4930 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4931 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4932 4933 /* XXX - this should be the generation number for the objset */ 4934 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4935 zlfid->zf_setgen[i] = 0; 4936 } 4937 4938 ZFS_EXIT(zfsvfs); 4939 return (0); 4940 } 4941 4942 static int 4943 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4944 caller_context_t *ct) 4945 { 4946 znode_t *zp, *xzp; 4947 zfsvfs_t *zfsvfs; 4948 int error; 4949 4950 switch (cmd) { 4951 case _PC_LINK_MAX: 4952 *valp = INT_MAX; 4953 return (0); 4954 4955 case _PC_FILESIZEBITS: 4956 *valp = 64; 4957 return (0); 4958 #ifdef illumos 4959 case _PC_XATTR_EXISTS: 4960 zp = VTOZ(vp); 4961 zfsvfs = zp->z_zfsvfs; 4962 ZFS_ENTER(zfsvfs); 4963 ZFS_VERIFY_ZP(zp); 4964 *valp = 0; 4965 error = zfs_dirent_lookup(zp, "", &xzp, 4966 ZXATTR | ZEXISTS | ZSHARED); 4967 if (error == 0) { 4968 if (!zfs_dirempty(xzp)) 4969 *valp = 1; 4970 vrele(ZTOV(xzp)); 4971 } else if (error == ENOENT) { 4972 /* 4973 * If there aren't extended attributes, it's the 4974 * same as having zero of them. 
4975 */ 4976 error = 0; 4977 } 4978 ZFS_EXIT(zfsvfs); 4979 return (error); 4980 4981 case _PC_SATTR_ENABLED: 4982 case _PC_SATTR_EXISTS: 4983 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4984 (vp->v_type == VREG || vp->v_type == VDIR); 4985 return (0); 4986 4987 case _PC_ACCESS_FILTERING: 4988 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4989 vp->v_type == VDIR; 4990 return (0); 4991 4992 case _PC_ACL_ENABLED: 4993 *valp = _ACL_ACE_ENABLED; 4994 return (0); 4995 #endif /* illumos */ 4996 case _PC_MIN_HOLE_SIZE: 4997 *valp = (int)SPA_MINBLOCKSIZE; 4998 return (0); 4999 #ifdef illumos 5000 case _PC_TIMESTAMP_RESOLUTION: 5001 /* nanosecond timestamp resolution */ 5002 *valp = 1L; 5003 return (0); 5004 #endif 5005 case _PC_ACL_EXTENDED: 5006 *valp = 0; 5007 return (0); 5008 5009 #ifndef __NetBSD__ 5010 case _PC_ACL_NFS4: 5011 *valp = 1; 5012 return (0); 5013 5014 case _PC_ACL_PATH_MAX: 5015 *valp = ACL_MAX_ENTRIES; 5016 return (0); 5017 #endif 5018 5019 default: 5020 return (EOPNOTSUPP); 5021 } 5022 } 5023 5024 /*ARGSUSED*/ 5025 static int 5026 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5027 caller_context_t *ct) 5028 { 5029 znode_t *zp = VTOZ(vp); 5030 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5031 int error; 5032 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 5033 5034 ZFS_ENTER(zfsvfs); 5035 ZFS_VERIFY_ZP(zp); 5036 error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5037 ZFS_EXIT(zfsvfs); 5038 5039 return (error); 5040 } 5041 5042 /*ARGSUSED*/ 5043 int 5044 zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5045 caller_context_t *ct) 5046 { 5047 znode_t *zp = VTOZ(vp); 5048 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5049 int error; 5050 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5051 zilog_t *zilog = zfsvfs->z_log; 5052 5053 ZFS_ENTER(zfsvfs); 5054 ZFS_VERIFY_ZP(zp); 5055 5056 error = zfs_setacl(zp, vsecp, skipaclchk, cr); 5057 5058 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5059 zil_commit(zilog, 0); 5060 5061 ZFS_EXIT(zfsvfs); 5062 return (error); 5063 } 5064 5065 static int 5066 ioflags(int ioflags) 5067 { 5068 int flags = 0; 5069 5070 if (ioflags & IO_APPEND) 5071 flags |= FAPPEND; 5072 if (ioflags & IO_NDELAY) 5073 flags |= FNONBLOCK; 5074 if (ioflags & IO_SYNC) 5075 flags |= (FSYNC | FDSYNC | FRSYNC); 5076 5077 return (flags); 5078 } 5079 5080 #ifdef __NetBSD__ 5081 5082 static int 5083 zfs_netbsd_open(void *v) 5084 { 5085 struct vop_open_args *ap = v; 5086 5087 return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL)); 5088 } 5089 5090 static int 5091 zfs_netbsd_close(void *v) 5092 { 5093 struct vop_close_args *ap = v; 5094 5095 return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL)); 5096 } 5097 5098 static int 5099 zfs_netbsd_ioctl(void *v) 5100 { 5101 struct vop_ioctl_args *ap = v; 5102 5103 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 5104 ap->a_fflag, ap->a_cred, NULL, NULL)); 5105 } 5106 5107 5108 static int 5109 zfs_netbsd_read(void *v) 5110 { 5111 struct vop_read_args *ap = v; 5112 vnode_t *vp = ap->a_vp; 5113 znode_t *zp = VTOZ(vp); 5114 5115 switch (vp->v_type) { 5116 case VBLK: 5117 case VCHR: 5118 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5119 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); 5120 case VFIFO: 5121 ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp); 5122 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); 5123 } 5124 5125 return (zfs_read(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL)); 5126 } 5127 5128 static int 5129 zfs_netbsd_write(void *v) 5130 { 5131 struct vop_write_args *ap = v; 5132 vnode_t *vp = ap->a_vp; 5133 znode_t *zp = VTOZ(vp); 5134 struct uio *uio = ap->a_uio; 5135 off_t osize = zp->z_size; 5136 int error, resid; 5137 5138 switch (vp->v_type) { 5139 case VBLK: 5140 case VCHR: 5141 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5142 return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); 5143 case VFIFO: 5144 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 5145 return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); 5146 } 5147 5148 resid = uio->uio_resid; 5149 error = zfs_write(vp, uio, ioflags(ap->a_ioflag), ap->a_cred, NULL); 5150 5151 return error; 5152 } 5153 5154 static int 5155 zfs_netbsd_access(void *v) 5156 { 5157 struct vop_access_args /* { 5158 struct vnode *a_vp; 5159 accmode_t a_accmode; 5160 kauth_cred_t a_cred; 5161 } */ *ap = v; 5162 struct vnode *vp = ap->a_vp; 5163 accmode_t accmode = ap->a_accmode; 5164 mode_t zfs_mode = 0; 5165 kauth_cred_t cred = ap->a_cred; 5166 int error; 5167 5168 /* 5169 * XXX This is really random, especially the left shift by six, 5170 * and it exists only because of randomness in zfs_unix_to_v4 5171 * and zfs_zaccess_rwx in zfs_acl.c. 5172 */ 5173 if (accmode & VREAD) 5174 zfs_mode |= S_IROTH; 5175 if (accmode & VWRITE) 5176 zfs_mode |= S_IWOTH; 5177 if (accmode & VEXEC) 5178 zfs_mode |= S_IXOTH; 5179 zfs_mode <<= 6; 5180 5181 KASSERT(VOP_ISLOCKED(vp)); 5182 error = zfs_access(vp, zfs_mode, 0, cred, NULL); 5183 5184 /* We expect EACCES as common error. 
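* zfs_access() can report a denied permission check as EPERM,
* while NetBSD callers expect EACCES for an ordinary access
* failure, so map the former to the latter.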
*/ 5185 if (error == EPERM) 5186 error = EACCES; 5187 5188 return (error); 5189 } 5190 5191 static int 5192 zfs_netbsd_lookup(void *v) 5193 { 5194 struct vop_lookup_v2_args /* { 5195 struct vnode *a_dvp; 5196 struct vnode **a_vpp; 5197 struct componentname *a_cnp; 5198 } */ *ap = v; 5199 struct vnode *dvp = ap->a_dvp; 5200 struct vnode **vpp = ap->a_vpp; 5201 struct componentname *cnp = ap->a_cnp; 5202 char *nm, short_nm[31]; 5203 int error; 5204 int iswhiteout; 5205 5206 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5207 5208 *vpp = NULL; 5209 5210 /* 5211 * Do an access check before the cache lookup. zfs_lookup does 5212 * an access check too, but it's too scary to contemplate 5213 * injecting our namecache stuff into zfs internals. 5214 * 5215 * XXX Is this the correct access check? 5216 */ 5217 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) 5218 goto out; 5219 5220 /* 5221 * Check the namecache before entering zfs_lookup. 5222 * cache_lookup does the locking dance for us. 5223 */ 5224 if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 5225 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { 5226 if (iswhiteout) { 5227 cnp->cn_flags |= ISWHITEOUT; 5228 } 5229 return *vpp == NULL ? ENOENT : 0; 5230 } 5231 5232 /* 5233 * zfs_lookup wants a null-terminated component name, but namei 5234 * gives us a pointer into the full pathname. 5235 */ 5236 ASSERT(cnp->cn_namelen < PATH_MAX - 1); 5237 if (cnp->cn_namelen + 1 > sizeof(short_nm)) 5238 nm = PNBUF_GET(); 5239 else 5240 nm = short_nm; 5241 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5242 5243 error = zfs_lookup(dvp, nm, vpp, 0, cnp, cnp->cn_nameiop, cnp->cn_cred); 5244 5245 if (nm != short_nm) 5246 PNBUF_PUT(nm); 5247 5248 /* 5249 * Translate errors to match our namei insanity. Also, if the 5250 * caller wants to create an entry here, it's apparently our 5251 * responsibility as lookup to make sure that's permissible. 5252 * Go figure. 5253 */ 5254 if (cnp->cn_flags & ISLASTCN) { 5255 switch (cnp->cn_nameiop) { 5256 case CREATE: 5257 case RENAME: 5258 if (error == ENOENT) { 5259 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5260 if (error) 5261 break; 5262 error = EJUSTRETURN; 5263 break; 5264 } 5265 break; 5266 case DELETE: 5267 if (error == 0) { 5268 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); 5269 if (error) { 5270 VN_RELE(*vpp); 5271 *vpp = NULL; 5272 } 5273 } 5274 break; 5275 } 5276 } 5277 5278 if (error) { 5279 KASSERT(*vpp == NULL); 5280 goto out; 5281 } 5282 KASSERT(*vpp != NULL); 5283 5284 if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { 5285 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5286 KASSERT(dvp == *vpp); 5287 } else if ((cnp->cn_namelen == 2) && 5288 (cnp->cn_nameptr[0] == '.') && 5289 (cnp->cn_nameptr[1] == '.')) { 5290 KASSERT(cnp->cn_flags & ISDOTDOT); 5291 } else { 5292 KASSERT(!(cnp->cn_flags & ISDOTDOT)); 5293 } 5294 5295 out: 5296 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5297 5298 /* 5299 * Insert name into cache if appropriate. 
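* A successful lookup is entered normally; ENOENT becomes a
* negative entry (cache_enter() with a NULL vnode), except for
* CREATE lookups, where the name is about to come into existence.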
5300 */ 5301 5302 if (error == 0 || (error == ENOENT && cnp->cn_nameiop != CREATE)) 5303 cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, 5304 cnp->cn_flags); 5305 5306 return (error); 5307 } 5308 5309 static int 5310 zfs_netbsd_create(void *v) 5311 { 5312 struct vop_create_v3_args /* { 5313 struct vnode *a_dvp; 5314 struct vnode **a_vpp; 5315 struct componentname *a_cnp; 5316 struct vattr *a_vap; 5317 } */ *ap = v; 5318 struct vnode *dvp = ap->a_dvp; 5319 struct vnode **vpp = ap->a_vpp; 5320 struct componentname *cnp = ap->a_cnp; 5321 struct vattr *vap = ap->a_vap; 5322 char *nm; 5323 int mode; 5324 int error; 5325 5326 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5327 5328 vattr_init_mask(vap); 5329 mode = vap->va_mode & ALLPERMS; 5330 5331 /* ZFS wants a null-terminated name. */ 5332 nm = PNBUF_GET(); 5333 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5334 5335 /* XXX !EXCL is wrong here... */ 5336 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5337 5338 PNBUF_PUT(nm); 5339 5340 KASSERT((error == 0) == (*vpp != NULL)); 5341 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5342 if (*vpp != NULL) 5343 VOP_UNLOCK(*vpp, 0); 5344 5345 return (error); 5346 } 5347 5348 static int 5349 zfs_netbsd_mknod(void *v) 5350 { 5351 struct vop_mknod_v3_args /* { 5352 struct vnode *a_dvp; 5353 struct vnode **a_vpp; 5354 struct componentname *a_cnp; 5355 struct vattr *a_vap; 5356 } */ *ap = v; 5357 struct vnode *dvp = ap->a_dvp; 5358 struct vnode **vpp = ap->a_vpp; 5359 struct componentname *cnp = ap->a_cnp; 5360 struct vattr *vap = ap->a_vap; 5361 char *nm; 5362 int mode; 5363 int error; 5364 5365 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5366 5367 vattr_init_mask(vap); 5368 mode = vap->va_mode & ALLPERMS; 5369 5370 /* ZFS wants a null-terminated name. */ 5371 nm = PNBUF_GET(); 5372 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5373 5374 /* XXX !EXCL is wrong here... */ 5375 error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL); 5376 5377 PNBUF_PUT(nm); 5378 5379 KASSERT((error == 0) == (*vpp != NULL)); 5380 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5381 if (*vpp != NULL) 5382 VOP_UNLOCK(*vpp, 0); 5383 5384 return (error); 5385 } 5386 5387 static int 5388 zfs_netbsd_remove(void *v) 5389 { 5390 struct vop_remove_v3_args /* { 5391 struct vnode *a_dvp; 5392 struct vnode *a_vp; 5393 struct componentname *a_cnp; 5394 nlink_t ctx_vp_new_nlink; 5395 } */ *ap = v; 5396 struct vnode *dvp = ap->a_dvp; 5397 struct vnode *vp = ap->a_vp; 5398 struct componentname *cnp = ap->a_cnp; 5399 char *nm; 5400 int error; 5401 5402 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5403 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5404 5405 /* ZFS wants a null-terminated name. */ 5406 nm = PNBUF_GET(); 5407 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5408 5409 error = zfs_remove(dvp, vp, nm, cnp->cn_cred); 5410 5411 /* 5412 * XXX Should update ctx_vp_new_nlink, but for now the 5413 * XXX the kevent sent on "vp" matches historical behavior. 
5414 */ 5415 5416 PNBUF_PUT(nm); 5417 vput(vp); 5418 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5419 return (error); 5420 } 5421 5422 static int 5423 zfs_netbsd_mkdir(void *v) 5424 { 5425 struct vop_mkdir_v3_args /* { 5426 struct vnode *a_dvp; 5427 struct vnode **a_vpp; 5428 struct componentname *a_cnp; 5429 struct vattr *a_vap; 5430 } */ *ap = v; 5431 struct vnode *dvp = ap->a_dvp; 5432 struct vnode **vpp = ap->a_vpp; 5433 struct componentname *cnp = ap->a_cnp; 5434 struct vattr *vap = ap->a_vap; 5435 char *nm; 5436 int error; 5437 5438 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5439 5440 vattr_init_mask(vap); 5441 5442 /* ZFS wants a null-terminated name. */ 5443 nm = PNBUF_GET(); 5444 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5445 5446 error = zfs_mkdir(dvp, nm, vap, vpp, cnp->cn_cred); 5447 5448 PNBUF_PUT(nm); 5449 5450 KASSERT((error == 0) == (*vpp != NULL)); 5451 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5452 if (*vpp != NULL) 5453 VOP_UNLOCK(*vpp, 0); 5454 5455 return (error); 5456 } 5457 5458 static int 5459 zfs_netbsd_rmdir(void *v) 5460 { 5461 struct vop_rmdir_v2_args /* { 5462 struct vnode *a_dvp; 5463 struct vnode *a_vp; 5464 struct componentname *a_cnp; 5465 } */ *ap = v; 5466 struct vnode *dvp = ap->a_dvp; 5467 struct vnode *vp = ap->a_vp; 5468 struct componentname *cnp = ap->a_cnp; 5469 char *nm; 5470 int error; 5471 5472 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5473 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 5474 5475 /* ZFS wants a null-terminated name. */ 5476 nm = PNBUF_GET(); 5477 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5478 5479 error = zfs_rmdir(dvp, vp, nm, cnp->cn_cred); 5480 5481 PNBUF_PUT(nm); 5482 vput(vp); 5483 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5484 return error; 5485 } 5486 5487 static int 5488 zfs_netbsd_readdir(void *v) 5489 { 5490 struct vop_readdir_args *ap = v; 5491 5492 return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag, 5493 ap->a_ncookies, ap->a_cookies)); 5494 } 5495 5496 static int 5497 zfs_netbsd_fsync(void *v) 5498 { 5499 struct vop_fsync_args *ap = v; 5500 5501 return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5502 } 5503 5504 static int 5505 zfs_spec_fsync(void *v) 5506 { 5507 struct vop_fsync_args *ap = v; 5508 int error; 5509 5510 error = spec_fsync(v); 5511 if (error) 5512 return error; 5513 5514 return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL)); 5515 } 5516 5517 static int 5518 zfs_netbsd_getattr(void *v) 5519 { 5520 struct vop_getattr_args *ap = v; 5521 vattr_t *vap = ap->a_vap; 5522 xvattr_t xvap; 5523 u_long fflags = 0; 5524 int error; 5525 5526 xva_init(&xvap); 5527 xvap.xva_vattr = *vap; 5528 xvap.xva_vattr.va_mask |= AT_XVATTR; 5529 5530 /* Convert chflags into ZFS-type flags. */ 5531 /* XXX: what about SF_SETTABLE?. */ 5532 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 5533 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 5534 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 5535 XVA_SET_REQ(&xvap, XAT_NODUMP); 5536 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL); 5537 if (error != 0) 5538 return (error); 5539 5540 /* Convert ZFS xattr into chflags. 
*/ 5541 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 5542 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 5543 fflags |= (fflag); \ 5544 } while (0) 5545 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 5546 xvap.xva_xoptattrs.xoa_immutable); 5547 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 5548 xvap.xva_xoptattrs.xoa_appendonly); 5549 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 5550 xvap.xva_xoptattrs.xoa_nounlink); 5551 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 5552 xvap.xva_xoptattrs.xoa_nodump); 5553 #undef FLAG_CHECK 5554 *vap = xvap.xva_vattr; 5555 vap->va_flags = fflags; 5556 return (0); 5557 } 5558 5559 static int 5560 zfs_netbsd_setattr(void *v) 5561 { 5562 struct vop_setattr_args *ap = v; 5563 vnode_t *vp = ap->a_vp; 5564 vattr_t *vap = ap->a_vap; 5565 cred_t *cred = ap->a_cred; 5566 znode_t *zp = VTOZ(vp); 5567 xvattr_t xvap; 5568 kauth_action_t action; 5569 u_long fflags, sfflags = 0; 5570 uint64_t zflags; 5571 int error, flags = 0; 5572 bool changing_sysflags; 5573 5574 vattr_init_mask(vap); 5575 vap->va_mask &= ~AT_NOSET; 5576 if (ISSET(vap->va_vaflags, VA_UTIMES_NULL)) 5577 flags |= ATTR_UTIME; 5578 5579 xva_init(&xvap); 5580 xvap.xva_vattr = *vap; 5581 5582 zflags = VTOZ(vp)->z_pflags; 5583 5584 /* Ignore size changes on device nodes. */ 5585 if (vp->v_type == VBLK || vp->v_type == VCHR) 5586 xvap.xva_vattr.va_mask &= ~AT_SIZE; 5587 if (vap->va_flags != VNOVAL) { 5588 int error; 5589 5590 fflags = vap->va_flags; 5591 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0) 5592 return (EOPNOTSUPP); 5593 5594 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \ 5595 if (((fflags & (fflag)) && !(zflags & (zflag))) || \ 5596 ((zflags & (zflag)) && !(fflags & (fflag)))) { \ 5597 XVA_SET_REQ(&xvap, (xflag)); \ 5598 (xfield) = ((fflags & (fflag)) != 0); \ 5599 if (((fflag) & SF_SETTABLE) != 0) \ 5600 sfflags |= (fflag); \ 5601 } \ 5602 } while (0) 5603 /* Convert chflags into ZFS-type flags. */ 5604 /* XXX: what about SF_SETTABLE?. 
*/ 5605 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE, 5606 xvap.xva_xoptattrs.xoa_immutable); 5607 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY, 5608 xvap.xva_xoptattrs.xoa_appendonly); 5609 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK, 5610 xvap.xva_xoptattrs.xoa_nounlink); 5611 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP, 5612 xvap.xva_xoptattrs.xoa_nodump); 5613 #undef FLAG_CHANGE 5614 5615 action = KAUTH_VNODE_WRITE_FLAGS; 5616 changing_sysflags = false; 5617 5618 if (zflags & (ZFS_IMMUTABLE|ZFS_APPENDONLY|ZFS_NOUNLINK)) { 5619 action |= KAUTH_VNODE_HAS_SYSFLAGS; 5620 } 5621 if (sfflags != 0) { 5622 action |= KAUTH_VNODE_WRITE_SYSFLAGS; 5623 changing_sysflags = true; 5624 } 5625 5626 error = kauth_authorize_vnode(cred, action, vp, NULL, 5627 genfs_can_chflags(vp, cred, zp->z_uid, changing_sysflags)); 5628 if (error) 5629 return error; 5630 } 5631 5632 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 5633 vap->va_birthtime.tv_sec != VNOVAL) { 5634 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, 5635 NULL, genfs_can_chtimes(vp, cred, zp->z_uid, 5636 vap->va_vaflags)); 5637 if (error) 5638 return error; 5639 } 5640 5641 error = zfs_setattr(vp, (vattr_t *)&xvap, flags, cred, NULL); 5642 if (error) 5643 return error; 5644 5645 cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true); 5646 5647 return error; 5648 } 5649 5650 static int 5651 zfs_netbsd_rename(void *v) 5652 { 5653 struct vop_rename_args /* { 5654 struct vnode *a_fdvp; 5655 struct vnode *a_fvp; 5656 struct componentname *a_fcnp; 5657 struct vnode *a_tdvp; 5658 struct vnode *a_tvp; 5659 struct componentname *a_tcnp; 5660 } */ *ap = v; 5661 vnode_t *fdvp = ap->a_fdvp; 5662 vnode_t *fvp = ap->a_fvp; 5663 struct componentname *fcnp = ap->a_fcnp; 5664 vnode_t *tdvp = ap->a_tdvp; 5665 vnode_t *tvp = ap->a_tvp; 5666 struct componentname *tcnp = ap->a_tcnp; 5667 kauth_cred_t cred; 5668 int error; 5669 5670 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 5671 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 5672 KASSERT(fdvp->v_type == VDIR); 5673 KASSERT(tdvp->v_type == VDIR); 5674 5675 cred = fcnp->cn_cred; 5676 5677 /* 5678 * XXX Want a better equality test. `tcnp->cn_cred == cred' 5679 * hoses p2k because puffs transmits the creds separately and 5680 * allocates distinct but equivalent structures for them. 5681 */ 5682 KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred)); 5683 5684 /* 5685 * Drop the insane locks. 5686 */ 5687 VOP_UNLOCK(tdvp, 0); 5688 if (tvp != NULL && tvp != tdvp) 5689 VOP_UNLOCK(tvp, 0); 5690 5691 /* 5692 * Release the source and target nodes; zfs_rename will look 5693 * them up again once the locking situation is sane. 5694 */ 5695 VN_RELE(fvp); 5696 if (tvp != NULL) 5697 VN_RELE(tvp); 5698 fvp = NULL; 5699 tvp = NULL; 5700 5701 /* 5702 * Do the rename ZFSly. 5703 */ 5704 error = zfs_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp, cred); 5705 5706 /* 5707 * Release the directories now too, because the VOP_RENAME 5708 * protocol is insane. 
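* The caller hands us one reference on each of the four vnodes
* and expects every one of them to be consumed, whether the
* rename succeeds or fails.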
5709 */ 5710 5711 VN_RELE(fdvp); 5712 VN_RELE(tdvp); 5713 if (fvp != NULL) 5714 VN_RELE(fvp); 5715 if (tvp != NULL) 5716 VN_RELE(tvp); 5717 5718 return (error); 5719 } 5720 5721 static int 5722 zfs_netbsd_symlink(void *v) 5723 { 5724 struct vop_symlink_v3_args /* { 5725 struct vnode *a_dvp; 5726 struct vnode **a_vpp; 5727 struct componentname *a_cnp; 5728 struct vattr *a_vap; 5729 char *a_target; 5730 } */ *ap = v; 5731 struct vnode *dvp = ap->a_dvp; 5732 struct vnode **vpp = ap->a_vpp; 5733 struct componentname *cnp = ap->a_cnp; 5734 struct vattr *vap = ap->a_vap; 5735 char *target = ap->a_target; 5736 char *nm; 5737 int error; 5738 5739 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5740 5741 vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */ 5742 vattr_init_mask(vap); 5743 5744 /* ZFS wants a null-terminated name. */ 5745 nm = PNBUF_GET(); 5746 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5747 5748 error = zfs_symlink(dvp, vpp, nm, vap, target, cnp->cn_cred, 0); 5749 5750 PNBUF_PUT(nm); 5751 KASSERT((error == 0) == (*vpp != NULL)); 5752 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5753 if (*vpp != NULL) 5754 VOP_UNLOCK(*vpp, 0); 5755 5756 return (error); 5757 } 5758 5759 static int 5760 zfs_netbsd_readlink(void *v) 5761 { 5762 struct vop_readlink_args *ap = v; 5763 5764 return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL)); 5765 } 5766 5767 static int 5768 zfs_netbsd_link(void *v) 5769 { 5770 struct vop_link_v2_args /* { 5771 struct vnode *a_dvp; 5772 struct vnode *a_vp; 5773 struct componentname *a_cnp; 5774 } */ *ap = v; 5775 struct vnode *dvp = ap->a_dvp; 5776 struct vnode *vp = ap->a_vp; 5777 struct componentname *cnp = ap->a_cnp; 5778 char *nm; 5779 int error; 5780 5781 KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); 5782 5783 /* ZFS wants a null-terminated name. */ 5784 nm = PNBUF_GET(); 5785 (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); 5786 5787 vn_lock(vp, LK_EXCLUSIVE); 5788 error = zfs_link(dvp, vp, nm, cnp->cn_cred, 5789 NULL, 0); 5790 5791 PNBUF_PUT(nm); 5792 VOP_UNLOCK(vp, 0); 5793 return error; 5794 } 5795 5796 static int 5797 zfs_netbsd_inactive(void *v) 5798 { 5799 struct vop_inactive_v2_args *ap = v; 5800 vnode_t *vp = ap->a_vp; 5801 znode_t *zp = VTOZ(vp); 5802 5803 /* 5804 * NetBSD: nothing to do here, other than indicate if the 5805 * vnode should be reclaimed. No need to lock, if we race 5806 * vrele() will call us again. 5807 */ 5808 *ap->a_recycle = (zp->z_unlinked != 0); 5809 5810 return (0); 5811 } 5812 5813 static int 5814 zfs_netbsd_reclaim(void *v) 5815 { 5816 struct vop_reclaim_v2_args /* { 5817 struct vnode *a_vp; 5818 } */ *ap = v; 5819 struct vnode *vp = ap->a_vp; 5820 znode_t *zp; 5821 zfsvfs_t *zfsvfs; 5822 int error; 5823 5824 VOP_UNLOCK(vp, 0); 5825 zp = VTOZ(vp); 5826 zfsvfs = zp->z_zfsvfs; 5827 5828 KASSERTMSG(!vn_has_cached_data(vp), "vp %p", vp); 5829 5830 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 5831 5832 /* 5833 * Process a deferred atime update. 
5834 */ 5835 if (zp->z_atime_dirty && zp->z_unlinked == 0 && zp->z_sa_hdl != NULL) { 5836 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 5837 5838 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 5839 zfs_sa_upgrade_txholds(tx, zp); 5840 error = dmu_tx_assign(tx, TXG_WAIT); 5841 if (error) { 5842 dmu_tx_abort(tx); 5843 } else { 5844 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 5845 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 5846 zp->z_atime_dirty = 0; 5847 dmu_tx_commit(tx); 5848 } 5849 } 5850 5851 /* 5852 * Operation zfs_znode.c::zfs_zget_cleaner() depends on this 5853 * zil_commit() as a barrier to guarantee the znode cannot 5854 * get freed before its log entries are resolved. 5855 */ 5856 if (zfsvfs->z_log) 5857 zil_commit(zfsvfs->z_log, zp->z_id); 5858 5859 if (zp->z_sa_hdl == NULL) 5860 zfs_znode_free(zp); 5861 else 5862 zfs_zinactive(zp); 5863 rw_exit(&zfsvfs->z_teardown_inactive_lock); 5864 return 0; 5865 } 5866 5867 static int 5868 zfs_netbsd_fid(void *v) 5869 { 5870 struct vop_fid_args *ap = v; 5871 5872 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5873 } 5874 5875 static int 5876 zfs_netbsd_pathconf(void *v) 5877 { 5878 struct vop_pathconf_args *ap = v; 5879 ulong_t val; 5880 int error; 5881 5882 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL); 5883 if (error == 0) 5884 *ap->a_retval = val; 5885 else if (error == EOPNOTSUPP) { 5886 switch (ap->a_name) { 5887 case _PC_NAME_MAX: 5888 *ap->a_retval = NAME_MAX; 5889 return (0); 5890 case _PC_PATH_MAX: 5891 *ap->a_retval = PATH_MAX; 5892 return (0); 5893 case _PC_LINK_MAX: 5894 *ap->a_retval = LINK_MAX; 5895 return (0); 5896 case _PC_MAX_CANON: 5897 *ap->a_retval = MAX_CANON; 5898 return (0); 5899 case _PC_MAX_INPUT: 5900 *ap->a_retval = MAX_INPUT; 5901 return (0); 5902 case _PC_PIPE_BUF: 5903 *ap->a_retval = PIPE_BUF; 5904 return (0); 5905 case _PC_CHOWN_RESTRICTED: 5906 *ap->a_retval = 1; 5907 return (0); 5908 case _PC_NO_TRUNC: 5909 *ap->a_retval = 1; 5910 return (0); 5911 case _PC_VDISABLE: 5912 *ap->a_retval = _POSIX_VDISABLE; 5913 return (0); 5914 default: 5915 return (EINVAL); 5916 } 5917 /* NOTREACHED */ 5918 } 5919 return (error); 5920 } 5921 5922 static int 5923 zfs_netbsd_advlock(void *v) 5924 { 5925 struct vop_advlock_args /* { 5926 struct vnode *a_vp; 5927 void *a_id; 5928 int a_op; 5929 struct flock *a_fl; 5930 int a_flags; 5931 } */ *ap = v; 5932 struct vnode *vp; 5933 struct znode *zp; 5934 struct zfsvfs *zfsvfs; 5935 int error; 5936 5937 vp = ap->a_vp; 5938 zp = VTOZ(vp); 5939 zfsvfs = zp->z_zfsvfs; 5940 5941 ZFS_ENTER(zfsvfs); 5942 ZFS_VERIFY_ZP(zp); 5943 error = lf_advlock(ap, &zp->z_lockf, zp->z_size); 5944 ZFS_EXIT(zfsvfs); 5945 5946 return error; 5947 } 5948 5949 static int 5950 zfs_netbsd_getpages(void *v) 5951 { 5952 struct vop_getpages_args /* { 5953 struct vnode *a_vp; 5954 voff_t a_offset; 5955 struct vm_page **a_m; 5956 int *a_count; 5957 int a_centeridx; 5958 vm_prot_t a_access_type; 5959 int a_advice; 5960 int a_flags; 5961 } */ * const ap = v; 5962 5963 vnode_t *const vp = ap->a_vp; 5964 const int flags = ap->a_flags; 5965 const bool async = (flags & PGO_SYNCIO) == 0; 5966 const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0; 5967 5968 struct uvm_object * const uobj = &vp->v_uobj; 5969 krwlock_t * const rw = uobj->vmobjlock; 5970 znode_t *zp = VTOZ(vp); 5971 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5972 vfs_t *mp; 5973 struct vm_page *pg; 5974 caddr_t va; 5975 int npages = *ap->a_count, found, err = 0; 5976 5977 if (flags & PGO_LOCKED) { 5978 uvn_findpages(uobj, 
ap->a_offset, &npages, ap->a_m, NULL, 5979 UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY | 5980 (memwrite ? UFP_NORDONLY : 0)); 5981 KASSERT(npages == *ap->a_count); 5982 if (memwrite) { 5983 KASSERT(rw_write_held(uobj->vmobjlock)); 5984 for (int i = 0; i < npages; i++) { 5985 pg = ap->a_m[i]; 5986 if (pg == NULL || pg == PGO_DONTCARE) { 5987 continue; 5988 } 5989 if (uvm_pagegetdirty(pg) == 5990 UVM_PAGE_STATUS_CLEAN) { 5991 uvm_pagemarkdirty(pg, 5992 UVM_PAGE_STATUS_UNKNOWN); 5993 } 5994 } 5995 } 5996 return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; 5997 } 5998 rw_exit(rw); 5999 6000 if (async) { 6001 return 0; 6002 } 6003 6004 mp = vp->v_mount; 6005 fstrans_start(mp); 6006 if (vp->v_mount != mp) { 6007 fstrans_done(mp); 6008 return ENOENT; 6009 } 6010 ZFS_ENTER(zfsvfs); 6011 ZFS_VERIFY_ZP(zp); 6012 6013 rw_enter(rw, RW_WRITER); 6014 if (ap->a_offset + (npages << PAGE_SHIFT) > round_page(vp->v_size)) { 6015 rw_exit(rw); 6016 ZFS_EXIT(zfsvfs); 6017 fstrans_done(mp); 6018 return EINVAL; 6019 } 6020 uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL, UFP_ALL); 6021 KASSERT(npages == *ap->a_count); 6022 6023 for (int i = 0; i < npages; i++) { 6024 pg = ap->a_m[i]; 6025 if (pg->flags & PG_FAKE) { 6026 voff_t offset = pg->offset; 6027 KASSERT(pg->offset == ap->a_offset + (i << PAGE_SHIFT)); 6028 rw_exit(rw); 6029 6030 va = zfs_map_page(pg, S_WRITE); 6031 err = dmu_read(zfsvfs->z_os, zp->z_id, offset, 6032 PAGE_SIZE, va, DMU_READ_PREFETCH); 6033 zfs_unmap_page(pg, va); 6034 6035 if (err != 0) { 6036 uvm_aio_aiodone_pages(ap->a_m, npages, false, err); 6037 memset(ap->a_m, 0, sizeof(ap->a_m[0]) * 6038 npages); 6039 break; 6040 } 6041 rw_enter(rw, RW_WRITER); 6042 pg->flags &= ~(PG_FAKE); 6043 } 6044 6045 if (memwrite && uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { 6046 /* For write faults, start dirtiness tracking. */ 6047 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); 6048 } 6049 } 6050 rw_exit(rw); 6051 6052 ZFS_EXIT(zfsvfs); 6053 fstrans_done(mp); 6054 6055 return (err); 6056 } 6057 6058 static int 6059 zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags) 6060 { 6061 znode_t *zp = VTOZ(vp); 6062 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6063 dmu_tx_t *tx; 6064 voff_t off, koff; 6065 voff_t len, klen; 6066 int err; 6067 6068 bool *cleanedp; 6069 struct uvm_object *uobj = &vp->v_uobj; 6070 krwlock_t *rw = uobj->vmobjlock; 6071 6072 if (zp->z_sa_hdl == NULL) { 6073 err = 0; 6074 goto out; 6075 } 6076 6077 /* 6078 * Calculate the length and assert that no whole pages are past EOF. 6079 * This check is equivalent to "off + len <= round_page(zp->z_size)", 6080 * with gyrations to avoid signed integer overflow. 6081 */ 6082 6083 off = pp[0]->offset; 6084 len = count * PAGESIZE; 6085 KASSERT(off <= zp->z_size); 6086 KASSERT(len <= round_page(zp->z_size)); 6087 KASSERT(off <= round_page(zp->z_size) - len); 6088 6089 /* 6090 * If EOF is within the last page, reduce len to avoid writing past 6091 * the file size in the ZFS buffer. Assert that 6092 * "off + len <= zp->z_size", again avoiding signed integer overflow. 
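* For example, with zp->z_size == 0x1800 and a single page at
* off == 0x1000, len starts as 0x1000; len > z_size - off
* (0x1000 > 0x800), so len is clipped to 0x800 and both asserts
* below hold without ever forming the sum off + len.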
6093 */ 6094 6095 if (len > zp->z_size - off) { 6096 len = zp->z_size - off; 6097 } 6098 KASSERT(len <= zp->z_size); 6099 KASSERT(off <= zp->z_size - len); 6100 6101 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 6102 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 6103 err = SET_ERROR(EDQUOT); 6104 goto out; 6105 } 6106 tx = dmu_tx_create(zfsvfs->z_os); 6107 dmu_tx_hold_write(tx, zp->z_id, off, len); 6108 6109 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 6110 zfs_sa_upgrade_txholds(tx, zp); 6111 err = dmu_tx_assign(tx, TXG_WAIT); 6112 if (err != 0) { 6113 dmu_tx_abort(tx); 6114 goto out; 6115 } 6116 6117 if (zp->z_blksz <= PAGESIZE) { 6118 KASSERTMSG(count == 1, "vp %p pp %p count %d", vp, pp, count); 6119 caddr_t va = zfs_map_page(*pp, S_READ); 6120 ASSERT3U(len, <=, PAGESIZE); 6121 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 6122 zfs_unmap_page(*pp, va); 6123 } else { 6124 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 6125 } 6126 cleanedp = tsd_get(zfs_putpage_key); 6127 *cleanedp = true; 6128 6129 if (err == 0) { 6130 uint64_t mtime[2], ctime[2]; 6131 sa_bulk_attr_t bulk[3]; 6132 int count = 0; 6133 6134 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 6135 &mtime, 16); 6136 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 6137 &ctime, 16); 6138 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6139 &zp->z_pflags, 8); 6140 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 6141 B_TRUE); 6142 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 6143 ASSERT0(err); 6144 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 6145 } 6146 dmu_tx_commit(tx); 6147 6148 out: 6149 uvm_aio_aiodone_pages(pp, count, true, err); 6150 return (err); 6151 } 6152 6153 static void 6154 zfs_netbsd_gop_markupdate(vnode_t *vp, int flags) 6155 { 6156 znode_t *zp = VTOZ(vp); 6157 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6158 dmu_tx_t *tx; 6159 sa_bulk_attr_t bulk[2]; 6160 uint64_t mtime[2], ctime[2]; 6161 int count = 0, err; 6162 6163 KASSERT(flags == GOP_UPDATE_MODIFIED); 6164 6165 tx = dmu_tx_create(zfsvfs->z_os); 6166 err = dmu_tx_assign(tx, TXG_WAIT); 6167 if (err != 0) { 6168 dmu_tx_abort(tx); 6169 return; 6170 } 6171 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6172 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6173 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 6174 dmu_tx_commit(tx); 6175 } 6176 6177 static int 6178 zfs_netbsd_putpages(void *v) 6179 { 6180 struct vop_putpages_args /* { 6181 struct vnode *a_vp; 6182 voff_t a_offlo; 6183 voff_t a_offhi; 6184 int a_flags; 6185 } */ * const ap = v; 6186 6187 struct vnode *vp = ap->a_vp; 6188 voff_t offlo = ap->a_offlo; 6189 voff_t offhi = ap->a_offhi; 6190 int flags = ap->a_flags; 6191 6192 znode_t *zp = VTOZ(vp); 6193 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6194 rl_t *rl = NULL; 6195 uint64_t len; 6196 int error; 6197 bool cleaned = false; 6198 6199 bool async = (flags & PGO_SYNCIO) == 0; 6200 bool cleaning = (flags & PGO_CLEANIT) != 0; 6201 6202 if (cleaning) { 6203 ASSERT((offlo & PAGE_MASK) == 0 && (offhi & PAGE_MASK) == 0); 6204 ASSERT(offlo < offhi || offhi == 0); 6205 if (offhi == 0) 6206 len = UINT64_MAX; 6207 else 6208 len = offhi - offlo; 6209 rw_exit(vp->v_uobj.vmobjlock); 6210 if (curlwp == uvm.pagedaemon_lwp) { 6211 error = fstrans_start_nowait(vp->v_mount); 6212 if (error) 6213 return error; 6214 } else { 6215 vfs_t *mp = vp->v_mount; 6216 fstrans_start(mp); 6217 if (vp->v_mount != mp) { 6218 fstrans_done(mp); 6219 
ASSERT(!vn_has_cached_data(vp)); 6220 return 0; 6221 } 6222 } 6223 /* 6224 * Cannot use ZFS_ENTER() here as it returns with error 6225 * if z_unmounted. The next statement is equivalent. 6226 */ 6227 rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 6228 6229 rl = zfs_range_lock(zp, offlo, len, RL_WRITER); 6230 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 6231 tsd_set(zfs_putpage_key, &cleaned); 6232 } 6233 error = genfs_putpages(v); 6234 if (cleaning) { 6235 tsd_set(zfs_putpage_key, NULL); 6236 zfs_range_unlock(rl); 6237 6238 /* 6239 * Only zil_commit() if we cleaned something. This avoids 6240 * deadlock if we're called from zfs_netbsd_setsize(). 6241 */ 6242 6243 if (cleaned) 6244 if (!async || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 6245 zil_commit(zfsvfs->z_log, zp->z_id); 6246 ZFS_EXIT(zfsvfs); 6247 fstrans_done(vp->v_mount); 6248 } 6249 return error; 6250 } 6251 6252 /* 6253 * Restrict the putpages range to the ZFS block containing the offset. 6254 */ 6255 static void 6256 zfs_netbsd_gop_putrange(struct vnode *vp, off_t off, off_t *lop, off_t *hip) 6257 { 6258 znode_t *zp = VTOZ(vp); 6259 6260 *lop = trunc_page(rounddown2(off, zp->z_blksz)); 6261 *hip = round_page(*lop + zp->z_blksz); 6262 } 6263 6264 void 6265 zfs_netbsd_setsize(vnode_t *vp, off_t size) 6266 { 6267 struct uvm_object *uobj = &vp->v_uobj; 6268 krwlock_t *rw = uobj->vmobjlock; 6269 page_t *pg; 6270 int count, pgoff; 6271 caddr_t va; 6272 off_t tsize; 6273 6274 uvm_vnp_setsize(vp, size); 6275 if (!vn_has_cached_data(vp)) 6276 return; 6277 6278 tsize = trunc_page(size); 6279 if (tsize == size) 6280 return; 6281 6282 /* 6283 * If there's a partial page, we need to zero the tail. 6284 */ 6285 6286 rw_enter(rw, RW_WRITER); 6287 count = 1; 6288 pg = NULL; 6289 if (uvn_findpages(uobj, tsize, &count, &pg, NULL, UFP_NOALLOC)) { 6290 va = zfs_map_page(pg, S_WRITE); 6291 pgoff = size - tsize; 6292 memset(va + pgoff, 0, PAGESIZE - pgoff); 6293 zfs_unmap_page(pg, va); 6294 uvm_page_unbusy(&pg, 1); 6295 } 6296 6297 rw_exit(rw); 6298 } 6299 6300 static int 6301 zfs_netbsd_print(void *v) 6302 { 6303 struct vop_print_args /* { 6304 struct vnode *a_vp; 6305 } */ *ap = v; 6306 vnode_t *vp; 6307 znode_t *zp; 6308 6309 vp = ap->a_vp; 6310 zp = VTOZ(vp); 6311 6312 printf("\tino %" PRIu64 " size %" PRIu64 "\n", 6313 zp->z_id, zp->z_size); 6314 return 0; 6315 } 6316 6317 const struct genfs_ops zfs_genfsops = { 6318 .gop_write = zfs_putapage, 6319 .gop_markupdate = zfs_netbsd_gop_markupdate, 6320 .gop_putrange = zfs_netbsd_gop_putrange, 6321 }; 6322 6323 int (**zfs_vnodeop_p)(void *); 6324 const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = { 6325 { &vop_default_desc, vn_default_error }, 6326 { &vop_parsepath_desc, genfs_parsepath }, 6327 { &vop_lookup_desc, zfs_netbsd_lookup }, 6328 { &vop_create_desc, zfs_netbsd_create }, 6329 { &vop_mknod_desc, zfs_netbsd_mknod }, 6330 { &vop_open_desc, zfs_netbsd_open }, 6331 { &vop_close_desc, zfs_netbsd_close }, 6332 { &vop_access_desc, zfs_netbsd_access }, 6333 { &vop_accessx_desc, genfs_accessx }, 6334 { &vop_getattr_desc, zfs_netbsd_getattr }, 6335 { &vop_setattr_desc, zfs_netbsd_setattr }, 6336 { &vop_read_desc, zfs_netbsd_read }, 6337 { &vop_write_desc, zfs_netbsd_write }, 6338 { &vop_ioctl_desc, zfs_netbsd_ioctl }, 6339 { &vop_poll_desc, genfs_poll }, 6340 { &vop_kqfilter_desc, genfs_kqfilter }, 6341 { &vop_revoke_desc, genfs_revoke }, 6342 { &vop_fsync_desc, zfs_netbsd_fsync }, 6343 { &vop_remove_desc, zfs_netbsd_remove }, 6344 { &vop_link_desc, zfs_netbsd_link }, 6345 { &vop_lock_desc, 
genfs_lock }, 6346 { &vop_unlock_desc, genfs_unlock }, 6347 { &vop_rename_desc, zfs_netbsd_rename }, 6348 { &vop_mkdir_desc, zfs_netbsd_mkdir }, 6349 { &vop_rmdir_desc, zfs_netbsd_rmdir }, 6350 { &vop_symlink_desc, zfs_netbsd_symlink }, 6351 { &vop_readdir_desc, zfs_netbsd_readdir }, 6352 { &vop_readlink_desc, zfs_netbsd_readlink }, 6353 { &vop_inactive_desc, zfs_netbsd_inactive }, 6354 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6355 { &vop_pathconf_desc, zfs_netbsd_pathconf }, 6356 { &vop_seek_desc, genfs_seek }, 6357 { &vop_getpages_desc, zfs_netbsd_getpages }, 6358 { &vop_putpages_desc, zfs_netbsd_putpages }, 6359 { &vop_mmap_desc, genfs_mmap }, 6360 { &vop_islocked_desc, genfs_islocked }, 6361 { &vop_advlock_desc, zfs_netbsd_advlock }, 6362 { &vop_print_desc, zfs_netbsd_print }, 6363 { &vop_fcntl_desc, genfs_fcntl }, 6364 { NULL, NULL } 6365 }; 6366 6367 const struct vnodeopv_desc zfs_vnodeop_opv_desc = 6368 { &zfs_vnodeop_p, zfs_vnodeop_entries }; 6369 6370 int (**zfs_specop_p)(void *); 6371 const struct vnodeopv_entry_desc zfs_specop_entries[] = { 6372 { &vop_default_desc, vn_default_error }, 6373 GENFS_SPECOP_ENTRIES, 6374 { &vop_close_desc, spec_close }, 6375 { &vop_access_desc, zfs_netbsd_access }, 6376 { &vop_accessx_desc, genfs_accessx }, 6377 { &vop_getattr_desc, zfs_netbsd_getattr }, 6378 { &vop_setattr_desc, zfs_netbsd_setattr }, 6379 { &vop_read_desc, /**/zfs_netbsd_read }, 6380 { &vop_write_desc, /**/zfs_netbsd_write }, 6381 { &vop_fsync_desc, zfs_spec_fsync }, 6382 { &vop_lock_desc, genfs_lock }, 6383 { &vop_unlock_desc, genfs_unlock }, 6384 { &vop_inactive_desc, zfs_netbsd_inactive }, 6385 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6386 { &vop_islocked_desc, genfs_islocked }, 6387 { &vop_bwrite_desc, vn_bwrite }, 6388 { &vop_print_desc, zfs_netbsd_print }, 6389 { &vop_fcntl_desc, genfs_fcntl }, 6390 { NULL, NULL } 6391 }; 6392 6393 const struct vnodeopv_desc zfs_specop_opv_desc = 6394 { &zfs_specop_p, zfs_specop_entries }; 6395 6396 int (**zfs_fifoop_p)(void *); 6397 const struct vnodeopv_entry_desc zfs_fifoop_entries[] = { 6398 { &vop_default_desc, vn_default_error }, 6399 GENFS_FIFOOP_ENTRIES, 6400 { &vop_close_desc, vn_fifo_bypass }, 6401 { &vop_access_desc, zfs_netbsd_access }, 6402 { &vop_accessx_desc, genfs_accessx }, 6403 { &vop_getattr_desc, zfs_netbsd_getattr }, 6404 { &vop_setattr_desc, zfs_netbsd_setattr }, 6405 { &vop_read_desc, /**/zfs_netbsd_read }, 6406 { &vop_write_desc, /**/zfs_netbsd_write }, 6407 { &vop_fsync_desc, zfs_netbsd_fsync }, 6408 { &vop_lock_desc, genfs_lock }, 6409 { &vop_unlock_desc, genfs_unlock }, 6410 { &vop_inactive_desc, zfs_netbsd_inactive }, 6411 { &vop_reclaim_desc, zfs_netbsd_reclaim }, 6412 { &vop_islocked_desc, genfs_islocked }, 6413 { &vop_bwrite_desc, vn_bwrite }, 6414 { &vop_strategy_desc, vn_fifo_bypass }, 6415 { &vop_print_desc, zfs_netbsd_print }, 6416 { &vop_fcntl_desc, genfs_fcntl }, 6417 { NULL, NULL } 6418 }; 6419 6420 const struct vnodeopv_desc zfs_fifoop_opv_desc = 6421 { &zfs_fifoop_p, zfs_fifoop_entries }; 6422 6423 #endif /* __NetBSD__ */ 6424