/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/mntent.h>
#include <sys/u8_textprep.h>
#include <sys/dsl_dataset.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/atomic.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_rlock.h>
#include <sys/zfs_fuid.h>
#include <sys/dnode.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/zfs_refcount.h>
#include <sys/stat.h>
#include <sys/zap.h>
#include <sys/zfs_znode.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_stat.h>

#include "zfs_prop.h"
#include "zfs_comutil.h"

/* Used by fstat(1). */
SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
    SYSCTL_NULL_INT_PTR, sizeof (znode_t), "sizeof(znode_t)");

/*
 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
 * turned on when ZFS_DEBUG is also defined.
 */
#ifdef ZFS_DEBUG
#define	ZNODE_STATS
#endif	/* ZFS_DEBUG */

#ifdef ZNODE_STATS
#define	ZNODE_STAT_ADD(stat)	((stat)++)
#else
#define	ZNODE_STAT_ADD(stat)	/* nothing */
#endif	/* ZNODE_STATS */

#if !defined(KMEM_DEBUG)
#define	_ZFS_USE_SMR
static uma_zone_t znode_uma_zone;
#else
static kmem_cache_t *znode_cache = NULL;
#endif

extern struct vop_vector zfs_vnodeops;
extern struct vop_vector zfs_fifoops;
extern struct vop_vector zfs_shareops;

/*
 * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
 * z_rangelock. It will modify the offset and length of the lock to reflect
 * znode-specific information, and convert RL_APPEND to RL_WRITER. This is
 * called with the zfs_rangelock_t's rl_lock held, which avoids races.
 */
static void
zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
{
	znode_t *zp = arg;

	/*
	 * If in append mode, convert to writer and lock starting at the
	 * current end of file.
	 */
	if (new->lr_type == RL_APPEND) {
		new->lr_offset = zp->z_size;
		new->lr_type = RL_WRITER;
	}

	/*
	 * If we need to grow the block size then lock the whole file range.
	 */
	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
	    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
		new->lr_offset = 0;
		new->lr_length = UINT64_MAX;
	}
}
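
/*
 * Illustrative sketch (hypothetical caller, not part of this file): an
 * append-mode write would take the range lock as RL_APPEND and rely on
 * the callback above to pin the range at the current EOF:
 *
 *	zfs_locked_range_t *lr;
 *
 *	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
 *	// at grant time lr->lr_offset == zp->z_size, lr->lr_type == RL_WRITER
 *	... perform the write ...
 *	zfs_rangelock_exit(lr);
 */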

static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;

	POINTER_INVALIDATE(&zp->z_zfsvfs);

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);

	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);

	zp->z_acl_cached = NULL;
	zp->z_xattr_cached = NULL;
	zp->z_xattr_parent = 0;
	zp->z_vnode = NULL;
	zp->z_sync_writes_cnt = 0;
	zp->z_async_writes_cnt = 0;

	return (0);
}

static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	(void) arg;
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	ASSERT3P(zp->z_vnode, ==, NULL);
	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	mutex_destroy(&zp->z_acl_lock);
	rw_destroy(&zp->z_xattr_lock);
	zfs_rangelock_fini(&zp->z_rangelock);

	ASSERT3P(zp->z_acl_cached, ==, NULL);
	ASSERT3P(zp->z_xattr_cached, ==, NULL);

	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
}

#ifdef _ZFS_USE_SMR
VFS_SMR_DECLARE;

static int
zfs_znode_cache_constructor_smr(void *mem, int size __unused, void *private,
    int flags)
{
	return (zfs_znode_cache_constructor(mem, private, flags));
}

static void
zfs_znode_cache_destructor_smr(void *mem, int size __unused, void *private)
{
	zfs_znode_cache_destructor(mem, private);
}

void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	ASSERT3P(znode_uma_zone, ==, NULL);
	znode_uma_zone = uma_zcreate("zfs_znode_cache",
	    sizeof (znode_t), zfs_znode_cache_constructor_smr,
	    zfs_znode_cache_destructor_smr, NULL, NULL, 0, 0);
	VFS_SMR_ZONE_SET(znode_uma_zone);
}

static znode_t *
zfs_znode_alloc_kmem(int flags)
{
	return (uma_zalloc_smr(znode_uma_zone, flags));
}

static void
zfs_znode_free_kmem(znode_t *zp)
{
	if (zp->z_xattr_cached) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}
	uma_zfree_smr(znode_uma_zone, zp);
}
#else
void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	ASSERT3P(znode_cache, ==, NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_RECLAIMABLE);
}

static znode_t *
zfs_znode_alloc_kmem(int flags)
{
	return (kmem_cache_alloc(znode_cache, flags));
}

static void
zfs_znode_free_kmem(znode_t *zp)
{
	if (zp->z_xattr_cached) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}
	kmem_cache_free(znode_cache, zp);
}
#endif

void
zfs_znode_fini(void)
{
	/*
	 * Cleanup zcache
	 */
#ifdef _ZFS_USE_SMR
	if (znode_uma_zone) {
		uma_zdestroy(znode_uma_zone);
		znode_uma_zone = NULL;
	}
#else
	if (znode_cache) {
		kmem_cache_destroy(znode_cache);
		znode_cache = NULL;
	}
#endif
}

static int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	znode_t *zp;
	int error;

	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	sharezp = zfs_znode_alloc_kmem(KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;

	VERIFY0(zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids, NULL));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	sa_handle_destroy(sharezp->z_sa_hdl);
	zfs_znode_free_kmem(sharezp);

	return (error);
}

/*
 * Define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * Create special expldev for ZFS private use.
 * Can't use standard expldev since it doesn't do
 * what we want. The standard expldev() takes a
 * dev32_t in LP64 and expands it to a long dev_t.
 * We need an interface that takes a dev32_t in ILP32
 * and expands it to a long dev_t.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
	return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
}
/*
 * Special cmpldev for ZFS private use.
 * Can't use standard cmpldev since it takes
 * a long dev_t and compresses it to dev32_t in
 * LP64. We need to do a compaction of a long dev_t
 * to a dev32_t in ILP32.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
	return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
}
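
/*
 * Worked example of the encoding above: a device with major 8 and
 * minor 1 expands to ((uint64_t)8 << NBITSMINOR64) | 1, i.e.
 * 0x0000000800000001, and zfs_cmpldev() recovers makedev(8, 1) from
 * that value.
 */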

static void
zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	ASSERT3P(zp->z_sa_hdl, ==, NULL);
	ASSERT3P(zp->z_acl_cached, ==, NULL);
	if (sa_hdl == NULL) {
		VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, zp,
		    SA_HDL_SHARED, &zp->z_sa_hdl));
	} else {
		zp->z_sa_hdl = sa_hdl;
		sa_set_userp(sa_hdl, zp);
	}

	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;

	/*
	 * Slap on VROOT if we are the root znode unless we are the root
	 * node of a snapshot mounted under .zfs.
	 */
	if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs)
		ZTOV(zp)->v_flag |= VROOT;
}

void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zp->z_zfsvfs));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}

static void
zfs_vnode_forget(vnode_t *vp)
{

	/* copied from insmntque_stddtr */
	vp->v_data = NULL;
	vp->v_op = &dead_vnodeops;
	vgone(vp);
	vput(vp);
}

/*
 * Construct a new znode/vnode and initialize.
 *
 * This does not do a call to dmu_set_user(); that is
 * up to the caller to do, in case you don't want to
 * return the znode.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t *zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
#ifdef notyet
	uint64_t mtime[2], ctime[2];
#endif
	uint64_t projid = ZFS_DEFAULT_PROJID;
	sa_bulk_attr_t bulk[9];
	int count = 0;
	int error;

	zp = zfs_znode_alloc_kmem(KM_SLEEP);

#ifndef _ZFS_USE_SMR
	KASSERT((zfsvfs->z_parent->z_vfs->mnt_kern_flag & MNTK_FPLOOKUP) == 0,
	    ("%s: fast path lookup enabled without smr", __func__));
#endif

	KASSERT(curthread->td_vp_reserved != NULL,
	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
	if (error != 0) {
		zfs_znode_free_kmem(zp);
		return (NULL);
	}
	zp->z_vnode = vp;
	vp->v_data = zp;

	/*
	 * Acquire the vnode lock before any possible interaction with the
	 * outside world. Specifically, there is an error path that calls
	 * zfs_vnode_forget() and the vnode should be exclusively locked.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;
	zp->z_sync_writes_cnt = 0;
	zp->z_async_writes_cnt = 0;
	atomic_store_ptr(&zp->z_cached_symlink, NULL);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
#ifdef notyet
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, 16);
#endif
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 ||
	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
	    (zp->z_pflags & ZFS_PROJID) &&
	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		zfs_vnode_forget(vp);
		zp->z_vnode = NULL;
		zfs_znode_free_kmem(zp);
		return (NULL);
	}

	zp->z_projid = projid;
	zp->z_mode = mode;

	/* Cache the xattr parent id */
	if (zp->z_pflags & ZFS_XATTR)
		zp->z_xattr_parent = parent;

	vp->v_type = IFTOVT((mode_t)mode);

	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
	case VFIFO:
		vp->v_op = &zfs_fifoops;
		break;
	case VREG:
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT0(zp->z_uid);
			ASSERT0(zp->z_gid);
			vp->v_op = &zfs_shareops;
		}
		break;
	default:
		break;
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

#if __FreeBSD_version >= 1400077
	vn_set_state(vp, VSTATE_CONSTRUCTED);
#endif
	VN_LOCK_AREC(vp);
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	return (zp);
}

static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t mode, size, links, parent, pflags;
	uint64_t dzp_pflags = 0;
	uint64_t rdev = 0;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t *db;
	timestruc_t now;
	uint64_t gen, obj;
	int bonuslen;
	int dnodesize;
	sa_handle_t *sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t *sa_attrs;
	int cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT3P(vap, !=, NULL);
	ASSERT3U((vap->va_mask & AT_MODE), ==, AT_MODE);

	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
		dnodesize = vap->va_fsid;	/* ditto */
	} else {
		obj = 0;
		vfs_timestamp(&now);
		gen = dmu_tx_get_txg(tx);
		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
	}

	if (dnodesize == 0)
		dnodesize = DNODE_MIN_SIZE;

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = zap_create_norm_dnsize(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, dnodesize, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx));
		} else {
			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, dnodesize, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is
	 * called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY0(sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file.
	 *
	 * The order for DMU_OT_ZNODE is critical since it needs to be
	 * constructed in the old znode_phys_t format. Don't change this
	 * ordering.
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);
	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs),
		    NULL, &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY0(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx));

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT3P(*zpp, !=, NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;
	(*zpp)->z_dnodesize = dnodesize;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	if (!(flag & IS_ROOT_NODE)) {
		vnode_t *vp = ZTOV(*zpp);
		vp->v_vflag |= VV_FORCEINSMQ;
		int err = insmntque(vp, zfsvfs->z_vfs);
		vp->v_vflag &= ~VV_FORCEINSMQ;
		(void) err;
		KASSERT(err == 0, ("insmntque() failed: error %d", err));
	}
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}

/*
 * Update in-core attributes. It is assumed the caller will be doing an
 * sa_bulk_update to push the changes out.
 */
void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
{
	xoptattr_t *xoap;

	xoap = xva_getxoptattr(xvap);
	ASSERT3P(xoap, !=, NULL);

	if (zp->z_zfsvfs->z_replay == B_FALSE) {
		ASSERT_VOP_IN_SEQC(ZTOV(zp));
	}

	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
		uint64_t times[2];
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
		    &times, sizeof (times), tx);
		XVA_SET_RTN(xvap, XAT_CREATETIME);
	}
	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_READONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_HIDDEN);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_SYSTEM);
	}
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_ARCHIVE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_NOUNLINK);
	}
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_APPENDONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_NODUMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_OPAQUE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		zfs_sa_set_scanstamp(zp, xvap, tx);
		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_REPARSE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_OFFLINE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_SPARSE);
	}
}

int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t *db;
	znode_t *zp;
	vnode_t *vp;
	sa_handle_t *hdl;
	int locked;
	int err;

	getnewvnode_reserve();
again:
	*zpp = NULL;
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		getnewvnode_drop_reserve();
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		getnewvnode_drop_reserve();
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */
		ASSERT3P(zp, !=, NULL);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			vp = ZTOV(zp);
			/*
			 * Don't let the vnode disappear after
			 * ZFS_OBJ_HOLD_EXIT.
			 */
			VN_HOLD(vp);
			*zpp = zp;
			err = 0;
		}

		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

		if (err) {
			getnewvnode_drop_reserve();
			return (err);
		}

		locked = VOP_ISLOCKED(vp);
		VI_LOCK(vp);
		if (VN_IS_DOOMED(vp) && locked != LK_EXCLUSIVE) {
			/*
			 * The vnode is doomed and this thread doesn't
			 * hold the exclusive lock on it, so the vnode
			 * must be being reclaimed by another thread.
			 * Otherwise the doomed vnode is being reclaimed
			 * by this thread and zfs_zget is called from
			 * ZIL internals.
			 */
			VI_UNLOCK(vp);

			/*
			 * XXX vrele() locks the vnode when the last reference
			 * is dropped. Although in this case the vnode is
			 * doomed / dead and so no inactivation is required,
			 * the vnode lock is still acquired. That could result
			 * in a LOR with z_teardown_lock if another thread holds
			 * the vnode's lock and tries to take z_teardown_lock.
			 * But that is only possible if the other thread
			 * performs a ZFS vnode operation on the vnode. That
			 * either should not happen if the vnode is dead or
			 * the thread should also have a reference to the
			 * vnode and thus our reference is not last.
			 */
			VN_RELE(vp);
			goto again;
		}
		VI_UNLOCK(vp);
		getnewvnode_drop_reserve();
		return (err);
	}

	/*
	 * Not found; create a new znode/vnode, but only if the file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress. This is checked for in zfs_znode_alloc().
	 *
	 * If zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	if (err == 0) {
		vnode_t *vp = ZTOV(zp);

		err = insmntque(vp, zfsvfs->z_vfs);
		if (err == 0) {
			vp->v_hash = obj_num;
			VOP_UNLOCK(vp);
		} else {
			zp->z_vnode = NULL;
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			*zpp = NULL;
		}
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	getnewvnode_drop_reserve();
	return (err);
}
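
/*
 * Usage sketch for zfs_zget() (hypothetical caller): on success the
 * znode is returned with its vnode held but not locked, so the caller
 * is expected to drop the reference when done:
 *
 *	znode_t *zp;
 *
 *	if (zfs_zget(zfsvfs, obj_num, &zp) == 0) {
 *		... operate on zp / ZTOV(zp) ...
 *		VN_RELE(ZTOV(zp));
 *	}
 */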

int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	vnode_t *vp;
	uint64_t obj_num = zp->z_id;
	uint64_t mode, size;
	sa_bulk_attr_t bulk[8];
	int err;
	int count = 0;
	uint64_t gen;

	/*
	 * Remove cached pages before reloading the znode, so that they are not
	 * lingering after we run into any error. Ideally, we should vgone()
	 * the vnode in case of error, but currently we cannot do that
	 * because of the LOR between the vnode lock and z_teardown_lock.
	 * So, instead, we have to "doom" the znode in the illumos style.
	 *
	 * Ignore invalid pages during the scan. This is to avoid deadlocks
	 * between page busying and the teardown lock, as pages are busied
	 * prior to a VOP_GETPAGES operation, which acquires the teardown
	 * read lock. Such pages will be invalid and can safely be skipped
	 * here.
	 */
	vp = ZTOV(zp);
#if __FreeBSD_version >= 1400042
	vn_pages_remove_valid(vp, 0, 0);
#else
	vn_pages_remove(vp, 0, 0);
#endif

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}
	mutex_exit(&zp->z_acl_lock);

	rw_enter(&zp->z_xattr_lock, RW_WRITER);
	if (zp->z_xattr_cached) {
		nvlist_free(zp->z_xattr_cached);
		zp->z_xattr_cached = NULL;
	}
	rw_exit(&zp->z_xattr_lock);

	ASSERT3P(zp->z_sa_hdl, ==, NULL);
	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EINVAL));
	}

	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
	size = zp->z_size;

	/* reload cached values */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
	    &gen, sizeof (gen));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_mode = mode;

	if (gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	/*
	 * It is highly improbable but still quite possible that two
	 * objects in different datasets are created with the same
	 * object numbers and in transaction groups with the same
	 * numbers. znodes corresponding to those objects would
	 * have the same z_id and z_gen, but their other attributes
	 * may be different.
	 * zfs recv -F may replace one of such objects with the other.
	 * As a result file properties recorded in the replaced
	 * object's vnode may no longer match the received object's
	 * properties. At present the only cached property is the
	 * file's type recorded in v_type.
	 * So, handle this case by leaving the old vnode and znode
	 * disassociated from the actual object. A new vnode and a
	 * znode will be created if the object is accessed
	 * (e.g. via a look-up). The old vnode and znode will be
	 * recycled when the last vnode reference is dropped.
	 */
	if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	/*
	 * If the file has zero links, then it has been unlinked on the send
	 * side and it must be in the received unlinked set.
	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
	 * stale data and to prevent automatic removal of the file in
	 * zfs_zinactive(). The file will be removed either when it is removed
	 * on the send side and the next incremental stream is received or
	 * when the unlinked set gets processed.
	 */
	zp->z_unlinked = (zp->z_links == 0);
	if (zp->z_unlinked) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (0);
	}

	zp->z_blksz = doi.doi_data_block_size;
	if (zp->z_size != size)
		vnode_pager_setsize(vp, zp->z_size);

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	return (0);
}

void
zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zfsvfs->z_os;
	uint64_t obj = zp->z_id;
	uint64_t acl_obj = zfs_external_acl(zp);

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	if (acl_obj) {
		VERIFY(!zp->z_is_sa);
		VERIFY0(dmu_object_free(os, acl_obj, tx));
	}
	VERIFY0(dmu_object_free(os, obj, tx));
	zfs_znode_dmu_fini(zp);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}

void
zfs_zinactive(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	uint64_t z_id = zp->z_id;

	ASSERT3P(zp->z_sa_hdl, !=, NULL);

	/*
	 * Don't allow a zfs_zget() while we're trying to release this znode.
	 */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);

	/*
	 * If this was the last reference to a file with no links, remove
	 * the file from the file system unless the file system is mounted
	 * read-only. That can happen, for example, if the file system was
	 * originally read-write, the file was opened, then unlinked and
	 * the file system was made read-only before the file was finally
	 * closed. The file will remain in the unlinked set.
	 */
	if (zp->z_unlinked) {
		ASSERT(!zfsvfs->z_issnap);
		if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) {
			ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
			zfs_rmnode(zp);
			return;
		}
	}

	zfs_znode_dmu_fini(zp);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
	zfs_znode_free(zp);
}

void
zfs_znode_free(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	char *symlink;

	ASSERT3P(zp->z_sa_hdl, ==, NULL);
	zp->z_vnode = NULL;
	mutex_enter(&zfsvfs->z_znodes_lock);
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	list_remove(&zfsvfs->z_all_znodes, zp);
	mutex_exit(&zfsvfs->z_znodes_lock);

	symlink = atomic_load_ptr(&zp->z_cached_symlink);
	if (symlink != NULL) {
		atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL);
		cache_symlink_free(symlink, strlen(symlink) + 1);
	}

	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	zfs_znode_free_kmem(zp);
}

void
zfs_tstamp_update_setup_ext(znode_t *zp, uint_t flag, uint64_t mtime[2],
    uint64_t ctime[2], boolean_t have_tx)
{
	timestruc_t now;

	vfs_timestamp(&now);

	if (have_tx) {	/* will sa_bulk_update happen really soon? */
		zp->z_atime_dirty = 0;
		zp->z_seq++;
	} else {
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME) {
		ZFS_TIME_ENCODE(&now, zp->z_atime);
	}

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, mtime);
		if (zp->z_zfsvfs->z_use_fuids) {
			zp->z_pflags |= (ZFS_ARCHIVE |
			    ZFS_AV_MODIFIED);
		}
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_pflags |= ZFS_ARCHIVE;
	}
}

void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
    uint64_t ctime[2])
{
	zfs_tstamp_update_setup_ext(zp, flag, mtime, ctime, B_TRUE);
}
/*
 * Grow the block size for a file.
 *
 *	IN:	zp	- znode of file whose block size is to grow.
 *		size	- requested block size
 *		tx	- open transaction.
 *
 * NOTE: this function assumes that the znode is write locked.
 */
void
zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
{
	int error;
	u_longlong_t dummy;

	if (size <= zp->z_blksz)
		return;
	/*
	 * If the file size is already greater than the current blocksize,
	 * we will not grow. If there is more than one block in a file,
	 * the blocksize cannot change.
	 */
	if (zp->z_blksz && zp->z_size > zp->z_blksz)
		return;

	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
	    size, 0, tx);

	if (error == ENOTSUP)
		return;
	ASSERT0(error);

	/* What blocksize did we actually get? */
	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
}

/*
 * Increase the file length.
 *
 *	IN:	zp	- znode of file to extend.
 *		end	- new end-of-file
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_extend(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_tx_t *tx;
	zfs_locked_range_t *lr;
	uint64_t newblksz;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end <= zp->z_size) {
		zfs_rangelock_exit(lr);
		return (0);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	if (end > zp->z_blksz &&
	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
		/*
		 * We are growing the file past the current block size.
		 */
		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
			/*
			 * File's blocksize is already larger than the
			 * "recordsize" property. Only let it grow to
			 * the next power of 2.
			 */
			ASSERT(!ISP2(zp->z_blksz));
			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
		} else {
			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
		}
		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
	} else {
		newblksz = 0;
	}

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_rangelock_exit(lr);
		return (error);
	}

	if (newblksz)
		zfs_grow_blocksize(zp, newblksz, tx);

	zp->z_size = end;

	VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx));

	vnode_pager_setsize(ZTOV(zp), end);

	zfs_rangelock_exit(lr);

	dmu_tx_commit(tx);

	return (0);
}
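
/*
 * Example of the block size math above: with the default 128K
 * "recordsize" and a single-block 4K file, extending to end == 200K
 * requests newblksz = MIN(end, z_max_blksz) == 128K before the write
 * hold is taken; zfs_grow_blocksize() then records whatever block
 * size was actually set.
 */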

/*
 * Free space in a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of section to free.
 *		len	- length of section to free.
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	int error;

	/*
	 * Lock the range being freed.
	 */
	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (off >= zp->z_size) {
		zfs_rangelock_exit(lr);
		return (0);
	}

	if (off + len > zp->z_size)
		len = zp->z_size - off;

	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);

	if (error == 0) {
#if __FreeBSD_version >= 1400032
		vnode_pager_purge_range(ZTOV(zp), off, off + len);
#else
		/*
		 * Before __FreeBSD_version 1400032 we cannot free a block
		 * in the middle of a file, but only at the end of a file,
		 * so this code path should never happen.
		 */
		vnode_pager_setsize(ZTOV(zp), off);
#endif
	}

	zfs_rangelock_exit(lr);

	return (error);
}

/*
 * Truncate a file.
 *
 *	IN:	zp	- znode of file to truncate.
 *		end	- new end-of-file.
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	zfs_locked_range_t *lr;
	int error;
	sa_bulk_attr_t bulk[2];
	int count = 0;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end >= zp->z_size) {
		zfs_rangelock_exit(lr);
		return (0);
	}

	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
	    DMU_OBJECT_END);
	if (error) {
		zfs_rangelock_exit(lr);
		return (error);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_rangelock_exit(lr);
		return (error);
	}

	zp->z_size = end;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
	    NULL, &zp->z_size, sizeof (zp->z_size));

	if (end == 0) {
		zp->z_pflags &= ~ZFS_SPARSE;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &zp->z_pflags, 8);
	}
	VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));

	dmu_tx_commit(tx);

	/*
	 * Clear any mapped pages in the truncated region. This has to
	 * happen outside of the transaction to avoid the possibility of
	 * a deadlock with someone trying to push a page that we are
	 * about to invalidate.
	 */
	vnode_pager_setsize(vp, end);

	zfs_rangelock_exit(lr);

	return (0);
}

/*
 * Free space in a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of range
 *		len	- length of range (0 => from off to end of file)
 *		flag	- current file open mode flags.
 *		log	- TRUE if this action should be logged
 *
 *	RETURN:	0 on success, error code on failure
 */
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog = zfsvfs->z_log;
	uint64_t mode;
	uint64_t mtime[2], ctime[2];
	sa_bulk_attr_t bulk[3];
	int count = 0;
	int error;

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
	    sizeof (mode))) != 0)
		return (error);

	if (off > zp->z_size) {
		error = zfs_extend(zp, off+len);
		if (error == 0 && log)
			goto log;
		else
			return (error);
	}

	if (len == 0) {
		error = zfs_trunc(zp, off);
	} else {
		if ((error = zfs_free_range(zp, off, len)) == 0 &&
		    off + len > zp->z_size)
			error = zfs_extend(zp, off+len);
	}
	if (error || !log)
		return (error);
log:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
	    NULL, &zp->z_pflags, 8);
	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT0(error);

	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);

	dmu_tx_commit(tx);
	return (0);
}
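
/*
 * Illustrative summary of zfs_freesp(): a zero "len" truncates the
 * file to "off" via zfs_trunc(), a non-zero "len" punches a hole at
 * [off, off + len) via zfs_free_range(), and an "off" beyond EOF
 * extends the file via zfs_extend(); when "log" is TRUE a TX_TRUNCATE
 * record is written to the ZIL.
 */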

void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
	uint64_t moid, obj, sa_obj, version;
	uint64_t sense = ZFS_CASE_SENSITIVE;
	uint64_t norm = 0;
	nvpair_t *elem;
	int error;
	int i;
	znode_t *rootzp = NULL;
	zfsvfs_t *zfsvfs;
	vattr_t vattr;
	znode_t *zp;
	zfs_acl_ids_t acl_ids;

	/*
	 * First attempt to create master node.
	 */
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
	    DMU_OT_NONE, 0, tx);
	ASSERT0(error);

	/*
	 * Set starting attributes.
	 */
	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		const char *name;

		ASSERT3S(nvpair_type(elem), ==, DATA_TYPE_UINT64);
		val = fnvpair_value_uint64(elem);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			if (val < version)
				version = val;
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		ASSERT0(error);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT3U(version, !=, 0);
	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
	ASSERT0(error);

	/*
	 * Create zap object used for SA attribute registration
	 */

	if (version >= ZPL_VERSION_SA) {
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);
		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);
	} else {
		sa_obj = 0;
	}
	/*
	 * Create a delete queue.
	 */
	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
	ASSERT0(error);

	/*
	 * Create root znode. Create minimal znode/vnode/zfsvfs
	 * to allow zfs_mknode to work.
	 */
	VATTR_NULL(&vattr);
	vattr.va_mask = AT_MODE|AT_UID|AT_GID;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);

	rootzp = zfs_znode_alloc_kmem(KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;
	rootzp->z_is_sa = USE_SA(version, os);

	zfsvfs->z_os = os;
	zfsvfs->z_parent = zfsvfs;
	zfsvfs->z_version = version;
	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
	zfsvfs->z_use_sa = USE_SA(version, os);
	zfsvfs->z_norm = norm;

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs->z_attr_table);

	ASSERT0(error);

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	rootzp->z_zfsvfs = zfsvfs;
	VERIFY0(zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids, NULL));
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT0(error);
	zfs_acl_ids_free(&acl_ids);
	POINTER_INVALIDATE(&rootzp->z_zfsvfs);

	sa_handle_destroy(rootzp->z_sa_hdl);
	zfs_znode_free_kmem(rootzp);

	/*
	 * Create shares directory
	 */

	error = zfs_create_share_dir(zfsvfs, tx);

	ASSERT0(error);

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}

void
zfs_znode_update_vfs(znode_t *zp)
{
	vm_object_t object;

	if ((object = ZTOV(zp)->v_object) == NULL ||
	    zp->z_size == object->un_pager.vnp.vnp_size)
		return;

	vnode_pager_setsize(ZTOV(zp), zp->z_size);
}

int
zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf,
    uint64_t buflen)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	uint64_t parent;
	int is_xattrdir;
	int err;

	/* Extended attributes should not be visible as regular files. */
	if ((zp->z_pflags & ZFS_XATTR) != 0)
		return (SET_ERROR(EINVAL));

	err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl, zfsvfs->z_attr_table,
	    &parent, &is_xattrdir);
	if (err != 0)
		return (err);
	ASSERT0(is_xattrdir);

	/* No name as this is a root object. */
	if (parent == zp->z_id)
		return (SET_ERROR(EINVAL));

	err = zap_value_search(zfsvfs->z_os, parent, zp->z_id,
	    ZFS_DIRENT_OBJ(-1ULL), buf, buflen);
	if (err != 0)
		return (err);
	err = zfs_zget(zfsvfs, parent, dzpp);
	return (err);
}

int
zfs_rlimit_fsize(off_t fsize)
{
	struct thread *td = curthread;
	off_t lim;

	if (td == NULL)
		return (0);

	lim = lim_cur(td, RLIMIT_FSIZE);
	if (__predict_true((uoff_t)fsize <= lim))
		return (0);

	/*
	 * The limit is reached.
	 */
	PROC_LOCK(td->td_proc);
	kern_psignal(td->td_proc, SIGXFSZ);
	PROC_UNLOCK(td->td_proc);

	return (EFBIG);
}
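
/*
 * Usage sketch for zfs_rlimit_fsize() (hypothetical caller): check the
 * would-be end of a write before doing any work, e.g.
 *
 *	if ((error = zfs_rlimit_fsize(new_eof)) != 0)
 *		return (error);
 *
 * where "new_eof" is the offset the write would reach; on failure the
 * caller sees EFBIG and SIGXFSZ has already been posted to the process.
 */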