1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 * Portions Copyright 2011 iXsystems, Inc 25 * Copyright (c) 2013, 2016 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27 * Copyright (c) 2014 Integros [integros.com] 28 */ 29 30 #include <sys/zfs_context.h> 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/dmu.h> 36 #include <sys/dmu_impl.h> 37 #include <sys/dmu_objset.h> 38 #include <sys/dbuf.h> 39 #include <sys/dnode.h> 40 #include <sys/zap.h> 41 #include <sys/sa.h> 42 #include <sys/sunddi.h> 43 #include <sys/sa_impl.h> 44 #include <sys/dnode.h> 45 #include <sys/errno.h> 46 #include <sys/zfs_context.h> 47 48 /* 49 * ZFS System attributes: 50 * 51 * A generic mechanism to allow for arbitrary attributes 52 * to be stored in a dnode. The data will be stored in the bonus buffer of 53 * the dnode and if necessary a special "spill" block will be used to handle 54 * overflow situations. The spill block will be sized to fit the data 55 * from 512 - 128K. 
 * When a spill block is used the BP (blkptr_t) for the
 * spill block is stored at the end of the current bonus buffer. Any
 * attributes that would be in the way of the blkptr_t will be relocated
 * into the spill block.
 *
 * Attribute registration:
 *
 * A mapping between attribute "string" names and their actual attribute
 * numeric values, length, and byteswap function is stored persistently
 * on a per dataset basis. The names are only used
 * during registration. All attributes are known by their unique attribute
 * id value. If an attribute can have a variable size then the value
 * 0 will be used to indicate this.
 *
 * Attribute Layout:
 *
 * Attribute layouts are a way to compactly store multiple attributes, but
 * without taking the overhead associated with managing each attribute
 * individually. Since you will typically have the same set of attributes
 * stored in the same order a single table will be used to represent that
 * layout. The ZPL for example will usually have only about 10 different
 * layouts (regular files, device files, symlinks,
 * regular files + scanstamp, files/dir with extended attributes, and then
 * you have the possibility of all of those minus ACL, because it would
 * be kicked out into the spill block)
 *
 * Layouts are simply an array of the attributes and their
 * ordering i.e. [0, 1, 4, 5, 2]
 *
 * Each distinct layout is given a unique layout number and that is what's
 * stored in the header at the beginning of the SA data buffer.
 *
 * A layout only covers a single dbuf (bonus or spill). If a set of
 * attributes is split up between the bonus buffer and a spill buffer then
 * two different layouts will be used. This allows us to byteswap the
 * spill without looking at the bonus buffer and keeps the on disk format of
 * the bonus and spill buffer the same.
 *
 * Adding a single attribute will cause the entire set of attributes to
 * be rewritten and could result in a new layout number being constructed
 * as part of the rewrite if no such layout exists for the new set of
 * attributes. The new attribute will be appended to the end of the already
 * existing attributes.
 *
 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes. There should be a small number of
 * known layouts and the set of attributes is assumed to typically be quite
 * small.
 *
 * The registered attributes and layout "table" information is maintained
 * in core and a special "sa_os_t" is attached to the objset_t.
 *
 * A special interface is provided to allow for quickly applying
 * a large set of attributes at once. sa_replace_all_by_template() is
 * used to set an array of attributes. This is used by the ZPL when
 * creating a brand new file. The template that is passed into the function
 * specifies the attribute, size for variable length attributes, location of
 * data and special "data locator" function if the data isn't in a contiguous
 * location.
 *
 * Byteswap implications:
 *
 * Since the SA attributes are not entirely self describing we can't do
 * the normal byteswap processing. The special ZAP layout attribute and
 * attribute registration attributes define the byteswap function and the
 * size of the attributes, unless it is variable sized.
 * The normal ZFS byteswapping infrastructure assumes you don't need
 * to read any objects in order to do the necessary byteswapping. Whereas
 * SA attributes can only be properly byteswapped if the dataset is opened
 * and the layout/attribute ZAP attributes are available. Because of this
 * the SA attributes will be byteswapped when they are first accessed by
 * the SA code that will read the SA data.
127 */ 128 129 typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, 130 uint16_t length, int length_idx, boolean_t, void *userp); 131 132 static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); 133 static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); 134 static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, 135 void *data); 136 static void sa_idx_tab_rele(objset_t *os, void *arg); 137 static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, 138 int buflen); 139 static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 140 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 141 uint16_t buflen, dmu_tx_t *tx); 142 143 arc_byteswap_func_t *sa_bswap_table[] = { 144 byteswap_uint64_array, 145 byteswap_uint32_array, 146 byteswap_uint16_array, 147 byteswap_uint8_array, 148 zfs_acl_byteswap, 149 }; 150 151 #define SA_COPY_DATA(f, s, t, l) \ 152 { \ 153 if (f == NULL) { \ 154 if (l == 8) { \ 155 *(uint64_t *)t = *(uint64_t *)s; \ 156 } else if (l == 16) { \ 157 *(uint64_t *)t = *(uint64_t *)s; \ 158 *(uint64_t *)((uintptr_t)t + 8) = \ 159 *(uint64_t *)((uintptr_t)s + 8); \ 160 } else { \ 161 bcopy(s, t, l); \ 162 } \ 163 } else \ 164 sa_copy_data(f, s, t, l); \ 165 } 166 167 /* 168 * This table is fixed and cannot be changed. Its purpose is to 169 * allow the SA code to work with both old/new ZPL file systems. 170 * It contains the list of legacy attributes. These attributes aren't 171 * stored in the "attribute" registry zap objects, since older ZPL file systems 172 * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will 173 * use this static table. 
174 */ 175 sa_attr_reg_t sa_legacy_attrs[] = { 176 {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, 177 {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, 178 {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, 179 {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, 180 {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, 181 {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, 182 {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, 183 {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, 184 {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, 185 {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, 186 {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, 187 {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, 188 {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, 189 {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, 190 {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, 191 {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, 192 }; 193 194 /* 195 * This is only used for objects of type DMU_OT_ZNODE 196 */ 197 sa_attr_type_t sa_legacy_zpl_layout[] = { 198 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 199 }; 200 201 /* 202 * Special dummy layout used for buffers with no attributes. 
203 */ 204 sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; 205 206 static int sa_legacy_attr_count = 16; 207 static kmem_cache_t *sa_cache = NULL; 208 209 /*ARGSUSED*/ 210 static int 211 sa_cache_constructor(void *buf, void *unused, int kmflag) 212 { 213 sa_handle_t *hdl = buf; 214 215 #ifdef __NetBSD__ 216 hdl = unused; 217 #endif 218 mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); 219 return (0); 220 } 221 222 /*ARGSUSED*/ 223 static void 224 sa_cache_destructor(void *buf, void *unused) 225 { 226 sa_handle_t *hdl = buf; 227 228 #ifdef __NetBSD__ 229 hdl = unused; 230 #endif 231 mutex_destroy(&hdl->sa_lock); 232 } 233 234 void 235 sa_cache_init(void) 236 { 237 sa_cache = kmem_cache_create("sa_cache", 238 sizeof (sa_handle_t), 0, sa_cache_constructor, 239 sa_cache_destructor, NULL, NULL, NULL, 0); 240 } 241 242 void 243 sa_cache_fini(void) 244 { 245 if (sa_cache) 246 kmem_cache_destroy(sa_cache); 247 } 248 249 static int 250 layout_num_compare(const void *arg1, const void *arg2) 251 { 252 const sa_lot_t *node1 = arg1; 253 const sa_lot_t *node2 = arg2; 254 255 if (node1->lot_num > node2->lot_num) 256 return (1); 257 else if (node1->lot_num < node2->lot_num) 258 return (-1); 259 return (0); 260 } 261 262 static int 263 layout_hash_compare(const void *arg1, const void *arg2) 264 { 265 const sa_lot_t *node1 = arg1; 266 const sa_lot_t *node2 = arg2; 267 268 if (node1->lot_hash > node2->lot_hash) 269 return (1); 270 if (node1->lot_hash < node2->lot_hash) 271 return (-1); 272 if (node1->lot_instance > node2->lot_instance) 273 return (1); 274 if (node1->lot_instance < node2->lot_instance) 275 return (-1); 276 return (0); 277 } 278 279 boolean_t 280 sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) 281 { 282 int i; 283 284 if (count != tbf->lot_attr_count) 285 return (1); 286 287 for (i = 0; i != count; i++) { 288 if (attrs[i] != tbf->lot_attrs[i]) 289 return (1); 290 } 291 return (0); 292 } 293 294 #define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ 
attr) & 0xFF]) 295 296 static uint64_t 297 sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) 298 { 299 int i; 300 uint64_t crc = -1ULL; 301 302 for (i = 0; i != attr_count; i++) 303 crc ^= SA_ATTR_HASH(attrs[i]); 304 305 return (crc); 306 } 307 308 static int 309 sa_get_spill(sa_handle_t *hdl) 310 { 311 int rc; 312 if (hdl->sa_spill == NULL) { 313 if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, 314 &hdl->sa_spill)) == 0) 315 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 316 } else { 317 rc = 0; 318 } 319 320 return (rc); 321 } 322 323 /* 324 * Main attribute lookup/update function 325 * returns 0 for success or non zero for failures 326 * 327 * Operates on bulk array, first failure will abort further processing 328 */ 329 int 330 sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 331 sa_data_op_t data_op, dmu_tx_t *tx) 332 { 333 sa_os_t *sa = hdl->sa_os->os_sa; 334 int i; 335 int error = 0; 336 sa_buf_type_t buftypes; 337 338 buftypes = 0; 339 340 ASSERT(count > 0); 341 for (i = 0; i != count; i++) { 342 ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); 343 344 bulk[i].sa_addr = NULL; 345 /* First check the bonus buffer */ 346 347 if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( 348 hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { 349 SA_ATTR_INFO(sa, hdl->sa_bonus_tab, 350 SA_GET_HDR(hdl, SA_BONUS), 351 bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); 352 if (tx && !(buftypes & SA_BONUS)) { 353 dmu_buf_will_dirty(hdl->sa_bonus, tx); 354 buftypes |= SA_BONUS; 355 } 356 } 357 if (bulk[i].sa_addr == NULL && 358 ((error = sa_get_spill(hdl)) == 0)) { 359 if (TOC_ATTR_PRESENT( 360 hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { 361 SA_ATTR_INFO(sa, hdl->sa_spill_tab, 362 SA_GET_HDR(hdl, SA_SPILL), 363 bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); 364 if (tx && !(buftypes & SA_SPILL) && 365 bulk[i].sa_size == bulk[i].sa_length) { 366 dmu_buf_will_dirty(hdl->sa_spill, tx); 367 buftypes |= SA_SPILL; 368 } 369 } 370 } 371 if (error && error != ENOENT) 
{ 372 return ((error == ECKSUM) ? EIO : error); 373 } 374 375 switch (data_op) { 376 case SA_LOOKUP: 377 if (bulk[i].sa_addr == NULL) 378 return (SET_ERROR(ENOENT)); 379 if (bulk[i].sa_data) { 380 SA_COPY_DATA(bulk[i].sa_data_func, 381 bulk[i].sa_addr, bulk[i].sa_data, 382 bulk[i].sa_size); 383 } 384 continue; 385 386 case SA_UPDATE: 387 /* existing rewrite of attr */ 388 if (bulk[i].sa_addr && 389 bulk[i].sa_size == bulk[i].sa_length) { 390 SA_COPY_DATA(bulk[i].sa_data_func, 391 bulk[i].sa_data, bulk[i].sa_addr, 392 bulk[i].sa_length); 393 continue; 394 } else if (bulk[i].sa_addr) { /* attr size change */ 395 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 396 SA_REPLACE, bulk[i].sa_data_func, 397 bulk[i].sa_data, bulk[i].sa_length, tx); 398 } else { /* adding new attribute */ 399 error = sa_modify_attrs(hdl, bulk[i].sa_attr, 400 SA_ADD, bulk[i].sa_data_func, 401 bulk[i].sa_data, bulk[i].sa_length, tx); 402 } 403 if (error) 404 return (error); 405 break; 406 } 407 } 408 return (error); 409 } 410 411 static sa_lot_t * 412 sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, 413 uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) 414 { 415 sa_os_t *sa = os->os_sa; 416 sa_lot_t *tb, *findtb; 417 int i; 418 avl_index_t loc; 419 420 ASSERT(MUTEX_HELD(&sa->sa_lock)); 421 tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); 422 tb->lot_attr_count = attr_count; 423 #ifdef __NetBSD__ 424 if (attr_count != 0) 425 #endif 426 tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 427 KM_SLEEP); 428 bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); 429 tb->lot_num = lot_num; 430 tb->lot_hash = hash; 431 tb->lot_instance = 0; 432 433 if (zapadd) { 434 char attr_name[8]; 435 436 if (sa->sa_layout_attr_obj == 0) { 437 sa->sa_layout_attr_obj = zap_create_link(os, 438 DMU_OT_SA_ATTR_LAYOUTS, 439 sa->sa_master_obj, SA_LAYOUTS, tx); 440 } 441 442 (void) snprintf(attr_name, sizeof (attr_name), 443 "%d", (int)lot_num); 444 VERIFY(0 == 
zap_update(os, os->os_sa->sa_layout_attr_obj, 445 attr_name, 2, attr_count, attrs, tx)); 446 } 447 448 list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), 449 offsetof(sa_idx_tab_t, sa_next)); 450 451 for (i = 0; i != attr_count; i++) { 452 if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) 453 tb->lot_var_sizes++; 454 } 455 456 avl_add(&sa->sa_layout_num_tree, tb); 457 458 /* verify we don't have a hash collision */ 459 if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { 460 for (; findtb && findtb->lot_hash == hash; 461 findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { 462 if (findtb->lot_instance != tb->lot_instance) 463 break; 464 tb->lot_instance++; 465 } 466 } 467 avl_add(&sa->sa_layout_hash_tree, tb); 468 return (tb); 469 } 470 471 static void 472 sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, 473 int count, dmu_tx_t *tx, sa_lot_t **lot) 474 { 475 sa_lot_t *tb, tbsearch; 476 avl_index_t loc; 477 sa_os_t *sa = os->os_sa; 478 boolean_t found = B_FALSE; 479 480 mutex_enter(&sa->sa_lock); 481 tbsearch.lot_hash = hash; 482 tbsearch.lot_instance = 0; 483 tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); 484 if (tb) { 485 for (; tb && tb->lot_hash == hash; 486 tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { 487 if (sa_layout_equal(tb, attrs, count) == 0) { 488 found = B_TRUE; 489 break; 490 } 491 } 492 } 493 if (!found) { 494 tb = sa_add_layout_entry(os, attrs, count, 495 avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); 496 } 497 mutex_exit(&sa->sa_lock); 498 *lot = tb; 499 } 500 501 static int 502 sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) 503 { 504 int error; 505 uint32_t blocksize; 506 507 if (size == 0) { 508 blocksize = SPA_MINBLOCKSIZE; 509 } else if (size > SPA_OLD_MAXBLOCKSIZE) { 510 ASSERT(0); 511 return (SET_ERROR(EFBIG)); 512 } else { 513 blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); 514 } 515 516 error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, 
tx); 517 ASSERT(error == 0); 518 return (error); 519 } 520 521 static void 522 sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) 523 { 524 if (func == NULL) { 525 bcopy(datastart, target, buflen); 526 } else { 527 boolean_t start; 528 int bytes; 529 void *dataptr; 530 void *saptr = target; 531 uint32_t length; 532 533 start = B_TRUE; 534 bytes = 0; 535 while (bytes < buflen) { 536 func(&dataptr, &length, buflen, start, datastart); 537 bcopy(dataptr, saptr, length); 538 saptr = (void *)((caddr_t)saptr + length); 539 bytes += length; 540 start = B_FALSE; 541 } 542 } 543 } 544 545 /* 546 * Determine several different sizes 547 * first the sa header size 548 * the number of bytes to be stored 549 * if spill would occur the index in the attribute array is returned 550 * 551 * the boolean will_spill will be set when spilling is necessary. It 552 * is only set when the buftype is SA_BONUS 553 */ 554 static int 555 sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, 556 dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, 557 boolean_t *will_spill) 558 { 559 int var_size = 0; 560 int i; 561 int full_space; 562 int hdrsize; 563 int extra_hdrsize; 564 565 if (buftype == SA_BONUS && sa->sa_force_spill) { 566 *total = 0; 567 *index = 0; 568 *will_spill = B_TRUE; 569 return (0); 570 } 571 572 *index = -1; 573 *total = 0; 574 *will_spill = B_FALSE; 575 576 extra_hdrsize = 0; 577 hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : 578 sizeof (sa_hdr_phys_t); 579 580 full_space = (buftype == SA_BONUS) ? 
DN_MAX_BONUSLEN : db->db_size; 581 ASSERT(IS_P2ALIGNED(full_space, 8)); 582 583 for (i = 0; i != attr_count; i++) { 584 boolean_t is_var_sz; 585 586 *total = P2ROUNDUP(*total, 8); 587 *total += attr_desc[i].sa_length; 588 if (*will_spill) 589 continue; 590 591 is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); 592 if (is_var_sz) { 593 var_size++; 594 } 595 596 if (is_var_sz && var_size > 1) { 597 /* 598 * Don't worry that the spill block might overflow. 599 * It will be resized if needed in sa_build_layouts(). 600 */ 601 if (buftype == SA_SPILL || 602 P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + 603 *total < full_space) { 604 /* 605 * Account for header space used by array of 606 * optional sizes of variable-length attributes. 607 * Record the extra header size in case this 608 * increase needs to be reversed due to 609 * spill-over. 610 */ 611 hdrsize += sizeof (uint16_t); 612 if (*index != -1) 613 extra_hdrsize += sizeof (uint16_t); 614 } else { 615 ASSERT(buftype == SA_BONUS); 616 if (*index == -1) 617 *index = i; 618 *will_spill = B_TRUE; 619 continue; 620 } 621 } 622 623 /* 624 * find index of where spill *could* occur. 625 * Then continue to count of remainder attribute 626 * space. The sum is used later for sizing bonus 627 * and spill buffer. 628 */ 629 if (buftype == SA_BONUS && *index == -1 && 630 (*total + P2ROUNDUP(hdrsize, 8)) > 631 (full_space - sizeof (blkptr_t))) { 632 *index = i; 633 } 634 635 if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space && 636 buftype == SA_BONUS) 637 *will_spill = B_TRUE; 638 } 639 640 if (*will_spill) 641 hdrsize -= extra_hdrsize; 642 643 hdrsize = P2ROUNDUP(hdrsize, 8); 644 return (hdrsize); 645 } 646 647 #define BUF_SPACE_NEEDED(total, header) (total + header) 648 649 /* 650 * Find layout that corresponds to ordering of attributes 651 * If not found a new layout number is created and added to 652 * persistent layout tables. 
653 */ 654 static int 655 sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, 656 dmu_tx_t *tx) 657 { 658 sa_os_t *sa = hdl->sa_os->os_sa; 659 uint64_t hash; 660 sa_buf_type_t buftype; 661 sa_hdr_phys_t *sahdr; 662 void *data_start; 663 int buf_space; 664 sa_attr_type_t *attrs, *attrs_start; 665 int i, lot_count; 666 int hdrsize; 667 int spillhdrsize = 0; 668 int used; 669 dmu_object_type_t bonustype; 670 sa_lot_t *lot; 671 int len_idx; 672 int spill_used; 673 boolean_t spilling; 674 675 dmu_buf_will_dirty(hdl->sa_bonus, tx); 676 bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); 677 678 /* first determine bonus header size and sum of all attributes */ 679 hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, 680 SA_BONUS, &i, &used, &spilling); 681 682 if (used > SPA_OLD_MAXBLOCKSIZE) 683 return (SET_ERROR(EFBIG)); 684 685 VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 686 MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : 687 used + hdrsize, tx)); 688 689 ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || 690 bonustype == DMU_OT_SA); 691 692 /* setup and size spill buffer when needed */ 693 if (spilling) { 694 boolean_t dummy; 695 696 if (hdl->sa_spill == NULL) { 697 VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, 698 &hdl->sa_spill) == 0); 699 } 700 dmu_buf_will_dirty(hdl->sa_spill, tx); 701 702 spillhdrsize = sa_find_sizes(sa, &attr_desc[i], 703 attr_count - i, hdl->sa_spill, SA_SPILL, &i, 704 &spill_used, &dummy); 705 706 if (spill_used > SPA_OLD_MAXBLOCKSIZE) 707 return (SET_ERROR(EFBIG)); 708 709 buf_space = hdl->sa_spill->db_size - spillhdrsize; 710 if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > 711 hdl->sa_spill->db_size) 712 VERIFY(0 == sa_resize_spill(hdl, 713 BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); 714 } 715 716 /* setup starting pointers to lay down data */ 717 data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); 718 sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; 719 
buftype = SA_BONUS; 720 721 if (spilling) 722 buf_space = (sa->sa_force_spill) ? 723 0 : SA_BLKPTR_SPACE - hdrsize; 724 else 725 buf_space = hdl->sa_bonus->db_size - hdrsize; 726 727 attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, 728 KM_SLEEP); 729 lot_count = 0; 730 731 for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { 732 uint16_t length; 733 734 ASSERT(IS_P2ALIGNED(data_start, 8)); 735 ASSERT(IS_P2ALIGNED(buf_space, 8)); 736 attrs[i] = attr_desc[i].sa_attr; 737 length = SA_REGISTERED_LEN(sa, attrs[i]); 738 if (length == 0) 739 length = attr_desc[i].sa_length; 740 else 741 VERIFY(length == attr_desc[i].sa_length); 742 743 if (buf_space < length) { /* switch to spill buffer */ 744 VERIFY(spilling); 745 VERIFY(bonustype == DMU_OT_SA); 746 if (buftype == SA_BONUS && !sa->sa_force_spill) { 747 sa_find_layout(hdl->sa_os, hash, attrs_start, 748 lot_count, tx, &lot); 749 SA_SET_HDR(sahdr, lot->lot_num, hdrsize); 750 } 751 752 buftype = SA_SPILL; 753 hash = -1ULL; 754 len_idx = 0; 755 756 sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; 757 sahdr->sa_magic = SA_MAGIC; 758 data_start = (void *)((uintptr_t)sahdr + 759 spillhdrsize); 760 attrs_start = &attrs[i]; 761 buf_space = hdl->sa_spill->db_size - spillhdrsize; 762 lot_count = 0; 763 } 764 hash ^= SA_ATTR_HASH(attrs[i]); 765 attr_desc[i].sa_addr = data_start; 766 attr_desc[i].sa_size = length; 767 SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, 768 data_start, length); 769 if (sa->sa_attr_table[attrs[i]].sa_length == 0) { 770 sahdr->sa_lengths[len_idx++] = length; 771 } 772 VERIFY((uintptr_t)data_start % 8 == 0); 773 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 774 length), 8); 775 buf_space -= P2ROUNDUP(length, 8); 776 lot_count++; 777 } 778 779 sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); 780 781 /* 782 * Verify that old znodes always have layout number 0. 
783 * Must be DMU_OT_SA for arbitrary layouts 784 */ 785 VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || 786 (bonustype == DMU_OT_SA && lot->lot_num > 1)); 787 788 if (bonustype == DMU_OT_SA) { 789 SA_SET_HDR(sahdr, lot->lot_num, 790 buftype == SA_BONUS ? hdrsize : spillhdrsize); 791 } 792 793 kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); 794 if (hdl->sa_bonus_tab) { 795 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 796 hdl->sa_bonus_tab = NULL; 797 } 798 if (!sa->sa_force_spill) 799 VERIFY(0 == sa_build_index(hdl, SA_BONUS)); 800 if (hdl->sa_spill) { 801 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 802 if (!spilling) { 803 /* 804 * remove spill block that is no longer needed. 805 */ 806 dmu_buf_rele(hdl->sa_spill, NULL); 807 hdl->sa_spill = NULL; 808 hdl->sa_spill_tab = NULL; 809 VERIFY(0 == dmu_rm_spill(hdl->sa_os, 810 sa_handle_object(hdl), tx)); 811 } else { 812 VERIFY(0 == sa_build_index(hdl, SA_SPILL)); 813 } 814 } 815 816 return (0); 817 } 818 819 static void 820 sa_free_attr_table(sa_os_t *sa) 821 { 822 int i; 823 824 if (sa->sa_attr_table == NULL) 825 return; 826 827 for (i = 0; i != sa->sa_num_attrs; i++) { 828 if (sa->sa_attr_table[i].sa_name) 829 kmem_free(sa->sa_attr_table[i].sa_name, 830 strlen(sa->sa_attr_table[i].sa_name) + 1); 831 } 832 833 kmem_free(sa->sa_attr_table, 834 sizeof (sa_attr_table_t) * sa->sa_num_attrs); 835 836 sa->sa_attr_table = NULL; 837 } 838 839 static int 840 sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) 841 { 842 sa_os_t *sa = os->os_sa; 843 uint64_t sa_attr_count = 0; 844 uint64_t sa_reg_count = 0; 845 int error = 0; 846 uint64_t attr_value; 847 sa_attr_table_t *tb; 848 zap_cursor_t zc; 849 zap_attribute_t za; 850 int registered_count = 0; 851 int i; 852 dmu_objset_type_t ostype = dmu_objset_type(os); 853 854 sa->sa_user_table = 855 kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); 856 sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); 857 858 if 
(sa->sa_reg_attr_obj != 0) { 859 error = zap_count(os, sa->sa_reg_attr_obj, 860 &sa_attr_count); 861 862 /* 863 * Make sure we retrieved a count and that it isn't zero 864 */ 865 if (error || (error == 0 && sa_attr_count == 0)) { 866 if (error == 0) 867 error = SET_ERROR(EINVAL); 868 goto bail; 869 } 870 sa_reg_count = sa_attr_count; 871 } 872 873 if (ostype == DMU_OST_ZFS && sa_attr_count == 0) 874 sa_attr_count += sa_legacy_attr_count; 875 876 /* Allocate attribute numbers for attributes that aren't registered */ 877 for (i = 0; i != count; i++) { 878 boolean_t found = B_FALSE; 879 int j; 880 881 if (ostype == DMU_OST_ZFS) { 882 for (j = 0; j != sa_legacy_attr_count; j++) { 883 if (strcmp(reg_attrs[i].sa_name, 884 sa_legacy_attrs[j].sa_name) == 0) { 885 sa->sa_user_table[i] = 886 sa_legacy_attrs[j].sa_attr; 887 found = B_TRUE; 888 } 889 } 890 } 891 if (found) 892 continue; 893 894 if (sa->sa_reg_attr_obj) 895 error = zap_lookup(os, sa->sa_reg_attr_obj, 896 reg_attrs[i].sa_name, 8, 1, &attr_value); 897 else 898 error = SET_ERROR(ENOENT); 899 switch (error) { 900 case ENOENT: 901 sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; 902 sa_attr_count++; 903 break; 904 case 0: 905 sa->sa_user_table[i] = ATTR_NUM(attr_value); 906 break; 907 default: 908 goto bail; 909 } 910 } 911 912 sa->sa_num_attrs = sa_attr_count; 913 tb = sa->sa_attr_table = 914 kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); 915 916 /* 917 * Attribute table is constructed from requested attribute list, 918 * previously foreign registered attributes, and also the legacy 919 * ZPL set of attributes. 
920 */ 921 922 if (sa->sa_reg_attr_obj) { 923 for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); 924 (error = zap_cursor_retrieve(&zc, &za)) == 0; 925 zap_cursor_advance(&zc)) { 926 uint64_t value; 927 value = za.za_first_integer; 928 929 registered_count++; 930 tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); 931 tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); 932 tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); 933 tb[ATTR_NUM(value)].sa_registered = B_TRUE; 934 935 if (tb[ATTR_NUM(value)].sa_name) { 936 continue; 937 } 938 tb[ATTR_NUM(value)].sa_name = 939 kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); 940 (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, 941 strlen(za.za_name) +1); 942 } 943 zap_cursor_fini(&zc); 944 /* 945 * Make sure we processed the correct number of registered 946 * attributes 947 */ 948 if (registered_count != sa_reg_count) { 949 ASSERT(error != 0); 950 goto bail; 951 } 952 953 } 954 955 if (ostype == DMU_OST_ZFS) { 956 for (i = 0; i != sa_legacy_attr_count; i++) { 957 if (tb[i].sa_name) 958 continue; 959 tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; 960 tb[i].sa_length = sa_legacy_attrs[i].sa_length; 961 tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; 962 tb[i].sa_registered = B_FALSE; 963 tb[i].sa_name = 964 kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, 965 KM_SLEEP); 966 (void) strlcpy(tb[i].sa_name, 967 sa_legacy_attrs[i].sa_name, 968 strlen(sa_legacy_attrs[i].sa_name) + 1); 969 } 970 } 971 972 for (i = 0; i != count; i++) { 973 sa_attr_type_t attr_id; 974 975 attr_id = sa->sa_user_table[i]; 976 if (tb[attr_id].sa_name) 977 continue; 978 979 tb[attr_id].sa_length = reg_attrs[i].sa_length; 980 tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; 981 tb[attr_id].sa_attr = attr_id; 982 tb[attr_id].sa_name = 983 kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); 984 (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, 985 strlen(reg_attrs[i].sa_name) + 1); 986 } 987 988 sa->sa_need_attr_registration = 989 
(sa_attr_count != registered_count); 990 991 return (0); 992 bail: 993 kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); 994 sa->sa_user_table = NULL; 995 sa_free_attr_table(sa); 996 return ((error != 0) ? error : EINVAL); 997 } 998 999 int 1000 sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, 1001 sa_attr_type_t **user_table) 1002 { 1003 zap_cursor_t zc; 1004 zap_attribute_t za; 1005 sa_os_t *sa; 1006 dmu_objset_type_t ostype = dmu_objset_type(os); 1007 sa_attr_type_t *tb; 1008 int error; 1009 1010 mutex_enter(&os->os_user_ptr_lock); 1011 if (os->os_sa) { 1012 mutex_enter(&os->os_sa->sa_lock); 1013 mutex_exit(&os->os_user_ptr_lock); 1014 tb = os->os_sa->sa_user_table; 1015 mutex_exit(&os->os_sa->sa_lock); 1016 *user_table = tb; 1017 return (0); 1018 } 1019 1020 sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); 1021 mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); 1022 sa->sa_master_obj = sa_obj; 1023 1024 os->os_sa = sa; 1025 mutex_enter(&sa->sa_lock); 1026 mutex_exit(&os->os_user_ptr_lock); 1027 avl_create(&sa->sa_layout_num_tree, layout_num_compare, 1028 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); 1029 avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, 1030 sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); 1031 1032 if (sa_obj) { 1033 error = zap_lookup(os, sa_obj, SA_LAYOUTS, 1034 8, 1, &sa->sa_layout_attr_obj); 1035 if (error != 0 && error != ENOENT) 1036 goto fail; 1037 error = zap_lookup(os, sa_obj, SA_REGISTRY, 1038 8, 1, &sa->sa_reg_attr_obj); 1039 if (error != 0 && error != ENOENT) 1040 goto fail; 1041 } 1042 1043 if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) 1044 goto fail; 1045 1046 if (sa->sa_layout_attr_obj != 0) { 1047 uint64_t layout_count; 1048 1049 error = zap_count(os, sa->sa_layout_attr_obj, 1050 &layout_count); 1051 1052 /* 1053 * Layout number count should be > 0 1054 */ 1055 if (error || (error == 0 && layout_count == 0)) { 1056 if (error == 0) 1057 error = 
SET_ERROR(EINVAL); 1058 goto fail; 1059 } 1060 1061 for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); 1062 (error = zap_cursor_retrieve(&zc, &za)) == 0; 1063 zap_cursor_advance(&zc)) { 1064 sa_attr_type_t *lot_attrs; 1065 uint64_t lot_num; 1066 1067 lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * 1068 za.za_num_integers, KM_SLEEP); 1069 1070 if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, 1071 za.za_name, 2, za.za_num_integers, 1072 lot_attrs))) != 0) { 1073 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1074 za.za_num_integers); 1075 break; 1076 } 1077 VERIFY(ddi_strtoull(za.za_name, NULL, 10, 1078 (unsigned long long *)&lot_num) == 0); 1079 1080 (void) sa_add_layout_entry(os, lot_attrs, 1081 za.za_num_integers, lot_num, 1082 sa_layout_info_hash(lot_attrs, 1083 za.za_num_integers), B_FALSE, NULL); 1084 kmem_free(lot_attrs, sizeof (sa_attr_type_t) * 1085 za.za_num_integers); 1086 } 1087 zap_cursor_fini(&zc); 1088 1089 /* 1090 * Make sure layout count matches number of entries added 1091 * to AVL tree 1092 */ 1093 if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { 1094 ASSERT(error != 0); 1095 goto fail; 1096 } 1097 } 1098 1099 /* Add special layout number for old ZNODES */ 1100 if (ostype == DMU_OST_ZFS) { 1101 (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, 1102 sa_legacy_attr_count, 0, 1103 sa_layout_info_hash(sa_legacy_zpl_layout, 1104 sa_legacy_attr_count), B_FALSE, NULL); 1105 1106 (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, 1107 0, B_FALSE, NULL); 1108 } 1109 *user_table = os->os_sa->sa_user_table; 1110 mutex_exit(&sa->sa_lock); 1111 return (0); 1112 fail: 1113 os->os_sa = NULL; 1114 sa_free_attr_table(sa); 1115 if (sa->sa_user_table) 1116 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1117 mutex_exit(&sa->sa_lock); 1118 avl_destroy(&sa->sa_layout_hash_tree); 1119 avl_destroy(&sa->sa_layout_num_tree); 1120 mutex_destroy(&sa->sa_lock); 1121 kmem_free(sa, sizeof (sa_os_t)); 1122 return ((error == ECKSUM) ? 
EIO : error); 1123 } 1124 1125 void 1126 sa_tear_down(objset_t *os) 1127 { 1128 sa_os_t *sa = os->os_sa; 1129 sa_lot_t *layout; 1130 void *cookie; 1131 1132 kmem_free(sa->sa_user_table, sa->sa_user_table_sz); 1133 1134 /* Free up attr table */ 1135 1136 sa_free_attr_table(sa); 1137 1138 cookie = NULL; 1139 while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) { 1140 sa_idx_tab_t *tab; 1141 while (tab = list_head(&layout->lot_idx_tab)) { 1142 ASSERT(refcount_count(&tab->sa_refcount)); 1143 sa_idx_tab_rele(os, tab); 1144 } 1145 } 1146 1147 cookie = NULL; 1148 while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) { 1149 #ifdef __NetBSD__ 1150 if (layout->lot_attr_count != 0) 1151 #endif 1152 kmem_free(layout->lot_attrs, 1153 sizeof (sa_attr_type_t) * layout->lot_attr_count); 1154 kmem_free(layout, sizeof (sa_lot_t)); 1155 } 1156 1157 avl_destroy(&sa->sa_layout_hash_tree); 1158 avl_destroy(&sa->sa_layout_num_tree); 1159 mutex_destroy(&sa->sa_lock); 1160 1161 kmem_free(sa, sizeof (sa_os_t)); 1162 os->os_sa = NULL; 1163 } 1164 1165 void 1166 sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, 1167 uint16_t length, int length_idx, boolean_t var_length, void *userp) 1168 { 1169 sa_idx_tab_t *idx_tab = userp; 1170 1171 if (var_length) { 1172 ASSERT(idx_tab->sa_variable_lengths); 1173 idx_tab->sa_variable_lengths[length_idx] = length; 1174 } 1175 TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, 1176 (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); 1177 } 1178 1179 static void 1180 sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, 1181 sa_iterfunc_t func, sa_lot_t *tab, void *userp) 1182 { 1183 void *data_start; 1184 sa_lot_t *tb = tab; 1185 sa_lot_t search; 1186 avl_index_t loc; 1187 sa_os_t *sa = os->os_sa; 1188 int i; 1189 uint16_t *length_start = NULL; 1190 uint8_t length_idx = 0; 1191 1192 if (tab == NULL) { 1193 search.lot_num = SA_LAYOUT_NUM(hdr, type); 1194 tb = 
avl_find(&sa->sa_layout_num_tree, &search, &loc); 1195 ASSERT(tb); 1196 } 1197 1198 if (IS_SA_BONUSTYPE(type)) { 1199 data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + 1200 offsetof(sa_hdr_phys_t, sa_lengths) + 1201 (sizeof (uint16_t) * tb->lot_var_sizes)), 8); 1202 length_start = hdr->sa_lengths; 1203 } else { 1204 data_start = hdr; 1205 } 1206 1207 for (i = 0; i != tb->lot_attr_count; i++) { 1208 int attr_length, reg_length; 1209 uint8_t idx_len; 1210 1211 reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; 1212 if (reg_length) { 1213 attr_length = reg_length; 1214 idx_len = 0; 1215 } else { 1216 attr_length = length_start[length_idx]; 1217 idx_len = length_idx++; 1218 } 1219 1220 func(hdr, data_start, tb->lot_attrs[i], attr_length, 1221 idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp); 1222 1223 data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + 1224 attr_length), 8); 1225 } 1226 } 1227 1228 /*ARGSUSED*/ 1229 void 1230 sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, 1231 uint16_t length, int length_idx, boolean_t variable_length, void *userp) 1232 { 1233 sa_handle_t *hdl = userp; 1234 sa_os_t *sa = hdl->sa_os->os_sa; 1235 1236 sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); 1237 } 1238 1239 void 1240 sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) 1241 { 1242 sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1243 dmu_buf_impl_t *db; 1244 sa_os_t *sa = hdl->sa_os->os_sa; 1245 int num_lengths = 1; 1246 int i; 1247 1248 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1249 if (sa_hdr_phys->sa_magic == SA_MAGIC) 1250 return; 1251 1252 db = SA_GET_DB(hdl, buftype); 1253 1254 if (buftype == SA_SPILL) { 1255 arc_release(db->db_buf, NULL); 1256 arc_buf_thaw(db->db_buf); 1257 } 1258 1259 sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); 1260 sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); 1261 1262 /* 1263 * Determine number of variable lenghts in header 1264 * The standard 8 byte header has 
one for free and a 1265 * 16 byte header would have 4 + 1; 1266 */ 1267 if (SA_HDR_SIZE(sa_hdr_phys) > 8) 1268 num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; 1269 for (i = 0; i != num_lengths; i++) 1270 sa_hdr_phys->sa_lengths[i] = 1271 BSWAP_16(sa_hdr_phys->sa_lengths[i]); 1272 1273 sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, 1274 sa_byteswap_cb, NULL, hdl); 1275 1276 if (buftype == SA_SPILL) 1277 arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); 1278 } 1279 1280 static int 1281 sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) 1282 { 1283 sa_hdr_phys_t *sa_hdr_phys; 1284 dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype); 1285 dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); 1286 sa_os_t *sa = hdl->sa_os->os_sa; 1287 sa_idx_tab_t *idx_tab; 1288 1289 sa_hdr_phys = SA_GET_HDR(hdl, buftype); 1290 1291 mutex_enter(&sa->sa_lock); 1292 1293 /* Do we need to byteswap? */ 1294 1295 /* only check if not old znode */ 1296 if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && 1297 sa_hdr_phys->sa_magic != 0) { 1298 VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); 1299 sa_byteswap(hdl, buftype); 1300 } 1301 1302 idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); 1303 1304 if (buftype == SA_BONUS) 1305 hdl->sa_bonus_tab = idx_tab; 1306 else 1307 hdl->sa_spill_tab = idx_tab; 1308 1309 mutex_exit(&sa->sa_lock); 1310 return (0); 1311 } 1312 1313 /*ARGSUSED*/ 1314 static void 1315 sa_evict_sync(void *dbu) 1316 { 1317 panic("evicting sa dbuf\n"); 1318 } 1319 1320 static void 1321 sa_idx_tab_rele(objset_t *os, void *arg) 1322 { 1323 sa_os_t *sa = os->os_sa; 1324 sa_idx_tab_t *idx_tab = arg; 1325 1326 if (idx_tab == NULL) 1327 return; 1328 1329 mutex_enter(&sa->sa_lock); 1330 if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { 1331 list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); 1332 if (idx_tab->sa_variable_lengths) 1333 kmem_free(idx_tab->sa_variable_lengths, 1334 sizeof (uint16_t) * 1335 
idx_tab->sa_layout->lot_var_sizes); 1336 refcount_destroy(&idx_tab->sa_refcount); 1337 kmem_free(idx_tab->sa_idx_tab, 1338 sizeof (uint32_t) * sa->sa_num_attrs); 1339 kmem_free(idx_tab, sizeof (sa_idx_tab_t)); 1340 } 1341 mutex_exit(&sa->sa_lock); 1342 } 1343 1344 static void 1345 sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) 1346 { 1347 sa_os_t *sa = os->os_sa; 1348 1349 ASSERT(MUTEX_HELD(&sa->sa_lock)); 1350 (void) refcount_add(&idx_tab->sa_refcount, NULL); 1351 } 1352 1353 void 1354 sa_handle_destroy(sa_handle_t *hdl) 1355 { 1356 dmu_buf_t *db = hdl->sa_bonus; 1357 1358 mutex_enter(&hdl->sa_lock); 1359 (void) dmu_buf_remove_user(db, &hdl->sa_dbu); 1360 1361 if (hdl->sa_bonus_tab) 1362 sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); 1363 1364 if (hdl->sa_spill_tab) 1365 sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); 1366 1367 dmu_buf_rele(hdl->sa_bonus, NULL); 1368 1369 if (hdl->sa_spill) 1370 dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); 1371 mutex_exit(&hdl->sa_lock); 1372 1373 kmem_cache_free(sa_cache, hdl); 1374 } 1375 1376 int 1377 sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, 1378 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1379 { 1380 int error = 0; 1381 dmu_object_info_t doi; 1382 sa_handle_t *handle = NULL; 1383 1384 #ifdef ZFS_DEBUG 1385 dmu_object_info_from_db(db, &doi); 1386 ASSERT(doi.doi_bonus_type == DMU_OT_SA || 1387 doi.doi_bonus_type == DMU_OT_ZNODE); 1388 #endif 1389 /* find handle, if it exists */ 1390 /* if one doesn't exist then create a new one, and initialize it */ 1391 1392 if (hdl_type == SA_HDL_SHARED) 1393 handle = dmu_buf_get_user(db); 1394 1395 if (handle == NULL) { 1396 sa_handle_t *winner = NULL; 1397 1398 handle = kmem_cache_alloc(sa_cache, KM_SLEEP); 1399 handle->sa_dbu.dbu_evict_func_sync = NULL; 1400 handle->sa_dbu.dbu_evict_func_async = NULL; 1401 handle->sa_userp = userp; 1402 handle->sa_bonus = db; 1403 handle->sa_os = os; 1404 handle->sa_spill = NULL; 1405 handle->sa_bonus_tab = NULL; 1406 
handle->sa_spill_tab = NULL; 1407 1408 error = sa_build_index(handle, SA_BONUS); 1409 1410 if (hdl_type == SA_HDL_SHARED) { 1411 dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL, 1412 NULL); 1413 winner = dmu_buf_set_user_ie(db, &handle->sa_dbu); 1414 } 1415 1416 if (winner != NULL) { 1417 kmem_cache_free(sa_cache, handle); 1418 handle = winner; 1419 } 1420 } 1421 *handlepp = handle; 1422 1423 return (error); 1424 } 1425 1426 int 1427 sa_handle_get(objset_t *objset, uint64_t objid, void *userp, 1428 sa_handle_type_t hdl_type, sa_handle_t **handlepp) 1429 { 1430 dmu_buf_t *db; 1431 int error; 1432 1433 if (error = dmu_bonus_hold(objset, objid, NULL, &db)) 1434 return (error); 1435 1436 return (sa_handle_get_from_db(objset, db, userp, hdl_type, 1437 handlepp)); 1438 } 1439 1440 int 1441 sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) 1442 { 1443 return (dmu_bonus_hold(objset, obj_num, tag, db)); 1444 } 1445 1446 void 1447 sa_buf_rele(dmu_buf_t *db, void *tag) 1448 { 1449 dmu_buf_rele(db, tag); 1450 } 1451 1452 int 1453 sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) 1454 { 1455 ASSERT(hdl); 1456 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1457 return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); 1458 } 1459 1460 int 1461 sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) 1462 { 1463 int error; 1464 sa_bulk_attr_t bulk; 1465 1466 bulk.sa_attr = attr; 1467 bulk.sa_data = buf; 1468 bulk.sa_length = buflen; 1469 bulk.sa_data_func = NULL; 1470 1471 ASSERT(hdl); 1472 mutex_enter(&hdl->sa_lock); 1473 error = sa_lookup_impl(hdl, &bulk, 1); 1474 mutex_exit(&hdl->sa_lock); 1475 return (error); 1476 } 1477 1478 #ifdef _KERNEL 1479 int 1480 sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) 1481 { 1482 int error; 1483 sa_bulk_attr_t bulk; 1484 1485 bulk.sa_data = NULL; 1486 bulk.sa_attr = attr; 1487 bulk.sa_data_func = NULL; 1488 1489 ASSERT(hdl); 1490 1491 mutex_enter(&hdl->sa_lock); 1492 if 
((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { 1493 error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, 1494 uio->uio_resid), UIO_READ, uio); 1495 } 1496 mutex_exit(&hdl->sa_lock); 1497 return (error); 1498 1499 } 1500 #endif 1501 1502 void * 1503 sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) 1504 { 1505 sa_idx_tab_t *idx_tab; 1506 sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; 1507 sa_os_t *sa = os->os_sa; 1508 sa_lot_t *tb, search; 1509 avl_index_t loc; 1510 1511 /* 1512 * Deterimine layout number. If SA node and header == 0 then 1513 * force the index table to the dummy "1" empty layout. 1514 * 1515 * The layout number would only be zero for a newly created file 1516 * that has not added any attributes yet, or with crypto enabled which 1517 * doesn't write any attributes to the bonus buffer. 1518 */ 1519 1520 search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); 1521 1522 tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); 1523 1524 /* Verify header size is consistent with layout information */ 1525 ASSERT(tb); 1526 ASSERT(IS_SA_BONUSTYPE(bonustype) && 1527 SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) || 1528 (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); 1529 1530 /* 1531 * See if any of the already existing TOC entries can be reused? 
1532 */ 1533 1534 for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; 1535 idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { 1536 boolean_t valid_idx = B_TRUE; 1537 int i; 1538 1539 if (tb->lot_var_sizes != 0 && 1540 idx_tab->sa_variable_lengths != NULL) { 1541 for (i = 0; i != tb->lot_var_sizes; i++) { 1542 if (hdr->sa_lengths[i] != 1543 idx_tab->sa_variable_lengths[i]) { 1544 valid_idx = B_FALSE; 1545 break; 1546 } 1547 } 1548 } 1549 if (valid_idx) { 1550 sa_idx_tab_hold(os, idx_tab); 1551 return (idx_tab); 1552 } 1553 } 1554 1555 /* No such luck, create a new entry */ 1556 idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); 1557 idx_tab->sa_idx_tab = 1558 kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); 1559 idx_tab->sa_layout = tb; 1560 refcount_create(&idx_tab->sa_refcount); 1561 if (tb->lot_var_sizes) 1562 idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * 1563 tb->lot_var_sizes, KM_SLEEP); 1564 1565 sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, 1566 tb, idx_tab); 1567 sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ 1568 sa_idx_tab_hold(os, idx_tab); /* one for layout */ 1569 list_insert_tail(&tb->lot_idx_tab, idx_tab); 1570 return (idx_tab); 1571 } 1572 1573 void 1574 sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, 1575 boolean_t start, void *userdata) 1576 { 1577 ASSERT(start); 1578 1579 *dataptr = userdata; 1580 *len = total_len; 1581 } 1582 1583 static void 1584 sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) 1585 { 1586 uint64_t attr_value = 0; 1587 sa_os_t *sa = hdl->sa_os->os_sa; 1588 sa_attr_table_t *tb = sa->sa_attr_table; 1589 int i; 1590 1591 mutex_enter(&sa->sa_lock); 1592 1593 if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) { 1594 mutex_exit(&sa->sa_lock); 1595 return; 1596 } 1597 1598 if (sa->sa_reg_attr_obj == 0) { 1599 sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os, 1600 DMU_OT_SA_ATTR_REGISTRATION, 1601 sa->sa_master_obj, SA_REGISTRY, tx); 1602 } 1603 for 
(i = 0; i != sa->sa_num_attrs; i++) { 1604 if (sa->sa_attr_table[i].sa_registered) 1605 continue; 1606 ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, 1607 tb[i].sa_byteswap); 1608 VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, 1609 tb[i].sa_name, 8, 1, &attr_value, tx)); 1610 tb[i].sa_registered = B_TRUE; 1611 } 1612 sa->sa_need_attr_registration = B_FALSE; 1613 mutex_exit(&sa->sa_lock); 1614 } 1615 1616 /* 1617 * Replace all attributes with attributes specified in template. 1618 * If dnode had a spill buffer then those attributes will be 1619 * also be replaced, possibly with just an empty spill block 1620 * 1621 * This interface is intended to only be used for bulk adding of 1622 * attributes for a new file. It will also be used by the ZPL 1623 * when converting and old formatted znode to native SA support. 1624 */ 1625 int 1626 sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1627 int attr_count, dmu_tx_t *tx) 1628 { 1629 sa_os_t *sa = hdl->sa_os->os_sa; 1630 1631 if (sa->sa_need_attr_registration) 1632 sa_attr_register_sync(hdl, tx); 1633 return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); 1634 } 1635 1636 int 1637 sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, 1638 int attr_count, dmu_tx_t *tx) 1639 { 1640 int error; 1641 1642 mutex_enter(&hdl->sa_lock); 1643 error = sa_replace_all_by_template_locked(hdl, attr_desc, 1644 attr_count, tx); 1645 mutex_exit(&hdl->sa_lock); 1646 return (error); 1647 } 1648 1649 /* 1650 * Add/remove a single attribute or replace a variable-sized attribute value 1651 * with a value of a different size, and then rewrite the entire set 1652 * of attributes. 1653 * Same-length attribute value replacement (including fixed-length attributes) 1654 * is handled more efficiently by the upper layers. 
1655 */ 1656 static int 1657 sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, 1658 sa_data_op_t action, sa_data_locator_t *locator, void *datastart, 1659 uint16_t buflen, dmu_tx_t *tx) 1660 { 1661 sa_os_t *sa = hdl->sa_os->os_sa; 1662 dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; 1663 dnode_t *dn; 1664 sa_bulk_attr_t *attr_desc; 1665 void *old_data[2]; 1666 int bonus_attr_count = 0; 1667 int bonus_data_size = 0; 1668 int spill_data_size = 0; 1669 int spill_attr_count = 0; 1670 int error; 1671 uint16_t length, reg_length; 1672 int i, j, k, length_idx; 1673 sa_hdr_phys_t *hdr; 1674 sa_idx_tab_t *idx_tab; 1675 int attr_count; 1676 int count; 1677 1678 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1679 1680 /* First make of copy of the old data */ 1681 1682 DB_DNODE_ENTER(db); 1683 dn = DB_DNODE(db); 1684 if (dn->dn_bonuslen != 0) { 1685 bonus_data_size = hdl->sa_bonus->db_size; 1686 old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); 1687 bcopy(hdl->sa_bonus->db_data, old_data[0], 1688 hdl->sa_bonus->db_size); 1689 bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; 1690 } else { 1691 old_data[0] = NULL; 1692 } 1693 DB_DNODE_EXIT(db); 1694 1695 /* Bring spill buffer online if it isn't currently */ 1696 1697 if ((error = sa_get_spill(hdl)) == 0) { 1698 spill_data_size = hdl->sa_spill->db_size; 1699 old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); 1700 bcopy(hdl->sa_spill->db_data, old_data[1], 1701 hdl->sa_spill->db_size); 1702 spill_attr_count = 1703 hdl->sa_spill_tab->sa_layout->lot_attr_count; 1704 } else if (error && error != ENOENT) { 1705 if (old_data[0]) 1706 kmem_free(old_data[0], bonus_data_size); 1707 return (error); 1708 } else { 1709 old_data[1] = NULL; 1710 } 1711 1712 /* build descriptor of all attributes */ 1713 1714 attr_count = bonus_attr_count + spill_attr_count; 1715 if (action == SA_ADD) 1716 attr_count++; 1717 else if (action == SA_REMOVE) 1718 attr_count--; 1719 1720 attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * 
attr_count, KM_SLEEP); 1721 1722 /* 1723 * loop through bonus and spill buffer if it exists, and 1724 * build up new attr_descriptor to reset the attributes 1725 */ 1726 k = j = 0; 1727 count = bonus_attr_count; 1728 hdr = SA_GET_HDR(hdl, SA_BONUS); 1729 idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); 1730 for (; k != 2; k++) { 1731 /* 1732 * Iterate over each attribute in layout. Fetch the 1733 * size of variable-length attributes needing rewrite 1734 * from sa_lengths[]. 1735 */ 1736 for (i = 0, length_idx = 0; i != count; i++) { 1737 sa_attr_type_t attr; 1738 1739 attr = idx_tab->sa_layout->lot_attrs[i]; 1740 reg_length = SA_REGISTERED_LEN(sa, attr); 1741 if (reg_length == 0) { 1742 length = hdr->sa_lengths[length_idx]; 1743 length_idx++; 1744 } else { 1745 length = reg_length; 1746 } 1747 if (attr == newattr) { 1748 /* 1749 * There is nothing to do for SA_REMOVE, 1750 * so it is just skipped. 1751 */ 1752 if (action == SA_REMOVE) 1753 continue; 1754 1755 /* 1756 * Duplicate attributes are not allowed, so the 1757 * action can not be SA_ADD here. 1758 */ 1759 ASSERT3S(action, ==, SA_REPLACE); 1760 1761 /* 1762 * Only a variable-sized attribute can be 1763 * replaced here, and its size must be changing. 
1764 */ 1765 ASSERT3U(reg_length, ==, 0); 1766 ASSERT3U(length, !=, buflen); 1767 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1768 locator, datastart, buflen); 1769 } else { 1770 SA_ADD_BULK_ATTR(attr_desc, j, attr, 1771 NULL, (void *) 1772 (TOC_OFF(idx_tab->sa_idx_tab[attr]) + 1773 (uintptr_t)old_data[k]), length); 1774 } 1775 } 1776 if (k == 0 && hdl->sa_spill) { 1777 hdr = SA_GET_HDR(hdl, SA_SPILL); 1778 idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); 1779 count = spill_attr_count; 1780 } else { 1781 break; 1782 } 1783 } 1784 if (action == SA_ADD) { 1785 reg_length = SA_REGISTERED_LEN(sa, newattr); 1786 IMPLY(reg_length != 0, reg_length == buflen); 1787 SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, 1788 datastart, buflen); 1789 } 1790 ASSERT3U(j, ==, attr_count); 1791 1792 error = sa_build_layouts(hdl, attr_desc, attr_count, tx); 1793 1794 if (old_data[0]) 1795 kmem_free(old_data[0], bonus_data_size); 1796 if (old_data[1]) 1797 kmem_free(old_data[1], spill_data_size); 1798 kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); 1799 1800 return (error); 1801 } 1802 1803 static int 1804 sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, 1805 dmu_tx_t *tx) 1806 { 1807 int error; 1808 sa_os_t *sa = hdl->sa_os->os_sa; 1809 dmu_object_type_t bonustype; 1810 1811 bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); 1812 1813 ASSERT(hdl); 1814 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1815 1816 /* sync out registration table if necessary */ 1817 if (sa->sa_need_attr_registration) 1818 sa_attr_register_sync(hdl, tx); 1819 1820 error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); 1821 if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) 1822 sa->sa_update_cb(hdl, tx); 1823 1824 return (error); 1825 } 1826 1827 /* 1828 * update or add new attribute 1829 */ 1830 int 1831 sa_update(sa_handle_t *hdl, sa_attr_type_t type, 1832 void *buf, uint32_t buflen, dmu_tx_t *tx) 1833 { 1834 int error; 1835 sa_bulk_attr_t bulk; 1836 1837 bulk.sa_attr = type; 
1838 bulk.sa_data_func = NULL; 1839 bulk.sa_length = buflen; 1840 bulk.sa_data = buf; 1841 1842 mutex_enter(&hdl->sa_lock); 1843 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1844 mutex_exit(&hdl->sa_lock); 1845 return (error); 1846 } 1847 1848 int 1849 sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, 1850 uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) 1851 { 1852 int error; 1853 sa_bulk_attr_t bulk; 1854 1855 bulk.sa_attr = attr; 1856 bulk.sa_data = userdata; 1857 bulk.sa_data_func = locator; 1858 bulk.sa_length = buflen; 1859 1860 mutex_enter(&hdl->sa_lock); 1861 error = sa_bulk_update_impl(hdl, &bulk, 1, tx); 1862 mutex_exit(&hdl->sa_lock); 1863 return (error); 1864 } 1865 1866 /* 1867 * Return size of an attribute 1868 */ 1869 1870 int 1871 sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) 1872 { 1873 sa_bulk_attr_t bulk; 1874 int error; 1875 1876 bulk.sa_data = NULL; 1877 bulk.sa_attr = attr; 1878 bulk.sa_data_func = NULL; 1879 1880 ASSERT(hdl); 1881 mutex_enter(&hdl->sa_lock); 1882 if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { 1883 mutex_exit(&hdl->sa_lock); 1884 return (error); 1885 } 1886 *size = bulk.sa_size; 1887 1888 mutex_exit(&hdl->sa_lock); 1889 return (0); 1890 } 1891 1892 int 1893 sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1894 { 1895 ASSERT(hdl); 1896 ASSERT(MUTEX_HELD(&hdl->sa_lock)); 1897 return (sa_lookup_impl(hdl, attrs, count)); 1898 } 1899 1900 int 1901 sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) 1902 { 1903 int error; 1904 1905 ASSERT(hdl); 1906 mutex_enter(&hdl->sa_lock); 1907 error = sa_bulk_lookup_locked(hdl, attrs, count); 1908 mutex_exit(&hdl->sa_lock); 1909 return (error); 1910 } 1911 1912 int 1913 sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) 1914 { 1915 int error; 1916 1917 ASSERT(hdl); 1918 mutex_enter(&hdl->sa_lock); 1919 error = sa_bulk_update_impl(hdl, attrs, count, tx); 1920 
mutex_exit(&hdl->sa_lock); 1921 return (error); 1922 } 1923 1924 int 1925 sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) 1926 { 1927 int error; 1928 1929 mutex_enter(&hdl->sa_lock); 1930 error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, 1931 NULL, 0, tx); 1932 mutex_exit(&hdl->sa_lock); 1933 return (error); 1934 } 1935 1936 void 1937 sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) 1938 { 1939 dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); 1940 } 1941 1942 void 1943 sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) 1944 { 1945 dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, 1946 blksize, nblocks); 1947 } 1948 1949 void 1950 sa_set_userp(sa_handle_t *hdl, void *ptr) 1951 { 1952 hdl->sa_userp = ptr; 1953 } 1954 1955 dmu_buf_t * 1956 sa_get_db(sa_handle_t *hdl) 1957 { 1958 return ((dmu_buf_t *)hdl->sa_bonus); 1959 } 1960 1961 void * 1962 sa_get_userdata(sa_handle_t *hdl) 1963 { 1964 return (hdl->sa_userp); 1965 } 1966 1967 void 1968 sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) 1969 { 1970 ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); 1971 os->os_sa->sa_update_cb = func; 1972 } 1973 1974 void 1975 sa_register_update_callback(objset_t *os, sa_update_cb_t *func) 1976 { 1977 1978 mutex_enter(&os->os_sa->sa_lock); 1979 sa_register_update_callback_locked(os, func); 1980 mutex_exit(&os->os_sa->sa_lock); 1981 } 1982 1983 uint64_t 1984 sa_handle_object(sa_handle_t *hdl) 1985 { 1986 return (hdl->sa_bonus->db_object); 1987 } 1988 1989 boolean_t 1990 sa_enabled(objset_t *os) 1991 { 1992 return (os->os_sa == NULL); 1993 } 1994 1995 int 1996 sa_set_sa_object(objset_t *os, uint64_t sa_object) 1997 { 1998 sa_os_t *sa = os->os_sa; 1999 2000 if (sa->sa_master_obj) 2001 return (1); 2002 2003 sa->sa_master_obj = sa_object; 2004 2005 return (0); 2006 } 2007 2008 int 2009 sa_hdrsize(void *arg) 2010 { 2011 sa_hdr_phys_t *hdr = arg; 2012 2013 return (SA_HDR_SIZE(hdr)); 2014 } 2015 2016 void 2017 
sa_handle_lock(sa_handle_t *hdl)
{
	ASSERT(hdl);
	mutex_enter(&hdl->sa_lock);
}

/*
 * Release the per-handle lock.
 */
void
sa_handle_unlock(sa_handle_t *hdl)
{
	ASSERT(hdl);
	mutex_exit(&hdl->sa_lock);
}