110922SJeff.Bonwick@Sun.COM /* 210922SJeff.Bonwick@Sun.COM * CDDL HEADER START 310922SJeff.Bonwick@Sun.COM * 410922SJeff.Bonwick@Sun.COM * The contents of this file are subject to the terms of the 510922SJeff.Bonwick@Sun.COM * Common Development and Distribution License (the "License"). 610922SJeff.Bonwick@Sun.COM * You may not use this file except in compliance with the License. 710922SJeff.Bonwick@Sun.COM * 810922SJeff.Bonwick@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 910922SJeff.Bonwick@Sun.COM * or http://www.opensolaris.org/os/licensing. 1010922SJeff.Bonwick@Sun.COM * See the License for the specific language governing permissions 1110922SJeff.Bonwick@Sun.COM * and limitations under the License. 1210922SJeff.Bonwick@Sun.COM * 1310922SJeff.Bonwick@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 1410922SJeff.Bonwick@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1510922SJeff.Bonwick@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 1610922SJeff.Bonwick@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 1710922SJeff.Bonwick@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 1810922SJeff.Bonwick@Sun.COM * 1910922SJeff.Bonwick@Sun.COM * CDDL HEADER END 2010922SJeff.Bonwick@Sun.COM */ 2110922SJeff.Bonwick@Sun.COM 2210922SJeff.Bonwick@Sun.COM /* 2312296SLin.Ling@Sun.COM * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 2410922SJeff.Bonwick@Sun.COM */ 2510922SJeff.Bonwick@Sun.COM 2610922SJeff.Bonwick@Sun.COM #include <sys/zfs_context.h> 2710922SJeff.Bonwick@Sun.COM #include <sys/spa.h> 2810922SJeff.Bonwick@Sun.COM #include <sys/spa_impl.h> 2910922SJeff.Bonwick@Sun.COM #include <sys/zio.h> 3010922SJeff.Bonwick@Sun.COM #include <sys/ddt.h> 3110922SJeff.Bonwick@Sun.COM #include <sys/zap.h> 3210922SJeff.Bonwick@Sun.COM #include <sys/dmu_tx.h> 3310922SJeff.Bonwick@Sun.COM #include <sys/arc.h> 3411125SJeff.Bonwick@Sun.COM #include <sys/dsl_pool.h> 3510922SJeff.Bonwick@Sun.COM #include <sys/zio_checksum.h> 3610922SJeff.Bonwick@Sun.COM #include <sys/zio_compress.h> 3712296SLin.Ling@Sun.COM #include <sys/dsl_scan.h> 3810922SJeff.Bonwick@Sun.COM 39*12587SGeorge.Wilson@Sun.COM /* 40*12587SGeorge.Wilson@Sun.COM * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 41*12587SGeorge.Wilson@Sun.COM */ 42*12587SGeorge.Wilson@Sun.COM int zfs_dedup_prefetch = 1; 43*12587SGeorge.Wilson@Sun.COM 4410922SJeff.Bonwick@Sun.COM static const ddt_ops_t *ddt_ops[DDT_TYPES] = { 4510922SJeff.Bonwick@Sun.COM &ddt_zap_ops, 4610922SJeff.Bonwick@Sun.COM }; 4710922SJeff.Bonwick@Sun.COM 4810922SJeff.Bonwick@Sun.COM static const char *ddt_class_name[DDT_CLASSES] = { 4910922SJeff.Bonwick@Sun.COM "ditto", 5010922SJeff.Bonwick@Sun.COM "duplicate", 5110922SJeff.Bonwick@Sun.COM "unique", 5210922SJeff.Bonwick@Sun.COM }; 5310922SJeff.Bonwick@Sun.COM 5410922SJeff.Bonwick@Sun.COM static void 5510922SJeff.Bonwick@Sun.COM ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 5610922SJeff.Bonwick@Sun.COM dmu_tx_t *tx) 5710922SJeff.Bonwick@Sun.COM { 5810922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 5910922SJeff.Bonwick@Sun.COM objset_t *os = ddt->ddt_os; 6010922SJeff.Bonwick@Sun.COM uint64_t *objectp = &ddt->ddt_object[type][class]; 6110922SJeff.Bonwick@Sun.COM boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; 6210922SJeff.Bonwick@Sun.COM char name[DDT_NAMELEN]; 6310922SJeff.Bonwick@Sun.COM 6410922SJeff.Bonwick@Sun.COM ddt_object_name(ddt, type, class, name); 6510922SJeff.Bonwick@Sun.COM 6610922SJeff.Bonwick@Sun.COM ASSERT(*objectp == 0); 6710922SJeff.Bonwick@Sun.COM VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); 6810922SJeff.Bonwick@Sun.COM ASSERT(*objectp != 0); 6910922SJeff.Bonwick@Sun.COM 7010922SJeff.Bonwick@Sun.COM VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, 7110922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, objectp, tx) == 0); 7210922SJeff.Bonwick@Sun.COM 7310922SJeff.Bonwick@Sun.COM VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, 7410922SJeff.Bonwick@Sun.COM sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 7510922SJeff.Bonwick@Sun.COM &ddt->ddt_histogram[type][class], tx) == 0); 7610922SJeff.Bonwick@Sun.COM } 7710922SJeff.Bonwick@Sun.COM 7810922SJeff.Bonwick@Sun.COM static void 7910922SJeff.Bonwick@Sun.COM ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 8010922SJeff.Bonwick@Sun.COM dmu_tx_t *tx) 8110922SJeff.Bonwick@Sun.COM { 8210922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 8310922SJeff.Bonwick@Sun.COM objset_t *os = ddt->ddt_os; 8410922SJeff.Bonwick@Sun.COM uint64_t *objectp = &ddt->ddt_object[type][class]; 8510922SJeff.Bonwick@Sun.COM char name[DDT_NAMELEN]; 8610922SJeff.Bonwick@Sun.COM 8710922SJeff.Bonwick@Sun.COM ddt_object_name(ddt, type, class, name); 8810922SJeff.Bonwick@Sun.COM 8910922SJeff.Bonwick@Sun.COM ASSERT(*objectp != 0); 9010922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_count(ddt, type, class) == 0); 9110922SJeff.Bonwick@Sun.COM ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); 9210922SJeff.Bonwick@Sun.COM VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); 9310922SJeff.Bonwick@Sun.COM VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); 9410922SJeff.Bonwick@Sun.COM VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); 9511938SGeorge.Wilson@Sun.COM bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); 9610922SJeff.Bonwick@Sun.COM 9710922SJeff.Bonwick@Sun.COM *objectp = 0; 9810922SJeff.Bonwick@Sun.COM } 9910922SJeff.Bonwick@Sun.COM 10010922SJeff.Bonwick@Sun.COM static int 10110922SJeff.Bonwick@Sun.COM ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 10210922SJeff.Bonwick@Sun.COM { 10311938SGeorge.Wilson@Sun.COM ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 10411938SGeorge.Wilson@Sun.COM dmu_object_info_t doi; 10510922SJeff.Bonwick@Sun.COM char name[DDT_NAMELEN]; 10610922SJeff.Bonwick@Sun.COM int error; 10710922SJeff.Bonwick@Sun.COM 10810922SJeff.Bonwick@Sun.COM ddt_object_name(ddt, type, class, name); 10910922SJeff.Bonwick@Sun.COM 11010922SJeff.Bonwick@Sun.COM error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, 11110922SJeff.Bonwick@Sun.COM sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); 11210922SJeff.Bonwick@Sun.COM 11310922SJeff.Bonwick@Sun.COM if (error) 11410922SJeff.Bonwick@Sun.COM return (error); 11510922SJeff.Bonwick@Sun.COM 11610922SJeff.Bonwick@Sun.COM error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 11710922SJeff.Bonwick@Sun.COM sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 11810922SJeff.Bonwick@Sun.COM &ddt->ddt_histogram[type][class]); 11910922SJeff.Bonwick@Sun.COM 12011938SGeorge.Wilson@Sun.COM /* 12111938SGeorge.Wilson@Sun.COM * Seed the cached statistics. 12211938SGeorge.Wilson@Sun.COM */ 12311938SGeorge.Wilson@Sun.COM VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 12411938SGeorge.Wilson@Sun.COM 12511938SGeorge.Wilson@Sun.COM ddo->ddo_count = ddt_object_count(ddt, type, class); 12611938SGeorge.Wilson@Sun.COM ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 12711938SGeorge.Wilson@Sun.COM ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 12811938SGeorge.Wilson@Sun.COM 12910922SJeff.Bonwick@Sun.COM ASSERT(error == 0); 13010922SJeff.Bonwick@Sun.COM return (error); 13110922SJeff.Bonwick@Sun.COM } 13210922SJeff.Bonwick@Sun.COM 13310922SJeff.Bonwick@Sun.COM static void 13410922SJeff.Bonwick@Sun.COM ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 13510922SJeff.Bonwick@Sun.COM dmu_tx_t *tx) 13610922SJeff.Bonwick@Sun.COM { 13711938SGeorge.Wilson@Sun.COM ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 13811938SGeorge.Wilson@Sun.COM dmu_object_info_t doi; 13910922SJeff.Bonwick@Sun.COM char name[DDT_NAMELEN]; 14010922SJeff.Bonwick@Sun.COM 14110922SJeff.Bonwick@Sun.COM ddt_object_name(ddt, type, class, name); 14210922SJeff.Bonwick@Sun.COM 14310922SJeff.Bonwick@Sun.COM VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 14410922SJeff.Bonwick@Sun.COM sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 14510922SJeff.Bonwick@Sun.COM &ddt->ddt_histogram[type][class], tx) == 0); 14611938SGeorge.Wilson@Sun.COM 14711938SGeorge.Wilson@Sun.COM /* 14811938SGeorge.Wilson@Sun.COM * Cache DDT statistics; this is the only time they'll change. 14911938SGeorge.Wilson@Sun.COM */ 15011938SGeorge.Wilson@Sun.COM VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 15111938SGeorge.Wilson@Sun.COM 15211938SGeorge.Wilson@Sun.COM ddo->ddo_count = ddt_object_count(ddt, type, class); 15311938SGeorge.Wilson@Sun.COM ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 15411938SGeorge.Wilson@Sun.COM ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 15510922SJeff.Bonwick@Sun.COM } 15610922SJeff.Bonwick@Sun.COM 15710922SJeff.Bonwick@Sun.COM static int 15810922SJeff.Bonwick@Sun.COM ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 15910922SJeff.Bonwick@Sun.COM ddt_entry_t *dde) 16010922SJeff.Bonwick@Sun.COM { 16110922SJeff.Bonwick@Sun.COM if (!ddt_object_exists(ddt, type, class)) 16210922SJeff.Bonwick@Sun.COM return (ENOENT); 16310922SJeff.Bonwick@Sun.COM 16410922SJeff.Bonwick@Sun.COM return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, 16510922SJeff.Bonwick@Sun.COM ddt->ddt_object[type][class], dde)); 16610922SJeff.Bonwick@Sun.COM } 16710922SJeff.Bonwick@Sun.COM 16812450SGeorge.Wilson@Sun.COM static void 16912450SGeorge.Wilson@Sun.COM ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 17012450SGeorge.Wilson@Sun.COM ddt_entry_t *dde) 17112450SGeorge.Wilson@Sun.COM { 17212450SGeorge.Wilson@Sun.COM if (!ddt_object_exists(ddt, type, class)) 17312450SGeorge.Wilson@Sun.COM return; 17412450SGeorge.Wilson@Sun.COM 17512450SGeorge.Wilson@Sun.COM ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, 17612450SGeorge.Wilson@Sun.COM ddt->ddt_object[type][class], dde); 17712450SGeorge.Wilson@Sun.COM } 17812450SGeorge.Wilson@Sun.COM 17912296SLin.Ling@Sun.COM int 18010922SJeff.Bonwick@Sun.COM ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 18110922SJeff.Bonwick@Sun.COM ddt_entry_t *dde, dmu_tx_t *tx) 18210922SJeff.Bonwick@Sun.COM { 18310922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_exists(ddt, type, class)); 18410922SJeff.Bonwick@Sun.COM 18510922SJeff.Bonwick@Sun.COM return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, 18610922SJeff.Bonwick@Sun.COM ddt->ddt_object[type][class], dde, tx)); 18710922SJeff.Bonwick@Sun.COM } 18810922SJeff.Bonwick@Sun.COM 18910922SJeff.Bonwick@Sun.COM static int 19010922SJeff.Bonwick@Sun.COM ddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 19110922SJeff.Bonwick@Sun.COM ddt_entry_t *dde, dmu_tx_t *tx) 19210922SJeff.Bonwick@Sun.COM { 19310922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_exists(ddt, type, class)); 19410922SJeff.Bonwick@Sun.COM 19510922SJeff.Bonwick@Sun.COM return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, 19610922SJeff.Bonwick@Sun.COM ddt->ddt_object[type][class], dde, tx)); 19710922SJeff.Bonwick@Sun.COM } 19810922SJeff.Bonwick@Sun.COM 19910922SJeff.Bonwick@Sun.COM int 20010922SJeff.Bonwick@Sun.COM ddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 20111125SJeff.Bonwick@Sun.COM uint64_t *walk, ddt_entry_t *dde) 20210922SJeff.Bonwick@Sun.COM { 20310922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_exists(ddt, type, class)); 20410922SJeff.Bonwick@Sun.COM 20510922SJeff.Bonwick@Sun.COM return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, 20610922SJeff.Bonwick@Sun.COM ddt->ddt_object[type][class], dde, walk)); 20710922SJeff.Bonwick@Sun.COM } 20810922SJeff.Bonwick@Sun.COM 20910922SJeff.Bonwick@Sun.COM uint64_t 21010922SJeff.Bonwick@Sun.COM ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 21110922SJeff.Bonwick@Sun.COM { 21210922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_exists(ddt, type, class)); 21310922SJeff.Bonwick@Sun.COM 21410922SJeff.Bonwick@Sun.COM return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, 21510922SJeff.Bonwick@Sun.COM ddt->ddt_object[type][class])); 21610922SJeff.Bonwick@Sun.COM } 21710922SJeff.Bonwick@Sun.COM 21810922SJeff.Bonwick@Sun.COM int 21910922SJeff.Bonwick@Sun.COM ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 22010922SJeff.Bonwick@Sun.COM dmu_object_info_t *doi) 22110922SJeff.Bonwick@Sun.COM { 22210922SJeff.Bonwick@Sun.COM if (!ddt_object_exists(ddt, type, class)) 22310922SJeff.Bonwick@Sun.COM return (ENOENT); 22410922SJeff.Bonwick@Sun.COM 22510922SJeff.Bonwick@Sun.COM return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], 22610922SJeff.Bonwick@Sun.COM doi)); 22710922SJeff.Bonwick@Sun.COM } 22810922SJeff.Bonwick@Sun.COM 22910922SJeff.Bonwick@Sun.COM boolean_t 23010922SJeff.Bonwick@Sun.COM ddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 23110922SJeff.Bonwick@Sun.COM { 23210922SJeff.Bonwick@Sun.COM return (!!ddt->ddt_object[type][class]); 23310922SJeff.Bonwick@Sun.COM } 23410922SJeff.Bonwick@Sun.COM 23510922SJeff.Bonwick@Sun.COM void 23610922SJeff.Bonwick@Sun.COM ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 23710922SJeff.Bonwick@Sun.COM char *name) 23810922SJeff.Bonwick@Sun.COM { 23910922SJeff.Bonwick@Sun.COM (void) sprintf(name, DMU_POOL_DDT, 24010922SJeff.Bonwick@Sun.COM zio_checksum_table[ddt->ddt_checksum].ci_name, 24110922SJeff.Bonwick@Sun.COM ddt_ops[type]->ddt_op_name, ddt_class_name[class]); 24210922SJeff.Bonwick@Sun.COM } 24310922SJeff.Bonwick@Sun.COM 24410922SJeff.Bonwick@Sun.COM void 24510922SJeff.Bonwick@Sun.COM ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) 24610922SJeff.Bonwick@Sun.COM { 24710922SJeff.Bonwick@Sun.COM ASSERT(txg != 0); 24810922SJeff.Bonwick@Sun.COM 24910922SJeff.Bonwick@Sun.COM for (int d = 0; d < SPA_DVAS_PER_BP; d++) 25010922SJeff.Bonwick@Sun.COM bp->blk_dva[d] = ddp->ddp_dva[d]; 25110922SJeff.Bonwick@Sun.COM BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); 25210922SJeff.Bonwick@Sun.COM } 25310922SJeff.Bonwick@Sun.COM 25410922SJeff.Bonwick@Sun.COM void 25511125SJeff.Bonwick@Sun.COM ddt_bp_create(enum zio_checksum checksum, 25611125SJeff.Bonwick@Sun.COM const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) 25710922SJeff.Bonwick@Sun.COM { 25810922SJeff.Bonwick@Sun.COM BP_ZERO(bp); 25910922SJeff.Bonwick@Sun.COM 26010922SJeff.Bonwick@Sun.COM if (ddp != NULL) 26110922SJeff.Bonwick@Sun.COM ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); 26210922SJeff.Bonwick@Sun.COM 26310922SJeff.Bonwick@Sun.COM bp->blk_cksum = ddk->ddk_cksum; 26412296SLin.Ling@Sun.COM bp->blk_fill = 1; 26510922SJeff.Bonwick@Sun.COM 26610922SJeff.Bonwick@Sun.COM BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); 26710922SJeff.Bonwick@Sun.COM BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); 26810922SJeff.Bonwick@Sun.COM BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); 26911125SJeff.Bonwick@Sun.COM BP_SET_CHECKSUM(bp, checksum); 27012296SLin.Ling@Sun.COM BP_SET_TYPE(bp, DMU_OT_DEDUP); 27110922SJeff.Bonwick@Sun.COM BP_SET_LEVEL(bp, 0); 27210922SJeff.Bonwick@Sun.COM BP_SET_DEDUP(bp, 0); 27310922SJeff.Bonwick@Sun.COM BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 27410922SJeff.Bonwick@Sun.COM } 27510922SJeff.Bonwick@Sun.COM 27610922SJeff.Bonwick@Sun.COM void 27710922SJeff.Bonwick@Sun.COM ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) 27810922SJeff.Bonwick@Sun.COM { 27910922SJeff.Bonwick@Sun.COM ddk->ddk_cksum = bp->blk_cksum; 28010922SJeff.Bonwick@Sun.COM ddk->ddk_prop = 0; 28110922SJeff.Bonwick@Sun.COM 28210922SJeff.Bonwick@Sun.COM DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); 28310922SJeff.Bonwick@Sun.COM DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); 28410922SJeff.Bonwick@Sun.COM DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); 28510922SJeff.Bonwick@Sun.COM } 28610922SJeff.Bonwick@Sun.COM 28710922SJeff.Bonwick@Sun.COM void 28810922SJeff.Bonwick@Sun.COM ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) 28910922SJeff.Bonwick@Sun.COM { 29010922SJeff.Bonwick@Sun.COM ASSERT(ddp->ddp_phys_birth == 0); 29110922SJeff.Bonwick@Sun.COM 29210922SJeff.Bonwick@Sun.COM for (int d = 0; d < SPA_DVAS_PER_BP; d++) 29310922SJeff.Bonwick@Sun.COM ddp->ddp_dva[d] = bp->blk_dva[d]; 29410922SJeff.Bonwick@Sun.COM ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); 29510922SJeff.Bonwick@Sun.COM } 29610922SJeff.Bonwick@Sun.COM 29710922SJeff.Bonwick@Sun.COM void 29810922SJeff.Bonwick@Sun.COM ddt_phys_clear(ddt_phys_t *ddp) 29910922SJeff.Bonwick@Sun.COM { 30010922SJeff.Bonwick@Sun.COM bzero(ddp, sizeof (*ddp)); 30110922SJeff.Bonwick@Sun.COM } 30210922SJeff.Bonwick@Sun.COM 30310922SJeff.Bonwick@Sun.COM void 30410922SJeff.Bonwick@Sun.COM ddt_phys_addref(ddt_phys_t *ddp) 30510922SJeff.Bonwick@Sun.COM { 30610922SJeff.Bonwick@Sun.COM ddp->ddp_refcnt++; 30710922SJeff.Bonwick@Sun.COM } 30810922SJeff.Bonwick@Sun.COM 30910922SJeff.Bonwick@Sun.COM void 31010922SJeff.Bonwick@Sun.COM ddt_phys_decref(ddt_phys_t *ddp) 31110922SJeff.Bonwick@Sun.COM { 31210922SJeff.Bonwick@Sun.COM ASSERT((int64_t)ddp->ddp_refcnt > 0); 31310922SJeff.Bonwick@Sun.COM ddp->ddp_refcnt--; 31410922SJeff.Bonwick@Sun.COM } 31510922SJeff.Bonwick@Sun.COM 31610922SJeff.Bonwick@Sun.COM void 31710922SJeff.Bonwick@Sun.COM ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) 31810922SJeff.Bonwick@Sun.COM { 31910922SJeff.Bonwick@Sun.COM blkptr_t blk; 32010922SJeff.Bonwick@Sun.COM 32111125SJeff.Bonwick@Sun.COM ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 32210922SJeff.Bonwick@Sun.COM ddt_phys_clear(ddp); 32310922SJeff.Bonwick@Sun.COM zio_free(ddt->ddt_spa, txg, &blk); 32410922SJeff.Bonwick@Sun.COM } 32510922SJeff.Bonwick@Sun.COM 32610922SJeff.Bonwick@Sun.COM ddt_phys_t * 32710922SJeff.Bonwick@Sun.COM ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) 32810922SJeff.Bonwick@Sun.COM { 32910922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; 33010922SJeff.Bonwick@Sun.COM 33110922SJeff.Bonwick@Sun.COM for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 33210922SJeff.Bonwick@Sun.COM if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && 33310922SJeff.Bonwick@Sun.COM BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) 33410922SJeff.Bonwick@Sun.COM return (ddp); 33510922SJeff.Bonwick@Sun.COM } 33610922SJeff.Bonwick@Sun.COM return (NULL); 33710922SJeff.Bonwick@Sun.COM } 33810922SJeff.Bonwick@Sun.COM 33910922SJeff.Bonwick@Sun.COM uint64_t 34010922SJeff.Bonwick@Sun.COM ddt_phys_total_refcnt(const ddt_entry_t *dde) 34110922SJeff.Bonwick@Sun.COM { 34210922SJeff.Bonwick@Sun.COM uint64_t refcnt = 0; 34310922SJeff.Bonwick@Sun.COM 34410922SJeff.Bonwick@Sun.COM for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) 34510922SJeff.Bonwick@Sun.COM refcnt += dde->dde_phys[p].ddp_refcnt; 34610922SJeff.Bonwick@Sun.COM 34710922SJeff.Bonwick@Sun.COM return (refcnt); 34810922SJeff.Bonwick@Sun.COM } 34910922SJeff.Bonwick@Sun.COM 35010922SJeff.Bonwick@Sun.COM static void 35110922SJeff.Bonwick@Sun.COM ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) 35210922SJeff.Bonwick@Sun.COM { 35310922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 35410922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = dde->dde_phys; 35510922SJeff.Bonwick@Sun.COM ddt_key_t *ddk = &dde->dde_key; 35610922SJeff.Bonwick@Sun.COM uint64_t lsize = DDK_GET_LSIZE(ddk); 35710922SJeff.Bonwick@Sun.COM uint64_t psize = DDK_GET_PSIZE(ddk); 35810922SJeff.Bonwick@Sun.COM 35910922SJeff.Bonwick@Sun.COM bzero(dds, sizeof (*dds)); 36010922SJeff.Bonwick@Sun.COM 36110922SJeff.Bonwick@Sun.COM for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 36210922SJeff.Bonwick@Sun.COM uint64_t dsize = 0; 36310922SJeff.Bonwick@Sun.COM uint64_t refcnt = ddp->ddp_refcnt; 36410922SJeff.Bonwick@Sun.COM 36510922SJeff.Bonwick@Sun.COM if (ddp->ddp_phys_birth == 0) 36610922SJeff.Bonwick@Sun.COM continue; 36710922SJeff.Bonwick@Sun.COM 36810922SJeff.Bonwick@Sun.COM for (int d = 0; d < SPA_DVAS_PER_BP; d++) 36910922SJeff.Bonwick@Sun.COM dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); 37010922SJeff.Bonwick@Sun.COM 37110922SJeff.Bonwick@Sun.COM dds->dds_blocks += 1; 37210922SJeff.Bonwick@Sun.COM dds->dds_lsize += lsize; 37310922SJeff.Bonwick@Sun.COM dds->dds_psize += psize; 37410922SJeff.Bonwick@Sun.COM dds->dds_dsize += dsize; 37510922SJeff.Bonwick@Sun.COM 37610922SJeff.Bonwick@Sun.COM dds->dds_ref_blocks += refcnt; 37710922SJeff.Bonwick@Sun.COM dds->dds_ref_lsize += lsize * refcnt; 37810922SJeff.Bonwick@Sun.COM dds->dds_ref_psize += psize * refcnt; 37910922SJeff.Bonwick@Sun.COM dds->dds_ref_dsize += dsize * refcnt; 38010922SJeff.Bonwick@Sun.COM } 38110922SJeff.Bonwick@Sun.COM } 38210922SJeff.Bonwick@Sun.COM 38310922SJeff.Bonwick@Sun.COM void 38410922SJeff.Bonwick@Sun.COM ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) 38510922SJeff.Bonwick@Sun.COM { 38610922SJeff.Bonwick@Sun.COM const uint64_t *s = (const uint64_t *)src; 38710922SJeff.Bonwick@Sun.COM uint64_t *d = (uint64_t *)dst; 38810922SJeff.Bonwick@Sun.COM uint64_t *d_end = (uint64_t *)(dst + 1); 38910922SJeff.Bonwick@Sun.COM 39010922SJeff.Bonwick@Sun.COM ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ 39110922SJeff.Bonwick@Sun.COM 39210922SJeff.Bonwick@Sun.COM while (d < d_end) 39310922SJeff.Bonwick@Sun.COM *d++ += (*s++ ^ neg) - neg; 39410922SJeff.Bonwick@Sun.COM } 39510922SJeff.Bonwick@Sun.COM 39610922SJeff.Bonwick@Sun.COM static void 39710922SJeff.Bonwick@Sun.COM ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) 39810922SJeff.Bonwick@Sun.COM { 39910922SJeff.Bonwick@Sun.COM ddt_stat_t dds; 40010922SJeff.Bonwick@Sun.COM ddt_histogram_t *ddh; 40110922SJeff.Bonwick@Sun.COM int bucket; 40210922SJeff.Bonwick@Sun.COM 40310922SJeff.Bonwick@Sun.COM ddt_stat_generate(ddt, dde, &dds); 40410922SJeff.Bonwick@Sun.COM 40510922SJeff.Bonwick@Sun.COM bucket = highbit(dds.dds_ref_blocks) - 1; 40610922SJeff.Bonwick@Sun.COM ASSERT(bucket >= 0); 40710922SJeff.Bonwick@Sun.COM 40810922SJeff.Bonwick@Sun.COM ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; 40910922SJeff.Bonwick@Sun.COM 41010922SJeff.Bonwick@Sun.COM ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); 41110922SJeff.Bonwick@Sun.COM } 41210922SJeff.Bonwick@Sun.COM 41310922SJeff.Bonwick@Sun.COM void 41410922SJeff.Bonwick@Sun.COM ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) 41510922SJeff.Bonwick@Sun.COM { 41610922SJeff.Bonwick@Sun.COM for (int h = 0; h < 64; h++) 41710922SJeff.Bonwick@Sun.COM ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); 41810922SJeff.Bonwick@Sun.COM } 41910922SJeff.Bonwick@Sun.COM 42010922SJeff.Bonwick@Sun.COM void 42110922SJeff.Bonwick@Sun.COM ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) 42210922SJeff.Bonwick@Sun.COM { 42310922SJeff.Bonwick@Sun.COM bzero(dds, sizeof (*dds)); 42410922SJeff.Bonwick@Sun.COM 42510922SJeff.Bonwick@Sun.COM for (int h = 0; h < 64; h++) 42610922SJeff.Bonwick@Sun.COM ddt_stat_add(dds, &ddh->ddh_stat[h], 0); 42710922SJeff.Bonwick@Sun.COM } 42810922SJeff.Bonwick@Sun.COM 42910922SJeff.Bonwick@Sun.COM boolean_t 43010922SJeff.Bonwick@Sun.COM ddt_histogram_empty(const ddt_histogram_t *ddh) 43110922SJeff.Bonwick@Sun.COM { 43210922SJeff.Bonwick@Sun.COM const uint64_t *s = (const uint64_t *)ddh; 43310922SJeff.Bonwick@Sun.COM const uint64_t *s_end = (const uint64_t *)(ddh + 1); 43410922SJeff.Bonwick@Sun.COM 43510922SJeff.Bonwick@Sun.COM while (s < s_end) 43610922SJeff.Bonwick@Sun.COM if (*s++ != 0) 43710922SJeff.Bonwick@Sun.COM return (B_FALSE); 43810922SJeff.Bonwick@Sun.COM 43910922SJeff.Bonwick@Sun.COM return (B_TRUE); 44010922SJeff.Bonwick@Sun.COM } 44110922SJeff.Bonwick@Sun.COM 44211149SGeorge.Wilson@Sun.COM void 44311938SGeorge.Wilson@Sun.COM ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) 44410922SJeff.Bonwick@Sun.COM { 44511938SGeorge.Wilson@Sun.COM /* Sum the statistics we cached in ddt_object_sync(). */ 44610922SJeff.Bonwick@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 44710922SJeff.Bonwick@Sun.COM ddt_t *ddt = spa->spa_ddt[c]; 44810922SJeff.Bonwick@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 44910922SJeff.Bonwick@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; 45010922SJeff.Bonwick@Sun.COM class++) { 45111938SGeorge.Wilson@Sun.COM ddt_object_t *ddo = 45211938SGeorge.Wilson@Sun.COM &ddt->ddt_object_stats[type][class]; 45311938SGeorge.Wilson@Sun.COM ddo_total->ddo_count += ddo->ddo_count; 45411938SGeorge.Wilson@Sun.COM ddo_total->ddo_dspace += ddo->ddo_dspace; 45511938SGeorge.Wilson@Sun.COM ddo_total->ddo_mspace += ddo->ddo_mspace; 45611149SGeorge.Wilson@Sun.COM } 45711149SGeorge.Wilson@Sun.COM } 45811149SGeorge.Wilson@Sun.COM } 45911938SGeorge.Wilson@Sun.COM 46011938SGeorge.Wilson@Sun.COM /* ... and compute the averages. */ 46111938SGeorge.Wilson@Sun.COM if (ddo_total->ddo_count != 0) { 46211938SGeorge.Wilson@Sun.COM ddo_total->ddo_dspace /= ddo_total->ddo_count; 46311938SGeorge.Wilson@Sun.COM ddo_total->ddo_mspace /= ddo_total->ddo_count; 46411938SGeorge.Wilson@Sun.COM } else { 46511938SGeorge.Wilson@Sun.COM ASSERT(ddo_total->ddo_dspace == 0); 46611938SGeorge.Wilson@Sun.COM ASSERT(ddo_total->ddo_mspace == 0); 46711938SGeorge.Wilson@Sun.COM } 46811149SGeorge.Wilson@Sun.COM } 46911149SGeorge.Wilson@Sun.COM 47011149SGeorge.Wilson@Sun.COM void 47111149SGeorge.Wilson@Sun.COM ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) 47211149SGeorge.Wilson@Sun.COM { 47311149SGeorge.Wilson@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 47411149SGeorge.Wilson@Sun.COM ddt_t *ddt = spa->spa_ddt[c]; 47511149SGeorge.Wilson@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 47611149SGeorge.Wilson@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; 47711149SGeorge.Wilson@Sun.COM class++) { 47811149SGeorge.Wilson@Sun.COM ddt_histogram_add(ddh, 47911938SGeorge.Wilson@Sun.COM &ddt->ddt_histogram_cache[type][class]); 48010922SJeff.Bonwick@Sun.COM } 48110922SJeff.Bonwick@Sun.COM } 48210922SJeff.Bonwick@Sun.COM } 48311149SGeorge.Wilson@Sun.COM } 48410922SJeff.Bonwick@Sun.COM 48511149SGeorge.Wilson@Sun.COM void 48611149SGeorge.Wilson@Sun.COM ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) 48711149SGeorge.Wilson@Sun.COM { 48811149SGeorge.Wilson@Sun.COM ddt_histogram_t *ddh_total; 48911149SGeorge.Wilson@Sun.COM 49011149SGeorge.Wilson@Sun.COM ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); 49111149SGeorge.Wilson@Sun.COM ddt_get_dedup_histogram(spa, ddh_total); 49211149SGeorge.Wilson@Sun.COM ddt_histogram_stat(dds_total, ddh_total); 49311149SGeorge.Wilson@Sun.COM kmem_free(ddh_total, sizeof (ddt_histogram_t)); 49410956SGeorge.Wilson@Sun.COM } 49510956SGeorge.Wilson@Sun.COM 49610956SGeorge.Wilson@Sun.COM uint64_t 49710956SGeorge.Wilson@Sun.COM ddt_get_dedup_dspace(spa_t *spa) 49810956SGeorge.Wilson@Sun.COM { 49910956SGeorge.Wilson@Sun.COM ddt_stat_t dds_total = { 0 }; 50010922SJeff.Bonwick@Sun.COM 50110956SGeorge.Wilson@Sun.COM ddt_get_dedup_stats(spa, &dds_total); 50210956SGeorge.Wilson@Sun.COM return (dds_total.dds_ref_dsize - dds_total.dds_dsize); 50310956SGeorge.Wilson@Sun.COM } 50410956SGeorge.Wilson@Sun.COM 50510956SGeorge.Wilson@Sun.COM uint64_t 50610956SGeorge.Wilson@Sun.COM ddt_get_pool_dedup_ratio(spa_t *spa) 50710956SGeorge.Wilson@Sun.COM { 50810956SGeorge.Wilson@Sun.COM ddt_stat_t dds_total = { 0 }; 50910956SGeorge.Wilson@Sun.COM 51010956SGeorge.Wilson@Sun.COM ddt_get_dedup_stats(spa, &dds_total); 51110922SJeff.Bonwick@Sun.COM if (dds_total.dds_dsize == 0) 51210922SJeff.Bonwick@Sun.COM return (100); 51310922SJeff.Bonwick@Sun.COM 51410922SJeff.Bonwick@Sun.COM return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); 51510922SJeff.Bonwick@Sun.COM } 51610922SJeff.Bonwick@Sun.COM 51710922SJeff.Bonwick@Sun.COM int 51810922SJeff.Bonwick@Sun.COM ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) 51910922SJeff.Bonwick@Sun.COM { 52010922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 52110922SJeff.Bonwick@Sun.COM uint64_t total_refcnt = 0; 52210922SJeff.Bonwick@Sun.COM uint64_t ditto = spa->spa_dedup_ditto; 52310922SJeff.Bonwick@Sun.COM int total_copies = 0; 52410922SJeff.Bonwick@Sun.COM int desired_copies = 0; 52510922SJeff.Bonwick@Sun.COM 52610922SJeff.Bonwick@Sun.COM for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { 52710922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = &dde->dde_phys[p]; 52810922SJeff.Bonwick@Sun.COM zio_t *zio = dde->dde_lead_zio[p]; 52910922SJeff.Bonwick@Sun.COM uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ 53010922SJeff.Bonwick@Sun.COM if (zio != NULL) 53110922SJeff.Bonwick@Sun.COM refcnt += zio->io_parent_count; /* pending refs */ 53210922SJeff.Bonwick@Sun.COM if (ddp == ddp_willref) 53310922SJeff.Bonwick@Sun.COM refcnt++; /* caller's ref */ 53410922SJeff.Bonwick@Sun.COM if (refcnt != 0) { 53510922SJeff.Bonwick@Sun.COM total_refcnt += refcnt; 53610922SJeff.Bonwick@Sun.COM total_copies += p; 53710922SJeff.Bonwick@Sun.COM } 53810922SJeff.Bonwick@Sun.COM } 53910922SJeff.Bonwick@Sun.COM 54010922SJeff.Bonwick@Sun.COM if (ditto == 0 || ditto > UINT32_MAX) 54110922SJeff.Bonwick@Sun.COM ditto = UINT32_MAX; 54210922SJeff.Bonwick@Sun.COM 54310922SJeff.Bonwick@Sun.COM if (total_refcnt >= 1) 54410922SJeff.Bonwick@Sun.COM desired_copies++; 54510922SJeff.Bonwick@Sun.COM if (total_refcnt >= ditto) 54610922SJeff.Bonwick@Sun.COM desired_copies++; 54710922SJeff.Bonwick@Sun.COM if (total_refcnt >= ditto * ditto) 54810922SJeff.Bonwick@Sun.COM desired_copies++; 54910922SJeff.Bonwick@Sun.COM 55010922SJeff.Bonwick@Sun.COM return (MAX(desired_copies, total_copies) - total_copies); 55110922SJeff.Bonwick@Sun.COM } 55210922SJeff.Bonwick@Sun.COM 55310922SJeff.Bonwick@Sun.COM int 55410922SJeff.Bonwick@Sun.COM ddt_ditto_copies_present(ddt_entry_t *dde) 55510922SJeff.Bonwick@Sun.COM { 55610922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; 55710922SJeff.Bonwick@Sun.COM dva_t *dva = ddp->ddp_dva; 55810922SJeff.Bonwick@Sun.COM int copies = 0 - DVA_GET_GANG(dva); 55910922SJeff.Bonwick@Sun.COM 56010922SJeff.Bonwick@Sun.COM for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) 56110922SJeff.Bonwick@Sun.COM if (DVA_IS_VALID(dva)) 56210922SJeff.Bonwick@Sun.COM copies++; 56310922SJeff.Bonwick@Sun.COM 56410922SJeff.Bonwick@Sun.COM ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); 56510922SJeff.Bonwick@Sun.COM 56610922SJeff.Bonwick@Sun.COM return (copies); 56710922SJeff.Bonwick@Sun.COM } 56810922SJeff.Bonwick@Sun.COM 56910922SJeff.Bonwick@Sun.COM size_t 57010922SJeff.Bonwick@Sun.COM ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) 57110922SJeff.Bonwick@Sun.COM { 57210922SJeff.Bonwick@Sun.COM uchar_t *version = dst++; 57310922SJeff.Bonwick@Sun.COM int cpfunc = ZIO_COMPRESS_ZLE; 57410922SJeff.Bonwick@Sun.COM zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 57510922SJeff.Bonwick@Sun.COM size_t c_len; 57610922SJeff.Bonwick@Sun.COM 57710922SJeff.Bonwick@Sun.COM ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ 57810922SJeff.Bonwick@Sun.COM 57910922SJeff.Bonwick@Sun.COM c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); 58010922SJeff.Bonwick@Sun.COM 58110922SJeff.Bonwick@Sun.COM if (c_len == s_len) { 58210922SJeff.Bonwick@Sun.COM cpfunc = ZIO_COMPRESS_OFF; 58310922SJeff.Bonwick@Sun.COM bcopy(src, dst, s_len); 58410922SJeff.Bonwick@Sun.COM } 58510922SJeff.Bonwick@Sun.COM 58610922SJeff.Bonwick@Sun.COM *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc; 58710922SJeff.Bonwick@Sun.COM 58810922SJeff.Bonwick@Sun.COM return (c_len + 1); 58910922SJeff.Bonwick@Sun.COM } 59010922SJeff.Bonwick@Sun.COM 59110922SJeff.Bonwick@Sun.COM void 59210922SJeff.Bonwick@Sun.COM ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 59310922SJeff.Bonwick@Sun.COM { 59410922SJeff.Bonwick@Sun.COM uchar_t version = *src++; 59510922SJeff.Bonwick@Sun.COM int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; 59610922SJeff.Bonwick@Sun.COM zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 59710922SJeff.Bonwick@Sun.COM 59810922SJeff.Bonwick@Sun.COM if (ci->ci_decompress != NULL) 59910922SJeff.Bonwick@Sun.COM (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 60010922SJeff.Bonwick@Sun.COM else 60110922SJeff.Bonwick@Sun.COM bcopy(src, dst, d_len); 60210922SJeff.Bonwick@Sun.COM 60310922SJeff.Bonwick@Sun.COM if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK) 60410922SJeff.Bonwick@Sun.COM byteswap_uint64_array(dst, d_len); 60510922SJeff.Bonwick@Sun.COM } 60610922SJeff.Bonwick@Sun.COM 60710922SJeff.Bonwick@Sun.COM ddt_t * 60810922SJeff.Bonwick@Sun.COM ddt_select_by_checksum(spa_t *spa, enum zio_checksum c) 60910922SJeff.Bonwick@Sun.COM { 61010922SJeff.Bonwick@Sun.COM return (spa->spa_ddt[c]); 61110922SJeff.Bonwick@Sun.COM } 61210922SJeff.Bonwick@Sun.COM 61310922SJeff.Bonwick@Sun.COM ddt_t * 61410922SJeff.Bonwick@Sun.COM ddt_select(spa_t *spa, const blkptr_t *bp) 61510922SJeff.Bonwick@Sun.COM { 61610922SJeff.Bonwick@Sun.COM return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); 61710922SJeff.Bonwick@Sun.COM } 61810922SJeff.Bonwick@Sun.COM 61910922SJeff.Bonwick@Sun.COM void 62010922SJeff.Bonwick@Sun.COM ddt_enter(ddt_t *ddt) 62110922SJeff.Bonwick@Sun.COM { 62210922SJeff.Bonwick@Sun.COM mutex_enter(&ddt->ddt_lock); 62310922SJeff.Bonwick@Sun.COM } 62410922SJeff.Bonwick@Sun.COM 62510922SJeff.Bonwick@Sun.COM void 62610922SJeff.Bonwick@Sun.COM ddt_exit(ddt_t *ddt) 62710922SJeff.Bonwick@Sun.COM { 62810922SJeff.Bonwick@Sun.COM mutex_exit(&ddt->ddt_lock); 62910922SJeff.Bonwick@Sun.COM } 63010922SJeff.Bonwick@Sun.COM 63110922SJeff.Bonwick@Sun.COM static ddt_entry_t * 63210922SJeff.Bonwick@Sun.COM ddt_alloc(const ddt_key_t *ddk) 63310922SJeff.Bonwick@Sun.COM { 63410922SJeff.Bonwick@Sun.COM ddt_entry_t *dde; 63510922SJeff.Bonwick@Sun.COM 63610922SJeff.Bonwick@Sun.COM dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); 63710922SJeff.Bonwick@Sun.COM cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); 63810922SJeff.Bonwick@Sun.COM 63910922SJeff.Bonwick@Sun.COM dde->dde_key = *ddk; 64010922SJeff.Bonwick@Sun.COM 64110922SJeff.Bonwick@Sun.COM return (dde); 64210922SJeff.Bonwick@Sun.COM } 64310922SJeff.Bonwick@Sun.COM 64410922SJeff.Bonwick@Sun.COM static void 64510922SJeff.Bonwick@Sun.COM ddt_free(ddt_entry_t *dde) 64610922SJeff.Bonwick@Sun.COM { 64710922SJeff.Bonwick@Sun.COM ASSERT(!dde->dde_loading); 64810922SJeff.Bonwick@Sun.COM 64910922SJeff.Bonwick@Sun.COM for (int p = 0; p < DDT_PHYS_TYPES; p++) 65010922SJeff.Bonwick@Sun.COM ASSERT(dde->dde_lead_zio[p] == NULL); 65110922SJeff.Bonwick@Sun.COM 65210922SJeff.Bonwick@Sun.COM if (dde->dde_repair_data != NULL) 65310922SJeff.Bonwick@Sun.COM zio_buf_free(dde->dde_repair_data, 65410922SJeff.Bonwick@Sun.COM DDK_GET_PSIZE(&dde->dde_key)); 65510922SJeff.Bonwick@Sun.COM 65610922SJeff.Bonwick@Sun.COM cv_destroy(&dde->dde_cv); 65710922SJeff.Bonwick@Sun.COM kmem_free(dde, sizeof (*dde)); 65810922SJeff.Bonwick@Sun.COM } 65910922SJeff.Bonwick@Sun.COM 66010922SJeff.Bonwick@Sun.COM void 66110922SJeff.Bonwick@Sun.COM ddt_remove(ddt_t *ddt, ddt_entry_t *dde) 66210922SJeff.Bonwick@Sun.COM { 66310922SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 66410922SJeff.Bonwick@Sun.COM 66510922SJeff.Bonwick@Sun.COM avl_remove(&ddt->ddt_tree, dde); 66610922SJeff.Bonwick@Sun.COM ddt_free(dde); 66710922SJeff.Bonwick@Sun.COM } 66810922SJeff.Bonwick@Sun.COM 66910922SJeff.Bonwick@Sun.COM ddt_entry_t * 67010922SJeff.Bonwick@Sun.COM ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) 67110922SJeff.Bonwick@Sun.COM { 67210922SJeff.Bonwick@Sun.COM ddt_entry_t *dde, dde_search; 67310922SJeff.Bonwick@Sun.COM enum ddt_type type; 67410922SJeff.Bonwick@Sun.COM enum ddt_class class; 67510922SJeff.Bonwick@Sun.COM avl_index_t where; 67610922SJeff.Bonwick@Sun.COM int error; 67710922SJeff.Bonwick@Sun.COM 67810922SJeff.Bonwick@Sun.COM ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 67910922SJeff.Bonwick@Sun.COM 68010922SJeff.Bonwick@Sun.COM ddt_key_fill(&dde_search.dde_key, bp); 68110922SJeff.Bonwick@Sun.COM 68210922SJeff.Bonwick@Sun.COM dde = avl_find(&ddt->ddt_tree, &dde_search, &where); 68310922SJeff.Bonwick@Sun.COM if (dde == NULL) { 68410922SJeff.Bonwick@Sun.COM if (!add) 68510922SJeff.Bonwick@Sun.COM return (NULL); 68610922SJeff.Bonwick@Sun.COM dde = ddt_alloc(&dde_search.dde_key); 68710922SJeff.Bonwick@Sun.COM avl_insert(&ddt->ddt_tree, dde, where); 68810922SJeff.Bonwick@Sun.COM } 68910922SJeff.Bonwick@Sun.COM 69010922SJeff.Bonwick@Sun.COM while (dde->dde_loading) 69110922SJeff.Bonwick@Sun.COM cv_wait(&dde->dde_cv, &ddt->ddt_lock); 69210922SJeff.Bonwick@Sun.COM 69310922SJeff.Bonwick@Sun.COM if (dde->dde_loaded) 69410922SJeff.Bonwick@Sun.COM return (dde); 69510922SJeff.Bonwick@Sun.COM 69610922SJeff.Bonwick@Sun.COM dde->dde_loading = B_TRUE; 69710922SJeff.Bonwick@Sun.COM 69810922SJeff.Bonwick@Sun.COM ddt_exit(ddt); 69910922SJeff.Bonwick@Sun.COM 70010922SJeff.Bonwick@Sun.COM error = ENOENT; 70110922SJeff.Bonwick@Sun.COM 70210922SJeff.Bonwick@Sun.COM for (type = 0; type < DDT_TYPES; type++) { 70310922SJeff.Bonwick@Sun.COM for (class = 0; class < DDT_CLASSES; class++) { 70410922SJeff.Bonwick@Sun.COM error = ddt_object_lookup(ddt, type, class, dde); 70510922SJeff.Bonwick@Sun.COM if (error != ENOENT) 70610922SJeff.Bonwick@Sun.COM break; 70710922SJeff.Bonwick@Sun.COM } 70810922SJeff.Bonwick@Sun.COM if (error != ENOENT) 70910922SJeff.Bonwick@Sun.COM break; 71010922SJeff.Bonwick@Sun.COM } 71110922SJeff.Bonwick@Sun.COM 71210922SJeff.Bonwick@Sun.COM ASSERT(error == 0 || error == ENOENT); 71310922SJeff.Bonwick@Sun.COM 71410922SJeff.Bonwick@Sun.COM ddt_enter(ddt); 71510922SJeff.Bonwick@Sun.COM 71610922SJeff.Bonwick@Sun.COM ASSERT(dde->dde_loaded == B_FALSE); 71710922SJeff.Bonwick@Sun.COM ASSERT(dde->dde_loading == B_TRUE); 71810922SJeff.Bonwick@Sun.COM 71910922SJeff.Bonwick@Sun.COM dde->dde_type = type; /* will be DDT_TYPES if no entry found */ 72010922SJeff.Bonwick@Sun.COM dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ 72110922SJeff.Bonwick@Sun.COM dde->dde_loaded = B_TRUE; 72210922SJeff.Bonwick@Sun.COM dde->dde_loading = B_FALSE; 72310922SJeff.Bonwick@Sun.COM 72410922SJeff.Bonwick@Sun.COM if (error == 0) 72510922SJeff.Bonwick@Sun.COM ddt_stat_update(ddt, dde, -1ULL); 72610922SJeff.Bonwick@Sun.COM 72710922SJeff.Bonwick@Sun.COM cv_broadcast(&dde->dde_cv); 72810922SJeff.Bonwick@Sun.COM 72910922SJeff.Bonwick@Sun.COM return (dde); 73010922SJeff.Bonwick@Sun.COM } 73110922SJeff.Bonwick@Sun.COM 73212450SGeorge.Wilson@Sun.COM void 73312450SGeorge.Wilson@Sun.COM ddt_prefetch(spa_t *spa, const blkptr_t *bp) 73412450SGeorge.Wilson@Sun.COM { 73512450SGeorge.Wilson@Sun.COM ddt_t *ddt; 73612450SGeorge.Wilson@Sun.COM ddt_entry_t dde; 73712450SGeorge.Wilson@Sun.COM 738*12587SGeorge.Wilson@Sun.COM if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) 73912450SGeorge.Wilson@Sun.COM return; 74012450SGeorge.Wilson@Sun.COM 74112450SGeorge.Wilson@Sun.COM /* 74212450SGeorge.Wilson@Sun.COM * We remove the DDT once it's empty and only prefetch dedup blocks 74312450SGeorge.Wilson@Sun.COM * when there are entries in the DDT. Thus no locking is required 74412450SGeorge.Wilson@Sun.COM * as the DDT can't disappear on us. 74512450SGeorge.Wilson@Sun.COM */ 74612450SGeorge.Wilson@Sun.COM ddt = ddt_select(spa, bp); 74712450SGeorge.Wilson@Sun.COM ddt_key_fill(&dde.dde_key, bp); 74812450SGeorge.Wilson@Sun.COM 74912450SGeorge.Wilson@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 75012450SGeorge.Wilson@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 75112450SGeorge.Wilson@Sun.COM ddt_object_prefetch(ddt, type, class, &dde); 75212450SGeorge.Wilson@Sun.COM } 75312450SGeorge.Wilson@Sun.COM } 75412450SGeorge.Wilson@Sun.COM } 75512450SGeorge.Wilson@Sun.COM 75610922SJeff.Bonwick@Sun.COM int 75710922SJeff.Bonwick@Sun.COM ddt_entry_compare(const void *x1, const void *x2) 75810922SJeff.Bonwick@Sun.COM { 75910922SJeff.Bonwick@Sun.COM const ddt_entry_t *dde1 = x1; 76010922SJeff.Bonwick@Sun.COM const ddt_entry_t *dde2 = x2; 76110922SJeff.Bonwick@Sun.COM const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; 76210922SJeff.Bonwick@Sun.COM const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; 76310922SJeff.Bonwick@Sun.COM 76410922SJeff.Bonwick@Sun.COM for (int i = 0; i < DDT_KEY_WORDS; i++) { 76510922SJeff.Bonwick@Sun.COM if (u1[i] < u2[i]) 76610922SJeff.Bonwick@Sun.COM return (-1); 76710922SJeff.Bonwick@Sun.COM if (u1[i] > u2[i]) 76810922SJeff.Bonwick@Sun.COM return (1); 76910922SJeff.Bonwick@Sun.COM } 77010922SJeff.Bonwick@Sun.COM 77110922SJeff.Bonwick@Sun.COM return (0); 77210922SJeff.Bonwick@Sun.COM } 77310922SJeff.Bonwick@Sun.COM 77410922SJeff.Bonwick@Sun.COM static ddt_t * 77510922SJeff.Bonwick@Sun.COM ddt_table_alloc(spa_t *spa, enum zio_checksum c) 77610922SJeff.Bonwick@Sun.COM { 77710922SJeff.Bonwick@Sun.COM ddt_t *ddt; 77810922SJeff.Bonwick@Sun.COM 77910922SJeff.Bonwick@Sun.COM ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); 78010922SJeff.Bonwick@Sun.COM 78110922SJeff.Bonwick@Sun.COM mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); 78210922SJeff.Bonwick@Sun.COM avl_create(&ddt->ddt_tree, ddt_entry_compare, 78310922SJeff.Bonwick@Sun.COM sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 78410922SJeff.Bonwick@Sun.COM avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, 78510922SJeff.Bonwick@Sun.COM sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 78610922SJeff.Bonwick@Sun.COM ddt->ddt_checksum = c; 78710922SJeff.Bonwick@Sun.COM ddt->ddt_spa = spa; 78810922SJeff.Bonwick@Sun.COM ddt->ddt_os = spa->spa_meta_objset; 78910922SJeff.Bonwick@Sun.COM 79010922SJeff.Bonwick@Sun.COM return (ddt); 79110922SJeff.Bonwick@Sun.COM } 79210922SJeff.Bonwick@Sun.COM 79310922SJeff.Bonwick@Sun.COM static void 79410922SJeff.Bonwick@Sun.COM ddt_table_free(ddt_t *ddt) 79510922SJeff.Bonwick@Sun.COM { 79610922SJeff.Bonwick@Sun.COM ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); 79710922SJeff.Bonwick@Sun.COM ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); 79810922SJeff.Bonwick@Sun.COM avl_destroy(&ddt->ddt_tree); 79910922SJeff.Bonwick@Sun.COM avl_destroy(&ddt->ddt_repair_tree); 80010922SJeff.Bonwick@Sun.COM mutex_destroy(&ddt->ddt_lock); 80110922SJeff.Bonwick@Sun.COM kmem_free(ddt, sizeof (*ddt)); 80210922SJeff.Bonwick@Sun.COM } 80310922SJeff.Bonwick@Sun.COM 80410922SJeff.Bonwick@Sun.COM void 80510922SJeff.Bonwick@Sun.COM ddt_create(spa_t *spa) 80610922SJeff.Bonwick@Sun.COM { 80710922SJeff.Bonwick@Sun.COM spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; 80810922SJeff.Bonwick@Sun.COM 80910922SJeff.Bonwick@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 81010922SJeff.Bonwick@Sun.COM spa->spa_ddt[c] = ddt_table_alloc(spa, c); 81110922SJeff.Bonwick@Sun.COM } 81210922SJeff.Bonwick@Sun.COM 81310922SJeff.Bonwick@Sun.COM int 81410922SJeff.Bonwick@Sun.COM ddt_load(spa_t *spa) 81510922SJeff.Bonwick@Sun.COM { 81610922SJeff.Bonwick@Sun.COM int error; 81710922SJeff.Bonwick@Sun.COM 81810922SJeff.Bonwick@Sun.COM ddt_create(spa); 81910922SJeff.Bonwick@Sun.COM 82010922SJeff.Bonwick@Sun.COM error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 82110922SJeff.Bonwick@Sun.COM DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 82210922SJeff.Bonwick@Sun.COM &spa->spa_ddt_stat_object); 82310922SJeff.Bonwick@Sun.COM 82410922SJeff.Bonwick@Sun.COM if (error) 82510922SJeff.Bonwick@Sun.COM return (error == ENOENT ? 0 : error); 82610922SJeff.Bonwick@Sun.COM 82710922SJeff.Bonwick@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 82811938SGeorge.Wilson@Sun.COM ddt_t *ddt = spa->spa_ddt[c]; 82910922SJeff.Bonwick@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 83010922SJeff.Bonwick@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; 83110922SJeff.Bonwick@Sun.COM class++) { 83210922SJeff.Bonwick@Sun.COM error = ddt_object_load(ddt, type, class); 83310922SJeff.Bonwick@Sun.COM if (error != 0 && error != ENOENT) 83410922SJeff.Bonwick@Sun.COM return (error); 83510922SJeff.Bonwick@Sun.COM } 83610922SJeff.Bonwick@Sun.COM } 83711938SGeorge.Wilson@Sun.COM 83811938SGeorge.Wilson@Sun.COM /* 83911938SGeorge.Wilson@Sun.COM * Seed the cached histograms. 84011938SGeorge.Wilson@Sun.COM */ 84111938SGeorge.Wilson@Sun.COM bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 84211938SGeorge.Wilson@Sun.COM sizeof (ddt->ddt_histogram)); 84310922SJeff.Bonwick@Sun.COM } 84410922SJeff.Bonwick@Sun.COM 84510922SJeff.Bonwick@Sun.COM return (0); 84610922SJeff.Bonwick@Sun.COM } 84710922SJeff.Bonwick@Sun.COM 84810922SJeff.Bonwick@Sun.COM void 84910922SJeff.Bonwick@Sun.COM ddt_unload(spa_t *spa) 85010922SJeff.Bonwick@Sun.COM { 85110922SJeff.Bonwick@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 85210922SJeff.Bonwick@Sun.COM if (spa->spa_ddt[c]) { 85310922SJeff.Bonwick@Sun.COM ddt_table_free(spa->spa_ddt[c]); 85410922SJeff.Bonwick@Sun.COM spa->spa_ddt[c] = NULL; 85510922SJeff.Bonwick@Sun.COM } 85610922SJeff.Bonwick@Sun.COM } 85710922SJeff.Bonwick@Sun.COM } 85810922SJeff.Bonwick@Sun.COM 85911125SJeff.Bonwick@Sun.COM boolean_t 86011125SJeff.Bonwick@Sun.COM ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) 86111125SJeff.Bonwick@Sun.COM { 86211125SJeff.Bonwick@Sun.COM ddt_t *ddt; 86311125SJeff.Bonwick@Sun.COM ddt_entry_t dde; 86411125SJeff.Bonwick@Sun.COM 86511125SJeff.Bonwick@Sun.COM if (!BP_GET_DEDUP(bp)) 86611125SJeff.Bonwick@Sun.COM return (B_FALSE); 86711125SJeff.Bonwick@Sun.COM 86811125SJeff.Bonwick@Sun.COM if (max_class == DDT_CLASS_UNIQUE) 86911125SJeff.Bonwick@Sun.COM return (B_TRUE); 87011125SJeff.Bonwick@Sun.COM 87111125SJeff.Bonwick@Sun.COM ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; 87211125SJeff.Bonwick@Sun.COM 87311125SJeff.Bonwick@Sun.COM ddt_key_fill(&dde.dde_key, bp); 87411125SJeff.Bonwick@Sun.COM 87511125SJeff.Bonwick@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) 87611125SJeff.Bonwick@Sun.COM for (enum ddt_class class = 0; class <= max_class; class++) 87711125SJeff.Bonwick@Sun.COM if (ddt_object_lookup(ddt, type, class, &dde) == 0) 87811125SJeff.Bonwick@Sun.COM return (B_TRUE); 87911125SJeff.Bonwick@Sun.COM 88011125SJeff.Bonwick@Sun.COM return (B_FALSE); 88111125SJeff.Bonwick@Sun.COM } 88211125SJeff.Bonwick@Sun.COM 88310922SJeff.Bonwick@Sun.COM ddt_entry_t * 88410922SJeff.Bonwick@Sun.COM ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 88510922SJeff.Bonwick@Sun.COM { 88610922SJeff.Bonwick@Sun.COM ddt_key_t ddk; 88710922SJeff.Bonwick@Sun.COM ddt_entry_t *dde; 88810922SJeff.Bonwick@Sun.COM 88910922SJeff.Bonwick@Sun.COM ddt_key_fill(&ddk, bp); 89010922SJeff.Bonwick@Sun.COM 89110922SJeff.Bonwick@Sun.COM dde = ddt_alloc(&ddk); 89210922SJeff.Bonwick@Sun.COM 89310922SJeff.Bonwick@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 89410922SJeff.Bonwick@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 89510922SJeff.Bonwick@Sun.COM /* 89610922SJeff.Bonwick@Sun.COM * We can only do repair if there are multiple copies 89710922SJeff.Bonwick@Sun.COM * of the block. For anything in the UNIQUE class, 89810922SJeff.Bonwick@Sun.COM * there's definitely only one copy, so don't even try. 89910922SJeff.Bonwick@Sun.COM */ 90010922SJeff.Bonwick@Sun.COM if (class != DDT_CLASS_UNIQUE && 90110922SJeff.Bonwick@Sun.COM ddt_object_lookup(ddt, type, class, dde) == 0) 90210922SJeff.Bonwick@Sun.COM return (dde); 90310922SJeff.Bonwick@Sun.COM } 90410922SJeff.Bonwick@Sun.COM } 90510922SJeff.Bonwick@Sun.COM 90610922SJeff.Bonwick@Sun.COM bzero(dde->dde_phys, sizeof (dde->dde_phys)); 90710922SJeff.Bonwick@Sun.COM 90810922SJeff.Bonwick@Sun.COM return (dde); 90910922SJeff.Bonwick@Sun.COM } 91010922SJeff.Bonwick@Sun.COM 91110922SJeff.Bonwick@Sun.COM void 91210922SJeff.Bonwick@Sun.COM ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) 91310922SJeff.Bonwick@Sun.COM { 91410922SJeff.Bonwick@Sun.COM avl_index_t where; 91510922SJeff.Bonwick@Sun.COM 91610922SJeff.Bonwick@Sun.COM ddt_enter(ddt); 91710922SJeff.Bonwick@Sun.COM 91810922SJeff.Bonwick@Sun.COM if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && 91910922SJeff.Bonwick@Sun.COM avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) 92010922SJeff.Bonwick@Sun.COM avl_insert(&ddt->ddt_repair_tree, dde, where); 92110922SJeff.Bonwick@Sun.COM else 92210922SJeff.Bonwick@Sun.COM ddt_free(dde); 92310922SJeff.Bonwick@Sun.COM 92410922SJeff.Bonwick@Sun.COM ddt_exit(ddt); 92510922SJeff.Bonwick@Sun.COM } 92610922SJeff.Bonwick@Sun.COM 92710922SJeff.Bonwick@Sun.COM static void 92810922SJeff.Bonwick@Sun.COM ddt_repair_entry_done(zio_t *zio) 92910922SJeff.Bonwick@Sun.COM { 93010922SJeff.Bonwick@Sun.COM ddt_entry_t *rdde = zio->io_private; 93110922SJeff.Bonwick@Sun.COM 93210922SJeff.Bonwick@Sun.COM ddt_free(rdde); 93310922SJeff.Bonwick@Sun.COM } 93410922SJeff.Bonwick@Sun.COM 93510922SJeff.Bonwick@Sun.COM static void 93610922SJeff.Bonwick@Sun.COM ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) 93710922SJeff.Bonwick@Sun.COM { 93810922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = dde->dde_phys; 93910922SJeff.Bonwick@Sun.COM ddt_phys_t *rddp = rdde->dde_phys; 94010922SJeff.Bonwick@Sun.COM ddt_key_t *ddk = &dde->dde_key; 94110922SJeff.Bonwick@Sun.COM ddt_key_t *rddk = &rdde->dde_key; 94210922SJeff.Bonwick@Sun.COM zio_t *zio; 94310922SJeff.Bonwick@Sun.COM blkptr_t blk; 94410922SJeff.Bonwick@Sun.COM 94510922SJeff.Bonwick@Sun.COM zio = zio_null(rio, rio->io_spa, NULL, 94610922SJeff.Bonwick@Sun.COM ddt_repair_entry_done, rdde, rio->io_flags); 94710922SJeff.Bonwick@Sun.COM 94810922SJeff.Bonwick@Sun.COM for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { 94910922SJeff.Bonwick@Sun.COM if (ddp->ddp_phys_birth == 0 || 95010922SJeff.Bonwick@Sun.COM ddp->ddp_phys_birth != rddp->ddp_phys_birth || 95110922SJeff.Bonwick@Sun.COM bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) 95210922SJeff.Bonwick@Sun.COM continue; 95311125SJeff.Bonwick@Sun.COM ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 95410922SJeff.Bonwick@Sun.COM zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, 95510922SJeff.Bonwick@Sun.COM rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, 95610922SJeff.Bonwick@Sun.COM ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); 95710922SJeff.Bonwick@Sun.COM } 95810922SJeff.Bonwick@Sun.COM 95910922SJeff.Bonwick@Sun.COM zio_nowait(zio); 96010922SJeff.Bonwick@Sun.COM } 96110922SJeff.Bonwick@Sun.COM 96210922SJeff.Bonwick@Sun.COM static void 96310922SJeff.Bonwick@Sun.COM ddt_repair_table(ddt_t *ddt, zio_t *rio) 96410922SJeff.Bonwick@Sun.COM { 96510922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 96610922SJeff.Bonwick@Sun.COM ddt_entry_t *dde, *rdde_next, *rdde; 96710922SJeff.Bonwick@Sun.COM avl_tree_t *t = &ddt->ddt_repair_tree; 96810922SJeff.Bonwick@Sun.COM blkptr_t blk; 96910922SJeff.Bonwick@Sun.COM 97010922SJeff.Bonwick@Sun.COM if (spa_sync_pass(spa) > 1) 97110922SJeff.Bonwick@Sun.COM return; 97210922SJeff.Bonwick@Sun.COM 97310922SJeff.Bonwick@Sun.COM ddt_enter(ddt); 97410922SJeff.Bonwick@Sun.COM for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { 97510922SJeff.Bonwick@Sun.COM rdde_next = AVL_NEXT(t, rdde); 97610922SJeff.Bonwick@Sun.COM avl_remove(&ddt->ddt_repair_tree, rdde); 97710922SJeff.Bonwick@Sun.COM ddt_exit(ddt); 97811125SJeff.Bonwick@Sun.COM ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); 97910922SJeff.Bonwick@Sun.COM dde = ddt_repair_start(ddt, &blk); 98010922SJeff.Bonwick@Sun.COM ddt_repair_entry(ddt, dde, rdde, rio); 98110922SJeff.Bonwick@Sun.COM ddt_repair_done(ddt, dde); 98210922SJeff.Bonwick@Sun.COM ddt_enter(ddt); 98310922SJeff.Bonwick@Sun.COM } 98410922SJeff.Bonwick@Sun.COM ddt_exit(ddt); 98510922SJeff.Bonwick@Sun.COM } 98610922SJeff.Bonwick@Sun.COM 98710922SJeff.Bonwick@Sun.COM static void 98810922SJeff.Bonwick@Sun.COM ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) 98910922SJeff.Bonwick@Sun.COM { 99011125SJeff.Bonwick@Sun.COM dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; 99110922SJeff.Bonwick@Sun.COM ddt_phys_t *ddp = dde->dde_phys; 99210922SJeff.Bonwick@Sun.COM ddt_key_t *ddk = &dde->dde_key; 99310922SJeff.Bonwick@Sun.COM enum ddt_type otype = dde->dde_type; 99410922SJeff.Bonwick@Sun.COM enum ddt_type ntype = DDT_TYPE_CURRENT; 99510922SJeff.Bonwick@Sun.COM enum ddt_class oclass = dde->dde_class; 99610922SJeff.Bonwick@Sun.COM enum ddt_class nclass; 99710922SJeff.Bonwick@Sun.COM uint64_t total_refcnt = 0; 99810922SJeff.Bonwick@Sun.COM 99910922SJeff.Bonwick@Sun.COM ASSERT(dde->dde_loaded); 100010922SJeff.Bonwick@Sun.COM ASSERT(!dde->dde_loading); 100110922SJeff.Bonwick@Sun.COM 100210922SJeff.Bonwick@Sun.COM for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 100310922SJeff.Bonwick@Sun.COM ASSERT(dde->dde_lead_zio[p] == NULL); 100410922SJeff.Bonwick@Sun.COM ASSERT((int64_t)ddp->ddp_refcnt >= 0); 100510922SJeff.Bonwick@Sun.COM if (ddp->ddp_phys_birth == 0) { 100610922SJeff.Bonwick@Sun.COM ASSERT(ddp->ddp_refcnt == 0); 100710922SJeff.Bonwick@Sun.COM continue; 100810922SJeff.Bonwick@Sun.COM } 100910922SJeff.Bonwick@Sun.COM if (p == DDT_PHYS_DITTO) { 101010922SJeff.Bonwick@Sun.COM if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) 101110922SJeff.Bonwick@Sun.COM ddt_phys_free(ddt, ddk, ddp, txg); 101210922SJeff.Bonwick@Sun.COM continue; 101310922SJeff.Bonwick@Sun.COM } 101410922SJeff.Bonwick@Sun.COM if (ddp->ddp_refcnt == 0) 101510922SJeff.Bonwick@Sun.COM ddt_phys_free(ddt, ddk, ddp, txg); 101610922SJeff.Bonwick@Sun.COM total_refcnt += ddp->ddp_refcnt; 101710922SJeff.Bonwick@Sun.COM } 101810922SJeff.Bonwick@Sun.COM 101910922SJeff.Bonwick@Sun.COM if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) 102010922SJeff.Bonwick@Sun.COM nclass = DDT_CLASS_DITTO; 102110922SJeff.Bonwick@Sun.COM else if (total_refcnt > 1) 102210922SJeff.Bonwick@Sun.COM nclass = DDT_CLASS_DUPLICATE; 102310922SJeff.Bonwick@Sun.COM else 102410922SJeff.Bonwick@Sun.COM nclass = DDT_CLASS_UNIQUE; 102510922SJeff.Bonwick@Sun.COM 102610922SJeff.Bonwick@Sun.COM if (otype != DDT_TYPES && 102710922SJeff.Bonwick@Sun.COM (otype != ntype || oclass != nclass || total_refcnt == 0)) { 102810922SJeff.Bonwick@Sun.COM VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); 102910922SJeff.Bonwick@Sun.COM ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); 103010922SJeff.Bonwick@Sun.COM } 103110922SJeff.Bonwick@Sun.COM 103210922SJeff.Bonwick@Sun.COM if (total_refcnt != 0) { 103310922SJeff.Bonwick@Sun.COM dde->dde_type = ntype; 103410922SJeff.Bonwick@Sun.COM dde->dde_class = nclass; 103510922SJeff.Bonwick@Sun.COM ddt_stat_update(ddt, dde, 0); 103610922SJeff.Bonwick@Sun.COM if (!ddt_object_exists(ddt, ntype, nclass)) 103710922SJeff.Bonwick@Sun.COM ddt_object_create(ddt, ntype, nclass, tx); 103810922SJeff.Bonwick@Sun.COM VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); 103911125SJeff.Bonwick@Sun.COM 104012296SLin.Ling@Sun.COM /* 104112296SLin.Ling@Sun.COM * If the class changes, the order that we scan this bp 104212296SLin.Ling@Sun.COM * changes. If it decreases, we could miss it, so 104312296SLin.Ling@Sun.COM * scan it right now. (This covers both class changing 104412296SLin.Ling@Sun.COM * while we are doing ddt_walk(), and when we are 104512296SLin.Ling@Sun.COM * traversing.) 104612296SLin.Ling@Sun.COM */ 104712296SLin.Ling@Sun.COM if (nclass < oclass) { 104812296SLin.Ling@Sun.COM dsl_scan_ddt_entry(dp->dp_scan, 104912296SLin.Ling@Sun.COM ddt->ddt_checksum, dde, tx); 105012296SLin.Ling@Sun.COM } 105110922SJeff.Bonwick@Sun.COM } 105210922SJeff.Bonwick@Sun.COM } 105310922SJeff.Bonwick@Sun.COM 105410922SJeff.Bonwick@Sun.COM static void 105510922SJeff.Bonwick@Sun.COM ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) 105610922SJeff.Bonwick@Sun.COM { 105710922SJeff.Bonwick@Sun.COM spa_t *spa = ddt->ddt_spa; 105810922SJeff.Bonwick@Sun.COM ddt_entry_t *dde; 105910922SJeff.Bonwick@Sun.COM void *cookie = NULL; 106010922SJeff.Bonwick@Sun.COM 106110922SJeff.Bonwick@Sun.COM if (avl_numnodes(&ddt->ddt_tree) == 0) 106210922SJeff.Bonwick@Sun.COM return; 106310922SJeff.Bonwick@Sun.COM 106410922SJeff.Bonwick@Sun.COM ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); 106510922SJeff.Bonwick@Sun.COM 106610922SJeff.Bonwick@Sun.COM if (spa->spa_ddt_stat_object == 0) { 106710922SJeff.Bonwick@Sun.COM spa->spa_ddt_stat_object = zap_create(ddt->ddt_os, 106810922SJeff.Bonwick@Sun.COM DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx); 106910922SJeff.Bonwick@Sun.COM VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, 107010922SJeff.Bonwick@Sun.COM DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 107110922SJeff.Bonwick@Sun.COM &spa->spa_ddt_stat_object, tx) == 0); 107210922SJeff.Bonwick@Sun.COM } 107310922SJeff.Bonwick@Sun.COM 107410922SJeff.Bonwick@Sun.COM while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { 107510922SJeff.Bonwick@Sun.COM ddt_sync_entry(ddt, dde, tx, txg); 107610922SJeff.Bonwick@Sun.COM ddt_free(dde); 107710922SJeff.Bonwick@Sun.COM } 107810922SJeff.Bonwick@Sun.COM 107910922SJeff.Bonwick@Sun.COM for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 108010922SJeff.Bonwick@Sun.COM for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 108110922SJeff.Bonwick@Sun.COM if (!ddt_object_exists(ddt, type, class)) 108210922SJeff.Bonwick@Sun.COM continue; 108310922SJeff.Bonwick@Sun.COM ddt_object_sync(ddt, type, class, tx); 108410922SJeff.Bonwick@Sun.COM if (ddt_object_count(ddt, type, class) == 0) 108510922SJeff.Bonwick@Sun.COM ddt_object_destroy(ddt, type, class, tx); 108610922SJeff.Bonwick@Sun.COM } 108710922SJeff.Bonwick@Sun.COM } 108811938SGeorge.Wilson@Sun.COM 108911938SGeorge.Wilson@Sun.COM bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 109011938SGeorge.Wilson@Sun.COM sizeof (ddt->ddt_histogram)); 109110922SJeff.Bonwick@Sun.COM } 109210922SJeff.Bonwick@Sun.COM 109310922SJeff.Bonwick@Sun.COM void 109410922SJeff.Bonwick@Sun.COM ddt_sync(spa_t *spa, uint64_t txg) 109510922SJeff.Bonwick@Sun.COM { 109610922SJeff.Bonwick@Sun.COM dmu_tx_t *tx; 109710922SJeff.Bonwick@Sun.COM zio_t *rio = zio_root(spa, NULL, NULL, 109810922SJeff.Bonwick@Sun.COM ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 109910922SJeff.Bonwick@Sun.COM 110010922SJeff.Bonwick@Sun.COM ASSERT(spa_syncing_txg(spa) == txg); 110110922SJeff.Bonwick@Sun.COM 110210922SJeff.Bonwick@Sun.COM tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 110310922SJeff.Bonwick@Sun.COM 110410922SJeff.Bonwick@Sun.COM for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 110510922SJeff.Bonwick@Sun.COM ddt_t *ddt = spa->spa_ddt[c]; 110610922SJeff.Bonwick@Sun.COM if (ddt == NULL) 110710922SJeff.Bonwick@Sun.COM continue; 110810922SJeff.Bonwick@Sun.COM ddt_sync_table(ddt, tx, txg); 110910922SJeff.Bonwick@Sun.COM ddt_repair_table(ddt, rio); 111010922SJeff.Bonwick@Sun.COM } 111110922SJeff.Bonwick@Sun.COM 111210922SJeff.Bonwick@Sun.COM (void) zio_wait(rio); 111310922SJeff.Bonwick@Sun.COM 111410922SJeff.Bonwick@Sun.COM dmu_tx_commit(tx); 111510922SJeff.Bonwick@Sun.COM } 111611125SJeff.Bonwick@Sun.COM 111711125SJeff.Bonwick@Sun.COM int 111811125SJeff.Bonwick@Sun.COM ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) 111911125SJeff.Bonwick@Sun.COM { 112011125SJeff.Bonwick@Sun.COM do { 112111125SJeff.Bonwick@Sun.COM do { 112211125SJeff.Bonwick@Sun.COM do { 112311125SJeff.Bonwick@Sun.COM ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; 112411125SJeff.Bonwick@Sun.COM int error = ENOENT; 112511125SJeff.Bonwick@Sun.COM if (ddt_object_exists(ddt, ddb->ddb_type, 112611125SJeff.Bonwick@Sun.COM ddb->ddb_class)) { 112711125SJeff.Bonwick@Sun.COM error = ddt_object_walk(ddt, 112811125SJeff.Bonwick@Sun.COM ddb->ddb_type, ddb->ddb_class, 112911125SJeff.Bonwick@Sun.COM &ddb->ddb_cursor, dde); 113011125SJeff.Bonwick@Sun.COM } 113112296SLin.Ling@Sun.COM dde->dde_type = ddb->ddb_type; 113212296SLin.Ling@Sun.COM dde->dde_class = ddb->ddb_class; 113311125SJeff.Bonwick@Sun.COM if (error == 0) 113411125SJeff.Bonwick@Sun.COM return (0); 113511125SJeff.Bonwick@Sun.COM if (error != ENOENT) 113611125SJeff.Bonwick@Sun.COM return (error); 113711125SJeff.Bonwick@Sun.COM ddb->ddb_cursor = 0; 113811125SJeff.Bonwick@Sun.COM } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); 113911125SJeff.Bonwick@Sun.COM ddb->ddb_checksum = 0; 114011125SJeff.Bonwick@Sun.COM } while (++ddb->ddb_type < DDT_TYPES); 114111125SJeff.Bonwick@Sun.COM ddb->ddb_type = 0; 114211125SJeff.Bonwick@Sun.COM } while (++ddb->ddb_class < DDT_CLASSES); 114311125SJeff.Bonwick@Sun.COM 114411125SJeff.Bonwick@Sun.COM return (ENOENT); 114511125SJeff.Bonwick@Sun.COM } 1146