112470SMatthew.Ahrens@Sun.COM /* 212470SMatthew.Ahrens@Sun.COM * CDDL HEADER START 312470SMatthew.Ahrens@Sun.COM * 412470SMatthew.Ahrens@Sun.COM * The contents of this file are subject to the terms of the 512470SMatthew.Ahrens@Sun.COM * Common Development and Distribution License (the "License"). 612470SMatthew.Ahrens@Sun.COM * You may not use this file except in compliance with the License. 712470SMatthew.Ahrens@Sun.COM * 812470SMatthew.Ahrens@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 912470SMatthew.Ahrens@Sun.COM * or http://www.opensolaris.org/os/licensing. 1012470SMatthew.Ahrens@Sun.COM * See the License for the specific language governing permissions 1112470SMatthew.Ahrens@Sun.COM * and limitations under the License. 1212470SMatthew.Ahrens@Sun.COM * 1312470SMatthew.Ahrens@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 1412470SMatthew.Ahrens@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1512470SMatthew.Ahrens@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 1612470SMatthew.Ahrens@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 1712470SMatthew.Ahrens@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 1812470SMatthew.Ahrens@Sun.COM * 1912470SMatthew.Ahrens@Sun.COM * CDDL HEADER END 2012470SMatthew.Ahrens@Sun.COM */ 2112470SMatthew.Ahrens@Sun.COM /* 2212470SMatthew.Ahrens@Sun.COM * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 2312470SMatthew.Ahrens@Sun.COM */ 2412470SMatthew.Ahrens@Sun.COM 2512470SMatthew.Ahrens@Sun.COM #include <sys/bpobj.h> 2612470SMatthew.Ahrens@Sun.COM #include <sys/zfs_context.h> 2712470SMatthew.Ahrens@Sun.COM #include <sys/refcount.h> 2812470SMatthew.Ahrens@Sun.COM 2912470SMatthew.Ahrens@Sun.COM uint64_t 3012470SMatthew.Ahrens@Sun.COM bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) 3112470SMatthew.Ahrens@Sun.COM { 3212470SMatthew.Ahrens@Sun.COM int size; 3312470SMatthew.Ahrens@Sun.COM 3412470SMatthew.Ahrens@Sun.COM if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) 3512470SMatthew.Ahrens@Sun.COM size = BPOBJ_SIZE_V0; 3612470SMatthew.Ahrens@Sun.COM else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) 3712470SMatthew.Ahrens@Sun.COM size = BPOBJ_SIZE_V1; 3812470SMatthew.Ahrens@Sun.COM else 3912470SMatthew.Ahrens@Sun.COM size = sizeof (bpobj_phys_t); 4012470SMatthew.Ahrens@Sun.COM 4112470SMatthew.Ahrens@Sun.COM return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, 4212470SMatthew.Ahrens@Sun.COM DMU_OT_BPOBJ_HDR, size, tx)); 4312470SMatthew.Ahrens@Sun.COM } 4412470SMatthew.Ahrens@Sun.COM 4512470SMatthew.Ahrens@Sun.COM void 4612470SMatthew.Ahrens@Sun.COM bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) 4712470SMatthew.Ahrens@Sun.COM { 4812470SMatthew.Ahrens@Sun.COM int64_t i; 4912470SMatthew.Ahrens@Sun.COM bpobj_t bpo; 5012470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi; 5112470SMatthew.Ahrens@Sun.COM int epb; 5212470SMatthew.Ahrens@Sun.COM dmu_buf_t *dbuf = NULL; 5312470SMatthew.Ahrens@Sun.COM 5412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); 5512470SMatthew.Ahrens@Sun.COM 5612470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo.bpo_lock); 5712470SMatthew.Ahrens@Sun.COM 5812470SMatthew.Ahrens@Sun.COM if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) 5912470SMatthew.Ahrens@Sun.COM goto out; 6012470SMatthew.Ahrens@Sun.COM 6112470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); 6212470SMatthew.Ahrens@Sun.COM epb = doi.doi_data_block_size / sizeof (uint64_t); 6312470SMatthew.Ahrens@Sun.COM 6412470SMatthew.Ahrens@Sun.COM for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { 6512470SMatthew.Ahrens@Sun.COM uint64_t *objarray; 6612470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff; 6712470SMatthew.Ahrens@Sun.COM 6812470SMatthew.Ahrens@Sun.COM offset = i * sizeof (uint64_t); 6912470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, epb); 7012470SMatthew.Ahrens@Sun.COM 7112470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) { 7212470SMatthew.Ahrens@Sun.COM if (dbuf) 7312470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 7412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_buf_hold(os, 7512470SMatthew.Ahrens@Sun.COM bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); 7612470SMatthew.Ahrens@Sun.COM } 7712470SMatthew.Ahrens@Sun.COM 7812470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset); 7912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); 8012470SMatthew.Ahrens@Sun.COM 8112470SMatthew.Ahrens@Sun.COM objarray = dbuf->db_data; 8212470SMatthew.Ahrens@Sun.COM bpobj_free(os, objarray[blkoff], tx); 8312470SMatthew.Ahrens@Sun.COM } 8412470SMatthew.Ahrens@Sun.COM if (dbuf) { 8512470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 8612470SMatthew.Ahrens@Sun.COM dbuf = NULL; 8712470SMatthew.Ahrens@Sun.COM } 8812470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); 8912470SMatthew.Ahrens@Sun.COM 9012470SMatthew.Ahrens@Sun.COM out: 9112470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo.bpo_lock); 9212470SMatthew.Ahrens@Sun.COM bpobj_close(&bpo); 9312470SMatthew.Ahrens@Sun.COM 9412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_free(os, obj, tx)); 9512470SMatthew.Ahrens@Sun.COM } 9612470SMatthew.Ahrens@Sun.COM 9712470SMatthew.Ahrens@Sun.COM int 9812470SMatthew.Ahrens@Sun.COM bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) 9912470SMatthew.Ahrens@Sun.COM { 10012470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi; 10112470SMatthew.Ahrens@Sun.COM int err; 10212470SMatthew.Ahrens@Sun.COM 10312470SMatthew.Ahrens@Sun.COM err = dmu_object_info(os, object, &doi); 10412470SMatthew.Ahrens@Sun.COM if (err) 10512470SMatthew.Ahrens@Sun.COM return (err); 10612470SMatthew.Ahrens@Sun.COM 10712470SMatthew.Ahrens@Sun.COM bzero(bpo, sizeof (*bpo)); 10812470SMatthew.Ahrens@Sun.COM mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); 10912470SMatthew.Ahrens@Sun.COM 11012470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_dbuf == NULL); 11112470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_phys == NULL); 11212470SMatthew.Ahrens@Sun.COM ASSERT(object != 0); 11312470SMatthew.Ahrens@Sun.COM ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); 11412470SMatthew.Ahrens@Sun.COM ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); 11512470SMatthew.Ahrens@Sun.COM 11612470SMatthew.Ahrens@Sun.COM bpo->bpo_os = os; 11712470SMatthew.Ahrens@Sun.COM bpo->bpo_object = object; 11812470SMatthew.Ahrens@Sun.COM bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; 11912470SMatthew.Ahrens@Sun.COM bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); 12012470SMatthew.Ahrens@Sun.COM bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); 12112470SMatthew.Ahrens@Sun.COM 12212470SMatthew.Ahrens@Sun.COM err = dmu_bonus_hold(bpo->bpo_os, 12312470SMatthew.Ahrens@Sun.COM bpo->bpo_object, bpo, &bpo->bpo_dbuf); 12412470SMatthew.Ahrens@Sun.COM if (err) 12512470SMatthew.Ahrens@Sun.COM return (err); 12612470SMatthew.Ahrens@Sun.COM bpo->bpo_phys = bpo->bpo_dbuf->db_data; 12712470SMatthew.Ahrens@Sun.COM return (0); 12812470SMatthew.Ahrens@Sun.COM } 12912470SMatthew.Ahrens@Sun.COM 13012470SMatthew.Ahrens@Sun.COM void 13112470SMatthew.Ahrens@Sun.COM bpobj_close(bpobj_t *bpo) 13212470SMatthew.Ahrens@Sun.COM { 13312470SMatthew.Ahrens@Sun.COM /* Lame workaround for closing a bpobj that was never opened. */ 13412470SMatthew.Ahrens@Sun.COM if (bpo->bpo_object == 0) 13512470SMatthew.Ahrens@Sun.COM return; 13612470SMatthew.Ahrens@Sun.COM 13712470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_dbuf, bpo); 13812470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf != NULL) 13912470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); 14012470SMatthew.Ahrens@Sun.COM bpo->bpo_dbuf = NULL; 14112470SMatthew.Ahrens@Sun.COM bpo->bpo_phys = NULL; 14212470SMatthew.Ahrens@Sun.COM bpo->bpo_cached_dbuf = NULL; 14312470SMatthew.Ahrens@Sun.COM 14412470SMatthew.Ahrens@Sun.COM mutex_destroy(&bpo->bpo_lock); 14512470SMatthew.Ahrens@Sun.COM } 14612470SMatthew.Ahrens@Sun.COM 14712470SMatthew.Ahrens@Sun.COM static int 14812470SMatthew.Ahrens@Sun.COM bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, 14912470SMatthew.Ahrens@Sun.COM boolean_t free) 15012470SMatthew.Ahrens@Sun.COM { 15112470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi; 15212470SMatthew.Ahrens@Sun.COM int epb; 15312470SMatthew.Ahrens@Sun.COM int64_t i; 15412470SMatthew.Ahrens@Sun.COM int err = 0; 15512470SMatthew.Ahrens@Sun.COM dmu_buf_t *dbuf = NULL; 15612470SMatthew.Ahrens@Sun.COM 15712470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock); 15812470SMatthew.Ahrens@Sun.COM 15912470SMatthew.Ahrens@Sun.COM if (free) 16012470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx); 16112470SMatthew.Ahrens@Sun.COM 16212470SMatthew.Ahrens@Sun.COM for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) { 16312470SMatthew.Ahrens@Sun.COM blkptr_t *bparray; 16412470SMatthew.Ahrens@Sun.COM blkptr_t *bp; 16512470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff; 16612470SMatthew.Ahrens@Sun.COM 16712470SMatthew.Ahrens@Sun.COM offset = i * sizeof (blkptr_t); 16812470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, bpo->bpo_epb); 16912470SMatthew.Ahrens@Sun.COM 17012470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) { 17112470SMatthew.Ahrens@Sun.COM if (dbuf) 17212470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 17312470SMatthew.Ahrens@Sun.COM err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, 17412470SMatthew.Ahrens@Sun.COM FTAG, &dbuf, 0); 17512470SMatthew.Ahrens@Sun.COM if (err) 17612470SMatthew.Ahrens@Sun.COM break; 17712470SMatthew.Ahrens@Sun.COM } 17812470SMatthew.Ahrens@Sun.COM 17912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset); 18012470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); 18112470SMatthew.Ahrens@Sun.COM 18212470SMatthew.Ahrens@Sun.COM bparray = dbuf->db_data; 18312470SMatthew.Ahrens@Sun.COM bp = &bparray[blkoff]; 18412470SMatthew.Ahrens@Sun.COM err = func(arg, bp, tx); 18512470SMatthew.Ahrens@Sun.COM if (err) 18612470SMatthew.Ahrens@Sun.COM break; 18712470SMatthew.Ahrens@Sun.COM if (free) { 18812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes -= 18912470SMatthew.Ahrens@Sun.COM bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); 19012470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); 19112470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) { 19212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp); 19312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp); 19412470SMatthew.Ahrens@Sun.COM } 19512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_blkptrs--; 19612470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); 19712470SMatthew.Ahrens@Sun.COM } 19812470SMatthew.Ahrens@Sun.COM } 19912470SMatthew.Ahrens@Sun.COM if (dbuf) { 20012470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 20112470SMatthew.Ahrens@Sun.COM dbuf = NULL; 20212470SMatthew.Ahrens@Sun.COM } 20312470SMatthew.Ahrens@Sun.COM if (free) { 20412470SMatthew.Ahrens@Sun.COM i++; 20512470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, 20612470SMatthew.Ahrens@Sun.COM i * sizeof (blkptr_t), -1ULL, tx)); 20712470SMatthew.Ahrens@Sun.COM } 20812470SMatthew.Ahrens@Sun.COM if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) 20912470SMatthew.Ahrens@Sun.COM goto out; 21012470SMatthew.Ahrens@Sun.COM 21112470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havecomp); 21212470SMatthew.Ahrens@Sun.COM err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); 213*12538SLin.Ling@Sun.COM if (err) { 214*12538SLin.Ling@Sun.COM mutex_exit(&bpo->bpo_lock); 21512470SMatthew.Ahrens@Sun.COM return (err); 216*12538SLin.Ling@Sun.COM } 21712470SMatthew.Ahrens@Sun.COM epb = doi.doi_data_block_size / sizeof (uint64_t); 21812470SMatthew.Ahrens@Sun.COM 21912470SMatthew.Ahrens@Sun.COM for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { 22012470SMatthew.Ahrens@Sun.COM uint64_t *objarray; 22112470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff; 22212470SMatthew.Ahrens@Sun.COM bpobj_t sublist; 22312470SMatthew.Ahrens@Sun.COM uint64_t used_before, comp_before, uncomp_before; 22412470SMatthew.Ahrens@Sun.COM uint64_t used_after, comp_after, uncomp_after; 22512470SMatthew.Ahrens@Sun.COM 22612470SMatthew.Ahrens@Sun.COM offset = i * sizeof (uint64_t); 22712470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, epb); 22812470SMatthew.Ahrens@Sun.COM 22912470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) { 23012470SMatthew.Ahrens@Sun.COM if (dbuf) 23112470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 23212470SMatthew.Ahrens@Sun.COM err = dmu_buf_hold(bpo->bpo_os, 23312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0); 23412470SMatthew.Ahrens@Sun.COM if (err) 23512470SMatthew.Ahrens@Sun.COM break; 23612470SMatthew.Ahrens@Sun.COM } 23712470SMatthew.Ahrens@Sun.COM 23812470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset); 23912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); 24012470SMatthew.Ahrens@Sun.COM 24112470SMatthew.Ahrens@Sun.COM objarray = dbuf->db_data; 24212470SMatthew.Ahrens@Sun.COM err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]); 24312470SMatthew.Ahrens@Sun.COM if (err) 24412470SMatthew.Ahrens@Sun.COM break; 24512470SMatthew.Ahrens@Sun.COM if (free) { 24612470SMatthew.Ahrens@Sun.COM err = bpobj_space(&sublist, 24712470SMatthew.Ahrens@Sun.COM &used_before, &comp_before, &uncomp_before); 24812470SMatthew.Ahrens@Sun.COM if (err) 24912470SMatthew.Ahrens@Sun.COM break; 25012470SMatthew.Ahrens@Sun.COM } 25112470SMatthew.Ahrens@Sun.COM err = bpobj_iterate_impl(&sublist, func, arg, tx, free); 25212470SMatthew.Ahrens@Sun.COM if (free) { 25312470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_space(&sublist, 25412470SMatthew.Ahrens@Sun.COM &used_after, &comp_after, &uncomp_after)); 25512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes -= used_before - used_after; 25612470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); 25712470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp -= comp_before - used_after; 25812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp -= 25912470SMatthew.Ahrens@Sun.COM uncomp_before - uncomp_after; 26012470SMatthew.Ahrens@Sun.COM } 26112470SMatthew.Ahrens@Sun.COM 26212470SMatthew.Ahrens@Sun.COM bpobj_close(&sublist); 26312470SMatthew.Ahrens@Sun.COM if (err) 26412470SMatthew.Ahrens@Sun.COM break; 26512470SMatthew.Ahrens@Sun.COM if (free) { 26612470SMatthew.Ahrens@Sun.COM err = dmu_object_free(bpo->bpo_os, 26712470SMatthew.Ahrens@Sun.COM objarray[blkoff], tx); 26812470SMatthew.Ahrens@Sun.COM if (err) 26912470SMatthew.Ahrens@Sun.COM break; 27012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs--; 27112470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0); 27212470SMatthew.Ahrens@Sun.COM } 27312470SMatthew.Ahrens@Sun.COM } 27412470SMatthew.Ahrens@Sun.COM if (dbuf) { 27512470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG); 27612470SMatthew.Ahrens@Sun.COM dbuf = NULL; 27712470SMatthew.Ahrens@Sun.COM } 27812470SMatthew.Ahrens@Sun.COM if (free) { 27912470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, 28012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs, 28112470SMatthew.Ahrens@Sun.COM (i + 1) * sizeof (uint64_t), -1ULL, tx)); 28212470SMatthew.Ahrens@Sun.COM } 28312470SMatthew.Ahrens@Sun.COM 28412470SMatthew.Ahrens@Sun.COM out: 28512470SMatthew.Ahrens@Sun.COM /* If there are no entries, there should be no bytes. */ 28612470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 || 28712470SMatthew.Ahrens@Sun.COM (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) || 28812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes == 0); 28912470SMatthew.Ahrens@Sun.COM 29012470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock); 29112470SMatthew.Ahrens@Sun.COM return (err); 29212470SMatthew.Ahrens@Sun.COM } 29312470SMatthew.Ahrens@Sun.COM 29412470SMatthew.Ahrens@Sun.COM /* 29512470SMatthew.Ahrens@Sun.COM * Iterate and remove the entries. If func returns nonzero, iteration 29612470SMatthew.Ahrens@Sun.COM * will stop and that entry will not be removed. 29712470SMatthew.Ahrens@Sun.COM */ 29812470SMatthew.Ahrens@Sun.COM int 29912470SMatthew.Ahrens@Sun.COM bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) 30012470SMatthew.Ahrens@Sun.COM { 30112470SMatthew.Ahrens@Sun.COM return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE)); 30212470SMatthew.Ahrens@Sun.COM } 30312470SMatthew.Ahrens@Sun.COM 30412470SMatthew.Ahrens@Sun.COM /* 30512470SMatthew.Ahrens@Sun.COM * Iterate the entries. If func returns nonzero, iteration will stop. 30612470SMatthew.Ahrens@Sun.COM */ 30712470SMatthew.Ahrens@Sun.COM int 30812470SMatthew.Ahrens@Sun.COM bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) 30912470SMatthew.Ahrens@Sun.COM { 31012470SMatthew.Ahrens@Sun.COM return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE)); 31112470SMatthew.Ahrens@Sun.COM } 31212470SMatthew.Ahrens@Sun.COM 31312470SMatthew.Ahrens@Sun.COM void 31412470SMatthew.Ahrens@Sun.COM bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) 31512470SMatthew.Ahrens@Sun.COM { 31612470SMatthew.Ahrens@Sun.COM bpobj_t subbpo; 31712470SMatthew.Ahrens@Sun.COM uint64_t used, comp, uncomp; 31812470SMatthew.Ahrens@Sun.COM 31912470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havesubobj); 32012470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havecomp); 32112470SMatthew.Ahrens@Sun.COM 32212470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); 32312470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); 32412470SMatthew.Ahrens@Sun.COM bpobj_close(&subbpo); 32512470SMatthew.Ahrens@Sun.COM 32612470SMatthew.Ahrens@Sun.COM if (used == 0) { 32712470SMatthew.Ahrens@Sun.COM /* No point in having an empty subobj. */ 32812470SMatthew.Ahrens@Sun.COM bpobj_free(bpo->bpo_os, subobj, tx); 32912470SMatthew.Ahrens@Sun.COM return; 33012470SMatthew.Ahrens@Sun.COM } 33112470SMatthew.Ahrens@Sun.COM 33212470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx); 33312470SMatthew.Ahrens@Sun.COM if (bpo->bpo_phys->bpo_subobjs == 0) { 33412470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, 33512470SMatthew.Ahrens@Sun.COM DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); 33612470SMatthew.Ahrens@Sun.COM } 33712470SMatthew.Ahrens@Sun.COM 33812470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock); 33912470SMatthew.Ahrens@Sun.COM dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 34012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 34112470SMatthew.Ahrens@Sun.COM sizeof (subobj), &subobj, tx); 34212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs++; 34312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes += used; 34412470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp += comp; 34512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp += uncomp; 34612470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock); 34712470SMatthew.Ahrens@Sun.COM } 34812470SMatthew.Ahrens@Sun.COM 34912470SMatthew.Ahrens@Sun.COM void 35012470SMatthew.Ahrens@Sun.COM bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) 35112470SMatthew.Ahrens@Sun.COM { 35212470SMatthew.Ahrens@Sun.COM blkptr_t stored_bp = *bp; 35312470SMatthew.Ahrens@Sun.COM uint64_t offset; 35412470SMatthew.Ahrens@Sun.COM int blkoff; 35512470SMatthew.Ahrens@Sun.COM blkptr_t *bparray; 35612470SMatthew.Ahrens@Sun.COM 35712470SMatthew.Ahrens@Sun.COM ASSERT(!BP_IS_HOLE(bp)); 35812470SMatthew.Ahrens@Sun.COM 35912470SMatthew.Ahrens@Sun.COM /* We never need the fill count. */ 36012470SMatthew.Ahrens@Sun.COM stored_bp.blk_fill = 0; 36112470SMatthew.Ahrens@Sun.COM 36212470SMatthew.Ahrens@Sun.COM /* The bpobj will compress better if we can leave off the checksum */ 36312470SMatthew.Ahrens@Sun.COM if (!BP_GET_DEDUP(bp)) 36412470SMatthew.Ahrens@Sun.COM bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum)); 36512470SMatthew.Ahrens@Sun.COM 36612470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock); 36712470SMatthew.Ahrens@Sun.COM 36812470SMatthew.Ahrens@Sun.COM offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); 36912470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); 37012470SMatthew.Ahrens@Sun.COM 37112470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf == NULL || 37212470SMatthew.Ahrens@Sun.COM offset < bpo->bpo_cached_dbuf->db_offset || 37312470SMatthew.Ahrens@Sun.COM offset >= bpo->bpo_cached_dbuf->db_offset + 37412470SMatthew.Ahrens@Sun.COM bpo->bpo_cached_dbuf->db_size) { 37512470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf) 37612470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); 37712470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, 37812470SMatthew.Ahrens@Sun.COM offset, bpo, &bpo->bpo_cached_dbuf, 0)); 37912470SMatthew.Ahrens@Sun.COM } 38012470SMatthew.Ahrens@Sun.COM 38112470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); 38212470SMatthew.Ahrens@Sun.COM bparray = bpo->bpo_cached_dbuf->db_data; 38312470SMatthew.Ahrens@Sun.COM bparray[blkoff] = stored_bp; 38412470SMatthew.Ahrens@Sun.COM 38512470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx); 38612470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_blkptrs++; 38712470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes += 38812470SMatthew.Ahrens@Sun.COM bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); 38912470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) { 39012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp); 39112470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp); 39212470SMatthew.Ahrens@Sun.COM } 39312470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock); 39412470SMatthew.Ahrens@Sun.COM } 39512470SMatthew.Ahrens@Sun.COM 39612470SMatthew.Ahrens@Sun.COM struct space_range_arg { 39712470SMatthew.Ahrens@Sun.COM spa_t *spa; 39812470SMatthew.Ahrens@Sun.COM uint64_t mintxg; 39912470SMatthew.Ahrens@Sun.COM uint64_t maxtxg; 40012470SMatthew.Ahrens@Sun.COM uint64_t used; 40112470SMatthew.Ahrens@Sun.COM uint64_t comp; 40212470SMatthew.Ahrens@Sun.COM uint64_t uncomp; 40312470SMatthew.Ahrens@Sun.COM }; 40412470SMatthew.Ahrens@Sun.COM 40512470SMatthew.Ahrens@Sun.COM /* ARGSUSED */ 40612470SMatthew.Ahrens@Sun.COM static int 40712470SMatthew.Ahrens@Sun.COM space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 40812470SMatthew.Ahrens@Sun.COM { 40912470SMatthew.Ahrens@Sun.COM struct space_range_arg *sra = arg; 41012470SMatthew.Ahrens@Sun.COM 41112470SMatthew.Ahrens@Sun.COM if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { 41212470SMatthew.Ahrens@Sun.COM sra->used += bp_get_dsize_sync(sra->spa, bp); 41312470SMatthew.Ahrens@Sun.COM sra->comp += BP_GET_PSIZE(bp); 41412470SMatthew.Ahrens@Sun.COM sra->uncomp += BP_GET_UCSIZE(bp); 41512470SMatthew.Ahrens@Sun.COM } 41612470SMatthew.Ahrens@Sun.COM return (0); 41712470SMatthew.Ahrens@Sun.COM } 41812470SMatthew.Ahrens@Sun.COM 41912470SMatthew.Ahrens@Sun.COM int 42012470SMatthew.Ahrens@Sun.COM bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 42112470SMatthew.Ahrens@Sun.COM { 42212470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock); 42312470SMatthew.Ahrens@Sun.COM 42412470SMatthew.Ahrens@Sun.COM *usedp = bpo->bpo_phys->bpo_bytes; 42512470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) { 42612470SMatthew.Ahrens@Sun.COM *compp = bpo->bpo_phys->bpo_comp; 42712470SMatthew.Ahrens@Sun.COM *uncompp = bpo->bpo_phys->bpo_uncomp; 42812470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock); 42912470SMatthew.Ahrens@Sun.COM return (0); 43012470SMatthew.Ahrens@Sun.COM } else { 43112470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock); 43212470SMatthew.Ahrens@Sun.COM return (bpobj_space_range(bpo, 0, UINT64_MAX, 43312470SMatthew.Ahrens@Sun.COM usedp, compp, uncompp)); 43412470SMatthew.Ahrens@Sun.COM } 43512470SMatthew.Ahrens@Sun.COM } 43612470SMatthew.Ahrens@Sun.COM 43712470SMatthew.Ahrens@Sun.COM /* 43812470SMatthew.Ahrens@Sun.COM * Return the amount of space in the bpobj which is: 43912470SMatthew.Ahrens@Sun.COM * mintxg < blk_birth <= maxtxg 44012470SMatthew.Ahrens@Sun.COM */ 44112470SMatthew.Ahrens@Sun.COM int 44212470SMatthew.Ahrens@Sun.COM bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, 44312470SMatthew.Ahrens@Sun.COM uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 44412470SMatthew.Ahrens@Sun.COM { 44512470SMatthew.Ahrens@Sun.COM struct space_range_arg sra = { 0 }; 44612470SMatthew.Ahrens@Sun.COM int err; 44712470SMatthew.Ahrens@Sun.COM 44812470SMatthew.Ahrens@Sun.COM /* 44912470SMatthew.Ahrens@Sun.COM * As an optimization, if they want the whole txg range, just 45012470SMatthew.Ahrens@Sun.COM * get bpo_bytes rather than iterating over the bps. 45112470SMatthew.Ahrens@Sun.COM */ 45212470SMatthew.Ahrens@Sun.COM if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) 45312470SMatthew.Ahrens@Sun.COM return (bpobj_space(bpo, usedp, compp, uncompp)); 45412470SMatthew.Ahrens@Sun.COM 45512470SMatthew.Ahrens@Sun.COM sra.spa = dmu_objset_spa(bpo->bpo_os); 45612470SMatthew.Ahrens@Sun.COM sra.mintxg = mintxg; 45712470SMatthew.Ahrens@Sun.COM sra.maxtxg = maxtxg; 45812470SMatthew.Ahrens@Sun.COM 45912470SMatthew.Ahrens@Sun.COM err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); 46012470SMatthew.Ahrens@Sun.COM *usedp = sra.used; 46112470SMatthew.Ahrens@Sun.COM *compp = sra.comp; 46212470SMatthew.Ahrens@Sun.COM *uncompp = sra.uncomp; 46312470SMatthew.Ahrens@Sun.COM return (err); 46412470SMatthew.Ahrens@Sun.COM } 465