/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */
2412470SMatthew.Ahrens@Sun.COM
2512470SMatthew.Ahrens@Sun.COM #include <sys/bpobj.h>
2612470SMatthew.Ahrens@Sun.COM #include <sys/zfs_context.h>
2712470SMatthew.Ahrens@Sun.COM #include <sys/refcount.h>
2812470SMatthew.Ahrens@Sun.COM
2912470SMatthew.Ahrens@Sun.COM uint64_t
bpobj_alloc(objset_t * os,int blocksize,dmu_tx_t * tx)3012470SMatthew.Ahrens@Sun.COM bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
3112470SMatthew.Ahrens@Sun.COM {
3212470SMatthew.Ahrens@Sun.COM int size;
3312470SMatthew.Ahrens@Sun.COM
3412470SMatthew.Ahrens@Sun.COM if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
3512470SMatthew.Ahrens@Sun.COM size = BPOBJ_SIZE_V0;
3612470SMatthew.Ahrens@Sun.COM else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
3712470SMatthew.Ahrens@Sun.COM size = BPOBJ_SIZE_V1;
3812470SMatthew.Ahrens@Sun.COM else
3912470SMatthew.Ahrens@Sun.COM size = sizeof (bpobj_phys_t);
4012470SMatthew.Ahrens@Sun.COM
4112470SMatthew.Ahrens@Sun.COM return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
4212470SMatthew.Ahrens@Sun.COM DMU_OT_BPOBJ_HDR, size, tx));
4312470SMatthew.Ahrens@Sun.COM }
4412470SMatthew.Ahrens@Sun.COM
4512470SMatthew.Ahrens@Sun.COM void
bpobj_free(objset_t * os,uint64_t obj,dmu_tx_t * tx)4612470SMatthew.Ahrens@Sun.COM bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
4712470SMatthew.Ahrens@Sun.COM {
4812470SMatthew.Ahrens@Sun.COM int64_t i;
4912470SMatthew.Ahrens@Sun.COM bpobj_t bpo;
5012470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi;
5112470SMatthew.Ahrens@Sun.COM int epb;
5212470SMatthew.Ahrens@Sun.COM dmu_buf_t *dbuf = NULL;
5312470SMatthew.Ahrens@Sun.COM
5412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
5512470SMatthew.Ahrens@Sun.COM
5612470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo.bpo_lock);
5712470SMatthew.Ahrens@Sun.COM
5812470SMatthew.Ahrens@Sun.COM if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
5912470SMatthew.Ahrens@Sun.COM goto out;
6012470SMatthew.Ahrens@Sun.COM
6112470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
6212470SMatthew.Ahrens@Sun.COM epb = doi.doi_data_block_size / sizeof (uint64_t);
6312470SMatthew.Ahrens@Sun.COM
6412470SMatthew.Ahrens@Sun.COM for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
6512470SMatthew.Ahrens@Sun.COM uint64_t *objarray;
6612470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff;
6712470SMatthew.Ahrens@Sun.COM
6812470SMatthew.Ahrens@Sun.COM offset = i * sizeof (uint64_t);
6912470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, epb);
7012470SMatthew.Ahrens@Sun.COM
7112470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) {
7212470SMatthew.Ahrens@Sun.COM if (dbuf)
7312470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
7412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_buf_hold(os,
7512470SMatthew.Ahrens@Sun.COM bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
7612470SMatthew.Ahrens@Sun.COM }
7712470SMatthew.Ahrens@Sun.COM
7812470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset);
7912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
8012470SMatthew.Ahrens@Sun.COM
8112470SMatthew.Ahrens@Sun.COM objarray = dbuf->db_data;
8212470SMatthew.Ahrens@Sun.COM bpobj_free(os, objarray[blkoff], tx);
8312470SMatthew.Ahrens@Sun.COM }
8412470SMatthew.Ahrens@Sun.COM if (dbuf) {
8512470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
8612470SMatthew.Ahrens@Sun.COM dbuf = NULL;
8712470SMatthew.Ahrens@Sun.COM }
8812470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
8912470SMatthew.Ahrens@Sun.COM
9012470SMatthew.Ahrens@Sun.COM out:
9112470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo.bpo_lock);
9212470SMatthew.Ahrens@Sun.COM bpobj_close(&bpo);
9312470SMatthew.Ahrens@Sun.COM
9412470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
9512470SMatthew.Ahrens@Sun.COM }
9612470SMatthew.Ahrens@Sun.COM
9712470SMatthew.Ahrens@Sun.COM int
bpobj_open(bpobj_t * bpo,objset_t * os,uint64_t object)9812470SMatthew.Ahrens@Sun.COM bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
9912470SMatthew.Ahrens@Sun.COM {
10012470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi;
10112470SMatthew.Ahrens@Sun.COM int err;
10212470SMatthew.Ahrens@Sun.COM
10312470SMatthew.Ahrens@Sun.COM err = dmu_object_info(os, object, &doi);
10412470SMatthew.Ahrens@Sun.COM if (err)
10512470SMatthew.Ahrens@Sun.COM return (err);
10612470SMatthew.Ahrens@Sun.COM
10712470SMatthew.Ahrens@Sun.COM bzero(bpo, sizeof (*bpo));
10812470SMatthew.Ahrens@Sun.COM mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
10912470SMatthew.Ahrens@Sun.COM
11012470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_dbuf == NULL);
11112470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_phys == NULL);
11212470SMatthew.Ahrens@Sun.COM ASSERT(object != 0);
11312470SMatthew.Ahrens@Sun.COM ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
11412470SMatthew.Ahrens@Sun.COM ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
11512470SMatthew.Ahrens@Sun.COM
11612587SGeorge.Wilson@Sun.COM err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
11712587SGeorge.Wilson@Sun.COM if (err)
11812587SGeorge.Wilson@Sun.COM return (err);
11912587SGeorge.Wilson@Sun.COM
12012470SMatthew.Ahrens@Sun.COM bpo->bpo_os = os;
12112470SMatthew.Ahrens@Sun.COM bpo->bpo_object = object;
12212470SMatthew.Ahrens@Sun.COM bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
12312470SMatthew.Ahrens@Sun.COM bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
12412470SMatthew.Ahrens@Sun.COM bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
12512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys = bpo->bpo_dbuf->db_data;
12612470SMatthew.Ahrens@Sun.COM return (0);
12712470SMatthew.Ahrens@Sun.COM }
12812470SMatthew.Ahrens@Sun.COM
12912470SMatthew.Ahrens@Sun.COM void
bpobj_close(bpobj_t * bpo)13012470SMatthew.Ahrens@Sun.COM bpobj_close(bpobj_t *bpo)
13112470SMatthew.Ahrens@Sun.COM {
13212470SMatthew.Ahrens@Sun.COM /* Lame workaround for closing a bpobj that was never opened. */
13312470SMatthew.Ahrens@Sun.COM if (bpo->bpo_object == 0)
13412470SMatthew.Ahrens@Sun.COM return;
13512470SMatthew.Ahrens@Sun.COM
13612470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_dbuf, bpo);
13712470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf != NULL)
13812470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
13912470SMatthew.Ahrens@Sun.COM bpo->bpo_dbuf = NULL;
14012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys = NULL;
14112470SMatthew.Ahrens@Sun.COM bpo->bpo_cached_dbuf = NULL;
14212587SGeorge.Wilson@Sun.COM bpo->bpo_object = 0;
14312470SMatthew.Ahrens@Sun.COM
14412470SMatthew.Ahrens@Sun.COM mutex_destroy(&bpo->bpo_lock);
14512470SMatthew.Ahrens@Sun.COM }
14612470SMatthew.Ahrens@Sun.COM
14712470SMatthew.Ahrens@Sun.COM static int
bpobj_iterate_impl(bpobj_t * bpo,bpobj_itor_t func,void * arg,dmu_tx_t * tx,boolean_t free)14812470SMatthew.Ahrens@Sun.COM bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
14912470SMatthew.Ahrens@Sun.COM boolean_t free)
15012470SMatthew.Ahrens@Sun.COM {
15112470SMatthew.Ahrens@Sun.COM dmu_object_info_t doi;
15212470SMatthew.Ahrens@Sun.COM int epb;
15312470SMatthew.Ahrens@Sun.COM int64_t i;
15412470SMatthew.Ahrens@Sun.COM int err = 0;
15512470SMatthew.Ahrens@Sun.COM dmu_buf_t *dbuf = NULL;
15612470SMatthew.Ahrens@Sun.COM
15712470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock);
15812470SMatthew.Ahrens@Sun.COM
15912470SMatthew.Ahrens@Sun.COM if (free)
16012470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
16112470SMatthew.Ahrens@Sun.COM
16212470SMatthew.Ahrens@Sun.COM for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
16312470SMatthew.Ahrens@Sun.COM blkptr_t *bparray;
16412470SMatthew.Ahrens@Sun.COM blkptr_t *bp;
16512470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff;
16612470SMatthew.Ahrens@Sun.COM
16712470SMatthew.Ahrens@Sun.COM offset = i * sizeof (blkptr_t);
16812470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, bpo->bpo_epb);
16912470SMatthew.Ahrens@Sun.COM
17012470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) {
17112470SMatthew.Ahrens@Sun.COM if (dbuf)
17212470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
17312470SMatthew.Ahrens@Sun.COM err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
17412470SMatthew.Ahrens@Sun.COM FTAG, &dbuf, 0);
17512470SMatthew.Ahrens@Sun.COM if (err)
17612470SMatthew.Ahrens@Sun.COM break;
17712470SMatthew.Ahrens@Sun.COM }
17812470SMatthew.Ahrens@Sun.COM
17912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset);
18012470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
18112470SMatthew.Ahrens@Sun.COM
18212470SMatthew.Ahrens@Sun.COM bparray = dbuf->db_data;
18312470SMatthew.Ahrens@Sun.COM bp = &bparray[blkoff];
18412470SMatthew.Ahrens@Sun.COM err = func(arg, bp, tx);
18512470SMatthew.Ahrens@Sun.COM if (err)
18612470SMatthew.Ahrens@Sun.COM break;
18712470SMatthew.Ahrens@Sun.COM if (free) {
18812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes -=
18912470SMatthew.Ahrens@Sun.COM bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
19012470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
19112470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) {
19212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
19312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
19412470SMatthew.Ahrens@Sun.COM }
19512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_blkptrs--;
19612470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
19712470SMatthew.Ahrens@Sun.COM }
19812470SMatthew.Ahrens@Sun.COM }
19912470SMatthew.Ahrens@Sun.COM if (dbuf) {
20012470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
20112470SMatthew.Ahrens@Sun.COM dbuf = NULL;
20212470SMatthew.Ahrens@Sun.COM }
20312470SMatthew.Ahrens@Sun.COM if (free) {
20412470SMatthew.Ahrens@Sun.COM i++;
20512470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
20612470SMatthew.Ahrens@Sun.COM i * sizeof (blkptr_t), -1ULL, tx));
20712470SMatthew.Ahrens@Sun.COM }
20812470SMatthew.Ahrens@Sun.COM if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
20912470SMatthew.Ahrens@Sun.COM goto out;
21012470SMatthew.Ahrens@Sun.COM
21112470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havecomp);
21212470SMatthew.Ahrens@Sun.COM err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
21312538SLin.Ling@Sun.COM if (err) {
21412538SLin.Ling@Sun.COM mutex_exit(&bpo->bpo_lock);
21512470SMatthew.Ahrens@Sun.COM return (err);
21612538SLin.Ling@Sun.COM }
21712470SMatthew.Ahrens@Sun.COM epb = doi.doi_data_block_size / sizeof (uint64_t);
21812470SMatthew.Ahrens@Sun.COM
21912470SMatthew.Ahrens@Sun.COM for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
22012470SMatthew.Ahrens@Sun.COM uint64_t *objarray;
22112470SMatthew.Ahrens@Sun.COM uint64_t offset, blkoff;
22212470SMatthew.Ahrens@Sun.COM bpobj_t sublist;
22312470SMatthew.Ahrens@Sun.COM uint64_t used_before, comp_before, uncomp_before;
22412470SMatthew.Ahrens@Sun.COM uint64_t used_after, comp_after, uncomp_after;
22512470SMatthew.Ahrens@Sun.COM
22612470SMatthew.Ahrens@Sun.COM offset = i * sizeof (uint64_t);
22712470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(i, epb);
22812470SMatthew.Ahrens@Sun.COM
22912470SMatthew.Ahrens@Sun.COM if (dbuf == NULL || dbuf->db_offset > offset) {
23012470SMatthew.Ahrens@Sun.COM if (dbuf)
23112470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
23212470SMatthew.Ahrens@Sun.COM err = dmu_buf_hold(bpo->bpo_os,
23312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
23412470SMatthew.Ahrens@Sun.COM if (err)
23512470SMatthew.Ahrens@Sun.COM break;
23612470SMatthew.Ahrens@Sun.COM }
23712470SMatthew.Ahrens@Sun.COM
23812470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, >=, dbuf->db_offset);
23912470SMatthew.Ahrens@Sun.COM ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
24012470SMatthew.Ahrens@Sun.COM
24112470SMatthew.Ahrens@Sun.COM objarray = dbuf->db_data;
24212470SMatthew.Ahrens@Sun.COM err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
24312470SMatthew.Ahrens@Sun.COM if (err)
24412470SMatthew.Ahrens@Sun.COM break;
24512470SMatthew.Ahrens@Sun.COM if (free) {
24612470SMatthew.Ahrens@Sun.COM err = bpobj_space(&sublist,
24712470SMatthew.Ahrens@Sun.COM &used_before, &comp_before, &uncomp_before);
24812470SMatthew.Ahrens@Sun.COM if (err)
24912470SMatthew.Ahrens@Sun.COM break;
25012470SMatthew.Ahrens@Sun.COM }
25112470SMatthew.Ahrens@Sun.COM err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
25212470SMatthew.Ahrens@Sun.COM if (free) {
25312470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_space(&sublist,
25412470SMatthew.Ahrens@Sun.COM &used_after, &comp_after, &uncomp_after));
25512470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes -= used_before - used_after;
25612470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
257*12593SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
25812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp -=
25912470SMatthew.Ahrens@Sun.COM uncomp_before - uncomp_after;
26012470SMatthew.Ahrens@Sun.COM }
26112470SMatthew.Ahrens@Sun.COM
26212470SMatthew.Ahrens@Sun.COM bpobj_close(&sublist);
26312470SMatthew.Ahrens@Sun.COM if (err)
26412470SMatthew.Ahrens@Sun.COM break;
26512470SMatthew.Ahrens@Sun.COM if (free) {
26612470SMatthew.Ahrens@Sun.COM err = dmu_object_free(bpo->bpo_os,
26712470SMatthew.Ahrens@Sun.COM objarray[blkoff], tx);
26812470SMatthew.Ahrens@Sun.COM if (err)
26912470SMatthew.Ahrens@Sun.COM break;
27012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs--;
27112470SMatthew.Ahrens@Sun.COM ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
27212470SMatthew.Ahrens@Sun.COM }
27312470SMatthew.Ahrens@Sun.COM }
27412470SMatthew.Ahrens@Sun.COM if (dbuf) {
27512470SMatthew.Ahrens@Sun.COM dmu_buf_rele(dbuf, FTAG);
27612470SMatthew.Ahrens@Sun.COM dbuf = NULL;
27712470SMatthew.Ahrens@Sun.COM }
27812470SMatthew.Ahrens@Sun.COM if (free) {
27912470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
28012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs,
28112470SMatthew.Ahrens@Sun.COM (i + 1) * sizeof (uint64_t), -1ULL, tx));
28212470SMatthew.Ahrens@Sun.COM }
28312470SMatthew.Ahrens@Sun.COM
28412470SMatthew.Ahrens@Sun.COM out:
28512470SMatthew.Ahrens@Sun.COM /* If there are no entries, there should be no bytes. */
28612470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
28712470SMatthew.Ahrens@Sun.COM (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
28812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes == 0);
28912470SMatthew.Ahrens@Sun.COM
29012470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock);
29112470SMatthew.Ahrens@Sun.COM return (err);
29212470SMatthew.Ahrens@Sun.COM }
29312470SMatthew.Ahrens@Sun.COM
29412470SMatthew.Ahrens@Sun.COM /*
29512470SMatthew.Ahrens@Sun.COM * Iterate and remove the entries. If func returns nonzero, iteration
29612470SMatthew.Ahrens@Sun.COM * will stop and that entry will not be removed.
29712470SMatthew.Ahrens@Sun.COM */
29812470SMatthew.Ahrens@Sun.COM int
bpobj_iterate(bpobj_t * bpo,bpobj_itor_t func,void * arg,dmu_tx_t * tx)29912470SMatthew.Ahrens@Sun.COM bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
30012470SMatthew.Ahrens@Sun.COM {
30112470SMatthew.Ahrens@Sun.COM return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
30212470SMatthew.Ahrens@Sun.COM }
30312470SMatthew.Ahrens@Sun.COM
30412470SMatthew.Ahrens@Sun.COM /*
30512470SMatthew.Ahrens@Sun.COM * Iterate the entries. If func returns nonzero, iteration will stop.
30612470SMatthew.Ahrens@Sun.COM */
30712470SMatthew.Ahrens@Sun.COM int
bpobj_iterate_nofree(bpobj_t * bpo,bpobj_itor_t func,void * arg,dmu_tx_t * tx)30812470SMatthew.Ahrens@Sun.COM bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
30912470SMatthew.Ahrens@Sun.COM {
31012470SMatthew.Ahrens@Sun.COM return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
31112470SMatthew.Ahrens@Sun.COM }
31212470SMatthew.Ahrens@Sun.COM
31312470SMatthew.Ahrens@Sun.COM void
bpobj_enqueue_subobj(bpobj_t * bpo,uint64_t subobj,dmu_tx_t * tx)31412470SMatthew.Ahrens@Sun.COM bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
31512470SMatthew.Ahrens@Sun.COM {
31612470SMatthew.Ahrens@Sun.COM bpobj_t subbpo;
317*12593SMatthew.Ahrens@Sun.COM uint64_t used, comp, uncomp, subsubobjs;
31812470SMatthew.Ahrens@Sun.COM
31912470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havesubobj);
32012470SMatthew.Ahrens@Sun.COM ASSERT(bpo->bpo_havecomp);
32112470SMatthew.Ahrens@Sun.COM
32212470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
32312470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
32412470SMatthew.Ahrens@Sun.COM
32512470SMatthew.Ahrens@Sun.COM if (used == 0) {
32612470SMatthew.Ahrens@Sun.COM /* No point in having an empty subobj. */
327*12593SMatthew.Ahrens@Sun.COM bpobj_close(&subbpo);
32812470SMatthew.Ahrens@Sun.COM bpobj_free(bpo->bpo_os, subobj, tx);
32912470SMatthew.Ahrens@Sun.COM return;
33012470SMatthew.Ahrens@Sun.COM }
33112470SMatthew.Ahrens@Sun.COM
33212470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
33312470SMatthew.Ahrens@Sun.COM if (bpo->bpo_phys->bpo_subobjs == 0) {
33412470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
33512470SMatthew.Ahrens@Sun.COM DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
33612470SMatthew.Ahrens@Sun.COM }
33712470SMatthew.Ahrens@Sun.COM
33812470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock);
33912470SMatthew.Ahrens@Sun.COM dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
34012470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
34112470SMatthew.Ahrens@Sun.COM sizeof (subobj), &subobj, tx);
34212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs++;
343*12593SMatthew.Ahrens@Sun.COM
344*12593SMatthew.Ahrens@Sun.COM /*
345*12593SMatthew.Ahrens@Sun.COM * If subobj has only one block of subobjs, then move subobj's
346*12593SMatthew.Ahrens@Sun.COM * subobjs to bpo's subobj list directly. This reduces
347*12593SMatthew.Ahrens@Sun.COM * recursion in bpobj_iterate due to nested subobjs.
348*12593SMatthew.Ahrens@Sun.COM */
349*12593SMatthew.Ahrens@Sun.COM subsubobjs = subbpo.bpo_phys->bpo_subobjs;
350*12593SMatthew.Ahrens@Sun.COM if (subsubobjs != 0) {
351*12593SMatthew.Ahrens@Sun.COM dmu_object_info_t doi;
352*12593SMatthew.Ahrens@Sun.COM
353*12593SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
354*12593SMatthew.Ahrens@Sun.COM if (doi.doi_max_offset == doi.doi_data_block_size) {
355*12593SMatthew.Ahrens@Sun.COM dmu_buf_t *subdb;
356*12593SMatthew.Ahrens@Sun.COM uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;
357*12593SMatthew.Ahrens@Sun.COM
358*12593SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
359*12593SMatthew.Ahrens@Sun.COM 0, FTAG, &subdb, 0));
360*12593SMatthew.Ahrens@Sun.COM dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
361*12593SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
362*12593SMatthew.Ahrens@Sun.COM numsubsub * sizeof (subobj), subdb->db_data, tx);
363*12593SMatthew.Ahrens@Sun.COM dmu_buf_rele(subdb, FTAG);
364*12593SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_subobjs += numsubsub;
365*12593SMatthew.Ahrens@Sun.COM
366*12593SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(subbpo.bpo_dbuf, tx);
367*12593SMatthew.Ahrens@Sun.COM subbpo.bpo_phys->bpo_subobjs = 0;
368*12593SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
369*12593SMatthew.Ahrens@Sun.COM subsubobjs, tx));
370*12593SMatthew.Ahrens@Sun.COM }
371*12593SMatthew.Ahrens@Sun.COM }
37212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes += used;
37312470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp += comp;
37412470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp += uncomp;
37512470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock);
376*12593SMatthew.Ahrens@Sun.COM
377*12593SMatthew.Ahrens@Sun.COM bpobj_close(&subbpo);
37812470SMatthew.Ahrens@Sun.COM }
37912470SMatthew.Ahrens@Sun.COM
38012470SMatthew.Ahrens@Sun.COM void
bpobj_enqueue(bpobj_t * bpo,const blkptr_t * bp,dmu_tx_t * tx)38112470SMatthew.Ahrens@Sun.COM bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
38212470SMatthew.Ahrens@Sun.COM {
38312470SMatthew.Ahrens@Sun.COM blkptr_t stored_bp = *bp;
38412470SMatthew.Ahrens@Sun.COM uint64_t offset;
38512470SMatthew.Ahrens@Sun.COM int blkoff;
38612470SMatthew.Ahrens@Sun.COM blkptr_t *bparray;
38712470SMatthew.Ahrens@Sun.COM
38812470SMatthew.Ahrens@Sun.COM ASSERT(!BP_IS_HOLE(bp));
38912470SMatthew.Ahrens@Sun.COM
39012470SMatthew.Ahrens@Sun.COM /* We never need the fill count. */
39112470SMatthew.Ahrens@Sun.COM stored_bp.blk_fill = 0;
39212470SMatthew.Ahrens@Sun.COM
39312470SMatthew.Ahrens@Sun.COM /* The bpobj will compress better if we can leave off the checksum */
39412470SMatthew.Ahrens@Sun.COM if (!BP_GET_DEDUP(bp))
39512470SMatthew.Ahrens@Sun.COM bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
39612470SMatthew.Ahrens@Sun.COM
39712470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock);
39812470SMatthew.Ahrens@Sun.COM
39912470SMatthew.Ahrens@Sun.COM offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
40012470SMatthew.Ahrens@Sun.COM blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
40112470SMatthew.Ahrens@Sun.COM
40212470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf == NULL ||
40312470SMatthew.Ahrens@Sun.COM offset < bpo->bpo_cached_dbuf->db_offset ||
40412470SMatthew.Ahrens@Sun.COM offset >= bpo->bpo_cached_dbuf->db_offset +
40512470SMatthew.Ahrens@Sun.COM bpo->bpo_cached_dbuf->db_size) {
40612470SMatthew.Ahrens@Sun.COM if (bpo->bpo_cached_dbuf)
40712470SMatthew.Ahrens@Sun.COM dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
40812470SMatthew.Ahrens@Sun.COM VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
40912470SMatthew.Ahrens@Sun.COM offset, bpo, &bpo->bpo_cached_dbuf, 0));
41012470SMatthew.Ahrens@Sun.COM }
41112470SMatthew.Ahrens@Sun.COM
41212470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
41312470SMatthew.Ahrens@Sun.COM bparray = bpo->bpo_cached_dbuf->db_data;
41412470SMatthew.Ahrens@Sun.COM bparray[blkoff] = stored_bp;
41512470SMatthew.Ahrens@Sun.COM
41612470SMatthew.Ahrens@Sun.COM dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
41712470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_num_blkptrs++;
41812470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_bytes +=
41912470SMatthew.Ahrens@Sun.COM bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
42012470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) {
42112470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
42212470SMatthew.Ahrens@Sun.COM bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
42312470SMatthew.Ahrens@Sun.COM }
42412470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock);
42512470SMatthew.Ahrens@Sun.COM }
42612470SMatthew.Ahrens@Sun.COM
42712470SMatthew.Ahrens@Sun.COM struct space_range_arg {
42812470SMatthew.Ahrens@Sun.COM spa_t *spa;
42912470SMatthew.Ahrens@Sun.COM uint64_t mintxg;
43012470SMatthew.Ahrens@Sun.COM uint64_t maxtxg;
43112470SMatthew.Ahrens@Sun.COM uint64_t used;
43212470SMatthew.Ahrens@Sun.COM uint64_t comp;
43312470SMatthew.Ahrens@Sun.COM uint64_t uncomp;
43412470SMatthew.Ahrens@Sun.COM };
43512470SMatthew.Ahrens@Sun.COM
43612470SMatthew.Ahrens@Sun.COM /* ARGSUSED */
43712470SMatthew.Ahrens@Sun.COM static int
space_range_cb(void * arg,const blkptr_t * bp,dmu_tx_t * tx)43812470SMatthew.Ahrens@Sun.COM space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
43912470SMatthew.Ahrens@Sun.COM {
44012470SMatthew.Ahrens@Sun.COM struct space_range_arg *sra = arg;
44112470SMatthew.Ahrens@Sun.COM
44212470SMatthew.Ahrens@Sun.COM if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
44312470SMatthew.Ahrens@Sun.COM sra->used += bp_get_dsize_sync(sra->spa, bp);
44412470SMatthew.Ahrens@Sun.COM sra->comp += BP_GET_PSIZE(bp);
44512470SMatthew.Ahrens@Sun.COM sra->uncomp += BP_GET_UCSIZE(bp);
44612470SMatthew.Ahrens@Sun.COM }
44712470SMatthew.Ahrens@Sun.COM return (0);
44812470SMatthew.Ahrens@Sun.COM }
44912470SMatthew.Ahrens@Sun.COM
45012470SMatthew.Ahrens@Sun.COM int
bpobj_space(bpobj_t * bpo,uint64_t * usedp,uint64_t * compp,uint64_t * uncompp)45112470SMatthew.Ahrens@Sun.COM bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
45212470SMatthew.Ahrens@Sun.COM {
45312470SMatthew.Ahrens@Sun.COM mutex_enter(&bpo->bpo_lock);
45412470SMatthew.Ahrens@Sun.COM
45512470SMatthew.Ahrens@Sun.COM *usedp = bpo->bpo_phys->bpo_bytes;
45612470SMatthew.Ahrens@Sun.COM if (bpo->bpo_havecomp) {
45712470SMatthew.Ahrens@Sun.COM *compp = bpo->bpo_phys->bpo_comp;
45812470SMatthew.Ahrens@Sun.COM *uncompp = bpo->bpo_phys->bpo_uncomp;
45912470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock);
46012470SMatthew.Ahrens@Sun.COM return (0);
46112470SMatthew.Ahrens@Sun.COM } else {
46212470SMatthew.Ahrens@Sun.COM mutex_exit(&bpo->bpo_lock);
46312470SMatthew.Ahrens@Sun.COM return (bpobj_space_range(bpo, 0, UINT64_MAX,
46412470SMatthew.Ahrens@Sun.COM usedp, compp, uncompp));
46512470SMatthew.Ahrens@Sun.COM }
46612470SMatthew.Ahrens@Sun.COM }
46712470SMatthew.Ahrens@Sun.COM
46812470SMatthew.Ahrens@Sun.COM /*
46912470SMatthew.Ahrens@Sun.COM * Return the amount of space in the bpobj which is:
47012470SMatthew.Ahrens@Sun.COM * mintxg < blk_birth <= maxtxg
47112470SMatthew.Ahrens@Sun.COM */
47212470SMatthew.Ahrens@Sun.COM int
bpobj_space_range(bpobj_t * bpo,uint64_t mintxg,uint64_t maxtxg,uint64_t * usedp,uint64_t * compp,uint64_t * uncompp)47312470SMatthew.Ahrens@Sun.COM bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
47412470SMatthew.Ahrens@Sun.COM uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
47512470SMatthew.Ahrens@Sun.COM {
47612470SMatthew.Ahrens@Sun.COM struct space_range_arg sra = { 0 };
47712470SMatthew.Ahrens@Sun.COM int err;
47812470SMatthew.Ahrens@Sun.COM
47912470SMatthew.Ahrens@Sun.COM /*
48012470SMatthew.Ahrens@Sun.COM * As an optimization, if they want the whole txg range, just
48112470SMatthew.Ahrens@Sun.COM * get bpo_bytes rather than iterating over the bps.
48212470SMatthew.Ahrens@Sun.COM */
48312470SMatthew.Ahrens@Sun.COM if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
48412470SMatthew.Ahrens@Sun.COM return (bpobj_space(bpo, usedp, compp, uncompp));
48512470SMatthew.Ahrens@Sun.COM
48612470SMatthew.Ahrens@Sun.COM sra.spa = dmu_objset_spa(bpo->bpo_os);
48712470SMatthew.Ahrens@Sun.COM sra.mintxg = mintxg;
48812470SMatthew.Ahrens@Sun.COM sra.maxtxg = maxtxg;
48912470SMatthew.Ahrens@Sun.COM
49012470SMatthew.Ahrens@Sun.COM err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
49112470SMatthew.Ahrens@Sun.COM *usedp = sra.used;
49212470SMatthew.Ahrens@Sun.COM *compp = sra.comp;
49312470SMatthew.Ahrens@Sun.COM *uncompp = sra.uncomp;
49412470SMatthew.Ahrens@Sun.COM return (err);
49512470SMatthew.Ahrens@Sun.COM }
496