1240afd8cSMark Johnston /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3240afd8cSMark Johnston * 4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5240afd8cSMark Johnston * 6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7240afd8cSMark Johnston * the FreeBSD Foundation. 8240afd8cSMark Johnston * 9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11240afd8cSMark Johnston * met: 12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17240afd8cSMark Johnston * 18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <sys/param.h> 32240afd8cSMark Johnston #include <sys/errno.h> 33240afd8cSMark Johnston #include <sys/queue.h> 34240afd8cSMark Johnston 35240afd8cSMark Johnston #include <assert.h> 36a9e7a44cSMark Johnston #include <ctype.h> 37240afd8cSMark Johnston #include <fcntl.h> 38187084ddSMark Johnston #include <stdalign.h> 39240afd8cSMark Johnston #include <stdbool.h> 40240afd8cSMark Johnston #include <stddef.h> 41240afd8cSMark Johnston #include <stdlib.h> 42240afd8cSMark Johnston #include <string.h> 43240afd8cSMark Johnston #include <unistd.h> 44240afd8cSMark Johnston 45240afd8cSMark Johnston #include <util.h> 46240afd8cSMark Johnston 47240afd8cSMark Johnston #include "makefs.h" 48240afd8cSMark Johnston #include "zfs.h" 49240afd8cSMark Johnston 50240afd8cSMark Johnston #define VDEV_LABEL_SPACE \ 51240afd8cSMark Johnston ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) 52240afd8cSMark Johnston _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, ""); 53240afd8cSMark Johnston 54240afd8cSMark Johnston #define MINMSSIZE ((off_t)1 << 24) /* 16MB */ 55240afd8cSMark Johnston #define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */ 56240afd8cSMark Johnston #define MAXMSSIZE ((off_t)1 << 34) /* 16GB */ 57240afd8cSMark Johnston 58240afd8cSMark Johnston #define INDIR_LEVELS 6 59240afd8cSMark Johnston /* Indirect blocks are always 128KB. */ 60240afd8cSMark Johnston #define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t)) 61240afd8cSMark Johnston 62240afd8cSMark Johnston struct dnode_cursor { 63240afd8cSMark Johnston char inddir[INDIR_LEVELS][MAXBLOCKSIZE]; 64240afd8cSMark Johnston off_t indloc; 65240afd8cSMark Johnston off_t indspace; 66240afd8cSMark Johnston dnode_phys_t *dnode; 67240afd8cSMark Johnston off_t dataoff; 68240afd8cSMark Johnston off_t datablksz; 69240afd8cSMark Johnston }; 70240afd8cSMark Johnston 71240afd8cSMark Johnston void 72240afd8cSMark Johnston zfs_prep_opts(fsinfo_t *fsopts) 73240afd8cSMark Johnston { 74c4d26f02SMark Johnston zfs_opt_t *zfs; 75187084ddSMark Johnston size_t align; 76187084ddSMark Johnston 77187084ddSMark Johnston align = alignof(uint64_t); 78c4d26f02SMark Johnston zfs = aligned_alloc(align, roundup2(sizeof(*zfs), align)); 79187084ddSMark Johnston if (zfs == NULL) 80187084ddSMark Johnston err(1, "aligned_alloc"); 81187084ddSMark Johnston memset(zfs, 0, sizeof(*zfs)); 82240afd8cSMark Johnston 83240afd8cSMark Johnston const option_t zfs_options[] = { 84240afd8cSMark Johnston { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR, 85240afd8cSMark Johnston 0, 0, "Bootable dataset" }, 86240afd8cSMark Johnston { '\0', "mssize", &zfs->mssize, OPT_INT64, 87240afd8cSMark Johnston MINMSSIZE, MAXMSSIZE, "Metaslab size" }, 88240afd8cSMark Johnston { '\0', "poolname", &zfs->poolname, OPT_STRPTR, 89240afd8cSMark Johnston 0, 0, "ZFS pool name" }, 90240afd8cSMark Johnston { '\0', "rootpath", &zfs->rootpath, OPT_STRPTR, 91240afd8cSMark Johnston 0, 0, "Prefix for all dataset mount points" }, 92240afd8cSMark Johnston { '\0', "ashift", &zfs->ashift, OPT_INT32, 93240afd8cSMark Johnston MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" }, 94*4e15366cSMark Johnston { '\0', "verify-txgs", &zfs->verify_txgs, OPT_BOOL, 95*4e15366cSMark Johnston 0, 0, "Make OpenZFS verify data upon import" }, 96240afd8cSMark Johnston { '\0', "nowarn", &zfs->nowarn, OPT_BOOL, 97d9fe7182SMark Johnston 0, 0, "Provided for backwards compatibility, ignored" }, 98240afd8cSMark Johnston { .name = NULL } 99240afd8cSMark Johnston }; 100240afd8cSMark Johnston 101240afd8cSMark Johnston STAILQ_INIT(&zfs->datasetdescs); 102240afd8cSMark Johnston 103240afd8cSMark Johnston fsopts->fs_specific = zfs; 104240afd8cSMark Johnston fsopts->fs_options = copy_opts(zfs_options); 105240afd8cSMark Johnston } 106240afd8cSMark Johnston 107240afd8cSMark Johnston int 108240afd8cSMark Johnston zfs_parse_opts(const char *option, fsinfo_t *fsopts) 109240afd8cSMark Johnston { 110240afd8cSMark Johnston zfs_opt_t *zfs; 111240afd8cSMark Johnston struct dataset_desc *dsdesc; 112240afd8cSMark Johnston char buf[BUFSIZ], *opt, *val; 113240afd8cSMark Johnston int rv; 114240afd8cSMark Johnston 115240afd8cSMark Johnston zfs = fsopts->fs_specific; 116240afd8cSMark Johnston 117240afd8cSMark Johnston opt = val = estrdup(option); 118240afd8cSMark Johnston opt = strsep(&val, "="); 119240afd8cSMark Johnston if (strcmp(opt, "fs") == 0) { 120240afd8cSMark Johnston if (val == NULL) 121240afd8cSMark Johnston errx(1, "invalid filesystem parameters `%s'", option); 122240afd8cSMark Johnston 123240afd8cSMark Johnston /* 124240afd8cSMark Johnston * Dataset descriptions will be parsed later, in dsl_init(). 125240afd8cSMark Johnston * Just stash them away for now. 126240afd8cSMark Johnston */ 127240afd8cSMark Johnston dsdesc = ecalloc(1, sizeof(*dsdesc)); 128240afd8cSMark Johnston dsdesc->params = estrdup(val); 129240afd8cSMark Johnston free(opt); 130240afd8cSMark Johnston STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next); 131240afd8cSMark Johnston return (1); 132240afd8cSMark Johnston } 133240afd8cSMark Johnston free(opt); 134240afd8cSMark Johnston 135240afd8cSMark Johnston rv = set_option(fsopts->fs_options, option, buf, sizeof(buf)); 136240afd8cSMark Johnston return (rv == -1 ? 0 : 1); 137240afd8cSMark Johnston } 138240afd8cSMark Johnston 139240afd8cSMark Johnston static void 140240afd8cSMark Johnston zfs_size_vdev(fsinfo_t *fsopts) 141240afd8cSMark Johnston { 142240afd8cSMark Johnston zfs_opt_t *zfs; 143240afd8cSMark Johnston off_t asize, mssize, vdevsize, vdevsize1; 144240afd8cSMark Johnston 145240afd8cSMark Johnston zfs = fsopts->fs_specific; 146240afd8cSMark Johnston 147240afd8cSMark Johnston assert(fsopts->maxsize != 0); 148240afd8cSMark Johnston assert(zfs->ashift != 0); 149240afd8cSMark Johnston 150240afd8cSMark Johnston /* 151240afd8cSMark Johnston * Figure out how big the vdev should be. 152240afd8cSMark Johnston */ 153240afd8cSMark Johnston vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift); 154240afd8cSMark Johnston if (vdevsize < MINDEVSIZE) 155240afd8cSMark Johnston errx(1, "maximum image size is too small"); 156240afd8cSMark Johnston if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) { 157240afd8cSMark Johnston errx(1, "image size bounds must be multiples of %d", 158240afd8cSMark Johnston 1 << zfs->ashift); 159240afd8cSMark Johnston } 160240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE; 161240afd8cSMark Johnston 162240afd8cSMark Johnston /* 163240afd8cSMark Johnston * Size metaslabs according to the following heuristic: 164240afd8cSMark Johnston * - provide at least 8 metaslabs, 165240afd8cSMark Johnston * - without using a metaslab size larger than 512MB. 166240afd8cSMark Johnston * This approximates what OpenZFS does without being complicated. In 167240afd8cSMark Johnston * practice we expect pools to be expanded upon first use, and OpenZFS 168240afd8cSMark Johnston * does not resize metaslabs in that case, so there is no right answer 169240afd8cSMark Johnston * here. In general we want to provide large metaslabs even if the 170240afd8cSMark Johnston * image size is small, and 512MB is a reasonable size for pools up to 171240afd8cSMark Johnston * several hundred gigabytes. 172240afd8cSMark Johnston * 173240afd8cSMark Johnston * The user may override this heuristic using the "-o mssize" option. 174240afd8cSMark Johnston */ 175240afd8cSMark Johnston mssize = zfs->mssize; 176240afd8cSMark Johnston if (mssize == 0) { 177240afd8cSMark Johnston mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE); 178240afd8cSMark Johnston if (!powerof2(mssize)) 179240afd8cSMark Johnston mssize = 1l << (flsll(mssize) - 1); 180240afd8cSMark Johnston } 181240afd8cSMark Johnston if (!powerof2(mssize)) 182240afd8cSMark Johnston errx(1, "metaslab size must be a power of 2"); 183240afd8cSMark Johnston 184240afd8cSMark Johnston /* 185240afd8cSMark Johnston * If we have some slop left over, try to cover it by resizing the vdev, 186240afd8cSMark Johnston * subject to the maxsize and minsize parameters. 187240afd8cSMark Johnston */ 188240afd8cSMark Johnston if (asize % mssize != 0) { 189240afd8cSMark Johnston vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE; 190240afd8cSMark Johnston if (vdevsize1 < fsopts->minsize) 191240afd8cSMark Johnston vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE; 192240afd8cSMark Johnston if (vdevsize1 <= fsopts->maxsize) 193240afd8cSMark Johnston vdevsize = vdevsize1; 194240afd8cSMark Johnston } 195240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE; 196240afd8cSMark Johnston 197240afd8cSMark Johnston zfs->asize = asize; 198240afd8cSMark Johnston zfs->vdevsize = vdevsize; 199240afd8cSMark Johnston zfs->mssize = mssize; 200240afd8cSMark Johnston zfs->msshift = flsll(mssize) - 1; 201240afd8cSMark Johnston zfs->mscount = asize / mssize; 202240afd8cSMark Johnston } 203240afd8cSMark Johnston 204240afd8cSMark Johnston /* 205240afd8cSMark Johnston * Validate options and set some default values. 206240afd8cSMark Johnston */ 207240afd8cSMark Johnston static void 208240afd8cSMark Johnston zfs_check_opts(fsinfo_t *fsopts) 209240afd8cSMark Johnston { 210240afd8cSMark Johnston zfs_opt_t *zfs; 211240afd8cSMark Johnston 212240afd8cSMark Johnston zfs = fsopts->fs_specific; 213240afd8cSMark Johnston 214240afd8cSMark Johnston if (fsopts->offset != 0) 215240afd8cSMark Johnston errx(1, "unhandled offset option"); 216240afd8cSMark Johnston if (fsopts->maxsize == 0) 217240afd8cSMark Johnston errx(1, "an image size must be specified"); 218240afd8cSMark Johnston 219240afd8cSMark Johnston if (zfs->poolname == NULL) 220240afd8cSMark Johnston errx(1, "a pool name must be specified"); 221a9e7a44cSMark Johnston if (!isalpha(zfs->poolname[0])) 222a9e7a44cSMark Johnston errx(1, "the pool name must begin with a letter"); 223a9e7a44cSMark Johnston for (size_t i = 0, len = strlen(zfs->poolname); i < len; i++) { 224a9e7a44cSMark Johnston if (!isalnum(zfs->poolname[i]) && zfs->poolname[i] != '_') 225a9e7a44cSMark Johnston errx(1, "invalid character '%c' in pool name", 226a9e7a44cSMark Johnston zfs->poolname[i]); 227a9e7a44cSMark Johnston } 228a9e7a44cSMark Johnston if (strcmp(zfs->poolname, "mirror") == 0 || 229a9e7a44cSMark Johnston strcmp(zfs->poolname, "raidz") == 0 || 230a9e7a44cSMark Johnston strcmp(zfs->poolname, "draid") == 0) { 231a9e7a44cSMark Johnston errx(1, "pool name '%s' is reserved and cannot be used", 232a9e7a44cSMark Johnston zfs->poolname); 233a9e7a44cSMark Johnston } 234240afd8cSMark Johnston 235240afd8cSMark Johnston if (zfs->rootpath == NULL) 236240afd8cSMark Johnston easprintf(&zfs->rootpath, "/%s", zfs->poolname); 237240afd8cSMark Johnston if (zfs->rootpath[0] != '/') 238240afd8cSMark Johnston errx(1, "mountpoint `%s' must be absolute", zfs->rootpath); 239240afd8cSMark Johnston 240240afd8cSMark Johnston if (zfs->ashift == 0) 241240afd8cSMark Johnston zfs->ashift = 12; 242240afd8cSMark Johnston 243240afd8cSMark Johnston zfs_size_vdev(fsopts); 244240afd8cSMark Johnston } 245240afd8cSMark Johnston 246240afd8cSMark Johnston void 247240afd8cSMark Johnston zfs_cleanup_opts(fsinfo_t *fsopts) 248240afd8cSMark Johnston { 249240afd8cSMark Johnston struct dataset_desc *d, *tmp; 250240afd8cSMark Johnston zfs_opt_t *zfs; 251240afd8cSMark Johnston 252240afd8cSMark Johnston zfs = fsopts->fs_specific; 253240afd8cSMark Johnston free(zfs->rootpath); 254240afd8cSMark Johnston free(zfs->bootfs); 255240afd8cSMark Johnston free(__DECONST(void *, zfs->poolname)); 256240afd8cSMark Johnston STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) { 257240afd8cSMark Johnston free(d->params); 258240afd8cSMark Johnston free(d); 259240afd8cSMark Johnston } 260240afd8cSMark Johnston free(zfs); 261240afd8cSMark Johnston free(fsopts->fs_options); 262240afd8cSMark Johnston } 263240afd8cSMark Johnston 264240afd8cSMark Johnston static size_t 265240afd8cSMark Johnston nvlist_size(const nvlist_t *nvl) 266240afd8cSMark Johnston { 267240afd8cSMark Johnston return (sizeof(nvl->nv_header) + nvl->nv_size); 268240afd8cSMark Johnston } 269240afd8cSMark Johnston 270240afd8cSMark Johnston static void 271240afd8cSMark Johnston nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz) 272240afd8cSMark Johnston { 273240afd8cSMark Johnston assert(sz >= nvlist_size(nvl)); 274240afd8cSMark Johnston 275240afd8cSMark Johnston memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header)); 276240afd8cSMark Johnston memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size); 277240afd8cSMark Johnston } 278240afd8cSMark Johnston 27914c5cf3aSMark Johnston /* 28014c5cf3aSMark Johnston * Avoid returning a GUID of 0, just to avoid the possibility that something 28114c5cf3aSMark Johnston * will interpret that as meaning that the GUID is uninitialized. 28214c5cf3aSMark Johnston */ 28314c5cf3aSMark Johnston uint64_t 28414c5cf3aSMark Johnston randomguid(void) 28514c5cf3aSMark Johnston { 28614c5cf3aSMark Johnston uint64_t ret; 28714c5cf3aSMark Johnston 28814c5cf3aSMark Johnston do { 28914c5cf3aSMark Johnston ret = ((uint64_t)random() << 32) | random(); 29014c5cf3aSMark Johnston } while (ret == 0); 29114c5cf3aSMark Johnston 29214c5cf3aSMark Johnston return (ret); 29314c5cf3aSMark Johnston } 29414c5cf3aSMark Johnston 295240afd8cSMark Johnston static nvlist_t * 296240afd8cSMark Johnston pool_config_nvcreate(zfs_opt_t *zfs) 297240afd8cSMark Johnston { 298240afd8cSMark Johnston nvlist_t *featuresnv, *poolnv; 299240afd8cSMark Johnston 300240afd8cSMark Johnston poolnv = nvlist_create(NV_UNIQUE_NAME); 301240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG); 302240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION); 303240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED); 304240afd8cSMark Johnston nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname); 305240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid); 306240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid); 307240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); 308240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1); 309240afd8cSMark Johnston 310240afd8cSMark Johnston featuresnv = nvlist_create(NV_UNIQUE_NAME); 311240afd8cSMark Johnston nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv); 312240afd8cSMark Johnston nvlist_destroy(featuresnv); 313240afd8cSMark Johnston 314240afd8cSMark Johnston return (poolnv); 315240afd8cSMark Johnston } 316240afd8cSMark Johnston 317240afd8cSMark Johnston static nvlist_t * 318240afd8cSMark Johnston pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs) 319240afd8cSMark Johnston { 320240afd8cSMark Johnston nvlist_t *diskvdevnv; 321240afd8cSMark Johnston 322240afd8cSMark Johnston assert(zfs->objarrid != 0); 323240afd8cSMark Johnston 324240afd8cSMark Johnston diskvdevnv = nvlist_create(NV_UNIQUE_NAME); 325240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK); 326240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift); 327240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize); 328240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); 329240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0); 330240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null"); 331240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1); 332240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); 333240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY, 334240afd8cSMark Johnston zfs->objarrid); 335240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT, 336240afd8cSMark Johnston zfs->msshift); 337240afd8cSMark Johnston 338240afd8cSMark Johnston return (diskvdevnv); 339240afd8cSMark Johnston } 340240afd8cSMark Johnston 341240afd8cSMark Johnston static nvlist_t * 342240afd8cSMark Johnston pool_root_vdev_config_nvcreate(zfs_opt_t *zfs) 343240afd8cSMark Johnston { 344240afd8cSMark Johnston nvlist_t *diskvdevnv, *rootvdevnv; 345240afd8cSMark Johnston 346240afd8cSMark Johnston diskvdevnv = pool_disk_vdev_config_nvcreate(zfs); 347240afd8cSMark Johnston rootvdevnv = nvlist_create(NV_UNIQUE_NAME); 348240afd8cSMark Johnston 349240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0); 350240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid); 351240afd8cSMark Johnston nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); 352240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); 353240afd8cSMark Johnston nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv, 354240afd8cSMark Johnston 1); 355240afd8cSMark Johnston nvlist_destroy(diskvdevnv); 356240afd8cSMark Johnston 357240afd8cSMark Johnston return (rootvdevnv); 358240afd8cSMark Johnston } 359240afd8cSMark Johnston 360240afd8cSMark Johnston /* 361240afd8cSMark Johnston * Create the pool's "config" object, which contains an nvlist describing pool 362240afd8cSMark Johnston * parameters and the vdev topology. It is similar but not identical to the 363240afd8cSMark Johnston * nvlist stored in vdev labels. The main difference is that vdev labels do not 364240afd8cSMark Johnston * describe the full vdev tree and in particular do not contain the "root" 365240afd8cSMark Johnston * meta-vdev. 366240afd8cSMark Johnston */ 367240afd8cSMark Johnston static void 368240afd8cSMark Johnston pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir) 369240afd8cSMark Johnston { 370240afd8cSMark Johnston dnode_phys_t *dnode; 371240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig; 372240afd8cSMark Johnston void *configbuf; 373240afd8cSMark Johnston uint64_t dnid; 374240afd8cSMark Johnston off_t configloc, configblksz; 375240afd8cSMark Johnston int error; 376240afd8cSMark Johnston 377240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST, 378240afd8cSMark Johnston DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid); 379240afd8cSMark Johnston 380240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs); 381240afd8cSMark Johnston 382240afd8cSMark Johnston vdevconfig = pool_root_vdev_config_nvcreate(zfs); 383240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); 384240afd8cSMark Johnston nvlist_destroy(vdevconfig); 385240afd8cSMark Johnston 386240afd8cSMark Johnston error = nvlist_export(poolconfig); 387240afd8cSMark Johnston if (error != 0) 388240afd8cSMark Johnston errc(1, error, "nvlist_export"); 389240afd8cSMark Johnston 390240afd8cSMark Johnston configblksz = nvlist_size(poolconfig); 391240afd8cSMark Johnston configloc = objset_space_alloc(zfs, zfs->mos, &configblksz); 392240afd8cSMark Johnston configbuf = ecalloc(1, configblksz); 393240afd8cSMark Johnston nvlist_copy(poolconfig, configbuf, configblksz); 394240afd8cSMark Johnston 395240afd8cSMark Johnston vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc); 396240afd8cSMark Johnston 397240afd8cSMark Johnston dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT; 398240afd8cSMark Johnston dnode->dn_flags = DNODE_FLAG_USED_BYTES; 399240afd8cSMark Johnston *(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig); 400240afd8cSMark Johnston 401240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid); 402240afd8cSMark Johnston 403240afd8cSMark Johnston nvlist_destroy(poolconfig); 404240afd8cSMark Johnston free(configbuf); 405240afd8cSMark Johnston } 406240afd8cSMark Johnston 407240afd8cSMark Johnston /* 408240afd8cSMark Johnston * Add objects block pointer list objects, used for deferred frees. We don't do 409240afd8cSMark Johnston * anything with them, but they need to be present or OpenZFS will refuse to 410240afd8cSMark Johnston * import the pool. 411240afd8cSMark Johnston */ 412240afd8cSMark Johnston static void 413240afd8cSMark Johnston pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir) 414240afd8cSMark Johnston { 415240afd8cSMark Johnston uint64_t dnid; 416240afd8cSMark Johnston 417240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, 418240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid); 419240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid); 420240afd8cSMark Johnston 421240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, 422240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid); 423240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid); 424240afd8cSMark Johnston } 425240afd8cSMark Johnston 426240afd8cSMark Johnston /* 427240afd8cSMark Johnston * Add required feature metadata objects. We don't know anything about ZFS 428240afd8cSMark Johnston * features, so the objects are just empty ZAPs. 429240afd8cSMark Johnston */ 430240afd8cSMark Johnston static void 431240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir) 432240afd8cSMark Johnston { 433240afd8cSMark Johnston dnode_phys_t *dnode; 434240afd8cSMark Johnston uint64_t dnid; 435240afd8cSMark Johnston 436240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 437240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid); 438240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 439240afd8cSMark Johnston 440240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 441240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid); 442240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 443240afd8cSMark Johnston 444240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 445240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid); 446240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 447240afd8cSMark Johnston } 448240afd8cSMark Johnston 449240afd8cSMark Johnston static void 450240afd8cSMark Johnston pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir) 451240afd8cSMark Johnston { 452240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET, 453240afd8cSMark Johnston dsl_dir_id(zfs->rootdsldir)); 454240afd8cSMark Johnston } 455240afd8cSMark Johnston 456240afd8cSMark Johnston static void 457240afd8cSMark Johnston pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir) 458240afd8cSMark Johnston { 459240afd8cSMark Johnston dnode_phys_t *dnode; 460240afd8cSMark Johnston uint64_t id; 461240afd8cSMark Johnston 462240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id); 463240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_PROPS, id); 464240afd8cSMark Johnston 465240afd8cSMark Johnston zfs->poolprops = zap_alloc(zfs->mos, dnode); 466240afd8cSMark Johnston } 467240afd8cSMark Johnston 468240afd8cSMark Johnston /* 469240afd8cSMark Johnston * Initialize the MOS object directory, the root of virtually all of the pool's 470240afd8cSMark Johnston * data and metadata. 471240afd8cSMark Johnston */ 472240afd8cSMark Johnston static void 473240afd8cSMark Johnston pool_init_objdir(zfs_opt_t *zfs) 474240afd8cSMark Johnston { 475240afd8cSMark Johnston zfs_zap_t *zap; 476240afd8cSMark Johnston dnode_phys_t *objdir; 477240afd8cSMark Johnston 478240afd8cSMark Johnston objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT); 479240afd8cSMark Johnston 480240afd8cSMark Johnston zap = zap_alloc(zfs->mos, objdir); 481240afd8cSMark Johnston pool_init_objdir_config(zfs, zap); 482240afd8cSMark Johnston pool_init_objdir_bplists(zfs, zap); 483240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs, zap); 484240afd8cSMark Johnston pool_init_objdir_dsl(zfs, zap); 485240afd8cSMark Johnston pool_init_objdir_poolprops(zfs, zap); 486240afd8cSMark Johnston zap_write(zfs, zap); 487240afd8cSMark Johnston } 488240afd8cSMark Johnston 489240afd8cSMark Johnston /* 490240afd8cSMark Johnston * Initialize the meta-object set (MOS) and immediately write out several 491240afd8cSMark Johnston * special objects whose contents are already finalized, including the object 492240afd8cSMark Johnston * directory. 493240afd8cSMark Johnston * 494240afd8cSMark Johnston * Once the MOS is finalized, it'll look roughly like this: 495240afd8cSMark Johnston * 496240afd8cSMark Johnston * object directory (ZAP) 497240afd8cSMark Johnston * |-> vdev config object (nvlist) 498240afd8cSMark Johnston * |-> features for read 499240afd8cSMark Johnston * |-> features for write 500240afd8cSMark Johnston * |-> feature descriptions 501240afd8cSMark Johnston * |-> sync bplist 502240afd8cSMark Johnston * |-> free bplist 503240afd8cSMark Johnston * |-> pool properties 504240afd8cSMark Johnston * L-> root DSL directory 505240afd8cSMark Johnston * |-> DSL child directory (ZAP) 506240afd8cSMark Johnston * | |-> $MOS (DSL dir) 507240afd8cSMark Johnston * | | |-> child map 508240afd8cSMark Johnston * | | L-> props (ZAP) 509240afd8cSMark Johnston * | |-> $FREE (DSL dir) 510240afd8cSMark Johnston * | | |-> child map 511240afd8cSMark Johnston * | | L-> props (ZAP) 512240afd8cSMark Johnston * | |-> $ORIGIN (DSL dir) 513240afd8cSMark Johnston * | | |-> child map 514240afd8cSMark Johnston * | | |-> dataset 515240afd8cSMark Johnston * | | | L-> deadlist 516240afd8cSMark Johnston * | | |-> snapshot 517240afd8cSMark Johnston * | | | |-> deadlist 518240afd8cSMark Johnston * | | | L-> snapshot names 519240afd8cSMark Johnston * | | |-> props (ZAP) 520240afd8cSMark Johnston * | | L-> clones (ZAP) 521240afd8cSMark Johnston * | |-> dataset 1 (DSL dir) 522240afd8cSMark Johnston * | | |-> DSL dataset 523240afd8cSMark Johnston * | | | |-> snapshot names 524240afd8cSMark Johnston * | | | L-> deadlist 525240afd8cSMark Johnston * | | |-> child map 526240afd8cSMark Johnston * | | | L-> ... 527240afd8cSMark Johnston * | | L-> props 528240afd8cSMark Johnston * | |-> dataset 2 529240afd8cSMark Johnston * | | L-> ... 530240afd8cSMark Johnston * | |-> ... 531240afd8cSMark Johnston * | L-> dataset n 532240afd8cSMark Johnston * |-> DSL root dataset 533240afd8cSMark Johnston * | |-> snapshot names 534240afd8cSMark Johnston * | L-> deadlist 535240afd8cSMark Johnston * L-> props (ZAP) 536240afd8cSMark Johnston * space map object array 537240afd8cSMark Johnston * |-> space map 1 538240afd8cSMark Johnston * |-> space map 2 539240afd8cSMark Johnston * |-> ... 540240afd8cSMark Johnston * L-> space map n (zfs->mscount) 541240afd8cSMark Johnston * 542240afd8cSMark Johnston * The space map object array is pointed to by the "msarray" property in the 543240afd8cSMark Johnston * pool configuration. 544240afd8cSMark Johnston */ 545240afd8cSMark Johnston static void 546240afd8cSMark Johnston pool_init(zfs_opt_t *zfs) 547240afd8cSMark Johnston { 548240afd8cSMark Johnston uint64_t dnid; 549240afd8cSMark Johnston 55014c5cf3aSMark Johnston zfs->poolguid = randomguid(); 55114c5cf3aSMark Johnston zfs->vdevguid = randomguid(); 552240afd8cSMark Johnston 553240afd8cSMark Johnston zfs->mos = objset_alloc(zfs, DMU_OST_META); 554240afd8cSMark Johnston 555240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid); 556240afd8cSMark Johnston assert(dnid == DMU_POOL_DIRECTORY_OBJECT); 557240afd8cSMark Johnston 558240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid); 559240afd8cSMark Johnston 560240afd8cSMark Johnston dsl_init(zfs); 561240afd8cSMark Johnston 562240afd8cSMark Johnston pool_init_objdir(zfs); 563240afd8cSMark Johnston } 564240afd8cSMark Johnston 565240afd8cSMark Johnston static void 566240afd8cSMark Johnston pool_labels_write(zfs_opt_t *zfs) 567240afd8cSMark Johnston { 568240afd8cSMark Johnston uberblock_t *ub; 569240afd8cSMark Johnston vdev_label_t *label; 570240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig; 571240afd8cSMark Johnston int error; 572240afd8cSMark Johnston 573240afd8cSMark Johnston label = ecalloc(1, sizeof(*label)); 574240afd8cSMark Johnston 575240afd8cSMark Johnston /* 576240afd8cSMark Johnston * Assemble the vdev configuration and store it in the label. 577240afd8cSMark Johnston */ 578240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs); 579240afd8cSMark Johnston vdevconfig = pool_disk_vdev_config_nvcreate(zfs); 580240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); 581240afd8cSMark Johnston nvlist_destroy(vdevconfig); 582240afd8cSMark Johnston 583240afd8cSMark Johnston error = nvlist_export(poolconfig); 584240afd8cSMark Johnston if (error != 0) 585240afd8cSMark Johnston errc(1, error, "nvlist_export"); 586240afd8cSMark Johnston nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist, 587240afd8cSMark Johnston sizeof(label->vl_vdev_phys.vp_nvlist)); 588240afd8cSMark Johnston nvlist_destroy(poolconfig); 589240afd8cSMark Johnston 590240afd8cSMark Johnston /* 591240afd8cSMark Johnston * Fill out the uberblock. Just make each one the same. The embedded 592240afd8cSMark Johnston * checksum is calculated in vdev_label_write(). 593240afd8cSMark Johnston */ 594240afd8cSMark Johnston for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock); 595240afd8cSMark Johnston uoff += (1 << zfs->ashift)) { 596240afd8cSMark Johnston ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff); 597240afd8cSMark Johnston ub->ub_magic = UBERBLOCK_MAGIC; 598240afd8cSMark Johnston ub->ub_version = SPA_VERSION; 599*4e15366cSMark Johnston 600*4e15366cSMark Johnston /* 601*4e15366cSMark Johnston * Upon import, OpenZFS will perform metadata verification of 602*4e15366cSMark Johnston * the last TXG by default. If all data is written in the same 603*4e15366cSMark Johnston * TXG, it'll all get verified, which can be painfully slow in 604*4e15366cSMark Johnston * some cases, e.g., initial boot in a cloud environment with 605*4e15366cSMark Johnston * slow storage. So, fabricate additional TXGs to avoid this 606*4e15366cSMark Johnston * overhead, unless the user requests otherwise. 607*4e15366cSMark Johnston */ 608240afd8cSMark Johnston ub->ub_txg = TXG; 609*4e15366cSMark Johnston if (!zfs->verify_txgs) 610*4e15366cSMark Johnston ub->ub_txg += TXG_SIZE; 611240afd8cSMark Johnston ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid; 612240afd8cSMark Johnston ub->ub_timestamp = 0; 613240afd8cSMark Johnston 614240afd8cSMark Johnston ub->ub_software_version = SPA_VERSION; 615240afd8cSMark Johnston ub->ub_mmp_magic = MMP_MAGIC; 616240afd8cSMark Johnston ub->ub_mmp_delay = 0; 617240afd8cSMark Johnston ub->ub_mmp_config = 0; 618240afd8cSMark Johnston ub->ub_checkpoint_txg = 0; 619240afd8cSMark Johnston objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp); 620240afd8cSMark Johnston } 621240afd8cSMark Johnston 622240afd8cSMark Johnston /* 623240afd8cSMark Johnston * Write out four copies of the label: two at the beginning of the vdev 624240afd8cSMark Johnston * and two at the end. 625240afd8cSMark Johnston */ 626240afd8cSMark Johnston for (int i = 0; i < VDEV_LABELS; i++) 627240afd8cSMark Johnston vdev_label_write(zfs, i, label); 628240afd8cSMark Johnston 629240afd8cSMark Johnston free(label); 630240afd8cSMark Johnston } 631240afd8cSMark Johnston 632240afd8cSMark Johnston static void 633240afd8cSMark Johnston pool_fini(zfs_opt_t *zfs) 634240afd8cSMark Johnston { 635240afd8cSMark Johnston zap_write(zfs, zfs->poolprops); 636240afd8cSMark Johnston dsl_write(zfs); 637240afd8cSMark Johnston objset_write(zfs, zfs->mos); 638240afd8cSMark Johnston pool_labels_write(zfs); 639240afd8cSMark Johnston } 640240afd8cSMark Johnston 641240afd8cSMark Johnston struct dnode_cursor * 642240afd8cSMark Johnston dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode, 643240afd8cSMark Johnston off_t size, off_t blksz) 644240afd8cSMark Johnston { 645240afd8cSMark Johnston struct dnode_cursor *c; 646240afd8cSMark Johnston uint64_t nbppindir, indlevel, ndatablks, nindblks; 647240afd8cSMark Johnston 648240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 649240afd8cSMark Johnston assert(blksz <= MAXBLOCKSIZE); 650240afd8cSMark Johnston 651240afd8cSMark Johnston if (blksz == 0) { 652240afd8cSMark Johnston /* Must be between 1<<ashift and 128KB. */ 653240afd8cSMark Johnston blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift, 6549821e244SJohn Baldwin powerof2(size) ? size : (1l << flsll(size)))); 655240afd8cSMark Johnston } 656240afd8cSMark Johnston assert(powerof2(blksz)); 657240afd8cSMark Johnston 658240afd8cSMark Johnston /* 659240afd8cSMark Johnston * Do we need indirect blocks? Figure out how many levels are needed 660240afd8cSMark Johnston * (indlevel == 1 means no indirect blocks) and how much space is needed 661240afd8cSMark Johnston * (it has to be allocated up-front to break the dependency cycle 662240afd8cSMark Johnston * described in objset_write()). 663240afd8cSMark Johnston */ 664240afd8cSMark Johnston ndatablks = size == 0 ? 0 : howmany(size, blksz); 665240afd8cSMark Johnston nindblks = 0; 666240afd8cSMark Johnston for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) { 667240afd8cSMark Johnston nbppindir *= BLKPTR_PER_INDIR; 668240afd8cSMark Johnston nindblks += howmany(ndatablks, indlevel * nbppindir); 669240afd8cSMark Johnston } 670240afd8cSMark Johnston assert(indlevel < INDIR_LEVELS); 671240afd8cSMark Johnston 672240afd8cSMark Johnston dnode->dn_nlevels = (uint8_t)indlevel; 673240afd8cSMark Johnston dnode->dn_maxblkid = ndatablks > 0 ? ndatablks - 1 : 0; 674240afd8cSMark Johnston dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; 675240afd8cSMark Johnston 676240afd8cSMark Johnston c = ecalloc(1, sizeof(*c)); 677240afd8cSMark Johnston if (nindblks > 0) { 678240afd8cSMark Johnston c->indspace = nindblks * MAXBLOCKSIZE; 679240afd8cSMark Johnston c->indloc = objset_space_alloc(zfs, os, &c->indspace); 680240afd8cSMark Johnston } 681240afd8cSMark Johnston c->dnode = dnode; 682240afd8cSMark Johnston c->dataoff = 0; 683240afd8cSMark Johnston c->datablksz = blksz; 684240afd8cSMark Johnston 685240afd8cSMark Johnston return (c); 686240afd8cSMark Johnston } 687240afd8cSMark Johnston 688240afd8cSMark Johnston static void 689b5a2bf51SMark Johnston _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, unsigned int levels) 690240afd8cSMark Johnston { 691240afd8cSMark Johnston blkptr_t *bp, *pbp; 692240afd8cSMark Johnston void *buf; 693240afd8cSMark Johnston uint64_t fill; 694240afd8cSMark Johnston off_t blkid, blksz, loc; 695240afd8cSMark Johnston 696240afd8cSMark Johnston assert(levels > 0); 6978a77bc5eSDimitry Andric assert(levels <= c->dnode->dn_nlevels - 1U); 698240afd8cSMark Johnston 699240afd8cSMark Johnston blksz = MAXBLOCKSIZE; 700240afd8cSMark Johnston blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR; 701b5a2bf51SMark Johnston for (unsigned int level = 1; level <= levels; level++) { 702240afd8cSMark Johnston buf = c->inddir[level - 1]; 703240afd8cSMark Johnston 7048a77bc5eSDimitry Andric if (level == c->dnode->dn_nlevels - 1U) { 705240afd8cSMark Johnston pbp = &c->dnode->dn_blkptr[0]; 706240afd8cSMark Johnston } else { 707240afd8cSMark Johnston uint64_t iblkid; 708240afd8cSMark Johnston 709240afd8cSMark Johnston iblkid = blkid & (BLKPTR_PER_INDIR - 1); 710240afd8cSMark Johnston pbp = (blkptr_t *) 711240afd8cSMark Johnston &c->inddir[level][iblkid * sizeof(blkptr_t)]; 712240afd8cSMark Johnston } 713240afd8cSMark Johnston 714240afd8cSMark Johnston /* 715240afd8cSMark Johnston * Space for indirect blocks is allocated up-front; see the 716240afd8cSMark Johnston * comment in objset_write(). 717240afd8cSMark Johnston */ 718240afd8cSMark Johnston loc = c->indloc; 719240afd8cSMark Johnston c->indloc += blksz; 720240afd8cSMark Johnston assert(c->indspace >= blksz); 721240afd8cSMark Johnston c->indspace -= blksz; 722240afd8cSMark Johnston 723240afd8cSMark Johnston bp = buf; 724240afd8cSMark Johnston fill = 0; 725240afd8cSMark Johnston for (size_t i = 0; i < BLKPTR_PER_INDIR; i++) 726240afd8cSMark Johnston fill += BP_GET_FILL(&bp[i]); 727240afd8cSMark Johnston 728240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz, 729240afd8cSMark Johnston loc, pbp); 730240afd8cSMark Johnston memset(buf, 0, MAXBLOCKSIZE); 731240afd8cSMark Johnston 732240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR; 733240afd8cSMark Johnston } 734240afd8cSMark Johnston } 735240afd8cSMark Johnston 736240afd8cSMark Johnston blkptr_t * 737240afd8cSMark Johnston dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off) 738240afd8cSMark Johnston { 739240afd8cSMark Johnston off_t blkid, l1id; 740b5a2bf51SMark Johnston unsigned int levels; 741240afd8cSMark Johnston 742240afd8cSMark Johnston if (c->dnode->dn_nlevels == 1) { 743240afd8cSMark Johnston assert(off < MAXBLOCKSIZE); 744240afd8cSMark Johnston return (&c->dnode->dn_blkptr[0]); 745240afd8cSMark Johnston } 746240afd8cSMark Johnston 747240afd8cSMark Johnston assert(off % c->datablksz == 0); 748240afd8cSMark Johnston 749240afd8cSMark Johnston /* Do we need to flush any full indirect blocks? */ 750240afd8cSMark Johnston if (off > 0) { 751240afd8cSMark Johnston blkid = off / c->datablksz; 7528a77bc5eSDimitry Andric for (levels = 0; levels < c->dnode->dn_nlevels - 1U; levels++) { 753240afd8cSMark Johnston if (blkid % BLKPTR_PER_INDIR != 0) 754240afd8cSMark Johnston break; 755240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR; 756240afd8cSMark Johnston } 757240afd8cSMark Johnston if (levels > 0) 758240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels); 759240afd8cSMark Johnston } 760240afd8cSMark Johnston 761240afd8cSMark Johnston c->dataoff = off; 762240afd8cSMark Johnston l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1); 763240afd8cSMark Johnston return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]); 764240afd8cSMark Johnston } 765240afd8cSMark Johnston 766240afd8cSMark Johnston void 767240afd8cSMark Johnston dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c) 768240afd8cSMark Johnston { 769b5a2bf51SMark Johnston unsigned int levels; 770240afd8cSMark Johnston 771b5a2bf51SMark Johnston assert(c->dnode->dn_nlevels > 0); 772240afd8cSMark Johnston levels = c->dnode->dn_nlevels - 1; 773240afd8cSMark Johnston if (levels > 0) 774240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels); 775240afd8cSMark Johnston assert(c->indspace == 0); 776240afd8cSMark Johnston free(c); 777240afd8cSMark Johnston } 778240afd8cSMark Johnston 779240afd8cSMark Johnston void 780240afd8cSMark Johnston zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) 781240afd8cSMark Johnston { 782240afd8cSMark Johnston zfs_opt_t *zfs; 783240afd8cSMark Johnston int dirfd; 784240afd8cSMark Johnston 785240afd8cSMark Johnston zfs = fsopts->fs_specific; 786240afd8cSMark Johnston 787240afd8cSMark Johnston /* 788240afd8cSMark Johnston * Use a fixed seed to provide reproducible pseudo-random numbers for 789240afd8cSMark Johnston * on-disk structures when needed (e.g., GUIDs, ZAP hash salts). 790240afd8cSMark Johnston */ 791240afd8cSMark Johnston srandom(1729); 792240afd8cSMark Johnston 793240afd8cSMark Johnston zfs_check_opts(fsopts); 794240afd8cSMark Johnston 795240afd8cSMark Johnston dirfd = open(dir, O_DIRECTORY | O_RDONLY); 796240afd8cSMark Johnston if (dirfd < 0) 797240afd8cSMark Johnston err(1, "open(%s)", dir); 798240afd8cSMark Johnston 799240afd8cSMark Johnston vdev_init(zfs, image); 800240afd8cSMark Johnston pool_init(zfs); 801240afd8cSMark Johnston fs_build(zfs, dirfd, root); 802240afd8cSMark Johnston pool_fini(zfs); 803240afd8cSMark Johnston vdev_fini(zfs); 804240afd8cSMark Johnston } 805