1789Sahrens /*
2789Sahrens * CDDL HEADER START
3789Sahrens *
4789Sahrens * The contents of this file are subject to the terms of the
51544Seschrock * Common Development and Distribution License (the "License").
61544Seschrock * You may not use this file except in compliance with the License.
7789Sahrens *
8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens * or http://www.opensolaris.org/os/licensing.
10789Sahrens * See the License for the specific language governing permissions
11789Sahrens * and limitations under the License.
12789Sahrens *
13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens *
19789Sahrens * CDDL HEADER END
20789Sahrens */
212082Seschrock
22789Sahrens /*
23*12296SLin.Ling@Sun.COM * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24789Sahrens */
25789Sahrens
26789Sahrens /*
27789Sahrens * Functions to convert between a list of vdevs and an nvlist representing the
28789Sahrens * configuration. Each entry in the list can be one of:
29789Sahrens *
30789Sahrens * Device vdevs
31789Sahrens * disk=(path=..., devid=...)
32789Sahrens * file=(path=...)
33789Sahrens *
34789Sahrens * Group vdevs
352082Seschrock * raidz[1|2]=(...)
36789Sahrens * mirror=(...)
37789Sahrens *
382082Seschrock * Hot spares
392082Seschrock *
40789Sahrens * While the underlying implementation supports it, group vdevs cannot contain
41789Sahrens * other group vdevs. All userland verification of devices is contained within
42789Sahrens * this file. If successful, the nvlist returned can be passed directly to the
43789Sahrens * kernel; we've done as much verification as possible in userland.
44789Sahrens *
452082Seschrock * Hot spares are a special case, and passed down as an array of disk vdevs, at
462082Seschrock * the same level as the root of the vdev tree.
472082Seschrock *
484276Staylor * The only function exported by this file is 'make_root_vdev'. The
494276Staylor * function performs several passes:
50789Sahrens *
51789Sahrens * 1. Construct the vdev specification. Performs syntax validation and
52789Sahrens * makes sure each device is valid.
 *	2. Check for devices in use.  Using libdiskmgt, makes sure that none
 *	   of the devices is already in use.  Some conflicts can be overridden
 *	   using the 'force' flag, others cannot.
 *	3. Check for replication errors if the 'force' flag is not specified.
 *	   Validates that the replication level is consistent across the
 *	   entire pool.
594276Staylor * 4. Call libzfs to label any whole disks with an EFI label.
60789Sahrens */
61789Sahrens
62789Sahrens #include <assert.h>
63789Sahrens #include <devid.h>
64789Sahrens #include <errno.h>
65789Sahrens #include <fcntl.h>
66789Sahrens #include <libdiskmgt.h>
67789Sahrens #include <libintl.h>
68789Sahrens #include <libnvpair.h>
6910105Sadam.leventhal@sun.com #include <limits.h>
70789Sahrens #include <stdio.h>
71789Sahrens #include <string.h>
72789Sahrens #include <unistd.h>
73789Sahrens #include <sys/efi_partition.h>
74789Sahrens #include <sys/stat.h>
75789Sahrens #include <sys/vtoc.h>
76789Sahrens #include <sys/mntent.h>
77789Sahrens
78789Sahrens #include "zpool_util.h"
79789Sahrens
80789Sahrens #define DISK_ROOT "/dev/dsk"
81789Sahrens #define RDISK_ROOT "/dev/rdsk"
82789Sahrens #define BACKUP_SLICE "s2"
83789Sahrens
84789Sahrens /*
85789Sahrens * For any given vdev specification, we can have multiple errors. The
86789Sahrens * vdev_error() function keeps track of whether we have seen an error yet, and
87789Sahrens * prints out a header if its the first error we've seen.
88789Sahrens */
892082Seschrock boolean_t error_seen;
902082Seschrock boolean_t is_force;
91789Sahrens
922082Seschrock /*PRINTFLIKE1*/
932082Seschrock static void
vdev_error(const char * fmt,...)94789Sahrens vdev_error(const char *fmt, ...)
95789Sahrens {
96789Sahrens va_list ap;
97789Sahrens
98789Sahrens if (!error_seen) {
99789Sahrens (void) fprintf(stderr, gettext("invalid vdev specification\n"));
100789Sahrens if (!is_force)
101789Sahrens (void) fprintf(stderr, gettext("use '-f' to override "
102789Sahrens "the following errors:\n"));
103789Sahrens else
104789Sahrens (void) fprintf(stderr, gettext("the following errors "
105789Sahrens "must be manually repaired:\n"));
1062082Seschrock error_seen = B_TRUE;
107789Sahrens }
108789Sahrens
109789Sahrens va_start(ap, fmt);
110789Sahrens (void) vfprintf(stderr, fmt, ap);
111789Sahrens va_end(ap);
112789Sahrens }
113789Sahrens
/*
 * Warn on stderr that the in-use check itself failed with errno value
 * 'error'.  ENXIO and ENODEV are not reported: they are the expected result
 * for a device that doesn't live in /dev/dsk.
 */
static void
libdiskmgt_error(int error)
{
	switch (error) {
	case ENXIO:
	case ENODEV:
		/* Not worth a message; see above. */
		return;
	default:
		break;
	}

	(void) fprintf(stderr, gettext("warning: device in use checking "
	    "failed: %s\n"), strerror(error));
}
127789Sahrens
128789Sahrens /*
1291352Seschrock * Validate a device, passing the bulk of the work off to libdiskmgt.
130789Sahrens */
1314276Staylor static int
check_slice(const char * path,int force,boolean_t wholedisk,boolean_t isspare)1322082Seschrock check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
133789Sahrens {
1341352Seschrock char *msg;
1351352Seschrock int error = 0;
1364946Smmusante dm_who_type_t who;
137789Sahrens
1384946Smmusante if (force)
1394946Smmusante who = DM_WHO_ZPOOL_FORCE;
1404946Smmusante else if (isspare)
1414946Smmusante who = DM_WHO_ZPOOL_SPARE;
1424946Smmusante else
1434946Smmusante who = DM_WHO_ZPOOL;
1444946Smmusante
1454946Smmusante if (dm_inuse((char *)path, &msg, who, &error) || error) {
1461352Seschrock if (error != 0) {
1471352Seschrock libdiskmgt_error(error);
1481352Seschrock return (0);
1493741Smmusante } else {
1501352Seschrock vdev_error("%s", msg);
1511352Seschrock free(msg);
1524082Smmusante return (-1);
153789Sahrens }
154789Sahrens }
155789Sahrens
156789Sahrens /*
1571352Seschrock * If we're given a whole disk, ignore overlapping slices since we're
1581352Seschrock * about to label it anyway.
159789Sahrens */
1601352Seschrock error = 0;
1611352Seschrock if (!wholedisk && !force &&
1621352Seschrock (dm_isoverlapping((char *)path, &msg, &error) || error)) {
1634082Smmusante if (error == 0) {
1644082Smmusante /* dm_isoverlapping returned -1 */
1654082Smmusante vdev_error(gettext("%s overlaps with %s\n"), path, msg);
1664082Smmusante free(msg);
1674082Smmusante return (-1);
1684082Smmusante } else if (error != ENODEV) {
1694082Smmusante /* libdiskmgt's devcache only handles physical drives */
1701352Seschrock libdiskmgt_error(error);
1711352Seschrock return (0);
1721352Seschrock }
173789Sahrens }
174789Sahrens
1754082Smmusante return (0);
176789Sahrens }
177789Sahrens
1784276Staylor
179789Sahrens /*
180789Sahrens * Validate a whole disk. Iterate over all slices on the disk and make sure
181789Sahrens * that none is in use by calling check_slice().
182789Sahrens */
1834276Staylor static int
check_disk(const char * name,dm_descriptor_t disk,int force,int isspare)1842082Seschrock check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
185789Sahrens {
186789Sahrens dm_descriptor_t *drive, *media, *slice;
187789Sahrens int err = 0;
188789Sahrens int i;
189789Sahrens int ret;
190789Sahrens
191789Sahrens /*
192789Sahrens * Get the drive associated with this disk. This should never fail,
193789Sahrens * because we already have an alias handle open for the device.
194789Sahrens */
195789Sahrens if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
1961352Seschrock &err)) == NULL || *drive == NULL) {
1971352Seschrock if (err)
1981352Seschrock libdiskmgt_error(err);
1991352Seschrock return (0);
2001352Seschrock }
201789Sahrens
202789Sahrens if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
2031352Seschrock &err)) == NULL) {
2041352Seschrock dm_free_descriptors(drive);
2051352Seschrock if (err)
2061352Seschrock libdiskmgt_error(err);
2071352Seschrock return (0);
2081352Seschrock }
209789Sahrens
210789Sahrens dm_free_descriptors(drive);
211789Sahrens
212789Sahrens /*
213789Sahrens * It is possible that the user has specified a removable media drive,
214789Sahrens * and the media is not present.
215789Sahrens */
216789Sahrens if (*media == NULL) {
2171352Seschrock dm_free_descriptors(media);
218789Sahrens vdev_error(gettext("'%s' has no media in drive\n"), name);
219789Sahrens return (-1);
220789Sahrens }
221789Sahrens
222789Sahrens if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
2231352Seschrock &err)) == NULL) {
2241352Seschrock dm_free_descriptors(media);
2251352Seschrock if (err)
2261352Seschrock libdiskmgt_error(err);
2271352Seschrock return (0);
2281352Seschrock }
229789Sahrens
230789Sahrens dm_free_descriptors(media);
231789Sahrens
232789Sahrens ret = 0;
233789Sahrens
234789Sahrens /*
235789Sahrens * Iterate over all slices and report any errors. We don't care about
236789Sahrens * overlapping slices because we are using the whole disk.
237789Sahrens */
238789Sahrens for (i = 0; slice[i] != NULL; i++) {
2392082Seschrock char *name = dm_get_name(slice[i], &err);
2402082Seschrock
2412082Seschrock if (check_slice(name, force, B_TRUE, isspare) != 0)
242789Sahrens ret = -1;
2432082Seschrock
2442082Seschrock dm_free_name(name);
245789Sahrens }
246789Sahrens
247789Sahrens dm_free_descriptors(slice);
248789Sahrens return (ret);
249789Sahrens }
250789Sahrens
251789Sahrens /*
2521352Seschrock * Validate a device.
253789Sahrens */
2544276Staylor static int
check_device(const char * path,boolean_t force,boolean_t isspare)2552082Seschrock check_device(const char *path, boolean_t force, boolean_t isspare)
256789Sahrens {
257789Sahrens dm_descriptor_t desc;
258789Sahrens int err;
2591352Seschrock char *dev;
260789Sahrens
261789Sahrens /*
262789Sahrens * For whole disks, libdiskmgt does not include the leading dev path.
263789Sahrens */
264789Sahrens dev = strrchr(path, '/');
265789Sahrens assert(dev != NULL);
266789Sahrens dev++;
2671352Seschrock if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
2682082Seschrock err = check_disk(path, desc, force, isspare);
2691352Seschrock dm_free_descriptor(desc);
2701352Seschrock return (err);
271789Sahrens }
272789Sahrens
2732082Seschrock return (check_slice(path, force, B_FALSE, isspare));
274789Sahrens }
275789Sahrens
276789Sahrens /*
277789Sahrens * Check that a file is valid. All we can do in this case is check that it's
2784082Smmusante * not in use by another pool, and not in use by swap.
279789Sahrens */
2804276Staylor static int
check_file(const char * file,boolean_t force,boolean_t isspare)2812082Seschrock check_file(const char *file, boolean_t force, boolean_t isspare)
282789Sahrens {
2831352Seschrock char *name;
284789Sahrens int fd;
285789Sahrens int ret = 0;
2864082Smmusante int err;
2871352Seschrock pool_state_t state;
2882082Seschrock boolean_t inuse;
289789Sahrens
2904082Smmusante if (dm_inuse_swap(file, &err)) {
2914082Smmusante if (err)
2924082Smmusante libdiskmgt_error(err);
2934082Smmusante else
2944082Smmusante vdev_error(gettext("%s is currently used by swap. "
2954082Smmusante "Please see swap(1M).\n"), file);
2964082Smmusante return (-1);
2974082Smmusante }
2984082Smmusante
299789Sahrens if ((fd = open(file, O_RDONLY)) < 0)
300789Sahrens return (0);
301789Sahrens
3022082Seschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
3031352Seschrock const char *desc;
3041352Seschrock
3051352Seschrock switch (state) {
3061352Seschrock case POOL_STATE_ACTIVE:
3071352Seschrock desc = gettext("active");
3081352Seschrock break;
3091352Seschrock
3101352Seschrock case POOL_STATE_EXPORTED:
3111352Seschrock desc = gettext("exported");
3121352Seschrock break;
3131352Seschrock
3141352Seschrock case POOL_STATE_POTENTIALLY_ACTIVE:
3151352Seschrock desc = gettext("potentially active");
3161352Seschrock break;
3171352Seschrock
3181352Seschrock default:
3191352Seschrock desc = gettext("unknown");
3201352Seschrock break;
3211352Seschrock }
3221352Seschrock
3232082Seschrock /*
3242082Seschrock * Allow hot spares to be shared between pools.
3252082Seschrock */
3262082Seschrock if (state == POOL_STATE_SPARE && isspare)
3272082Seschrock return (0);
3282082Seschrock
3292082Seschrock if (state == POOL_STATE_ACTIVE ||
3302082Seschrock state == POOL_STATE_SPARE || !force) {
3312082Seschrock switch (state) {
3322082Seschrock case POOL_STATE_SPARE:
3332082Seschrock vdev_error(gettext("%s is reserved as a hot "
3342082Seschrock "spare for pool %s\n"), file, name);
3352082Seschrock break;
3362082Seschrock default:
3372082Seschrock vdev_error(gettext("%s is part of %s pool "
3382082Seschrock "'%s'\n"), file, desc, name);
3392082Seschrock break;
3402082Seschrock }
341789Sahrens ret = -1;
342789Sahrens }
343789Sahrens
344789Sahrens free(name);
345789Sahrens }
346789Sahrens
347789Sahrens (void) close(fd);
348789Sahrens return (ret);
349789Sahrens }
350789Sahrens
351789Sahrens
3524276Staylor /*
3534276Staylor * By "whole disk" we mean an entire physical disk (something we can
3544276Staylor * label, toggle the write cache on, etc.) as opposed to the full
3554276Staylor * capacity of a pseudo-device such as lofi or did. We act as if we
3564276Staylor * are labeling the disk, which should be a pretty good test of whether
3574276Staylor * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
3584276Staylor * it isn't.
3594276Staylor */
3604276Staylor static boolean_t
is_whole_disk(const char * arg)3614276Staylor is_whole_disk(const char *arg)
3624276Staylor {
3634276Staylor struct dk_gpt *label;
3644276Staylor int fd;
3654276Staylor char path[MAXPATHLEN];
366789Sahrens
3674276Staylor (void) snprintf(path, sizeof (path), "%s%s%s",
3684276Staylor RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
3694276Staylor if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
3704276Staylor return (B_FALSE);
3714276Staylor if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
3724276Staylor (void) close(fd);
3734276Staylor return (B_FALSE);
3744276Staylor }
3754276Staylor efi_free(label);
3764276Staylor (void) close(fd);
3774276Staylor return (B_TRUE);
378789Sahrens }
379789Sahrens
380789Sahrens /*
381789Sahrens * Create a leaf vdev. Determine if this is a file or a device. If it's a
382789Sahrens * device, fill in the device id to make a complete nvlist. Valid forms for a
383789Sahrens * leaf vdev are:
384789Sahrens *
385789Sahrens * /dev/dsk/xxx Complete disk path
386789Sahrens * /xxx Full path to file
387789Sahrens * xxx Shorthand for /dev/dsk/xxx
388789Sahrens */
3894276Staylor static nvlist_t *
make_leaf_vdev(const char * arg,uint64_t is_log)3904527Sperrin make_leaf_vdev(const char *arg, uint64_t is_log)
391789Sahrens {
392789Sahrens char path[MAXPATHLEN];
393789Sahrens struct stat64 statbuf;
394789Sahrens nvlist_t *vdev = NULL;
395789Sahrens char *type = NULL;
3962082Seschrock boolean_t wholedisk = B_FALSE;
397789Sahrens
398789Sahrens /*
399789Sahrens * Determine what type of vdev this is, and put the full path into
400789Sahrens * 'path'. We detect whether this is a device of file afterwards by
401789Sahrens * checking the st_mode of the file.
402789Sahrens */
403789Sahrens if (arg[0] == '/') {
404789Sahrens /*
405789Sahrens * Complete device or file path. Exact type is determined by
406789Sahrens * examining the file descriptor afterwards.
407789Sahrens */
4084276Staylor wholedisk = is_whole_disk(arg);
4094276Staylor if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
410789Sahrens (void) fprintf(stderr,
411789Sahrens gettext("cannot open '%s': %s\n"),
412789Sahrens arg, strerror(errno));
413789Sahrens return (NULL);
414789Sahrens }
415789Sahrens
416789Sahrens (void) strlcpy(path, arg, sizeof (path));
417789Sahrens } else {
418789Sahrens /*
419789Sahrens * This may be a short path for a device, or it could be total
420789Sahrens * gibberish. Check to see if it's a known device in
421789Sahrens * /dev/dsk/. As part of this check, see if we've been given a
422789Sahrens * an entire disk (minus the slice number).
423789Sahrens */
424789Sahrens (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
425789Sahrens arg);
4264276Staylor wholedisk = is_whole_disk(path);
4274276Staylor if (!wholedisk && (stat64(path, &statbuf) != 0)) {
428789Sahrens /*
429789Sahrens * If we got ENOENT, then the user gave us
430789Sahrens * gibberish, so try to direct them with a
431789Sahrens * reasonable error message. Otherwise,
432789Sahrens * regurgitate strerror() since it's the best we
433789Sahrens * can do.
434789Sahrens */
435789Sahrens if (errno == ENOENT) {
436789Sahrens (void) fprintf(stderr,
437789Sahrens gettext("cannot open '%s': no such "
438789Sahrens "device in %s\n"), arg, DISK_ROOT);
439789Sahrens (void) fprintf(stderr,
440789Sahrens gettext("must be a full path or "
441789Sahrens "shorthand device name\n"));
442789Sahrens return (NULL);
443789Sahrens } else {
444789Sahrens (void) fprintf(stderr,
445789Sahrens gettext("cannot open '%s': %s\n"),
446789Sahrens path, strerror(errno));
447789Sahrens return (NULL);
448789Sahrens }
449789Sahrens }
450789Sahrens }
451789Sahrens
452789Sahrens /*
453789Sahrens * Determine whether this is a device or a file.
454789Sahrens */
4554276Staylor if (wholedisk || S_ISBLK(statbuf.st_mode)) {
456789Sahrens type = VDEV_TYPE_DISK;
457789Sahrens } else if (S_ISREG(statbuf.st_mode)) {
458789Sahrens type = VDEV_TYPE_FILE;
459789Sahrens } else {
460789Sahrens (void) fprintf(stderr, gettext("cannot use '%s': must be a "
461789Sahrens "block device or regular file\n"), path);
462789Sahrens return (NULL);
463789Sahrens }
464789Sahrens
465789Sahrens /*
466789Sahrens * Finally, we have the complete device or file, and we know that it is
467789Sahrens * acceptable to use. Construct the nvlist to describe this vdev. All
468789Sahrens * vdevs have a 'path' element, and devices also have a 'devid' element.
469789Sahrens */
470789Sahrens verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
471789Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
472789Sahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
4734527Sperrin verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
4741171Seschrock if (strcmp(type, VDEV_TYPE_DISK) == 0)
4751171Seschrock verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
4761171Seschrock (uint64_t)wholedisk) == 0);
477789Sahrens
478789Sahrens /*
479789Sahrens * For a whole disk, defer getting its devid until after labeling it.
480789Sahrens */
481789Sahrens if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
482789Sahrens /*
483789Sahrens * Get the devid for the device.
484789Sahrens */
485789Sahrens int fd;
486789Sahrens ddi_devid_t devid;
487789Sahrens char *minor = NULL, *devid_str = NULL;
488789Sahrens
489789Sahrens if ((fd = open(path, O_RDONLY)) < 0) {
490789Sahrens (void) fprintf(stderr, gettext("cannot open '%s': "
491789Sahrens "%s\n"), path, strerror(errno));
492789Sahrens nvlist_free(vdev);
493789Sahrens return (NULL);
494789Sahrens }
495789Sahrens
496789Sahrens if (devid_get(fd, &devid) == 0) {
497789Sahrens if (devid_get_minor_name(fd, &minor) == 0 &&
498789Sahrens (devid_str = devid_str_encode(devid, minor)) !=
499789Sahrens NULL) {
500789Sahrens verify(nvlist_add_string(vdev,
501789Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0);
502789Sahrens }
503789Sahrens if (devid_str != NULL)
504789Sahrens devid_str_free(devid_str);
505789Sahrens if (minor != NULL)
506789Sahrens devid_str_free(minor);
507789Sahrens devid_free(devid);
508789Sahrens }
509789Sahrens
510789Sahrens (void) close(fd);
511789Sahrens }
512789Sahrens
513789Sahrens return (vdev);
514789Sahrens }
515789Sahrens
516789Sahrens /*
517789Sahrens * Go through and verify the replication level of the pool is consistent.
518789Sahrens * Performs the following checks:
519789Sahrens *
520789Sahrens * For the new spec, verifies that devices in mirrors and raidz are the
521789Sahrens * same size.
522789Sahrens *
523789Sahrens * If the current configuration already has inconsistent replication
524789Sahrens * levels, ignore any other potential problems in the new spec.
525789Sahrens *
526789Sahrens * Otherwise, make sure that the current spec (if there is one) and the new
527789Sahrens * spec have consistent replication levels.
528789Sahrens */
/*
 * Replication characteristics of a top-level vdev, as computed by
 * get_replication().
 */
struct replication_level {
	char *zprl_type;		/* vdev type string */
	uint64_t zprl_children;		/* 1 for a leaf, else child count */
	uint64_t zprl_parity;		/* raidz parity; 0 for anything else */
};
typedef struct replication_level replication_level_t;

/* Size difference (~16MB) above which devices count as differently sized. */
#define	ZPOOL_FUZZ	(16 * 1024 * 1024)
5364276Staylor
537789Sahrens /*
538789Sahrens * Given a list of toplevel vdevs, return the current replication level. If
539789Sahrens * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
540789Sahrens * an error message will be displayed for each self-inconsistent vdev.
541789Sahrens */
5424276Staylor static replication_level_t *
get_replication(nvlist_t * nvroot,boolean_t fatal)5432082Seschrock get_replication(nvlist_t *nvroot, boolean_t fatal)
544789Sahrens {
545789Sahrens nvlist_t **top;
546789Sahrens uint_t t, toplevels;
547789Sahrens nvlist_t **child;
548789Sahrens uint_t c, children;
549789Sahrens nvlist_t *nv;
550789Sahrens char *type;
551789Sahrens replication_level_t lastrep, rep, *ret;
5522082Seschrock boolean_t dontreport;
553789Sahrens
554789Sahrens ret = safe_malloc(sizeof (replication_level_t));
555789Sahrens
556789Sahrens verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
557789Sahrens &top, &toplevels) == 0);
558789Sahrens
5592082Seschrock lastrep.zprl_type = NULL;
560789Sahrens for (t = 0; t < toplevels; t++) {
5614527Sperrin uint64_t is_log = B_FALSE;
5624527Sperrin
563789Sahrens nv = top[t];
564789Sahrens
5654527Sperrin /*
5664527Sperrin * For separate logs we ignore the top level vdev replication
5674527Sperrin * constraints.
5684527Sperrin */
5694527Sperrin (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
5704527Sperrin if (is_log)
5714527Sperrin continue;
5724527Sperrin
5734527Sperrin verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
5744527Sperrin &type) == 0);
575789Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
576789Sahrens &child, &children) != 0) {
577789Sahrens /*
578789Sahrens * This is a 'file' or 'disk' vdev.
579789Sahrens */
5802082Seschrock rep.zprl_type = type;
5812082Seschrock rep.zprl_children = 1;
5822082Seschrock rep.zprl_parity = 0;
583789Sahrens } else {
584789Sahrens uint64_t vdev_size;
585789Sahrens
586789Sahrens /*
587789Sahrens * This is a mirror or RAID-Z vdev. Go through and make
588789Sahrens * sure the contents are all the same (files vs. disks),
589789Sahrens * keeping track of the number of elements in the
590789Sahrens * process.
591789Sahrens *
592789Sahrens * We also check that the size of each vdev (if it can
593789Sahrens * be determined) is the same.
594789Sahrens */
5952082Seschrock rep.zprl_type = type;
5962082Seschrock rep.zprl_children = 0;
5972082Seschrock
5982082Seschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
5992082Seschrock verify(nvlist_lookup_uint64(nv,
6002082Seschrock ZPOOL_CONFIG_NPARITY,
6012082Seschrock &rep.zprl_parity) == 0);
6022082Seschrock assert(rep.zprl_parity != 0);
6032082Seschrock } else {
6042082Seschrock rep.zprl_parity = 0;
6052082Seschrock }
606789Sahrens
607789Sahrens /*
6084527Sperrin * The 'dontreport' variable indicates that we've
609789Sahrens * already reported an error for this spec, so don't
610789Sahrens * bother doing it again.
611789Sahrens */
612789Sahrens type = NULL;
613789Sahrens dontreport = 0;
614789Sahrens vdev_size = -1ULL;
615789Sahrens for (c = 0; c < children; c++) {
616789Sahrens nvlist_t *cnv = child[c];
617789Sahrens char *path;
618789Sahrens struct stat64 statbuf;
619789Sahrens uint64_t size = -1ULL;
620789Sahrens char *childtype;
621789Sahrens int fd, err;
622789Sahrens
6232082Seschrock rep.zprl_children++;
624789Sahrens
625789Sahrens verify(nvlist_lookup_string(cnv,
626789Sahrens ZPOOL_CONFIG_TYPE, &childtype) == 0);
6272142Seschrock
6282142Seschrock /*
6294527Sperrin * If this is a replacing or spare vdev, then
6302142Seschrock * get the real first child of the vdev.
6312142Seschrock */
6322142Seschrock if (strcmp(childtype,
6332142Seschrock VDEV_TYPE_REPLACING) == 0 ||
6342142Seschrock strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
6352142Seschrock nvlist_t **rchild;
6362142Seschrock uint_t rchildren;
6372142Seschrock
6382142Seschrock verify(nvlist_lookup_nvlist_array(cnv,
6392142Seschrock ZPOOL_CONFIG_CHILDREN, &rchild,
6402142Seschrock &rchildren) == 0);
6412142Seschrock assert(rchildren == 2);
6422142Seschrock cnv = rchild[0];
6432142Seschrock
6442142Seschrock verify(nvlist_lookup_string(cnv,
6452142Seschrock ZPOOL_CONFIG_TYPE,
6462142Seschrock &childtype) == 0);
6472142Seschrock }
6482142Seschrock
649789Sahrens verify(nvlist_lookup_string(cnv,
650789Sahrens ZPOOL_CONFIG_PATH, &path) == 0);
651789Sahrens
652789Sahrens /*
653789Sahrens * If we have a raidz/mirror that combines disks
654789Sahrens * with files, report it as an error.
655789Sahrens */
656789Sahrens if (!dontreport && type != NULL &&
657789Sahrens strcmp(type, childtype) != 0) {
658789Sahrens if (ret != NULL)
659789Sahrens free(ret);
660789Sahrens ret = NULL;
661789Sahrens if (fatal)
662789Sahrens vdev_error(gettext(
663789Sahrens "mismatched replication "
664789Sahrens "level: %s contains both "
665789Sahrens "files and devices\n"),
6662082Seschrock rep.zprl_type);
667789Sahrens else
668789Sahrens return (NULL);
6692082Seschrock dontreport = B_TRUE;
670789Sahrens }
671789Sahrens
672789Sahrens /*
673789Sahrens * According to stat(2), the value of 'st_size'
674789Sahrens * is undefined for block devices and character
675789Sahrens * devices. But there is no effective way to
676789Sahrens * determine the real size in userland.
677789Sahrens *
678789Sahrens * Instead, we'll take advantage of an
679789Sahrens * implementation detail of spec_size(). If the
680789Sahrens * device is currently open, then we (should)
681789Sahrens * return a valid size.
682789Sahrens *
683789Sahrens * If we still don't get a valid size (indicated
684789Sahrens * by a size of 0 or MAXOFFSET_T), then ignore
685789Sahrens * this device altogether.
686789Sahrens */
687789Sahrens if ((fd = open(path, O_RDONLY)) >= 0) {
688789Sahrens err = fstat64(fd, &statbuf);
689789Sahrens (void) close(fd);
690789Sahrens } else {
691789Sahrens err = stat64(path, &statbuf);
692789Sahrens }
693789Sahrens
694789Sahrens if (err != 0 ||
695789Sahrens statbuf.st_size == 0 ||
696789Sahrens statbuf.st_size == MAXOFFSET_T)
697789Sahrens continue;
698789Sahrens
699789Sahrens size = statbuf.st_size;
700789Sahrens
701789Sahrens /*
7024276Staylor * Also make sure that devices and
7034276Staylor * slices have a consistent size. If
7044276Staylor * they differ by a significant amount
7054276Staylor * (~16MB) then report an error.
706789Sahrens */
7074276Staylor if (!dontreport &&
7084276Staylor (vdev_size != -1ULL &&
7094276Staylor (labs(size - vdev_size) >
7104276Staylor ZPOOL_FUZZ))) {
711789Sahrens if (ret != NULL)
712789Sahrens free(ret);
713789Sahrens ret = NULL;
714789Sahrens if (fatal)
715789Sahrens vdev_error(gettext(
716789Sahrens "%s contains devices of "
717789Sahrens "different sizes\n"),
7182082Seschrock rep.zprl_type);
719789Sahrens else
720789Sahrens return (NULL);
7212082Seschrock dontreport = B_TRUE;
722789Sahrens }
723789Sahrens
724789Sahrens type = childtype;
725789Sahrens vdev_size = size;
726789Sahrens }
727789Sahrens }
728789Sahrens
729789Sahrens /*
730789Sahrens * At this point, we have the replication of the last toplevel
731789Sahrens * vdev in 'rep'. Compare it to 'lastrep' to see if its
732789Sahrens * different.
733789Sahrens */
7342082Seschrock if (lastrep.zprl_type != NULL) {
7352082Seschrock if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
736789Sahrens if (ret != NULL)
737789Sahrens free(ret);
738789Sahrens ret = NULL;
739789Sahrens if (fatal)
740789Sahrens vdev_error(gettext(
7412082Seschrock "mismatched replication level: "
7422082Seschrock "both %s and %s vdevs are "
743789Sahrens "present\n"),
7442082Seschrock lastrep.zprl_type, rep.zprl_type);
745789Sahrens else
746789Sahrens return (NULL);
7472082Seschrock } else if (lastrep.zprl_parity != rep.zprl_parity) {
748789Sahrens if (ret)
749789Sahrens free(ret);
750789Sahrens ret = NULL;
751789Sahrens if (fatal)
752789Sahrens vdev_error(gettext(
7532082Seschrock "mismatched replication level: "
7542082Seschrock "both %llu and %llu device parity "
7552082Seschrock "%s vdevs are present\n"),
7562082Seschrock lastrep.zprl_parity,
7572082Seschrock rep.zprl_parity,
7582082Seschrock rep.zprl_type);
7592082Seschrock else
7602082Seschrock return (NULL);
7612082Seschrock } else if (lastrep.zprl_children != rep.zprl_children) {
7622082Seschrock if (ret)
7632082Seschrock free(ret);
7642082Seschrock ret = NULL;
7652082Seschrock if (fatal)
7662082Seschrock vdev_error(gettext(
7672082Seschrock "mismatched replication level: "
7682082Seschrock "both %llu-way and %llu-way %s "
769789Sahrens "vdevs are present\n"),
7702082Seschrock lastrep.zprl_children,
7712082Seschrock rep.zprl_children,
7722082Seschrock rep.zprl_type);
773789Sahrens else
774789Sahrens return (NULL);
775789Sahrens }
776789Sahrens }
777789Sahrens lastrep = rep;
778789Sahrens }
779789Sahrens
7802082Seschrock if (ret != NULL)
7812082Seschrock *ret = rep;
782789Sahrens
783789Sahrens return (ret);
784789Sahrens }
785789Sahrens
786789Sahrens /*
787789Sahrens * Check the replication level of the vdev spec against the current pool. Calls
788789Sahrens * get_replication() to make sure the new spec is self-consistent. If the pool
789789Sahrens * has a consistent replication level, then we ignore any errors. Otherwise,
790789Sahrens * report any difference between the two.
791789Sahrens */
7924276Staylor static int
check_replication(nvlist_t * config,nvlist_t * newroot)793789Sahrens check_replication(nvlist_t *config, nvlist_t *newroot)
794789Sahrens {
7954276Staylor nvlist_t **child;
7964276Staylor uint_t children;
797789Sahrens replication_level_t *current = NULL, *new;
798789Sahrens int ret;
799789Sahrens
800789Sahrens /*
801789Sahrens * If we have a current pool configuration, check to see if it's
802789Sahrens * self-consistent. If not, simply return success.
803789Sahrens */
804789Sahrens if (config != NULL) {
805789Sahrens nvlist_t *nvroot;
806789Sahrens
807789Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
808789Sahrens &nvroot) == 0);
8092082Seschrock if ((current = get_replication(nvroot, B_FALSE)) == NULL)
810789Sahrens return (0);
811789Sahrens }
8124276Staylor /*
8134276Staylor * for spares there may be no children, and therefore no
8144276Staylor * replication level to check
8154276Staylor */
8164276Staylor if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
8174276Staylor &child, &children) != 0) || (children == 0)) {
8184276Staylor free(current);
8194276Staylor return (0);
8204276Staylor }
821789Sahrens
822789Sahrens /*
8234527Sperrin * If all we have is logs then there's no replication level to check.
8244527Sperrin */
8254527Sperrin if (num_logs(newroot) == children) {
8264527Sperrin free(current);
8274527Sperrin return (0);
8284527Sperrin }
8294527Sperrin
8304527Sperrin /*
831789Sahrens * Get the replication level of the new vdev spec, reporting any
832789Sahrens * inconsistencies found.
833789Sahrens */
8342082Seschrock if ((new = get_replication(newroot, B_TRUE)) == NULL) {
835789Sahrens free(current);
836789Sahrens return (-1);
837789Sahrens }
838789Sahrens
839789Sahrens /*
840789Sahrens * Check to see if the new vdev spec matches the replication level of
841789Sahrens * the current pool.
842789Sahrens */
843789Sahrens ret = 0;
844789Sahrens if (current != NULL) {
8452082Seschrock if (strcmp(current->zprl_type, new->zprl_type) != 0) {
8462082Seschrock vdev_error(gettext(
8472082Seschrock "mismatched replication level: pool uses %s "
8482082Seschrock "and new vdev is %s\n"),
8492082Seschrock current->zprl_type, new->zprl_type);
8502082Seschrock ret = -1;
8512082Seschrock } else if (current->zprl_parity != new->zprl_parity) {
852789Sahrens vdev_error(gettext(
8532082Seschrock "mismatched replication level: pool uses %llu "
8542082Seschrock "device parity and new vdev uses %llu\n"),
8552082Seschrock current->zprl_parity, new->zprl_parity);
8562082Seschrock ret = -1;
8572082Seschrock } else if (current->zprl_children != new->zprl_children) {
8582082Seschrock vdev_error(gettext(
8592082Seschrock "mismatched replication level: pool uses %llu-way "
8602082Seschrock "%s and new vdev uses %llu-way %s\n"),
8612082Seschrock current->zprl_children, current->zprl_type,
8622082Seschrock new->zprl_children, new->zprl_type);
863789Sahrens ret = -1;
864789Sahrens }
865789Sahrens }
866789Sahrens
867789Sahrens free(new);
868789Sahrens if (current != NULL)
869789Sahrens free(current);
870789Sahrens
871789Sahrens return (ret);
872789Sahrens }
873789Sahrens
874789Sahrens /*
875789Sahrens * Go through and find any whole disks in the vdev specification, labelling them
876789Sahrens * as appropriate. When constructing the vdev spec, we were unable to open this
877789Sahrens * device in order to provide a devid. Now that we have labelled the disk and
878789Sahrens * know that slice 0 is valid, we can construct the devid now.
879789Sahrens *
8804276Staylor * If the disk was already labeled with an EFI label, we will have gotten the
881789Sahrens * devid already (because we were able to open the whole disk). Otherwise, we
882789Sahrens * need to get the devid after we label the disk.
883789Sahrens */
8844276Staylor static int
make_disks(zpool_handle_t * zhp,nvlist_t * nv)8854276Staylor make_disks(zpool_handle_t *zhp, nvlist_t *nv)
886789Sahrens {
887789Sahrens nvlist_t **child;
888789Sahrens uint_t c, children;
889789Sahrens char *type, *path, *diskname;
890789Sahrens char buf[MAXPATHLEN];
8911171Seschrock uint64_t wholedisk;
892789Sahrens int fd;
893789Sahrens int ret;
894789Sahrens ddi_devid_t devid;
895789Sahrens char *minor = NULL, *devid_str = NULL;
896789Sahrens
897789Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
898789Sahrens
899789Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
900789Sahrens &child, &children) != 0) {
901789Sahrens
902789Sahrens if (strcmp(type, VDEV_TYPE_DISK) != 0)
903789Sahrens return (0);
904789Sahrens
905789Sahrens /*
906789Sahrens * We have a disk device. Get the path to the device
9074276Staylor * and see if it's a whole disk by appending the backup
908789Sahrens * slice and stat()ing the device.
909789Sahrens */
910789Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
9111171Seschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
9121171Seschrock &wholedisk) != 0 || !wholedisk)
913789Sahrens return (0);
914789Sahrens
915789Sahrens diskname = strrchr(path, '/');
916789Sahrens assert(diskname != NULL);
917789Sahrens diskname++;
9184276Staylor if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
919789Sahrens return (-1);
920789Sahrens
921789Sahrens /*
922789Sahrens * Fill in the devid, now that we've labeled the disk.
923789Sahrens */
924789Sahrens (void) snprintf(buf, sizeof (buf), "%ss0", path);
925789Sahrens if ((fd = open(buf, O_RDONLY)) < 0) {
926789Sahrens (void) fprintf(stderr,
927789Sahrens gettext("cannot open '%s': %s\n"),
928789Sahrens buf, strerror(errno));
929789Sahrens return (-1);
930789Sahrens }
931789Sahrens
932789Sahrens if (devid_get(fd, &devid) == 0) {
933789Sahrens if (devid_get_minor_name(fd, &minor) == 0 &&
934789Sahrens (devid_str = devid_str_encode(devid, minor)) !=
935789Sahrens NULL) {
936789Sahrens verify(nvlist_add_string(nv,
937789Sahrens ZPOOL_CONFIG_DEVID, devid_str) == 0);
938789Sahrens }
939789Sahrens if (devid_str != NULL)
940789Sahrens devid_str_free(devid_str);
941789Sahrens if (minor != NULL)
942789Sahrens devid_str_free(minor);
943789Sahrens devid_free(devid);
944789Sahrens }
945789Sahrens
9461171Seschrock /*
9471171Seschrock * Update the path to refer to the 's0' slice. The presence of
9481171Seschrock * the 'whole_disk' field indicates to the CLI that we should
9491171Seschrock * chop off the slice number when displaying the device in
9501171Seschrock * future output.
9511171Seschrock */
9521171Seschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
9531171Seschrock
954789Sahrens (void) close(fd);
955789Sahrens
956789Sahrens return (0);
957789Sahrens }
958789Sahrens
959789Sahrens for (c = 0; c < children; c++)
9604276Staylor if ((ret = make_disks(zhp, child[c])) != 0)
961789Sahrens return (ret);
962789Sahrens
9632082Seschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
9642082Seschrock &child, &children) == 0)
9652082Seschrock for (c = 0; c < children; c++)
9664276Staylor if ((ret = make_disks(zhp, child[c])) != 0)
9672082Seschrock return (ret);
9682082Seschrock
9695450Sbrendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
9705450Sbrendan &child, &children) == 0)
9715450Sbrendan for (c = 0; c < children; c++)
9725450Sbrendan if ((ret = make_disks(zhp, child[c])) != 0)
9735450Sbrendan return (ret);
9745450Sbrendan
975789Sahrens return (0);
976789Sahrens }
977789Sahrens
978789Sahrens /*
9792082Seschrock * Determine if the given path is a hot spare within the given configuration.
9802082Seschrock */
9812082Seschrock static boolean_t
is_spare(nvlist_t * config,const char * path)9822082Seschrock is_spare(nvlist_t *config, const char *path)
9832082Seschrock {
9842082Seschrock int fd;
9852082Seschrock pool_state_t state;
9863265Sahrens char *name = NULL;
9872082Seschrock nvlist_t *label;
9882082Seschrock uint64_t guid, spareguid;
9892082Seschrock nvlist_t *nvroot;
9902082Seschrock nvlist_t **spares;
9912082Seschrock uint_t i, nspares;
9922082Seschrock boolean_t inuse;
9932082Seschrock
9942082Seschrock if ((fd = open(path, O_RDONLY)) < 0)
9952082Seschrock return (B_FALSE);
9962082Seschrock
9972082Seschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
9982082Seschrock !inuse ||
9992082Seschrock state != POOL_STATE_SPARE ||
10002082Seschrock zpool_read_label(fd, &label) != 0) {
10013265Sahrens free(name);
10022082Seschrock (void) close(fd);
10032082Seschrock return (B_FALSE);
10042082Seschrock }
10053265Sahrens free(name);
1006*12296SLin.Ling@Sun.COM (void) close(fd);
10072082Seschrock
10082082Seschrock verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
10092082Seschrock nvlist_free(label);
10102082Seschrock
10112082Seschrock verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
10122082Seschrock &nvroot) == 0);
10132082Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
10142082Seschrock &spares, &nspares) == 0) {
10152082Seschrock for (i = 0; i < nspares; i++) {
10162082Seschrock verify(nvlist_lookup_uint64(spares[i],
10172082Seschrock ZPOOL_CONFIG_GUID, &spareguid) == 0);
10182082Seschrock if (spareguid == guid)
10192082Seschrock return (B_TRUE);
10202082Seschrock }
10212082Seschrock }
10222082Seschrock
10232082Seschrock return (B_FALSE);
10242082Seschrock }
10252082Seschrock
10262082Seschrock /*
1027789Sahrens * Go through and find any devices that are in use. We rely on libdiskmgt for
1028789Sahrens * the majority of this task.
1029789Sahrens */
10304276Staylor static int
check_in_use(nvlist_t * config,nvlist_t * nv,boolean_t force,boolean_t replacing,boolean_t isspare)1031*12296SLin.Ling@Sun.COM check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1032*12296SLin.Ling@Sun.COM boolean_t replacing, boolean_t isspare)
1033789Sahrens {
1034789Sahrens nvlist_t **child;
1035789Sahrens uint_t c, children;
1036789Sahrens char *type, *path;
1037789Sahrens int ret;
10382082Seschrock char buf[MAXPATHLEN];
10392082Seschrock uint64_t wholedisk;
1040789Sahrens
1041789Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1042789Sahrens
1043789Sahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1044789Sahrens &child, &children) != 0) {
1045789Sahrens
1046789Sahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1047789Sahrens
10482082Seschrock /*
10492082Seschrock * As a generic check, we look to see if this is a replace of a
10502082Seschrock * hot spare within the same pool. If so, we allow it
10512082Seschrock * regardless of what libdiskmgt or zpool_in_use() says.
10522082Seschrock */
1053*12296SLin.Ling@Sun.COM if (replacing) {
10542082Seschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
10552082Seschrock &wholedisk) == 0 && wholedisk)
10562082Seschrock (void) snprintf(buf, sizeof (buf), "%ss0",
10572082Seschrock path);
10582082Seschrock else
10592082Seschrock (void) strlcpy(buf, path, sizeof (buf));
1060*12296SLin.Ling@Sun.COM
10612082Seschrock if (is_spare(config, buf))
10622082Seschrock return (0);
10632082Seschrock }
10642082Seschrock
1065789Sahrens if (strcmp(type, VDEV_TYPE_DISK) == 0)
10662082Seschrock ret = check_device(path, force, isspare);
1067789Sahrens
1068789Sahrens if (strcmp(type, VDEV_TYPE_FILE) == 0)
10692082Seschrock ret = check_file(path, force, isspare);
1070789Sahrens
1071789Sahrens return (ret);
1072789Sahrens }
1073789Sahrens
1074789Sahrens for (c = 0; c < children; c++)
10752082Seschrock if ((ret = check_in_use(config, child[c], force,
1076*12296SLin.Ling@Sun.COM replacing, B_FALSE)) != 0)
1077789Sahrens return (ret);
1078789Sahrens
10792082Seschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
10802082Seschrock &child, &children) == 0)
10812082Seschrock for (c = 0; c < children; c++)
10822082Seschrock if ((ret = check_in_use(config, child[c], force,
1083*12296SLin.Ling@Sun.COM replacing, B_TRUE)) != 0)
10842082Seschrock return (ret);
10855450Sbrendan
10865450Sbrendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
10875450Sbrendan &child, &children) == 0)
10885450Sbrendan for (c = 0; c < children; c++)
10895450Sbrendan if ((ret = check_in_use(config, child[c], force,
1090*12296SLin.Ling@Sun.COM replacing, B_FALSE)) != 0)
10915450Sbrendan return (ret);
10925450Sbrendan
1093789Sahrens return (0);
1094789Sahrens }
1095789Sahrens
10964276Staylor static const char *
is_grouping(const char * type,int * mindev,int * maxdev)109710105Sadam.leventhal@sun.com is_grouping(const char *type, int *mindev, int *maxdev)
10982082Seschrock {
109910105Sadam.leventhal@sun.com if (strncmp(type, "raidz", 5) == 0) {
110010105Sadam.leventhal@sun.com const char *p = type + 5;
110110105Sadam.leventhal@sun.com char *end;
110210105Sadam.leventhal@sun.com long nparity;
110310105Sadam.leventhal@sun.com
110410105Sadam.leventhal@sun.com if (*p == '\0') {
110510105Sadam.leventhal@sun.com nparity = 1;
110610105Sadam.leventhal@sun.com } else if (*p == '0') {
110710105Sadam.leventhal@sun.com return (NULL); /* no zero prefixes allowed */
110810105Sadam.leventhal@sun.com } else {
110910105Sadam.leventhal@sun.com errno = 0;
111010105Sadam.leventhal@sun.com nparity = strtol(p, &end, 10);
111110105Sadam.leventhal@sun.com if (errno != 0 || nparity < 1 || nparity >= 255 ||
111210105Sadam.leventhal@sun.com *end != '\0')
111310105Sadam.leventhal@sun.com return (NULL);
111410105Sadam.leventhal@sun.com }
111510105Sadam.leventhal@sun.com
11162082Seschrock if (mindev != NULL)
111710105Sadam.leventhal@sun.com *mindev = nparity + 1;
111810105Sadam.leventhal@sun.com if (maxdev != NULL)
111910105Sadam.leventhal@sun.com *maxdev = 255;
11202082Seschrock return (VDEV_TYPE_RAIDZ);
11212082Seschrock }
11222082Seschrock
112310105Sadam.leventhal@sun.com if (maxdev != NULL)
112410105Sadam.leventhal@sun.com *maxdev = INT_MAX;
11252082Seschrock
11262082Seschrock if (strcmp(type, "mirror") == 0) {
11272082Seschrock if (mindev != NULL)
11282082Seschrock *mindev = 2;
11292082Seschrock return (VDEV_TYPE_MIRROR);
11302082Seschrock }
11312082Seschrock
11322082Seschrock if (strcmp(type, "spare") == 0) {
11332082Seschrock if (mindev != NULL)
11342082Seschrock *mindev = 1;
11352082Seschrock return (VDEV_TYPE_SPARE);
11362082Seschrock }
11372082Seschrock
11384527Sperrin if (strcmp(type, "log") == 0) {
11394527Sperrin if (mindev != NULL)
11404527Sperrin *mindev = 1;
11414527Sperrin return (VDEV_TYPE_LOG);
11424527Sperrin }
11434527Sperrin
11445450Sbrendan if (strcmp(type, "cache") == 0) {
11455450Sbrendan if (mindev != NULL)
11465450Sbrendan *mindev = 1;
11475450Sbrendan return (VDEV_TYPE_L2CACHE);
11485450Sbrendan }
11495450Sbrendan
11502082Seschrock return (NULL);
11512082Seschrock }
11522082Seschrock
1153789Sahrens /*
1154789Sahrens * Construct a syntactically valid vdev specification,
1155789Sahrens * and ensure that all devices and files exist and can be opened.
1156789Sahrens * Note: we don't bother freeing anything in the error paths
1157789Sahrens * because the program is just going to exit anyway.
1158789Sahrens */
1159789Sahrens nvlist_t *
construct_spec(int argc,char ** argv)1160789Sahrens construct_spec(int argc, char **argv)
1161789Sahrens {
11625450Sbrendan nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
116310105Sadam.leventhal@sun.com int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
11642082Seschrock const char *type;
11654527Sperrin uint64_t is_log;
11664527Sperrin boolean_t seen_logs;
1167789Sahrens
1168789Sahrens top = NULL;
1169789Sahrens toplevels = 0;
11702082Seschrock spares = NULL;
11715450Sbrendan l2cache = NULL;
11722082Seschrock nspares = 0;
11734527Sperrin nlogs = 0;
11745450Sbrendan nl2cache = 0;
11754527Sperrin is_log = B_FALSE;
11764527Sperrin seen_logs = B_FALSE;
1177789Sahrens
1178789Sahrens while (argc > 0) {
1179789Sahrens nv = NULL;
1180789Sahrens
1181789Sahrens /*
1182789Sahrens * If it's a mirror or raidz, the subsequent arguments are
1183789Sahrens * its leaves -- until we encounter the next mirror or raidz.
1184789Sahrens */
118510105Sadam.leventhal@sun.com if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
11862082Seschrock nvlist_t **child = NULL;
11872082Seschrock int c, children = 0;
1188789Sahrens
11894527Sperrin if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
11904527Sperrin if (spares != NULL) {
11914527Sperrin (void) fprintf(stderr,
11924527Sperrin gettext("invalid vdev "
11934527Sperrin "specification: 'spare' can be "
11944527Sperrin "specified only once\n"));
11954527Sperrin return (NULL);
11964527Sperrin }
11974527Sperrin is_log = B_FALSE;
11984527Sperrin }
11994527Sperrin
12004527Sperrin if (strcmp(type, VDEV_TYPE_LOG) == 0) {
12014527Sperrin if (seen_logs) {
12024527Sperrin (void) fprintf(stderr,
12034527Sperrin gettext("invalid vdev "
12044527Sperrin "specification: 'log' can be "
12054527Sperrin "specified only once\n"));
12064527Sperrin return (NULL);
12074527Sperrin }
12084527Sperrin seen_logs = B_TRUE;
12094527Sperrin is_log = B_TRUE;
12104527Sperrin argc--;
12114527Sperrin argv++;
12124527Sperrin /*
12134527Sperrin * A log is not a real grouping device.
12144527Sperrin * We just set is_log and continue.
12154527Sperrin */
12164527Sperrin continue;
12174527Sperrin }
12184527Sperrin
12195450Sbrendan if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
12205450Sbrendan if (l2cache != NULL) {
12215450Sbrendan (void) fprintf(stderr,
12225450Sbrendan gettext("invalid vdev "
12235450Sbrendan "specification: 'cache' can be "
12245450Sbrendan "specified only once\n"));
12255450Sbrendan return (NULL);
12265450Sbrendan }
12275450Sbrendan is_log = B_FALSE;
12285450Sbrendan }
12295450Sbrendan
12304527Sperrin if (is_log) {
12314527Sperrin if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
12324527Sperrin (void) fprintf(stderr,
12334527Sperrin gettext("invalid vdev "
12344527Sperrin "specification: unsupported 'log' "
12354527Sperrin "device: %s\n"), type);
12364527Sperrin return (NULL);
12374527Sperrin }
12384527Sperrin nlogs++;
12392082Seschrock }
1240789Sahrens
1241789Sahrens for (c = 1; c < argc; c++) {
124210105Sadam.leventhal@sun.com if (is_grouping(argv[c], NULL, NULL) != NULL)
1243789Sahrens break;
1244789Sahrens children++;
1245789Sahrens child = realloc(child,
1246789Sahrens children * sizeof (nvlist_t *));
1247789Sahrens if (child == NULL)
12482856Snd150628 zpool_no_memory();
12494527Sperrin if ((nv = make_leaf_vdev(argv[c], B_FALSE))
12504527Sperrin == NULL)
1251789Sahrens return (NULL);
1252789Sahrens child[children - 1] = nv;
1253789Sahrens }
1254789Sahrens
12552082Seschrock if (children < mindev) {
12562082Seschrock (void) fprintf(stderr, gettext("invalid vdev "
12572082Seschrock "specification: %s requires at least %d "
12582082Seschrock "devices\n"), argv[0], mindev);
12592082Seschrock return (NULL);
12602082Seschrock }
12612082Seschrock
126210105Sadam.leventhal@sun.com if (children > maxdev) {
126310105Sadam.leventhal@sun.com (void) fprintf(stderr, gettext("invalid vdev "
126410105Sadam.leventhal@sun.com "specification: %s supports no more than "
126510105Sadam.leventhal@sun.com "%d devices\n"), argv[0], maxdev);
126610105Sadam.leventhal@sun.com return (NULL);
126710105Sadam.leventhal@sun.com }
126810105Sadam.leventhal@sun.com
1269789Sahrens argc -= c;
1270789Sahrens argv += c;
1271789Sahrens
12722082Seschrock if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
12732082Seschrock spares = child;
12742082Seschrock nspares = children;
12752082Seschrock continue;
12765450Sbrendan } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
12775450Sbrendan l2cache = child;
12785450Sbrendan nl2cache = children;
12795450Sbrendan continue;
12802082Seschrock } else {
12812082Seschrock verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
12822082Seschrock 0) == 0);
12832082Seschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
12842082Seschrock type) == 0);
12854527Sperrin verify(nvlist_add_uint64(nv,
12864527Sperrin ZPOOL_CONFIG_IS_LOG, is_log) == 0);
12872082Seschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
12882082Seschrock verify(nvlist_add_uint64(nv,
12892082Seschrock ZPOOL_CONFIG_NPARITY,
12902082Seschrock mindev - 1) == 0);
12912082Seschrock }
12922082Seschrock verify(nvlist_add_nvlist_array(nv,
12932082Seschrock ZPOOL_CONFIG_CHILDREN, child,
12942082Seschrock children) == 0);
12952082Seschrock
12962082Seschrock for (c = 0; c < children; c++)
12972082Seschrock nvlist_free(child[c]);
12982082Seschrock free(child);
1299789Sahrens }
1300789Sahrens } else {
1301789Sahrens /*
1302789Sahrens * We have a device. Pass off to make_leaf_vdev() to
1303789Sahrens * construct the appropriate nvlist describing the vdev.
1304789Sahrens */
13054527Sperrin if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
1306789Sahrens return (NULL);
13074527Sperrin if (is_log)
13084527Sperrin nlogs++;
1309789Sahrens argc--;
1310789Sahrens argv++;
1311789Sahrens }
1312789Sahrens
1313789Sahrens toplevels++;
1314789Sahrens top = realloc(top, toplevels * sizeof (nvlist_t *));
1315789Sahrens if (top == NULL)
13162856Snd150628 zpool_no_memory();
1317789Sahrens top[toplevels - 1] = nv;
1318789Sahrens }
1319789Sahrens
13205450Sbrendan if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
13212082Seschrock (void) fprintf(stderr, gettext("invalid vdev "
13222082Seschrock "specification: at least one toplevel vdev must be "
13232082Seschrock "specified\n"));
13242082Seschrock return (NULL);
13252082Seschrock }
13262082Seschrock
13274527Sperrin if (seen_logs && nlogs == 0) {
13284527Sperrin (void) fprintf(stderr, gettext("invalid vdev specification: "
13294527Sperrin "log requires at least 1 device\n"));
13304527Sperrin return (NULL);
13314527Sperrin }
13324527Sperrin
1333789Sahrens /*
1334789Sahrens * Finally, create nvroot and add all top-level vdevs to it.
1335789Sahrens */
1336789Sahrens verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1337789Sahrens verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1338789Sahrens VDEV_TYPE_ROOT) == 0);
1339789Sahrens verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1340789Sahrens top, toplevels) == 0);
13412082Seschrock if (nspares != 0)
13422082Seschrock verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
13432082Seschrock spares, nspares) == 0);
13445450Sbrendan if (nl2cache != 0)
13455450Sbrendan verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
13465450Sbrendan l2cache, nl2cache) == 0);
1347789Sahrens
1348789Sahrens for (t = 0; t < toplevels; t++)
1349789Sahrens nvlist_free(top[t]);
13502082Seschrock for (t = 0; t < nspares; t++)
13512082Seschrock nvlist_free(spares[t]);
13525450Sbrendan for (t = 0; t < nl2cache; t++)
13535450Sbrendan nvlist_free(l2cache[t]);
13542082Seschrock if (spares)
13552082Seschrock free(spares);
13565450Sbrendan if (l2cache)
13575450Sbrendan free(l2cache);
1358789Sahrens free(top);
1359789Sahrens
1360789Sahrens return (nvroot);
1361789Sahrens }
1362789Sahrens
136311422SMark.Musante@Sun.COM nvlist_t *
split_mirror_vdev(zpool_handle_t * zhp,char * newname,nvlist_t * props,splitflags_t flags,int argc,char ** argv)136411422SMark.Musante@Sun.COM split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
136511422SMark.Musante@Sun.COM splitflags_t flags, int argc, char **argv)
136611422SMark.Musante@Sun.COM {
136711422SMark.Musante@Sun.COM nvlist_t *newroot = NULL, **child;
136811422SMark.Musante@Sun.COM uint_t c, children;
136911422SMark.Musante@Sun.COM
137011422SMark.Musante@Sun.COM if (argc > 0) {
137111422SMark.Musante@Sun.COM if ((newroot = construct_spec(argc, argv)) == NULL) {
137211422SMark.Musante@Sun.COM (void) fprintf(stderr, gettext("Unable to build a "
137311422SMark.Musante@Sun.COM "pool from the specified devices\n"));
137411422SMark.Musante@Sun.COM return (NULL);
137511422SMark.Musante@Sun.COM }
137611422SMark.Musante@Sun.COM
137711422SMark.Musante@Sun.COM if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
137811422SMark.Musante@Sun.COM nvlist_free(newroot);
137911422SMark.Musante@Sun.COM return (NULL);
138011422SMark.Musante@Sun.COM }
138111422SMark.Musante@Sun.COM
138211422SMark.Musante@Sun.COM /* avoid any tricks in the spec */
138311422SMark.Musante@Sun.COM verify(nvlist_lookup_nvlist_array(newroot,
138411422SMark.Musante@Sun.COM ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
138511422SMark.Musante@Sun.COM for (c = 0; c < children; c++) {
138611422SMark.Musante@Sun.COM char *path;
138711422SMark.Musante@Sun.COM const char *type;
138811422SMark.Musante@Sun.COM int min, max;
138911422SMark.Musante@Sun.COM
139011422SMark.Musante@Sun.COM verify(nvlist_lookup_string(child[c],
139111422SMark.Musante@Sun.COM ZPOOL_CONFIG_PATH, &path) == 0);
139211422SMark.Musante@Sun.COM if ((type = is_grouping(path, &min, &max)) != NULL) {
139311422SMark.Musante@Sun.COM (void) fprintf(stderr, gettext("Cannot use "
139411422SMark.Musante@Sun.COM "'%s' as a device for splitting\n"), type);
139511422SMark.Musante@Sun.COM nvlist_free(newroot);
139611422SMark.Musante@Sun.COM return (NULL);
139711422SMark.Musante@Sun.COM }
139811422SMark.Musante@Sun.COM }
139911422SMark.Musante@Sun.COM }
140011422SMark.Musante@Sun.COM
140111422SMark.Musante@Sun.COM if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
140211422SMark.Musante@Sun.COM if (newroot != NULL)
140311422SMark.Musante@Sun.COM nvlist_free(newroot);
140411422SMark.Musante@Sun.COM return (NULL);
140511422SMark.Musante@Sun.COM }
140611422SMark.Musante@Sun.COM
140711422SMark.Musante@Sun.COM return (newroot);
140811422SMark.Musante@Sun.COM }
14094276Staylor
1410789Sahrens /*
1411789Sahrens * Get and validate the contents of the given vdev specification. This ensures
1412789Sahrens * that the nvlist returned is well-formed, that all the devices exist, and that
1413789Sahrens * they are not currently in use by any other known consumer. The 'poolconfig'
1414789Sahrens * parameter is the current configuration of the pool when adding devices
1415789Sahrens * existing pool, and is used to perform additional checks, such as changing the
1416789Sahrens * replication level of the pool. It can be 'NULL' to indicate that this is a
1417789Sahrens * new pool. The 'force' flag controls whether devices should be forcefully
1418789Sahrens * added, even if they appear in use.
1419789Sahrens */
1420789Sahrens nvlist_t *
make_root_vdev(zpool_handle_t * zhp,int force,int check_rep,boolean_t replacing,boolean_t dryrun,int argc,char ** argv)14214276Staylor make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1422*12296SLin.Ling@Sun.COM boolean_t replacing, boolean_t dryrun, int argc, char **argv)
1423789Sahrens {
1424789Sahrens nvlist_t *newroot;
14254276Staylor nvlist_t *poolconfig = NULL;
1426789Sahrens is_force = force;
1427789Sahrens
1428789Sahrens /*
1429789Sahrens * Construct the vdev specification. If this is successful, we know
1430789Sahrens * that we have a valid specification, and that all devices can be
1431789Sahrens * opened.
1432789Sahrens */
1433789Sahrens if ((newroot = construct_spec(argc, argv)) == NULL)
1434789Sahrens return (NULL);
1435789Sahrens
14364276Staylor if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
14374276Staylor return (NULL);
14384276Staylor
1439789Sahrens /*
1440789Sahrens * Validate each device to make sure that its not shared with another
1441789Sahrens * subsystem. We do this even if 'force' is set, because there are some
1442789Sahrens * uses (such as a dedicated dump device) that even '-f' cannot
1443789Sahrens * override.
1444789Sahrens */
1445*12296SLin.Ling@Sun.COM if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
1446789Sahrens nvlist_free(newroot);
1447789Sahrens return (NULL);
1448789Sahrens }
1449789Sahrens
1450789Sahrens /*
1451789Sahrens * Check the replication level of the given vdevs and report any errors
1452789Sahrens * found. We include the existing pool spec, if any, as we need to
1453789Sahrens * catch changes against the existing replication level.
1454789Sahrens */
1455789Sahrens if (check_rep && check_replication(poolconfig, newroot) != 0) {
1456789Sahrens nvlist_free(newroot);
1457789Sahrens return (NULL);
1458789Sahrens }
1459789Sahrens
1460789Sahrens /*
1461789Sahrens * Run through the vdev specification and label any whole disks found.
1462789Sahrens */
14637343SEric.Taylor@Sun.COM if (!dryrun && make_disks(zhp, newroot) != 0) {
1464789Sahrens nvlist_free(newroot);
1465789Sahrens return (NULL);
1466789Sahrens }
1467789Sahrens
1468789Sahrens return (newroot);
1469789Sahrens }
1470