1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy * CDDL HEADER START
3eda14cbcSMatt Macy *
4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy *
8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy * See the License for the specific language governing permissions
11eda14cbcSMatt Macy * and limitations under the License.
12eda14cbcSMatt Macy *
13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy *
19eda14cbcSMatt Macy * CDDL HEADER END
20eda14cbcSMatt Macy */
21eda14cbcSMatt Macy
22eda14cbcSMatt Macy /*
23eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24eda14cbcSMatt Macy * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
25eda14cbcSMatt Macy * Copyright (c) 2016, 2017 Intel Corporation.
26eda14cbcSMatt Macy * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
27eda14cbcSMatt Macy */
28eda14cbcSMatt Macy
29eda14cbcSMatt Macy /*
30eda14cbcSMatt Macy * Functions to convert between a list of vdevs and an nvlist representing the
31eda14cbcSMatt Macy * configuration. Each entry in the list can be one of:
32eda14cbcSMatt Macy *
33eda14cbcSMatt Macy * Device vdevs
34eda14cbcSMatt Macy * disk=(path=..., devid=...)
35eda14cbcSMatt Macy * file=(path=...)
36eda14cbcSMatt Macy *
37eda14cbcSMatt Macy * Group vdevs
38eda14cbcSMatt Macy * raidz[1|2]=(...)
39eda14cbcSMatt Macy * mirror=(...)
40eda14cbcSMatt Macy *
41eda14cbcSMatt Macy * Hot spares
42eda14cbcSMatt Macy *
43eda14cbcSMatt Macy * While the underlying implementation supports it, group vdevs cannot contain
44eda14cbcSMatt Macy * other group vdevs. All userland verification of devices is contained within
45eda14cbcSMatt Macy * this file. If successful, the nvlist returned can be passed directly to the
46eda14cbcSMatt Macy * kernel; we've done as much verification as possible in userland.
47eda14cbcSMatt Macy *
48eda14cbcSMatt Macy * Hot spares are a special case, and passed down as an array of disk vdevs, at
49eda14cbcSMatt Macy * the same level as the root of the vdev tree.
50eda14cbcSMatt Macy *
51eda14cbcSMatt Macy * The only function exported by this file is 'make_root_vdev'. The
52eda14cbcSMatt Macy * function performs several passes:
53eda14cbcSMatt Macy *
54eda14cbcSMatt Macy * 1. Construct the vdev specification. Performs syntax validation and
55eda14cbcSMatt Macy * makes sure each device is valid.
 * 2. Check for devices in use. Uses libblkid to make sure that none of
 *    the devices are already in use. Some conflicts can be overridden
 *    using the 'force' flag, others cannot.
 * 3. Check for replication errors if the 'force' flag is not specified.
 *    Validates that the replication level is consistent across the
 *    entire pool.
62eda14cbcSMatt Macy * 4. Call libzfs to label any whole disks with an EFI label.
63eda14cbcSMatt Macy */
64eda14cbcSMatt Macy
65eda14cbcSMatt Macy #include <assert.h>
66eda14cbcSMatt Macy #include <ctype.h>
67eda14cbcSMatt Macy #include <errno.h>
68eda14cbcSMatt Macy #include <fcntl.h>
69eda14cbcSMatt Macy #include <libintl.h>
70eda14cbcSMatt Macy #include <libnvpair.h>
71eda14cbcSMatt Macy #include <libzutil.h>
72eda14cbcSMatt Macy #include <limits.h>
73eda14cbcSMatt Macy #include <sys/spa.h>
74eda14cbcSMatt Macy #include <stdio.h>
75eda14cbcSMatt Macy #include <string.h>
76eda14cbcSMatt Macy #include <unistd.h>
77eda14cbcSMatt Macy #include "zpool_util.h"
78eda14cbcSMatt Macy #include <sys/zfs_context.h>
79eda14cbcSMatt Macy
80eda14cbcSMatt Macy #include <scsi/scsi.h>
81eda14cbcSMatt Macy #include <scsi/sg.h>
82eda14cbcSMatt Macy #include <sys/efi_partition.h>
83eda14cbcSMatt Macy #include <sys/stat.h>
84eda14cbcSMatt Macy #include <sys/mntent.h>
85eda14cbcSMatt Macy #include <uuid/uuid.h>
86eda14cbcSMatt Macy #include <blkid/blkid.h>
87eda14cbcSMatt Macy
/*
 * One row of the sector size override table: a fixed-width identification
 * string and the sector size to report for devices that match it.
 */
typedef struct vdev_disk_db_entry {
	char id[24];		/* compared bytewise; not NUL-terminated */
	int sector_size;	/* sector size handed back on a match */
} vdev_disk_db_entry_t;
93eda14cbcSMatt Macy
94eda14cbcSMatt Macy /*
 * Database of block devices that lie about physical sector sizes. Each
 * entry pairs an identification string with the sector size that should
 * be used for a matching device. The identification string must be
 * precisely 24 characters to avoid false negatives: the lookup in
 * check_sector_size_database() compares exactly 24 bytes of the INQUIRY
 * reply against 'id', and a shorter literal is NUL-padded by the
 * initializer.
 *
 * NOTE(review): several literals below are shorter than 24 characters as
 * shown here. That may be an artifact of how this listing was captured
 * (runs of padding spaces collapsed) - confirm against the real file
 * before relying on or copying these strings.
 */
99eda14cbcSMatt Macy static vdev_disk_db_entry_t vdev_disk_database[] = {
100eda14cbcSMatt Macy {"ATA ADATA SSD S396 3", 8192},
101eda14cbcSMatt Macy {"ATA APPLE SSD SM128E", 8192},
102eda14cbcSMatt Macy {"ATA APPLE SSD SM256E", 8192},
103eda14cbcSMatt Macy {"ATA APPLE SSD SM512E", 8192},
104eda14cbcSMatt Macy {"ATA APPLE SSD SM768E", 8192},
105eda14cbcSMatt Macy {"ATA C400-MTFDDAC064M", 8192},
106eda14cbcSMatt Macy {"ATA C400-MTFDDAC128M", 8192},
107eda14cbcSMatt Macy {"ATA C400-MTFDDAC256M", 8192},
108eda14cbcSMatt Macy {"ATA C400-MTFDDAC512M", 8192},
109eda14cbcSMatt Macy {"ATA Corsair Force 3 ", 8192},
110eda14cbcSMatt Macy {"ATA Corsair Force GS", 8192},
111eda14cbcSMatt Macy {"ATA INTEL SSDSA2CT04", 8192},
112eda14cbcSMatt Macy {"ATA INTEL SSDSA2BZ10", 8192},
113eda14cbcSMatt Macy {"ATA INTEL SSDSA2BZ20", 8192},
114eda14cbcSMatt Macy {"ATA INTEL SSDSA2BZ30", 8192},
115eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW04", 8192},
116eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW08", 8192},
117eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW12", 8192},
118eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW16", 8192},
119eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW30", 8192},
120eda14cbcSMatt Macy {"ATA INTEL SSDSA2CW60", 8192},
121eda14cbcSMatt Macy {"ATA INTEL SSDSC2CT06", 8192},
122eda14cbcSMatt Macy {"ATA INTEL SSDSC2CT12", 8192},
123eda14cbcSMatt Macy {"ATA INTEL SSDSC2CT18", 8192},
124eda14cbcSMatt Macy {"ATA INTEL SSDSC2CT24", 8192},
125eda14cbcSMatt Macy {"ATA INTEL SSDSC2CW06", 8192},
126eda14cbcSMatt Macy {"ATA INTEL SSDSC2CW12", 8192},
127eda14cbcSMatt Macy {"ATA INTEL SSDSC2CW18", 8192},
128eda14cbcSMatt Macy {"ATA INTEL SSDSC2CW24", 8192},
129eda14cbcSMatt Macy {"ATA INTEL SSDSC2CW48", 8192},
130eda14cbcSMatt Macy {"ATA KINGSTON SH100S3", 8192},
131eda14cbcSMatt Macy {"ATA KINGSTON SH103S3", 8192},
132eda14cbcSMatt Macy {"ATA M4-CT064M4SSD2 ", 8192},
133eda14cbcSMatt Macy {"ATA M4-CT128M4SSD2 ", 8192},
134eda14cbcSMatt Macy {"ATA M4-CT256M4SSD2 ", 8192},
135eda14cbcSMatt Macy {"ATA M4-CT512M4SSD2 ", 8192},
136eda14cbcSMatt Macy {"ATA OCZ-AGILITY2 ", 8192},
137eda14cbcSMatt Macy {"ATA OCZ-AGILITY3 ", 8192},
138eda14cbcSMatt Macy {"ATA OCZ-VERTEX2 3.5 ", 8192},
139eda14cbcSMatt Macy {"ATA OCZ-VERTEX3 ", 8192},
140eda14cbcSMatt Macy {"ATA OCZ-VERTEX3 LT ", 8192},
141eda14cbcSMatt Macy {"ATA OCZ-VERTEX3 MI ", 8192},
142eda14cbcSMatt Macy {"ATA OCZ-VERTEX4 ", 8192},
143eda14cbcSMatt Macy {"ATA SAMSUNG MZ7WD120", 8192},
144eda14cbcSMatt Macy {"ATA SAMSUNG MZ7WD240", 8192},
145eda14cbcSMatt Macy {"ATA SAMSUNG MZ7WD480", 8192},
146eda14cbcSMatt Macy {"ATA SAMSUNG MZ7WD960", 8192},
147eda14cbcSMatt Macy {"ATA SAMSUNG SSD 830 ", 8192},
148eda14cbcSMatt Macy {"ATA Samsung SSD 840 ", 8192},
149eda14cbcSMatt Macy {"ATA SanDisk SSD U100", 8192},
150eda14cbcSMatt Macy {"ATA TOSHIBA THNSNH06", 8192},
151eda14cbcSMatt Macy {"ATA TOSHIBA THNSNH12", 8192},
152eda14cbcSMatt Macy {"ATA TOSHIBA THNSNH25", 8192},
153eda14cbcSMatt Macy {"ATA TOSHIBA THNSNH51", 8192},
154eda14cbcSMatt Macy {"ATA APPLE SSD TS064C", 4096},
155eda14cbcSMatt Macy {"ATA APPLE SSD TS128C", 4096},
156eda14cbcSMatt Macy {"ATA APPLE SSD TS256C", 4096},
157eda14cbcSMatt Macy {"ATA APPLE SSD TS512C", 4096},
158eda14cbcSMatt Macy {"ATA INTEL SSDSA2M040", 4096},
159eda14cbcSMatt Macy {"ATA INTEL SSDSA2M080", 4096},
160eda14cbcSMatt Macy {"ATA INTEL SSDSA2M160", 4096},
161eda14cbcSMatt Macy {"ATA INTEL SSDSC2MH12", 4096},
162eda14cbcSMatt Macy {"ATA INTEL SSDSC2MH25", 4096},
163eda14cbcSMatt Macy {"ATA OCZ CORE_SSD ", 4096},
164eda14cbcSMatt Macy {"ATA OCZ-VERTEX ", 4096},
165eda14cbcSMatt Macy {"ATA SAMSUNG MCCOE32G", 4096},
166eda14cbcSMatt Macy {"ATA SAMSUNG MCCOE64G", 4096},
167eda14cbcSMatt Macy {"ATA SAMSUNG SSD PM80", 4096},
168eda14cbcSMatt Macy /* Flash drives optimized for 4KB IOs on larger pages */
169eda14cbcSMatt Macy {"ATA INTEL SSDSC2BA10", 4096},
170eda14cbcSMatt Macy {"ATA INTEL SSDSC2BA20", 4096},
171eda14cbcSMatt Macy {"ATA INTEL SSDSC2BA40", 4096},
172eda14cbcSMatt Macy {"ATA INTEL SSDSC2BA80", 4096},
173eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB08", 4096},
174eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB12", 4096},
175eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB16", 4096},
176eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB24", 4096},
177eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB30", 4096},
178eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB40", 4096},
179eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB48", 4096},
180eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB60", 4096},
181eda14cbcSMatt Macy {"ATA INTEL SSDSC2BB80", 4096},
182eda14cbcSMatt Macy {"ATA INTEL SSDSC2BW24", 4096},
183eda14cbcSMatt Macy {"ATA INTEL SSDSC2BW48", 4096},
184eda14cbcSMatt Macy {"ATA INTEL SSDSC2BP24", 4096},
185eda14cbcSMatt Macy {"ATA INTEL SSDSC2BP48", 4096},
186eda14cbcSMatt Macy {"NA SmrtStorSDLKAE9W", 4096},
187eda14cbcSMatt Macy {"NVMe Amazon EC2 NVMe ", 4096},
188eda14cbcSMatt Macy /* Imported from Open Solaris */
189eda14cbcSMatt Macy {"ATA MARVELL SD88SA02", 4096},
190eda14cbcSMatt Macy /* Advanced format Hard drives */
191eda14cbcSMatt Macy {"ATA Hitachi HDS5C303", 4096},
192eda14cbcSMatt Macy {"ATA SAMSUNG HD204UI ", 4096},
193eda14cbcSMatt Macy {"ATA ST2000DL004 HD20", 4096},
194eda14cbcSMatt Macy {"ATA WDC WD10EARS-00M", 4096},
195eda14cbcSMatt Macy {"ATA WDC WD10EARS-00S", 4096},
196eda14cbcSMatt Macy {"ATA WDC WD10EARS-00Z", 4096},
197eda14cbcSMatt Macy {"ATA WDC WD15EARS-00M", 4096},
198eda14cbcSMatt Macy {"ATA WDC WD15EARS-00S", 4096},
199eda14cbcSMatt Macy {"ATA WDC WD15EARS-00Z", 4096},
200eda14cbcSMatt Macy {"ATA WDC WD20EARS-00M", 4096},
201eda14cbcSMatt Macy {"ATA WDC WD20EARS-00S", 4096},
202eda14cbcSMatt Macy {"ATA WDC WD20EARS-00Z", 4096},
203eda14cbcSMatt Macy {"ATA WDC WD1600BEVT-0", 4096},
204eda14cbcSMatt Macy {"ATA WDC WD2500BEVT-0", 4096},
205eda14cbcSMatt Macy {"ATA WDC WD3200BEVT-0", 4096},
206eda14cbcSMatt Macy {"ATA WDC WD5000BEVT-0", 4096},
207eda14cbcSMatt Macy };
208eda14cbcSMatt Macy
209eda14cbcSMatt Macy
210eda14cbcSMatt Macy #define INQ_REPLY_LEN 96
211eda14cbcSMatt Macy #define INQ_CMD_LEN 6
212eda14cbcSMatt Macy
213eda14cbcSMatt Macy static const int vdev_disk_database_size =
214eda14cbcSMatt Macy sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
215eda14cbcSMatt Macy
216eda14cbcSMatt Macy boolean_t
check_sector_size_database(char * path,int * sector_size)217eda14cbcSMatt Macy check_sector_size_database(char *path, int *sector_size)
218eda14cbcSMatt Macy {
219eda14cbcSMatt Macy unsigned char inq_buff[INQ_REPLY_LEN];
220eda14cbcSMatt Macy unsigned char sense_buffer[32];
221eda14cbcSMatt Macy unsigned char inq_cmd_blk[INQ_CMD_LEN] =
222eda14cbcSMatt Macy {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
223eda14cbcSMatt Macy sg_io_hdr_t io_hdr;
224eda14cbcSMatt Macy int error;
225eda14cbcSMatt Macy int fd;
226eda14cbcSMatt Macy int i;
227eda14cbcSMatt Macy
228eda14cbcSMatt Macy /* Prepare INQUIRY command */
229eda14cbcSMatt Macy memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
230eda14cbcSMatt Macy io_hdr.interface_id = 'S';
231eda14cbcSMatt Macy io_hdr.cmd_len = sizeof (inq_cmd_blk);
232eda14cbcSMatt Macy io_hdr.mx_sb_len = sizeof (sense_buffer);
233eda14cbcSMatt Macy io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
234eda14cbcSMatt Macy io_hdr.dxfer_len = INQ_REPLY_LEN;
235eda14cbcSMatt Macy io_hdr.dxferp = inq_buff;
236eda14cbcSMatt Macy io_hdr.cmdp = inq_cmd_blk;
237eda14cbcSMatt Macy io_hdr.sbp = sense_buffer;
238eda14cbcSMatt Macy io_hdr.timeout = 10; /* 10 milliseconds is ample time */
239eda14cbcSMatt Macy
240eda14cbcSMatt Macy if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
241eda14cbcSMatt Macy return (B_FALSE);
242eda14cbcSMatt Macy
243eda14cbcSMatt Macy error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
244eda14cbcSMatt Macy
245eda14cbcSMatt Macy (void) close(fd);
246eda14cbcSMatt Macy
247eda14cbcSMatt Macy if (error < 0)
248eda14cbcSMatt Macy return (B_FALSE);
249eda14cbcSMatt Macy
250eda14cbcSMatt Macy if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
251eda14cbcSMatt Macy return (B_FALSE);
252eda14cbcSMatt Macy
253eda14cbcSMatt Macy for (i = 0; i < vdev_disk_database_size; i++) {
254eda14cbcSMatt Macy if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
255eda14cbcSMatt Macy continue;
256eda14cbcSMatt Macy
257eda14cbcSMatt Macy *sector_size = vdev_disk_database[i].sector_size;
258eda14cbcSMatt Macy return (B_TRUE);
259eda14cbcSMatt Macy }
260eda14cbcSMatt Macy
261eda14cbcSMatt Macy return (B_FALSE);
262eda14cbcSMatt Macy }
263eda14cbcSMatt Macy
264eda14cbcSMatt Macy static int
check_slice(const char * path,blkid_cache cache,int force,boolean_t isspare)265eda14cbcSMatt Macy check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
266eda14cbcSMatt Macy {
267eda14cbcSMatt Macy int err;
268eda14cbcSMatt Macy char *value;
269eda14cbcSMatt Macy
270eda14cbcSMatt Macy /* No valid type detected device is safe to use */
271eda14cbcSMatt Macy value = blkid_get_tag_value(cache, "TYPE", path);
272eda14cbcSMatt Macy if (value == NULL)
273eda14cbcSMatt Macy return (0);
274eda14cbcSMatt Macy
275eda14cbcSMatt Macy /*
276eda14cbcSMatt Macy * If libblkid detects a ZFS device, we check the device
277eda14cbcSMatt Macy * using check_file() to see if it's safe. The one safe
278eda14cbcSMatt Macy * case is a spare device shared between multiple pools.
279eda14cbcSMatt Macy */
280eda14cbcSMatt Macy if (strcmp(value, "zfs_member") == 0) {
281eda14cbcSMatt Macy err = check_file(path, force, isspare);
282eda14cbcSMatt Macy } else {
283eda14cbcSMatt Macy if (force) {
284eda14cbcSMatt Macy err = 0;
285eda14cbcSMatt Macy } else {
286eda14cbcSMatt Macy err = -1;
287eda14cbcSMatt Macy vdev_error(gettext("%s contains a filesystem of "
288eda14cbcSMatt Macy "type '%s'\n"), path, value);
289eda14cbcSMatt Macy }
290eda14cbcSMatt Macy }
291eda14cbcSMatt Macy
292eda14cbcSMatt Macy free(value);
293eda14cbcSMatt Macy
294eda14cbcSMatt Macy return (err);
295eda14cbcSMatt Macy }
296eda14cbcSMatt Macy
297eda14cbcSMatt Macy /*
298eda14cbcSMatt Macy * Validate that a disk including all partitions are safe to use.
299eda14cbcSMatt Macy *
300eda14cbcSMatt Macy * For EFI labeled disks this can done relatively easily with the libefi
301eda14cbcSMatt Macy * library. The partition numbers are extracted from the label and used
302eda14cbcSMatt Macy * to generate the expected /dev/ paths. Each partition can then be
303eda14cbcSMatt Macy * checked for conflicts.
304eda14cbcSMatt Macy *
305eda14cbcSMatt Macy * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
306eda14cbcSMatt Macy * but due to the lack of a readily available libraries this scanning is
307eda14cbcSMatt Macy * not implemented. Instead only the device path as given is checked.
308eda14cbcSMatt Macy */
309eda14cbcSMatt Macy static int
check_disk(const char * path,blkid_cache cache,int force,boolean_t isspare,boolean_t iswholedisk)310eda14cbcSMatt Macy check_disk(const char *path, blkid_cache cache, int force,
311eda14cbcSMatt Macy boolean_t isspare, boolean_t iswholedisk)
312eda14cbcSMatt Macy {
313eda14cbcSMatt Macy struct dk_gpt *vtoc;
314eda14cbcSMatt Macy char slice_path[MAXPATHLEN];
315eda14cbcSMatt Macy int err = 0;
316eda14cbcSMatt Macy int fd, i;
317eda14cbcSMatt Macy int flags = O_RDONLY|O_DIRECT;
318eda14cbcSMatt Macy
319eda14cbcSMatt Macy if (!iswholedisk)
320eda14cbcSMatt Macy return (check_slice(path, cache, force, isspare));
321eda14cbcSMatt Macy
322eda14cbcSMatt Macy /* only spares can be shared, other devices require exclusive access */
323eda14cbcSMatt Macy if (!isspare)
324eda14cbcSMatt Macy flags |= O_EXCL;
325eda14cbcSMatt Macy
326eda14cbcSMatt Macy if ((fd = open(path, flags)) < 0) {
327eda14cbcSMatt Macy char *value = blkid_get_tag_value(cache, "TYPE", path);
328eda14cbcSMatt Macy (void) fprintf(stderr, gettext("%s is in use and contains "
329eda14cbcSMatt Macy "a %s filesystem.\n"), path, value ? value : "unknown");
330eda14cbcSMatt Macy free(value);
331eda14cbcSMatt Macy return (-1);
332eda14cbcSMatt Macy }
333eda14cbcSMatt Macy
334eda14cbcSMatt Macy /*
335eda14cbcSMatt Macy * Expected to fail for non-EFI labeled disks. Just check the device
336eda14cbcSMatt Macy * as given and do not attempt to detect and scan partitions.
337eda14cbcSMatt Macy */
338eda14cbcSMatt Macy err = efi_alloc_and_read(fd, &vtoc);
339eda14cbcSMatt Macy if (err) {
340eda14cbcSMatt Macy (void) close(fd);
341eda14cbcSMatt Macy return (check_slice(path, cache, force, isspare));
342eda14cbcSMatt Macy }
343eda14cbcSMatt Macy
344eda14cbcSMatt Macy /*
345eda14cbcSMatt Macy * The primary efi partition label is damaged however the secondary
346eda14cbcSMatt Macy * label at the end of the device is intact. Rather than use this
347eda14cbcSMatt Macy * label we should play it safe and treat this as a non efi device.
348eda14cbcSMatt Macy */
349eda14cbcSMatt Macy if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
350eda14cbcSMatt Macy efi_free(vtoc);
351eda14cbcSMatt Macy (void) close(fd);
352eda14cbcSMatt Macy
353eda14cbcSMatt Macy if (force) {
354eda14cbcSMatt Macy /* Partitions will now be created using the backup */
355eda14cbcSMatt Macy return (0);
356eda14cbcSMatt Macy } else {
357eda14cbcSMatt Macy vdev_error(gettext("%s contains a corrupt primary "
358eda14cbcSMatt Macy "EFI label.\n"), path);
359eda14cbcSMatt Macy return (-1);
360eda14cbcSMatt Macy }
361eda14cbcSMatt Macy }
362eda14cbcSMatt Macy
363eda14cbcSMatt Macy for (i = 0; i < vtoc->efi_nparts; i++) {
364eda14cbcSMatt Macy
365eda14cbcSMatt Macy if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
366eda14cbcSMatt Macy uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
367eda14cbcSMatt Macy continue;
368eda14cbcSMatt Macy
369eda14cbcSMatt Macy if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
370eda14cbcSMatt Macy (void) snprintf(slice_path, sizeof (slice_path),
371eda14cbcSMatt Macy "%s%s%d", path, "-part", i+1);
372eda14cbcSMatt Macy else
373eda14cbcSMatt Macy (void) snprintf(slice_path, sizeof (slice_path),
374eda14cbcSMatt Macy "%s%s%d", path, isdigit(path[strlen(path)-1]) ?
375eda14cbcSMatt Macy "p" : "", i+1);
376eda14cbcSMatt Macy
377eda14cbcSMatt Macy err = check_slice(slice_path, cache, force, isspare);
378eda14cbcSMatt Macy if (err)
379eda14cbcSMatt Macy break;
380eda14cbcSMatt Macy }
381eda14cbcSMatt Macy
382eda14cbcSMatt Macy efi_free(vtoc);
383eda14cbcSMatt Macy (void) close(fd);
384eda14cbcSMatt Macy
385eda14cbcSMatt Macy return (err);
386eda14cbcSMatt Macy }
387eda14cbcSMatt Macy
388eda14cbcSMatt Macy int
check_device(const char * path,boolean_t force,boolean_t isspare,boolean_t iswholedisk)389eda14cbcSMatt Macy check_device(const char *path, boolean_t force,
390eda14cbcSMatt Macy boolean_t isspare, boolean_t iswholedisk)
391eda14cbcSMatt Macy {
392eda14cbcSMatt Macy blkid_cache cache;
393eda14cbcSMatt Macy int error;
394eda14cbcSMatt Macy
395eda14cbcSMatt Macy error = blkid_get_cache(&cache, NULL);
396eda14cbcSMatt Macy if (error != 0) {
397eda14cbcSMatt Macy (void) fprintf(stderr, gettext("unable to access the blkid "
398eda14cbcSMatt Macy "cache.\n"));
399eda14cbcSMatt Macy return (-1);
400eda14cbcSMatt Macy }
401eda14cbcSMatt Macy
402eda14cbcSMatt Macy error = check_disk(path, cache, force, isspare, iswholedisk);
403eda14cbcSMatt Macy blkid_put_cache(cache);
404eda14cbcSMatt Macy
405eda14cbcSMatt Macy return (error);
406eda14cbcSMatt Macy }
40716038816SMartin Matuska
40816038816SMartin Matuska void
after_zpool_upgrade(zpool_handle_t * zhp)40916038816SMartin Matuska after_zpool_upgrade(zpool_handle_t *zhp)
41016038816SMartin Matuska {
411e92ffd9bSMartin Matuska (void) zhp;
41216038816SMartin Matuska }
4131f88aa09SMartin Matuska
4141f88aa09SMartin Matuska int
check_file(const char * file,boolean_t force,boolean_t isspare)4151f88aa09SMartin Matuska check_file(const char *file, boolean_t force, boolean_t isspare)
4161f88aa09SMartin Matuska {
4171f88aa09SMartin Matuska return (check_file_generic(file, force, isspare));
4181f88aa09SMartin Matuska }
419b356da80SMartin Matuska
/*
 * Read the contents of a sysfs file into a newly allocated string.
 *
 * At most st_size bytes (as reported by fstat()) are read with a single
 * read() call. One trailing newline, if present, is removed.
 *
 * Returns the string on success (the caller must free it), or NULL when
 * the file cannot be opened, stat'ed or read, or when allocation fails.
 */
static char *
zpool_sysfs_gets(char *path)
{
	struct stat st;
	char *text;
	ssize_t nread;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return (NULL);

	if (fstat(fd, &st) != 0) {
		close(fd);
		return (NULL);
	}

	/* One extra zeroed byte keeps the result NUL-terminated. */
	text = calloc(st.st_size + 1, sizeof (*text));
	if (text == NULL) {
		close(fd);
		return (NULL);
	}

	/*
	 * Reading fewer bytes than st_size is fine: sysfs files report a
	 * size of 4k even if they only return a small string.
	 */
	nread = read(fd, text, st.st_size);
	close(fd);

	if (nread < 0) {
		free(text);
		return (NULL);
	}

	/* Remove trailing newline */
	if (nread > 0 && text[nread - 1] == '\n')
		text[nread - 1] = 0;

	return (text);
}
468b356da80SMartin Matuska
/*
 * Write a string to a sysfs file.
 *
 * The stream is buffered, so the data normally reaches the file only when
 * the stream is flushed by fclose(). A failure reported by fclose() is
 * therefore treated as a failed write rather than ignored.
 *
 * Returns 0 on success, -1 if the file cannot be opened for writing, and
 * -2 if writing or flushing the string fails.
 */
static int
zpool_sysfs_puts(const char *path, const char *str)
{
	FILE *file;
	int rc = 0;

	file = fopen(path, "w");
	if (file == NULL)
		return (-1);

	if (fputs(str, file) < 0)
		rc = -2;

	/* Always close; a flush error here means the value was not written. */
	if (fclose(file) != 0)
		rc = -2;

	return (rc);
}
490b356da80SMartin Matuska
491b356da80SMartin Matuska /* Given a vdev nvlist_t, rescan its enclosure sysfs path */
492b356da80SMartin Matuska static void
rescan_vdev_config_dev_sysfs_path(nvlist_t * vdev_nv)493b356da80SMartin Matuska rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv)
494b356da80SMartin Matuska {
495b356da80SMartin Matuska update_vdev_config_dev_sysfs_path(vdev_nv,
496b356da80SMartin Matuska fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH),
497b356da80SMartin Matuska ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
498b356da80SMartin Matuska }
499b356da80SMartin Matuska
/*
 * Translate a power string into a state.
 *
 * Returns 0 for "off" or "0", 1 for "on" or "1", and -1 for anything
 * else. The comparison is exact and case-sensitive.
 */
static int
zpool_power_parse_value(char *str)
{
	static const struct {
		const char *word;
		int state;
	} known[] = {
		{ "off", 0 },
		{ "0", 0 },
		{ "on", 1 },
		{ "1", 1 },
	};

	for (size_t i = 0; i < sizeof (known) / sizeof (known[0]); i++) {
		if (strcmp(str, known[i].word) == 0)
			return (known[i].state);
	}

	return (-1);
}
514b356da80SMartin Matuska
515b356da80SMartin Matuska /*
516b356da80SMartin Matuska * Given a vdev string return an allocated string containing the sysfs path to
517b356da80SMartin Matuska * its power control file. Also do a check if the power control file really
518b356da80SMartin Matuska * exists and has correct permissions.
519b356da80SMartin Matuska *
520b356da80SMartin Matuska * Example returned strings:
521b356da80SMartin Matuska *
522b356da80SMartin Matuska * /sys/class/enclosure/0:0:122:0/10/power_status
523b356da80SMartin Matuska * /sys/bus/pci/slots/10/power
524b356da80SMartin Matuska *
525b356da80SMartin Matuska * Returns allocated string on success (which must be freed), NULL on failure.
526b356da80SMartin Matuska */
527b356da80SMartin Matuska static char *
zpool_power_sysfs_path(zpool_handle_t * zhp,char * vdev)528b356da80SMartin Matuska zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev)
529b356da80SMartin Matuska {
530b356da80SMartin Matuska const char *enc_sysfs_dir = NULL;
531b356da80SMartin Matuska char *path = NULL;
532b356da80SMartin Matuska nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
533b356da80SMartin Matuska
534b356da80SMartin Matuska if (vdev_nv == NULL) {
535b356da80SMartin Matuska return (NULL);
536b356da80SMartin Matuska }
537b356da80SMartin Matuska
538b356da80SMartin Matuska /* Make sure we're getting the updated enclosure sysfs path */
539b356da80SMartin Matuska rescan_vdev_config_dev_sysfs_path(vdev_nv);
540b356da80SMartin Matuska
541b356da80SMartin Matuska if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
542b356da80SMartin Matuska &enc_sysfs_dir) != 0) {
543b356da80SMartin Matuska return (NULL);
544b356da80SMartin Matuska }
545b356da80SMartin Matuska
546b356da80SMartin Matuska if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1)
547b356da80SMartin Matuska return (NULL);
548b356da80SMartin Matuska
549b356da80SMartin Matuska if (access(path, W_OK) != 0) {
550b356da80SMartin Matuska free(path);
551b356da80SMartin Matuska path = NULL;
552b356da80SMartin Matuska /* No HDD 'power_control' file, maybe it's NVMe? */
553b356da80SMartin Matuska if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) {
554b356da80SMartin Matuska return (NULL);
555b356da80SMartin Matuska }
556b356da80SMartin Matuska
557b356da80SMartin Matuska if (access(path, R_OK | W_OK) != 0) {
558b356da80SMartin Matuska /* Not NVMe either */
559b356da80SMartin Matuska free(path);
560b356da80SMartin Matuska return (NULL);
561b356da80SMartin Matuska }
562b356da80SMartin Matuska }
563b356da80SMartin Matuska
564b356da80SMartin Matuska return (path);
565b356da80SMartin Matuska }
566b356da80SMartin Matuska
567b356da80SMartin Matuska /*
568b356da80SMartin Matuska * Given a path to a sysfs power control file, return B_TRUE if you should use
569b356da80SMartin Matuska * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control).
570b356da80SMartin Matuska */
571b356da80SMartin Matuska static boolean_t
zpool_power_use_word(char * sysfs_path)572b356da80SMartin Matuska zpool_power_use_word(char *sysfs_path)
573b356da80SMartin Matuska {
574b356da80SMartin Matuska if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")],
575b356da80SMartin Matuska "power_status") == 0) {
576b356da80SMartin Matuska return (B_TRUE);
577b356da80SMartin Matuska }
578b356da80SMartin Matuska return (B_FALSE);
579b356da80SMartin Matuska }
580b356da80SMartin Matuska
581b356da80SMartin Matuska /*
582b356da80SMartin Matuska * Check the sysfs power control value for a vdev.
583b356da80SMartin Matuska *
584b356da80SMartin Matuska * Returns:
585b356da80SMartin Matuska * 0 - Power is off
586b356da80SMartin Matuska * 1 - Power is on
587b356da80SMartin Matuska * -1 - Error or unsupported
588b356da80SMartin Matuska */
589b356da80SMartin Matuska int
zpool_power_current_state(zpool_handle_t * zhp,char * vdev)590b356da80SMartin Matuska zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
591b356da80SMartin Matuska {
592b356da80SMartin Matuska char *val;
593b356da80SMartin Matuska int rc;
594b356da80SMartin Matuska
595b356da80SMartin Matuska char *path = zpool_power_sysfs_path(zhp, vdev);
596b356da80SMartin Matuska if (path == NULL)
597b356da80SMartin Matuska return (-1);
598b356da80SMartin Matuska
599b356da80SMartin Matuska val = zpool_sysfs_gets(path);
600b356da80SMartin Matuska if (val == NULL) {
601b356da80SMartin Matuska free(path);
602b356da80SMartin Matuska return (-1);
603b356da80SMartin Matuska }
604b356da80SMartin Matuska
605b356da80SMartin Matuska rc = zpool_power_parse_value(val);
606b356da80SMartin Matuska free(val);
607b356da80SMartin Matuska free(path);
608b356da80SMartin Matuska return (rc);
609b356da80SMartin Matuska }
610b356da80SMartin Matuska
611b356da80SMartin Matuska /*
612b356da80SMartin Matuska * Turn on or off the slot to a device
613b356da80SMartin Matuska *
614b356da80SMartin Matuska * Device path is the full path to the device (like /dev/sda or /dev/sda1).
615b356da80SMartin Matuska *
616b356da80SMartin Matuska * Return code:
617b356da80SMartin Matuska * 0: Success
618b356da80SMartin Matuska * ENOTSUP: Power control not supported for OS
619b356da80SMartin Matuska * EBADSLT: Couldn't read current power state
620b356da80SMartin Matuska * ENOENT: No sysfs path to power control
621b356da80SMartin Matuska * EIO: Couldn't write sysfs power value
622b356da80SMartin Matuska * EBADE: Sysfs power value didn't change
623b356da80SMartin Matuska */
624b356da80SMartin Matuska int
zpool_power(zpool_handle_t * zhp,char * vdev,boolean_t turn_on)625b356da80SMartin Matuska zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
626b356da80SMartin Matuska {
627b356da80SMartin Matuska char *sysfs_path;
628b356da80SMartin Matuska const char *val;
629b356da80SMartin Matuska int rc;
630b356da80SMartin Matuska int timeout_ms;
631b356da80SMartin Matuska
632b356da80SMartin Matuska rc = zpool_power_current_state(zhp, vdev);
633b356da80SMartin Matuska if (rc == -1) {
634b356da80SMartin Matuska return (EBADSLT);
635b356da80SMartin Matuska }
636b356da80SMartin Matuska
637b356da80SMartin Matuska /* Already correct value? */
638b356da80SMartin Matuska if (rc == (int)turn_on)
639b356da80SMartin Matuska return (0);
640b356da80SMartin Matuska
641b356da80SMartin Matuska sysfs_path = zpool_power_sysfs_path(zhp, vdev);
642b356da80SMartin Matuska if (sysfs_path == NULL)
643b356da80SMartin Matuska return (ENOENT);
644b356da80SMartin Matuska
645b356da80SMartin Matuska if (zpool_power_use_word(sysfs_path)) {
646b356da80SMartin Matuska val = turn_on ? "on" : "off";
647b356da80SMartin Matuska } else {
648b356da80SMartin Matuska val = turn_on ? "1" : "0";
649b356da80SMartin Matuska }
650b356da80SMartin Matuska
651b356da80SMartin Matuska rc = zpool_sysfs_puts(sysfs_path, (char *)val);
652b356da80SMartin Matuska
653b356da80SMartin Matuska free(sysfs_path);
654b356da80SMartin Matuska if (rc != 0) {
655b356da80SMartin Matuska return (EIO);
656b356da80SMartin Matuska }
657b356da80SMartin Matuska
658b356da80SMartin Matuska /*
659b356da80SMartin Matuska * Wait up to 30 seconds for sysfs power value to change after
660b356da80SMartin Matuska * writing it.
661b356da80SMartin Matuska */
662b356da80SMartin Matuska timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000);
663b356da80SMartin Matuska for (int i = 0; i < MAX(1, timeout_ms / 200); i++) {
664b356da80SMartin Matuska rc = zpool_power_current_state(zhp, vdev);
665b356da80SMartin Matuska if (rc == (int)turn_on)
666b356da80SMartin Matuska return (0); /* success */
667b356da80SMartin Matuska
668b356da80SMartin Matuska fsleep(0.200); /* 200ms */
669b356da80SMartin Matuska }
670b356da80SMartin Matuska
671b356da80SMartin Matuska /* sysfs value never changed */
672b356da80SMartin Matuska return (EBADE);
673b356da80SMartin Matuska }
674