xref: /onnv-gate/usr/src/uts/sun4v/io/vdsk_common.c (revision 8528:f7e4908237ce)
12531Snarayan /*
22531Snarayan  * CDDL HEADER START
32531Snarayan  *
42531Snarayan  * The contents of this file are subject to the terms of the
52531Snarayan  * Common Development and Distribution License (the "License").
62531Snarayan  * You may not use this file except in compliance with the License.
72531Snarayan  *
82531Snarayan  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
92531Snarayan  * or http://www.opensolaris.org/os/licensing.
102531Snarayan  * See the License for the specific language governing permissions
112531Snarayan  * and limitations under the License.
122531Snarayan  *
132531Snarayan  * When distributing Covered Code, include this CDDL HEADER in each
142531Snarayan  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
152531Snarayan  * If applicable, add the following below this CDDL HEADER, with the
162531Snarayan  * fields enclosed by brackets "[]" replaced with your own identifying
172531Snarayan  * information: Portions Copyright [yyyy] [name of copyright owner]
182531Snarayan  *
192531Snarayan  * CDDL HEADER END
202531Snarayan  */
212531Snarayan 
222531Snarayan /*
23*8528SAlexandre.Chartre@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
242531Snarayan  * Use is subject to license terms.
252531Snarayan  */
262531Snarayan 
272531Snarayan #include <sys/crc32.h>
282531Snarayan #include <sys/cred.h>
292531Snarayan #include <sys/ddi.h>
302531Snarayan #include <sys/dkio.h>
312531Snarayan #include <sys/file.h>
322531Snarayan #include <sys/kmem.h>
332531Snarayan #include <sys/sunddi.h>
342531Snarayan #include <sys/sunldi.h>
352531Snarayan #include <sys/types.h>
362531Snarayan #include <sys/varargs.h>
372531Snarayan #include <sys/vtoc.h>
382531Snarayan 
392531Snarayan #include <sys/vdsk_common.h>
402531Snarayan 
412531Snarayan /*
422531Snarayan  * Hooks for EFI support
432531Snarayan  */
442531Snarayan 
/*
 * This code provides generic functions to the vds and vdc drivers to read
 * EFI labels from the disk backend and to get the EFI GPT and GPE. It is
 * inspired by the libefi userland library and the cmlb driver. We will
 * certainly be able to remove this code if RFE 6213117 is ever implemented.
 */
512531Snarayan 
#ifdef DEBUG

/*
 * Debug tracing: forwards its printf-style arguments to vd_efi_print() when
 * the vd_efi_debug flag is non-zero. The do/while (0) wrapper makes the
 * expansion a single statement, so the macro can be used safely as the body
 * of an if/else without its internal "if" capturing the caller's "else".
 */
#define	VD_EFI_DEBUG(...)						\
	do {								\
		if (vd_efi_debug)					\
			vd_efi_print(__VA_ARGS__);			\
	} while (0)

/* Set to a non-zero value to enable VD_EFI_DEBUG output. */
static int vd_efi_debug = 0;

#else

/* Non-DEBUG builds: tracing compiles away to nothing. */
#define	VD_EFI_DEBUG(...)

#endif
632531Snarayan 
/*
 * Size in bytes of a buffer holding nparts GPT partition entries, rounded up
 * to a whole multiple of (vdisk)->block_size. nparts must be non-zero: the
 * "- 1" is applied to an unsigned sizeof product, so a count of 0 wraps
 * around instead of yielding 0.
 */
#define	VD_EFI_GPE_LEN(vdisk, nparts) \
	((((sizeof (efi_gpe_t) * (nparts) - 1) / (vdisk)->block_size) + 1) * \
	(vdisk)->block_size)
672531Snarayan 
682531Snarayan static void
vd_efi_print(const char * format,...)692531Snarayan vd_efi_print(const char *format, ...)
702531Snarayan {
712531Snarayan 	va_list args;
722531Snarayan 
732531Snarayan 	va_start(args, format);
742531Snarayan 	vcmn_err(CE_CONT, format, args);
752531Snarayan 	va_end(args);
762531Snarayan }
772531Snarayan 
782531Snarayan /*
792531Snarayan  * Return a 32-bit CRC of the contents of the buffer.
802531Snarayan  *
812531Snarayan  * The seed is 0xffffffff and the result is XORed with 0xffffffff
822531Snarayan  * because this is what the Itanium firmware expects.
832531Snarayan  */
842531Snarayan unsigned int
vd_efi_crc32(const unsigned char * s,unsigned int len)852531Snarayan vd_efi_crc32(const unsigned char *s, unsigned int len)
862531Snarayan {
872531Snarayan 	unsigned int crc32val;
882531Snarayan 
892531Snarayan 	CRC32(crc32val, s, len, -1U, crc32_table);
902531Snarayan 
912531Snarayan 	return (crc32val ^ -1U);
922531Snarayan }
932531Snarayan 
942531Snarayan static int
vd_efi_ioctl(vd_efi_dev_t * dev,int cmd,void * arg)955874Sachartre vd_efi_ioctl(vd_efi_dev_t *dev, int cmd, void *arg)
962531Snarayan {
975874Sachartre 	int status;
985874Sachartre 
995874Sachartre 	ASSERT(dev->vdisk_ioctl != NULL);
1005874Sachartre 	ASSERT(dev->vdisk != NULL);
1015874Sachartre 	status = (*dev->vdisk_ioctl)(dev->vdisk, cmd, (uintptr_t)arg);
1025874Sachartre 
1035874Sachartre 	return (status);
1045874Sachartre }
1052531Snarayan 
1065874Sachartre /*
1075874Sachartre  * Swap GPT data to match with the system endianness.
1085874Sachartre  */
1095874Sachartre static void
vd_efi_swap_gpt(efi_gpt_t * gpt)1105874Sachartre vd_efi_swap_gpt(efi_gpt_t *gpt)
1115874Sachartre {
1125874Sachartre 	gpt->efi_gpt_Signature = LE_64(gpt->efi_gpt_Signature);
1135874Sachartre 	gpt->efi_gpt_Revision = LE_32(gpt->efi_gpt_Revision);
1145874Sachartre 	gpt->efi_gpt_HeaderSize = LE_32(gpt->efi_gpt_HeaderSize);
1155874Sachartre 	gpt->efi_gpt_HeaderCRC32 = LE_32(gpt->efi_gpt_HeaderCRC32);
1165874Sachartre 	gpt->efi_gpt_MyLBA = LE_64(gpt->efi_gpt_MyLBA);
1175874Sachartre 	gpt->efi_gpt_AlternateLBA = LE_64(gpt->efi_gpt_AlternateLBA);
1185874Sachartre 	gpt->efi_gpt_FirstUsableLBA = LE_64(gpt->efi_gpt_FirstUsableLBA);
1195874Sachartre 	gpt->efi_gpt_LastUsableLBA = LE_64(gpt->efi_gpt_LastUsableLBA);
1205874Sachartre 	UUID_LE_CONVERT(gpt->efi_gpt_DiskGUID, gpt->efi_gpt_DiskGUID);
1215874Sachartre 	gpt->efi_gpt_PartitionEntryLBA = LE_64(gpt->efi_gpt_PartitionEntryLBA);
1225874Sachartre 	gpt->efi_gpt_NumberOfPartitionEntries =
1235874Sachartre 	    LE_32(gpt->efi_gpt_NumberOfPartitionEntries);
1245874Sachartre 	gpt->efi_gpt_SizeOfPartitionEntry =
1255874Sachartre 	    LE_32(gpt->efi_gpt_SizeOfPartitionEntry);
1265874Sachartre 	gpt->efi_gpt_PartitionEntryArrayCRC32 =
1275874Sachartre 	    LE_32(gpt->efi_gpt_PartitionEntryArrayCRC32);
1282531Snarayan }
1292531Snarayan 
1305874Sachartre /*
1315874Sachartre  * Swap GPE data to match with the system endianness.
1325874Sachartre  */
1335874Sachartre static void
vd_efi_swap_gpe(efi_gpe_t * gpe,int nparts)1345874Sachartre vd_efi_swap_gpe(efi_gpe_t *gpe, int nparts)
1352531Snarayan {
1365874Sachartre 	int i, j;
1372531Snarayan 
1385874Sachartre 	for (i = 0; i < nparts; i++) {
1395874Sachartre 		UUID_LE_CONVERT(gpe[i].efi_gpe_PartitionTypeGUID,
1405874Sachartre 		    gpe[i].efi_gpe_PartitionTypeGUID);
1415874Sachartre 		UUID_LE_CONVERT(gpe[i].efi_gpe_UniquePartitionGUID,
1425874Sachartre 		    gpe[i].efi_gpe_UniquePartitionGUID);
1435874Sachartre 		gpe[i].efi_gpe_StartingLBA = LE_64(gpe[i].efi_gpe_StartingLBA);
1445874Sachartre 		gpe[i].efi_gpe_EndingLBA = LE_64(gpe[i].efi_gpe_EndingLBA);
1455874Sachartre 		gpe[i].efi_gpe_Attributes.PartitionAttrs =
1465874Sachartre 		    LE_16(gpe[i].efi_gpe_Attributes.PartitionAttrs);
1475874Sachartre 		for (j = 0; j < EFI_PART_NAME_LEN; j++) {
1485874Sachartre 			gpe[i].efi_gpe_PartitionName[j] =
1495874Sachartre 			    LE_16(gpe[i].efi_gpe_PartitionName[j]);
1505874Sachartre 		}
1515874Sachartre 	}
1522531Snarayan }
1532531Snarayan 
1545874Sachartre /*
1555874Sachartre  * Check that an EFI GPT is valid. This function should be called with a raw
1565874Sachartre  * EFI GPT i.e. GPT data should be in little endian format as indicated in the
1575874Sachartre  * EFI specification and they should not have been swapped to match with the
1585874Sachartre  * system endianness.
1595874Sachartre  */
1602531Snarayan static int
vd_efi_check_gpt(vd_efi_dev_t * dev,efi_gpt_t * gpt)1615874Sachartre vd_efi_check_gpt(vd_efi_dev_t *dev, efi_gpt_t *gpt)
1622531Snarayan {
1635874Sachartre 	uint_t crc_stored, crc_computed;
1642531Snarayan 
1655874Sachartre 	if (gpt->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
1662531Snarayan 		VD_EFI_DEBUG("Bad EFI signature: 0x%llx != 0x%llx\n",
1675874Sachartre 		    (long long)gpt->efi_gpt_Signature,
1682531Snarayan 		    (long long)LE_64(EFI_SIGNATURE));
1692531Snarayan 		return (EINVAL);
1702531Snarayan 	}
1712531Snarayan 
1722531Snarayan 	/*
1732531Snarayan 	 * check CRC of the header; the size of the header should
1742531Snarayan 	 * never be larger than one block
1752531Snarayan 	 */
1765874Sachartre 	if (LE_32(gpt->efi_gpt_HeaderSize) > dev->block_size) {
1775874Sachartre 		VD_EFI_DEBUG("Header size (%u bytes) larger than one block"
1785874Sachartre 		    "(%u bytes)\n", LE_32(gpt->efi_gpt_HeaderSize),
1795874Sachartre 		    dev->block_size);
1805874Sachartre 		return (EINVAL);
1815874Sachartre 	}
1822531Snarayan 
1835874Sachartre 	crc_stored = LE_32(gpt->efi_gpt_HeaderCRC32);
1845874Sachartre 	gpt->efi_gpt_HeaderCRC32 = LE_32(0);
1855874Sachartre 	crc_computed = vd_efi_crc32((unsigned char *)gpt,
1865874Sachartre 	    LE_32(gpt->efi_gpt_HeaderSize));
1875874Sachartre 	gpt->efi_gpt_HeaderCRC32 = LE_32(crc_stored);
1885874Sachartre 
1895874Sachartre 	if (crc_stored != crc_computed) {
1902531Snarayan 		VD_EFI_DEBUG("Bad EFI CRC: 0x%x != 0x%x\n",
1915874Sachartre 		    crc_stored, crc_computed);
192*8528SAlexandre.Chartre@Sun.COM 			return (EINVAL);
1932531Snarayan 	}
1942531Snarayan 
1952531Snarayan 	return (0);
1962531Snarayan }
1972531Snarayan 
1985874Sachartre /*
1995874Sachartre  * Allocate and read the EFI GPT and GPE from the disk backend. Note that the
2005874Sachartre  * on-disk GPT and GPE are stored in little endian format but this function
2015874Sachartre  * returns them using the endianness of the system so that any field in the
2025874Sachartre  * GPT/GPE structures can be directly accessible without any further conversion.
2035874Sachartre  * The caller is responsible for freeing the allocated structures by calling
2045874Sachartre  * vd_efi_free().
2055874Sachartre  */
2065874Sachartre int
vd_efi_alloc_and_read(vd_efi_dev_t * dev,efi_gpt_t ** efi_gpt,efi_gpe_t ** efi_gpe)2075874Sachartre vd_efi_alloc_and_read(vd_efi_dev_t *dev, efi_gpt_t **efi_gpt,
2085874Sachartre     efi_gpe_t **efi_gpe)
2092531Snarayan {
2105874Sachartre 	dk_efi_t		dk_efi;
2115874Sachartre 	efi_gpt_t		*gpt = NULL;
2125874Sachartre 	efi_gpe_t		*gpe = NULL;
213*8528SAlexandre.Chartre@Sun.COM 	efi_gpt_t		*data = NULL;
214*8528SAlexandre.Chartre@Sun.COM 	size_t			gpt_len, gpe_len, data_len;
2155874Sachartre 	int 			nparts, status;
2165874Sachartre 
2175874Sachartre 	ASSERT(dev->block_size >= sizeof (efi_gpt_t));
2185874Sachartre 	gpt_len = dev->block_size;
2195874Sachartre 	gpt = kmem_zalloc(gpt_len, KM_SLEEP);
2202531Snarayan 
2212531Snarayan 	/*
2225874Sachartre 	 * Read the EFI GPT.
2232531Snarayan 	 */
2245874Sachartre 	dk_efi.dki_lba = 1;
2255874Sachartre 	dk_efi.dki_data = gpt;
2265874Sachartre 	dk_efi.dki_length = gpt_len;
2275874Sachartre 
228*8528SAlexandre.Chartre@Sun.COM 	status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);
229*8528SAlexandre.Chartre@Sun.COM 
230*8528SAlexandre.Chartre@Sun.COM 	if (status == EINVAL) {
231*8528SAlexandre.Chartre@Sun.COM 		/*
232*8528SAlexandre.Chartre@Sun.COM 		 * Because the DKIOCGETEFI ioctl was initially incorrectly
233*8528SAlexandre.Chartre@Sun.COM 		 * implemented for a ZFS volume, the ioctl can fail with
234*8528SAlexandre.Chartre@Sun.COM 		 * EINVAL if it is done on a ZFS volume managed by an old
235*8528SAlexandre.Chartre@Sun.COM 		 * version of Solaris. This can happen if a ZFS volume is
236*8528SAlexandre.Chartre@Sun.COM 		 * exported as a single-slice disk by a service domain
237*8528SAlexandre.Chartre@Sun.COM 		 * running Solaris older than Solaris 10 Update 6.
238*8528SAlexandre.Chartre@Sun.COM 		 *
239*8528SAlexandre.Chartre@Sun.COM 		 * So we retry the ioctl to read both the GPT and the GPE at
240*8528SAlexandre.Chartre@Sun.COM 		 * the same time accordingly to the old implementation.
241*8528SAlexandre.Chartre@Sun.COM 		 */
242*8528SAlexandre.Chartre@Sun.COM 		data_len = sizeof (efi_gpt_t) + sizeof (efi_gpe_t);
243*8528SAlexandre.Chartre@Sun.COM 		data = kmem_zalloc(data_len, KM_SLEEP);
244*8528SAlexandre.Chartre@Sun.COM 
245*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_lba = 1;
246*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_data = data;
247*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_length = data_len;
248*8528SAlexandre.Chartre@Sun.COM 		status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);
249*8528SAlexandre.Chartre@Sun.COM 
250*8528SAlexandre.Chartre@Sun.COM 		if (status == 0)
251*8528SAlexandre.Chartre@Sun.COM 			bcopy(data, gpt, sizeof (efi_gpt_t));
252*8528SAlexandre.Chartre@Sun.COM 	}
253*8528SAlexandre.Chartre@Sun.COM 
254*8528SAlexandre.Chartre@Sun.COM 	if (status != 0) {
2555874Sachartre 		VD_EFI_DEBUG("DKIOCGETEFI (GPT, LBA=1) error %d\n", status);
2565874Sachartre 		goto errdone;
2572531Snarayan 	}
2582531Snarayan 
2595874Sachartre 	if ((status = vd_efi_check_gpt(dev, gpt)) != 0) {
2605874Sachartre 		/*
2615874Sachartre 		 * No valid label here; try the alternate. The alternate GPT is
2625874Sachartre 		 * located in the last block of the disk.
2635874Sachartre 		 */
2645874Sachartre 		dk_efi.dki_lba = dev->disk_size - 1;
2655874Sachartre 		dk_efi.dki_data = gpt;
2665874Sachartre 		dk_efi.dki_length = gpt_len;
2675874Sachartre 
2685874Sachartre 		if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
2695874Sachartre 			VD_EFI_DEBUG("DKIOCGETEFI (LBA=%lu) error %d\n",
2705874Sachartre 			    dev->disk_size - 1, status);
2715874Sachartre 			goto errdone;
2722531Snarayan 		}
2735874Sachartre 
2745874Sachartre 		if ((status = vd_efi_check_gpt(dev, gpt)) != 0)
2755874Sachartre 			goto errdone;
2765874Sachartre 
2775874Sachartre 		VD_EFI_DEBUG("efi_read: primary label corrupt; using backup\n");
2782531Snarayan 	}
2792531Snarayan 
2805874Sachartre 	/* swap GPT data after checking the GPT is valid */
2815874Sachartre 	vd_efi_swap_gpt(gpt);
2822531Snarayan 
2832531Snarayan 	/*
2845874Sachartre 	 * Read the EFI GPE.
2852531Snarayan 	 */
2865874Sachartre 	nparts = gpt->efi_gpt_NumberOfPartitionEntries;
2872531Snarayan 
2885874Sachartre 	if (nparts > NDKMAP + 1) {
2895874Sachartre 		VD_EFI_DEBUG("Too many EFI partitions (%u)", nparts);
2905874Sachartre 		status = EINVAL;
2915874Sachartre 		goto errdone;
2925874Sachartre 	}
2935874Sachartre 
2945874Sachartre 	if (nparts == 0) {
2955874Sachartre 		VD_EFI_DEBUG("No partition defined");
2965874Sachartre 		status = EINVAL;
2975874Sachartre 		goto errdone;
2982531Snarayan 	}
2992531Snarayan 
3005874Sachartre 	gpe_len = VD_EFI_GPE_LEN(dev, nparts);
3015874Sachartre 	gpe = kmem_zalloc(gpe_len, KM_SLEEP);
3022531Snarayan 
303*8528SAlexandre.Chartre@Sun.COM 	if (data != NULL) {
304*8528SAlexandre.Chartre@Sun.COM 		/*
305*8528SAlexandre.Chartre@Sun.COM 		 * The data variable is not NULL if we have used the old ioctl
306*8528SAlexandre.Chartre@Sun.COM 		 * implementation for a ZFS volume. In that case, we only expect
307*8528SAlexandre.Chartre@Sun.COM 		 * one partition and GPE data are already available in the data
308*8528SAlexandre.Chartre@Sun.COM 		 * buffer, right after GPT data.
309*8528SAlexandre.Chartre@Sun.COM 		 */
310*8528SAlexandre.Chartre@Sun.COM 		if (nparts != 1) {
311*8528SAlexandre.Chartre@Sun.COM 			VD_EFI_DEBUG("Unexpected number of partitions (%u)",
312*8528SAlexandre.Chartre@Sun.COM 			    nparts);
313*8528SAlexandre.Chartre@Sun.COM 			status = EINVAL;
314*8528SAlexandre.Chartre@Sun.COM 			goto errdone;
315*8528SAlexandre.Chartre@Sun.COM 		}
3162531Snarayan 
317*8528SAlexandre.Chartre@Sun.COM 		bcopy(data + 1, gpe, sizeof (efi_gpe_t));
318*8528SAlexandre.Chartre@Sun.COM 
319*8528SAlexandre.Chartre@Sun.COM 	} else {
320*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_lba = gpt->efi_gpt_PartitionEntryLBA;
321*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_data = (efi_gpt_t *)gpe;
322*8528SAlexandre.Chartre@Sun.COM 		dk_efi.dki_length = gpe_len;
323*8528SAlexandre.Chartre@Sun.COM 
324*8528SAlexandre.Chartre@Sun.COM 		if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
325*8528SAlexandre.Chartre@Sun.COM 			VD_EFI_DEBUG("DKIOCGETEFI (GPE, LBA=%lu) error %d\n",
326*8528SAlexandre.Chartre@Sun.COM 			    gpt->efi_gpt_PartitionEntryLBA, status);
327*8528SAlexandre.Chartre@Sun.COM 			goto errdone;
328*8528SAlexandre.Chartre@Sun.COM 		}
3295874Sachartre 	}
3302531Snarayan 
3315874Sachartre 	vd_efi_swap_gpe(gpe, nparts);
3325874Sachartre 
3335874Sachartre 	*efi_gpt = gpt;
3345874Sachartre 	*efi_gpe = gpe;
3352531Snarayan 
3365874Sachartre errdone:
3375874Sachartre 
338*8528SAlexandre.Chartre@Sun.COM 	if (data != NULL)
339*8528SAlexandre.Chartre@Sun.COM 		kmem_free(data, data_len);
340*8528SAlexandre.Chartre@Sun.COM 
341*8528SAlexandre.Chartre@Sun.COM 	if (status != 0) {
342*8528SAlexandre.Chartre@Sun.COM 		if (gpe != NULL)
343*8528SAlexandre.Chartre@Sun.COM 			kmem_free(gpe, gpe_len);
344*8528SAlexandre.Chartre@Sun.COM 		if (gpt != NULL)
345*8528SAlexandre.Chartre@Sun.COM 			kmem_free(gpt, gpt_len);
346*8528SAlexandre.Chartre@Sun.COM 	}
3475874Sachartre 
3485874Sachartre 	return (status);
3492531Snarayan }
3502531Snarayan 
3512531Snarayan /*
3525874Sachartre  * Free the EFI GPE and GPT structures returned by vd_efi_alloc_and_read().
3532531Snarayan  */
3542531Snarayan void
vd_efi_free(vd_efi_dev_t * dev,efi_gpt_t * gpt,efi_gpe_t * gpe)3555874Sachartre vd_efi_free(vd_efi_dev_t *dev, efi_gpt_t *gpt, efi_gpe_t *gpe)
3562531Snarayan {
3575874Sachartre 	kmem_free(gpe, VD_EFI_GPE_LEN(dev,
3585874Sachartre 	    gpt->efi_gpt_NumberOfPartitionEntries));
3595874Sachartre 	kmem_free(gpt, dev->block_size);
3602531Snarayan }
361