10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <sys/param.h>
300Sstevel@tonic-gate #include <sys/systm.h>
310Sstevel@tonic-gate #include <sys/conf.h>
320Sstevel@tonic-gate #include <sys/file.h>
330Sstevel@tonic-gate #include <sys/user.h>
340Sstevel@tonic-gate #include <sys/uio.h>
350Sstevel@tonic-gate #include <sys/t_lock.h>
360Sstevel@tonic-gate #include <sys/buf.h>
370Sstevel@tonic-gate #include <sys/dkio.h>
380Sstevel@tonic-gate #include <sys/vtoc.h>
390Sstevel@tonic-gate #include <sys/kmem.h>
400Sstevel@tonic-gate #include <vm/page.h>
410Sstevel@tonic-gate #include <sys/cmn_err.h>
420Sstevel@tonic-gate #include <sys/sysmacros.h>
430Sstevel@tonic-gate #include <sys/types.h>
440Sstevel@tonic-gate #include <sys/mkdev.h>
450Sstevel@tonic-gate #include <sys/stat.h>
460Sstevel@tonic-gate #include <sys/open.h>
470Sstevel@tonic-gate #include <sys/lvm/mdio.h>
480Sstevel@tonic-gate #include <sys/lvm/mdvar.h>
490Sstevel@tonic-gate #include <sys/lvm/md_stripe.h>
500Sstevel@tonic-gate #include <sys/lvm/md_convert.h>
510Sstevel@tonic-gate #include <sys/lvm/md_notify.h>
520Sstevel@tonic-gate #include <sys/modctl.h>
530Sstevel@tonic-gate #include <sys/ddi.h>
540Sstevel@tonic-gate #include <sys/sunddi.h>
550Sstevel@tonic-gate #include <sys/debug.h>
560Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h>
570Sstevel@tonic-gate #include <sys/sysevent/svm.h>
580Sstevel@tonic-gate 
/*
 * Operations vector for the stripe module.  reset_stripe() passes its
 * address to md_destroy_unit_incore().
 */
md_ops_t		stripe_md_ops;
#ifndef	lint
/* NOTE(review): presumably names the module this one depends on - confirm. */
static char		_depends_on[] = "drv/md";
/* Pointer to this module's operations vector. */
static md_ops_t		*md_interface_ops = &stripe_md_ops;
#endif

/* Globals defined outside this file. */
extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];

/* md_stripe_strategy() waits on md_cv under md_mx while a set is halted. */
extern kmutex_t		md_mx;
extern kcondvar_t	md_cv;

/* md_status is tested for MD_GBL_DAEMONS_LIVE in stripe_run_queue(). */
extern int		md_status;
/* Component devices whose major equals md_major are metadevices. */
extern major_t		md_major;
/* Queue anchor handed to md_daemon() by stripe_run_queue(). */
extern mdq_anchor_t	md_done_daemon;

/*
 * Byte offset subtracted from a child buf pointer in stripe_done() to
 * recover the md_scs_t that contains it.
 */
static int		md_stripe_mcs_buf_off;
/* Cache that md_sps_t parent structures are released to (SPS_FREE). */
static kmem_cache_t	*stripe_parent_cache = NULL;
/* Cache that md_scs_t child structures are released to. */
static kmem_cache_t	*stripe_child_cache = NULL;
790Sstevel@tonic-gate 
800Sstevel@tonic-gate /*ARGSUSED1*/
810Sstevel@tonic-gate static int
820Sstevel@tonic-gate stripe_parent_constructor(void *p, void *d1, int d2)
830Sstevel@tonic-gate {
840Sstevel@tonic-gate 	mutex_init(&((md_sps_t *)p)->ps_mx,
850Sstevel@tonic-gate 	    NULL, MUTEX_DEFAULT, NULL);
860Sstevel@tonic-gate 	return (0);
870Sstevel@tonic-gate }
880Sstevel@tonic-gate 
890Sstevel@tonic-gate static void
900Sstevel@tonic-gate stripe_parent_init(void *ps)
910Sstevel@tonic-gate {
920Sstevel@tonic-gate 	bzero(ps, offsetof(md_sps_t, ps_mx));
930Sstevel@tonic-gate }
940Sstevel@tonic-gate 
950Sstevel@tonic-gate /*ARGSUSED1*/
960Sstevel@tonic-gate static void
970Sstevel@tonic-gate stripe_parent_destructor(void *p, void *d)
980Sstevel@tonic-gate {
990Sstevel@tonic-gate 	mutex_destroy(&((md_sps_t *)p)->ps_mx);
1000Sstevel@tonic-gate }
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate /*ARGSUSED1*/
1030Sstevel@tonic-gate static int
1040Sstevel@tonic-gate stripe_child_constructor(void *p, void *d1, int d2)
1050Sstevel@tonic-gate {
1060Sstevel@tonic-gate 	bioinit(&((md_scs_t *)p)->cs_buf);
1070Sstevel@tonic-gate 	return (0);
1080Sstevel@tonic-gate }
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate static void
1110Sstevel@tonic-gate stripe_child_init(md_scs_t *cs)
1120Sstevel@tonic-gate {
1130Sstevel@tonic-gate 	cs->cs_mdunit = 0;
1140Sstevel@tonic-gate 	cs->cs_ps = NULL;
1150Sstevel@tonic-gate 	cs->cs_comp = NULL;
1160Sstevel@tonic-gate 	md_bioreset(&cs->cs_buf);
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*ARGSUSED1*/
1200Sstevel@tonic-gate static void
1210Sstevel@tonic-gate stripe_child_destructor(void *p, void *d)
1220Sstevel@tonic-gate {
1230Sstevel@tonic-gate 	biofini(&((md_scs_t *)p)->cs_buf);
1240Sstevel@tonic-gate }
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate /*ARGSUSED*/
1270Sstevel@tonic-gate static void
1280Sstevel@tonic-gate stripe_run_queue(void *d)
1290Sstevel@tonic-gate {
1300Sstevel@tonic-gate 	if (!(md_status & MD_GBL_DAEMONS_LIVE))
1310Sstevel@tonic-gate 		md_daemon(1, &md_done_daemon);
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate static void
1350Sstevel@tonic-gate stripe_close_all_devs(ms_unit_t *un, int md_cflags)
1360Sstevel@tonic-gate {
1370Sstevel@tonic-gate 	int		row;
1380Sstevel@tonic-gate 	int		i;
1390Sstevel@tonic-gate 	int		c;
1400Sstevel@tonic-gate 	struct ms_comp	*mdcomp;
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
1430Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
1440Sstevel@tonic-gate 		struct ms_row *mdr = &un->un_row[row];
1450Sstevel@tonic-gate 		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
1460Sstevel@tonic-gate 			struct ms_comp	*mdc;
1470Sstevel@tonic-gate 			mdc = &mdcomp[c++];
1480Sstevel@tonic-gate 			if (md_cflags & MD_OFLG_PROBEDEV) {
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate 			/*
1510Sstevel@tonic-gate 			 * It is possible that the md_layered_open
1520Sstevel@tonic-gate 			 * failed because the stripe unit structure
1530Sstevel@tonic-gate 			 * contained a NODEV.  In such a case since
1540Sstevel@tonic-gate 			 * there is nothing to open, there is nothing
1550Sstevel@tonic-gate 			 * to close.
1560Sstevel@tonic-gate 			 */
1570Sstevel@tonic-gate 				if (mdc->un_dev == NODEV64)
1580Sstevel@tonic-gate 					continue;
1590Sstevel@tonic-gate 			}
1600Sstevel@tonic-gate 			if ((md_cflags & MD_OFLG_PROBEDEV) &&
1610Sstevel@tonic-gate 			    (mdc->un_mirror.ms_flags & MDM_S_PROBEOPEN)) {
1620Sstevel@tonic-gate 				md_layered_close(mdc->un_dev,
1630Sstevel@tonic-gate 				    md_cflags);
1640Sstevel@tonic-gate 				mdc->un_mirror.ms_flags &=
1650Sstevel@tonic-gate 						~MDM_S_PROBEOPEN;
1660Sstevel@tonic-gate 			} else if (mdc->un_mirror.ms_flags & MDM_S_ISOPEN) {
1670Sstevel@tonic-gate 				md_layered_close(mdc->un_dev, md_cflags);
1680Sstevel@tonic-gate 				mdc->un_mirror.ms_flags &= ~MDM_S_ISOPEN;
1690Sstevel@tonic-gate 			}
1700Sstevel@tonic-gate 		}
1710Sstevel@tonic-gate 	}
1720Sstevel@tonic-gate }
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate static int
1750Sstevel@tonic-gate stripe_open_all_devs(ms_unit_t *un, int md_oflags)
1760Sstevel@tonic-gate {
1770Sstevel@tonic-gate 	minor_t		mnum = MD_SID(un);
1780Sstevel@tonic-gate 	int		row;
1790Sstevel@tonic-gate 	int		i;
1800Sstevel@tonic-gate 	int		c;
1810Sstevel@tonic-gate 	struct ms_comp	*mdcomp;
1820Sstevel@tonic-gate 	int		err;
1830Sstevel@tonic-gate 	int		cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS);
1840Sstevel@tonic-gate 	int		probe_err_cnt = 0;
1850Sstevel@tonic-gate 	int		total_comp_cnt = 0;
1860Sstevel@tonic-gate 	set_t		setno = MD_MIN2SET(MD_SID(un));
1870Sstevel@tonic-gate 	side_t		side = mddb_getsidenum(setno);
1880Sstevel@tonic-gate 	mdkey_t		key;
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
1910Sstevel@tonic-gate 
1920Sstevel@tonic-gate 	/*
1930Sstevel@tonic-gate 	 * For a probe call, if any component of a stripe or a concat
1940Sstevel@tonic-gate 	 * can be opened, it is considered to be a success. The total number
1950Sstevel@tonic-gate 	 * of components in a stripe are computed prior to starting a probe.
1960Sstevel@tonic-gate 	 * This number is then compared against the number of components
1970Sstevel@tonic-gate 	 * that could be be successfully opened. If none of the components
1980Sstevel@tonic-gate 	 * in a stripe can be opened, only then an ENXIO is returned for a
1990Sstevel@tonic-gate 	 * probe type open.
2000Sstevel@tonic-gate 	 */
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
2030Sstevel@tonic-gate 		struct ms_row *mdr = &un->un_row[row];
2040Sstevel@tonic-gate 
2050Sstevel@tonic-gate 		if (md_oflags & MD_OFLG_PROBEDEV)
2060Sstevel@tonic-gate 			total_comp_cnt += mdr->un_ncomp;
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
2090Sstevel@tonic-gate 			struct ms_comp	*mdc;
2100Sstevel@tonic-gate 			md_dev64_t tmpdev;
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 			mdc = &mdcomp[c++];
2130Sstevel@tonic-gate 			tmpdev = mdc->un_dev;
2140Sstevel@tonic-gate 			/*
2150Sstevel@tonic-gate 			 * Do the open by device id
2160Sstevel@tonic-gate 			 * Check if this comp is hotspared and
2170Sstevel@tonic-gate 			 * if it is then use the key for hotspare.
2180Sstevel@tonic-gate 			 * MN disksets don't use devids, so we better don't use
2190Sstevel@tonic-gate 			 * md_devid_found/md_resolve_bydevid there. Rather do,
2200Sstevel@tonic-gate 			 * what's done in stripe_build_incore()
2210Sstevel@tonic-gate 			 */
2220Sstevel@tonic-gate 			if (MD_MNSET_SETNO(setno)) {
2230Sstevel@tonic-gate 				if (mdc->un_mirror.ms_hs_id != 0) {
2240Sstevel@tonic-gate 					(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
2250Sstevel@tonic-gate 					    0, &mdc->un_mirror.ms_hs_id, NULL,
2260Sstevel@tonic-gate 					    &tmpdev, NULL);
2270Sstevel@tonic-gate 				}
2280Sstevel@tonic-gate 			} else {
2290Sstevel@tonic-gate 				key = mdc->un_mirror.ms_hs_id ?
2300Sstevel@tonic-gate 				    mdc->un_mirror.ms_hs_key : mdc->un_key;
2310Sstevel@tonic-gate 				if ((md_getmajor(tmpdev) != md_major) &&
2320Sstevel@tonic-gate 				    md_devid_found(setno, side, key) == 1) {
2330Sstevel@tonic-gate 					tmpdev = md_resolve_bydevid(mnum,
2340Sstevel@tonic-gate 					    tmpdev, key);
2350Sstevel@tonic-gate 				}
2360Sstevel@tonic-gate 			}
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 			/*
2390Sstevel@tonic-gate 			 * For a submirror, we only want to open those devices
2400Sstevel@tonic-gate 			 * that are not errored. If the device is errored then
2410Sstevel@tonic-gate 			 * then there is no reason to open it and leaving it
2420Sstevel@tonic-gate 			 * closed allows the RCM/DR code to work so that the
2430Sstevel@tonic-gate 			 * errored device can be replaced.
2440Sstevel@tonic-gate 			 */
2450Sstevel@tonic-gate 			if ((md_oflags & MD_OFLG_PROBEDEV) ||
2460Sstevel@tonic-gate 			    ! (mdc->un_mirror.ms_state & CS_ERRED)) {
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 				err = md_layered_open(mnum, &tmpdev, md_oflags);
2490Sstevel@tonic-gate 			} else {
2500Sstevel@tonic-gate 				err = ENXIO;
2510Sstevel@tonic-gate 			}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 			/*
2540Sstevel@tonic-gate 			 * Only set the un_dev if the tmpdev != NODEV64. If
2550Sstevel@tonic-gate 			 * it is NODEV64 then the md_layered_open() will have
2560Sstevel@tonic-gate 			 * failed in some manner.
2570Sstevel@tonic-gate 			 */
2580Sstevel@tonic-gate 			if (tmpdev != NODEV64)
2590Sstevel@tonic-gate 				mdc->un_dev = tmpdev;
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 			if (err) {
2620Sstevel@tonic-gate 				if (!cont_on_errors) {
2630Sstevel@tonic-gate 					stripe_close_all_devs(un, md_oflags);
2640Sstevel@tonic-gate 					return (ENXIO);
2650Sstevel@tonic-gate 				}
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 				if (md_oflags & MD_OFLG_PROBEDEV)
2680Sstevel@tonic-gate 					probe_err_cnt++;
2690Sstevel@tonic-gate 			} else {
2700Sstevel@tonic-gate 				if (md_oflags & MD_OFLG_PROBEDEV) {
2710Sstevel@tonic-gate 					mdc->un_mirror.ms_flags |=
2720Sstevel@tonic-gate 						MDM_S_PROBEOPEN;
2730Sstevel@tonic-gate 				} else
2740Sstevel@tonic-gate 					mdc->un_mirror.ms_flags |= MDM_S_ISOPEN;
2750Sstevel@tonic-gate 			}
2760Sstevel@tonic-gate 		}
2770Sstevel@tonic-gate 	}
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	/* If every component in a stripe could not be opened fail */
2800Sstevel@tonic-gate 	if ((md_oflags & MD_OFLG_PROBEDEV) &&
2810Sstevel@tonic-gate 	    (probe_err_cnt == total_comp_cnt))
2820Sstevel@tonic-gate 		return (ENXIO);
2830Sstevel@tonic-gate 	else
2840Sstevel@tonic-gate 		return (0);
2850Sstevel@tonic-gate }
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate int
2880Sstevel@tonic-gate stripe_build_incore(void *p, int snarfing)
2890Sstevel@tonic-gate {
2900Sstevel@tonic-gate 	ms_unit_t *un = (ms_unit_t *)p;
2910Sstevel@tonic-gate 	struct ms_comp	*mdcomp;
2920Sstevel@tonic-gate 	minor_t		mnum;
2930Sstevel@tonic-gate 	int		row;
2940Sstevel@tonic-gate 	int		i;
2950Sstevel@tonic-gate 	int		c;
2960Sstevel@tonic-gate 	int		ncomps;
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate 	mnum = MD_SID(un);
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	if (MD_UNIT(mnum) != NULL)
3010Sstevel@tonic-gate 		return (0);
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate 	MD_STATUS(un) = 0;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	/*
3060Sstevel@tonic-gate 	 * Reset all the is_open flags, these are probably set
3070Sstevel@tonic-gate 	 * cause they just came out of the database.
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	ncomps = 0;
3120Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
3130Sstevel@tonic-gate 		struct ms_row *mdr = &un->un_row[row];
3140Sstevel@tonic-gate 		ncomps += mdr->un_ncomp;
3150Sstevel@tonic-gate 	}
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
3180Sstevel@tonic-gate 		struct ms_row *mdr = &un->un_row[row];
3190Sstevel@tonic-gate 		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
3200Sstevel@tonic-gate 			struct ms_comp		*mdc;
3210Sstevel@tonic-gate 			set_t			setno;
3220Sstevel@tonic-gate 			md_dev64_t		tmpdev;
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 			mdc = &mdcomp[c++];
3250Sstevel@tonic-gate 			mdc->un_mirror.ms_flags &=
3260Sstevel@tonic-gate 			    ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED);
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 			if (!snarfing)
3290Sstevel@tonic-gate 				continue;
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 			setno = MD_MIN2SET(mnum);
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 			tmpdev = md_getdevnum(setno, mddb_getsidenum(setno),
3340Sstevel@tonic-gate 			    mdc->un_key, MD_NOTRUST_DEVT);
3350Sstevel@tonic-gate 			mdc->un_dev = tmpdev;
3360Sstevel@tonic-gate 			/*
3370Sstevel@tonic-gate 			 * Check for hotspares. If the hotspares haven't been
3380Sstevel@tonic-gate 			 * snarfed yet, stripe_open_all_devs() will do the
3390Sstevel@tonic-gate 			 * remapping of the dev's later.
3400Sstevel@tonic-gate 			 */
3410Sstevel@tonic-gate 			if (mdc->un_mirror.ms_hs_id != 0) {
3420Sstevel@tonic-gate 				mdc->un_mirror.ms_orig_dev = mdc->un_dev;
3430Sstevel@tonic-gate 				(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
3440Sstevel@tonic-gate 				    0, &mdc->un_mirror.ms_hs_id, NULL,
3450Sstevel@tonic-gate 				    &tmpdev, NULL);
3460Sstevel@tonic-gate 				mdc->un_dev = tmpdev;
3470Sstevel@tonic-gate 			}
3480Sstevel@tonic-gate 		}
3490Sstevel@tonic-gate 	}
3500Sstevel@tonic-gate 
3510Sstevel@tonic-gate 	MD_UNIT(mnum) = un;
3520Sstevel@tonic-gate 	return (0);
3530Sstevel@tonic-gate }
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate void
3560Sstevel@tonic-gate reset_stripe(ms_unit_t *un, minor_t mnum, int removing)
3570Sstevel@tonic-gate {
3580Sstevel@tonic-gate 	ms_comp_t	*mdcomp;
3590Sstevel@tonic-gate 	struct ms_row	*mdr;
3600Sstevel@tonic-gate 	int		i, c;
3610Sstevel@tonic-gate 	int		row;
3620Sstevel@tonic-gate 	int		nsv;
3630Sstevel@tonic-gate 	int		isv;
3640Sstevel@tonic-gate 	sv_dev_t	*sv;
3650Sstevel@tonic-gate 	mddb_recid_t	*recids;
3660Sstevel@tonic-gate 	mddb_recid_t	vtoc_id;
3670Sstevel@tonic-gate 	int		rid = 0;
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate 	md_destroy_unit_incore(mnum, &stripe_md_ops);
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 	MD_UNIT(mnum) = NULL;
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 	if (!removing)
3740Sstevel@tonic-gate 		return;
3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate 	nsv = 0;
3770Sstevel@tonic-gate 	/* Count the number of devices */
3780Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
3790Sstevel@tonic-gate 		mdr = &un->un_row[row];
3800Sstevel@tonic-gate 		nsv += mdr->un_ncomp;
3810Sstevel@tonic-gate 	}
3820Sstevel@tonic-gate 	sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP);
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	/*
3850Sstevel@tonic-gate 	 * allocate recids array.  since we may have to commit
3860Sstevel@tonic-gate 	 * underlying soft partition records, we need an array
3870Sstevel@tonic-gate 	 * of size: total number of components in stripe + 3
3880Sstevel@tonic-gate 	 * (one for the stripe itself, one for the hotspare, one
3890Sstevel@tonic-gate 	 * for the end marker).
3900Sstevel@tonic-gate 	 */
3910Sstevel@tonic-gate 	recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP);
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate 	/*
3940Sstevel@tonic-gate 	 * Save the md_dev64_t's and driver nm indexes.
3950Sstevel@tonic-gate 	 * Because after the mddb_deleterec() we will
3960Sstevel@tonic-gate 	 * not be able to access the unit structure.
3970Sstevel@tonic-gate 	 *
3980Sstevel@tonic-gate 	 * NOTE: Deleting the names before deleting the
3990Sstevel@tonic-gate 	 *	 unit structure would cause problems if
4000Sstevel@tonic-gate 	 *	 the machine crashed in between the two.
4010Sstevel@tonic-gate 	 */
4020Sstevel@tonic-gate 	isv = 0;
4030Sstevel@tonic-gate 	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
4060Sstevel@tonic-gate 		mdr = &un->un_row[row];
4070Sstevel@tonic-gate 		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
4080Sstevel@tonic-gate 			struct ms_comp	*mdc;
4090Sstevel@tonic-gate 			md_dev64_t	child_dev;
4100Sstevel@tonic-gate 			md_unit_t	*child_un;
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 			mdc = &mdcomp[c++];
4130Sstevel@tonic-gate 			if (mdc->un_mirror.ms_hs_id != 0) {
4140Sstevel@tonic-gate 				mdkey_t		hs_key;
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 				hs_key = mdc->un_mirror.ms_hs_key;
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 				mdc->un_dev = mdc->un_mirror.ms_orig_dev;
4190Sstevel@tonic-gate 				mdc->un_start_block =
4200Sstevel@tonic-gate 				    mdc->un_mirror.ms_orig_blk;
4210Sstevel@tonic-gate 				mdc->un_mirror.ms_hs_id = 0;
4220Sstevel@tonic-gate 				mdc->un_mirror.ms_hs_key = 0;
4230Sstevel@tonic-gate 				mdc->un_mirror.ms_orig_dev = 0;
4240Sstevel@tonic-gate 				recids[0] = 0;
4250Sstevel@tonic-gate 				recids[1] = 0;	/* recids[1] filled in below */
4260Sstevel@tonic-gate 				recids[2] = 0;
4270Sstevel@tonic-gate 				(void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id,
4280Sstevel@tonic-gate 				    0, 0, &recids[0], &hs_key, NULL, NULL);
4290Sstevel@tonic-gate 				mddb_commitrecs_wrapper(recids);
4300Sstevel@tonic-gate 			}
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate 			/*
4330Sstevel@tonic-gate 			 * check if we've got metadevice below us and
4340Sstevel@tonic-gate 			 * deparent it if we do.
4350Sstevel@tonic-gate 			 * NOTE: currently soft partitions are the
4360Sstevel@tonic-gate 			 * the only metadevices stripes can be
4370Sstevel@tonic-gate 			 * built on top of.
4380Sstevel@tonic-gate 			 */
4390Sstevel@tonic-gate 			child_dev = mdc->un_dev;
4400Sstevel@tonic-gate 			if (md_getmajor(child_dev) == md_major) {
4410Sstevel@tonic-gate 				child_un = MD_UNIT(md_getminor(child_dev));
4420Sstevel@tonic-gate 				md_reset_parent(child_dev);
4430Sstevel@tonic-gate 				recids[rid++] = MD_RECID(child_un);
4440Sstevel@tonic-gate 			}
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 			sv[isv].setno = MD_MIN2SET(mnum);
4470Sstevel@tonic-gate 			sv[isv++].key = mdc->un_key;
4480Sstevel@tonic-gate 		}
4490Sstevel@tonic-gate 	}
4500Sstevel@tonic-gate 
4510Sstevel@tonic-gate 	recids[rid++] = un->c.un_record_id;
4520Sstevel@tonic-gate 	recids[rid] = 0;	/* filled in below */
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate 	/*
4550Sstevel@tonic-gate 	 * Decrement the HSP reference count and
4560Sstevel@tonic-gate 	 * remove the knowledge of the HSP from the unit struct.
4570Sstevel@tonic-gate 	 * This is done atomically to remove a window.
4580Sstevel@tonic-gate 	 */
4590Sstevel@tonic-gate 	if (un->un_hsp_id != -1) {
4600Sstevel@tonic-gate 		(void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
4610Sstevel@tonic-gate 		    &recids[rid++], NULL, NULL, NULL);
4620Sstevel@tonic-gate 		un->un_hsp_id = -1;
4630Sstevel@tonic-gate 	}
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate 	/* set end marker and commit records */
4660Sstevel@tonic-gate 	recids[rid] = 0;
4670Sstevel@tonic-gate 	mddb_commitrecs_wrapper(recids);
4680Sstevel@tonic-gate 
4690Sstevel@tonic-gate 	vtoc_id = un->c.un_vtoc_id;
4700Sstevel@tonic-gate 
4710Sstevel@tonic-gate 	/* Remove the unit structure */
4720Sstevel@tonic-gate 	mddb_deleterec_wrapper(un->c.un_record_id);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 	/* Remove the vtoc, if present */
4750Sstevel@tonic-gate 	if (vtoc_id)
4760Sstevel@tonic-gate 		mddb_deleterec_wrapper(vtoc_id);
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
4790Sstevel@tonic-gate 	    MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));
4800Sstevel@tonic-gate 	md_rem_names(sv, nsv);
4810Sstevel@tonic-gate 	kmem_free(sv, sizeof (sv_dev_t) * nsv);
4820Sstevel@tonic-gate 	kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3));
4830Sstevel@tonic-gate }
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate static void
4860Sstevel@tonic-gate stripe_error(md_sps_t *ps)
4870Sstevel@tonic-gate {
4880Sstevel@tonic-gate 	struct buf	*pb = ps->ps_bp;
4890Sstevel@tonic-gate 	mdi_unit_t	*ui = ps->ps_ui;
4900Sstevel@tonic-gate 	md_dev64_t	dev = ps->ps_errcomp->un_dev;
4910Sstevel@tonic-gate 	md_dev64_t	md_dev = md_expldev(pb->b_edev);
4920Sstevel@tonic-gate 	char		*str;
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate 	if (pb->b_flags & B_READ) {
4950Sstevel@tonic-gate 		ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_READERR;
4960Sstevel@tonic-gate 		str = "read";
4970Sstevel@tonic-gate 	} else {
4980Sstevel@tonic-gate 		ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_WRTERR;
4990Sstevel@tonic-gate 		str = "write";
5000Sstevel@tonic-gate 	}
5010Sstevel@tonic-gate 	if (!(ps->ps_flags & MD_SPS_DONTFREE)) {
5020Sstevel@tonic-gate 		if (MUTEX_HELD(&ps->ps_mx)) {
5030Sstevel@tonic-gate 			mutex_exit(&ps->ps_mx);
5040Sstevel@tonic-gate 		}
5050Sstevel@tonic-gate 	} else {
5060Sstevel@tonic-gate 		ASSERT(panicstr);
5070Sstevel@tonic-gate 	}
5080Sstevel@tonic-gate 	SPS_FREE(stripe_parent_cache, ps);
5090Sstevel@tonic-gate 	pb->b_flags |= B_ERROR;
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate 	md_kstat_done(ui, pb, 0);
5120Sstevel@tonic-gate 	md_unit_readerexit(ui);
5130Sstevel@tonic-gate 	md_biodone(pb);
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 	cmn_err(CE_WARN, "md: %s: %s error on %s",
5160Sstevel@tonic-gate 	    md_shortname(md_getminor(md_dev)), str,
5170Sstevel@tonic-gate 	    md_devname(MD_DEV2SET(md_dev), dev, NULL, 0));
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate static int
5210Sstevel@tonic-gate stripe_done(struct buf *cb)
5220Sstevel@tonic-gate {
5230Sstevel@tonic-gate 	struct buf	*pb;
5240Sstevel@tonic-gate 	mdi_unit_t	*ui;
5250Sstevel@tonic-gate 	md_sps_t	*ps;
5260Sstevel@tonic-gate 	md_scs_t	*cs;
5270Sstevel@tonic-gate 
5280Sstevel@tonic-gate 	/*LINTED*/
5290Sstevel@tonic-gate 	cs = (md_scs_t *)((caddr_t)cb - md_stripe_mcs_buf_off);
5300Sstevel@tonic-gate 	ps = cs->cs_ps;
5310Sstevel@tonic-gate 	pb = ps->ps_bp;
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate 	mutex_enter(&ps->ps_mx);
5340Sstevel@tonic-gate 	if (cb->b_flags & B_ERROR) {
5350Sstevel@tonic-gate 		ps->ps_flags |= MD_SPS_ERROR;
5360Sstevel@tonic-gate 		pb->b_error = cb->b_error;
5370Sstevel@tonic-gate 		ps->ps_errcomp = cs->cs_comp;
5380Sstevel@tonic-gate 	}
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	if (cb->b_flags & B_REMAPPED)
5410Sstevel@tonic-gate 		bp_mapout(cb);
5420Sstevel@tonic-gate 
5430Sstevel@tonic-gate 	ps->ps_frags--;
5440Sstevel@tonic-gate 	if (ps->ps_frags != 0) {
5450Sstevel@tonic-gate 		mutex_exit(&ps->ps_mx);
5460Sstevel@tonic-gate 		kmem_cache_free(stripe_child_cache, cs);
5470Sstevel@tonic-gate 		return (1);
5480Sstevel@tonic-gate 	}
5490Sstevel@tonic-gate 	kmem_cache_free(stripe_child_cache, cs);
5500Sstevel@tonic-gate 	if (ps->ps_flags & MD_SPS_ERROR) {
5510Sstevel@tonic-gate 		stripe_error(ps);
5520Sstevel@tonic-gate 		return (1);
5530Sstevel@tonic-gate 	}
5540Sstevel@tonic-gate 	ui = ps->ps_ui;
5550Sstevel@tonic-gate 	if (!(ps->ps_flags & MD_SPS_DONTFREE)) {
5560Sstevel@tonic-gate 		mutex_exit(&ps->ps_mx);
5570Sstevel@tonic-gate 	} else {
5580Sstevel@tonic-gate 		ASSERT(panicstr);
5590Sstevel@tonic-gate 	}
5600Sstevel@tonic-gate 	SPS_FREE(stripe_parent_cache, ps);
5610Sstevel@tonic-gate 	md_kstat_done(ui, pb, 0);
5620Sstevel@tonic-gate 	md_unit_readerexit(ui);
5630Sstevel@tonic-gate 	md_biodone(pb);
5640Sstevel@tonic-gate 	return (0);
5650Sstevel@tonic-gate }
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate /*
5690Sstevel@tonic-gate  * This routine does the mapping from virtual (dev, blkno) of a metapartition
5700Sstevel@tonic-gate  * to the real (dev, blkno) of a real disk partition.
5710Sstevel@tonic-gate  * It goes to the md_conf[] table to find out the correct real partition
5720Sstevel@tonic-gate  * dev and block number for this buffer.
5730Sstevel@tonic-gate  *
5740Sstevel@tonic-gate  * A single buf request can not go across real disk partition boundary.
5750Sstevel@tonic-gate  * When the virtual request specified by (dev, blkno) spans more than one
5760Sstevel@tonic-gate  * real partition, md_mapbuf will return 1. Then the caller should prepare
5770Sstevel@tonic-gate  * another real buf and continue calling md_mapbuf to do the mapping until
5780Sstevel@tonic-gate  * it returns 0.
5790Sstevel@tonic-gate  *
5800Sstevel@tonic-gate  */
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate static int
5830Sstevel@tonic-gate md_mapbuf(
5840Sstevel@tonic-gate 	ms_unit_t	*un,
5850Sstevel@tonic-gate 	diskaddr_t	blkno,
5860Sstevel@tonic-gate 	u_longlong_t	bcount,
5870Sstevel@tonic-gate 	buf_t		*bp,	/* if bp==NULL, skip bp updates */
5880Sstevel@tonic-gate 	ms_comp_t	**mdc)	/* if bp==NULL, skip mdc update */
5890Sstevel@tonic-gate {
5900Sstevel@tonic-gate 	struct ms_row	*mdr;
5910Sstevel@tonic-gate 	struct ms_comp	*mdcomp;
5920Sstevel@tonic-gate 	diskaddr_t	stripe_blk;
5930Sstevel@tonic-gate 	diskaddr_t	fragment, blk_in_row, endblk;
5940Sstevel@tonic-gate 	offset_t	interlace;
5950Sstevel@tonic-gate 	size_t		dev_index;
5960Sstevel@tonic-gate 	int		row_index, more;
5970Sstevel@tonic-gate 	extern unsigned md_maxphys;
5980Sstevel@tonic-gate 	/* Work var's when bp==NULL */
5990Sstevel@tonic-gate 	u_longlong_t	wb_bcount;
6000Sstevel@tonic-gate 	diskaddr_t	wb_blkno;
6010Sstevel@tonic-gate 	md_dev64_t	wb_edev;
6020Sstevel@tonic-gate 	ms_comp_t	*wmdc;
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 	/*
6050Sstevel@tonic-gate 	 * Do a real calculation to derive the minor device of the
6060Sstevel@tonic-gate 	 * Virtual Disk, which in turn will let us derive the
6070Sstevel@tonic-gate 	 * device/minor of the underlying real device.
6080Sstevel@tonic-gate 	 */
6090Sstevel@tonic-gate 
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	for (row_index = 0; row_index < un->un_nrows; row_index++) {
6120Sstevel@tonic-gate 		mdr = &un->un_row[row_index];
6130Sstevel@tonic-gate 		if (blkno < mdr->un_cum_blocks)
6140Sstevel@tonic-gate 			break;
6150Sstevel@tonic-gate 	}
6160Sstevel@tonic-gate 	ASSERT(row_index != un->un_nrows);
6170Sstevel@tonic-gate 
6180Sstevel@tonic-gate 	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate 	blk_in_row = blkno - mdr->un_cum_blocks + mdr->un_blocks;
6210Sstevel@tonic-gate 	endblk = (diskaddr_t)(blkno + howmany(bcount, DEV_BSIZE));
6220Sstevel@tonic-gate 	if (mdr->un_ncomp == 1) { /* No striping */
6230Sstevel@tonic-gate 		if (endblk > mdr->un_cum_blocks) {
6240Sstevel@tonic-gate 			wb_bcount = ldbtob(mdr->un_cum_blocks - blkno);
6250Sstevel@tonic-gate 			if ((row_index + 1) == un->un_nrows)
6260Sstevel@tonic-gate 				more = 0;
6270Sstevel@tonic-gate 			else
6280Sstevel@tonic-gate 				more = 1;
6290Sstevel@tonic-gate 		} else {
6300Sstevel@tonic-gate 			wb_bcount = bcount;
6310Sstevel@tonic-gate 			more = 0;
6320Sstevel@tonic-gate 		}
6330Sstevel@tonic-gate 		wmdc = &mdcomp[mdr->un_icomp];
6340Sstevel@tonic-gate 		wb_blkno = blk_in_row;
6350Sstevel@tonic-gate 	} else { /* Have striping */
6360Sstevel@tonic-gate 		interlace = mdr->un_interlace;
6370Sstevel@tonic-gate 		fragment = blk_in_row % interlace;
6380Sstevel@tonic-gate 		if (bcount > ldbtob(interlace - fragment)) {
6390Sstevel@tonic-gate 			more = 1;
6400Sstevel@tonic-gate 			wb_bcount = ldbtob(interlace - fragment);
6410Sstevel@tonic-gate 		} else {
6420Sstevel@tonic-gate 			more = 0;
6430Sstevel@tonic-gate 			wb_bcount = bcount;
6440Sstevel@tonic-gate 		}
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 		stripe_blk = blk_in_row / interlace;
6470Sstevel@tonic-gate 		dev_index = (size_t)(stripe_blk % mdr->un_ncomp);
6480Sstevel@tonic-gate 		wmdc = &mdcomp[mdr->un_icomp + dev_index];
6490Sstevel@tonic-gate 		wb_blkno = (diskaddr_t)(((stripe_blk / mdr->un_ncomp)
6500Sstevel@tonic-gate 			* interlace) + fragment);
6510Sstevel@tonic-gate 	}
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 	wb_blkno += wmdc->un_start_block;
6540Sstevel@tonic-gate 	wb_edev = wmdc->un_dev;
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	/* only break up the I/O if we're not built on another metadevice */
6570Sstevel@tonic-gate 	if ((md_getmajor(wb_edev) != md_major) && (wb_bcount > md_maxphys)) {
6580Sstevel@tonic-gate 		wb_bcount = md_maxphys;
6590Sstevel@tonic-gate 		more = 1;
6600Sstevel@tonic-gate 	}
6610Sstevel@tonic-gate 	if (bp != (buf_t *)NULL) {
6620Sstevel@tonic-gate 		/*
6630Sstevel@tonic-gate 		 * wb_bcount is limited by md_maxphys which is 'int'
6640Sstevel@tonic-gate 		 */
6650Sstevel@tonic-gate 		bp->b_bcount = (size_t)wb_bcount;
6660Sstevel@tonic-gate 		bp->b_lblkno = wb_blkno;
6670Sstevel@tonic-gate 		bp->b_edev = md_dev64_to_dev(wb_edev);
6680Sstevel@tonic-gate 		*mdc = wmdc;
6690Sstevel@tonic-gate 	}
6700Sstevel@tonic-gate 	return (more);
6710Sstevel@tonic-gate }
6720Sstevel@tonic-gate 
6730Sstevel@tonic-gate static void
6740Sstevel@tonic-gate md_stripe_strategy(buf_t *pb, int flag, void *private)
6750Sstevel@tonic-gate {
6760Sstevel@tonic-gate 	md_sps_t	*ps;
6770Sstevel@tonic-gate 	md_scs_t	*cs;
6780Sstevel@tonic-gate 	int		doing_writes;
6790Sstevel@tonic-gate 	int		more;
6800Sstevel@tonic-gate 	ms_unit_t	*un;
6810Sstevel@tonic-gate 	mdi_unit_t	*ui;
6820Sstevel@tonic-gate 	size_t		current_count;
6830Sstevel@tonic-gate 	diskaddr_t	current_blkno;
6840Sstevel@tonic-gate 	off_t		current_offset;
6850Sstevel@tonic-gate 	buf_t		*cb;		/* child buf pointer */
6860Sstevel@tonic-gate 	set_t		setno;
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	setno = MD_MIN2SET(getminor(pb->b_edev));
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 	/*
6910Sstevel@tonic-gate 	 * When doing IO to a multi owner meta device, check if set is halted.
6920Sstevel@tonic-gate 	 * We do this check without the needed lock held, for performance
6930Sstevel@tonic-gate 	 * reasons.
6940Sstevel@tonic-gate 	 * If an IO just slips through while the set is locked via an
6950Sstevel@tonic-gate 	 * MD_MN_SUSPEND_SET, we don't care about it.
6960Sstevel@tonic-gate 	 * Only check for a suspended set if we are a top-level i/o request
6970Sstevel@tonic-gate 	 * (MD_STR_NOTTOP is cleared in 'flag').
6980Sstevel@tonic-gate 	 */
6990Sstevel@tonic-gate 	if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) ==
7000Sstevel@tonic-gate 	    (MD_SET_HALTED | MD_SET_MNSET)) {
7010Sstevel@tonic-gate 		if ((flag & MD_STR_NOTTOP) == 0) {
7020Sstevel@tonic-gate 			mutex_enter(&md_mx);
7030Sstevel@tonic-gate 			/* Here we loop until the set is no longer halted */
7040Sstevel@tonic-gate 			while (md_set[setno].s_status & MD_SET_HALTED) {
7050Sstevel@tonic-gate 				cv_wait(&md_cv, &md_mx);
7060Sstevel@tonic-gate 			}
7070Sstevel@tonic-gate 			mutex_exit(&md_mx);
7080Sstevel@tonic-gate 		}
7090Sstevel@tonic-gate 	}
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 	ui = MDI_UNIT(getminor(pb->b_edev));
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	md_kstat_waitq_enter(ui);
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate 	un = (ms_unit_t *)md_unit_readerlock(ui);
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	if ((flag & MD_NOBLOCK) == 0) {
7180Sstevel@tonic-gate 		if (md_inc_iocount(setno) != 0) {
7190Sstevel@tonic-gate 			pb->b_flags |= B_ERROR;
7200Sstevel@tonic-gate 			pb->b_error = ENXIO;
7210Sstevel@tonic-gate 			pb->b_resid = pb->b_bcount;
7220Sstevel@tonic-gate 			md_unit_readerexit(ui);
7230Sstevel@tonic-gate 			biodone(pb);
7240Sstevel@tonic-gate 			return;
7250Sstevel@tonic-gate 		}
7260Sstevel@tonic-gate 	} else {
7270Sstevel@tonic-gate 		md_inc_iocount_noblock(setno);
7280Sstevel@tonic-gate 	}
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	if (!(flag & MD_STR_NOTTOP)) {
7310Sstevel@tonic-gate 		if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) {
7320Sstevel@tonic-gate 			md_kstat_waitq_exit(ui);
7330Sstevel@tonic-gate 			return;
7340Sstevel@tonic-gate 		}
7350Sstevel@tonic-gate 	}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 	ps = kmem_cache_alloc(stripe_parent_cache, MD_ALLOCFLAGS);
7380Sstevel@tonic-gate 	stripe_parent_init(ps);
7390Sstevel@tonic-gate 
7400Sstevel@tonic-gate 	/*
7410Sstevel@tonic-gate 	 * Save essential information from the original buffhdr
7420Sstevel@tonic-gate 	 * in the md_save structure.
7430Sstevel@tonic-gate 	 */
7440Sstevel@tonic-gate 	ps->ps_un = un;
7450Sstevel@tonic-gate 	ps->ps_ui = ui;
7460Sstevel@tonic-gate 	ps->ps_bp = pb;
7470Sstevel@tonic-gate 	ps->ps_addr = pb->b_un.b_addr;
7480Sstevel@tonic-gate 
7490Sstevel@tonic-gate 	if ((pb->b_flags & B_READ) == 0)
7500Sstevel@tonic-gate 		doing_writes = 1;
7510Sstevel@tonic-gate 	else
7520Sstevel@tonic-gate 		doing_writes = 0;
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate 	current_count = pb->b_bcount;
7560Sstevel@tonic-gate 	current_blkno = pb->b_lblkno;
7570Sstevel@tonic-gate 	current_offset  = 0;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	if (!(flag & MD_STR_NOTTOP) && panicstr)
7600Sstevel@tonic-gate 		ps->ps_flags |= MD_SPS_DONTFREE;
7610Sstevel@tonic-gate 
7620Sstevel@tonic-gate 	md_kstat_waitq_to_runq(ui);
7630Sstevel@tonic-gate 
7640Sstevel@tonic-gate 	ps->ps_frags++;
7650Sstevel@tonic-gate 	do {
7660Sstevel@tonic-gate 		cs = kmem_cache_alloc(stripe_child_cache, MD_ALLOCFLAGS);
7670Sstevel@tonic-gate 		stripe_child_init(cs);
7680Sstevel@tonic-gate 		cb = &cs->cs_buf;
7690Sstevel@tonic-gate 		cs->cs_ps = ps;
7700Sstevel@tonic-gate 		more = md_mapbuf(un, current_blkno, current_count, cb,
7710Sstevel@tonic-gate 			&cs->cs_comp);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 		cb = md_bioclone(pb, current_offset, cb->b_bcount, cb->b_edev,
7740Sstevel@tonic-gate 				cb->b_lblkno, stripe_done, cb, KM_NOSLEEP);
7750Sstevel@tonic-gate 		/*
7760Sstevel@tonic-gate 		 * Do these calculations now,
7770Sstevel@tonic-gate 		 *  so that we pickup a valid b_bcount from the chld_bp.
7780Sstevel@tonic-gate 		 */
7790Sstevel@tonic-gate 		current_offset += cb->b_bcount;
7800Sstevel@tonic-gate 		current_count -=  cb->b_bcount;
7810Sstevel@tonic-gate 		current_blkno +=  (diskaddr_t)(lbtodb(cb->b_bcount));
7820Sstevel@tonic-gate 
7830Sstevel@tonic-gate 		if (more) {
7840Sstevel@tonic-gate 			mutex_enter(&ps->ps_mx);
7850Sstevel@tonic-gate 			ps->ps_frags++;
7860Sstevel@tonic-gate 			mutex_exit(&ps->ps_mx);
7870Sstevel@tonic-gate 		}
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 		if (doing_writes &&
7900Sstevel@tonic-gate 		    cs->cs_comp->un_mirror.ms_flags & MDM_S_NOWRITE) {
7910Sstevel@tonic-gate 			(void) stripe_done(cb);
7920Sstevel@tonic-gate 			continue;
7930Sstevel@tonic-gate 		}
7940Sstevel@tonic-gate 		md_call_strategy(cb, flag, private);
7950Sstevel@tonic-gate 	} while (more);
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate 	if (!(flag & MD_STR_NOTTOP) && panicstr) {
7980Sstevel@tonic-gate 		while (!(ps->ps_flags & MD_SPS_DONE)) {
7990Sstevel@tonic-gate 			md_daemon(1, &md_done_daemon);
8000Sstevel@tonic-gate 			drv_usecwait(10);
8010Sstevel@tonic-gate 		}
8020Sstevel@tonic-gate 		kmem_cache_free(stripe_parent_cache, ps);
8030Sstevel@tonic-gate 	}
8040Sstevel@tonic-gate }
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate static int
8070Sstevel@tonic-gate stripe_snarf(md_snarfcmd_t cmd, set_t setno)
8080Sstevel@tonic-gate {
8090Sstevel@tonic-gate 	ms_unit_t	*un;
8100Sstevel@tonic-gate 	mddb_recid_t	recid;
8110Sstevel@tonic-gate 	int		gotsomething;
8120Sstevel@tonic-gate 	int		all_stripes_gotten;
8130Sstevel@tonic-gate 	mddb_type_t	typ1;
8140Sstevel@tonic-gate 	mddb_de_ic_t	*dep;
8150Sstevel@tonic-gate 	mddb_rb32_t	*rbp;
8160Sstevel@tonic-gate 	size_t		newreqsize;
8170Sstevel@tonic-gate 	ms_unit_t	*big_un;
8180Sstevel@tonic-gate 	ms_unit32_od_t	*small_un;
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 
8210Sstevel@tonic-gate 	if (cmd == MD_SNARF_CLEANUP)
8220Sstevel@tonic-gate 		return (0);
8230Sstevel@tonic-gate 
8240Sstevel@tonic-gate 	all_stripes_gotten = 1;
8250Sstevel@tonic-gate 	gotsomething = 0;
8260Sstevel@tonic-gate 
8270Sstevel@tonic-gate 	typ1 = (mddb_type_t)md_getshared_key(setno,
8280Sstevel@tonic-gate 	    stripe_md_ops.md_driver.md_drivername);
8290Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
8300Sstevel@tonic-gate 
8310Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
8320Sstevel@tonic-gate 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
8330Sstevel@tonic-gate 			continue;
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate 		dep = mddb_getrecdep(recid);
8360Sstevel@tonic-gate 		dep->de_flags = MDDB_F_STRIPE;
8370Sstevel@tonic-gate 		rbp = dep->de_rb;
8380Sstevel@tonic-gate 
8390Sstevel@tonic-gate 		if ((rbp->rb_revision == MDDB_REV_RB) &&
8400Sstevel@tonic-gate 		    ((rbp->rb_private & MD_PRV_CONVD) == 0)) {
8410Sstevel@tonic-gate 			/*
8420Sstevel@tonic-gate 			 * This means, we have an old and small record
8430Sstevel@tonic-gate 			 * and this record hasn't already been converted.
8440Sstevel@tonic-gate 			 * Before we create an incore metadevice from this
8450Sstevel@tonic-gate 			 * we have to convert it to a big record.
8460Sstevel@tonic-gate 			 */
8470Sstevel@tonic-gate 			small_un = (ms_unit32_od_t *)mddb_getrecaddr(recid);
8480Sstevel@tonic-gate 			newreqsize = get_big_stripe_req_size(small_un,
8490Sstevel@tonic-gate 					COMPLETE_STRUCTURE);
8500Sstevel@tonic-gate 			big_un = (ms_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
8510Sstevel@tonic-gate 			stripe_convert((caddr_t)small_un, (caddr_t)big_un,
8520Sstevel@tonic-gate 			    SMALL_2_BIG);
8530Sstevel@tonic-gate 			kmem_free(small_un, dep->de_reqsize);
8540Sstevel@tonic-gate 			dep->de_rb_userdata = big_un;
8550Sstevel@tonic-gate 			dep->de_reqsize = newreqsize;
8560Sstevel@tonic-gate 			un = big_un;
8570Sstevel@tonic-gate 			rbp->rb_private |= MD_PRV_CONVD;
8580Sstevel@tonic-gate 		} else {
8590Sstevel@tonic-gate 			/* Big device */
8600Sstevel@tonic-gate 			un = (ms_unit_t *)mddb_getrecaddr(recid);
8610Sstevel@tonic-gate 		}
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate 		/* Set revision and flag accordingly */
8640Sstevel@tonic-gate 		if (rbp->rb_revision == MDDB_REV_RB) {
8650Sstevel@tonic-gate 			un->c.un_revision = MD_32BIT_META_DEV;
8660Sstevel@tonic-gate 		} else {
8670Sstevel@tonic-gate 			un->c.un_revision = MD_64BIT_META_DEV;
8680Sstevel@tonic-gate 			un->c.un_flag |= MD_EFILABEL;
8690Sstevel@tonic-gate 		}
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 		/* Create minor node for snarfed unit. */
8720Sstevel@tonic-gate 		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate 		if (MD_UNIT(MD_SID(un)) != NULL) {
8750Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
8760Sstevel@tonic-gate 			continue;
8770Sstevel@tonic-gate 		}
8780Sstevel@tonic-gate 		all_stripes_gotten = 0;
8790Sstevel@tonic-gate 		if (stripe_build_incore((void *)un, 1) == 0) {
8800Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_GOTIT);
8810Sstevel@tonic-gate 			md_create_unit_incore(MD_SID(un), &stripe_md_ops, 0);
8820Sstevel@tonic-gate 			gotsomething = 1;
8830Sstevel@tonic-gate 		}
8840Sstevel@tonic-gate 	}
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	if (!all_stripes_gotten)
8870Sstevel@tonic-gate 		return (gotsomething);
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
8900Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
8910Sstevel@tonic-gate 		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
8920Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate 	return (0);
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate 
8970Sstevel@tonic-gate static int
8980Sstevel@tonic-gate stripe_halt(md_haltcmd_t cmd, set_t setno)
8990Sstevel@tonic-gate {
9000Sstevel@tonic-gate 	int		i;
9010Sstevel@tonic-gate 	mdi_unit_t	*ui;
9020Sstevel@tonic-gate 	minor_t		mnum;
9030Sstevel@tonic-gate 
9040Sstevel@tonic-gate 	if (cmd == MD_HALT_CLOSE)
9050Sstevel@tonic-gate 		return (0);
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate 	if (cmd == MD_HALT_OPEN)
9080Sstevel@tonic-gate 		return (0);
9090Sstevel@tonic-gate 
9100Sstevel@tonic-gate 	if (cmd == MD_HALT_UNLOAD)
9110Sstevel@tonic-gate 		return (0);
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate 	if (cmd == MD_HALT_CHECK) {
9140Sstevel@tonic-gate 		for (i = 0; i < md_nunits; i++) {
9150Sstevel@tonic-gate 			mnum = MD_MKMIN(setno, i);
9160Sstevel@tonic-gate 			if ((ui = MDI_UNIT(mnum)) == NULL)
9170Sstevel@tonic-gate 				continue;
9180Sstevel@tonic-gate 			if (ui->ui_opsindex != stripe_md_ops.md_selfindex)
9190Sstevel@tonic-gate 				continue;
9200Sstevel@tonic-gate 			if (md_unit_isopen(ui))
9210Sstevel@tonic-gate 				return (1);
9220Sstevel@tonic-gate 		}
9230Sstevel@tonic-gate 		return (0);
9240Sstevel@tonic-gate 	}
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate 	if (cmd != MD_HALT_DOIT)
9270Sstevel@tonic-gate 		return (1);
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate 	for (i = 0; i < md_nunits; i++) {
9300Sstevel@tonic-gate 		mnum = MD_MKMIN(setno, i);
9310Sstevel@tonic-gate 		if ((ui = MDI_UNIT(mnum)) == NULL)
9320Sstevel@tonic-gate 			continue;
9330Sstevel@tonic-gate 		if (ui->ui_opsindex != stripe_md_ops.md_selfindex)
9340Sstevel@tonic-gate 			continue;
9350Sstevel@tonic-gate 		reset_stripe((ms_unit_t *)MD_UNIT(mnum), mnum, 0);
9360Sstevel@tonic-gate 	}
9370Sstevel@tonic-gate 
9380Sstevel@tonic-gate 	return (0);
9390Sstevel@tonic-gate }
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate /*ARGSUSED3*/
9420Sstevel@tonic-gate static int
9430Sstevel@tonic-gate stripe_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags)
9440Sstevel@tonic-gate {
9450Sstevel@tonic-gate 	minor_t		mnum = getminor(*dev);
9460Sstevel@tonic-gate 	mdi_unit_t	*ui = MDI_UNIT(mnum);
9470Sstevel@tonic-gate 	ms_unit_t	*un;
9480Sstevel@tonic-gate 	int		err = 0;
949*46Sskamm 	set_t		setno;
950*46Sskamm 
951*46Sskamm 	/*
952*46Sskamm 	 * When doing an open of a multi owner metadevice, check to see if this
953*46Sskamm 	 * node is a starting node and if a reconfig cycle is underway.
954*46Sskamm 	 * If so, the system isn't sufficiently set up enough to handle the
955*46Sskamm 	 * open (which involves I/O during sp_validate), so fail with ENXIO.
956*46Sskamm 	 */
957*46Sskamm 	setno = MD_MIN2SET(mnum);
958*46Sskamm 	if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) ==
959*46Sskamm 	    (MD_SET_MNSET | MD_SET_MN_START_RC)) {
960*46Sskamm 			return (ENXIO);
961*46Sskamm 	}
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	/* single thread */
9640Sstevel@tonic-gate 	un = (ms_unit_t *)md_unit_openclose_enter(ui);
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate 	/* open devices, if necessary */
9670Sstevel@tonic-gate 	if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) {
9680Sstevel@tonic-gate 		if ((err = stripe_open_all_devs(un, md_oflags)) != 0) {
9690Sstevel@tonic-gate 			goto out;
9700Sstevel@tonic-gate 		}
9710Sstevel@tonic-gate 	}
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate 	/* count open */
9740Sstevel@tonic-gate 	if ((err = md_unit_incopen(mnum, flag, otyp)) != 0)
9750Sstevel@tonic-gate 		goto out;
9760Sstevel@tonic-gate 
9770Sstevel@tonic-gate 	/* unlock, return success */
9780Sstevel@tonic-gate out:
9790Sstevel@tonic-gate 	md_unit_openclose_exit(ui);
9800Sstevel@tonic-gate 	return (err);
9810Sstevel@tonic-gate }
9820Sstevel@tonic-gate 
9830Sstevel@tonic-gate /*ARGSUSED1*/
9840Sstevel@tonic-gate static int
9850Sstevel@tonic-gate stripe_close(
9860Sstevel@tonic-gate 	dev_t		dev,
9870Sstevel@tonic-gate 	int		flag,
9880Sstevel@tonic-gate 	int		otyp,
9890Sstevel@tonic-gate 	cred_t		*cred_p,
9900Sstevel@tonic-gate 	int		md_cflags
9910Sstevel@tonic-gate )
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate 	minor_t		mnum = getminor(dev);
9940Sstevel@tonic-gate 	mdi_unit_t	*ui = MDI_UNIT(mnum);
9950Sstevel@tonic-gate 	ms_unit_t	*un;
9960Sstevel@tonic-gate 	int		err = 0;
9970Sstevel@tonic-gate 
9980Sstevel@tonic-gate 	/* single thread */
9990Sstevel@tonic-gate 	un = (ms_unit_t *)md_unit_openclose_enter(ui);
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 	/* count closed */
10020Sstevel@tonic-gate 	if ((err = md_unit_decopen(mnum, otyp)) != 0)
10030Sstevel@tonic-gate 		goto out;
10040Sstevel@tonic-gate 
10050Sstevel@tonic-gate 	/* close devices, if necessary */
10060Sstevel@tonic-gate 	if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) {
10070Sstevel@tonic-gate 		stripe_close_all_devs(un, md_cflags);
10080Sstevel@tonic-gate 	}
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate 	/* unlock, return success */
10110Sstevel@tonic-gate out:
10120Sstevel@tonic-gate 	md_unit_openclose_exit(ui);
10130Sstevel@tonic-gate 	return (err);
10140Sstevel@tonic-gate }
10150Sstevel@tonic-gate 
10160Sstevel@tonic-gate 
10170Sstevel@tonic-gate static struct buf dumpbuf;
10180Sstevel@tonic-gate 
10190Sstevel@tonic-gate /*
10200Sstevel@tonic-gate  * This routine dumps memory to the disk.  It assumes that the memory has
10210Sstevel@tonic-gate  * already been mapped into mainbus space.  It is called at disk interrupt
10220Sstevel@tonic-gate  * priority when the system is in trouble.
10230Sstevel@tonic-gate  *
10240Sstevel@tonic-gate  */
10250Sstevel@tonic-gate static int
10260Sstevel@tonic-gate stripe_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
10270Sstevel@tonic-gate {
10280Sstevel@tonic-gate 	ms_unit_t	*un;
10290Sstevel@tonic-gate 	buf_t		*bp;
10300Sstevel@tonic-gate 	ms_comp_t	*mdc;
10310Sstevel@tonic-gate 	u_longlong_t	nb;
10320Sstevel@tonic-gate 	diskaddr_t	mapblk;
10330Sstevel@tonic-gate 	int		result;
10340Sstevel@tonic-gate 	int		more;
10350Sstevel@tonic-gate 	int		saveresult = 0;
10360Sstevel@tonic-gate 
10370Sstevel@tonic-gate 	/*
10380Sstevel@tonic-gate 	 * Don't need to grab the unit lock.
10390Sstevel@tonic-gate 	 * Cause nothing else is suppose to be happenning.
10400Sstevel@tonic-gate 	 * Also dump is not suppose to sleep.
10410Sstevel@tonic-gate 	 */
10420Sstevel@tonic-gate 	un = (ms_unit_t *)MD_UNIT(getminor(dev));
10430Sstevel@tonic-gate 
10440Sstevel@tonic-gate 	if ((diskaddr_t)blkno >= un->c.un_total_blocks)
10450Sstevel@tonic-gate 		return (EINVAL);
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 	if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks)
10480Sstevel@tonic-gate 		return (EINVAL);
10490Sstevel@tonic-gate 
10500Sstevel@tonic-gate 	bp = &dumpbuf;
10510Sstevel@tonic-gate 	nb = ldbtob(nblk);
10520Sstevel@tonic-gate 	do {
10530Sstevel@tonic-gate 		bzero((caddr_t)bp, sizeof (*bp));
10540Sstevel@tonic-gate 		more = md_mapbuf(un, (diskaddr_t)blkno, nb, bp, &mdc);
10550Sstevel@tonic-gate 		nblk = btodb(bp->b_bcount);
10560Sstevel@tonic-gate 		mapblk = bp->b_lblkno;
10570Sstevel@tonic-gate 		if (!(mdc->un_mirror.ms_flags & MDM_S_NOWRITE)) {
10580Sstevel@tonic-gate 			/*
10590Sstevel@tonic-gate 			 * bdev_dump() is currently only able to take
10600Sstevel@tonic-gate 			 * 32 bit wide blkno's.
10610Sstevel@tonic-gate 			 */
10620Sstevel@tonic-gate 			result = bdev_dump(bp->b_edev, addr, (daddr_t)mapblk,
10630Sstevel@tonic-gate 						nblk);
10640Sstevel@tonic-gate 			if (result)
10650Sstevel@tonic-gate 				saveresult = result;
10660Sstevel@tonic-gate 		}
10670Sstevel@tonic-gate 
10680Sstevel@tonic-gate 		nb -= bp->b_bcount;
10690Sstevel@tonic-gate 		addr += bp->b_bcount;
10700Sstevel@tonic-gate 		blkno += nblk;
10710Sstevel@tonic-gate 	} while (more);
10720Sstevel@tonic-gate 
10730Sstevel@tonic-gate 	return (saveresult);
10740Sstevel@tonic-gate }
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate /*ARGSUSED*/
10770Sstevel@tonic-gate static intptr_t
10780Sstevel@tonic-gate stripe_shared_by_blk(
10790Sstevel@tonic-gate 	md_dev64_t dev,
10800Sstevel@tonic-gate 	void *junk,
10810Sstevel@tonic-gate 	diskaddr_t blkno,
10820Sstevel@tonic-gate 	u_longlong_t *cnt)
10830Sstevel@tonic-gate {
10840Sstevel@tonic-gate 	ms_unit_t	*un;
10850Sstevel@tonic-gate 	buf_t		bp;
10860Sstevel@tonic-gate 	ms_comp_t	*comp;
10870Sstevel@tonic-gate 
10880Sstevel@tonic-gate 	un = MD_UNIT(md_getminor(dev));
10890Sstevel@tonic-gate 	(void) md_mapbuf(un, blkno, ldbtob(*cnt), &bp, &comp);
10900Sstevel@tonic-gate 	*cnt = (u_longlong_t)lbtodb(bp.b_bcount);
10910Sstevel@tonic-gate 	return ((intptr_t)&comp->un_mirror);
10920Sstevel@tonic-gate }
10930Sstevel@tonic-gate 
10940Sstevel@tonic-gate /*
10950Sstevel@tonic-gate  * stripe_block_count_skip_size() returns the following values
10960Sstevel@tonic-gate  *	so that the logical to physical block mappings can
10970Sstevel@tonic-gate  *	be calculated without intimate knowledge of the underpinnings.
10980Sstevel@tonic-gate  *
10990Sstevel@tonic-gate  *	block - first logical block number of the device.
11000Sstevel@tonic-gate  *		block = [ # of blocks before THE row ] +
11010Sstevel@tonic-gate  *			[ # of blocks in THE row before the component ]
11020Sstevel@tonic-gate  *	count - # of segments (interlaced size).
11030Sstevel@tonic-gate  *	skip  - # of logical blocks between segments, or delta to
11040Sstevel@tonic-gate  *		  get to next segment
11050Sstevel@tonic-gate  *	size  - interlace size used for the block, count, skip.
11060Sstevel@tonic-gate  */
11070Sstevel@tonic-gate /*ARGSUSED*/
11080Sstevel@tonic-gate static intptr_t
11090Sstevel@tonic-gate stripe_block_count_skip_size(
11100Sstevel@tonic-gate 	md_dev64_t	 dev,
11110Sstevel@tonic-gate 	void		*junk,
11120Sstevel@tonic-gate 	int		ci,
11130Sstevel@tonic-gate 	diskaddr_t	*block,
11140Sstevel@tonic-gate 	size_t		*count,
11150Sstevel@tonic-gate 	u_longlong_t	*skip,
11160Sstevel@tonic-gate 	u_longlong_t	*size)
11170Sstevel@tonic-gate {
11180Sstevel@tonic-gate 	ms_unit_t	*un;
11190Sstevel@tonic-gate 	int		row;
11200Sstevel@tonic-gate 	struct ms_row	*mdr;
11210Sstevel@tonic-gate 	int		cmpcount = 0;
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 	un = MD_UNIT(md_getminor(dev));
11240Sstevel@tonic-gate 
11250Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
11260Sstevel@tonic-gate 		mdr = &un->un_row[row];
11270Sstevel@tonic-gate 		if ((mdr->un_ncomp + cmpcount) > ci)
11280Sstevel@tonic-gate 			break;
11290Sstevel@tonic-gate 		cmpcount += mdr->un_ncomp;
11300Sstevel@tonic-gate 	}
11310Sstevel@tonic-gate 	ASSERT(row != un->un_nrows);
11320Sstevel@tonic-gate 
11330Sstevel@tonic-gate 	/*
11340Sstevel@tonic-gate 	 * Concatenations are always contiguous blocks,
11350Sstevel@tonic-gate 	 * you cannot depend on the interlace being a usable
11360Sstevel@tonic-gate 	 * value (except for stripes).
11370Sstevel@tonic-gate 	 */
11380Sstevel@tonic-gate 	if (mdr->un_ncomp == 1) {	/* Concats */
11390Sstevel@tonic-gate 		*block = mdr->un_cum_blocks - mdr->un_blocks;
11400Sstevel@tonic-gate 		*count = 1;
11410Sstevel@tonic-gate 		*skip = 0;
11420Sstevel@tonic-gate 		*size = mdr->un_blocks;
11430Sstevel@tonic-gate 	} else {			/* Stripes */
11440Sstevel@tonic-gate 		*block = (mdr->un_cum_blocks - mdr->un_blocks) +
11450Sstevel@tonic-gate 		    ((ci - cmpcount) * mdr->un_interlace);
11460Sstevel@tonic-gate 		*count	= (size_t)(mdr->un_blocks / (mdr->un_interlace
11470Sstevel@tonic-gate 			* mdr->un_ncomp));
11480Sstevel@tonic-gate 		*skip = (mdr->un_interlace * mdr->un_ncomp) - mdr->un_interlace;
11490Sstevel@tonic-gate 		*size = mdr->un_interlace;
11500Sstevel@tonic-gate 	}
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 	return (0);
11530Sstevel@tonic-gate }
11540Sstevel@tonic-gate 
11550Sstevel@tonic-gate /*ARGSUSED*/
11560Sstevel@tonic-gate static intptr_t
11570Sstevel@tonic-gate stripe_shared_by_indx(md_dev64_t dev, void *junk, int indx)
11580Sstevel@tonic-gate {
11590Sstevel@tonic-gate 	ms_unit_t	*un;
11600Sstevel@tonic-gate 	ms_comp_t	*comp;
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate 	un = MD_UNIT(md_getminor(dev));
11630Sstevel@tonic-gate 	comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
11640Sstevel@tonic-gate 	comp += indx;
11650Sstevel@tonic-gate 	return ((intptr_t)&comp->un_mirror);
11660Sstevel@tonic-gate }
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate /*ARGSUSED*/
11690Sstevel@tonic-gate intptr_t
11700Sstevel@tonic-gate stripe_component_count(md_dev64_t dev, void *junk)
11710Sstevel@tonic-gate {
11720Sstevel@tonic-gate 	/*
11730Sstevel@tonic-gate 	 * See comments for stripe_get_dev
11740Sstevel@tonic-gate 	 */
11750Sstevel@tonic-gate 
11760Sstevel@tonic-gate 	ms_unit_t	*un;
11770Sstevel@tonic-gate 	int		count = 0;
11780Sstevel@tonic-gate 	int		row;
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate 	un = MD_UNIT(md_getminor(dev));
11810Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++)
11820Sstevel@tonic-gate 		count += un->un_row[row].un_ncomp;
11830Sstevel@tonic-gate 	return (count);
11840Sstevel@tonic-gate }
11850Sstevel@tonic-gate 
11860Sstevel@tonic-gate /*ARGSUSED*/
11870Sstevel@tonic-gate intptr_t
11880Sstevel@tonic-gate stripe_get_dev(md_dev64_t dev, void *junk, int indx, ms_cd_info_t *cd)
11890Sstevel@tonic-gate {
11900Sstevel@tonic-gate 	/*
11910Sstevel@tonic-gate 	 * It should be noted that stripe_replace in stripe_ioctl.c calls this
11920Sstevel@tonic-gate 	 * routine using makedevice(0, minor) for the first argument.
11930Sstevel@tonic-gate 	 *
11940Sstevel@tonic-gate 	 * If this routine at some point in the future needs to use the major
11950Sstevel@tonic-gate 	 * number stripe_replace must be changed.
11960Sstevel@tonic-gate 	 */
11970Sstevel@tonic-gate 
11980Sstevel@tonic-gate 	ms_unit_t	*un;
11990Sstevel@tonic-gate 	ms_comp_t	*comp;
12000Sstevel@tonic-gate 	md_dev64_t	tmpdev;
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 	un = MD_UNIT(md_getminor(dev));
12030Sstevel@tonic-gate 	comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
12040Sstevel@tonic-gate 	comp += indx;
12050Sstevel@tonic-gate 	tmpdev = comp->un_dev;
12060Sstevel@tonic-gate 	/*
12070Sstevel@tonic-gate 	 * Try to resolve devt again if NODEV64
12080Sstevel@tonic-gate 	 * Check if this comp is hotspared and if it is
12090Sstevel@tonic-gate 	 * then use key for hotspare
12100Sstevel@tonic-gate 	 */
12110Sstevel@tonic-gate 	if (tmpdev == NODEV64) {
12120Sstevel@tonic-gate 		tmpdev = md_resolve_bydevid(md_getminor(dev), tmpdev,
12130Sstevel@tonic-gate 			comp->un_mirror.ms_hs_id ?
12140Sstevel@tonic-gate 			comp->un_mirror.ms_hs_key :
12150Sstevel@tonic-gate 			comp->un_key);
12160Sstevel@tonic-gate 		comp->un_dev = tmpdev;
12170Sstevel@tonic-gate 	}
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate 	cd->cd_dev = comp->un_dev;
12200Sstevel@tonic-gate 	cd->cd_orig_dev = comp->un_mirror.ms_orig_dev;
12210Sstevel@tonic-gate 	return (0);
12220Sstevel@tonic-gate }
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate /*ARGSUSED*/
12250Sstevel@tonic-gate void
12260Sstevel@tonic-gate stripe_replace_done(md_dev64_t dev, sv_dev_t *sv)
12270Sstevel@tonic-gate {
12280Sstevel@tonic-gate 	/*
12290Sstevel@tonic-gate 	 * See comments for stripe_get_dev
12300Sstevel@tonic-gate 	 */
12310Sstevel@tonic-gate 
12320Sstevel@tonic-gate 	minor_t		mnum = md_getminor(dev);
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	if (sv != NULL) {
12350Sstevel@tonic-gate 		md_rem_names(sv, 1);
12360Sstevel@tonic-gate 		kmem_free(sv, sizeof (sv_dev_t));
12370Sstevel@tonic-gate 	}
12380Sstevel@tonic-gate 
12390Sstevel@tonic-gate 	md_unit_writerexit(MDI_UNIT(mnum));
12400Sstevel@tonic-gate }
12410Sstevel@tonic-gate 
12420Sstevel@tonic-gate /*ARGSUSED*/
12430Sstevel@tonic-gate intptr_t
12440Sstevel@tonic-gate stripe_replace_dev(md_dev64_t dev, void *junk, int ci, ms_new_dev_t *nd,
12450Sstevel@tonic-gate     mddb_recid_t *recids, int nrecids, void (**replace_done)(),
12460Sstevel@tonic-gate     void **replace_data)
12470Sstevel@tonic-gate {
12480Sstevel@tonic-gate 	minor_t		mnum;
12490Sstevel@tonic-gate 	ms_unit_t	*un;
12500Sstevel@tonic-gate 	mdi_unit_t	*ui;
12510Sstevel@tonic-gate 	ms_comp_t	*comp;
12520Sstevel@tonic-gate 	diskaddr_t	dev_size;
12530Sstevel@tonic-gate 	int		row;
12540Sstevel@tonic-gate 	int		ncomps = 0;
12550Sstevel@tonic-gate 	int		cmpcount = 0;
12560Sstevel@tonic-gate 	int		rid = 0;
12570Sstevel@tonic-gate 	struct ms_row	*mdr;
12580Sstevel@tonic-gate 	sv_dev_t	*sv = NULL;
12590Sstevel@tonic-gate 	mddb_recid_t	hs_id = 0;
12600Sstevel@tonic-gate 	set_t		setno;
12610Sstevel@tonic-gate 	side_t		side;
12620Sstevel@tonic-gate 	md_dev64_t	this_dev;
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 	mnum = md_getminor(dev);
12650Sstevel@tonic-gate 	ui = MDI_UNIT(mnum);
12660Sstevel@tonic-gate 	setno = MD_MIN2SET(mnum);
12670Sstevel@tonic-gate 	side = mddb_getsidenum(setno);
12680Sstevel@tonic-gate 
12690Sstevel@tonic-gate 	un = md_unit_writerlock(ui);
12700Sstevel@tonic-gate 
12710Sstevel@tonic-gate 	*replace_data = NULL;
12720Sstevel@tonic-gate 	comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate 	comp += ci;
12750Sstevel@tonic-gate 
12760Sstevel@tonic-gate 	/*
12770Sstevel@tonic-gate 	 * Count the number of components
12780Sstevel@tonic-gate 	 */
12790Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
12800Sstevel@tonic-gate 		struct ms_row *mdr = &un->un_row[row];
12810Sstevel@tonic-gate 		ncomps += mdr->un_ncomp;
12820Sstevel@tonic-gate 	}
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	recids[0] = 0;
12850Sstevel@tonic-gate 	/*
12860Sstevel@tonic-gate 	 * No need of checking size of new device,
12870Sstevel@tonic-gate 	 * when hotsparing (it has already been done), or
12880Sstevel@tonic-gate 	 * when enabling the device.
12890Sstevel@tonic-gate 	 */
12900Sstevel@tonic-gate 	if ((nd != NULL) && (nd->nd_hs_id == 0)) {
12910Sstevel@tonic-gate 		for (row = 0; row < un->un_nrows; row++) {
12920Sstevel@tonic-gate 			mdr = &un->un_row[row];
12930Sstevel@tonic-gate 			if ((mdr->un_ncomp + cmpcount) > ci)
12940Sstevel@tonic-gate 				break;
12950Sstevel@tonic-gate 			cmpcount += mdr->un_ncomp;
12960Sstevel@tonic-gate 		}
12970Sstevel@tonic-gate 		ASSERT(row != un->un_nrows);
12980Sstevel@tonic-gate 
12990Sstevel@tonic-gate 		/* Concatenations have a ncomp = 1 */
13000Sstevel@tonic-gate 		dev_size = mdr->un_blocks / mdr->un_ncomp;
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate 		/*
13030Sstevel@tonic-gate 		 * now check to see if new comp can be used in
13040Sstevel@tonic-gate 		 * place of old comp
13050Sstevel@tonic-gate 		 */
13060Sstevel@tonic-gate 		if ((un->c.un_flag & MD_LABELED) && (ci == 0) &&
13070Sstevel@tonic-gate 		    nd->nd_labeled)
13080Sstevel@tonic-gate 			nd->nd_start_blk = 0;
13090Sstevel@tonic-gate 		else
13100Sstevel@tonic-gate 			nd->nd_nblks -= nd->nd_start_blk;
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 		if (dev_size > nd->nd_nblks) {
13130Sstevel@tonic-gate 			md_unit_writerexit(ui);
13140Sstevel@tonic-gate 			return (MDE_COMP_TOO_SMALL);
13150Sstevel@tonic-gate 		}
13160Sstevel@tonic-gate 
13170Sstevel@tonic-gate 		sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP);
13180Sstevel@tonic-gate 		sv->setno = MD_MIN2SET(mnum);
13190Sstevel@tonic-gate 		sv->key = comp->un_key;
13200Sstevel@tonic-gate 	}
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate 	/*
13230Sstevel@tonic-gate 	 * Close this component.
13240Sstevel@tonic-gate 	 */
13250Sstevel@tonic-gate 	if (comp->un_mirror.ms_flags & MDM_S_ISOPEN) {
13260Sstevel@tonic-gate 		md_layered_close(comp->un_dev, MD_OFLG_NULL);
13270Sstevel@tonic-gate 		comp->un_mirror.ms_flags &= ~MDM_S_ISOPEN;
13280Sstevel@tonic-gate 	}
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	/*
13310Sstevel@tonic-gate 	 * If the component is hotspared, return to the pool.
13320Sstevel@tonic-gate 	 */
13330Sstevel@tonic-gate 	if (comp->un_mirror.ms_hs_id != 0) {
13340Sstevel@tonic-gate 		hs_cmds_t	cmd;
13350Sstevel@tonic-gate 		mdkey_t		hs_key;
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate 		hs_key = comp->un_mirror.ms_hs_key;
13380Sstevel@tonic-gate 		comp->un_dev = comp->un_mirror.ms_orig_dev;
13390Sstevel@tonic-gate 		comp->un_start_block = comp->un_mirror.ms_orig_blk;
13400Sstevel@tonic-gate 		comp->un_mirror.ms_hs_key = 0;
13410Sstevel@tonic-gate 		comp->un_mirror.ms_hs_id = 0;
13420Sstevel@tonic-gate 		comp->un_mirror.ms_orig_dev = 0;
13430Sstevel@tonic-gate 
13440Sstevel@tonic-gate 		cmd = HS_FREE;
13450Sstevel@tonic-gate 		if ((comp->un_mirror.ms_state != CS_OKAY) &&
13460Sstevel@tonic-gate 		    (comp->un_mirror.ms_state != CS_RESYNC))
13470Sstevel@tonic-gate 			cmd = HS_BAD;
13480Sstevel@tonic-gate 		(void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, &hs_id,
13490Sstevel@tonic-gate 		    &hs_key, NULL, NULL);
13500Sstevel@tonic-gate 	}
13510Sstevel@tonic-gate 
13520Sstevel@tonic-gate 	/*
13530Sstevel@tonic-gate 	 * Open by device id; for enable (indicated by a NULL
13540Sstevel@tonic-gate 	 * nd pointer), use the existing component info.  For
13550Sstevel@tonic-gate 	 * replace, use the new device.
13560Sstevel@tonic-gate 	 */
13570Sstevel@tonic-gate 	if (nd == NULL) {
13580Sstevel@tonic-gate 		this_dev = md_resolve_bydevid(mnum, comp->un_dev, comp->un_key);
13590Sstevel@tonic-gate 		/*
13600Sstevel@tonic-gate 		 * If someone replaced a new disk in the same slot
13610Sstevel@tonic-gate 		 * we get NODEV64 since old device id cannot be
13620Sstevel@tonic-gate 		 * resolved. The new devt is obtained from the
13630Sstevel@tonic-gate 		 * mddb since devt is going to be unchanged for the
13640Sstevel@tonic-gate 		 * enable case. No need to check for multiple
13650Sstevel@tonic-gate 		 * keys here because the caller (comp_replace)
13660Sstevel@tonic-gate 		 * has already sanity checked it for us.
13670Sstevel@tonic-gate 		 */
13680Sstevel@tonic-gate 		if (this_dev == NODEV64) {
13690Sstevel@tonic-gate 			this_dev = md_getdevnum(setno, side, comp->un_key,
13700Sstevel@tonic-gate 			    MD_TRUST_DEVT);
13710Sstevel@tonic-gate 		}
13720Sstevel@tonic-gate 	} else {
13730Sstevel@tonic-gate 		/*
13740Sstevel@tonic-gate 		 * If this is a hotspare, save the original dev_t for later
		 * use. If this has occurred during boot then the value of
13760Sstevel@tonic-gate 		 * comp->un_dev will be NODEV64 because of the failure to look
13770Sstevel@tonic-gate 		 * up the devid of the device.
13780Sstevel@tonic-gate 		 */
13790Sstevel@tonic-gate 		if (nd->nd_hs_id != 0)
13800Sstevel@tonic-gate 			comp->un_mirror.ms_orig_dev = comp->un_dev;
13810Sstevel@tonic-gate 		this_dev = md_resolve_bydevid(mnum, nd->nd_dev, nd->nd_key);
13820Sstevel@tonic-gate 	}
13830Sstevel@tonic-gate 
13840Sstevel@tonic-gate 	comp->un_dev = this_dev;
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	/*
13870Sstevel@tonic-gate 	 * Now open the new device if required. Note for a single component
13880Sstevel@tonic-gate 	 * stripe it will not be open - leave this for the mirror driver to
13890Sstevel@tonic-gate 	 * deal with.
13900Sstevel@tonic-gate 	 */
13910Sstevel@tonic-gate 	if (md_unit_isopen(ui)) {
13920Sstevel@tonic-gate 		if (md_layered_open(mnum, &this_dev, MD_OFLG_NULL)) {
13930Sstevel@tonic-gate 			mddb_recid_t	ids[3];
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 			ids[0] = un->c.un_record_id;
13960Sstevel@tonic-gate 			ids[1] = hs_id;
13970Sstevel@tonic-gate 			ids[2] = 0;
13980Sstevel@tonic-gate 			mddb_commitrecs_wrapper(ids);
13990Sstevel@tonic-gate 			if ((nd != NULL) && (nd->nd_hs_id != 0)) {
14000Sstevel@tonic-gate 				/*
14010Sstevel@tonic-gate 				 * Revert back to the original device.
14020Sstevel@tonic-gate 				 */
14030Sstevel@tonic-gate 				comp->un_dev = comp->un_mirror.ms_orig_dev;
14040Sstevel@tonic-gate 
14050Sstevel@tonic-gate 				cmn_err(CE_WARN,
14060Sstevel@tonic-gate 				    "md: %s: open error of hotspare %s",
14070Sstevel@tonic-gate 				    md_shortname(mnum),
14080Sstevel@tonic-gate 				    md_devname(MD_MIN2SET(mnum), nd->nd_dev,
14090Sstevel@tonic-gate 				    NULL, 0));
14100Sstevel@tonic-gate 				SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL,
14110Sstevel@tonic-gate 				    SVM_TAG_HS, MD_MIN2SET(mnum), nd->nd_dev);
14120Sstevel@tonic-gate 			}
14130Sstevel@tonic-gate 			md_unit_writerexit(ui);
14140Sstevel@tonic-gate 			return (MDE_COMP_OPEN_ERR);
14150Sstevel@tonic-gate 		}
14160Sstevel@tonic-gate 		if (nd != NULL)
14170Sstevel@tonic-gate 			nd->nd_dev = this_dev;
14180Sstevel@tonic-gate 
14190Sstevel@tonic-gate 		comp->un_mirror.ms_flags |= MDM_S_ISOPEN;
14200Sstevel@tonic-gate 	}
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate 	if (nd == NULL) {
14230Sstevel@tonic-gate 		recids[0] = un->c.un_record_id;
14240Sstevel@tonic-gate 		recids[1] = hs_id;
14250Sstevel@tonic-gate 		recids[2] = 0;
14260Sstevel@tonic-gate 		*replace_done = stripe_replace_done;
14270Sstevel@tonic-gate 		return (0);
14280Sstevel@tonic-gate 	}
14290Sstevel@tonic-gate 
14300Sstevel@tonic-gate 	/* if hot sparing this device */
14310Sstevel@tonic-gate 	if (nd->nd_hs_id != 0) {
14320Sstevel@tonic-gate 		char	devname[MD_MAX_CTDLEN];
14330Sstevel@tonic-gate 		char	hs_devname[MD_MAX_CTDLEN];
14340Sstevel@tonic-gate 		set_t	setno;
14350Sstevel@tonic-gate 
14360Sstevel@tonic-gate 		comp->un_mirror.ms_hs_id = nd->nd_hs_id;
14370Sstevel@tonic-gate 		comp->un_mirror.ms_hs_key = nd->nd_key;
14380Sstevel@tonic-gate 
14390Sstevel@tonic-gate 		comp->un_mirror.ms_orig_blk = comp->un_start_block;
14400Sstevel@tonic-gate 
14410Sstevel@tonic-gate 		setno = MD_MIN2SET(mnum);
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 		(void) md_devname(setno, comp->un_mirror.ms_orig_dev, devname,
14440Sstevel@tonic-gate 					sizeof (devname));
14450Sstevel@tonic-gate 		(void) md_devname(setno, nd->nd_dev, hs_devname,
14460Sstevel@tonic-gate 		    sizeof (hs_devname));
14470Sstevel@tonic-gate 
14480Sstevel@tonic-gate 		cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s",
14490Sstevel@tonic-gate 		    md_shortname(mnum), devname, hs_devname);
14500Sstevel@tonic-gate 
14510Sstevel@tonic-gate 	} else {	/* replacing the device */
14520Sstevel@tonic-gate 		comp->un_key = nd->nd_key;
14530Sstevel@tonic-gate 		*replace_data = (void *)sv;
14540Sstevel@tonic-gate 
14550Sstevel@tonic-gate 		/*
14560Sstevel@tonic-gate 		 * For the old device, make sure to reset the parent
14570Sstevel@tonic-gate 		 * if it's a  metadevice.
14580Sstevel@tonic-gate 		 */
14590Sstevel@tonic-gate 		if (md_getmajor(comp->un_dev) == md_major) {
14600Sstevel@tonic-gate 			minor_t	  comp_mnum = md_getminor(comp->un_dev);
14610Sstevel@tonic-gate 			md_unit_t *comp_un = MD_UNIT(comp_mnum);
14620Sstevel@tonic-gate 
14630Sstevel@tonic-gate 			md_reset_parent(comp->un_dev);
14640Sstevel@tonic-gate 			recids[rid++] = MD_RECID(comp_un);
14650Sstevel@tonic-gate 		}
14660Sstevel@tonic-gate 	}
14670Sstevel@tonic-gate 
14680Sstevel@tonic-gate 	comp->un_dev = nd->nd_dev;
14690Sstevel@tonic-gate 	comp->un_start_block = nd->nd_start_blk;
14700Sstevel@tonic-gate 
14710Sstevel@tonic-gate 	/*
14720Sstevel@tonic-gate 	 * For the new device, make sure to set the parent if it's a
14730Sstevel@tonic-gate 	 * metadevice.
14740Sstevel@tonic-gate 	 *
14750Sstevel@tonic-gate 	 * If we ever support using metadevices as hot spares, this
14760Sstevel@tonic-gate 	 * will need to be tested, and possibly moved into the
14770Sstevel@tonic-gate 	 * preceding "else" clause, immediately following the parent
14780Sstevel@tonic-gate 	 * reset block.  For now, it's convenient to leave it here and
14790Sstevel@tonic-gate 	 * only compress nd->nd_dev once.
14800Sstevel@tonic-gate 	 */
14810Sstevel@tonic-gate 	if (md_getmajor(comp->un_dev) == md_major) {
14820Sstevel@tonic-gate 		minor_t		comp_mnum = md_getminor(comp->un_dev);
14830Sstevel@tonic-gate 		md_unit_t	*comp_un = MD_UNIT(comp_mnum);
14840Sstevel@tonic-gate 
14850Sstevel@tonic-gate 		md_set_parent(comp->un_dev, MD_SID(un));
14860Sstevel@tonic-gate 		recids[rid++] = MD_RECID(comp_un);
14870Sstevel@tonic-gate 	}
14880Sstevel@tonic-gate 
14890Sstevel@tonic-gate 	recids[rid++] = un->c.un_record_id;
14900Sstevel@tonic-gate 	recids[rid++] = hs_id;
14910Sstevel@tonic-gate 	recids[rid] = 0;
14920Sstevel@tonic-gate 	*replace_done = stripe_replace_done;
14930Sstevel@tonic-gate 	return (0);
14940Sstevel@tonic-gate }
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate /*ARGSUSED*/
14970Sstevel@tonic-gate static intptr_t
14980Sstevel@tonic-gate stripe_hotspare_dev(
14990Sstevel@tonic-gate 	md_dev64_t	dev,
15000Sstevel@tonic-gate 	void		*junk,
15010Sstevel@tonic-gate 	int		ci,
15020Sstevel@tonic-gate 	mddb_recid_t	*recids,
15030Sstevel@tonic-gate 	int		nrecids,
15040Sstevel@tonic-gate 	void		(**replace_done)(),
15050Sstevel@tonic-gate 	void		**replace_data)
15060Sstevel@tonic-gate {
15070Sstevel@tonic-gate 	ms_unit_t	*un;
15080Sstevel@tonic-gate 	mdi_unit_t	*ui;
15090Sstevel@tonic-gate 	ms_comp_t	*comp;
15100Sstevel@tonic-gate 	int		row;
15110Sstevel@tonic-gate 	struct ms_row	*mdr;
15120Sstevel@tonic-gate 	ms_new_dev_t	nd;
15130Sstevel@tonic-gate 	int		err;
15140Sstevel@tonic-gate 	int		i;
15150Sstevel@tonic-gate 	minor_t		mnum;
15160Sstevel@tonic-gate 	set_t		setno;
15170Sstevel@tonic-gate 	int		cmpcount = 0;
15180Sstevel@tonic-gate 
15190Sstevel@tonic-gate 	mnum = md_getminor(dev);
15200Sstevel@tonic-gate 	ui = MDI_UNIT(mnum);
15210Sstevel@tonic-gate 	un = MD_UNIT(mnum);
15220Sstevel@tonic-gate 	setno = MD_MIN2SET(mnum);
15230Sstevel@tonic-gate 
15240Sstevel@tonic-gate 	if (md_get_setstatus(setno) & MD_SET_STALE)
15250Sstevel@tonic-gate 		return (1);
15260Sstevel@tonic-gate 
15270Sstevel@tonic-gate 	if (un->un_hsp_id == -1)
15280Sstevel@tonic-gate 		return (1);
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 	for (row = 0; row < un->un_nrows; row++) {
15310Sstevel@tonic-gate 		mdr = &un->un_row[row];
15320Sstevel@tonic-gate 		if ((mdr->un_ncomp + cmpcount) > ci)
15330Sstevel@tonic-gate 			break;
15340Sstevel@tonic-gate 		cmpcount += mdr->un_ncomp;
15350Sstevel@tonic-gate 	}
15360Sstevel@tonic-gate 	ASSERT(row != un->un_nrows);
15370Sstevel@tonic-gate 
15380Sstevel@tonic-gate 	comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
15390Sstevel@tonic-gate 	comp += ci;
15400Sstevel@tonic-gate 	/* Concatenations have a ncomp = 1 */
15410Sstevel@tonic-gate 	nd.nd_nblks = mdr->un_blocks / mdr->un_ncomp;
15420Sstevel@tonic-gate 
15430Sstevel@tonic-gate 	if ((un->c.un_flag & MD_LABELED) && (ci == 0))
15440Sstevel@tonic-gate 		nd.nd_labeled = 1;
15450Sstevel@tonic-gate 	else
15460Sstevel@tonic-gate 		nd.nd_labeled = 0;
15470Sstevel@tonic-gate 
15480Sstevel@tonic-gate again:
15490Sstevel@tonic-gate 	err = md_hot_spare_ifc(HS_GET, un->un_hsp_id, nd.nd_nblks,
15500Sstevel@tonic-gate 	    nd.nd_labeled, &nd.nd_hs_id, &nd.nd_key, &nd.nd_dev,
15510Sstevel@tonic-gate 	    &nd.nd_start_blk);
15520Sstevel@tonic-gate 
15530Sstevel@tonic-gate 	if (err) {
15540Sstevel@tonic-gate 		if (!stripe_replace_dev(dev, junk, ci, NULL, recids, nrecids,
15550Sstevel@tonic-gate 		    replace_done, replace_data)) {
15560Sstevel@tonic-gate 			mddb_commitrecs_wrapper(recids);
15570Sstevel@tonic-gate 			md_unit_writerexit(ui);
15580Sstevel@tonic-gate 		}
15590Sstevel@tonic-gate 		recids[0] = 0;
15600Sstevel@tonic-gate 		return (1);
15610Sstevel@tonic-gate 	}
15620Sstevel@tonic-gate 
15630Sstevel@tonic-gate 	if (stripe_replace_dev(dev, junk, ci, &nd, recids, nrecids,
15640Sstevel@tonic-gate 		replace_done, replace_data)) {
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate 		(void) md_hot_spare_ifc(HS_BAD, un->un_hsp_id, 0, 0,
15670Sstevel@tonic-gate 		    &nd.nd_hs_id, &nd.nd_key, NULL, NULL);
15680Sstevel@tonic-gate 		mddb_commitrec_wrapper(nd.nd_hs_id);
15690Sstevel@tonic-gate 		goto again;
15700Sstevel@tonic-gate 	}
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate 	/* Leave a slot for the null recid */
15730Sstevel@tonic-gate 	for (i = 0; i < (nrecids - 1); i++) {
15740Sstevel@tonic-gate 		if (recids[i] == 0) {
15750Sstevel@tonic-gate 			recids[i++] = nd.nd_hs_id;
15760Sstevel@tonic-gate 			recids[i] = 0;
15770Sstevel@tonic-gate 		}
15780Sstevel@tonic-gate 	}
15790Sstevel@tonic-gate 	return (0);
15800Sstevel@tonic-gate }
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate static int
15830Sstevel@tonic-gate stripe_imp_set(
15840Sstevel@tonic-gate 	set_t	setno
15850Sstevel@tonic-gate )
15860Sstevel@tonic-gate {
15870Sstevel@tonic-gate 
15880Sstevel@tonic-gate 	mddb_recid_t	recid;
15890Sstevel@tonic-gate 	int		i, row, c, gotsomething;
15900Sstevel@tonic-gate 	mddb_type_t	typ1;
15910Sstevel@tonic-gate 	mddb_de_ic_t	*dep;
15920Sstevel@tonic-gate 	mddb_rb32_t	*rbp;
15930Sstevel@tonic-gate 	ms_unit32_od_t	*un32;
15940Sstevel@tonic-gate 	ms_unit_t	*un64;
15950Sstevel@tonic-gate 	minor_t		*self_id;	/* minor needs to be updated */
15960Sstevel@tonic-gate 	md_parent_t	*parent_id;	/* parent needs to be updated */
15970Sstevel@tonic-gate 	mddb_recid_t	*record_id;	/* record id needs to be updated */
15980Sstevel@tonic-gate 	mddb_recid_t	*hsp_id;
15990Sstevel@tonic-gate 	ms_comp32_od_t	*comp32;
16000Sstevel@tonic-gate 	ms_comp_t	*comp64;
16010Sstevel@tonic-gate 
16020Sstevel@tonic-gate 
16030Sstevel@tonic-gate 	gotsomething = 0;
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 	typ1 = (mddb_type_t)md_getshared_key(setno,
16060Sstevel@tonic-gate 	    stripe_md_ops.md_driver.md_drivername);
16070Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
16080Sstevel@tonic-gate 
16090Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
16100Sstevel@tonic-gate 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
16110Sstevel@tonic-gate 			continue;
16120Sstevel@tonic-gate 
16130Sstevel@tonic-gate 		dep = mddb_getrecdep(recid);
16140Sstevel@tonic-gate 		rbp = dep->de_rb;
16150Sstevel@tonic-gate 
16160Sstevel@tonic-gate 		if (rbp->rb_revision == MDDB_REV_RB) {
16170Sstevel@tonic-gate 			/*
16180Sstevel@tonic-gate 			 * Small device
16190Sstevel@tonic-gate 			 */
16200Sstevel@tonic-gate 			un32 = (ms_unit32_od_t *)mddb_getrecaddr(recid);
16210Sstevel@tonic-gate 			self_id = &(un32->c.un_self_id);
16220Sstevel@tonic-gate 			parent_id = &(un32->c.un_parent);
16230Sstevel@tonic-gate 			record_id = &(un32->c.un_record_id);
16240Sstevel@tonic-gate 			hsp_id = &(un32->un_hsp_id);
16250Sstevel@tonic-gate 
16260Sstevel@tonic-gate 			comp32 = (ms_comp32_od_t *)((void *)&((char *)un32)
16270Sstevel@tonic-gate 				[un32->un_ocomp]);
16280Sstevel@tonic-gate 			for (row = 0; row < un32->un_nrows; row++) {
16290Sstevel@tonic-gate 			    struct ms_row32_od *mdr = &un32->un_row[row];
16300Sstevel@tonic-gate 			    for (i = 0, c = mdr->un_icomp;
16310Sstevel@tonic-gate 				i < mdr->un_ncomp; i++) {
16320Sstevel@tonic-gate 				ms_comp32_od_t *mdc;
16330Sstevel@tonic-gate 				mdc = &comp32[c++];
16340Sstevel@tonic-gate 
16350Sstevel@tonic-gate 				if (!md_update_minor(setno, mddb_getsidenum
16360Sstevel@tonic-gate 				    (setno), mdc->un_key))
16370Sstevel@tonic-gate 					goto out;
16380Sstevel@tonic-gate 
16390Sstevel@tonic-gate 				if (mdc->un_mirror.ms_hs_id != 0)
16400Sstevel@tonic-gate 				    mdc->un_mirror.ms_hs_id = MAKERECID(
16410Sstevel@tonic-gate 				    setno, mdc->un_mirror.ms_hs_id);
16420Sstevel@tonic-gate 			    }
16430Sstevel@tonic-gate 			}
16440Sstevel@tonic-gate 		} else {
16450Sstevel@tonic-gate 			un64 = (ms_unit_t *)mddb_getrecaddr(recid);
16460Sstevel@tonic-gate 			self_id = &(un64->c.un_self_id);
16470Sstevel@tonic-gate 			parent_id = &(un64->c.un_parent);
16480Sstevel@tonic-gate 			record_id = &(un64->c.un_record_id);
16490Sstevel@tonic-gate 			hsp_id = &(un64->un_hsp_id);
16500Sstevel@tonic-gate 
16510Sstevel@tonic-gate 			comp64 = (ms_comp_t *)((void *)&((char *)un64)
16520Sstevel@tonic-gate 				[un64->un_ocomp]);
16530Sstevel@tonic-gate 			for (row = 0; row < un64->un_nrows; row++) {
16540Sstevel@tonic-gate 			    struct ms_row *mdr = &un64->un_row[row];
16550Sstevel@tonic-gate 			    for (i = 0, c = mdr->un_icomp;
16560Sstevel@tonic-gate 				i < mdr->un_ncomp; i++) {
16570Sstevel@tonic-gate 				ms_comp_t *mdc;
16580Sstevel@tonic-gate 				mdc = &comp64[c++];
16590Sstevel@tonic-gate 
16600Sstevel@tonic-gate 				if (!md_update_minor(setno, mddb_getsidenum
16610Sstevel@tonic-gate 				    (setno), mdc->un_key))
16620Sstevel@tonic-gate 					goto out;
16630Sstevel@tonic-gate 
16640Sstevel@tonic-gate 				if (mdc->un_mirror.ms_hs_id != 0)
16650Sstevel@tonic-gate 				    mdc->un_mirror.ms_hs_id = MAKERECID(
16660Sstevel@tonic-gate 				    setno, mdc->un_mirror.ms_hs_id);
16670Sstevel@tonic-gate 			    }
16680Sstevel@tonic-gate 			}
16690Sstevel@tonic-gate 		}
16700Sstevel@tonic-gate 
16710Sstevel@tonic-gate 		/*
16720Sstevel@tonic-gate 		 * Update unit with the imported setno
16730Sstevel@tonic-gate 		 *
16740Sstevel@tonic-gate 		 */
16750Sstevel@tonic-gate 		mddb_setrecprivate(recid, MD_PRV_GOTIT);
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 		*self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id));
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 		if (*hsp_id != -1)
16800Sstevel@tonic-gate 			*hsp_id = MAKERECID(setno, DBID(*hsp_id));
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 		if (*parent_id != MD_NO_PARENT)
16830Sstevel@tonic-gate 			*parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id));
16840Sstevel@tonic-gate 		*record_id = MAKERECID(setno, DBID(*record_id));
16850Sstevel@tonic-gate 
16860Sstevel@tonic-gate 		gotsomething = 1;
16870Sstevel@tonic-gate 	}
16880Sstevel@tonic-gate 
16890Sstevel@tonic-gate out:
16900Sstevel@tonic-gate 	return (gotsomething);
16910Sstevel@tonic-gate }
16920Sstevel@tonic-gate 
/*
 * Named services offered by the stripe driver.  Each entry pairs a service
 * routine with its name string; the list is terminated by a NULL entry.
 * The table is referenced as the last member of the stripe_md_ops
 * initializer.
 */
static md_named_services_t stripe_named_services[] = {
	{stripe_shared_by_blk,			"shared by blk"		    },
	{stripe_shared_by_indx,			"shared by indx"	    },
	{stripe_component_count,		"get component count"	    },
	{stripe_block_count_skip_size,		"get block count skip size" },
	{stripe_get_dev,			"get device"		    },
	{stripe_replace_dev,			"replace device"	    },
	{stripe_hotspare_dev,			"hotspare device"	    },
	{stripe_rename_check,			MDRNM_CHECK		    },
	{NULL,					0}	/* end of table */
};
17040Sstevel@tonic-gate 
/*
 * Operations vector of the stripe driver.  The initializer is positional,
 * so the order of the entries must follow the md_ops_t layout.  Slots set
 * to NULL are operations the stripe driver does not supply.
 */
md_ops_t stripe_md_ops = {
	stripe_open,		/* open */
	stripe_close,		/* close */
	md_stripe_strategy,	/* strategy */
	NULL,			/* print */
	stripe_dump,		/* dump */
	NULL,			/* read */
	NULL,			/* write */
	md_stripe_ioctl,	/* ioctl */
	stripe_snarf,		/* snarf */
	stripe_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	stripe_imp_set,		/* import set */
	stripe_named_services	/* named services */
};
17210Sstevel@tonic-gate 
17220Sstevel@tonic-gate static void
17230Sstevel@tonic-gate init_init()
17240Sstevel@tonic-gate {
17250Sstevel@tonic-gate 	md_stripe_mcs_buf_off = sizeof (md_scs_t) - sizeof (buf_t);
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate 	stripe_parent_cache = kmem_cache_create("md_stripe_parent",
17280Sstevel@tonic-gate 	    sizeof (md_sps_t), 0, stripe_parent_constructor,
17290Sstevel@tonic-gate 	    stripe_parent_destructor, stripe_run_queue, NULL, NULL,
17300Sstevel@tonic-gate 	    0);
17310Sstevel@tonic-gate 	stripe_child_cache = kmem_cache_create("md_stripe_child",
17320Sstevel@tonic-gate 	    sizeof (md_scs_t) - sizeof (buf_t) + biosize(), 0,
17330Sstevel@tonic-gate 	    stripe_child_constructor, stripe_child_destructor,
17340Sstevel@tonic-gate 	    stripe_run_queue, NULL, NULL, 0);
17350Sstevel@tonic-gate }
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate static void
17380Sstevel@tonic-gate fini_uninit()
17390Sstevel@tonic-gate {
17400Sstevel@tonic-gate 	kmem_cache_destroy(stripe_parent_cache);
17410Sstevel@tonic-gate 	kmem_cache_destroy(stripe_child_cache);
17420Sstevel@tonic-gate 	stripe_parent_cache = stripe_child_cache = NULL;
17430Sstevel@tonic-gate }
17440Sstevel@tonic-gate 
/*
 * Define the module linkage.  The macro is given the module description
 * string plus calls to init_init() and fini_uninit(), presumably to be run
 * when the module is loaded and unloaded respectively.
 * NOTE(review): "%I%" looks like an SCCS keyword (cf. the #pragma ident
 * line at the top of the file) - confirm it is expanded at checkout.
 */
MD_PLUGIN_MISC_MODULE("stripes module %I%", init_init(), fini_uninit())
1747