xref: /onnv-gate/usr/src/uts/common/io/lvm/md/md.c (revision 0)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * Md - is the meta-disk driver.   It sits below the UFS file system
31*0Sstevel@tonic-gate  * but above the 'real' disk drivers, xy, id, sd etc.
32*0Sstevel@tonic-gate  *
33*0Sstevel@tonic-gate  * To the UFS software, md looks like a normal driver, since it has
34*0Sstevel@tonic-gate  * the normal kinds of entries in the bdevsw and cdevsw arrays. So
35*0Sstevel@tonic-gate  * UFS accesses md in the usual ways.  In particular, the strategy
36*0Sstevel@tonic-gate  * routine, mdstrategy(), gets called by fbiwrite(), ufs_getapage(),
37*0Sstevel@tonic-gate  * and ufs_writelbn().
38*0Sstevel@tonic-gate  *
39*0Sstevel@tonic-gate  * Md maintains an array of minor devices (meta-partitions).   Each
40*0Sstevel@tonic-gate  * meta partition stands for a matrix of real partitions, in rows
41*0Sstevel@tonic-gate  * which are not necessarily of equal length.	Md maintains a table,
42*0Sstevel@tonic-gate  * with one entry for each meta-partition,  which lists the rows and
43*0Sstevel@tonic-gate  * columns of actual partitions, and the job of the strategy routine
44*0Sstevel@tonic-gate  * is to translate from the meta-partition device and block numbers
45*0Sstevel@tonic-gate  * known to UFS into the actual partitions' device and block numbers.
46*0Sstevel@tonic-gate  *
47*0Sstevel@tonic-gate  * See below, in mdstrategy(), mdreal(), and mddone() for details of
48*0Sstevel@tonic-gate  * this translation.
49*0Sstevel@tonic-gate  */
50*0Sstevel@tonic-gate 
51*0Sstevel@tonic-gate /*
52*0Sstevel@tonic-gate  * Driver for Virtual Disk.
53*0Sstevel@tonic-gate  */
54*0Sstevel@tonic-gate 
55*0Sstevel@tonic-gate #include <sys/user.h>
56*0Sstevel@tonic-gate #include <sys/sysmacros.h>
57*0Sstevel@tonic-gate #include <sys/conf.h>
58*0Sstevel@tonic-gate #include <sys/stat.h>
59*0Sstevel@tonic-gate #include <sys/errno.h>
60*0Sstevel@tonic-gate #include <sys/param.h>
61*0Sstevel@tonic-gate #include <sys/systm.h>
62*0Sstevel@tonic-gate #include <sys/file.h>
63*0Sstevel@tonic-gate #include <sys/open.h>
64*0Sstevel@tonic-gate #include <sys/dkio.h>
65*0Sstevel@tonic-gate #include <sys/vtoc.h>
66*0Sstevel@tonic-gate #include <sys/cmn_err.h>
67*0Sstevel@tonic-gate #include <sys/ddi.h>
68*0Sstevel@tonic-gate #include <sys/sunddi.h>
69*0Sstevel@tonic-gate #include <sys/debug.h>
70*0Sstevel@tonic-gate #include <sys/utsname.h>
71*0Sstevel@tonic-gate #include <sys/lvm/mdvar.h>
72*0Sstevel@tonic-gate #include <sys/lvm/md_names.h>
73*0Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
74*0Sstevel@tonic-gate #include <sys/lvm/md_sp.h>
75*0Sstevel@tonic-gate #include <sys/types.h>
76*0Sstevel@tonic-gate #include <sys/kmem.h>
77*0Sstevel@tonic-gate #include <sys/cladm.h>
78*0Sstevel@tonic-gate #include <sys/priv_names.h>
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate #ifndef	lint
81*0Sstevel@tonic-gate static char _depends_on[] = "strmod/rpcmod";
82*0Sstevel@tonic-gate #endif	/* lint */
83*0Sstevel@tonic-gate int		md_init_debug	= 0;	/* module binding debug */
84*0Sstevel@tonic-gate 
85*0Sstevel@tonic-gate /*
86*0Sstevel@tonic-gate  * Tunable to turn off the failfast behavior.
87*0Sstevel@tonic-gate  */
88*0Sstevel@tonic-gate int		md_ff_disable = 0;
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate md_krwlock_t	md_unit_array_rw;	/* protects all unit arrays */
91*0Sstevel@tonic-gate md_krwlock_t	nm_lock;		/* protects all the name spaces */
92*0Sstevel@tonic-gate 
93*0Sstevel@tonic-gate md_resync_t	md_cpr_resync;
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate extern char	svm_bootpath[];
96*0Sstevel@tonic-gate #define	SVM_PSEUDO_STR	"/pseudo/md@0:"
97*0Sstevel@tonic-gate 
98*0Sstevel@tonic-gate #define		VERSION_LENGTH	6
99*0Sstevel@tonic-gate #define		VERSION		"1.0"
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate /*
102*0Sstevel@tonic-gate  * Keep track of possible 'orphan' entries in the name space
103*0Sstevel@tonic-gate  */
104*0Sstevel@tonic-gate int		*md_nm_snarfed = NULL;
105*0Sstevel@tonic-gate 
106*0Sstevel@tonic-gate /*
107*0Sstevel@tonic-gate  * Global tunable giving the percentage of free space left in replica during
108*0Sstevel@tonic-gate  * conversion of non-devid style replica to devid style replica.
109*0Sstevel@tonic-gate  */
110*0Sstevel@tonic-gate int		md_conv_perc = MDDB_DEVID_CONV_PERC;
111*0Sstevel@tonic-gate 
#ifdef	DEBUG
/*
 * Debug code to verify framework exclusion guarantees.
 *
 * md_in is a bitmask of the IN_* entry points currently executing and is
 * only touched with md_in_mx held.  MD_SET_IN(x) drops into the debugger
 * if any entry point is already marked active, then marks x.  MD_CLR_IN(x)
 * drops into the debugger if anything other than x is active or if x is
 * not marked, then clears x.
 *
 * The macro argument is parenthesized at every use so that a compound
 * expression (for example IN_ATTACH | IN_DETACH) is evaluated as a unit.
 * On non-DEBUG kernels both macros expand to nothing.
 */
int		md_in;
kmutex_t	md_in_mx;			/* protects md_in */
#define	IN_INIT		0x01
#define	IN_FINI		0x02
#define	IN_ATTACH	0x04
#define	IN_DETACH	0x08
#define	IN_OPEN		0x10
#define	MD_SET_IN(x) {						\
	mutex_enter(&md_in_mx);					\
	if (md_in)						\
		debug_enter("MD_SET_IN exclusion lost");	\
	if (md_in & (x))					\
		debug_enter("MD_SET_IN already set");		\
	md_in |= (x);						\
	mutex_exit(&md_in_mx);					\
}

#define	MD_CLR_IN(x) {						\
	mutex_enter(&md_in_mx);					\
	if (md_in & ~(x))					\
		debug_enter("MD_CLR_IN exclusion lost");	\
	if (!(md_in & (x)))					\
		debug_enter("MD_CLR_IN already clr");		\
	md_in &= ~(x);						\
	mutex_exit(&md_in_mx);					\
}
#else	/* DEBUG */
#define	MD_SET_IN(x)
#define	MD_CLR_IN(x)
#endif	/* DEBUG */
144*0Sstevel@tonic-gate hrtime_t savetime1, savetime2;
145*0Sstevel@tonic-gate 
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate /*
148*0Sstevel@tonic-gate  * list things protected by md_mx even if they aren't
149*0Sstevel@tonic-gate  * used in this file.
150*0Sstevel@tonic-gate  */
151*0Sstevel@tonic-gate kmutex_t	md_mx;			/* used to md global stuff */
152*0Sstevel@tonic-gate kcondvar_t	md_cv;			/* md_status events */
153*0Sstevel@tonic-gate int		md_status = 0;		/* global status for the meta-driver */
154*0Sstevel@tonic-gate int		md_num_daemons = 0;
155*0Sstevel@tonic-gate int		md_ioctl_cnt = 0;
156*0Sstevel@tonic-gate int		md_mtioctl_cnt = 0;	/* multithreaded ioctl cnt */
157*0Sstevel@tonic-gate uint_t		md_mdelay = 10;		/* variable so can be patched */
158*0Sstevel@tonic-gate 
159*0Sstevel@tonic-gate int		(*mdv_strategy_tstpnt)(buf_t *, int, void*);
160*0Sstevel@tonic-gate 
161*0Sstevel@tonic-gate major_t		md_major, md_major_targ;
162*0Sstevel@tonic-gate 
163*0Sstevel@tonic-gate unit_t		md_nunits = MD_MAXUNITS;
164*0Sstevel@tonic-gate set_t		md_nsets = MD_MAXSETS;
165*0Sstevel@tonic-gate int		md_nmedh = 0;
166*0Sstevel@tonic-gate char		*md_med_trans_lst = NULL;
167*0Sstevel@tonic-gate md_set_t	md_set[MD_MAXSETS];
168*0Sstevel@tonic-gate md_set_io_t	md_set_io[MD_MAXSETS];
169*0Sstevel@tonic-gate 
170*0Sstevel@tonic-gate md_krwlock_t	hsp_rwlp;		/* protects hot_spare_interface */
171*0Sstevel@tonic-gate md_krwlock_t	ni_rwlp;		/* protects notify_interface */
172*0Sstevel@tonic-gate md_ops_t	**md_ops;
173*0Sstevel@tonic-gate ddi_modhandle_t	*md_mods;
174*0Sstevel@tonic-gate md_ops_t	*md_opslist;
175*0Sstevel@tonic-gate clock_t		md_hz;
176*0Sstevel@tonic-gate md_event_queue_t	*md_event_queue = NULL;
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate int		md_in_upgrade;
179*0Sstevel@tonic-gate int		md_keep_repl_state;
180*0Sstevel@tonic-gate int		md_devid_destroy;
181*0Sstevel@tonic-gate 
182*0Sstevel@tonic-gate /* for sending messages thru a door to userland */
183*0Sstevel@tonic-gate door_handle_t	mdmn_door_handle = NULL;
184*0Sstevel@tonic-gate int		mdmn_door_did = -1;
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate dev_info_t		*md_devinfo = NULL;
187*0Sstevel@tonic-gate 
188*0Sstevel@tonic-gate md_mn_nodeid_t	md_mn_mynode_id = ~0u;	/* My node id (for multi-node sets) */
189*0Sstevel@tonic-gate 
190*0Sstevel@tonic-gate static	uint_t		md_ocnt[OTYPCNT];
191*0Sstevel@tonic-gate 
192*0Sstevel@tonic-gate static int		mdinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
193*0Sstevel@tonic-gate static int		mdattach(dev_info_t *, ddi_attach_cmd_t);
194*0Sstevel@tonic-gate static int		mddetach(dev_info_t *, ddi_detach_cmd_t);
195*0Sstevel@tonic-gate static int		mdopen(dev_t *, int, int, cred_t *);
196*0Sstevel@tonic-gate static int		mdclose(dev_t, int, int, cred_t *);
197*0Sstevel@tonic-gate static int		mddump(dev_t, caddr_t, daddr_t, int);
198*0Sstevel@tonic-gate static int		mdread(dev_t, struct uio *, cred_t *);
199*0Sstevel@tonic-gate static int		mdwrite(dev_t, struct uio *, cred_t *);
200*0Sstevel@tonic-gate static int		mdaread(dev_t, struct aio_req *, cred_t *);
201*0Sstevel@tonic-gate static int		mdawrite(dev_t, struct aio_req *, cred_t *);
202*0Sstevel@tonic-gate static int		mdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
203*0Sstevel@tonic-gate static int		mdprop_op(dev_t, dev_info_t *,
204*0Sstevel@tonic-gate 				ddi_prop_op_t, int, char *, caddr_t, int *);
205*0Sstevel@tonic-gate 
206*0Sstevel@tonic-gate static struct cb_ops md_cb_ops = {
207*0Sstevel@tonic-gate 	mdopen,			/* open */
208*0Sstevel@tonic-gate 	mdclose,		/* close */
209*0Sstevel@tonic-gate 	mdstrategy,		/* strategy */
210*0Sstevel@tonic-gate 				/* print routine -- none yet */
211*0Sstevel@tonic-gate 	(int(*)(dev_t, char *))nulldev,
212*0Sstevel@tonic-gate 	mddump,			/* dump */
213*0Sstevel@tonic-gate 	mdread,			/* read */
214*0Sstevel@tonic-gate 	mdwrite,		/* write */
215*0Sstevel@tonic-gate 	mdioctl,		/* ioctl */
216*0Sstevel@tonic-gate 				/* devmap */
217*0Sstevel@tonic-gate 	(int(*)(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
218*0Sstevel@tonic-gate 			uint_t))nodev,
219*0Sstevel@tonic-gate 				/* mmap */
220*0Sstevel@tonic-gate 	(int(*)(dev_t, off_t, int))nodev,
221*0Sstevel@tonic-gate 				/* segmap */
222*0Sstevel@tonic-gate 	(int(*)(dev_t, off_t, struct as *, caddr_t *, off_t, unsigned,
223*0Sstevel@tonic-gate 		unsigned, unsigned, cred_t *))nodev,
224*0Sstevel@tonic-gate 	nochpoll,		/* poll */
225*0Sstevel@tonic-gate 	mdprop_op,		/* prop_op */
226*0Sstevel@tonic-gate 	0,			/* streamtab */
227*0Sstevel@tonic-gate 	(D_64BIT|D_MP|D_NEW),	/* driver compatibility flag */
228*0Sstevel@tonic-gate 	CB_REV,			/* cb_ops version */
229*0Sstevel@tonic-gate 	mdaread,		/* aread */
230*0Sstevel@tonic-gate 	mdawrite,		/* awrite */
231*0Sstevel@tonic-gate };
232*0Sstevel@tonic-gate 
233*0Sstevel@tonic-gate static struct dev_ops md_devops = {
234*0Sstevel@tonic-gate 	DEVO_REV,		/* dev_ops version */
235*0Sstevel@tonic-gate 	0,			/* device reference count */
236*0Sstevel@tonic-gate 	mdinfo,			/* info routine */
237*0Sstevel@tonic-gate 	nulldev,		/* identify routine */
238*0Sstevel@tonic-gate 	nulldev,		/* probe - not defined */
239*0Sstevel@tonic-gate 	mdattach,		/* attach routine */
240*0Sstevel@tonic-gate 	mddetach,		/* detach routine */
241*0Sstevel@tonic-gate 	nodev,			/* reset - not defined */
242*0Sstevel@tonic-gate 	&md_cb_ops,		/* driver operations */
243*0Sstevel@tonic-gate 	NULL,			/* bus operations */
244*0Sstevel@tonic-gate 	nodev			/* power management */
245*0Sstevel@tonic-gate };
246*0Sstevel@tonic-gate 
247*0Sstevel@tonic-gate /*
248*0Sstevel@tonic-gate  * loadable module wrapper
249*0Sstevel@tonic-gate  */
250*0Sstevel@tonic-gate #include <sys/modctl.h>
251*0Sstevel@tonic-gate 
252*0Sstevel@tonic-gate static struct modldrv modldrv = {
253*0Sstevel@tonic-gate 	&mod_driverops,			/* type of module -- a pseudodriver */
254*0Sstevel@tonic-gate 	"Solaris Volume Manager base module %I%", /* name of the module */
255*0Sstevel@tonic-gate 	&md_devops,			/* driver ops */
256*0Sstevel@tonic-gate };
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate static struct modlinkage modlinkage = {
259*0Sstevel@tonic-gate 	MODREV_1,
260*0Sstevel@tonic-gate 	(void *)&modldrv,
261*0Sstevel@tonic-gate 	NULL
262*0Sstevel@tonic-gate };
263*0Sstevel@tonic-gate 
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate /* md_medd.c */
266*0Sstevel@tonic-gate extern	void	med_init(void);
267*0Sstevel@tonic-gate extern	void	med_fini(void);
268*0Sstevel@tonic-gate extern  void	md_devid_cleanup(set_t, uint_t);
269*0Sstevel@tonic-gate 
270*0Sstevel@tonic-gate /* md_names.c */
271*0Sstevel@tonic-gate extern void			*lookup_entry(struct nm_next_hdr *, set_t,
272*0Sstevel@tonic-gate 					side_t, mdkey_t, md_dev64_t, int);
273*0Sstevel@tonic-gate extern struct nm_next_hdr	*get_first_record(set_t, int, int);
274*0Sstevel@tonic-gate extern int			remove_entry(struct nm_next_hdr *,
275*0Sstevel@tonic-gate 					side_t, mdkey_t, int);
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate int		md_maxphys	= 0;	/* maximum io size in bytes */
278*0Sstevel@tonic-gate #define		MD_MAXBCOUNT	(1024 * 1024)
279*0Sstevel@tonic-gate unsigned	md_maxbcount	= 0;	/* maximum physio size in bytes */
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate /* allocate/free dynamic space associated with driver globals */
282*0Sstevel@tonic-gate void
283*0Sstevel@tonic-gate md_global_alloc_free(int alloc)
284*0Sstevel@tonic-gate {
285*0Sstevel@tonic-gate 	set_t	s;
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate 	if (alloc) {
288*0Sstevel@tonic-gate 		/* initialize driver global locks */
289*0Sstevel@tonic-gate 		cv_init(&md_cv, NULL, CV_DEFAULT, NULL);
290*0Sstevel@tonic-gate 		mutex_init(&md_mx, NULL, MUTEX_DEFAULT, NULL);
291*0Sstevel@tonic-gate 		rw_init(&md_unit_array_rw.lock, NULL, RW_DEFAULT, NULL);
292*0Sstevel@tonic-gate 		rw_init(&nm_lock.lock, NULL, RW_DEFAULT, NULL);
293*0Sstevel@tonic-gate 		rw_init(&ni_rwlp.lock, NULL, RW_DRIVER, NULL);
294*0Sstevel@tonic-gate 		rw_init(&hsp_rwlp.lock, NULL, RW_DRIVER, NULL);
295*0Sstevel@tonic-gate 		mutex_init(&md_cpr_resync.md_resync_mutex, NULL,
296*0Sstevel@tonic-gate 			MUTEX_DEFAULT, NULL);
297*0Sstevel@tonic-gate 
298*0Sstevel@tonic-gate 		/* initialize per set driver global locks */
299*0Sstevel@tonic-gate 		for (s = 0; s < MD_MAXSETS; s++) {
300*0Sstevel@tonic-gate 			/* initialize per set driver globals locks */
301*0Sstevel@tonic-gate 			mutex_init(&md_set[s].s_dbmx,
302*0Sstevel@tonic-gate 			    NULL, MUTEX_DEFAULT, NULL);
303*0Sstevel@tonic-gate 			mutex_init(&md_set_io[s].md_io_mx,
304*0Sstevel@tonic-gate 			    NULL, MUTEX_DEFAULT, NULL);
305*0Sstevel@tonic-gate 			cv_init(&md_set_io[s].md_io_cv,
306*0Sstevel@tonic-gate 			    NULL, CV_DEFAULT, NULL);
307*0Sstevel@tonic-gate 		}
308*0Sstevel@tonic-gate 	} else {
309*0Sstevel@tonic-gate 		/* destroy per set driver global locks */
310*0Sstevel@tonic-gate 		for (s = 0; s < MD_MAXSETS; s++) {
311*0Sstevel@tonic-gate 			cv_destroy(&md_set_io[s].md_io_cv);
312*0Sstevel@tonic-gate 			mutex_destroy(&md_set_io[s].md_io_mx);
313*0Sstevel@tonic-gate 			mutex_destroy(&md_set[s].s_dbmx);
314*0Sstevel@tonic-gate 		}
315*0Sstevel@tonic-gate 
316*0Sstevel@tonic-gate 		/* destroy driver global locks */
317*0Sstevel@tonic-gate 		mutex_destroy(&md_cpr_resync.md_resync_mutex);
318*0Sstevel@tonic-gate 		rw_destroy(&hsp_rwlp.lock);
319*0Sstevel@tonic-gate 		rw_destroy(&ni_rwlp.lock);
320*0Sstevel@tonic-gate 		rw_destroy(&nm_lock.lock);
321*0Sstevel@tonic-gate 		rw_destroy(&md_unit_array_rw.lock);
322*0Sstevel@tonic-gate 		mutex_destroy(&md_mx);
323*0Sstevel@tonic-gate 		cv_destroy(&md_cv);
324*0Sstevel@tonic-gate 	}
325*0Sstevel@tonic-gate }
326*0Sstevel@tonic-gate 
327*0Sstevel@tonic-gate int
328*0Sstevel@tonic-gate _init(void)
329*0Sstevel@tonic-gate {
330*0Sstevel@tonic-gate 	set_t	s;
331*0Sstevel@tonic-gate 	int	err;
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate 	MD_SET_IN(IN_INIT);
334*0Sstevel@tonic-gate 
335*0Sstevel@tonic-gate 	/* allocate dynamic space associated with driver globals */
336*0Sstevel@tonic-gate 	md_global_alloc_free(1);
337*0Sstevel@tonic-gate 
338*0Sstevel@tonic-gate 	/* initialize driver globals */
339*0Sstevel@tonic-gate 	md_major = ddi_name_to_major("md");
340*0Sstevel@tonic-gate 	md_hz = drv_usectohz(NUM_USEC_IN_SEC);
341*0Sstevel@tonic-gate 
342*0Sstevel@tonic-gate 	/* initialize tunable globals */
343*0Sstevel@tonic-gate 	if (md_maxphys == 0)		/* maximum io size in bytes */
344*0Sstevel@tonic-gate 		md_maxphys = maxphys;
345*0Sstevel@tonic-gate 	if (md_maxbcount == 0)		/* maximum physio size in bytes */
346*0Sstevel@tonic-gate 		md_maxbcount = MD_MAXBCOUNT;
347*0Sstevel@tonic-gate 
348*0Sstevel@tonic-gate 	/* initialize per set driver globals */
349*0Sstevel@tonic-gate 	for (s = 0; s < MD_MAXSETS; s++)
350*0Sstevel@tonic-gate 		md_set_io[s].io_state = MD_SET_ACTIVE;
351*0Sstevel@tonic-gate 
352*0Sstevel@tonic-gate 	/*
353*0Sstevel@tonic-gate 	 * NOTE: the framework does not currently guarantee exclusion
354*0Sstevel@tonic-gate 	 * between _init and attach after calling mod_install.
355*0Sstevel@tonic-gate 	 */
356*0Sstevel@tonic-gate 	MD_CLR_IN(IN_INIT);
357*0Sstevel@tonic-gate 	if ((err = mod_install(&modlinkage))) {
358*0Sstevel@tonic-gate 		MD_SET_IN(IN_INIT);
359*0Sstevel@tonic-gate 		md_global_alloc_free(0);	/* free dynamic space */
360*0Sstevel@tonic-gate 		MD_CLR_IN(IN_INIT);
361*0Sstevel@tonic-gate 	}
362*0Sstevel@tonic-gate 	return (err);
363*0Sstevel@tonic-gate }
364*0Sstevel@tonic-gate 
365*0Sstevel@tonic-gate int
366*0Sstevel@tonic-gate _fini(void)
367*0Sstevel@tonic-gate {
368*0Sstevel@tonic-gate 	int	err;
369*0Sstevel@tonic-gate 
370*0Sstevel@tonic-gate 	/*
371*0Sstevel@tonic-gate 	 * NOTE: the framework currently does not guarantee exclusion
372*0Sstevel@tonic-gate 	 * with attach until after mod_remove returns 0.
373*0Sstevel@tonic-gate 	 */
374*0Sstevel@tonic-gate 	if ((err = mod_remove(&modlinkage)))
375*0Sstevel@tonic-gate 		return (err);
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 	MD_SET_IN(IN_FINI);
378*0Sstevel@tonic-gate 	md_global_alloc_free(0);	/* free dynamic space */
379*0Sstevel@tonic-gate 	MD_CLR_IN(IN_FINI);
380*0Sstevel@tonic-gate 	return (err);
381*0Sstevel@tonic-gate }
382*0Sstevel@tonic-gate 
383*0Sstevel@tonic-gate int
384*0Sstevel@tonic-gate _info(struct modinfo *modinfop)
385*0Sstevel@tonic-gate {
386*0Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
387*0Sstevel@tonic-gate }
388*0Sstevel@tonic-gate 
389*0Sstevel@tonic-gate /* ARGSUSED */
390*0Sstevel@tonic-gate static int
391*0Sstevel@tonic-gate mdattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
392*0Sstevel@tonic-gate {
393*0Sstevel@tonic-gate 	int	len;
394*0Sstevel@tonic-gate 	unit_t	i;
395*0Sstevel@tonic-gate 	size_t	sz;
396*0Sstevel@tonic-gate 	char	ver[VERSION_LENGTH];
397*0Sstevel@tonic-gate 	char	**maj_str_array;
398*0Sstevel@tonic-gate 	char	*str, *str2;
399*0Sstevel@tonic-gate 
400*0Sstevel@tonic-gate 	MD_SET_IN(IN_ATTACH);
401*0Sstevel@tonic-gate 	md_in_upgrade = 0;
402*0Sstevel@tonic-gate 	md_keep_repl_state = 0;
403*0Sstevel@tonic-gate 	md_devid_destroy = 0;
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate 	if (cmd != DDI_ATTACH) {
406*0Sstevel@tonic-gate 		MD_CLR_IN(IN_ATTACH);
407*0Sstevel@tonic-gate 		return (DDI_FAILURE);
408*0Sstevel@tonic-gate 	}
409*0Sstevel@tonic-gate 
410*0Sstevel@tonic-gate 	if (md_devinfo != NULL) {
411*0Sstevel@tonic-gate 		MD_CLR_IN(IN_ATTACH);
412*0Sstevel@tonic-gate 		return (DDI_FAILURE);
413*0Sstevel@tonic-gate 	}
414*0Sstevel@tonic-gate 
415*0Sstevel@tonic-gate 	mddb_init();
416*0Sstevel@tonic-gate 
417*0Sstevel@tonic-gate 	if (md_start_daemons(TRUE)) {
418*0Sstevel@tonic-gate 		MD_CLR_IN(IN_ATTACH);
419*0Sstevel@tonic-gate 		mddb_unload();		/* undo mddb_init() allocations */
420*0Sstevel@tonic-gate 		return (DDI_FAILURE);
421*0Sstevel@tonic-gate 	}
422*0Sstevel@tonic-gate 
423*0Sstevel@tonic-gate 	/* clear the halted state */
424*0Sstevel@tonic-gate 	md_clr_status(MD_GBL_HALTED);
425*0Sstevel@tonic-gate 
426*0Sstevel@tonic-gate 	/* see if the diagnostic switch is on */
427*0Sstevel@tonic-gate 	if (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
428*0Sstevel@tonic-gate 	    DDI_PROP_DONTPASS, "md_init_debug", 0))
429*0Sstevel@tonic-gate 		md_init_debug++;
430*0Sstevel@tonic-gate 
431*0Sstevel@tonic-gate 	/* see if the failfast disable switch is on */
432*0Sstevel@tonic-gate 	if (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
433*0Sstevel@tonic-gate 	    DDI_PROP_DONTPASS, "md_ff_disable", 0))
434*0Sstevel@tonic-gate 		md_ff_disable++;
435*0Sstevel@tonic-gate 
436*0Sstevel@tonic-gate 	/* try and get the md_nmedh property */
437*0Sstevel@tonic-gate 	md_nmedh = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
438*0Sstevel@tonic-gate 	    DDI_PROP_DONTPASS, "md_nmedh", MED_DEF_HOSTS);
439*0Sstevel@tonic-gate 	if ((md_nmedh <= 0) || (md_nmedh > MED_MAX_HOSTS))
440*0Sstevel@tonic-gate 		md_nmedh = MED_DEF_HOSTS;
441*0Sstevel@tonic-gate 
442*0Sstevel@tonic-gate 	/* try and get the md_med_trans_lst property */
443*0Sstevel@tonic-gate 	len = 0;
444*0Sstevel@tonic-gate 	if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN,
445*0Sstevel@tonic-gate 	    0, "md_med_trans_lst", NULL, &len) != DDI_PROP_SUCCESS ||
446*0Sstevel@tonic-gate 	    len == 0) {
447*0Sstevel@tonic-gate 		md_med_trans_lst = md_strdup("tcp");
448*0Sstevel@tonic-gate 	} else {
449*0Sstevel@tonic-gate 		md_med_trans_lst = kmem_zalloc((size_t)len, KM_SLEEP);
450*0Sstevel@tonic-gate 		if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF,
451*0Sstevel@tonic-gate 		    0, "md_med_trans_lst", md_med_trans_lst, &len) !=
452*0Sstevel@tonic-gate 		    DDI_PROP_SUCCESS) {
453*0Sstevel@tonic-gate 			kmem_free(md_med_trans_lst, (size_t)len);
454*0Sstevel@tonic-gate 			md_med_trans_lst = md_strdup("tcp");
455*0Sstevel@tonic-gate 		}
456*0Sstevel@tonic-gate 	}
457*0Sstevel@tonic-gate 
458*0Sstevel@tonic-gate 	/* try and get the md_xlate property */
459*0Sstevel@tonic-gate 	/* Should we only do this if upgrade? */
460*0Sstevel@tonic-gate 	len = sizeof (char) * 5;
461*0Sstevel@tonic-gate 	if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF,
462*0Sstevel@tonic-gate 	    0, "md_xlate_ver", ver, &len) == DDI_PROP_SUCCESS) {
463*0Sstevel@tonic-gate 		if (strcmp(ver, VERSION) == 0) {
464*0Sstevel@tonic-gate 			len = 0;
465*0Sstevel@tonic-gate 			if (ddi_prop_op(DDI_DEV_T_ANY, dip,
466*0Sstevel@tonic-gate 			    PROP_LEN_AND_VAL_ALLOC, 0, "md_xlate",
467*0Sstevel@tonic-gate 			    (caddr_t)&md_tuple_table, &len) !=
468*0Sstevel@tonic-gate 			    DDI_PROP_SUCCESS) {
469*0Sstevel@tonic-gate 				if (md_init_debug)
470*0Sstevel@tonic-gate 					cmn_err(CE_WARN,
471*0Sstevel@tonic-gate 					    "md_xlate ddi_prop_op failed");
472*0Sstevel@tonic-gate 				goto attach_failure;
473*0Sstevel@tonic-gate 			} else {
474*0Sstevel@tonic-gate 				md_tuple_length =
475*0Sstevel@tonic-gate 				    len/(2 * ((int)sizeof (dev32_t)));
476*0Sstevel@tonic-gate 				md_in_upgrade = 1;
477*0Sstevel@tonic-gate 			}
478*0Sstevel@tonic-gate 
479*0Sstevel@tonic-gate 			/* Get target's name to major table */
480*0Sstevel@tonic-gate 			if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
481*0Sstevel@tonic-gate 			    dip, DDI_PROP_DONTPASS,
482*0Sstevel@tonic-gate 			    "md_targ_nm_table", &maj_str_array,
483*0Sstevel@tonic-gate 			    &md_majortab_len) != DDI_PROP_SUCCESS) {
484*0Sstevel@tonic-gate 				md_majortab_len = 0;
485*0Sstevel@tonic-gate 				if (md_init_debug)
486*0Sstevel@tonic-gate 				    cmn_err(CE_WARN, "md_targ_nm_table "
487*0Sstevel@tonic-gate 				    "ddi_prop_lookup_string_array failed");
488*0Sstevel@tonic-gate 				goto attach_failure;
489*0Sstevel@tonic-gate 			}
490*0Sstevel@tonic-gate 
491*0Sstevel@tonic-gate 			md_major_tuple_table =
492*0Sstevel@tonic-gate 			    (struct md_xlate_major_table *)
493*0Sstevel@tonic-gate 			    kmem_zalloc(md_majortab_len *
494*0Sstevel@tonic-gate 			    sizeof (struct md_xlate_major_table), KM_SLEEP);
495*0Sstevel@tonic-gate 
496*0Sstevel@tonic-gate 			for (i = 0; i < md_majortab_len; i++) {
497*0Sstevel@tonic-gate 				/* Getting major name */
498*0Sstevel@tonic-gate 				str = strchr(maj_str_array[i], ' ');
499*0Sstevel@tonic-gate 				if (str == NULL)
500*0Sstevel@tonic-gate 					continue;
501*0Sstevel@tonic-gate 				*str = '\0';
502*0Sstevel@tonic-gate 				md_major_tuple_table[i].drv_name =
503*0Sstevel@tonic-gate 				    md_strdup(maj_str_array[i]);
504*0Sstevel@tonic-gate 
505*0Sstevel@tonic-gate 				/* Simplified atoi to get major number */
506*0Sstevel@tonic-gate 				str2 = str + 1;
507*0Sstevel@tonic-gate 				md_major_tuple_table[i].targ_maj = 0;
508*0Sstevel@tonic-gate 				while ((*str2 >= '0') && (*str2 <= '9')) {
509*0Sstevel@tonic-gate 				    md_major_tuple_table[i].targ_maj *= 10;
510*0Sstevel@tonic-gate 				    md_major_tuple_table[i].targ_maj +=
511*0Sstevel@tonic-gate 					*str2++ - '0';
512*0Sstevel@tonic-gate 				}
513*0Sstevel@tonic-gate 				*str = ' ';
514*0Sstevel@tonic-gate 			}
515*0Sstevel@tonic-gate 			ddi_prop_free((void *)maj_str_array);
516*0Sstevel@tonic-gate 		} else {
517*0Sstevel@tonic-gate 			if (md_init_debug)
518*0Sstevel@tonic-gate 				cmn_err(CE_WARN, "md_xlate_ver is incorrect");
519*0Sstevel@tonic-gate 			goto attach_failure;
520*0Sstevel@tonic-gate 		}
521*0Sstevel@tonic-gate 	}
522*0Sstevel@tonic-gate 
523*0Sstevel@tonic-gate 	/*
524*0Sstevel@tonic-gate 	 * Check for properties:
525*0Sstevel@tonic-gate 	 * 	md_keep_repl_state and md_devid_destroy
526*0Sstevel@tonic-gate 	 * and set globals if these exist.
527*0Sstevel@tonic-gate 	 */
528*0Sstevel@tonic-gate 	md_keep_repl_state = ddi_getprop(DDI_DEV_T_ANY, dip,
529*0Sstevel@tonic-gate 				    0, "md_keep_repl_state", 0);
530*0Sstevel@tonic-gate 
531*0Sstevel@tonic-gate 	md_devid_destroy = ddi_getprop(DDI_DEV_T_ANY, dip,
532*0Sstevel@tonic-gate 				    0, "md_devid_destroy", 0);
533*0Sstevel@tonic-gate 
534*0Sstevel@tonic-gate 	if (MD_UPGRADE)
535*0Sstevel@tonic-gate 		md_major_targ = md_targ_name_to_major("md");
536*0Sstevel@tonic-gate 	else
537*0Sstevel@tonic-gate 		md_major_targ = 0;
538*0Sstevel@tonic-gate 
539*0Sstevel@tonic-gate 	/* alloc md_ops and md_mods struct */
540*0Sstevel@tonic-gate 	md_ops = (md_ops_t **)kmem_zalloc(
541*0Sstevel@tonic-gate 	    sizeof (md_ops_t *) * MD_NOPS, KM_SLEEP);
542*0Sstevel@tonic-gate 	md_mods = (ddi_modhandle_t *)kmem_zalloc(
543*0Sstevel@tonic-gate 	    sizeof (ddi_modhandle_t) * MD_NOPS, KM_SLEEP);
544*0Sstevel@tonic-gate 
545*0Sstevel@tonic-gate 	/* allocate admin device node */
546*0Sstevel@tonic-gate 	if (ddi_create_priv_minor_node(dip, "admin", S_IFCHR,
547*0Sstevel@tonic-gate 	    MD_ADM_MINOR, DDI_PSEUDO, 0, NULL, PRIV_SYS_CONFIG, 0640))
548*0Sstevel@tonic-gate 		goto attach_failure;
549*0Sstevel@tonic-gate 
550*0Sstevel@tonic-gate 	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
551*0Sstevel@tonic-gate 	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_SUCCESS)
552*0Sstevel@tonic-gate 		goto attach_failure;
553*0Sstevel@tonic-gate 
554*0Sstevel@tonic-gate 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
555*0Sstevel@tonic-gate 	    "ddi-abrwrite-supported", 1) != DDI_SUCCESS)
556*0Sstevel@tonic-gate 		goto attach_failure;
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 	/* these could have been cleared by a detach */
559*0Sstevel@tonic-gate 	md_nunits = MD_MAXUNITS;
560*0Sstevel@tonic-gate 	md_nsets = MD_MAXSETS;
561*0Sstevel@tonic-gate 
562*0Sstevel@tonic-gate 	sz = sizeof (void *) * MD_MAXUNITS;
563*0Sstevel@tonic-gate 	if (md_set[0].s_un == NULL)
564*0Sstevel@tonic-gate 		md_set[0].s_un = kmem_zalloc(sz, KM_SLEEP);
565*0Sstevel@tonic-gate 	if (md_set[0].s_ui == NULL)
566*0Sstevel@tonic-gate 		md_set[0].s_ui = kmem_zalloc(sz, KM_SLEEP);
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	md_devinfo = dip;
569*0Sstevel@tonic-gate 
570*0Sstevel@tonic-gate 	/*
571*0Sstevel@tonic-gate 	 * Only allocate device node for root mirror metadevice.
572*0Sstevel@tonic-gate 	 * Don't pre-allocate unnecessary device nodes (thus slowing down a
573*0Sstevel@tonic-gate 	 * boot when we attach).
574*0Sstevel@tonic-gate 	 * We can't read the mddbs in attach.  The mddbs will be read
575*0Sstevel@tonic-gate 	 * by metainit during the boot process when it is doing the
576*0Sstevel@tonic-gate 	 * auto-take processing and any other minor nodes will be
577*0Sstevel@tonic-gate 	 * allocated at that point.
578*0Sstevel@tonic-gate 	 *
579*0Sstevel@tonic-gate 	 * There are two scenarios to be aware of here:
580*0Sstevel@tonic-gate 	 * 1) when we are booting from a mirrored root we need the root
581*0Sstevel@tonic-gate 	 *    metadevice to exist very early (during vfs_mountroot processing)
582*0Sstevel@tonic-gate 	 * 2) we need all of the nodes to be created so that any mnttab entries
583*0Sstevel@tonic-gate 	 *    will succeed (handled by metainit reading the mddb during boot).
584*0Sstevel@tonic-gate 	 */
585*0Sstevel@tonic-gate 	if (strncmp(SVM_PSEUDO_STR, svm_bootpath, sizeof (SVM_PSEUDO_STR) - 1)
586*0Sstevel@tonic-gate 	    == 0) {
587*0Sstevel@tonic-gate 		char *p;
588*0Sstevel@tonic-gate 		int mnum = 0;
589*0Sstevel@tonic-gate 
590*0Sstevel@tonic-gate 		/*
591*0Sstevel@tonic-gate 		 * The svm_bootpath string looks something like
592*0Sstevel@tonic-gate 		 * /pseudo/md@0:0,150,blk where 150 is the minor number
593*0Sstevel@tonic-gate 		 * in this example so we need to set the pointer p onto
594*0Sstevel@tonic-gate 		 * the first digit of the minor number and convert it
595*0Sstevel@tonic-gate 		 * from ascii.
596*0Sstevel@tonic-gate 		 */
597*0Sstevel@tonic-gate 		for (p = svm_bootpath + sizeof (SVM_PSEUDO_STR) + 1;
598*0Sstevel@tonic-gate 		    *p >= '0' && *p <= '9'; p++) {
599*0Sstevel@tonic-gate 			mnum *= 10;
600*0Sstevel@tonic-gate 			mnum += *p - '0';
601*0Sstevel@tonic-gate 		}
602*0Sstevel@tonic-gate 
603*0Sstevel@tonic-gate 		if (md_create_minor_node(0, mnum)) {
604*0Sstevel@tonic-gate 			kmem_free(md_set[0].s_un, sz);
605*0Sstevel@tonic-gate 			kmem_free(md_set[0].s_ui, sz);
606*0Sstevel@tonic-gate 			goto attach_failure;
607*0Sstevel@tonic-gate 		}
608*0Sstevel@tonic-gate 	}
609*0Sstevel@tonic-gate 
610*0Sstevel@tonic-gate 	med_init();
611*0Sstevel@tonic-gate 
612*0Sstevel@tonic-gate 	MD_CLR_IN(IN_ATTACH);
613*0Sstevel@tonic-gate 	return (DDI_SUCCESS);
614*0Sstevel@tonic-gate 
615*0Sstevel@tonic-gate attach_failure:
616*0Sstevel@tonic-gate 	/*
617*0Sstevel@tonic-gate 	 * Use our own detach routine to toss any stuff we allocated above.
618*0Sstevel@tonic-gate 	 * NOTE: detach will call md_halt to free the mddb_init allocations.
619*0Sstevel@tonic-gate 	 */
620*0Sstevel@tonic-gate 	MD_CLR_IN(IN_ATTACH);
621*0Sstevel@tonic-gate 	if (mddetach(dip, DDI_DETACH) != DDI_SUCCESS)
622*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "detach from attach failed");
623*0Sstevel@tonic-gate 	return (DDI_FAILURE);
624*0Sstevel@tonic-gate }
625*0Sstevel@tonic-gate 
626*0Sstevel@tonic-gate /* ARGSUSED */
627*0Sstevel@tonic-gate static int
628*0Sstevel@tonic-gate mddetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
629*0Sstevel@tonic-gate {
630*0Sstevel@tonic-gate 	extern int	check_active_locators();
631*0Sstevel@tonic-gate 	set_t		s;
632*0Sstevel@tonic-gate 	size_t		sz;
633*0Sstevel@tonic-gate 	int		len;
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate 	MD_SET_IN(IN_DETACH);
636*0Sstevel@tonic-gate 
637*0Sstevel@tonic-gate 	/* check command */
638*0Sstevel@tonic-gate 	if (cmd != DDI_DETACH) {
639*0Sstevel@tonic-gate 		MD_CLR_IN(IN_DETACH);
640*0Sstevel@tonic-gate 		return (DDI_FAILURE);
641*0Sstevel@tonic-gate 	}
642*0Sstevel@tonic-gate 
643*0Sstevel@tonic-gate 	/*
644*0Sstevel@tonic-gate 	 * if we have not already halted yet we have no active config
645*0Sstevel@tonic-gate 	 * then automatically initiate a halt so we can detach.
646*0Sstevel@tonic-gate 	 */
647*0Sstevel@tonic-gate 	if (!(md_get_status() & MD_GBL_HALTED)) {
648*0Sstevel@tonic-gate 		if (check_active_locators() == 0) {
649*0Sstevel@tonic-gate 			/*
650*0Sstevel@tonic-gate 			 * NOTE: a successful md_halt will have done the
651*0Sstevel@tonic-gate 			 * mddb_unload to free allocations done in mddb_init
652*0Sstevel@tonic-gate 			 */
653*0Sstevel@tonic-gate 			if (md_halt(MD_NO_GBL_LOCKS_HELD)) {
654*0Sstevel@tonic-gate 				cmn_err(CE_NOTE, "md:detach: "
655*0Sstevel@tonic-gate 				    "Could not halt Solaris Volume Manager");
656*0Sstevel@tonic-gate 				MD_CLR_IN(IN_DETACH);
657*0Sstevel@tonic-gate 				return (DDI_FAILURE);
658*0Sstevel@tonic-gate 			}
659*0Sstevel@tonic-gate 		}
660*0Sstevel@tonic-gate 
661*0Sstevel@tonic-gate 		/* fail detach if we have not halted */
662*0Sstevel@tonic-gate 		if (!(md_get_status() & MD_GBL_HALTED)) {
663*0Sstevel@tonic-gate 			MD_CLR_IN(IN_DETACH);
664*0Sstevel@tonic-gate 			return (DDI_FAILURE);
665*0Sstevel@tonic-gate 		}
666*0Sstevel@tonic-gate 	}
667*0Sstevel@tonic-gate 
668*0Sstevel@tonic-gate 	/* must be in halted state, this will be cleared on next attach */
669*0Sstevel@tonic-gate 	ASSERT(md_get_status() & MD_GBL_HALTED);
670*0Sstevel@tonic-gate 
671*0Sstevel@tonic-gate 	/* cleanup attach allocations and initializations */
672*0Sstevel@tonic-gate 	md_major_targ = 0;
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate 	sz = sizeof (void *) * md_nunits;
675*0Sstevel@tonic-gate 	for (s = 0; s < md_nsets; s++) {
676*0Sstevel@tonic-gate 		if (md_set[s].s_un != NULL) {
677*0Sstevel@tonic-gate 			kmem_free(md_set[s].s_un, sz);
678*0Sstevel@tonic-gate 			md_set[s].s_un = NULL;
679*0Sstevel@tonic-gate 		}
680*0Sstevel@tonic-gate 
681*0Sstevel@tonic-gate 		if (md_set[s].s_ui != NULL) {
682*0Sstevel@tonic-gate 			kmem_free(md_set[s].s_ui, sz);
683*0Sstevel@tonic-gate 			md_set[s].s_ui = NULL;
684*0Sstevel@tonic-gate 		}
685*0Sstevel@tonic-gate 	}
686*0Sstevel@tonic-gate 	md_nunits = 0;
687*0Sstevel@tonic-gate 	md_nsets = 0;
688*0Sstevel@tonic-gate 	md_nmedh = 0;
689*0Sstevel@tonic-gate 
690*0Sstevel@tonic-gate 	if (md_med_trans_lst != NULL) {
691*0Sstevel@tonic-gate 		kmem_free(md_med_trans_lst, strlen(md_med_trans_lst) + 1);
692*0Sstevel@tonic-gate 		md_med_trans_lst = NULL;
693*0Sstevel@tonic-gate 	}
694*0Sstevel@tonic-gate 
695*0Sstevel@tonic-gate 	if (md_mods != NULL) {
696*0Sstevel@tonic-gate 		kmem_free(md_mods, sizeof (ddi_modhandle_t) * MD_NOPS);
697*0Sstevel@tonic-gate 		md_mods = NULL;
698*0Sstevel@tonic-gate 	}
699*0Sstevel@tonic-gate 
700*0Sstevel@tonic-gate 	if (md_ops != NULL) {
701*0Sstevel@tonic-gate 		kmem_free(md_ops, sizeof (md_ops_t *) * MD_NOPS);
702*0Sstevel@tonic-gate 		md_ops = NULL;
703*0Sstevel@tonic-gate 	}
704*0Sstevel@tonic-gate 
705*0Sstevel@tonic-gate 	if (MD_UPGRADE) {
706*0Sstevel@tonic-gate 		len = md_tuple_length * (2 * ((int)sizeof (dev32_t)));
707*0Sstevel@tonic-gate 		md_in_upgrade = 0;
708*0Sstevel@tonic-gate 		md_xlate_free(len);
709*0Sstevel@tonic-gate 		md_majortab_free();
710*0Sstevel@tonic-gate 	}
711*0Sstevel@tonic-gate 
712*0Sstevel@tonic-gate 	/*
713*0Sstevel@tonic-gate 	 * Undo what we did in mdattach, freeing resources
714*0Sstevel@tonic-gate 	 * and removing things we installed.  The system
715*0Sstevel@tonic-gate 	 * framework guarantees we are not active with this devinfo
716*0Sstevel@tonic-gate 	 * node in any other entry points at this time.
717*0Sstevel@tonic-gate 	 */
718*0Sstevel@tonic-gate 	ddi_prop_remove_all(dip);
719*0Sstevel@tonic-gate 	ddi_remove_minor_node(dip, NULL);
720*0Sstevel@tonic-gate 
721*0Sstevel@tonic-gate 	med_fini();
722*0Sstevel@tonic-gate 	md_devinfo = NULL;
723*0Sstevel@tonic-gate 
724*0Sstevel@tonic-gate 	MD_CLR_IN(IN_DETACH);
725*0Sstevel@tonic-gate 	return (DDI_SUCCESS);
726*0Sstevel@tonic-gate }
727*0Sstevel@tonic-gate 
728*0Sstevel@tonic-gate 
729*0Sstevel@tonic-gate /*
730*0Sstevel@tonic-gate  * Given the device number return the devinfo pointer
731*0Sstevel@tonic-gate  * given to md via md_attach
732*0Sstevel@tonic-gate  */
733*0Sstevel@tonic-gate /*ARGSUSED*/
734*0Sstevel@tonic-gate static int
735*0Sstevel@tonic-gate mdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
736*0Sstevel@tonic-gate {
737*0Sstevel@tonic-gate 	int		error = DDI_FAILURE;
738*0Sstevel@tonic-gate 
739*0Sstevel@tonic-gate 	switch (infocmd) {
740*0Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
741*0Sstevel@tonic-gate 		if (md_devinfo) {
742*0Sstevel@tonic-gate 			*result = (void *)md_devinfo;
743*0Sstevel@tonic-gate 			error = DDI_SUCCESS;
744*0Sstevel@tonic-gate 		}
745*0Sstevel@tonic-gate 		break;
746*0Sstevel@tonic-gate 
747*0Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
748*0Sstevel@tonic-gate 		*result = (void *)0;
749*0Sstevel@tonic-gate 		error = DDI_SUCCESS;
750*0Sstevel@tonic-gate 		break;
751*0Sstevel@tonic-gate 	}
752*0Sstevel@tonic-gate 	return (error);
753*0Sstevel@tonic-gate }
754*0Sstevel@tonic-gate 
755*0Sstevel@tonic-gate /*
756*0Sstevel@tonic-gate  * property operation routine.  return the number of blocks for the partition
757*0Sstevel@tonic-gate  * in question or forward the request to the property facilities.
758*0Sstevel@tonic-gate  */
759*0Sstevel@tonic-gate static int
760*0Sstevel@tonic-gate mdprop_op(
761*0Sstevel@tonic-gate 	dev_t dev,		/* device number associated with device */
762*0Sstevel@tonic-gate 	dev_info_t *dip,	/* device info struct for this device */
763*0Sstevel@tonic-gate 	ddi_prop_op_t prop_op,	/* property operator */
764*0Sstevel@tonic-gate 	int mod_flags,		/* property flags */
765*0Sstevel@tonic-gate 	char *name,		/* name of property */
766*0Sstevel@tonic-gate 	caddr_t valuep,		/* where to put property value */
767*0Sstevel@tonic-gate 	int *lengthp)		/* put length of property here */
768*0Sstevel@tonic-gate {
769*0Sstevel@tonic-gate 	minor_t		mnum;
770*0Sstevel@tonic-gate 	set_t		setno;
771*0Sstevel@tonic-gate 	md_unit_t	*un;
772*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
773*0Sstevel@tonic-gate 	uint64_t	nblocks64;
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate 	/*
776*0Sstevel@tonic-gate 	 * Our dynamic properties are all device specific and size oriented.
777*0Sstevel@tonic-gate 	 * Requests issued under conditions where size is valid are passed
778*0Sstevel@tonic-gate 	 * to ddi_prop_op_nblocks with the size information, otherwise the
779*0Sstevel@tonic-gate 	 * request is passed to ddi_prop_op. Make sure that the minor device
780*0Sstevel@tonic-gate 	 * is a valid part of the Virtual Disk subsystem.
781*0Sstevel@tonic-gate 	 */
782*0Sstevel@tonic-gate 	mnum = getminor(dev);
783*0Sstevel@tonic-gate 	setno = MD_MIN2SET(mnum);
784*0Sstevel@tonic-gate 	if ((dev == DDI_DEV_T_ANY) || (mnum == MD_ADM_MINOR) ||
785*0Sstevel@tonic-gate 	    (setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
786*0Sstevel@tonic-gate pass:		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
787*0Sstevel@tonic-gate 		    name, valuep, lengthp));
788*0Sstevel@tonic-gate 	} else {
789*0Sstevel@tonic-gate 		rw_enter(&md_unit_array_rw.lock, RW_READER);
790*0Sstevel@tonic-gate 		if (((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) ||
791*0Sstevel@tonic-gate 		    ((ui = MDI_UNIT(mnum)) == NULL)) {
792*0Sstevel@tonic-gate 			rw_exit(&md_unit_array_rw.lock);
793*0Sstevel@tonic-gate 			goto pass;
794*0Sstevel@tonic-gate 		}
795*0Sstevel@tonic-gate 
796*0Sstevel@tonic-gate 		/* get nblocks value */
797*0Sstevel@tonic-gate 		un = (md_unit_t *)md_unit_readerlock(ui);
798*0Sstevel@tonic-gate 		nblocks64 = un->c.un_total_blocks;
799*0Sstevel@tonic-gate 		md_unit_readerexit(ui);
800*0Sstevel@tonic-gate 		rw_exit(&md_unit_array_rw.lock);
801*0Sstevel@tonic-gate 
802*0Sstevel@tonic-gate 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
803*0Sstevel@tonic-gate 		    name, valuep, lengthp, nblocks64));
804*0Sstevel@tonic-gate 	}
805*0Sstevel@tonic-gate 
806*0Sstevel@tonic-gate }
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate static void
809*0Sstevel@tonic-gate snarf_user_data(set_t setno)
810*0Sstevel@tonic-gate {
811*0Sstevel@tonic-gate 	mddb_recid_t		recid;
812*0Sstevel@tonic-gate 	mddb_recstatus_t	status;
813*0Sstevel@tonic-gate 
814*0Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
815*0Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, MDDB_USER, 0)) > 0) {
816*0Sstevel@tonic-gate 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
817*0Sstevel@tonic-gate 			continue;
818*0Sstevel@tonic-gate 
819*0Sstevel@tonic-gate 		status = mddb_getrecstatus(recid);
820*0Sstevel@tonic-gate 		if (status == MDDB_STALE)
821*0Sstevel@tonic-gate 			continue;
822*0Sstevel@tonic-gate 
823*0Sstevel@tonic-gate 		if (status == MDDB_NODATA) {
824*0Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
825*0Sstevel@tonic-gate 			continue;
826*0Sstevel@tonic-gate 		}
827*0Sstevel@tonic-gate 
828*0Sstevel@tonic-gate 		ASSERT(status == MDDB_OK);
829*0Sstevel@tonic-gate 
830*0Sstevel@tonic-gate 		mddb_setrecprivate(recid, MD_PRV_GOTIT);
831*0Sstevel@tonic-gate 	}
832*0Sstevel@tonic-gate }
833*0Sstevel@tonic-gate 
834*0Sstevel@tonic-gate static void
835*0Sstevel@tonic-gate md_print_block_usage(mddb_set_t *s, uint_t blks)
836*0Sstevel@tonic-gate {
837*0Sstevel@tonic-gate 	uint_t		ib;
838*0Sstevel@tonic-gate 	int		li;
839*0Sstevel@tonic-gate 	mddb_mb_ic_t	*mbip;
840*0Sstevel@tonic-gate 	uint_t		max_blk_needed;
841*0Sstevel@tonic-gate 	mddb_lb_t	*lbp;
842*0Sstevel@tonic-gate 	mddb_sidelocator_t	*slp;
843*0Sstevel@tonic-gate 	int		drv_index;
844*0Sstevel@tonic-gate 	md_splitname	sn;
845*0Sstevel@tonic-gate 	char		*name;
846*0Sstevel@tonic-gate 	char		*suffix;
847*0Sstevel@tonic-gate 	size_t		prefixlen;
848*0Sstevel@tonic-gate 	size_t		suffixlen;
849*0Sstevel@tonic-gate 	int		alloc_sz;
850*0Sstevel@tonic-gate 
851*0Sstevel@tonic-gate 
852*0Sstevel@tonic-gate 	max_blk_needed = s->s_totalblkcnt - s->s_freeblkcnt + blks;
853*0Sstevel@tonic-gate 
854*0Sstevel@tonic-gate 
855*0Sstevel@tonic-gate 	cmn_err(CE_WARN, "Blocks in Metadevice State Database: %d\n"
856*0Sstevel@tonic-gate 		"            Additional Blocks Needed:            %d\n\n"
857*0Sstevel@tonic-gate 		"            Increase size of following replicas for\n"
858*0Sstevel@tonic-gate 		"            device relocatability by deleting listed\n"
859*0Sstevel@tonic-gate 		"            replica and re-adding replica with\n"
860*0Sstevel@tonic-gate 		"            increased size (see metadb(1M)):\n"
861*0Sstevel@tonic-gate 		"                Replica                   Increase By",
862*0Sstevel@tonic-gate 		s->s_totalblkcnt, (blks - s->s_freeblkcnt));
863*0Sstevel@tonic-gate 
864*0Sstevel@tonic-gate 	lbp = s->s_lbp;
865*0Sstevel@tonic-gate 
866*0Sstevel@tonic-gate 	for (li = 0; li < lbp->lb_loccnt; li++) {
867*0Sstevel@tonic-gate 		if (lbp->lb_locators[li].l_flags & MDDB_F_DELETED)
868*0Sstevel@tonic-gate 			continue;
869*0Sstevel@tonic-gate 		ib = 0;
870*0Sstevel@tonic-gate 		for (mbip = s->s_mbiarray[li]; mbip != NULL;
871*0Sstevel@tonic-gate 		    mbip = mbip->mbi_next) {
872*0Sstevel@tonic-gate 			ib += (uint_t)mbip->mbi_mddb_mb.mb_blkcnt;
873*0Sstevel@tonic-gate 		}
874*0Sstevel@tonic-gate 		if (ib == 0)
875*0Sstevel@tonic-gate 			continue;
876*0Sstevel@tonic-gate 		if (ib < max_blk_needed) {
877*0Sstevel@tonic-gate 			slp = &lbp->lb_sidelocators[s->s_sideno][li];
878*0Sstevel@tonic-gate 			drv_index = slp->l_drvnm_index;
879*0Sstevel@tonic-gate 			mddb_locatorblock2splitname(s->s_lnp, li, s->s_sideno,
880*0Sstevel@tonic-gate 				&sn);
881*0Sstevel@tonic-gate 			prefixlen = SPN_PREFIX(&sn).pre_len;
882*0Sstevel@tonic-gate 			suffixlen = SPN_SUFFIX(&sn).suf_len;
883*0Sstevel@tonic-gate 			alloc_sz = (int)(prefixlen + suffixlen + 2);
884*0Sstevel@tonic-gate 			name = (char *)kmem_alloc(alloc_sz, KM_SLEEP);
885*0Sstevel@tonic-gate 			(void) strncpy(name, SPN_PREFIX(&sn).pre_data,
886*0Sstevel@tonic-gate 			    prefixlen);
887*0Sstevel@tonic-gate 			name[prefixlen] = '/';
888*0Sstevel@tonic-gate 			suffix = name + (prefixlen + 1);
889*0Sstevel@tonic-gate 			(void) strncpy(suffix, SPN_SUFFIX(&sn).suf_data,
890*0Sstevel@tonic-gate 			    suffixlen);
891*0Sstevel@tonic-gate 			name[prefixlen + suffixlen + 1] = '\0';
892*0Sstevel@tonic-gate 			cmn_err(CE_WARN,
893*0Sstevel@tonic-gate 				"  %s (%s:%d:%d)   %d blocks",
894*0Sstevel@tonic-gate 				name, lbp->lb_drvnm[drv_index].dn_data,
895*0Sstevel@tonic-gate 				slp->l_mnum, lbp->lb_locators[li].l_blkno,
896*0Sstevel@tonic-gate 				(max_blk_needed - ib));
897*0Sstevel@tonic-gate 			kmem_free(name, alloc_sz);
898*0Sstevel@tonic-gate 		}
899*0Sstevel@tonic-gate 	}
900*0Sstevel@tonic-gate }
901*0Sstevel@tonic-gate 
902*0Sstevel@tonic-gate /*
903*0Sstevel@tonic-gate  * md_create_minor_node:
904*0Sstevel@tonic-gate  *	Create the minor device for the given set and un_self_id.
905*0Sstevel@tonic-gate  *
906*0Sstevel@tonic-gate  * Input:
907*0Sstevel@tonic-gate  *	setno	- set number
908*0Sstevel@tonic-gate  *	mnum	- selfID of unit
909*0Sstevel@tonic-gate  *
910*0Sstevel@tonic-gate  * Output:
911*0Sstevel@tonic-gate  *	None.
912*0Sstevel@tonic-gate  *
913*0Sstevel@tonic-gate  * Returns 0 for success, 1 for failure.
914*0Sstevel@tonic-gate  *
915*0Sstevel@tonic-gate  * Side-effects:
916*0Sstevel@tonic-gate  *	None.
917*0Sstevel@tonic-gate  */
918*0Sstevel@tonic-gate int
919*0Sstevel@tonic-gate md_create_minor_node(set_t setno, minor_t mnum)
920*0Sstevel@tonic-gate {
921*0Sstevel@tonic-gate 	char		name[20];
922*0Sstevel@tonic-gate 
923*0Sstevel@tonic-gate 	/* Check for valid arguments */
924*0Sstevel@tonic-gate 	if (setno >= MD_MAXSETS || MD_MIN2UNIT(mnum) >= MD_MAXUNITS)
925*0Sstevel@tonic-gate 		return (1);
926*0Sstevel@tonic-gate 
927*0Sstevel@tonic-gate 	(void) snprintf(name, 20, "%u,%u,blk",
928*0Sstevel@tonic-gate 		(unsigned)setno, (unsigned)MD_MIN2UNIT(mnum));
929*0Sstevel@tonic-gate 
930*0Sstevel@tonic-gate 	if (ddi_create_minor_node(md_devinfo, name, S_IFBLK,
931*0Sstevel@tonic-gate 	    MD_MKMIN(setno, mnum), DDI_PSEUDO, 0))
932*0Sstevel@tonic-gate 		return (1);
933*0Sstevel@tonic-gate 
934*0Sstevel@tonic-gate 	(void) snprintf(name, 20, "%u,%u,raw",
935*0Sstevel@tonic-gate 		(unsigned)setno, (unsigned)MD_MIN2UNIT(mnum));
936*0Sstevel@tonic-gate 
937*0Sstevel@tonic-gate 	if (ddi_create_minor_node(md_devinfo, name, S_IFCHR,
938*0Sstevel@tonic-gate 	    MD_MKMIN(setno, mnum), DDI_PSEUDO, 0))
939*0Sstevel@tonic-gate 		return (1);
940*0Sstevel@tonic-gate 
941*0Sstevel@tonic-gate 	return (0);
942*0Sstevel@tonic-gate }
943*0Sstevel@tonic-gate 
944*0Sstevel@tonic-gate /*
945*0Sstevel@tonic-gate  * For a given key check if it is an orphaned record.
946*0Sstevel@tonic-gate  * The following conditions are used to determine an orphan.
947*0Sstevel@tonic-gate  * 1. The device associated with that key is not a metadevice.
948*0Sstevel@tonic-gate  * 2. If DEVID_STYLE then the physical device does not have a device Id
949*0Sstevel@tonic-gate  * associated with it.
950*0Sstevel@tonic-gate  *
951*0Sstevel@tonic-gate  * If a key does not have an entry in the devid namespace it could be
952*0Sstevel@tonic-gate  * a device that does not support device ids. Hence the record is not
953*0Sstevel@tonic-gate  * deleted.
954*0Sstevel@tonic-gate  */
955*0Sstevel@tonic-gate 
956*0Sstevel@tonic-gate static int
957*0Sstevel@tonic-gate md_verify_orphaned_record(set_t setno, mdkey_t key)
958*0Sstevel@tonic-gate {
959*0Sstevel@tonic-gate 	md_dev64_t	odev; /* orphaned dev */
960*0Sstevel@tonic-gate 	mddb_set_t	*s;
961*0Sstevel@tonic-gate 	side_t		side = 0;
962*0Sstevel@tonic-gate 	struct nm_next_hdr	*did_nh = NULL;
963*0Sstevel@tonic-gate 
964*0Sstevel@tonic-gate 	s = (mddb_set_t *)md_set[setno].s_db;
965*0Sstevel@tonic-gate 	if ((did_nh = get_first_record(setno, 1,  (NM_DEVID | NM_NOTSHARED)))
966*0Sstevel@tonic-gate 	    == NULL)
967*0Sstevel@tonic-gate 		return (0);
968*0Sstevel@tonic-gate 	/*
969*0Sstevel@tonic-gate 	 * If devid style is set then get the dev_t using MD_NOTRUST_DEVT
970*0Sstevel@tonic-gate 	 */
971*0Sstevel@tonic-gate 	if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE) {
972*0Sstevel@tonic-gate 		odev = md_getdevnum(setno, side, key, MD_NOTRUST_DEVT);
973*0Sstevel@tonic-gate 		if ((odev == NODEV64) || (md_getmajor(odev) == md_major))
974*0Sstevel@tonic-gate 			return (0);
975*0Sstevel@tonic-gate 		if (lookup_entry(did_nh, setno, side, key, odev, NM_DEVID) ==
976*0Sstevel@tonic-gate 									NULL)
977*0Sstevel@tonic-gate 			return (1);
978*0Sstevel@tonic-gate 	}
979*0Sstevel@tonic-gate 	return (0);
980*0Sstevel@tonic-gate }
981*0Sstevel@tonic-gate 
982*0Sstevel@tonic-gate int
983*0Sstevel@tonic-gate md_snarf_db_set(set_t setno, md_error_t *ep)
984*0Sstevel@tonic-gate {
985*0Sstevel@tonic-gate 	int			err = 0;
986*0Sstevel@tonic-gate 	int			i;
987*0Sstevel@tonic-gate 	mddb_recid_t		recid;
988*0Sstevel@tonic-gate 	mddb_type_t		drvrid;
989*0Sstevel@tonic-gate 	mddb_recstatus_t	status;
990*0Sstevel@tonic-gate 	md_ops_t		*ops;
991*0Sstevel@tonic-gate 	uint_t			privat;
992*0Sstevel@tonic-gate 	mddb_set_t		*s;
993*0Sstevel@tonic-gate 	uint_t			cvt_blks;
994*0Sstevel@tonic-gate 	struct nm_next_hdr	*nh;
995*0Sstevel@tonic-gate 	mdkey_t			key = MD_KEYWILD;
996*0Sstevel@tonic-gate 	side_t			side = 0;
997*0Sstevel@tonic-gate 	int			size;
998*0Sstevel@tonic-gate 	int			devid_flag;
999*0Sstevel@tonic-gate 	int			retval;
1000*0Sstevel@tonic-gate 
1001*0Sstevel@tonic-gate 	md_haltsnarf_enter(setno);
1002*0Sstevel@tonic-gate 
1003*0Sstevel@tonic-gate 	mutex_enter(&md_mx);
1004*0Sstevel@tonic-gate 	if (md_set[setno].s_status & MD_SET_SNARFED) {
1005*0Sstevel@tonic-gate 		mutex_exit(&md_mx);
1006*0Sstevel@tonic-gate 		md_haltsnarf_exit(setno);
1007*0Sstevel@tonic-gate 		return (0);
1008*0Sstevel@tonic-gate 	}
1009*0Sstevel@tonic-gate 	mutex_exit(&md_mx);
1010*0Sstevel@tonic-gate 
1011*0Sstevel@tonic-gate 	if (! (md_get_status() & MD_GBL_DAEMONS_LIVE)) {
1012*0Sstevel@tonic-gate 		if (md_start_daemons(TRUE)) {
1013*0Sstevel@tonic-gate 			if (ep != NULL)
1014*0Sstevel@tonic-gate 				(void) mdsyserror(ep, ENXIO);
1015*0Sstevel@tonic-gate 			err = -1;
1016*0Sstevel@tonic-gate 			goto out;
1017*0Sstevel@tonic-gate 		}
1018*0Sstevel@tonic-gate 	}
1019*0Sstevel@tonic-gate 
1020*0Sstevel@tonic-gate 
1021*0Sstevel@tonic-gate 	/*
1022*0Sstevel@tonic-gate 	 * Load the devid name space if it exists
1023*0Sstevel@tonic-gate 	 */
1024*0Sstevel@tonic-gate 	(void) md_load_namespace(setno, NULL, NM_DEVID);
1025*0Sstevel@tonic-gate 	if (!md_load_namespace(setno, ep, 0L)) {
1026*0Sstevel@tonic-gate 		/*
1027*0Sstevel@tonic-gate 		 * Unload the devid namespace
1028*0Sstevel@tonic-gate 		 */
1029*0Sstevel@tonic-gate 		(void) md_unload_namespace(setno, NM_DEVID);
1030*0Sstevel@tonic-gate 		err = -1;
1031*0Sstevel@tonic-gate 		goto out;
1032*0Sstevel@tonic-gate 	}
1033*0Sstevel@tonic-gate 
1034*0Sstevel@tonic-gate 	/*
1035*0Sstevel@tonic-gate 	 * If replica is in non-devid state, convert if:
1036*0Sstevel@tonic-gate 	 * 	- not in probe during upgrade (md_keep_repl_state = 0)
1037*0Sstevel@tonic-gate 	 * 	- enough space available in replica
1038*0Sstevel@tonic-gate 	 *	- local set
1039*0Sstevel@tonic-gate 	 *	- not a multi-node diskset
1040*0Sstevel@tonic-gate 	 *	- clustering is not present (for non-local set)
1041*0Sstevel@tonic-gate 	 */
1042*0Sstevel@tonic-gate 	s = (mddb_set_t *)md_set[setno].s_db;
1043*0Sstevel@tonic-gate 	devid_flag = 0;
1044*0Sstevel@tonic-gate 	if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE) && !md_keep_repl_state)
1045*0Sstevel@tonic-gate 		devid_flag = 1;
1046*0Sstevel@tonic-gate 	if (cluster_bootflags & CLUSTER_CONFIGURED)
1047*0Sstevel@tonic-gate 		if (setno != MD_LOCAL_SET)
1048*0Sstevel@tonic-gate 			devid_flag = 0;
1049*0Sstevel@tonic-gate 	if (MD_MNSET_SETNO(setno))
1050*0Sstevel@tonic-gate 		devid_flag = 0;
1051*0Sstevel@tonic-gate 	if ((md_devid_destroy == 1) && (md_keep_repl_state == 1))
1052*0Sstevel@tonic-gate 		devid_flag = 0;
1053*0Sstevel@tonic-gate 
1054*0Sstevel@tonic-gate 	/*
1055*0Sstevel@tonic-gate 	 * if we weren't devid style before and md_keep_repl_state=1
1056*0Sstevel@tonic-gate 	 * we need to stay non-devid
1057*0Sstevel@tonic-gate 	 */
1058*0Sstevel@tonic-gate 	if ((md_keep_repl_state == 1) &&
1059*0Sstevel@tonic-gate 	    ((s->s_lbp->lb_flags & MDDB_DEVID_STYLE) == 0))
1060*0Sstevel@tonic-gate 		devid_flag = 0;
1061*0Sstevel@tonic-gate 	if (devid_flag) {
1062*0Sstevel@tonic-gate 		/*
1063*0Sstevel@tonic-gate 		 * Determine number of free blocks needed to convert
1064*0Sstevel@tonic-gate 		 * entire replica to device id format - locator blocks
1065*0Sstevel@tonic-gate 		 * and namespace.
1066*0Sstevel@tonic-gate 		 */
1067*0Sstevel@tonic-gate 		cvt_blks = 0;
1068*0Sstevel@tonic-gate 		if (mddb_lb_did_convert(s, 0, &cvt_blks) != 0) {
1069*0Sstevel@tonic-gate 			if (ep != NULL)
1070*0Sstevel@tonic-gate 				(void) mdsyserror(ep, EIO);
1071*0Sstevel@tonic-gate 			err = -1;
1072*0Sstevel@tonic-gate 			goto out;
1073*0Sstevel@tonic-gate 
1074*0Sstevel@tonic-gate 		}
1075*0Sstevel@tonic-gate 		cvt_blks += md_nm_did_chkspace(setno);
1076*0Sstevel@tonic-gate 
1077*0Sstevel@tonic-gate 		/* add MDDB_DEVID_CONV_PERC% */
1078*0Sstevel@tonic-gate 		if ((md_conv_perc > 0) && (md_conv_perc <= 100)) {
1079*0Sstevel@tonic-gate 			cvt_blks = cvt_blks * (100 + md_conv_perc) / 100;
1080*0Sstevel@tonic-gate 		}
1081*0Sstevel@tonic-gate 
1082*0Sstevel@tonic-gate 		if (cvt_blks <= s->s_freeblkcnt) {
1083*0Sstevel@tonic-gate 			if (mddb_lb_did_convert(s, 1, &cvt_blks) != 0) {
1084*0Sstevel@tonic-gate 				if (ep != NULL)
1085*0Sstevel@tonic-gate 					(void) mdsyserror(ep, EIO);
1086*0Sstevel@tonic-gate 				err = -1;
1087*0Sstevel@tonic-gate 				goto out;
1088*0Sstevel@tonic-gate 			}
1089*0Sstevel@tonic-gate 
1090*0Sstevel@tonic-gate 		} else {
1091*0Sstevel@tonic-gate 			/*
1092*0Sstevel@tonic-gate 			 * Print message that replica can't be converted for
1093*0Sstevel@tonic-gate 			 * lack of space.   No failure - just continue to
1094*0Sstevel@tonic-gate 			 * run without device ids.
1095*0Sstevel@tonic-gate 			 */
1096*0Sstevel@tonic-gate 			cmn_err(CE_WARN,
1097*0Sstevel@tonic-gate 			    "Unable to add Solaris Volume Manager device "
1098*0Sstevel@tonic-gate 			    "relocation data.\n"
1099*0Sstevel@tonic-gate 			    "          To use device relocation feature:\n"
1100*0Sstevel@tonic-gate 			    "          - Increase size of listed replicas\n"
1101*0Sstevel@tonic-gate 			    "          - Reboot");
1102*0Sstevel@tonic-gate 			md_print_block_usage(s, cvt_blks);
1103*0Sstevel@tonic-gate 			cmn_err(CE_WARN,
1104*0Sstevel@tonic-gate 			    "Loading set without device relocation data.\n"
1105*0Sstevel@tonic-gate 			    "          Solaris Volume Manager disk movement "
1106*0Sstevel@tonic-gate 			    "not tracked in local set.");
1107*0Sstevel@tonic-gate 		}
1108*0Sstevel@tonic-gate 	}
1109*0Sstevel@tonic-gate 
1110*0Sstevel@tonic-gate 	/*
1111*0Sstevel@tonic-gate 	 * go through and load any modules referenced in
1112*0Sstevel@tonic-gate 	 * data base
1113*0Sstevel@tonic-gate 	 */
1114*0Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
1115*0Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
1116*0Sstevel@tonic-gate 		status = mddb_getrecstatus(recid);
1117*0Sstevel@tonic-gate 		if (status == MDDB_STALE) {
1118*0Sstevel@tonic-gate 			if (! (md_get_setstatus(setno) & MD_SET_STALE)) {
1119*0Sstevel@tonic-gate 				md_set_setstatus(setno, MD_SET_STALE);
1120*0Sstevel@tonic-gate 				cmn_err(CE_WARN,
1121*0Sstevel@tonic-gate 				    "md: state database is stale");
1122*0Sstevel@tonic-gate 			}
1123*0Sstevel@tonic-gate 		} else if (status == MDDB_NODATA) {
1124*0Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1125*0Sstevel@tonic-gate 			continue;
1126*0Sstevel@tonic-gate 		}
1127*0Sstevel@tonic-gate 		drvrid = mddb_getrectype1(recid);
1128*0Sstevel@tonic-gate 		if (drvrid < MDDB_FIRST_MODID)
1129*0Sstevel@tonic-gate 			continue;
1130*0Sstevel@tonic-gate 		if (md_loadsubmod(setno, md_getshared_name(setno, drvrid),
1131*0Sstevel@tonic-gate 		    drvrid) < 0) {
1132*0Sstevel@tonic-gate 			cmn_err(CE_NOTE, "md: could not load misc/%s",
1133*0Sstevel@tonic-gate 				md_getshared_name(setno, drvrid));
1134*0Sstevel@tonic-gate 		}
1135*0Sstevel@tonic-gate 	}
1136*0Sstevel@tonic-gate 
1137*0Sstevel@tonic-gate 	if (recid < 0)
1138*0Sstevel@tonic-gate 		goto out;
1139*0Sstevel@tonic-gate 
1140*0Sstevel@tonic-gate 	snarf_user_data(setno);
1141*0Sstevel@tonic-gate 
1142*0Sstevel@tonic-gate 	/*
1143*0Sstevel@tonic-gate 	 * Initialize the md_nm_snarfed array
1144*0Sstevel@tonic-gate 	 * this array is indexed by the key and
1145*0Sstevel@tonic-gate 	 * is set by md_getdevnum during the snarf time
1146*0Sstevel@tonic-gate 	 */
1147*0Sstevel@tonic-gate 	if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) != NULL) {
1148*0Sstevel@tonic-gate 		size = (int)((((struct nm_rec_hdr *)nh->nmn_record)->
1149*0Sstevel@tonic-gate 		    r_next_key) * (sizeof (int)));
1150*0Sstevel@tonic-gate 		md_nm_snarfed = (int *)kmem_zalloc(size, KM_SLEEP);
1151*0Sstevel@tonic-gate 	}
1152*0Sstevel@tonic-gate 
1153*0Sstevel@tonic-gate 	/*
1154*0Sstevel@tonic-gate 	 * go through and snarf until nothing gets added
1155*0Sstevel@tonic-gate 	 */
1156*0Sstevel@tonic-gate 	do {
1157*0Sstevel@tonic-gate 		i = 0;
1158*0Sstevel@tonic-gate 		for (ops = md_opslist; ops != NULL; ops = ops->md_next) {
1159*0Sstevel@tonic-gate 			if (ops->md_snarf != NULL) {
1160*0Sstevel@tonic-gate 				retval = ops->md_snarf(MD_SNARF_DOIT, setno);
1161*0Sstevel@tonic-gate 				if (retval == -1) {
1162*0Sstevel@tonic-gate 					err = -1;
1163*0Sstevel@tonic-gate 					/* Don't know the failed unit */
1164*0Sstevel@tonic-gate 					(void) mdmderror(ep, MDE_RR_ALLOC_ERROR,
1165*0Sstevel@tonic-gate 					    0);
1166*0Sstevel@tonic-gate 					(void) md_halt_set(setno, MD_HALT_ALL);
1167*0Sstevel@tonic-gate 					(void) mddb_unload_set(setno);
1168*0Sstevel@tonic-gate 					md_haltsnarf_exit(setno);
1169*0Sstevel@tonic-gate 					return (err);
1170*0Sstevel@tonic-gate 				} else {
1171*0Sstevel@tonic-gate 					i += retval;
1172*0Sstevel@tonic-gate 				}
1173*0Sstevel@tonic-gate 			}
1174*0Sstevel@tonic-gate 		}
1175*0Sstevel@tonic-gate 	} while (i);
1176*0Sstevel@tonic-gate 
1177*0Sstevel@tonic-gate 	md_set_setstatus(setno, MD_SET_SNARFED);
1178*0Sstevel@tonic-gate 
1179*0Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
1180*0Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
1181*0Sstevel@tonic-gate 		privat = mddb_getrecprivate(recid);
1182*0Sstevel@tonic-gate 		if (privat & MD_PRV_COMMIT) {
1183*0Sstevel@tonic-gate 			if (mddb_commitrec(recid)) {
1184*0Sstevel@tonic-gate 				if (!(md_get_setstatus(setno) & MD_SET_STALE)) {
1185*0Sstevel@tonic-gate 					md_set_setstatus(setno, MD_SET_STALE);
1186*0Sstevel@tonic-gate 					cmn_err(CE_WARN,
1187*0Sstevel@tonic-gate 					    "md: state database is stale");
1188*0Sstevel@tonic-gate 				}
1189*0Sstevel@tonic-gate 			}
1190*0Sstevel@tonic-gate 			mddb_setrecprivate(recid, MD_PRV_GOTIT);
1191*0Sstevel@tonic-gate 		}
1192*0Sstevel@tonic-gate 	}
1193*0Sstevel@tonic-gate 
1194*0Sstevel@tonic-gate 	/* Deletes must happen after all the commits */
1195*0Sstevel@tonic-gate 	recid = mddb_makerecid(setno, 0);
1196*0Sstevel@tonic-gate 	while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
1197*0Sstevel@tonic-gate 		privat = mddb_getrecprivate(recid);
1198*0Sstevel@tonic-gate 		if (privat & MD_PRV_DELETE) {
1199*0Sstevel@tonic-gate 			if (mddb_deleterec(recid)) {
1200*0Sstevel@tonic-gate 				if (!(md_get_setstatus(setno) & MD_SET_STALE)) {
1201*0Sstevel@tonic-gate 					md_set_setstatus(setno, MD_SET_STALE);
1202*0Sstevel@tonic-gate 					cmn_err(CE_WARN,
1203*0Sstevel@tonic-gate 					    "md: state database is stale");
1204*0Sstevel@tonic-gate 				}
1205*0Sstevel@tonic-gate 				mddb_setrecprivate(recid, MD_PRV_GOTIT);
1206*0Sstevel@tonic-gate 			}
1207*0Sstevel@tonic-gate 			recid = mddb_makerecid(setno, 0);
1208*0Sstevel@tonic-gate 		}
1209*0Sstevel@tonic-gate 	}
1210*0Sstevel@tonic-gate 
1211*0Sstevel@tonic-gate 	/*
1212*0Sstevel@tonic-gate 	 * go through and clean up records until nothing gets cleaned up.
1213*0Sstevel@tonic-gate 	 */
1214*0Sstevel@tonic-gate 	do {
1215*0Sstevel@tonic-gate 		i = 0;
1216*0Sstevel@tonic-gate 		for (ops = md_opslist; ops != NULL; ops = ops->md_next)
1217*0Sstevel@tonic-gate 			if (ops->md_snarf != NULL)
1218*0Sstevel@tonic-gate 				i += ops->md_snarf(MD_SNARF_CLEANUP, setno);
1219*0Sstevel@tonic-gate 	} while (i);
1220*0Sstevel@tonic-gate 
1221*0Sstevel@tonic-gate 	if (md_nm_snarfed != NULL &&
1222*0Sstevel@tonic-gate 	    !(md_get_setstatus(setno) & MD_SET_STALE)) {
1223*0Sstevel@tonic-gate 		/*
1224*0Sstevel@tonic-gate 		 * go thru and cleanup the namespace and the device id
1225*0Sstevel@tonic-gate 		 * name space
1226*0Sstevel@tonic-gate 		 */
1227*0Sstevel@tonic-gate 		for (key = 1;
1228*0Sstevel@tonic-gate 		    key < ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key;
1229*0Sstevel@tonic-gate 		    key++) {
1230*0Sstevel@tonic-gate 			/*
1231*0Sstevel@tonic-gate 			 * Is the entry an 'orphan'?
1232*0Sstevel@tonic-gate 			 */
1233*0Sstevel@tonic-gate 			if (lookup_entry(nh, setno, side, key, NODEV64, 0L) !=
1234*0Sstevel@tonic-gate 			    NULL) {
1235*0Sstevel@tonic-gate 				/*
1236*0Sstevel@tonic-gate 				 * If the value is not set then apparently
1237*0Sstevel@tonic-gate 				 * it is not part of the current configuration,
1238*0Sstevel@tonic-gate 				 * remove it this can happen when system panic
1239*0Sstevel@tonic-gate 				 * between the primary name space update and
1240*0Sstevel@tonic-gate 				 * the device id name space update
1241*0Sstevel@tonic-gate 				 */
1242*0Sstevel@tonic-gate 				if (md_nm_snarfed[key] == 0) {
1243*0Sstevel@tonic-gate 					if (md_verify_orphaned_record(setno,
1244*0Sstevel@tonic-gate 					    key) == 1)
1245*0Sstevel@tonic-gate 						(void) remove_entry(nh,
1246*0Sstevel@tonic-gate 						    side, key, 0L);
1247*0Sstevel@tonic-gate 				}
1248*0Sstevel@tonic-gate 			}
1249*0Sstevel@tonic-gate 		}
1250*0Sstevel@tonic-gate 	}
1251*0Sstevel@tonic-gate 
1252*0Sstevel@tonic-gate 	if (md_nm_snarfed != NULL) {
1253*0Sstevel@tonic-gate 		/*
1254*0Sstevel@tonic-gate 		 * Done and free the memory
1255*0Sstevel@tonic-gate 		 */
1256*0Sstevel@tonic-gate 		kmem_free(md_nm_snarfed, size);
1257*0Sstevel@tonic-gate 		md_nm_snarfed = NULL;
1258*0Sstevel@tonic-gate 	}
1259*0Sstevel@tonic-gate 
1260*0Sstevel@tonic-gate 	if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE &&
1261*0Sstevel@tonic-gate 	    !(md_get_setstatus(setno) & MD_SET_STALE)) {
1262*0Sstevel@tonic-gate 		/*
1263*0Sstevel@tonic-gate 		 * if the destroy flag has been set and
1264*0Sstevel@tonic-gate 		 * the MD_SET_DIDCLUP bit is not set in
1265*0Sstevel@tonic-gate 		 * the set's status field, cleanup the
1266*0Sstevel@tonic-gate 		 * entire device id namespace
1267*0Sstevel@tonic-gate 		 */
1268*0Sstevel@tonic-gate 		if (md_devid_destroy &&
1269*0Sstevel@tonic-gate 		    !(md_get_setstatus(setno) & MD_SET_DIDCLUP)) {
1270*0Sstevel@tonic-gate 			(void) md_devid_cleanup(setno, 1);
1271*0Sstevel@tonic-gate 			md_set_setstatus(setno, MD_SET_DIDCLUP);
1272*0Sstevel@tonic-gate 		} else
1273*0Sstevel@tonic-gate 			(void) md_devid_cleanup(setno, 0);
1274*0Sstevel@tonic-gate 	}
1275*0Sstevel@tonic-gate 
1276*0Sstevel@tonic-gate 	/*
1277*0Sstevel@tonic-gate 	 * clear single threading on snarf, return success or error
1278*0Sstevel@tonic-gate 	 */
1279*0Sstevel@tonic-gate out:
1280*0Sstevel@tonic-gate 	md_haltsnarf_exit(setno);
1281*0Sstevel@tonic-gate 	return (err);
1282*0Sstevel@tonic-gate }
1283*0Sstevel@tonic-gate 
1284*0Sstevel@tonic-gate void
1285*0Sstevel@tonic-gate get_minfo(struct dk_minfo *info, minor_t mnum)
1286*0Sstevel@tonic-gate {
1287*0Sstevel@tonic-gate 	md_unit_t	*un;
1288*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1289*0Sstevel@tonic-gate 
1290*0Sstevel@tonic-gate 	info->dki_capacity = 0;
1291*0Sstevel@tonic-gate 	info->dki_lbsize = 0;
1292*0Sstevel@tonic-gate 	info->dki_media_type = 0;
1293*0Sstevel@tonic-gate 
1294*0Sstevel@tonic-gate 	if ((ui = MDI_UNIT(mnum)) == NULL) {
1295*0Sstevel@tonic-gate 		return;
1296*0Sstevel@tonic-gate 	}
1297*0Sstevel@tonic-gate 	un = (md_unit_t *)md_unit_readerlock(ui);
1298*0Sstevel@tonic-gate 	info->dki_capacity = un->c.un_total_blocks;
1299*0Sstevel@tonic-gate 	md_unit_readerexit(ui);
1300*0Sstevel@tonic-gate 	info->dki_lbsize = DEV_BSIZE;
1301*0Sstevel@tonic-gate 	info->dki_media_type = DK_UNKNOWN;
1302*0Sstevel@tonic-gate }
1303*0Sstevel@tonic-gate 
1304*0Sstevel@tonic-gate 
1305*0Sstevel@tonic-gate void
1306*0Sstevel@tonic-gate get_info(struct dk_cinfo *info, minor_t mnum)
1307*0Sstevel@tonic-gate {
1308*0Sstevel@tonic-gate 	/*
1309*0Sstevel@tonic-gate 	 * Controller Information
1310*0Sstevel@tonic-gate 	 */
1311*0Sstevel@tonic-gate 	info->dki_ctype = DKC_MD;
1312*0Sstevel@tonic-gate 	info->dki_cnum = ddi_get_instance(ddi_get_parent(md_devinfo));
1313*0Sstevel@tonic-gate 	(void) strcpy(info->dki_cname,
1314*0Sstevel@tonic-gate 	    ddi_get_name(ddi_get_parent(md_devinfo)));
1315*0Sstevel@tonic-gate 	/*
1316*0Sstevel@tonic-gate 	 * Unit Information
1317*0Sstevel@tonic-gate 	 */
1318*0Sstevel@tonic-gate 	info->dki_unit = mnum;
1319*0Sstevel@tonic-gate 	info->dki_slave = 0;
1320*0Sstevel@tonic-gate 	(void) strcpy(info->dki_dname, ddi_driver_name(md_devinfo));
1321*0Sstevel@tonic-gate 	info->dki_flags = 0;
1322*0Sstevel@tonic-gate 	info->dki_partition = 0;
1323*0Sstevel@tonic-gate 	info->dki_maxtransfer = (ushort_t)(md_maxphys / DEV_BSIZE);
1324*0Sstevel@tonic-gate 
1325*0Sstevel@tonic-gate 	/*
1326*0Sstevel@tonic-gate 	 * We can't get from here to there yet
1327*0Sstevel@tonic-gate 	 */
1328*0Sstevel@tonic-gate 	info->dki_addr = 0;
1329*0Sstevel@tonic-gate 	info->dki_space = 0;
1330*0Sstevel@tonic-gate 	info->dki_prio = 0;
1331*0Sstevel@tonic-gate 	info->dki_vec = 0;
1332*0Sstevel@tonic-gate }
1333*0Sstevel@tonic-gate 
1334*0Sstevel@tonic-gate /*
1335*0Sstevel@tonic-gate  * open admin device
1336*0Sstevel@tonic-gate  */
1337*0Sstevel@tonic-gate static int
1338*0Sstevel@tonic-gate mdadminopen(
1339*0Sstevel@tonic-gate 	int	flag,
1340*0Sstevel@tonic-gate 	int	otyp)
1341*0Sstevel@tonic-gate {
1342*0Sstevel@tonic-gate 	int	err = 0;
1343*0Sstevel@tonic-gate 
1344*0Sstevel@tonic-gate 	/* single thread */
1345*0Sstevel@tonic-gate 	mutex_enter(&md_mx);
1346*0Sstevel@tonic-gate 
1347*0Sstevel@tonic-gate 	/* check type and flags */
1348*0Sstevel@tonic-gate 	if ((otyp != OTYP_CHR) && (otyp != OTYP_LYR)) {
1349*0Sstevel@tonic-gate 		err = EINVAL;
1350*0Sstevel@tonic-gate 		goto out;
1351*0Sstevel@tonic-gate 	}
1352*0Sstevel@tonic-gate 	if (((flag & FEXCL) && (md_status & MD_GBL_OPEN)) ||
1353*0Sstevel@tonic-gate 	    (md_status & MD_GBL_EXCL)) {
1354*0Sstevel@tonic-gate 		err = EBUSY;
1355*0Sstevel@tonic-gate 		goto out;
1356*0Sstevel@tonic-gate 	}
1357*0Sstevel@tonic-gate 
1358*0Sstevel@tonic-gate 	/* count and flag open */
1359*0Sstevel@tonic-gate 	md_ocnt[otyp]++;
1360*0Sstevel@tonic-gate 	md_status |= MD_GBL_OPEN;
1361*0Sstevel@tonic-gate 	if (flag & FEXCL)
1362*0Sstevel@tonic-gate 		md_status |= MD_GBL_EXCL;
1363*0Sstevel@tonic-gate 
1364*0Sstevel@tonic-gate 	/* unlock return success */
1365*0Sstevel@tonic-gate out:
1366*0Sstevel@tonic-gate 	mutex_exit(&md_mx);
1367*0Sstevel@tonic-gate 	return (err);
1368*0Sstevel@tonic-gate }
1369*0Sstevel@tonic-gate 
1370*0Sstevel@tonic-gate /*
1371*0Sstevel@tonic-gate  * open entry point
1372*0Sstevel@tonic-gate  */
1373*0Sstevel@tonic-gate static int
1374*0Sstevel@tonic-gate mdopen(
1375*0Sstevel@tonic-gate 	dev_t		*dev,
1376*0Sstevel@tonic-gate 	int		flag,
1377*0Sstevel@tonic-gate 	int		otyp,
1378*0Sstevel@tonic-gate 	cred_t		*cred_p)
1379*0Sstevel@tonic-gate {
1380*0Sstevel@tonic-gate 	minor_t		mnum = getminor(*dev);
1381*0Sstevel@tonic-gate 	unit_t		unit = MD_MIN2UNIT(mnum);
1382*0Sstevel@tonic-gate 	set_t		setno = MD_MIN2SET(mnum);
1383*0Sstevel@tonic-gate 	mdi_unit_t	*ui = NULL;
1384*0Sstevel@tonic-gate 	int		err = 0;
1385*0Sstevel@tonic-gate 	md_parent_t	parent;
1386*0Sstevel@tonic-gate 
1387*0Sstevel@tonic-gate 	/* dispatch admin device opens */
1388*0Sstevel@tonic-gate 	if (mnum == MD_ADM_MINOR)
1389*0Sstevel@tonic-gate 		return (mdadminopen(flag, otyp));
1390*0Sstevel@tonic-gate 
1391*0Sstevel@tonic-gate 	/* lock, check status */
1392*0Sstevel@tonic-gate 	rw_enter(&md_unit_array_rw.lock, RW_READER);
1393*0Sstevel@tonic-gate 
1394*0Sstevel@tonic-gate tryagain:
1395*0Sstevel@tonic-gate 	if (md_get_status() & MD_GBL_HALTED)  {
1396*0Sstevel@tonic-gate 		err = ENODEV;
1397*0Sstevel@tonic-gate 		goto out;
1398*0Sstevel@tonic-gate 	}
1399*0Sstevel@tonic-gate 
1400*0Sstevel@tonic-gate 	/* check minor */
1401*0Sstevel@tonic-gate 	if ((setno >= md_nsets) || (unit >= md_nunits)) {
1402*0Sstevel@tonic-gate 		err = ENXIO;
1403*0Sstevel@tonic-gate 		goto out;
1404*0Sstevel@tonic-gate 	}
1405*0Sstevel@tonic-gate 
1406*0Sstevel@tonic-gate 	/* make sure we're snarfed */
1407*0Sstevel@tonic-gate 	if ((md_get_setstatus(MD_LOCAL_SET) & MD_SET_SNARFED) == 0) {
1408*0Sstevel@tonic-gate 		if (md_snarf_db_set(MD_LOCAL_SET, NULL) != 0) {
1409*0Sstevel@tonic-gate 			err = ENODEV;
1410*0Sstevel@tonic-gate 			goto out;
1411*0Sstevel@tonic-gate 		}
1412*0Sstevel@tonic-gate 	}
1413*0Sstevel@tonic-gate 	if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) {
1414*0Sstevel@tonic-gate 		err = ENODEV;
1415*0Sstevel@tonic-gate 		goto out;
1416*0Sstevel@tonic-gate 	}
1417*0Sstevel@tonic-gate 
1418*0Sstevel@tonic-gate 	/* check unit */
1419*0Sstevel@tonic-gate 	if ((ui = MDI_UNIT(mnum)) == NULL) {
1420*0Sstevel@tonic-gate 		err = ENXIO;
1421*0Sstevel@tonic-gate 		goto out;
1422*0Sstevel@tonic-gate 	}
1423*0Sstevel@tonic-gate 
1424*0Sstevel@tonic-gate 	/*
1425*0Sstevel@tonic-gate 	 * The softpart open routine may do an I/O during the open, in
1426*0Sstevel@tonic-gate 	 * which case the open routine will set the OPENINPROGRESS flag
1427*0Sstevel@tonic-gate 	 * and drop all locks during the I/O.  If this thread sees
1428*0Sstevel@tonic-gate 	 * the OPENINPROGRESS flag set, if should wait until the flag
1429*0Sstevel@tonic-gate 	 * is reset before calling the driver's open routine.  It must
1430*0Sstevel@tonic-gate 	 * also revalidate the world after it grabs the unit_array lock
1431*0Sstevel@tonic-gate 	 * since the set may have been released or the metadevice cleared
1432*0Sstevel@tonic-gate 	 * during the sleep.
1433*0Sstevel@tonic-gate 	 */
1434*0Sstevel@tonic-gate 	if (MD_MNSET_SETNO(setno)) {
1435*0Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
1436*0Sstevel@tonic-gate 		if (ui->ui_lock & MD_UL_OPENINPROGRESS) {
1437*0Sstevel@tonic-gate 			rw_exit(&md_unit_array_rw.lock);
1438*0Sstevel@tonic-gate 			cv_wait(&ui->ui_cv, &ui->ui_mx);
1439*0Sstevel@tonic-gate 			rw_enter(&md_unit_array_rw.lock, RW_READER);
1440*0Sstevel@tonic-gate 			mutex_exit(&ui->ui_mx);
1441*0Sstevel@tonic-gate 			goto tryagain;
1442*0Sstevel@tonic-gate 		}
1443*0Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
1444*0Sstevel@tonic-gate 	}
1445*0Sstevel@tonic-gate 
1446*0Sstevel@tonic-gate 	/* Test if device is openable */
1447*0Sstevel@tonic-gate 	if ((ui->ui_tstate & MD_NOTOPENABLE) != 0) {
1448*0Sstevel@tonic-gate 		err = ENXIO;
1449*0Sstevel@tonic-gate 		goto out;
1450*0Sstevel@tonic-gate 	}
1451*0Sstevel@tonic-gate 
1452*0Sstevel@tonic-gate 	/* don't allow opens w/WRITE flag if stale */
1453*0Sstevel@tonic-gate 	if ((flag & FWRITE) && (md_get_setstatus(setno) & MD_SET_STALE)) {
1454*0Sstevel@tonic-gate 		err = EROFS;
1455*0Sstevel@tonic-gate 		goto out;
1456*0Sstevel@tonic-gate 	}
1457*0Sstevel@tonic-gate 
1458*0Sstevel@tonic-gate 	/* don't allow writes to subdevices */
1459*0Sstevel@tonic-gate 	parent = md_get_parent(md_expldev(*dev));
1460*0Sstevel@tonic-gate 	if ((flag & FWRITE) && MD_HAS_PARENT(parent)) {
1461*0Sstevel@tonic-gate 		err = EROFS;
1462*0Sstevel@tonic-gate 		goto out;
1463*0Sstevel@tonic-gate 	}
1464*0Sstevel@tonic-gate 
1465*0Sstevel@tonic-gate 	/* open underlying driver */
1466*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_open != NULL) {
1467*0Sstevel@tonic-gate 		if ((err = (*md_ops[ui->ui_opsindex]->md_open)
1468*0Sstevel@tonic-gate 		    (dev, flag, otyp, cred_p, 0)) != 0)
1469*0Sstevel@tonic-gate 			goto out;
1470*0Sstevel@tonic-gate 	}
1471*0Sstevel@tonic-gate 
1472*0Sstevel@tonic-gate 	/* or do it ourselves */
1473*0Sstevel@tonic-gate 	else {
1474*0Sstevel@tonic-gate 		/* single thread */
1475*0Sstevel@tonic-gate 		(void) md_unit_openclose_enter(ui);
1476*0Sstevel@tonic-gate 		err = md_unit_incopen(mnum, flag, otyp);
1477*0Sstevel@tonic-gate 		md_unit_openclose_exit(ui);
1478*0Sstevel@tonic-gate 		if (err != 0)
1479*0Sstevel@tonic-gate 			goto out;
1480*0Sstevel@tonic-gate 	}
1481*0Sstevel@tonic-gate 
1482*0Sstevel@tonic-gate 	/* unlock, return status */
1483*0Sstevel@tonic-gate out:
1484*0Sstevel@tonic-gate 	rw_exit(&md_unit_array_rw.lock);
1485*0Sstevel@tonic-gate 	return (err);
1486*0Sstevel@tonic-gate }
1487*0Sstevel@tonic-gate 
1488*0Sstevel@tonic-gate /*
1489*0Sstevel@tonic-gate  * close admin device
1490*0Sstevel@tonic-gate  */
1491*0Sstevel@tonic-gate static int
1492*0Sstevel@tonic-gate mdadminclose(
1493*0Sstevel@tonic-gate 	int	otyp)
1494*0Sstevel@tonic-gate {
1495*0Sstevel@tonic-gate 	int	i;
1496*0Sstevel@tonic-gate 	int	err = 0;
1497*0Sstevel@tonic-gate 
1498*0Sstevel@tonic-gate 	/* single thread */
1499*0Sstevel@tonic-gate 	mutex_enter(&md_mx);
1500*0Sstevel@tonic-gate 
1501*0Sstevel@tonic-gate 	/* check type and flags */
1502*0Sstevel@tonic-gate 	if ((otyp < 0) || (otyp >= OTYPCNT)) {
1503*0Sstevel@tonic-gate 		err = EINVAL;
1504*0Sstevel@tonic-gate 		goto out;
1505*0Sstevel@tonic-gate 	} else if (md_ocnt[otyp] == 0) {
1506*0Sstevel@tonic-gate 		err = ENXIO;
1507*0Sstevel@tonic-gate 		goto out;
1508*0Sstevel@tonic-gate 	}
1509*0Sstevel@tonic-gate 
1510*0Sstevel@tonic-gate 	/* count and flag closed */
1511*0Sstevel@tonic-gate 	if (otyp == OTYP_LYR)
1512*0Sstevel@tonic-gate 		md_ocnt[otyp]--;
1513*0Sstevel@tonic-gate 	else
1514*0Sstevel@tonic-gate 		md_ocnt[otyp] = 0;
1515*0Sstevel@tonic-gate 	md_status &= ~MD_GBL_OPEN;
1516*0Sstevel@tonic-gate 	for (i = 0; (i < OTYPCNT); ++i)
1517*0Sstevel@tonic-gate 		if (md_ocnt[i] != 0)
1518*0Sstevel@tonic-gate 			md_status |= MD_GBL_OPEN;
1519*0Sstevel@tonic-gate 	if (! (md_status & MD_GBL_OPEN))
1520*0Sstevel@tonic-gate 		md_status &= ~MD_GBL_EXCL;
1521*0Sstevel@tonic-gate 
1522*0Sstevel@tonic-gate 	/* unlock return success */
1523*0Sstevel@tonic-gate out:
1524*0Sstevel@tonic-gate 	mutex_exit(&md_mx);
1525*0Sstevel@tonic-gate 	return (err);
1526*0Sstevel@tonic-gate }
1527*0Sstevel@tonic-gate 
1528*0Sstevel@tonic-gate /*
1529*0Sstevel@tonic-gate  * close entry point
1530*0Sstevel@tonic-gate  */
1531*0Sstevel@tonic-gate static int
1532*0Sstevel@tonic-gate mdclose(
1533*0Sstevel@tonic-gate 	dev_t		dev,
1534*0Sstevel@tonic-gate 	int		flag,
1535*0Sstevel@tonic-gate 	int		otyp,
1536*0Sstevel@tonic-gate 	cred_t		*cred_p)
1537*0Sstevel@tonic-gate {
1538*0Sstevel@tonic-gate 	minor_t		mnum = getminor(dev);
1539*0Sstevel@tonic-gate 	set_t		setno = MD_MIN2SET(mnum);
1540*0Sstevel@tonic-gate 	unit_t		unit = MD_MIN2UNIT(mnum);
1541*0Sstevel@tonic-gate 	mdi_unit_t	*ui = NULL;
1542*0Sstevel@tonic-gate 	int		err = 0;
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 	/* dispatch admin device closes */
1545*0Sstevel@tonic-gate 	if (mnum == MD_ADM_MINOR)
1546*0Sstevel@tonic-gate 		return (mdadminclose(otyp));
1547*0Sstevel@tonic-gate 
1548*0Sstevel@tonic-gate 	/* check minor */
1549*0Sstevel@tonic-gate 	if ((setno >= md_nsets) || (unit >= md_nunits) ||
1550*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL)) {
1551*0Sstevel@tonic-gate 		err = ENXIO;
1552*0Sstevel@tonic-gate 		goto out;
1553*0Sstevel@tonic-gate 	}
1554*0Sstevel@tonic-gate 
1555*0Sstevel@tonic-gate 	/* close underlying driver */
1556*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_close != NULL) {
1557*0Sstevel@tonic-gate 		if ((err = (*md_ops[ui->ui_opsindex]->md_close)
1558*0Sstevel@tonic-gate 		    (dev, flag, otyp, cred_p, 0)) != 0)
1559*0Sstevel@tonic-gate 			goto out;
1560*0Sstevel@tonic-gate 	}
1561*0Sstevel@tonic-gate 
1562*0Sstevel@tonic-gate 	/* or do it ourselves */
1563*0Sstevel@tonic-gate 	else {
1564*0Sstevel@tonic-gate 		/* single thread */
1565*0Sstevel@tonic-gate 		(void) md_unit_openclose_enter(ui);
1566*0Sstevel@tonic-gate 		err = md_unit_decopen(mnum, otyp);
1567*0Sstevel@tonic-gate 		md_unit_openclose_exit(ui);
1568*0Sstevel@tonic-gate 		if (err != 0)
1569*0Sstevel@tonic-gate 			goto out;
1570*0Sstevel@tonic-gate 	}
1571*0Sstevel@tonic-gate 
1572*0Sstevel@tonic-gate 	/* return success */
1573*0Sstevel@tonic-gate out:
1574*0Sstevel@tonic-gate 	return (err);
1575*0Sstevel@tonic-gate }
1576*0Sstevel@tonic-gate 
1577*0Sstevel@tonic-gate 
1578*0Sstevel@tonic-gate /*
1579*0Sstevel@tonic-gate  * This routine performs raw read operations.  It is called from the
1580*0Sstevel@tonic-gate  * device switch at normal priority.
1581*0Sstevel@tonic-gate  *
1582*0Sstevel@tonic-gate  * The main catch is that the *uio struct which is passed to us may
1583*0Sstevel@tonic-gate  * specify a read which spans two buffers, which would be contiguous
1584*0Sstevel@tonic-gate  * on a single partition,  but not on a striped partition. This will
1585*0Sstevel@tonic-gate  * be handled by mdstrategy.
1586*0Sstevel@tonic-gate  */
1587*0Sstevel@tonic-gate /*ARGSUSED*/
1588*0Sstevel@tonic-gate static int
1589*0Sstevel@tonic-gate mdread(dev_t dev, struct uio *uio, cred_t *credp)
1590*0Sstevel@tonic-gate {
1591*0Sstevel@tonic-gate 	minor_t		mnum;
1592*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1593*0Sstevel@tonic-gate 	int		error;
1594*0Sstevel@tonic-gate 
1595*0Sstevel@tonic-gate 	if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
1596*0Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets) ||
1597*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1598*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL))
1599*0Sstevel@tonic-gate 		return (ENXIO);
1600*0Sstevel@tonic-gate 
1601*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_read  != NULL)
1602*0Sstevel@tonic-gate 		return ((*md_ops[ui->ui_opsindex]->md_read)
1603*0Sstevel@tonic-gate 		    (dev, uio, credp));
1604*0Sstevel@tonic-gate 
1605*0Sstevel@tonic-gate 	if ((error = md_chk_uio(uio)) != 0)
1606*0Sstevel@tonic-gate 		return (error);
1607*0Sstevel@tonic-gate 
1608*0Sstevel@tonic-gate 	return (physio(mdstrategy, NULL, dev, B_READ, md_minphys, uio));
1609*0Sstevel@tonic-gate }
1610*0Sstevel@tonic-gate 
1611*0Sstevel@tonic-gate /*
1612*0Sstevel@tonic-gate  * This routine performs async raw read operations.  It is called from the
1613*0Sstevel@tonic-gate  * device switch at normal priority.
1614*0Sstevel@tonic-gate  *
1615*0Sstevel@tonic-gate  * The main catch is that the *aio struct which is passed to us may
1616*0Sstevel@tonic-gate  * specify a read which spans two buffers, which would be contiguous
1617*0Sstevel@tonic-gate  * on a single partition,  but not on a striped partition. This will
1618*0Sstevel@tonic-gate  * be handled by mdstrategy.
1619*0Sstevel@tonic-gate  */
1620*0Sstevel@tonic-gate /*ARGSUSED*/
1621*0Sstevel@tonic-gate static int
1622*0Sstevel@tonic-gate mdaread(dev_t dev, struct aio_req *aio, cred_t *credp)
1623*0Sstevel@tonic-gate {
1624*0Sstevel@tonic-gate 	minor_t		mnum;
1625*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1626*0Sstevel@tonic-gate 	int		error;
1627*0Sstevel@tonic-gate 
1628*0Sstevel@tonic-gate 
1629*0Sstevel@tonic-gate 	if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
1630*0Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets) ||
1631*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1632*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL))
1633*0Sstevel@tonic-gate 		return (ENXIO);
1634*0Sstevel@tonic-gate 
1635*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_aread  != NULL)
1636*0Sstevel@tonic-gate 		return ((*md_ops[ui->ui_opsindex]->md_aread)
1637*0Sstevel@tonic-gate 		    (dev, aio, credp));
1638*0Sstevel@tonic-gate 
1639*0Sstevel@tonic-gate 	if ((error = md_chk_uio(aio->aio_uio)) != 0)
1640*0Sstevel@tonic-gate 		return (error);
1641*0Sstevel@tonic-gate 
1642*0Sstevel@tonic-gate 	return (aphysio(mdstrategy, anocancel, dev, B_READ, md_minphys, aio));
1643*0Sstevel@tonic-gate }
1644*0Sstevel@tonic-gate 
1645*0Sstevel@tonic-gate /*
1646*0Sstevel@tonic-gate  * This routine performs raw write operations.	It is called from the
1647*0Sstevel@tonic-gate  * device switch at normal priority.
1648*0Sstevel@tonic-gate  *
1649*0Sstevel@tonic-gate  * The main catch is that the *uio struct which is passed to us may
1650*0Sstevel@tonic-gate  * specify a write which spans two buffers, which would be contiguous
1651*0Sstevel@tonic-gate  * on a single partition,  but not on a striped partition. This is
1652*0Sstevel@tonic-gate  * handled by mdstrategy.
1653*0Sstevel@tonic-gate  *
1654*0Sstevel@tonic-gate  */
1655*0Sstevel@tonic-gate /*ARGSUSED*/
1656*0Sstevel@tonic-gate static int
1657*0Sstevel@tonic-gate mdwrite(dev_t dev, struct uio *uio, cred_t *credp)
1658*0Sstevel@tonic-gate {
1659*0Sstevel@tonic-gate 	minor_t		mnum;
1660*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1661*0Sstevel@tonic-gate 	int		error;
1662*0Sstevel@tonic-gate 
1663*0Sstevel@tonic-gate 	if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
1664*0Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets) ||
1665*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1666*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL))
1667*0Sstevel@tonic-gate 		return (ENXIO);
1668*0Sstevel@tonic-gate 
1669*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_write  != NULL)
1670*0Sstevel@tonic-gate 		return ((*md_ops[ui->ui_opsindex]->md_write)
1671*0Sstevel@tonic-gate 		    (dev, uio, credp));
1672*0Sstevel@tonic-gate 
1673*0Sstevel@tonic-gate 	if ((error = md_chk_uio(uio)) != 0)
1674*0Sstevel@tonic-gate 		return (error);
1675*0Sstevel@tonic-gate 
1676*0Sstevel@tonic-gate 	return (physio(mdstrategy, NULL, dev, B_WRITE, md_minphys, uio));
1677*0Sstevel@tonic-gate }
1678*0Sstevel@tonic-gate 
1679*0Sstevel@tonic-gate /*
1680*0Sstevel@tonic-gate  * This routine performs async raw write operations.  It is called from the
1681*0Sstevel@tonic-gate  * device switch at normal priority.
1682*0Sstevel@tonic-gate  *
1683*0Sstevel@tonic-gate  * The main catch is that the *aio struct which is passed to us may
1684*0Sstevel@tonic-gate  * specify a write which spans two buffers, which would be contiguous
1685*0Sstevel@tonic-gate  * on a single partition,  but not on a striped partition. This is
1686*0Sstevel@tonic-gate  * handled by mdstrategy.
1687*0Sstevel@tonic-gate  *
1688*0Sstevel@tonic-gate  */
1689*0Sstevel@tonic-gate /*ARGSUSED*/
1690*0Sstevel@tonic-gate static int
1691*0Sstevel@tonic-gate mdawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1692*0Sstevel@tonic-gate {
1693*0Sstevel@tonic-gate 	minor_t		mnum;
1694*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1695*0Sstevel@tonic-gate 	int		error;
1696*0Sstevel@tonic-gate 
1697*0Sstevel@tonic-gate 
1698*0Sstevel@tonic-gate 	if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
1699*0Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets) ||
1700*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1701*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL))
1702*0Sstevel@tonic-gate 		return (ENXIO);
1703*0Sstevel@tonic-gate 
1704*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_awrite  != NULL)
1705*0Sstevel@tonic-gate 		return ((*md_ops[ui->ui_opsindex]->md_awrite)
1706*0Sstevel@tonic-gate 		    (dev, aio, credp));
1707*0Sstevel@tonic-gate 
1708*0Sstevel@tonic-gate 	if ((error = md_chk_uio(aio->aio_uio)) != 0)
1709*0Sstevel@tonic-gate 		return (error);
1710*0Sstevel@tonic-gate 
1711*0Sstevel@tonic-gate 	return (aphysio(mdstrategy, anocancel, dev, B_WRITE, md_minphys, aio));
1712*0Sstevel@tonic-gate }
1713*0Sstevel@tonic-gate 
1714*0Sstevel@tonic-gate int
1715*0Sstevel@tonic-gate mdstrategy(struct buf *bp)
1716*0Sstevel@tonic-gate {
1717*0Sstevel@tonic-gate 	minor_t		mnum;
1718*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1719*0Sstevel@tonic-gate 
1720*0Sstevel@tonic-gate 	ASSERT((bp->b_flags & B_DONE) == 0);
1721*0Sstevel@tonic-gate 
1722*0Sstevel@tonic-gate 	if (panicstr)
1723*0Sstevel@tonic-gate 		md_clr_status(MD_GBL_DAEMONS_LIVE);
1724*0Sstevel@tonic-gate 
1725*0Sstevel@tonic-gate 	if (((mnum = getminor(bp->b_edev)) == MD_ADM_MINOR) ||
1726*0Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets) ||
1727*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1728*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL)) {
1729*0Sstevel@tonic-gate 		bp->b_flags |= B_ERROR;
1730*0Sstevel@tonic-gate 		bp->b_error = ENXIO;
1731*0Sstevel@tonic-gate 		bp->b_resid = bp->b_bcount;
1732*0Sstevel@tonic-gate 		biodone(bp);
1733*0Sstevel@tonic-gate 		return (0);
1734*0Sstevel@tonic-gate 	}
1735*0Sstevel@tonic-gate 
1736*0Sstevel@tonic-gate 	bp->b_flags &= ~(B_ERROR | B_DONE);
1737*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_strategy  != NULL) {
1738*0Sstevel@tonic-gate 		(*md_ops[ui->ui_opsindex]->md_strategy) (bp, 0, NULL);
1739*0Sstevel@tonic-gate 	} else {
1740*0Sstevel@tonic-gate 		(void) errdone(ui, bp, ENXIO);
1741*0Sstevel@tonic-gate 	}
1742*0Sstevel@tonic-gate 	return (0);
1743*0Sstevel@tonic-gate }
1744*0Sstevel@tonic-gate 
1745*0Sstevel@tonic-gate /*
1746*0Sstevel@tonic-gate  * Return true if the ioctl is allowed to be multithreaded.
1747*0Sstevel@tonic-gate  * All the ioctls with MN are sent only from the message handlers through
1748*0Sstevel@tonic-gate  * rpc.mdcommd, which (via it's own locking mechanism) takes care that not two
1749*0Sstevel@tonic-gate  * ioctl for the same metadevice are issued at the same time.
1750*0Sstevel@tonic-gate  * So we are safe here.
1751*0Sstevel@tonic-gate  * The other ioctls do not mess with any metadevice structures and therefor
1752*0Sstevel@tonic-gate  * are harmless too, if called multiple times at the same time.
1753*0Sstevel@tonic-gate  */
1754*0Sstevel@tonic-gate static boolean_t
1755*0Sstevel@tonic-gate is_mt_ioctl(int cmd) {
1756*0Sstevel@tonic-gate 
1757*0Sstevel@tonic-gate 	switch (cmd) {
1758*0Sstevel@tonic-gate 	case MD_IOCGUNIQMSGID:
1759*0Sstevel@tonic-gate 	case MD_IOCGVERSION:
1760*0Sstevel@tonic-gate 	case MD_IOCISOPEN:
1761*0Sstevel@tonic-gate 	case MD_MN_SET_MM_OWNER:
1762*0Sstevel@tonic-gate 	case MD_MN_SET_STATE:
1763*0Sstevel@tonic-gate 	case MD_MN_SUSPEND_WRITES:
1764*0Sstevel@tonic-gate 	case MD_MN_ALLOCATE_HOTSPARE:
1765*0Sstevel@tonic-gate 	case MD_MN_SET_SETFLAGS:
1766*0Sstevel@tonic-gate 	case MD_MN_GET_SETFLAGS:
1767*0Sstevel@tonic-gate 	case MD_MN_MDDB_OPTRECFIX:
1768*0Sstevel@tonic-gate 	case MD_MN_MDDB_PARSE:
1769*0Sstevel@tonic-gate 	case MD_MN_MDDB_BLOCK:
1770*0Sstevel@tonic-gate 	case MD_MN_DB_USERREQ:
1771*0Sstevel@tonic-gate 	case MD_IOC_SPSTATUS:
1772*0Sstevel@tonic-gate 	case MD_MN_COMMD_ERR:
1773*0Sstevel@tonic-gate 	case MD_MN_SET_COMMD_RUNNING:
1774*0Sstevel@tonic-gate 	case MD_MN_RESYNC:
1775*0Sstevel@tonic-gate 	case MD_MN_SETSYNC:
1776*0Sstevel@tonic-gate 	case MD_MN_POKE_HOTSPARES:
1777*0Sstevel@tonic-gate 		return (1);
1778*0Sstevel@tonic-gate 	default:
1779*0Sstevel@tonic-gate 		return (0);
1780*0Sstevel@tonic-gate 	}
1781*0Sstevel@tonic-gate }
1782*0Sstevel@tonic-gate 
1783*0Sstevel@tonic-gate /*
1784*0Sstevel@tonic-gate  * This routine implements the ioctl calls for the Virtual Disk System.
1785*0Sstevel@tonic-gate  * It is called from the device switch at normal priority.
1786*0Sstevel@tonic-gate  */
1787*0Sstevel@tonic-gate /* ARGSUSED */
1788*0Sstevel@tonic-gate static int
1789*0Sstevel@tonic-gate mdioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cred_p,
1790*0Sstevel@tonic-gate 	int *rval_p)
1791*0Sstevel@tonic-gate {
1792*0Sstevel@tonic-gate 	minor_t		mnum = getminor(dev);
1793*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1794*0Sstevel@tonic-gate 	IOLOCK		lock;
1795*0Sstevel@tonic-gate 	int		err;
1796*0Sstevel@tonic-gate 
1797*0Sstevel@tonic-gate 	/*
1798*0Sstevel@tonic-gate 	 * For multinode disksets  number of ioctls are allowed to be
1799*0Sstevel@tonic-gate 	 * multithreaded.
1800*0Sstevel@tonic-gate 	 * A fundamental assumption made in this implementation is that
1801*0Sstevel@tonic-gate 	 * ioctls either do not interact with other md structures  or the
1802*0Sstevel@tonic-gate 	 * ioctl to the admin device can only occur if the metadevice
1803*0Sstevel@tonic-gate 	 * device is open. i.e. avoid a race between metaclear and the
1804*0Sstevel@tonic-gate 	 * progress of a multithreaded ioctl.
1805*0Sstevel@tonic-gate 	 */
1806*0Sstevel@tonic-gate 
1807*0Sstevel@tonic-gate 	if (!is_mt_ioctl(cmd) && md_ioctl_lock_enter() == EINTR) {
1808*0Sstevel@tonic-gate 		return (EINTR);
1809*0Sstevel@tonic-gate 	}
1810*0Sstevel@tonic-gate 
1811*0Sstevel@tonic-gate 	/*
1812*0Sstevel@tonic-gate 	 * initialize lock tracker
1813*0Sstevel@tonic-gate 	 */
1814*0Sstevel@tonic-gate 	IOLOCK_INIT(&lock);
1815*0Sstevel@tonic-gate 
1816*0Sstevel@tonic-gate 	/* Flag to indicate that MD_GBL_IOCTL_LOCK is not acquired */
1817*0Sstevel@tonic-gate 
1818*0Sstevel@tonic-gate 	if (is_mt_ioctl(cmd)) {
1819*0Sstevel@tonic-gate 		/* increment the md_mtioctl_cnt */
1820*0Sstevel@tonic-gate 		mutex_enter(&md_mx);
1821*0Sstevel@tonic-gate 		md_mtioctl_cnt++;
1822*0Sstevel@tonic-gate 		mutex_exit(&md_mx);
1823*0Sstevel@tonic-gate 		lock.l_flags |= MD_MT_IOCTL;
1824*0Sstevel@tonic-gate 	}
1825*0Sstevel@tonic-gate 
1826*0Sstevel@tonic-gate 	/*
1827*0Sstevel@tonic-gate 	 * this has been added to prevent notification from re-snarfing
1828*0Sstevel@tonic-gate 	 * so metaunload will work.  It may interfere with other modules
1829*0Sstevel@tonic-gate 	 * halt process.
1830*0Sstevel@tonic-gate 	 */
1831*0Sstevel@tonic-gate 	if (md_get_status() & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE))
1832*0Sstevel@tonic-gate 		return (IOLOCK_RETURN(ENXIO, &lock));
1833*0Sstevel@tonic-gate 
1834*0Sstevel@tonic-gate 	/*
1835*0Sstevel@tonic-gate 	 * admin device ioctls
1836*0Sstevel@tonic-gate 	 */
1837*0Sstevel@tonic-gate 	if (mnum == MD_ADM_MINOR) {
1838*0Sstevel@tonic-gate 		err = md_admin_ioctl(md_expldev(dev), cmd, (void *) data,
1839*0Sstevel@tonic-gate 					mode, &lock);
1840*0Sstevel@tonic-gate 	}
1841*0Sstevel@tonic-gate 
1842*0Sstevel@tonic-gate 	/*
1843*0Sstevel@tonic-gate 	 * metadevice ioctls
1844*0Sstevel@tonic-gate 	 */
1845*0Sstevel@tonic-gate 	else if ((MD_MIN2SET(mnum) >= md_nsets) ||
1846*0Sstevel@tonic-gate 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1847*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL)) {
1848*0Sstevel@tonic-gate 		err = ENXIO;
1849*0Sstevel@tonic-gate 	} else if (md_ops[ui->ui_opsindex]->md_ioctl == NULL) {
1850*0Sstevel@tonic-gate 		err = ENOTTY;
1851*0Sstevel@tonic-gate 	} else {
1852*0Sstevel@tonic-gate 		err = (*md_ops[ui->ui_opsindex]->md_ioctl)
1853*0Sstevel@tonic-gate 		    (dev, cmd, (void *) data, mode, &lock);
1854*0Sstevel@tonic-gate 	}
1855*0Sstevel@tonic-gate 
1856*0Sstevel@tonic-gate 	/*
1857*0Sstevel@tonic-gate 	 * drop any locks we grabbed
1858*0Sstevel@tonic-gate 	 */
1859*0Sstevel@tonic-gate 	return (IOLOCK_RETURN_IOCTLEND(err, &lock));
1860*0Sstevel@tonic-gate }
1861*0Sstevel@tonic-gate 
1862*0Sstevel@tonic-gate static int
1863*0Sstevel@tonic-gate mddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
1864*0Sstevel@tonic-gate {
1865*0Sstevel@tonic-gate 	minor_t		mnum;
1866*0Sstevel@tonic-gate 	set_t		setno;
1867*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
1868*0Sstevel@tonic-gate 
1869*0Sstevel@tonic-gate 	if ((mnum = getminor(dev)) == MD_ADM_MINOR)
1870*0Sstevel@tonic-gate 		return (ENXIO);
1871*0Sstevel@tonic-gate 
1872*0Sstevel@tonic-gate 	setno = MD_MIN2SET(mnum);
1873*0Sstevel@tonic-gate 
1874*0Sstevel@tonic-gate 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits) ||
1875*0Sstevel@tonic-gate 	    ((ui = MDI_UNIT(mnum)) == NULL))
1876*0Sstevel@tonic-gate 		return (ENXIO);
1877*0Sstevel@tonic-gate 
1878*0Sstevel@tonic-gate 
1879*0Sstevel@tonic-gate 	if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0)
1880*0Sstevel@tonic-gate 		return (ENXIO);
1881*0Sstevel@tonic-gate 
1882*0Sstevel@tonic-gate 	if (md_ops[ui->ui_opsindex]->md_dump  != NULL)
1883*0Sstevel@tonic-gate 		return ((*md_ops[ui->ui_opsindex]->md_dump)
1884*0Sstevel@tonic-gate 		    (dev, addr, blkno, nblk));
1885*0Sstevel@tonic-gate 
1886*0Sstevel@tonic-gate 	return (ENXIO);
1887*0Sstevel@tonic-gate }
1888