xref: /onnv-gate/usr/src/uts/common/io/lvm/softpart/sp_ioctl.c (revision 2063:a6ebd483c3cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Soft partitioning metadevice driver (md_sp), administrative routines.
30  *
31  * This file contains the administrative routines for the soft partitioning
32  * metadevice driver.  All administration is done through the use of ioctl's.
33  *
34  * The primary ioctl's supported by soft partitions are as follows:
35  *
36  *	MD_IOCSET	- set up a new soft partition.
37  *	MD_IOCGET	- get the unit structure of a soft partition.
38  *	MD_IOCRESET	- delete a soft partition.
39  *	MD_IOCGROW	- add space to a soft partition.
40  *	MD_IOCGETDEVS	- get the device the soft partition is built on.
41  *	MD_IOC_SPSTATUS	- set the status (un_status field in the soft
42  *			  partition unit structure) for one or more soft
43  *			  partitions.
44  *
45  * Note that, as with other metadevices, the majority of the work for
46  * building/growing/deleting soft partitions is performed in userland
47  * (specifically in libmeta, see meta_sp.c).  The driver's main administrative
48  * function is to maintain the in-core & metadb entries associated with a soft
49  * partition.
50  *
51  * In addition, a few other ioctl's are supported via helper routines in
52  * the md driver.  These are:
53  *
54  *	DKIOCINFO	- get "disk" information.
55  *	DKIOCGEOM	- get geometry information.
56  *	DKIOCGVTOC	- get vtoc information.
57  */
58 #include <sys/param.h>
59 #include <sys/systm.h>
60 #include <sys/conf.h>
61 #include <sys/file.h>
62 #include <sys/user.h>
63 #include <sys/uio.h>
64 #include <sys/t_lock.h>
65 #include <sys/buf.h>
66 #include <sys/dkio.h>
67 #include <sys/vtoc.h>
68 #include <sys/kmem.h>
69 #include <vm/page.h>
70 #include <sys/sysmacros.h>
71 #include <sys/types.h>
72 #include <sys/mkdev.h>
73 #include <sys/stat.h>
74 #include <sys/open.h>
75 #include <sys/lvm/mdvar.h>
76 #include <sys/lvm/md_sp.h>
77 #include <sys/lvm/md_notify.h>
78 #include <sys/modctl.h>
79 #include <sys/ddi.h>
80 #include <sys/sunddi.h>
81 #include <sys/debug.h>
82 #include <sys/model.h>
83 
84 #include <sys/sysevent/eventdefs.h>
85 #include <sys/sysevent/svm.h>
86 
87 extern int		md_status;
88 
89 extern unit_t		md_nunits;
90 extern set_t		md_nsets;
91 extern md_set_t		md_set[];
92 
93 extern md_ops_t		sp_md_ops;
94 extern md_krwlock_t	md_unit_array_rw;
95 extern major_t		md_major;
96 
97 /*
98  * FUNCTION:	sp_getun()
99  * INPUT:	mnum	- minor number of soft partition to get.
100  * OUTPUT:	mde	- return error pointer.
101  * RETURNS:	mp_unit_t *	- ptr to unit structure requested
102  *		NULL		- error
103  * PURPOSE:	Returns a reference to the soft partition unit structure
104  *		indicated by the passed-in minor number.
105  */
106 static mp_unit_t *
107 sp_getun(minor_t mnum, md_error_t *mde)
108 {
109 	mp_unit_t	*un;
110 	mdi_unit_t	*ui;
111 	set_t		setno = MD_MIN2SET(mnum);
112 
113 	/* check set */
114 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
115 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
116 		return (NULL);
117 	}
118 
119 	if (md_get_setstatus(setno) & MD_SET_STALE) {
120 		(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
121 		return (NULL);
122 	}
123 
124 	ui = MDI_UNIT(mnum);
125 
126 	if (ui == NULL) {
127 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
128 		return (NULL);
129 	}
130 
131 	un = (mp_unit_t *)MD_UNIT(mnum);
132 
133 	if (un->c.un_type != MD_METASP) {
134 		(void) mdmderror(mde, MDE_NOT_SP, mnum);
135 		return (NULL);
136 	}
137 
138 	return (un);
139 }
140 
141 
142 /*
143  * FUNCTION:	sp_setstatus()
144  * INPUT:	d	- data ptr passed in from ioctl.
145  *		mode	- pass-through to ddi_copyin.
146  *		lockp	- lock ptr.
147  * OUTPUT:	none.
148  * RETURNS:	0		- success.
149  *		non-zero	- error.
150  * PURPOSE:	Set the status of one or more soft partitions atomically.
151  *		this implements the MD_IOC_SPSTATUS ioctl.  Soft partitions
152  *		are passed in as an array of minor numbers.  The un_status
153  *		field in the unit structure of each soft partition is set to
154  *		the status passed in and all unit structures are recommitted
155  *		to the metadb at once.
156  */
157 static int
158 sp_setstatus(void *d, int mode, IOLOCK *lockp)
159 {
160 	minor_t		*minors;
161 	mp_unit_t	*un;
162 	mddb_recid_t	*recids;
163 	int		i, nunits, sz;
164 	int		err = 0;
165 	sp_status_t	status;
166 	md_error_t	*mdep;
167 
168 	md_sp_statusset_t	*msp = (md_sp_statusset_t *)d;
169 
170 	nunits = msp->num_units;
171 	sz = msp->size;
172 	status = msp->new_status;
173 	mdep = &msp->mde;
174 
175 	mdclrerror(mdep);
176 	/* allocate minor number and recids arrays */
177 	minors = kmem_alloc(sz, KM_SLEEP);
178 	recids = kmem_alloc((nunits + 1) * sizeof (mddb_recid_t), KM_SLEEP);
179 
180 	/* copyin minor number array */
181 	if (err = ddi_copyin((void *)(uintptr_t)msp->minors, minors, sz, mode))
182 		goto out;
183 
184 	/* check to make sure all units are valid first */
185 	for (i = 0; i < nunits; i++) {
186 		if ((un = sp_getun(minors[i], mdep)) == NULL) {
187 			err = mdmderror(mdep, MDE_INVAL_UNIT, minors[i]);
188 			goto out;
189 		}
190 	}
191 
192 	/* update state for all units */
193 	for (i = 0; i < nunits; i++) {
194 		un = sp_getun(minors[i], mdep);
195 		(void) md_ioctl_writerlock(lockp, MDI_UNIT(minors[i]));
196 		un->un_status = status;
197 		recids[i] = un->c.un_record_id;
198 		md_ioctl_writerexit(lockp);
199 	}
200 
201 	recids[i] = 0;
202 	mddb_commitrecs_wrapper(recids);
203 
204 out:
205 	kmem_free(minors, sz);
206 	kmem_free(recids, ((nunits + 1) * sizeof (mddb_recid_t)));
207 	return (err);
208 }
209 
210 
211 /*
212  * FUNCTION:	sp_update_watermarks()
213  * INPUT:	d	- data ptr passed in from ioctl.
214  *		mode	- pass-through to ddi_copyin.
215  * OUTPUT:	none.
216  * RETURNS:	0		- success.
217  *		non-zero	- error.
218  * PURPOSE:	This implements the MD_IOC_SPUPDATEWM ioctl.
219  *              Watermarks are passed in an array.
220  */
221 static int
222 sp_update_watermarks(void *d, int mode)
223 {
224 	minor_t			mnum;
225 	set_t			setno;
226 	md_error_t		*mdep;
227 	mp_unit_t		*un;
228 	int			err = 0;
229 	size_t			wsz;
230 	size_t			osz;
231 	mp_watermark_t		*watermarks;
232 	sp_ext_offset_t		*offsets;
233 	md_dev64_t		device;
234 	buf_t			*bp;
235 	int			i;
236 	md_sp_update_wm_t	*mup = (md_sp_update_wm_t *)d;
237 	side_t			side;
238 
239 	mnum = mup->mnum;
240 	setno = MD_MIN2SET(mnum);
241 	side = mddb_getsidenum(setno);
242 	un = MD_UNIT(mnum);
243 
244 	if (un == NULL)
245 		return (EFAULT);
246 
247 	mdep = &mup->mde;
248 
249 	mdclrerror(mdep);
250 
251 	/* Validate the set */
252 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
253 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
254 	if (md_get_setstatus(setno) & MD_SET_STALE)
255 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
256 
257 	wsz = mup->count * sizeof (mp_watermark_t);
258 	watermarks = kmem_alloc(wsz, KM_SLEEP);
259 
260 	osz = mup->count * sizeof (sp_ext_offset_t);
261 	offsets = kmem_alloc(osz, KM_SLEEP);
262 
263 	/*
264 	 * Once we're here, we are no longer stateless: we cannot
265 	 * return without first freeing the watermarks and offset
266 	 * arrays we just allocated.  So use the "out" label instead
267 	 * of "return."
268 	 */
269 
270 	/* Retrieve the watermark and offset arrays from user land */
271 
272 	if (ddi_copyin((void *)(uintptr_t)mup->wmp, watermarks, wsz, mode)) {
273 		err = EFAULT;
274 		goto out;
275 	}
276 
277 	if (ddi_copyin((void *)(uintptr_t)mup->osp, offsets, osz, mode)) {
278 		err = EFAULT;
279 		goto out;
280 	}
281 
282 	/*
283 	 * NOTE: For multi-node sets we only commit the watermarks if we are
284 	 * the master node. This avoids an ioctl-within-ioctl deadlock if the
285 	 * underlying device is a mirror.
286 	 */
287 	if (MD_MNSET_SETNO(setno) && !md_set[setno].s_am_i_master) {
288 		goto out;
289 	}
290 
291 	device = un->un_dev;
292 	if ((md_getmajor(device) != md_major) &&
293 	    (md_devid_found(setno, side, un->un_key) == 1)) {
294 		device = md_resolve_bydevid(mnum, device, un->un_key);
295 	}
296 	/*
297 	 * Flag the fact that we're coming from an ioctl handler to the
298 	 * underlying device so that it can take appropriate action if needed.
299 	 * This is necessary for multi-owner mirrors as they may need to
300 	 * update the metadevice state as a result of the layered open.
301 	 */
302 	if (md_layered_open(mnum, &device, MD_OFLG_FROMIOCTL)) {
303 		err = mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
304 		    mnum, device);
305 		goto out;
306 	}
307 
308 	bp = kmem_alloc(biosize(), KM_SLEEP);
309 	bioinit(bp);
310 
311 	for (i = 0; i < mup->count; i++) {
312 
313 		/*
314 		 * Even the "constant" fields should be initialized
315 		 * here, since bioreset() below will clear them.
316 		 */
317 		bp->b_flags = B_WRITE;
318 		bp->b_bcount = sizeof (mp_watermark_t);
319 		bp->b_bufsize = sizeof (mp_watermark_t);
320 		bp->b_un.b_addr = (caddr_t)&watermarks[i];
321 		bp->b_lblkno = offsets[i];
322 		bp->b_edev = md_dev64_to_dev(device);
323 
324 		/*
325 		 * For MN sets only:
326 		 * Use a special flag MD_STR_WMUPDATE, for the following case:
327 		 * If the watermarks reside on a mirror disk and a switch
328 		 * of ownership is triggered by this IO,
329 		 * the message that is generated by that request must be
330 		 * processed even if the commd subsystem is currently suspended.
331 		 *
332 		 * For non-MN sets or non-mirror metadevices,
333 		 * this flag has no meaning and is not checked.
334 		 */
335 
336 		md_call_strategy(bp, MD_NOBLOCK | MD_STR_WMUPDATE, NULL);
337 
338 		if (biowait(bp)) {
339 			err = mdmderror(mdep,
340 			    MDE_SP_BADWMWRITE, mnum);
341 			break;
342 		}
343 
344 		/* Get the buf_t ready for the next iteration */
345 		bioreset(bp);
346 	}
347 
348 	biofini(bp);
349 	kmem_free(bp, biosize());
350 
351 	md_layered_close(device, MD_OFLG_NULL);
352 
353 out:
354 	kmem_free(watermarks, wsz);
355 	kmem_free(offsets, osz);
356 
357 	return (err);
358 }
359 
360 
361 /*
362  * FUNCTION:	sp_read_watermark()
363  * INPUT:	d	- data ptr passed in from ioctl.
364  *		mode	- pass-through to ddi_copyin.
365  * OUTPUT:	none.
366  * RETURNS:	0		- success.
367  *		non-zero	- error.
368  * PURPOSE:	This implements the MD_IOC_SPREADWM ioctl.
369  */
370 static int
371 sp_read_watermark(void *d, int mode)
372 {
373 	md_error_t		*mdep;
374 	mp_watermark_t		watermark;
375 	md_dev64_t		device;
376 	buf_t			*bp;
377 	md_sp_read_wm_t		*mrp = (md_sp_read_wm_t *)d;
378 
379 	mdep = &mrp->mde;
380 
381 	mdclrerror(mdep);
382 
383 	device = mrp->rdev;
384 
385 	/*
386 	 * Flag the fact that we are being called from ioctl context so that
387 	 * the underlying device can take any necessary extra steps to handle
388 	 * this scenario.
389 	 */
390 	if (md_layered_open((minor_t)-1, &device, MD_OFLG_FROMIOCTL)) {
391 		return (mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
392 		    (minor_t)NODEV, device));
393 	}
394 
395 	bp = kmem_alloc(biosize(), KM_SLEEP);
396 	bioinit(bp);
397 
398 	bp->b_flags = B_READ;
399 	bp->b_bcount = sizeof (mp_watermark_t);
400 	bp->b_bufsize = sizeof (mp_watermark_t);
401 	bp->b_un.b_addr = (caddr_t)&watermark;
402 	bp->b_lblkno = mrp->offset;
403 	bp->b_edev = md_dev64_to_dev(device);
404 
405 	md_call_strategy(bp, MD_NOBLOCK, NULL);
406 
407 	if (biowait(bp)) {
408 		/*
409 		 * Taking advantage of the knowledge that mdmderror()
410 		 * returns 0, so we don't really need to keep track of
411 		 * an error code other than in the error struct.
412 		 */
413 		(void) mdmderror(mdep, MDE_SP_BADWMREAD,
414 		    getminor(device));
415 	}
416 
417 	biofini(bp);
418 	kmem_free(bp, biosize());
419 
420 	md_layered_close(device, MD_OFLG_NULL);
421 
422 	if (ddi_copyout(&watermark, (void *)(uintptr_t)mrp->wmp,
423 	    sizeof (mp_watermark_t), mode)) {
424 		return (EFAULT);
425 	}
426 
427 	return (0);
428 }
429 
430 
431 /*
432  * FUNCTION:	sp_set()
433  * INPUT:	d	- data ptr passed in from ioctl.
434  *		mode	- pass-through to ddi_copyin.
435  * OUTPUT:	none.
436  * RETURNS:	0		- success.
437  *		non-zero	- error.
438  * PURPOSE:	Create a soft partition.  The unit structure representing
439  *		the soft partiton is passed down from userland.  We allocate
440  *		a metadb entry, copyin the unit the structure, handle any
441  *		metadevice parenting issues, then commit the record to the
442  *		metadb.  Once the record is in the metadb, we must also
443  *		build the associated in-core structures.  This is done via
444  *		sp_build_incore() (see sp.c).
445  */
446 static int
447 sp_set(void *d, int mode)
448 {
449 	minor_t		mnum;
450 	mp_unit_t	*un;
451 	void		*rec_addr;
452 	mddb_recid_t	recids[3];
453 	mddb_type_t	rec_type;
454 	int		err;
455 	set_t		setno;
456 	md_error_t	*mdep;
457 	md_unit_t	*child_un;
458 	md_set_params_t *msp = (md_set_params_t *)d;
459 
460 	mnum = msp->mnum;
461 	setno = MD_MIN2SET(mnum);
462 	mdep = &msp->mde;
463 
464 	mdclrerror(mdep);
465 
466 	/* validate set */
467 
468 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
469 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
470 	if (md_get_setstatus(setno) & MD_SET_STALE)
471 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
472 
473 	/* get the record type */
474 	rec_type = (mddb_type_t)md_getshared_key(setno,
475 	    sp_md_ops.md_driver.md_drivername);
476 
477 	/* check if there is already a device with this minor number */
478 	un = MD_UNIT(mnum);
479 	if (un != NULL)
480 		return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));
481 
482 	/* create the db record for this soft partition */
483 
484 	if (msp->options & MD_CRO_64BIT) {
485 #if defined(_ILP32)
486 		return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
487 #else
488 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
489 			MD_CRO_64BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
490 #endif
491 	} else {
492 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
493 			MD_CRO_32BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
494 	}
495 	/* set initial value for possible child record */
496 	recids[1] = 0;
497 	if (recids[0] < 0)
498 		return (mddbstatus2error(mdep, recids[0], mnum, setno));
499 
500 	/* get the address of the soft partition db record */
501 	rec_addr = (void *) mddb_getrecaddr(recids[0]);
502 
503 	/*
504 	 * at this point we can happily mess with the soft partition
505 	 * db record since we haven't committed it to the metadb yet.
506 	 * if we crash before we commit, the uncommitted record will be
507 	 * automatically purged.
508 	 */
509 
510 	/* copy in the user's soft partition unit struct */
511 	if (err = ddi_copyin((void *)(uintptr_t)msp->mdp,
512 	    rec_addr, (size_t)msp->size, mode)) {
513 		mddb_deleterec_wrapper(recids[0]);
514 		return (EFAULT);
515 	}
516 
517 	/* fill in common unit structure fields which aren't set in userland */
518 	un = (mp_unit_t *)rec_addr;
519 
520 	/* All 64 bit metadevices only support EFI labels. */
521 	if (msp->options & MD_CRO_64BIT) {
522 		un->c.un_flag |= MD_EFILABEL;
523 	}
524 
525 	MD_SID(un) = mnum;
526 	MD_RECID(un) = recids[0];
527 	MD_PARENT(un) = MD_NO_PARENT;
528 	un->c.un_revision |= MD_FN_META_DEV;
529 
530 	/* if we are parenting a metadevice, set our child's parent field */
531 	if (md_getmajor(un->un_dev) == md_major) {
532 		/* it's a metadevice, need to parent it */
533 		child_un = MD_UNIT(md_getminor(un->un_dev));
534 		if (child_un == NULL) {
535 			mddb_deleterec_wrapper(recids[0]);
536 			return (mdmderror(mdep, MDE_INVAL_UNIT,
537 			    md_getminor(un->un_dev)));
538 		}
539 		md_set_parent(un->un_dev, MD_SID(un));
540 
541 		/* set child recid and recids end marker */
542 		recids[1] = MD_RECID(child_un);
543 		recids[2] = 0;
544 	}
545 
546 	/*
547 	 * build the incore structures.
548 	 */
549 	if (err = sp_build_incore(rec_addr, 0)) {
550 		MD_UNIT(mnum) = NULL;
551 		mddb_deleterec_wrapper(recids[0]);
552 		return (err);
553 	}
554 
555 	/*
556 	 * Update unit availability
557 	 */
558 	md_set[setno].s_un_avail--;
559 
560 	/*
561 	 * commit the record.
562 	 * if we had to update a child record, it will get commited
563 	 * as well.
564 	 */
565 	mddb_commitrecs_wrapper(recids);
566 
567 	/* create the mdi_unit struct for this soft partition */
568 	md_create_unit_incore(mnum, &sp_md_ops, 0);
569 
570 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, TAG_METADEVICE, MD_UN2SET(un),
571 	    MD_SID(un));
572 	return (0);
573 }
574 
575 
576 /*
577  * FUNCTION:	sp_get()
578  * INPUT:	d	- data ptr.
579  *		mode	- pass-through to ddi_copyout.
580  *		lock	- lock ptr.
581  * OUTPUT:	none.
582  * RETURNS:	0		- success.
583  *		non-zero	- error.
584  * PURPOSE:	Get the soft partition unit structure specified by the
585  *		minor number.  the in-core unit structure is obtained
586  *		and copied into the md_i_get structure passed down from
587  *		userland.
588  */
589 static int
590 sp_get(void *d, int mode, IOLOCK *lock)
591 {
592 	minor_t		mnum;
593 	mdi_unit_t	*ui;
594 	mp_unit_t	*un;
595 	md_error_t	*mdep;
596 	md_i_get_t	*migp = d;
597 
598 
599 	mnum = migp->id;
600 	mdep = &migp->mde;
601 
602 	mdclrerror(mdep);
603 
604 	/* make sure this is a valid unit structure */
605 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
606 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
607 
608 	/* get the mdi_unit */
609 	if ((ui = MDI_UNIT(mnum)) == NULL) {
610 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
611 	}
612 
613 	/*
614 	 * md_ioctl_readerlock returns a reference to the in-core
615 	 * unit structure.  this lock will be dropped by
616 	 * md_ioctl_lock_exit() before the ioctl returns.
617 	 */
618 	un = (mp_unit_t *)md_ioctl_readerlock(lock, ui);
619 
620 	/* verify the md_i_get structure */
621 	if (migp->size == 0) {
622 		migp->size = un->c.un_size;
623 		return (0);
624 	}
625 	if (migp->size < un->c.un_size) {
626 		return (EFAULT);
627 	}
628 
629 	/* copyout unit */
630 	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp,
631 	    un->c.un_size, mode))
632 		return (EFAULT);
633 	return (0);
634 }
635 
636 
637 /*
638  * FUNCTION:	sp_reset()
639  * INPUT:	reset_params	- soft partitioning reset parameters.
640  * OUTPUT:	none.
641  * RETURNS:	0		- success.
642  *		non-zero	- error.
643  * PURPOSE:	Do the setup work needed to delete a soft partition.
644  *		note that the actual removal of both in-core and metadb
645  *		structures is done in the reset_sp() routine (see sp.c).
646  *		In addition, since multiple soft partitions may exist
647  *		on top of a single metadevice, the soft partition reset
648  *		parameters (md_sp_reset_t) contains information about
649  *		how the soft partition should deparent/reparent the
650  *		underlying metadevice.  If the underlying metadevice is
651  *		to be deparented, the new_parent field will be MD_NO_PARENT,
652  *		otherwise it will be contain the minor number of another
653  *		soft partition built on top of the underlying metadevice.
654  */
655 static int
656 sp_reset(md_sp_reset_t *softp)
657 {
658 	minor_t		mnum = softp->mnum;
659 	mdi_unit_t	*ui;
660 	mp_unit_t	*un;
661 	md_unit_t	*child_un;
662 	set_t		setno = MD_MIN2SET(mnum);
663 
664 	mdclrerror(&softp->mde);
665 
666 	/* get the unit structure */
667 	if ((un = sp_getun(mnum, &softp->mde)) == NULL) {
668 		return (mdmderror(&softp->mde, MDE_INVAL_UNIT, mnum));
669 	}
670 
671 	/* don't delete if we have a parent */
672 	if (MD_HAS_PARENT(un->c.un_parent)) {
673 		return (mdmderror(&softp->mde, MDE_IN_USE, mnum));
674 	}
675 
676 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
677 
678 	ui = MDI_UNIT(mnum);
679 	(void) md_unit_openclose_enter(ui);
680 
681 	/* don't delete if we are currently open */
682 	if (md_unit_isopen(ui)) {
683 		md_unit_openclose_exit(ui);
684 		rw_exit(&md_unit_array_rw.lock);
685 		return (mdmderror(&softp->mde, MDE_IS_OPEN, mnum));
686 	}
687 
688 	md_unit_openclose_exit(ui);
689 
690 	/*
691 	 * if we are built on metadevice, we need to deparent
692 	 * or reparent that metadevice.
693 	 */
694 	if (md_getmajor(un->un_dev) == md_major) {
695 		child_un = MD_UNIT(md_getminor(un->un_dev));
696 		md_set_parent(un->un_dev, softp->new_parent);
697 		mddb_commitrec_wrapper(MD_RECID(child_un));
698 	}
699 	/* remove the soft partition */
700 	reset_sp(un, mnum, 1);
701 
702 	/*
703 	 * Update unit availability
704 	 */
705 	md_set[setno].s_un_avail++;
706 
707 	/*
708 	 * If MN set, reset s_un_next so all nodes can have
709 	 * the same view of the next available slot when
710 	 * nodes are -w and -j
711 	 */
712 	if (MD_MNSET_SETNO(setno)) {
713 		md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
714 	}
715 
716 	/* release locks and return */
717 out:
718 	rw_exit(&md_unit_array_rw.lock);
719 	return (0);
720 }
721 
722 
723 /*
724  * FUNCTION:	sp_grow()
725  * INPUT:	d	- data ptr.
726  *		mode	- pass-through to ddi_copyin.
727  *		lockp	- lock ptr.
728  * OUTPUT:	none.
729  * RETURNS:	0		- success.
730  *		non-zero	- error.
731  * PURPOSE:	Attach more space to a soft partition.  We are passed in
732  *		a new unit structure with the new extents and other updated
733  *		information.  The new unit structure essentially replaces
734  *		the old unit for this soft partition.  We place the new
735  *		unit into the metadb, delete the old metadb record, and
736  *		then update the in-core unit structure array to point to
737  *		the new unit.
738  */
739 static int
740 sp_grow(void *d, int mode, IOLOCK *lockp)
741 {
742 	minor_t		mnum;
743 	mp_unit_t	*un, *new_un;
744 	mdi_unit_t	*ui;
745 	minor_t		*par = NULL;
746 	IOLOCK		*plock = NULL;
747 	int		i;
748 	mddb_recid_t	recid;
749 	mddb_type_t	rec_type;
750 	mddb_recid_t	old_vtoc = 0;
751 	md_create_rec_option_t options;
752 	int		err;
753 	int		rval = 0;
754 	set_t		setno;
755 	md_error_t	*mdep;
756 	int		npar;
757 	md_grow_params_t *mgp = (md_grow_params_t *)d;
758 
759 	mnum = mgp->mnum;
760 	mdep = &mgp->mde;
761 	setno = MD_MIN2SET(mnum);
762 	npar = mgp->npar;
763 
764 	mdclrerror(mdep);
765 
766 	/* validate set */
767 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
768 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
769 	if (md_get_setstatus(setno) & MD_SET_STALE)
770 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
771 
772 	/* make sure this soft partition already exists */
773 	ui = MDI_UNIT(mnum);
774 	if (ui == NULL)
775 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
776 
777 	/* handle any parents */
778 	if (npar >= 1) {
779 		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
780 		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
781 		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
782 		if (ddi_copyin((void *)(uintptr_t)mgp->par, par,
783 		    (npar * sizeof (*par)), mode) != 0) {
784 			kmem_free(par, npar * sizeof (*par));
785 			kmem_free(plock, npar * sizeof (*plock));
786 			return (EFAULT);
787 		}
788 	}
789 
790 	/*
791 	 * handle parent locking.  grab the unit writer lock,
792 	 * then all parent ioctl locks, and then finally our own.
793 	 * parents should be sorted to avoid deadlock.
794 	 */
795 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
796 	for (i = 0; i < npar; ++i) {
797 		(void) md_ioctl_writerlock(&plock[i],
798 		    MDI_UNIT(par[i]));
799 	}
800 	un = (mp_unit_t *)md_ioctl_writerlock(lockp, ui);
801 
802 	rec_type = (mddb_type_t)md_getshared_key(setno,
803 	    sp_md_ops.md_driver.md_drivername);
804 
805 	/*
806 	 * Preserve the friendly name nature of the unit that is growing.
807 	 */
808 	options = MD_CRO_SOFTPART;
809 	if (un->c.un_revision & MD_FN_META_DEV)
810 		options |= MD_CRO_FN;
811 	if (mgp->options & MD_CRO_64BIT) {
812 #if defined(_ILP32)
813 		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
814 		goto out;
815 #else
816 		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
817 				MD_CRO_64BIT | options, setno);
818 #endif
819 	} else {
820 		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
821 				MD_CRO_32BIT | options, setno);
822 	}
823 	if (recid < 0) {
824 		rval = mddbstatus2error(mdep, (int)recid, mnum, setno);
825 		goto out;
826 	}
827 
828 	/* get the address of the new unit */
829 	new_un = (mp_unit_t *)mddb_getrecaddr(recid);
830 
831 	/* copy in the user's unit struct */
832 	err = ddi_copyin((void *)(uintptr_t)mgp->mdp, new_un,
833 	    (size_t)mgp->size, mode);
834 	if (err) {
835 		mddb_deleterec_wrapper(recid);
836 		rval = EFAULT;
837 		goto out;
838 	}
839 	if (options & MD_CRO_FN)
840 		new_un->c.un_revision |= MD_FN_META_DEV;
841 
842 	/* All 64 bit metadevices only support EFI labels. */
843 	if (mgp->options & MD_CRO_64BIT) {
844 		new_un->c.un_flag |= MD_EFILABEL;
845 		/*
846 		 * If the device was previously smaller than a terabyte,
847 		 * and had a vtoc record attached to it, we remove the
848 		 * vtoc record, because the layout has changed completely.
849 		 */
850 		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
851 		    (un->c.un_vtoc_id != 0)) {
852 			old_vtoc = un->c.un_vtoc_id;
853 			new_un->c.un_vtoc_id =
854 				md_vtoc_to_efi_record(old_vtoc, setno);
855 		}
856 	}
857 
858 	/* commit new unit struct */
859 	MD_RECID(new_un) = recid;
860 	mddb_commitrec_wrapper(recid);
861 
862 	/*
863 	 * delete old unit struct.
864 	 */
865 	mddb_deleterec_wrapper(MD_RECID(un));
866 	MD_UNIT(mnum) = new_un;
867 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, TAG_METADEVICE,
868 	    MD_UN2SET(new_un), MD_SID(new_un));
869 
870 	/*
871 	 * If old_vtoc has a non zero value, we know:
872 	 * - This unit crossed the border from smaller to larger one TB
873 	 * - There was a vtoc record for the unit,
874 	 * - This vtoc record is no longer needed, because
875 	 *   a new efi record has been created for this un.
876 	 */
877 	if (old_vtoc != 0) {
878 		mddb_deleterec_wrapper(old_vtoc);
879 	}
880 
881 	/* release locks, return success */
882 out:
883 	for (i =  npar - 1; (i >= 0); --i)
884 		md_ioctl_writerexit(&plock[i]);
885 	rw_exit(&md_unit_array_rw.lock);
886 	if (plock != NULL)
887 		kmem_free(plock, npar * sizeof (*plock));
888 	if (par != NULL)
889 		kmem_free(par, npar * sizeof (*par));
890 	return (rval);
891 }
892 
893 /*
894  * FUNCTION:	sp_getdevs()
895  * INPUT:	d	- data ptr.
896  *		mode	- pass-through to ddi_copyout.
897  *		lockp	- lock ptr.
898  * OUTPUT:	none.
899  * RETURNS:	0		- success.
900  *		non-zero	- error.
901  * PURPOSE:	Get the device on which the soft partition is built.
902  *		This is simply a matter of copying out the md_dev64_t stored
903  *		in the soft partition unit structure.
904  */
905 static int
906 sp_getdevs(
907 	void			*d,
908 	int			mode,
909 	IOLOCK			*lockp
910 )
911 {
912 	minor_t			mnum;
913 	mdi_unit_t		*ui;
914 	mp_unit_t		*un;
915 	md_error_t		*mdep;
916 	md_dev64_t		*devsp;
917 	md_dev64_t		unit_dev;
918 	md_getdevs_params_t	*mgdp = (md_getdevs_params_t *)d;
919 
920 
921 	mnum = mgdp->mnum;
922 	mdep = &(mgdp->mde);
923 
924 	mdclrerror(mdep);
925 
926 	/* check set */
927 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
928 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
929 	/* check unit */
930 	if ((ui = MDI_UNIT(mnum)) == NULL) {
931 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
932 	}
933 	/* get unit */
934 	un = (mp_unit_t *)md_ioctl_readerlock(lockp, ui);
935 	devsp = (md_dev64_t *)(uintptr_t)mgdp->devs;
936 
937 	/* only ever 1 device for a soft partition */
938 	if (mgdp->cnt != 0) {
939 		/* do miniroot->target device translation */
940 		unit_dev = un->un_dev;
941 		if (md_getmajor(unit_dev) != md_major) {
942 			if ((unit_dev = md_xlate_mini_2_targ(unit_dev))
943 			    == NODEV64)
944 				return (ENODEV);
945 		}
946 		/* copyout dev information */
947 		if (ddi_copyout(&unit_dev, devsp, sizeof (*devsp), mode) != 0)
948 			return (EFAULT);
949 	}
950 	mgdp->cnt = 1;
951 
952 	return (0);
953 }
954 
955 /*
956  * sp_set_capability:
957  * ------------------
958  * Called to set or clear a capability for a softpart
959  * called by the MD_MN_SET_CAP ioctl.
960  */
961 static int
962 sp_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
963 {
964 	set_t		setno;
965 	mdi_unit_t	*ui;
966 	mp_unit_t	*un;
967 	int		err = 0;
968 
969 	if ((un = sp_getun(p->mnum, &p->mde)) == NULL)
970 		return (EINVAL);
971 
972 	/* This function is only valid for a multi-node set */
973 	setno = MD_MIN2SET(p->mnum);
974 	if (!MD_MNSET_SETNO(setno)) {
975 		return (EINVAL);
976 	}
977 	ui = MDI_UNIT(p->mnum);
978 	(void) md_ioctl_readerlock(lockp, ui);
979 
980 	if (p->sc_set & DKV_ABR_CAP) {
981 		void (*inc_abr_count)();
982 
983 		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
984 		/* Increment abr count in underlying metadevice */
985 		inc_abr_count = (void(*)())md_get_named_service(un->un_dev,
986 		    0, MD_INC_ABR_COUNT, 0);
987 		if (inc_abr_count != NULL)
988 			(void) (*inc_abr_count)(un->un_dev);
989 	} else {
990 		void (*dec_abr_count)();
991 
992 		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
993 		/* Decrement abr count in underlying metadevice */
994 		dec_abr_count = (void(*)())md_get_named_service(un->un_dev,
995 		    0, MD_DEC_ABR_COUNT, 0);
996 		if (dec_abr_count != NULL)
997 			(void) (*dec_abr_count)(un->un_dev);
998 	}
999 	if (p->sc_set & DKV_DMR_CAP) {
1000 		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
1001 	} else {
1002 		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
1003 	}
1004 	md_ioctl_readerexit(lockp);
1005 	return (err);
1006 }
1007 
1008 
1009 /*
1010  * FUNCTION:	sp_admin_ioctl().
1011  * INPUT:	cmd	- ioctl to be handled.
1012  *		data	- data ptr.
1013  *		mode	- pass-through to copyin/copyout routines.
1014  *		lockp	- lock ptr.
1015  * OUTPUT:	none.
1016  * RETURNS:	0		- success.
1017  *		non-zero	- error.
1018  * PURPOSE:	Handle administrative ioctl's.  Essentially a large
1019  *		switch statement to dispatch the ioctl's to their
1020  *		handlers.  See comment at beginning of file for specifics
1021  *		on which ioctl's are handled.
1022  */
1023 static int
1024 sp_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
1025 {
1026 	size_t	sz = 0;
1027 	void	*d = NULL;
1028 	int	err = 0;
1029 
1030 	/* We can only handle 32-bit clients for internal commands */
1031 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1032 		return (EINVAL);
1033 	}
1034 
1035 	/* handle ioctl */
1036 	switch (cmd) {
1037 
1038 	case MD_IOCSET:
1039 	{
1040 		/* create new soft partition */
1041 		if (! (mode & FWRITE))
1042 			return (EACCES);
1043 
1044 		sz = sizeof (md_set_params_t);
1045 
1046 		d = kmem_alloc(sz, KM_SLEEP);
1047 
1048 		if (ddi_copyin(data, d, sz, mode)) {
1049 			err = EFAULT;
1050 			break;
1051 		}
1052 
1053 		err = sp_set(d, mode);
1054 		break;
1055 	}
1056 
1057 	case MD_IOCGET:
1058 	{
1059 		/* get soft partition unit structure */
1060 		if (! (mode & FREAD))
1061 			return (EACCES);
1062 
1063 		sz = sizeof (md_i_get_t);
1064 
1065 		d = kmem_alloc(sz, KM_SLEEP);
1066 
1067 		if (ddi_copyin(data, d, sz, mode)) {
1068 			err = EFAULT;
1069 			break;
1070 		}
1071 
1072 		err = sp_get(d, mode, lockp);
1073 		break;
1074 	}
1075 	case MD_IOCRESET:
1076 	{
1077 		/* delete soft partition */
1078 		if (! (mode & FWRITE))
1079 			return (EACCES);
1080 
1081 		sz = sizeof (md_sp_reset_t);
1082 		d = kmem_alloc(sz, KM_SLEEP);
1083 
1084 		if (ddi_copyin(data, d, sz, mode)) {
1085 			err = EFAULT;
1086 			break;
1087 		}
1088 
1089 		err = sp_reset((md_sp_reset_t *)d);
1090 		break;
1091 	}
1092 
1093 	case MD_IOCGROW:
1094 	{
1095 		/* grow soft partition */
1096 		if (! (mode & FWRITE))
1097 			return (EACCES);
1098 
1099 		sz = sizeof (md_grow_params_t);
1100 		d  = kmem_alloc(sz, KM_SLEEP);
1101 
1102 		if (ddi_copyin(data, d, sz, mode)) {
1103 			err = EFAULT;
1104 			break;
1105 		}
1106 
1107 		err = sp_grow(d, mode, lockp);
1108 		break;
1109 	}
1110 
1111 	case MD_IOCGET_DEVS:
1112 	{
1113 		/* get underlying device */
1114 		if (! (mode & FREAD))
1115 			return (EACCES);
1116 
1117 		sz = sizeof (md_getdevs_params_t);
1118 		d  = kmem_alloc(sz, KM_SLEEP);
1119 
1120 		if (ddi_copyin(data, d, sz, mode)) {
1121 			err = EFAULT;
1122 			break;
1123 		}
1124 
1125 		err = sp_getdevs(d, mode, lockp);
1126 		break;
1127 	}
1128 
1129 	case MD_IOC_SPSTATUS:
1130 	{
1131 		/* set the status field of one or more soft partitions */
1132 		if (! (mode & FWRITE))
1133 			return (EACCES);
1134 
1135 		sz = sizeof (md_sp_statusset_t);
1136 		d  = kmem_alloc(sz, KM_SLEEP);
1137 
1138 		if (ddi_copyin(data, d, sz, mode)) {
1139 			err = EFAULT;
1140 			break;
1141 		}
1142 
1143 		err = sp_setstatus(d, mode, lockp);
1144 		break;
1145 	}
1146 
1147 	case MD_IOC_SPUPDATEWM:
1148 	{
1149 		if (! (mode & FWRITE))
1150 			return (EACCES);
1151 
1152 		sz = sizeof (md_sp_update_wm_t);
1153 		d  = kmem_alloc(sz, KM_SLEEP);
1154 
1155 		if (ddi_copyin(data, d, sz, mode)) {
1156 			err = EFAULT;
1157 			break;
1158 		}
1159 
1160 		err = sp_update_watermarks(d, mode);
1161 		break;
1162 	}
1163 
1164 	case MD_IOC_SPREADWM:
1165 	{
1166 		if (! (mode & FREAD))
1167 			return (EACCES);
1168 
1169 		sz = sizeof (md_sp_read_wm_t);
1170 		d  = kmem_alloc(sz, KM_SLEEP);
1171 
1172 		if (ddi_copyin(data, d, sz, mode)) {
1173 			err = EFAULT;
1174 			break;
1175 		}
1176 
1177 		err = sp_read_watermark(d, mode);
1178 		break;
1179 	}
1180 
1181 	case MD_MN_SET_CAP:
1182 	{
1183 		if (! (mode & FWRITE))
1184 			return (EACCES);
1185 
1186 		sz = sizeof (md_mn_setcap_params_t);
1187 		d  = kmem_alloc(sz, KM_SLEEP);
1188 
1189 		if (ddi_copyin(data, d, sz, mode)) {
1190 			err = EFAULT;
1191 			break;
1192 		}
1193 
1194 		err = sp_set_capability((md_mn_setcap_params_t *)d, lockp);
1195 		break;
1196 	}
1197 
1198 	default:
1199 		return (ENOTTY);
1200 	}
1201 
1202 	/*
1203 	 * copyout and free any args
1204 	 */
1205 	if (sz != 0) {
1206 		if (err == 0) {
1207 			if (ddi_copyout(d, data, sz, mode) != 0) {
1208 				err = EFAULT;
1209 			}
1210 		}
1211 		kmem_free(d, sz);
1212 	}
1213 	return (err);
1214 }
1215 
1216 
1217 /*
1218  * FUNCTION:	md_sp_ioctl()
1219  * INPUT:	dev	- device we are operating on.
1220  *		cmd	- ioctl to be handled.
1221  *		data	- data ptr.
1222  *		mode	- pass-through to copyin/copyout routines.
1223  *		lockp	- lock ptr.
1224  * OUTPUT:	none.
1225  * RETURNS:	0		- success.
1226  *		non-zero	- error.
1227  * PURPOSE:	Dispatch ioctl's.  Administrative ioctl's are handled
1228  *		by sp_admin_ioctl.  All others (see comment at beginning
1229  *		of this file) are handled in-line here.
1230  */
1231 int
1232 md_sp_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
1233 {
1234 	minor_t		mnum = getminor(dev);
1235 	mp_unit_t	*un;
1236 	mdi_unit_t	*ui;
1237 	int		err = 0;
1238 
1239 	/* handle admin ioctls */
1240 	if (mnum == MD_ADM_MINOR)
1241 		return (sp_admin_ioctl(cmd, data, mode, lockp));
1242 
1243 	/* check unit */
1244 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
1245 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1246 	    ((ui = MDI_UNIT(mnum)) == NULL) ||
1247 	    ((un = MD_UNIT(mnum)) == NULL))
1248 		return (ENXIO);
1249 
1250 	/* is this a supported ioctl? */
1251 	err = md_check_ioctl_against_efi(cmd, un->c.un_flag);
1252 	if (err != 0) {
1253 		return (err);
1254 	}
1255 
1256 
1257 	/* handle ioctl */
1258 	switch (cmd) {
1259 
1260 	case DKIOCINFO:
1261 	{
1262 		/* "disk" info */
1263 		struct dk_cinfo		*p;
1264 
1265 		if (! (mode & FREAD))
1266 			return (EACCES);
1267 
1268 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
1269 
1270 		get_info(p, mnum);
1271 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1272 			err = EFAULT;
1273 
1274 		kmem_free(p, sizeof (*p));
1275 		return (err);
1276 	}
1277 
1278 	case DKIOCGMEDIAINFO:
1279 	{
1280 		struct dk_minfo	p;
1281 
1282 		if (! (mode & FREAD))
1283 			return (EACCES);
1284 
1285 		get_minfo(&p, mnum);
1286 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
1287 			err = EFAULT;
1288 
1289 		return (err);
1290 	}
1291 
1292 	case DKIOCGGEOM:
1293 	{
1294 		/* geometry information */
1295 		struct dk_geom		*p;
1296 
1297 		if (! (mode & FREAD))
1298 			return (EACCES);
1299 
1300 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
1301 
1302 		md_get_geom((md_unit_t *)un, p);
1303 		if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1304 		    mode) != 0)
1305 			err = EFAULT;
1306 
1307 		kmem_free(p, sizeof (*p));
1308 		return (err);
1309 	}
1310 	case DKIOCGAPART:
1311 	{
1312 		struct dk_map	dmp;
1313 
1314 		err = 0;
1315 		md_get_cgapart((md_unit_t *)un, &dmp);
1316 
1317 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1318 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
1319 				mode) != 0)
1320 				err = EFAULT;
1321 		}
1322 #ifdef _SYSCALL32
1323 		else {
1324 			struct dk_map32 dmp32;
1325 
1326 			dmp32.dkl_cylno = dmp.dkl_cylno;
1327 			dmp32.dkl_nblk = dmp.dkl_nblk;
1328 
1329 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
1330 				mode) != 0)
1331 				err = EFAULT;
1332 		}
1333 #endif /* _SYSCALL32 */
1334 
1335 		return (err);
1336 	}
1337 	case DKIOCGVTOC:
1338 	{
1339 		/* vtoc information */
1340 		struct vtoc	vtoc;
1341 
1342 		if (! (mode & FREAD))
1343 			return (EACCES);
1344 
1345 		md_get_vtoc((md_unit_t *)un, &vtoc);
1346 
1347 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1348 			if (ddi_copyout(&vtoc, data, sizeof (vtoc), mode))
1349 				err = EFAULT;
1350 		}
1351 #ifdef _SYSCALL32
1352 		else {
1353 			struct vtoc32 vtoc32;
1354 			vtoctovtoc32(vtoc, vtoc32);
1355 			if (ddi_copyout(&vtoc32, data, sizeof (vtoc32), mode))
1356 				err = EFAULT;
1357 		}
1358 #endif /* _SYSCALL32 */
1359 
1360 		return (err);
1361 	}
1362 
1363 	case DKIOCSVTOC:
1364 	{
1365 		struct vtoc	vtoc;
1366 
1367 		if (! (mode & FWRITE))
1368 			return (EACCES);
1369 
1370 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1371 			if (ddi_copyin(data, &vtoc, sizeof (vtoc), mode)) {
1372 				err = EFAULT;
1373 			}
1374 		}
1375 #ifdef _SYSCALL32
1376 		else {
1377 			struct vtoc32 vtoc32;
1378 			if (ddi_copyin(data, &vtoc32, sizeof (vtoc32), mode)) {
1379 				err = EFAULT;
1380 			} else {
1381 				vtoc32tovtoc(vtoc32, vtoc);
1382 			}
1383 		}
1384 #endif /* _SYSCALL32 */
1385 
1386 		if (err == 0)
1387 			err = md_set_vtoc((md_unit_t *)un, &vtoc);
1388 
1389 		return (err);
1390 	}
1391 
1392 	case DKIOCGETEFI:
1393 	{
1394 		/*
1395 		 * This one can be done centralized,
1396 		 * no need to put in the same code for all types of metadevices
1397 		 */
1398 		return (md_dkiocgetefi(mnum, data, mode));
1399 	}
1400 	case DKIOCSETEFI:
1401 	{
1402 		/*
1403 		 * This one can be done centralized,
1404 		 * no need to put in the same code for all types of metadevices
1405 		 */
1406 		return (md_dkiocsetefi(mnum, data, mode));
1407 	}
1408 
1409 	case DKIOCPARTITION:
1410 	{
1411 		return (md_dkiocpartition(mnum, data, mode));
1412 	}
1413 
1414 	case DKIOCGETVOLCAP:
1415 	{
1416 		/*
1417 		 * Return the supported capabilities for the soft-partition.
1418 		 * We can only support those caps that are provided by the
1419 		 * underlying device.
1420 		 */
1421 
1422 		volcap_t	vc;
1423 
1424 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1425 			return (EINVAL);
1426 
1427 		if (! (mode & FREAD))
1428 			return (EACCES);
1429 
1430 		bzero(&vc, sizeof (vc));
1431 
1432 		/* Send ioctl to underlying driver */
1433 
1434 		err = md_call_ioctl(un->un_dev, cmd, &vc, (mode | FKIOCTL),
1435 		    lockp);
1436 
1437 		if (err == 0)
1438 			ui->ui_capab = vc.vc_info;
1439 
1440 		if (ddi_copyout(&vc, data, sizeof (vc), mode))
1441 			err = EFAULT;
1442 
1443 		return (err);
1444 	}
1445 
1446 	case DKIOCSETVOLCAP:
1447 	{
1448 		/*
1449 		 * Enable a supported capability (as returned by DKIOCGETVOLCAP)
1450 		 * Do not pass the request down as we're the top-level device
1451 		 * handler for the application.
1452 		 * If the requested capability is supported (set in ui_capab),
1453 		 * set the corresponding bit in ui_tstate so that we can pass
1454 		 * the appropriate flag when performing i/o.
1455 		 * This request is propagated to all nodes.
1456 		 */
1457 		volcap_t	vc, vc1;
1458 		volcapset_t	volcap = 0;
1459 		void 		(*check_offline)();
1460 		int		offline_status = 0;
1461 
1462 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1463 			return (EINVAL);
1464 
1465 		if (! (mode & FWRITE))
1466 			return (EACCES);
1467 
1468 		if (ddi_copyin(data, &vc, sizeof (vc), mode))
1469 			return (EFAULT);
1470 
1471 		/*
1472 		 * Send DKIOCGETVOLCAP to underlying driver to see if
1473 		 * capability supported
1474 		 */
1475 
1476 		vc1.vc_info = 0;
1477 		err = md_call_ioctl(un->un_dev, DKIOCGETVOLCAP, &vc1,
1478 		    (mode | FKIOCTL), lockp);
1479 		if (err != 0)
1480 			return (err);
1481 
1482 		/* Save capabilities */
1483 		ui->ui_capab = vc1.vc_info;
1484 		/*
1485 		 * Error if required capability not supported by underlying
1486 		 * driver
1487 		 */
1488 		if ((vc1.vc_info & vc.vc_set) == 0)
1489 			return (ENOTSUP);
1490 
1491 
1492 		/*
1493 		 * Check if underlying mirror has an offline submirror,
1494 		 * fail if there is on offline submirror
1495 		 */
1496 		check_offline = (void(*)())md_get_named_service(un->un_dev,
1497 		    0, MD_CHECK_OFFLINE, 0);
1498 		if (check_offline != NULL)
1499 			(void) (*check_offline)(un->un_dev, &offline_status);
1500 		if (offline_status)
1501 			return (EINVAL);
1502 
1503 		if (ui->ui_tstate & MD_ABR_CAP)
1504 			volcap |= DKV_ABR_CAP;
1505 
1506 		/* Only send capability message if there is a change */
1507 		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
1508 			err = mdmn_send_capability_message(mnum, vc, lockp);
1509 		return (err);
1510 	}
1511 
1512 	case DKIOCDMR:
1513 	{
1514 		/*
1515 		 * Only valid for MN sets. We need to pass it down to the
1516 		 * underlying driver if its a metadevice, after we've modified
1517 		 * the offsets to pick up the correct lower-level device
1518 		 * position.
1519 		 */
1520 		vol_directed_rd_t	*vdr;
1521 #ifdef _MULTI_DATAMODEL
1522 		vol_directed_rd32_t	*vdr32;
1523 #endif	/* _MULTI_DATAMODEL */
1524 
1525 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1526 			return (EINVAL);
1527 
1528 		if (! (ui->ui_capab & DKV_DMR_CAP))
1529 			return (EINVAL);
1530 
1531 		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
1532 		if (vdr == NULL)
1533 			return (ENOMEM);
1534 
1535 		/*
1536 		 * Underlying device supports directed mirror read, so update
1537 		 * the user-supplied offset to pick the correct block from the
1538 		 * partitioned metadevice.
1539 		 */
1540 #ifdef _MULTI_DATAMODEL
1541 		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
1542 		if (vdr32 == NULL) {
1543 			kmem_free(vdr, sizeof (vol_directed_rd_t));
1544 			return (ENOMEM);
1545 		}
1546 
1547 		switch (ddi_model_convert_from(mode & FMODELS)) {
1548 		case DDI_MODEL_ILP32:
1549 			if (ddi_copyin(data, vdr32, sizeof (*vdr32), mode)) {
1550 				kmem_free(vdr, sizeof (*vdr));
1551 				return (EFAULT);
1552 			}
1553 			vdr->vdr_flags = vdr32->vdr_flags;
1554 			vdr->vdr_offset = vdr32->vdr_offset;
1555 			vdr->vdr_nbytes = vdr32->vdr_nbytes;
1556 			vdr->vdr_data = (void *)(uintptr_t)vdr32->vdr_data;
1557 			vdr->vdr_side = vdr32->vdr_side;
1558 			break;
1559 
1560 		case DDI_MODEL_NONE:
1561 			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
1562 				kmem_free(vdr32, sizeof (*vdr32));
1563 				kmem_free(vdr, sizeof (*vdr));
1564 				return (EFAULT);
1565 			}
1566 			break;
1567 
1568 		default:
1569 			kmem_free(vdr32, sizeof (*vdr32));
1570 			kmem_free(vdr, sizeof (*vdr));
1571 			return (EFAULT);
1572 		}
1573 #else	/* ! _MULTI_DATAMODEL */
1574 		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
1575 			kmem_free(vdr, sizeof (*vdr));
1576 			return (EFAULT);
1577 		}
1578 #endif	/* _MULTI_DATA_MODEL */
1579 
1580 		err = sp_directed_read(mnum, vdr, mode);
1581 
1582 
1583 #ifdef _MULTI_DATAMODEL
1584 		switch (ddi_model_convert_from(mode & FMODELS)) {
1585 		case DDI_MODEL_ILP32:
1586 			vdr32->vdr_flags = vdr->vdr_flags;
1587 			vdr32->vdr_offset = vdr->vdr_offset;
1588 			vdr32->vdr_side = vdr->vdr_side;
1589 			vdr32->vdr_bytesread = vdr->vdr_bytesread;
1590 			bcopy(vdr->vdr_side_name, vdr32->vdr_side_name,
1591 			    sizeof (vdr32->vdr_side_name));
1592 
1593 			if (ddi_copyout(vdr32, data, sizeof (*vdr32), mode))
1594 				err = EFAULT;
1595 			break;
1596 
1597 		case DDI_MODEL_NONE:
1598 			if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
1599 				err = EFAULT;
1600 			break;
1601 		}
1602 #else	/* ! _MULTI_DATA_MODEL */
1603 		if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
1604 			err = EFAULT;
1605 #endif	/* _MULTI_DATA_MODEL */
1606 
1607 #ifdef _MULTI_DATAMODEL
1608 		kmem_free(vdr32, sizeof (*vdr32));
1609 #endif	/* _MULTI_DATAMODEL */
1610 		kmem_free(vdr, sizeof (*vdr));
1611 
1612 		return (err);
1613 	}
1614 
1615 	}
1616 
1617 	/* Option not handled */
1618 	return (ENOTTY);
1619 }
1620