xref: /onnv-gate/usr/src/uts/common/io/lvm/mirror/mirror_ioctl.c (revision 10948:c686aa11575c)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_stripe.h>
#include <sys/lvm/md_mirror.h>

#include <sys/model.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>
#include <sys/lvm/mdmn_commd.h>

extern int		md_status;
extern kmutex_t		md_mx;
extern kcondvar_t	md_cv;

extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];

extern md_ops_t		mirror_md_ops;
extern int		md_ioctl_cnt;
extern md_krwlock_t	md_unit_array_rw;
extern major_t		md_major;
extern mdq_anchor_t	md_ff_daemonq;
extern void		md_probe_one();
extern void		mirror_openfail_console_info();

#ifdef DEBUG
extern int		mirror_debug_flag;
#endif

static void
mirror_resume_writes(mm_unit_t *un)
{
	/*
	 * Release the block on writes to the mirror and resume any blocked
	 * resync thread.
	 * This is only required for MN sets.
	 */
	if (MD_MNSET_SETNO(MD_UN2SET(un))) {
#ifdef DEBUG
		if (mirror_debug_flag)
			printf("mirror_resume_writes: mnum %x\n", MD_SID(un));
#endif
		mutex_enter(&un->un_suspend_wr_mx);
		un->un_suspend_wr_flag = 0;
		cv_broadcast(&un->un_suspend_wr_cv);
		mutex_exit(&un->un_suspend_wr_mx);
		mutex_enter(&un->un_rs_thread_mx);
		un->un_rs_thread_flags &= ~MD_RI_BLOCK;
		cv_signal(&un->un_rs_thread_cv);
		mutex_exit(&un->un_rs_thread_mx);
	}
}

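/*
 * mirror_getun:
 * ----------------
 * Look up the mirror unit for the given minor number and take the locks
 * requested in 'flags'. The set and unit numbers are validated, stale sets
 * are rejected unless STALE_OK is passed, and the unit type is checked to
 * be a mirror. With NO_OLD the unit must not already exist; success is then
 * indicated by the non-NULL sentinel value (mm_unit_t *)1.
 *
 * Returns:
 *	unit structure (or sentinel) on success
 *	NULL on failure, with the specific error recorded in 'mde'
 */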
mm_unit_t *
mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
{
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
		return (NULL);
	}

	if (!(flags & STALE_OK)) {
		if (md_get_setstatus(setno) & MD_SET_STALE) {
			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
			return (NULL);
		}
	}

	ui = MDI_UNIT(mnum);
	if (flags & NO_OLD) {
		if (ui != NULL) {
			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
			return (NULL);
		}
		return ((mm_unit_t *)1);
	}

	if (ui == NULL) {
		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
		return (NULL);
	}

	if (flags & ARRAY_WRITER)
		md_array_writer(lock);
	else if (flags & ARRAY_READER)
		md_array_reader(lock);

	if (!(flags & NO_LOCK)) {
		if (flags & WR_LOCK)
			(void) md_ioctl_writerlock(lock, ui);
		else /* RD_LOCK */
			(void) md_ioctl_readerlock(lock, ui);
	}
	un = (mm_unit_t *)MD_UNIT(mnum);

	if (un->c.un_type != MD_METAMIRROR) {
		(void) mdmderror(mde, MDE_NOT_MM, mnum);
		return (NULL);
	}

	return (un);
}

static int
mirror_set(
	void		*d,
	int		mode
)
{
	minor_t		mnum;
	mm_unit_t	*un;
	mddb_recid_t	recid;
	mddb_type_t	typ1;
	int		err;
	int		i;
	set_t		setno;
	md_set_params_t	*msp = d;

	mnum = msp->mnum;

	mdclrerror(&msp->mde);

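	/* NO_OLD: fail (via msp->mde) if the unit already exists */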
	if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
		return (0);

	setno = MD_MIN2SET(mnum);

	typ1 = (mddb_type_t)md_getshared_key(setno,
	    mirror_md_ops.md_driver.md_drivername);

	/*
	 * Create the db record for this mdstruct
	 * We don't store incore elements on disk
	 */

	if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
#else
		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
		    MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
#endif
	} else {
		/*
		 * It's important to use the correct size here
		 */
		msp->size = sizeof (mm_unit32_od_t);
		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
		    MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
	}
	if (recid < 0)
		return (mddbstatus2error(&msp->mde, (int)recid,
		    mnum, setno));

	/* Resize to include incore fields */
	un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0);
	/*
	 * It is okay that we muck with the mdstruct here,
	 * since no one else will know about the mdstruct
	 * until we commit it. If we crash, the record will
	 * be automatically purged, since we haven't
	 * committed it yet.
	 */

	/* copy in the user's mdstruct */
	if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
	    (uint_t)msp->size, mode)) {
		mddb_deleterec_wrapper(recid);
		return (EFAULT);
	}
	/* All 64 bit metadevices only support EFI labels. */
	if (msp->options & MD_CRO_64BIT) {
		un->c.un_flag |= MD_EFILABEL;
	}

	un->c.un_revision |= MD_FN_META_DEV;
	MD_RECID(un)	= recid;
	MD_CAPAB(un)	= MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP;
	MD_PARENT(un)	= MD_NO_PARENT;

	for (i = 0; i < NMIRROR; i++) {
		struct mm_submirror	*sm;

		sm = &un->un_sm[i];
		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		/* ensure that the submirror is a metadevice */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (mdmderror(&msp->mde, MDE_INVAL_UNIT,
			    md_getminor(sm->sm_dev)));

		if (md_get_parent(sm->sm_dev) == MD_NO_PARENT)
			continue;

		/* the submirror is already parented; mirror creation fails */
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recid);
		return (mdmderror(&msp->mde, MDE_IN_USE,
		    md_getminor(sm->sm_dev)));
	}

	if (err = mirror_build_incore(un, 0)) {
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recid);
		return (err);
	}

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail--;

	mirror_commit(un, ALL_SUBMIRRORS, 0);
	md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
	    MD_SID(un));

	resync_start_timeout(setno);
	return (0);
}

static int
mirror_get(
	void		*migp,
	int		mode,
	IOLOCK		*lock
)
{
	mm_unit_t	*un;
	md_i_get_t	*migph = migp;

	mdclrerror(&migph->mde);

	if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL)
		return (0);

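	/* A size of zero is a query: return the required buffer size */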
	if (migph->size == 0) {
		migph->size = un->c.un_size;
		return (0);
	}

	if (migph->size < un->c.un_size) {
		return (EFAULT);
	}
	if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp,
	    un->c.un_size, mode))
		return (EFAULT);
	return (0);
}

static int
mirror_getdevs(
	void			*mgdp,
	int			mode,
	IOLOCK			*lock
)
{
	mm_unit_t		*un;
	md_dev64_t		*udevs;
	int			cnt;
	int			i;
	md_dev64_t		unit_dev;
	md_getdevs_params_t	*mgdph = mgdp;

	mdclrerror(&mgdph->mde);

	if ((un = mirror_getun(mgdph->mnum,
	    &mgdph->mde, RD_LOCK, lock)) == NULL)
		return (0);

	udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;

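	/* Copy out at most mgdph->cnt devices, but report the total count */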
	for (cnt = 0, i = 0; i < NMIRROR; i++) {
		if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
			continue;
		if (cnt < mgdph->cnt) {
			unit_dev = un->un_sm[i].sm_dev;
			if (md_getmajor(unit_dev) != md_major) {
				unit_dev = md_xlate_mini_2_targ(unit_dev);
				if (unit_dev == NODEV64)
					return (ENODEV);
			}

			if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs,
			    sizeof (*udevs), mode) != 0)
				return (EFAULT);
			++udevs;
		}
		++cnt;
	}

	mgdph->cnt = cnt;
	return (0);
}

static int
mirror_reset(
	md_i_reset_t	*mirp
)
{
	minor_t		mnum = mirp->mnum;
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	mdclrerror(&mirp->mde);

	if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL)
		return (0);

	if (MD_HAS_PARENT(un->c.un_parent)) {
		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
	}

	rw_enter(&md_unit_array_rw.lock, RW_WRITER);

	/* single thread */
	ui = MDI_UNIT(mnum);
	(void) md_unit_openclose_enter(ui);

	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		rw_exit(&md_unit_array_rw.lock);
		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
	}

	md_unit_openclose_exit(ui);

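	/*
	 * Unless a forced reset was requested, refuse to clear a mirror
	 * that has a submirror in a non-running state.
	 */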
	if (!mirp->force) {
		int	smi;
		for (smi = 0; smi < NMIRROR; smi++) {
			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
				continue;

			if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) {
				rw_exit(&md_unit_array_rw.lock);
				return (mdmderror(&mirp->mde,
				    MDE_C_WITH_INVAL_SM, mnum));
			}
		}
	}

	reset_mirror(un, mnum, 1);

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail++;

	/*
	 * If MN set, reset s_un_next so all nodes can have
	 * the same view of the next available slot when
	 * nodes are -w and -j
	 */
	if (MD_MNSET_SETNO(setno)) {
		(void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
	}

	rw_exit(&md_unit_array_rw.lock);
	return (0);
}

static int
mirror_get_geom(
	mm_unit_t	*un,
	struct dk_geom	*geomp
)
{
	md_get_geom((md_unit_t *)un, geomp);

	return (0);
}

static int
mirror_get_vtoc(
	mm_unit_t	*un,
	struct vtoc	*vtocp
)
{
	md_get_vtoc((md_unit_t *)un, vtocp);

	return (0);
}

static int
mirror_set_vtoc(
	mm_unit_t	*un,
	struct vtoc	*vtocp
)
{
	return (md_set_vtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_extvtoc(
	mm_unit_t	*un,
	struct extvtoc	*vtocp
)
{
	md_get_extvtoc((md_unit_t *)un, vtocp);

	return (0);
}

static int
mirror_set_extvtoc(
	mm_unit_t	*un,
	struct extvtoc	*vtocp
)
{
	return (md_set_extvtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_cgapart(
	mm_unit_t	*un,
	struct dk_map	*dkmapp
)
{
	md_get_cgapart((md_unit_t *)un, dkmapp);
	return (0);
}

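/*
 * mirror_getcomp_by_dev:
 * ----------------
 * Locate the submirror and component indices for the device given in
 * params->old_dev. The submirrors are first scanned for a matching dev_t;
 * if that fails, components that report NODEV are compared by their
 * namespace key instead.
 *
 * Returns:
 *	1	component found, *smi and *cip set
 *	0	failure, with the error recorded in params->mde
 */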
static int
mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params,
    int *smi, int *cip)
{
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	ms_comp_t		*comp;
	ms_unit_t		*mous;
	int			ci;
	int			i;
	int			compcnt;
	ms_cd_info_t		cd;
	void			(*get_dev)();
	md_dev64_t		dev = md_expldev(params->old_dev);
	md_error_t		*ep = &params->mde;
	minor_t			mnum = params->mnum;
	mdkey_t			devkey;
	int			nkeys;
	set_t			setno;
	side_t			side;

	setno = MD_MIN2SET(MD_SID(un));
	side = mddb_getsidenum(setno);

	if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0)
		return (mddeverror(ep, MDE_NAME_SPACE, dev));

	for (i = 0; i < NMIRROR; i++) {
		sm = &un->un_sm[i];
		smic = &un->un_smic[i];

		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		get_dev =
		    (void (*)())md_get_named_service(sm->sm_dev, 0,
		    "get device", 0);
		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);

		/*
		 * For each of the underlying stripe components get
		 * the info.
		 */
		for (ci = 0; ci < compcnt; ci++) {
			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
			if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) {
				*cip = ci;
				*smi = i;
				return (1);
			}
		}

		/*
		 * Now we rescan looking only for NODEV. If we find
		 * NODEV then we check the keys to see if it's a match.
		 *
		 * If no key was found to match dev, then there is
		 * no way to compare keys - so continue.
		 */
		if (nkeys == 0) {
			continue;
		}
		mous = MD_UNIT(md_getminor(sm->sm_dev));

		for (ci = 0; ci < compcnt; ci++) {

			comp = (struct ms_comp *)
			    ((void *)&((char *)mous)[mous->un_ocomp]);

			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

			if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) {
				comp += ci;
				if (comp->un_key == devkey) {
					if (nkeys > 1) {
						return (mddeverror(
						    ep, MDE_MULTNM, dev));
					}
					*cip = ci;
					*smi = i;
					return (1);
				}
			}
		}
	}
	return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev));
}

/*
 * comp_replace:
 * ----------------
 * Called to implement the component replace function
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
comp_replace(
	replace_params_t	*params,
	IOLOCK			*lock
)
{
	minor_t			mnum = params->mnum;
	set_t			setno;
	side_t			side;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	ms_unit_t		*ms_un;
	mdi_unit_t		*ms_ui;
	ms_comp_t		*comp;
	mm_submirror_t		*sm;
	md_dev64_t		smdev;
	mddb_recid_t		recids[6]; /* recids for stripe on SP */
	int			smi, ci;
	ms_new_dev_t		nd;
	int			(*repl_dev)();
	void			(*repl_done)();
	void			*repl_data;
	int			err = 0;
	ms_cd_info_t		cd;
	void			(*get_dev)();

	mdclrerror(&params->mde);

	if ((un = mirror_getun(mnum, &params->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		(void) mdmderror(&params->mde, MDE_IN_UNAVAIL_STATE, mnum);
		goto errexit;
	}

	/*
	 * replace cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		(void) mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum);
		goto errexit;
	}

	if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) {
		goto errexit;
	}

	if (un->un_nsm == 1) {
		(void) mdmderror(&params->mde, MDE_LAST_SM_RE, mnum);
		goto errexit;
	}

	if (mirror_other_sources(un, smi, ci, 0) != 0) {
		(void) mdcomperror(&params->mde, MDE_REPL_INVAL_STATE,
		    mnum, md_expldev(params->old_dev));
		goto errexit;
	}

	sm = &un->un_sm[smi];
	if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) {
		(void) mdmderror(&params->mde, MDE_ILLEGAL_SM_STATE, mnum);
		goto errexit;
	}

	get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
	    "get device", 0);
	(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

	repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0,
	    "replace device", 0);

	smdev = sm->sm_dev;
	ms_un = MD_UNIT(md_getminor(smdev));

	if (params->cmd == ENABLE_COMP) {
		md_dev64_t	this_dev;
		int		numkeys;
		mdkey_t		this_key;

		this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev :
		    cd.cd_orig_dev);
		setno = MD_MIN2SET(md_getminor(smdev));
		side = mddb_getsidenum(setno);
		comp = (struct ms_comp *)
		    ((void *)&((char *)ms_un)[ms_un->un_ocomp]);
		comp += ci;
		/*
		 * We trust the dev_t because we cannot determine the
		 * dev_t from the device id since a new disk is in the
		 * same location. Since this is a call from metareplace -e dx
		 * AND it is SCSI, a new dev_t is not generated. So the
		 * dev_t from the mddb is used. Before enabling the device
		 * we check to make sure that multiple entries for the same
		 * device do not exist in the namespace. If they do, we
		 * fail the ioctl.
		 * One of the many ways multiple entries in the name space
		 * can occur is if one removed the failed component in the
		 * stripe of a mirror and put in another disk that was part of
		 * another metadevice. After reboot metadevadm would correctly
		 * update the device name for the metadevice whose component
		 * has moved. However now in the metadb there are two entries
		 * for the same name (ctds) that belong to different
		 * metadevices. One is valid, the other is a ghost or "last
		 * known as" ctds.
		 */
		this_dev = md_getdevnum(setno, side,
		    comp->un_key, MD_TRUST_DEVT);

		/*
		 * Verify that multiple keys for the same
		 * dev_t don't exist
		 */

		if (md_getkeyfromdev(setno, side, this_dev,
		    &this_key, &numkeys) != 0) {
			(void) mddeverror(&params->mde, MDE_NAME_SPACE,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		/*
		 * Namespace has multiple entries
		 * for the same devt
		 */
		if (numkeys > 1) {
			(void) mddeverror(&params->mde, MDE_MULTNM,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		if ((numkeys == 0) || (comp->un_key != this_key)) {
			(void) mdcomperror(&params->mde, MDE_CANT_FIND_COMP,
			    mnum, this_dev);
			goto errexit;
		}

		if ((md_getmajor(this_dev) != md_major) &&
		    (md_devid_found(setno, side, this_key) == 1)) {
			if (md_update_namespace_did(setno, side,
			    this_key, &params->mde) != 0) {
				(void) mddeverror(&params->mde, MDE_NAME_SPACE,
				    this_dev);
				goto errexit;
			}
		}

		if (md_expldev(params->new_dev) != this_dev) {
			(void) mddeverror(&params->mde, MDE_FIX_INVAL_STATE,
			    md_expldev(params->new_dev));
			goto errexit;
		}

		/* in case of dryrun, don't actually do anything */
		if ((params->options & MDIOCTL_DRYRUN) == 0) {
			err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6,
			    &repl_done, &repl_data);
		}
	} else if ((params->options & MDIOCTL_DRYRUN) == 0) {
		nd.nd_dev = md_expldev(params->new_dev);
		nd.nd_key = params->new_key;
		nd.nd_start_blk = params->start_blk;
		nd.nd_nblks = params->number_blks;
		nd.nd_labeled = params->has_label;
		nd.nd_hs_id = 0;

		err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6,
		    &repl_done, &repl_data);
	}

	if (err != 0) {
		(void) mdcomperror(&params->mde, err, mnum,
		    md_expldev(params->new_dev));
		goto errexit;
	}
	/* In case of a dryrun we're done. */
	if (params->options & MDIOCTL_DRYRUN) {
		mdclrerror(&params->mde);
		return (0);
	}

	/* set_sm_comp_state() commits the modified records */
	set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT,
	    lock);

	(*repl_done)(sm->sm_dev, repl_data);

	/*
	 * If the mirror is open then we need to make sure that the submirror,
	 * on which the replace ran, is also open and if not then open it.
	 * This is only a concern for a single component sub-mirror stripe
	 * as it may not be open due to the failure of the single component.
	 *
	 * This check has to be done after the call to (*repl_done)
	 * as that function releases the writer lock on the submirror.
	 */
	if (md_unit_isopen(ui)) {
		minor_t ms_mnum = md_getminor(sm->sm_dev);

		ms_ui = MDI_UNIT(ms_mnum);

		if (!md_unit_isopen(ms_ui)) {
			/*
			 * Underlying submirror is not open so open it.
			 */
			if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) {
				mirror_openfail_console_info(un, smi, ci);
				goto errexit;
			}
		}
	}

	mirror_check_failfast(mnum);

	if (params->cmd == ENABLE_COMP) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	md_ioctl_writerexit(lock);
	/*
	 * Reset any saved resync location flags as we've now replaced the
	 * component. This means we have to resync the _whole_ component.
	 */
	un->un_rs_resync_done = un->un_rs_resync_2_do = 0;
	un->un_rs_type = MD_RS_NONE;
	mirror_resume_writes(un);
	if (!MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mirror_resync_unit(mnum, NULL, &params->mde, lock);
	mdclrerror(&params->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(params->options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}

/*
 * mirror_attach:
 * ----------------
 * Called to implement the submirror attach function
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
mirror_attach(
	md_att_struct_t	*att,
	IOLOCK		*lock
)
{
	minor_t			mnum = att->mnum;
	mm_unit_t		*un;
	md_unit_t		*su;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	minor_t			sm_mnum;
	mdkey_t			indx;
	set_t			setno;
	uint_t			options;

	/*
	 * This routine should not be called during upgrade.
	 */
	if (MD_UPGRADE) {
		return (0);
	}

	mdclrerror(&att->mde);
	options = att->options;

	if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	setno = MD_UN2SET(un);

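	/* Find the first unused submirror slot */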
	for (smi = 0; smi < NMIRROR; smi++)
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			break;

	if (smi == NMIRROR) {
		(void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum);
		goto errexit;
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = att->submirror;
	sm_mnum = md_getminor(sm_dev);

	if (md_get_parent(sm_dev) != MD_NO_PARENT) {
		(void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum);
		goto errexit;
	}

	if (md_unit_isopen(MDI_UNIT(sm_mnum))) {
		(void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum);
		goto errexit;
	}

	/* Check the size */
	su = (md_unit_t *)MD_UNIT(sm_mnum);
	if (un->c.un_total_blocks > su->c.un_total_blocks) {
		(void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum);
		goto errexit;
	}

	/* Don't attach a labeled sm to an unlabeled mirror */
	if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) {
		(void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum);
		goto errexit;
	}

	indx = md_setshared_name(setno,
	    ddi_major_to_name(md_getmajor(sm_dev)), 0L);

	/* Open the sm, only if the mirror is open */
	if (md_unit_isopen(MDI_UNIT(mnum))) {
		if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) {
			(void) md_remshared_name(setno, indx);
			(void) mdmderror(&att->mde, MDE_SM_OPEN_ERR,
			    md_getminor(att->submirror));
			goto errexit;
		}
		/* in dryrun mode, don't leave the device open */
		if (options & MDIOCTL_DRYRUN) {
			md_layered_close(sm_dev, MD_OFLG_NULL);
		}
	}

	/*
	 * After this point the checks are done and action is taken.
	 * So, clean up and return in case of dryrun.
	 */

	if (options & MDIOCTL_DRYRUN) {
		md_ioctl_writerexit(lock);
		mdclrerror(&att->mde);
		return (0);
	}

	sm->sm_key = att->key;
	sm->sm_dev = sm_dev;
	md_set_parent(sm_dev, MD_SID(un));
	mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1);
	build_submirror(un, smi, 0);
	un->un_nsm++;
	mirror_commit(un, SMI2BIT(smi), 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	mirror_resume_writes(un);
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		(void) mirror_resync_unit(mnum, NULL, &att->mde, lock);
	mdclrerror(&att->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}

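/*
 * reset_comp_states:
 * ----------------
 * Reset every component of the given submirror to the CS_OKAY state,
 * clearing the no-write flag and the last error count. Used on detach so
 * that no stale error state is left behind with the submirror.
 */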
void
reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic)
{
	int		compcnt;
	int		i;
	md_m_shared_t	*shared;

	compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm);
	for (i = 0; i < compcnt; i++) {
		shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
		    (sm->sm_dev, sm, i);

		shared->ms_state = CS_OKAY;
		shared->ms_flags &= ~MDM_S_NOWRITE;
		shared->ms_lasterrcnt = 0;
	}
}

/*
 * mirror_detach:
 * ----------------
 * Called to implement the submirror detach function
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_detach(
	md_detach_params_t	*det,
	IOLOCK			*lock
)
{
	minor_t			mnum = det->mnum;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	mm_submirror_t		*sm;
	mm_submirror_t		*old_sm;
	mm_submirror_t		*new_sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	md_unit_t		*su;
	sv_dev_t		sv;
	mddb_recid_t		recids[2];
	int			nsv = 0;
	int			smi_remove;
	mm_submirror_ic_t	*old_smic;
	mm_submirror_ic_t	*new_smic;

	mdclrerror(&det->mde);

	if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum));
	}
	/*
	 * detach cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		if (un->un_sm[smi].sm_dev == det->submirror) {
			smi_remove = smi;
			break;
		}
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum));
	}

	if (un->un_nsm == 1) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_LAST_SM, mnum));
	}

	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = sm->sm_dev;
	su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev));

	/*
	 * Need to pass in the extra record id,
	 * because mirror_commit() will not commit
	 * a sm (from the smmask) if the slot is unused.
	 * Which it is, since we are detaching.
	 */
	recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id;
	recids[1] = 0;

	mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach);
	/*
	 * If there are any erred components
	 * then make the detach fail and do not unparent the
	 * submirror.
	 */
	if (sm->sm_state == SMS_UNUSED) {
		/* reallow soft partitioning of submirror */
		MD_CAPAB(su) |= MD_CAN_SP;
		md_reset_parent(sm_dev);
		reset_comp_states(sm, smic);
		un->un_nsm--;
		/* Close the sm, only if the mirror is open */
		if (md_unit_isopen(MDI_UNIT(mnum)))
			md_layered_close(sm_dev, MD_OFLG_NULL);
		sv.setno = MD_UN2SET(un);
		sv.key = sm->sm_key;
		nsv = 1;
	} else
		(void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum);

	/*
	 * Perhaps the mirror changed its size due to this detach.
	 * (void) mirror_grow_unit(un, &mde);
	 */

	/*
	 * NOTE: We are passing the detached sm recid
	 * and not the smmask field. This is correct.
	 */
	mirror_commit(un, 0, recids);
	md_rem_names(&sv, nsv);
	if (sm->sm_state == SMS_UNUSED) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	/*
	 * Reshuffle the submirror devices in the array as we potentially
	 * have a dead record in the middle of it.
	 */
	for (smi = 0; nsv && (smi < NMIRROR); smi++) {
		if (smi < smi_remove) {
			continue;
		}
		if (smi > smi_remove) {
			old_sm = &un->un_sm[smi];
			new_sm = &un->un_sm[smi - 1];
			new_sm->sm_key = old_sm->sm_key;
			new_sm->sm_dev = old_sm->sm_dev;
			new_sm->sm_state = old_sm->sm_state;
			new_sm->sm_flags = old_sm->sm_flags;
			new_sm->sm_shared = old_sm->sm_shared;
			new_sm->sm_hsp_id = old_sm->sm_hsp_id;
			new_sm->sm_timestamp = old_sm->sm_timestamp;
			bzero(old_sm, sizeof (mm_submirror_t));
			old_smic = &un->un_smic[smi];
			new_smic = &un->un_smic[smi - 1];
			bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t));
			bzero(old_smic, sizeof (mm_submirror_ic_t));
		}
	}
	mirror_commit(un, 0, NULL);
	mirror_resume_writes(un);
	return (0);
}

/*
 * mirror_offline:
 * ----------------
 * Called to implement the submirror offline function
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_offline(
	md_i_off_on_t	*miop,
	IOLOCK		*lock
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	mdi_unit_t		*ui = MDI_UNIT(mnum);

	mdclrerror(&miop->mde);

	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	/*
	 * offline cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	/*
	 * Reject mirror_offline if ABR is set
	 */
	if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) {
		mirror_resume_writes(un);
		return (mderror(&miop->mde, MDE_ABR_SET));
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum));
	}
	mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1);
	mirror_resume_writes(un);

	MD_STATUS(un) |= MD_UN_OFFLINE_SM;
	mirror_commit(un, NO_SUBMIRRORS, 0);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}

/*
 * mirror_online:
 * ----------------
 * Called to implement the submirror online function
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_online(
	md_i_off_on_t	*miop,
	IOLOCK		*lock
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&miop->mde);

	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}
	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	if (!SMS_IS(sm, SMS_OFFLINE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	/*
	 * online cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1);
	mirror_commit(un, NO_SUBMIRRORS, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	/* for MN sets, re-read the resync record from disk */
	if (MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);

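	/* Use the dirty-region bitmap as the optimized resync target map */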
	bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm,
	    howmany(un->un_rrd_num, NBBY));
	MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
	sm->sm_flags |= MD_SM_RESYNC_TARGET;
	mirror_resume_writes(un);
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		return (mirror_resync_unit(mnum, NULL, &miop->mde, lock));
	else
		return (0);
}

int
mirror_grow_unit(
	mm_unit_t		*un,
	md_error_t		*ep
)
{
	md_unit_t		*su;
	mm_submirror_t		*sm;
	int			smi;
	diskaddr_t		total_blocks;
	diskaddr_t		current_tb;
	int			spc;		/* sectors per cylinder */
	minor_t			mnum = MD_SID(un);

	/*
	 * grow_unit cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started. Set
	 * flag to indicate GROW_PENDING and once the resync is complete
	 * the grow_unit function will be executed.
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		MD_STATUS(un) |= MD_UN_GROW_PENDING;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un)));
	}

	/*
	 * Find the smallest submirror
	 */
	total_blocks = 0;
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		sm = &un->un_sm[smi];
		/*
		 * Growth is not possible if there are one or more
		 * submirrors made up of non-metadevices.
		 */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (0);

		su = MD_UNIT(md_getminor(sm->sm_dev));
		if ((total_blocks == 0) ||
		    (su->c.un_total_blocks < total_blocks))
			total_blocks = su->c.un_total_blocks;
	}

	/*
	 * If the smallest submirror is not larger
	 * than the mirror, we are all done.
	 */
	if (total_blocks <= un->c.un_total_blocks)
		return (0);

	/*
	 * Growing the mirror now.
	 * First: Round down the actual_tb to be a multiple
	 * of nheads * nsects.
	 */
	spc = un->c.un_nhead * un->c.un_nsect;
	current_tb = (total_blocks/spc) * spc;

	un->c.un_total_blocks = current_tb;
	md_nblocks_set(mnum, un->c.un_total_blocks);
	un->c.un_actual_tb = total_blocks;

	/* Is the mirror growing from a 32 bit device to a 64 bit device? */
	if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
	    (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) {
#if defined(_ILP32)
		return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum));
#else
		mddb_type_t	typ1;
		mddb_recid_t	recid;
		set_t		setno;
		mddb_recid_t	old_recid = un->c.un_record_id;
		mddb_recid_t	old_vtoc;
		mddb_de_ic_t    *dep, *old_dep;
		md_create_rec_option_t	options;

		/* yup, new device size. So we need to replace the record */
		typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un),
		    mirror_md_ops.md_driver.md_drivername);
		setno = MD_MIN2SET(mnum);

		/* Preserve the friendly name properties of the growing unit */
		options = MD_CRO_64BIT | MD_CRO_MIRROR;
		if (un->c.un_revision & MD_FN_META_DEV)
			options |= MD_CRO_FN;
		recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1,
		    MIRROR_REC, options, setno);
		/* Resize to include incore fields */
		un->c.un_revision |= MD_64BIT_META_DEV;
		/* All 64 bit metadevices only support EFI labels. */
		un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device had a vtoc record attached to it, we remove
		 * the vtoc record, because the layout has changed completely.
		 */
		old_vtoc = un->c.un_vtoc_id;
		if (old_vtoc != 0) {
			un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
		MD_RECID(un) = recid;
		dep = mddb_getrecdep(recid);
		old_dep = mddb_getrecdep(old_recid);
		kmem_free(dep->de_rb_userdata, dep->de_reqsize);
		dep->de_rb_userdata = old_dep->de_rb_userdata;
		dep->de_reqsize = old_dep->de_reqsize;
		dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic;
		dep->de_icreqsize = old_dep->de_icreqsize;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		old_dep->de_rb_userdata = NULL;
		old_dep->de_rb_userdata_ic = NULL;
		mddb_deleterec_wrapper(old_recid);
		/*
		 * If there was a vtoc record, it is no longer needed, because
		 * a new efi record has been created for this un.
		 */
		if (old_vtoc != 0) {
			mddb_deleterec_wrapper(old_vtoc);
		}
#endif
	}

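	/*
	 * If the grown size would push the number of resync regions past
	 * MD_MAX_NUM_RR, resize the existing regions rather than adding
	 * more of them.
	 */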
	if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) {
		if (mirror_resize_resync_regions(un, current_tb)) {
			return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
		}
		mirror_check_failfast(mnum);
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (0);
	}

	if (mirror_add_resync_regions(un, current_tb)) {
		return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
	}

	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}

static int
mirror_grow(
	void			*mgp,
	IOLOCK			*lock
)
{
	mm_unit_t		*un;
	md_grow_params_t	*mgph = mgp;

	mdclrerror(&mgph->mde);

	if ((un = mirror_getun(mgph->mnum,
	    &mgph->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (MD_STATUS(un) & MD_UN_GROW_PENDING)
		return (0);

	return (mirror_grow_unit(un, &mgph->mde));
}

static int
mirror_change(
	md_mirror_params_t	*mmp,
	IOLOCK			*lock
)
{
	mm_params_t		*pp = &mmp->params;
	mm_unit_t		*un;

	mdclrerror(&mmp->mde);

	if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (pp->change_read_option)
		un->un_read_option = pp->read_option;

	if (pp->change_write_option)
		un->un_write_option = pp->write_option;

	if (pp->change_pass_num)
		un->un_pass_num = pp->pass_num;

	mirror_commit(un, NO_SUBMIRRORS, 0);

	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}

static int
mirror_get_resync(
	md_resync_ioctl_t	*ri
)
{
	minor_t			mnum = ri->ri_mnum;
	mm_unit_t		*un;
	u_longlong_t		percent;
	uint_t			cnt;
	uint_t			rr;
	diskaddr_t		d;

	mdclrerror(&ri->mde);

	if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL)
		return (0);

	ri->ri_flags = 0;
	if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) {
		ri->ri_percent_done = 0;
		ri->ri_percent_dirty = 0;
		return (0);
	}

	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) {
		if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
			ri->ri_flags |= MD_RI_INPROGRESS;
		/* Return state of resync thread */
		ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK);
		d = un->un_rs_resync_2_do;
		if (d) {
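			/*
			 * Large units report progress in tenths of a
			 * percent (0-1000); smaller ones in whole
			 * percent (0-100).
			 */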
			percent = un->un_rs_resync_done;
			if (un->c.un_total_blocks >
			    MD_MAX_BLKS_FOR_SMALL_DEVS) {
				percent *= 1000;
				percent /= d;
				if (percent > 1000)
					percent = 1000;
			} else {
				percent *= 100;
				percent /= d;
			}
			ri->ri_percent_done = (int)percent;
		} else {
			ri->ri_percent_done = 0;
		}
	}
	if (un->un_nsm < 2) {
		ri->ri_percent_dirty = 0;
		return (0);
	}
	cnt = 0;
	for (rr = 0; rr < un->un_rrd_num; rr++)
		if (IS_REGION_DIRTY(rr, un))
			cnt++;
	d = un->un_rrd_num;
	if (d) {
		percent = cnt;
		percent *= 100;
		percent += d - 1;		/* round up */
		percent /= d;
	} else
		percent = 0;
	ri->ri_percent_dirty = (int)percent;
	return (0);
}

/*
 * mirror_get_owner:
 * ----------------
 * Called to obtain the current owner of a mirror.
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	EINVAL	metadevice does not exist or is not a member of a multi-owned
 *		set.
 */
static int
mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock)
{
	mm_unit_t	*un;
	set_t		setno;

	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
		return (EINVAL);

	setno = MD_UN2SET(un);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	p->d.owner = un->un_mirror_owner;
	return (0);
}

/*
 * mirror_choose_owner_thread:
 * --------------------------
 * Called to send a CHOOSE_OWNER message to the commd running on the master
 * node. This needs to run in a separate context so that mutex livelock is
 * avoided. This can occur because the original request is issued from a call
 * to metaioctl() which acquires the global ioctl lock, calls down into the
 * mirror_ioctl code and then attempts to mdmn_ksend_message() to the master
 * node. As the handler for the choose_owner message needs to send another
 * ioctl through the metaioctl() entry point, any other use (by rpc.metad or
 * mdcommd checking on set ownership) will deadlock the system, leading to
 * cluster reconfiguration timeouts and eventually a node or (at worst) a
 * cluster-wide panic.
 */
static void
mirror_choose_owner_thread(md_mn_msg_chooseid_t *msg)
{
	int		rval;
	md_mn_kresult_t	*kres;
	set_t		setno = MD_MIN2SET(msg->msg_chooseid_mnum);

	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER,
	    MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg,
	    sizeof (md_mn_msg_chooseid_t), kres);
	if (!MDMN_KSEND_MSG_OK(rval, kres)) {
		mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER");
		cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER");
	}

	kmem_free(kres, sizeof (md_mn_kresult_t));
	kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
	thread_exit();
}

/*
 * mirror_owner_thread:
 * -------------------
 * Called to request an ownership change from a thread context. This issues
 * a mdmn_ksend_message() and then completes the appropriate ownership change
 * on successful completion of the message transport.
 * The originating application must poll for completion on the 'flags' member
 * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
 * Success is marked by a return value of MD_MN_MM_RES_OK, failure by
 * MD_MN_MM_RES_FAIL.
 */
static void
mirror_owner_thread(md_mn_req_owner_t *ownp)
{
	int		rval;
	set_t		setno = MD_MIN2SET(ownp->mnum);
	mm_unit_t	*un = MD_UNIT(ownp->mnum);
	md_mn_kresult_t	*kresult;
	md_mps_t	*ps1;

	un->un_mirror_owner_status = 0;

	mutex_enter(&un->un_owner_mx);
	un->un_owner_state |= MM_MN_OWNER_SENT;
	mutex_exit(&un->un_owner_mx);

	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER,
	    MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t),
	    kresult);

	if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
		/*
		 * Message transport layer failed. Return the failure code to
		 * the application.
		 */
		mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER");
		mutex_enter(&un->un_owner_mx);
		un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL;
	} else {
		/*
		 * Ownership change succeeded. Update the in-core version of
		 * the mirror owner.
		 */
		mutex_enter(&un->un_owner_mx);
		if (un->un_owner_state & MM_MN_BECOME_OWNER) {
			un->un_mirror_owner = md_mn_mynode_id;
			/* Set the node owner of the un_rr_dirty record */
			if (un->un_rr_dirty_recid)
				(void) mddb_setowner(un->un_rr_dirty_recid,
				    md_mn_mynode_id);
			/*
			 * Release the block on the current resync region if it
			 * is blocked
			 */
			ps1 = un->un_rs_prev_overlap;
			if ((ps1 != NULL) &&
			    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
				mirror_overlap_tree_remove(ps1);
		}

		un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_OK;

		/* Restart the resync thread if it was previously blocked */
		if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) {
			mutex_enter(&un->un_rs_thread_mx);
			un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER;
			cv_signal(&un->un_rs_thread_cv);
			mutex_exit(&un->un_rs_thread_mx);
		}
	}
	kmem_free(kresult, sizeof (md_mn_kresult_t));
	kmem_free(ownp, sizeof (md_mn_req_owner_t));
	thread_exit();
}

/*
 * mirror_set_owner:
 * ----------------
 * Called to change the owner of a mirror to the specified node. If we are
 * not the owner of the mirror, we do nothing apart from updating the in-core
 * ownership. It can also be used to choose a new owner for the resync of a
 * mirror; this case is specified by the flag MD_MN_MM_CHOOSE_OWNER, see
 * below.
 *
 * The p->d.flags bitfield controls how subsequent ownership changes will be
 * handled:
 *	MD_MN_MM_SPAWN_THREAD
 *		a separate thread is created which emulates the behaviour of
 *		become_owner() [mirror.c]. This is needed when changing the
 *		ownership from user context as there needs to be a controlling
 *		kernel thread which updates the owner info on the originating
 *		node. Successful completion of the mdmn_ksend_message() means
 *		that the owner field can be changed.
 *
 *	MD_MN_MM_PREVENT_CHANGE
 *		Disallow any change of ownership once this ownership change has
 *		been processed. The only way of changing the owner away from
 *		the p->d.owner node specified in the call is to issue a request
 *		with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to
 *		become owner from a different node while the PREVENT_CHANGE
 *		is in operation will result in an EAGAIN return value.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE set.
 *
 *	MD_MN_MM_ALLOW_CHANGE
 *		Allow the owner to be changed by a subsequent request.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE cleared.
 *
 *	MD_MN_MM_CHOOSE_OWNER
 *		Choose a new owner for a mirror resync. In this case, the new
 *		owner argument is not used. The selection of a new owner
 *		is a round robin allocation using a resync owner count. This
 *		ioctl passes this value in a message to the master node
 *		which uses it to select a node from the node list and then
 *		sends it a message to become the owner.
 *
 * If we are the current owner, we must stop further i/o from being scheduled
 * and wait for any pending i/o to drain. We wait for any in-progress resync
 * bitmap updates to complete and we can then set the owner. If an update to
 * the resync bitmap is attempted after this we simply don't write this out to
 * disk until the ownership is restored.
 *
 * If we are the node that wants to become the owner we update the in-core
 * owner and return. The i/o that initiated the ownership change will complete
 * on successful return from this ioctl.
 *
 * Return Value:
 *	0		Success
 *	EINVAL		Invalid unit referenced
 *	EAGAIN		Ownership couldn't be transferred away or change of
 *			ownership is prevented. Caller should retry later on.
 */
1782 static int
mirror_set_owner(md_set_mmown_params_t * p,IOLOCK * lock)1783 mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock)
1784 {
1785 	mdi_unit_t	*ui;
1786 	mm_unit_t	*un;
1787 	set_t		setno;
1788 
1789 	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
1790 		return (EINVAL);
1791 	ui = MDI_UNIT(p->d.mnum);
1792 	setno = MD_MIN2SET(p->d.mnum);
1793 	if (!MD_MNSET_SETNO(setno)) {
1794 		return (EINVAL);
1795 	}
1796 
1797 	/*
1798 	 * If we are choosing a new resync owner, send a message to the master
1799 	 * to make the choice.
1800 	 */
1801 	if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) {
1802 		/* Release ioctl lock before we call ksend_message() */
1803 		md_ioctl_readerexit(lock);
1804 		/* If we're resetting the owner pass the node id in */
1805 		if (p->d.owner != MD_MN_MIRROR_UNOWNED) {
1806 			return (mirror_choose_owner(un, &p->d));
1807 		} else {
1808 			return (mirror_choose_owner(un, NULL));
1809 		}
1810 	}
1811 
1812 	/*
1813 	 * Check for whether we have to spawn a thread to issue this request.
1814 	 * If set we issue a mdmn_ksend_message() to cause the appropriate
1815 	 * ownership change. On completion of this request the calling
1816 	 * application _must_ poll the structure 'flags' field to determine the
1817 	 * result of the request. All this is necessary until we have true
1818 	 * multi-entrant ioctl support.
1819 	 * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can
1820 	 * be ignored.
1821 	 */
1822 	if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) {
1823 		md_mn_req_owner_t	*ownp;
1824 		ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP);
1825 		p->d.flags &= ~MD_MN_MM_SPAWN_THREAD;
1826 		bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t));
1827 		if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp,
1828 		    0, &p0, TS_RUN, 60) == NULL) {
1829 			kmem_free(ownp, sizeof (md_mn_req_owner_t));
1830 			return (EFAULT);
1831 		} else {
1832 			return (0);
1833 		}
1834 	}
1835 
1836 	/*
1837 	 * If setting owner to NULL, this is being done because the owner has
1838 	 * died and therefore we set OPT_NOT_DONE to ensure that the
1839 	 * mirror is marked as "Needs Maintenance" and that an optimized
1840 	 * resync will be done when we resync the mirror, Also clear the
1841 	 * PREVENT_CHANGE flag and remove the last resync region from the
1842 	 * overlap tree.
1843 	 */
1844 	if (p->d.owner == 0) {
1845 		md_mps_t	*ps;
1846 		int		i;
1847 
1848 		md_ioctl_readerexit(lock);
1849 		un = md_ioctl_writerlock(lock, ui);
1850 		/*
1851 		 * If the ABR capability is not set and the pass_num is non-zero,
1852 		 * there is a need to perform an optimized resync.
1853 		 * Therefore set OPT_NOT_DONE, set up the resync_bm and set
1854 		 * the submirrors as resync targets.
1855 		 */
1856 		if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) {
1857 			MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
1858 
1859 			(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);
1860 			bcopy((caddr_t)un->un_dirty_bm,
1861 			    (caddr_t)un->un_resync_bm,
1862 			    howmany(un->un_rrd_num, NBBY));
1863 			for (i = 0; i < NMIRROR; i++) {
1864 				if ((SUBMIRROR_IS_READABLE(un, i)) ||
1865 				    SMS_BY_INDEX_IS(un, i,
1866 				    SMS_OFFLINE_RESYNC))
1867 					un->un_sm[i].sm_flags |=
1868 					    MD_SM_RESYNC_TARGET;
1869 			}
1870 		}
1871 		mutex_enter(&un->un_owner_mx);
1872 		un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE;
1873 		mutex_exit(&un->un_owner_mx);
1874 		ps = un->un_rs_prev_overlap;
1875 		if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
1876 			mirror_overlap_tree_remove(ps);
1877 			ps->ps_firstblk = 0;
1878 			ps->ps_lastblk = 0;
1879 		}
1880 		md_ioctl_writerexit(lock);
1881 		un = md_ioctl_readerlock(lock, ui);
1882 	}
1883 
1884 	mutex_enter(&un->un_owner_mx);
1885 	if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) {
1886 		/*
1887 		 * If we are not trying to become owner ourselves check
1888 		 * to see if we have to change the owner
1889 		 */
1890 		if (un->un_mirror_owner == p->d.owner) {
1891 			/*
1892 			 * No need to change the owner;
1893 			 * just clear/set the PREVENT_CHANGE bit.
1894 			 */
1895 			if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) {
1896 				un->un_owner_state |= MM_MN_PREVENT_CHANGE;
1897 			} else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) {
1898 				un->un_owner_state &= ~MM_MN_PREVENT_CHANGE;
1899 			}
1900 			mutex_exit(&un->un_owner_mx);
1901 			return (0);
1902 		}
1903 	}
1904 
1905 	/*
1906 	 * Disallow the ownership change if we were previously asked to
1907 	 * prevent it. This can only be reset by issuing a request with
1908 	 * MD_MN_MM_ALLOW_CHANGE set in the flags field.
1909 	 */
1910 	if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) &&
1911 	    !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) {
1912 		mutex_exit(&un->un_owner_mx);
1913 #ifdef DEBUG
1914 		cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become "
1915 		    "owner while node %x has exclusive access to %s",
1916 		    p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un)));
1917 #endif
1918 		return (EAGAIN);
1919 	}
1920 	if (p->d.owner == md_mn_mynode_id) {
1921 		/*
1922 		 * I'm becoming the mirror owner. Flag this so that the
1923 		 * message sender can change the in-core owner when all
1924 		 * nodes have processed this message.
1925 		 */
1926 		un->un_owner_state &= ~MM_MN_OWNER_SENT;
1927 		un->un_owner_state |= MM_MN_BECOME_OWNER;
1928 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1929 		    MM_MN_PREVENT_CHANGE : 0;
1930 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1931 		    ~MM_MN_PREVENT_CHANGE : ~0;
1932 
1933 		mutex_exit(&un->un_owner_mx);
1934 	} else if ((un->un_mirror_owner == md_mn_mynode_id) ||
1935 	    un->un_owner_state & MM_MN_BECOME_OWNER) {
1936 		mutex_exit(&un->un_owner_mx);
1937 
1938 		/*
1939 		 * I'm releasing ownership. Block and drain i/o. This also
1940 		 * blocks until any in-progress resync record update completes.
1941 		 */
1942 		md_ioctl_readerexit(lock);
1943 		un = md_ioctl_writerlock(lock, ui);
1944 		/* Block the resync thread */
1945 		mutex_enter(&un->un_rs_thread_mx);
1946 		un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER;
1947 		mutex_exit(&un->un_rs_thread_mx);
1948 		mutex_enter(&un->un_owner_mx);
1949 		un->un_mirror_owner = p->d.owner;
1950 
1951 		/* Sets node owner of un_rr_dirty record */
1952 		if (un->un_rr_dirty_recid)
1953 			(void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1954 		un->un_owner_state &= ~MM_MN_BECOME_OWNER;
1955 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1956 		    MM_MN_PREVENT_CHANGE : 0;
1957 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1958 		    ~MM_MN_PREVENT_CHANGE : ~0;
1959 		mutex_exit(&un->un_owner_mx);
1960 		/*
1961 		 * Allow further i/o to occur. Any write() from another node
1962 		 * will now cause another ownership change to occur.
1963 		 */
1964 		md_ioctl_writerexit(lock);
1965 	} else {
1966 		/* Update the in-core mirror owner */
1967 		un->un_mirror_owner = p->d.owner;
1968 		/* Sets node owner of un_rr_dirty record */
1969 		if (un->un_rr_dirty_recid)
1970 			(void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1971 		un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1972 		    MM_MN_PREVENT_CHANGE : 0;
1973 		un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1974 		    ~MM_MN_PREVENT_CHANGE : ~0;
1975 		mutex_exit(&un->un_owner_mx);
1976 	}
1977 	return (0);
1978 }
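
/*
 * Example (illustrative only, not part of the original file): a
 * user-level caller would request an ownership change roughly as
 * below. The /dev/md/admin path and the error handling are
 * assumptions; the parameter fields match the handler above.
 *
 *	md_set_mmown_params_t p;
 *	int fd = open("/dev/md/admin", O_RDWR);
 *
 *	(void) memset(&p, 0, sizeof (p));
 *	p.d.mnum = mnum;			(mirror to operate on)
 *	p.d.owner = node_id;			(node that should own it)
 *	p.d.flags = MD_MN_MM_SPAWN_THREAD;	(complete asynchronously)
 *	(void) ioctl(fd, MD_MN_SET_MM_OWNER, &p);
 *
 * With MD_MN_MM_SPAWN_THREAD set, the caller must then poll for the
 * result via MD_MN_MM_OWNER_STATUS (see mirror_get_owner_status).
 */
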
1979 /*
1980  * mirror_allocate_hotspare:
1981  * ------------------------
1982  * Called to allocate a hotspare for a failed component. This function is
1983  * called by the MD_MN_ALLOCATE_HOTSPARE ioctl.
1984  */
1985 static int
1986 mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp)
1987 {
1988 	set_t		setno;
1989 	mm_unit_t	*un;
1990 
1991 #ifdef DEBUG
1992 	if (mirror_debug_flag)
1993 		printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n",
1994 		    p->mnum, p->sm, p->comp);
1995 #endif
1996 
1997 	if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
1998 		return (EINVAL);
1999 
2000 	/* This function is only valid for a multi-node set */
2001 	setno = MD_MIN2SET(p->mnum);
2002 	if (!MD_MNSET_SETNO(setno)) {
2003 		return (EINVAL);
2004 	}
2005 	(void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT,
2006 	    p->hs_id, lockp);
2007 	md_ioctl_writerexit(lockp);
2008 	return (0);
2009 }
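
/*
 * Example (illustrative only): the request names the failed component
 * by submirror and component index; 'fd' is an assumed descriptor for
 * the metadevice admin node.
 *
 *	md_alloc_hotsp_params_t hp;
 *
 *	(void) memset(&hp, 0, sizeof (hp));
 *	hp.mnum = mnum;		(mirror unit)
 *	hp.sm = smi;		(submirror index)
 *	hp.comp = ci;		(failed component in that submirror)
 *	hp.hs_id = hs_id;	(hotspare id passed through to the handler)
 *	(void) ioctl(fd, MD_MN_ALLOCATE_HOTSPARE, &hp);
 *
 * The handler forwards these to check_comp_4_hotspares() with
 * MD_HOTSPARE_NO_XMIT, presumably so the request, which already
 * arrived by message, is not re-broadcast.
 */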
2010 
2011 /*
2012  * mirror_get_owner_status:
2013  * -----------------------
2014  * Return the status of a previously issued ioctl to change ownership. This is
2015  * required for soft-partition support as the request to change mirror owner
2016  * needs to be run from a separate daemon thread.
2017  *
2018  * Returns:
2019  *	0	Success (contents of un_mirror_owner_status placed in 'flags')
2020  *	EINVAL	Invalid unit
2021  */
2022 static int
2023 mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock)
2024 {
2025 	mm_unit_t	*un;
2026 	set_t		setno;
2027 
2028 	if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL)
2029 		return (EINVAL);
2030 
2031 	setno = MD_MIN2SET(p->mnum);
2032 	if (!MD_MNSET_SETNO(setno)) {
2033 		return (EINVAL);
2034 	}
2035 
2036 	p->flags = un->un_mirror_owner_status;
2037 	return (0);
2038 }
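
/*
 * Example (illustrative only): polling for the result of an
 * asynchronous MD_MN_SET_MM_OWNER request. The result bits placed in
 * 'flags' come from un_mirror_owner_status and are driver-defined;
 * the loop below only assumes that a completion bit is eventually set.
 *
 *	md_mn_own_status_t s;
 *
 *	(void) memset(&s, 0, sizeof (s));
 *	s.mnum = mnum;
 *	do {
 *		(void) ioctl(fd, MD_MN_MM_OWNER_STATUS, &s);
 *	} while (!(s.flags & <completion bits>));
 */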
2039 
2040 /*
2041  * mirror_set_state:
2042  * ---------------
2043  * Called to set the state of the component of a submirror to the specified
2044  * value. This function is called by the MD_MN_SET_STATE ioctl.
2045  */
2046 static int
2047 mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp)
2048 {
2049 	mm_unit_t		*un;
2050 	mm_submirror_t		*sm;
2051 	mm_submirror_ic_t	*smic;
2052 	md_m_shared_t		*shared;
2053 	set_t			setno;
2054 
2055 #ifdef DEBUG
2056 	if (mirror_debug_flag)
2057 		printf("mirror_set_state: mnum,sm,comp,state,hs_id = %x, "
2058 		    "%x, %x, %x, %x\n", p->mnum, p->sm, p->comp,
2059 		    p->state, p->hs_id);
2060 #endif
2061 	if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
2062 		return (EINVAL);
2063 
2064 	/* This function is only valid for a multi-node set */
2065 	setno = MD_MIN2SET(p->mnum);
2066 	if (!MD_MNSET_SETNO(setno)) {
2067 		return (EINVAL);
2068 	}
2069 	sm = &un->un_sm[p->sm];
2070 	smic = &un->un_smic[p->sm];
2071 
2072 	/* Set state in component and update ms_flags */
2073 	shared = (md_m_shared_t *)
2074 	    (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp);
2075 	/*
2076 	 * If a CS_ERRED state is being sent, verify that the sender
2077 	 * has the same view of the component that this node currently has.
2078 	 *
2079 	 * There is a case where the sender was sending a CS_ERRED when a
2080 	 * component was in error, but before the sender returns from
2081 	 * ksend_message the component has been hotspared and resync'd.
2082 	 *
2083 	 * In this case, the hs_id will be different from the shared ms_hs_id,
2084 	 * so the component has already been hotspared.  Just return in this
2085 	 * case.
2086 	 */
2087 	if (p->state == CS_ERRED) {
2088 		if (shared->ms_hs_id != p->hs_id) {
2089 #ifdef DEBUG
2090 			if (mirror_debug_flag) {
2091 				printf("mirror_set_state: short circuit "
2092 				    "hs_id=0x%x, ms_hs_id=0x%x\n",
2093 				    p->hs_id, shared->ms_hs_id);
2094 			}
2095 #endif
2096 			/* release the block on writes to the mirror */
2097 			mirror_resume_writes(un);
2098 			md_ioctl_writerexit(lockp);
2099 			return (0);
2100 		}
2101 	}
2102 
2103 	/*
2104 	 * If the device is newly errored then make sure that it is
2105 	 * closed. Closing the device allows for the RCM framework
2106 	 * to unconfigure the device if required.
2107 	 */
2108 	if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) &&
2109 	    (shared->ms_flags & MDM_S_ISOPEN)) {
2110 		void		(*get_dev)();
2111 		ms_cd_info_t	cd;
2112 
2113 		get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
2114 		    "get device", 0);
2115 		(void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd);
2116 
2117 		md_layered_close(cd.cd_dev, MD_OFLG_NULL);
2118 		shared->ms_flags &= ~MDM_S_ISOPEN;
2119 	}
2120 
2121 	shared->ms_state = p->state;
2122 	uniqtime32(&shared->ms_timestamp);
2123 
2124 	if (p->state == CS_ERRED) {
2125 		shared->ms_flags |= MDM_S_NOWRITE;
2126 	} else {
2127 		shared->ms_flags &= ~MDM_S_NOWRITE;
2128 	}
2129 	shared->ms_flags &= ~MDM_S_IOERR;
2130 	un->un_changecnt++;
2131 	shared->ms_lasterrcnt = un->un_changecnt;
2132 
2133 	/* Update state in submirror */
2134 	mirror_set_sm_state(sm, smic, SMS_RUNNING, 0);
2135 	/*
2136 	 * Commit the state change to the metadb, only the master will write
2137 	 * to disk
2138 	 */
2139 	mirror_commit(un, SMI2BIT(p->sm), 0);
2140 
2141 	/* release the block on writes to the mirror */
2142 	mirror_resume_writes(un);
2143 
2144 	/* generate NOTIFY events for error state changes */
2145 	if (p->state == CS_ERRED) {
2146 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
2147 		    MD_UN2SET(un), MD_SID(un));
2148 	} else if (p->state == CS_LAST_ERRED) {
2149 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
2150 		    MD_UN2SET(un), MD_SID(un));
2151 	}
2152 	md_ioctl_writerexit(lockp);
2153 	return (0);
2154 }
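
/*
 * Illustrative note (assumption): the hs_id short-circuit above relies
 * on the sender capturing the component's hotspare id at the time the
 * error was observed, e.g.
 *
 *	p.hs_id = shared->ms_hs_id;	(view when CS_ERRED was seen)
 *	(then send MD_MN_SET_STATE via mdmn_ksend_message())
 *
 * If a hotspare replaces the component before the message is
 * processed, ms_hs_id no longer matches and the stale state change
 * is discarded.
 */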
2155 
2156 /*
2157  * mirror_suspend_writes:
2158  * ---------------------
2159  * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
2160  * tested in mirror_write_strategy, and, if set, all writes are blocked.
2161  * This function is called by the MD_MN_SUSPEND_WRITES ioctl.
2162  */
2163 static int
2164 mirror_suspend_writes(md_suspend_wr_params_t *p)
2165 {
2166 	set_t		setno;
2167 	mm_unit_t	*un;
2168 
2169 #ifdef DEBUG
2170 	if (mirror_debug_flag)
2171 		printf("mirror_suspend_writes: mnum = %x\n", p->mnum);
2172 #endif
2173 	if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL)
2174 		return (EINVAL); /* No unit */
2175 
2176 	/* This function is only valid for a multi-node set */
2177 	setno = MD_MIN2SET(p->mnum);
2178 	if (!MD_MNSET_SETNO(setno)) {
2179 		return (EINVAL);
2180 	}
2181 
2182 	/*
2183 	 * Mark the resync as blocked. This will stop any currently running
2184 	 * resync thread and will prevent a new resync from attempting to
2185 	 * perform i/o.
2186 	 */
2187 	mutex_enter(&un->un_rs_thread_mx);
2188 	un->un_rs_thread_flags |= MD_RI_BLOCK;
2189 	mutex_exit(&un->un_rs_thread_mx);
2190 
2191 	mutex_enter(&un->un_suspend_wr_mx);
2192 	un->un_suspend_wr_flag = 1;
2193 	mutex_exit(&un->un_suspend_wr_mx);
2194 
2195 	return (0);
2196 }
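
/*
 * For reference, the consumer side of this protocol: a writer in
 * mirror_write_strategy() is expected to block on un_suspend_wr_cv
 * until mirror_resume_writes() clears the flag and broadcasts. A
 * minimal sketch (the exact placement in the write path is assumed):
 *
 *	mutex_enter(&un->un_suspend_wr_mx);
 *	while (un->un_suspend_wr_flag)
 *		cv_wait(&un->un_suspend_wr_cv, &un->un_suspend_wr_mx);
 *	mutex_exit(&un->un_suspend_wr_mx);
 */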
2197 
2198 /*
2199  * mirror_set_capability:
2200  * ------------------------
2201  * Called to set or clear a capability for a mirror.
2202  * Invoked by the MD_MN_SET_CAP ioctl.
2203  */
2204 static int
2205 mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
2206 {
2207 	set_t		setno;
2208 	mm_unit_t	*un;
2209 	mdi_unit_t	*ui;
2210 
2211 #ifdef DEBUG
2212 	if (mirror_debug_flag)
2213 		printf("mirror_set_capability: mnum = %x\n", p->mnum);
2214 #endif
2215 	if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL)
2216 		return (EINVAL);
2217 
2218 	/* This function is only valid for a multi-node set */
2219 	setno = MD_MIN2SET(p->mnum);
2220 	if (!MD_MNSET_SETNO(setno)) {
2221 		return (EINVAL);
2222 	}
2223 	ui = MDI_UNIT(p->mnum);
2224 
2225 	if (p->sc_set & DKV_ABR_CAP) {
2226 		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
2227 		/* Clear DRL and set owner to 0 if no resync active */
2228 		mirror_process_unit_resync(un);
2229 		if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) {
2230 			mutex_enter(&un->un_owner_mx);
2231 			un->un_mirror_owner = 0;
2232 			mutex_exit(&un->un_owner_mx);
2233 		}
2234 	} else {
2235 		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
2236 	}
2237 	if (p->sc_set & DKV_DMR_CAP) {
2238 		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
2239 	} else {
2240 		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
2241 	}
2242 	return (0);
2243 }
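
/*
 * For reference: this handler is the per-node end of the capability
 * protocol. A user-level request arrives via DKIOCSETVOLCAP on the
 * metadevice (see md_mirror_ioctl below), which in turn broadcasts
 * MD_MN_SET_CAP to every node through mdmn_send_capability_message().
 * A caller-side sketch (illustrative only):
 *
 *	volcap_t vc;
 *
 *	vc.vc_set = DKV_ABR_CAP;	(enable application-based recovery)
 *	(void) ioctl(fd, DKIOCSETVOLCAP, &vc);
 */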
2244 
2245 /*
2246  * mirror_choose_owner:
2247  * ------------------------
2248  * Called to choose an owner for a mirror resync. Can be called when starting
2249  * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag
2250  * set. The ioctl is called with this flag set when we are in the cluster
2251  * reconfig and we wish to set a new owner for a resync whose owner has left
2252  * the cluster. We use a resync owner count to implement a round-robin
2253  * allocation of resync owners. We send a message to the master including
2254  * this count and the message handler uses it to select an owner from the
2255  * nodelist and then sends a SET_MM_OWNER message to the chosen node to
2256  * become the owner.
2257  *
2258  * Input:
2259  *	un	- unit reference
2260  *	ownp	- owner information (if non-NULL)
2261  */
2262 int
2263 mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp)
2264 {
2265 	set_t		setno;
2266 	md_mn_msg_chooseid_t	*msg;
2267 
2268 	/* This function is only valid for a multi-node set */
2269 	setno = MD_UN2SET(un);
2270 	if (!MD_MNSET_SETNO(setno)) {
2271 		return (EINVAL);
2272 	}
2273 
2274 
2275 #ifdef DEBUG
2276 	if (mirror_debug_flag)
2277 		printf("send choose owner message, mnum = %x, "
2278 		    "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt);
2279 #endif
2280 
2281 	/*
2282 	 * Set up the message with the current resync count
2283 	 * and then increment the count. If we're called with a non-NULL
2284 	 * owner then we are reestablishing the owner of the mirror. In this
2285 	 * case we have to flag this to the message handler and set rcnt to
2286 	 * the new owner node.
2287 	 */
2288 	msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP);
2289 	msg->msg_chooseid_mnum = MD_SID(un);
2290 	if (ownp == NULL) {
2291 		mutex_enter(&md_mx);
2292 		msg->msg_chooseid_rcnt = md_set[setno].s_rcnt;
2293 		md_set[setno].s_rcnt++;
2294 		mutex_exit(&md_mx);
2295 		msg->msg_chooseid_set_node = B_FALSE;
2296 	} else {
2297 		msg->msg_chooseid_rcnt = ownp->owner;
2298 		msg->msg_chooseid_set_node = B_TRUE;
2299 	}
2300 
2301 	/*
2302 	 * Spawn a thread to issue the ksend_message() call so that we can
2303 	 * drop the ioctl lock hierarchy that is blocking further rpc.metad and
2304 	 * commd set ownership checking.
2305 	 */
2306 	if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg,
2307 	    0, &p0, TS_RUN, 60) == NULL) {
2308 		kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
2309 		return (EFAULT);
2310 	} else {
2311 		return (0);
2312 	}
2313 }
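
/*
 * Illustrative sketch (assumption): the master's handler for this
 * message picks the next owner round-robin from the node list using
 * the count carried in the message, roughly
 *
 *	node = nodelist[msg->msg_chooseid_rcnt % nr_nodes];
 *	(then send SET_MM_OWNER to that node)
 *
 * unless msg_chooseid_set_node is B_TRUE, in which case
 * msg_chooseid_rcnt already holds the explicit node id that should
 * be re-established as the owner.
 */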
2314 
2315 /*
2316  * mirror_get_status:
2317  * ----------------------------------
2318  * Called by nodes that are not the master node of the cluster. Obtains the
2319  * master ABR state and the submirror status for each valid submirror of the
2320  * unit so that the status returned by metastat is consistent across the
2321  * cluster.
2322  * We update tstate for the mirror and both the sm_flag and the sm_state for
2323  * each submirror.
2324  *
2325  * Input:
2326  *	un	mirror to obtain status from
2327  *
2328  * Calling Convention:
2329  *	writerlock (either ioctl or unit) must be held
2330  */
2331 void
2332 mirror_get_status(mm_unit_t *un, IOLOCK *lockp)
2333 {
2334 	mm_submirror_t		*sm;
2335 	int			smi;
2336 	int			rval;
2337 	md_mn_kresult_t		*kres;
2338 	md_mn_msg_mir_state_t	msg;
2339 	md_mn_msg_mir_state_res_t	*res;
2340 	set_t			setno = MD_UN2SET(un);
2341 	mdi_unit_t		*ui = MDI_UNIT(MD_SID(un));
2342 
2343 
2344 	ASSERT(ui->ui_lock & MD_UL_WRITER);
2345 
2346 	/*
2347 	 * Get all of the information for the mirror.
2348 	 */
2349 	bzero(&msg, sizeof (msg));
2350 	msg.mir_state_mnum = MD_SID(un);
2351 
2352 	/*
2353 	 * Must drop the writerlock over ksend_message since another
2354 	 * thread on this node could be running a higher class message
2355 	 * and be trying to grab the readerlock.
2356 	 *
2357 	 * If we are in the context of an ioctl, drop the ioctl lock.
2358 	 * lockp holds the list of locks held.
2359 	 */
2360 	if (lockp) {
2361 		IOLOCK_RETURN_RELEASE(0, lockp);
2362 	} else {
2363 		md_unit_writerexit(ui);
2364 	}
2365 
2366 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
2367 	rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE,
2368 	    MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg,
2369 	    sizeof (msg), kres);
2370 
2371 	/* if the node hasn't yet joined, it's Ok. */
2372 	if ((!MDMN_KSEND_MSG_OK(rval, kres)) &&
2373 	    (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) {
2374 		mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE");
2375 		cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE");
2376 	}
2377 
2378 	/* if dropped the lock previously, regain it */
2379 	if (lockp) {
2380 		IOLOCK_RETURN_REACQUIRE(lockp);
2381 	} else {
2382 		/*
2383 		 * Reacquire dropped locks and update acquirecnts
2384 		 * appropriately.
2385 		 */
2386 		(void) md_unit_writerlock(ui);
2387 	}
2388 
2389 	/*
2390 	 * Check to see if we've got a believable amount of returned data.
2391 	 * If not, we simply return as there is no usable information.
2392 	 */
2393 	if (kres->kmmr_res_size < sizeof (*res)) {
2394 		cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected"
2395 		    " %d\n", kres->kmmr_res_size, (int)sizeof (*res));
2396 		kmem_free(kres, sizeof (md_mn_kresult_t));
2397 		return;
2398 	}
2399 
2400 	/*
2401 	 * Copy the results from the call back into our sm_state/sm_flags
2402 	 */
2403 	res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data;
2404 #ifdef DEBUG
2405 	if (mirror_debug_flag)
2406 		printf("mirror_get_status: %s\n", md_shortname(MD_SID(un)));
2407 #endif
2408 	for (smi = 0; smi < NMIRROR; smi++) {
2409 		sm = &un->un_sm[smi];
2410 #ifdef DEBUG
2411 		if (mirror_debug_flag) {
2412 			printf("curr state %4x, new state %4x\n", sm->sm_state,
2413 			    res->sm_state[smi]);
2414 			printf("curr_flags %4x, new flags %4x\n", sm->sm_flags,
2415 			    res->sm_flags[smi]);
2416 		}
2417 #endif
2418 		sm->sm_state = res->sm_state[smi];
2419 		sm->sm_flags = res->sm_flags[smi];
2420 	}
2421 
2422 	/* Set ABR if set on the Master node */
2423 	ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP);
2424 
2425 	kmem_free(kres, sizeof (md_mn_kresult_t));
2426 }
2427 
2428 /*
2429  * mirror_get_mir_state:
2430  * -------------------
2431  * Obtain the ABR state of a mirror and the state of all submirrors from the
2432  * master node for the unit specified in sm_state->mnum.
2433  * Called by MD_MN_GET_MIRROR_STATE ioctl.
2434  */
2435 static int
2436 mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp)
2437 {
2438 	mm_unit_t	*un;
2439 	set_t		setno;
2440 	md_error_t	mde;
2441 
2442 	mdclrerror(&mde);
2443 
2444 	if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) {
2445 		return (EINVAL);
2446 	}
2447 	setno = MD_MIN2SET(p->mnum);
2448 	if (!MD_MNSET_SETNO(setno)) {
2449 		return (EINVAL);
2450 	}
2451 
2452 	/*
2453 	 * We've now got a writerlock on the unit structure (so no-one can
2454 	 * modify the incore values) and we'll now send the message to the
2455 	 * master node. Since we're only called as part of a reconfig cycle
2456 	 * we don't need to release the unit locks across the ksend_message as
2457 	 * only the master node will process it, and we never send this to
2458 	 * ourselves if we're the master.
2459 	 */
2460 
2461 	mirror_get_status(un, lockp);
2462 
2463 	return (0);
2464 }
2465 
2466 static int
2467 mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
2468 {
2469 	size_t	sz = 0;
2470 	void	*d = NULL;
2471 	int	err = 0;
2472 
2473 	/* We can only handle 32-bit clients for internal commands */
2474 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
2475 		return (EINVAL);
2476 	}
2477 	/* dispatch ioctl */
2478 	switch (cmd) {
2479 
2480 	case MD_IOCSET:
2481 	{
2482 		if (! (mode & FWRITE))
2483 			return (EACCES);
2484 
2485 		sz = sizeof (md_set_params_t);
2486 
2487 		d = kmem_alloc(sz, KM_SLEEP);
2488 
2489 		if (ddi_copyin(data, d, sz, mode)) {
2490 			err = EFAULT;
2491 			break;
2492 		}
2493 
2494 		err = mirror_set(d, mode);
2495 		break;
2496 	}
2497 
2498 	case MD_IOCGET:
2499 	{
2500 		if (! (mode & FREAD))
2501 			return (EACCES);
2502 
2503 		sz = sizeof (md_i_get_t);
2504 
2505 		d = kmem_alloc(sz, KM_SLEEP);
2506 
2507 		if (ddi_copyin(data, d, sz, mode)) {
2508 			err = EFAULT;
2509 			break;
2510 		}
2511 
2512 		err = mirror_get(d, mode, lockp);
2513 		break;
2514 	}
2515 
2516 	case MD_IOCRESET:
2517 	{
2518 		if (! (mode & FWRITE))
2519 			return (EACCES);
2520 
2521 		sz = sizeof (md_i_reset_t);
2522 		d = kmem_alloc(sz, KM_SLEEP);
2523 
2524 		if (ddi_copyin(data, d, sz, mode)) {
2525 			err = EFAULT;
2526 			break;
2527 		}
2528 
2529 		err = mirror_reset((md_i_reset_t *)d);
2530 		break;
2531 	}
2532 
2533 	case MD_IOCSETSYNC:
2534 	case MD_MN_SETSYNC:
2535 	{
2536 		if (! (mode & FWRITE))
2537 			return (EACCES);
2538 
2539 		sz = sizeof (md_resync_ioctl_t);
2540 		d = kmem_alloc(sz, KM_SLEEP);
2541 
2542 		if (ddi_copyin(data, d, sz, mode)) {
2543 			err = EFAULT;
2544 			break;
2545 		}
2546 
2547 		err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp);
2548 		break;
2549 	}
2550 
2551 	case MD_IOCGETSYNC:
2552 	{
2553 		if (! (mode & FREAD))
2554 			return (EACCES);
2555 
2556 		sz = sizeof (md_resync_ioctl_t);
2557 		d = kmem_alloc(sz, KM_SLEEP);
2558 
2559 		if (ddi_copyin(data, d, sz, mode)) {
2560 			err = EFAULT;
2561 			break;
2562 		}
2563 
2564 		err = mirror_get_resync((md_resync_ioctl_t *)d);
2565 		break;
2566 	}
2567 
2568 	case MD_IOCREPLACE:
2569 	{
2570 		if (! (mode & FWRITE))
2571 			return (EACCES);
2572 
2573 		sz = sizeof (replace_params_t);
2574 		d = kmem_alloc(sz, KM_SLEEP);
2575 
2576 		if (ddi_copyin(data, d, sz, mode)) {
2577 			err = EFAULT;
2578 			break;
2579 		}
2580 
2581 		err = comp_replace((replace_params_t *)d, lockp);
2582 		break;
2583 	}
2584 
2585 	case MD_IOCOFFLINE:
2586 	{
2587 		if (! (mode & FWRITE))
2588 			return (EACCES);
2589 
2590 		sz = sizeof (md_i_off_on_t);
2591 		d = kmem_alloc(sz, KM_SLEEP);
2592 
2593 		if (ddi_copyin(data, d, sz, mode)) {
2594 			err = EFAULT;
2595 			break;
2596 		}
2597 
2598 		err = mirror_offline((md_i_off_on_t *)d, lockp);
2599 		break;
2600 	}
2601 
2602 	case MD_IOCONLINE:
2603 	{
2604 		if (! (mode & FWRITE))
2605 			return (EACCES);
2606 
2607 		sz = sizeof (md_i_off_on_t);
2608 		d = kmem_alloc(sz, KM_SLEEP);
2609 
2610 		if (ddi_copyin(data, d, sz, mode)) {
2611 			err = EFAULT;
2612 			break;
2613 		}
2614 
2615 		err = mirror_online((md_i_off_on_t *)d, lockp);
2616 		break;
2617 	}
2618 
2619 	case MD_IOCDETACH:
2620 	{
2621 		if (! (mode & FWRITE))
2622 			return (EACCES);
2623 
2624 		sz = sizeof (md_detach_params_t);
2625 		d = kmem_alloc(sz, KM_SLEEP);
2626 
2627 		if (ddi_copyin(data, d, sz, mode)) {
2628 			err = EFAULT;
2629 			break;
2630 		}
2631 
2632 		err = mirror_detach((md_detach_params_t *)d, lockp);
2633 		break;
2634 	}
2635 
2636 	case MD_IOCATTACH:
2637 	{
2638 
2639 		if (! (mode & FWRITE))
2640 			return (EACCES);
2641 
2642 		sz = sizeof (md_att_struct_t);
2643 		d = kmem_alloc(sz, KM_SLEEP);
2644 
2645 		if (ddi_copyin(data, d, sz, mode)) {
2646 			err = EFAULT;
2647 			break;
2648 		}
2649 
2650 		err = mirror_attach((md_att_struct_t *)d, lockp);
2651 		break;
2652 	}
2653 
2654 	case MD_IOCGET_DEVS:
2655 	{
2656 		if (! (mode & FREAD))
2657 			return (EACCES);
2658 
2659 		sz = sizeof (md_getdevs_params_t);
2660 
2661 		d = kmem_alloc(sz, KM_SLEEP);
2662 
2663 		if (ddi_copyin(data, d, sz, mode)) {
2664 			err = EFAULT;
2665 			break;
2666 		}
2667 
2668 		err = mirror_getdevs(d, mode, lockp);
2669 		break;
2670 	}
2671 
2672 	case MD_IOCGROW:
2673 	{
2674 		if (! (mode & FWRITE))
2675 			return (EACCES);
2676 
2677 		sz = sizeof (md_grow_params_t);
2678 
2679 		d = kmem_alloc(sz, KM_SLEEP);
2680 
2681 		if (ddi_copyin(data, d, sz, mode)) {
2682 			err = EFAULT;
2683 			break;
2684 		}
2685 
2686 		err = mirror_grow(d, lockp);
2687 		break;
2688 	}
2689 
2690 	case MD_IOCCHANGE:
2691 	{
2692 		if (! (mode & FWRITE))
2693 			return (EACCES);
2694 
2695 		sz = sizeof (md_mirror_params_t);
2696 		d = kmem_alloc(sz, KM_SLEEP);
2697 
2698 		if (ddi_copyin(data, d, sz, mode)) {
2699 			err = EFAULT;
2700 			break;
2701 		}
2702 
2703 		err = mirror_change((md_mirror_params_t *)d, lockp);
2704 		break;
2705 	}
2706 
2707 	case MD_IOCPROBE_DEV:
2708 	{
2709 		md_probedev_impl_t	*p = NULL;
2710 		md_probedev_t		*ph = NULL;
2711 		daemon_queue_t		*hdr = NULL;
2712 		int			i;
2713 		size_t			sz2 = 0;
2714 
2715 		if (! (mode & FREAD))
2716 			return (EACCES);
2717 
2718 
2719 		sz = sizeof (md_probedev_t);
2720 		d = kmem_alloc(sz, KM_SLEEP);
2721 
2722 		/* now copy in the data */
2723 		if (ddi_copyin(data, d, sz, mode)) {
2724 			err = EFAULT;
2725 			goto free_mem;
2726 		}
2727 
2728 		/*
2729 		 * Sanity test the args. Test name should have the keyword
2730 		 * probe.
2731 		 */
2732 
2733 		p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);
2734 
2735 		p->probe_sema = NULL;
2736 		p->probe_mx = NULL;
2737 		p->probe.mnum_list = (uint64_t)NULL;
2738 
2739 		ph = (struct md_probedev *)d;
2740 
2741 		p->probe.nmdevs = ph->nmdevs;
2742 		(void) strcpy(p->probe.test_name, ph->test_name);
2743 		bcopy(&ph->md_driver, &(p->probe.md_driver),
2744 		    sizeof (md_driver_t));
2745 
2746 		if ((p->probe.nmdevs < 1) ||
2747 		    (strstr(p->probe.test_name, "probe") == NULL)) {
2748 			err = EINVAL;
2749 			goto free_mem;
2750 		}
2751 
2752 
2753 		sz2 = sizeof (minor_t) * p->probe.nmdevs;
2754 		p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2,
2755 		    KM_SLEEP);
2756 
2757 		if (ddi_copyin((void *)(uintptr_t)ph->mnum_list,
2758 		    (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) {
2759 			err = EFAULT;
2760 			goto free_mem;
2761 		}
2762 
2763 		if ((err = md_init_probereq(p, &hdr)) != 0)
2764 			goto free_mem;
2765 
2766 		/*
2767 		 * put the request on the queue and wait.
2768 		 */
2769 
2770 		daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);
2771 
2772 		(void) IOLOCK_RETURN(0, lockp);
2773 		/* wait for the events to occur */
2774 		for (i = 0; i < p->probe.nmdevs; i++) {
2775 			sema_p(PROBE_SEMA(p));
2776 		}
2777 		while (md_ioctl_lock_enter() == EINTR)
2778 			;
2779 
2780 		/*
2781 		 * clean up. The hdr list is freed in the probe routines
2782 		 * since the list is NULL by the time we get here.
2783 		 */
2784 free_mem:
2785 		if (p) {
2786 			if (p->probe_sema != NULL) {
2787 				sema_destroy(PROBE_SEMA(p));
2788 				kmem_free(p->probe_sema, sizeof (ksema_t));
2789 			}
2790 			if (p->probe_mx != NULL) {
2791 				mutex_destroy(PROBE_MX(p));
2792 				kmem_free(p->probe_mx, sizeof (kmutex_t));
2793 			}
2794 			if ((uintptr_t)p->probe.mnum_list)
2795 				kmem_free((void *)(uintptr_t)
2796 				    p->probe.mnum_list, sz2);
2797 
2798 			kmem_free(p, sizeof (md_probedev_impl_t));
2799 		}
2800 		break;
2801 	}
2802 
2803 	case MD_MN_SET_MM_OWNER:
2804 	{
2805 		if (! (mode & FWRITE))
2806 			return (EACCES);
2807 
2808 		sz = sizeof (md_set_mmown_params_t);
2809 		d = kmem_alloc(sz, KM_SLEEP);
2810 
2811 		if (ddi_copyin(data, d, sz, mode) != 0) {
2812 			err = EFAULT;
2813 			break;
2814 		}
2815 
2816 		err = mirror_set_owner((md_set_mmown_params_t *)d, lockp);
2817 		break;
2818 	}
2819 
2820 	case MD_MN_GET_MM_OWNER:
2821 	{
2822 		if (! (mode & FREAD))
2823 			return (EACCES);
2824 
2825 		sz = sizeof (md_set_mmown_params_t);
2826 		d = kmem_alloc(sz, KM_SLEEP);
2827 
2828 		if (ddi_copyin(data, d, sz, mode) != 0) {
2829 			err = EFAULT;
2830 			break;
2831 		}
2832 
2833 		err = mirror_get_owner((md_set_mmown_params_t *)d, lockp);
2834 		break;
2835 	}
2836 
2837 	case MD_MN_MM_OWNER_STATUS:
2838 	{
2839 		if (! (mode & FREAD))
2840 			return (EACCES);
2841 
2842 		sz = sizeof (md_mn_own_status_t);
2843 		d = kmem_alloc(sz, KM_SLEEP);
2844 
2845 		if (ddi_copyin(data, d, sz, mode) != 0) {
2846 			err = EFAULT;
2847 			break;
2848 		}
2849 
2850 		err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp);
2851 		break;
2852 	}
2853 
2854 	case MD_MN_SET_STATE:
2855 	{
2856 		if (! (mode & FWRITE))
2857 			return (EACCES);
2858 
2859 		sz = sizeof (md_set_state_params_t);
2860 		d = kmem_alloc(sz, KM_SLEEP);
2861 
2862 		if (ddi_copyin(data, d, sz, mode)) {
2863 			err = EFAULT;
2864 			break;
2865 		}
2866 
2867 		err = mirror_set_state((md_set_state_params_t *)d, lockp);
2868 		break;
2869 	}
2870 
2871 	case MD_MN_SUSPEND_WRITES:
2872 	{
2873 		if (! (mode & FREAD))
2874 			return (EACCES);
2875 
2876 		sz = sizeof (md_suspend_wr_params_t);
2877 		d = kmem_alloc(sz, KM_SLEEP);
2878 
2879 		if (ddi_copyin(data, d, sz, mode) != 0) {
2880 			err = EFAULT;
2881 			break;
2882 		}
2883 
2884 		err = mirror_suspend_writes((md_suspend_wr_params_t *)d);
2885 		break;
2886 	}
2887 
2888 	case MD_MN_RESYNC:
2889 	{
2890 		sz = sizeof (md_mn_rs_params_t);
2891 		d = kmem_alloc(sz, KM_SLEEP);
2892 
2893 		if (ddi_copyin(data, d, sz, mode) != 0) {
2894 			err = EFAULT;
2895 			break;
2896 		}
2897 
2898 		err = mirror_resync_message((md_mn_rs_params_t *)d, lockp);
2899 		break;
2900 	}
2901 
2902 	case MD_MN_ALLOCATE_HOTSPARE:
2903 	{
2904 		if (! (mode & FWRITE))
2905 			return (EACCES);
2906 
2907 		sz = sizeof (md_alloc_hotsp_params_t);
2908 		d = kmem_alloc(sz, KM_SLEEP);
2909 
2910 		if (ddi_copyin(data, d, sz, mode)) {
2911 			err = EFAULT;
2912 			break;
2913 		}
2914 
2915 		err = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d,
2916 		    lockp);
2917 		break;
2918 	}
2919 
2920 	case MD_MN_POKE_HOTSPARES:
2921 	{
2922 		(void) poke_hotspares();
2923 		break;
2924 	}
2925 
2926 	case MD_MN_SET_CAP:
2927 	{
2928 		if (! (mode & FWRITE))
2929 			return (EACCES);
2930 
2931 		sz = sizeof (md_mn_setcap_params_t);
2932 		d = kmem_alloc(sz, KM_SLEEP);
2933 
2934 		if (ddi_copyin(data, d, sz, mode)) {
2935 			err = EFAULT;
2936 			break;
2937 		}
2938 
2939 		err = mirror_set_capability((md_mn_setcap_params_t *)d,
2940 		    lockp);
2941 		break;
2942 	}
2943 
2944 	case MD_MN_GET_MIRROR_STATE:
2945 	{
2946 		sz = sizeof (md_mn_get_mir_state_t);
2947 		d = kmem_zalloc(sz, KM_SLEEP);
2948 
2949 		if (ddi_copyin(data, d, sz, mode)) {
2950 			err = EFAULT;
2951 			break;
2952 		}
2953 
2954 		err = mirror_get_mir_state((md_mn_get_mir_state_t *)d,
2955 		    lockp);
2956 		break;
2957 	}
2958 
2959 	case MD_MN_RR_DIRTY:
2960 	{
2961 		sz = sizeof (md_mn_rr_dirty_params_t);
2962 		d = kmem_zalloc(sz, KM_SLEEP);
2963 
2964 		if (ddi_copyin(data, d, sz, mode)) {
2965 			err = EFAULT;
2966 			break;
2967 		}
2968 
2969 		err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d);
2970 		break;
2971 	}
2972 
2973 	case MD_MN_RR_CLEAN:
2974 	{
2975 		md_mn_rr_clean_params_t tmp;
2976 
2977 		/* get the first part of the structure to find the size */
2978 		if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) {
2979 			err = EFAULT;
2980 			break;
2981 		}
2982 
2983 		sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp);
2984 		d = kmem_zalloc(sz, KM_SLEEP);
2985 
2986 		if (ddi_copyin(data, d, sz, mode)) {
2987 			err = EFAULT;
2988 			break;
2989 		}
2990 
2991 		err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d);
2992 		break;
2993 	}
2994 
2995 	default:
2996 		return (ENOTTY);
2997 	}
2998 
2999 	/*
3000 	 * copyout and free any args
3001 	 */
3002 	if (sz != 0) {
3003 		if (err == 0) {
3004 			if (ddi_copyout(d, data, sz, mode) != 0) {
3005 				err = EFAULT;
3006 			}
3007 		}
3008 		kmem_free(d, sz);
3009 	}
3010 	return (err);
3011 }
3012 
3013 int
3014 md_mirror_ioctl(
3015 	dev_t		ddi_dev,
3016 	int		cmd,
3017 	void		*data,
3018 	int		mode,
3019 	IOLOCK		*lockp
3020 )
3021 {
3022 	minor_t		mnum = getminor(ddi_dev);
3023 	mm_unit_t	*un;
3024 	int		err = 0;
3025 
3026 	/* handle admin ioctls */
3027 	if (mnum == MD_ADM_MINOR)
3028 		return (mirror_admin_ioctl(cmd, data, mode, lockp));
3029 
3030 	/* check unit */
3031 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
3032 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
3033 	    ((un = MD_UNIT(mnum)) == NULL))
3034 		return (ENXIO);
3035 	/* is this a supported ioctl? */
3036 	err = md_check_ioctl_against_unit(cmd, un->c);
3037 	if (err != 0) {
3038 		return (err);
3039 	}
3040 
3041 	/* dispatch ioctl */
3042 	switch (cmd) {
3043 
3044 	case DKIOCINFO:
3045 	{
3046 		struct dk_cinfo	*p;
3047 
3048 		if (! (mode & FREAD))
3049 			return (EACCES);
3050 
3051 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
3052 
3053 		get_info(p, mnum);
3054 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
3055 			err = EFAULT;
3056 
3057 		kmem_free(p, sizeof (*p));
3058 		return (err);
3059 	}
3060 
3061 	case DKIOCGMEDIAINFO:
3062 	{
3063 		struct dk_minfo	p;
3064 
3065 		if (! (mode & FREAD))
3066 			return (EACCES);
3067 
3068 		get_minfo(&p, mnum);
3069 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
3070 			err = EFAULT;
3071 
3072 		return (err);
3073 	}
3074 
3075 	case DKIOCGGEOM:
3076 	{
3077 		struct dk_geom	*p;
3078 
3079 		if (! (mode & FREAD))
3080 			return (EACCES);
3081 
3082 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
3083 
3084 		if ((err = mirror_get_geom(un, p)) == 0) {
3085 			if (ddi_copyout((caddr_t)p, data, sizeof (*p),
3086 			    mode) != 0)
3087 				err = EFAULT;
3088 		}
3089 
3090 		kmem_free(p, sizeof (*p));
3091 		return (err);
3092 	}
3093 
3094 	case DKIOCGVTOC:
3095 	{
3096 		struct vtoc	*vtoc;
3097 
3098 		if (! (mode & FREAD))
3099 			return (EACCES);
3100 
3101 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3102 
3103 		if ((err = mirror_get_vtoc(un, vtoc)) != 0) {
3104 			kmem_free(vtoc, sizeof (*vtoc));
3105 			return (err);
3106 		}
3107 
3108 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3109 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
3110 				err = EFAULT;
3111 		}
3112 #ifdef _SYSCALL32
3113 		else {
3114 			struct vtoc32	*vtoc32;
3115 
3116 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3117 
3118 			vtoctovtoc32((*vtoc), (*vtoc32));
3119 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
3120 				err = EFAULT;
3121 			kmem_free(vtoc32, sizeof (*vtoc32));
3122 		}
3123 #endif /* _SYSCALL32 */
3124 
3125 		kmem_free(vtoc, sizeof (*vtoc));
3126 		return (err);
3127 	}
3128 
3129 	case DKIOCSVTOC:
3130 	{
3131 		struct vtoc	*vtoc;
3132 
3133 		if (! (mode & FWRITE))
3134 			return (EACCES);
3135 
3136 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3137 
3138 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3139 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
3140 				err = EFAULT;
3141 			}
3142 		}
3143 #ifdef _SYSCALL32
3144 		else {
3145 			struct vtoc32	*vtoc32;
3146 
3147 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3148 
3149 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
3150 				err = EFAULT;
3151 			} else {
3152 				vtoc32tovtoc((*vtoc32), (*vtoc));
3153 			}
3154 			kmem_free(vtoc32, sizeof (*vtoc32));
3155 		}
3156 #endif /* _SYSCALL32 */
3157 
3158 		if (err == 0)
3159 			err = mirror_set_vtoc(un, vtoc);
3160 
3161 		kmem_free(vtoc, sizeof (*vtoc));
3162 		return (err);
3163 	}
3164 
3165 	case DKIOCGEXTVTOC:
3166 	{
3167 		struct extvtoc	*extvtoc;
3168 
3169 		if (! (mode & FREAD))
3170 			return (EACCES);
3171 
3172 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3173 
3174 		if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) {
3175 			kmem_free(extvtoc, sizeof (*extvtoc));
3176 			return (err);
3177 		}
3178 
3179 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
3180 			err = EFAULT;
3181 
3182 		kmem_free(extvtoc, sizeof (*extvtoc));
3183 		return (err);
3184 	}
3185 
3186 	case DKIOCSEXTVTOC:
3187 	{
3188 		struct extvtoc	*extvtoc;
3189 
3190 		if (! (mode & FWRITE))
3191 			return (EACCES);
3192 
3193 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3194 
3195 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
3196 			err = EFAULT;
3197 		}
3198 
3199 		if (err == 0)
3200 			err = mirror_set_extvtoc(un, extvtoc);
3201 
3202 		kmem_free(extvtoc, sizeof (*extvtoc));
3203 		return (err);
3204 	}
3205 
3206 	case DKIOCGAPART:
3207 	{
3208 		struct dk_map	dmp;
3209 
3210 		if ((err = mirror_get_cgapart(un, &dmp)) != 0) {
3211 			return (err);
3212 		}
3213 
3214 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3215 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
3216 			    mode) != 0)
3217 				err = EFAULT;
3218 		}
3219 #ifdef _SYSCALL32
3220 		else {
3221 			struct dk_map32 dmp32;
3222 
3223 			dmp32.dkl_cylno = dmp.dkl_cylno;
3224 			dmp32.dkl_nblk = dmp.dkl_nblk;
3225 
3226 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
3227 			    mode) != 0)
3228 				err = EFAULT;
3229 		}
3230 #endif /* _SYSCALL32 */
3231 
3232 		return (err);
3233 	}
3234 	case DKIOCGETEFI:
3235 	{
3236 		/*
3237 		 * This one can be handled centrally; there is no need to
3238 		 * duplicate the same code for all types of metadevices.
3239 		 */
3240 		return (md_dkiocgetefi(mnum, data, mode));
3241 	}
3242 	case DKIOCSETEFI:
3243 	{
3244 		/*
3245 		 * This one can be handled centrally; there is no need to
3246 		 * duplicate the same code for all types of metadevices.
3247 		 */
3248 		return (md_dkiocsetefi(mnum, data, mode));
3249 	}
3250 	case DKIOCPARTITION:
3251 	{
3252 		return (md_dkiocpartition(mnum, data, mode));
3253 	}
3254 
3255 	case DKIOCGETVOLCAP:
3256 	{
3257 		volcap_t	vc;
3258 		mdi_unit_t	*ui;
3259 
3260 		/* Only valid for MN sets */
3261 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3262 			return (EINVAL);
3263 
3264 		ui = MDI_UNIT(mnum);
3265 		if (! (mode & FREAD))
3266 			return (EACCES);
3267 
3268 		vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP;
3269 		vc.vc_set = 0;
3270 		if (ui->ui_tstate & MD_ABR_CAP) {
3271 			vc.vc_set |= DKV_ABR_CAP;
3272 		}
3273 		if (ddi_copyout(&vc, data, sizeof (volcap_t), mode))
3274 			err = EFAULT;
3275 		return (err);
3276 	}
3277 
3278 	case DKIOCSETVOLCAP:
3279 	{
3280 		volcap_t	vc;
3281 		volcapset_t	volcap = 0;
3282 		mdi_unit_t	*ui;
3283 
3284 		/* Only valid for MN sets */
3285 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3286 			return (EINVAL);
3287 
3288 		ui = MDI_UNIT(mnum);
3289 		if (! (mode & FWRITE))
3290 			return (EACCES);
3291 
3292 		if (ddi_copyin(data, &vc, sizeof (volcap_t), mode))
3293 			return (EFAULT);
3294 
3295 		/* Not valid if a submirror is offline */
3296 		if (un->c.un_status & MD_UN_OFFLINE_SM) {
3297 			return (EINVAL);
3298 		}
3299 		if (ui->ui_tstate & MD_ABR_CAP)
3300 			volcap |= DKV_ABR_CAP;
3301 		/* Only send capability message if there is a change */
3302 		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
3303 			err = mdmn_send_capability_message(mnum, vc, lockp);
3304 		return (err);
3305 	}
3306 
3307 	case DKIOCDMR:
3308 	{
3309 		vol_directed_rd_t	*vdr;
3310 
3311 #ifdef _MULTI_DATAMODEL
3312 		vol_directed_rd32_t	*vdr32;
3313 #endif	/* _MULTI_DATAMODEL */
3314 
3315 		/* Only valid for MN sets */
3316 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3317 			return (EINVAL);
3318 
3319 		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
3320 		if (vdr == NULL)
3321 			return (ENOMEM);
3322 
3323 #ifdef _MULTI_DATAMODEL
3324 		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
3325 		if (vdr32 == NULL) {
3326 			kmem_free(vdr, sizeof (vol_directed_rd_t));
3327 			return (ENOMEM);
3328 		}
3329 
3330 		switch (ddi_model_convert_from(mode & FMODELS)) {
3331 		case DDI_MODEL_ILP32:
3332 			/*
3333 			 * If we're called from a higher-level driver we don't
3334 			 * need to manipulate the data. It's already been done by
3335 			 * the caller.
3336 			 */
3337 			if (!(mode & FKIOCTL)) {
3338 				if (ddi_copyin(data, vdr32, sizeof (*vdr32),
3339 				    mode)) {
3340 					kmem_free(vdr, sizeof (*vdr));
3341 					return (EFAULT);
3342 				}
3343 				vdr->vdr_flags = vdr32->vdr_flags;
3344 				vdr->vdr_offset = vdr32->vdr_offset;
3345 				vdr->vdr_nbytes = vdr32->vdr_nbytes;
3346 				vdr->vdr_data =
3347 				    (void *)(uintptr_t)vdr32->vdr_data;
3348 				vdr->vdr_side = vdr32->vdr_side;
3349 				break;
3350 			}
3351 			/* FALLTHROUGH */
3352 
3353 		case DDI_MODEL_NONE:
3354 			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3355 				kmem_free(vdr32, sizeof (*vdr32));
3356 				kmem_free(vdr, sizeof (*vdr));
3357 				return (EFAULT);
3358 			}
3359 			break;
3360 
3361 		default:
3362 			kmem_free(vdr32, sizeof (*vdr32));
3363 			kmem_free(vdr, sizeof (*vdr));
3364 			return (EFAULT);
3365 		}
3366 #else	/* ! _MULTI_DATAMODEL */
3367 		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3368 			kmem_free(vdr, sizeof (*vdr));
3369 			return (EFAULT);
3370 		}
3371 #endif	/* _MULTI_DATAMODEL */
3372 
3373 		err = mirror_directed_read(ddi_dev, vdr, mode);
3374 
3375 		if (err == 0) {
3376 #ifdef _MULTI_DATAMODEL
3377 			switch (ddi_model_convert_from(mode & FMODELS)) {
3378 			case DDI_MODEL_ILP32:
3379 				if (!(mode & FKIOCTL)) {
3380 					vdr32->vdr_flags = vdr->vdr_flags;
3381 					vdr32->vdr_offset = vdr->vdr_offset;
3382 					vdr32->vdr_side = vdr->vdr_side;
3383 					vdr32->vdr_bytesread =
3384 					    vdr->vdr_bytesread;
3385 					bcopy(vdr->vdr_side_name,
3386 					    vdr32->vdr_side_name,
3387 					    sizeof (vdr32->vdr_side_name));
3388 
3389 					if (ddi_copyout(vdr32, data,
3390 					    sizeof (*vdr32), mode)) {
3391 						err = EFAULT;
3392 					}
3393 					break;
3394 				}
3395 				/* FALLTHROUGH */
3396 
3397 			case DDI_MODEL_NONE:
3398 				if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3399 					err = EFAULT;
3400 				break;
3401 			}
3402 #else	/* ! _MULTI_DATAMODEL */
3403 			if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3404 				err = EFAULT;
3405 #endif	/* _MULTI_DATAMODEL */
3406 			if (vdr->vdr_flags & DKV_DMR_ERROR)
3407 				err = EIO;
3408 		}
3409 
3410 #ifdef _MULTI_DATAMODEL
3411 		kmem_free(vdr32, sizeof (*vdr32));
3412 #endif	/* _MULTI_DATAMODEL */
3413 
3414 		kmem_free(vdr, sizeof (*vdr));
3415 
3416 		return (err);
3417 	}
3418 
3419 	default:
3420 		return (ENOTTY);
3421 	}
3422 }
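
/*
 * Example (illustrative only, not part of the original file): a
 * user-level directed mirror read via DKIOCDMR fills in a
 * vol_directed_rd_t and checks DKV_DMR_ERROR on return. The field
 * names match the handler above; everything else is assumed.
 *
 *	vol_directed_rd_t vdr;
 *	char buf[512];
 *
 *	(void) memset(&vdr, 0, sizeof (vdr));
 *	vdr.vdr_offset = 0;		(byte offset to read)
 *	vdr.vdr_nbytes = sizeof (buf);	(length of the read)
 *	vdr.vdr_data = buf;		(user buffer)
 *	vdr.vdr_side = side;		(submirror side to read from)
 *	if (ioctl(fd, DKIOCDMR, &vdr) < 0 ||
 *	    (vdr.vdr_flags & DKV_DMR_ERROR))
 *		(the read from this side failed)
 *
 * On success, vdr.vdr_bytesread and vdr.vdr_side_name describe the
 * result.
 */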
3423 
3424 /*
3425  * rename named service entry points and support functions
3426  */
3427 
3428 /*
3429  * rename/exchange role swap functions
3430  *
3431  * most of these are handled by generic role swap functions
3432  */
3433 
3434 /*
3435  * MDRNM_UPDATE_KIDS
3436  * rename/exchange of our child or grandchild
3437  */
3438 void
3439 mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp)
3440 {
3441 	mm_submirror_t		*sm;
3442 	int			smi;
3443 
3444 	ASSERT(rtxnp);
3445 	ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
3446 	ASSERT(rtxnp->recids);
3447 	ASSERT(delta);
3448 	ASSERT(delta->unp);
3449 	ASSERT(delta->old_role == MDRR_PARENT);
3450 	ASSERT(delta->new_role == MDRR_PARENT);
3451 
3452 	/*
3453 	 * Since our role isn't changing (parent->parent),
3454 	 * one of our children must be changing.
3455 	 * Find the child being modified, and update
3456 	 * our notion of it.
3457 	 */
3458 	for (smi = 0; smi < NMIRROR; smi++) {
3459 		mm_unit_t *un = (mm_unit_t *)delta->unp;
3460 
3461 		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3462 			continue;
3463 		}
3464 		sm = &un->un_sm[smi];
3465 
3466 		if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) {
3467 			sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3468 			sm->sm_key = rtxnp->to.key;
3469 			break;
3470 		}
3471 	}
3472 
3473 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3474 }
3475 
3476 /*
3477  * exchange down (self->child)
3478  */
3479 void
3480 mirror_exchange_self_update_from_down(
3481 	md_rendelta_t	*delta,
3482 	md_rentxn_t	*rtxnp
3483 )
3484 {
3485 	int			smi;
3486 	mm_submirror_t		*found;
3487 	minor_t			from_min, to_min;
3488 	sv_dev_t		sv;
3489 
3490 	ASSERT(rtxnp);
3491 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3492 	ASSERT(rtxnp->recids);
3493 	ASSERT(rtxnp->rec_idx >= 0);
3494 	ASSERT(delta);
3495 	ASSERT(delta->unp);
3496 	ASSERT(delta->uip);
3497 	ASSERT(delta->old_role == MDRR_SELF);
3498 	ASSERT(delta->new_role == MDRR_CHILD);
3499 	ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
3500 
3501 	from_min = rtxnp->from.mnum;
3502 	to_min = rtxnp->to.mnum;
3503 
3504 	/*
3505 	 * self id changes in our own unit struct
3506 	 */
3507 
3508 	MD_SID(delta->unp) = to_min;
3509 
3510 	/*
3511 	 * parent identifier need not change
3512 	 */
3513 
3514 	/*
3515 	 * point the set array pointers at the "new" unit and unit in-cores
3516 	 * Note: the other half of this transfer is done in the "update_to"
3517 	 * exchange named service.
3518 	 */
3519 
3520 	MDI_VOIDUNIT(to_min) = delta->uip;
3521 	MD_VOIDUNIT(to_min) = delta->unp;
3522 
3523 	/*
3524 	 * transfer kstats
3525 	 */
3526 
3527 	delta->uip->ui_kstat = rtxnp->to.kstatp;
3528 
3529 	/*
3530 	 * the unit in-core reference to the get next link's id changes
3531 	 */
3532 
3533 	delta->uip->ui_link.ln_id = to_min;
3534 
3535 	/*
3536 	 * find the child whose identity we're assuming
3537 	 */
3538 
3539 	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3540 		mm_submirror_t		*sm;
3541 		mm_unit_t		*un = (mm_unit_t *)delta->unp;
3542 
3543 		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3544 			continue;
3545 		}
3546 		sm = &un->un_sm[smi];
3547 
3548 		if (md_getminor(sm->sm_dev) == to_min) {
3549 			found = sm;
3550 		}
3551 	}
3552 	ASSERT(found);
3553 
3554 	/*
3555 	 * Update the sub-mirror's identity
3556 	 */
3557 	found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum);
3558 	sv.key = found->sm_key;
3559 
3560 	ASSERT(rtxnp->from.key != MD_KEYWILD);
3561 	ASSERT(rtxnp->from.key != MD_KEYBAD);
3562 
3563 	found->sm_key = rtxnp->from.key;
3564 
3565 	/*
3566 	 * delete the key for the old sub-mirror from the name space
3567 	 */
3568 
3569 	sv.setno = MD_MIN2SET(from_min);
3570 	md_rem_names(&sv, 1);
3571 
3572 	/*
3573 	 * and store the record id (from the unit struct) into recids
3574 	 */
3575 
3576 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3577 }
3578 
3579 /*
3580  * exchange down (parent->self)
3581  */
3582 void
3583 mirror_exchange_parent_update_to(
3584 		md_rendelta_t	*delta,
3585 		md_rentxn_t	*rtxnp
3586 )
3587 {
3588 	int			smi;
3589 	mm_submirror_t		*found;
3590 	minor_t			from_min, to_min;
3591 	sv_dev_t		sv;
3592 
3593 	ASSERT(rtxnp);
3594 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3595 	ASSERT(rtxnp->recids);
3596 	ASSERT(rtxnp->rec_idx >= 0);
3597 	ASSERT(delta);
3598 	ASSERT(delta->unp);
3599 	ASSERT(delta->uip);
3600 	ASSERT(delta->old_role == MDRR_PARENT);
3601 	ASSERT(delta->new_role == MDRR_SELF);
3602 	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
3603 
3604 	from_min = rtxnp->from.mnum;
3605 	to_min = rtxnp->to.mnum;
3606 
3607 	/*
3608 	 * self id changes in our own unit struct
3609 	 */
3610 
3611 	MD_SID(delta->unp) = from_min;
3612 
3613 	/*
3614 	 * parent identifier need not change
3615 	 */
3616 
3617 	/*
3618 	 * point the set array pointers at the "new" unit and unit in-cores
3619 	 * Note: the other half of this transfer is done in the
3620 	 * "update_from_down" exchange named service.
3621 	 */
3622 
3623 	MDI_VOIDUNIT(from_min) = delta->uip;
3624 	MD_VOIDUNIT(from_min) = delta->unp;
3625 
3626 	/*
3627 	 * transfer kstats
3628 	 */
3629 
3630 	delta->uip->ui_kstat = rtxnp->from.kstatp;
3631 
3632 	/*
3633 	 * the unit in-core reference to the get next link's id changes
3634 	 */
3635 
3636 	delta->uip->ui_link.ln_id = from_min;
3637 
3638 	/*
3639 	 * find the child whose identity we're assuming
3640 	 */
3641 
3642 	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3643 		mm_submirror_t		*sm;
3644 		mm_unit_t		*un = (mm_unit_t *)delta->unp;
3645 
3646 		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3647 			continue;
3648 		}
3649 		sm = &un->un_sm[smi];
3650 
3651 		if (md_getminor(sm->sm_dev) == from_min) {
3652 			found = sm;
3653 		}
3654 	}
3655 	ASSERT(found);
3656 
3657 	/*
3658 	 * Update the sub-mirror's identity
3659 	 */
3660 	found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3661 	sv.key = found->sm_key;
3662 
3663 	ASSERT(rtxnp->to.key != MD_KEYWILD);
3664 	ASSERT(rtxnp->to.key != MD_KEYBAD);
3665 
3666 	found->sm_key = rtxnp->to.key;
3667 
3668 	/*
3669 	 * delete the key for the old sub-mirror from the name space
3670 	 */
3671 
3672 	sv.setno = MD_MIN2SET(to_min);
3673 	md_rem_names(&sv, 1);
3674 
3675 	/*
3676 	 * and store the record id (from the unit struct) into recids
3677 	 */
3678 
3679 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3680 }
3681 
3682 /*
3683  * MDRNM_LIST_URKIDS: named svc entry point
3684  * add all delta entries appropriate for our children onto the
3685  * deltalist pointed to by dlpp
3686  */
3687 int
3688 mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
3689 {
3690 	minor_t			from_min, to_min;
3691 	mm_unit_t		*from_un;
3692 	md_rendelta_t		*new, *p;
3693 	int			smi;
3694 	int			n_children;
3695 	mm_submirror_t		*sm;
3696 
3697 	ASSERT(rtxnp);
3698 	ASSERT(dlpp);
3699 	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
3700 
3701 	from_min = rtxnp->from.mnum;
3702 	to_min = rtxnp->to.mnum;
3703 	n_children = 0;
3704 
3705 	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
3706 		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
3707 		return (-1);
3708 	}
3709 
3710 	for (p = *dlpp; p && p->next != NULL; p = p->next) {
3711 		/* NULL */
3712 	}
3713 
3714 	for (smi = 0; smi < NMIRROR; smi++) {
3715 		minor_t	child_min;
3716 
3717 		if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) {
3718 			continue;
3719 		}
3720 
3721 		sm = &from_un->un_sm[smi];
3722 		child_min = md_getminor(sm->sm_dev);
3723 
3724 		p = new = md_build_rendelta(MDRR_CHILD,
3725 		    to_min == child_min ? MDRR_SELF : MDRR_CHILD,
3726 		    sm->sm_dev, p,
3727 		    MD_UNIT(child_min), MDI_UNIT(child_min),
3728 		    &rtxnp->mde);
3729 
3730 		if (!new) {
3731 			if (mdisok(&rtxnp->mde)) {
3732 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
3733 			}
3734 			return (-1);
3735 		}
3736 		++n_children;
3737 	}
3738 
3739 	return (n_children);
3740 }
3741 
3742 /*
3743  * support routine for MDRNM_CHECK
3744  */
3745 static int
3746 mirror_may_renexch_self(
3747 	mm_unit_t	*un,
3748 	mdi_unit_t	*ui,
3749 	md_rentxn_t	*rtxnp)
3750 {
3751 	minor_t			 from_min;
3752 	minor_t			 to_min;
3753 	bool_t			 toplevel;
3754 	bool_t			 related;
3755 	int			 smi;
3756 	mm_submirror_t		*sm;
3757 
3758 	from_min = rtxnp->from.mnum;
3759 	to_min = rtxnp->to.mnum;
3760 
3761 	if (!un || !ui) {
3762 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3763 		    from_min);
3764 		return (EINVAL);
3765 	}
3766 
3767 	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
3768 	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
3769 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3770 		return (EINVAL);
3771 	}
3772 
3773 	if (MD_PARENT(un) == MD_MULTI_PARENT) {
3774 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3775 		return (EINVAL);
3776 	}
3777 
3778 	toplevel = !MD_HAS_PARENT(MD_PARENT(un));
3779 
3780 	/* we're related if trying to swap with our parent */
3781 	related = (!toplevel) && (MD_PARENT(un) == to_min);
3782 
3783 	switch (rtxnp->op) {
3784 	case MDRNOP_EXCHANGE:
3785 		/*
3786 		 * check for a swap with our child
3787 		 */
3788 		for (smi = 0; smi < NMIRROR; smi++) {
3789 
3790 			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3791 				continue;
3792 			}
3793 
3794 			sm = &un->un_sm[smi];
3795 			if (md_getminor(sm->sm_dev) == to_min) {
3796 				related |= TRUE;
3797 			}
3798 		}
3799 		if (!related) {
3800 			(void) mdmderror(&rtxnp->mde,
3801 			    MDE_RENAME_TARGET_UNRELATED, to_min);
3802 			return (EINVAL);
3803 		}
3804 
3805 		break;
3806 
3807 	case MDRNOP_RENAME:
3808 		/*
3809 		 * if from is top-level and is open, then the kernel is using
3810 		 * the md_dev64_t.
3811 		 */
3812 
3813 		if (toplevel && md_unit_isopen(ui)) {
3814 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3815 			    from_min);
3816 			return (EBUSY);
3817 		}
3818 		break;
3819 
3820 	default:
3821 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3822 		    from_min);
3823 		return (EINVAL);
3824 	}
3825 
3826 	return (0);	/* ok */
3827 }
3828 
3829 /*
3830  * Named service entry point: MDRNM_CHECK
3831  */
3832 intptr_t
3833 mirror_rename_check(
3834 	md_rendelta_t	*delta,
3835 	md_rentxn_t	*rtxnp)
3836 {
3837 	mm_submirror_t		*sm;
3838 	mm_submirror_ic_t	*smic;
3839 	md_m_shared_t		*shared;
3840 	int			ci;
3841 	int			i;
3842 	int			compcnt;
3843 	mm_unit_t		*un;
3844 	int			err = 0;
3845 
3846 	ASSERT(delta);
3847 	ASSERT(rtxnp);
3848 	ASSERT(delta->unp);
3849 	ASSERT(delta->uip);
3850 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
3851 
3852 	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3853 		(void) mdsyserror(&rtxnp->mde, EINVAL);
3854 		return (EINVAL);
3855 	}
3856 
3857 	un = (mm_unit_t *)delta->unp;
3858 
3859 	for (i = 0; i < NMIRROR; i++) {
3860 		sm = &un->un_sm[i];
3861 		smic = &un->un_smic[i];
3862 
3863 		if (!SMS_IS(sm, SMS_INUSE))
3864 			continue;
3865 
3866 		ASSERT(smic->sm_get_component_count);
3867 		if (!smic->sm_get_component_count) {
3868 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3869 			    md_getminor(delta->dev));
3870 			return (ENXIO);
3871 		}
3872 
3873 		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
3874 
3875 		for (ci = 0; ci < compcnt; ci++) {
3876 
3877 			ASSERT(smic->sm_shared_by_indx);
3878 			if (!smic->sm_shared_by_indx) {
3879 				(void) mdmderror(&rtxnp->mde,
3880 				    MDE_RENAME_CONFIG_ERROR,
3881 				    md_getminor(delta->dev));
3882 				return (ENXIO);
3883 			}
3884 
3885 			shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
3886 			    (sm->sm_dev, sm, ci);
3887 
3888 			ASSERT(shared);
3889 			if (!shared) {
3890 				(void) mdmderror(&rtxnp->mde,
3891 				    MDE_RENAME_CONFIG_ERROR,
3892 				    md_getminor(delta->dev));
3893 				return (ENXIO);
3894 			}
3895 
3896 			if (shared->ms_hs_id != 0) {
3897 				(void) mdmderror(&rtxnp->mde,
3898 				    MDE_SM_FAILED_COMPS,
3899 				    md_getminor(delta->dev));
3900 				return (EIO);
3901 			}
3902 
3903 			switch (shared->ms_state) {
3904 			case CS_OKAY:
3905 				break;
3906 
3907 			case CS_RESYNC:
3908 				(void) mdmderror(&rtxnp->mde,
3909 				    MDE_RESYNC_ACTIVE,
3910 				    md_getminor(delta->dev));
3911 				return (EBUSY);
3912 
3913 			default:
3914 				(void) mdmderror(&rtxnp->mde,
3915 				    MDE_SM_FAILED_COMPS,
3916 				    md_getminor(delta->dev));
3917 				return (EINVAL);
3918 			}
3919 
3920 		}
3921 	}
3922 
3923 	/* self does additional checks */
3924 	if (delta->old_role == MDRR_SELF) {
3925 		err = mirror_may_renexch_self(un, delta->uip, rtxnp);
3926 	}
3927 
3928 	return (err);
3929 }
3930 
3931 /* end of rename/exchange */
3932