1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * NAME:	raid_ioctl.c
29  *
30  * DESCRIPTION: RAID driver source file containing IOCTL operations.
31  *
32  * ROUTINES PROVIDED FOR EXTERNAL USE:
33  *	  raid_commit() - commits MD database updates for a RAID metadevice
34  *	md_raid_ioctl() - RAID metadevice IOCTL operations entry point.
35  *
36  * ROUTINES PROVIDED FOR INTERNAL USE:
37  *	 raid_getun() - Performs unit checking on a RAID metadevice
38  *    init_col_nextio() - normal backend when zeroing column of RAID metadevice.
39  *	 init_col_int() - I/O interrupt while zeroing column of RAID metadevice.
40  *  raid_init_columns() - Zero one or more columns of a RAID metadevice.
41  *	     raid_set() - used to create a RAID metadevice
42  *	     raid_get() - used to get the unit structure of a RAID metadevice
43  *	 raid_replace() - used to replace a component of a RAID metadevice
44  *	    raid_grow() - Concatenate to a RAID metadevice
45  *	  raid_change() - change dynamic values of a RAID metadevice
46  *	   raid_reset() - used to reset (clear / remove) a RAID metadevice
47  *	raid_get_geom() - used to get the geometry of a RAID metadevice
48  *	raid_get_vtoc() - used to get the VTOC on a RAID metadevice
49  *	raid_set_vtoc() - used to set the VTOC on a RAID metadevice
50  *	raid_get_extvtoc() - used to get the extended VTOC on a RAID metadevice
51  *	raid_set_extvtoc() - used to set the extended VTOC on a RAID metadevice
52  *	 raid_getdevs() - return all devices within a RAID metadevice
53  *   raid_admin_ioctl() - IOCTL operations unique to metadevices and RAID
54  */
55 
56 
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/conf.h>
60 #include <sys/file.h>
61 #include <sys/user.h>
62 #include <sys/uio.h>
63 #include <sys/t_lock.h>
64 #include <sys/buf.h>
65 #include <sys/dkio.h>
66 #include <sys/vtoc.h>
67 #include <sys/kmem.h>
68 #include <vm/page.h>
69 #include <sys/sysmacros.h>
70 #include <sys/types.h>
71 #include <sys/mkdev.h>
72 #include <sys/stat.h>
73 #include <sys/open.h>
74 #include <sys/disp.h>
75 #include <sys/modctl.h>
76 #include <sys/ddi.h>
77 #include <sys/sunddi.h>
78 #include <sys/cred.h>
79 #include <sys/lvm/mdvar.h>
80 #include <sys/lvm/md_names.h>
81 #include <sys/lvm/md_mddb.h>
82 #include <sys/lvm/md_raid.h>
83 #include <sys/lvm/md_convert.h>
84 
85 #include <sys/sysevent/eventdefs.h>
86 #include <sys/sysevent/svm.h>
87 
88 extern int		md_status;
89 extern unit_t		md_nunits;
90 extern set_t		md_nsets;
91 extern md_set_t		md_set[];
92 extern md_ops_t		raid_md_ops;
93 extern major_t		md_major;
94 extern md_krwlock_t	md_unit_array_rw;
95 extern mdq_anchor_t	md_done_daemon;
96 extern mdq_anchor_t	md_ff_daemonq;
97 extern	int		mdopen();
98 extern	int		mdclose();
99 extern	void		md_probe_one();
100 extern int		md_init_probereq(md_probedev_impl_t *,
101 				daemon_queue_t **);
102 extern md_resync_t	md_cpr_resync;
103 
104 
105 extern void dump_mr_unit(mr_unit_t *);
106 
107 typedef struct raid_ci {
108 	DAEMON_QUEUE
109 	struct raid_ci	*ci_next;
110 	mr_unit_t	*ci_un;
111 	int		ci_col;
112 	int		ci_err;
113 	int		ci_flag;
114 	size_t		ci_zerosize;
115 	diskaddr_t	ci_blkno;
116 	diskaddr_t	ci_lastblk;
117 	buf_t		ci_buf;
118 } raid_ci_t;
119 /* values for the ci_flag */
120 #define	COL_INITING	(0x0001)
121 #define	COL_INIT_DONE	(0x0002)
122 #define	COL_READY	(0x0004)
123 
124 /*
125  * NAME:	raid_getun
126  * DESCRIPTION: performs a lot of unit checking on a RAID metadevice
127  * PARAMETERS:	minor_t	      mnum - minor device number for RAID unit
128  *		md_error_t    *mde - pointer to error reporting structure
129  *		int	     flags - flags controlling the unit checks:
130  *					STALE_OK - allow stale MD memory
131  *					  NO_OLD - unit must not exist
132  *					 NO_LOCK - no IOCTL lock needed
133  *					 WR_LOCK - write IOCTL lock needed
134  *					 RD_LOCK - read IOCTL lock needed
135  *		IOLOCK	     *lock - pointer to IOCTL lock
136  *
137  * LOCKS:	obtains unit reader or writer lock via IOLOCK
138  *
139  */
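/*
 * Minimal caller sketch (this mirrors raid_get() below): on failure the
 * error has already been recorded in *mde, so the ioctl handler simply
 * returns 0 and lets the error travel back inside the ioctl packet:
 *
 *	if ((un = raid_getun(mnum, &migph->mde, RD_LOCK, lock)) == NULL)
 *		return (0);
 */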
140 static mr_unit_t *
141 raid_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
142 {
143 	mr_unit_t	*un;
144 	mdi_unit_t	*ui;
145 	set_t		setno = MD_MIN2SET(mnum);
146 
147 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
148 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
149 		return (NULL);
150 	}
151 
152 	if (!(flags & STALE_OK)) {
153 		if (md_get_setstatus(setno) & MD_SET_STALE) {
154 			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
155 			return (NULL);
156 		}
157 	}
158 
159 	ui = MDI_UNIT(mnum);
160 	if (flags & NO_OLD) {
161 		if (ui != NULL) {
162 			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
163 			return (NULL);
164 		}
165 		return ((mr_unit_t *)1);
166 	}
167 
168 	if (ui == NULL) {
169 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
170 		return (NULL);
171 	}
172 	if (flags & ARRAY_WRITER)
173 		md_array_writer(lock);
174 	else if (flags & ARRAY_READER)
175 		md_array_reader(lock);
176 
177 	if (!(flags & NO_LOCK)) {
178 		if (flags & WR_LOCK) {
179 			(void) md_ioctl_io_lock(lock, ui);
180 			(void) md_ioctl_writerlock(lock, ui);
181 		} else /* RD_LOCK */
182 			(void) md_ioctl_readerlock(lock, ui);
183 	}
184 	un = (mr_unit_t *)MD_UNIT(mnum);
185 
186 	if (un->c.un_type != MD_METARAID) {
187 		(void) mdmderror(mde, MDE_NOT_RAID, mnum);
188 		return (NULL);
189 	}
190 
191 	return (un);
192 }
193 
194 
195 /*
196  * NAME:	raid_commit
197  * DESCRIPTION: commits MD database updates for a RAID metadevice
198  * PARAMETERS:	mr_unit_t	 *un - RAID unit to update in the MD database
199  *		mddb_recid_t *extras - array of other record IDs to update
200  *
201  * LOCKS:	assumes caller holds unit writer lock
202  *
203  */
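/*
 * Note that extras must be terminated by a zero record ID; raid_replace(),
 * for example, passes up to two component record IDs in a zero-filled
 * extra_recids[3] array.
 */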
204 void
205 raid_commit(mr_unit_t *un, mddb_recid_t *extras)
206 {
207 	mddb_recid_t	*recids;
208 	int 		ri = 0;
209 	int		nrecids = 0;
210 
211 	if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)
212 		return;
213 
214 	/* Count the extra recids */
215 	if (extras != NULL) {
216 		while (extras[nrecids] != 0) {
217 			nrecids++;
218 		}
219 	}
220 
221 	/*
222 	 * Allocate space for two recids in addition to the extras:
223 	 * one for the unit structure, one for the null terminator.
224 	 */
225 	nrecids += 2;
226 	recids = (mddb_recid_t *)
227 	    kmem_zalloc(nrecids * sizeof (mddb_recid_t), KM_SLEEP);
228 
229 	if (un != NULL) {
230 		ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1);
231 		recids[ri++] = un->c.un_record_id;
232 	}
233 
234 	if (extras != NULL) {
235 		while (*extras != 0) {
236 			recids[ri++] = *extras;
237 			extras++;
238 		}
239 	}
240 
241 	if (ri > 0) {
242 		mddb_commitrecs_wrapper(recids);
243 	}
244 
245 	kmem_free(recids, nrecids * sizeof (mddb_recid_t));
246 }
247 
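/*
 * NAME:	raid_check_pw
 * DESCRIPTION: read the pre-write header of every column and verify that
 *		it records the expected column index and a consistent unit
 *		number; a mismatch marks a stale or foreign component.
 * PARAMETERS:	mr_unit_t *un - RAID unit to check
 *
 * RETURNS:	0 if all pre-write headers check out, 1 otherwise
 */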
248 static int
249 raid_check_pw(mr_unit_t *un)
250 {
251 	buf_t		bp;
252 	char		*buf;
253 	mr_column_t	*colptr;
254 	minor_t		mnum = MD_SID(un);
255 	int		i;
256 	int		err = 0;
257 	minor_t		unit;
258 
259 	buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);
260 
261 	for (i = 0; i < un->un_totalcolumncnt; i++) {
262 		md_dev64_t tmpdev;
263 
264 		colptr = &un->un_column[i];
265 
266 		tmpdev = colptr->un_dev;
267 		/*
268 		 * Open by device id
269 		 * If this device is hotspared
270 		 * use the hotspare key
271 		 */
272 		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
273 		    colptr->un_hs_key : colptr->un_orig_key);
274 		if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
275 			colptr->un_dev = tmpdev;
			kmem_free(buf, DEV_BSIZE);	/* don't leak the header buffer */
276 			return (1);
277 		}
278 		colptr->un_dev = tmpdev;
279 
280 		bzero((caddr_t)&bp, sizeof (buf_t));
281 		bp.b_back = &bp;
282 		bp.b_forw = &bp;
283 		bp.b_flags = B_READ | B_BUSY;
284 		sema_init(&bp.b_io, 0, NULL,
285 		    SEMA_DEFAULT, NULL);
286 		sema_init(&bp.b_sem, 0, NULL,
287 		    SEMA_DEFAULT, NULL);
288 		bp.b_edev = md_dev64_to_dev(colptr->un_dev);
289 		bp.b_lblkno = colptr->un_pwstart;
290 		bp.b_bcount = DEV_BSIZE;
291 		bp.b_bufsize = DEV_BSIZE;
292 		bp.b_un.b_addr = (caddr_t)buf;
293 		bp.b_offset = -1;
294 		(void) md_call_strategy(&bp, 0, NULL);
295 		if (biowait(&bp))
296 			err = 1;
297 		if (i == 0) {
298 			if (un->c.un_revision & MD_64BIT_META_DEV) {
299 				unit = ((raid_pwhdr_t *)buf)->rpw_unit;
300 			} else {
301 				unit = ((raid_pwhdr32_od_t *)buf)->rpw_unit;
302 			}
303 		}
304 		/*
305 		 * Depending on whether this is a 64-bit or a 32-bit RAID,
306 		 * the pre-write headers have a different layout.
307 		 */
308 		if (un->c.un_revision & MD_64BIT_META_DEV) {
309 			if ((((raid_pwhdr_t *)buf)->rpw_column != i) ||
310 			    (((raid_pwhdr_t *)buf)->rpw_unit != unit))
311 				err = 1;
312 		} else {
313 			if ((((raid_pwhdr32_od_t *)buf)->rpw_column != i) ||
314 			    (((raid_pwhdr32_od_t *)buf)->rpw_unit != unit))
315 				err = 1;
316 		}
317 		md_layered_close(colptr->un_dev, MD_OFLG_NULL);
318 		if (err)
319 			break;
320 	}
321 	kmem_free(buf, DEV_BSIZE);
322 	return (err);
323 }
324 
325 /*
326  * NAME:	init_col_nextio
327  * DESCRIPTION: normal backend process when zeroing column of a RAID metadevice.
328  * PARAMETERS:	raid_ci_t *cur - struct for column being zeroed
329  *
330  * LOCKS:	assumes caller holds unit reader lock,
331  *		periodically releases and reacquires unit reader lock,
332  *		broadcasts on unit conditional variable (un_cv)
333  *
334  */
335 #define	INIT_RLS_CNT	10
336 static void
337 init_col_nextio(raid_ci_t *cur)
338 {
339 	mr_unit_t	*un;
340 
341 	un = cur->ci_un;
342 
343 	cur->ci_blkno += cur->ci_zerosize;
344 
345 	mutex_enter(&un->un_mx);
346 	/* ===> update un_percent_done */
347 	un->un_init_iocnt += btodb(cur->ci_buf.b_bcount);
348 	mutex_exit(&un->un_mx);
349 
350 	/*
351 	 * When growing a device, normal I/O is still going on.
352 	 * The init thread still holds the unit reader lock which
353 	 * prevents I/O from doing state changes.
354 	 * So every INIT_RLS_CNT init I/Os, we will release the
355 	 * unit reader lock.
356 	 *
357 	 * CAVEAT:
358 	 * We know we are in the middle of a grow operation and the
359 	 * unit cannot be grown or removed (through reset or halt)
360 	 * so the mr_unit_t structure will not move or disappear.
361 	 * In addition, we know that only one of the init I/Os
362 	 * can be in init_col_nextio at a time because they are
363 	 * placed on the md_done_daemon queue and md only processes
364 	 * one element of this queue at a time. In addition, any
365 	 * code that needs to acquire the unit writer lock to change
366 	 * state is supposed to be on the md_mstr_daemon queue so
367 	 * it can be processing while we sit here waiting to get the
368 	 * unit reader lock back.
369 	 */
370 
371 	if (cur->ci_blkno < cur->ci_lastblk) {
372 		/* truncate last chunk to end_addr if needed */
373 		if (cur->ci_blkno + cur->ci_zerosize > cur->ci_lastblk) {
374 			cur->ci_zerosize = (size_t)
375 			    (cur->ci_lastblk - cur->ci_blkno);
376 		}
377 
378 		/* set address and length for I/O bufs */
379 		cur->ci_buf.b_bufsize = dbtob(cur->ci_zerosize);
380 		cur->ci_buf.b_bcount = dbtob(cur->ci_zerosize);
381 		cur->ci_buf.b_lblkno = cur->ci_blkno;
382 
383 		(void) md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL);
384 		return;
385 	}
386 	/* finished initializing this column */
387 	mutex_enter(&un->un_mx);
388 	cur->ci_flag = COL_INIT_DONE;
389 	uniqtime32(&un->un_column[cur->ci_col].un_devtimestamp);
390 	mutex_exit(&un->un_mx);
391 	cv_broadcast(&un->un_cv);
392 }
393 
394 /*
395  * NAME:	init_col_int
396  * DESCRIPTION: I/O interrupt while zeroing column of a RAID metadevice.
397  * PARAMETERS:	buf_t	  *cb - I/O buffer for which interrupt occurred
398  *
399  * LOCKS:	assumes caller holds unit reader or writer lock
400  *
401  */
402 static int
403 init_col_int(buf_t *cb)
404 {
405 	raid_ci_t	*cur;
406 
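	/* b_chain was aimed back at the column's raid_ci_t in raid_init_columns() */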
407 	cur = (raid_ci_t *)cb->b_chain;
408 	if (cb->b_flags & B_ERROR) {
409 		mutex_enter(&cur->ci_un->un_mx);
410 		cur->ci_err = EIO;
411 		mutex_exit(&cur->ci_un->un_mx);
412 		cv_broadcast(&cur->ci_un->un_cv);
413 		return (1);
414 	}
415 	daemon_request(&md_done_daemon, init_col_nextio,
416 	    (daemon_queue_t *)cur, REQ_OLD);
417 	return (1);
418 }
419 
420 /*
421  * NAME:	raid_init_columns
422  * DESCRIPTION: Zero one or more columns of a RAID metadevice.
423  * PARAMETERS:	minor_t	 mnum - RAID unit minor identifier
424  *
425  * LOCKS:	obtains and releases unit reader lock,
426  *		obtains and releases unit writer lock,
427  *		obtains and releases md_unit_array_rw write lock,
428  *		obtains and releases unit mutex (un_mx) lock,
429  *		waits on unit conditional variable (un_cv)
430  *
431  */
432 static void
433 raid_init_columns(minor_t mnum)
434 {
435 	mr_unit_t	*un;
436 	mdi_unit_t	*ui;
437 	raid_ci_t	*ci_chain = NULL, *cur;
438 	rus_state_t	state;
439 	caddr_t		zero_addr;
440 	diskaddr_t	end_off;
441 	size_t		zerosize;
442 	int		err = 0;
443 	int		ix;
444 	int		colcnt = 0;
445 	int		col;
446 	set_t		setno = MD_MIN2SET(mnum);
447 
448 	/*
449 	 * Increment the raid resync count for cpr
450 	 */
451 	mutex_enter(&md_cpr_resync.md_resync_mutex);
452 	md_cpr_resync.md_raid_resync++;
453 	mutex_exit(&md_cpr_resync.md_resync_mutex);
454 
455 	/*
456 	 * Initialization is a multiple-step process.  The first step
457 	 * is to go through the unit structure and start each device
458 	 * in the init state, writing zeros over the component.
459 	 * Next, initialize the pre-write areas so the device can be
460 	 * used if a metainit -k is done.  Then close the components.
461 	 *
462 	 * Once this is complete, set the state of each component being
463 	 * zeroed and set the correct state for the unit.
464 	 *
465 	 * Last, commit the records.
466 	 */
467 
468 	ui = MDI_UNIT(mnum);
469 	un = md_unit_readerlock(ui);
470 
471 	/* check for active init on this column */
472 	/* exiting is cpr safe */
473 	if ((un->un_init_colcnt > 0) && (un->un_resync_index != -1)) {
474 		md_unit_readerexit(ui);
475 		(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
476 		/*
477 		 * Decrement the raid resync count for cpr
478 		 */
479 		mutex_enter(&md_cpr_resync.md_resync_mutex);
480 		md_cpr_resync.md_raid_resync--;
481 		mutex_exit(&md_cpr_resync.md_resync_mutex);
482 		thread_exit();
483 	}
484 
485 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_START, SVM_TAG_METADEVICE, setno,
486 	    MD_SID(un));
487 	un->un_init_colcnt = 0;
488 	un->un_init_iocnt = 0;
489 	end_off = un->un_pwsize + (un->un_segsize * un->un_segsincolumn);
490 	zerosize = (size_t)MIN((diskaddr_t)un->un_maxio, end_off);
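	/*
	 * Each zeroing write is capped at un_maxio blocks; the single
	 * zero-filled buffer allocated below is shared by every
	 * column's init I/O.
	 */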
491 
492 	/* allocate zero-filled buffer */
493 	zero_addr = kmem_zalloc(dbtob(zerosize), KM_SLEEP);
494 
495 	for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
496 		if (un->un_column[ix].un_devstate != RCS_INIT)
497 			continue;
498 		/* allocate new column init structure */
499 		cur = (raid_ci_t *)kmem_zalloc((sizeof (raid_ci_t)), KM_SLEEP);
500 		ASSERT(cur != NULL);
501 		un->un_init_colcnt++;
502 		cur->ci_next = ci_chain;
503 		ci_chain = cur;
504 		cur->ci_un = un;
505 		cur->ci_col = ix;
506 		cur->ci_err = 0;
507 		cur->ci_flag = COL_INITING;
508 		cur->ci_zerosize = zerosize;
509 		cur->ci_blkno = un->un_column[ix].un_pwstart;
510 		cur->ci_lastblk = cur->ci_blkno + un->un_pwsize
511 		    + (un->un_segsize * un->un_segsincolumn);
512 		/* initialize static buf fields */
513 		cur->ci_buf.b_un.b_addr = zero_addr;
514 		cur->ci_buf.b_chain = (buf_t *)cur;
515 		cur->ci_buf.b_back = &cur->ci_buf;
516 		cur->ci_buf.b_forw = &cur->ci_buf;
517 		cur->ci_buf.b_iodone = init_col_int;
518 		cur->ci_buf.b_flags = B_BUSY | B_WRITE;
519 		cur->ci_buf.b_edev = md_dev64_to_dev(un->un_column[ix].un_dev);
520 		sema_init(&cur->ci_buf.b_io, 0, NULL, SEMA_DEFAULT, NULL);
521 		sema_init(&cur->ci_buf.b_sem, 0, NULL, SEMA_DEFAULT, NULL);
522 		/* set address and length for I/O bufs */
523 		cur->ci_buf.b_bufsize = dbtob(zerosize);
524 		cur->ci_buf.b_bcount = dbtob(zerosize);
525 		cur->ci_buf.b_lblkno = un->un_column[ix].un_pwstart;
526 		cur->ci_buf.b_offset = -1;
527 
528 		if (! (un->un_column[ix].un_devflags & MD_RAID_DEV_ISOPEN)) {
529 			md_dev64_t tmpdev = un->un_column[ix].un_dev;
530 			/*
531 			 * Open by device id
532 			 * If this column is hotspared then
533 			 * use the hotspare key
534 			 */
535 			tmpdev = md_resolve_bydevid(mnum, tmpdev,
536 			    HOTSPARED(un, ix) ?
537 			    un->un_column[ix].un_hs_key :
538 			    un->un_column[ix].un_orig_key);
539 			if ((cur->ci_err = md_layered_open(mnum, &tmpdev,
540 			    MD_OFLG_NULL)) == 0)
541 				un->un_column[ix].un_devflags |=
542 				    MD_RAID_DEV_ISOPEN;
543 			un->un_column[ix].un_dev = tmpdev;
544 		}
545 		if (cur->ci_err == 0)
546 			md_call_strategy(&cur->ci_buf, MD_STR_NOTTOP, NULL);
547 	}
548 
549 	md_unit_readerexit(ui);
550 	state = un->un_state;
551 	colcnt = un->un_init_colcnt;
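	/*
	 * Wait for the init I/Os: each wakeup recounts the columns still
	 * in COL_INITING and writes the pre-write area for any column
	 * that has just finished zeroing (COL_INIT_DONE -> COL_READY).
	 */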
552 	mutex_enter(&un->un_mx);
553 	while (colcnt) {
554 		cv_wait(&un->un_cv, &un->un_mx);
555 
556 		colcnt = 0;
557 		for (cur = ci_chain; cur != NULL; cur = cur->ci_next) {
558 			col = cur->ci_col;
559 			if ((cur->ci_flag != COL_INITING) || (cur->ci_err)) {
560 				if (cur->ci_err)
561 					err = cur->ci_err;
562 				else if (cur->ci_flag == COL_INIT_DONE) {
563 					(void) init_pw_area(un,
564 					    un->un_column[col].un_dev,
565 					    un->un_column[col].un_pwstart,
566 					    col);
567 					cur->ci_flag = COL_READY;
568 				}
569 			} else {
570 				colcnt++;
571 			}
572 		}
573 	}
574 	mutex_exit(&un->un_mx);
575 
576 	/* This prevents new opens */
577 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
578 	(void) md_io_writerlock(ui);
579 	un = (mr_unit_t *)md_unit_writerlock(ui);
580 	while (ci_chain) {
581 		cur = ci_chain;
582 
583 		/* take this element out of the chain */
584 		ci_chain = cur->ci_next;
585 		/* free this element */
586 		sema_destroy(&cur->ci_buf.b_io);
587 		sema_destroy(&cur->ci_buf.b_sem);
588 		if (cur->ci_err)
589 			raid_set_state(cur->ci_un, cur->ci_col,
590 			    RCS_INIT_ERRED, 0);
591 		else
592 			raid_set_state(cur->ci_un, cur->ci_col,
593 			    RCS_OKAY, 0);
594 		kmem_free(cur, sizeof (raid_ci_t));
595 	}
596 
597 	/* free the zeroed buffer */
598 	kmem_free(zero_addr, dbtob(zerosize));
599 
600 	/* determine new unit state */
601 	if (err == 0) {
602 		if (state == RUS_INIT)
603 			un->un_state = RUS_OKAY;
604 		else {
605 			un->c.un_total_blocks = un->un_grow_tb;
606 			md_nblocks_set(mnum, un->c.un_total_blocks);
607 			un->un_grow_tb = 0;
608 			if (raid_state_cnt(un, RCS_OKAY) ==
609 			    un->un_totalcolumncnt)
610 				un->un_state = RUS_OKAY;
611 		}
612 	} else {  /* error occurred */
613 		if (state & RUS_INIT)
614 			un->un_state = RUS_DOI;
615 	}
616 	uniqtime32(&un->un_timestamp);
617 	MD_STATUS(un) &= ~MD_UN_GROW_PENDING;
618 	un->un_init_colcnt = 0;
619 	un->un_init_iocnt = 0;
620 	raid_commit(un, NULL);
621 	md_unit_writerexit(ui);
622 	(void) md_io_writerexit(ui);
623 	rw_exit(&md_unit_array_rw.lock);
624 	if (err) {
625 		if (un->un_state & RUS_DOI) {
626 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL,
627 			    SVM_TAG_METADEVICE, setno, MD_SID(un));
628 		} else {
629 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED,
630 			    SVM_TAG_METADEVICE, setno, MD_SID(un));
631 		}
632 	} else {
633 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_SUCCESS,
634 		    SVM_TAG_METADEVICE, setno, MD_SID(un));
635 	}
636 	(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
637 	/*
638 	 * Decrement the raid resync count for cpr
639 	 */
640 	mutex_enter(&md_cpr_resync.md_resync_mutex);
641 	md_cpr_resync.md_raid_resync--;
642 	mutex_exit(&md_cpr_resync.md_resync_mutex);
643 	thread_exit();
644 	/*NOTREACHED*/
645 }
646 
647 static int
648 raid_init_unit(minor_t mnum, md_error_t *ep)
649 {
650 	mdi_unit_t	*ui;
651 	mr_unit_t	*un;
652 	int		rval, i;
653 	set_t		setno = MD_MIN2SET(mnum);
654 
655 	ui = MDI_UNIT(mnum);
656 	if (md_get_setstatus(setno) & MD_SET_STALE)
657 		return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));
658 
659 	/* Don't start an init if the device is not available */
660 	if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
661 		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
662 	}
663 
664 	if (raid_internal_open(mnum, (FREAD | FWRITE),
665 	    OTYP_LYR, MD_OFLG_ISINIT)) {
666 		rval = mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum);
667 		goto out;
668 	}
669 
670 	un = md_unit_readerlock(ui);
671 	un->un_percent_done = 0;
672 	md_unit_readerexit(ui);
673 	/* start resync_unit thread */
674 	(void) thread_create(NULL, 0, raid_init_columns,
675 	    (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri);
676 
677 	return (0);
678 
679 out:
680 	un = md_unit_writerlock(ui);
681 	MD_STATUS(un) &= ~MD_UN_GROW_PENDING;
682 	/* recover state */
683 	for (i = 0; i < un->un_totalcolumncnt; i++)
684 		if (COLUMN_STATE(un, i) == RCS_INIT)
685 			raid_set_state(un, i, RCS_ERRED, 0);
686 	if (un->un_state & RUS_INIT)
687 		un->un_state = RUS_DOI;
688 	raid_commit(un, NULL);
689 	md_unit_writerexit(ui);
690 	if (un->un_state & RUS_DOI) {
691 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FATAL,
692 		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
693 	} else {
694 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_INIT_FAILED,
695 		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
696 	}
697 	return (rval);
698 }
699 
700 /*
701  * NAME:	raid_regen
702  *
703  * DESCRIPTION:	regenerate all the parity on the raid device.  This
704  *		routine starts a thread that will regenerate the
705  *		parity on a raid device.  If an I/O error occurs during
706  *		this process the entire device is placed in error.
707  *
708  * PARAMETERS:	md_regen_param_t *mrp - ioctl packet
709  */
710 static void
711 regen_unit(minor_t mnum)
712 {
713 	mdi_unit_t	*ui = MDI_UNIT(mnum);
714 	mr_unit_t	*un = MD_UNIT(mnum);
715 	buf_t		buf, *bp;
716 	caddr_t		buffer;
717 	int		err = 0;
718 	diskaddr_t	total_segments;
719 	diskaddr_t	line;
720 	size_t		iosize;
721 
722 	/*
723 	 * Increment raid resync count for cpr
724 	 */
725 	mutex_enter(&md_cpr_resync.md_resync_mutex);
726 	md_cpr_resync.md_raid_resync++;
727 	mutex_exit(&md_cpr_resync.md_resync_mutex);
728 
729 	iosize = dbtob(un->un_segsize);
730 	buffer = kmem_alloc(iosize, KM_SLEEP);
731 	bp = &buf;
732 	total_segments = un->un_segsincolumn;
733 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_START, SVM_TAG_METADEVICE,
734 	    MD_UN2SET(un), MD_SID(un));
735 	un->un_percent_done = 0;
736 	init_buf(bp, B_READ | B_BUSY, iosize);
737 
738 	for (line = 0; line < total_segments; line++) {
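		/*
		 * One line spans (un_origcolumncnt - 1) data segments;
		 * reading each line through md_raid_strategy() is what
		 * drives the parity rebuild while the unit is in the
		 * regen state.
		 */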
739 		bp->b_lblkno = line *
740 		    ((un->un_origcolumncnt - 1) * un->un_segsize);
741 		bp->b_un.b_addr = buffer;
742 		bp->b_bcount = iosize;
743 		bp->b_iodone = NULL;
744 		/*
745 		 * The following assignment is only correct because
746 		 * md_raid_strategy is fine when it's only a minor number
747 		 * and not a real dev_t. Yuck.
748 		 */
749 		bp->b_edev = mnum;
750 		md_raid_strategy(bp, MD_STR_NOTTOP, NULL);
751 		if (biowait(bp)) {
752 			err = 1;
753 			break;
754 		}
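		/* progress is tracked in tenths of a percent (0..1000) */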
755 		un->un_percent_done = (uint_t)((line * 1000) /
756 		    un->un_segsincolumn);
757 		/* just to avoid rounding errors */
758 		if (un->un_percent_done > 1000)
759 			un->un_percent_done = 1000;
760 		reset_buf(bp, B_READ | B_BUSY, iosize);
761 	}
762 	destroy_buf(bp);
763 	kmem_free(buffer, iosize);
764 
765 	(void) md_io_writerlock(ui);
766 	(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
767 	(void) md_io_writerexit(ui);
768 	un = md_unit_writerlock(ui);
769 	if (!err &&
770 	    (raid_state_cnt(un, RCS_OKAY) == un->un_totalcolumncnt))
771 			un->un_state = RUS_OKAY;
772 	raid_commit(un, NULL);
773 	md_unit_writerexit(ui);
774 	if (err ||
775 	    raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) {
776 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_FAILED,
777 		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
778 	} else {
779 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_REGEN_DONE, SVM_TAG_METADEVICE,
780 		    MD_UN2SET(un), MD_SID(un));
781 	}
782 
783 	/*
784 	 * Decrement the raid resync count for cpr
785 	 */
786 	mutex_enter(&md_cpr_resync.md_resync_mutex);
787 	md_cpr_resync.md_raid_resync--;
788 	mutex_exit(&md_cpr_resync.md_resync_mutex);
789 	thread_exit();
790 }
791 
792 static int
793 raid_regen_unit(minor_t mnum, md_error_t *ep)
794 {
795 	mdi_unit_t	*ui;
796 	mr_unit_t	*un;
797 	int		i;
798 	set_t		setno = MD_MIN2SET(mnum);
799 
800 	ui = MDI_UNIT(mnum);
801 	un = (mr_unit_t *)MD_UNIT(mnum);
802 
803 	if (md_get_setstatus(setno) & MD_SET_STALE)
804 		return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));
805 
806 	/* Don't start a regen if the device is not available */
807 	if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
808 		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
809 	}
810 
811 	if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) {
812 		(void) md_unit_writerlock(ui);
813 		for (i = 0; i < un->un_totalcolumncnt; i++)
814 			raid_set_state(un, i, RCS_ERRED, 0);
815 		md_unit_writerexit(ui);
816 		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
817 	}
818 
819 	/* start resync_unit thread */
820 	(void) thread_create(NULL, 0, regen_unit,
821 	    (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri);
822 
823 	return (0);
824 }
825 
826 static int
827 raid_regen(md_regen_param_t *mrp, IOLOCK *lock)
828 {
829 	minor_t		mnum = mrp->mnum;
830 	mr_unit_t	*un;
831 
832 	mdclrerror(&mrp->mde);
833 
834 	un = md_unit_readerlock(MDI_UNIT(mnum));
835 
836 	if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
837 		md_unit_readerexit(MDI_UNIT(mnum));
838 		return (mdmderror(&mrp->mde, MDE_IN_USE, mnum));
839 	}
840 
841 	if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
842 	    (raid_state_cnt(un, RCS_RESYNC))) {
843 		md_unit_readerexit(MDI_UNIT(mnum));
844 		return (mdmderror(&mrp->mde, MDE_RESYNC_ACTIVE, mnum));
845 	}
846 
847 	if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) {
848 		md_unit_readerexit(MDI_UNIT(mnum));
849 		return (mdmderror(&mrp->mde, MDE_IN_USE, mnum));
850 	}
851 
852 	if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) ||
853 	    (! (un->un_state & RUS_OKAY))) {
854 		md_unit_readerexit(MDI_UNIT(mnum));
855 		return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum));
856 	}
857 
858 	md_unit_readerexit(MDI_UNIT(mnum));
859 
860 	/* get locks and recheck to be sure something did not change */
861 	if ((un = raid_getun(mnum, &mrp->mde, WRITERS, lock)) == NULL)
862 		return (0);
863 
864 	if ((raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) ||
865 	    (! (un->un_state & RUS_OKAY))) {
866 		return (mdmderror(&mrp->mde, MDE_RAID_NOT_OKAY, mnum));
867 	}
868 
869 	raid_set_state(un, 0, RCS_REGEN, 0);
870 	raid_commit(un, NULL);
871 	md_ioctl_droplocks(lock);
872 	return (raid_regen_unit(mnum, &mrp->mde));
873 }
874 
875 /*
876  * NAME:	raid_set
877  * DESCRIPTION: used to create a RAID metadevice
878  * PARAMETERS:	md_set_params_t *d   - pointer to set data structure
879  *		int		mode - must be FWRITE
880  *
881  * LOCKS:	none
882  *
883  */
884 static int
885 raid_set(void *d, int mode)
886 {
887 	minor_t		mnum;
888 	mr_unit_t	*un;
889 	mddb_recid_t	mr_recid;
890 	mddb_recid_t	*recids;
891 	mddb_type_t	typ1;
892 	int		err;
893 	set_t		setno;
894 	int		num_recs;
895 	int		rid;
896 	int		col;
897 	md_set_params_t	*msp = d;
898 
899 
900 	mnum = msp->mnum;
901 	setno = MD_MIN2SET(mnum);
902 
903 	mdclrerror(&msp->mde);
904 
905 	if (raid_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
906 		return (0);
907 
908 	typ1 = (mddb_type_t)md_getshared_key(setno,
909 	    raid_md_ops.md_driver.md_drivername);
910 
911 	/* create the db record for this mdstruct */
912 
913 	if (msp->options & MD_CRO_64BIT) {
914 #if defined(_ILP32)
915 		return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
916 #else
917 		mr_recid = mddb_createrec(msp->size, typ1, 0,
918 		    MD_CRO_64BIT | MD_CRO_RAID | MD_CRO_FN, setno);
919 #endif
920 	} else {
921 		mr_recid = mddb_createrec(msp->size, typ1, 0,
922 		    MD_CRO_32BIT | MD_CRO_RAID | MD_CRO_FN, setno);
923 	}
924 
925 	if (mr_recid < 0)
926 		return (mddbstatus2error(&msp->mde,
927 		    (int)mr_recid, mnum, setno));
928 
929 	/* get the address of the mdstruct */
930 	un = (mr_unit_t *)mddb_getrecaddr(mr_recid);
931 	/*
932 	 * It is okay that we muck with the mdstruct here,
933 	 * since no one else will know about the mdstruct
934 	 * until we commit it. If we crash, the record will
935 	 * be automatically purged, since we haven't
936 	 * committed it yet.
937 	 */
938 
939 	/* copy in the user's mdstruct */
940 	if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
941 	    msp->size, mode)) {
942 		mddb_deleterec_wrapper(mr_recid);
943 		return (EFAULT);
944 	}
945 	/* All 64-bit metadevices support only EFI labels. */
946 	if (msp->options & MD_CRO_64BIT) {
947 		un->c.un_flag |= MD_EFILABEL;
948 	}
949 
950 	/*
951 	 * allocate the real recids array.  since we may have to commit
952 	 * underlying metadevice records, we need an array of size:
953 	 * total number of components in the raid + 3 (one for the raid
954 	 * itself, one for the hot spare pool, one for the end marker).
955 	 */
956 	num_recs = un->un_totalcolumncnt + 3;
957 	rid = 0;
958 	recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
959 	recids[rid++] = mr_recid;
960 
961 	MD_SID(un) = mnum;
962 	MD_RECID(un) = recids[0];
963 	MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SP;
964 	MD_PARENT(un) = MD_NO_PARENT;
965 	un->un_resync_copysize = 0;
966 	un->c.un_revision |= MD_FN_META_DEV;
967 
968 	if (UNIT_STATE(un) == RUS_INIT)
969 		MD_STATUS(un) |= MD_UN_GROW_PENDING;
970 
971 	if ((UNIT_STATE(un) != RUS_INIT) && raid_check_pw(un)) {
972 		mddb_deleterec_wrapper(mr_recid);
973 		err = mderror(&msp->mde, MDE_RAID_INVALID);
974 		goto out;
975 	}
976 
977 	if (err = raid_build_incore(un, 0)) {
978 		if (un->mr_ic) {
979 			kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
980 			    un->un_totalcolumncnt);
981 			kmem_free(un->mr_ic, sizeof (*un->mr_ic));
982 		}
983 
984 		md_nblocks_set(mnum, -1ULL);
985 		MD_UNIT(mnum) = NULL;
986 
987 		mddb_deleterec_wrapper(mr_recid);
988 		goto out;
989 	}
990 
991 	/*
992 	 * Update unit availability
993 	 */
994 	md_set[setno].s_un_avail--;
995 
996 	recids[rid] = 0;
997 	if (un->un_hsp_id != -1) {
998 		/* increment the reference count of the hot spare pool */
999 		err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0,
1000 		    &recids[rid], NULL, NULL, NULL);
1001 		if (err) {
1002 			md_nblocks_set(mnum, -1ULL);
1003 			MD_UNIT(mnum) = NULL;
1004 
1005 			mddb_deleterec_wrapper(mr_recid);
1006 			goto out;
1007 		}
1008 		rid++;
1009 	}
1010 
1011 	/*
1012 	 * set the parent on any metadevice components.
1013 	 * NOTE: currently soft partitions are the only metadevices
1014 	 * which can appear within a RAID metadevice.
1015 	 */
1016 	for (col = 0; col < un->un_totalcolumncnt; col++) {
1017 		mr_column_t	*mr_col = &un->un_column[col];
1018 		md_unit_t	*comp_un;
1019 
1020 		if (md_getmajor(mr_col->un_dev) == md_major) {
1021 			comp_un = MD_UNIT(md_getminor(mr_col->un_dev));
1022 			recids[rid++] = MD_RECID(comp_un);
1023 			md_set_parent(mr_col->un_dev, MD_SID(un));
1024 		}
1025 	}
1026 
1027 	/* set the end marker */
1028 	recids[rid] = 0;
1029 
1030 	mddb_commitrecs_wrapper(recids);
1031 	md_create_unit_incore(mnum, &raid_md_ops, 1);
1032 
1033 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
1034 	    MD_SID(un));
1035 
1036 out:
1037 	kmem_free(recids, (num_recs * sizeof (mddb_recid_t)));
1038 	if (err)
1039 		return (err);
1040 
1041 	/* only attempt to init a device that is in the init state */
1042 	if (UNIT_STATE(un) != RUS_INIT)
1043 		return (0);
1044 
1045 	return (raid_init_unit(mnum, &msp->mde));
1046 }
1047 
1048 /*
1049  * NAME:	raid_get
1050  * DESCRIPTION: used to get the unit structure of a RAID metadevice
1051  * PARAMETERS:	md_i_get_t   *migp - pointer to get data structure
1052  *		int	      mode - must be FREAD
1053  *		IOLOCK	     *lock - pointer to IOCTL lock
1054  *
1055  * LOCKS:	obtains unit reader lock via IOLOCK
1056  *
1057  */
1058 static int
1059 raid_get(
1060 	void		*migp,
1061 	int		mode,
1062 	IOLOCK		*lock
1063 )
1064 {
1065 	minor_t		mnum;
1066 	mr_unit_t	*un;
1067 	md_i_get_t	*migph = migp;
1068 
1069 
1070 	mnum = migph->id;
1071 
1072 	mdclrerror(&migph->mde);
1073 
1074 	if ((un = raid_getun(mnum, &migph->mde,
1075 	    RD_LOCK, lock)) == NULL)
1076 		return (0);
1077 
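	/* a size of zero is a query for the buffer size the caller must supply */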
1078 	if (migph->size == 0) {
1079 		migph->size = un->c.un_size;
1080 		return (0);
1081 	}
1082 
1083 	if (migph->size < un->c.un_size) {
1084 		return (EFAULT);
1085 	}
1086 	if (ddi_copyout(un, (void *)(uintptr_t)migph->mdp,
1087 	    un->c.un_size, mode))
1088 		return (EFAULT);
1089 
1090 	return (0);
1091 }
1092 
1093 
1094 /*
1095  * NAME:	raid_replace
1096  * DESCRIPTION: used to replace a component of a RAID metadevice
1097  * PARAMETERS:	replace_params_t *mrp - pointer to replace data structure
1098  *		IOLOCK	     *lock - pointer to IOCTL lock
1099  *
1100  * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
1101  *		obtains and releases md_unit_array_rw write lock
1102  *
1103  */
1104 static int
1105 raid_replace(
1106 	replace_params_t	*mrp,
1107 	IOLOCK			*lock
1108 )
1109 {
1110 	minor_t		mnum = mrp->mnum;
1111 	md_dev64_t	odev = mrp->old_dev;
1112 	md_error_t	*ep = &mrp->mde;
1113 	mr_unit_t	*un;
1114 	rcs_state_t	state;
1115 	int		ix, col = -1;
1116 	int		force = 0;
1117 	int		err = 0;
1118 	replace_cmd_t	cmd;
1119 	set_t		setno;
1120 	side_t		side;
1121 	mdkey_t		devkey;
1122 	int		nkeys;
1123 	mddb_recid_t	extra_recids[3] = { 0, 0, 0 };
1124 	int		extra_rids = 0;
1125 	md_error_t	mde = mdnullerror;
1126 	sv_dev_t	sv = {MD_SET_BAD, MD_SIDEWILD, MD_KEYWILD};
1127 
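	/*
	 * sv is filled in only when a physical (non-metadevice) component
	 * is replaced, so that its old name can be removed from the name
	 * space once the new configuration has been committed.
	 */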
1128 	mdclrerror(ep);
1129 	setno = MD_MIN2SET(mnum);
1130 	side = mddb_getsidenum(setno);
1131 
1132 	un = md_unit_readerlock(MDI_UNIT(mnum));
1133 
1134 	if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
1135 	    (raid_state_cnt(un, RCS_RESYNC) != 0)) {
1136 		md_unit_readerexit(MDI_UNIT(mnum));
1137 		return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));
1138 	}
1139 
1140 	if (un->un_state & RUS_DOI) {
1141 		md_unit_readerexit(MDI_UNIT(mnum));
1142 		return (mdmderror(ep, MDE_RAID_DOI, mnum));
1143 	}
1144 
1145 	if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) ||
1146 	    (MD_STATUS(un) & MD_UN_GROW_PENDING)) {
1147 		md_unit_readerexit(MDI_UNIT(mnum));
1148 		return (mdmderror(ep, MDE_IN_USE, mnum));
1149 	}
1150 
1151 	md_unit_readerexit(MDI_UNIT(mnum));
1152 
1153 	/* get locks and recheck to be sure something did not change */
1154 	if ((un = raid_getun(mnum, ep, WRITERS, lock)) == NULL)
1155 		return (0);
1156 
1157 	if (md_getkeyfromdev(setno, side, odev, &devkey, &nkeys) != 0) {
1158 		return (mddeverror(ep, MDE_NAME_SPACE, odev));
1159 	}
1160 
1161 	for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
1162 		md_dev64_t tmpdevt = un->un_column[ix].un_orig_dev;
1163 		/*
1164 		 * Try to resolve devt again if NODEV64
1165 		 */
1166 		if (tmpdevt == NODEV64) {
1167 			tmpdevt = md_resolve_bydevid(mnum, tmpdevt,
1168 			    un->un_column[ix].un_orig_key);
1169 			un->un_column[ix].un_orig_dev = tmpdevt;
1170 		}
1171 
1172 		if (un->un_column[ix].un_orig_dev == odev) {
1173 			col = ix;
1174 			break;
1175 		} else {
1176 			if (un->un_column[ix].un_orig_dev == NODEV64) {
1177 				/*
1178 				 * Now we use the keys to match.
1179 				 * If no key found, continue.
1180 				 */
1181 				if (nkeys == 0) {
1182 					continue;
1183 				}
1184 				if (un->un_column[ix].un_orig_key == devkey) {
1185 					if (nkeys > 1)
1186 						return (mddeverror(ep,
1187 						    MDE_MULTNM, odev));
1188 					col = ix;
1189 					break;
1190 				}
1191 			}
1192 		}
1193 	}
1194 
1195 	if (col == -1)
1196 		return (mdcomperror(ep, MDE_CANT_FIND_COMP,
1197 		    mnum, odev));
1198 
1199 	if ((MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) ||
1200 	    (raid_state_cnt(un, RCS_RESYNC) != 0))
1201 		return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));
1202 
1203 	if (un->un_state & RUS_DOI)
1204 		return (mdcomperror(ep, MDE_REPL_INVAL_STATE, mnum,
1205 		    un->un_column[col].un_dev));
1206 
1207 	if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT) ||
1208 	    (MD_STATUS(un) & MD_UN_GROW_PENDING))
1209 		return (mdmderror(ep, MDE_IN_USE, mnum));
1210 
1211 	if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == FORCE_REPLACE_COMP))
1212 		force = 1;
1213 	if ((mrp->cmd == FORCE_ENABLE_COMP) || (mrp->cmd == ENABLE_COMP))
1214 		cmd = ENABLE_COMP;
1215 	if ((mrp->cmd == FORCE_REPLACE_COMP) || (mrp->cmd == REPLACE_COMP))
1216 		cmd = REPLACE_COMP;
1217 
1218 	if (un->un_state == RUS_LAST_ERRED) {
1219 		/* Must use -f force flag for unit in LAST_ERRED state */
1220 		if (!force)
1221 			return (mdmderror(ep, MDE_RAID_NEED_FORCE, mnum));
1222 
1223 		/* Must use -f force flag on ERRED column first */
1224 		if (un->un_column[col].un_devstate != RCS_ERRED) {
1225 			for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
1226 				if (un->un_column[ix].un_devstate & RCS_ERRED)
1227 					return (mdcomperror(ep,
1228 					    MDE_RAID_COMP_ERRED, mnum,
1229 					    un->un_column[ix].un_dev));
1230 			}
1231 		}
1232 
1233 		/* must use -f force flag on LAST_ERRED columns next */
1234 		if ((un->un_column[col].un_devstate != RCS_LAST_ERRED) &&
1235 		    (un->un_column[col].un_devstate != RCS_ERRED))
1236 			return (mdcomperror(ep, MDE_RAID_COMP_ERRED,
1237 			    mnum, un->un_column[col].un_dev));
1238 	}
1239 
1240 	if (un->un_state == RUS_ERRED) {
1241 		if (! (un->un_column[col].un_devstate &
1242 		    (RCS_ERRED | RCS_INIT_ERRED)))
1243 			return (mdcomperror(ep, MDE_RAID_COMP_ERRED,
1244 			    mnum, un->un_column[col].un_dev));
1245 	}
1246 
1247 	ASSERT(!(un->un_column[col].un_devflags & MD_RAID_ALT_ISOPEN));
1248 	ASSERT(!(un->un_column[col].un_devflags & MD_RAID_WRITE_ALT));
1249 
1250 	state = un->un_column[col].un_devstate;
1251 	if (state & RCS_INIT_ERRED) {
1252 		MD_STATUS(un) |= MD_UN_GROW_PENDING;
1253 		un->un_percent_done = 0;
1254 		raid_set_state(un, col, RCS_INIT, 0);
1255 	} else if (((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) &&
1256 	    resync_request(mnum, col, 0, ep))
1257 		return (mdmderror(ep, MDE_RESYNC_ACTIVE, mnum));
1258 
1259 
1260 	if (cmd == REPLACE_COMP) {
1261 		md_dev64_t tmpdev = mrp->new_dev;
1262 
1263 		/*
1264 		 * open the device by device id
1265 		 */
1266 		tmpdev = md_resolve_bydevid(mnum, tmpdev, mrp->new_key);
1267 		if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
1268 			return (mdcomperror(ep, MDE_COMP_OPEN_ERR, mnum,
1269 			    tmpdev));
1270 		}
1271 
1272 		/*
1273 		 * If it's a metadevice, make sure it gets reparented
1274 		 */
1275 		if (md_getmajor(tmpdev) == md_major) {
1276 			minor_t		new_mnum = md_getminor(tmpdev);
1277 			md_unit_t	*new_un = MD_UNIT(new_mnum);
1278 
1279 			md_set_parent(tmpdev, MD_SID(un));
1280 			extra_recids[extra_rids++] = MD_RECID(new_un);
1281 		}
1282 
1283 		mrp->new_dev = tmpdev;
1284 		un->un_column[col].un_orig_dev = tmpdev;
1285 		un->un_column[col].un_orig_key = mrp->new_key;
1286 		un->un_column[col].un_orig_pwstart = mrp->start_blk;
1287 		un->un_column[col].un_orig_devstart =
1288 		    mrp->start_blk + un->un_pwsize;
1289 
1290 		/*
1291 		 * If the old device was a metadevice, make sure to
1292 		 * reset its parent.
1293 		 */
1294 		if (md_getmajor(odev) == md_major) {
1295 			minor_t		old_mnum = md_getminor(odev);
1296 			md_unit_t	*old_un = MD_UNIT(old_mnum);
1297 
1298 			md_reset_parent(odev);
1299 			extra_recids[extra_rids++] =
1300 			    MD_RECID(old_un);
1301 		}
1302 
1303 		if (HOTSPARED(un, col)) {
1304 			md_layered_close(mrp->new_dev, MD_OFLG_NULL);
1305 			un->un_column[col].un_alt_dev = mrp->new_dev;
1306 			un->un_column[col].un_alt_pwstart = mrp->start_blk;
1307 			un->un_column[col].un_alt_devstart =
1308 			    mrp->start_blk + un->un_pwsize;
1309 			un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC;
1310 		} else {
1311 			/*
1312 			 * not hot spared.  Close the old device and
1313 			 * move the new device in.
1314 			 */
1315 			if (un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN)
1316 				md_layered_close(odev, MD_OFLG_NULL);
1317 			un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN;
1318 			un->un_column[col].un_dev = mrp->new_dev;
1319 			un->un_column[col].un_pwstart = mrp->start_blk;
1320 			un->un_column[col].un_devstart =
1321 			    mrp->start_blk + un->un_pwsize;
1322 			if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0) {
1323 				un->un_column[col].un_devflags |=
1324 				    MD_RAID_REGEN_RESYNC;
1325 			}
1326 		}
1327 		/*
1328 		 * If the old device is not a metadevice then
1329 		 * save off the set number and key so that it
1330 		 * can be removed from the namespace later.
1331 		 */
1332 		if (md_getmajor(odev) != md_major) {
1333 			sv.setno = setno;
1334 			sv.key = devkey;
1335 		}
1336 	}
1337 
1338 	if (cmd == ENABLE_COMP) {
1339 		md_dev64_t tmpdev = un->un_column[col].un_orig_dev;
1340 		mdkey_t raidkey =  un->un_column[col].un_orig_key;
1341 
1342 		/*
1343 		 * We trust the dev_t because we cannot determine the
1344 		 * dev_t from the device id, since a new disk is in the
1345 		 * same location. Since this is a call from metareplace -e dx
1346 		 * AND it is SCSI, a new dev_t is not generated.  So the
1347 		 * dev_t from the mddb is used. Before enabling the device
1348 		 * we check to make sure that multiple entries for the same
1349 		 * device do not exist in the namespace. If they do, we
1350 		 * fail the ioctl.
1351 		 * One of the many ways multiple entries in the name space
1352 		 * can occur is if one removed the failed component of a
1353 		 * RAID metadevice and put in another disk that was part of
1354 		 * another metadevice. After reboot, metadevadm would correctly
1355 		 * update the device name for the metadevice whose component
1356 		 * has moved. However, now in the metadb there are two entries
1357 		 * for the same name (ctds) that belong to different
1358 		 * metadevices. One is valid, the other is a ghost or "last
1359 		 * known as" ctds.
1360 		 */
1361 		tmpdev = md_resolve_bydevid(mnum, tmpdev, raidkey);
1362 		if (tmpdev == NODEV64)
1363 			tmpdev = md_getdevnum(setno, side, raidkey,
1364 			    MD_TRUST_DEVT);
1365 		/*
1366 		 * check for multiple entries in namespace for the
1367 		 * same dev
1368 		 */
1369 
1370 		if (md_getkeyfromdev(setno, side, tmpdev, &devkey,
1371 		    &nkeys) != 0)
1372 			return (mddeverror(ep, MDE_NAME_SPACE, tmpdev));
1373 		/*
1374 		 * If the number of keys is greater than
1375 		 * one, then we have an invalid
1376 		 * namespace. STOP and return.
1377 		 */
1378 		if (nkeys > 1)
1379 			return (mddeverror(ep, MDE_MULTNM, tmpdev));
1380 		if (devkey != raidkey)
1381 			return (mdcomperror(ep, MDE_CANT_FIND_COMP,
1382 			    mnum, tmpdev));
1383 
1384 		if (un->un_column[col].un_orig_dev == NODEV64)
1385 			un->un_column[col].un_orig_dev = tmpdev;
1386 
1387 		if (HOTSPARED(un, col)) {
1388 			un->un_column[col].un_alt_dev =
1389 			    un->un_column[col].un_orig_dev;
1390 			un->un_column[col].un_alt_pwstart =
1391 			    un->un_column[col].un_orig_pwstart;
1392 			un->un_column[col].un_alt_devstart =
1393 			    un->un_column[col].un_orig_devstart;
1394 			un->un_column[col].un_devflags |= MD_RAID_COPY_RESYNC;
1395 		} else {
1396 			if (!(un->un_column[col].un_devflags &
1397 			    MD_RAID_DEV_ISOPEN)) {
1398 				if (md_layered_open(mnum, &tmpdev,
1399 				    MD_OFLG_NULL)) {
1400 					un->un_column[col].un_dev = tmpdev;
1401 					return (mdcomperror(ep,
1402 					    MDE_COMP_OPEN_ERR, mnum, tmpdev));
1403 				}
1404 				ASSERT(tmpdev != NODEV64 &&
1405 				    tmpdev != 0);
1406 
1407 				if ((md_getmajor(tmpdev) != md_major) &&
1408 				    (md_devid_found(setno, side, raidkey)
1409 				    == 1)) {
1410 					if (md_update_namespace_did(setno, side,
1411 					    raidkey, &mde) != 0) {
1412 						cmn_err(CE_WARN,
1413 						    "md: could not"
1414 						    " update namespace\n");
1415 					}
1416 				}
1417 				un->un_column[col].un_dev =
1418 				    un->un_column[col].un_orig_dev;
1419 			}
1420 			un->un_column[col].un_devflags |= MD_RAID_DEV_ISOPEN;
1421 			un->un_column[col].un_devflags |= MD_RAID_REGEN_RESYNC;
1422 		}
1423 	}
1424 	if (mrp->has_label) {
1425 		un->un_column[col].un_devflags |= MD_RAID_HAS_LABEL;
1426 	} else {
1427 		un->un_column[col].un_devflags &= ~MD_RAID_HAS_LABEL;
1428 	}
1429 
1430 	raid_commit(un, extra_recids);
1431 
1432 	/* If the component has been replaced - clean up the name space */
1433 	if (sv.setno != MD_SET_BAD) {
1434 		md_rem_names(&sv, 1);
1435 	}
1436 
1437 	md_ioctl_droplocks(lock);
1438 
1439 	if ((cmd == ENABLE_COMP) || (cmd == FORCE_ENABLE_COMP)) {
1440 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
1441 		    setno, MD_SID(un));
1442 	} else {
1443 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
1444 		    setno, MD_SID(un));
1445 	}
1446 
1447 	if (un->un_column[col].un_devstate & RCS_INIT)
1448 		err = raid_init_unit(mnum, ep);
1449 	else if ((mrp->options & MDIOCTL_NO_RESYNC_RAID) == 0)
1450 		err = raid_resync_unit(mnum, ep);
1451 
1452 	mdclrerror(ep);
1453 	if (!err)
1454 		return (0);
1455 
1456 	/* be sure state */
1457 	/* is already set by this time */
1458 	/* fix state and commit record */
1459 	un = md_unit_writerlock(MDI_UNIT(mnum));
1460 	if (state & RCS_INIT_ERRED)
1461 		raid_set_state(un, col, state, 1);
1462 	else if (state & RCS_OKAY)
1463 		raid_set_state(un, col, RCS_ERRED, 0);
1464 	else
1465 		raid_set_state(un, col, state, 1);
1466 	raid_commit(un, NULL);
1467 	md_unit_writerexit(MDI_UNIT(mnum));
1468 	mdclrerror(ep);
1469 	return (0);
1470 }
1471 
1472 
1473 /*
1474  * NAME:	raid_set_sync
1475  * DESCRIPTION: used to sync a component of a RAID metadevice
1476  * PARAMETERS:	md_resync_ioctl_t *mrp - pointer to resync data structure
1477  *		int	      mode - must be FWRITE
1478  *		IOLOCK	     *lock - pointer to IOCTL lock
1479  *
1480  * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
1481  *		obtains and releases md_unit_array_rw write lock
1482  *
1483  */
1484 static int
1485 raid_set_sync(
1486 	md_resync_ioctl_t	*rip,
1487 	IOLOCK			*lock
1488 )
1489 {
1490 	minor_t			mnum = rip->ri_mnum;
1491 	mr_unit_t		*un;
1492 	int			init = 0;
1493 	int			resync = 0;
1494 	int			regen = 0;
1495 	int			ix;
1496 	int			err;
1497 
1498 	mdclrerror(&rip->mde);
1499 
1500 	if ((un = raid_getun(mnum, &rip->mde, WRITERS, lock)) == NULL)
1501 		return (0);
1502 
1503 	if (un->un_state & RUS_DOI)
1504 		return (mdmderror(&rip->mde, MDE_RAID_DOI, mnum));
1505 
1506 	if (un->c.un_status & MD_UN_RESYNC_ACTIVE)
1507 		return (mdmderror(&rip->mde, MDE_RESYNC_ACTIVE, mnum));
1508 
1509 	/* This prevents new opens */
1510 
1511 	rip->ri_flags = 0;
1512 	if (un->un_state & RUS_REGEN)
1513 		regen++;
1514 
1515 	if (raid_state_cnt(un, RCS_RESYNC))
1516 		resync++;
1517 
1518 	if (raid_state_cnt(un, RCS_INIT) || (un->un_state & RUS_INIT))
1519 		init++;
1520 
1521 	ASSERT(!(resync && init && regen));
1522 	md_ioctl_droplocks(lock);
1523 	rip->ri_percent_done = 0;
1524 
1525 	if (init) {
1526 		MD_STATUS(un) |= MD_UN_GROW_PENDING;
1527 		return (raid_init_unit(mnum, &rip->mde));
1528 	}
1529 
1530 	/*
1531 	 * If resync is needed, it will call raid_internal_open forcing
1532 	 * replay before the open completes.
1533 	 * Otherwise, call raid_internal_open directly to force
1534 	 * replay to complete during boot (metasync -r).
1535 	 * NOTE: the unit writer lock must remain held while setting
1536 	 *	 MD_UN_RESYNC_ACTIVE but must be released before
1537 	 *	 calling raid_resync_unit or raid_internal_open.
1538 	 */
1539 	if (resync) {
1540 		ASSERT(resync < 2);
1541 		un = md_unit_writerlock(MDI_UNIT(mnum));
1542 		MD_STATUS(un) |= MD_UN_RESYNC_ACTIVE;
1543 		/* Must release unit writer lock for resync */
1544 		/*
1545 		 * correctly setup the devices before trying to start the
1546 		 * resync operation.
1547 		 */
1548 		for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
1549 			if (un->un_column[ix].un_devstate & RCS_RESYNC) {
1550 				if ((un->un_column[ix].un_devflags &
1551 				    MD_RAID_COPY_RESYNC) &&
1552 				    HOTSPARED(un, ix)) {
1553 					un->un_column[ix].un_alt_dev =
1554 					    un->un_column[ix].un_orig_dev;
1555 					un->un_column[ix].un_alt_devstart =
1556 					    un->un_column[ix].un_orig_devstart;
1557 					un->un_column[ix].un_alt_pwstart =
1558 					    un->un_column[ix].un_orig_pwstart;
1559 				}
1560 				break;
1561 			}
1562 		}
1563 		ASSERT(un->un_column[ix].un_devflags &
1564 		    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));
1565 		rip->ri_percent_done = 0;
1566 		un->un_column[ix].un_devflags |= MD_RAID_RESYNC;
1567 		(void) resync_request(mnum, ix, 0, NULL);
1568 		md_unit_writerexit(MDI_UNIT(mnum));
1569 		err = raid_resync_unit(mnum, &rip->mde);
1570 		return (err);
1571 	}
1572 
1573 	if (regen) {
1574 		err = raid_regen_unit(mnum, &rip->mde);
1575 		return (err);
1576 	}
1577 
1578 	/* The unit requires no work, so just force replay of the device */
1579 	if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0))
1580 		return (mdmderror(&rip->mde,
1581 		    MDE_RAID_OPEN_FAILURE, mnum));
1582 	(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
1583 
1584 	return (0);
1585 }
1586 
1587 /*
1588  * NAME:	raid_get_resync
1589  * DESCRIPTION: used to check resync status on a component of a RAID metadevice
1590  * PARAMETERS:	md_resync_ioctl_t *mrp - pointer to resync data structure
1591  *		int	      mode - must be FWRITE
1592  *		IOLOCK	     *lock - pointer to IOCTL lock
1593  *
1594  * LOCKS:	none
1595  *
1596  */
1597 static int
1598 raid_get_resync(
1599 	md_resync_ioctl_t	*rip,
1600 	IOLOCK			*lock
1601 )
1602 {
1603 	minor_t			mnum = rip->ri_mnum;
1604 	mr_unit_t		*un;
1605 	u_longlong_t		percent;
1606 	int			cnt;
1607 	int			ix;
1608 	uint64_t		d;
1609 
1610 	mdclrerror(&rip->mde);
1611 
1612 	if ((un = raid_getun(mnum, &rip->mde, RD_LOCK, lock)) == NULL)
1613 		return (0);
1614 
1615 	rip->ri_flags = 0;
1616 	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
1617 		d = un->un_segsincolumn;
1618 		percent = d ? ((1000 * un->un_resync_line_index) / d) : 0;
1619 		if (percent > 1000)
1620 			percent = 1000;	/* can't go over 100% */
1621 		rip->ri_percent_done = (int)percent;
1622 		rip->ri_flags |= MD_RI_INPROGRESS;
1623 	}
1624 
1625 	if (UNIT_STATE(un) & RUS_INIT) {
1626 		d = un->un_segsize * un->un_segsincolumn *
1627 		    un->un_totalcolumncnt;
1628 		percent =
1629 		    d ? ((1000 * (u_longlong_t)un->un_init_iocnt) / d) : 0;
1630 		if (percent > 1000)
1631 			percent = 1000;	/* can't go over 100% */
1632 		rip->ri_percent_done = (int)percent;
1633 		rip->ri_flags |= MD_GROW_INPROGRESS;
1634 	} else if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
1635 		d = un->un_segsize * un->un_segsincolumn * un->un_init_colcnt;
1636 		percent =
1637 		    d ? (((u_longlong_t)un->un_init_iocnt * 1000) / d) : 0;
1638 		if (percent > 1000)
1639 			percent = 1000;
1640 		rip->ri_percent_done = (int)percent;
1641 		rip->ri_flags |= MD_GROW_INPROGRESS;
1642 	}
1643 
1644 	if (un->un_state & RUS_REGEN)
1645 		rip->ri_percent_done = un->un_percent_done;
1646 
1647 	cnt = 0;
1648 	for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
1649 		switch (un->un_column[ix].un_devstate) {
1650 		case RCS_INIT:
1651 		case RCS_ERRED:
1652 		case RCS_LAST_ERRED:
1653 			cnt++;
1654 			break;
1655 		default:
1656 			break;
1657 		}
1658 	}
1659 	d = un->un_totalcolumncnt;
1660 	rip->ri_percent_dirty = d ? (((u_longlong_t)cnt * 100) / d) : 0;
1661 	return (0);
1662 }
1663 
1664 /*
1665  * NAME:	raid_grow
1666  * DESCRIPTION: Concatenate to a RAID metadevice
1667  * PARAMETERS:	md_grow_params_t *mgp
1668  *			      - pointer to IOCGROW data structure
1669  *		int	 mode - must be FWRITE
1670  *		IOLOCK *lockp - IOCTL read/write and unit_array_rw lock
1671  *
1672  * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun),
1673  *		obtains and releases md_unit_array_rw write lock
1674  *
1675  */
1676 static int
1677 raid_grow(void *mgp, int mode, IOLOCK *lock)
1678 {
1679 	minor_t		mnum;
1680 	mr_unit_t	*un, *new_un;
1681 	mdi_unit_t	*ui;
1682 	mddb_type_t	typ1;
1683 	mddb_recid_t	mr_recid;
1684 	mddb_recid_t	old_vtoc = 0;
1685 	mddb_recid_t	*recids;
1686 	md_create_rec_option_t options;
1687 	int		err;
1688 	int		col, i;
1689 	int64_t		tb, atb;
1690 	u_longlong_t	unrev;
1691 	int		tc;
1692 	int		rval = 0;
1693 	set_t		setno;
1694 	mr_column_ic_t	*mrc;
1695 	int		num_recs, rid;
1696 	md_grow_params_t	*mgph = mgp;
1697 
1698 
1699 	mnum = mgph->mnum;
1700 
1701 	mdclrerror(&mgph->mde);
1702 
1703 	ui = MDI_UNIT(mnum);
1704 	un = md_unit_readerlock(ui);
1705 
1706 	if (MD_STATUS(un) & MD_UN_GROW_PENDING) {
1707 		md_unit_readerexit(ui);
1708 		return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
1709 	}
1710 
1711 	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
1712 		md_unit_readerexit(ui);
1713 		return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum));
1714 	}
1715 
1716 	if (UNIT_STATE(un) & RUS_LAST_ERRED) {
1717 		md_unit_readerexit(ui);
1718 		return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum));
1719 	}
1720 
1721 	if (UNIT_STATE(un) & RUS_DOI) {
1722 		md_unit_readerexit(ui);
1723 		return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum));
1724 	}
1725 
1726 	if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT)) {
1727 		md_unit_readerexit(ui);
1728 		return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
1729 	}
1730 
1731 	md_unit_readerexit(ui);
1732 
1733 	if ((un = raid_getun(mnum, &mgph->mde, WRITERS, lock)) ==
1734 	    NULL)
1735 		return (0);
1736 
1737 	if (MD_STATUS(un) & MD_UN_GROW_PENDING)
1738 		return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
1739 
1740 	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
1741 		return (mdmderror(&mgph->mde, MDE_RESYNC_ACTIVE, mnum));
1742 
1743 	if (un->c.un_size >= mgph->size)
1744 		return (EINVAL);
1745 
1746 	if (UNIT_STATE(un) & RUS_LAST_ERRED)
1747 		return (mdmderror(&mgph->mde, MDE_RAID_LAST_ERRED, mnum));
1748 
1749 	if (UNIT_STATE(un) & RUS_DOI)
1750 		return (mdmderror(&mgph->mde, MDE_RAID_DOI, mnum));
1751 
1752 	if ((raid_state_cnt(un, RCS_INIT) != 0) || (un->un_state & RUS_INIT))
1753 		return (mdmderror(&mgph->mde, MDE_IN_USE, mnum));
1754 
1755 	setno = MD_MIN2SET(mnum);
1756 
1757 	typ1 = (mddb_type_t)md_getshared_key(setno,
1758 	    raid_md_ops.md_driver.md_drivername);
1759 
1760 	/*
1761 	 * Preserve the friendly name nature of the device that is
1762 	 * growing.
1763 	 */
1764 	options = MD_CRO_RAID;
1765 	if (un->c.un_revision & MD_FN_META_DEV)
1766 		options |= MD_CRO_FN;
1767 	if (mgph->options & MD_CRO_64BIT) {
1768 #if defined(_ILP32)
1769 		return (mdmderror(&mgph->mde, MDE_UNIT_TOO_LARGE, mnum));
1770 #else
1771 		mr_recid = mddb_createrec(mgph->size, typ1, 0,
1772 		    MD_CRO_64BIT | options, setno);
1773 #endif
1774 	} else {
1775 		mr_recid = mddb_createrec(mgph->size, typ1, 0,
1776 		    MD_CRO_32BIT | options, setno);
1777 	}
1778 	if (mr_recid < 0) {
1779 		rval = mddbstatus2error(&mgph->mde, (int)mr_recid,
1780 		    mnum, setno);
1781 		return (rval);
1782 	}
1783 
1784 	/* get the address of the new unit */
1785 	new_un = (mr_unit_t *)mddb_getrecaddr(mr_recid);
1786 
1787 	/*
1788 	 * It is okay that we muck with the new unit here,
1789 	 * since no one else will know about the unit struct
1790 	 * until we commit it. If we crash, the record will
1791 	 * be automatically purged, since we haven't
1792 	 * committed it yet and the old unit struct will be found.
1793 	 */
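
	/*
	 * In outline, the record lifecycle used below is:
	 *
	 *	mr_recid = mddb_createrec(...);		record not yet durable
	 *	new_un = mddb_getrecaddr(mr_recid);	fill in the contents
	 *	(on any error) mddb_deleterec_wrapper(mr_recid);
	 *	mddb_commitrecs_wrapper(recids);	record now durable
	 *	mddb_deleterec_wrapper(un->c.un_record_id);  retire old unit
	 */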
1794 
1795 	/* copy in the user's unit struct */
1796 	err = ddi_copyin((void *)(uintptr_t)mgph->mdp, new_un,
1797 	    mgph->size, mode);
1798 	if (err) {
1799 		mddb_deleterec_wrapper(mr_recid);
1800 		return (EFAULT);
1801 	}
1802 
1803 	/* make sure columns are being added */
1804 	if (un->un_totalcolumncnt >= new_un->un_totalcolumncnt) {
1805 		mddb_deleterec_wrapper(mr_recid);
1806 		return (EINVAL);
1807 	}
1808 
1809 	/*
1810 	 * Save a few of the new unit struct's fields
1811 	 * before they get clobbered.
1812 	 */
1813 	tc = new_un->un_totalcolumncnt;
1814 	tb = new_un->c.un_total_blocks;
1815 	atb = new_un->c.un_actual_tb;
1816 	unrev = new_un->c.un_revision;
1817 
1818 	/*
1819 	 * Copy the old unit struct (static stuff)
1820 	 * into new unit struct
1821 	 */
1822 	bcopy((caddr_t)un, (caddr_t)new_un, un->c.un_size);
1823 
1824 	/*
1825 	 * Restore a few of the new unit struct values.
1826 	 */
1827 	new_un->un_totalcolumncnt = tc;
1828 	new_un->c.un_actual_tb = atb;
1829 	new_un->un_grow_tb = tb;
1830 	new_un->c.un_revision = unrev;
1831 	new_un->c.un_record_id = mr_recid;
1832 	new_un->c.un_size = mgph->size;
1833 
1834 	ASSERT(new_un->mr_ic == un->mr_ic);
1835 
1836 	/*
1837 	 * Save old column slots
1838 	 */
1839 	mrc = un->un_column_ic;
1840 
1841 	/*
1842 	 * Allocate new column slot
1843 	 */
1844 	new_un->un_column_ic = (mr_column_ic_t *)
1845 	    kmem_zalloc(sizeof (mr_column_ic_t) * new_un->un_totalcolumncnt,
1846 	    KM_SLEEP);
1847 
1848 	/*
1849 	 * Restore old column slots
1850 	 * Free the old column slots
1851 	 */
1852 	bcopy(mrc, new_un->un_column_ic,
1853 	    sizeof (mr_column_ic_t) * un->un_totalcolumncnt);
1854 	kmem_free(mrc, sizeof (mr_column_ic_t) * un->un_totalcolumncnt);
1855 
1856 	/* All 64 bit metadevices only support EFI labels. */
1857 	if (mgph->options & MD_CRO_64BIT) {
1858 		new_un->c.un_flag |= MD_EFILABEL;
1859 		/*
1860 		 * If the device was previously smaller than a terabyte,
1861 		 * and had a vtoc record attached to it, we remove the
1862 		 * vtoc record, because the layout has changed completely.
1863 		 */
1864 		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
1865 		    (un->c.un_vtoc_id != 0)) {
1866 			old_vtoc = un->c.un_vtoc_id;
1867 			new_un->c.un_vtoc_id =
1868 			    md_vtoc_to_efi_record(old_vtoc, setno);
1869 		}
1870 	}
1871 
1872 
1873 	/*
1874 	 * Allocate the real recids array.  Since we may have to commit
1875 	 * underlying metadevice records, we need an array of size:
1876 	 * total number of new components being attached + 2 (one for the
1877 	 * raid itself, one for the end marker).
1878 	 */
1879 	num_recs = new_un->un_totalcolumncnt + 2;
1880 	rid = 0;
1881 	recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
1882 	recids[rid++] = mr_recid;
1883 
1884 	for (col = un->un_totalcolumncnt;
1885 	    (col < new_un->un_totalcolumncnt); col++) {
1886 		mr_column_t	*mr_col = &new_un->un_column[col];
1887 		md_unit_t	*comp_un;
1888 
1889 		if (raid_build_pw_reservation(new_un, col) != 0) {
1890 			/* release pwslots already allocated by grow */
1891 			for (i = un->un_totalcolumncnt; i < col; i++) {
1892 				raid_free_pw_reservation(new_un, i);
1893 			}
1894 			kmem_free(new_un->un_column_ic,
1895 			    sizeof (mr_column_ic_t) *
1896 			    new_un->un_totalcolumncnt);
1897 			kmem_free(new_un->mr_ic, sizeof (*un->mr_ic));
1898 			kmem_free(recids, num_recs * sizeof (mddb_recid_t));
1899 			mddb_deleterec_wrapper(mr_recid);
1900 			return (EINVAL);
1901 		}
1902 		/*
1903 		 * set parent on metadevices being added.
1904 		 * NOTE: currently soft partitions are the only metadevices
1905 		 * which can appear within a RAID metadevice.
1906 		 */
1907 		if (md_getmajor(mr_col->un_dev) == md_major) {
1908 			comp_un = MD_UNIT(md_getminor(mr_col->un_dev));
1909 			recids[rid++] = MD_RECID(comp_un);
1910 			md_set_parent(mr_col->un_dev, MD_SID(new_un));
1911 		}
1912 		new_un->un_column[col].un_devflags = 0;
1913 	}
1914 
1915 	/* set end marker */
1916 	recids[rid] = 0;
1917 
1918 	/* commit new unit struct */
1919 	mddb_commitrecs_wrapper(recids);
1920 
1921 	/* delete old unit struct */
1922 	mddb_deleterec_wrapper(un->c.un_record_id);
1923 
1924 	/* place new unit in in-core array */
1925 	md_nblocks_set(mnum, new_un->c.un_total_blocks);
1926 	MD_UNIT(mnum) = new_un;
1927 
1928 	/*
1929 	 * If old_vtoc has a non-zero value, we know:
1930 	 * - This unit grew across the one terabyte boundary,
1931 	 * - There was a vtoc record for the unit,
1932 	 * - This vtoc record is no longer needed, because
1933 	 *   a new efi record has been created for this un.
1934 	 */
1935 	if (old_vtoc != 0) {
1936 		mddb_deleterec_wrapper(old_vtoc);
1937 	}
1938 
1939 	/* free recids */
1940 	kmem_free(recids, num_recs * sizeof (mddb_recid_t));
1941 
1942 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
1943 	    MD_UN2SET(new_un), MD_SID(new_un));
1944 	MD_STATUS(new_un) |= MD_UN_GROW_PENDING;
1945 
1946 	/*
1947 	 * Since the md_ioctl_writelock acquires the unit write lock
1948 	 * and open/close acquires the unit reader lock, it is necessary
1949 	 * to drop the unit write lock and then reacquire it as needed
1950 	 * later.
1951 	 */
1952 	md_unit_writerexit(ui);
1953 
1954 	if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) {
1955 		rval = mdmderror(&mgph->mde, MDE_RAID_OPEN_FAILURE, mnum);
1956 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
1957 		    MD_UN2SET(new_un), MD_SID(new_un));
1958 		return (rval);
1959 	}
1960 	(void) md_unit_writerlock(ui);
1961 	for (i = 0; i < new_un->un_totalcolumncnt; i++) {
1962 		if (new_un->un_column[i].un_devstate & RCS_OKAY)
1963 			(void) init_pw_area(new_un, new_un->un_column[i].un_dev,
1964 			    new_un->un_column[i].un_pwstart, i);
1965 	}
1966 	md_unit_writerexit(ui);
1967 	(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
1968 	(void) md_unit_writerlock(ui);
1969 	/* create a background thread to initialize the columns */
1970 	md_ioctl_droplocks(lock);
1971 
1972 	return (raid_init_unit(mnum, &mgph->mde));
1973 }
1974 
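/*
 * A rough userland sketch (illustrative only, not from this file) of how
 * raid_grow() is reached: the caller opens the md admin node and issues
 * MD_IOCGROW with a user copy of the grown unit struct already built:
 *
 *	md_grow_params_t mgp;
 *
 *	(void) memset(&mgp, 0, sizeof (mgp));
 *	mgp.mnum = mnum;		// minor of the RAID metadevice
 *	mgp.size = new_unit_size;	// size of the grown mr_unit_t
 *	mgp.mdp = (uintptr_t)new_unit;	// copied in by raid_grow()
 *	if (ioctl(admin_fd, MD_IOCGROW, &mgp) != 0)
 *		check mgp.mde for the md-specific error;
 *
 * admin_fd, new_unit and new_unit_size are hypothetical names.
 */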
1975 /*
1976  * NAME:	raid_reset
1977  * DESCRIPTION: used to reset (clear / remove) a RAID metadevice
1978  * PARAMETERS:	md_i_reset_t *mirp - pointer to reset data structure
1979  *
1980  * LOCKS:	obtains and releases md_unit_array_rw write lock
1981  *
1982  */
1983 static int
1984 raid_reset(md_i_reset_t	*mirp)
1985 {
1986 	minor_t		mnum = mirp->mnum;
1987 	mr_unit_t	*un;
1988 	mdi_unit_t	*ui;
1989 	set_t		setno = MD_MIN2SET(mnum);
1990 
1991 	mdclrerror(&mirp->mde);
1992 
1993 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1994 	/*
1995 	 * NOTE: need to get md_unit_writerlock to avoid conflict
1996 	 * with raid_init thread.
1997 	 */
1998 	if ((un = raid_getun(mnum, &mirp->mde, NO_LOCK, NULL)) ==
1999 	    NULL) {
2000 		rw_exit(&md_unit_array_rw.lock);
2001 		return (0);
2002 	}
2003 	ui = MDI_UNIT(mnum);
2004 
2005 	if (MD_HAS_PARENT(MD_PARENT(un))) {
2006 		rw_exit(&md_unit_array_rw.lock);
2007 		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
2008 	}
2009 
2010 	un = (mr_unit_t *)md_unit_openclose_enter(ui);
2011 	if (md_unit_isopen(MDI_UNIT(mnum))) {
2012 		md_unit_openclose_exit(ui);
2013 		rw_exit(&md_unit_array_rw.lock);
2014 		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
2015 	}
2016 	md_unit_openclose_exit(ui);
2017 	if (UNIT_STATE(un) != RUS_OKAY && !mirp->force) {
2018 		rw_exit(&md_unit_array_rw.lock);
2019 		return (mdmderror(&mirp->mde, MDE_RAID_NEED_FORCE, mnum));
2020 	}
2021 
2022 	reset_raid(un, mnum, 1);
2023 
2024 	/*
2025 	 * Update unit availability
2026 	 */
2027 	md_set[setno].s_un_avail++;
2028 
2029 	/*
2030 	 * If this is a multi-node set, reset s_un_next so all nodes
2031 	 * have the same view of the next available slot after nodes
2032 	 * are withdrawn (-w) and rejoined (-j).
2033 	 */
2034 	if (MD_MNSET_SETNO(setno)) {
2035 		(void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
2036 	}
2037 
2038 	rw_exit(&md_unit_array_rw.lock);
2039 
2040 	return (0);
2041 }
2042 
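/*
 * A rough userland sketch (illustrative only, not from this file) of the
 * contract raid_reset() enforces: the metadevice must be closed and
 * unparented, and force must be set unless the unit state is RUS_OKAY:
 *
 *	md_i_reset_t mir;
 *
 *	(void) memset(&mir, 0, sizeof (mir));
 *	mir.mnum = mnum;
 *	mir.force = 1;			// needed when state != RUS_OKAY
 *	(void) ioctl(admin_fd, MD_IOCRESET, &mir);
 *
 * admin_fd is a hypothetical name.
 */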
2043 /*
2044  * NAME:	raid_get_geom
2045  * DESCRIPTION: used to get the geometry of a RAID metadevice
2046  * PARAMETERS:	mr_unit_t    *un - RAID unit to get the geometry for
2047  *		struct dk_geom *gp - pointer to geometry data structure
2048  *
2049  * LOCKS:	none
2050  *
2051  */
2052 static int
2053 raid_get_geom(
2054 	mr_unit_t	*un,
2055 	struct dk_geom	*geomp
2056 )
2057 {
2058 	md_get_geom((md_unit_t *)un, geomp);
2059 
2060 	return (0);
2061 }
2062 
2063 /*
2064  * NAME:	raid_get_vtoc
2065  * DESCRIPTION: used to get the VTOC on a RAID metadevice
2066  * PARAMETERS:	mr_unit_t    *un - RAID unit to get the VTOC from
2067  *		struct vtoc *vtocp - pointer to VTOC data structure
2068  *
2069  * LOCKS:	none
2070  *
2071  */
2072 static int
2073 raid_get_vtoc(
2074 	mr_unit_t	*un,
2075 	struct vtoc	*vtocp
2076 )
2077 {
2078 	md_get_vtoc((md_unit_t *)un, vtocp);
2079 
2080 	return (0);
2081 }
2082 
2083 /*
2084  * NAME:	raid_set_vtoc
2085  * DESCRIPTION: used to set the VTOC on a RAID metadevice
2086  * PARAMETERS:	mr_unit_t    *un - RAID unit to set the VTOC on
2087  *		struct vtoc *vtocp - pointer to VTOC data structure
2088  *
2089  * LOCKS:	none
2090  *
2091  */
2092 static int
2093 raid_set_vtoc(
2094 	mr_unit_t	*un,
2095 	struct vtoc	*vtocp
2096 )
2097 {
2098 	return (md_set_vtoc((md_unit_t *)un, vtocp));
2099 }
2100 
2101 
2102 /*
2103  * NAME:	raid_get_extvtoc
2104  * DESCRIPTION: used to get the extended VTOC on a RAID metadevice
2105  * PARAMETERS:	mr_unit_t    *un - RAID unit to get the VTOC from
2106  *		struct extvtoc *vtocp - pointer to extended VTOC data structure
2107  *
2108  * LOCKS:	none
2109  *
2110  */
2111 static int
2112 raid_get_extvtoc(
2113 	mr_unit_t	*un,
2114 	struct extvtoc	*vtocp
2115 )
2116 {
2117 	md_get_extvtoc((md_unit_t *)un, vtocp);
2118 
2119 	return (0);
2120 }
2121 
2122 /*
2123  * NAME:	raid_set_extvtoc
2124  * DESCRIPTION: used to set the extended VTOC on a RAID metadevice
2125  * PARAMETERS:	mr_unit_t    *un - RAID unit to set the VTOC on
2126  *		struct extvtoc *vtocp - pointer to extended VTOC data structure
2127  *
2128  * LOCKS:	none
2129  *
2130  */
2131 static int
2132 raid_set_extvtoc(
2133 	mr_unit_t	*un,
2134 	struct extvtoc	*vtocp
2135 )
2136 {
2137 	return (md_set_extvtoc((md_unit_t *)un, vtocp));
2138 }
2139 
2140 
2141 
2142 /*
2143  * NAME:	raid_get_cgapart
2144  * DESCRIPTION: used to get the dk_map on a RAID metadevice
2145  * PARAMETERS:	mr_unit_t    *un - RAID unit to get the dk_map from
2146  *		struct dk_map *dkmapp - pointer to dk_map data structure
2147  *
2148  * LOCKS:	none
2149  *
2150  */
2151 
2152 static int
2153 raid_get_cgapart(
2154 	mr_unit_t	*un,
2155 	struct dk_map	*dkmapp
2156 )
2157 {
2158 	md_get_cgapart((md_unit_t *)un, dkmapp);
2159 	return (0);
2160 }
2161 
2162 /*
2163  * NAME:	raid_getdevs
2164  * DESCRIPTION: return all devices within a RAID metadevice
2165  * PARAMETERS:	md_getdevs_params_t *mgdp
2166  *			      - pointer to getdevs IOCTL data structure
2167  *		int	 mode - should be FREAD
2168  *		IOLOCK *lockp - IOCTL read/write lock
2169  *
2170  * LOCKS:	obtains unit reader lock via IOLOCK
2171  *
2172  */
2173 static int
2174 raid_getdevs(
2175 	void			*mgdp,
2176 	int			mode,
2177 	IOLOCK			*lock
2178 )
2179 {
2180 	minor_t			mnum;
2181 	mr_unit_t		*un;
2182 	md_dev64_t		*udevs;
2183 	int			i, cnt;
2184 	md_dev64_t		unit_dev;
2185 	md_getdevs_params_t	*mgdph = mgdp;
2186 
2187 
2188 	mnum = mgdph->mnum;
2189 
2190 	/* check out unit */
2191 	mdclrerror(&mgdph->mde);
2192 
2193 	if ((un = raid_getun(mnum, &mgdph->mde, RD_LOCK, lock)) == NULL)
2194 		return (0);
2195 
2196 	udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;
2197 
2198 	for (cnt = 0, i = 0; i < un->un_totalcolumncnt; i++, cnt++) {
2199 		if (cnt < mgdph->cnt) {
2200 			unit_dev = un->un_column[i].un_orig_dev;
2201 			if (md_getmajor(unit_dev) != md_major) {
2202 				if ((unit_dev = md_xlate_mini_2_targ
2203 				    (unit_dev)) == NODEV64)
2204 					return (ENODEV);
2205 			}
2206 
2207 			if (ddi_copyout((caddr_t)&unit_dev,
2208 			    (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0)
2209 				return (EFAULT);
2210 		}
2211 		if (HOTSPARED(un, i)) {
2212 			cnt++;
2213 			if (cnt >= mgdph->cnt)
2214 				continue;
2215 
2216 			unit_dev = un->un_column[i].un_dev;
2217 			if (md_getmajor(unit_dev) != md_major) {
2218 				if ((unit_dev = md_xlate_mini_2_targ
2219 				    (unit_dev)) == NODEV64)
2220 					return (ENODEV);
2221 			}
2222 
2223 			if (ddi_copyout((caddr_t)&unit_dev,
2224 			    (caddr_t)&udevs[cnt], sizeof (*udevs), mode) != 0)
2225 				return (EFAULT);
2226 		}
2227 	}
2228 	mgdph->cnt = cnt;
2229 	return (0);
2230 }
2231 
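/*
 * An illustrative note on raid_getdevs(): the loop keeps counting past
 * the end of the caller's buffer (a hot-spared column contributes two
 * entries) and writes the full count back into mgdph->cnt, so a caller
 * can size its buffer with two calls, roughly:
 *
 *	mgd.cnt = 0;
 *	(void) ioctl(admin_fd, MD_IOCGET_DEVS, &mgd);	// learn the count
 *	mgd.devs = (uintptr_t)calloc(mgd.cnt, sizeof (md_dev64_t));
 *	(void) ioctl(admin_fd, MD_IOCGET_DEVS, &mgd);	// fetch the devs
 *
 * admin_fd and mgd are hypothetical names; mgd.mnum must also be set.
 */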
2232 /*
2233  * NAME:	raid_change
2234  * DESCRIPTION: used to change the following dynamic values:
2235  *			the hot spare pool
2236  *		in the unit structure of a RAID metadevice
2237  * PARAMETERS:	md_raid_params_t   *mrp - pointer to change data structure
2238  *		IOLOCK	     *lock - pointer to IOCTL lock
2239  *
2240  * LOCKS:	obtains unit writer lock via IOLOCK (through raid_getun)
2241  *
2242  */
2243 static int
2244 raid_change(
2245 	md_raid_params_t	*mrp,
2246 	IOLOCK			*lock
2247 )
2248 {
2249 	minor_t		mnum = mrp->mnum;
2250 	mr_unit_t	*un;
2251 	int		ix;
2252 	mddb_recid_t	recids[3] = {0, 0, 0};
2253 	int		err;
2254 	int		irecid;
2255 	int		inc_new_hsp = 0;
2256 
2257 	mdclrerror(&mrp->mde);
2258 
2259 	if ((un = raid_getun(mnum, &mrp->mde, WR_LOCK, lock)) == NULL)
2260 		return (0);
2261 
2262 	if (!mrp->params.change_hsp_id)
2263 		return (0);
2264 
2265 	/* verify that no hotspare is in use */
2266 	for (ix = 0; ix < un->un_totalcolumncnt; ix++) {
2267 		if (HOTSPARED(un, ix)) {
2268 			return (mdmderror(&mrp->mde, MDE_HS_IN_USE, mnum));
2269 		}
2270 	}
2271 
2272 	/* replace the hot spare pool */
2273 
2274 	irecid = 0;
2275 	if (mrp->params.hsp_id != -1) {
2276 		/* increment the reference count of the new hsp */
2277 		err = md_hot_spare_ifc(HSP_INCREF, mrp->params.hsp_id, 0, 0,
2278 		    &recids[0], NULL, NULL, NULL);
2279 		if (err) {
2280 			return (mdhsperror(&mrp->mde, MDE_INVAL_HSP,
2281 			    mrp->params.hsp_id));
2282 		}
2283 		inc_new_hsp = 1;
2284 		irecid++;
2285 	}
2286 
2287 	if (un->un_hsp_id != -1) {
2288 		/* decrement the reference count of the old hsp */
2289 		err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
2290 		    &recids[irecid], NULL, NULL, NULL);
2291 		if (err) {
2292 			err = mdhsperror(&mrp->mde, MDE_INVAL_HSP,
2293 			    mrp->params.hsp_id);
2294 			if (inc_new_hsp) {
2295 				(void) md_hot_spare_ifc(HSP_DECREF,
2296 				    mrp->params.hsp_id, 0, 0,
2297 				    &recids[0], NULL, NULL, NULL);
2298 				/*
2299 				 * Don't need to commit the record,
2300 				 * because it wasn't committed before
2301 				 */
2302 			}
2303 			return (err);
2304 		}
2305 	}
2306 
2307 	un->un_hsp_id = mrp->params.hsp_id;
2308 
2309 	raid_commit(un, recids);
2310 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
2311 	    MD_UN2SET(un), MD_SID(un));
2312 
2313 	/* Now trigger hot spare processing in case one is needed. */
2314 	if ((un->un_hsp_id != -1) && (un->un_state == RUS_ERRED))
2315 		(void) raid_hotspares();
2316 
2317 	return (0);
2318 }
2319 
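/*
 * The hot spare pool swap in raid_change() is ordered so that a failure
 * at any step leaves the unit with a usable pool:
 *
 *	incref(new) fails -> return; the unit still holds the old pool
 *	decref(old) fails -> decref(new) to roll back, then return
 *	both succeed      -> un->un_hsp_id = new, commit the records
 */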
2320 /*
2321  * NAME:	raid_admin_ioctl
2322  * DESCRIPTION: IOCTL operations unique to metadevices and RAID
2323  * PARAMETERS:	int	  cmd - IOCTL command to be executed
2324  *		void	*data - pointer to IOCTL data structure
2325  *		int	 mode - either FREAD or FWRITE
2326  *		IOLOCK *lockp - IOCTL read/write lock
2327  *
2328  * LOCKS:	none
2329  *
2330  */
2331 static int
2332 raid_admin_ioctl(
2333 	int		cmd,
2334 	void		*data,
2335 	int		mode,
2336 	IOLOCK		*lockp
2337 )
2338 {
2339 	size_t		sz = 0;
2340 	void		*d = NULL;
2341 	int		err = 0;
2342 
2343 	/* We can only handle 32-bit clients for internal commands */
2344 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
2345 		return (EINVAL);
2346 	}
2347 
2348 
2349 	/* dispatch ioctl */
2350 	switch (cmd) {
2351 
2352 	case MD_IOCSET:
2353 	{
2354 		if (! (mode & FWRITE))
2355 			return (EACCES);
2356 
2357 		sz = sizeof (md_set_params_t);
2358 		d = kmem_alloc(sz, KM_SLEEP);
2359 
2360 		if (ddi_copyin(data, d, sz, mode)) {
2361 			err = EFAULT;
2362 			break;
2363 		}
2364 
2365 		err = raid_set(d, mode);
2366 		break;
2367 	}
2368 
2369 	case MD_IOCGET:
2370 	{
2371 		if (! (mode & FREAD))
2372 			return (EACCES);
2373 
2374 		sz = sizeof (md_i_get_t);
2375 		d = kmem_alloc(sz, KM_SLEEP);
2376 
2377 		if (ddi_copyin(data, d, sz, mode)) {
2378 			err = EFAULT;
2379 			break;
2380 		}
2381 
2382 		err = raid_get(d, mode, lockp);
2383 		break;
2384 	}
2385 
2386 	case MD_IOCREPLACE:
2387 	{
2388 		if (! (mode & FWRITE))
2389 			return (EACCES);
2390 
2391 		sz = sizeof (replace_params_t);
2392 		d = kmem_alloc(sz, KM_SLEEP);
2393 
2394 		if (ddi_copyin(data, d, sz, mode)) {
2395 			err = EFAULT;
2396 			break;
2397 		}
2398 
2399 		err = raid_replace((replace_params_t *)d, lockp);
2400 		break;
2401 	}
2402 
2403 	case MD_IOCSETSYNC:
2404 	{
2405 		if (! (mode & FWRITE))
2406 			return (EACCES);
2407 
2408 		sz = sizeof (md_resync_ioctl_t);
2409 		d = kmem_alloc(sz, KM_SLEEP);
2410 
2411 		if (ddi_copyin(data, d, sz, mode)) {
2412 			err = EFAULT;
2413 			break;
2414 		}
2415 
2416 		err = raid_set_sync((md_resync_ioctl_t *)d, lockp);
2417 		break;
2418 	}
2419 
2420 	case MD_IOCGETSYNC:
2421 	{
2422 		if (! (mode & FREAD))
2423 			return (EACCES);
2424 
2425 		sz = sizeof (md_resync_ioctl_t);
2426 		d = kmem_alloc(sz, KM_SLEEP);
2427 
2428 		if (ddi_copyin(data, d, sz, mode)) {
2429 			err = EFAULT;
2430 			break;
2431 		}
2432 		err = raid_get_resync((md_resync_ioctl_t *)d, lockp);
2433 
2434 		break;
2435 	}
2436 
2437 	case MD_IOCGROW:
2438 	{
2439 		if (! (mode & FWRITE))
2440 			return (EACCES);
2441 
2442 		sz = sizeof (md_grow_params_t);
2443 		d = kmem_alloc(sz, KM_SLEEP);
2444 
2445 		if (ddi_copyin(data, d, sz, mode)) {
2446 			err = EFAULT;
2447 			break;
2448 		}
2449 
2450 		err = raid_grow(d, mode, lockp);
2451 		break;
2452 	}
2453 
2454 	case MD_IOCCHANGE:
2455 	{
2456 		if (! (mode & FWRITE))
2457 			return (EACCES);
2458 
2459 		sz = sizeof (md_raid_params_t);
2460 		d = kmem_alloc(sz, KM_SLEEP);
2461 
2462 		if (ddi_copyin(data, d, sz, mode)) {
2463 			err = EFAULT;
2464 			break;
2465 		}
2466 
2467 		err = raid_change((md_raid_params_t *)d, lockp);
2468 		break;
2469 	}
2470 
2471 	case MD_IOCRESET:
2472 	{
2473 		if (! (mode & FWRITE))
2474 			return (EACCES);
2475 
2476 		sz = sizeof (md_i_reset_t);
2477 		d = kmem_alloc(sz, KM_SLEEP);
2478 
2479 		if (ddi_copyin(data, d, sz, mode)) {
2480 			err = EFAULT;
2481 			break;
2482 		}
2483 
2484 		err = raid_reset((md_i_reset_t *)d);
2485 		break;
2486 	}
2487 
2488 	case MD_IOCGET_DEVS:
2489 	{
2490 		if (! (mode & FREAD))
2491 			return (EACCES);
2492 
2493 		sz = sizeof (md_getdevs_params_t);
2494 		d = kmem_alloc(sz, KM_SLEEP);
2495 
2496 		if (ddi_copyin(data, d, sz, mode)) {
2497 			err = EFAULT;
2498 			break;
2499 		}
2500 
2501 		err = raid_getdevs(d, mode, lockp);
2502 		break;
2503 	}
2504 
2505 	case MD_IOCSETREGEN:
2506 	{
2507 		if (! (mode & FWRITE))
2508 			return (EACCES);
2509 
2510 		sz = sizeof (md_regen_param_t);
2511 		d = kmem_alloc(sz, KM_SLEEP);
2512 
2513 		if (ddi_copyin(data, d, sz, mode)) {
2514 			err = EFAULT;
2515 			break;
2516 		}
2517 
2518 		err = raid_regen((md_regen_param_t *)d, lockp);
2519 		break;
2520 	}
2521 
2522 	case MD_IOCPROBE_DEV:
2523 	{
2524 		md_probedev_impl_t	*p = NULL;
2525 		md_probedev_t		*ph = NULL;
2526 		daemon_queue_t		*hdr = NULL;
2527 		int			i;
2528 		size_t			sz1 = 0;
2529 
2530 
2531 		if (! (mode & FREAD))
2532 			return (EACCES);
2533 
2534 		sz = sizeof (md_probedev_t);
2535 
2536 		d = kmem_alloc(sz, KM_SLEEP);
2537 
2538 		/* now copy in the data */
2539 		if (ddi_copyin(data, d, sz, mode)) {
2540 			err = EFAULT;
2541 			goto free_mem;
2542 		}
2543 
2544 		/*
2545 		 * Sanity-check the args; the test name must contain the
2546 		 * keyword "probe".
2547 		 */
2548 		p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);
2549 		p->probe_sema = NULL;
2550 		p->probe_mx = NULL;
2551 		p->probe.mnum_list = (uint64_t)NULL;
2552 
2553 		ph = (md_probedev_t *)d;
2554 		p->probe.nmdevs = ph->nmdevs;
2555 		(void) strcpy(p->probe.test_name, ph->test_name);
2556 		bcopy(&ph->md_driver, &(p->probe.md_driver),
2557 		    sizeof (md_driver_t));
2558 
2559 		if ((p->probe.nmdevs < 1) ||
2560 		    (strstr(p->probe.test_name, "probe") == NULL)) {
2561 			err = EINVAL;
2562 			goto free_mem;
2563 		}
2564 
2565 		sz1 = sizeof (minor_t) * p->probe.nmdevs;
2566 
2567 		p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz1,
2568 		    KM_SLEEP);
2569 
2570 		if (ddi_copyin((caddr_t)(uintptr_t)ph->mnum_list,
2571 		    (caddr_t)(uintptr_t)p->probe.mnum_list, sz1, mode)) {
2572 			err = EFAULT;
2573 			goto free_mem;
2574 		}
2575 
2576 		if (err = md_init_probereq(p, &hdr))
2577 			goto free_mem;
2578 
2579 		/*
2580 		 * put the request on the queue and wait.
2581 		 */
2582 
2583 		daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);
2584 
2585 		(void) IOLOCK_RETURN(0, lockp);
2586 		/* wait for the events to occur */
2587 		for (i = 0; i < p->probe.nmdevs; i++) {
2588 			sema_p(PROBE_SEMA(p));
2589 		}
2590 		while (md_ioctl_lock_enter() == EINTR)
2591 			;
2592 
2593 		/*
2594 		 * Clean up. The hdr list is freed in the probe routines,
2595 		 * so the list is NULL by the time we get here.
2596 		 */
2597 free_mem:
2598 		if (p) {
2599 			if (p->probe_sema != NULL) {
2600 				sema_destroy(PROBE_SEMA(p));
2601 				kmem_free(p->probe_sema, sizeof (ksema_t));
2602 			}
2603 			if (p->probe_mx != NULL) {
2604 				mutex_destroy(PROBE_MX(p));
2605 				kmem_free(p->probe_mx, sizeof (kmutex_t));
2606 			}
2607 			if (p->probe.mnum_list)
2608 				kmem_free((caddr_t)(uintptr_t)
2609 				    p->probe.mnum_list, sz1);
2610 
2611 			kmem_free(p, sizeof (md_probedev_impl_t));
2612 		}
2613 		break;
2614 	}
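
	/*
	 * A note on the probe flow above: the request is fanned out to
	 * md_ff_daemonq, the ioctl lock is dropped via IOLOCK_RETURN()
	 * so the probes can make progress, the caller then blocks on one
	 * sema_p() per metadevice probed, and finally reacquires the
	 * ioctl lock via md_ioctl_lock_enter() before falling into the
	 * common copyout path.
	 */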
2615 
2616 	default:
2617 		return (ENOTTY);
2618 	}
2619 
2620 	/*
2621 	 * copyout and free any args
2622 	 */
2623 	if (sz != 0) {
2624 		if (err == 0) {
2625 			if (ddi_copyout(d, data, sz, mode) != 0) {
2626 				err = EFAULT;
2627 			}
2628 		}
2629 		kmem_free(d, sz);
2630 	}
2631 	return (err);
2632 }
2633 
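/*
 * Every case in raid_admin_ioctl() follows the same envelope, which is
 * also the shape a new admin ioctl would take (hypothetical names
 * MD_IOCNEW, md_new_params_t and raid_new used only for illustration):
 *
 *	case MD_IOCNEW:
 *	{
 *		if (! (mode & FWRITE))
 *			return (EACCES);
 *		sz = sizeof (md_new_params_t);
 *		d = kmem_alloc(sz, KM_SLEEP);
 *		if (ddi_copyin(data, d, sz, mode)) {
 *			err = EFAULT;
 *			break;
 *		}
 *		err = raid_new(d, lockp);
 *		break;
 *	}
 *
 * The shared tail then copies the (possibly updated) struct back out to
 * the user and frees the kernel copy.
 */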
2634 /*
2635  * NAME:	md_raid_ioctl
2636  * DESCRIPTION: RAID metadevice IOCTL operations entry point.
2637  * PARAMETERS:	md_dev64_t dev - RAID device identifier
2638  *		int	  cmd  - IOCTL command to be executed
2639  *		void	*data  - pointer to IOCTL data structure
2640  *		int	 mode  - either FREAD or FWRITE
2641  *		IOLOCK *lockp  - IOCTL read/write lock
2642  *
2643  * LOCKS:	none
2644  *
2645  */
2646 int
2647 md_raid_ioctl(
2648 	dev_t		dev,
2649 	int		cmd,
2650 	void		*data,
2651 	int		mode,
2652 	IOLOCK		*lockp
2653 )
2654 {
2655 	minor_t		mnum = getminor(dev);
2656 	mr_unit_t	*un;
2657 	int		err = 0;
2658 
2659 	/* handle admin ioctls */
2660 	if (mnum == MD_ADM_MINOR)
2661 		return (raid_admin_ioctl(cmd, data, mode, lockp));
2662 
2663 	/* check unit */
2664 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
2665 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
2666 	    ((un = MD_UNIT(mnum)) == NULL))
2667 		return (ENXIO);
2668 
2669 	/* is this a supported ioctl? */
2670 	err = md_check_ioctl_against_unit(cmd, un->c);
2671 	if (err != 0) {
2672 		return (err);
2673 	}
2674 
2675 	/* dispatch ioctl */
2676 	switch (cmd) {
2677 
2678 	case DKIOCINFO:
2679 	{
2680 		struct dk_cinfo *p;
2681 
2682 		if (! (mode & FREAD))
2683 			return (EACCES);
2684 
2685 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
2686 
2687 		get_info(p, mnum);
2688 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
2689 			err = EFAULT;
2690 
2691 		kmem_free(p, sizeof (*p));
2692 		return (err);
2693 	}
2694 
2695 	case DKIOCGMEDIAINFO:
2696 	{
2697 		struct dk_minfo	p;
2698 
2699 		if (! (mode & FREAD))
2700 			return (EACCES);
2701 
2702 		get_minfo(&p, mnum);
2703 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
2704 			err = EFAULT;
2705 
2706 		return (err);
2707 	}
2708 
2709 	case DKIOCGGEOM:
2710 	{
2711 		struct dk_geom	*p;
2712 
2713 		if (! (mode & FREAD))
2714 			return (EACCES);
2715 
2716 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
2717 
2718 		if ((err = raid_get_geom(un, p)) == 0) {
2719 			if (ddi_copyout((caddr_t)p, data, sizeof (*p),
2720 			    mode) != 0)
2721 				err = EFAULT;
2722 		}
2723 
2724 		kmem_free(p, sizeof (*p));
2725 		return (err);
2726 	}
2727 
2728 	case DKIOCGVTOC:
2729 	{
2730 		struct vtoc	*vtoc;
2731 
2732 		if (! (mode & FREAD))
2733 			return (EACCES);
2734 
2735 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2736 		if ((err = raid_get_vtoc(un, vtoc)) != 0) {
2737 			kmem_free(vtoc, sizeof (*vtoc));
2738 			return (err);
2739 		}
2740 
2741 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2742 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2743 				err = EFAULT;
2744 		}
2745 #ifdef _SYSCALL32
2746 		else {
2747 			struct vtoc32	*vtoc32;
2748 
2749 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2750 
2751 			vtoctovtoc32((*vtoc), (*vtoc32));
2752 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2753 				err = EFAULT;
2754 			kmem_free(vtoc32, sizeof (*vtoc32));
2755 		}
2756 #endif /* _SYSCALL32 */
2757 
2758 		kmem_free(vtoc, sizeof (*vtoc));
2759 		return (err);
2760 	}
2761 
2762 	case DKIOCSVTOC:
2763 	{
2764 		struct vtoc	*vtoc;
2765 
2766 		if (! (mode & FWRITE))
2767 			return (EACCES);
2768 
2769 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2770 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2771 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2772 				err = EFAULT;
2773 			}
2774 		}
2775 #ifdef _SYSCALL32
2776 		else {
2777 			struct vtoc32	*vtoc32;
2778 
2779 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2780 
2781 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2782 				err = EFAULT;
2783 			} else {
2784 				vtoc32tovtoc((*vtoc32), (*vtoc));
2785 			}
2786 			kmem_free(vtoc32, sizeof (*vtoc32));
2787 		}
2788 #endif /* _SYSCALL32 */
2789 
2790 		if (err == 0)
2791 			err = raid_set_vtoc(un, vtoc);
2792 
2793 		kmem_free(vtoc, sizeof (*vtoc));
2794 		return (err);
2795 	}
2796 
2797 	case DKIOCGEXTVTOC:
2798 	{
2799 		struct extvtoc	*extvtoc;
2800 
2801 		if (! (mode & FREAD))
2802 			return (EACCES);
2803 
2804 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2805 		if ((err = raid_get_extvtoc(un, extvtoc)) != 0) {
2806 			kmem_free(extvtoc, sizeof (*extvtoc));
2807 			return (err);
2808 		}
2809 
2810 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2811 			err = EFAULT;
2812 
2813 		kmem_free(extvtoc, sizeof (*extvtoc));
2814 		return (err);
2815 	}
2816 
2817 	case DKIOCSEXTVTOC:
2818 	{
2819 		struct extvtoc	*extvtoc;
2820 
2821 		if (! (mode & FWRITE))
2822 			return (EACCES);
2823 
2824 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2825 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2826 			err = EFAULT;
2827 		}
2828 
2829 		if (err == 0)
2830 			err = raid_set_extvtoc(un, extvtoc);
2831 
2832 		kmem_free(extvtoc, sizeof (*extvtoc));
2833 		return (err);
2834 	}
2835 
2836 	case DKIOCGAPART:
2837 	{
2838 		struct dk_map	dmp;
2839 
2840 		if ((err = raid_get_cgapart(un, &dmp)) != 0) {
2841 			return (err);
2842 		}
2843 
2844 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2845 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2846 			    mode) != 0)
2847 				err = EFAULT;
2848 		}
2849 #ifdef _SYSCALL32
2850 		else {
2851 			struct dk_map32 dmp32;
2852 
2853 			dmp32.dkl_cylno = dmp.dkl_cylno;
2854 			dmp32.dkl_nblk = dmp.dkl_nblk;
2855 
2856 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2857 			    mode) != 0)
2858 				err = EFAULT;
2859 		}
2860 #endif /* _SYSCALL32 */
2861 
2862 		return (err);
2863 	}
2864 	case DKIOCGETEFI:
2865 	{
2866 		/*
2867 		 * This one is handled centrally; there is no need to
2868 		 * duplicate the code for every type of metadevice.
2869 		 */
2870 		return (md_dkiocgetefi(mnum, data, mode));
2871 	}
2872 
2873 	case DKIOCSETEFI:
2874 	{
2875 		/*
2876 		 * This one is handled centrally; there is no need to
2877 		 * duplicate the code for every type of metadevice.
2878 		 */
2879 		return (md_dkiocsetefi(mnum, data, mode));
2880 	}
2881 
2882 	case DKIOCPARTITION:
2883 	{
2884 		return (md_dkiocpartition(mnum, data, mode));
2885 	}
2886 
2887 	default:
2888 		return (ENOTTY);
2889 	}
2890 }
2891 
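/*
 * A note on the DKIOC*VTOC cases above: mixed data models are handled
 * explicitly, so a 32-bit client of a 64-bit kernel exchanges the
 * struct vtoc32 shape, converted with the vtoctovtoc32()/vtoc32tovtoc()
 * macros, rather than a raw copy of the native struct vtoc.
 */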
2892 /*
2893  * rename/exchange named service entry points and support functions follow.
2894  * Most functions are handled generically, except for raid-specific locking
2895  * and checking.
2896  */
2897 
2898 /*
2899  * NAME:	raid_may_renexch_self
2900  * DESCRIPTION: support routine for rename check ("MDRNM_CHECK") named service
2901  * PARAMETERS:	mr_unit_t	*un - unit struct of raid unit to be renamed
2902  *		mdi_unit_t	*ui - in-core unit struct of same raid unit
2903  *		md_rentxn_t	*rtxnp - rename transaction state
2904  *
2905  * LOCKS:	none
2906  *
2907  */
2908 static int
2909 raid_may_renexch_self(
2910 	mr_unit_t	*un,
2911 	mdi_unit_t	*ui,
2912 	md_rentxn_t	*rtxnp)
2913 {
2914 	minor_t	from_min;
2915 	minor_t	to_min;
2916 	bool_t	toplevel;
2917 	bool_t	related;
2918 
2919 	from_min = rtxnp->from.mnum;
2920 	to_min = rtxnp->to.mnum;
2921 
2922 	if (!un || !ui) {
2923 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2924 		    from_min);
2925 		return (EINVAL);
2926 	}
2927 
2928 	ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD));
2929 	if (MD_CAPAB(un) & MD_CAN_META_CHILD) {
2930 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2931 		return (EINVAL);
2932 	}
2933 
2934 	if (MD_PARENT(un) == MD_MULTI_PARENT) {
2935 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2936 		return (EINVAL);
2937 	}
2938 
2939 	toplevel = !MD_HAS_PARENT(MD_PARENT(un));
2940 
2941 	/* we're related if trying to swap with our parent */
2942 	related = (!toplevel) && (MD_PARENT(un) == to_min);
2943 
2944 	switch (rtxnp->op) {
2945 	case MDRNOP_EXCHANGE:
2946 
2947 		if (!related) {
2948 			(void) mdmderror(&rtxnp->mde,
2949 			    MDE_RENAME_TARGET_UNRELATED, to_min);
2950 			return (EINVAL);
2951 		}
2952 
2953 		break;
2954 
2955 	case MDRNOP_RENAME:
2956 		/*
2957 		 * if from is top-level and is open, then the kernel is using
2958 		 * the md_dev64_t.
2959 		 */
2960 
2961 		if (toplevel && md_unit_isopen(ui)) {
2962 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
2963 			    from_min);
2964 			return (EBUSY);
2965 		}
2966 		break;
2967 
2968 	default:
2969 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2970 		    from_min);
2971 		return (EINVAL);
2972 	}
2973 
2974 	return (0);	/* ok */
2975 }
2976 
2977 /*
2978  * NAME:	raid_rename_check
2979  * DESCRIPTION: ("MDRNM_CHECK") rename/exchange named service entry point
2980  * PARAMETERS:	md_rendelta_t	*delta - describes changes to be made to this
2981  *					 raid device for rename transaction
2982  *		md_rentxn_t	*rtxnp - rename transaction state
2983  *
2984  * LOCKS:	none
2985  *
2986  */
2987 intptr_t
2988 raid_rename_check(
2989 	md_rendelta_t	*delta,
2990 	md_rentxn_t	*rtxnp)
2991 {
2992 	int		 err	= 0;
2993 	int		 column;
2994 	mr_unit_t	*un;
2995 
2996 	ASSERT(delta);
2997 	ASSERT(rtxnp);
2998 	ASSERT(delta->unp);
2999 	ASSERT(delta->uip);
3000 
3001 	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3002 		(void) mdsyserror(&rtxnp->mde, EINVAL);
3003 		return (EINVAL);
3004 	}
3005 
3006 	un = (mr_unit_t *)delta->unp;
3007 
3008 	for (column = 0; column < un->un_totalcolumncnt; column++) {
3009 		rcs_state_t	colstate;
3010 
3011 		colstate = un->un_column[column].un_devstate;
3012 
3013 		if (colstate & RCS_LAST_ERRED) {
3014 			(void) mdmderror(&rtxnp->mde, MDE_RAID_LAST_ERRED,
3015 			    md_getminor(delta->dev));
3016 			return (EINVAL);
3017 		}
3018 
3019 		if (colstate & RCS_INIT_ERRED) {
3020 			(void) mdmderror(&rtxnp->mde, MDE_RAID_DOI,
3021 			    md_getminor(delta->dev));
3022 			return (EINVAL);
3023 		}
3024 
3025 		/* How did we get this far before detecting this? */
3026 		if (colstate & RCS_RESYNC) {
3027 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3028 			    md_getminor(delta->dev));
3029 			return (EBUSY);
3030 		}
3031 
3032 		if (colstate & RCS_ERRED) {
3033 			(void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3034 			    md_getminor(delta->dev));
3035 			return (EINVAL);
3036 		}
3037 
3038 		if (!(colstate & RCS_OKAY)) {
3039 			(void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3040 			    md_getminor(delta->dev));
3041 			return (EINVAL);
3042 		}
3043 
3044 		if (HOTSPARED(un, column)) {
3045 			(void) mdmderror(&rtxnp->mde, MDE_RAID_NOT_OKAY,
3046 			    md_getminor(delta->dev));
3047 			return (EINVAL);
3048 		}
3049 	}
3050 
3051 	/* self does additional checks */
3052 	if (delta->old_role == MDRR_SELF) {
3053 		err = raid_may_renexch_self((mr_unit_t *)delta->unp,
3054 		    delta->uip, rtxnp);
3055 	}
3056 	return (err);
3057 }
3058 
3059 /*
3060  * NAME:	raid_rename_lock
3061  * DESCRIPTION: ("MDRNM_LOCK") rename/exchange named service entry point
3062  * PARAMETERS:	md_rendelta_t	*delta - describes changes to be made to this
3063  *					 raid device for rename transaction
3064  *		md_rentxn_t	*rtxnp - rename transaction state
3065  *
3066  * LOCKS:	io and unit locks (taken explicitly *not* via ioctl wrappers)
3067  *
3068  */
3069 intptr_t
3070 raid_rename_lock(
3071 	md_rendelta_t	*delta,
3072 	md_rentxn_t	*rtxnp)
3073 {
3074 	minor_t		mnum;
3075 
3076 	ASSERT(delta);
3077 	ASSERT(rtxnp);
3078 
3079 	mnum = md_getminor(delta->dev);
3080 	if (mnum == rtxnp->to.mnum && rtxnp->op == MDRNOP_RENAME) {
3081 		return (0);
3082 	}
3083 
3084 	ASSERT(delta->uip);
3085 	if (!delta->uip) {
3086 		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
3087 		return (ENODEV);
3088 	}
3089 
3090 	ASSERT(delta->unp);
3091 	if (!delta->unp) {
3092 
3093 		return (ENODEV);
3094 	}
3095 
3096 	ASSERT(!IO_WRITER_HELD(delta->unp));
3097 	(void) md_io_writerlock(delta->uip);
3098 	ASSERT(IO_WRITER_HELD(delta->unp));
3099 
3100 
3101 	ASSERT(!UNIT_WRITER_HELD(delta->unp));
3102 	(void) md_unit_writerlock(delta->uip);
3103 	ASSERT(UNIT_WRITER_HELD(delta->unp));
3104 
3105 	return (0);
3106 }
3107 
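/*
 * Lock ordering note: raid_rename_lock() takes the io writer lock and
 * then the unit writer lock; raid_rename_unlock() below releases the
 * unit writer lock first and the io writer lock last, i.e. strictly in
 * reverse order:
 *
 *	lock:   md_io_writerlock(uip);   md_unit_writerlock(uip);
 *	unlock: md_unit_writerexit(uip); ... md_io_writerexit(uip);
 */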
3108 /*
3109  * NAME:	raid_rename_unlock
3110  * DESCRIPTION: ("MDRNM_UNLOCK") rename/exchange named service entry point
3111  * PARAMETERS:	md_rendelta_t	*delta - describes changes to be made to this
3112  *					 raid device for rename transaction
3113  *		md_rentxn_t	*rtxnp - rename transaction state
3114  *
3115  * LOCKS:	drops io and unit locks
3116  *
3117  */
3118 /* ARGSUSED */
3119 void
3120 raid_rename_unlock(
3121 	md_rendelta_t	*delta,
3122 	md_rentxn_t	*rtxnp)
3123 {
3124 	mr_unit_t	*un = (mr_unit_t *)delta->unp;
3125 	minor_t		mnum = MD_SID(un);
3126 	int		col;
3127 
3128 	ASSERT(delta);
3129 	ASSERT(delta->unp);
3130 	ASSERT(delta->uip);
3131 
3132 	ASSERT(UNIT_WRITER_HELD(delta->unp));
3133 	md_unit_writerexit(delta->uip);
3134 	ASSERT(!UNIT_WRITER_HELD(delta->unp));
3135 
3136 	if (! (delta->txn_stat.role_swapped) || ! (delta->txn_stat.is_open)) {
3137 		goto out;
3138 	}
3139 	if (raid_internal_open(mnum, (FREAD | FWRITE),
3140 	    OTYP_LYR, MD_OFLG_ISINIT) == 0) {
3141 		for (col = 0; col < un->un_totalcolumncnt; col++) {
3142 			if (un->un_column[col].un_devstate & RCS_OKAY)
3143 				(void) init_pw_area(un,
3144 				    un->un_column[col].un_dev,
3145 				    un->un_column[col].un_pwstart, col);
3146 		}
3147 		(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);
3148 	}
3149 
3150 out:
3151 	ASSERT(IO_WRITER_HELD(delta->unp));
3152 	md_io_writerexit(delta->uip);
3153 	ASSERT(!IO_WRITER_HELD(delta->unp));
3154 }
3155 /* end of rename/exchange named service and support functions */
3156