xref: /onnv-gate/usr/src/uts/common/io/lvm/trans/trans_ioctl.c (revision 9017:47960a78ed2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/user.h>
32 #include <sys/uio.h>
33 #include <sys/t_lock.h>
34 #include <sys/dkio.h>
35 #include <sys/vtoc.h>
36 #include <sys/kmem.h>
37 #include <vm/page.h>
38 #include <sys/cmn_err.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41 #include <sys/mkdev.h>
42 #include <sys/stat.h>
43 #include <sys/open.h>
44 #include <sys/lvm/md_trans.h>
45 #include <sys/modctl.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/debug.h>
49 #include <sys/filio.h>
50 #include <sys/lvm/md_notify.h>
51 #include <sys/callb.h>
52 #include <sys/disp.h>
53 
54 #include <sys/sysevent/eventdefs.h>
55 #include <sys/sysevent/svm.h>
56 
57 extern int		md_status;
58 extern unit_t		md_nunits;
59 extern set_t		md_nsets;
60 extern md_set_t		md_set[];
61 extern md_ops_t		trans_md_ops;
62 extern md_krwlock_t	md_unit_array_rw;
63 extern uint_t		mt_debug;
64 
65 extern major_t	md_major;
66 
67 static mt_unit_t *
68 trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
69 {
70 	mt_unit_t	*un;
71 	mdi_unit_t	*ui;
72 	set_t		setno = MD_MIN2SET(mnum);
73 
74 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
75 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
76 		return (NULL);
77 	}
78 
79 	if (! (flags & STALE_OK)) {
80 		if (md_get_setstatus(setno) & MD_SET_STALE) {
81 			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
82 			return (NULL);
83 		}
84 	}
85 
86 	ui = MDI_UNIT(mnum);
87 	if (flags & NO_OLD) {
88 		if (ui != NULL) {
89 			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
90 			return (NULL);
91 		}
92 		return ((mt_unit_t *)1);
93 	}
94 
95 	if (ui == NULL) {
96 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
97 		return (NULL);
98 	}
99 
100 	if (flags & ARRAY_WRITER)
101 		md_array_writer(lock);
102 	else if (flags & ARRAY_READER)
103 		md_array_reader(lock);
104 
105 	if (!(flags & NO_LOCK)) {
106 		if (flags & WR_LOCK)
107 			(void) md_ioctl_writerlock(lock, ui);
108 		else /* RD_LOCK */
109 			(void) md_ioctl_readerlock(lock, ui);
110 	}
111 	un = (mt_unit_t *)MD_UNIT(mnum);
112 
113 	if (un->c.un_type != MD_METATRANS) {
114 		(void) mdmderror(mde, MDE_NOT_MT, mnum);
115 		return (NULL);
116 	}
117 
118 	return (un);
119 }
120 
121 #ifdef	DEBUG
122 /*
123  * DEBUG ROUTINES
124  * 	THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED
125  */
126 
127 extern int		(*mdv_strategy_tstpnt)(buf_t *, int, void*);
128 
129 /*
130  * return the global stats struct
131  */
132 static int
133 trans_get_transstats(void *d, int mode)
134 {
135 	md_i_get_t *migp = d;
136 
137 	mdclrerror(&migp->mde);
138 
139 	if (migp->size == 0) {
140 		migp->size = sizeof (struct transstats);
141 		return (0);
142 	}
143 
144 	if (migp->size < sizeof (struct transstats))
145 		return (EFAULT);
146 
147 	if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp,
148 	    sizeof (struct transstats), mode))
149 		return (EFAULT);
150 	return (0);
151 }
152 
153 /*
154  * test ioctls
155  */
156 /*
157  * TEST TRYGETBLK
158  */
159 /*ARGSUSED1*/
160 static int
161 trans_test_trygetblk(void *d, int mode, IOLOCK *lock)
162 {
163 	mt_unit_t	*un;
164 	int		test;
165 	dev_t		dev;
166 	struct buf	*bp;
167 	struct buf	*trygetblk();
168 
169 	md_i_get_t *migp = d;
170 
171 	mdclrerror(&migp->mde);
172 	migp->size = 0;
173 
174 	un = trans_getun(migp->id, &migp->mde,
175 	    RD_LOCK, lock);
176 	if (un == NULL)
177 		return (EINVAL);
178 
179 	dev = un->un_m_dev;
180 
181 	/*
182 	 * test 1 -- don't find nonexistent buf
183 	 */
184 	test = 1;
185 	if (bp = trygetblk(dev, 0))
186 		goto errout;
187 
188 	/*
189 	 * test 2 - don't find stale buf
190 	 */
191 	test = 2;
192 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
193 		goto errout;
194 	bp->b_flags |= (B_STALE|B_DONE);
195 	brelse(bp);
196 	if (bp = trygetblk(dev, 0))
197 		goto errout;
198 
199 	/*
200 	 * test 3 -- don't find busy buf
201 	 */
202 	test = 3;
203 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
204 		goto errout;
205 	if (trygetblk(dev, 0))
206 		goto errout;
207 	bp->b_flags |= B_STALE;
208 	brelse(bp);
209 
210 	/*
211 	 * test 4 -- don't find not-done buf
212 	 */
213 	test = 4;
214 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
215 		goto errout;
216 	brelse(bp);
217 	if (bp = trygetblk(dev, 0))
218 		goto errout;
219 
220 	/*
221 	 * test 5 -- find an idle buf
222 	 */
223 	test = 5;
224 	if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL)
225 		goto errout;
226 	brelse(bp);
227 	if ((bp = trygetblk(dev, 0)) == NULL)
228 		goto errout;
229 	bp->b_flags |= B_STALE;
230 	brelse(bp);
231 	bp = 0;
232 
233 	test = 0;	/* no test failed */
234 errout:
235 	if (bp) {
236 		bp->b_flags |= B_STALE;
237 		brelse(bp);
238 	}
239 	migp->size = test;
240 	if (test)
241 		return (EINVAL);
242 	return (0);
243 }
244 /*
245  * TEST TRYGETPAGE
246  */
247 static page_t *
248 trans_trypage(struct vnode *vp, uint_t off)
249 {
250 	page_t		*pp;
251 
252 	/*
253 	 * get a locked page
254 	 */
255 	if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL)
256 		return (NULL);
257 	/*
258 	 * get the iolock
259 	 */
260 	if (!page_io_trylock(pp)) {
261 		page_unlock(pp);
262 		return (NULL);
263 	}
264 	return (pp);
265 }
266 
267 /*ARGSUSED1*/
268 static int
269 trans_test_trypage(void *d, int mode, IOLOCK *lock)
270 {
271 	mt_unit_t		*un;
272 	int			test;
273 	dev_t			dev;
274 	struct page		*pp;
275 	struct vnode		*devvp;
276 	struct vnode		*cvp;
277 	extern struct vnode	*common_specvp(struct vnode *);
278 	extern void		pvn_io_done(struct page *);
279 
280 	md_i_get_t *migp = d;
281 
282 	mdclrerror(&migp->mde);
283 	migp->size = 0;
284 
285 	un = trans_getun(migp->id, &migp->mde,
286 	    RD_LOCK, lock);
287 	if (un == NULL)
288 		return (EINVAL);
289 
290 	dev = un->un_m_dev;
291 	devvp = makespecvp(dev, VBLK);
292 	cvp = common_specvp(devvp);
293 
294 	/*
295 	 * get rid of the device's pages
296 	 */
297 	(void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
298 
299 	/*
300 	 * test 1 -- don't find nonexistent page
301 	 */
302 	test = 1;
303 	if (pp = trans_trypage(cvp, 0))
304 		goto errout;
305 
306 	/*
307 	 * test 2 -- don't find busy page
308 	 */
309 	test = 2;
310 	if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
311 		goto errout;
312 	if (trans_trypage(cvp, 0))
313 		goto errout;
314 	pvn_io_done(pp);
315 	pp = 0;
316 
317 	/*
318 	 * test 3 - find an idle page
319 	 */
320 	test = 3;
321 	if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
322 		goto errout;
323 	pvn_io_done(pp);
324 	if ((pp = trans_trypage(cvp, 0)) == NULL)
325 		goto errout;
326 	pvn_io_done(pp);
327 	pp = 0;
328 
329 	test = 0;	/* no test failed */
330 errout:
331 	if (pp)
332 		pvn_io_done(pp);
333 	/*
334 	 * get rid of the file's pages
335 	 */
336 	(void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
337 	VN_RELE(devvp);
338 
339 	migp->size = test;
340 	if (test)
341 		return (EINVAL);
342 	return (0);
343 }
344 /*
345  * TEST TSD
346  */
347 #define	NKEYS		(7)
348 #define	NTSDTHREADS	(3)
349 struct tothread {
350 	int		test;
351 	int		error;
352 	int		exits;
353 	int		step;
354 	kmutex_t	lock;
355 	kcondvar_t	cv;
356 };
357 static uint_t		keys[NKEYS];
358 static struct tothread	tta[NTSDTHREADS];
359 static int		allocatorvalue;
360 static int		okdestructoralloc;
361 
362 static void
363 trans_test_stepwait(struct tothread *tp, int step)
364 {
365 	/*
366 	 * wait for other thread
367 	 */
368 	mutex_enter(&tp->lock);
369 	while (tp->step < step)
370 		cv_wait(&tp->cv, &tp->lock);
371 	mutex_exit(&tp->lock);
372 }
373 
374 static void
375 trans_test_step(struct tothread *tp, int step)
376 {
377 	/*
378 	 * wakeup other threads
379 	 */
380 	mutex_enter(&tp->lock);
381 	tp->step = step;
382 	cv_broadcast(&tp->cv);
383 	mutex_exit(&tp->lock);
384 }
385 
386 static void
387 trans_test_destructor(void *voidp)
388 {
389 	int		exits;
390 	struct tothread	*tp	= voidp;
391 
392 	/*
393 	 * check that threads clean up *all* TSD at exit
394 	 */
395 	mutex_enter(&tp->lock);
396 	exits = ++tp->exits;
397 	mutex_exit(&tp->lock);
398 	if (exits >= NKEYS)
399 		trans_test_step(tp, 3);
400 }
401 
402 static void
403 trans_test_destructor_alloc(void *voidp)
404 {
405 	int	*value	= voidp;
406 
407 	okdestructoralloc = 0;
408 	if (value) {
409 		if (*value == allocatorvalue)
410 			okdestructoralloc = 1;
411 		md_trans_free((caddr_t)value, sizeof (value));
412 	}
413 }
414 
415 static void *
416 trans_test_allocator(void)
417 {
418 	int	*value;
419 
420 	value = (int *)md_trans_zalloc(sizeof (value));
421 	*value = allocatorvalue;
422 	return ((void *)value);
423 }
424 
425 /*
426  * thread used to test TSD destroy functionality
427  */
428 static void
429 trans_test_thread(struct tothread *tp)
430 {
431 	int	i;
432 	callb_cpr_t	cprinfo;
433 
434 	/*
435 	 * Register cpr callback
436 	 */
437 	CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr,
438 	    "trans_test_thread");
439 
440 	/*
441 	 * get some TSD
442 	 */
443 	for (i = NKEYS - 1; i >= 0; --i)
444 		if (tsd_set(keys[i], tp)) {
445 			tp->error = 500;
446 			goto errout;
447 		}
448 	/*
449 	 * tell parent that we have TSD
450 	 */
451 	trans_test_step(tp, 1);
452 
453 	/*
454 	 * wait for parent to destroy some of our TSD
455 	 */
456 	trans_test_stepwait(tp, 2);
457 
458 	/*
459 	 * make sure that the appropriate TSD was destroyed
460 	 */
461 	if ((tsd_get(keys[0]) != NULL) ||
462 	    (tsd_get(keys[NKEYS-1]) != NULL) ||
463 	    (tsd_get(keys[NKEYS>>1]) != NULL)) {
464 		tp->error = 510;
465 		goto errout;
466 	}
467 	for (i = 0; i < NKEYS; ++i)
468 		if (tsd_get(keys[i]) != tp)
469 			if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) {
470 				tp->error = 520;
471 				goto errout;
472 			}
473 
474 	/*
475 	 * set up cpr exit
476 	 */
477 	mutex_enter(&tp->lock);
478 	CALLB_CPR_EXIT(&cprinfo);
479 	thread_exit();
480 errout:
481 	/*
482 	 * error -- make sure the parent will wake up (error code in tp)
483 	 */
484 	trans_test_step(tp, 3);
485 
486 	/*
487 	 * set up cpr exit
488 	 */
489 	mutex_enter(&tp->lock);
490 	CALLB_CPR_EXIT(&cprinfo);
491 	thread_exit();
492 }
493 
494 static void
495 trans_test_threadcreate(struct tothread *tp)
496 {
497 	/*
498 	 * initialize the per thread struct and make a thread
499 	 */
500 	bzero((caddr_t)tp, sizeof (struct tothread));
501 
502 	mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL);
503 	cv_init(&tp->cv, NULL, CV_DEFAULT, NULL);
504 
505 	(void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0,
506 	    TS_RUN, minclsyspri);
507 }
508 /*
509  * driver for TSD tests -- *NOT REENTRANT*
510  */
511 /*ARGSUSED1*/
512 static int
513 trans_test_tsd(void *d, int mode)
514 {
515 	int		test;
516 	uint_t		rekeys[NKEYS];
517 	int		i;
518 	uint_t		key;
519 	int		error;
520 
521 	md_i_get_t *migp = d;
522 
523 	mdclrerror(&migp->mde);
524 	migp->size = 0;
525 
526 	/*
527 	 * destroy old keys, if any
528 	 */
529 	for (i = 0; i < NKEYS; ++i)
530 		tsd_destroy(&keys[i]);
531 	/*
532 	 * test 1 -- simple create and destroy keys tests
533 	 */
534 	test = 1;
535 	error = 0;
536 	for (i = 0; i < NKEYS; ++i) {
537 		tsd_create(&keys[i], NULL);
538 
539 		/* get with no set should return NULL */
540 		if (tsd_get(keys[i]) != NULL) {
541 			error = 100;
542 			goto errout;
543 		}
544 
545 		/* destroyed key should be 0 */
546 		key = keys[i];
547 		tsd_destroy(&keys[i]);
548 		if (keys[i]) {
549 			error = 110;
550 			goto errout;
551 		}
552 
553 		/* destroy the key twice */
554 		keys[i] = key;
555 		tsd_destroy(&keys[i]);
556 
557 		/* destroyed key should be 0 */
558 		if (keys[i]) {
559 			error = 120;
560 			goto errout;
561 		}
562 
563 		/* getting a destroyed key should return NULL */
564 		if (tsd_get(keys[i]) != NULL) {
565 			error = 130;
566 			goto errout;
567 		}
568 		/* recreate the key */
569 		tsd_create(&keys[i], NULL);
570 
571 		/* should be the same key as before */
572 		if (key != keys[i]) {
573 			error = 140;
574 			goto errout;
575 		}
576 
577 		/* initial value should be NULL */
578 		if (tsd_get(keys[i]) != NULL) {
579 			error = 150;
580 			goto errout;
581 		}
582 
583 		/* cleanup */
584 		tsd_destroy(&keys[i]);
585 	}
586 
587 	/*
588 	 * test 2 -- recreate keys
589 	 */
590 	test = 2;
591 	error = 0;
592 	for (i = 0; i < NKEYS; ++i)
593 		tsd_create(&keys[i], NULL);
594 	for (i = 0; i < NKEYS; ++i) {
595 		/* make sure the keys were created */
596 		if (keys[i] == 0) {
597 			error = 200;
598 			goto errout;
599 		}
600 
601 		/* make sure that recreating key doesn't change it */
602 		rekeys[i] = keys[i];
603 		tsd_create(&rekeys[i], NULL);
604 		if (rekeys[i] != keys[i]) {
605 			error = 210;
606 			goto errout;
607 		}
608 	}
609 	for (i = 0; i < NKEYS; ++i)
610 		tsd_destroy(&keys[i]);
611 
612 	/*
613 	 * test 3 -- check processing for unset and destroyed keys
614 	 */
615 	test = 3;
616 	error = 0;
617 
618 	/* getting a 0 key returns NULL */
619 	if (tsd_get(0) != NULL) {
620 		error = 300;
621 		goto errout;
622 	}
623 
624 	/* setting a 0 key returns error */
625 	if (tsd_set(0, NULL) != EINVAL) {
626 		error = 310;
627 		goto errout;
628 	}
629 	tsd_create(&key, NULL);
630 
631 	/* setting a created key returns no error */
632 	if (tsd_set(key, NULL) == EINVAL) {
633 		error = 320;
634 		goto errout;
635 	}
636 	tsd_destroy(&key);
637 
638 	/* setting a destroyed key returns error */
639 	if (tsd_set(key, NULL) != EINVAL) {
640 		error = 330;
641 		goto errout;
642 	}
643 
644 	/*
645 	 * test 4 -- make sure that set and get work
646 	 */
647 	test = 4;
648 	error = 0;
649 
650 	for (i = 0; i < NKEYS; ++i) {
651 		tsd_create(&keys[i], NULL);
652 
653 		/* set a value */
654 		(void) tsd_set(keys[i], &key);
655 
656 		/* get the value */
657 		if (tsd_get(keys[i]) != &key) {
658 			error = 400;
659 			goto errout;
660 		}
661 
662 		/* set the value to NULL */
663 		(void) tsd_set(keys[i], NULL);
664 
665 		/* get the NULL */
666 		if (tsd_get(keys[i]) != NULL) {
667 			error = 410;
668 			goto errout;
669 		}
670 	}
671 	/* cleanup */
672 	for (i = 0; i < NKEYS; ++i)
673 		tsd_destroy(&keys[i]);
674 
675 	/*
676 	 * test 5 -- destroying keys w/multiple threads
677 	 */
678 	test = 5;
679 	error = 0;
680 
681 	/* create the keys */
682 	for (i = 0; i < NKEYS; ++i)
683 		tsd_create(&keys[i], trans_test_destructor);
684 
685 	/* create some threads */
686 	for (i = 0; i < NTSDTHREADS; ++i)
687 		trans_test_threadcreate(&tta[i]);
688 
689 	/* wait for the threads to assign TSD */
690 	for (i = 0; i < NTSDTHREADS; ++i)
691 		trans_test_stepwait(&tta[i], 1);
692 
693 	/* destroy some of the keys */
694 	tsd_destroy(&keys[0]);
695 	tsd_destroy(&keys[NKEYS - 1]);
696 	tsd_destroy(&keys[NKEYS >> 1]);
697 	tsd_destroy(&keys[NKEYS >> 1]);
698 
699 	/* wakeup the threads -- they check that the destroy took */
700 	for (i = 0; i < NTSDTHREADS; ++i)
701 		trans_test_step(&tta[i], 2);
702 
703 	/* wait for the threads to exit (also checks for TSD cleanup) */
704 	for (i = 0; i < NTSDTHREADS; ++i)
705 		trans_test_stepwait(&tta[i], 3);
706 
707 	/* destroy the rest of the keys */
708 	for (i = 0; i < NKEYS; ++i)
709 		tsd_destroy(&keys[i]);
710 
711 	/* check for error */
712 	for (i = 0; i < NTSDTHREADS; ++i) {
713 		if (!error)
714 			error = tta[i].error;
715 		mutex_destroy(&tta[i].lock);
716 		cv_destroy(&tta[i].cv);
717 	}
718 
719 	/*
720 	 * test 6 -- test getcreate
721 	 */
722 	test = 6;
723 	error = 0;
724 
725 	/* make sure the keys are destroyed */
726 	for (i = 0; i < NKEYS; ++i)
727 		tsd_destroy(&keys[i]);
728 
729 	/* get w/create */
730 	for (i = 0; i < NKEYS; ++i) {
731 		allocatorvalue = i;
732 		if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc,
733 		    trans_test_allocator) != allocatorvalue) {
734 			error = 600;
735 			goto errout;
736 		}
737 	}
738 	for (i = 0; i < NKEYS; ++i) {
739 		allocatorvalue = i;
740 		if (*(int *)tsd_get(keys[i]) != allocatorvalue) {
741 			error = 610;
742 			goto errout;
743 		}
744 	}
745 	/* make sure destructor gets called when we destroy the keys */
746 	for (i = 0; i < NKEYS; ++i) {
747 		allocatorvalue = i;
748 		okdestructoralloc = 0;
749 		tsd_destroy(&keys[i]);
750 		if (okdestructoralloc == 0) {
751 			error = 620;
752 			goto errout;
753 		}
754 	}
755 
756 errout:
757 	/* make sure the keys are destroyed */
758 	for (i = 0; i < NKEYS; ++i)
759 		tsd_destroy(&keys[i]);
760 
761 	/* return test # and error code (if any) */
762 	migp->size = test;
763 	return (error);
764 }
765 
766 /*
767  * Error Injection Structures, Data, and Functions:
768  *
769  * Error injection is used to test the Harpy error recovery system.  The
770  * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a
771  * unit, and MD_IOC_STOPERRORS turns it off.  An mt_error structure is
772  * associated with every trans device for which we are injecting errors.  When
773  * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to
774  * trans_error_injector(), so that it gets called for every MDD I/O operation.
775  *
776  * The trans unit can be in one of three states:
777  *
778  *	count down -	Each I/O causes er_count_down to be decremented.
779  *			When er_count_down reaches 0, an error is injected
780  *			and the block number is remembered.  Without making
781  *			special provisions, the log area would receive only a
782  *			small percentage of the injected errors.  Thus,
783  *			trans_check_error() is written so that every
784  *			other error is injected on the log.
785  *
786  *	suspend -	No errors are generated and the counters are not
787  *			modified.  This is so that fsck/mkfs can do their thing
788  *			(we're not testing them) and so that the test script can
789  *			set up another test.  The transition back to the count
790  *			down state occurs when MD_IOC_INJECTERRORS is invoked
791  *			again.
792  */
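
/*
 * The mte_watch_block state handled below is not described above: in that
 * state an error is injected on any I/O that targets the remembered
 * er_bad_unit/er_bad_block pair, and the counters are left alone.
 *
 * An illustrative trace of the count down schedule, assuming the defaults
 * defined below (INITIAL_COUNT = 1, ERROR_INCREMENT = 1) and traffic that
 * happens to alternate between the log and the master:
 *
 *	error 1: fires on the 1st counted I/O, on the log
 *	error 2: 1 counted I/O later, on the master
 *	error 3: 2 counted I/Os later, on the log
 *	error 4: 3 counted I/Os later, on the master
 *	...
 *
 * because trans_check_error() reloads er_count_down from er_reset_count and
 * then bumps er_reset_count by er_increment after each injected error; when
 * the count hits zero on the "wrong" device, er_count_down is simply set
 * back to 1 and the injection is retried on a later I/O.
 */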
793 
794 typedef enum {
795 	mte_count_down,
796 	mte_suspend,
797 	mte_watch_block
798 } mte_state;
799 
800 typedef struct mt_error {
801 	struct mt_error	*er_next;	/* next error unit in list. */
802 	mte_state	er_state;
803 	mt_unit_t	*er_unitp;	/* unit to force errors on. */
804 	size_t		er_count_down;	/* i/o transactions until error. */
805 	size_t		er_increment;	/* increment for reset_count. */
806 	size_t		er_reset_count;	/* used to reset er_count_down */
807 	size_t		er_total_errors; /* count generated errors. */
808 	/* Following fields describe error we are injecting. */
809 	dev_t		er_bad_unit;	/* Unit associated with block in */
810 					/* error. */
811 	off_t		er_bad_block;	/* Block in error. */
812 } mt_error_t;
813 
814 #define	ERROR_INCREMENT	(1)
815 #define	INITIAL_COUNT	(1)
816 
817 static int		default_increment	= ERROR_INCREMENT;
818 static kmutex_t		error_mutex;	/* protects error_list */
819 static mt_error_t	error_list_head;
820 static int		initial_count		= INITIAL_COUNT;
821 static int		(*tstpnt_save)(buf_t *, int, void*) = NULL;
822 
823 static mt_error_t *
824 find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp)
825 {
826 	mt_error_t	*errp	= (mt_error_t *)NULL;
827 
828 	ASSERT(mutex_owned(&error_mutex) != 0);
829 	*pred_errp = &error_list_head;
830 	while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) {
831 		if (errp->er_unitp == un)
832 			break;
833 		*pred_errp = errp;
834 	}
835 	return (errp);
836 }
837 
838 static mt_error_t *
839 find_by_dev(md_dev64_t dev)
840 {
841 	mt_error_t	*errp	= &error_list_head;
842 
843 	ASSERT(mutex_owned(&error_mutex) != 0);
844 	while ((errp = errp->er_next) != (mt_error_t *)NULL) {
845 		if ((errp->er_unitp->un_m_dev == dev) ||
846 		    (errp->er_unitp->un_l_dev == dev))
847 			break;
848 	}
849 	return (errp);
850 }
851 
852 static int
853 trans_check_error(buf_t *bp, mt_error_t *errp)
854 {
855 	int		rv	= 0;
856 	md_dev64_t	target	= md_expldev(bp->b_edev);
857 
858 	ASSERT(mutex_owned(&error_mutex) != 0);
859 	switch (errp->er_state) {
860 	case mte_count_down:
861 		errp->er_count_down--;
862 		if (errp->er_count_down == 0) {
863 			/*
864 			 * Every other error that we inject should be on
865 			 * the log device.  Errors will be injected on the
866 			 * log device when errp->er_total_errors is even
867 			 * and on the master device when it is odd.  If
868 			 * this I/O is not for the appropriate device, we
869 			 * will set errp->er_count_down to 1, so that we
870 			 * can try again later.
871 			 */
872 			if ((((errp->er_total_errors % 2) == 0) &&
873 			    (errp->er_unitp->un_l_dev == target)) ||
874 			    (((errp->er_total_errors % 2) != 0) &&
875 			    (errp->er_unitp->un_m_dev == target))) {
876 				/* simulate an error */
877 				bp->b_flags |= B_ERROR;
878 				bp->b_error = EIO;
879 				/* remember the error. */
880 				errp->er_total_errors++;
881 				errp->er_bad_unit = bp->b_edev;
882 				errp->er_bad_block = bp->b_blkno;
883 				/* reset counters. */
884 				errp->er_count_down = errp->er_reset_count;
885 				errp->er_reset_count += errp->er_increment;
886 				rv = 1;
887 			} else {
888 				/* Try again next time. */
889 				errp->er_count_down = 1;
890 			}
891 		}
892 		break;
893 
894 	case mte_suspend:
895 		/* No errors while suspended. */
896 		break;
897 
898 	case mte_watch_block:
899 		if ((bp->b_edev == errp->er_bad_unit) &&
900 		    (bp->b_blkno == errp->er_bad_block)) {
901 			bp->b_flags |= B_ERROR;
902 			bp->b_error = EIO;
903 			rv = 1;
904 		}
905 		break;
906 	}
907 	return (rv);
908 }
909 
910 static int
911 trans_error_injector(buf_t *bp, int flag, void* private)
912 {
913 	mt_error_t	*errp	= (mt_error_t *)NULL;
914 	int		(*tstpnt)(buf_t *, int, void*) = NULL;
915 	int		rv	= 0;
916 	md_dev64_t	target	= md_expldev(bp->b_edev);
917 	int		trv	= 0;
918 	mt_unit_t	*un;
919 
920 	mutex_enter(&error_mutex);
921 	errp = find_by_dev(target);
922 	if (errp != (mt_error_t *)NULL) {
923 		un = errp->er_unitp;
924 		if (target == un->un_m_dev) {
925 			/* Target is our master device. */
926 			rv = trans_check_error(bp, errp);
927 		}
928 		if (target == un->un_l_dev) {
929 			/*
930 			 * Target is our log device.  Unfortunately, the same
931 			 * device may also be used for the MDD database.
932 			 * Therefore, we need to make sure that the I/O is for
933 			 * the range of blocks designated as our log.
934 			 */
935 			if ((bp->b_blkno >= un->un_l_pwsblk) &&
936 			    ((bp->b_blkno + btodb(bp->b_bcount)) <=
937 			    (un->un_l_sblk + un->un_l_tblks))) {
938 				rv = trans_check_error(bp, errp);
939 			}
940 		}
941 	}
942 	tstpnt = tstpnt_save;
943 	mutex_exit(&error_mutex);
944 
945 	if (tstpnt != NULL)
946 		trv = (*tstpnt)(bp, flag, private);
947 
948 	/*
949 	 * If we are producing an error (rv != 0) we need to make sure that
950 	 * biodone gets called.  If the tstpnt returned non-zero,
951 	 * we'll assume that it called biodone.
952 	 */
953 	if ((rv != 0) && (trv == 0)) {
954 		md_biodone(bp);
955 	}
956 	rv = ((rv == 0) && (trv == 0)) ? 0 : 1;
957 	return (rv);
958 }
959 
960 /*
961  * Prepare to inject errors on the master and log devices associated with the
962  * unit specified in migp.  The first time that trans_inject_errors() is called
963  * for a unit, an mt_error_t structure is allocated and initialized for the
964  * unit.  Subsequent calls for the unit simply ensure that the unit is in the
965  * count down state.
966  *
967  * If an mt_error structure is allocated and it is the first one to be put in
968  * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is
969  * set to trans_error_injector so that it will be called to see if an I/O
970  * request should be treated as an error.
971  */
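
/*
 * A minimal user-level sketch of driving the injector, assuming a test
 * program with the md admin node already open (adm_fd) and the minor number
 * of the trans unit at hand (mnum); the bare ioctl(2) calls and the argument
 * marshalling shown here are simplifications -- real tools go through
 * libmeta:
 *
 *	md_i_get_t mig;
 *
 *	(void) memset(&mig, 0, sizeof (mig));
 *	mig.id = mnum;
 *	if (ioctl(adm_fd, MD_IOC_INJECTERRORS, &mig) == 0) {
 *		... run the I/O load that should now see EIO ...
 *	}
 *	(void) ioctl(adm_fd, MD_IOC_STOPERRORS, &mig);
 */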
972 
973 /*ARGSUSED1*/
974 static int
975 trans_inject_errors(void *d, int mode, IOLOCK *lock)
976 {
977 	mt_error_t	*errp;
978 	mt_error_t	*do_not_care;
979 	mt_unit_t	*un;
980 	int		rv = 0;
981 
982 	md_i_get_t *migp = d;
983 
984 	mdclrerror(&migp->mde);
985 
986 	un = trans_getun(migp->id, &migp->mde,
987 	    RD_LOCK, lock);
988 	if (un == NULL)
989 		return (EINVAL);
990 
991 	/*
992 	 * If there is already an error structure for the unit, make sure that
993 	 * it is in count down mode.
994 	 */
995 
996 	mutex_enter(&error_mutex);
997 	errp = find_by_mtunit(un, &do_not_care);
998 	if (errp != (mt_error_t *)NULL) {
999 		errp->er_state = mte_count_down;
1000 	} else {
1001 
1002 		/*
1003 		 * Initialize error structure.
1004 		 */
1005 
1006 		errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t));
1007 		errp->er_state = mte_count_down;
1008 		errp->er_unitp = un;
1009 		errp->er_count_down = initial_count;
1010 		errp->er_increment = default_increment;
1011 		errp->er_reset_count = initial_count;
1012 		errp->er_total_errors = 0;
1013 		errp->er_bad_unit = 0;
1014 		errp->er_bad_block = 0;
1015 
1016 		/* Insert it into the list. */
1017 
1018 		errp->er_next = error_list_head.er_next;
1019 		error_list_head.er_next = errp;
1020 
1021 		/*
1022 		 * Set up md_call_strategy to call our error injector.
1023 		 */
1024 
1025 		if (mdv_strategy_tstpnt != trans_error_injector) {
1026 			tstpnt_save = mdv_strategy_tstpnt;
1027 			mdv_strategy_tstpnt = trans_error_injector;
1028 		}
1029 	}
1030 	mutex_exit(&error_mutex);
1031 	return (rv);
1032 }
1033 
1034 /*ARGSUSED1*/
1035 static int
1036 trans_stop_errors(void *d, int mode, IOLOCK *lock)
1037 {
1038 	mt_error_t	*errp	= (mt_error_t *)NULL;
1039 	mt_error_t	*pred_errp;
1040 	mt_unit_t	*un;
1041 	int		rv	= 0;
1042 
1043 	md_i_get_t *migp = d;
1044 
1045 	mdclrerror(&migp->mde);
1046 
1047 	un = trans_getun(migp->id, &migp->mde,
1048 	    RD_LOCK, lock);
1049 	if (un == NULL)
1050 		return (EINVAL);
1051 
1052 	mutex_enter(&error_mutex);
1053 	errp = find_by_mtunit(un, &pred_errp);
1054 	if (errp != (mt_error_t *)NULL) {
1055 		/* Remove from list. */
1056 		pred_errp->er_next = errp->er_next;
1057 		if ((error_list_head.er_next == (mt_error_t *)NULL) &&
1058 		    (mdv_strategy_tstpnt == trans_error_injector)) {
1059 			mdv_strategy_tstpnt = tstpnt_save;
1060 		}
1061 	} else {
1062 		/* unit not set up for errors. */
1063 		rv = ENXIO;
1064 	}
1065 	mutex_exit(&error_mutex);
1066 
1067 	/* Free memory. */
1068 
1069 	if (errp != (mt_error_t *)NULL) {
1070 		md_trans_free((void *)errp, sizeof (*errp));
1071 	}
1072 	return (rv);
1073 }
1074 
1075 int
1076 _init_ioctl()
1077 {
1078 	mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL);
1079 	return (1);
1080 }
1081 
1082 int
1083 _fini_ioctl()
1084 {
1085 	mutex_destroy(&error_mutex);
1086 	return (1);
1087 }
1088 
1089 /*
1090  * END OF DEBUG ROUTINES
1091  */
1092 #endif	/* DEBUG */
1093 /*
1094  * BEGIN RELEASE DEBUG
1095  *	The following routines remain in the released product for testability
1096  */
1097 
1098 /*
1099  * ufs error injection remains in the released product
1100  */
1101 /*ARGSUSED1*/
1102 static int
1103 trans_ufserror(void *d, int mode, IOLOCK *lock)
1104 {
1105 	mt_unit_t	*un;
1106 
1107 	md_i_get_t *migp = d;
1108 
1109 	mdclrerror(&migp->mde);
1110 
1111 	un = trans_getun(migp->id, &migp->mde,
1112 	    RD_LOCK, lock);
1113 	if (un == NULL || un->un_ut == NULL)
1114 		return (EINVAL);
1115 
1116 	return (0);
1117 }
1118 /*
1119  * shadow test remains in the released product
1120  */
1121 static int
1122 trans_set_shadow(void *d, int mode, IOLOCK *lock)
1123 {
1124 	dev32_t		device;			/* shadow device */
1125 	mt_unit_t 	*un;
1126 
1127 	md_i_get_t *migp = d;
1128 
1129 	mdclrerror(&migp->mde);
1130 
1131 	un = trans_getun(migp->id, &migp->mde,
1132 	    WR_LOCK, lock);
1133 	if (un == NULL)
1134 		return (EINVAL);
1135 
1136 	if ((un->un_debug & MT_SHADOW) == 0)
1137 		return (EINVAL);
1138 
1139 	/* Get shadow device.  User always passes down 32 bit devt */
1140 
1141 	if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp,
1142 	    &device, sizeof (device), mode)) {
1143 		return (EFAULT);
1144 	}
1145 
1146 	/* Save shadow device designator. */
1147 	un->un_s_dev = md_expldev((md_dev64_t)device);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * END RELEASE DEBUG
1153  */
1154 
1155 static int
1156 trans_get(void *d, int mode, IOLOCK *lock)
1157 {
1158 	mt_unit_t	*un;
1159 	ml_unit_t	*ul;
1160 
1161 	md_i_get_t *migp = d;
1162 
1163 	mdclrerror(&migp->mde);
1164 
1165 	un = trans_getun(migp->id, &migp->mde,
1166 	    RD_LOCK, lock);
1167 	if (un == NULL)
1168 		return (0);
1169 
1170 	if (migp->size == 0) {
1171 		migp->size = un->c.un_size;
1172 		return (0);
1173 	}
1174 
1175 	if (migp->size < un->c.un_size)
1176 		return (EFAULT);
1177 
1178 log:
1179 	ul = un->un_l_unit;
1180 	if (ul == NULL)
1181 		goto master;
1182 
1183 	/*
1184 	 * refresh log fields in case log was metattach'ed
1185 	 */
1186 	un->un_l_head = (daddr32_t)btodb(ul->un_head_lof);
1187 	un->un_l_sblk = un->un_l_head;
1188 	un->un_l_pwsblk = ul->un_pwsblk;
1189 	un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer);
1190 	un->un_l_nblks = ul->un_nblks;
1191 	un->un_l_tblks = ul->un_tblks;
1192 	un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof);
1193 	un->un_l_resv = ul->un_resv;
1194 	un->un_l_maxresv = ul->un_maxresv;
1195 	un->un_l_error = ul->un_error;
1196 	un->un_l_timestamp = ul->un_timestamp;
1197 
1198 	/*
1199 	 * check for log dev dynconcat; can only pick up extra space when the
1200 	 * tail physically follows the head in the circular log
1201 	 */
1202 	if (un->un_l_head <= un->un_l_tail)
1203 		if (ul->un_status & LDL_METADEVICE) {
1204 			struct mdc_unit	*c = MD_UNIT(md_getminor(ul->un_dev));
1205 
1206 			if (c->un_total_blocks > un->un_l_tblks) {
1207 				un->un_l_tblks = c->un_total_blocks;
1208 				un->un_l_nblks = un->un_l_tblks - un->un_l_sblk;
1209 				if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE))
1210 					un->un_l_nblks = btodb(LDL_MAXLOGSIZE);
1211 				un->un_l_maxresv = (uint_t)(un->un_l_nblks *
1212 				    LDL_USABLE_BSIZE);
1213 			}
1214 	}
1215 
1216 master:
1217 
1218 	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode))
1219 		return (EFAULT);
1220 	return (0);
1221 }
1222 
1223 static int
1224 trans_replace(replace_params_t *params)
1225 {
1226 	minor_t		mnum = params->mnum;
1227 	mt_unit_t	*un;
1228 	mdi_unit_t	*ui;
1229 	md_dev64_t	cmp_dev;
1230 	md_dev64_t	ldev;
1231 	md_dev64_t	mdev;
1232 
1233 	mdclrerror(&params->mde);
1234 
1235 	ui = MDI_UNIT(mnum);
1236 	un = md_unit_writerlock(ui);
1237 
1238 	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
1239 		return (mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum));
1240 	}
1241 
1242 	cmp_dev = params->old_dev;
1243 	mdev = un->un_m_dev;
1244 	ldev = un->un_l_dev;
1245 	if (cmp_dev == mdev) {
1246 		un->un_m_key = params->new_key;
1247 		un->un_m_dev = params->new_dev;
1248 	} else if (cmp_dev == ldev) {
1249 		un->un_l_key = params->new_key;
1250 		un->un_l_dev = params->new_dev;
1251 	}
1252 
1253 	trans_commit(un, 1);
1254 	md_unit_writerexit(ui);
1255 	return (0);
1256 }
1257 
1258 /*ARGSUSED1*/
1259 static int
1260 trans_grow(void *d, int mode, IOLOCK  *lock)
1261 {
1262 	mt_unit_t		*un;
1263 
1264 	md_grow_params_t *mgp = d;
1265 
1266 	mdclrerror(&mgp->mde);
1267 
1268 	un = trans_getun(mgp->mnum, &mgp->mde,
1269 	    RD_LOCK, lock);
1270 	if (un == NULL)
1271 		return (0);
1272 
1273 	/*
1274 	 * check for master dev dynconcat
1275 	 */
1276 	if (md_getmajor(un->un_m_dev) == md_major) {
1277 		struct mdc_unit	*c;
1278 
1279 		c = MD_UNIT(md_getminor(un->un_m_dev));
1280 		if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
1281 			un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS;
1282 		} else {
1283 			un->c.un_total_blocks = c->un_total_blocks;
1284 		}
1285 		md_nblocks_set(MD_SID(un), un->c.un_total_blocks);
1286 	}
1287 
1288 	return (0);
1289 }
1290 
1291 /*ARGSUSED1*/
1292 static int
1293 trans_detach_ioctl(void *d, int mode, IOLOCK *lock)
1294 {
1295 	mt_unit_t	*un;
1296 	int		error;
1297 
1298 	md_i_get_t *migp = d;
1299 
1300 	mdclrerror(&migp->mde);
1301 
1302 	/* acquire both md_unit_array_rw, and unit_reader lock */
1303 	un = trans_getun(migp->id, &migp->mde,
1304 	    READERS, lock);
1305 	if (un == NULL)
1306 		return (0);
1307 
1308 	/*
1309 	 * simply too much work to make debug modes w/out a log
1310 	 */
1311 	if (un->un_debug)
1312 		return (EACCES);
1313 
1314 	/*
1315 	 * detach the log
1316 	 */
1317 	error = trans_detach(un, migp->size);
1318 
1319 	return (error);
1320 }
1321 
1322 static int
1323 trans_get_log(void *d, int mode, IOLOCK	*lock)
1324 {
1325 	mt_unit_t	*un;
1326 	ml_unit_t	*ul;
1327 
1328 	md_i_get_t *migp = d;
1329 
1330 	mdclrerror(&migp->mde);
1331 
1332 	un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock);
1333 
1334 	if (un == NULL)
1335 		return (0);
1336 
1337 	ul = un->un_l_unit;
1338 
1339 	if (migp->size == 0) {
1340 		migp->size = ML_UNIT_ONDSZ;
1341 		return (0);
1342 	}
1343 
1344 	if (migp->size < ML_UNIT_ONDSZ)
1345 		return (EFAULT);
1346 
1347 	if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ,
1348 	    mode))
1349 		return (EFAULT);
1350 	return (0);
1351 }
1352 
1353 static int
1354 trans_getdevs(void *d, int mode, IOLOCK	*lock)
1355 {
1356 	int			ndev;
1357 	mt_unit_t		*un;
1358 	md_dev64_t		*udevs;
1359 	md_dev64_t		unit_dev;
1360 
1361 	md_getdevs_params_t *mgdp = d;
1362 
1363 	mdclrerror(&mgdp->mde);
1364 
1365 	un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock);
1366 	if (un == NULL)
1367 		return (0);
1368 
1369 	ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2;
1370 
1371 	if (mgdp->cnt == 0) {
1372 		mgdp->cnt = ndev;
1373 		return (0);
1374 	}
1375 
1376 	if (mgdp->cnt > 2)
1377 		mgdp->cnt = ndev;
1378 
1379 	udevs = (md_dev64_t *)(uintptr_t)mgdp->devs;
1380 	unit_dev = un->un_m_dev;
1381 
1382 	if (md_getmajor(unit_dev) != md_major) {
1383 		if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1384 			return (ENODEV);
1385 	}
1386 
1387 	if (mgdp->cnt >= 1)
1388 		if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0],
1389 		    sizeof (*udevs), mode) != 0)
1390 			return (EFAULT);
1391 
1392 	unit_dev = un->un_l_dev;
1393 	if (md_getmajor(unit_dev) != md_major) {
1394 		if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1395 			return (ENODEV);
1396 	}
1397 
1398 	if (mgdp->cnt >= 2)
1399 		if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1],
1400 		    sizeof (*udevs), mode) != 0)
1401 			return (EFAULT);
1402 
1403 	return (0);
1404 }
1405 
1406 static int
1407 trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock)
1408 {
1409 	minor_t		mnum = mirp->mnum;
1410 	mt_unit_t	*un;
1411 	int		error;
1412 
1413 	mdclrerror(&mirp->mde);
1414 
1415 	un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock);
1416 	if (un == NULL)
1417 		return (0);
1418 
1419 
1420 	/* This prevents new opens */
1421 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1422 
1423 	if (MD_HAS_PARENT(MD_PARENT(un))) {
1424 		rw_exit(&md_unit_array_rw.lock);
1425 		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
1426 	}
1427 
1428 	if (md_unit_isopen(MDI_UNIT(mnum))) {
1429 		rw_exit(&md_unit_array_rw.lock);
1430 		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
1431 	}
1432 	/*
1433 	 * detach the log
1434 	 */
1435 	error = trans_detach(un, mirp->force);
1436 
1437 	/*
1438 	 * reset (aka remove; aka delete) the trans device
1439 	 */
1440 	if (error == 0)
1441 		error = trans_reset(un, mnum, 1, mirp->force);
1442 
1443 	rw_exit(&md_unit_array_rw.lock);
1444 	return (error);
1445 }
1446 
1447 static int
1448 trans_get_geom(mt_unit_t *un, struct dk_geom *geomp)
1449 {
1450 	md_get_geom((md_unit_t *)un, geomp);
1451 
1452 	return (0);
1453 }
1454 
1455 static int
1456 trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp)
1457 {
1458 	md_get_vtoc((md_unit_t *)un, vtocp);
1459 
1460 	return (0);
1461 }
1462 
1463 static int
1464 trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1465 {
1466 	md_get_extvtoc((md_unit_t *)un, vtocp);
1467 
1468 	return (0);
1469 }
1470 
1471 static int
1472 trans_islog(mt_unit_t *un)
1473 {
1474 	if (un->un_l_unit == NULL)
1475 		return (ENXIO);
1476 	return (0);
1477 }
1478 
1479 static int
1480 trans_set_vtoc(
1481 	mt_unit_t	*un,
1482 	struct vtoc	*vtocp
1483 )
1484 {
1485 	return (md_set_vtoc((md_unit_t *)un, vtocp));
1486 }
1487 
1488 static int
1489 trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1490 {
1491 	return (md_set_extvtoc((md_unit_t *)un, vtocp));
1492 }
1493 
1494 static int
1495 trans_get_cgapart(
1496 	mt_unit_t	*un,
1497 	struct dk_map	*dkmapp
1498 )
1499 {
1500 	md_get_cgapart((md_unit_t *)un, dkmapp);
1501 	return (0);
1502 }
1503 
1504 static int
1505 trans_admin_ioctl(
1506 	int	cmd,
1507 	caddr_t	data,
1508 	int	mode,
1509 	IOLOCK	*lockp
1510 )
1511 {
1512 	size_t	sz = 0;
1513 	void	*d = NULL;
1514 	int	err = 0;
1515 
1516 	/* We can only handle 32-bit clients for internal commands */
1517 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1518 		return (EINVAL);
1519 	}
1520 
1521 	switch (cmd) {
1522 
1523 	case MD_IOCGET:
1524 	{
1525 		if (! (mode & FREAD))
1526 			return (EACCES);
1527 
1528 		sz = sizeof (md_i_get_t);
1529 
1530 		if ((d = md_trans_zalloc(sz)) == NULL)
1531 			return (ENOMEM);
1532 
1533 		if (ddi_copyin(data, d, sz, mode)) {
1534 			err = EFAULT;
1535 			break;
1536 		}
1537 
1538 		err = trans_get(d, mode, lockp);
1539 		break;
1540 	}
1541 
1542 	case MD_IOCGET_LOG:
1543 	{
1544 		if (! (mode & FREAD))
1545 			return (EACCES);
1546 
1547 		sz = sizeof (md_i_get_t);
1548 
1549 		if ((d = md_trans_zalloc(sz)) == NULL)
1550 			return (ENOMEM);
1551 
1552 		if (ddi_copyin(data, d, sz, mode)) {
1553 			err = EFAULT;
1554 			break;
1555 		}
1556 
1557 		err = trans_get_log(d, mode, lockp);
1558 		break;
1559 	}
1560 
1561 	case MD_IOCRESET:
1562 	{
1563 		md_i_reset_t	*p;
1564 
1565 		if (! (mode & FWRITE))
1566 			return (EACCES);
1567 
1568 		if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL)
1569 			return (ENOMEM);
1570 
1571 		if (ddi_copyin(data, d, sz, mode)) {
1572 			err = EFAULT;
1573 			break;
1574 		}
1575 
1576 		err = trans_reset_ioctl(p, lockp);
1577 		break;
1578 	}
1579 
1580 	case MD_IOCGROW:
1581 	{
1582 		if (! (mode & FWRITE))
1583 			return (EACCES);
1584 
1585 		sz = sizeof (md_grow_params_t);
1586 
1587 		if ((d = md_trans_zalloc(sz)) == NULL)
1588 			return (ENOMEM);
1589 
1590 		if (ddi_copyin(data, d, sz, mode)) {
1591 			err = EFAULT;
1592 			break;
1593 		}
1594 
1595 		err = trans_grow(d, mode, lockp);
1596 		break;
1597 	}
1598 
1599 	case MD_IOC_TRANS_DETACH:
1600 	{
1601 		if (! (mode & FWRITE))
1602 			return (EACCES);
1603 
1604 		sz = sizeof (md_i_get_t);
1605 
1606 		if ((d = md_trans_zalloc(sz)) == NULL)
1607 			return (ENOMEM);
1608 
1609 		if (ddi_copyin(data, d, sz, mode)) {
1610 			err = EFAULT;
1611 			break;
1612 		}
1613 
1614 		err = trans_detach_ioctl(d, mode, lockp);
1615 		break;
1616 	}
1617 
1618 	case MD_IOCREPLACE:
1619 	{
1620 		replace_params_t	*p;
1621 
1622 		if (! (mode & FWRITE))
1623 			return (EACCES);
1624 
1625 		if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL)
1626 			return (ENOMEM);
1627 
1628 		if (ddi_copyin(data, d, sz, mode)) {
1629 			err = EFAULT;
1630 			break;
1631 		}
1632 
1633 		err = trans_replace(p);
1634 		break;
1635 	}
1636 
1637 
1638 	case MD_IOCGET_DEVS:
1639 	{
1640 		if (! (mode & FREAD))
1641 			return (EACCES);
1642 
1643 		sz = sizeof (md_getdevs_params_t);
1644 
1645 		if ((d = md_trans_zalloc(sz)) == NULL)
1646 			return (ENOMEM);
1647 
1648 		if (ddi_copyin(data, d, sz, mode)) {
1649 			err = EFAULT;
1650 			break;
1651 		}
1652 
1653 		err = trans_getdevs(d, mode, lockp);
1654 		break;
1655 	}
1656 
1657 /*
1658  * debug ioctls
1659  */
1660 #ifdef	DEBUG
1661 
1662 
1663 	case MD_IOCGET_TRANSSTATS:
1664 	{
1665 		if (! (mode & FREAD))
1666 			return (EACCES);
1667 
1668 		sz = sizeof (md_i_get_t);
1669 
1670 		if ((d = md_trans_zalloc(sz)) == NULL)
1671 			return (ENOMEM);
1672 
1673 		if (ddi_copyin(data, d, sz, mode)) {
1674 			err = EFAULT;
1675 			break;
1676 		}
1677 
1678 		err = trans_get_transstats(d, mode);
1679 		break;
1680 	}
1681 
1682 	case MD_IOC_DEBUG:
1683 	{
1684 		md_i_get_t *mdigp;
1685 
1686 		if (! (mode & FWRITE))
1687 			return (EACCES);
1688 
1689 		sz = sizeof (md_i_get_t);
1690 
1691 		if ((d = md_trans_zalloc(sz)) == NULL)
1692 			return (ENOMEM);
1693 
1694 		if (ddi_copyin(data, d, sz, mode)) {
1695 			err = EFAULT;
1696 			break;
1697 		}
1698 
1699 		mdigp = d;
1700 
1701 		mdclrerror(&mdigp->mde);
1702 		mt_debug = mdigp->size;
1703 		break;
1704 	}
1705 
1706 	case MD_IOC_TSD:
1707 	{
1708 		if (! (mode & FWRITE))
1709 			return (EACCES);
1710 
1711 
1712 		sz = sizeof (md_i_get_t);
1713 
1714 		if ((d = md_trans_zalloc(sz)) == NULL)
1715 			return (ENOMEM);
1716 
1717 		if (ddi_copyin(data, d, sz, mode)) {
1718 			err = EFAULT;
1719 			break;
1720 		}
1721 
1722 		err = trans_test_tsd(d, mode);
1723 		break;
1724 	}
1725 
1726 	case MD_IOC_TRYGETBLK:
1727 	{
1728 		if (! (mode & FWRITE))
1729 			return (EACCES);
1730 
1731 
1732 		sz = sizeof (md_i_get_t);
1733 
1734 		if ((d = md_trans_zalloc(sz)) == NULL)
1735 			return (ENOMEM);
1736 
1737 		if (ddi_copyin(data, d, sz, mode)) {
1738 			err = EFAULT;
1739 			break;
1740 		}
1741 
1742 		err = trans_test_trygetblk(d, mode, lockp);
1743 		break;
1744 	}
1745 
1746 	case MD_IOC_TRYPAGE:
1747 	{
1748 		if (! (mode & FWRITE))
1749 			return (EACCES);
1750 
1751 
1752 		sz = sizeof (md_i_get_t);
1753 
1754 		if ((d = md_trans_zalloc(sz)) == NULL)
1755 			return (ENOMEM);
1756 
1757 		if (ddi_copyin(data, d, sz, mode)) {
1758 			err = EFAULT;
1759 			break;
1760 		}
1761 
1762 		err = trans_test_trypage(d, mode, lockp);
1763 		break;
1764 	}
1765 
1766 
1767 	case MD_IOC_INJECTERRORS:
1768 	{
1769 		if (! (mode & FWRITE))
1770 			return (EACCES);
1771 
1772 
1773 		sz = sizeof (md_i_get_t);
1774 
1775 		if ((d = md_trans_zalloc(sz)) == NULL)
1776 			return (ENOMEM);
1777 
1778 		if (ddi_copyin(data, d, sz, mode)) {
1779 			err = EFAULT;
1780 			break;
1781 		}
1782 
1783 		err = trans_inject_errors(d, mode, lockp);
1784 		break;
1785 	}
1786 
1787 	case MD_IOC_STOPERRORS:
1788 	{
1789 		if (! (mode & FWRITE))
1790 			return (EACCES);
1791 
1792 
1793 		sz = sizeof (md_i_get_t);
1794 
1795 		if ((d = md_trans_zalloc(sz)) == NULL)
1796 			return (ENOMEM);
1797 
1798 		if (ddi_copyin(data, d, sz, mode)) {
1799 			err = EFAULT;
1800 			break;
1801 		}
1802 
1803 		err = trans_stop_errors(d, mode, lockp);
1804 		break;
1805 	}
1806 
1807 	case MD_IOC_ISDEBUG:
1808 		break;
1809 
1810 #else	/* ! DEBUG */
1811 
1812 	case MD_IOC_ISDEBUG:
1813 	case MD_IOCGET_TRANSSTATS:
1814 	case MD_IOC_STOPERRORS:
1815 	case MD_IOC_TSD:
1816 	case MD_IOC_TRYGETBLK:
1817 	case MD_IOC_TRYPAGE:
1818 		break;
1819 
1820 	/*
1821 	 * error injection behaves like MD_IOC_UFSERROR in released product
1822 	 */
1823 	case MD_IOC_INJECTERRORS:
1824 	{
1825 		if (! (mode & FWRITE))
1826 			return (EACCES);
1827 
1828 
1829 		sz = sizeof (md_i_get_t);
1830 
1831 		if ((d = md_trans_zalloc(sz)) == NULL)
1832 			return (ENOMEM);
1833 
1834 		if (ddi_copyin(data, d, sz, mode)) {
1835 			err = EFAULT;
1836 			break;
1837 		}
1838 
1839 		err = trans_ufserror(d, mode, lockp);
1840 		break;
1841 	}
1842 
1843 	/*
1844 	 * only the shadow test is allowed in the released product
1845 	 */
1846 	case MD_IOC_DEBUG:
1847 	{
1848 		md_i_get_t *mdigp;
1849 
1850 		if (! (mode & FWRITE))
1851 			return (EACCES);
1852 
1853 		sz = sizeof (md_i_get_t);
1854 
1855 		if ((d = md_trans_zalloc(sz)) == NULL)
1856 			return (ENOMEM);
1857 
1858 		if (ddi_copyin(data, d, sz, mode)) {
1859 			err = EFAULT;
1860 			break;
1861 		}
1862 
1863 		mdigp = d;
1864 
1865 		mdclrerror(&mdigp->mde);
1866 		mt_debug = mdigp->size & MT_SHADOW;
1867 		break;
1868 	}
1869 
1870 #endif	/* ! DEBUG */
1871 
1872 /*
1873  * BEGIN RELEASE DEBUG
1874  *	The following routines remain in the released product for testability
1875  */
1876 
1877 	case MD_IOC_UFSERROR:
1878 	{
1879 		if (! (mode & FWRITE))
1880 			return (EACCES);
1881 
1882 		sz = sizeof (md_i_get_t);
1883 
1884 		if ((d = md_trans_zalloc(sz)) == NULL)
1885 			return (ENOMEM);
1886 
1887 		if (ddi_copyin(data, d, sz, mode)) {
1888 			err = EFAULT;
1889 			break;
1890 		}
1891 
1892 		err = trans_ufserror(d, mode, lockp);
1893 		break;
1894 	}
1895 
1896 	case MD_IOC_SETSHADOW:
1897 	{
1898 		if (! (mode & FWRITE))
1899 			return (EACCES);
1900 
1901 		sz = sizeof (md_i_get_t);
1902 
1903 		if ((d = md_trans_zalloc(sz)) == NULL)
1904 			return (ENOMEM);
1905 
1906 		if (ddi_copyin(data, d, sz, mode)) {
1907 			err = EFAULT;
1908 			break;
1909 		}
1910 
1911 		err = trans_set_shadow(d, mode, lockp);
1912 		break;
1913 	}
1914 
1915 /*
1916  * END RELEASE DEBUG
1917  */
1918 
1919 
1920 	default:
1921 		return (ENOTTY);
1922 	}
1923 
1924 	/*
1925 	 * copyout and free any args
1926 	 */
1927 	if (sz != 0) {
1928 		if (err == 0) {
1929 			if (ddi_copyout(d, data, sz, mode) != 0) {
1930 				err = EFAULT;
1931 			}
1932 		}
1933 		md_trans_free(d, sz);
1934 	}
1935 	return (err);
1936 }
1937 
1938 int
1939 md_trans_ioctl(
1940 	dev_t		dev,
1941 	int		cmd,
1942 	caddr_t		data,
1943 	int		mode,
1944 	IOLOCK		*lockp
1945 )
1946 {
1947 	minor_t		mnum = getminor(dev);
1948 	mt_unit_t	*un;
1949 	md_error_t	mde = mdnullerror;
1950 	int		err = 0;
1951 
1952 	/* handle admin ioctls */
1953 	if (mnum == MD_ADM_MINOR)
1954 		return (trans_admin_ioctl(cmd, data, mode, lockp));
1955 
1956 	/* check unit */
1957 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
1958 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1959 	    ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL))
1960 		return (ENXIO);
1961 
1962 	/* dispatch ioctl */
1963 	switch (cmd) {
1964 
1965 	case DKIOCINFO:
1966 	{
1967 		struct dk_cinfo	*p;
1968 
1969 		if (! (mode & FREAD))
1970 			return (EACCES);
1971 
1972 		if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1973 			return (ENOMEM);
1974 
1975 		get_info(p, mnum);
1976 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1977 			err = EFAULT;
1978 
1979 		md_trans_free(p, sizeof (*p));
1980 		return (err);
1981 	}
1982 
1983 	case DKIOCGGEOM:
1984 	{
1985 		struct dk_geom	*p;
1986 
1987 		if (! (mode & FREAD))
1988 			return (EACCES);
1989 
1990 		if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1991 			return (ENOMEM);
1992 
1993 		if ((err = trans_get_geom(un, p)) == 0) {
1994 			if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1995 			    mode) != 0)
1996 				err = EFAULT;
1997 		}
1998 
1999 		md_trans_free(p, sizeof (*p));
2000 		return (err);
2001 	}
2002 
2003 	case DKIOCGVTOC:
2004 	{
2005 		struct vtoc	*vtoc;
2006 
2007 		if (! (mode & FREAD))
2008 			return (EACCES);
2009 
2010 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2011 		if ((err = trans_get_vtoc(un, vtoc)) != 0) {
2012 			kmem_free(vtoc, sizeof (*vtoc));
2013 			return (err);
2014 		}
2015 
2016 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2017 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2018 				err = EFAULT;
2019 		}
2020 #ifdef _SYSCALL32
2021 		else {
2022 			struct vtoc32	*vtoc32;
2023 
2024 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2025 
2026 			vtoctovtoc32((*vtoc), (*vtoc32));
2027 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2028 				err = EFAULT;
2029 			kmem_free(vtoc32, sizeof (*vtoc32));
2030 		}
2031 #endif /* _SYSCALL32 */
2032 
2033 		kmem_free(vtoc, sizeof (*vtoc));
2034 		return (err);
2035 	}
2036 
2037 	case DKIOCSVTOC:
2038 	{
2039 		struct vtoc	*vtoc;
2040 
2041 		if (! (mode & FWRITE))
2042 			return (EACCES);
2043 
2044 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2045 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2046 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2047 				err = EFAULT;
2048 			}
2049 		}
2050 #ifdef _SYSCALL32
2051 		else {
2052 			struct vtoc32	*vtoc32;
2053 
2054 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2055 
2056 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2057 				err = EFAULT;
2058 			} else {
2059 				vtoc32tovtoc((*vtoc32), (*vtoc));
2060 			}
2061 			kmem_free(vtoc32, sizeof (*vtoc32));
2062 		}
2063 #endif /* _SYSCALL32 */
2064 
2065 		if (err == 0)
2066 			err = trans_set_vtoc(un, vtoc);
2067 
2068 		kmem_free(vtoc, sizeof (*vtoc));
2069 		return (err);
2070 	}
2071 
2072 
2073 	case DKIOCGEXTVTOC:
2074 	{
2075 		struct extvtoc	*extvtoc;
2076 
2077 		if (! (mode & FREAD))
2078 			return (EACCES);
2079 
2080 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2081 		if ((err = trans_get_extvtoc(un, extvtoc)) != 0) {
			kmem_free(extvtoc, sizeof (*extvtoc));
2082 			return (err);
2083 		}
2084 
2085 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2086 			err = EFAULT;
2087 
2088 		kmem_free(extvtoc, sizeof (*extvtoc));
2089 		return (err);
2090 	}
2091 
2092 	case DKIOCSEXTVTOC:
2093 	{
2094 		struct extvtoc	*extvtoc;
2095 
2096 		if (! (mode & FWRITE))
2097 			return (EACCES);
2098 
2099 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2100 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2101 			err = EFAULT;
2102 		}
2103 
2104 		if (err == 0)
2105 			err = trans_set_extvtoc(un, extvtoc);
2106 
2107 		kmem_free(extvtoc, sizeof (*extvtoc));
2108 		return (err);
2109 	}
2110 
2111 	case DKIOCGAPART:
2112 	{
2113 		struct dk_map	dmp;
2114 
2115 		if ((err = trans_get_cgapart(un, &dmp)) != 0) {
2116 			return (err);
2117 		}
2118 
2119 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2120 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2121 			    mode) != 0)
2122 				err = EFAULT;
2123 		}
2124 #ifdef _SYSCALL32
2125 		else {
2126 			struct dk_map32 dmp32;
2127 
2128 			dmp32.dkl_cylno = dmp.dkl_cylno;
2129 			dmp32.dkl_nblk = dmp.dkl_nblk;
2130 
2131 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2132 			    mode) != 0)
2133 				err = EFAULT;
2134 		}
2135 #endif /* _SYSCALL32 */
2136 
2137 		return (err);
2138 	}
2139 
2140 	/*
2141 	 * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs
2142 	 * after opening the device.  fsck/mkfs use these ioctls for
2143 	 * error recovery; an illustrative user-level sketch follows this function.
2144 	 */
2145 	case _FIOISLOG:
2146 		return (trans_islog(un));
2147 
2148 	default:
2149 		return (ENOTTY);
2150 	}
2151 }
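
/*
 * Illustrative user-level sketch of the _FIOISLOG check mentioned above,
 * assuming fd is an open descriptor on the trans metadevice (fsck/mkfs have
 * their own wrappers for this):
 *
 *	static int
 *	device_has_log(int fd)
 *	{
 *		return (ioctl(fd, _FIOISLOG, NULL) == 0);
 *	}
 *
 * trans_islog() returns 0 only when a log is attached, so the ioctl fails
 * with ENXIO on a detached trans device.
 */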
2152 
2153 /*
2154  * rename named service entry points and support functions
2155  */
2156 
2157 /* rename/exchange role swap functions */
2158 
2159 /*
2160  * MDRNM_UPDATE_SELF
2161  * This role swap function is identical for all unit types,
2162  * so keep it here. It's also the best example because it
2163  * touches all the modified portions of the relevant
2164  * in-common structures.
2165  */
2166 void
2167 trans_rename_update_self(
2168 	md_rendelta_t	*delta,
2169 	md_rentxn_t	*rtxnp)
2170 {
2171 	minor_t		 from_min, to_min;
2172 	sv_dev_t	 sv;
2173 	mt_unit_t	*un;
2174 
2175 	ASSERT(rtxnp);
2176 	ASSERT(rtxnp->op == MDRNOP_RENAME);
2177 	ASSERT(delta);
2178 	ASSERT(delta->unp);
2179 	ASSERT(delta->uip);
2180 	ASSERT(rtxnp->rec_idx >= 0);
2181 	ASSERT(rtxnp->recids);
2182 	ASSERT(delta->old_role == MDRR_SELF);
2183 	ASSERT(delta->new_role == MDRR_SELF);
2184 
2185 	from_min = rtxnp->from.mnum;
2186 	to_min   = rtxnp->to.mnum;
2187 	un	 = (mt_unit_t *)delta->unp;
2188 
2189 	/*
2190 	 * self id changes in our own unit struct
2191 	 * both mechanisms for identifying the trans must be reset.
2192 	 */
2193 
2194 	MD_SID(delta->unp) = to_min;
2195 	un->un_dev = makedevice(md_major, to_min);
2196 
2197 	/*
2198 	 * clear old array pointers to unit in-core and unit
2199 	 */
2200 
2201 	MDI_VOIDUNIT(from_min) = NULL;
2202 	MD_VOIDUNIT(from_min) = NULL;
2203 
2204 	/*
2205 	 * and point the new slots at the unit in-core and unit structs
2206 	 */
2207 
2208 	MDI_VOIDUNIT(to_min) = delta->uip;
2209 	MD_VOIDUNIT(to_min) = delta->unp;
2210 
2211 	/*
2212 	 * recreate kstats
2213 	 */
2214 	md_kstat_destroy_ui(delta->uip);
2215 	md_kstat_init_ui(to_min, delta->uip);
2216 
2217 	/*
2218 	 * the unit in-core reference to the get next link's id changes
2219 	 */
2220 
2221 	delta->uip->ui_link.ln_id = to_min;
2222 
2223 	/*
2224 	 * name space addition of new key was done from user-level
2225 	 * remove the old name's key here
2226 	 */
2227 
2228 	sv.setno = MD_MIN2SET(from_min);
2229 	sv.key	 = rtxnp->from.key;
2230 
2231 	md_rem_names(&sv, 1);
2232 
2233 
2234 	/*
2235 	 * and store the record id (from the unit struct) into recids
2236 	 * for later commitment by md_rename()
2237 	 */
2238 
2239 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2240 }
2241 
2242 /*
2243  * MDRNM_UPDATE_KIDS
2244  * rename/exchange of our child or grandchild
2245  */
2246 void
2247 trans_renexch_update_kids(
2248 	md_rendelta_t	*delta,
2249 	md_rentxn_t	*rtxnp)
2250 {
2251 	mt_unit_t	*un;
2252 	minor_t		 from_min, to_min, log_min, master_min;
2253 
2254 	ASSERT(delta);
2255 	ASSERT(rtxnp);
2256 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2257 	ASSERT(delta->unp);
2258 	ASSERT(rtxnp->recids);
2259 	ASSERT(rtxnp->rec_idx >= 0);
2260 	ASSERT(delta->old_role == MDRR_PARENT);
2261 	ASSERT(delta->new_role == MDRR_PARENT);
2262 
2263 	un		= (mt_unit_t *)delta->unp;
2264 	from_min	= rtxnp->from.mnum;
2265 	to_min		= rtxnp->to.mnum;
2266 	log_min		= md_getminor(un->un_l_dev);
2267 	master_min	= md_getminor(un->un_m_dev);
2268 
2269 	/*
2270 	 * since our role isn't changing (parent->parent)
2271 	 * one of our children must be changing; which one is it?
2272 	 * find the child being modified, and update
2273 	 * our notion of it
2274 	 */
2275 
2276 	/* both devices must be metadevices in order to be updated */
2277 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2278 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2279 
2280 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2281 	    (master_min == from_min)) {
2282 
2283 		ASSERT(!(un->un_l_unit && (log_min == from_min)));
2284 
2285 		un->un_m_dev = makedevice(md_major, to_min);
2286 		un->un_m_key = rtxnp->to.key;
2287 
2288 	} else if ((md_getmajor(un->un_m_dev) == md_major) &&
2289 	    un->un_l_unit && (log_min == from_min)) {
2290 
2291 		ASSERT(master_min != from_min);
2292 
2293 		un->un_l_dev = makedevice(md_major, to_min);
2294 		un->un_l_key = rtxnp->to.key;
2295 
2296 	} else {
2297 		ASSERT(FALSE);
2298 		panic("trans_renexch_update_kids: not a metadevice");
2299 		/*NOTREACHED*/
2300 	}
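	/*
	 * One of the two branches above must have matched: the minor
	 * being renamed is either this trans's master or its attached
	 * log.  Falling through to the panic means we were handed a
	 * delta for a device this unit does not actually reference.
	 */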
2301 
2302 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2303 }
2304 
2305 /*
2306  * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child]
2307  */
2308 void
2309 trans_exchange_self_update_from_down(
2310 	md_rendelta_t	*delta,
2311 	md_rentxn_t	*rtxnp)
2312 {
2313 	mt_unit_t	*un;
2314 	minor_t		from_min, to_min, master_min, log_min;
2315 	sv_dev_t	sv;
2316 
2317 	ASSERT(delta);
2318 	ASSERT(delta->unp);
2319 	ASSERT(delta->uip);
2320 	ASSERT(rtxnp);
2321 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2322 	ASSERT(rtxnp->from.uip);
2323 	ASSERT(rtxnp->rec_idx >= 0);
2324 	ASSERT(rtxnp->recids);
2325 	ASSERT(delta->old_role == MDRR_SELF);
2326 	ASSERT(delta->new_role == MDRR_CHILD);
2327 	ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
2328 
2329 	un = (mt_unit_t *)delta->unp;
2330 
2331 	/*
2332 	 * if we're exchanging a trans, its master had better be a metadevice
2333 	 */
2334 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2335 
2336 	to_min		= rtxnp->to.mnum;
2337 	from_min	= rtxnp->from.mnum;
2338 	master_min	= md_getminor(un->un_m_dev);
2339 	log_min		= md_getminor(un->un_l_dev);
2340 
2341 	/*
2342 	 * both mechanisms for identifying a trans must be updated
2343 	 */
2344 
2345 	MD_SID(delta->unp) = to_min;
2346 	un->un_dev = makedevice(md_major, to_min);
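	/*
	 * The trans assumes the "to" identity here: both its self-id
	 * and its in-core dev_t are switched to to_min, matching the
	 * self->child role change recorded in the delta.
	 */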
2347 
2348 	/*
2349 	 * parent identifier need not change
2350 	 */
2351 
2352 	/*
2353 	 * point the set array pointers at the "new" unit and unit in-cores
2354 	 * Note: the other half of this transfer is done in the "update to"
2355 	 * rename/exchange named service.
2356 	 */
2357 
2358 	MDI_VOIDUNIT(to_min) = delta->uip;
2359 	MD_VOIDUNIT(to_min) = delta->unp;
2360 
2361 	/*
2362 	 * transfer kstats
2363 	 */
2364 
2365 	delta->uip->ui_kstat = rtxnp->to.kstatp;
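	/*
	 * Unlike the plain rename above, the exchange does not destroy
	 * and recreate kstats: the two units simply trade kstat
	 * pointers.  The matching half of the swap is done in the
	 * parent update routine below.
	 */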
2366 
2367 	/*
2368 	 * the unit in-core reference to the get next link's id changes
2369 	 */
2370 
2371 	delta->uip->ui_link.ln_id = to_min;
2372 
2373 	/*
2374 	 * which one of our children is changing?
2375 	 *
2376 	 * Note that the check routines forbid changing the log (for now)
2377 	 * because there's no lockfs-like trans-ufs "freeze and remount"
2378 	 * or "freeze and bobbit the log."
2379 	 */
2380 
2381 	/* both devices must be metadevices in order to be updated */
2382 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2383 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2384 
2385 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2386 	    (master_min == to_min)) {
2387 
2388 		/* master and log can't both be changed */
2389 		ASSERT(!(un->un_l_unit && (log_min == to_min)));
2390 
2391 		un->un_m_dev = makedevice(md_major, from_min);
2392 		sv.key = un->un_m_key;
2393 		un->un_m_key = rtxnp->from.key;
2394 
2395 	} else if ((md_getmajor(un->un_m_dev) == md_major) &&
2396 	    un->un_l_unit && (log_min == to_min)) {
2397 
2398 		/* master and log can't both be changed */
2399 		ASSERT(!(master_min == to_min));
2400 
2401 		un->un_l_dev = makedevice(md_major, from_min);
2402 		sv.key = un->un_l_key;
2403 		un->un_l_key = rtxnp->from.key;
2404 
2405 	} else {
2406 		ASSERT(FALSE);
2407 		panic("trans_exchange_self_update_from_down: not a metadevice");
2408 		/*NOTREACHED*/
2409 	}
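	/*
	 * Whichever branch matched, sv.key now identifies the namespace
	 * entry for the child slot that was just repointed at from_min;
	 * that stale entry is removed below.
	 */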
2410 
2411 	/*
2412 	 * the new master must exist in the name space
2413 	 */
2414 	ASSERT(rtxnp->from.key != MD_KEYWILD);
2415 	ASSERT(rtxnp->from.key != MD_KEYBAD);
2416 
2417 	/*
2418 	 * delete the key for the changed child from the namespace
2419 	 */
2420 
2421 	sv.setno = MD_MIN2SET(from_min);
2422 	md_rem_names(&sv, 1);
2423 
2424 	/*
2425 	 * and store the record id (from the unit struct) into recids
2426 	 */
2427 
2428 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2429 }
2430 
2431 /*
2432  * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self]
2433  */
2434 void
2435 trans_exchange_parent_update_to(
2436 	md_rendelta_t	*delta,
2437 	md_rentxn_t	*rtxnp)
2438 {
2439 	mt_unit_t	*un;
2440 	minor_t		from_min, to_min, master_min, log_min;
2441 	sv_dev_t	sv;
2442 
2443 	ASSERT(delta);
2444 	ASSERT(delta->unp);
2445 	ASSERT(delta->uip);
2446 	ASSERT(rtxnp);
2447 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2448 	ASSERT(rtxnp->from.uip);
2449 	ASSERT(rtxnp->rec_idx >= 0);
2450 	ASSERT(rtxnp->recids);
2451 	ASSERT(delta->old_role == MDRR_PARENT);
2452 	ASSERT(delta->new_role == MDRR_SELF);
2453 	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
2454 
2455 	un = (mt_unit_t *)delta->unp;
2456 
2457 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2458 
2459 	to_min		= rtxnp->to.mnum;
2460 	from_min	= rtxnp->from.mnum;
2461 	master_min	= md_getminor(un->un_m_dev);
2462 	log_min		= md_getminor(un->un_l_dev);
2463 
2464 	/*
2465 	 * both mechanisms for identifying a trans must be updated
2466 	 */
2467 
2468 	MD_SID(delta->unp) = from_min;
2469 	un->un_dev = makedevice(md_major, from_min);
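	/*
	 * Mirror image of the self->child update above: as the new
	 * "self" of the exchange, this unit takes over the from_min
	 * identity for both its self-id and its in-core dev_t.
	 */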
2470 
2471 	/*
2472 	 * parent identifier need not change
2473 	 */
2474 
2475 	/*
2476 	 * point the set array pointers at the "new" unit and unit in-cores
2477 	 * Note: the other half of this transfer is done in the "update from"
2478 	 * rename/exchange named service.
2479 	 */
2480 
2481 	MDI_VOIDUNIT(from_min) = delta->uip;
2482 	MD_VOIDUNIT(from_min) = delta->unp;
2483 
2484 	/*
2485 	 * transfer kstats
2486 	 */
2487 
2488 	delta->uip->ui_kstat = rtxnp->from.kstatp;
2489 
2490 	/*
2491 	 * the unit in-core reference to the get next link's id changes
2492 	 */
2493 
2494 	delta->uip->ui_link.ln_id = from_min;
2495 
2496 	/*
2497 	 * which one of our children is changing?
2498 	 */
2499 
2500 	/* both devices must be metadevices in order to be updated */
2501 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2502 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2503 
2504 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2505 	    (master_min == from_min)) {
2506 
2507 		/* can't be changing log and master */
2508 		ASSERT(!(un->un_l_unit && (log_min == to_min)));
2509 
2510 		un->un_m_dev = makedevice(md_major, to_min);
2511 		sv.key = un->un_m_key;
2512 		un->un_m_key = rtxnp->to.key;
2513 
2514 	} else if (un->un_l_unit &&
2515 	    ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) {
2516 
2517 		/* can't be changing log and master */
2518 		ASSERT(master_min != from_min);
2519 
2520 		un->un_l_dev = makedevice(md_major, to_min);
2521 		sv.key = un->un_l_key;
2522 		un->un_l_key = rtxnp->to.key;
2523 
2524 	} else {
2525 		ASSERT(FALSE);
2526 		panic("trans_exchange_parent_update_to: not a metadevice");
2527 		/*NOTREACHED*/
2528 	}
2529 
2530 	/*
2531 	 * delete the key for the changed child from the namespace
2532 	 */
2533 
2534 	sv.setno = MD_MIN2SET(from_min);
2535 	md_rem_names(&sv, 1);
2536 
2537 	/*
2538 	 * and store the record id (from the unit struct) into recids
2539 	 */
2540 
2541 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2542 }
2543 
2544 /*
2545  * MDRNM_LIST_URKIDS: named svc entry point
2546  * add all delta entries appropriate for our children onto the
2547  * delta list pointed to by dlpp
2548  */
2549 int
2550 trans_rename_listkids(
2551 	md_rendelta_t	**dlpp,
2552 	md_rentxn_t	 *rtxnp)
2553 {
2554 	minor_t		 from_min, to_min, master_min, log_min;
2555 	mt_unit_t	*from_un;
2556 	md_rendelta_t	*new, *p;
2557 	int		 n_children;
2558 
2559 	ASSERT(rtxnp);
2560 	ASSERT(dlpp);
2561 	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
2562 
2563 	from_min = rtxnp->from.mnum;
2564 	to_min = rtxnp->to.mnum;
2565 	n_children = 0;
2566 
2567 	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
2568 		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
2569 		return (-1);
2570 	}
2571 
2572 	for (p = *dlpp; p && p->next != NULL; p = p->next) {
2573 		/* NULL */
2574 	}
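	/*
	 * p now points at the tail of any deltas already on the
	 * caller's list, so the child deltas built below are linked in
	 * after the existing entries rather than in front of them.
	 */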
2575 
2576 	if (md_getmajor(from_un->un_m_dev) == md_major) {
2577 
2578 		master_min = md_getminor(from_un->un_m_dev);
2579 
2580 		p = new = md_build_rendelta(MDRR_CHILD,
2581 		    to_min == master_min? MDRR_SELF: MDRR_CHILD,
2582 		    from_un->un_m_dev, p, MD_UNIT(master_min),
2583 		    MDI_UNIT(master_min), &rtxnp->mde);
2584 
2585 		if (!new) {
2586 			if (mdisok(&rtxnp->mde)) {
2587 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
2588 			}
2589 			return (-1);
2590 		}
2591 		++n_children;
2592 	}
2593 
2594 	if (from_un->un_l_unit &&
2595 	    (md_getmajor(from_un->un_l_dev) == md_major)) {
2596 
2597 		log_min = md_getminor(from_un->un_l_dev);
2598 
2599 		new = md_build_rendelta(MDRR_CHILD,
2600 		    to_min == log_min? MDRR_SELF: MDRR_CHILD,
2601 		    from_un->un_l_dev, p, MD_UNIT(log_min),
2602 		    MDI_UNIT(log_min), &rtxnp->mde);
2603 		if (!new) {
2604 			if (mdisok(&rtxnp->mde)) {
2605 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
2606 			}
2607 			return (-1);
2608 		}
2609 		++n_children;
2610 	}
2611 
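	/*
	 * At most two deltas were added: one for the master if it is a
	 * metadevice, and one for the log if a log is attached and is
	 * itself a metadevice.
	 */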
2612 	return (n_children);
2613 }
2614 
2615 /*
2616  * support routine for MDRNM_CHECK
2617  */
2618 static int
2619 trans_may_renexch_self(
2620 	mt_unit_t	*un,
2621 	mdi_unit_t	*ui,
2622 	md_rentxn_t	*rtxnp)
2623 {
2624 	minor_t			from_min;
2625 	minor_t			to_min;
2626 
2627 	ASSERT(rtxnp);
2628 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2629 
2630 	from_min = rtxnp->from.mnum;
2631 	to_min	 = rtxnp->to.mnum;
2632 
2633 	if (!un || !ui) {
2634 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2635 		    from_min);
2636 		return (EINVAL);
2637 	}
2638 
2639 	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
2640 
2641 	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
2642 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2643 		return (EINVAL);
2644 	}
2645 
2646 	if (MD_PARENT(un) == MD_MULTI_PARENT) {
2647 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2648 		return (EINVAL);
2649 	}
2650 
2651 	switch (rtxnp->op) {
2652 	case MDRNOP_EXCHANGE:
2653 		/*
2654 		 * may only swap with our child (master) if it is a metadevice
2655 		 */
2656 		if (md_getmajor(un->un_m_dev) != md_major) {
2657 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2658 			    to_min);
2659 			return (EINVAL);
2660 		}
2661 
2662 		if (un->un_l_unit &&
2663 		    (md_getmajor(un->un_l_dev) != md_major)) {
2664 
2665 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2666 			    to_min);
2667 			return (EINVAL);
2668 		}
2669 
2670 		if (md_getminor(un->un_m_dev) != to_min) {
2671 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2672 			    to_min);
2673 			return (EINVAL);
2674 		}
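		/*
		 * In short: the master must be a metadevice, any
		 * attached log must also be a metadevice, and the
		 * device being exchanged with must be this trans's
		 * own master.
		 */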
2675 
2676 		break;
2677 
2678 	case MDRNOP_RENAME:
2679 		break;
2680 
2681 	default:
2682 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2683 		    from_min);
2684 		return (EINVAL);
2685 	}
2686 
2687 	return (0);	/* ok */
2688 }
2689 
2690 /*
2691  * Named service entry point: MDRNM_CHECK
2692  */
2693 intptr_t
2694 trans_rename_check(
2695 	md_rendelta_t	*delta,
2696 	md_rentxn_t	*rtxnp)
2697 {
2698 	int		 err = 0;
2699 	mt_unit_t	*un;
2700 
2701 	ASSERT(delta);
2702 	ASSERT(rtxnp);
2703 	ASSERT(delta->unp);
2704 	ASSERT(delta->uip);
2705 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2706 
2707 	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
2708 		(void) mdsyserror(&rtxnp->mde, EINVAL);
2709 		return (EINVAL);
2710 	}
2711 
2712 	un = (mt_unit_t *)delta->unp;
2713 
2714 	if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) {
2715 		/*
2716 		 * the trans may not be open if it is being modified by the
2717 		 * exchange or rename; trans-UFS hasn't been verified to
2718 		 * handle the change out from underneath it.
2719 		 */
2720 		if ((md_unit_isopen(delta->uip)) &&
2721 		    ((md_getminor(delta->dev) == rtxnp->from.mnum) ||
2722 		    (md_getminor(delta->dev) == rtxnp->to.mnum))) {
2723 			(void) mdmderror(&rtxnp->mde,
2724 			    MDE_RENAME_BUSY, rtxnp->from.mnum);
2725 			return (EBUSY);
2726 		}
2727 	}
2728 
2729 	/*
2730 	 * can't rename or exchange with a log attached
2731 	 */
2732 
2733 	if (un->un_l_unit) {
2734 		(void) mdmderror(&rtxnp->mde,
2735 		    MDE_RENAME_BUSY, rtxnp->from.mnum);
2736 		return (EBUSY);
2737 	}
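	/*
	 * Because this check runs for both the SELF and PARENT roles,
	 * any rename or exchange involving a trans with an attached log
	 * is refused here; the log-handling branches in the update
	 * routines above stay dormant until log renaming is supported
	 * (see the "for now" note in the exchange update routine).
	 */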
2738 
2739 	switch (delta->old_role) {
2740 	case MDRR_SELF:
2741 		/*
2742 		 * self does additional checks
2743 		 */
2744 		err = trans_may_renexch_self((mt_unit_t *)delta->unp,
2745 		    delta->uip, rtxnp);
2746 		if (err != 0) {
2747 			goto out;
2748 		}
2749 		/* FALLTHROUGH */
2750 
2751 	case MDRR_PARENT:
2752 		/*
2753 		 * top_is_trans is only used to check for online
2754 		 * rename/exchange when MD_RENAME_VERSION == OFFLINE
2755 		 * since trans holds the sub-devices open
2756 		 */
2757 		rtxnp->stat.trans_in_stack = TRUE;
2758 		break;
2759 	default:
2760 		break;
2761 	}
2762 out:
2763 	return (err);
2764 }
2765 
2766 /* end of rename/exchange */
2767