1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/user.h>
32 #include <sys/uio.h>
33 #include <sys/t_lock.h>
34 #include <sys/dkio.h>
35 #include <sys/vtoc.h>
36 #include <sys/kmem.h>
37 #include <vm/page.h>
38 #include <sys/cmn_err.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41 #include <sys/mkdev.h>
42 #include <sys/stat.h>
43 #include <sys/open.h>
44 #include <sys/lvm/md_trans.h>
45 #include <sys/modctl.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/debug.h>
49 #include <sys/filio.h>
50 #include <sys/lvm/md_notify.h>
51 #include <sys/callb.h>
52 #include <sys/disp.h>
53
54 #include <sys/sysevent/eventdefs.h>
55 #include <sys/sysevent/svm.h>
56
57 extern int md_status;
58 extern unit_t md_nunits;
59 extern set_t md_nsets;
60 extern md_set_t md_set[];
61 extern md_ops_t trans_md_ops;
62 extern md_krwlock_t md_unit_array_rw;
63 extern uint_t mt_debug;
64
65 extern major_t md_major;
66
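/*
 * Common unit lookup for the trans ioctl handlers below. The flags
 * select the behavior: STALE_OK permits the lookup while the set's
 * metadb is stale; NO_OLD requires that the unit not exist yet and
 * returns a non-NULL cookie (not a real unit pointer) on success;
 * ARRAY_READER/ARRAY_WRITER take the unit-array lock through the
 * IOLOCK; WR_LOCK/RD_LOCK take the per-unit ioctl lock, while NO_LOCK
 * skips it. On any failure an error is recorded in "mde" and NULL is
 * returned. The unit found must be a trans (MD_METATRANS) device.
 */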
67 static mt_unit_t *
68 trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
69 {
70 mt_unit_t *un;
71 mdi_unit_t *ui;
72 set_t setno = MD_MIN2SET(mnum);
73
74 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
75 (void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
76 return (NULL);
77 }
78
79 if (! (flags & STALE_OK)) {
80 if (md_get_setstatus(setno) & MD_SET_STALE) {
81 (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
82 return (NULL);
83 }
84 }
85
86 ui = MDI_UNIT(mnum);
87 if (flags & NO_OLD) {
88 if (ui != NULL) {
89 (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
90 return (NULL);
91 }
92 return ((mt_unit_t *)1);
93 }
94
95 if (ui == NULL) {
96 (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
97 return (NULL);
98 }
99
100 if (flags & ARRAY_WRITER)
101 md_array_writer(lock);
102 else if (flags & ARRAY_READER)
103 md_array_reader(lock);
104
105 if (!(flags & NO_LOCK)) {
106 if (flags & WR_LOCK)
107 (void) md_ioctl_writerlock(lock, ui);
108 else /* RD_LOCK */
109 (void) md_ioctl_readerlock(lock, ui);
110 }
111 un = (mt_unit_t *)MD_UNIT(mnum);
112
113 if (un->c.un_type != MD_METATRANS) {
114 (void) mdmderror(mde, MDE_NOT_MT, mnum);
115 return (NULL);
116 }
117
118 return (un);
119 }
120
121 #ifdef DEBUG
122 /*
123 * DEBUG ROUTINES
124 * THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED
125 */
126
127 extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*);
128
129 /*
130 * return the global stats struct
131 */
132 static int
133 trans_get_transstats(void *d, int mode)
134 {
135 md_i_get_t *migp = d;
136
137 mdclrerror(&migp->mde);
138
139 if (migp->size == 0) {
140 migp->size = sizeof (struct transstats);
141 return (0);
142 }
143
144 if (migp->size < sizeof (struct transstats))
145 return (EFAULT);
146
147 if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp,
148 sizeof (struct transstats), mode))
149 return (EFAULT);
150 return (0);
151 }
152
153 /*
154 * test ioctls
155 */
156 /*
157 * TEST TRYGETBLK
158 */
159 /*ARGSUSED1*/
160 static int
161 trans_test_trygetblk(void *d, int mode, IOLOCK *lock)
162 {
163 mt_unit_t *un;
164 int test;
165 dev_t dev;
166 struct buf *bp;
167 struct buf *trygetblk();
168
169 md_i_get_t *migp = d;
170
171 mdclrerror(&migp->mde);
172 migp->size = 0;
173
174 un = trans_getun(migp->id, &migp->mde,
175 RD_LOCK, lock);
176 if (un == NULL)
177 return (EINVAL);
178
179 dev = un->un_m_dev;
180
181 /*
182 * test 1 -- don't find nonexistent buf
183 */
184 test = 1;
185 if (bp = trygetblk(dev, 0))
186 goto errout;
187
188 /*
189 * test 2 - don't find stale buf
190 */
191 test = 2;
192 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
193 goto errout;
194 bp->b_flags |= (B_STALE|B_DONE);
195 brelse(bp);
196 if (bp = trygetblk(dev, 0))
197 goto errout;
198
199 /*
200 * test 3 -- don't find busy buf
201 */
202 test = 3;
203 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
204 goto errout;
205 if (trygetblk(dev, 0))
206 goto errout;
207 bp->b_flags |= B_STALE;
208 brelse(bp);
209
210 /*
211 * test 4 -- don't find not-done buf
212 */
213 test = 4;
214 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
215 goto errout;
216 brelse(bp);
217 if (bp = trygetblk(dev, 0))
218 goto errout;
219
220 /*
221 * test 5 -- find an idle buf
222 */
223 test = 5;
224 if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL)
225 goto errout;
226 brelse(bp);
227 if ((bp = trygetblk(dev, 0)) == NULL)
228 goto errout;
229 bp->b_flags |= B_STALE;
230 brelse(bp);
231 bp = 0;
232
233 test = 0; /* no test failed */
234 errout:
235 if (bp) {
236 bp->b_flags |= B_STALE;
237 brelse(bp);
238 }
239 migp->size = test;
240 if (test)
241 return (EINVAL);
242 return (0);
243 }
244 /*
245 * TEST TRYGETPAGE
246 */
247 static page_t *
248 trans_trypage(struct vnode *vp, uint_t off)
249 {
250 page_t *pp;
251
252 /*
253 * get a locked page
254 */
255 if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL)
256 return (NULL);
257 /*
258 * get the iolock
259 */
260 if (!page_io_trylock(pp)) {
261 page_unlock(pp);
262 return (NULL);
263 }
264 return (pp);
265 }
266
267 /*ARGSUSED1*/
268 static int
269 trans_test_trypage(void *d, int mode, IOLOCK *lock)
270 {
271 mt_unit_t *un;
272 int test;
273 dev_t dev;
274 struct page *pp;
275 struct vnode *devvp;
276 struct vnode *cvp;
277 extern struct vnode *common_specvp(struct vnode *);
278 extern void pvn_io_done(struct page *);
279
280 md_i_get_t *migp = d;
281
282 mdclrerror(&migp->mde);
283 migp->size = 0;
284
285 un = trans_getun(migp->id, &migp->mde,
286 RD_LOCK, lock);
287 if (un == NULL)
288 return (EINVAL);
289
290 dev = un->un_m_dev;
291 devvp = makespecvp(dev, VBLK);
292 cvp = common_specvp(devvp);
293
294 /*
295 * get rid of the device's pages
296 */
297 (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
298
299 /*
300 * test 1 -- don't find nonexistent page
301 */
302 test = 1;
303 if (pp = trans_trypage(cvp, 0))
304 goto errout;
305
306 /*
307 * test 2 -- don't find busy page
308 */
309 test = 2;
310 if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
311 goto errout;
312 if (trans_trypage(cvp, 0))
313 goto errout;
314 pvn_io_done(pp);
315 pp = 0;
316
317 /*
318 * test 3 - find an idle page
319 */
320 test = 3;
321 if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
322 goto errout;
323 pvn_io_done(pp);
324 if ((pp = trans_trypage(cvp, 0)) == NULL)
325 goto errout;
326 pvn_io_done(pp);
327 pp = 0;
328
329 test = 0; /* no test failed */
330 errout:
331 if (pp)
332 pvn_io_done(pp);
333 /*
334 * get rid of the file's pages
335 */
336 (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
337 VN_RELE(devvp);
338
339 migp->size = test;
340 if (test)
341 return (EINVAL);
342 return (0);
343 }
344 /*
345 * TEST TSD
346 */
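/*
 * The tests below exercise the kernel thread-specific data (TSD)
 * interfaces: tsd_create()/tsd_destroy() manage keys, tsd_set() and
 * tsd_get() bind and look up a per-thread value, tsd_getcreate()
 * combines lookup with lazy key creation and value allocation, and a
 * destructor registered with a key is run for outstanding values when
 * a thread exits or the key is destroyed. The usual calling pattern,
 * sketched from the calls the tests make (my_destructor and my_state
 * are placeholder names):
 *
 *	uint_t	key;
 *
 *	tsd_create(&key, my_destructor);	create the key once
 *	(void) tsd_set(key, my_state);		bind a value, per thread
 *	my_state = tsd_get(key);		look it up later
 *	tsd_destroy(&key);			tear the key down
 */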
347 #define NKEYS (7)
348 #define NTSDTHREADS (3)
349 struct tothread {
350 int test;
351 int error;
352 int exits;
353 int step;
354 kmutex_t lock;
355 kcondvar_t cv;
356 };
357 static uint_t keys[NKEYS];
358 static struct tothread tta[NTSDTHREADS];
359 static int allocatorvalue;
360 static int okdestructoralloc;
361
362 static void
363 trans_test_stepwait(struct tothread *tp, int step)
364 {
365 /*
366 * wait for other thread
367 */
368 mutex_enter(&tp->lock);
369 while (tp->step < step)
370 cv_wait(&tp->cv, &tp->lock);
371 mutex_exit(&tp->lock);
372 }
373
374 static void
375 trans_test_step(struct tothread *tp, int step)
376 {
377 /*
378 * wakeup other threads
379 */
380 mutex_enter(&tp->lock);
381 tp->step = step;
382 cv_broadcast(&tp->cv);
383 mutex_exit(&tp->lock);
384 }
385
386 static void
387 trans_test_destructor(void *voidp)
388 {
389 int exits;
390 struct tothread *tp = voidp;
391
392 /*
393 * check that threads clean up *all* TSD at exit
394 */
395 mutex_enter(&tp->lock);
396 exits = ++tp->exits;
397 mutex_exit(&tp->lock);
398 if (exits >= NKEYS)
399 trans_test_step(tp, 3);
400 }
401
402 static void
403 trans_test_destructor_alloc(void *voidp)
404 {
405 int *value = voidp;
406
407 okdestructoralloc = 0;
408 if (value) {
409 if (*value == allocatorvalue)
410 okdestructoralloc = 1;
411 md_trans_free((caddr_t)value, sizeof (value));
412 }
413 }
414
415 static void *
416 trans_test_allocator(void)
417 {
418 int *value;
419
420 value = (int *)md_trans_zalloc(sizeof (value));
421 *value = allocatorvalue;
422 return ((void *)value);
423 }
424
425 /*
426 * thread used to test TSD destroy functionality
427 */
428 static void
429 trans_test_thread(struct tothread *tp)
430 {
431 int i;
432 callb_cpr_t cprinfo;
433
434 /*
435 * Register cpr callback
436 */
437 CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr,
438 "trans_test_thread");
439
440 /*
441 * get some TSD
442 */
443 for (i = NKEYS - 1; i >= 0; --i)
444 if (tsd_set(keys[i], tp)) {
445 tp->error = 500;
446 goto errout;
447 }
448 /*
449 * tell parent that we have TSD
450 */
451 trans_test_step(tp, 1);
452
453 /*
454 * wait for parent to destroy some of our TSD
455 */
456 trans_test_stepwait(tp, 2);
457
458 /*
459 * make sure that the appropriate TSD was destroyed
460 */
461 if ((tsd_get(keys[0]) != NULL) ||
462 (tsd_get(keys[NKEYS-1]) != NULL) ||
463 (tsd_get(keys[NKEYS>>1]) != NULL)) {
464 tp->error = 510;
465 goto errout;
466 }
467 for (i = 0; i < NKEYS; ++i)
468 if (tsd_get(keys[i]) != tp)
469 if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) {
470 tp->error = 520;
471 goto errout;
472 }
473
474 /*
475 * set up cpr exit
476 */
477 mutex_enter(&tp->lock);
478 CALLB_CPR_EXIT(&cprinfo);
479 thread_exit();
480 errout:
481 /*
482 * error -- make sure the parent will wake up (error code in tp)
483 */
484 trans_test_step(tp, 3);
485
486 /*
487 * set up cpr exit
488 */
489 mutex_enter(&tp->lock);
490 CALLB_CPR_EXIT(&cprinfo);
491 thread_exit();
492 }
493
494 static void
495 trans_test_threadcreate(struct tothread *tp)
496 {
497 /*
498 * initialize the per thread struct and make a thread
499 */
500 bzero((caddr_t)tp, sizeof (struct tothread));
501
502 mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL);
503 cv_init(&tp->cv, NULL, CV_DEFAULT, NULL);
504
505 (void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0,
506 TS_RUN, minclsyspri);
507 }
508 /*
509 * driver for TSD tests -- *NOT REENTRANT*
510 */
511 /*ARGSUSED1*/
512 static int
513 trans_test_tsd(void *d, int mode)
514 {
515 int test;
516 uint_t rekeys[NKEYS];
517 int i;
518 uint_t key;
519 int error;
520
521 md_i_get_t *migp = d;
522
523 mdclrerror(&migp->mde);
524 migp->size = 0;
525
526 /*
527 * destroy old keys, if any
528 */
529 for (i = 0; i < NKEYS; ++i)
530 tsd_destroy(&keys[i]);
531 /*
532 * test 1 -- simple create and destroy keys tests
533 */
534 test = 1;
535 error = 0;
536 for (i = 0; i < NKEYS; ++i) {
537 tsd_create(&keys[i], NULL);
538
539 /* get with no set should return NULL */
540 if (tsd_get(keys[i]) != NULL) {
541 error = 100;
542 goto errout;
543 }
544
545 /* destroyed key should be 0 */
546 key = keys[i];
547 tsd_destroy(&keys[i]);
548 if (keys[i]) {
549 error = 110;
550 goto errout;
551 }
552
553 /* destroy the key twice */
554 keys[i] = key;
555 tsd_destroy(&keys[i]);
556
557 /* destroyed key should be 0 */
558 if (keys[i]) {
559 error = 120;
560 goto errout;
561 }
562
563 /* getting a destroyed key should return NULL */
564 if (tsd_get(keys[i]) != NULL) {
565 error = 130;
566 goto errout;
567 }
568 /* recreate the key */
569 tsd_create(&keys[i], NULL);
570
571 /* should be the same key as before */
572 if (key != keys[i]) {
573 error = 140;
574 goto errout;
575 }
576
577 /* initial value should be NULL */
578 if (tsd_get(keys[i]) != NULL) {
579 error = 150;
580 goto errout;
581 }
582
583 /* cleanup */
584 tsd_destroy(&keys[i]);
585 }
586
587 /*
588 * test 2 -- recreate keys
589 */
590 test = 2;
591 error = 0;
592 for (i = 0; i < NKEYS; ++i)
593 tsd_create(&keys[i], NULL);
594 for (i = 0; i < NKEYS; ++i) {
595 /* make sure the keys were created */
596 if (keys[i] == 0) {
597 error = 200;
598 goto errout;
599 }
600
601 /* make sure that recreating key doesn't change it */
602 rekeys[i] = keys[i];
603 tsd_create(&rekeys[i], NULL);
604 if (rekeys[i] != keys[i]) {
605 error = 210;
606 goto errout;
607 }
608 }
609 for (i = 0; i < NKEYS; ++i)
610 tsd_destroy(&keys[i]);
611
612 /*
613 * test 3 -- check processing for unset and destroyed keys
614 */
615 test = 3;
616 error = 0;
617
618 /* getting a 0 key returns NULL */
619 if (tsd_get(0) != NULL) {
620 error = 300;
621 goto errout;
622 }
623
624 /* setting a 0 key returns error */
625 if (tsd_set(0, NULL) != EINVAL) {
626 error = 310;
627 goto errout;
628 }
629 tsd_create(&key, NULL);
630
631 /* setting a created key returns no error */
632 if (tsd_set(key, NULL) == EINVAL) {
633 error = 320;
634 goto errout;
635 }
636 tsd_destroy(&key);
637
638 /* setting a destroyed key returns error */
639 if (tsd_set(key, NULL) != EINVAL) {
640 error = 330;
641 goto errout;
642 }
643
644 /*
645 * test 4 -- make sure that set and get work
646 */
647 test = 4;
648 error = 0;
649
650 for (i = 0; i < NKEYS; ++i) {
651 tsd_create(&keys[i], NULL);
652
653 /* set a value */
654 (void) tsd_set(keys[i], &key);
655
656 /* get the value */
657 if (tsd_get(keys[i]) != &key) {
658 error = 400;
659 goto errout;
660 }
661
662 /* set the value to NULL */
663 (void) tsd_set(keys[i], NULL);
664
665 /* get the NULL */
666 if (tsd_get(keys[i]) != NULL) {
667 error = 410;
668 goto errout;
669 }
670 }
671 /* cleanup */
672 for (i = 0; i < NKEYS; ++i)
673 tsd_destroy(&keys[i]);
674
675 /*
676 * test 5 -- destroying keys w/multiple threads
677 */
678 test = 5;
679 error = 0;
680
681 /* create the keys */
682 for (i = 0; i < NKEYS; ++i)
683 tsd_create(&keys[i], trans_test_destructor);
684
685 /* create some threads */
686 for (i = 0; i < NTSDTHREADS; ++i)
687 trans_test_threadcreate(&tta[i]);
688
689 /* wait for the threads to assign TSD */
690 for (i = 0; i < NTSDTHREADS; ++i)
691 trans_test_stepwait(&tta[i], 1);
692
693 /* destroy some of the keys */
694 tsd_destroy(&keys[0]);
695 tsd_destroy(&keys[NKEYS - 1]);
696 tsd_destroy(&keys[NKEYS >> 1]);
697 tsd_destroy(&keys[NKEYS >> 1]);
698
699 /* wakeup the threads -- they check that the destroy took */
700 for (i = 0; i < NTSDTHREADS; ++i)
701 trans_test_step(&tta[i], 2);
702
703 /* wait for the threads to exit (also checks for TSD cleanup) */
704 for (i = 0; i < NTSDTHREADS; ++i)
705 trans_test_stepwait(&tta[i], 3);
706
707 /* destroy the rest of the keys */
708 for (i = 0; i < NKEYS; ++i)
709 tsd_destroy(&keys[i]);
710
711 /* check for error */
712 for (i = 0; i < NTSDTHREADS; ++i) {
713 if (!error)
714 error = tta[i].error;
715 mutex_destroy(&tta[i].lock);
716 cv_destroy(&tta[i].cv);
717 }
718
719 /*
720 * test 6 -- test getcreate
721 */
722 test = 6;
723 error = 0;
724
725 /* make sure the keys are destroyed */
726 for (i = 0; i < NKEYS; ++i)
727 tsd_destroy(&keys[i]);
728
729 /* get w/create */
730 for (i = 0; i < NKEYS; ++i) {
731 allocatorvalue = i;
732 if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc,
733 trans_test_allocator) != allocatorvalue) {
734 error = 600;
735 goto errout;
736 }
737 }
738 for (i = 0; i < NKEYS; ++i) {
739 allocatorvalue = i;
740 if (*(int *)tsd_get(keys[i]) != allocatorvalue) {
741 error = 610;
742 goto errout;
743 }
744 }
745 /* make sure destructor gets called when we destroy the keys */
746 for (i = 0; i < NKEYS; ++i) {
747 allocatorvalue = i;
748 okdestructoralloc = 0;
749 tsd_destroy(&keys[i]);
750 if (okdestructoralloc == 0) {
751 error = 620;
752 goto errout;
753 }
754 }
755
756 errout:
757 /* make sure the keys are destroyed */
758 for (i = 0; i < NKEYS; ++i)
759 tsd_destroy(&keys[i]);
760
761 /* return test # and error code (if any) */
762 migp->size = test;
763 return (error);
764 }
765
766 /*
767 * Error Injection Structures, Data, and Functions:
768 *
769 * Error injection is used to test the Harpy error recovery system. The
770 * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a
771 * unit, and MD_IOC_STOPERRORS turns it off. An mt_error structure is
772 * associated with every trans device for which we are injecting errors. When
773 * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to
774 * trans_error_injector(), so that it gets called for every MDD I/O operation.
775 *
776 * The trans unit can be in one of three states:
777 *
778 * count down - Each I/O causes er_count_down to be decremented.
779 * When er_count_down reaches 0, an error is injected and
780 * the block number is remembered. Without making
781 * special provisions, the log area would receive only a
782 * small percentage of the injected errors; thus,
783 * trans_check_error() is written so that every
784 * other error is injected on the log.
785 *
786 * suspend - No errors are generated and the counters are not
787 * modified. This is so that fsck/mkfs can do their thing
788 * (we're not testing them) and so that the test script can
789 * set up another test. The transition back to the count
790 * down state occurs when MD_IOC_INJECTERRORS is invoked
791 * again.
792 */
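/*
 * A third state, mte_watch_block, is handled by trans_check_error()
 * below: once a unit is in that state, an EIO is injected on any I/O
 * that targets the remembered er_bad_unit/er_bad_block pair.
 *
 * With the defaults below (INITIAL_COUNT and ERROR_INCREMENT both 1),
 * the count down schedule works out as follows: the first eligible
 * I/O is errored immediately, er_count_down is then reloaded from
 * er_reset_count and er_reset_count grows by er_increment, so the
 * second error fires on the next eligible I/O and thereafter the gap
 * grows by one each time (2, 3, 4, ... eligible I/Os between errors).
 * "Eligible" means an I/O to whichever device's turn it is, since
 * errors alternate between the log and the master; an I/O to the
 * other device simply resets er_count_down to 1.
 */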
793
794 typedef enum {
795 mte_count_down,
796 mte_suspend,
797 mte_watch_block
798 } mte_state;
799
800 typedef struct mt_error {
801 struct mt_error *er_next; /* next error unit in list. */
802 mte_state er_state;
803 mt_unit_t *er_unitp; /* unit to force errors on. */
804 size_t er_count_down; /* i/o transactions until error. */
805 size_t er_increment; /* increment for reset_count. */
806 size_t er_reset_count; /* used to reset er_count_down */
807 size_t er_total_errors; /* count generated errors. */
808 /* Following fields describe error we are injecting. */
809 dev_t er_bad_unit; /* Unit associated with block in */
810 /* error. */
811 off_t er_bad_block; /* Block in error. */
812 } mt_error_t;
813
814 #define ERROR_INCREMENT (1)
815 #define INITIAL_COUNT (1)
816
817 static int default_increment = ERROR_INCREMENT;
818 static kmutex_t error_mutex; /* protects error_list */
819 static mt_error_t error_list_head;
820 static int initial_count = INITIAL_COUNT;
821 static int (*tstpnt_save)(buf_t *, int, void*) = NULL;
822
823 static mt_error_t *
824 find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp)
825 {
826 mt_error_t *errp = (mt_error_t *)NULL;
827
828 ASSERT(mutex_owned(&error_mutex) != 0);
829 *pred_errp = &error_list_head;
830 while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) {
831 if (errp->er_unitp == un)
832 break;
833 *pred_errp = errp;
834 }
835 return (errp);
836 }
837
838 static mt_error_t *
839 find_by_dev(md_dev64_t dev)
840 {
841 mt_error_t *errp = &error_list_head;
842
843 ASSERT(mutex_owned(&error_mutex) != 0);
844 while ((errp = errp->er_next) != (mt_error_t *)NULL) {
845 if ((errp->er_unitp->un_m_dev == dev) ||
846 (errp->er_unitp->un_l_dev == dev))
847 break;
848 }
849 return (errp);
850 }
851
852 static int
853 trans_check_error(buf_t *bp, mt_error_t *errp)
854 {
855 int rv = 0;
856 md_dev64_t target = md_expldev(bp->b_edev);
857
858 ASSERT(mutex_owned(&error_mutex) != 0);
859 switch (errp->er_state) {
860 case mte_count_down:
861 errp->er_count_down--;
862 if (errp->er_count_down == 0) {
863 /*
864 * Every other error that we inject should be on
865 * the log device. Errors will be injected on the
866 * log device when errp->er_total_errors is even
867 * and on the master device when it is odd. If
868 * this I/O is not for the appropriate device, we
869 * will set errp->er_count_down to 1, so that we
870 * can try again later.
871 */
872 if ((((errp->er_total_errors % 2) == 0) &&
873 (errp->er_unitp->un_l_dev == target)) ||
874 (((errp->er_total_errors % 2) != 0) &&
875 (errp->er_unitp->un_m_dev == target))) {
876 /* simulate an error */
877 bp->b_flags |= B_ERROR;
878 bp->b_error = EIO;
879 /* remember the error. */
880 errp->er_total_errors++;
881 errp->er_bad_unit = bp->b_edev;
882 errp->er_bad_block = bp->b_blkno;
883 /* reset counters. */
884 errp->er_count_down = errp->er_reset_count;
885 errp->er_reset_count += errp->er_increment;
886 rv = 1;
887 } else {
888 /* Try again next time. */
889 errp->er_count_down = 1;
890 }
891 }
892 break;
893
894 case mte_suspend:
895 /* No errors while suspended. */
896 break;
897
898 case mte_watch_block:
899 if ((bp->b_edev == errp->er_bad_unit) &&
900 (bp->b_blkno == errp->er_bad_block)) {
901 bp->b_flags |= B_ERROR;
902 bp->b_error = EIO;
903 rv = 1;
904 }
905 break;
906 }
907 return (rv);
908 }
909
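/*
 * trans_error_injector() is installed as mdv_strategy_tstpnt while any
 * unit is set up for injection, so it is handed every MDD strategy
 * call. If the buf targets the master device of a unit on the error
 * list, or falls within the log-block range of its log device,
 * trans_check_error() decides whether to fake an EIO. Any test point
 * that was saved when injection started is then chained to, and
 * md_biodone() is issued for an injected error if that test point did
 * not already complete the buf. A nonzero return indicates the buf
 * has been finished here (presumably telling md_call_strategy() not
 * to issue the real I/O).
 */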
910 static int
911 trans_error_injector(buf_t *bp, int flag, void* private)
912 {
913 mt_error_t *errp = (mt_error_t *)NULL;
914 int (*tstpnt)(buf_t *, int, void*) = NULL;
915 int rv = 0;
916 md_dev64_t target = md_expldev(bp->b_edev);
917 int trv = 0;
918 mt_unit_t *un;
919
920 mutex_enter(&error_mutex);
921 errp = find_by_dev(target);
922 if (errp != (mt_error_t *)NULL) {
923 un = errp->er_unitp;
924 if (target == un->un_m_dev) {
925 /* Target is our master device. */
926 rv = trans_check_error(bp, errp);
927 }
928 if (target == un->un_l_dev) {
929 /*
930 * Target is our log device. Unfortunately, the same
931 * device may also be used for the MDD database.
932 * Therefore, we need to make sure that the I/O is for
933 * the range of blocks designated as our log.
934 */
935 if ((bp->b_blkno >= un->un_l_pwsblk) &&
936 ((bp->b_blkno + btodb(bp->b_bcount)) <=
937 (un->un_l_sblk + un->un_l_tblks))) {
938 rv = trans_check_error(bp, errp);
939 }
940 }
941 }
942 tstpnt = tstpnt_save;
943 mutex_exit(&error_mutex);
944
945 if (tstpnt != NULL)
946 trv = (*tstpnt)(bp, flag, private);
947
948 /*
949 * If we are producing an error (rv != 0) we need to make sure that
950 * biodone gets called. If the tstpnt returned non-zero,
951 * we'll assume that it called biodone.
952 */
953 if ((rv != 0) && (trv == 0)) {
954 md_biodone(bp);
955 }
956 rv = ((rv == 0) && (trv == 0)) ? 0 : 1;
957 return (rv);
958 }
959
960 /*
961 * Prepare to inject errors on the master and log devices associated with the
962 * unit specified in migp. The first time that trans_inject_errors() is called
963 * for a unit, an mt_error_t structure is allocated and initialized for the
964 * unit. Subsequent calls for the unit will just ensure that the unit is in the
965 * count down state.
966 *
967 * If an mt_error structure is allocated and it is the first one to be put in
968 * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is
969 * set to trans_error_injector so that it will be called to see if an I/O
970 * request should be treated as an error.
971 */
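/*
 * For reference, a user-level test would drive this roughly as in the
 * hypothetical sketch below (not code from any test suite; the admin
 * device path and the plumbing that delivers the command to
 * md_trans_ioctl() are assumptions):
 *
 *	md_i_get_t	mig;
 *
 *	bzero(&mig, sizeof (mig));
 *	mig.id = mnum;				the trans unit's minor
 *	fd = open("/dev/md/admin", O_RDWR);
 *	(void) ioctl(fd, MD_IOC_INJECTERRORS, &mig);	start/resume
 *	... run I/O against the trans device ...
 *	(void) ioctl(fd, MD_IOC_STOPERRORS, &mig);	stop and clean up
 */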
972
973 /*ARGSUSED1*/
974 static int
975 trans_inject_errors(void *d, int mode, IOLOCK *lock)
976 {
977 mt_error_t *errp;
978 mt_error_t *do_not_care;
979 mt_unit_t *un;
980 int rv = 0;
981
982 md_i_get_t *migp = d;
983
984 mdclrerror(&migp->mde);
985
986 un = trans_getun(migp->id, &migp->mde,
987 RD_LOCK, lock);
988 if (un == NULL)
989 return (EINVAL);
990
991 /*
992 * If there is already an error structure for the unit, make sure that
993 * it is in count down mode.
994 */
995
996 mutex_enter(&error_mutex);
997 errp = find_by_mtunit(un, &do_not_care);
998 if (errp != (mt_error_t *)NULL) {
999 errp->er_state = mte_count_down;
1000 } else {
1001
1002 /*
1003 * Initialize error structure.
1004 */
1005
1006 errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t));
1007 errp->er_state = mte_count_down;
1008 errp->er_unitp = un;
1009 errp->er_count_down = initial_count;
1010 errp->er_increment = default_increment;
1011 errp->er_reset_count = initial_count;
1012 errp->er_total_errors = 0;
1013 errp->er_bad_unit = 0;
1014 errp->er_bad_block = 0;
1015
1016 /* Insert it into the list. */
1017
1018 errp->er_next = error_list_head.er_next;
1019 error_list_head.er_next = errp;
1020
1021 /*
1022 * Set up md_call_strategy to call our error injector.
1023 */
1024
1025 if (mdv_strategy_tstpnt != trans_error_injector) {
1026 tstpnt_save = mdv_strategy_tstpnt;
1027 mdv_strategy_tstpnt = trans_error_injector;
1028 }
1029 }
1030 mutex_exit(&error_mutex);
1031 return (rv);
1032 }
1033
1034 /*ARGSUSED1*/
1035 static int
1036 trans_stop_errors(void *d, int mode, IOLOCK *lock)
1037 {
1038 mt_error_t *errp = (mt_error_t *)NULL;
1039 mt_error_t *pred_errp;
1040 mt_unit_t *un;
1041 int rv = 0;
1042
1043 md_i_get_t *migp = d;
1044
1045 mdclrerror(&migp->mde);
1046
1047 un = trans_getun(migp->id, &migp->mde,
1048 RD_LOCK, lock);
1049 if (un == NULL)
1050 return (EINVAL);
1051
1052 mutex_enter(&error_mutex);
1053 errp = find_by_mtunit(un, &pred_errp);
1054 if (errp != (mt_error_t *)NULL) {
1055 /* Remove from list. */
1056 pred_errp->er_next = errp->er_next;
1057 if ((error_list_head.er_next == (mt_error_t *)NULL) &&
1058 (mdv_strategy_tstpnt == trans_error_injector)) {
1059 mdv_strategy_tstpnt = tstpnt_save;
1060 }
1061 } else {
1062 /* unit not set up for errors. */
1063 rv = ENXIO;
1064 }
1065 mutex_exit(&error_mutex);
1066
1067 /* Free memory. */
1068
1069 if (errp != (mt_error_t *)NULL) {
1070 md_trans_free((void *)errp, sizeof (*errp));
1071 }
1072 return (rv);
1073 }
1074
1075 int
1076 _init_ioctl()
1077 {
1078 mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL);
1079 return (1);
1080 }
1081
1082 int
1083 _fini_ioctl()
1084 {
1085 mutex_destroy(&error_mutex);
1086 return (1);
1087 }
1088
1089 /*
1090 * END OF DEBUG ROUTINES
1091 */
1092 #endif /* DEBUG */
1093 /*
1094 * BEGIN RELEASE DEBUG
1095 * The following routines remain in the released product for testability
1096 */
1097
1098 /*
1099 * ufs error injection remains in the released product
1100 */
1101 /*ARGSUSED1*/
1102 static int
1103 trans_ufserror(void *d, int mode, IOLOCK *lock)
1104 {
1105 mt_unit_t *un;
1106
1107 md_i_get_t *migp = d;
1108
1109 mdclrerror(&migp->mde);
1110
1111 un = trans_getun(migp->id, &migp->mde,
1112 RD_LOCK, lock);
1113 if (un == NULL || un->un_ut == NULL)
1114 return (EINVAL);
1115
1116 return (0);
1117 }
1118 /*
1119 * shadow test remains in the released product
1120 */
1121 static int
1122 trans_set_shadow(void *d, int mode, IOLOCK *lock)
1123 {
1124 dev32_t device; /* shadow device */
1125 mt_unit_t *un;
1126
1127 md_i_get_t *migp = d;
1128
1129 mdclrerror(&migp->mde);
1130
1131 un = trans_getun(migp->id, &migp->mde,
1132 WR_LOCK, lock);
1133 if (un == NULL)
1134 return (EINVAL);
1135
1136 if ((un->un_debug & MT_SHADOW) == 0)
1137 return (EINVAL);
1138
1139 /* Get shadow device. User always passes down 32 bit devt */
1140
1141 if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp,
1142 &device, sizeof (device), mode)) {
1143 return (EFAULT);
1144 }
1145
1146 /* Save shadow device designator. */
1147 un->un_s_dev = md_expldev((md_dev64_t)device);
1148 return (0);
1149 }
1150
1151 /*
1152 * END RELEASE DEBUG
1153 */
1154
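/*
 * MD_IOCGET support: copy the trans unit structure out to the caller.
 * A zero migp->size is treated as a size query. Before the copyout,
 * the log fields cached in the trans unit are refreshed from the
 * underlying log unit, and extra space from a dynamically grown
 * (dynconcat) log metadevice is picked up when the circular log
 * layout allows it.
 */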
1155 static int
1156 trans_get(void *d, int mode, IOLOCK *lock)
1157 {
1158 mt_unit_t *un;
1159 ml_unit_t *ul;
1160
1161 md_i_get_t *migp = d;
1162
1163 mdclrerror(&migp->mde);
1164
1165 un = trans_getun(migp->id, &migp->mde,
1166 RD_LOCK, lock);
1167 if (un == NULL)
1168 return (0);
1169
1170 if (migp->size == 0) {
1171 migp->size = un->c.un_size;
1172 return (0);
1173 }
1174
1175 if (migp->size < un->c.un_size)
1176 return (EFAULT);
1177
1178 log:
1179 ul = un->un_l_unit;
1180 if (ul == NULL)
1181 goto master;
1182
1183 /*
1184 * refresh log fields in case log was metattach'ed
1185 */
1186 un->un_l_head = (daddr32_t)btodb(ul->un_head_lof);
1187 un->un_l_sblk = un->un_l_head;
1188 un->un_l_pwsblk = ul->un_pwsblk;
1189 un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer);
1190 un->un_l_nblks = ul->un_nblks;
1191 un->un_l_tblks = ul->un_tblks;
1192 un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof);
1193 un->un_l_resv = ul->un_resv;
1194 un->un_l_maxresv = ul->un_maxresv;
1195 un->un_l_error = ul->un_error;
1196 un->un_l_timestamp = ul->un_timestamp;
1197
1198 /*
1199 * check for log dev dynconcat; can only pick up extra space when the
1200 * tail physically follows the head in the circular log
1201 */
1202 if (un->un_l_head <= un->un_l_tail)
1203 if (ul->un_status & LDL_METADEVICE) {
1204 struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev));
1205
1206 if (c->un_total_blocks > un->un_l_tblks) {
1207 un->un_l_tblks = c->un_total_blocks;
1208 un->un_l_nblks = un->un_l_tblks - un->un_l_sblk;
1209 if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE))
1210 un->un_l_nblks = btodb(LDL_MAXLOGSIZE);
1211 un->un_l_maxresv = (uint_t)(un->un_l_nblks *
1212 LDL_USABLE_BSIZE);
1213 }
1214 }
1215
1216 master:
1217
1218 if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode))
1219 return (EFAULT);
1220 return (0);
1221 }
1222
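/*
 * MD_IOCREPLACE support: substitute a replacement device for either
 * the master or the log of the trans unit named by params->mnum,
 * updating the matching dev/key pair and committing the unit record.
 * The request is rejected while a resync is active on the unit.
 */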
1223 static int
1224 trans_replace(replace_params_t *params)
1225 {
1226 minor_t mnum = params->mnum;
1227 mt_unit_t *un;
1228 mdi_unit_t *ui;
1229 md_dev64_t cmp_dev;
1230 md_dev64_t ldev;
1231 md_dev64_t mdev;
1232
1233 mdclrerror(¶ms->mde);
1234
1235 ui = MDI_UNIT(mnum);
1236 un = md_unit_writerlock(ui);
1237
1238 if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
1239 return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum));
1240 }
1241
1242 cmp_dev = params->old_dev;
1243 mdev = un->un_m_dev;
1244 ldev = un->un_l_dev;
1245 if (cmp_dev == mdev) {
1246 un->un_m_key = params->new_key;
1247 un->un_m_dev = params->new_dev;
1248 } else if (cmp_dev == ldev) {
1249 un->un_l_key = params->new_key;
1250 un->un_l_dev = params->new_dev;
1251 }
1252
1253 trans_commit(un, 1);
1254 md_unit_writerexit(ui);
1255 return (0);
1256 }
1257
1258 /*ARGSUSED1*/
1259 static int
1260 trans_grow(void *d, int mode, IOLOCK *lock)
1261 {
1262 mt_unit_t *un;
1263
1264 md_grow_params_t *mgp = d;
1265
1266 mdclrerror(&mgp->mde);
1267
1268 un = trans_getun(mgp->mnum, &mgp->mde,
1269 RD_LOCK, lock);
1270 if (un == NULL)
1271 return (0);
1272
1273 /*
1274 * check for master dev dynconcat
1275 */
1276 if (md_getmajor(un->un_m_dev) == md_major) {
1277 struct mdc_unit *c;
1278
1279 c = MD_UNIT(md_getminor(un->un_m_dev));
1280 if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
1281 un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS;
1282 } else {
1283 un->c.un_total_blocks = c->un_total_blocks;
1284 }
1285 md_nblocks_set(MD_SID(un), un->c.un_total_blocks);
1286 }
1287
1288 return (0);
1289 }
1290
1291 /*ARGSUSED1*/
1292 static int
1293 trans_detach_ioctl(void *d, int mode, IOLOCK *lock)
1294 {
1295 mt_unit_t *un;
1296 int error;
1297
1298 md_i_get_t *migp = d;
1299
1300 mdclrerror(&migp->mde);
1301
1302 /* acquire both md_unit_array_rw, and unit_reader lock */
1303 un = trans_getun(migp->id, &migp->mde,
1304 READERS, lock);
1305 if (un == NULL)
1306 return (0);
1307
1308 /*
1309 * simply too much work to make debug modes w/out a log
1310 */
1311 if (un->un_debug)
1312 return (EACCES);
1313
1314 /*
1315 * detach the log
1316 */
1317 error = trans_detach(un, migp->size);
1318
1319 return (error);
1320 }
1321
1322 static int
1323 trans_get_log(void *d, int mode, IOLOCK *lock)
1324 {
1325 mt_unit_t *un;
1326 ml_unit_t *ul;
1327
1328 md_i_get_t *migp = d;
1329
1330 mdclrerror(&migp->mde);
1331
1332 un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock);
1333
1334 if (un == NULL)
1335 return (0);
1336
1337 ul = un->un_l_unit;
1338
1339 if (migp->size == 0) {
1340 migp->size = ML_UNIT_ONDSZ;
1341 return (0);
1342 }
1343
1344 if (migp->size < ML_UNIT_ONDSZ)
1345 return (EFAULT);
1346
1347 if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ,
1348 mode))
1349 return (EFAULT);
1350 return (0);
1351 }
1352
1353 static int
1354 trans_getdevs(void *d, int mode, IOLOCK *lock)
1355 {
1356 int ndev;
1357 mt_unit_t *un;
1358 md_dev64_t *udevs;
1359 md_dev64_t unit_dev;
1360
1361 md_getdevs_params_t *mgdp = d;
1362
1363 mdclrerror(&mgdp->mde);
1364
1365 un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock);
1366 if (un == NULL)
1367 return (0);
1368
1369 ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2;
1370
1371 if (mgdp->cnt == 0) {
1372 mgdp->cnt = ndev;
1373 return (0);
1374 }
1375
1376 if (mgdp->cnt > 2)
1377 mgdp->cnt = ndev;
1378
1379 udevs = (md_dev64_t *)(uintptr_t)mgdp->devs;
1380 unit_dev = un->un_m_dev;
1381
1382 if (md_getmajor(unit_dev) != md_major) {
1383 if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1384 return (ENODEV);
1385 }
1386
1387 if (mgdp->cnt >= 1)
1388 if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0],
1389 sizeof (*udevs), mode) != 0)
1390 return (EFAULT);
1391
1392 unit_dev = un->un_l_dev;
1393 if (md_getmajor(unit_dev) != md_major) {
1394 if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1395 return (ENODEV);
1396 }
1397
1398 if (mgdp->cnt >= 2)
1399 if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1],
1400 sizeof (*udevs), mode) != 0)
1401 return (EFAULT);
1402
1403 return (0);
1404 }
1405
1406 static int
1407 trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock)
1408 {
1409 minor_t mnum = mirp->mnum;
1410 mt_unit_t *un;
1411 int error;
1412
1413 mdclrerror(&mirp->mde);
1414
1415 un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock);
1416 if (un == NULL)
1417 return (0);
1418
1419
1420 /* This prevents new opens */
1421 rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1422
1423 if (MD_HAS_PARENT(MD_PARENT(un))) {
1424 rw_exit(&md_unit_array_rw.lock);
1425 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
1426 }
1427
1428 if (md_unit_isopen(MDI_UNIT(mnum))) {
1429 rw_exit(&md_unit_array_rw.lock);
1430 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
1431 }
1432 /*
1433 * detach the log
1434 */
1435 error = trans_detach(un, mirp->force);
1436
1437 /*
1438 * reset (aka remove; aka delete) the trans device
1439 */
1440 if (error == 0)
1441 error = trans_reset(un, mnum, 1, mirp->force);
1442
1443 rw_exit(&md_unit_array_rw.lock);
1444 return (error);
1445 }
1446
1447 static int
1448 trans_get_geom(mt_unit_t *un, struct dk_geom *geomp)
1449 {
1450 md_get_geom((md_unit_t *)un, geomp);
1451
1452 return (0);
1453 }
1454
1455 static int
1456 trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp)
1457 {
1458 md_get_vtoc((md_unit_t *)un, vtocp);
1459
1460 return (0);
1461 }
1462
1463 static int
1464 trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1465 {
1466 md_get_extvtoc((md_unit_t *)un, vtocp);
1467
1468 return (0);
1469 }
1470
1471 static int
1472 trans_islog(mt_unit_t *un)
1473 {
1474 if (un->un_l_unit == NULL)
1475 return (ENXIO);
1476 return (0);
1477 }
1478
1479 static int
1480 trans_set_vtoc(
1481 mt_unit_t *un,
1482 struct vtoc *vtocp
1483 )
1484 {
1485 return (md_set_vtoc((md_unit_t *)un, vtocp));
1486 }
1487
1488 static int
1489 trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1490 {
1491 return (md_set_extvtoc((md_unit_t *)un, vtocp));
1492 }
1493
1494 static int
1495 trans_get_cgapart(
1496 mt_unit_t *un,
1497 struct dk_map *dkmapp
1498 )
1499 {
1500 md_get_cgapart((md_unit_t *)un, dkmapp);
1501 return (0);
1502 }
1503
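/*
 * Ioctls issued against the md admin device are routed here by
 * md_trans_ioctl() below when the minor is MD_ADM_MINOR. Each
 * supported command copies in a fixed-size argument (32-bit/ILP32
 * clients only), dispatches to the matching handler, and on success
 * copies the possibly updated argument back out before freeing it.
 */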
1504 static int
1505 trans_admin_ioctl(
1506 int cmd,
1507 caddr_t data,
1508 int mode,
1509 IOLOCK *lockp
1510 )
1511 {
1512 size_t sz = 0;
1513 void *d = NULL;
1514 int err = 0;
1515
1516 /* We can only handle 32-bit clients for internal commands */
1517 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1518 return (EINVAL);
1519 }
1520
1521 switch (cmd) {
1522
1523 case MD_IOCGET:
1524 {
1525 if (! (mode & FREAD))
1526 return (EACCES);
1527
1528 sz = sizeof (md_i_get_t);
1529
1530 if ((d = md_trans_zalloc(sz)) == NULL)
1531 return (ENOMEM);
1532
1533 if (ddi_copyin(data, d, sz, mode)) {
1534 err = EFAULT;
1535 break;
1536 }
1537
1538 err = trans_get(d, mode, lockp);
1539 break;
1540 }
1541
1542 case MD_IOCGET_LOG:
1543 {
1544 if (! (mode & FREAD))
1545 return (EACCES);
1546
1547 sz = sizeof (md_i_get_t);
1548
1549 if ((d = md_trans_zalloc(sz)) == NULL)
1550 return (ENOMEM);
1551
1552 if (ddi_copyin(data, d, sz, mode)) {
1553 err = EFAULT;
1554 break;
1555 }
1556
1557 err = trans_get_log(d, mode, lockp);
1558 break;
1559 }
1560
1561 case MD_IOCRESET:
1562 {
1563 md_i_reset_t *p;
1564
1565 if (! (mode & FWRITE))
1566 return (EACCES);
1567
1568 if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL)
1569 return (ENOMEM);
1570
1571 if (ddi_copyin(data, d, sz, mode)) {
1572 err = EFAULT;
1573 break;
1574 }
1575
1576 err = trans_reset_ioctl(p, lockp);
1577 break;
1578 }
1579
1580 case MD_IOCGROW:
1581 {
1582 if (! (mode & FWRITE))
1583 return (EACCES);
1584
1585 sz = sizeof (md_grow_params_t);
1586
1587 if ((d = md_trans_zalloc(sz)) == NULL)
1588 return (ENOMEM);
1589
1590 if (ddi_copyin(data, d, sz, mode)) {
1591 err = EFAULT;
1592 break;
1593 }
1594
1595 err = trans_grow(d, mode, lockp);
1596 break;
1597 }
1598
1599 case MD_IOC_TRANS_DETACH:
1600 {
1601 if (! (mode & FWRITE))
1602 return (EACCES);
1603
1604 sz = sizeof (md_i_get_t);
1605
1606 if ((d = md_trans_zalloc(sz)) == NULL)
1607 return (ENOMEM);
1608
1609 if (ddi_copyin(data, d, sz, mode)) {
1610 err = EFAULT;
1611 break;
1612 }
1613
1614 err = trans_detach_ioctl(d, mode, lockp);
1615 break;
1616 }
1617
1618 case MD_IOCREPLACE:
1619 {
1620 replace_params_t *p;
1621
1622 if (! (mode & FWRITE))
1623 return (EACCES);
1624
1625 if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL)
1626 return (ENOMEM);
1627
1628 if (ddi_copyin(data, d, sz, mode)) {
1629 err = EFAULT;
1630 break;
1631 }
1632
1633 err = trans_replace(p);
1634 break;
1635 }
1636
1637
1638 case MD_IOCGET_DEVS:
1639 {
1640 if (! (mode & FREAD))
1641 return (EACCES);
1642
1643 sz = sizeof (md_getdevs_params_t);
1644
1645 if ((d = md_trans_zalloc(sz)) == NULL)
1646 return (ENOMEM);
1647
1648 if (ddi_copyin(data, d, sz, mode)) {
1649 err = EFAULT;
1650 break;
1651 }
1652
1653 err = trans_getdevs(d, mode, lockp);
1654 break;
1655 }
1656
1657 /*
1658 * debug ioctls
1659 */
1660 #ifdef DEBUG
1661
1662
1663 case MD_IOCGET_TRANSSTATS:
1664 {
1665 if (! (mode & FREAD))
1666 return (EACCES);
1667
1668 sz = sizeof (md_i_get_t);
1669
1670 if ((d = md_trans_zalloc(sz)) == NULL)
1671 return (ENOMEM);
1672
1673 if (ddi_copyin(data, d, sz, mode)) {
1674 err = EFAULT;
1675 break;
1676 }
1677
1678 err = trans_get_transstats(d, mode);
1679 break;
1680 }
1681
1682 case MD_IOC_DEBUG:
1683 {
1684 md_i_get_t *mdigp;
1685
1686 if (! (mode & FWRITE))
1687 return (EACCES);
1688
1689 sz = sizeof (md_i_get_t);
1690
1691 if ((d = md_trans_zalloc(sz)) == NULL)
1692 return (ENOMEM);
1693
1694 if (ddi_copyin(data, d, sz, mode)) {
1695 err = EFAULT;
1696 break;
1697 }
1698
1699 mdigp = d;
1700
1701 mdclrerror(&mdigp->mde);
1702 mt_debug = mdigp->size;
1703 break;
1704 }
1705
1706 case MD_IOC_TSD:
1707 {
1708 if (! (mode & FWRITE))
1709 return (EACCES);
1710
1711
1712 sz = sizeof (md_i_get_t);
1713
1714 if ((d = md_trans_zalloc(sz)) == NULL)
1715 return (ENOMEM);
1716
1717 if (ddi_copyin(data, d, sz, mode)) {
1718 err = EFAULT;
1719 break;
1720 }
1721
1722 err = trans_test_tsd(d, mode);
1723 break;
1724 }
1725
1726 case MD_IOC_TRYGETBLK:
1727 {
1728 if (! (mode & FWRITE))
1729 return (EACCES);
1730
1731
1732 sz = sizeof (md_i_get_t);
1733
1734 if ((d = md_trans_zalloc(sz)) == NULL)
1735 return (ENOMEM);
1736
1737 if (ddi_copyin(data, d, sz, mode)) {
1738 err = EFAULT;
1739 break;
1740 }
1741
1742 err = trans_test_trygetblk(d, mode, lockp);
1743 break;
1744 }
1745
1746 case MD_IOC_TRYPAGE:
1747 {
1748 if (! (mode & FWRITE))
1749 return (EACCES);
1750
1751
1752 sz = sizeof (md_i_get_t);
1753
1754 if ((d = md_trans_zalloc(sz)) == NULL)
1755 return (ENOMEM);
1756
1757 if (ddi_copyin(data, d, sz, mode)) {
1758 err = EFAULT;
1759 break;
1760 }
1761
1762 err = trans_test_trypage(d, mode, lockp);
1763 break;
1764 }
1765
1766
1767 case MD_IOC_INJECTERRORS:
1768 {
1769 if (! (mode & FWRITE))
1770 return (EACCES);
1771
1772
1773 sz = sizeof (md_i_get_t);
1774
1775 if ((d = md_trans_zalloc(sz)) == NULL)
1776 return (ENOMEM);
1777
1778 if (ddi_copyin(data, d, sz, mode)) {
1779 err = EFAULT;
1780 break;
1781 }
1782
1783 err = trans_inject_errors(d, mode, lockp);
1784 break;
1785 }
1786
1787 case MD_IOC_STOPERRORS:
1788 {
1789 if (! (mode & FWRITE))
1790 return (EACCES);
1791
1792
1793 sz = sizeof (md_i_get_t);
1794
1795 if ((d = md_trans_zalloc(sz)) == NULL)
1796 return (ENOMEM);
1797
1798 if (ddi_copyin(data, d, sz, mode)) {
1799 err = EFAULT;
1800 break;
1801 }
1802
1803 err = trans_stop_errors(d, mode, lockp);
1804 break;
1805 }
1806
1807 case MD_IOC_ISDEBUG:
1808 break;
1809
1810 #else /* ! DEBUG */
1811
1812 case MD_IOC_ISDEBUG:
1813 case MD_IOCGET_TRANSSTATS:
1814 case MD_IOC_STOPERRORS:
1815 case MD_IOC_TSD:
1816 case MD_IOC_TRYGETBLK:
1817 case MD_IOC_TRYPAGE:
1818 break;
1819
1820 /*
1821 * error injection behaves like MD_IOC_UFSERROR in released product
1822 */
1823 case MD_IOC_INJECTERRORS:
1824 {
1825 if (! (mode & FWRITE))
1826 return (EACCES);
1827
1828
1829 sz = sizeof (md_i_get_t);
1830
1831 if ((d = md_trans_zalloc(sz)) == NULL)
1832 return (ENOMEM);
1833
1834 if (ddi_copyin(data, d, sz, mode)) {
1835 err = EFAULT;
1836 break;
1837 }
1838
1839 err = trans_ufserror(d, mode, lockp);
1840 break;
1841 }
1842
1843 /*
1844 * only the shadow test is allowed in the released product
1845 */
1846 case MD_IOC_DEBUG:
1847 {
1848 md_i_get_t *mdigp;
1849
1850 if (! (mode & FWRITE))
1851 return (EACCES);
1852
1853 sz = sizeof (md_i_get_t);
1854
1855 if ((d = md_trans_zalloc(sz)) == NULL)
1856 return (ENOMEM);
1857
1858 if (ddi_copyin(data, d, sz, mode)) {
1859 err = EFAULT;
1860 break;
1861 }
1862
1863 mdigp = d;
1864
1865 mdclrerror(&mdigp->mde);
1866 mt_debug = mdigp->size & MT_SHADOW;
1867 break;
1868 }
1869
1870 #endif /* ! DEBUG */
1871
1872 /*
1873 * BEGIN RELEASE DEBUG
1874 * The following routines remain in the released product for testability
1875 */
1876
1877 case MD_IOC_UFSERROR:
1878 {
1879 if (! (mode & FWRITE))
1880 return (EACCES);
1881
1882 sz = sizeof (md_i_get_t);
1883
1884 if ((d = md_trans_zalloc(sz)) == NULL)
1885 return (ENOMEM);
1886
1887 if (ddi_copyin(data, d, sz, mode)) {
1888 err = EFAULT;
1889 break;
1890 }
1891
1892 err = trans_ufserror(d, mode, lockp);
1893 break;
1894 }
1895
1896 case MD_IOC_SETSHADOW:
1897 {
1898 if (! (mode & FWRITE))
1899 return (EACCES);
1900
1901 sz = sizeof (md_i_get_t);
1902
1903 if ((d = md_trans_zalloc(sz)) == NULL)
1904 return (ENOMEM);
1905
1906 if (ddi_copyin(data, d, sz, mode)) {
1907 err = EFAULT;
1908 break;
1909 }
1910
1911 err = trans_set_shadow(d, mode, lockp);
1912 break;
1913 }
1914
1915 /*
1916 * END RELEASE DEBUG
1917 */
1918
1919
1920 default:
1921 return (ENOTTY);
1922 }
1923
1924 /*
1925 * copyout and free any args
1926 */
1927 if (sz != 0) {
1928 if (err == 0) {
1929 if (ddi_copyout(d, data, sz, mode) != 0) {
1930 err = EFAULT;
1931 }
1932 }
1933 md_trans_free(d, sz);
1934 }
1935 return (err);
1936 }
1937
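/*
 * Top-level ioctl entry point for trans metadevices. Requests on the
 * admin minor (MD_ADM_MINOR) are handed to trans_admin_ioctl(); for
 * everything else the unit is validated and the command dispatched,
 * covering the standard DKIO* disk ioctls plus the _FIOISLOG query
 * used by fsck/mkfs.
 */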
1938 int
1939 md_trans_ioctl(
1940 dev_t dev,
1941 int cmd,
1942 caddr_t data,
1943 int mode,
1944 IOLOCK *lockp
1945 )
1946 {
1947 minor_t mnum = getminor(dev);
1948 mt_unit_t *un;
1949 md_error_t mde = mdnullerror;
1950 int err = 0;
1951
1952 /* handle admin ioctls */
1953 if (mnum == MD_ADM_MINOR)
1954 return (trans_admin_ioctl(cmd, data, mode, lockp));
1955
1956 /* check unit */
1957 if ((MD_MIN2SET(mnum) >= md_nsets) ||
1958 (MD_MIN2UNIT(mnum) >= md_nunits) ||
1959 ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL))
1960 return (ENXIO);
1961
1962 /* dispatch ioctl */
1963 switch (cmd) {
1964
1965 case DKIOCINFO:
1966 {
1967 struct dk_cinfo *p;
1968
1969 if (! (mode & FREAD))
1970 return (EACCES);
1971
1972 if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1973 return (ENOMEM);
1974
1975 get_info(p, mnum);
1976 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1977 err = EFAULT;
1978
1979 md_trans_free(p, sizeof (*p));
1980 return (err);
1981 }
1982
1983 case DKIOCGGEOM:
1984 {
1985 struct dk_geom *p;
1986
1987 if (! (mode & FREAD))
1988 return (EACCES);
1989
1990 if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1991 return (ENOMEM);
1992
1993 if ((err = trans_get_geom(un, p)) == 0) {
1994 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1995 mode) != 0)
1996 err = EFAULT;
1997 }
1998
1999 md_trans_free(p, sizeof (*p));
2000 return (err);
2001 }
2002
2003 case DKIOCGVTOC:
2004 {
2005 struct vtoc *vtoc;
2006
2007 if (! (mode & FREAD))
2008 return (EACCES);
2009
2010 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2011 if ((err = trans_get_vtoc(un, vtoc)) != 0) {
2012 kmem_free(vtoc, sizeof (*vtoc));
2013 return (err);
2014 }
2015
2016 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2017 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2018 err = EFAULT;
2019 }
2020 #ifdef _SYSCALL32
2021 else {
2022 struct vtoc32 *vtoc32;
2023
2024 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2025
2026 vtoctovtoc32((*vtoc), (*vtoc32));
2027 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2028 err = EFAULT;
2029 kmem_free(vtoc32, sizeof (*vtoc32));
2030 }
2031 #endif /* _SYSCALL32 */
2032
2033 kmem_free(vtoc, sizeof (*vtoc));
2034 return (err);
2035 }
2036
2037 case DKIOCSVTOC:
2038 {
2039 struct vtoc *vtoc;
2040
2041 if (! (mode & FWRITE))
2042 return (EACCES);
2043
2044 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2045 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2046 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2047 err = EFAULT;
2048 }
2049 }
2050 #ifdef _SYSCALL32
2051 else {
2052 struct vtoc32 *vtoc32;
2053
2054 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2055
2056 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2057 err = EFAULT;
2058 } else {
2059 vtoc32tovtoc((*vtoc32), (*vtoc));
2060 }
2061 kmem_free(vtoc32, sizeof (*vtoc32));
2062 }
2063 #endif /* _SYSCALL32 */
2064
2065 if (err == 0)
2066 err = trans_set_vtoc(un, vtoc);
2067
2068 kmem_free(vtoc, sizeof (*vtoc));
2069 return (err);
2070 }
2071
2072
2073 case DKIOCGEXTVTOC:
2074 {
2075 struct extvtoc *extvtoc;
2076
2077 if (! (mode & FREAD))
2078 return (EACCES);
2079
2080 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2081 if ((err = trans_get_extvtoc(un, extvtoc)) != 0) {
2082 return (err);
2083 }
2084
2085 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2086 err = EFAULT;
2087
2088 kmem_free(extvtoc, sizeof (*extvtoc));
2089 return (err);
2090 }
2091
2092 case DKIOCSEXTVTOC:
2093 {
2094 struct extvtoc *extvtoc;
2095
2096 if (! (mode & FWRITE))
2097 return (EACCES);
2098
2099 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2100 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2101 err = EFAULT;
2102 }
2103
2104 if (err == 0)
2105 err = trans_set_extvtoc(un, extvtoc);
2106
2107 kmem_free(extvtoc, sizeof (*extvtoc));
2108 return (err);
2109 }
2110
2111 case DKIOCGAPART:
2112 {
2113 struct dk_map dmp;
2114
2115 if ((err = trans_get_cgapart(un, &dmp)) != 0) {
2116 return (err);
2117 }
2118
2119 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2120 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2121 mode) != 0)
2122 err = EFAULT;
2123 }
2124 #ifdef _SYSCALL32
2125 else {
2126 struct dk_map32 dmp32;
2127
2128 dmp32.dkl_cylno = dmp.dkl_cylno;
2129 dmp32.dkl_nblk = dmp.dkl_nblk;
2130
2131 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2132 mode) != 0)
2133 err = EFAULT;
2134 }
2135 #endif /* _SYSCALL32 */
2136
2137 return (err);
2138 }
2139
2140 /*
2141 * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs
2142 * after opening the device. fsck/mkfs use these ioctls for
2143 * error recovery.
2144 */
2145 case _FIOISLOG:
2146 return (trans_islog(un));
2147
2148 default:
2149 return (ENOTTY);
2150 }
2151 }
2152
2153 /*
2154 * rename named service entry points and support functions
2155 */
2156
2157 /* rename/exchange role swap functions */
2158
2159 /*
2160 * MDRNM_UPDATE_SELF
2161 * This role swap function is identical for all unit types,
2162 * so keep it here. It's also the best example because it
2163 * touches all the modified portions of the relevant
2164 * in-common structures.
2165 */
2166 void
2167 trans_rename_update_self(
2168 md_rendelta_t *delta,
2169 md_rentxn_t *rtxnp)
2170 {
2171 minor_t from_min, to_min;
2172 sv_dev_t sv;
2173 mt_unit_t *un;
2174
2175 ASSERT(rtxnp);
2176 ASSERT(rtxnp->op == MDRNOP_RENAME);
2177 ASSERT(delta);
2178 ASSERT(delta->unp);
2179 ASSERT(delta->uip);
2180 ASSERT(rtxnp->rec_idx >= 0);
2181 ASSERT(rtxnp->recids);
2182 ASSERT(delta->old_role == MDRR_SELF);
2183 ASSERT(delta->new_role == MDRR_SELF);
2184
2185 from_min = rtxnp->from.mnum;
2186 to_min = rtxnp->to.mnum;
2187 un = (mt_unit_t *)delta->unp;
2188
2189 /*
2190 * self id changes in our own unit struct
2191 * both mechanisms for identifying the trans must be reset.
2192 */
2193
2194 MD_SID(delta->unp) = to_min;
2195 un->un_dev = makedevice(md_major, to_min);
2196
2197 /*
2198 * clear old array pointers to unit in-core and unit
2199 */
2200
2201 MDI_VOIDUNIT(from_min) = NULL;
2202 MD_VOIDUNIT(from_min) = NULL;
2203
2204 /*
2205 * and point the new slots at the unit in-core and unit structs
2206 */
2207
2208 MDI_VOIDUNIT(to_min) = delta->uip;
2209 MD_VOIDUNIT(to_min) = delta->unp;
2210
2211 /*
2212 * recreate kstats
2213 */
2214 md_kstat_destroy_ui(delta->uip);
2215 md_kstat_init_ui(to_min, delta->uip);
2216
2217 /*
2218 * the unit in-core reference to the get next link's id changes
2219 */
2220
2221 delta->uip->ui_link.ln_id = to_min;
2222
2223 /*
2224 * name space addition of new key was done from user-level
2225 * remove the old name's key here
2226 */
2227
2228 sv.setno = MD_MIN2SET(from_min);
2229 sv.key = rtxnp->from.key;
2230
2231 md_rem_names(&sv, 1);
2232
2233
2234 /*
2235 * and store the record id (from the unit struct) into recids
2236 * for later commitment by md_rename()
2237 */
2238
2239 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2240 }
2241
2242 /*
2243 * MDRNM_UPDATE_KIDS
2244 * rename/exchange of our child or grandchild
2245 */
2246 void
2247 trans_renexch_update_kids(
2248 md_rendelta_t *delta,
2249 md_rentxn_t *rtxnp)
2250 {
2251 mt_unit_t *un;
2252 minor_t from_min, to_min, log_min, master_min;
2253
2254 ASSERT(delta);
2255 ASSERT(rtxnp);
2256 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2257 ASSERT(delta->unp);
2258 ASSERT(rtxnp->recids);
2259 ASSERT(rtxnp->rec_idx >= 0);
2260 ASSERT(delta->old_role == MDRR_PARENT);
2261 ASSERT(delta->new_role == MDRR_PARENT);
2262
2263 un = (mt_unit_t *)delta->unp;
2264 from_min = rtxnp->from.mnum;
2265 to_min = rtxnp->to.mnum;
2266 log_min = md_getminor(un->un_l_dev);
2267 master_min = md_getminor(un->un_m_dev);
2268
2269 /*
2270 * since our role isn't changing (parent->parent)
2271 * one of our children must be changing; which one is it?
2272 * find the child being modified, and update
2273 * our notion of it
2274 */
2275
2276 /* both devices must be metadevices in order to be updated */
2277 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2278 ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2279
2280 if ((md_getmajor(un->un_m_dev) == md_major) &&
2281 (master_min == from_min)) {
2282
2283 ASSERT(!(un->un_l_unit && (log_min == from_min)));
2284
2285 un->un_m_dev = makedevice(md_major, to_min);
2286 un->un_m_key = rtxnp->to.key;
2287
2288 } else if ((md_getmajor(un->un_m_dev) == md_major) &&
2289 un->un_l_unit && (log_min == from_min)) {
2290
2291 ASSERT(master_min != from_min);
2292
2293 un->un_l_dev = makedevice(md_major, to_min);
2294 un->un_l_key = rtxnp->to.key;
2295
2296 } else {
2297 ASSERT(FALSE);
2298 panic("trans_renexch_update_kids: not a metadevice");
2299 /*NOTREACHED*/
2300 }
2301
2302 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2303 }
2304
2305 /*
2306 * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child]
2307 */
2308 void
2309 trans_exchange_self_update_from_down(
2310 md_rendelta_t *delta,
2311 md_rentxn_t *rtxnp)
2312 {
2313 mt_unit_t *un;
2314 minor_t from_min, to_min, master_min, log_min;
2315 sv_dev_t sv;
2316
2317 ASSERT(delta);
2318 ASSERT(delta->unp);
2319 ASSERT(delta->uip);
2320 ASSERT(rtxnp);
2321 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2322 ASSERT(rtxnp->from.uip);
2323 ASSERT(rtxnp->rec_idx >= 0);
2324 ASSERT(rtxnp->recids);
2325 ASSERT(delta->old_role == MDRR_SELF);
2326 ASSERT(delta->new_role == MDRR_CHILD);
2327 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
2328
2329 un = (mt_unit_t *)delta->unp;
2330
2331 /*
2332 * if we're exchanging a trans, it had better be a metadevice
2333 */
2334 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2335
2336 to_min = rtxnp->to.mnum;
2337 from_min = rtxnp->from.mnum;
2338 master_min = md_getminor(un->un_m_dev);
2339 log_min = md_getminor(un->un_l_dev);
2340
2341 /*
2342 * both mechanisms for identifying a trans must be updated
2343 */
2344
2345 MD_SID(delta->unp) = to_min;
2346 un->un_dev = makedevice(md_major, to_min);
2347
2348 /*
2349 * parent identifier need not change
2350 */
2351
2352 /*
2353 * point the set array pointers at the "new" unit and unit in-cores
2354 * Note: the other half of this transfer is done in the "update to"
2355 * rename/exchange named service.
2356 */
2357
2358 MDI_VOIDUNIT(to_min) = delta->uip;
2359 MD_VOIDUNIT(to_min) = delta->unp;
2360
2361 /*
2362 * transfer kstats
2363 */
2364
2365 delta->uip->ui_kstat = rtxnp->to.kstatp;
2366
2367 /*
2368 * the unit in-core reference to the get next link's id changes
2369 */
2370
2371 delta->uip->ui_link.ln_id = to_min;
2372
2373 /*
2374 * which one of our children is changing?
2375 *
2376 * Note that the check routines forbid changing the log (for now)
2377 * because there's no lockfs-like trans-ufs "freeze and remount"
2378 * or "freeze and bobbit the log."
2379 */
2380
2381 /* both devices must be metadevices in order to be updated */
2382 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2383 ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2384
2385 if ((md_getmajor(un->un_m_dev) == md_major) &&
2386 (master_min == to_min)) {
2387
2388 /* master and log can't both be changed */
2389 ASSERT(!(un->un_l_unit && (log_min == to_min)));
2390
2391 un->un_m_dev = makedevice(md_major, from_min);
2392 sv.key = un->un_m_key;
2393 un->un_m_key = rtxnp->from.key;
2394
2395 } else if ((md_getmajor(un->un_m_dev) == md_major) &&
2396 un->un_l_unit && (log_min == to_min)) {
2397
2398 /* master and log can't both be changed */
2399 ASSERT(!(master_min == to_min));
2400
2401 un->un_l_dev = makedevice(md_major, from_min);
2402 sv.key = un->un_l_key;
2403 un->un_l_key = rtxnp->from.key;
2404
2405 } else {
2406 ASSERT(FALSE);
2407 panic("trans_exchange_self_update_from_down: not a metadevice");
2408 /*NOTREACHED*/
2409 }
2410
2411 /*
2412 * the new master must exist in the name space
2413 */
2414 ASSERT(rtxnp->from.key != MD_KEYWILD);
2415 ASSERT(rtxnp->from.key != MD_KEYBAD);
2416
2417 /*
2418 * delete the key for the changed child from the namespace
2419 */
2420
2421 sv.setno = MD_MIN2SET(from_min);
2422 md_rem_names(&sv, 1);
2423
2424 /*
2425 * and store the record id (from the unit struct) into recids
2426 */
2427
2428 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2429 }
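
/*
 * A minimal sketch of the identity transfer done above (illustrative
 * only, not compiled into the driver).  Every place the old minor is
 * recorded must be rewritten; assuming a unit "un" with in-core "ui"
 * taking on the new minor "new_min" and kstat pointer "new_kstat":
 *
 *	MD_SID(un) = new_min;
 *	un->un_dev = makedevice(md_major, new_min);
 *	MDI_VOIDUNIT(new_min) = ui;
 *	MD_VOIDUNIT(new_min) = un;
 *	ui->ui_kstat = new_kstat;
 *	ui->ui_link.ln_id = new_min;
 *
 * Missing any one of these leaves a stale reference to the old minor;
 * the peer half of the set-array transfer is handled by the matching
 * "update to" named service, as the comment above notes.
 */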
2430
2431 /*
2432 * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self]
2433 */
2434 void
2435 trans_exchange_parent_update_to(
2436 md_rendelta_t *delta,
2437 md_rentxn_t *rtxnp)
2438 {
2439 mt_unit_t *un;
2440 minor_t from_min, to_min, master_min, log_min;
2441 sv_dev_t sv;
2442
2443 ASSERT(delta);
2444 ASSERT(delta->unp);
2445 ASSERT(delta->uip);
2446 ASSERT(rtxnp);
2447 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2448 ASSERT(rtxnp->from.uip);
2449 ASSERT(rtxnp->rec_idx >= 0);
2450 ASSERT(rtxnp->recids);
2451 ASSERT(delta->old_role == MDRR_PARENT);
2452 ASSERT(delta->new_role == MDRR_SELF);
2453 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
2454
2455 un = (mt_unit_t *)delta->unp;
2456
2457 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2458
2459 to_min = rtxnp->to.mnum;
2460 from_min = rtxnp->from.mnum;
2461 master_min = md_getminor(un->un_m_dev);
2462 log_min = md_getminor(un->un_l_dev);
2463
2464 /*
2465 * both mechanisms for identifying a trans must be updated
2466 */
2467
2468 MD_SID(delta->unp) = from_min;
2469 un->un_dev = makedevice(md_major, from_min);
2470
2471 /*
2472 * parent identifier need not change
2473 */
2474
2475 /*
2476 * point the set array pointers at the "new" unit and unit in-cores
2477 * Note: the other half of this transfer is done in the "update to"
2478 * rename/exchange named service.
2479 */
2480
2481 MDI_VOIDUNIT(from_min) = delta->uip;
2482 MD_VOIDUNIT(from_min) = delta->unp;
2483
2484 /*
2485 * transfer kstats
2486 */
2487
2488 delta->uip->ui_kstat = rtxnp->from.kstatp;
2489
2490 /*
2491 * the unit in-core reference to the get next link's id changes
2492 */
2493
2494 delta->uip->ui_link.ln_id = from_min;
2495
2496 /*
2497 * which one of our children is changing?
2498 */
2499
2500 /* both devices must be metadevices in order to be updated */
2501 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2502 ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2503
2504 if ((md_getmajor(un->un_m_dev) == md_major) &&
2505 (master_min == from_min)) {
2506
2507 /* can't be changing log and master */
2508 ASSERT(!(un->un_l_unit && (log_min == to_min)));
2509
2510 un->un_m_dev = makedevice(md_major, to_min);
2511 sv.key = un->un_m_key;
2512 un->un_m_key = rtxnp->to.key;
2513
2514 } else if (un->un_l_unit &&
2515 ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) {
2516
2517 /* can't be changing log and master */
2518 ASSERT(master_min != from_min);
2519
2520 un->un_l_dev = makedevice(md_major, to_min);
2521 sv.key = un->un_l_key;
2522 un->un_l_key = rtxnp->to.key;
2523
2524 } else {
2525 ASSERT(FALSE);
2526 panic("trans_exchange_parent_update_to: not a metadevice");
2527 /*NOTREACHED*/
2528 }
2529
2530 /*
2531 * delete the key for the changed child from the namespace
2532 */
2533
2534 sv.setno = MD_MIN2SET(from_min);
2535 md_rem_names(&sv, 1);
2536
2537 /*
2538 * and store the record id (from the unit struct) into recids
2539 */
2540
2541 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2542 }
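
/*
 * A minimal sketch of how the old name-space entry is retired above
 * (illustrative only, not compiled into the driver).  The key is
 * captured before it is overwritten, then a (set, key) pair is handed
 * to md_rem_names(); assuming "old_key" was saved from un_m_key or
 * un_l_key:
 *
 *	sv_dev_t	sv;
 *
 *	sv.key = old_key;
 *	sv.setno = MD_MIN2SET(rtxnp->from.mnum);
 *	md_rem_names(&sv, 1);
 *
 * The second argument to md_rem_names() is the number of entries in
 * the sv array; a single entry is removed here.
 */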
2543
2544 /*
2545 * MDRNM_LIST_URKIDS: named svc entry point
2546  * add all delta entries appropriate for our children onto the
2547  * deltalist pointed to by dlpp
2548 */
2549 int
2550 trans_rename_listkids(
2551 md_rendelta_t **dlpp,
2552 md_rentxn_t *rtxnp)
2553 {
2554 minor_t from_min, to_min, master_min, log_min;
2555 mt_unit_t *from_un;
2556 md_rendelta_t *new, *p;
2557 int n_children;
2558
2559 ASSERT(rtxnp);
2560 ASSERT(dlpp);
2561 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
2562
2563 from_min = rtxnp->from.mnum;
2564 to_min = rtxnp->to.mnum;
2565 n_children = 0;
2566
2567 if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
2568 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
2569 return (-1);
2570 }
2571
2572 for (p = *dlpp; p && p->next != NULL; p = p->next) {
2573 /* NULL */
2574 }
2575
2576 if (md_getmajor(from_un->un_m_dev) == md_major) {
2577
2578 master_min = md_getminor(from_un->un_m_dev);
2579
2580 p = new = md_build_rendelta(MDRR_CHILD,
2581 to_min == master_min? MDRR_SELF: MDRR_CHILD,
2582 from_un->un_m_dev, p, MD_UNIT(master_min),
2583 MDI_UNIT(master_min), &rtxnp->mde);
2584
2585 if (!new) {
2586 if (mdisok(&rtxnp->mde)) {
2587 (void) mdsyserror(&rtxnp->mde, ENOMEM);
2588 }
2589 return (-1);
2590 }
2591 ++n_children;
2592 }
2593
2594 if (from_un->un_l_unit &&
2595 (md_getmajor(from_un->un_l_dev) == md_major)) {
2596
2597 log_min = md_getminor(from_un->un_l_dev);
2598
2599 new = md_build_rendelta(MDRR_CHILD,
2600 to_min == log_min? MDRR_SELF: MDRR_CHILD,
2601 from_un->un_l_dev, p, MD_UNIT(log_min),
2602 MDI_UNIT(log_min), &rtxnp->mde);
2603 if (!new) {
2604 if (mdisok(&rtxnp->mde)) {
2605 (void) mdsyserror(&rtxnp->mde, ENOMEM);
2606 }
2607 return (-1);
2608 }
2609 ++n_children;
2610 }
2611
2612 return (n_children);
2613 }
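
/*
 * A minimal sketch of how a caller might walk the delta list built
 * above (illustrative only, not compiled into the driver).  The list
 * is a singly linked chain of md_rendelta_t entries threaded through
 * the "next" field; md_build_rendelta() is handed the current tail so
 * each new entry ends up linked onto the end.  Assuming visit() stands
 * in for the caller's per-delta processing:
 *
 *	md_rendelta_t	*dp;
 *
 *	for (dp = *dlpp; dp != NULL; dp = dp->next)
 *		visit(dp);
 *
 * trans_rename_listkids() itself returns the number of child deltas it
 * added (0, 1, or 2: the master and, when attached, a metadevice log),
 * or -1 with rtxnp->mde set on failure.
 */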
2614
2615 /*
2616 * support routine for MDRNM_CHECK
2617 */
2618 static int
2619 trans_may_renexch_self(
2620 mt_unit_t *un,
2621 mdi_unit_t *ui,
2622 md_rentxn_t *rtxnp)
2623 {
2624 minor_t from_min;
2625 minor_t to_min;
2626
2627 ASSERT(rtxnp);
2628 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2629
2630 from_min = rtxnp->from.mnum;
2631 to_min = rtxnp->to.mnum;
2632
2633 if (!un || !ui) {
2634 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2635 from_min);
2636 return (EINVAL);
2637 }
2638
2639 ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
2640
2641 if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
2642 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2643 return (EINVAL);
2644 }
2645
2646 if (MD_PARENT(un) == MD_MULTI_PARENT) {
2647 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2648 return (EINVAL);
2649 }
2650
2651 switch (rtxnp->op) {
2652 case MDRNOP_EXCHANGE:
2653 /*
2654 * may only swap with our child (master) if it is a metadevice
2655 */
2656 if (md_getmajor(un->un_m_dev) != md_major) {
2657 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2658 to_min);
2659 return (EINVAL);
2660 }
2661
2662 if (un->un_l_unit &&
2663 (md_getmajor(un->un_l_dev) != md_major)) {
2664
2665 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2666 to_min);
2667 return (EINVAL);
2668 }
2669
2670 if (md_getminor(un->un_m_dev) != to_min) {
2671 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2672 to_min);
2673 return (EINVAL);
2674 }
2675
2676 break;
2677
2678 case MDRNOP_RENAME:
2679 break;
2680
2681 default:
2682 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2683 from_min);
2684 return (EINVAL);
2685 }
2686
2687 return (0); /* ok */
2688 }
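
/*
 * Summary of the checks above (descriptive only; no additional checks
 * are implied).  For an exchange the routine accepts a trans unit only
 * when all of the following hold, returning EINVAL with rtxnp->mde set
 * otherwise:
 *
 *	- the unit and its in-core anchor exist
 *	- the unit is capable of being a metadevice child and does not
 *	  have multiple parents
 *	- the master is itself a metadevice
 *	- any attached log is also a metadevice
 *	- the exchange target ("to") is this trans' own master
 *
 * A rename (MDRNOP_RENAME) is gated only by the first two conditions.
 */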
2689
2690 /*
2691 * Named service entry point: MDRNM_CHECK
2692 */
2693 intptr_t
2694 trans_rename_check(
2695 md_rendelta_t *delta,
2696 md_rentxn_t *rtxnp)
2697 {
2698 int err = 0;
2699 mt_unit_t *un;
2700
2701 ASSERT(delta);
2702 ASSERT(rtxnp);
2703 ASSERT(delta->unp);
2704 ASSERT(delta->uip);
2705 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2706
2707 if (!delta || !rtxnp || !delta->unp || !delta->uip) {
2708 (void) mdsyserror(&rtxnp->mde, EINVAL);
2709 return (EINVAL);
2710 }
2711
2712 un = (mt_unit_t *)delta->unp;
2713
2714 if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) {
2715 		/*
2716 		 * a trans may not be open if it is being modified by the
2717 		 * exchange or rename; trans-UFS hasn't been verified to
2718 		 * handle the change out from underneath it.
2719 		 */
2720 if ((md_unit_isopen(delta->uip)) &&
2721 ((md_getminor(delta->dev) == rtxnp->from.mnum) ||
2722 (md_getminor(delta->dev) == rtxnp->to.mnum))) {
2723 (void) mdmderror(&rtxnp->mde,
2724 MDE_RENAME_BUSY, rtxnp->from.mnum);
2725 return (EBUSY);
2726 }
2727 }
2728
2729 /*
2730 * can't rename or exchange with a log attached
2731 */
2732
2733 if (un->un_l_unit) {
2734 (void) mdmderror(&rtxnp->mde,
2735 MDE_RENAME_BUSY, rtxnp->from.mnum);
2736 return (EBUSY);
2737 }
2738
2739 switch (delta->old_role) {
2740 case MDRR_SELF:
2741 /*
2742 * self does additional checks
2743 */
2744 err = trans_may_renexch_self((mt_unit_t *)delta->unp,
2745 delta->uip, rtxnp);
2746 if (err != 0) {
2747 goto out;
2748 }
2749 /* FALLTHROUGH */
2750
2751 case MDRR_PARENT:
2752 /*
2753 		 * trans_in_stack is only used to check for online
2754 * rename/exchange when MD_RENAME_VERSION == OFFLINE
2755 * since trans holds the sub-devices open
2756 */
2757 rtxnp->stat.trans_in_stack = TRUE;
2758 break;
2759 default:
2760 break;
2761 }
2762 out:
2763 return (err);
2764 }
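
/*
 * A minimal sketch of the calling convention, as inferred from the
 * code above (illustrative only, not compiled into the driver).  The
 * check service returns 0 when this delta poses no obstacle to the
 * rename/exchange, or a non-zero errno (EINVAL, EBUSY) otherwise:
 *
 *	intptr_t	err;
 *
 *	err = trans_rename_check(delta, rtxnp);
 *	if (err != 0)
 *		return (err);
 *
 * with rtxnp->mde carrying the failure detail back to the caller.
 */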
2765
2766 /* end of rename/exchange */
2767