/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_stripe.h>
#include <sys/lvm/md_mirror.h>

#include <sys/model.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>
#include <sys/lvm/mdmn_commd.h>

extern int		md_status;
extern kmutex_t		md_mx;
extern kcondvar_t	md_cv;

extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];

extern md_ops_t		mirror_md_ops;
extern int		md_ioctl_cnt;
extern md_krwlock_t	md_unit_array_rw;
extern major_t		md_major;
extern mdq_anchor_t	md_ff_daemonq;
extern void		md_probe_one();
extern void		mirror_openfail_console_info();

#ifdef DEBUG
extern int		mirror_debug_flag;
#endif

static void
mirror_resume_writes(mm_unit_t *un)
{
	/*
	 * Release the block on writes to the mirror and resume any blocked
	 * resync thread.
	 * This is only required for MN sets.
	 */
	if (MD_MNSET_SETNO(MD_UN2SET(un))) {
#ifdef DEBUG
		if (mirror_debug_flag)
			printf("mirror_resume_writes: mnum %x\n", MD_SID(un));
#endif
		mutex_enter(&un->un_suspend_wr_mx);
		un->un_suspend_wr_flag = 0;
		cv_broadcast(&un->un_suspend_wr_cv);
		mutex_exit(&un->un_suspend_wr_mx);
		mutex_enter(&un->un_rs_thread_mx);
		un->un_rs_thread_flags &= ~MD_RI_BLOCK;
		cv_signal(&un->un_rs_thread_cv);
		mutex_exit(&un->un_rs_thread_mx);
	}
}
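
/*
 * For context, a hedged sketch (not code from this module) of the waiting
 * side that the resume above pairs with. A blocked MN writer would wait
 * roughly like this before issuing a write while writes are suspended:
 *
 *	mutex_enter(&un->un_suspend_wr_mx);
 *	while (un->un_suspend_wr_flag)
 *		cv_wait(&un->un_suspend_wr_cv, &un->un_suspend_wr_mx);
 *	mutex_exit(&un->un_suspend_wr_mx);
 *
 * mirror_resume_writes() clears the flag and broadcasts, releasing all
 * such waiters at once; the resync thread is woken separately through
 * un_rs_thread_cv after MD_RI_BLOCK is cleared.
 */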

mm_unit_t *
mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
{
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
		return (NULL);
	}

	if (!(flags & STALE_OK)) {
		if (md_get_setstatus(setno) & MD_SET_STALE) {
			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
			return (NULL);
		}
	}

	ui = MDI_UNIT(mnum);
	if (flags & NO_OLD) {
		if (ui != NULL) {
			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
			return (NULL);
		}
		return ((mm_unit_t *)1);
	}

	if (ui == NULL) {
		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
		return (NULL);
	}

	if (flags & ARRAY_WRITER)
		md_array_writer(lock);
	else if (flags & ARRAY_READER)
		md_array_reader(lock);

	if (!(flags & NO_LOCK)) {
		if (flags & WR_LOCK)
			(void) md_ioctl_writerlock(lock, ui);
		else /* RD_LOCK */
			(void) md_ioctl_readerlock(lock, ui);
	}
	un = (mm_unit_t *)MD_UNIT(mnum);

	if (un->c.un_type != MD_METAMIRROR) {
		(void) mdmderror(mde, MDE_NOT_MM, mnum);
		return (NULL);
	}

	return (un);
}
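
/*
 * Usage sketch (illustrative, matching the calls made throughout this
 * file): each ioctl handler validates and locks its unit in one step,
 * for example
 *
 *	if ((un = mirror_getun(mnum, &p->mde, WR_LOCK, lock)) == NULL)
 *		return (0);	(detailed error is already in p->mde)
 *
 * The flags select the lock taken (RD_LOCK, WR_LOCK, NO_LOCK, optionally
 * ARRAY_READER/ARRAY_WRITER); STALE_OK admits units in stale sets; NO_OLD
 * inverts the existence check for create-time callers - note the
 * (mm_unit_t *)1 sentinel returned in that case, which is only a
 * "no error" marker and must never be dereferenced.
 */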

static int
mirror_set(
	void	*d,
	int	mode
)
{
	minor_t		mnum;
	mm_unit_t	*un;
	mddb_recid_t	recid;
	mddb_type_t	typ1;
	int		err;
	int		i;
	set_t		setno;
	md_set_params_t	*msp = d;


	mnum = msp->mnum;

	mdclrerror(&msp->mde);

	if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
		return (0);

	setno = MD_MIN2SET(mnum);

	typ1 = (mddb_type_t)md_getshared_key(setno,
	    mirror_md_ops.md_driver.md_drivername);

	/*
	 * Create the db record for this mdstruct
	 * We don't store incore elements ondisk
	 */

	if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
#else
		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
		    MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
#endif
	} else {
		/*
		 * It's important to use the correct size here
		 */
		msp->size = sizeof (mm_unit32_od_t);
		recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
		    MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
	}
	if (recid < 0)
		return (mddbstatus2error(&msp->mde, (int)recid,
		    mnum, setno));

	/* Resize to include incore fields */
	un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0);
	/*
	 * It is okay that we muck with the mdstruct here,
	 * since no one else will know about the mdstruct
	 * until we commit it. If we crash, the record will
	 * be automatically purged, since we haven't
	 * committed it yet.
	 */

	/* copy in the user's mdstruct */
	if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
	    (uint_t)msp->size, mode)) {
		mddb_deleterec_wrapper(recid);
		return (EFAULT);
	}
	/* All 64 bit metadevices only support EFI labels. */
	if (msp->options & MD_CRO_64BIT) {
		un->c.un_flag |= MD_EFILABEL;
	}

	un->c.un_revision |= MD_FN_META_DEV;
	MD_RECID(un) = recid;
	MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP;
	MD_PARENT(un) = MD_NO_PARENT;

	for (i = 0; i < NMIRROR; i++) {
		struct mm_submirror	*sm;

		sm = &un->un_sm[i];
		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		/* ensure that the submirror is a metadevice */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (mdmderror(&msp->mde, MDE_INVAL_UNIT,
			    md_getminor(sm->sm_dev)));

		if (md_get_parent(sm->sm_dev) == MD_NO_PARENT)
			continue;

		/* mirror creation should fail here */
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recid);
		return (mdmderror(&msp->mde, MDE_IN_USE,
		    md_getminor(sm->sm_dev)));
	}

	if (err = mirror_build_incore(un, 0)) {
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recid);
		return (err);
	}

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail--;

	mirror_commit(un, ALL_SUBMIRRORS, 0);
	md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
	    MD_SID(un));

	resync_start_timeout(setno);
	return (0);
}

static int
mirror_get(
	void		*migp,
	int		mode,
	IOLOCK		*lock
)
{
	mm_unit_t	*un;
	md_i_get_t	*migph = migp;

	mdclrerror(&migph->mde);

	if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL)
		return (0);

	if (migph->size == 0) {
		migph->size = un->c.un_size;
		return (0);
	}

	if (migph->size < un->c.un_size) {
		return (EFAULT);
	}
	if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp,
	    un->c.un_size, mode))
		return (EFAULT);
	return (0);
}
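
/*
 * Usage note: the size field doubles as a two-pass protocol. A hedged
 * userland sketch (the MD_IOCGET name is assumed here for illustration):
 *
 *	mig.size = 0;
 *	ioctl(fd, MD_IOCGET, &mig);	(first pass: returns un_size)
 *	mig.mdp = (uintptr_t)malloc(mig.size);
 *	ioctl(fd, MD_IOCGET, &mig);	(second pass: copies the unit out)
 *
 * A non-zero size smaller than the unit is rejected with EFAULT rather
 * than silently truncating the copyout.
 */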

static int
mirror_getdevs(
	void			*mgdp,
	int			mode,
	IOLOCK			*lock
)
{
	mm_unit_t		*un;
	md_dev64_t		*udevs;
	int			cnt;
	int			i;
	md_dev64_t		unit_dev;
	md_getdevs_params_t	*mgdph = mgdp;


	mdclrerror(&mgdph->mde);

	if ((un = mirror_getun(mgdph->mnum,
	    &mgdph->mde, RD_LOCK, lock)) == NULL)
		return (0);

	udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;

	for (cnt = 0, i = 0; i < NMIRROR; i++) {
		if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
			continue;
		if (cnt < mgdph->cnt) {
			unit_dev = un->un_sm[i].sm_dev;
			if (md_getmajor(unit_dev) != md_major) {
				unit_dev = md_xlate_mini_2_targ(unit_dev);
				if (unit_dev == NODEV64)
					return (ENODEV);
			}

			if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs,
			    sizeof (*udevs), mode) != 0)
				return (EFAULT);
			++udevs;
		}
		++cnt;
	}

	mgdph->cnt = cnt;
	return (0);
}
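
/*
 * Usage note: cnt is in/out - on entry the capacity of the devs array,
 * on return the total number of in-use submirror devices, which may
 * exceed the capacity supplied. A caller can therefore use the same
 * two-pass sizing idiom sketched after mirror_get() above: call once
 * with cnt = 0, allocate cnt entries, then call again (an assumption
 * about intended use, not a contract stated in this file).
 */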

static int
mirror_reset(
	md_i_reset_t	*mirp
)
{
	minor_t		mnum = mirp->mnum;
	mm_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	mdclrerror(&mirp->mde);

	if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL)
		return (0);

	if (MD_HAS_PARENT(un->c.un_parent)) {
		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
	}

	rw_enter(&md_unit_array_rw.lock, RW_WRITER);

	/* single thread */
	ui = MDI_UNIT(mnum);
	(void) md_unit_openclose_enter(ui);

	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		rw_exit(&md_unit_array_rw.lock);
		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
	}

	md_unit_openclose_exit(ui);

	if (!mirp->force) {
		int	smi;
		for (smi = 0; smi < NMIRROR; smi++) {
			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
				continue;

			if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) {
				rw_exit(&md_unit_array_rw.lock);
				return (mdmderror(&mirp->mde,
				    MDE_C_WITH_INVAL_SM, mnum));
			}
		}
	}

	reset_mirror(un, mnum, 1);

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail++;

	/*
	 * If MN set, reset s_un_next so all nodes can have
	 * the same view of the next available slot when
	 * nodes are -w and -j
	 */
	if (MD_MNSET_SETNO(setno)) {
		(void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
	}

	rw_exit(&md_unit_array_rw.lock);
	return (0);
}

static int
mirror_get_geom(
	mm_unit_t	*un,
	struct dk_geom	*geomp
)
{
	md_get_geom((md_unit_t *)un, geomp);

	return (0);
}

static int
mirror_get_vtoc(
	mm_unit_t	*un,
	struct vtoc	*vtocp
)
{
	md_get_vtoc((md_unit_t *)un, vtocp);

	return (0);
}

static int
mirror_set_vtoc(
	mm_unit_t	*un,
	struct vtoc	*vtocp
)
{
	return (md_set_vtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_extvtoc(
	mm_unit_t	*un,
	struct extvtoc	*vtocp
)
{
	md_get_extvtoc((md_unit_t *)un, vtocp);

	return (0);
}

static int
mirror_set_extvtoc(
	mm_unit_t	*un,
	struct extvtoc	*vtocp
)
{
	return (md_set_extvtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_cgapart(
	mm_unit_t	*un,
	struct dk_map	*dkmapp
)
{
	md_get_cgapart((md_unit_t *)un, dkmapp);
	return (0);
}

static int
mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params,
    int *smi, int *cip)
{
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	ms_comp_t		*comp;
	ms_unit_t		*mous;
	int			ci;
	int			i;
	int			compcnt;
	ms_cd_info_t		cd;
	void			(*get_dev)();
	md_dev64_t		dev = md_expldev(params->old_dev);
	md_error_t		*ep = &params->mde;
	minor_t			mnum = params->mnum;
	mdkey_t			devkey;
	int			nkeys;
	set_t			setno;
	side_t			side;

	setno = MD_MIN2SET(MD_SID(un));
	side = mddb_getsidenum(setno);

	if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0)
		return (mddeverror(ep, MDE_NAME_SPACE, dev));

	for (i = 0; i < NMIRROR; i++) {
		sm = &un->un_sm[i];
		smic = &un->un_smic[i];

		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		get_dev =
		    (void (*)())md_get_named_service(sm->sm_dev, 0,
		    "get device", 0);
		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);

		/*
		 * For each of the underlying stripe components get
		 * the info.
		 */
		for (ci = 0; ci < compcnt; ci++) {
			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
			if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) {
				*cip = ci;
				*smi = i;
				return (1);
			}
		}

		/*
		 * Now we rescan, looking only for NODEV. If we find
		 * NODEV then we will check the keys to see if it's a match.
		 *
		 * If no key was found to match dev, then there is
		 * no way to compare keys - so continue.
		 */
		if (nkeys == 0) {
			continue;
		}
		mous = MD_UNIT(md_getminor(sm->sm_dev));

		for (ci = 0; ci < compcnt; ci++) {

			comp = (struct ms_comp *)
			    ((void *)&((char *)mous)[mous->un_ocomp]);

			(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

			if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) {
				comp += ci;
				if (comp->un_key == devkey) {
					if (nkeys > 1) {
						return (mddeverror(
						    ep, MDE_MULTNM, dev));
					}
					*cip = ci;
					*smi = i;
					return (1);
				}
			}
		}
	}
	return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev));
}
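
/*
 * Lookup strategy recap (summarizing the code above, nothing new): pass 1
 * matches a component by dev_t, current or original; pass 2 runs only when
 * pass 1 misses and matches by namespace key for components whose dev_t
 * has degraded to NODEV64, e.g. a failed disk that no longer resolves.
 * An ambiguous key (nkeys > 1) is refused with MDE_MULTNM rather than
 * guessed at.
 */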

/*
 * comp_replace:
 * ----------------
 * Called to implement the component replace function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended; on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
comp_replace(
	replace_params_t	*params,
	IOLOCK			*lock
)
{
	minor_t			mnum = params->mnum;
	set_t			setno;
	side_t			side;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	ms_unit_t		*ms_un;
	mdi_unit_t		*ms_ui;
	ms_comp_t		*comp;
	mm_submirror_t		*sm;
	md_dev64_t		smdev;
	mddb_recid_t		recids[6];	/* recids for stripe on SP */
	int			smi, ci;
	ms_new_dev_t		nd;
	int			(*repl_dev)();
	void			(*repl_done)();
	void			*repl_data;
	int			err = 0;
	ms_cd_info_t		cd;
	void			(*get_dev)();

	mdclrerror(&params->mde);

	if ((un = mirror_getun(mnum, &params->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		(void) mdmderror(&params->mde, MDE_IN_UNAVAIL_STATE, mnum);
		goto errexit;
	}

	/*
	 * replace cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		(void) mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum);
		goto errexit;
	}

	if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) {
		goto errexit;
	}

	if (un->un_nsm == 1) {
		(void) mdmderror(&params->mde, MDE_LAST_SM_RE, mnum);
		goto errexit;
	}

	if (mirror_other_sources(un, smi, ci, 0) != 0) {
		(void) mdcomperror(&params->mde, MDE_REPL_INVAL_STATE,
		    mnum, md_expldev(params->old_dev));
		goto errexit;
	}

	sm = &un->un_sm[smi];
	if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) {
		(void) mdmderror(&params->mde, MDE_ILLEGAL_SM_STATE, mnum);
		goto errexit;
	}

	get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
	    "get device", 0);
	(void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

	repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0,
	    "replace device", 0);

	smdev = sm->sm_dev;
	ms_un = MD_UNIT(md_getminor(smdev));

	if (params->cmd == ENABLE_COMP) {
		md_dev64_t	this_dev;
		int		numkeys;
		mdkey_t		this_key;

		this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev :
		    cd.cd_orig_dev);
		setno = MD_MIN2SET(md_getminor(smdev));
		side = mddb_getsidenum(setno);
		comp = (struct ms_comp *)
		    ((void *)&((char *)ms_un)[ms_un->un_ocomp]);
		comp += ci;
		/*
		 * We trust the dev_t because we cannot determine the
		 * dev_t from the device id, since a new disk is in the
		 * same location. Since this is a call from metareplace -e dx
		 * AND it is SCSI, a new dev_t is not generated. So the
		 * dev_t from the mddb is used. Before enabling the device
		 * we check to make sure that multiple entries for the same
		 * device do not exist in the namespace. If they do, we
		 * fail the ioctl.
		 * One of the many ways multiple entries in the name space
		 * can occur is if one removed the failed component in the
		 * stripe of a mirror and put in another disk that was part of
		 * another metadevice. After reboot metadevadm would correctly
		 * update the device name for the metadevice whose component
		 * has moved. However, now in the metadb there are two entries
		 * for the same name (ctds) that belong to different
		 * metadevices. One is valid, the other is a ghost or "last
		 * known as" ctds.
		 */
		this_dev = md_getdevnum(setno, side,
		    comp->un_key, MD_TRUST_DEVT);

		/*
		 * Verify that multiple keys for the same
		 * dev_t don't exist
		 */

		if (md_getkeyfromdev(setno, side, this_dev,
		    &this_key, &numkeys) != 0) {
			(void) mddeverror(&params->mde, MDE_NAME_SPACE,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		/*
		 * Namespace has multiple entries
		 * for the same devt
		 */
		if (numkeys > 1) {
			(void) mddeverror(&params->mde, MDE_MULTNM,
			    md_expldev(params->old_dev));
			goto errexit;
		}
		if ((numkeys == 0) || (comp->un_key != this_key)) {
			(void) mdcomperror(&params->mde, MDE_CANT_FIND_COMP,
			    mnum, this_dev);
			goto errexit;
		}

		if ((md_getmajor(this_dev) != md_major) &&
		    (md_devid_found(setno, side, this_key) == 1)) {
			if (md_update_namespace_did(setno, side,
			    this_key, &params->mde) != 0) {
				(void) mddeverror(&params->mde, MDE_NAME_SPACE,
				    this_dev);
				goto errexit;
			}
		}

		if (md_expldev(params->new_dev) != this_dev) {
			(void) mddeverror(&params->mde, MDE_FIX_INVAL_STATE,
			    md_expldev(params->new_dev));
			goto errexit;
		}

		/* in case of dryrun, don't actually do anything */
		if ((params->options & MDIOCTL_DRYRUN) == 0) {
			err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6,
			    &repl_done, &repl_data);
		}
	} else if ((params->options & MDIOCTL_DRYRUN) == 0) {
		nd.nd_dev = md_expldev(params->new_dev);
		nd.nd_key = params->new_key;
		nd.nd_start_blk = params->start_blk;
		nd.nd_nblks = params->number_blks;
		nd.nd_labeled = params->has_label;
		nd.nd_hs_id = 0;

		err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6,
		    &repl_done, &repl_data);

	}

	if (err != 0) {
		(void) mdcomperror(&params->mde, err, mnum,
		    md_expldev(params->new_dev));
		goto errexit;
	}
	/* In case of a dryrun we're done. */
	if (params->options & MDIOCTL_DRYRUN) {
		mdclrerror(&params->mde);
		return (0);
	}

	/* set_sm_comp_state() commits the modified records */
	set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT,
	    lock);

	(*repl_done)(sm->sm_dev, repl_data);

	/*
	 * If the mirror is open then we need to make sure that the submirror,
	 * on which the replace ran, is also open, and if not then open it.
	 * This is only a concern for a single component sub-mirror stripe
	 * as it may not be open due to the failure of the single component.
	 *
	 * This check has to be done after the call to (*repl_done)
	 * as that function releases the writer lock on the submirror.
	 */
	if (md_unit_isopen(ui)) {
		minor_t	ms_mnum = md_getminor(sm->sm_dev);

		ms_ui = MDI_UNIT(ms_mnum);

		if (!md_unit_isopen(ms_ui)) {
			/*
			 * Underlying submirror is not open so open it.
			 */
			if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) {
				mirror_openfail_console_info(un, smi, ci);
				goto errexit;
			}
		}
	}

	mirror_check_failfast(mnum);

	if (params->cmd == ENABLE_COMP) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	md_ioctl_writerexit(lock);
	/*
	 * Reset any saved resync location flags as we've now replaced the
	 * component. This means we have to resync the _whole_ component.
	 */
	un->un_rs_resync_done = un->un_rs_resync_2_do = 0;
	un->un_rs_type = MD_RS_NONE;
	mirror_resume_writes(un);
	if (!MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mirror_resync_unit(mnum, NULL, &params->mde, lock);
	mdclrerror(&params->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(params->options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}
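
/*
 * Dryrun pattern note: a caller can validate a replace without committing
 * it by issuing the request twice - first with MDIOCTL_DRYRUN set in
 * params->options and, if that passes, again without it. A hedged sketch
 * of the caller side (the MD_IOCREPLACE ioctl name is assumed for
 * illustration):
 *
 *	params.options |= MDIOCTL_DRYRUN;
 *	(void) ioctl(fd, MD_IOCREPLACE, &params);	(checks only)
 *	if (mdisok(&params.mde)) {
 *		params.options &= ~MDIOCTL_DRYRUN;
 *		(void) ioctl(fd, MD_IOCREPLACE, &params);	(commits)
 *	}
 *
 * Note that this function reports most failures through params->mde while
 * still returning 0, so the error block, not the return value, is what a
 * caller must test.
 */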

/*
 * mirror_attach:
 * ----------------
 * Called to implement the submirror attach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended; on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
mirror_attach(
	md_att_struct_t	*att,
	IOLOCK		*lock
)
{
	minor_t			mnum = att->mnum;
	mm_unit_t		*un;
	md_unit_t		*su;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	minor_t			sm_mnum;
	mdkey_t			indx;
	set_t			setno;
	uint_t			options;

	/*
	 * This routine should not be called during upgrade.
	 */
	if (MD_UPGRADE) {
		return (0);
	}

	mdclrerror(&att->mde);
	options = att->options;

	if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	setno = MD_UN2SET(un);

	for (smi = 0; smi < NMIRROR; smi++)
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			break;

	if (smi == NMIRROR) {
		(void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum);
		goto errexit;
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = att->submirror;
	sm_mnum = md_getminor(sm_dev);

	if (md_get_parent(sm_dev) != MD_NO_PARENT) {
		(void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum);
		goto errexit;
	}

	if (md_unit_isopen(MDI_UNIT(sm_mnum))) {
		(void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum);
		goto errexit;
	}

	/* Check the size */
	su = (md_unit_t *)MD_UNIT(sm_mnum);
	if (un->c.un_total_blocks > su->c.un_total_blocks) {
		(void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum);
		goto errexit;
	}

	/* Don't attach labeled sm to unlabeled mirrors */
	if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) {
		(void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum);
		goto errexit;
	}

	indx = md_setshared_name(setno,
	    ddi_major_to_name(md_getmajor(sm_dev)), 0L);

	/* Open the sm, only if the mirror is open */
	if (md_unit_isopen(MDI_UNIT(mnum))) {
		if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) {
			(void) md_remshared_name(setno, indx);
			(void) mdmderror(&att->mde, MDE_SM_OPEN_ERR,
			    md_getminor(att->submirror));
			goto errexit;
		}
		/* in dryrun mode, don't leave the device open */
		if (options & MDIOCTL_DRYRUN) {
			md_layered_close(sm_dev, MD_OFLG_NULL);
		}
	}

	/*
	 * After this point the checks are done and action is taken.
	 * So, clean up and return in case of dryrun.
	 */

	if (options & MDIOCTL_DRYRUN) {
		md_ioctl_writerexit(lock);
		mdclrerror(&att->mde);
		return (0);
	}

	sm->sm_key = att->key;
	sm->sm_dev = sm_dev;
	md_set_parent(sm_dev, MD_SID(un));
	mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1);
	build_submirror(un, smi, 0);
	un->un_nsm++;
	mirror_commit(un, SMI2BIT(smi), 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	mirror_resume_writes(un);
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		(void) mirror_resync_unit(mnum, NULL, &att->mde, lock);
	mdclrerror(&att->mde);
	return (0);
errexit:
	/* We need to resume writes unless this is a dryrun */
	if (!(options & MDIOCTL_DRYRUN))
		mirror_resume_writes(un);
	return (0);
}


void
reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic)
{
	int		compcnt;
	int		i;
	md_m_shared_t	*shared;

	compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm);
	for (i = 0; i < compcnt; i++) {
		shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
		    (sm->sm_dev, sm, i);

		shared->ms_state = CS_OKAY;
		shared->ms_flags &= ~MDM_S_NOWRITE;
		shared->ms_lasterrcnt = 0;
	}
}


/*
 * mirror_detach:
 * ----------------
 * Called to implement the submirror detach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended; on exit
 * from this function, writes must be resumed.
 */
static int
mirror_detach(
	md_detach_params_t	*det,
	IOLOCK			*lock
)
{
	minor_t			mnum = det->mnum;
	mm_unit_t		*un;
	mdi_unit_t		*ui;
	mm_submirror_t		*sm;
	mm_submirror_t		*old_sm;
	mm_submirror_t		*new_sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	md_dev64_t		sm_dev;
	md_unit_t		*su;
	sv_dev_t		sv;
	mddb_recid_t		recids[2];
	int			nsv = 0;
	int			smi_remove;
	mm_submirror_ic_t	*old_smic;
	mm_submirror_ic_t	*new_smic;

	mdclrerror(&det->mde);

	if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) {
		return (0);
	}

	ui = MDI_UNIT(mnum);
	if (ui->ui_tstate & MD_INACCESSIBLE) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum));
	}
	/*
	 * detach cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		if (un->un_sm[smi].sm_dev == det->submirror) {
			smi_remove = smi;
			break;
		}
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum));
	}

	if (un->un_nsm == 1) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_LAST_SM, mnum));
	}

	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	sm_dev = sm->sm_dev;
	su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev));

	/*
	 * Need to pass in the extra record id,
	 * because mirror_commit() will not commit
	 * a sm (from the smmask) if the slot is unused.
	 * Which it is, since we are detaching.
	 */
	recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id;
	recids[1] = 0;

	mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach);
	/*
	 * If there are any erred components
	 * then make the detach fail and do not unparent the
	 * submirror.
	 */
	if (sm->sm_state == SMS_UNUSED) {
		/* reallow soft partitioning of submirror */
		MD_CAPAB(su) |= MD_CAN_SP;
		md_reset_parent(sm_dev);
		reset_comp_states(sm, smic);
		un->un_nsm--;
		/* Close the sm, only if the mirror is open */
		if (md_unit_isopen(MDI_UNIT(mnum)))
			md_layered_close(sm_dev, MD_OFLG_NULL);
		sv.setno = MD_UN2SET(un);
		sv.key = sm->sm_key;
		nsv = 1;
	} else
		(void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum);

	/*
	 * Perhaps the mirror changed its size due to this detach.
	 * (void) mirror_grow_unit(un, &mde);
	 */

	/*
	 * NOTE: We are passing the detached sm recid
	 * and not the smmask field. This is correct.
	 */
	mirror_commit(un, 0, recids);
	md_rem_names(&sv, nsv);
	if (sm->sm_state == SMS_UNUSED) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	/*
	 * Reshuffle the submirror devices in the array as we potentially
	 * have a dead record in the middle of it.
	 */
	for (smi = 0; nsv && (smi < NMIRROR); smi++) {
		if (smi < smi_remove) {
			continue;
		}
		if (smi > smi_remove) {
			old_sm = &un->un_sm[smi];
			new_sm = &un->un_sm[smi - 1];
			new_sm->sm_key = old_sm->sm_key;
			new_sm->sm_dev = old_sm->sm_dev;
			new_sm->sm_state = old_sm->sm_state;
			new_sm->sm_flags = old_sm->sm_flags;
			new_sm->sm_shared = old_sm->sm_shared;
			new_sm->sm_hsp_id = old_sm->sm_hsp_id;
			new_sm->sm_timestamp = old_sm->sm_timestamp;
			bzero(old_sm, sizeof (mm_submirror_t));
			old_smic = &un->un_smic[smi];
			new_smic = &un->un_smic[smi - 1];
			bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t));
			bzero(old_smic, sizeof (mm_submirror_ic_t));
		}
	}
	mirror_commit(un, 0, NULL);
	mirror_resume_writes(un);
	return (0);
}
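
/*
 * Worked example of the reshuffle above (illustrative): with submirrors
 * [A, B, C, D] and smi_remove = 1 (B detached), the loop copies C into
 * slot 1 and D into slot 2, bzero()ing each vacated slot, leaving
 * [A, C, D, -]. The compaction keeps in-use entries contiguous so unused
 * slots always sit at the tail of un_sm[] and un_smic[].
 */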

/*
 * mirror_offline:
 * ----------------
 * Called to implement the submirror offline function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended; on exit
 * from this function, writes must be resumed.
 */
static int
mirror_offline(
	md_i_off_on_t	*miop,
	IOLOCK		*lock
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	mdi_unit_t		*ui = MDI_UNIT(mnum);

	mdclrerror(&miop->mde);

	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	/*
	 * offline cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	/*
	 * Reject mirror_offline if ABR is set
	 */
	if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) {
		mirror_resume_writes(un);
		return (mderror(&miop->mde, MDE_ABR_SET));
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}

	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum));
	}
	mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1);
	mirror_resume_writes(un);

	MD_STATUS(un) |= MD_UN_OFFLINE_SM;
	mirror_commit(un, NO_SUBMIRRORS, 0);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}

/*
 * mirror_online:
 * ----------------
 * Called to implement the submirror online function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended; on exit
 * from this function, writes must be resumed.
 */
static int
mirror_online(
	md_i_off_on_t	*miop,
	IOLOCK		*lock
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&miop->mde);

	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}
	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	if (!SMS_IS(sm, SMS_OFFLINE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	/*
	 * online cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1);
	mirror_commit(un, NO_SUBMIRRORS, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));


	/* for MN sets, re-read the resync record from disk */
	if (MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);

	bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm,
	    howmany(un->un_rrd_num, NBBY));
	MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
	sm->sm_flags |= MD_SM_RESYNC_TARGET;
	mirror_resume_writes(un);
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		return (mirror_resync_unit(mnum, NULL, &miop->mde, lock));
	else
		return (0);
}

int
mirror_grow_unit(
	mm_unit_t	*un,
	md_error_t	*ep
)
{
	md_unit_t	*su;
	mm_submirror_t	*sm;
	int		smi;
	diskaddr_t	total_blocks;
	diskaddr_t	current_tb;
	int		spc;		/* sectors per head */
	minor_t		mnum = MD_SID(un);

	/*
	 * grow_unit cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started. Set
	 * flag to indicate GROW_PENDING and once the resync is complete
	 * the grow_unit function will be executed.
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		MD_STATUS(un) |= MD_UN_GROW_PENDING;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un)));
	}

	/*
	 * Find the smallest submirror
	 */
	total_blocks = 0;
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		sm = &un->un_sm[smi];
		/*
		 * Growth is not possible if there is one or more
		 * submirrors made up of non-Metadevices.
		 */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (0);

		su = MD_UNIT(md_getminor(sm->sm_dev));
		if ((total_blocks == 0) ||
		    (su->c.un_total_blocks < total_blocks))
			total_blocks = su->c.un_total_blocks;
	}

	/*
	 * If the smallest submirror is not larger
	 * than the mirror, we are all done.
	 */
	if (total_blocks <= un->c.un_total_blocks)
		return (0);

	/*
	 * Growing the mirror now.
	 * First: Round down the actual_tb to be a multiple
	 * of nheads * nsects.
	 */
	spc = un->c.un_nhead * un->c.un_nsect;
	current_tb = (total_blocks/spc) * spc;

	un->c.un_total_blocks = current_tb;
	md_nblocks_set(mnum, un->c.un_total_blocks);
	un->c.un_actual_tb = total_blocks;

	/* Is the mirror growing from 32 bit device to 64 bit device? */
	if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
	    (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) {
#if defined(_ILP32)
		return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum));
#else
		mddb_type_t	typ1;
		mddb_recid_t	recid;
		set_t		setno;
		mddb_recid_t	old_recid = un->c.un_record_id;
		mddb_recid_t	old_vtoc;
		mddb_de_ic_t	*dep, *old_dep;
		md_create_rec_option_t	options;

		/* yup, new device size. So we need to replace the record */
		typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un),
		    mirror_md_ops.md_driver.md_drivername);
		setno = MD_MIN2SET(mnum);

		/* Preserve the friendly name properties of growing unit */
		options = MD_CRO_64BIT | MD_CRO_MIRROR;
		if (un->c.un_revision & MD_FN_META_DEV)
			options |= MD_CRO_FN;
		recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1,
		    MIRROR_REC, options, setno);
		/* Resize to include incore fields */
		un->c.un_revision |= MD_64BIT_META_DEV;
		/* All 64 bit metadevices only support EFI labels. */
		un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device had a vtoc record attached to it, we remove
		 * the vtoc record, because the layout has changed completely.
		 */
		old_vtoc = un->c.un_vtoc_id;
		if (old_vtoc != 0) {
			un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
		MD_RECID(un) = recid;
		dep = mddb_getrecdep(recid);
		old_dep = mddb_getrecdep(old_recid);
		kmem_free(dep->de_rb_userdata, dep->de_reqsize);
		dep->de_rb_userdata = old_dep->de_rb_userdata;
		dep->de_reqsize = old_dep->de_reqsize;
		dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic;
		dep->de_icreqsize = old_dep->de_icreqsize;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		old_dep->de_rb_userdata = NULL;
		old_dep->de_rb_userdata_ic = NULL;
		mddb_deleterec_wrapper(old_recid);
		/*
		 * If there was a vtoc record, it is no longer needed, because
		 * a new efi record has been created for this un.
		 */
		if (old_vtoc != 0) {
			mddb_deleterec_wrapper(old_vtoc);
		}
#endif
	}

	if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) {
		if (mirror_resize_resync_regions(un, current_tb)) {
			return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
		}
		mirror_check_failfast(mnum);
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (0);
	}

	if (mirror_add_resync_regions(un, current_tb)) {
		return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
	}

	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}
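
/*
 * Worked example of the rounding above (illustrative numbers): with
 * un_nhead = 16 and un_nsect = 128, spc = 2048 sectors per cylinder. If
 * the smallest submirror reports total_blocks = 10000, then
 * current_tb = (10000 / 2048) * 2048 = 8192, so the mirror exposes 8192
 * blocks (un_total_blocks) while remembering the raw 10000 in
 * un_actual_tb. Integer division guarantees the exported size is always
 * a whole number of cylinders and never larger than the smallest
 * submirror.
 */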

static int
mirror_grow(
	void	*mgp,
	IOLOCK	*lock
)
{
	mm_unit_t	*un;
	md_grow_params_t	*mgph = mgp;

	mdclrerror(&mgph->mde);

	if ((un = mirror_getun(mgph->mnum,
	    &mgph->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (MD_STATUS(un) & MD_UN_GROW_PENDING)
		return (0);

	return (mirror_grow_unit(un, &mgph->mde));
}

static int
mirror_change(
	md_mirror_params_t	*mmp,
	IOLOCK			*lock
)
{
	mm_params_t		*pp = &mmp->params;
	mm_unit_t		*un;

	mdclrerror(&mmp->mde);

	if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (pp->change_read_option)
		un->un_read_option = pp->read_option;

	if (pp->change_write_option)
		un->un_write_option = pp->write_option;

	if (pp->change_pass_num)
		un->un_pass_num = pp->pass_num;

	mirror_commit(un, NO_SUBMIRRORS, 0);

	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}

static int
mirror_get_resync(
	md_resync_ioctl_t	*ri
)
{
	minor_t			mnum = ri->ri_mnum;
	mm_unit_t		*un;
	u_longlong_t		percent;
	uint_t			cnt;
	uint_t			rr;
	diskaddr_t		d;

	mdclrerror(&ri->mde);

	if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL)
		return (0);

	ri->ri_flags = 0;
	if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) {
		ri->ri_percent_done = 0;
		ri->ri_percent_dirty = 0;
		return (0);
	}

	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) {
		if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
			ri->ri_flags |= MD_RI_INPROGRESS;
		/* Return state of resync thread */
		ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK);
		d = un->un_rs_resync_2_do;
		if (d) {
			percent = un->un_rs_resync_done;
			if (un->c.un_total_blocks >
			    MD_MAX_BLKS_FOR_SMALL_DEVS) {
				percent *= 1000;
				percent /= d;
				if (percent > 1000)
					percent = 1000;
			} else {
				percent *= 100;
				percent /= d;
			}
			ri->ri_percent_done = (int)percent;
		} else {
			ri->ri_percent_done = 0;
		}
	}
	if (un->un_nsm < 2) {
		ri->ri_percent_dirty = 0;
		return (0);
	}
	cnt = 0;
	for (rr = 0; rr < un->un_rrd_num; rr++)
		if (IS_REGION_DIRTY(rr, un))
			cnt++;
	d = un->un_rrd_num;
	if (d) {
		percent = cnt;
		percent *= 100;
		percent += d - 1;	/* round up */
		percent /= d;
	} else
		percent = 0;
	ri->ri_percent_dirty = (int)percent;
	return (0);
}
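
/*
 * Scaling note (summarizing the arithmetic above): for devices larger
 * than MD_MAX_BLKS_FOR_SMALL_DEVS the done figure is reported in tenths
 * of a percent (0-1000) instead of whole percent (0-100), which preserves
 * a digit of progress resolution on very large mirrors. Example with
 * illustrative numbers: resync_done = 123456, resync_2_do = 1000000 gives
 * (123456 * 1000) / 1000000 = 123, i.e. 12.3% done. The dirty-region
 * figure, by contrast, is always whole percent, rounded up so that any
 * dirty region reports at least 1%.
 */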

/*
 * mirror_get_owner:
 * ----------------
 * Called to obtain the current owner of a mirror.
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	EINVAL	metadevice does not exist or is not a member of a multi-owned
 *		set.
 */
static int
mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock)
{
	mm_unit_t	*un;
	set_t		setno;

	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
		return (EINVAL);

	setno = MD_UN2SET(un);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	p->d.owner = un->un_mirror_owner;
	return (0);
}

/*
 * mirror_choose_owner_thread:
 * --------------------------
 * Called to send a CHOOSE_OWNER message to the commd running on the master
 * node. This needs to run in a separate context so that mutex livelock is
 * avoided. This can occur because the original request is issued from a call
 * to metaioctl() which acquires the global ioctl lock, calls down into the
 * mirror_ioctl code and then attempts to mdmn_ksend_message() to the master
 * node. As the handler for the choose_owner message needs to send another
 * ioctl through the metaioctl() entry point, any other use (by rpc.metad or
 * mdcommd checking on set ownership) will deadlock the system, leading to
 * cluster reconfiguration timeouts and eventually a node or (at worst) a
 * cluster-wide panic.
 */
static void
mirror_choose_owner_thread(md_mn_msg_chooseid_t *msg)
{
	int		rval;
	md_mn_kresult_t	*kres;
	set_t		setno = MD_MIN2SET(msg->msg_chooseid_mnum);

	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER,
	    MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg,
	    sizeof (md_mn_msg_chooseid_t), kres);
	if (!MDMN_KSEND_MSG_OK(rval, kres)) {
		mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER");
		cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER");
	}

	kmem_free(kres, sizeof (md_mn_kresult_t));
	kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
	thread_exit();
}

/*
 * mirror_owner_thread:
 * -------------------
 * Called to request an ownership change from a thread context. This issues
 * a mdmn_ksend_message() and then completes the appropriate ownership change
 * on successful completion of the message transport.
 * The originating application must poll for completion on the 'flags' member
 * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
 * Success is marked by a return value of MD_MN_MM_RES_OK, failure by
 * MD_MN_MM_RES_FAIL.
 */
static void
mirror_owner_thread(md_mn_req_owner_t *ownp)
{
	int		rval;
	set_t		setno = MD_MIN2SET(ownp->mnum);
	mm_unit_t	*un = MD_UNIT(ownp->mnum);
	md_mn_kresult_t	*kresult;
	md_mps_t	*ps1;

	un->un_mirror_owner_status = 0;

	mutex_enter(&un->un_owner_mx);
	un->un_owner_state |= MM_MN_OWNER_SENT;
	mutex_exit(&un->un_owner_mx);

	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER,
	    MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t),
	    kresult);

	if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
		/*
		 * Message transport layer failed. Return the failure code to
		 * the application.
		 */
		mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER");
		mutex_enter(&un->un_owner_mx);
		un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL;
	} else {
		/*
		 * Ownership change succeeded. Update in-core version of
		 * mirror owner.
		 */
		mutex_enter(&un->un_owner_mx);
		if (un->un_owner_state & MM_MN_BECOME_OWNER) {
			un->un_mirror_owner = md_mn_mynode_id;
			/* Sets node owner of un_rr_dirty record */
			if (un->un_rr_dirty_recid)
				(void) mddb_setowner(un->un_rr_dirty_recid,
				    md_mn_mynode_id);
			/*
			 * Release the block on the current resync region if it
			 * is blocked
			 */
			ps1 = un->un_rs_prev_overlap;
			if ((ps1 != NULL) &&
			    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
				mirror_overlap_tree_remove(ps1);
		}

		un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_OK;

		/* Restart the resync thread if it was previously blocked */
		if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) {
			mutex_enter(&un->un_rs_thread_mx);
			un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER;
			cv_signal(&un->un_rs_thread_cv);
			mutex_exit(&un->un_rs_thread_mx);
		}
	}
	kmem_free(kresult, sizeof (md_mn_kresult_t));
	kmem_free(ownp, sizeof (md_mn_req_owner_t));
	thread_exit();
}

/*
 * mirror_set_owner:
 * ----------------
 * Called to change the owner of a mirror to the specified node. If we
 * are not the owner of the mirror, we do nothing apart from updating the
 * in-core ownership. It can also be used to choose a new owner for the
 * resync of a mirror; this case is specified by the flag
 * MD_MN_MM_CHOOSE_OWNER, see below.
 *
 * The p->d.flags bitfield controls how subsequent ownership changes will be
 * handled:
 *	MD_MN_MM_SPAWN_THREAD
 *		a separate thread is created which emulates the behaviour of
 *		become_owner() [mirror.c]. This is needed when changing the
 *		ownership from user context as there needs to be a controlling
 *		kernel thread which updates the owner info on the originating
 *		node. Successful completion of the mdmn_ksend_message() means
 *		that the owner field can be changed.
 *
 *	MD_MN_MM_PREVENT_CHANGE
 *		Disallow any change of ownership once this ownership change has
 *		been processed. The only way of changing the owner away from
 *		the p->d.owner node specified in the call is to issue a request
 *		with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to
 *		become owner from a different node while the PREVENT_CHANGE
 *		is in operation will result in an EAGAIN return value.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE set.
 *
 *	MD_MN_MM_ALLOW_CHANGE
 *		Allow the owner to be changed by a subsequent request.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE cleared.
 *
 *	MD_MN_MM_CHOOSE_OWNER
 *		Choose a new owner for a mirror resync. In this case, the new
 *		owner argument is not used. The selection of a new owner
 *		is a round robin allocation using a resync owner count. This
 *		ioctl passes this value in a message to the master node
 *		which uses it to select a node from the node list and then
 *		sends it a message to become the owner.
 *
 * If we are the current owner, we must stop further i/o from being scheduled
 * and wait for any pending i/o to drain. We wait for any in-progress resync
 * bitmap updates to complete and we can then set the owner. If an update to
 * the resync bitmap is attempted after this we simply don't write this out to
 * disk until the ownership is restored.
 *
 * If we are the node that wants to become the owner we update the in-core
 * owner and return. The i/o that initiated the ownership change will complete
 * on successful return from this ioctl.
 *
 * Return Value:
 *	0	Success
 *	EINVAL	Invalid unit referenced
 *	EAGAIN	Ownership couldn't be transferred away or change of
 *		ownership is prevented. Caller should retry later on.
 */
1782 static int
mirror_set_owner(md_set_mmown_params_t * p,IOLOCK * lock)1783 mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock)
1784 {
1785 mdi_unit_t *ui;
1786 mm_unit_t *un;
1787 set_t setno;
1788
1789 if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
1790 return (EINVAL);
1791 ui = MDI_UNIT(p->d.mnum);
1792 setno = MD_MIN2SET(p->d.mnum);
1793 if (!MD_MNSET_SETNO(setno)) {
1794 return (EINVAL);
1795 }
1796
1797 /*
1798 * If we are choosing a new resync owner, send a message to the master
1799 * to make the choice.
1800 */
1801 if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) {
1802 /* Release ioctl lock before we call ksend_message() */
1803 md_ioctl_readerexit(lock);
1804 /* If we're resetting the owner pass the node id in */
1805 if (p->d.owner != MD_MN_MIRROR_UNOWNED) {
1806 return (mirror_choose_owner(un, &p->d));
1807 } else {
1808 return (mirror_choose_owner(un, NULL));
1809 }
1810 }
1811
1812 /*
1813 * Check for whether we have to spawn a thread to issue this request.
1814 * If set we issue a mdmn_ksend_message() to cause the appropriate
1815 * ownership change. On completion of this request the calling
1816 * application _must_ poll the structure 'flags' field to determine the
1817 * result of the request. All this is necessary until we have true
1818 * multi-entrant ioctl support.
1819 * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can
1820 * be ignored.
1821 */
1822 if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) {
1823 md_mn_req_owner_t *ownp;
1824 ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP);
1825 p->d.flags &= ~MD_MN_MM_SPAWN_THREAD;
1826 bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t));
1827 if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp,
1828 0, &p0, TS_RUN, 60) == NULL) {
1829 kmem_free(ownp, sizeof (md_mn_req_owner_t));
1830 return (EFAULT);
1831 } else {
1832 return (0);
1833 }
1834 }
1835
1836 /*
1837 * If setting owner to NULL, this is being done because the owner has
1838 * died and therefore we set OPT_NOT_DONE to ensure that the
1839 * mirror is marked as "Needs Maintenance" and that an optimized
1840 * resync will be done when we resync the mirror, Also clear the
1841 * PREVENT_CHANGE flag and remove the last resync region from the
1842 * overlap tree.
1843 */
1844 if (p->d.owner == 0) {
1845 md_mps_t *ps;
1846 int i;
1847
1848 md_ioctl_readerexit(lock);
1849 un = md_ioctl_writerlock(lock, ui);
1850 /*
1851 * If the ABR capability is not set and the pass_num is non-zero
1852 * there is need to perform an optimized resync
1853 * Therefore set OPT_NOT_DONE, setup the resync_bm and set
1854 * the submirrors as resync targets.
1855 */
1856 if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) {
1857 MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
1858
1859 (void) mddb_reread_rr(setno, un->un_rr_dirty_recid);
1860 bcopy((caddr_t)un->un_dirty_bm,
1861 (caddr_t)un->un_resync_bm,
1862 howmany(un->un_rrd_num, NBBY));
1863 for (i = 0; i < NMIRROR; i++) {
1864 if ((SUBMIRROR_IS_READABLE(un, i)) ||
1865 SMS_BY_INDEX_IS(un, i,
1866 SMS_OFFLINE_RESYNC))
1867 un->un_sm[i].sm_flags |=
1868 MD_SM_RESYNC_TARGET;
1869 }
1870 }
1871 mutex_enter(&un->un_owner_mx);
1872 un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE;
1873 mutex_exit(&un->un_owner_mx);
1874 ps = un->un_rs_prev_overlap;
1875 if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
1876 mirror_overlap_tree_remove(ps);
1877 ps->ps_firstblk = 0;
1878 ps->ps_lastblk = 0;
1879 }
1880 md_ioctl_writerexit(lock);
1881 un = md_ioctl_readerlock(lock, ui);
1882 }
1883
1884 mutex_enter(&un->un_owner_mx);
1885 if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) {
1886 /*
1887 * If we are not trying to become owner ourselves check
1888 * to see if we have to change the owner
1889 */
1890 if (un->un_mirror_owner == p->d.owner) {
1891 /*
1892 * No need to change owner,
1893 * Clear/set PREVENT_CHANGE bit
1894 */
1895 if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) {
1896 un->un_owner_state |= MM_MN_PREVENT_CHANGE;
1897 } else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) {
1898 un->un_owner_state &= ~MM_MN_PREVENT_CHANGE;
1899 }
1900 mutex_exit(&un->un_owner_mx);
1901 return (0);
1902 }
1903 }
1904
1905 /*
1906 * Disallow ownership change if previously requested to. This can only
1907 * be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the
1908 * flags field.
1909 */
1910 if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) &&
1911 !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) {
1912 mutex_exit(&un->un_owner_mx);
1913 #ifdef DEBUG
1914 cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become "
1915 "owner while node %x has exclusive access to %s",
1916 p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un)));
1917 #endif
1918 return (EAGAIN);
1919 }
1920 if (p->d.owner == md_mn_mynode_id) {
1921 /*
1922 * I'm becoming the mirror owner. Flag this so that the
1923 * message sender can change the in-core owner when all
1924 * nodes have processed this message
1925 */
1926 un->un_owner_state &= ~MM_MN_OWNER_SENT;
1927 un->un_owner_state |= MM_MN_BECOME_OWNER;
1928 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1929 MM_MN_PREVENT_CHANGE : 0;
1930 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1931 ~MM_MN_PREVENT_CHANGE : ~0;
1932
1933 mutex_exit(&un->un_owner_mx);
1934 } else if ((un->un_mirror_owner == md_mn_mynode_id) ||
1935 un->un_owner_state & MM_MN_BECOME_OWNER) {
1936 mutex_exit(&un->un_owner_mx);
1937
1938 /*
1939 * I'm releasing ownership. Block and drain i/o. This also
1940 * blocks until any in-progress resync record update completes.
1941 */
1942 md_ioctl_readerexit(lock);
1943 un = md_ioctl_writerlock(lock, ui);
1944 /* Block the resync thread */
1945 mutex_enter(&un->un_rs_thread_mx);
1946 un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER;
1947 mutex_exit(&un->un_rs_thread_mx);
1948 mutex_enter(&un->un_owner_mx);
1949 un->un_mirror_owner = p->d.owner;
1950
1951 /* Set the node owner of the un_rr_dirty record */
1952 if (un->un_rr_dirty_recid)
1953 (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1954 un->un_owner_state &= ~MM_MN_BECOME_OWNER;
1955 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1956 MM_MN_PREVENT_CHANGE : 0;
1957 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1958 ~MM_MN_PREVENT_CHANGE : ~0;
1959 mutex_exit(&un->un_owner_mx);
1960 /*
1961 * Allow further i/o to occur. Any write() from another node
1962 * will now cause another ownership change to occur.
1963 */
1964 md_ioctl_writerexit(lock);
1965 } else {
1966 /* Update the in-core mirror owner */
1967 un->un_mirror_owner = p->d.owner;
1968 /* Set the node owner of the un_rr_dirty record */
1969 if (un->un_rr_dirty_recid)
1970 (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
1971 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
1972 MM_MN_PREVENT_CHANGE : 0;
1973 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
1974 ~MM_MN_PREVENT_CHANGE : ~0;
1975 mutex_exit(&un->un_owner_mx);
1976 }
1977 return (0);
1978 }
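
/*
 * Editorial sketch (illustrative, not driver code): the PREVENT_CHANGE
 * handling above uses a conditional set/clear idiom that appears three
 * times in mirror_set_owner.  A minimal user-level demonstration of the
 * same idiom; the names ST_PREVENT, REQ_PREVENT and REQ_ALLOW are
 * hypothetical:
 *
 *	#include <stdio.h>
 *
 *	#define	ST_PREVENT	0x1
 *	#define	REQ_PREVENT	0x1
 *	#define	REQ_ALLOW	0x2
 *
 *	int
 *	main(void)
 *	{
 *		unsigned int state = 0;
 *		unsigned int req = REQ_PREVENT;
 *
 *		state |= (req & REQ_PREVENT) ? ST_PREVENT : 0;
 *		state &= (req & REQ_ALLOW) ? ~ST_PREVENT : ~0U;
 *		(void) printf("state = %x\n", state);
 *		return (0);
 *	}
 *
 * When neither request bit is set, both statements are no-ops (bitwise
 * OR with 0, AND with all ones), so the existing state is preserved.
 */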
1979 /*
1980 * mirror_allocate_hotspare:
1981 * ------------------------
1982 * Called to allocate a hotspare for a failed component. This function is
1983 * called by the MD_MN_ALLOCATE_HOTSPARE ioctl.
1984 */
1985 static int
1986 mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp)
1987 {
1988 set_t setno;
1989 mm_unit_t *un;
1990
1991 #ifdef DEBUG
1992 if (mirror_debug_flag)
1993 printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n",
1994 p->mnum, p->sm, p->comp);
1995 #endif
1996
1997 if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
1998 return (EINVAL);
1999
2000 /* This function is only valid for a multi-node set */
2001 setno = MD_MIN2SET(p->mnum);
2002 if (!MD_MNSET_SETNO(setno)) {
2003 return (EINVAL);
2004 }
2005 (void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT,
2006 p->hs_id, lockp);
2007 md_ioctl_writerexit(lockp);
2008 return (0);
2009 }
2010
2011 /*
2012 * mirror_get_owner_status:
2013 * -----------------------
2014 * Return the status of a previously issued ioctl to change ownership. This is
2015 * required for soft-partition support as the request to change mirror owner
2016 * needs to be run from a separate daemon thread.
2017 *
2018 * Returns:
2019 * 0 Success (contents of un_mirror_owner_status placed in 'flags')
2020 * EINVAL Invalid unit
2021 */
2022 static int
2023 mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock)
2024 {
2025 mm_unit_t *un;
2026 set_t setno;
2027
2028 if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL)
2029 return (EINVAL);
2030
2031 setno = MD_MIN2SET(p->mnum);
2032 if (!MD_MNSET_SETNO(setno)) {
2033 return (EINVAL);
2034 }
2035
2036 p->flags = un->un_mirror_owner_status;
2037 return (0);
2038 }
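
/*
 * Editorial sketch (illustrative, not driver code): a user-level caller
 * that has requested an ownership change from a daemon thread can poll
 * the result through MD_MN_MM_OWNER_STATUS.  The device path, the header
 * locations, and the convention that a zero status means "not yet
 * posted" are assumptions; mnum and flags are the md_mn_own_status_t
 * fields used above:
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/lvm/mdvar.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int
 *	wait_owner_change(minor_t mnum)
 *	{
 *		md_mn_own_status_t st;
 *		int fd = open("/dev/md/admin", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (-1);
 *		do {
 *			(void) memset(&st, 0, sizeof (st));
 *			st.mnum = mnum;
 *			if (ioctl(fd, MD_MN_MM_OWNER_STATUS, &st) < 0) {
 *				(void) close(fd);
 *				return (-1);
 *			}
 *		} while (st.flags == 0);
 *		(void) close(fd);
 *		return ((int)st.flags);
 *	}
 *
 * A real caller would sleep between iterations rather than spin.
 */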
2039
2040 /*
2041 * mirror_set_state:
2042 * ---------------
2043 * Called to set the state of the component of a submirror to the specified
2044 * value. This function is called by the MD_MN_SET_STATE ioctl.
2045 */
2046 static int
2047 mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp)
2048 {
2049 mm_unit_t *un;
2050 mm_submirror_t *sm;
2051 mm_submirror_ic_t *smic;
2052 md_m_shared_t *shared;
2053 set_t setno;
2054
2055 #ifdef DEBUG
2056 if (mirror_debug_flag)
2057 printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, "
2058 "%x, %x, %x %x\n", p->mnum, p->sm, p->comp,
2059 p->state, p->hs_id);
2060 #endif
2061 if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
2062 return (EINVAL);
2063
2064 /* This function is only valid for a multi-node set */
2065 setno = MD_MIN2SET(p->mnum);
2066 if (!MD_MNSET_SETNO(setno)) {
2067 return (EINVAL);
2068 }
2069 sm = &un->un_sm[p->sm];
2070 smic = &un->un_smic[p->sm];
2071
2072 /* Set state in component and update ms_flags */
2073 shared = (md_m_shared_t *)
2074 (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp);
2075 /*
2076 * If a CS_ERRED state is being sent, verify that the sender
2077 * has the same view of the component that this node currently has.
2078 *
2079 * There is a case where the sender was sending a CS_ERRED when a
2080 * component was in error, but before the sender returns from
2081 * ksend_message the component has been hotspared and resync'd.
2082 *
2083 * In this case, the hs_id will be different from the shared ms_hs_id,
2084 * meaning the component has already been hotspared, so we simply
2085 * return.
2086 */
2087 if (p->state == CS_ERRED) {
2088 if (shared->ms_hs_id != p->hs_id) {
2089 #ifdef DEBUG
2090 if (mirror_debug_flag) {
2091 printf("mirror_set_state: short circuit "
2092 "hs_id=0x%x, ms_hs_id=0x%x\n",
2093 p->hs_id, shared->ms_hs_id);
2094 }
2095 #endif
2096 /* release the block on writes to the mirror */
2097 mirror_resume_writes(un);
2098 md_ioctl_writerexit(lockp);
2099 return (0);
2100 }
2101 }
2102
2103 /*
2104 * If the device is newly errored then make sure that it is
2105 * closed. Closing the device allows for the RCM framework
2106 * to unconfigure the device if required.
2107 */
2108 if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) &&
2109 (shared->ms_flags & MDM_S_ISOPEN)) {
2110 void (*get_dev)();
2111 ms_cd_info_t cd;
2112
2113 get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
2114 "get device", 0);
2115 (void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd);
2116
2117 md_layered_close(cd.cd_dev, MD_OFLG_NULL);
2118 shared->ms_flags &= ~MDM_S_ISOPEN;
2119 }
2120
2121 shared->ms_state = p->state;
2122 uniqtime32(&shared->ms_timestamp);
2123
2124 if (p->state == CS_ERRED) {
2125 shared->ms_flags |= MDM_S_NOWRITE;
2126 } else
2127 shared->ms_flags &= ~MDM_S_NOWRITE;
2128
2129 shared->ms_flags &= ~MDM_S_IOERR;
2130 un->un_changecnt++;
2131 shared->ms_lasterrcnt = un->un_changecnt;
2132
2133 /* Update state in submirror */
2134 mirror_set_sm_state(sm, smic, SMS_RUNNING, 0);
2135 /*
2136 * Commit the state change to the metadb; only the master will write
2137 * to disk
2138 */
2139 mirror_commit(un, SMI2BIT(p->sm), 0);
2140
2141 /* release the block on writes to the mirror */
2142 mirror_resume_writes(un);
2143
2144 /* generate NOTIFY events for error state changes */
2145 if (p->state == CS_ERRED) {
2146 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
2147 MD_UN2SET(un), MD_SID(un));
2148 } else if (p->state == CS_LAST_ERRED) {
2149 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
2150 MD_UN2SET(un), MD_SID(un));
2151 }
2152 md_ioctl_writerexit(lockp);
2153 return (0);
2154 }
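
/*
 * Editorial sketch (illustrative, not driver code): the hs_id comparison
 * above is a generation-number check for stale messages.  Reduced to its
 * essentials, with hypothetical names:
 *
 *	#include <stdbool.h>
 *
 *	struct comp {
 *		unsigned int gen;
 *	};
 *
 *	static bool
 *	msg_is_stale(const struct comp *c, unsigned int msg_gen)
 *	{
 *		return (c->gen != msg_gen);
 *	}
 *
 * The sender captures the component's generation (here, the hotspare id)
 * when it builds the message; if the component is hotspared and resynced
 * before the message is processed, the generations no longer match and
 * the receiver can safely ignore the request.
 */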
2155
2156 /*
2157 * mirror_suspend_writes:
2158 * ---------------------
2159 * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
2160 * tested in mirror_write_strategy and, if it is set, all writes are blocked.
2161 * This function is called by the MD_MN_SUSPEND_WRITES ioctl.
2162 */
2163 static int
2164 mirror_suspend_writes(md_suspend_wr_params_t *p)
2165 {
2166 set_t setno;
2167 mm_unit_t *un;
2168
2169 #ifdef DEBUG
2170 if (mirror_debug_flag)
2171 printf("mirror_suspend_writes: mnum = %x\n", p->mnum);
2172 #endif
2173 if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL)
2174 return (EINVAL); /* No unit */
2175
2176 /* This function is only valid for a multi-node set */
2177 setno = MD_MIN2SET(p->mnum);
2178 if (!MD_MNSET_SETNO(setno)) {
2179 return (EINVAL);
2180 }
2181
2182 /*
2183 * Mark the resync as blocked. This stops any currently running resync
2184 * thread and prevents a new resync from attempting to perform
2185 * i/o.
2186 */
2187 mutex_enter(&un->un_rs_thread_mx);
2188 un->un_rs_thread_flags |= MD_RI_BLOCK;
2189 mutex_exit(&un->un_rs_thread_mx);
2190
2191 mutex_enter(&un->un_suspend_wr_mx);
2192 un->un_suspend_wr_flag = 1;
2193 mutex_exit(&un->un_suspend_wr_mx);
2194
2195 return (0);
2196 }
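
/*
 * Editorial sketch (illustrative, not driver code): the suspend flag set
 * above pairs with a check in the write path.  The same handshake
 * expressed with POSIX threads, all names hypothetical:
 *
 *	#include <pthread.h>
 *
 *	static pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;
 *	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
 *	static int suspended;
 *
 *	static void
 *	suspend_writes(void)
 *	{
 *		(void) pthread_mutex_lock(&mx);
 *		suspended = 1;
 *		(void) pthread_mutex_unlock(&mx);
 *	}
 *
 *	static void
 *	write_entry(void)
 *	{
 *		(void) pthread_mutex_lock(&mx);
 *		while (suspended)
 *			(void) pthread_cond_wait(&cv, &mx);
 *		(void) pthread_mutex_unlock(&mx);
 *	}
 *
 * Resuming clears the flag under the mutex and broadcasts the condition
 * variable so that blocked writers re-evaluate it.
 */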
2197
2198 /*
2199 * mirror_set_capability:
2200 * ------------------------
2201 * Called to set or clear a capability for a mirror. This function is
2202 * called by the MD_MN_SET_CAP ioctl.
2203 */
2204 static int
2205 mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
2206 {
2207 set_t setno;
2208 mm_unit_t *un;
2209 mdi_unit_t *ui;
2210
2211 #ifdef DEBUG
2212 if (mirror_debug_flag)
2213 printf("mirror_set_capability: mnum = %x\n", p->mnum);
2214 #endif
2215 if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL)
2216 return (EINVAL);
2217
2218 /* This function is only valid for a multi-node set */
2219 setno = MD_MIN2SET(p->mnum);
2220 if (!MD_MNSET_SETNO(setno)) {
2221 return (EINVAL);
2222 }
2223 ui = MDI_UNIT(p->mnum);
2224
2225 if (p->sc_set & DKV_ABR_CAP) {
2226 ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
2227 /* Clear DRL and set owner to 0 if no resync active */
2228 mirror_process_unit_resync(un);
2229 if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) {
2230 mutex_enter(&un->un_owner_mx);
2231 un->un_mirror_owner = 0;
2232 mutex_exit(&un->un_owner_mx);
2233 }
2234 } else {
2235 ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
2236 }
2237 if (p->sc_set & DKV_DMR_CAP) {
2238 ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
2239 } else {
2240 ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
2241 }
2242 return (0);
2243 }
2244
2245 /*
2246 * mirror_choose_owner:
2247 * ------------------------
2248 * Called to choose an owner for a mirror resync. Can be called when starting
2249 * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag
2250 * set. The ioctl is called with this flag set when we are in a cluster
2251 * reconfig and wish to set a new owner for a resync whose owner has left
2252 * the cluster. We use a resync owner count to implement a round-robin
2253 * allocation of resync owners. We send a message to the master including
2254 * this count and the message handler uses it to select an owner from the
2255 * nodelist and then sends a SET_MM_OWNER message to the chosen node to
2256 * become the owner.
2257 *
2258 * Input:
2259 * un - unit reference
2260 * ownp - owner information (if non-NULL)
2261 */
2262 int
2263 mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp)
2264 {
2265 set_t setno;
2266 md_mn_msg_chooseid_t *msg;
2267
2268 /* This function is only valid for a multi-node set */
2269 setno = MD_UN2SET(un);
2270 if (!MD_MNSET_SETNO(setno)) {
2271 return (EINVAL);
2272 }
2273
2274
2275 #ifdef DEBUG
2276 if (mirror_debug_flag)
2277 printf("send choose owner message, mnum = %x,"
2278 "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt);
2279 #endif
2280
2281 /*
2282 * Set up the message with the current resync count
2283 * and then increment the count. If we're called with a non-NULL
2284 * owner then we are reestablishing the owner of the mirror. In this
2285 * case we have to flag this to the message handler and set rcnt to
2286 * the new owner node.
2287 */
2288 msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP);
2289 msg->msg_chooseid_mnum = MD_SID(un);
2290 if (ownp == NULL) {
2291 mutex_enter(&md_mx);
2292 msg->msg_chooseid_rcnt = md_set[setno].s_rcnt;
2293 md_set[setno].s_rcnt++;
2294 mutex_exit(&md_mx);
2295 msg->msg_chooseid_set_node = B_FALSE;
2296 } else {
2297 msg->msg_chooseid_rcnt = ownp->owner;
2298 msg->msg_chooseid_set_node = B_TRUE;
2299 }
2300
2301 /*
2302 * Spawn a thread to issue the ksend_message() call so that we can
2303 * drop the ioctl lock hierarchy that is blocking further rpc.metad and
2304 * commd set ownership checking.
2305 */
2306 if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg,
2307 0, &p0, TS_RUN, 60) == NULL) {
2308 kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
2309 return (EFAULT);
2310 } else {
2311 return (0);
2312 }
2313 }
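
/*
 * Editorial sketch (illustrative, not driver code): the resync count sent
 * above lets the master pick owners round-robin.  Given a hypothetical
 * array of live node ids, the selection reduces to a modulo walk:
 *
 *	static int
 *	pick_owner(const int *nodes, int nnodes, unsigned int rcnt)
 *	{
 *		return (nodes[rcnt % nnodes]);
 *	}
 *
 * Because each new resync increments rcnt, successive resyncs are spread
 * across the nodelist instead of piling onto a single owner.
 */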
2314
2315 /*
2316 * mirror_get_status:
2317 * ----------------------------------
2318 * Called by nodes which are not the master node of the cluster. Obtains the
2319 * master abr state and the submirror status for each valid submirror of the
2320 * unit so that the status returned by metastat is consistent across the
2321 * cluster.
2322 * We update tstate for the mirror and both the sm_flag and the sm_state for
2323 * each submirror.
2324 *
2325 * Input:
2326 * un mirror to obtain status from
2327 *
2328 * Calling Convention:
2329 * writerlock (either ioctl or unit) must be held
2330 */
2331 void
2332 mirror_get_status(mm_unit_t *un, IOLOCK *lockp)
2333 {
2334 mm_submirror_t *sm;
2335 int smi;
2336 int rval;
2337 md_mn_kresult_t *kres;
2338 md_mn_msg_mir_state_t msg;
2339 md_mn_msg_mir_state_res_t *res;
2340 set_t setno = MD_UN2SET(un);
2341 mdi_unit_t *ui = MDI_UNIT(MD_SID(un));
2342
2343
2344 ASSERT(ui->ui_lock & MD_UL_WRITER);
2345
2346 /*
2347 * Get all of the information for the mirror.
2348 */
2349 bzero(&msg, sizeof (msg));
2350 msg.mir_state_mnum = MD_SID(un);
2351
2352 /*
2353 * Must drop the writerlock over ksend_message since another
2354 * thread on this node could be running a higher-class message
2355 * and be trying to grab the readerlock.
2356 *
2357 * If we are in the context of an ioctl, drop the ioctl lock.
2358 * lockp holds the list of locks held.
2359 */
2360 if (lockp) {
2361 IOLOCK_RETURN_RELEASE(0, lockp);
2362 } else {
2363 md_unit_writerexit(ui);
2364 }
2365
2366 kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
2367 rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE,
2368 MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg,
2369 sizeof (msg), kres);
2370
2371 /* If the node hasn't yet joined, that's OK. */
2372 if ((!MDMN_KSEND_MSG_OK(rval, kres)) &&
2373 (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) {
2374 mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE");
2375 cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE");
2376 }
2377
2378 /* if dropped the lock previously, regain it */
2379 if (lockp) {
2380 IOLOCK_RETURN_REACQUIRE(lockp);
2381 } else {
2382 /*
2383 * Reacquire dropped locks and update acquirecnts
2384 * appropriately.
2385 */
2386 (void) md_unit_writerlock(ui);
2387 }
2388
2389 /*
2390 * Check to see if we've got a believable amount of returned data.
2391 * If not, we simply return as there is no usable information.
2392 */
2393 if (kres->kmmr_res_size < sizeof (*res)) {
2394 cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected"
2395 " %d\n", kres->kmmr_res_size, (int)sizeof (*res));
2396 kmem_free(kres, sizeof (md_mn_kresult_t));
2397 return;
2398 }
2399
2400 /*
2401 * Copy the results from the call back into our sm_state/sm_flags
2402 */
2403 res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data;
2404 #ifdef DEBUG
2405 if (mirror_debug_flag)
2406 printf("mirror_get_status: %s\n", md_shortname(MD_SID(un)));
2407 #endif
2408 for (smi = 0; smi < NMIRROR; smi++) {
2409 sm = &un->un_sm[smi];
2410 #ifdef DEBUG
2411 if (mirror_debug_flag) {
2412 printf("curr state %4x, new state %4x\n", sm->sm_state,
2413 res->sm_state[smi]);
2414 printf("curr_flags %4x, new flags %4x\n", sm->sm_flags,
2415 res->sm_flags[smi]);
2416 }
2417 #endif
2418 sm->sm_state = res->sm_state[smi];
2419 sm->sm_flags = res->sm_flags[smi];
2420 }
2421
2422 /* Set ABR if set on the Master node */
2423 ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP);
2424
2425 kmem_free(kres, sizeof (md_mn_kresult_t));
2426 }
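
/*
 * Editorial sketch (illustrative, not driver code): the lock handling in
 * mirror_get_status follows the usual "drop, block, reacquire" shape.
 * A self-contained outline with hypothetical types and functions:
 *
 *	typedef struct unit unit_t;
 *
 *	extern void unit_writerexit(unit_t *);
 *	extern void unit_writerlock(unit_t *);
 *	extern int blocking_rpc(unit_t *, int *);
 *
 *	static int
 *	fetch_remote_state(unit_t *un)
 *	{
 *		int res, rv;
 *
 *		unit_writerexit(un);
 *		rv = blocking_rpc(un, &res);
 *		unit_writerlock(un);
 *		return (rv);
 *	}
 *
 * The key property is that no lock needed by a higher-class message
 * handler is held across the blocking call; results are applied only
 * after the lock has been reacquired.
 */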
2427
2428 /*
2429 * mirror_get_mir_state:
2430 * -------------------
2431 * Obtain the ABR state of a mirror and the state of all submirrors from the
2432 * master node for the unit specified in sm_state->mnum.
2433 * Called by MD_MN_GET_MIRROR_STATE ioctl.
2434 */
2435 static int
2436 mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp)
2437 {
2438 mm_unit_t *un;
2439 set_t setno;
2440 md_error_t mde;
2441
2442 mdclrerror(&mde);
2443
2444 if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) {
2445 return (EINVAL);
2446 }
2447 setno = MD_MIN2SET(p->mnum);
2448 if (!MD_MNSET_SETNO(setno)) {
2449 return (EINVAL);
2450 }
2451
2452 /*
2453 * We've now got a writerlock on the unit structure (so no-one can
2454 * modify the incore values) and we'll now send the message to the
2455 * master node. Since we're only called as part of a reconfig cycle
2456 * we don't need to release the unit locks across the ksend_message as
2457 * only the master node will process it, and we never send this to
2458 * ourselves if we're the master.
2459 */
2460
2461 mirror_get_status(un, lockp);
2462
2463 return (0);
2464 }
2465
2466 static int
2467 mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
2468 {
2469 size_t sz = 0;
2470 void *d = NULL;
2471 int err = 0;
2472
2473 /* We can only handle 32-bit clients for internal commands */
2474 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
2475 return (EINVAL);
2476 }
2477 /* dispatch ioctl */
2478 switch (cmd) {
2479
2480 case MD_IOCSET:
2481 {
2482 if (! (mode & FWRITE))
2483 return (EACCES);
2484
2485 sz = sizeof (md_set_params_t);
2486
2487 d = kmem_alloc(sz, KM_SLEEP);
2488
2489 if (ddi_copyin(data, d, sz, mode)) {
2490 err = EFAULT;
2491 break;
2492 }
2493
2494 err = mirror_set(d, mode);
2495 break;
2496 }
2497
2498 case MD_IOCGET:
2499 {
2500 if (! (mode & FREAD))
2501 return (EACCES);
2502
2503 sz = sizeof (md_i_get_t);
2504
2505 d = kmem_alloc(sz, KM_SLEEP);
2506
2507 if (ddi_copyin(data, d, sz, mode)) {
2508 err = EFAULT;
2509 break;
2510 }
2511
2512 err = mirror_get(d, mode, lockp);
2513 break;
2514 }
2515
2516 case MD_IOCRESET:
2517 {
2518 if (! (mode & FWRITE))
2519 return (EACCES);
2520
2521 sz = sizeof (md_i_reset_t);
2522 d = kmem_alloc(sz, KM_SLEEP);
2523
2524 if (ddi_copyin(data, d, sz, mode)) {
2525 err = EFAULT;
2526 break;
2527 }
2528
2529 err = mirror_reset((md_i_reset_t *)d);
2530 break;
2531 }
2532
2533 case MD_IOCSETSYNC:
2534 case MD_MN_SETSYNC:
2535 {
2536 if (! (mode & FWRITE))
2537 return (EACCES);
2538
2539 sz = sizeof (md_resync_ioctl_t);
2540 d = kmem_alloc(sz, KM_SLEEP);
2541
2542 if (ddi_copyin(data, d, sz, mode)) {
2543 err = EFAULT;
2544 break;
2545 }
2546
2547 err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp);
2548 break;
2549 }
2550
2551 case MD_IOCGETSYNC:
2552 {
2553 if (! (mode & FREAD))
2554 return (EACCES);
2555
2556 sz = sizeof (md_resync_ioctl_t);
2557 d = kmem_alloc(sz, KM_SLEEP);
2558
2559 if (ddi_copyin(data, d, sz, mode)) {
2560 err = EFAULT;
2561 break;
2562 }
2563
2564 err = mirror_get_resync((md_resync_ioctl_t *)d);
2565 break;
2566 }
2567
2568 case MD_IOCREPLACE:
2569 {
2570 if (! (mode & FWRITE))
2571 return (EACCES);
2572
2573 sz = sizeof (replace_params_t);
2574 d = kmem_alloc(sz, KM_SLEEP);
2575
2576 if (ddi_copyin(data, d, sz, mode)) {
2577 err = EFAULT;
2578 break;
2579 }
2580
2581 err = comp_replace((replace_params_t *)d, lockp);
2582 break;
2583 }
2584
2585 case MD_IOCOFFLINE:
2586 {
2587 if (! (mode & FWRITE))
2588 return (EACCES);
2589
2590 sz = sizeof (md_i_off_on_t);
2591 d = kmem_alloc(sz, KM_SLEEP);
2592
2593 if (ddi_copyin(data, d, sz, mode)) {
2594 err = EFAULT;
2595 break;
2596 }
2597
2598 err = mirror_offline((md_i_off_on_t *)d, lockp);
2599 break;
2600 }
2601
2602 case MD_IOCONLINE:
2603 {
2604 if (! (mode & FWRITE))
2605 return (EACCES);
2606
2607 sz = sizeof (md_i_off_on_t);
2608 d = kmem_alloc(sz, KM_SLEEP);
2609
2610 if (ddi_copyin(data, d, sz, mode)) {
2611 err = EFAULT;
2612 break;
2613 }
2614
2615 err = mirror_online((md_i_off_on_t *)d, lockp);
2616 break;
2617 }
2618
2619 case MD_IOCDETACH:
2620 {
2621 if (! (mode & FWRITE))
2622 return (EACCES);
2623
2624 sz = sizeof (md_detach_params_t);
2625 d = kmem_alloc(sz, KM_SLEEP);
2626
2627 if (ddi_copyin(data, d, sz, mode)) {
2628 err = EFAULT;
2629 break;
2630 }
2631
2632 err = mirror_detach((md_detach_params_t *)d, lockp);
2633 break;
2634 }
2635
2636 case MD_IOCATTACH:
2637 {
2638
2639 if (! (mode & FWRITE))
2640 return (EACCES);
2641
2642 sz = sizeof (md_att_struct_t);
2643 d = kmem_alloc(sz, KM_SLEEP);
2644
2645 if (ddi_copyin(data, d, sz, mode)) {
2646 err = EFAULT;
2647 break;
2648 }
2649
2650 err = mirror_attach((md_att_struct_t *)d, lockp);
2651 break;
2652 }
2653
2654 case MD_IOCGET_DEVS:
2655 {
2656 if (! (mode & FREAD))
2657 return (EACCES);
2658
2659 sz = sizeof (md_getdevs_params_t);
2660
2661 d = kmem_alloc(sz, KM_SLEEP);
2662
2663 if (ddi_copyin(data, d, sz, mode)) {
2664 err = EFAULT;
2665 break;
2666 }
2667
2668 err = mirror_getdevs(d, mode, lockp);
2669 break;
2670 }
2671
2672 case MD_IOCGROW:
2673 {
2674 if (! (mode & FWRITE))
2675 return (EACCES);
2676
2677 sz = sizeof (md_grow_params_t);
2678
2679 d = kmem_alloc(sz, KM_SLEEP);
2680
2681 if (ddi_copyin(data, d, sz, mode)) {
2682 err = EFAULT;
2683 break;
2684 }
2685
2686 err = mirror_grow(d, lockp);
2687 break;
2688 }
2689
2690 case MD_IOCCHANGE:
2691 {
2692 if (! (mode & FWRITE))
2693 return (EACCES);
2694
2695 sz = sizeof (md_mirror_params_t);
2696 d = kmem_alloc(sz, KM_SLEEP);
2697
2698 if (ddi_copyin(data, d, sz, mode)) {
2699 err = EFAULT;
2700 break;
2701 }
2702
2703 err = mirror_change((md_mirror_params_t *)d, lockp);
2704 break;
2705 }
2706
2707 case MD_IOCPROBE_DEV:
2708 {
2709 md_probedev_impl_t *p = NULL;
2710 md_probedev_t *ph = NULL;
2711 daemon_queue_t *hdr = NULL;
2712 int i;
2713 size_t sz2 = 0;
2714
2715 if (! (mode & FREAD))
2716 return (EACCES);
2717
2718
2719 sz = sizeof (md_probedev_t);
2720 d = kmem_alloc(sz, KM_SLEEP);
2721
2722 /* now copy in the data */
2723 if (ddi_copyin(data, d, sz, mode)) {
2724 err = EFAULT;
2725 goto free_mem;
2726 }
2727
2728 /*
2729 * Sanity-check the args. The test name must contain the keyword
2730 * "probe".
2731 */
2732
2733 p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);
2734
2735 p->probe_sema = NULL;
2736 p->probe_mx = NULL;
2737 p->probe.mnum_list = (uint64_t)NULL;
2738
2739 ph = (struct md_probedev *)d;
2740
2741 p->probe.nmdevs = ph->nmdevs;
2742 (void) strcpy(p->probe.test_name, ph->test_name);
2743 bcopy(&ph->md_driver, &(p->probe.md_driver),
2744 sizeof (md_driver_t));
2745
2746 if ((p->probe.nmdevs < 1) ||
2747 (strstr(p->probe.test_name, "probe") == NULL)) {
2748 err = EINVAL;
2749 goto free_mem;
2750 }
2751
2752
2753 sz2 = sizeof (minor_t) * p->probe.nmdevs;
2754 p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2,
2755 KM_SLEEP);
2756
2757 if (ddi_copyin((void *)(uintptr_t)ph->mnum_list,
2758 (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) {
2759 err = EFAULT;
2760 goto free_mem;
2761 }
2762
2763 if ((err = md_init_probereq(p, &hdr)) != 0)
2764 goto free_mem;
2765
2766 /*
2767 * Put the request on the queue and wait for the probes to complete.
2768 */
2769
2770 daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);
2771
2772 (void) IOLOCK_RETURN(0, lockp);
2773 /* wait for the events to occur */
2774 for (i = 0; i < p->probe.nmdevs; i++) {
2775 sema_p(PROBE_SEMA(p));
2776 }
2777 while (md_ioctl_lock_enter() == EINTR)
2778 ;
2779
2780 /*
2781 * Clean up. The hdr list is freed in the probe routines,
2782 * so the list is NULL by the time we get here.
2783 */
2784 free_mem:
2785 if (p) {
2786 if (p->probe_sema != NULL) {
2787 sema_destroy(PROBE_SEMA(p));
2788 kmem_free(p->probe_sema, sizeof (ksema_t));
2789 }
2790 if (p->probe_mx != NULL) {
2791 mutex_destroy(PROBE_MX(p));
2792 kmem_free(p->probe_mx, sizeof (kmutex_t));
2793 }
2794 if ((uintptr_t)p->probe.mnum_list)
2795 kmem_free((void *)(uintptr_t)
2796 p->probe.mnum_list, sz2);
2797
2798 kmem_free(p, sizeof (md_probedev_impl_t));
2799 }
2800 break;
2801 }
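
/*
 * Editorial sketch (illustrative, not driver code): the probe case above
 * uses a counting semaphore as a completion barrier, one sema_p() per
 * queued probe.  The same pattern with POSIX semaphores, names
 * hypothetical:
 *
 *	#include <semaphore.h>
 *
 *	static sem_t done;
 *
 *	static void
 *	probe_complete(void)
 *	{
 *		(void) sem_post(&done);
 *	}
 *
 *	static void
 *	wait_for_probes(int nprobes)
 *	{
 *		int i;
 *
 *		for (i = 0; i < nprobes; i++)
 *			(void) sem_wait(&done);
 *	}
 *
 * Each worker posts exactly once on completion, so the loop returns only
 * when every outstanding probe has finished.
 */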
2802
2803 case MD_MN_SET_MM_OWNER:
2804 {
2805 if (! (mode & FWRITE))
2806 return (EACCES);
2807
2808 sz = sizeof (md_set_mmown_params_t);
2809 d = kmem_alloc(sz, KM_SLEEP);
2810
2811 if (ddi_copyin(data, d, sz, mode) != 0) {
2812 err = EFAULT;
2813 break;
2814 }
2815
2816 err = mirror_set_owner((md_set_mmown_params_t *)d, lockp);
2817 break;
2818 }
2819
2820 case MD_MN_GET_MM_OWNER:
2821 {
2822 if (! (mode & FREAD))
2823 return (EACCES);
2824
2825 sz = sizeof (md_set_mmown_params_t);
2826 d = kmem_alloc(sz, KM_SLEEP);
2827
2828 if (ddi_copyin(data, d, sz, mode) != 0) {
2829 err = EFAULT;
2830 break;
2831 }
2832
2833 err = mirror_get_owner((md_set_mmown_params_t *)d, lockp);
2834 break;
2835 }
2836
2837 case MD_MN_MM_OWNER_STATUS:
2838 {
2839 if (! (mode & FREAD))
2840 return (EACCES);
2841
2842 sz = sizeof (md_mn_own_status_t);
2843 d = kmem_alloc(sz, KM_SLEEP);
2844
2845 if (ddi_copyin(data, d, sz, mode) != 0) {
2846 err = EFAULT;
2847 break;
2848 }
2849
2850 err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp);
2851 break;
2852 }
2853
2854 case MD_MN_SET_STATE:
2855 {
2856 if (! (mode & FWRITE))
2857 return (EACCES);
2858
2859 sz = sizeof (md_set_state_params_t);
2860 d = kmem_alloc(sz, KM_SLEEP);
2861
2862 if (ddi_copyin(data, d, sz, mode)) {
2863 err = EFAULT;
2864 break;
2865 }
2866
2867 err = mirror_set_state((md_set_state_params_t *)d, lockp);
2868 break;
2869 }
2870
2871 case MD_MN_SUSPEND_WRITES:
2872 {
2873 if (! (mode & FREAD))
2874 return (EACCES);
2875
2876 sz = sizeof (md_suspend_wr_params_t);
2877 d = kmem_alloc(sz, KM_SLEEP);
2878
2879 if (ddi_copyin(data, d, sz, mode) != 0) {
2880 err = EFAULT;
2881 break;
2882 }
2883
2884 err = mirror_suspend_writes((md_suspend_wr_params_t *)d);
2885 break;
2886 }
2887
2888 case MD_MN_RESYNC:
2889 {
2890 sz = sizeof (md_mn_rs_params_t);
2891 d = kmem_alloc(sz, KM_SLEEP);
2892
2893 if (ddi_copyin(data, d, sz, mode) != 0) {
2894 err = EFAULT;
2895 break;
2896 }
2897
2898 err = mirror_resync_message((md_mn_rs_params_t *)d, lockp);
2899 break;
2900 }
2901
2902 case MD_MN_ALLOCATE_HOTSPARE:
2903 {
2904 if (! (mode & FWRITE))
2905 return (EACCES);
2906
2907 sz = sizeof (md_alloc_hotsp_params_t);
2908 d = kmem_alloc(sz, KM_SLEEP);
2909
2910 if (ddi_copyin(data, d, sz, mode)) {
2911 err = EFAULT;
2912 break;
2913 }
2914
2915 err = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d,
2916 lockp);
2917 break;
2918 }
2919
2920 case MD_MN_POKE_HOTSPARES:
2921 {
2922 (void) poke_hotspares();
2923 break;
2924 }
2925
2926 case MD_MN_SET_CAP:
2927 {
2928 if (! (mode & FWRITE))
2929 return (EACCES);
2930
2931 sz = sizeof (md_mn_setcap_params_t);
2932 d = kmem_alloc(sz, KM_SLEEP);
2933
2934 if (ddi_copyin(data, d, sz, mode)) {
2935 err = EFAULT;
2936 break;
2937 }
2938
2939 err = mirror_set_capability((md_mn_setcap_params_t *)d,
2940 lockp);
2941 break;
2942 }
2943
2944 case MD_MN_GET_MIRROR_STATE:
2945 {
2946 sz = sizeof (md_mn_get_mir_state_t);
2947 d = kmem_zalloc(sz, KM_SLEEP);
2948
2949 if (ddi_copyin(data, d, sz, mode)) {
2950 err = EFAULT;
2951 break;
2952 }
2953
2954 err = mirror_get_mir_state((md_mn_get_mir_state_t *)d,
2955 lockp);
2956 break;
2957 }
2958
2959 case MD_MN_RR_DIRTY:
2960 {
2961 sz = sizeof (md_mn_rr_dirty_params_t);
2962 d = kmem_zalloc(sz, KM_SLEEP);
2963
2964 if (ddi_copyin(data, d, sz, mode)) {
2965 err = EFAULT;
2966 break;
2967 }
2968
2969 err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d);
2970 break;
2971 }
2972
2973 case MD_MN_RR_CLEAN:
2974 {
2975 md_mn_rr_clean_params_t tmp;
2976
2977 /* get the first part of the structure to find the size */
2978 if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) {
2979 err = EFAULT;
2980 break;
2981 }
2982
2983 sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp);
2984 d = kmem_zalloc(sz, KM_SLEEP);
2985
2986 if (ddi_copyin(data, d, sz, mode)) {
2987 err = EFAULT;
2988 break;
2989 }
2990
2991 err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d);
2992 break;
2993 }
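
/*
 * Editorial sketch (illustrative, not driver code): MD_MN_RR_CLEAN above
 * is the standard two-phase copyin for a variable-length ioctl argument.
 * The shape, with a hypothetical header type:
 *
 *	struct vhdr {
 *		uint_t nents;
 *	};
 *
 *	struct vhdr tmp;
 *	size_t sz;
 *	void *d;
 *
 *	if (ddi_copyin(data, &tmp, sizeof (tmp), mode))
 *		return (EFAULT);
 *	sz = sizeof (tmp) + tmp.nents * sizeof (uint32_t);
 *	d = kmem_zalloc(sz, KM_SLEEP);
 *	if (ddi_copyin(data, d, sz, mode) ||
 *	    ((struct vhdr *)d)->nents != tmp.nents) {
 *		kmem_free(d, sz);
 *		return (EFAULT);
 *	}
 *
 * The second copyin re-reads the header from user space, so re-checking
 * the embedded count guards against the caller racing a change to it
 * between the two copies (a classic TOCTOU pitfall).
 */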
2994
2995 default:
2996 return (ENOTTY);
2997 }
2998
2999 /*
3000 * Copy out and free any args.
3001 */
3002 if (sz != 0) {
3003 if (err == 0) {
3004 if (ddi_copyout(d, data, sz, mode) != 0) {
3005 err = EFAULT;
3006 }
3007 }
3008 kmem_free(d, sz);
3009 }
3010 return (err);
3011 }
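
/*
 * Editorial note (illustrative, not driver code): every case in
 * mirror_admin_ioctl reduces to the same template, which the shared exit
 * path above relies on:
 *
 *	sz = sizeof (arg_t);
 *	d = kmem_alloc(sz, KM_SLEEP);
 *	if (ddi_copyin(data, d, sz, mode)) {
 *		err = EFAULT;
 *		break;
 *	}
 *	err = handler(d, lockp);
 *	break;
 *
 * Because sz and d are set before any early break, the common exit can
 * copy results back to user space on success and free the buffer
 * whenever one was allocated.  (arg_t and handler stand in for the
 * per-command types and functions.)
 */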
3012
3013 int
3014 md_mirror_ioctl(
3015 dev_t ddi_dev,
3016 int cmd,
3017 void *data,
3018 int mode,
3019 IOLOCK *lockp
3020 )
3021 {
3022 minor_t mnum = getminor(ddi_dev);
3023 mm_unit_t *un;
3024 int err = 0;
3025
3026 /* handle admin ioctls */
3027 if (mnum == MD_ADM_MINOR)
3028 return (mirror_admin_ioctl(cmd, data, mode, lockp));
3029
3030 /* check unit */
3031 if ((MD_MIN2SET(mnum) >= md_nsets) ||
3032 (MD_MIN2UNIT(mnum) >= md_nunits) ||
3033 ((un = MD_UNIT(mnum)) == NULL))
3034 return (ENXIO);
3035 /* is this a supported ioctl? */
3036 err = md_check_ioctl_against_unit(cmd, un->c);
3037 if (err != 0) {
3038 return (err);
3039 }
3040
3041 /* dispatch ioctl */
3042 switch (cmd) {
3043
3044 case DKIOCINFO:
3045 {
3046 struct dk_cinfo *p;
3047
3048 if (! (mode & FREAD))
3049 return (EACCES);
3050
3051 p = kmem_alloc(sizeof (*p), KM_SLEEP);
3052
3053 get_info(p, mnum);
3054 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
3055 err = EFAULT;
3056
3057 kmem_free(p, sizeof (*p));
3058 return (err);
3059 }
3060
3061 case DKIOCGMEDIAINFO:
3062 {
3063 struct dk_minfo p;
3064
3065 if (! (mode & FREAD))
3066 return (EACCES);
3067
3068 get_minfo(&p, mnum);
3069 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
3070 err = EFAULT;
3071
3072 return (err);
3073 }
3074
3075 case DKIOCGGEOM:
3076 {
3077 struct dk_geom *p;
3078
3079 if (! (mode & FREAD))
3080 return (EACCES);
3081
3082 p = kmem_alloc(sizeof (*p), KM_SLEEP);
3083
3084 if ((err = mirror_get_geom(un, p)) == 0) {
3085 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
3086 mode) != 0)
3087 err = EFAULT;
3088 }
3089
3090 kmem_free(p, sizeof (*p));
3091 return (err);
3092 }
3093
3094 case DKIOCGVTOC:
3095 {
3096 struct vtoc *vtoc;
3097
3098 if (! (mode & FREAD))
3099 return (EACCES);
3100
3101 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3102
3103 if ((err = mirror_get_vtoc(un, vtoc)) != 0) {
3104 kmem_free(vtoc, sizeof (*vtoc));
3105 return (err);
3106 }
3107
3108 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3109 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
3110 err = EFAULT;
3111 }
3112 #ifdef _SYSCALL32
3113 else {
3114 struct vtoc32 *vtoc32;
3115
3116 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3117
3118 vtoctovtoc32((*vtoc), (*vtoc32));
3119 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
3120 err = EFAULT;
3121 kmem_free(vtoc32, sizeof (*vtoc32));
3122 }
3123 #endif /* _SYSCALL32 */
3124
3125 kmem_free(vtoc, sizeof (*vtoc));
3126 return (err);
3127 }
3128
3129 case DKIOCSVTOC:
3130 {
3131 struct vtoc *vtoc;
3132
3133 if (! (mode & FWRITE))
3134 return (EACCES);
3135
3136 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3137
3138 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3139 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
3140 err = EFAULT;
3141 }
3142 }
3143 #ifdef _SYSCALL32
3144 else {
3145 struct vtoc32 *vtoc32;
3146
3147 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3148
3149 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
3150 err = EFAULT;
3151 } else {
3152 vtoc32tovtoc((*vtoc32), (*vtoc));
3153 }
3154 kmem_free(vtoc32, sizeof (*vtoc32));
3155 }
3156 #endif /* _SYSCALL32 */
3157
3158 if (err == 0)
3159 err = mirror_set_vtoc(un, vtoc);
3160
3161 kmem_free(vtoc, sizeof (*vtoc));
3162 return (err);
3163 }
3164
3165 case DKIOCGEXTVTOC:
3166 {
3167 struct extvtoc *extvtoc;
3168
3169 if (! (mode & FREAD))
3170 return (EACCES);
3171
3172 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3173
3174 if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) {
3175 kmem_free(extvtoc, sizeof (*extvtoc));
3176 return (err);
3177 }
3178
3179 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
3180 err = EFAULT;
3181
3182 kmem_free(extvtoc, sizeof (*extvtoc));
3183 return (err);
3184 }
3185
3186 case DKIOCSEXTVTOC:
3187 {
3188 struct extvtoc *extvtoc;
3189
3190 if (! (mode & FWRITE))
3191 return (EACCES);
3192
3193 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3194
3195 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
3196 err = EFAULT;
3197 }
3198
3199 if (err == 0)
3200 err = mirror_set_extvtoc(un, extvtoc);
3201
3202 kmem_free(extvtoc, sizeof (*extvtoc));
3203 return (err);
3204 }
3205
3206 case DKIOCGAPART:
3207 {
3208 struct dk_map dmp;
3209
3210 if ((err = mirror_get_cgapart(un, &dmp)) != 0) {
3211 return (err);
3212 }
3213
3214 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3215 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
3216 mode) != 0)
3217 err = EFAULT;
3218 }
3219 #ifdef _SYSCALL32
3220 else {
3221 struct dk_map32 dmp32;
3222
3223 dmp32.dkl_cylno = dmp.dkl_cylno;
3224 dmp32.dkl_nblk = dmp.dkl_nblk;
3225
3226 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
3227 mode) != 0)
3228 err = EFAULT;
3229 }
3230 #endif /* _SYSCALL32 */
3231
3232 return (err);
3233 }
3234 case DKIOCGETEFI:
3235 {
3236 /*
3237 * This one can be handled centrally; there is no need to duplicate
3238 * the same code for all types of metadevices.
3239 */
3240 return (md_dkiocgetefi(mnum, data, mode));
3241 }
3242 case DKIOCSETEFI:
3243 {
3244 /*
3245 * This one can be handled centrally; there is no need to duplicate
3246 * the same code for all types of metadevices.
3247 */
3248 return (md_dkiocsetefi(mnum, data, mode));
3249 }
3250 case DKIOCPARTITION:
3251 {
3252 return (md_dkiocpartition(mnum, data, mode));
3253 }
3254
3255 case DKIOCGETVOLCAP:
3256 {
3257 volcap_t vc;
3258 mdi_unit_t *ui;
3259
3260 /* Only valid for MN sets */
3261 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3262 return (EINVAL);
3263
3264 ui = MDI_UNIT(mnum);
3265 if (! (mode & FREAD))
3266 return (EACCES);
3267
3268 vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP;
3269 vc.vc_set = 0;
3270 if (ui->ui_tstate & MD_ABR_CAP) {
3271 vc.vc_set |= DKV_ABR_CAP;
3272 }
3273 if (ddi_copyout(&vc, data, sizeof (volcap_t), mode))
3274 err = EFAULT;
3275 return (err);
3276 }
3277
3278 case DKIOCSETVOLCAP:
3279 {
3280 volcap_t vc;
3281 volcapset_t volcap = 0;
3282 mdi_unit_t *ui;
3283
3284 /* Only valid for MN sets */
3285 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3286 return (EINVAL);
3287
3288 ui = MDI_UNIT(mnum);
3289 if (! (mode & FWRITE))
3290 return (EACCES);
3291
3292 if (ddi_copyin(data, &vc, sizeof (volcap_t), mode))
3293 return (EFAULT);
3294
3295 /* Not valid if a submirror is offline */
3296 if (un->c.un_status & MD_UN_OFFLINE_SM) {
3297 return (EINVAL);
3298 }
3299 if (ui->ui_tstate & MD_ABR_CAP)
3300 volcap |= DKV_ABR_CAP;
3301 /* Only send capability message if there is a change */
3302 if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
3303 err = mdmn_send_capability_message(mnum, vc, lockp);
3304 return (err);
3305 }
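
/*
 * Editorial sketch (illustrative, not driver code): enabling ABR from
 * user level through the two ioctls above.  The device path is an
 * example; volcap_t and the DKIOC* definitions come from <sys/dkio.h>:
 *
 *	#include <sys/types.h>
 *	#include <sys/dkio.h>
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int
 *	enable_abr(const char *path)
 *	{
 *		volcap_t vc;
 *		int fd = open(path, O_RDWR);
 *
 *		if (fd < 0)
 *			return (-1);
 *		if (ioctl(fd, DKIOCGETVOLCAP, &vc) == 0 &&
 *		    (vc.vc_info & DKV_ABR_CAP)) {
 *			vc.vc_set = DKV_ABR_CAP;
 *			(void) ioctl(fd, DKIOCSETVOLCAP, &vc);
 *		}
 *		(void) close(fd);
 *		return (0);
 *	}
 */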
3306
3307 case DKIOCDMR:
3308 {
3309 vol_directed_rd_t *vdr;
3310
3311 #ifdef _MULTI_DATAMODEL
3312 vol_directed_rd32_t *vdr32;
3313 #endif /* _MULTI_DATAMODEL */
3314
3315 /* Only valid for MN sets */
3316 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3317 return (EINVAL);
3318
3319 vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
3320 if (vdr == NULL)
3321 return (ENOMEM);
3322
3323 #ifdef _MULTI_DATAMODEL
3324 vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
3325 if (vdr32 == NULL) {
3326 kmem_free(vdr, sizeof (vol_directed_rd_t));
3327 return (ENOMEM);
3328 }
3329
3330 switch (ddi_model_convert_from(mode & FMODELS)) {
3331 case DDI_MODEL_ILP32:
3332 /*
3333 * If we're called from a higher-level driver we don't
3334 * need to manipulate the data. It's already been done by
3335 * the caller.
3336 */
3337 if (!(mode & FKIOCTL)) {
3338 if (ddi_copyin(data, vdr32, sizeof (*vdr32),
3339 mode)) {
3340 kmem_free(vdr, sizeof (*vdr));
3341 return (EFAULT);
3342 }
3343 vdr->vdr_flags = vdr32->vdr_flags;
3344 vdr->vdr_offset = vdr32->vdr_offset;
3345 vdr->vdr_nbytes = vdr32->vdr_nbytes;
3346 vdr->vdr_data =
3347 (void *)(uintptr_t)vdr32->vdr_data;
3348 vdr->vdr_side = vdr32->vdr_side;
3349 break;
3350 }
3351 /* FALLTHROUGH */
3352
3353 case DDI_MODEL_NONE:
3354 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3355 kmem_free(vdr32, sizeof (*vdr32));
3356 kmem_free(vdr, sizeof (*vdr));
3357 return (EFAULT);
3358 }
3359 break;
3360
3361 default:
3362 kmem_free(vdr32, sizeof (*vdr32));
3363 kmem_free(vdr, sizeof (*vdr));
3364 return (EFAULT);
3365 }
3366 #else /* ! _MULTI_DATAMODEL */
3367 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3368 kmem_free(vdr, sizeof (*vdr));
3369 return (EFAULT);
3370 }
3371 #endif /* _MULTI_DATAMODEL */
3372
3373 err = mirror_directed_read(ddi_dev, vdr, mode);
3374
3375 if (err == 0) {
3376 #ifdef _MULTI_DATAMODEL
3377 switch (ddi_model_convert_from(mode & FMODELS)) {
3378 case DDI_MODEL_ILP32:
3379 if (!(mode & FKIOCTL)) {
3380 vdr32->vdr_flags = vdr->vdr_flags;
3381 vdr32->vdr_offset = vdr->vdr_offset;
3382 vdr32->vdr_side = vdr->vdr_side;
3383 vdr32->vdr_bytesread =
3384 vdr->vdr_bytesread;
3385 bcopy(vdr->vdr_side_name,
3386 vdr32->vdr_side_name,
3387 sizeof (vdr32->vdr_side_name));
3388
3389 if (ddi_copyout(vdr32, data,
3390 sizeof (*vdr32), mode)) {
3391 err = EFAULT;
3392 }
3393 break;
3394 }
3395 /* FALLTHROUGH */
3396
3397 case DDI_MODEL_NONE:
3398 if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3399 err = EFAULT;
3400 break;
3401 }
3402 #else /* ! _MULTI_DATAMODEL */
3403 if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3404 err = EFAULT;
3405 #endif /* _MULTI_DATAMODEL */
3406 if (vdr->vdr_flags & DKV_DMR_ERROR)
3407 err = EIO;
3408 }
3409
3410 #ifdef _MULTI_DATAMODEL
3411 kmem_free(vdr32, sizeof (*vdr32));
3412 #endif /* _MULTI_DATAMODEL */
3413
3414 kmem_free(vdr, sizeof (*vdr));
3415
3416 return (err);
3417 }
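
/*
 * Editorial note (illustrative, not driver code): the DKIOCDMR handling
 * above is an instance of the standard _MULTI_DATAMODEL idiom.  Stripped
 * to its skeleton, with arg and arg32 standing in for the native and
 * ILP32 forms:
 *
 *	switch (ddi_model_convert_from(mode & FMODELS)) {
 *	case DDI_MODEL_ILP32:
 *		if (ddi_copyin(data, &arg32, sizeof (arg32), mode))
 *			return (EFAULT);
 *		arg.ptr = (void *)(uintptr_t)arg32.ptr;
 *		break;
 *	case DDI_MODEL_NONE:
 *		if (ddi_copyin(data, &arg, sizeof (arg), mode))
 *			return (EFAULT);
 *		break;
 *	}
 *
 * The explicit (void *)(uintptr_t) widening matters for pointer-sized
 * fields such as vdr_data: a 32-bit integer field cannot be assigned
 * directly to a 64-bit kernel pointer.
 */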
3418
3419 default:
3420 return (ENOTTY);
3421 }
3422 }
3423
3424 /*
3425 * rename named service entry points and support functions
3426 */
3427
3428 /*
3429 * rename/exchange role swap functions
3430 *
3431 * most of these are handled by generic role swap functions
3432 */
3433
3434 /*
3435 * MDRNM_UPDATE_KIDS
3436 * rename/exchange of our child or grandchild
3437 */
3438 void
3439 mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp)
3440 {
3441 mm_submirror_t *sm;
3442 int smi;
3443
3444 ASSERT(rtxnp);
3445 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
3446 ASSERT(rtxnp->recids);
3447 ASSERT(delta);
3448 ASSERT(delta->unp);
3449 ASSERT(delta->old_role == MDRR_PARENT);
3450 ASSERT(delta->new_role == MDRR_PARENT);
3451
3452 /*
3453 * Since our role isn't changing (parent->parent),
3454 * one of our children must be changing.
3455 * Find the child being modified, and update
3456 * our notion of it.
3457 */
3458 for (smi = 0; smi < NMIRROR; smi++) {
3459 mm_unit_t *un = (mm_unit_t *)delta->unp;
3460
3461 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3462 continue;
3463 }
3464 sm = &un->un_sm[smi];
3465
3466 if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) {
3467 sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3468 sm->sm_key = rtxnp->to.key;
3469 break;
3470 }
3471 }
3472
3473 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3474 }
3475
3476 /*
3477 * exchange down (self->child)
3478 */
3479 void
3480 mirror_exchange_self_update_from_down(
3481 md_rendelta_t *delta,
3482 md_rentxn_t *rtxnp
3483 )
3484 {
3485 int smi;
3486 mm_submirror_t *found;
3487 minor_t from_min, to_min;
3488 sv_dev_t sv;
3489
3490 ASSERT(rtxnp);
3491 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3492 ASSERT(rtxnp->recids);
3493 ASSERT(rtxnp->rec_idx >= 0);
3494 ASSERT(delta);
3495 ASSERT(delta->unp);
3496 ASSERT(delta->uip);
3497 ASSERT(delta->old_role == MDRR_SELF);
3498 ASSERT(delta->new_role == MDRR_CHILD);
3499 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
3500
3501 from_min = rtxnp->from.mnum;
3502 to_min = rtxnp->to.mnum;
3503
3504 /*
3505 * self id changes in our own unit struct
3506 */
3507
3508 MD_SID(delta->unp) = to_min;
3509
3510 /*
3511 * parent identifier need not change
3512 */
3513
3514 /*
3515 * point the set array pointers at the "new" unit and unit in-cores
3516 * Note: the other half of this transfer is done in the "update_to"
3517 * exchange named service.
3518 */
3519
3520 MDI_VOIDUNIT(to_min) = delta->uip;
3521 MD_VOIDUNIT(to_min) = delta->unp;
3522
3523 /*
3524 * transfer kstats
3525 */
3526
3527 delta->uip->ui_kstat = rtxnp->to.kstatp;
3528
3529 /*
3530 * the unit in-core reference to the get-next link's id changes
3531 */
3532
3533 delta->uip->ui_link.ln_id = to_min;
3534
3535 /*
3536 * find the child whose identity we're assuming
3537 */
3538
3539 for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3540 mm_submirror_t *sm;
3541 mm_unit_t *un = (mm_unit_t *)delta->unp;
3542
3543 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3544 continue;
3545 }
3546 sm = &un->un_sm[smi];
3547
3548 if (md_getminor(sm->sm_dev) == to_min) {
3549 found = sm;
3550 }
3551 }
3552 ASSERT(found);
3553
3554 /*
3555 * Update the sub-mirror's identity
3556 */
3557 found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum);
3558 sv.key = found->sm_key;
3559
3560 ASSERT(rtxnp->from.key != MD_KEYWILD);
3561 ASSERT(rtxnp->from.key != MD_KEYBAD);
3562
3563 found->sm_key = rtxnp->from.key;
3564
3565 /*
3566 * delete the key for the old sub-mirror from the name space
3567 */
3568
3569 sv.setno = MD_MIN2SET(from_min);
3570 md_rem_names(&sv, 1);
3571
3572 /*
3573 * and store the record id (from the unit struct) into recids
3574 */
3575
3576 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3577 }
3578
3579 /*
3580 * exchange down (parent->self)
3581 */
3582 void
3583 mirror_exchange_parent_update_to(
3584 md_rendelta_t *delta,
3585 md_rentxn_t *rtxnp
3586 )
3587 {
3588 int smi;
3589 mm_submirror_t *found;
3590 minor_t from_min, to_min;
3591 sv_dev_t sv;
3592
3593 ASSERT(rtxnp);
3594 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3595 ASSERT(rtxnp->recids);
3596 ASSERT(rtxnp->rec_idx >= 0);
3597 ASSERT(delta);
3598 ASSERT(delta->unp);
3599 ASSERT(delta->uip);
3600 ASSERT(delta->old_role == MDRR_PARENT);
3601 ASSERT(delta->new_role == MDRR_SELF);
3602 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
3603
3604 from_min = rtxnp->from.mnum;
3605 to_min = rtxnp->to.mnum;
3606
3607 /*
3608 * self id changes in our own unit struct
3609 */
3610
3611 MD_SID(delta->unp) = from_min;
3612
3613 /*
3614 * parent identifier need not change
3615 */
3616
3617 /*
3618 * point the set array pointers at the "new" unit and unit in-cores
3619 * Note: the other half of this transfer is done in the "update_from_down"
3620 * exchange named service.
3621 */
3622
3623 MDI_VOIDUNIT(from_min) = delta->uip;
3624 MD_VOIDUNIT(from_min) = delta->unp;
3625
3626 /*
3627 * transfer kstats
3628 */
3629
3630 delta->uip->ui_kstat = rtxnp->from.kstatp;
3631
3632 /*
3633 * the unit in-core reference to the get-next link's id changes
3634 */
3635
3636 delta->uip->ui_link.ln_id = from_min;
3637
3638 /*
3639 * find the child whose identity we're assuming
3640 */
3641
3642 for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3643 mm_submirror_t *sm;
3644 mm_unit_t *un = (mm_unit_t *)delta->unp;
3645
3646 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3647 continue;
3648 }
3649 sm = &un->un_sm[smi];
3650
3651 if (md_getminor(sm->sm_dev) == from_min) {
3652 found = sm;
3653 }
3654 }
3655 ASSERT(found);
3656
3657 /*
3658 * Update the sub-mirror's identity
3659 */
3660 found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3661 sv.key = found->sm_key;
3662
3663 ASSERT(rtxnp->to.key != MD_KEYWILD);
3664 ASSERT(rtxnp->to.key != MD_KEYBAD);
3665
3666 found->sm_key = rtxnp->to.key;
3667
3668 /*
3669 * delete the key for the old sub-mirror from the name space
3670 */
3671
3672 sv.setno = MD_MIN2SET(to_min);
3673 md_rem_names(&sv, 1);
3674
3675 /*
3676 * and store the record id (from the unit struct) into recids
3677 */
3678
3679 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3680 }
3681
3682 /*
3683 * MDRNM_LIST_URKIDS: named svc entry point
3684 * Add all delta entries appropriate for our children onto the
3685 * delta list pointed to by dlpp
3686 */
3687 int
3688 mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
3689 {
3690 minor_t from_min, to_min;
3691 mm_unit_t *from_un;
3692 md_rendelta_t *new, *p;
3693 int smi;
3694 int n_children;
3695 mm_submirror_t *sm;
3696
3697 ASSERT(rtxnp);
3698 ASSERT(dlpp);
3699 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
3700
3701 from_min = rtxnp->from.mnum;
3702 to_min = rtxnp->to.mnum;
3703 n_children = 0;
3704
3705 if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
3706 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
3707 return (-1);
3708 }
3709
3710 for (p = *dlpp; p && p->next != NULL; p = p->next) {
3711 /* NULL */
3712 }
3713
3714 for (smi = 0; smi < NMIRROR; smi++) {
3715 minor_t child_min;
3716
3717 if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) {
3718 continue;
3719 }
3720
3721 sm = &from_un->un_sm[smi];
3722 child_min = md_getminor(sm->sm_dev);
3723
3724 p = new = md_build_rendelta(MDRR_CHILD,
3725 to_min == child_min ? MDRR_SELF : MDRR_CHILD,
3726 sm->sm_dev, p,
3727 MD_UNIT(child_min), MDI_UNIT(child_min),
3728 &rtxnp->mde);
3729
3730 if (!new) {
3731 if (mdisok(&rtxnp->mde)) {
3732 (void) mdsyserror(&rtxnp->mde, ENOMEM);
3733 }
3734 return (-1);
3735 }
3736 ++n_children;
3737 }
3738
3739 return (n_children);
3740 }
3741
3742 /*
3743 * support routine for MDRNM_CHECK
3744 */
3745 static int
3746 mirror_may_renexch_self(
3747 mm_unit_t *un,
3748 mdi_unit_t *ui,
3749 md_rentxn_t *rtxnp)
3750 {
3751 minor_t from_min;
3752 minor_t to_min;
3753 bool_t toplevel;
3754 bool_t related;
3755 int smi;
3756 mm_submirror_t *sm;
3757
3758 from_min = rtxnp->from.mnum;
3759 to_min = rtxnp->to.mnum;
3760
3761 if (!un || !ui) {
3762 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3763 from_min);
3764 return (EINVAL);
3765 }
3766
3767 ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
3768 if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
3769 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3770 return (EINVAL);
3771 }
3772
3773 if (MD_PARENT(un) == MD_MULTI_PARENT) {
3774 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3775 return (EINVAL);
3776 }
3777
3778 toplevel = !MD_HAS_PARENT(MD_PARENT(un));
3779
3780 /* we're related if trying to swap with our parent */
3781 related = (!toplevel) && (MD_PARENT(un) == to_min);
3782
3783 switch (rtxnp->op) {
3784 case MDRNOP_EXCHANGE:
3785 /*
3786 * check for a swap with our child
3787 */
3788 for (smi = 0; smi < NMIRROR; smi++) {
3789
3790 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3791 continue;
3792 }
3793
3794 sm = &un->un_sm[smi];
3795 if (md_getminor(sm->sm_dev) == to_min) {
3796 related |= TRUE;
3797 }
3798 }
3799 if (!related) {
3800 (void) mdmderror(&rtxnp->mde,
3801 MDE_RENAME_TARGET_UNRELATED, to_min);
3802 return (EINVAL);
3803 }
3804
3805 break;
3806
3807 case MDRNOP_RENAME:
3808 /*
3809 * If "from" is top-level and is open, then the kernel is using
3810 * the md_dev64_t and the rename must be rejected as busy.
3811 */
3812
3813 if (toplevel && md_unit_isopen(ui)) {
3814 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3815 from_min);
3816 return (EBUSY);
3817 }
3818 break;
3819
3820 default:
3821 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3822 from_min);
3823 return (EINVAL);
3824 }
3825
3826 return (0); /* ok */
3827 }
3828
3829 /*
3830 * Named service entry point: MDRNM_CHECK
3831 */
3832 intptr_t
3833 mirror_rename_check(
3834 md_rendelta_t *delta,
3835 md_rentxn_t *rtxnp)
3836 {
3837 mm_submirror_t *sm;
3838 mm_submirror_ic_t *smic;
3839 md_m_shared_t *shared;
3840 int ci;
3841 int i;
3842 int compcnt;
3843 mm_unit_t *un;
3844 int err = 0;
3845
3846 ASSERT(delta);
3847 ASSERT(rtxnp);
3848 ASSERT(delta->unp);
3849 ASSERT(delta->uip);
3850 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
3851
3852 if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3853 (void) mdsyserror(&rtxnp->mde, EINVAL);
3854 return (EINVAL);
3855 }
3856
3857 un = (mm_unit_t *)delta->unp;
3858
3859 for (i = 0; i < NMIRROR; i++) {
3860 sm = &un->un_sm[i];
3861 smic = &un->un_smic[i];
3862
3863 if (!SMS_IS(sm, SMS_INUSE))
3864 continue;
3865
3866 ASSERT(smic->sm_get_component_count);
3867 if (!smic->sm_get_component_count) {
3868 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3869 md_getminor(delta->dev));
3870 return (ENXIO);
3871 }
3872
3873 compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
3874
3875 for (ci = 0; ci < compcnt; ci++) {
3876
3877 ASSERT(smic->sm_shared_by_indx);
3878 if (!smic->sm_shared_by_indx) {
3879 (void) mdmderror(&rtxnp->mde,
3880 MDE_RENAME_CONFIG_ERROR,
3881 md_getminor(delta->dev));
3882 return (ENXIO);
3883 }
3884
3885 shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
3886 (sm->sm_dev, sm, ci);
3887
3888 ASSERT(shared);
3889 if (!shared) {
3890 (void) mdmderror(&rtxnp->mde,
3891 MDE_RENAME_CONFIG_ERROR,
3892 md_getminor(delta->dev));
3893 return (ENXIO);
3894 }
3895
3896 if (shared->ms_hs_id != 0) {
3897 (void) mdmderror(&rtxnp->mde,
3898 MDE_SM_FAILED_COMPS,
3899 md_getminor(delta->dev));
3900 return (EIO);
3901 }
3902
3903 switch (shared->ms_state) {
3904 case CS_OKAY:
3905 break;
3906
3907 case CS_RESYNC:
3908 (void) mdmderror(&rtxnp->mde,
3909 MDE_RESYNC_ACTIVE,
3910 md_getminor(delta->dev));
3911 return (EBUSY);
3912
3913 default:
3914 (void) mdmderror(&rtxnp->mde,
3915 MDE_SM_FAILED_COMPS,
3916 md_getminor(delta->dev));
3917 return (EINVAL);
3918 }
3919
3920 }
3921 }
3922
3923 /* self does additional checks */
3924 if (delta->old_role == MDRR_SELF) {
3925 err = mirror_may_renexch_self(un, delta->uip, rtxnp);
3926 }
3927
3928 return (err);
3929 }
3930
3931 /* end of rename/exchange */
3932