10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51945Sjeanm * Common Development and Distribution License (the "License").
61945Sjeanm * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
221945Sjeanm * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
270Sstevel@tonic-gate
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate * Metadevice diskset interfaces
300Sstevel@tonic-gate */
310Sstevel@tonic-gate
320Sstevel@tonic-gate #include <meta.h>
330Sstevel@tonic-gate #include <mdmn_changelog.h>
340Sstevel@tonic-gate #include "meta_set_prv.h"
350Sstevel@tonic-gate #include "meta_repartition.h"
360Sstevel@tonic-gate
370Sstevel@tonic-gate static int
check_setnodes_againstdrivelist(mdsetname_t * sp,mddrivenamelist_t * dnlp,md_error_t * ep)380Sstevel@tonic-gate check_setnodes_againstdrivelist(
390Sstevel@tonic-gate mdsetname_t *sp,
400Sstevel@tonic-gate mddrivenamelist_t *dnlp,
410Sstevel@tonic-gate md_error_t *ep
420Sstevel@tonic-gate )
430Sstevel@tonic-gate {
440Sstevel@tonic-gate md_set_desc *sd;
450Sstevel@tonic-gate mddrivenamelist_t *p;
460Sstevel@tonic-gate int i;
470Sstevel@tonic-gate md_mnnode_desc *nd;
480Sstevel@tonic-gate
490Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
500Sstevel@tonic-gate return (-1);
510Sstevel@tonic-gate
520Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
530Sstevel@tonic-gate nd = sd->sd_nodelist;
540Sstevel@tonic-gate while (nd) {
550Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
560Sstevel@tonic-gate nd = nd->nd_next;
570Sstevel@tonic-gate continue;
580Sstevel@tonic-gate }
590Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next)
600Sstevel@tonic-gate if (checkdrive_onnode(sp, p->drivenamep,
610Sstevel@tonic-gate nd->nd_nodename, ep))
620Sstevel@tonic-gate return (-1);
630Sstevel@tonic-gate nd = nd->nd_next;
640Sstevel@tonic-gate }
650Sstevel@tonic-gate } else {
660Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
670Sstevel@tonic-gate /* Skip empty slots */
680Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
690Sstevel@tonic-gate continue;
700Sstevel@tonic-gate
710Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next)
720Sstevel@tonic-gate if (checkdrive_onnode(sp, p->drivenamep,
730Sstevel@tonic-gate sd->sd_nodes[i], ep))
740Sstevel@tonic-gate return (-1);
750Sstevel@tonic-gate }
760Sstevel@tonic-gate }
770Sstevel@tonic-gate return (0);
780Sstevel@tonic-gate }
790Sstevel@tonic-gate
800Sstevel@tonic-gate static int
drvsuniq(mdsetname_t * sp,mddrivenamelist_t * dnlp,md_error_t * ep)810Sstevel@tonic-gate drvsuniq(mdsetname_t *sp, mddrivenamelist_t *dnlp, md_error_t *ep)
820Sstevel@tonic-gate {
830Sstevel@tonic-gate mddrivenamelist_t *dl1, *dl2;
840Sstevel@tonic-gate mddrivename_t *dn1, *dn2;
850Sstevel@tonic-gate
860Sstevel@tonic-gate for (dl1 = dnlp; dl1 != NULL; dl1 = dl1->next) {
870Sstevel@tonic-gate dn1 = dl1->drivenamep;
880Sstevel@tonic-gate
890Sstevel@tonic-gate for (dl2 = dl1->next; dl2 != NULL; dl2 = dl2->next) {
900Sstevel@tonic-gate dn2 = dl2->drivenamep;
910Sstevel@tonic-gate if (strcmp(dn1->cname, dn2->cname) != 0)
920Sstevel@tonic-gate continue;
930Sstevel@tonic-gate
940Sstevel@tonic-gate return (mddserror(ep, MDE_DS_DUPDRIVE, sp->setno,
950Sstevel@tonic-gate NULL, dn1->cname, sp->setname));
960Sstevel@tonic-gate }
970Sstevel@tonic-gate }
980Sstevel@tonic-gate return (0);
990Sstevel@tonic-gate }
1000Sstevel@tonic-gate
1010Sstevel@tonic-gate static md_drive_desc *
metaget_drivedesc_fromdrivelist(mdsetname_t * sp,mddrivenamelist_t * dnlp,uint_t flags,md_error_t * ep)1020Sstevel@tonic-gate metaget_drivedesc_fromdrivelist(
1030Sstevel@tonic-gate mdsetname_t *sp,
1040Sstevel@tonic-gate mddrivenamelist_t *dnlp,
1050Sstevel@tonic-gate uint_t flags,
1060Sstevel@tonic-gate md_error_t *ep
1070Sstevel@tonic-gate )
1080Sstevel@tonic-gate {
1090Sstevel@tonic-gate mddrivenamelist_t *p;
1100Sstevel@tonic-gate md_drive_desc *dd = NULL;
1110Sstevel@tonic-gate md_set_desc *sd;
1120Sstevel@tonic-gate
1130Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
1140Sstevel@tonic-gate return (NULL);
1150Sstevel@tonic-gate
1160Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next) {
1170Sstevel@tonic-gate (void) metadrivedesc_append(&dd, p->drivenamep, 0, 0,
1180Sstevel@tonic-gate sd->sd_ctime, sd->sd_genid, flags);
1190Sstevel@tonic-gate }
1200Sstevel@tonic-gate
1210Sstevel@tonic-gate return (dd);
1220Sstevel@tonic-gate }
1230Sstevel@tonic-gate
1240Sstevel@tonic-gate /*
1250Sstevel@tonic-gate * Exported Entry Points
1260Sstevel@tonic-gate */
1270Sstevel@tonic-gate
1280Sstevel@tonic-gate int
meta_make_sidenmlist(mdsetname_t * sp,mddrivename_t * dnp,int import_flag,md_im_drive_info_t * midp,md_error_t * ep)1290Sstevel@tonic-gate meta_make_sidenmlist(
1301945Sjeanm mdsetname_t *sp,
1311945Sjeanm mddrivename_t *dnp,
1321945Sjeanm int import_flag, /* flags partial import */
1331945Sjeanm md_im_drive_info_t *midp, /* import drive information */
1341945Sjeanm md_error_t *ep
1350Sstevel@tonic-gate )
1360Sstevel@tonic-gate {
1371945Sjeanm mdsidenames_t *sn, **sn_next;
1381945Sjeanm mdname_t *np;
1391945Sjeanm int done;
1401945Sjeanm side_t sideno = MD_SIDEWILD;
1411945Sjeanm uint_t rep_slice;
1421945Sjeanm char *bname;
1430Sstevel@tonic-gate
1441945Sjeanm if (!import_flag) {
1451945Sjeanm /*
1461945Sjeanm * Normal (aka NOT partial import) code path.
1471945Sjeanm */
1481945Sjeanm if (meta_replicaslice(dnp, &rep_slice, ep) != 0) {
1491945Sjeanm return (-1);
1501945Sjeanm }
1511945Sjeanm
1521945Sjeanm dnp->side_names_key = MD_KEYWILD;
1530Sstevel@tonic-gate
1541945Sjeanm if ((np = metaslicename(dnp, rep_slice, ep)) == NULL)
1551945Sjeanm return (-1);
1561945Sjeanm bname = Strdup(np->bname);
1571945Sjeanm } else {
1581945Sjeanm /*
1591945Sjeanm * When doing a partial import, we'll get the needed
1601945Sjeanm * information from somewhere other than the system.
1611945Sjeanm */
1621945Sjeanm dnp->side_names_key = MD_KEYWILD;
1631945Sjeanm bname = Strdup(midp->mid_devname);
1641945Sjeanm }
1650Sstevel@tonic-gate metaflushsidenames(dnp);
1660Sstevel@tonic-gate sn_next = &dnp->side_names;
1670Sstevel@tonic-gate /*CONSTCOND*/
1680Sstevel@tonic-gate while (1) {
1690Sstevel@tonic-gate sn = Zalloc(sizeof (*sn));
1700Sstevel@tonic-gate
1711945Sjeanm if ((done = meta_getnextside_devinfo(sp, bname, &sideno,
1721945Sjeanm &sn->cname, &sn->dname, &sn->mnum, ep)) == -1) {
1731945Sjeanm if (import_flag) {
1741945Sjeanm mdclrerror(ep);
1751945Sjeanm sn->dname = Strdup(midp->mid_driver_name);
1761945Sjeanm sn->mnum = midp->mid_mnum;
1771945Sjeanm } else {
1781945Sjeanm Free(sn);
1791945Sjeanm Free(bname);
1801945Sjeanm return (-1);
1811945Sjeanm }
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate if (done == 0) {
1850Sstevel@tonic-gate Free(sn);
1861945Sjeanm Free(bname);
1870Sstevel@tonic-gate return (0);
1880Sstevel@tonic-gate }
1890Sstevel@tonic-gate
1900Sstevel@tonic-gate sn->sideno = sideno;
1910Sstevel@tonic-gate
1920Sstevel@tonic-gate /* Add to the end of the linked list */
1930Sstevel@tonic-gate assert(*sn_next == NULL);
1940Sstevel@tonic-gate *sn_next = sn;
1950Sstevel@tonic-gate sn_next = &sn->next;
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate /*NOTREACHED*/
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate
2000Sstevel@tonic-gate int
meta_set_adddrives(mdsetname_t * sp,mddrivenamelist_t * dnlp,daddr_t dbsize,int force_label,md_error_t * ep)2010Sstevel@tonic-gate meta_set_adddrives(
2020Sstevel@tonic-gate mdsetname_t *sp,
2030Sstevel@tonic-gate mddrivenamelist_t *dnlp,
2040Sstevel@tonic-gate daddr_t dbsize,
2050Sstevel@tonic-gate int force_label,
2060Sstevel@tonic-gate md_error_t *ep
2070Sstevel@tonic-gate )
2080Sstevel@tonic-gate {
2090Sstevel@tonic-gate md_set_desc *sd;
2100Sstevel@tonic-gate md_drive_desc *dd = NULL, *curdd = NULL, *ddp;
2110Sstevel@tonic-gate int i;
2120Sstevel@tonic-gate mddrivenamelist_t *p;
2130Sstevel@tonic-gate mhd_mhiargs_t mhiargs;
2140Sstevel@tonic-gate int rval = 0;
2150Sstevel@tonic-gate md_timeval32_t now;
2160Sstevel@tonic-gate sigset_t oldsigs;
2170Sstevel@tonic-gate ulong_t genid;
2180Sstevel@tonic-gate ulong_t max_genid = 0;
2190Sstevel@tonic-gate md_setkey_t *cl_sk;
2200Sstevel@tonic-gate int rb_level = 0;
2210Sstevel@tonic-gate md_error_t xep = mdnullerror;
2220Sstevel@tonic-gate md_mnnode_desc *nd;
2230Sstevel@tonic-gate int suspendall_flag = 0;
2240Sstevel@tonic-gate int suspend1_flag = 0;
2250Sstevel@tonic-gate int lock_flag = 0;
2260Sstevel@tonic-gate int flush_set_onerr = 0;
2272150Sjeanm md_replicalist_t *rlp = NULL, *rl;
2280Sstevel@tonic-gate
2290Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
2300Sstevel@tonic-gate return (-1);
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate /* Make sure we own the set */
2330Sstevel@tonic-gate if (meta_check_ownership(sp, ep) != 0)
2340Sstevel@tonic-gate return (-1);
2350Sstevel@tonic-gate
2360Sstevel@tonic-gate /*
2370Sstevel@tonic-gate * The drive and node records are stored in the local mddbs of each
2380Sstevel@tonic-gate * node in the diskset. Each node's rpc.metad daemon reads in the set,
2390Sstevel@tonic-gate * drive and node records from that node's local mddb and caches them
2400Sstevel@tonic-gate * internally. Any process needing diskset information contacts its
2410Sstevel@tonic-gate * local rpc.metad to get this information. Since each node in the
2420Sstevel@tonic-gate * diskset is independently reading the set information from its local
2430Sstevel@tonic-gate * mddb, the set, drive and node records in the local mddbs must stay
2440Sstevel@tonic-gate * in-sync, so that all nodes have a consistent view of the diskset.
2450Sstevel@tonic-gate *
2460Sstevel@tonic-gate * For a multinode diskset, explicitly verify that all nodes in the
2470Sstevel@tonic-gate * diskset are ALIVE (i.e. are in the API membership list). Otherwise,
2480Sstevel@tonic-gate * fail this operation since all nodes must be ALIVE in order to add
2490Sstevel@tonic-gate * the new drive record to their local mddb. If a panic of this node
2500Sstevel@tonic-gate * leaves the local mddbs set, node and drive records out-of-sync, the
2510Sstevel@tonic-gate * reconfig cycle will fix the local mddbs and force them back into
2520Sstevel@tonic-gate * synchronization.
2530Sstevel@tonic-gate */
2540Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
2550Sstevel@tonic-gate nd = sd->sd_nodelist;
2560Sstevel@tonic-gate while (nd) {
2570Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
2580Sstevel@tonic-gate (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
2590Sstevel@tonic-gate sp->setno,
2600Sstevel@tonic-gate nd->nd_nodename, NULL, sp->setname);
2610Sstevel@tonic-gate return (-1);
2620Sstevel@tonic-gate }
2630Sstevel@tonic-gate nd = nd->nd_next;
2640Sstevel@tonic-gate }
2650Sstevel@tonic-gate }
2660Sstevel@tonic-gate
2670Sstevel@tonic-gate if (drvsuniq(sp, dnlp, ep) == -1)
2680Sstevel@tonic-gate return (-1);
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate /*
2710Sstevel@tonic-gate * Lock the set on current set members.
2720Sstevel@tonic-gate * Set locking done much earlier for MN diskset than for traditional
2730Sstevel@tonic-gate * diskset since lock_set and SUSPEND are used to protect against
2740Sstevel@tonic-gate * other meta* commands running on the other nodes.
2750Sstevel@tonic-gate */
2760Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
2770Sstevel@tonic-gate /* Make sure we are blocking all signals */
2780Sstevel@tonic-gate if (procsigs(TRUE, &oldsigs, &xep) < 0)
2790Sstevel@tonic-gate mdclrerror(&xep);
2800Sstevel@tonic-gate
2810Sstevel@tonic-gate nd = sd->sd_nodelist;
2820Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
2830Sstevel@tonic-gate while (nd) {
2840Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
2850Sstevel@tonic-gate rval = -1;
2860Sstevel@tonic-gate goto out;
2870Sstevel@tonic-gate }
2880Sstevel@tonic-gate lock_flag = 1;
2890Sstevel@tonic-gate nd = nd->nd_next;
2900Sstevel@tonic-gate }
2910Sstevel@tonic-gate /*
2920Sstevel@tonic-gate * Lock out other meta* commands by suspending
2930Sstevel@tonic-gate * class 1 messages across the diskset.
2940Sstevel@tonic-gate */
2950Sstevel@tonic-gate nd = sd->sd_nodelist;
2960Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
2970Sstevel@tonic-gate while (nd) {
2980Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename,
2990Sstevel@tonic-gate COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
3000Sstevel@tonic-gate MD_MSCF_NO_FLAGS, ep)) {
3010Sstevel@tonic-gate rval = -1;
3020Sstevel@tonic-gate goto out;
3030Sstevel@tonic-gate }
3040Sstevel@tonic-gate suspend1_flag = 1;
3050Sstevel@tonic-gate nd = nd->nd_next;
3060Sstevel@tonic-gate }
3070Sstevel@tonic-gate }
3080Sstevel@tonic-gate
3090Sstevel@tonic-gate if (check_setnodes_againstdrivelist(sp, dnlp, ep)) {
3100Sstevel@tonic-gate rval = -1;
3110Sstevel@tonic-gate goto out;
3120Sstevel@tonic-gate }
3130Sstevel@tonic-gate
3140Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next) {
3150Sstevel@tonic-gate mdsetname_t *tmp;
3160Sstevel@tonic-gate
3170Sstevel@tonic-gate if (meta_is_drive_in_anyset(p->drivenamep, &tmp, FALSE,
3180Sstevel@tonic-gate ep) == -1) {
3190Sstevel@tonic-gate rval = -1;
3200Sstevel@tonic-gate goto out;
3210Sstevel@tonic-gate }
3220Sstevel@tonic-gate
3230Sstevel@tonic-gate if (tmp != NULL) {
3240Sstevel@tonic-gate (void) mddserror(ep, MDE_DS_DRIVEINSET, sp->setno,
3250Sstevel@tonic-gate tmp->setname, p->drivenamep->cname, sp->setname);
3260Sstevel@tonic-gate rval = -1;
3270Sstevel@tonic-gate goto out;
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate
3310Sstevel@tonic-gate /* END CHECK CODE */
3320Sstevel@tonic-gate
3330Sstevel@tonic-gate /*
3340Sstevel@tonic-gate * This is a separate loop (from above) so that we validate all the
3350Sstevel@tonic-gate * drives handed to us before we repartition any one drive.
3360Sstevel@tonic-gate */
3370Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next) {
3380Sstevel@tonic-gate if (meta_repartition_drive(sp,
3391945Sjeanm p->drivenamep, force_label == TRUE ? MD_REPART_FORCE : 0,
3400Sstevel@tonic-gate NULL, /* Don't return the VTOC. */
3410Sstevel@tonic-gate ep) != 0) {
3420Sstevel@tonic-gate rval = -1;
3430Sstevel@tonic-gate goto out;
3440Sstevel@tonic-gate }
3450Sstevel@tonic-gate /*
3460Sstevel@tonic-gate * Create the names for the drives we are adding per side.
3470Sstevel@tonic-gate */
3481945Sjeanm if (meta_make_sidenmlist(sp, p->drivenamep, 0, NULL,
3491945Sjeanm ep) == -1) {
3500Sstevel@tonic-gate rval = -1;
3510Sstevel@tonic-gate goto out;
3520Sstevel@tonic-gate }
3530Sstevel@tonic-gate }
3540Sstevel@tonic-gate
3550Sstevel@tonic-gate /*
3560Sstevel@tonic-gate * Get the list of drives descriptors that we are adding.
3570Sstevel@tonic-gate */
3580Sstevel@tonic-gate dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep);
3590Sstevel@tonic-gate
3600Sstevel@tonic-gate if (! mdisok(ep)) {
3610Sstevel@tonic-gate rval = -1;
3620Sstevel@tonic-gate goto out;
3630Sstevel@tonic-gate }
3640Sstevel@tonic-gate
3650Sstevel@tonic-gate /*
3660Sstevel@tonic-gate * Get the set timeout information.
3670Sstevel@tonic-gate */
3680Sstevel@tonic-gate (void) memset(&mhiargs, '\0', sizeof (mhiargs));
3690Sstevel@tonic-gate if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
3700Sstevel@tonic-gate rval = -1;
3710Sstevel@tonic-gate goto out;
3720Sstevel@tonic-gate }
3730Sstevel@tonic-gate
3740Sstevel@tonic-gate /*
3750Sstevel@tonic-gate * Get timestamp and generation id for new records
3760Sstevel@tonic-gate */
3770Sstevel@tonic-gate now = sd->sd_ctime;
3780Sstevel@tonic-gate genid = sd->sd_genid;
3790Sstevel@tonic-gate
3800Sstevel@tonic-gate
3810Sstevel@tonic-gate /* At this point, in case of error, set should be flushed. */
3820Sstevel@tonic-gate flush_set_onerr = 1;
3830Sstevel@tonic-gate
3840Sstevel@tonic-gate /* Lock the set on current set members */
3850Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
3860Sstevel@tonic-gate md_rb_sig_handling_on();
3870Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
3880Sstevel@tonic-gate /* Skip empty slots */
3890Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
3900Sstevel@tonic-gate continue;
3910Sstevel@tonic-gate
3920Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
3930Sstevel@tonic-gate rval = -1;
3940Sstevel@tonic-gate goto out;
3950Sstevel@tonic-gate }
3960Sstevel@tonic-gate lock_flag = 1;
3970Sstevel@tonic-gate }
3980Sstevel@tonic-gate }
3990Sstevel@tonic-gate
4000Sstevel@tonic-gate /*
4010Sstevel@tonic-gate * Get drive descriptors for the drives that are currently in the set.
4020Sstevel@tonic-gate */
4030Sstevel@tonic-gate curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep);
4040Sstevel@tonic-gate if (! mdisok(ep))
4050Sstevel@tonic-gate goto rollback;
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate /*
4080Sstevel@tonic-gate * If first drive being added to set, set the mastership
4090Sstevel@tonic-gate * of the multinode diskset to be this node.
4100Sstevel@tonic-gate * Only set it on this node. If all goes well
4110Sstevel@tonic-gate * and there are no errors, the mastership of this node will be set
4120Sstevel@tonic-gate * on all nodes in user space and in the kernel.
4130Sstevel@tonic-gate */
4140Sstevel@tonic-gate if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
4150Sstevel@tonic-gate if (clnt_mnsetmaster(mynode(), sp,
4160Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodename,
4170Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodeid, ep)) {
4180Sstevel@tonic-gate goto rollback;
4190Sstevel@tonic-gate }
4200Sstevel@tonic-gate /*
4210Sstevel@tonic-gate * Set this up in my local cache of the set desc so that
4220Sstevel@tonic-gate * the set descriptor won't have to be gotten again from
4230Sstevel@tonic-gate * rpc.metad. If it is flushed and gotten again, these
4240Sstevel@tonic-gate * values will be set in sr2setdesc.
4250Sstevel@tonic-gate */
4260Sstevel@tonic-gate sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid;
4270Sstevel@tonic-gate (void) strcpy(sd->sd_mn_master_nodenm,
4280Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodename);
4290Sstevel@tonic-gate sd->sd_mn_am_i_master = 1;
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate
4320Sstevel@tonic-gate RB_TEST(1, "adddrives", ep)
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate RB_PREEMPT;
4350Sstevel@tonic-gate rb_level = 1; /* level 1 */
4360Sstevel@tonic-gate
4370Sstevel@tonic-gate RB_TEST(2, "adddrives", ep)
4380Sstevel@tonic-gate
4390Sstevel@tonic-gate /*
4400Sstevel@tonic-gate * Add the drive records for the drives that we are adding to
4410Sstevel@tonic-gate * each host in the set. Marks the drive as MD_DR_ADD.
4420Sstevel@tonic-gate */
4430Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
4440Sstevel@tonic-gate nd = sd->sd_nodelist;
4450Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
4460Sstevel@tonic-gate while (nd) {
4470Sstevel@tonic-gate if (clnt_adddrvs(nd->nd_nodename, sp, dd, now, genid,
4480Sstevel@tonic-gate ep) == -1)
4490Sstevel@tonic-gate goto rollback;
4500Sstevel@tonic-gate
4510Sstevel@tonic-gate RB_TEST(3, "adddrives", ep)
4520Sstevel@tonic-gate nd = nd->nd_next;
4530Sstevel@tonic-gate }
4540Sstevel@tonic-gate } else {
4550Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
4560Sstevel@tonic-gate /* Skip empty slots */
4570Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
4580Sstevel@tonic-gate continue;
4590Sstevel@tonic-gate
4600Sstevel@tonic-gate if (clnt_adddrvs(sd->sd_nodes[i], sp, dd, now, genid,
4610Sstevel@tonic-gate ep) == -1)
4620Sstevel@tonic-gate goto rollback;
4630Sstevel@tonic-gate
4640Sstevel@tonic-gate RB_TEST(3, "adddrives", ep)
4650Sstevel@tonic-gate }
4660Sstevel@tonic-gate }
4670Sstevel@tonic-gate
4680Sstevel@tonic-gate RB_TEST(4, "adddrives", ep)
4690Sstevel@tonic-gate
4700Sstevel@tonic-gate RB_PREEMPT;
4710Sstevel@tonic-gate rb_level = 2; /* level 2 */
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate RB_TEST(5, "adddrives", ep)
4740Sstevel@tonic-gate
4750Sstevel@tonic-gate /*
4760Sstevel@tonic-gate * Take ownership of the added drives.
4770Sstevel@tonic-gate */
4780Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
4790Sstevel@tonic-gate if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep))
4800Sstevel@tonic-gate goto rollback;
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate
483*3073Sjkennedy /*
484*3073Sjkennedy * If this is not a MN set and the state flags do not indicate the
485*3073Sjkennedy * presence of devids, update the set records on all nodes.
486*3073Sjkennedy */
487*3073Sjkennedy if (!(sd->sd_flags & MD_SR_MB_DEVID) && !(MD_MNSET_DESC(sd))) {
488*3073Sjkennedy if (meta_update_mb(sp, dd, ep) == 0) {
489*3073Sjkennedy mdclrerror(ep);
490*3073Sjkennedy
491*3073Sjkennedy /* update the sr_flags on all hosts */
492*3073Sjkennedy for (i = 0; i < MD_MAXSIDES; i++) {
493*3073Sjkennedy if (sd->sd_nodes[i][0] == '\0')
494*3073Sjkennedy continue;
495*3073Sjkennedy
496*3073Sjkennedy if (clnt_upd_sr_flags(sd->sd_nodes[i],
497*3073Sjkennedy sp, (sd->sd_flags | MD_SR_MB_DEVID), ep))
498*3073Sjkennedy goto rollback;
499*3073Sjkennedy }
500*3073Sjkennedy }
501*3073Sjkennedy }
502*3073Sjkennedy
5030Sstevel@tonic-gate RB_TEST(6, "adddrives", ep)
5040Sstevel@tonic-gate
5050Sstevel@tonic-gate RB_PREEMPT;
5060Sstevel@tonic-gate rb_level = 3; /* level 3 */
5070Sstevel@tonic-gate
5080Sstevel@tonic-gate RB_TEST(7, "adddrives", ep)
5090Sstevel@tonic-gate
5100Sstevel@tonic-gate /*
5110Sstevel@tonic-gate * Balance the DB's according to the list of existing drives and the
5120Sstevel@tonic-gate * list of added drives.
5130Sstevel@tonic-gate */
5140Sstevel@tonic-gate if ((rval = meta_db_balance(sp, dd, curdd, dbsize, ep)) == -1)
5150Sstevel@tonic-gate goto rollback;
5160Sstevel@tonic-gate
5172150Sjeanm /*
5182150Sjeanm * Slam a dummy master block on all the disks that we are adding
5192150Sjeanm * that don't have replicas on them.
5202150Sjeanm * Used by diskset import if the disksets are remotely replicated
5212150Sjeanm */
5222150Sjeanm if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) {
5232150Sjeanm for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
5242150Sjeanm uint_t rep_slice;
5252150Sjeanm int fd = -1;
5262150Sjeanm mdname_t *np = NULL;
5272150Sjeanm char *drive_name;
5282150Sjeanm
5292150Sjeanm drive_name = ddp->dd_dnp->cname;
5302150Sjeanm
5312150Sjeanm for (rl = rlp; rl != NULL; rl = rl->rl_next) {
5322150Sjeanm char *rep_name;
5332150Sjeanm
5342150Sjeanm rep_name =
5352150Sjeanm rl->rl_repp->r_namep->drivenamep->cname;
5362150Sjeanm
5372150Sjeanm if (strcmp(drive_name, rep_name) == 0) {
5382150Sjeanm /*
5392150Sjeanm * Disk has a replica on it so don't
5402150Sjeanm * add dummy master block.
5412150Sjeanm */
5422150Sjeanm break;
5432150Sjeanm }
5442150Sjeanm }
5452150Sjeanm if (rl == NULL) {
5462150Sjeanm /*
5472150Sjeanm * Drive doesn't have a replica on it so
5482150Sjeanm * we need a dummy master block. Add it.
5492150Sjeanm */
5502150Sjeanm if (meta_replicaslice(ddp->dd_dnp, &rep_slice,
5512150Sjeanm &xep) != 0) {
5522150Sjeanm mdclrerror(&xep);
5532150Sjeanm continue;
5542150Sjeanm }
5552150Sjeanm
5562150Sjeanm if ((np = metaslicename(ddp->dd_dnp, rep_slice,
5572150Sjeanm &xep)) == NULL) {
5582150Sjeanm mdclrerror(&xep);
5592150Sjeanm continue;
5602150Sjeanm }
5612150Sjeanm
5622150Sjeanm if ((fd = open(np->rname, O_RDWR)) >= 0) {
5632150Sjeanm meta_mkdummymaster(sp, fd, 16);
5642150Sjeanm (void) close(fd);
5652150Sjeanm }
5662150Sjeanm }
5672150Sjeanm }
5682150Sjeanm }
5692150Sjeanm
5700Sstevel@tonic-gate if ((curdd == NULL) && (MD_MNSET_DESC(sd))) {
5710Sstevel@tonic-gate /*
5720Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
5730Sstevel@tonic-gate * Start by suspending rpc.mdcommd (which drains it of all
5740Sstevel@tonic-gate * messages), then change the nodelist followed by a reinit
5750Sstevel@tonic-gate * and resume.
5760Sstevel@tonic-gate */
5770Sstevel@tonic-gate nd = sd->sd_nodelist;
5780Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
5790Sstevel@tonic-gate while (nd) {
5800Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
5810Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
5820Sstevel@tonic-gate rval = -1;
5830Sstevel@tonic-gate goto out;
5840Sstevel@tonic-gate }
5850Sstevel@tonic-gate suspendall_flag = 1;
5860Sstevel@tonic-gate nd = nd->nd_next;
5870Sstevel@tonic-gate }
5880Sstevel@tonic-gate }
5890Sstevel@tonic-gate
5900Sstevel@tonic-gate /*
5910Sstevel@tonic-gate * If a MN diskset and this is the first disk(s) being added
5920Sstevel@tonic-gate * to set, then pre-allocate change log records here.
5930Sstevel@tonic-gate * When the other nodes are joined into the MN diskset, the
5940Sstevel@tonic-gate * USER records will just be snarfed in.
5950Sstevel@tonic-gate */
5960Sstevel@tonic-gate if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
5970Sstevel@tonic-gate if (mdmn_allocate_changelog(sp, ep) != 0)
5980Sstevel@tonic-gate goto rollback;
5990Sstevel@tonic-gate }
6000Sstevel@tonic-gate
6010Sstevel@tonic-gate /*
6020Sstevel@tonic-gate * Mark the drives MD_DR_OK.
6030Sstevel@tonic-gate * If first drive being added to MN diskset, then set
6040Sstevel@tonic-gate * master on all nodes to be this node and then join
6050Sstevel@tonic-gate * all alive nodes (nodes in membership list) to set.
6060Sstevel@tonic-gate */
6070Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
6080Sstevel@tonic-gate nd = sd->sd_nodelist;
6090Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
6100Sstevel@tonic-gate while (nd) {
6110Sstevel@tonic-gate /* don't set master on this node - done earlier */
6120Sstevel@tonic-gate if ((curdd == NULL) && (nd->nd_nodeid !=
6130Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodeid)) {
6140Sstevel@tonic-gate /*
6150Sstevel@tonic-gate * Set master on all alive nodes since
6160Sstevel@tonic-gate * all alive nodes will become joined nodes.
6170Sstevel@tonic-gate */
6180Sstevel@tonic-gate if (clnt_mnsetmaster(nd->nd_nodename, sp,
6190Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodename,
6200Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodeid, ep)) {
6210Sstevel@tonic-gate goto rollback;
6220Sstevel@tonic-gate }
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate
6250Sstevel@tonic-gate if (curdd == NULL) {
6260Sstevel@tonic-gate /*
6270Sstevel@tonic-gate * No special flags for join set. Since
6280Sstevel@tonic-gate * all nodes are joining if 1st drive is being
6290Sstevel@tonic-gate * added to set then all nodes will be either
6300Sstevel@tonic-gate * STALE or non-STALE and each node can
6310Sstevel@tonic-gate * determine this on its own.
6320Sstevel@tonic-gate */
6330Sstevel@tonic-gate if (clnt_joinset(nd->nd_nodename, sp,
6340Sstevel@tonic-gate NULL, ep)) {
6350Sstevel@tonic-gate goto rollback;
6360Sstevel@tonic-gate }
6370Sstevel@tonic-gate /* Sets join node flag on all nodes in list */
6380Sstevel@tonic-gate if (clnt_upd_nr_flags(nd->nd_nodename, sp,
6390Sstevel@tonic-gate sd->sd_nodelist, MD_NR_JOIN, NULL, ep)) {
6400Sstevel@tonic-gate goto rollback;
6410Sstevel@tonic-gate }
6420Sstevel@tonic-gate }
6430Sstevel@tonic-gate
6440Sstevel@tonic-gate /*
6450Sstevel@tonic-gate * Set MD_DR_OK as last thing before unlock.
6460Sstevel@tonic-gate * In case of panic on this node, recovery
6470Sstevel@tonic-gate * code can check for MD_DR_OK to determine
6480Sstevel@tonic-gate * status of diskset.
6490Sstevel@tonic-gate */
6500Sstevel@tonic-gate if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
6510Sstevel@tonic-gate MD_DR_OK, ep) == -1)
6520Sstevel@tonic-gate goto rollback;
6530Sstevel@tonic-gate
6540Sstevel@tonic-gate
6550Sstevel@tonic-gate RB_TEST(8, "adddrives", ep)
6560Sstevel@tonic-gate nd = nd->nd_next;
6570Sstevel@tonic-gate }
6580Sstevel@tonic-gate } else {
6590Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
6600Sstevel@tonic-gate /* Skip empty slots */
6610Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
6620Sstevel@tonic-gate continue;
6630Sstevel@tonic-gate
6640Sstevel@tonic-gate if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, MD_DR_OK,
6650Sstevel@tonic-gate ep) == -1)
6660Sstevel@tonic-gate goto rollback;
6670Sstevel@tonic-gate
6680Sstevel@tonic-gate RB_TEST(8, "adddrives", ep)
6690Sstevel@tonic-gate }
6700Sstevel@tonic-gate }
6710Sstevel@tonic-gate
6720Sstevel@tonic-gate RB_TEST(9, "adddrives", ep)
6730Sstevel@tonic-gate
6740Sstevel@tonic-gate out:
6750Sstevel@tonic-gate /*
6760Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
6770Sstevel@tonic-gate * Send reinit command to mdcommd which forces it to get
6780Sstevel@tonic-gate * fresh set description.
6790Sstevel@tonic-gate */
6800Sstevel@tonic-gate if (suspendall_flag) {
6810Sstevel@tonic-gate /* Send reinit */
6820Sstevel@tonic-gate nd = sd->sd_nodelist;
6830Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
6840Sstevel@tonic-gate while (nd) {
6850Sstevel@tonic-gate /* Class is ignored for REINIT */
6860Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
6870Sstevel@tonic-gate sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
6880Sstevel@tonic-gate if (rval == 0)
6890Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
6900Sstevel@tonic-gate rval = -1;
6910Sstevel@tonic-gate mde_perror(ep, dgettext(TEXT_DOMAIN,
6920Sstevel@tonic-gate "Unable to reinit rpc.mdcommd.\n"));
6930Sstevel@tonic-gate }
6940Sstevel@tonic-gate nd = nd->nd_next;
6950Sstevel@tonic-gate }
6960Sstevel@tonic-gate }
6970Sstevel@tonic-gate /*
6980Sstevel@tonic-gate * Unlock diskset by resuming messages across the diskset.
6990Sstevel@tonic-gate * Just resume all classes so that resume is the same whether
7000Sstevel@tonic-gate * just one class was locked or all classes were locked.
7010Sstevel@tonic-gate */
7020Sstevel@tonic-gate if ((suspend1_flag) || (suspendall_flag)) {
7030Sstevel@tonic-gate nd = sd->sd_nodelist;
7040Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
7050Sstevel@tonic-gate while (nd) {
7060Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
7070Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
7080Sstevel@tonic-gate if (rval == 0)
7090Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
7100Sstevel@tonic-gate rval = -1;
7110Sstevel@tonic-gate mde_perror(ep, dgettext(TEXT_DOMAIN,
7120Sstevel@tonic-gate "Unable to resume rpc.mdcommd.\n"));
7130Sstevel@tonic-gate }
7140Sstevel@tonic-gate nd = nd->nd_next;
7150Sstevel@tonic-gate }
7160Sstevel@tonic-gate meta_ping_mnset(sp->setno);
7170Sstevel@tonic-gate }
7180Sstevel@tonic-gate
7190Sstevel@tonic-gate if (lock_flag) {
7200Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname);
7210Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
7220Sstevel@tonic-gate nd = sd->sd_nodelist;
7230Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
7240Sstevel@tonic-gate while (nd) {
7250Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename,
7260Sstevel@tonic-gate cl_sk, &xep)) {
7270Sstevel@tonic-gate if (rval == 0)
7280Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
7290Sstevel@tonic-gate rval = -1;
7300Sstevel@tonic-gate }
7310Sstevel@tonic-gate nd = nd->nd_next;
7320Sstevel@tonic-gate }
7330Sstevel@tonic-gate } else {
7340Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
7350Sstevel@tonic-gate /* Skip empty slots */
7360Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
7370Sstevel@tonic-gate continue;
7380Sstevel@tonic-gate
7390Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i],
7400Sstevel@tonic-gate cl_sk, &xep)) {
7410Sstevel@tonic-gate if (rval == 0)
7420Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
7430Sstevel@tonic-gate rval = -1;
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate }
7460Sstevel@tonic-gate }
7470Sstevel@tonic-gate cl_set_setkey(NULL);
7480Sstevel@tonic-gate }
7490Sstevel@tonic-gate
7500Sstevel@tonic-gate metafreedrivedesc(&dd);
7510Sstevel@tonic-gate
7520Sstevel@tonic-gate if (flush_set_onerr) {
7530Sstevel@tonic-gate metaflushsetname(sp);
7540Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
7550Sstevel@tonic-gate md_rb_sig_handling_off(md_got_sig(), md_which_sig());
7560Sstevel@tonic-gate }
7570Sstevel@tonic-gate }
7580Sstevel@tonic-gate
7590Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
7600Sstevel@tonic-gate /* release signals back to what they were on entry */
7610Sstevel@tonic-gate if (procsigs(FALSE, &oldsigs, &xep) < 0)
7620Sstevel@tonic-gate mdclrerror(&xep);
7630Sstevel@tonic-gate }
7640Sstevel@tonic-gate
7650Sstevel@tonic-gate return (rval);
7660Sstevel@tonic-gate
7670Sstevel@tonic-gate rollback:
7680Sstevel@tonic-gate /* all signals already blocked for MN diskset */
7690Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
7700Sstevel@tonic-gate /* Make sure we are blocking all signals */
7710Sstevel@tonic-gate if (procsigs(TRUE, &oldsigs, &xep) < 0)
7720Sstevel@tonic-gate mdclrerror(&xep);
7730Sstevel@tonic-gate }
7740Sstevel@tonic-gate
7750Sstevel@tonic-gate rval = -1;
7760Sstevel@tonic-gate
7770Sstevel@tonic-gate max_genid = sd->sd_genid;
7780Sstevel@tonic-gate
7790Sstevel@tonic-gate /* level 3 */
7800Sstevel@tonic-gate if (rb_level > 2) {
7810Sstevel@tonic-gate /*
7820Sstevel@tonic-gate * Since the add drive operation is failing, need
7830Sstevel@tonic-gate * to reset config back to the way it was
7840Sstevel@tonic-gate * before the add drive operation.
7850Sstevel@tonic-gate * If a MN diskset and this is the first drive being added,
7860Sstevel@tonic-gate * then reset master on all ALIVE nodes (which is all nodes)
7870Sstevel@tonic-gate * since the master would have not been set previously.
7880Sstevel@tonic-gate * Don't reset master on this node, since this
7890Sstevel@tonic-gate * is done later.
7900Sstevel@tonic-gate * This is ok to fail since next node to add first
7910Sstevel@tonic-gate * disk to diskset will also set the master on all nodes.
7920Sstevel@tonic-gate *
7930Sstevel@tonic-gate * Also, if this is the first drive being added,
7940Sstevel@tonic-gate * need to have each node withdraw itself from the set.
7950Sstevel@tonic-gate */
7960Sstevel@tonic-gate if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) {
7970Sstevel@tonic-gate nd = sd->sd_nodelist;
7980Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
7990Sstevel@tonic-gate while (nd) {
8000Sstevel@tonic-gate /*
8010Sstevel@tonic-gate * Be careful with ordering in case of
8020Sstevel@tonic-gate * panic between the steps and the
8030Sstevel@tonic-gate * effect on recovery during reconfig.
8040Sstevel@tonic-gate */
8050Sstevel@tonic-gate if (clnt_withdrawset(nd->nd_nodename, sp, &xep))
8060Sstevel@tonic-gate mdclrerror(&xep);
8070Sstevel@tonic-gate
8080Sstevel@tonic-gate /* Sets withdraw flag on all nodes in list */
8090Sstevel@tonic-gate if (clnt_upd_nr_flags(nd->nd_nodename, sp,
8100Sstevel@tonic-gate sd->sd_nodelist, MD_NR_WITHDRAW,
8110Sstevel@tonic-gate NULL, &xep)) {
8120Sstevel@tonic-gate mdclrerror(&xep);
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate /* Skip this node */
8160Sstevel@tonic-gate if (nd->nd_nodeid ==
8170Sstevel@tonic-gate sd->sd_mn_mynode->nd_nodeid) {
8180Sstevel@tonic-gate nd = nd->nd_next;
8190Sstevel@tonic-gate continue;
8200Sstevel@tonic-gate }
8210Sstevel@tonic-gate /* Reset master on all of the other nodes. */
8220Sstevel@tonic-gate if (clnt_mnsetmaster(nd->nd_nodename, sp,
8230Sstevel@tonic-gate "", MD_MN_INVALID_NID, &xep))
8240Sstevel@tonic-gate mdclrerror(&xep);
8250Sstevel@tonic-gate nd = nd->nd_next;
8260Sstevel@tonic-gate }
8270Sstevel@tonic-gate }
8280Sstevel@tonic-gate }
8290Sstevel@tonic-gate
8300Sstevel@tonic-gate /*
8310Sstevel@tonic-gate * Send resume command to mdcommd. Don't send reinit command
8320Sstevel@tonic-gate * since nodelist should not have changed.
8330Sstevel@tonic-gate * If suspendall_flag is set, then user would have been adding
8340Sstevel@tonic-gate * first drives to set. Since this failed, there is certainly
8350Sstevel@tonic-gate * no reinit message to send to rpc.commd since no nodes will
8360Sstevel@tonic-gate * be joined to set at the end of this metaset command.
8370Sstevel@tonic-gate */
8380Sstevel@tonic-gate if (suspendall_flag) {
8390Sstevel@tonic-gate /* Send resume */
8400Sstevel@tonic-gate nd = sd->sd_nodelist;
8410Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
8420Sstevel@tonic-gate while (nd) {
8430Sstevel@tonic-gate /*
8440Sstevel@tonic-gate * Resume all classes but class 1 so that lock is held
8450Sstevel@tonic-gate * against meta* commands.
8460Sstevel@tonic-gate * To later resume class1, must issue a class0 resume.
8470Sstevel@tonic-gate */
8480Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
8490Sstevel@tonic-gate sp, MD_MSG_CLASS0,
8500Sstevel@tonic-gate MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
8510Sstevel@tonic-gate mde_perror(&xep, dgettext(TEXT_DOMAIN,
8520Sstevel@tonic-gate "Unable to resume rpc.mdcommd.\n"));
8530Sstevel@tonic-gate mdclrerror(&xep);
8540Sstevel@tonic-gate }
8550Sstevel@tonic-gate nd = nd->nd_next;
8560Sstevel@tonic-gate }
8570Sstevel@tonic-gate meta_ping_mnset(sp->setno);
8580Sstevel@tonic-gate }
8590Sstevel@tonic-gate
8600Sstevel@tonic-gate /* level 3 */
8610Sstevel@tonic-gate if (rb_level > 2) {
8620Sstevel@tonic-gate mdnamelist_t *nlp;
8630Sstevel@tonic-gate mdname_t *np;
8640Sstevel@tonic-gate
8650Sstevel@tonic-gate for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
8660Sstevel@tonic-gate uint_t rep_slice;
8670Sstevel@tonic-gate
8680Sstevel@tonic-gate if ((meta_replicaslice(ddp->dd_dnp,
8690Sstevel@tonic-gate &rep_slice, &xep) != 0) ||
8700Sstevel@tonic-gate ((np = metaslicename(ddp->dd_dnp, rep_slice,
8710Sstevel@tonic-gate &xep)) == NULL)) {
8720Sstevel@tonic-gate mdclrerror(&xep);
8730Sstevel@tonic-gate continue;
8740Sstevel@tonic-gate }
8750Sstevel@tonic-gate nlp = NULL;
8760Sstevel@tonic-gate (void) metanamelist_append(&nlp, np);
8770Sstevel@tonic-gate
8780Sstevel@tonic-gate if (meta_db_detach(sp, nlp,
8790Sstevel@tonic-gate (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, &xep))
8800Sstevel@tonic-gate mdclrerror(&xep);
8810Sstevel@tonic-gate
8820Sstevel@tonic-gate metafreenamelist(nlp);
8830Sstevel@tonic-gate }
8840Sstevel@tonic-gate
8850Sstevel@tonic-gate /* Re-balance */
8860Sstevel@tonic-gate if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
8870Sstevel@tonic-gate mdclrerror(&xep);
8880Sstevel@tonic-gate
8890Sstevel@tonic-gate /* Only if we are adding the first drive */
8900Sstevel@tonic-gate /* Handled MN diskset above. */
8910Sstevel@tonic-gate if ((curdd == NULL) && !(MD_MNSET_DESC(sd))) {
8920Sstevel@tonic-gate if (clnt_stimeout(mynode(), sp, &defmhiargs,
8930Sstevel@tonic-gate &xep) == -1)
8940Sstevel@tonic-gate mdclrerror(&xep);
8950Sstevel@tonic-gate
8960Sstevel@tonic-gate /* This is needed because of a corner case */
8970Sstevel@tonic-gate if (halt_set(sp, &xep))
8980Sstevel@tonic-gate mdclrerror(&xep);
8990Sstevel@tonic-gate }
9000Sstevel@tonic-gate max_genid++;
9010Sstevel@tonic-gate }
9020Sstevel@tonic-gate
9030Sstevel@tonic-gate /* level 2 */
9040Sstevel@tonic-gate if (rb_level > 1) {
9050Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
9060Sstevel@tonic-gate if (rel_own_bydd(sp, dd, TRUE, &xep))
9070Sstevel@tonic-gate mdclrerror(&xep);
9080Sstevel@tonic-gate }
9090Sstevel@tonic-gate }
9100Sstevel@tonic-gate
9110Sstevel@tonic-gate /* level 1 */
9120Sstevel@tonic-gate if (rb_level > 0) {
9130Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
9140Sstevel@tonic-gate nd = sd->sd_nodelist;
9150Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
9160Sstevel@tonic-gate while (nd) {
9170Sstevel@tonic-gate if (clnt_deldrvs(nd->nd_nodename, sp, dd,
9180Sstevel@tonic-gate &xep) == -1)
9190Sstevel@tonic-gate mdclrerror(&xep);
9200Sstevel@tonic-gate nd = nd->nd_next;
9210Sstevel@tonic-gate }
9220Sstevel@tonic-gate } else {
9230Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
9240Sstevel@tonic-gate /* Skip empty slots */
9250Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
9260Sstevel@tonic-gate continue;
9270Sstevel@tonic-gate
9280Sstevel@tonic-gate if (clnt_deldrvs(sd->sd_nodes[i], sp, dd,
9290Sstevel@tonic-gate &xep) == -1)
9300Sstevel@tonic-gate mdclrerror(&xep);
9310Sstevel@tonic-gate }
9320Sstevel@tonic-gate }
9330Sstevel@tonic-gate max_genid += 2;
9340Sstevel@tonic-gate resync_genid(sp, sd, max_genid, 0, NULL);
9350Sstevel@tonic-gate }
9360Sstevel@tonic-gate
9370Sstevel@tonic-gate if ((suspend1_flag) || (suspendall_flag)) {
9380Sstevel@tonic-gate /* Send resume */
9390Sstevel@tonic-gate nd = sd->sd_nodelist;
9400Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
9410Sstevel@tonic-gate while (nd) {
9420Sstevel@tonic-gate /*
9430Sstevel@tonic-gate * Just resume all classes so that resume is the
9440Sstevel@tonic-gate * same whether just one class was locked or all
9450Sstevel@tonic-gate * classes were locked.
9460Sstevel@tonic-gate */
9470Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
9480Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
9490Sstevel@tonic-gate mdclrerror(&xep);
9500Sstevel@tonic-gate }
9510Sstevel@tonic-gate nd = nd->nd_next;
9520Sstevel@tonic-gate }
9530Sstevel@tonic-gate meta_ping_mnset(sp->setno);
9540Sstevel@tonic-gate }
9550Sstevel@tonic-gate
9560Sstevel@tonic-gate /* level 0 */
9570Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname);
9580Sstevel@tonic-gate /* Don't test lock flag since guaranteed to be set if in rollback */
9590Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
9600Sstevel@tonic-gate /*
9610Sstevel@tonic-gate * Since the add drive operation is failing, need
9620Sstevel@tonic-gate * to reset config back to the way it was
9630Sstevel@tonic-gate * before the add drive operation.
9640Sstevel@tonic-gate * If a MN diskset and this is the first drive being
9650Sstevel@tonic-gate * added, then reset master on this node since
9660Sstevel@tonic-gate * the master would have not been set previously.
9670Sstevel@tonic-gate * This is ok to fail since next node to add first
9680Sstevel@tonic-gate * disk to diskset will also set the master on all nodes.
9690Sstevel@tonic-gate */
9700Sstevel@tonic-gate if (curdd == NULL) {
9710Sstevel@tonic-gate /* Reset master on mynode */
9720Sstevel@tonic-gate if (clnt_mnsetmaster(mynode(), sp, "",
9730Sstevel@tonic-gate MD_MN_INVALID_NID, &xep))
9740Sstevel@tonic-gate mdclrerror(&xep);
9750Sstevel@tonic-gate }
9760Sstevel@tonic-gate nd = sd->sd_nodelist;
9770Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
9780Sstevel@tonic-gate while (nd) {
9790Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
9800Sstevel@tonic-gate mdclrerror(&xep);
9810Sstevel@tonic-gate nd = nd->nd_next;
9820Sstevel@tonic-gate }
9830Sstevel@tonic-gate } else {
9840Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
9850Sstevel@tonic-gate /* Skip empty slots */
9860Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
9870Sstevel@tonic-gate continue;
9880Sstevel@tonic-gate
9890Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
9900Sstevel@tonic-gate mdclrerror(&xep);
9910Sstevel@tonic-gate }
9920Sstevel@tonic-gate }
9930Sstevel@tonic-gate cl_set_setkey(NULL);
9940Sstevel@tonic-gate
9950Sstevel@tonic-gate /* release signals back to what they were on entry */
9960Sstevel@tonic-gate if (procsigs(FALSE, &oldsigs, &xep) < 0)
9970Sstevel@tonic-gate mdclrerror(&xep);
9980Sstevel@tonic-gate
9990Sstevel@tonic-gate metafreedrivedesc(&dd);
10000Sstevel@tonic-gate
10010Sstevel@tonic-gate if (flush_set_onerr) {
10020Sstevel@tonic-gate metaflushsetname(sp);
10030Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
10040Sstevel@tonic-gate md_rb_sig_handling_off(md_got_sig(), md_which_sig());
10050Sstevel@tonic-gate }
10060Sstevel@tonic-gate }
10070Sstevel@tonic-gate
10080Sstevel@tonic-gate return (rval);
10090Sstevel@tonic-gate }
10100Sstevel@tonic-gate
10111945Sjeanm /*
10121945Sjeanm * Add drives routine used during import of a diskset.
10131945Sjeanm */
10141945Sjeanm int
meta_imp_set_adddrives(mdsetname_t * sp,mddrivenamelist_t * dnlp,md_im_set_desc_t * misp,md_error_t * ep)10151945Sjeanm meta_imp_set_adddrives(
10161945Sjeanm mdsetname_t *sp,
10171945Sjeanm mddrivenamelist_t *dnlp,
10181945Sjeanm md_im_set_desc_t *misp,
10191945Sjeanm md_error_t *ep
10201945Sjeanm )
10211945Sjeanm {
10221945Sjeanm md_set_desc *sd;
10231945Sjeanm mddrivenamelist_t *p;
10241945Sjeanm md_drive_desc *dd = NULL, *ddp;
10251945Sjeanm int flush_set_onerr = 0;
10261945Sjeanm md_timeval32_t now;
10271945Sjeanm ulong_t genid;
10281945Sjeanm mhd_mhiargs_t mhiargs;
10291945Sjeanm md_im_replica_info_t *mirp;
10301945Sjeanm md_im_drive_info_t *midp;
10311945Sjeanm int rval = 0;
10321945Sjeanm sigset_t oldsigs;
10331945Sjeanm ulong_t max_genid = 0;
10341945Sjeanm int rb_level = 0;
10351945Sjeanm md_error_t xep = mdnullerror;
10361945Sjeanm
10371945Sjeanm if ((sd = metaget_setdesc(sp, ep)) == NULL)
10381945Sjeanm return (-1);
10391945Sjeanm
10401945Sjeanm for (p = dnlp; p != NULL; p = p->next) {
10411945Sjeanm int imp_flag = 0;
10421945Sjeanm
10431945Sjeanm /*
10441945Sjeanm * If we have a partial diskset, meta_make_sidenmlist will
10451945Sjeanm * need information from midp to complete making the
10461945Sjeanm * side name structure.
10471945Sjeanm */
10481945Sjeanm if (misp->mis_partial) {
10491945Sjeanm imp_flag = MDDB_C_IMPORT;
10501945Sjeanm for (midp = misp->mis_drives; midp != NULL;
10511945Sjeanm midp = midp->mid_next) {
10521945Sjeanm if (midp->mid_dnp == p->drivenamep)
10531945Sjeanm break;
10541945Sjeanm }
10551945Sjeanm if (midp == NULL) {
10561945Sjeanm (void) mddserror(ep, MDE_DS_SETNOTIMP,
10571945Sjeanm MD_SET_BAD, mynode(), NULL, sp->setname);
10581945Sjeanm rval = -1;
10591945Sjeanm goto out;
10601945Sjeanm }
10611945Sjeanm }
10621945Sjeanm /*
10631945Sjeanm * Create the names for the drives we are adding per side.
10641945Sjeanm */
10651945Sjeanm if (meta_make_sidenmlist(sp, p->drivenamep, imp_flag,
10661945Sjeanm midp, ep) == -1) {
10671945Sjeanm rval = -1;
10681945Sjeanm goto out;
10691945Sjeanm }
10701945Sjeanm }
10711945Sjeanm
10721945Sjeanm /*
10731945Sjeanm * Get the list of drives descriptors that we are adding.
10741945Sjeanm */
10751945Sjeanm dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep);
10761945Sjeanm
10771945Sjeanm if (! mdisok(ep)) {
10781945Sjeanm rval = -1;
10791945Sjeanm goto out;
10801945Sjeanm }
10811945Sjeanm
10821945Sjeanm /*
10831945Sjeanm * Get the set timeout information.
10841945Sjeanm */
10851945Sjeanm (void) memset(&mhiargs, '\0', sizeof (mhiargs));
10861945Sjeanm if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
10871945Sjeanm rval = -1;
10881945Sjeanm goto out;
10891945Sjeanm }
10901945Sjeanm
10911945Sjeanm /*
10921945Sjeanm * Get timestamp and generation id for new records
10931945Sjeanm */
10941945Sjeanm now = sd->sd_ctime;
10951945Sjeanm genid = sd->sd_genid;
10961945Sjeanm
10971945Sjeanm /* At this point, in case of error, set should be flushed. */
10981945Sjeanm flush_set_onerr = 1;
10991945Sjeanm
11001945Sjeanm rb_level = 1; /* level 1 */
11011945Sjeanm
11021945Sjeanm for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) {
11031945Sjeanm for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
11041945Sjeanm if (ddp->dd_dnp == midp->mid_dnp) {
11051945Sjeanm /* same disk */
11061945Sjeanm ddp->dd_dnp->devid =
11071945Sjeanm devid_str_encode(midp->mid_devid,
11081945Sjeanm midp->mid_minor_name);
11091945Sjeanm
11101945Sjeanm ddp->dd_dbcnt = 0;
11111945Sjeanm mirp = midp->mid_replicas;
11121945Sjeanm if (mirp) {
11131945Sjeanm ddp->dd_dbsize = mirp->mir_length;
11141945Sjeanm for (; mirp != NULL;
11151945Sjeanm mirp = mirp->mir_next) {
11161945Sjeanm ddp->dd_dbcnt++;
11171945Sjeanm }
11181945Sjeanm }
11191945Sjeanm if ((midp->mid_available &
11201945Sjeanm MD_IM_DISK_NOT_AVAILABLE) &&
11211945Sjeanm (misp->mis_flags & MD_IM_SET_REPLICATED)) {
11221945Sjeanm ddp->dd_flags = MD_DR_UNRSLV_REPLICATED;
11231945Sjeanm }
11241945Sjeanm }
11251945Sjeanm }
11261945Sjeanm }
11271945Sjeanm
11281945Sjeanm /*
11291945Sjeanm * Add the drive records for the drives that we are adding to
11301945Sjeanm * each host in the set. Marks the drive records as MD_DR_ADD.
11311945Sjeanm * May also mark a drive record as MD_DR_UNRSLV_REPLICATED if
11321945Sjeanm * this flag was set in the dd_flags for that drive.
11331945Sjeanm */
11341945Sjeanm if (clnt_imp_adddrvs(mynode(), sp, dd, now, genid, ep) == -1)
11351945Sjeanm goto rollback;
11361945Sjeanm
11371945Sjeanm rb_level = 2; /* level 2 */
11381945Sjeanm
11391945Sjeanm /*
11401945Sjeanm * Take ownership of the added drives.
11411945Sjeanm */
11421945Sjeanm if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep))
11431945Sjeanm goto rollback;
11441945Sjeanm
11451945Sjeanm out:
11461945Sjeanm metafreedrivedesc(&dd);
11471945Sjeanm
11481945Sjeanm if (flush_set_onerr) {
11491945Sjeanm metaflushsetname(sp);
11501945Sjeanm }
11511945Sjeanm
11521945Sjeanm return (rval);
11531945Sjeanm
11541945Sjeanm rollback:
11551945Sjeanm /* Make sure we are blocking all signals */
11561945Sjeanm if (procsigs(TRUE, &oldsigs, &xep) < 0)
11571945Sjeanm mdclrerror(&xep);
11581945Sjeanm
11591945Sjeanm rval = -1;
11601945Sjeanm
11611945Sjeanm max_genid = sd->sd_genid;
11621945Sjeanm
11631945Sjeanm /* level 2 */
11641945Sjeanm if (rb_level > 1) {
11651945Sjeanm if (!MD_ATSET_DESC(sd)) {
11661945Sjeanm if (rel_own_bydd(sp, dd, TRUE, &xep)) {
11671945Sjeanm mdclrerror(&xep);
11681945Sjeanm }
11691945Sjeanm }
11701945Sjeanm }
11711945Sjeanm
11721945Sjeanm /* level 1 */
11731945Sjeanm if (rb_level > 0) {
11741945Sjeanm if (clnt_deldrvs(mynode(), sp, dd, &xep) == -1) {
11751945Sjeanm mdclrerror(&xep);
11761945Sjeanm }
11771945Sjeanm max_genid += 2;
11781945Sjeanm resync_genid(sp, sd, max_genid, 0, NULL);
11791945Sjeanm }
11801945Sjeanm
11811945Sjeanm /* level 0 */
11821945Sjeanm
11831945Sjeanm /* release signals back to what they were on entry */
11841945Sjeanm if (procsigs(FALSE, &oldsigs, &xep) < 0)
11851945Sjeanm mdclrerror(&xep);
11861945Sjeanm
11871945Sjeanm metafreedrivedesc(&dd);
11881945Sjeanm
11891945Sjeanm if (flush_set_onerr) {
11901945Sjeanm metaflushsetname(sp);
11911945Sjeanm md_rb_sig_handling_off(md_got_sig(), md_which_sig());
11921945Sjeanm }
11931945Sjeanm
11941945Sjeanm return (rval);
11951945Sjeanm }
11961945Sjeanm
11970Sstevel@tonic-gate int
meta_set_deletedrives(mdsetname_t * sp,mddrivenamelist_t * dnlp,int forceflg,md_error_t * ep)11980Sstevel@tonic-gate meta_set_deletedrives(
11990Sstevel@tonic-gate mdsetname_t *sp,
12000Sstevel@tonic-gate mddrivenamelist_t *dnlp,
12010Sstevel@tonic-gate int forceflg,
12020Sstevel@tonic-gate md_error_t *ep
12030Sstevel@tonic-gate )
12040Sstevel@tonic-gate {
12050Sstevel@tonic-gate md_set_desc *sd;
12060Sstevel@tonic-gate md_drive_desc *ddp, *dd = NULL, *curdd = NULL;
12070Sstevel@tonic-gate md_replicalist_t *rlp = NULL, *rl;
12080Sstevel@tonic-gate mddrivenamelist_t *p;
12090Sstevel@tonic-gate int deldrvcnt = 0;
12100Sstevel@tonic-gate int rval = 0;
12110Sstevel@tonic-gate mhd_mhiargs_t mhiargs;
12120Sstevel@tonic-gate int i;
12130Sstevel@tonic-gate sigset_t oldsigs;
12140Sstevel@tonic-gate md_setkey_t *cl_sk;
12150Sstevel@tonic-gate ulong_t max_genid = 0;
12160Sstevel@tonic-gate int rb_level = 0;
12170Sstevel@tonic-gate md_error_t xep = mdnullerror;
12180Sstevel@tonic-gate md_mnnode_desc *nd;
12190Sstevel@tonic-gate int has_set;
12200Sstevel@tonic-gate int current_drv_cnt = 0;
12210Sstevel@tonic-gate int suspendall_flag = 0, suspendall_flag_rb = 0;
12220Sstevel@tonic-gate int suspend1_flag = 0;
12230Sstevel@tonic-gate int lock_flag = 0;
12240Sstevel@tonic-gate bool_t stale_bool = FALSE;
12250Sstevel@tonic-gate int flush_set_onerr = 0;
12260Sstevel@tonic-gate mdnamelist_t *nlp;
12270Sstevel@tonic-gate mdname_t *np;
12280Sstevel@tonic-gate
12290Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
12300Sstevel@tonic-gate return (-1);
12310Sstevel@tonic-gate
12320Sstevel@tonic-gate /* Make sure we own the set */
12330Sstevel@tonic-gate if (meta_check_ownership(sp, ep) != 0)
12340Sstevel@tonic-gate return (-1);
12350Sstevel@tonic-gate
12360Sstevel@tonic-gate if (drvsuniq(sp, dnlp, ep) == -1)
12370Sstevel@tonic-gate return (-1);
12380Sstevel@tonic-gate
12390Sstevel@tonic-gate /*
12400Sstevel@tonic-gate * Check and see if all the nodes have the set.
12410Sstevel@tonic-gate *
12420Sstevel@tonic-gate * The drive and node records are stored in the local mddbs of each
12430Sstevel@tonic-gate * node in the diskset. Each node's rpc.metad daemon reads in the set,
12440Sstevel@tonic-gate * drive and node records from that node's local mddb and caches them
12450Sstevel@tonic-gate * internally. Any process needing diskset information contacts its
12460Sstevel@tonic-gate * local rpc.metad to get this information. Since each node in the
12470Sstevel@tonic-gate * diskset is independently reading the set information from its local
12480Sstevel@tonic-gate * mddb, the set, drive and node records in the local mddbs must stay
12490Sstevel@tonic-gate * in-sync, so that all nodes have a consistent view of the diskset.
12500Sstevel@tonic-gate *
12510Sstevel@tonic-gate * For a multinode diskset, explicitly verify that all nodes in the
12520Sstevel@tonic-gate * diskset are ALIVE (i.e. are in the API membership list). Otherwise,
12530Sstevel@tonic-gate * fail this operation since all nodes must be ALIVE in order to delete
12540Sstevel@tonic-gate * a drive record from their local mddb. If a panic of this node
12550Sstevel@tonic-gate * leaves the local mddbs set, node and drive records out-of-sync, the
12560Sstevel@tonic-gate * reconfig cycle will fix the local mddbs and force them back into
12570Sstevel@tonic-gate * synchronization.
12580Sstevel@tonic-gate */
12590Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
12600Sstevel@tonic-gate nd = sd->sd_nodelist;
12610Sstevel@tonic-gate while (nd) {
12620Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
12630Sstevel@tonic-gate (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST,
12640Sstevel@tonic-gate sp->setno,
12650Sstevel@tonic-gate nd->nd_nodename, NULL, sp->setname);
12660Sstevel@tonic-gate return (-1);
12670Sstevel@tonic-gate }
12680Sstevel@tonic-gate nd = nd->nd_next;
12690Sstevel@tonic-gate }
12700Sstevel@tonic-gate
12710Sstevel@tonic-gate /* Make sure we are blocking all signals */
12720Sstevel@tonic-gate if (procsigs(TRUE, &oldsigs, &xep) < 0)
12730Sstevel@tonic-gate mdclrerror(&xep);
12740Sstevel@tonic-gate
12750Sstevel@tonic-gate /*
12760Sstevel@tonic-gate * Lock the set on current set members.
12770Sstevel@tonic-gate * Set locking done much earlier for MN diskset than for
12780Sstevel@tonic-gate * traditional diskset since lock_set and SUSPEND are used
12790Sstevel@tonic-gate * to protect against other meta* commands running on the
12800Sstevel@tonic-gate * other nodes.
12810Sstevel@tonic-gate */
12820Sstevel@tonic-gate nd = sd->sd_nodelist;
12830Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
12840Sstevel@tonic-gate while (nd) {
12850Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
12860Sstevel@tonic-gate rval = -1;
12870Sstevel@tonic-gate goto out;
12880Sstevel@tonic-gate }
12890Sstevel@tonic-gate lock_flag = 1;
12900Sstevel@tonic-gate nd = nd->nd_next;
12910Sstevel@tonic-gate }
12920Sstevel@tonic-gate /*
12930Sstevel@tonic-gate * Lock out other meta* commands by suspending
12940Sstevel@tonic-gate * class 1 messages across the diskset.
12950Sstevel@tonic-gate */
12960Sstevel@tonic-gate nd = sd->sd_nodelist;
12970Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
12980Sstevel@tonic-gate while (nd) {
12990Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename,
13000Sstevel@tonic-gate COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1,
13010Sstevel@tonic-gate MD_MSCF_NO_FLAGS, ep)) {
13020Sstevel@tonic-gate rval = -1;
13030Sstevel@tonic-gate goto out;
13040Sstevel@tonic-gate }
13050Sstevel@tonic-gate suspend1_flag = 1;
13060Sstevel@tonic-gate nd = nd->nd_next;
13070Sstevel@tonic-gate }
13080Sstevel@tonic-gate
13090Sstevel@tonic-gate nd = sd->sd_nodelist;
13100Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
13110Sstevel@tonic-gate while (nd) {
13120Sstevel@tonic-gate if (strcmp(nd->nd_nodename, mynode()) == 0) {
13130Sstevel@tonic-gate nd = nd->nd_next;
13140Sstevel@tonic-gate continue;
13150Sstevel@tonic-gate }
13160Sstevel@tonic-gate
13170Sstevel@tonic-gate has_set = nodehasset(sp, nd->nd_nodename,
13180Sstevel@tonic-gate NHS_NSTG_EQ, ep);
13190Sstevel@tonic-gate if (has_set < 0) {
13200Sstevel@tonic-gate rval = -1;
13210Sstevel@tonic-gate goto out;
13220Sstevel@tonic-gate }
13230Sstevel@tonic-gate
13240Sstevel@tonic-gate if (! has_set) {
13250Sstevel@tonic-gate (void) mddserror(ep, MDE_DS_NODENOSET,
13260Sstevel@tonic-gate sp->setno, nd->nd_nodename,
13270Sstevel@tonic-gate NULL, sp->setname);
13280Sstevel@tonic-gate rval = -1;
13290Sstevel@tonic-gate goto out;
13300Sstevel@tonic-gate }
13310Sstevel@tonic-gate nd = nd->nd_next;
13320Sstevel@tonic-gate }
13330Sstevel@tonic-gate } else {
13340Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
13350Sstevel@tonic-gate /* Skip empty slots */
13360Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
13370Sstevel@tonic-gate continue;
13380Sstevel@tonic-gate
13390Sstevel@tonic-gate if (strcmp(sd->sd_nodes[i], mynode()) == 0)
13400Sstevel@tonic-gate continue;
13410Sstevel@tonic-gate
13420Sstevel@tonic-gate has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NSTG_EQ,
13430Sstevel@tonic-gate ep);
13440Sstevel@tonic-gate if (has_set < 0) {
13450Sstevel@tonic-gate /*
13460Sstevel@tonic-gate * Can directly return since !MN diskset;
13470Sstevel@tonic-gate * nothing to unlock.
13480Sstevel@tonic-gate */
13490Sstevel@tonic-gate return (-1);
13500Sstevel@tonic-gate }
13510Sstevel@tonic-gate
13520Sstevel@tonic-gate if (! has_set) {
13530Sstevel@tonic-gate /*
13540Sstevel@tonic-gate * Can directly return since !MN diskset;
13550Sstevel@tonic-gate * nothing to unlock.
13560Sstevel@tonic-gate */
13570Sstevel@tonic-gate return (mddserror(ep, MDE_DS_NODENOSET,
13580Sstevel@tonic-gate sp->setno, sd->sd_nodes[i], NULL,
13590Sstevel@tonic-gate sp->setname));
13600Sstevel@tonic-gate }
13610Sstevel@tonic-gate }
13620Sstevel@tonic-gate }
13630Sstevel@tonic-gate
13640Sstevel@tonic-gate for (p = dnlp; p != NULL; p = p->next) {
13650Sstevel@tonic-gate int is_it;
13660Sstevel@tonic-gate mddrivename_t *dnp;
13670Sstevel@tonic-gate
13680Sstevel@tonic-gate dnp = p->drivenamep;
13690Sstevel@tonic-gate
13700Sstevel@tonic-gate if ((is_it = meta_is_drive_in_thisset(sp, dnp, FALSE, ep))
13710Sstevel@tonic-gate == -1) {
13720Sstevel@tonic-gate rval = -1;
13730Sstevel@tonic-gate goto out;
13740Sstevel@tonic-gate }
13750Sstevel@tonic-gate
13760Sstevel@tonic-gate if (! is_it) {
13770Sstevel@tonic-gate (void) mddserror(ep, MDE_DS_DRIVENOTINSET, sp->setno,
13780Sstevel@tonic-gate NULL, dnp->cname, sp->setname);
13790Sstevel@tonic-gate rval = -1;
13800Sstevel@tonic-gate goto out;
13810Sstevel@tonic-gate }
13820Sstevel@tonic-gate
13830Sstevel@tonic-gate if ((meta_check_drive_inuse(sp, dnp, FALSE, ep)) == -1) {
13840Sstevel@tonic-gate rval = -1;
13850Sstevel@tonic-gate goto out;
13860Sstevel@tonic-gate }
13870Sstevel@tonic-gate
13880Sstevel@tonic-gate deldrvcnt++;
13890Sstevel@tonic-gate }
13900Sstevel@tonic-gate current_drv_cnt = deldrvcnt;
13910Sstevel@tonic-gate
13920Sstevel@tonic-gate /*
13930Sstevel@tonic-gate * Get drive descriptors for the drives that are currently in the set.
13940Sstevel@tonic-gate */
13950Sstevel@tonic-gate curdd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
13960Sstevel@tonic-gate if (! mdisok(ep)) {
13970Sstevel@tonic-gate rval = -1;
13980Sstevel@tonic-gate goto out;
13990Sstevel@tonic-gate }
14000Sstevel@tonic-gate
14010Sstevel@tonic-gate /*
14020Sstevel@tonic-gate * Decrement the delete drive count for each drive currently in the
14030Sstevel@tonic-gate * set.
14040Sstevel@tonic-gate */
14050Sstevel@tonic-gate for (ddp = curdd; ddp != NULL; ddp = ddp->dd_next)
14060Sstevel@tonic-gate deldrvcnt--;
14070Sstevel@tonic-gate
14080Sstevel@tonic-gate /*
14090Sstevel@tonic-gate * If the count of drives we are deleting is equal to the drives in the
14100Sstevel@tonic-gate * set, and we haven't specified forceflg, return an error
14110Sstevel@tonic-gate */
14120Sstevel@tonic-gate if (deldrvcnt == 0 && forceflg == FALSE) {
14130Sstevel@tonic-gate (void) mderror(ep, MDE_FORCE_DEL_ALL_DRV, NULL);
14140Sstevel@tonic-gate rval = -1;
14150Sstevel@tonic-gate goto out;
14160Sstevel@tonic-gate }
14170Sstevel@tonic-gate
14180Sstevel@tonic-gate /*
14190Sstevel@tonic-gate * Get the list of drive descriptors that we are deleting.
14200Sstevel@tonic-gate */
14210Sstevel@tonic-gate dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_DEL, ep);
14220Sstevel@tonic-gate if (! mdisok(ep)) {
14230Sstevel@tonic-gate rval = -1;
14240Sstevel@tonic-gate goto out;
14250Sstevel@tonic-gate }
14260Sstevel@tonic-gate
14270Sstevel@tonic-gate /*
14280Sstevel@tonic-gate * Get the set timeout information in case we have to roll back.
14290Sstevel@tonic-gate */
14300Sstevel@tonic-gate (void) memset(&mhiargs, '\0', sizeof (mhiargs));
14310Sstevel@tonic-gate if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) {
14320Sstevel@tonic-gate rval = -1;
14330Sstevel@tonic-gate goto out;
14340Sstevel@tonic-gate }
14350Sstevel@tonic-gate
14360Sstevel@tonic-gate /* At this point, in case of error, set should be flushed. */
14370Sstevel@tonic-gate flush_set_onerr = 1;
14380Sstevel@tonic-gate
14390Sstevel@tonic-gate /* END CHECK CODE */
14400Sstevel@tonic-gate
14410Sstevel@tonic-gate /* Lock the set on current set members */
14420Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
14430Sstevel@tonic-gate md_rb_sig_handling_on();
14440Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
14450Sstevel@tonic-gate /* Skip empty slots */
14460Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
14470Sstevel@tonic-gate continue;
14480Sstevel@tonic-gate
14490Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
14500Sstevel@tonic-gate rval = -1;
14510Sstevel@tonic-gate goto out;
14520Sstevel@tonic-gate }
14530Sstevel@tonic-gate lock_flag = 1;
14540Sstevel@tonic-gate }
14550Sstevel@tonic-gate }
14560Sstevel@tonic-gate
14570Sstevel@tonic-gate if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
14580Sstevel@tonic-gate mddb_config_t c;
14590Sstevel@tonic-gate /*
14600Sstevel@tonic-gate * Is current set STALE?
14610Sstevel@tonic-gate */
14620Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
14630Sstevel@tonic-gate c.c_id = 0;
14640Sstevel@tonic-gate c.c_setno = sp->setno;
14650Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
14660Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde);
14670Sstevel@tonic-gate rval = -1;
14680Sstevel@tonic-gate goto out;
14690Sstevel@tonic-gate }
14700Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) {
14710Sstevel@tonic-gate stale_bool = TRUE;
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate }
14740Sstevel@tonic-gate
14750Sstevel@tonic-gate RB_TEST(1, "deletedrives", ep)
14760Sstevel@tonic-gate
14770Sstevel@tonic-gate RB_PREEMPT;
14780Sstevel@tonic-gate rb_level = 1; /* level 1 */
14790Sstevel@tonic-gate
14800Sstevel@tonic-gate RB_TEST(2, "deletedrives", ep)
14810Sstevel@tonic-gate
14820Sstevel@tonic-gate /*
14830Sstevel@tonic-gate * Mark the drives MD_DR_DEL
14840Sstevel@tonic-gate */
14850Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
14860Sstevel@tonic-gate nd = sd->sd_nodelist;
14870Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
14880Sstevel@tonic-gate while (nd) {
14890Sstevel@tonic-gate if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
14900Sstevel@tonic-gate MD_DR_DEL, ep) == -1)
14910Sstevel@tonic-gate goto rollback;
14920Sstevel@tonic-gate
14930Sstevel@tonic-gate RB_TEST(3, "deletedrives", ep)
14940Sstevel@tonic-gate nd = nd->nd_next;
14950Sstevel@tonic-gate }
14960Sstevel@tonic-gate } else {
14970Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
14980Sstevel@tonic-gate /* Skip empty slots */
14990Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
15000Sstevel@tonic-gate continue;
15010Sstevel@tonic-gate
15020Sstevel@tonic-gate if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
15030Sstevel@tonic-gate MD_DR_DEL, ep) == -1)
15040Sstevel@tonic-gate goto rollback;
15050Sstevel@tonic-gate
15060Sstevel@tonic-gate RB_TEST(3, "deletedrives", ep)
15070Sstevel@tonic-gate }
15080Sstevel@tonic-gate }
15090Sstevel@tonic-gate
15100Sstevel@tonic-gate RB_TEST(4, "deletedrives", ep)
15110Sstevel@tonic-gate
15120Sstevel@tonic-gate RB_PREEMPT;
15130Sstevel@tonic-gate rb_level = 2; /* level 2 */
15140Sstevel@tonic-gate
15150Sstevel@tonic-gate RB_TEST(5, "deletedrives", ep)
15160Sstevel@tonic-gate
15170Sstevel@tonic-gate /*
15180Sstevel@tonic-gate * Balance the DB's according to the list of existing drives and the
15190Sstevel@tonic-gate * list of deleted drives.
15200Sstevel@tonic-gate */
15210Sstevel@tonic-gate if (meta_db_balance(sp, dd, curdd, 0, ep) == -1)
15220Sstevel@tonic-gate goto rollback;
15230Sstevel@tonic-gate
15240Sstevel@tonic-gate /*
15250Sstevel@tonic-gate * If the drive(s) to be deleted cannot be accessed,
15260Sstevel@tonic-gate * they haven't really been deleted yet. Check and delete now
15270Sstevel@tonic-gate * if need be.
15280Sstevel@tonic-gate */
15290Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) {
15300Sstevel@tonic-gate nlp = NULL;
15310Sstevel@tonic-gate for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
15320Sstevel@tonic-gate char *delete_name;
15330Sstevel@tonic-gate
15340Sstevel@tonic-gate delete_name = ddp->dd_dnp->cname;
15350Sstevel@tonic-gate
15360Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) {
15370Sstevel@tonic-gate char *cur_name;
15380Sstevel@tonic-gate
15390Sstevel@tonic-gate cur_name =
15400Sstevel@tonic-gate rl->rl_repp->r_namep->drivenamep->cname;
15410Sstevel@tonic-gate
15420Sstevel@tonic-gate if (strcmp(delete_name, cur_name) == 0) {
15430Sstevel@tonic-gate /* put it on the delete list */
15440Sstevel@tonic-gate np = rl->rl_repp->r_namep;
15450Sstevel@tonic-gate (void) metanamelist_append(&nlp, np);
15460Sstevel@tonic-gate
15470Sstevel@tonic-gate }
15480Sstevel@tonic-gate }
15490Sstevel@tonic-gate }
15500Sstevel@tonic-gate
15510Sstevel@tonic-gate if (nlp != NULL) {
15520Sstevel@tonic-gate if (meta_db_detach(sp, nlp,
15530Sstevel@tonic-gate (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL,
15540Sstevel@tonic-gate ep) == -1) {
15550Sstevel@tonic-gate metafreenamelist(nlp);
15560Sstevel@tonic-gate goto rollback;
15570Sstevel@tonic-gate }
15580Sstevel@tonic-gate metafreenamelist(nlp);
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate }
15610Sstevel@tonic-gate
15620Sstevel@tonic-gate RB_TEST(6, "deletedrives", ep)
15630Sstevel@tonic-gate
15640Sstevel@tonic-gate RB_PREEMPT;
15650Sstevel@tonic-gate rb_level = 3; /* level 3 */
15660Sstevel@tonic-gate
15670Sstevel@tonic-gate RB_TEST(7, "deletedrives", ep)
15680Sstevel@tonic-gate
15690Sstevel@tonic-gate /*
15700Sstevel@tonic-gate * Cannot suspend set until after meta_db_balance since
15710Sstevel@tonic-gate * meta_db_balance uses META_DB_ATTACH/DETACH messages.
15720Sstevel@tonic-gate */
15730Sstevel@tonic-gate if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) {
15740Sstevel@tonic-gate /*
15750Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
15760Sstevel@tonic-gate * Start by suspending rpc.mdcommd (which drains it of all
15770Sstevel@tonic-gate * messages), then change the nodelist followed by a reinit
15780Sstevel@tonic-gate * and resume.
15790Sstevel@tonic-gate */
15800Sstevel@tonic-gate nd = sd->sd_nodelist;
15810Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
15820Sstevel@tonic-gate while (nd) {
15830Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND,
15840Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) {
15850Sstevel@tonic-gate rval = -1;
15860Sstevel@tonic-gate goto out;
15870Sstevel@tonic-gate }
15880Sstevel@tonic-gate suspendall_flag = 1;
15890Sstevel@tonic-gate nd = nd->nd_next;
15900Sstevel@tonic-gate }
15910Sstevel@tonic-gate }
15920Sstevel@tonic-gate
15930Sstevel@tonic-gate /*
15940Sstevel@tonic-gate * Remove the drive records for the drives that were deleted from
15950Sstevel@tonic-gate * each host in the set. This removes the record and dr_flags.
15960Sstevel@tonic-gate */
15970Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
15980Sstevel@tonic-gate nd = sd->sd_nodelist;
15990Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
16000Sstevel@tonic-gate while (nd) {
16010Sstevel@tonic-gate if (clnt_deldrvs(nd->nd_nodename, sp, dd, ep) == -1)
16020Sstevel@tonic-gate goto rollback;
16030Sstevel@tonic-gate
16040Sstevel@tonic-gate RB_TEST(8, "deletedrives", ep)
16050Sstevel@tonic-gate nd = nd->nd_next;
16060Sstevel@tonic-gate }
16070Sstevel@tonic-gate } else {
16080Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
16090Sstevel@tonic-gate /* Skip empty slots */
16100Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
16110Sstevel@tonic-gate continue;
16120Sstevel@tonic-gate
16130Sstevel@tonic-gate if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep) == -1)
16140Sstevel@tonic-gate goto rollback;
16150Sstevel@tonic-gate
16160Sstevel@tonic-gate RB_TEST(8, "deletedrives", ep)
16170Sstevel@tonic-gate }
16180Sstevel@tonic-gate }
16190Sstevel@tonic-gate
16200Sstevel@tonic-gate RB_TEST(9, "deletedrives", ep)
16210Sstevel@tonic-gate
16220Sstevel@tonic-gate RB_PREEMPT;
16230Sstevel@tonic-gate rb_level = 4; /* level 4 */
16240Sstevel@tonic-gate
16250Sstevel@tonic-gate RB_TEST(10, "deletedrives", ep)
16260Sstevel@tonic-gate
16270Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
16280Sstevel@tonic-gate if (rel_own_bydd(sp, dd, TRUE, ep))
16290Sstevel@tonic-gate goto rollback;
16300Sstevel@tonic-gate }
16310Sstevel@tonic-gate
16320Sstevel@tonic-gate /* If we deleted all the drives, then we need to halt the set. */
16330Sstevel@tonic-gate if (deldrvcnt == 0) {
16340Sstevel@tonic-gate RB_TEST(11, "deletedrives", ep)
16350Sstevel@tonic-gate
16360Sstevel@tonic-gate RB_PREEMPT;
16370Sstevel@tonic-gate rb_level = 5; /* level 5 */
16380Sstevel@tonic-gate
16390Sstevel@tonic-gate RB_TEST(12, "deletedrives", ep)
16400Sstevel@tonic-gate
16410Sstevel@tonic-gate if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
16420Sstevel@tonic-gate goto rollback;
16430Sstevel@tonic-gate
16440Sstevel@tonic-gate RB_TEST(13, "deletedrives", ep)
16450Sstevel@tonic-gate
16460Sstevel@tonic-gate RB_PREEMPT;
16470Sstevel@tonic-gate rb_level = 6; /* level 6 */
16480Sstevel@tonic-gate
16490Sstevel@tonic-gate RB_TEST(14, "deletedrives", ep)
16500Sstevel@tonic-gate
16510Sstevel@tonic-gate /* Halt MN diskset on all nodes by having node withdraw */
16520Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
16530Sstevel@tonic-gate nd = sd->sd_nodelist;
16540Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
16550Sstevel@tonic-gate while (nd) {
16560Sstevel@tonic-gate /* Only withdraw nodes that are joined */
16570Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_OWN)) {
16580Sstevel@tonic-gate nd = nd->nd_next;
16590Sstevel@tonic-gate continue;
16600Sstevel@tonic-gate }
16610Sstevel@tonic-gate /*
16620Sstevel@tonic-gate * Going to set locally cached node flags to
16630Sstevel@tonic-gate * rollback join so in case of error, the
16640Sstevel@tonic-gate * rollback code knows which nodes to re-join.
16650Sstevel@tonic-gate */
16660Sstevel@tonic-gate nd->nd_flags |= MD_MN_NODE_RB_JOIN;
16670Sstevel@tonic-gate
16680Sstevel@tonic-gate /*
16690Sstevel@tonic-gate * Be careful in ordering of following steps
16700Sstevel@tonic-gate * so that recovery from a panic between
16710Sstevel@tonic-gate * the steps is viable.
16720Sstevel@tonic-gate * Only reset master info in rpc.metad -
16730Sstevel@tonic-gate * don't reset local cached information
16740Sstevel@tonic-gate * which will be used to set master information
16750Sstevel@tonic-gate * back in case of failure (rollback).
16760Sstevel@tonic-gate */
16770Sstevel@tonic-gate if (clnt_withdrawset(nd->nd_nodename, sp, ep))
16780Sstevel@tonic-gate goto rollback;
16790Sstevel@tonic-gate /* Sets withdraw flag on all nodes in list */
16800Sstevel@tonic-gate if (clnt_upd_nr_flags(nd->nd_nodename, sp,
16810Sstevel@tonic-gate sd->sd_nodelist, MD_NR_WITHDRAW,
16820Sstevel@tonic-gate NULL, ep)) {
16830Sstevel@tonic-gate goto rollback;
16840Sstevel@tonic-gate }
16850Sstevel@tonic-gate if (clnt_mnsetmaster(nd->nd_nodename, sp,
16860Sstevel@tonic-gate "", MD_MN_INVALID_NID, ep)) {
16870Sstevel@tonic-gate goto rollback;
16880Sstevel@tonic-gate }
16890Sstevel@tonic-gate nd = nd->nd_next;
16900Sstevel@tonic-gate }
16910Sstevel@tonic-gate } else {
16920Sstevel@tonic-gate if (halt_set(sp, ep))
16930Sstevel@tonic-gate goto rollback;
16940Sstevel@tonic-gate }
16950Sstevel@tonic-gate
16960Sstevel@tonic-gate RB_TEST(15, "deletedrives", ep)
16970Sstevel@tonic-gate }
16980Sstevel@tonic-gate
16990Sstevel@tonic-gate RB_TEST(16, "deletedrives", ep)
17000Sstevel@tonic-gate
17010Sstevel@tonic-gate out:
17020Sstevel@tonic-gate /*
17030Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
17040Sstevel@tonic-gate * Send reinit command to mdcommd which forces it to get
17050Sstevel@tonic-gate * fresh set description.
17060Sstevel@tonic-gate */
17070Sstevel@tonic-gate if (suspendall_flag) {
17080Sstevel@tonic-gate /* Send reinit */
17090Sstevel@tonic-gate nd = sd->sd_nodelist;
17100Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
17110Sstevel@tonic-gate while (nd) {
17120Sstevel@tonic-gate /* Class is ignored for REINIT */
17130Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
17140Sstevel@tonic-gate sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
17150Sstevel@tonic-gate if (rval == 0)
17160Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
17170Sstevel@tonic-gate rval = -1;
17180Sstevel@tonic-gate mde_perror(ep, dgettext(TEXT_DOMAIN,
17190Sstevel@tonic-gate "Unable to reinit rpc.mdcommd.\n"));
17200Sstevel@tonic-gate }
17210Sstevel@tonic-gate nd = nd->nd_next;
17220Sstevel@tonic-gate }
17230Sstevel@tonic-gate }
17240Sstevel@tonic-gate
17250Sstevel@tonic-gate /*
17260Sstevel@tonic-gate * Just resume all classes so that resume is the same whether
17270Sstevel@tonic-gate * just one class was locked or all classes were locked.
17280Sstevel@tonic-gate */
17290Sstevel@tonic-gate if ((suspend1_flag) || (suspendall_flag)) {
17300Sstevel@tonic-gate /* Send resume */
17310Sstevel@tonic-gate nd = sd->sd_nodelist;
17320Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
17330Sstevel@tonic-gate while (nd) {
17340Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
17350Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
17360Sstevel@tonic-gate if (rval == 0)
17370Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
17380Sstevel@tonic-gate rval = -1;
17390Sstevel@tonic-gate mde_perror(ep, dgettext(TEXT_DOMAIN,
17400Sstevel@tonic-gate "Unable to resume rpc.mdcommd.\n"));
17410Sstevel@tonic-gate }
17420Sstevel@tonic-gate nd = nd->nd_next;
17430Sstevel@tonic-gate }
17440Sstevel@tonic-gate meta_ping_mnset(sp->setno);
17450Sstevel@tonic-gate }
17460Sstevel@tonic-gate if (lock_flag) {
17470Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname);
17480Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
17490Sstevel@tonic-gate nd = sd->sd_nodelist;
17500Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
17510Sstevel@tonic-gate while (nd) {
17520Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename,
17530Sstevel@tonic-gate cl_sk, &xep)) {
17540Sstevel@tonic-gate if (rval == 0)
17550Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
17560Sstevel@tonic-gate rval = -1;
17570Sstevel@tonic-gate }
17580Sstevel@tonic-gate nd = nd->nd_next;
17590Sstevel@tonic-gate }
17600Sstevel@tonic-gate } else {
17610Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
17620Sstevel@tonic-gate /* Skip empty slots */
17630Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
17640Sstevel@tonic-gate continue;
17650Sstevel@tonic-gate
17660Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i],
17670Sstevel@tonic-gate cl_sk, &xep)) {
17680Sstevel@tonic-gate if (rval == 0)
17690Sstevel@tonic-gate (void) mdstealerror(ep, &xep);
17700Sstevel@tonic-gate rval = -1;
17710Sstevel@tonic-gate }
17720Sstevel@tonic-gate }
17730Sstevel@tonic-gate }
17740Sstevel@tonic-gate cl_set_setkey(NULL);
17750Sstevel@tonic-gate }
17760Sstevel@tonic-gate
17770Sstevel@tonic-gate metafreedrivedesc(&dd);
17780Sstevel@tonic-gate
17790Sstevel@tonic-gate if (flush_set_onerr) {
17800Sstevel@tonic-gate metaflushsetname(sp);
17810Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
17820Sstevel@tonic-gate md_rb_sig_handling_off(md_got_sig(), md_which_sig());
17830Sstevel@tonic-gate }
17840Sstevel@tonic-gate }
17850Sstevel@tonic-gate
17860Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
17870Sstevel@tonic-gate /* release signals back to what they were on entry */
17880Sstevel@tonic-gate if (procsigs(FALSE, &oldsigs, &xep) < 0)
17890Sstevel@tonic-gate mdclrerror(&xep);
17900Sstevel@tonic-gate }
17910Sstevel@tonic-gate
17920Sstevel@tonic-gate return (rval);
17930Sstevel@tonic-gate
17940Sstevel@tonic-gate rollback:
17950Sstevel@tonic-gate /* all signals already blocked for MN diskset */
17960Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
17970Sstevel@tonic-gate /* Make sure we are blocking all signals */
17980Sstevel@tonic-gate if (procsigs(TRUE, &oldsigs, &xep) < 0)
17990Sstevel@tonic-gate mdclrerror(&xep);
18000Sstevel@tonic-gate }
18010Sstevel@tonic-gate
18020Sstevel@tonic-gate rval = -1;
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate max_genid = sd->sd_genid;
18050Sstevel@tonic-gate
18060Sstevel@tonic-gate /* Set the master on all nodes first thing */
18070Sstevel@tonic-gate if (rb_level > 5) {
18080Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
18090Sstevel@tonic-gate nd = sd->sd_nodelist;
18100Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
18110Sstevel@tonic-gate while (nd) {
18120Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
18130Sstevel@tonic-gate continue;
18140Sstevel@tonic-gate }
18150Sstevel@tonic-gate /*
18160Sstevel@tonic-gate * Set master on all re-joining nodes to be
18170Sstevel@tonic-gate * my cached view of master.
18180Sstevel@tonic-gate */
18190Sstevel@tonic-gate if (clnt_mnsetmaster(nd->nd_nodename, sp,
18200Sstevel@tonic-gate sd->sd_mn_master_nodenm,
18210Sstevel@tonic-gate sd->sd_mn_master_nodeid, &xep)) {
18220Sstevel@tonic-gate mdclrerror(&xep);
18230Sstevel@tonic-gate }
18240Sstevel@tonic-gate }
18250Sstevel@tonic-gate }
18260Sstevel@tonic-gate }
18270Sstevel@tonic-gate
18280Sstevel@tonic-gate /* level 3 */
18290Sstevel@tonic-gate if (rb_level > 2) {
18300Sstevel@tonic-gate md_set_record *sr;
18310Sstevel@tonic-gate md_mnset_record *mnsr;
18320Sstevel@tonic-gate md_drive_record *dr;
18330Sstevel@tonic-gate int sr_drive_cnt;
18340Sstevel@tonic-gate
18350Sstevel@tonic-gate /*
18360Sstevel@tonic-gate * See if we have to re-add the drives specified.
18370Sstevel@tonic-gate */
18380Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
18390Sstevel@tonic-gate nd = sd->sd_nodelist;
18400Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
18410Sstevel@tonic-gate while (nd) {
18420Sstevel@tonic-gate /*
18430Sstevel@tonic-gate * Must get current set record from each
18440Sstevel@tonic-gate * node to see what else must be done
18450Sstevel@tonic-gate * to recover.
18460Sstevel@tonic-gate * Record should be for a multi-node diskset.
18470Sstevel@tonic-gate */
18480Sstevel@tonic-gate if (clnt_mngetset(nd->nd_nodename, sp->setname,
18490Sstevel@tonic-gate MD_SET_BAD, &mnsr, &xep) == -1) {
18500Sstevel@tonic-gate mdclrerror(&xep);
18510Sstevel@tonic-gate nd = nd->nd_next;
18520Sstevel@tonic-gate continue;
18530Sstevel@tonic-gate }
18540Sstevel@tonic-gate
18550Sstevel@tonic-gate /*
18560Sstevel@tonic-gate * If all drives are already there, skip
18570Sstevel@tonic-gate * to next node.
18580Sstevel@tonic-gate */
18590Sstevel@tonic-gate sr_drive_cnt = 0;
18600Sstevel@tonic-gate dr = mnsr->sr_drivechain;
18610Sstevel@tonic-gate while (dr) {
18620Sstevel@tonic-gate sr_drive_cnt++;
18630Sstevel@tonic-gate dr = dr->dr_next;
18640Sstevel@tonic-gate }
18650Sstevel@tonic-gate if (sr_drive_cnt == current_drv_cnt) {
18660Sstevel@tonic-gate free_sr((md_set_record *)mnsr);
18670Sstevel@tonic-gate nd = nd->nd_next;
18680Sstevel@tonic-gate continue;
18690Sstevel@tonic-gate }
18700Sstevel@tonic-gate
18710Sstevel@tonic-gate /* Readd all drives */
18720Sstevel@tonic-gate if (clnt_adddrvs(nd->nd_nodename, sp, dd,
18730Sstevel@tonic-gate mnsr->sr_ctime, mnsr->sr_genid, &xep) == -1)
18740Sstevel@tonic-gate mdclrerror(&xep);
18750Sstevel@tonic-gate
18760Sstevel@tonic-gate free_sr((struct md_set_record *)mnsr);
18770Sstevel@tonic-gate nd = nd->nd_next;
18780Sstevel@tonic-gate }
18790Sstevel@tonic-gate } else {
18800Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
18810Sstevel@tonic-gate /* Skip empty slots */
18820Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
18830Sstevel@tonic-gate continue;
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate /* Record should be for a non-multi-node set */
18860Sstevel@tonic-gate if (clnt_getset(sd->sd_nodes[i], sp->setname,
18870Sstevel@tonic-gate MD_SET_BAD, &sr, &xep) == -1) {
18880Sstevel@tonic-gate mdclrerror(&xep);
18890Sstevel@tonic-gate continue;
18900Sstevel@tonic-gate }
18910Sstevel@tonic-gate
18920Sstevel@tonic-gate /*
18930Sstevel@tonic-gate * Set record structure was allocated from RPC
18940Sstevel@tonic-gate * routine getset so this structure is only of
18950Sstevel@tonic-gate * size md_set_record even if the MN flag is
18960Sstevel@tonic-gate * set. So, clear the flag so that the free
18970Sstevel@tonic-gate * code doesn't attempt to free a structure
18980Sstevel@tonic-gate * the size of md_mnset_record.
18990Sstevel@tonic-gate */
19000Sstevel@tonic-gate if (MD_MNSET_REC(sr)) {
19010Sstevel@tonic-gate sr->sr_flags &= ~MD_SR_MN;
19020Sstevel@tonic-gate free_sr(sr);
19030Sstevel@tonic-gate continue;
19040Sstevel@tonic-gate }
19050Sstevel@tonic-gate
19060Sstevel@tonic-gate /* Drive already added, skip to next node */
19070Sstevel@tonic-gate if (sr->sr_drivechain != NULL) {
19080Sstevel@tonic-gate free_sr(sr);
19090Sstevel@tonic-gate continue;
19100Sstevel@tonic-gate }
19110Sstevel@tonic-gate
19120Sstevel@tonic-gate if (clnt_adddrvs(sd->sd_nodes[i], sp, dd,
19130Sstevel@tonic-gate sr->sr_ctime, sr->sr_genid, &xep) == -1)
19140Sstevel@tonic-gate mdclrerror(&xep);
19150Sstevel@tonic-gate
19160Sstevel@tonic-gate free_sr(sr);
19170Sstevel@tonic-gate }
19180Sstevel@tonic-gate }
19190Sstevel@tonic-gate max_genid += 2;
19200Sstevel@tonic-gate }
19210Sstevel@tonic-gate
19220Sstevel@tonic-gate /*
19230Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
19240Sstevel@tonic-gate * At this point in time, don't know which nodes are joined
19250Sstevel@tonic-gate * to the set. So, send a reinit command to mdcommd
19260Sstevel@tonic-gate * which forces it to get fresh set description. Then send resume.
19270Sstevel@tonic-gate *
19280Sstevel@tonic-gate * Later, this code will use rpc.mdcommd messages to reattach disks
19290Sstevel@tonic-gate * and then rpc.mdcommd may be suspended again, rest of the nodes
19300Sstevel@tonic-gate * joined, rpc.mdcommd reinited and then resumed.
19310Sstevel@tonic-gate */
19320Sstevel@tonic-gate if (suspendall_flag) {
19330Sstevel@tonic-gate /* Send reinit */
19340Sstevel@tonic-gate nd = sd->sd_nodelist;
19350Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
19360Sstevel@tonic-gate while (nd) {
19370Sstevel@tonic-gate /* Class is ignored for REINIT */
19380Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
19390Sstevel@tonic-gate sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
19400Sstevel@tonic-gate mde_perror(&xep, dgettext(TEXT_DOMAIN,
19410Sstevel@tonic-gate "Unable to reinit rpc.mdcommd.\n"));
19420Sstevel@tonic-gate mdclrerror(&xep);
19430Sstevel@tonic-gate }
19440Sstevel@tonic-gate nd = nd->nd_next;
19450Sstevel@tonic-gate }
19460Sstevel@tonic-gate
19470Sstevel@tonic-gate /* Send resume */
19480Sstevel@tonic-gate nd = sd->sd_nodelist;
19490Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
19500Sstevel@tonic-gate while (nd) {
19510Sstevel@tonic-gate /*
19520Sstevel@tonic-gate * Resume all classes but class 1 so that lock is held
19530Sstevel@tonic-gate * against meta* commands.
19540Sstevel@tonic-gate * To later resume class1, must issue a class0 resume.
19550Sstevel@tonic-gate */
19560Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
19570Sstevel@tonic-gate sp, MD_MSG_CLASS0,
19580Sstevel@tonic-gate MD_MSCF_DONT_RESUME_CLASS1, &xep)) {
19590Sstevel@tonic-gate mde_perror(&xep, dgettext(TEXT_DOMAIN,
19600Sstevel@tonic-gate "Unable to resume rpc.mdcommd.\n"));
19610Sstevel@tonic-gate mdclrerror(&xep);
19620Sstevel@tonic-gate }
19630Sstevel@tonic-gate nd = nd->nd_next;
19640Sstevel@tonic-gate }
19650Sstevel@tonic-gate meta_ping_mnset(sp->setno);
19660Sstevel@tonic-gate }
19670Sstevel@tonic-gate
19680Sstevel@tonic-gate /* level 2 */
19690Sstevel@tonic-gate if (rb_level > 1) {
19700Sstevel@tonic-gate mdnamelist_t *nlp;
19710Sstevel@tonic-gate mdname_t *np;
19720Sstevel@tonic-gate
19730Sstevel@tonic-gate for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) {
19740Sstevel@tonic-gate uint_t rep_slice;
19750Sstevel@tonic-gate
19760Sstevel@tonic-gate if ((meta_replicaslice(ddp->dd_dnp,
19770Sstevel@tonic-gate &rep_slice, &xep) != 0) ||
19780Sstevel@tonic-gate ((np = metaslicename(ddp->dd_dnp, rep_slice,
19790Sstevel@tonic-gate &xep)) == NULL)) {
19800Sstevel@tonic-gate mdclrerror(&xep);
19810Sstevel@tonic-gate continue;
19820Sstevel@tonic-gate }
19830Sstevel@tonic-gate nlp = NULL;
19840Sstevel@tonic-gate (void) metanamelist_append(&nlp, np);
19850Sstevel@tonic-gate
19860Sstevel@tonic-gate if (meta_db_attach(sp, nlp,
19870Sstevel@tonic-gate (MDCHK_DRVINSET | MDCHK_SET_LOCKED),
19880Sstevel@tonic-gate &sd->sd_ctime, ddp->dd_dbcnt, ddp->dd_dbsize,
19890Sstevel@tonic-gate NULL, &xep) == -1)
19900Sstevel@tonic-gate mdclrerror(&xep);
19910Sstevel@tonic-gate
19920Sstevel@tonic-gate metafreenamelist(nlp);
19930Sstevel@tonic-gate }
19940Sstevel@tonic-gate /* Re-balance */
19950Sstevel@tonic-gate if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1)
19960Sstevel@tonic-gate mdclrerror(&xep);
19970Sstevel@tonic-gate }
19980Sstevel@tonic-gate
19990Sstevel@tonic-gate /* level 4 */
20000Sstevel@tonic-gate if (rb_level > 3) {
20010Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) {
20020Sstevel@tonic-gate if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep))
20030Sstevel@tonic-gate mdclrerror(&xep);
20040Sstevel@tonic-gate }
20050Sstevel@tonic-gate }
20060Sstevel@tonic-gate
20070Sstevel@tonic-gate /* level 5 */
20080Sstevel@tonic-gate if (rb_level > 4) {
20090Sstevel@tonic-gate if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
20100Sstevel@tonic-gate mdclrerror(&xep);
20110Sstevel@tonic-gate }
20120Sstevel@tonic-gate
20130Sstevel@tonic-gate /*
20140Sstevel@tonic-gate * If at least one node needs to be rejoined to MN diskset,
20150Sstevel@tonic-gate * then suspend commd again.
20160Sstevel@tonic-gate */
20170Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
20180Sstevel@tonic-gate nd = sd->sd_nodelist;
20190Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
20200Sstevel@tonic-gate while (nd) {
20210Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
20220Sstevel@tonic-gate nd = nd->nd_next;
20230Sstevel@tonic-gate continue;
20240Sstevel@tonic-gate }
20250Sstevel@tonic-gate break;
20260Sstevel@tonic-gate }
20270Sstevel@tonic-gate if (nd) {
20280Sstevel@tonic-gate /*
20290Sstevel@tonic-gate * Found node that will be rejoined so
20300Sstevel@tonic-gate * notify rpc.mdcommd on all nodes of a nodelist change.
20310Sstevel@tonic-gate * Start by suspending rpc.mdcommd (which drains it of
20320Sstevel@tonic-gate * all messages), then change the nodelist followed by
20330Sstevel@tonic-gate * a reinit and resume.
20340Sstevel@tonic-gate */
20350Sstevel@tonic-gate nd = sd->sd_nodelist;
20360Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
20370Sstevel@tonic-gate while (nd) {
20380Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename,
20390Sstevel@tonic-gate COMMDCTL_SUSPEND, sp, MD_MSG_CLASS0,
20400Sstevel@tonic-gate MD_MSCF_NO_FLAGS, &xep)) {
20410Sstevel@tonic-gate mdclrerror(&xep);
20420Sstevel@tonic-gate }
20430Sstevel@tonic-gate suspendall_flag_rb = 1;
20440Sstevel@tonic-gate nd = nd->nd_next;
20450Sstevel@tonic-gate }
20460Sstevel@tonic-gate }
20470Sstevel@tonic-gate }
20480Sstevel@tonic-gate
20490Sstevel@tonic-gate
20500Sstevel@tonic-gate
20510Sstevel@tonic-gate /* level 6 */
20520Sstevel@tonic-gate if (rb_level > 5) {
20530Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
20540Sstevel@tonic-gate int join_flags = 0;
20550Sstevel@tonic-gate
20560Sstevel@tonic-gate nd = sd->sd_nodelist;
20570Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
20580Sstevel@tonic-gate while (nd) {
20590Sstevel@tonic-gate /* Only rejoin nodes that were joined before */
20600Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) {
20610Sstevel@tonic-gate nd = nd->nd_next;
20620Sstevel@tonic-gate continue;
20630Sstevel@tonic-gate }
20640Sstevel@tonic-gate /*
20650Sstevel@tonic-gate * Rejoin nodes to same state as before -
20660Sstevel@tonic-gate * either STALE or non-STALE.
20670Sstevel@tonic-gate */
20680Sstevel@tonic-gate if (stale_bool == TRUE)
20690Sstevel@tonic-gate join_flags = MNSET_IS_STALE;
20700Sstevel@tonic-gate if (clnt_joinset(nd->nd_nodename, sp,
20710Sstevel@tonic-gate join_flags, &xep))
20720Sstevel@tonic-gate mdclrerror(&xep);
20730Sstevel@tonic-gate /* Sets OWN flag on all nodes in list */
20740Sstevel@tonic-gate if (clnt_upd_nr_flags(nd->nd_nodename, sp,
20750Sstevel@tonic-gate sd->sd_nodelist, MD_NR_JOIN, NULL, &xep)) {
20760Sstevel@tonic-gate mdclrerror(&xep);
20770Sstevel@tonic-gate }
20780Sstevel@tonic-gate nd = nd->nd_next;
20790Sstevel@tonic-gate }
20800Sstevel@tonic-gate } else {
20810Sstevel@tonic-gate if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
20820Sstevel@tonic-gate mdclrerror(&xep);
20830Sstevel@tonic-gate
20840Sstevel@tonic-gate /* No special flag for traditional diskset */
20850Sstevel@tonic-gate if (snarf_set(sp, NULL, &xep))
20860Sstevel@tonic-gate mdclrerror(&xep);
20870Sstevel@tonic-gate }
20880Sstevel@tonic-gate }
20890Sstevel@tonic-gate
20900Sstevel@tonic-gate /* level 1 */
20910Sstevel@tonic-gate if (rb_level > 0) {
20920Sstevel@tonic-gate /*
20930Sstevel@tonic-gate * Mark the drives as OK.
20940Sstevel@tonic-gate */
20950Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
20960Sstevel@tonic-gate nd = sd->sd_nodelist;
20970Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
20980Sstevel@tonic-gate while (nd) {
20990Sstevel@tonic-gate /*
21000Sstevel@tonic-gate * Must be last action before unlock.
21010Sstevel@tonic-gate * In case of panic, recovery code checks
21020Sstevel@tonic-gate * for MD_DR_OK to know that drive
21030Sstevel@tonic-gate * and possible master are fully added back.
21040Sstevel@tonic-gate */
21050Sstevel@tonic-gate if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd,
21060Sstevel@tonic-gate MD_DR_OK, &xep) == -1)
21070Sstevel@tonic-gate mdclrerror(&xep);
21080Sstevel@tonic-gate nd = nd->nd_next;
21090Sstevel@tonic-gate }
21100Sstevel@tonic-gate } else {
21110Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
21120Sstevel@tonic-gate /* Skip empty slots */
21130Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
21140Sstevel@tonic-gate continue;
21150Sstevel@tonic-gate
21160Sstevel@tonic-gate if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
21170Sstevel@tonic-gate MD_DR_OK, &xep) == -1)
21180Sstevel@tonic-gate mdclrerror(&xep);
21190Sstevel@tonic-gate
21200Sstevel@tonic-gate }
21210Sstevel@tonic-gate }
21220Sstevel@tonic-gate max_genid += 2;
21230Sstevel@tonic-gate resync_genid(sp, sd, max_genid, 0, NULL);
21240Sstevel@tonic-gate }
21250Sstevel@tonic-gate /*
21260Sstevel@tonic-gate * Notify rpc.mdcommd on all nodes of a nodelist change.
21270Sstevel@tonic-gate * Send a reinit command to mdcommd which forces it to get
21280Sstevel@tonic-gate * fresh set description.
21290Sstevel@tonic-gate */
21300Sstevel@tonic-gate if (suspendall_flag_rb) {
21310Sstevel@tonic-gate /* Send reinit */
21320Sstevel@tonic-gate nd = sd->sd_nodelist;
21330Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
21340Sstevel@tonic-gate while (nd) {
21350Sstevel@tonic-gate /* Class is ignored for REINIT */
21360Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT,
21370Sstevel@tonic-gate sp, NULL, MD_MSCF_NO_FLAGS, &xep)) {
21380Sstevel@tonic-gate mde_perror(&xep, dgettext(TEXT_DOMAIN,
21390Sstevel@tonic-gate "Unable to reinit rpc.mdcommd.\n"));
21400Sstevel@tonic-gate mdclrerror(&xep);
21410Sstevel@tonic-gate }
21420Sstevel@tonic-gate nd = nd->nd_next;
21430Sstevel@tonic-gate }
21440Sstevel@tonic-gate }
21450Sstevel@tonic-gate
21460Sstevel@tonic-gate /*
21470Sstevel@tonic-gate * Just resume all classes so that resume is the same whether
21480Sstevel@tonic-gate * just one class was locked or all classes were locked.
21490Sstevel@tonic-gate */
21500Sstevel@tonic-gate if ((suspend1_flag) || (suspendall_flag_rb) || (suspendall_flag)) {
21510Sstevel@tonic-gate /* Send resume */
21520Sstevel@tonic-gate nd = sd->sd_nodelist;
21530Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
21540Sstevel@tonic-gate while (nd) {
21550Sstevel@tonic-gate if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME,
21560Sstevel@tonic-gate sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) {
21570Sstevel@tonic-gate mde_perror(&xep, dgettext(TEXT_DOMAIN,
21580Sstevel@tonic-gate "Unable to resume rpc.mdcommd.\n"));
21590Sstevel@tonic-gate mdclrerror(&xep);
21600Sstevel@tonic-gate }
21610Sstevel@tonic-gate nd = nd->nd_next;
21620Sstevel@tonic-gate }
21630Sstevel@tonic-gate meta_ping_mnset(sp->setno);
21640Sstevel@tonic-gate }
21650Sstevel@tonic-gate
21660Sstevel@tonic-gate
21670Sstevel@tonic-gate /* level 0 */
21680Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname);
21690Sstevel@tonic-gate /* Don't test lock flag since guaranteed to be set if in rollback */
21700Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
21710Sstevel@tonic-gate nd = sd->sd_nodelist;
21720Sstevel@tonic-gate /* All nodes are guaranteed to be ALIVE */
21730Sstevel@tonic-gate while (nd) {
21740Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
21750Sstevel@tonic-gate mdclrerror(&xep);
21760Sstevel@tonic-gate nd = nd->nd_next;
21770Sstevel@tonic-gate }
21780Sstevel@tonic-gate } else {
21790Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
21800Sstevel@tonic-gate /* Skip empty slots */
21810Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
21820Sstevel@tonic-gate continue;
21830Sstevel@tonic-gate
21840Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
21850Sstevel@tonic-gate mdclrerror(&xep);
21860Sstevel@tonic-gate }
21870Sstevel@tonic-gate }
21880Sstevel@tonic-gate cl_set_setkey(NULL);
21890Sstevel@tonic-gate
21900Sstevel@tonic-gate /* release signals back to what they were on entry */
21910Sstevel@tonic-gate if (procsigs(FALSE, &oldsigs, &xep) < 0)
21920Sstevel@tonic-gate mdclrerror(&xep);
21930Sstevel@tonic-gate
21940Sstevel@tonic-gate metafreedrivedesc(&dd);
21950Sstevel@tonic-gate
21960Sstevel@tonic-gate if (flush_set_onerr) {
21970Sstevel@tonic-gate metaflushsetname(sp);
21980Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
21990Sstevel@tonic-gate md_rb_sig_handling_off(md_got_sig(), md_which_sig());
22000Sstevel@tonic-gate }
22010Sstevel@tonic-gate }
22020Sstevel@tonic-gate
22030Sstevel@tonic-gate return (rval);
22040Sstevel@tonic-gate }
2205