/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * Metadevice diskset interfaces */ #include "meta_set_prv.h" #include #include #include #include #define MALSIZ 32 static int add_lst(char ***listp, char *item) { int i, j; if (*listp) { for (i = 0; (*listp)[i]; i++) /* void */; } else { *listp = (char **)Zalloc(MALSIZ * sizeof (char *)); i = 0; } (*listp)[i] = Strdup(item); if ((++i % MALSIZ) == 0) { *listp = (char **)Realloc((void *)*listp, (i + MALSIZ) * sizeof (char *)); for (j = i; j < (i + MALSIZ); j++) (*listp)[j] = (char *)NULL; } return (i); } static int del_lst(char ***listp) { int i; if (*listp) { for (i = 0; (*listp)[i]; i++) free((*listp)[i]); free(*listp); *listp = NULL; return (1); } else return (0); } static int validate_med_nodes( mdsetname_t *sp, md_h_arr_t *mhp, md_error_t *ep ) { char *hostname; char *nodename; char *nm; char *cp; int i, j; for (i = 0; i < MED_MAX_HOSTS; i++) { if (mhp->n_lst[i].a_cnt == 0) continue; for (j = 0; j < mhp->n_lst[i].a_cnt; j++) { nm = mhp->n_lst[i].a_nm[j]; for (cp = nm; *cp; cp++) if (!isprint(*cp) || strchr(INVALID_IN_NAMES, *cp) != NULL) return (mddserror(ep, MDE_DS_INVALIDMEDNAME, sp->setno, nm, NULL, sp->setname)); if (clnt_med_hostname(nm, &hostname, ep)) return (-1); if (j == 0) { if (strcmp(nm, hostname) != 0) { Free(hostname); return (mddserror(ep, MDE_DS_NOTNODENAME, sp->setno, nm, NULL, sp->setname)); } nodename = nm; } else { if (strcmp(nodename, hostname) != 0) { Free(hostname); return (mddserror(ep, MDE_DS_ALIASNOMATCH, sp->setno, nm, nodename, sp->setname)); } } Free(hostname); } } return (0); } /* * Exported Entry Points */ int meta_set_addmeds( mdsetname_t *sp, int node_c, char **node_v, md_error_t *ep ) { md_set_desc *sd = NULL; md_drive_desc *dd = NULL; mddb_med_parm_t mp; mddb_med_upd_parm_t mup; md_h_arr_t t; md_h_arr_t rb_t; med_rec_t medr; med_rec_t rb_medr; char *cp; char **n_l = NULL; int n_c = 0; int i, j; sigset_t oldsigs; md_setkey_t *cl_sk; int rb_level = 0; md_error_t xep = mdnullerror; int rval = 0; int max_meds; md_mnnode_desc *nd; int suspend1_flag = 0; int lock_flag = 0; /* Initialize */ (void) memset(&t, '\0', sizeof (t)); t.n_cnt = node_c; mdclrerror(ep); if ((sd = metaget_setdesc(sp, ep)) == NULL) return (-1); /* Make sure we own the set */ if (meta_check_ownership(sp, ep) != 0) return (-1); if ((max_meds = get_max_meds(ep)) == 0) return (-1); /* * The mediator information (which is part of the set record) is * stored in the local mddbs of each node in the diskset. * Each node's rpc.metad daemon reads in the set * records from that node's local mddb and caches them * internally. Any process needing diskset information contacts its * local rpc.metad to get this information. Since each node in the * diskset is independently reading the set information from its local * mddb, the set records in the local mddbs must stay * in-sync, so that all nodes have a consistent view of the diskset. * * For a multinode diskset, explicitly verify that all nodes in the * diskset are ALIVE (i.e. are in the API membership list). Otherwise, * fail this operation since all nodes must be ALIVE in order to add * the mediator information to the set record in their local mddb. * If a panic of this node leaves the local mddbs set records * out-of-sync, the reconfig cycle will fix the local mddbs and * force them back into synchronization. */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, sp->setno, nd->nd_nodename, NULL, sp->setname); return (-1); } nd = nd->nd_next; } } /* Parse the command line into a the md_h_arr_t structure */ for (i = 0; i < t.n_cnt; i++) { cp = strtok(node_v[i], ","); j = 0; while (cp) { if (strlen(cp) > (size_t)MD_MAX_NODENAME) return (mddserror(ep, MDE_DS_NODENAMETOOLONG, sp->setno, cp, NULL, sp->setname)); if (j >= MAX_HOST_ADDRS) return (mddserror(ep, MDE_DS_TOOMANYALIAS, sp->setno, cp, NULL, sp->setname)); (void) strcpy(t.n_lst[i].a_nm[j], cp); j++; cp = strtok(NULL, ","); } t.n_lst[i].a_cnt = j; } /* Make a list of nodes to check */ for (i = 0; i < t.n_cnt; i++) for (j = 0; j < t.n_lst[i].a_cnt; j++) n_c = add_lst(&n_l, t.n_lst[i].a_nm[j]); /* Make sure that there are no redundant nodes */ rval = nodesuniq(sp, n_c, n_l, ep); (void) del_lst(&n_l); if (rval != 0) return (rval); /* * Lock the set on current set members. * Set locking done much earlier for MN diskset than for traditional * diskset since lock_set and SUSPEND are used to protect against * other metaset commands running on the other nodes. */ if (MD_MNSET_DESC(sd)) { /* Make sure we are blocking all signals */ if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_lock_set(nd->nd_nodename, sp, ep)) { rval = -1; goto out; } lock_flag = 1; nd = nd->nd_next; } /* * Lock out other meta* commands by suspending * class 1 messages across the diskset. */ nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) { rval = -1; goto out; } suspend1_flag = 1; nd = nd->nd_next; } } if (validate_med_nodes(sp, &t, ep)) { rval = -1; goto out; } /* Check existing mediators against new, if any */ if (sd->sd_med.n_cnt > 0) { for (i = 0; i < max_meds; i++) if (sd->sd_med.n_lst[i].a_cnt > 0) n_c = add_lst(&n_l, sd->sd_med.n_lst[i].a_nm[0]); for (i = 0; i < t.n_cnt; i++) { if (strinlst(t.n_lst[i].a_nm[0], n_c, n_l)) { (void) del_lst(&n_l); (void) mddserror(ep, MDE_DS_ISMED, sp->setno, t.n_lst[i].a_nm[0], NULL, sp->setname); rval = -1; goto out; } } (void) del_lst(&n_l); } if ((t.n_cnt + sd->sd_med.n_cnt) > max_meds) { (void) mderror(ep, MDE_TOOMANYMED, NULL); rval = -1; goto out; } /* Copy the current mediator list for rollback */ rb_t = sd->sd_med; /* structure assignment */ /* Setup the mediator record roll-back structure */ (void) memset(&rb_medr, '\0', sizeof (med_rec_t)); rb_medr.med_rec_mag = MED_REC_MAGIC; rb_medr.med_rec_rev = MED_REC_REV; rb_medr.med_rec_fl = 0; rb_medr.med_rec_sn = sp->setno; (void) strcpy(rb_medr.med_rec_snm, sp->setname); if (MD_MNSET_DESC(sd)) { /* * For a MN diskset the mediator is not given a list of * hosts in the set. Instead a generic name (multiowner) is * given to the mediator which will allow any node to access * the mediator data as long as it provides the correct * setname and set number. In a MN diskset, the mediator * data is only used when a first node joins the diskset * and becomes the master of the MN diskset. * * The traditional diskset code keeps the host list in * the mediator record up to date with respect to the host * list in the traditional diskset. This keeps an unauthorized * node in the traditional diskset from accessing the data * in the mediator record and being able to 'take' the * diskset. * * This additional check is needed in the traditional diskset * since a panic during the metaset command can leave * the diskset with some nodes thinking that an * action has occurred and other nodes thinking the opposite. * A node may have really been removed from a diskset, but * that node doesn't realize this so this node must be * blocked from using the mediator data when attempting * to 'take' the diskset. * (Traditional diskset code has each node's rpc.metad * cleaning up from an inconsistent state without any * knowledge from the other nodes in the diskset). * * In the MN diskset, the reconfig steps force a consistent * state across all nodes in the diskset, so no node * needs to be blocked from accessing the mediator data. * This allow the MN diskset to use a common 'nodename' * in the mediator record. This allows the mediator * daemon to remain unchanged even though a large number of * nodes are supported by the MN diskset. */ (void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER, MD_MAX_NODENAME_PLUS_1); } else { for (i = 0; i < MD_MAXSIDES; i++) (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]); } rb_medr.med_rec_meds = sd->sd_med; /* structure assigment */ (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t)); rb_medr.med_rec_foff = 0; crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL); /* Merge new mediators into the set record */ for (i = 0; i < t.n_cnt; i++) { for (j = 0; j < max_meds; j++) { if (sd->sd_med.n_lst[j].a_cnt > 0) continue; sd->sd_med.n_lst[j] = t.n_lst[i]; SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_MEDIATOR, sp->setno, j); sd->sd_med.n_cnt++; break; } } /* * Setup the kernel mediator list, which also validates that the * hosts have valid IP addresses */ (void) memset(&mp, '\0', sizeof (mddb_med_parm_t)); mp.med_setno = sp->setno; /* Copy the hostnames */ if (meta_h2hi(&sd->sd_med, &mp.med, ep)) { rval = -1; goto out; } /* Resolve the IP addresses for the host list */ if (meta_med_hnm2ip(&mp.med, ep)) { rval = -1; goto out; } /* Bring the mediator record up to date with the set record */ medr = rb_medr; /* structure assignment */ medr.med_rec_meds = sd->sd_med; /* structure assigment */ crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL); /* END CHECK CODE */ /* Lock the set on current set members */ if (!(MD_MNSET_DESC(sd))) { /* all signals already blocked for MN disket */ md_rb_sig_handling_on(); for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { rval = -1; goto out; } lock_flag = 1; } } RB_TEST(1, "meta_set_addmeds", ep) RB_PREEMPT; rb_level = 1; /* level 1 */ RB_TEST(2, "meta_set_addmeds", ep) /* * Add the new mediator information to all hosts in the set. * For MN diskset, each node sends mediator list to its kernel. */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep)) goto rollback; nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep)) goto rollback; } } RB_TEST(3, "meta_set_addmeds", ep) RB_PREEMPT; rb_level = 2; /* level 2 */ RB_TEST(4, "meta_set_addmeds", ep) if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep)) == NULL) { if (! mdisok(ep)) goto rollback; } RB_TEST(5, "meta_set_addmeds", ep) RB_PREEMPT; rb_level = 3; /* level 3 */ RB_TEST(6, "meta_set_addmeds", ep) /* Inform the mediator hosts of the new information */ for (i = 0; i < max_meds; i++) { if (sd->sd_med.n_lst[i].a_cnt == 0) continue; /* medr contains new mediator node list */ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep)) goto rollback; } RB_TEST(7, "meta_set_addmeds", ep) RB_PREEMPT; rb_level = 4; /* level 4 */ RB_TEST(8, "meta_set_addmeds", ep) /* In MN diskset, mediator list updated in clnt_updmeds call */ if (dd != NULL) { if (!(MD_MNSET_DESC(sd))) { if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) { (void) mdstealerror(ep, &mp.med_mde); goto rollback; } } /* * If only 50% mddbs available, mediator will be * golden by this ioctl on a traditional diskset. * * On a MN disket, this only happens if the mediator * add operation is executed on the master node. * If a slave node is adding the mediator, the mediator * won't be marked golden until the next mddb change. */ (void) memset(&mup, '\0', sizeof (mddb_med_upd_parm_t)); mup.med_setno = sp->setno; if (metaioctl(MD_MED_UPD_MED, &mup, &mup.med_mde, NULL) != 0) mdclrerror(&mup.med_mde); } out: if (suspend1_flag) { /* * Unlock diskset by resuming messages across the diskset. * Just resume all classes so that resume is the same whether * just one class was locked or all classes were locked. */ nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; mde_perror(ep, dgettext(TEXT_DOMAIN, "Unable to resume rpc.mdcommd.\n")); } nd = nd->nd_next; } meta_ping_mnset(sp->setno); } if (lock_flag) { cl_sk = cl_get_setkey(sp->setno, sp->setname); if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } } } cl_set_setkey(NULL); } metafreedrivedesc(&dd); if (MD_MNSET_DESC(sd)) { /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); } else { md_rb_sig_handling_off(md_got_sig(), md_which_sig()); } return (rval); rollback: /* all signals already blocked for MN disket */ if (!(MD_MNSET_DESC(sd))) { if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); } rval = -1; /* * level 4 * In MN diskset, mediator list updated in clnt_updmeds call */ if (rb_level > 3 && (dd != NULL) && (!(MD_MNSET_DESC(sd)))) { (void) memset(&mp, '\0', sizeof (mddb_med_parm_t)); mp.med_setno = sp->setno; (void) meta_h2hi(&rb_t, &mp.med, &xep); mdclrerror(&xep); (void) meta_med_hnm2ip(&mp.med, &xep); mdclrerror(&xep); (void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL); } /* level 3 */ if (rb_level > 2) { for (i = 0; i < max_meds; i++) { if (sd->sd_med.n_lst[i].a_cnt == 0) continue; /* * rb_medr contains the rollback mediator node list. * Send the rollback mediator information to the * new mediator node list. If a node had this RPC * called, but its node is not in the mediator node * list, rpc.metamedd will delete the mediator * record on that node. */ if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &rb_medr, &xep)) mdclrerror(&xep); } } /* level 2 */ if (rb_level > 1) { metafreedrivedesc(&dd); } /* level 1 */ if (rb_level > 0) { /* Delete mediator information from all hosts in the set */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_updmeds(nd->nd_nodename, sp, &rb_t, &xep)) mdclrerror(&xep); nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t, &xep)) mdclrerror(&xep); } } } /* level 0 */ if (suspend1_flag) { /* * Unlock diskset by resuming messages across the diskset. * Just resume all classes so that resume is the same whether * just one class was locked or all classes were locked. */ nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { mdclrerror(&xep); mde_perror(ep, dgettext(TEXT_DOMAIN, "Unable to resume rpc.mdcommd.\n")); } nd = nd->nd_next; } meta_ping_mnset(sp->setno); } if (lock_flag) { cl_sk = cl_get_setkey(sp->setno, sp->setname); if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { mdclrerror(&xep); } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { mdclrerror(&xep); } } } cl_set_setkey(NULL); } /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); if (!(MD_MNSET_DESC(sd))) { md_rb_sig_handling_off(md_got_sig(), md_which_sig()); } return (rval); } int meta_set_deletemeds( mdsetname_t *sp, int node_c, char **node_v, int forceflg, md_error_t *ep ) { md_set_desc *sd = NULL; md_drive_desc *dd = NULL; mddb_med_parm_t mp; md_h_arr_t rb_t; med_rec_t medr; med_rec_t rb_medr; int i, j; char **n_l = NULL; int n_c = 0; sigset_t oldsigs; md_setkey_t *cl_sk; int rb_level = 0; md_error_t xep = mdnullerror; int rval = 0; int max_meds; md_mnnode_desc *nd; int suspend1_flag = 0; int lock_flag = 0; mdclrerror(ep); if ((sd = metaget_setdesc(sp, ep)) == NULL) return (-1); /* Make sure we own the set */ if (meta_check_ownership(sp, ep) != 0) return (-1); for (i = 0; i < node_c; i++) if (strchr(node_v[i], ',') != NULL) return (mderror(ep, MDE_ONLYNODENAME, node_v[i])); if (nodesuniq(sp, node_c, node_v, ep)) return (-1); if ((max_meds = get_max_meds(ep)) == 0) return (-1); /* * The mediator information (which is part of the set record) is * stored in the local mddbs of each node in the diskset. * Each node's rpc.metad daemon reads in the set * records from that node's local mddb and caches them * internally. Any process needing diskset information contacts its * local rpc.metad to get this information. Since each node in the * diskset is independently reading the set information from its local * mddb, the set records in the local mddbs must stay * in-sync, so that all nodes have a consistent view of the diskset. * * For a multinode diskset, explicitly verify that all nodes in the * diskset are ALIVE (i.e. are in the API membership list). Otherwise, * fail this operation since all nodes must be ALIVE in order to delete * the mediator information from the set record in their local mddb. * If a panic of this node leaves the local mddbs set records * out-of-sync, the reconfig cycle will fix the local mddbs and * force them back into synchronization. */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, sp->setno, nd->nd_nodename, NULL, sp->setname); return (-1); } nd = nd->nd_next; } } if (sd->sd_med.n_cnt == 0) return (mderror(ep, MDE_NOMED, NULL)); /* Make a list of nodes to check */ for (i = 0; i < max_meds; i++) if (sd->sd_med.n_lst[i].a_cnt > 0) n_c = add_lst(&n_l, sd->sd_med.n_lst[i].a_nm[0]); for (i = 0; i < node_c; i++) { if (! strinlst(node_v[i], n_c, n_l)) { (void) del_lst(&n_l); return (mddserror(ep, MDE_DS_ISNOTMED, sp->setno, node_v[i], NULL, sp->setname)); } } (void) del_lst(&n_l); /* Save a copy of the current mediator information */ rb_t = sd->sd_med; /* structure assignment */ /* Setup the mediator record for rollback */ (void) memset(&rb_medr, '\0', sizeof (med_rec_t)); rb_medr.med_rec_mag = MED_REC_MAGIC; rb_medr.med_rec_rev = MED_REC_REV; rb_medr.med_rec_fl = 0; rb_medr.med_rec_sn = sp->setno; (void) strcpy(rb_medr.med_rec_snm, sp->setname); if (MD_MNSET_DESC(sd)) { /* * In MN diskset, use a generic nodename, multiowner, in the * mediator record which allows any node to access mediator * information. MN diskset reconfig cycle forces consistent * view of set/node/drive/mediator information across all nodes * in the MN diskset. This allows the relaxation of * node name checking in rpc.metamedd for MN disksets. * * In the traditional diskset, only a node that is in the * mediator record's diskset nodelist can access mediator * data. */ (void) strlcpy(rb_medr.med_rec_nodes[0], MED_MN_CALLER, MD_MAX_NODENAME_PLUS_1); } else { for (i = 0; i < MD_MAXSIDES; i++) (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]); } rb_medr.med_rec_meds = sd->sd_med; /* structure assignment */ (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t)); rb_medr.med_rec_foff = 0; crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL); /* Delete the mediators requested from the set */ for (i = 0; i < node_c; i++) { for (j = 0; j < max_meds; j++) { if (sd->sd_med.n_lst[j].a_cnt == 0) continue; if (strcmp(node_v[i], sd->sd_med.n_lst[j].a_nm[0]) != 0) continue; SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_MEDIATOR, sp->setno, j); (void) memset(&sd->sd_med.n_lst[j], '\0', sizeof (md_h_t)); sd->sd_med.n_cnt--; break; } } medr = rb_medr; /* structure assignment */ medr.med_rec_meds = sd->sd_med; /* structure assignment */ crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL); /* END CHECK CODE */ /* Lock the set on current set members */ if (MD_MNSET_DESC(sd)) { /* Make sure we are blocking all signals */ if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); /* * Lock the set on current set members. * lock_set and SUSPEND are used to protect against * other metaset commands running on the other nodes. */ nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_lock_set(nd->nd_nodename, sp, ep)) { if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { mdclrerror(ep); nd = nd->nd_next; continue; } rval = -1; goto out; } lock_flag = 1; nd = nd->nd_next; } /* * Lock out other meta* commands by suspending * class 1 messages across the diskset. */ nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, MD_MSCF_NO_FLAGS, ep)) { rval = -1; goto out; } suspend1_flag = 1; nd = nd->nd_next; } } else { md_rb_sig_handling_on(); for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) { mdclrerror(ep); continue; } rval = -1; goto out; } lock_flag = 1; } } RB_TEST(1, "meta_set_deletemeds", ep) RB_PREEMPT; rb_level = 1; /* level 1 */ RB_TEST(2, "meta_set_deletemeds", ep) /* Update the mediator information on all hosts in the set */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep)) { if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { mdclrerror(ep); continue; } goto rollback; } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep)) { if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) { mdclrerror(ep); continue; } goto rollback; } } } RB_TEST(3, "meta_set_deletemeds", ep) RB_PREEMPT; rb_level = 2; /* level 2 */ RB_TEST(5, "meta_set_deletemeds", ep) if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep)) == NULL) { if (! mdisok(ep)) goto rollback; } RB_TEST(5, "meta_set_deletemeds", ep) RB_PREEMPT; rb_level = 3; /* level 3 */ RB_TEST(6, "meta_set_deletemeds", ep) if (dd != NULL) { /* * Set up the parameters to the call to update the * kernel mediator list */ (void) memset(&mp, '\0', sizeof (mddb_med_parm_t)); mp.med_setno = sp->setno; if (meta_h2hi(&sd->sd_med, &mp.med, ep)) goto rollback; /* Resolve the IP addresses for the host list */ if (meta_med_hnm2ip(&mp.med, ep)) goto rollback; if (metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL) != 0) { (void) mdstealerror(ep, &mp.med_mde); goto rollback; } } RB_TEST(7, "meta_set_deletemeds", ep) RB_PREEMPT; rb_level = 4; /* level 4 */ RB_TEST(8, "meta_set_deletemeds", ep) /* Inform the mediator hosts of the new status */ for (i = 0; i < max_meds; i++) { if (rb_t.n_lst[i].a_cnt == 0) continue; /* * medr contains the new mediator node list. * Send the new mediator information to the * new mediator node list. If a node had this RPC * called, but its node is no longer in the new mediator * node list, rpc.metamedd will delete the mediator * record on that node. */ if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &medr, ep)) { if ((forceflg && mdanyrpcerror(ep)) || mdisrpcerror(ep, RPC_PROGNOTREGISTERED)) { mdclrerror(ep); continue; } goto rollback; } } out: if (dd) metafreedrivedesc(&dd); if (suspend1_flag) { /* * Unlock diskset by resuming messages across the diskset. * Just resume all classes so that resume is the same whether * just one class was locked or all classes were locked. */ nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; mde_perror(ep, dgettext(TEXT_DOMAIN, "Unable to resume rpc.mdcommd.\n")); } nd = nd->nd_next; } meta_ping_mnset(sp->setno); } cl_sk = cl_get_setkey(sp->setno, sp->setname); if (lock_flag) { if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { mdclrerror(ep); continue; } if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) { mdclrerror(ep); continue; } if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } } } } cl_set_setkey(NULL); if (MD_MNSET_DESC(sd)) { /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); } else { md_rb_sig_handling_off(md_got_sig(), md_which_sig()); } return (rval); rollback: /* all signals already blocked for MN disket */ if (!(MD_MNSET_DESC(sd))) { if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); } rval = -1; (void) del_lst(&n_l); /* level 4 */ if (rb_level > 4) { for (i = 0; i < max_meds; i++) { if (rb_t.n_lst[i].a_cnt == 0) continue; /* * rb_medr contains the rollback mediator node list. * Send the rollback mediator information to the * new mediator node list. This will recreate the * mediator record on all nodes where the mediator * record had been removed. */ if (clnt_med_upd_rec(&rb_t.n_lst[i], sp, &rb_medr, &xep)) mdclrerror(&xep); } } /* level 3 */ if (rb_level > 2 && dd != NULL) { (void) memset(&mp, '\0', sizeof (mddb_med_parm_t)); mp.med_setno = sp->setno; (void) meta_h2hi(&rb_t, &mp.med, &xep); mdclrerror(&xep); (void) meta_med_hnm2ip(&mp.med, &xep); mdclrerror(&xep); (void) metaioctl(MD_MED_SET_LST, &mp, &mp.med_mde, NULL); } /* level 2 */ if (rb_level > 1) { metafreedrivedesc(&dd); } /* level 1 */ if (rb_level > 0) { /* Delete mediator information from all hosts in the set */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* All nodes are guaranteed to be ALIVE */ if (clnt_updmeds(nd->nd_nodename, sp, &rb_t, &xep)) mdclrerror(&xep); nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_updmeds(sd->sd_nodes[i], sp, &rb_t, &xep)) mdclrerror(&xep); } } } /* level 0 */ cl_sk = cl_get_setkey(sp->setno, sp->setname); /* Unlock the set */ /* Don't test lock flag since guaranteed to be set if in rollback */ if (MD_MNSET_DESC(sd)) { /* * Unlock diskset by resuming messages across the diskset. * Just resume all classes so that resume is the same whether * just one class was locked or all classes were locked. */ if (suspend1_flag) { /* All nodes are guaranteed to be ALIVE */ nd = sd->sd_nodelist; while (nd) { if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { mde_perror(&xep, dgettext(TEXT_DOMAIN, "Unable to resume rpc.mdcommd.\n")); mdclrerror(&xep); } nd = nd->nd_next; } meta_ping_mnset(sp->setno); } nd = sd->sd_nodelist; /* All nodes are guaranteed to be ALIVE */ while (nd) { if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) mdclrerror(&xep); nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) mdclrerror(&xep); } } cl_set_setkey(NULL); /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); if (!(MD_MNSET_DESC(sd))) { md_rb_sig_handling_off(md_got_sig(), md_which_sig()); } return (rval); }