/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

270Sstevel@tonic-gate #include <meta.h>
280Sstevel@tonic-gate #include <sdssc.h>
290Sstevel@tonic-gate #include <signal.h>
300Sstevel@tonic-gate #include <syslog.h>
310Sstevel@tonic-gate #include <sys/types.h>
320Sstevel@tonic-gate #include <sys/wait.h>
330Sstevel@tonic-gate #include <sys/lvm/md_mirror.h>
340Sstevel@tonic-gate #include <metad.h>
350Sstevel@tonic-gate
#define	MY_VERSION		"1.0"	/* the highest supported version */
#define	MAX_DEBUG_LEVEL		5	/* maximum verbosity level */

/*
 * Mode bits understood by reset_state().  They are tested with '&' and so
 * may be OR-ed together by the caller.
 */
#define	RESET_OWNER		0x0001	/* clear owners that left the cluster */
#define	CHOOSE_OWNER		0x0002	/* choose/re-establish mirror owner */
#define	RESET_ABR		0x0004	/* open+close device to reset ABR */
#define	UPDATE_ABR		0x0008	/* copy ABR state from the master */
#define	GET_MIRROR_STATE	0x0010	/* fetch mirror state via ioctl */

/*
 * Per-set information flags.
 * NOTE(review): presumably kept in main()'s set_info[] array; the code that
 * uses them is outside this part of the file -- confirm.
 */
#define	SET_INFO_NO_WR		0x0002
#define	SET_INFO_MN		0x0004

/*
 * Every metaclust reconfiguration step we understand.  MC_UNK (0) is the
 * "not set" value that the global stepnum starts out with.
 */
typedef enum stpnum {
	MC_UNK = 0,
	MC_START = 1,
	MC_STOP = 2,
	MC_ABORT = 3,
	MC_RETURN = 4,
	MC_STEP1 = 5,
	MC_STEP2 = 6,
	MC_STEP3 = 7,
	MC_STEP4 = 8
} stepnum_t;

/*
 * One entry of the step_name -> step_number mapping.
 */
struct step_t {
	char		*step_nam;	/* step name as a string */
	stepnum_t	step_num;	/* matching step number */
};

/*
 * Step name to step number mapping table.
 *
 * The entries MUST stay sorted alphabetically (ascending strcmp() order of
 * step_nam).
 */
static struct step_t step_table[] = {
	{ "abort",	MC_ABORT },
	{ "return",	MC_RETURN },
	{ "start",	MC_START },
	{ "step1",	MC_STEP1 },
	{ "step2",	MC_STEP2 },
	{ "step3",	MC_STEP3 },
	{ "step4",	MC_STEP4 },
	{ "stop",	MC_STOP }
};

860Sstevel@tonic-gate /*
870Sstevel@tonic-gate * If support for a different version is added, the new version number should
880Sstevel@tonic-gate * be appended to the version_table below. This list will be searched to
890Sstevel@tonic-gate * determine if a version requested via the -V option is supported or not.
900Sstevel@tonic-gate */
910Sstevel@tonic-gate static char *version_table[] = {
920Sstevel@tonic-gate MY_VERSION
930Sstevel@tonic-gate };
940Sstevel@tonic-gate
950Sstevel@tonic-gate uint_t timeout = 0; /* disable timeout by default */
960Sstevel@tonic-gate char *version = MY_VERSION; /* use latest version by default */
970Sstevel@tonic-gate int stepnum = MC_UNK; /* reconfiguration step number */
980Sstevel@tonic-gate pid_t c_pid; /* child process id */
990Sstevel@tonic-gate
1000Sstevel@tonic-gate /*
1010Sstevel@tonic-gate * Binary search comparison routine
1020Sstevel@tonic-gate */
1030Sstevel@tonic-gate static int
mc_compare(const void * stp1,const void * stp2)1040Sstevel@tonic-gate mc_compare(const void *stp1, const void *stp2)
1050Sstevel@tonic-gate {
1060Sstevel@tonic-gate return (strcmp((const char *)stp1,
1070Sstevel@tonic-gate ((const struct step_t *)stp2)->step_nam));
1080Sstevel@tonic-gate }
1090Sstevel@tonic-gate
1100Sstevel@tonic-gate /*
1110Sstevel@tonic-gate * Timeout expiry alarm signal handler
1120Sstevel@tonic-gate */
1130Sstevel@tonic-gate /*ARGSUSED*/
1140Sstevel@tonic-gate static void
sigalarmhandler(int sig)1150Sstevel@tonic-gate sigalarmhandler(int sig)
1160Sstevel@tonic-gate {
1170Sstevel@tonic-gate int i, n, ret, stat_loc = 0;
1188452SJohn.Wren.Kennedy@Sun.COM FILE *pgcore;
1198452SJohn.Wren.Kennedy@Sun.COM char corecmd[256];
1200Sstevel@tonic-gate
1210Sstevel@tonic-gate n = sizeof (step_table) / sizeof (step_table[0]);
1220Sstevel@tonic-gate for (i = 0; i < n; i++) {
1230Sstevel@tonic-gate if (stepnum == step_table[i].step_num)
1240Sstevel@tonic-gate break;
1250Sstevel@tonic-gate }
1260Sstevel@tonic-gate
1270Sstevel@tonic-gate assert(i != n);
1280Sstevel@tonic-gate
1290Sstevel@tonic-gate meta_mc_log(MC_LOG1, gettext("Timeout expired in %s: %s"),
1300Sstevel@tonic-gate step_table[i].step_nam,
1310Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
1320Sstevel@tonic-gate
1338452SJohn.Wren.Kennedy@Sun.COM /*
1348452SJohn.Wren.Kennedy@Sun.COM * See what the child was actually doing when the timeout expired.
1358452SJohn.Wren.Kennedy@Sun.COM * A core-dump of this would be _really_ good, so let's just
1368452SJohn.Wren.Kennedy@Sun.COM * try a 'gcore -g c_pid' and hope
1378452SJohn.Wren.Kennedy@Sun.COM */
1388452SJohn.Wren.Kennedy@Sun.COM
1398452SJohn.Wren.Kennedy@Sun.COM (void) memset(corecmd, 0, sizeof (corecmd));
1408452SJohn.Wren.Kennedy@Sun.COM (void) snprintf(corecmd, sizeof (corecmd),
1418452SJohn.Wren.Kennedy@Sun.COM "/bin/gcore -g %d >/dev/null 2>&1", (int)c_pid);
1428452SJohn.Wren.Kennedy@Sun.COM
1438452SJohn.Wren.Kennedy@Sun.COM pgcore = popen(corecmd, "r");
1448452SJohn.Wren.Kennedy@Sun.COM
1458452SJohn.Wren.Kennedy@Sun.COM if (pgcore == NULL) {
1468452SJohn.Wren.Kennedy@Sun.COM meta_mc_log(MC_LOG1, gettext("Could not grab core for pid %s"),
1478452SJohn.Wren.Kennedy@Sun.COM c_pid);
1488452SJohn.Wren.Kennedy@Sun.COM } else {
1498452SJohn.Wren.Kennedy@Sun.COM (void) pclose(pgcore);
1508452SJohn.Wren.Kennedy@Sun.COM }
1518452SJohn.Wren.Kennedy@Sun.COM
1520Sstevel@tonic-gate if ((ret = kill(c_pid, SIGKILL)) == 0) {
1530Sstevel@tonic-gate /*
1540Sstevel@tonic-gate * The child will wait forever until the status is retrieved
1550Sstevel@tonic-gate * so get it now. Keep retrying if the call is interrupted.
1560Sstevel@tonic-gate *
1570Sstevel@tonic-gate * The possible results are,
1580Sstevel@tonic-gate *
1590Sstevel@tonic-gate * - child killed successfully
1600Sstevel@tonic-gate * - signal sent but child not killed
1610Sstevel@tonic-gate * - waitpid failed/interrupted
1620Sstevel@tonic-gate */
16311053SSurya.Prakki@Sun.COM (void) sleep(2);
1640Sstevel@tonic-gate while ((ret = waitpid(c_pid, &stat_loc, WNOHANG)) < 0) {
1650Sstevel@tonic-gate if (errno != EINTR) {
1660Sstevel@tonic-gate break;
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate if ((ret == c_pid) || (errno == ECHILD)) {
1700Sstevel@tonic-gate ret = 0;
1710Sstevel@tonic-gate } else {
1720Sstevel@tonic-gate ret = 1;
1730Sstevel@tonic-gate }
1740Sstevel@tonic-gate } else if (errno == ESRCH) {
1750Sstevel@tonic-gate /*
1760Sstevel@tonic-gate * If the kill did not catch the child then it means the child
1770Sstevel@tonic-gate * exited immediately after the timeout occured.
1780Sstevel@tonic-gate */
1790Sstevel@tonic-gate ret = 0;
1800Sstevel@tonic-gate }
1810Sstevel@tonic-gate
1820Sstevel@tonic-gate /*
1830Sstevel@tonic-gate * make sure not to exit with 205 for any steps other than step1-step4.
1840Sstevel@tonic-gate * Suncluster reconfiguration can't handle it otherwise.
1850Sstevel@tonic-gate */
1860Sstevel@tonic-gate switch (stepnum) {
1870Sstevel@tonic-gate case MC_STEP1:
1880Sstevel@tonic-gate case MC_STEP2:
1890Sstevel@tonic-gate case MC_STEP3:
1900Sstevel@tonic-gate case MC_STEP4:
1910Sstevel@tonic-gate /*
1920Sstevel@tonic-gate * If the child was killed successfully return 205 for a
1930Sstevel@tonic-gate * new reconfig cycle otherwise send 1 to panic the node.
1940Sstevel@tonic-gate */
1950Sstevel@tonic-gate if (ret != 0) {
1960Sstevel@tonic-gate md_eprintf(gettext("Could not kill child\n"));
1970Sstevel@tonic-gate exit(1);
1980Sstevel@tonic-gate } else {
1990Sstevel@tonic-gate exit(205);
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate break;
2020Sstevel@tonic-gate case MC_START:
2030Sstevel@tonic-gate case MC_STOP:
2040Sstevel@tonic-gate case MC_ABORT:
2050Sstevel@tonic-gate case MC_RETURN:
2060Sstevel@tonic-gate default:
2070Sstevel@tonic-gate exit(1);
2080Sstevel@tonic-gate break;
2090Sstevel@tonic-gate }
2100Sstevel@tonic-gate }
2110Sstevel@tonic-gate
2120Sstevel@tonic-gate /*
2130Sstevel@tonic-gate * Attempt to load local set.
2140Sstevel@tonic-gate * Returns:
2150Sstevel@tonic-gate * pointer to mdsetname_t for local set (local_sp) is successful.
2160Sstevel@tonic-gate * 0 if failure
2170Sstevel@tonic-gate * if there are no local set mddbs, no error message is printed.
2180Sstevel@tonic-gate * Otherwise, error message is printed so that user
2190Sstevel@tonic-gate * can determine why the local set didn't start.
2200Sstevel@tonic-gate */
2210Sstevel@tonic-gate mdsetname_t *
load_local_set(md_error_t * ep)2220Sstevel@tonic-gate load_local_set(md_error_t *ep)
2230Sstevel@tonic-gate {
2240Sstevel@tonic-gate mdsetname_t *local_sp = NULL;
2250Sstevel@tonic-gate
2260Sstevel@tonic-gate /* Does local set exist? If not, give no error */
2270Sstevel@tonic-gate if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
2280Sstevel@tonic-gate return (0);
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate
2310Sstevel@tonic-gate /*
2320Sstevel@tonic-gate * snarf local set
2330Sstevel@tonic-gate * If fails with MDE_DB_NODB, then just return 1 printing
2340Sstevel@tonic-gate * no failure.
2350Sstevel@tonic-gate * Otherwise, print error message, and return 1.
2360Sstevel@tonic-gate */
2370Sstevel@tonic-gate if (meta_setup_db_locations(ep) != 0) {
2380Sstevel@tonic-gate if (!(mdismddberror(ep, MDE_DB_NODB)))
2390Sstevel@tonic-gate mde_perror(ep, "");
2400Sstevel@tonic-gate return (0);
2410Sstevel@tonic-gate }
2420Sstevel@tonic-gate
2430Sstevel@tonic-gate /* local set loaded successfully */
2440Sstevel@tonic-gate return (local_sp);
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate /*
2480Sstevel@tonic-gate * Purpose: Compose a full path name for a metadevice
2490Sstevel@tonic-gate *
2500Sstevel@tonic-gate * On entry: sp - setname pointer
2510Sstevel@tonic-gate * mnum - minor number of metadevice
2520Sstevel@tonic-gate * pathname - pointer to array to return path string
2530Sstevel@tonic-gate * pathlen - max length of pathname array
2540Sstevel@tonic-gate */
2550Sstevel@tonic-gate static int
compose_path(mdsetname_t * sp,int mnum,char * pathname,int pathlen)2560Sstevel@tonic-gate compose_path(mdsetname_t *sp, int mnum, char *pathname, int pathlen)
2570Sstevel@tonic-gate {
2580Sstevel@tonic-gate int rtn;
2591623Stw21770 mdname_t *np;
2601623Stw21770 md_error_t status = mdnullerror;
2610Sstevel@tonic-gate
2620Sstevel@tonic-gate if (MD_MIN2SET(mnum) != sp->setno) {
2630Sstevel@tonic-gate md_eprintf(gettext("minor number 0x%x invalid for set %d\n"),
2640Sstevel@tonic-gate mnum, sp->setno);
2650Sstevel@tonic-gate return (-1);
2660Sstevel@tonic-gate }
2671623Stw21770
2681623Stw21770 if ((np = metamnumname(&sp, mnum, 0, &status)) == NULL) {
2691623Stw21770 return (-1);
2701623Stw21770 }
2711623Stw21770
2721623Stw21770 rtn = snprintf(pathname, pathlen, "%s", np->rname);
2730Sstevel@tonic-gate
2740Sstevel@tonic-gate if ((pathname[0] == '\0') || (rtn >= pathlen)) {
2750Sstevel@tonic-gate md_eprintf(gettext(
2761623Stw21770 "Could not create path for device %s\n"),
2771623Stw21770 get_mdname(sp, mnum));
2780Sstevel@tonic-gate return (-1);
2790Sstevel@tonic-gate }
2800Sstevel@tonic-gate return (0);
2810Sstevel@tonic-gate }
2820Sstevel@tonic-gate
2830Sstevel@tonic-gate /*
2840Sstevel@tonic-gate * Purpose: Walk through all the devices specified for the given set
2850Sstevel@tonic-gate * and do the action specified in mode
2860Sstevel@tonic-gate */
2870Sstevel@tonic-gate static int
reset_state(uint_t mode,mdsetname_t * sp,char * drivername,md_error_t * ep)2880Sstevel@tonic-gate reset_state(uint_t mode, mdsetname_t *sp, char *drivername, md_error_t *ep)
2890Sstevel@tonic-gate {
2900Sstevel@tonic-gate mdnamelist_t *devnlp = NULL;
2910Sstevel@tonic-gate mdnamelist_t *p;
2920Sstevel@tonic-gate mdname_t *devnp = NULL;
2930Sstevel@tonic-gate md_set_mmown_params_t ownpar_p;
2940Sstevel@tonic-gate md_set_mmown_params_t *ownpar = &ownpar_p;
2950Sstevel@tonic-gate md_unit_t *mm;
2960Sstevel@tonic-gate int mirror_dev = 0;
2970Sstevel@tonic-gate mndiskset_membershiplist_t *nl;
2980Sstevel@tonic-gate int cnt;
2990Sstevel@tonic-gate int has_parent;
3000Sstevel@tonic-gate md_mn_get_mir_state_t mir_state_p;
3010Sstevel@tonic-gate md_mn_get_mir_state_t *mir_state = &mir_state_p;
3020Sstevel@tonic-gate
3030Sstevel@tonic-gate /*
3040Sstevel@tonic-gate * if we are choosing or resetting the owners then make sure
3050Sstevel@tonic-gate * we are only doing it for mirror devices
3060Sstevel@tonic-gate */
3070Sstevel@tonic-gate mirror_dev = (strcmp(MD_MIRROR, drivername) == 0);
3080Sstevel@tonic-gate if ((mode & (RESET_OWNER | CHOOSE_OWNER)) && !mirror_dev) {
3090Sstevel@tonic-gate return (-1);
3100Sstevel@tonic-gate }
3110Sstevel@tonic-gate
3120Sstevel@tonic-gate /* get a list of all the metadevices for current set */
3130Sstevel@tonic-gate if (mirror_dev && meta_get_mirror_names(sp, &devnlp, 0, ep) < 0) {
3140Sstevel@tonic-gate mde_perror(ep, gettext("Could not get mirrors for set %s"),
3150Sstevel@tonic-gate sp->setname);
3160Sstevel@tonic-gate return (-1);
3170Sstevel@tonic-gate } else if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) {
3180Sstevel@tonic-gate mde_perror(ep, gettext(
3190Sstevel@tonic-gate "Could not get soft partitions for set %s"), sp->setname);
3200Sstevel@tonic-gate return (-1);
3210Sstevel@tonic-gate }
3220Sstevel@tonic-gate
3230Sstevel@tonic-gate /* If resetting the owner, get the known membership list */
3240Sstevel@tonic-gate if (mode & RESET_OWNER) {
3250Sstevel@tonic-gate if (meta_read_nodelist(&cnt, &nl, ep)) {
3260Sstevel@tonic-gate mde_perror(ep, "Could not get nodelist");
3270Sstevel@tonic-gate return (-1);
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate
3310Sstevel@tonic-gate /* for each metadevice */
3320Sstevel@tonic-gate for (p = devnlp; (p != NULL); p = p->next) {
3330Sstevel@tonic-gate devnp = p->namep;
3340Sstevel@tonic-gate
3350Sstevel@tonic-gate /*
3360Sstevel@tonic-gate * Get the current setting for mirror ABR state and all of the
3370Sstevel@tonic-gate * submirror state and flags from the master node. We only
3380Sstevel@tonic-gate * perform this when going through a 'start' cycle.
3390Sstevel@tonic-gate */
3400Sstevel@tonic-gate if ((mode & GET_MIRROR_STATE) && mirror_dev) {
3410Sstevel@tonic-gate char *miscname;
3420Sstevel@tonic-gate
3430Sstevel@tonic-gate /*
3440Sstevel@tonic-gate * Ensure that we ignore soft-parts that are returned
3450Sstevel@tonic-gate * from the meta_get_mirror_names() call
3460Sstevel@tonic-gate */
3470Sstevel@tonic-gate if ((miscname = metagetmiscname(devnp, ep)) == NULL)
3480Sstevel@tonic-gate goto out;
3490Sstevel@tonic-gate if (strcmp(miscname, MD_MIRROR) != 0)
3500Sstevel@tonic-gate continue;
3510Sstevel@tonic-gate
3520Sstevel@tonic-gate mir_state->mnum = meta_getminor(devnp->dev);
3530Sstevel@tonic-gate MD_SETDRIVERNAME(mir_state, MD_MIRROR, sp->setno);
3540Sstevel@tonic-gate meta_mc_log(MC_LOG4, gettext("Getting mirror state"
3551623Stw21770 " for %s: %s"), get_mdname(sp, mir_state->mnum),
3560Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
3570Sstevel@tonic-gate
3580Sstevel@tonic-gate if (metaioctl(MD_MN_GET_MIRROR_STATE, mir_state, ep,
3590Sstevel@tonic-gate "MD_MN_GET_MIRROR_STATE") != 0) {
3600Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get "
3611623Stw21770 "mirror state for %s"),
3621623Stw21770 get_mdname(sp, mir_state->mnum));
3630Sstevel@tonic-gate goto out;
3640Sstevel@tonic-gate } else {
3650Sstevel@tonic-gate continue;
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate }
3680Sstevel@tonic-gate
3690Sstevel@tonic-gate /* check if this is a top level metadevice */
3700Sstevel@tonic-gate if ((mm = meta_get_mdunit(sp, devnp, ep)) == NULL)
3710Sstevel@tonic-gate goto out;
3720Sstevel@tonic-gate if (MD_HAS_PARENT(MD_PARENT(mm))) {
3730Sstevel@tonic-gate has_parent = 1;
3740Sstevel@tonic-gate } else {
3750Sstevel@tonic-gate has_parent = 0;
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate Free(mm);
3780Sstevel@tonic-gate
3790Sstevel@tonic-gate if (mode & (RESET_OWNER | CHOOSE_OWNER)) {
3800Sstevel@tonic-gate char *miscname;
3810Sstevel@tonic-gate
3820Sstevel@tonic-gate /*
3830Sstevel@tonic-gate * we can only do these for mirrors so make sure we
3840Sstevel@tonic-gate * really have a mirror device and not a softpartition
3850Sstevel@tonic-gate * imitating one. meta_get_mirror_names seems to think
3860Sstevel@tonic-gate * softparts on top of a mirror are mirrors!
3870Sstevel@tonic-gate */
3880Sstevel@tonic-gate if ((miscname = metagetmiscname(devnp, ep)) == NULL)
3890Sstevel@tonic-gate goto out;
3900Sstevel@tonic-gate if (strcmp(miscname, MD_MIRROR) != 0)
3910Sstevel@tonic-gate continue;
3920Sstevel@tonic-gate
3930Sstevel@tonic-gate (void) memset(ownpar, 0, sizeof (*ownpar));
3940Sstevel@tonic-gate ownpar->d.mnum = meta_getminor(devnp->dev);
3950Sstevel@tonic-gate MD_SETDRIVERNAME(ownpar, MD_MIRROR, sp->setno);
3960Sstevel@tonic-gate
3970Sstevel@tonic-gate meta_mc_log(MC_LOG4, gettext("Setting owner "
3981623Stw21770 "for %s: %s"), get_mdname(sp, ownpar->d.mnum),
3990Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
4000Sstevel@tonic-gate
4010Sstevel@tonic-gate /* get the current owner id */
4020Sstevel@tonic-gate if (metaioctl(MD_MN_GET_MM_OWNER, ownpar, ep,
4030Sstevel@tonic-gate "MD_MN_GET_MM_OWNER") != 0) {
4040Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get "
4051623Stw21770 "mirror owner for %s"),
4061623Stw21770 get_mdname(sp, ownpar->d.mnum));
4070Sstevel@tonic-gate goto out;
4080Sstevel@tonic-gate }
4090Sstevel@tonic-gate }
4100Sstevel@tonic-gate
4110Sstevel@tonic-gate if (mode & RESET_OWNER) {
4120Sstevel@tonic-gate if (ownpar->d.owner == MD_MN_MIRROR_UNOWNED) {
4130Sstevel@tonic-gate mdclrerror(ep);
4140Sstevel@tonic-gate continue;
4150Sstevel@tonic-gate }
4160Sstevel@tonic-gate
4170Sstevel@tonic-gate /*
4180Sstevel@tonic-gate * reset owner only if the current owner is
4190Sstevel@tonic-gate * not in the membership list
4200Sstevel@tonic-gate * Also kill the resync thread so that when the resync
4210Sstevel@tonic-gate * is started, it will perform an optimized resync
4220Sstevel@tonic-gate * for any resync regions that were dirty when the
4230Sstevel@tonic-gate * current owner left the membership.
4240Sstevel@tonic-gate */
4250Sstevel@tonic-gate if (meta_is_member(NULL, ownpar->d.owner, nl) != 1) {
4260Sstevel@tonic-gate if (meta_mn_change_owner(&ownpar,
4270Sstevel@tonic-gate sp->setno, ownpar->d.mnum,
4280Sstevel@tonic-gate MD_MN_MIRROR_UNOWNED,
4290Sstevel@tonic-gate MD_MN_MM_ALLOW_CHANGE) == -1) {
4300Sstevel@tonic-gate md_eprintf(gettext(
4310Sstevel@tonic-gate "Unable to reset mirror owner "
4321623Stw21770 "for %s\n"),
4331623Stw21770 get_mdname(sp, ownpar->d.mnum));
4340Sstevel@tonic-gate goto out;
4350Sstevel@tonic-gate }
4360Sstevel@tonic-gate if (meta_mirror_resync(sp, devnp, 0, ep,
4370Sstevel@tonic-gate MD_RESYNC_KILL_NO_WAIT) != 0) {
4380Sstevel@tonic-gate md_eprintf(gettext(
4390Sstevel@tonic-gate "Unable to kill resync for"
4401623Stw21770 " %s\n"),
4411623Stw21770 get_mdname(sp, ownpar->d.mnum));
4420Sstevel@tonic-gate goto out;
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate }
4450Sstevel@tonic-gate }
4460Sstevel@tonic-gate
4470Sstevel@tonic-gate if (mode & CHOOSE_OWNER) {
4480Sstevel@tonic-gate /*
4490Sstevel@tonic-gate * only orphaned resyncs will have no owner.
4500Sstevel@tonic-gate * if that is the case choose a new owner. Otherwise
4510Sstevel@tonic-gate * re-establish the existing owner. This covers the
4520Sstevel@tonic-gate * case where a node that owned the mirror
4530Sstevel@tonic-gate * reboots/panics and comes back into the cluster before
4540Sstevel@tonic-gate * the reconfig cycle has completed. In this case the
4550Sstevel@tonic-gate * other cluster nodes will have the mirror owner marked
4560Sstevel@tonic-gate * as the rebooted node while it has the owner marked
4570Sstevel@tonic-gate * as 'None'. We have to reestablish the ownership so
4580Sstevel@tonic-gate * that the subsequent resync can continue.
4590Sstevel@tonic-gate */
4600Sstevel@tonic-gate if (meta_mn_change_owner(&ownpar, sp->setno,
4610Sstevel@tonic-gate ownpar->d.mnum, ownpar->d.owner,
4620Sstevel@tonic-gate MD_MN_MM_CHOOSE_OWNER) == -1) {
4630Sstevel@tonic-gate md_eprintf(gettext("Unable to choose "
4641623Stw21770 "mirror owner for %s\n"),
4651623Stw21770 get_mdname(sp, ownpar->d.mnum));
4660Sstevel@tonic-gate goto out;
4670Sstevel@tonic-gate }
4680Sstevel@tonic-gate }
4690Sstevel@tonic-gate
4700Sstevel@tonic-gate /*
4710Sstevel@tonic-gate * For RESET_ABR and UPDATE_ABR - only handle top
4720Sstevel@tonic-gate * level metadevices.
4730Sstevel@tonic-gate */
4740Sstevel@tonic-gate if (has_parent)
4750Sstevel@tonic-gate continue;
4760Sstevel@tonic-gate
4770Sstevel@tonic-gate if (mode & RESET_ABR) {
4780Sstevel@tonic-gate /*
4790Sstevel@tonic-gate * Reset the ABR (application based recovery)
4800Sstevel@tonic-gate * value on all nodes. We are dealing with
4810Sstevel@tonic-gate * the possibility that we have ABR set but the
4820Sstevel@tonic-gate * only node that had the device open with ABR has
4830Sstevel@tonic-gate * left the cluster. We simply open and close the
4840Sstevel@tonic-gate * device and if this is the last close in the
4850Sstevel@tonic-gate * cluster, ABR will be cleared on all nodes.
4860Sstevel@tonic-gate */
4870Sstevel@tonic-gate char *miscname;
4881623Stw21770 char name[MAXPATHLEN];
4890Sstevel@tonic-gate int mnum, fd;
4900Sstevel@tonic-gate
4910Sstevel@tonic-gate name[0] = '\0';
4920Sstevel@tonic-gate mnum = meta_getminor(devnp->dev);
4930Sstevel@tonic-gate
4940Sstevel@tonic-gate /*
4950Sstevel@tonic-gate * Ensure that we don't include soft-parts in the
4960Sstevel@tonic-gate * mirror-only call to RESET_ABR. meta_get_mirror_names
4970Sstevel@tonic-gate * returns a bogus list that includes all soft-parts
4980Sstevel@tonic-gate * built on mirrors.
4990Sstevel@tonic-gate */
5000Sstevel@tonic-gate if ((miscname = metagetmiscname(devnp, ep)) == NULL)
5010Sstevel@tonic-gate goto out;
5020Sstevel@tonic-gate if (mirror_dev && (strcmp(miscname, MD_MIRROR) != 0))
5030Sstevel@tonic-gate continue;
5040Sstevel@tonic-gate
5050Sstevel@tonic-gate meta_mc_log(MC_LOG4, gettext("Re-setting ABR state "
5061623Stw21770 "for %s: %s"), get_mdname(sp, mnum),
5070Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
5080Sstevel@tonic-gate
5090Sstevel@tonic-gate /* compose the absolute device path and open it */
5100Sstevel@tonic-gate if (compose_path(sp, mnum, &name[0],
5110Sstevel@tonic-gate sizeof (name)) != 0)
5120Sstevel@tonic-gate goto out;
5130Sstevel@tonic-gate if ((fd = open(name, O_RDWR, 0)) < 0) {
5140Sstevel@tonic-gate md_perror(gettext("Could not open device %s"),
5150Sstevel@tonic-gate name);
5160Sstevel@tonic-gate continue;
5170Sstevel@tonic-gate }
5180Sstevel@tonic-gate
5190Sstevel@tonic-gate (void) close(fd);
5200Sstevel@tonic-gate }
5210Sstevel@tonic-gate
5220Sstevel@tonic-gate if (mode & UPDATE_ABR) {
5230Sstevel@tonic-gate /*
5240Sstevel@tonic-gate * Update the ABR value on this node. We obtain the
5250Sstevel@tonic-gate * current ABR state from the master node.
5260Sstevel@tonic-gate */
5270Sstevel@tonic-gate
5280Sstevel@tonic-gate char *miscname;
5291623Stw21770 char name[MAXPATHLEN];
5300Sstevel@tonic-gate int mnum, fd;
5310Sstevel@tonic-gate volcap_t vc;
5320Sstevel@tonic-gate uint_t tstate;
5330Sstevel@tonic-gate
5340Sstevel@tonic-gate name[0] = '\0';
5350Sstevel@tonic-gate mnum = meta_getminor(devnp->dev);
5360Sstevel@tonic-gate
5370Sstevel@tonic-gate /*
5380Sstevel@tonic-gate * Ensure that we don't include soft-parts in the
5390Sstevel@tonic-gate * mirror-only call to UPDATE_ABR. meta_get_mirror_names
5400Sstevel@tonic-gate * returns a bogus list that includes all soft-parts
5410Sstevel@tonic-gate * built on mirrors.
5420Sstevel@tonic-gate */
5430Sstevel@tonic-gate if ((miscname = metagetmiscname(devnp, ep)) == NULL)
5440Sstevel@tonic-gate goto out;
5450Sstevel@tonic-gate if (mirror_dev && (strcmp(miscname, MD_MIRROR) != 0))
5460Sstevel@tonic-gate continue;
5470Sstevel@tonic-gate
5480Sstevel@tonic-gate /* Get tstate from Master */
5490Sstevel@tonic-gate if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep)
5500Sstevel@tonic-gate != 0)
5510Sstevel@tonic-gate continue;
5520Sstevel@tonic-gate /* If not set on the master, nothing to do */
5530Sstevel@tonic-gate if (!(tstate & MD_ABR_CAP))
5540Sstevel@tonic-gate continue;
5550Sstevel@tonic-gate
5560Sstevel@tonic-gate meta_mc_log(MC_LOG4, gettext("Updating ABR state "
5571623Stw21770 "for %s: %s"), get_mdname(sp, mnum),
5580Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
5590Sstevel@tonic-gate
5600Sstevel@tonic-gate /* compose the absolute device path and open it */
5610Sstevel@tonic-gate if (compose_path(sp, mnum, &name[0],
5620Sstevel@tonic-gate sizeof (name)) != 0)
5630Sstevel@tonic-gate goto out;
5640Sstevel@tonic-gate if ((fd = open(name, O_RDWR, 0)) < 0) {
5650Sstevel@tonic-gate md_perror(gettext("Could not open device %s"),
5660Sstevel@tonic-gate name);
5670Sstevel@tonic-gate continue;
5680Sstevel@tonic-gate }
5690Sstevel@tonic-gate
5700Sstevel@tonic-gate /* set ABR state */
5710Sstevel@tonic-gate vc.vc_info = 0;
5720Sstevel@tonic-gate vc.vc_set = 0;
5730Sstevel@tonic-gate if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) {
5740Sstevel@tonic-gate /*
5750Sstevel@tonic-gate * Ignore if device does not support this
5760Sstevel@tonic-gate * ioctl
5770Sstevel@tonic-gate */
5780Sstevel@tonic-gate if ((errno != ENOTTY) && (errno != ENOTSUP)) {
5790Sstevel@tonic-gate md_perror(gettext("Could not get "
5800Sstevel@tonic-gate "ABR/DMR state for device %s"),
5810Sstevel@tonic-gate name);
5820Sstevel@tonic-gate }
5830Sstevel@tonic-gate (void) close(fd);
5840Sstevel@tonic-gate continue;
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate if (!(vc.vc_info & (DKV_ABR_CAP | DKV_DMR_CAP))) {
5870Sstevel@tonic-gate (void) close(fd);
5880Sstevel@tonic-gate continue;
5890Sstevel@tonic-gate }
5900Sstevel@tonic-gate
5910Sstevel@tonic-gate vc.vc_set = DKV_ABR_CAP;
5920Sstevel@tonic-gate if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) {
5930Sstevel@tonic-gate md_perror(gettext(
5940Sstevel@tonic-gate "Could not set ABR state for "
5950Sstevel@tonic-gate "device %s"), name);
5960Sstevel@tonic-gate (void) close(fd);
5970Sstevel@tonic-gate goto out;
5980Sstevel@tonic-gate } else {
5990Sstevel@tonic-gate md_eprintf(gettext(
6000Sstevel@tonic-gate "Setting ABR state on device %s\n"), name);
6010Sstevel@tonic-gate }
6020Sstevel@tonic-gate
6030Sstevel@tonic-gate (void) close(fd);
6040Sstevel@tonic-gate }
6050Sstevel@tonic-gate }
6060Sstevel@tonic-gate
6070Sstevel@tonic-gate /* cleanup */
6080Sstevel@tonic-gate if (mode & RESET_OWNER) {
6090Sstevel@tonic-gate meta_free_nodelist(nl);
6100Sstevel@tonic-gate }
6110Sstevel@tonic-gate metafreenamelist(devnlp);
6120Sstevel@tonic-gate return (0);
6130Sstevel@tonic-gate
6140Sstevel@tonic-gate out:
6150Sstevel@tonic-gate /* cleanup */
6160Sstevel@tonic-gate if (mode & RESET_OWNER) {
6170Sstevel@tonic-gate meta_free_nodelist(nl);
6180Sstevel@tonic-gate }
6190Sstevel@tonic-gate metafreenamelist(devnlp);
6200Sstevel@tonic-gate return (-1);
6210Sstevel@tonic-gate }
6220Sstevel@tonic-gate
6230Sstevel@tonic-gate /*
6240Sstevel@tonic-gate * Print usage message
6250Sstevel@tonic-gate */
6260Sstevel@tonic-gate static void
usage(mdsetname_t * sp,int eval)6270Sstevel@tonic-gate usage(mdsetname_t *sp, int eval)
6280Sstevel@tonic-gate {
6290Sstevel@tonic-gate (void) fprintf(stderr, gettext("usage:"
6300Sstevel@tonic-gate "\t%s [-V version] [-t timeout] [-d level] start localnodeid\n"
6310Sstevel@tonic-gate "\t%s [-V version] [-t timeout] [-d level] step nodelist...\n"
6320Sstevel@tonic-gate "\t%s [-V version] [-t timeout] [-d level] abort | stop\n"
6330Sstevel@tonic-gate "\t%s [-V | -? | -h]\n"),
6340Sstevel@tonic-gate myname, myname, myname, myname);
6350Sstevel@tonic-gate if (!eval) {
63611053SSurya.Prakki@Sun.COM (void) fprintf(stderr, gettext("\n"
6370Sstevel@tonic-gate "\tValid debug (-d) levels are 1-%d for increasing "
6380Sstevel@tonic-gate "verbosity.\n\tDefault is -d 3.\n\n"
6390Sstevel@tonic-gate "\tValid step values are: return | step1 | step2 | "
6400Sstevel@tonic-gate "step3 | step4\n\n"
6410Sstevel@tonic-gate "\tNodelist is a space-separated list of node id's\n\n"),
6420Sstevel@tonic-gate MAX_DEBUG_LEVEL);
6430Sstevel@tonic-gate }
6440Sstevel@tonic-gate md_exit(sp, eval);
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate
/*
 * Input:	Input takes a config step name followed by a list of
 *		possible node id's.
 *
 * Returns:	0 - Success
 *		1 - Fail
 *			Node will be removed from cluster membership
 *			by forcing node to panic.
 *		205 - Unsuccessful. Start another reconfig cycle.
 *			Problem was encountered that could be fixed by
 *			running another reconfig cycle.
 *			Problem could be a result of a failure to read
 *			the nodelist file or that all work could not be
 *			accomplished in a reconfig step in the amount of
 *			time given so another reconfig cycle is needed in
 *			order to finish the current step.
 */
6640Sstevel@tonic-gate int
main(int argc,char ** argv)6650Sstevel@tonic-gate main(int argc, char **argv)
6660Sstevel@tonic-gate {
6670Sstevel@tonic-gate mdsetname_t *sp = NULL;
6680Sstevel@tonic-gate md_error_t status = mdnullerror;
6690Sstevel@tonic-gate md_error_t *ep = &status;
6700Sstevel@tonic-gate set_t max_sets, setno;
6710Sstevel@tonic-gate int c, clust = 0;
6720Sstevel@tonic-gate struct sigaction nsa, osa;
6730Sstevel@tonic-gate struct step_t *step_ptr;
6740Sstevel@tonic-gate mdsetname_t *local_sp = NULL;
6750Sstevel@tonic-gate md_drive_desc *dd;
6760Sstevel@tonic-gate int rval = 0;
6770Sstevel@tonic-gate md_set_desc *sd;
6780Sstevel@tonic-gate mddb_block_parm_t mbp;
6790Sstevel@tonic-gate uint_t debug = 3; /* log upto MC_LOG3 by default */
6800Sstevel@tonic-gate int version_table_size;
6810Sstevel@tonic-gate mddb_setflags_config_t sf;
6820Sstevel@tonic-gate int ret_val;
6830Sstevel@tonic-gate mddb_config_t cfg;
6840Sstevel@tonic-gate int set_info[MD_MAXSETS];
6853073Sjkennedy long commd_timeout = 0;
6860Sstevel@tonic-gate
6870Sstevel@tonic-gate /*
6880Sstevel@tonic-gate * Get the locale set up before calling any other routines
6890Sstevel@tonic-gate * with messages to output. Just in case we're not in a build
6900Sstevel@tonic-gate * environment, make sure that TEXT_DOMAIN gets set to
6910Sstevel@tonic-gate * something.
6920Sstevel@tonic-gate */
6930Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
6940Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST"
6950Sstevel@tonic-gate #endif
6960Sstevel@tonic-gate (void) setlocale(LC_ALL, "");
6970Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN);
6980Sstevel@tonic-gate
6990Sstevel@tonic-gate if ((clust = sdssc_bind_library()) == SDSSC_ERROR) {
7000Sstevel@tonic-gate md_eprintf(gettext("Interface error with libsds_sc.so\n"));
7010Sstevel@tonic-gate exit(1);
7020Sstevel@tonic-gate }
7030Sstevel@tonic-gate
7040Sstevel@tonic-gate if (md_init(argc, argv, 1, 1, ep) != 0 || meta_check_root(ep) != 0) {
7050Sstevel@tonic-gate mde_perror(ep, "");
7060Sstevel@tonic-gate md_exit(sp, 1);
7070Sstevel@tonic-gate }
7080Sstevel@tonic-gate
7090Sstevel@tonic-gate /*
7100Sstevel@tonic-gate * open log and enable libmeta logging. Do it here explicitly
7110Sstevel@tonic-gate * rather than letting md_init() do it because we are not really
7120Sstevel@tonic-gate * a daemon and that is what md_init() opens the log as.
7130Sstevel@tonic-gate */
7140Sstevel@tonic-gate openlog("metaclust", LOG_CONS, LOG_USER);
7150Sstevel@tonic-gate
7160Sstevel@tonic-gate version_table_size = sizeof (version_table) / sizeof (version_table[0]);
7170Sstevel@tonic-gate
7180Sstevel@tonic-gate optind = 1;
7190Sstevel@tonic-gate opterr = 0;
7200Sstevel@tonic-gate while ((c = getopt(argc, argv, "hd:V:t:?")) != -1) {
7210Sstevel@tonic-gate switch (c) {
7220Sstevel@tonic-gate case 'h':
7230Sstevel@tonic-gate usage(sp, 0);
7240Sstevel@tonic-gate break;
7250Sstevel@tonic-gate
7260Sstevel@tonic-gate case 'd':
7270Sstevel@tonic-gate if (sscanf(optarg, "%u", &debug) != 1) {
7280Sstevel@tonic-gate md_eprintf(gettext("Invalid debug level\n"));
7290Sstevel@tonic-gate md_exit(sp, 1);
7300Sstevel@tonic-gate } else if ((debug < 1) || (debug > MAX_DEBUG_LEVEL)) {
7310Sstevel@tonic-gate debug = min(max(debug, 1), MAX_DEBUG_LEVEL);
7320Sstevel@tonic-gate md_eprintf(gettext("Debug level must be "
7330Sstevel@tonic-gate "between 1 and %d inclusive.\n"),
7340Sstevel@tonic-gate MAX_DEBUG_LEVEL);
7350Sstevel@tonic-gate md_eprintf(gettext("Debug level set to %d.\n"),
7360Sstevel@tonic-gate debug);
7370Sstevel@tonic-gate }
7380Sstevel@tonic-gate break;
7390Sstevel@tonic-gate
7400Sstevel@tonic-gate case 'V':
7410Sstevel@tonic-gate version = Strdup(optarg);
7420Sstevel@tonic-gate break;
7430Sstevel@tonic-gate
7440Sstevel@tonic-gate case 't':
7450Sstevel@tonic-gate if (sscanf(optarg, "%u", &timeout) != 1) {
7460Sstevel@tonic-gate md_eprintf(gettext("Invalid timeout value\n"));
7470Sstevel@tonic-gate md_exit(sp, 1);
7480Sstevel@tonic-gate }
7490Sstevel@tonic-gate break;
7500Sstevel@tonic-gate
7510Sstevel@tonic-gate case '?':
7520Sstevel@tonic-gate if (optopt == '?') {
7530Sstevel@tonic-gate usage(sp, 0);
7540Sstevel@tonic-gate } else if (optopt == 'V') {
7550Sstevel@tonic-gate int i;
7560Sstevel@tonic-gate
75711053SSurya.Prakki@Sun.COM (void) fprintf(stdout, gettext(
7580Sstevel@tonic-gate "%s: Versions Supported:"), myname);
7590Sstevel@tonic-gate for (i = 0; i < version_table_size; i++) {
76011053SSurya.Prakki@Sun.COM (void) fprintf(stdout, " %s",
7610Sstevel@tonic-gate version_table[i]);
7620Sstevel@tonic-gate }
76311053SSurya.Prakki@Sun.COM (void) fprintf(stdout, "\n");
7640Sstevel@tonic-gate md_exit(sp, 0);
7650Sstevel@tonic-gate }
7660Sstevel@tonic-gate /*FALLTHROUGH*/
7670Sstevel@tonic-gate
7680Sstevel@tonic-gate default:
7690Sstevel@tonic-gate usage(sp, 1);
7700Sstevel@tonic-gate break;
7710Sstevel@tonic-gate }
7720Sstevel@tonic-gate }
7730Sstevel@tonic-gate
7740Sstevel@tonic-gate /* initialise the debug level and start time */
7750Sstevel@tonic-gate setup_mc_log(debug);
7760Sstevel@tonic-gate
7770Sstevel@tonic-gate /*
7780Sstevel@tonic-gate * check that the version specified (if any) is supported.
7790Sstevel@tonic-gate */
7800Sstevel@tonic-gate if (version != NULL) {
7810Sstevel@tonic-gate int i, found = 0;
7820Sstevel@tonic-gate
7830Sstevel@tonic-gate for (i = 0; i < version_table_size; i++) {
7840Sstevel@tonic-gate if (strcmp(version, version_table[i]) == 0) {
7850Sstevel@tonic-gate found = 1;
7860Sstevel@tonic-gate break;
7870Sstevel@tonic-gate }
7880Sstevel@tonic-gate }
7890Sstevel@tonic-gate if (!found) {
7900Sstevel@tonic-gate md_eprintf(gettext("Version %s not supported\n"),
7910Sstevel@tonic-gate version);
7920Sstevel@tonic-gate md_exit(sp, 1);
7930Sstevel@tonic-gate }
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate
7960Sstevel@tonic-gate argc -= optind;
7970Sstevel@tonic-gate argv += optind;
7980Sstevel@tonic-gate
7990Sstevel@tonic-gate /* parse arguments */
8000Sstevel@tonic-gate if (argc <= 0) {
8010Sstevel@tonic-gate usage(sp, 1);
8020Sstevel@tonic-gate }
8030Sstevel@tonic-gate
8040Sstevel@tonic-gate /* convert the step name to the corresponding number */
8050Sstevel@tonic-gate step_ptr = bsearch(argv[0], step_table, (sizeof (step_table) /
8060Sstevel@tonic-gate sizeof (step_table[0])), sizeof (step_table[0]), mc_compare);
8070Sstevel@tonic-gate if (step_ptr != NULL) {
8080Sstevel@tonic-gate stepnum = step_ptr->step_num;
8090Sstevel@tonic-gate }
8100Sstevel@tonic-gate
8110Sstevel@tonic-gate --argc;
8120Sstevel@tonic-gate ++argv;
8130Sstevel@tonic-gate
8140Sstevel@tonic-gate /* set timeout alarm signal, a value of 0 will disable timeout */
8150Sstevel@tonic-gate if (timeout > 0) {
8160Sstevel@tonic-gate int stat_loc = 0;
8173074Sjkennedy commd_timeout = (long)(timeout * .75);
8180Sstevel@tonic-gate
8190Sstevel@tonic-gate c_pid = fork();
8200Sstevel@tonic-gate
8210Sstevel@tonic-gate if (c_pid == (pid_t)-1) {
8220Sstevel@tonic-gate md_perror(gettext("Unable to fork"));
8230Sstevel@tonic-gate md_exit(sp, 1);
8240Sstevel@tonic-gate } else if (c_pid) {
8250Sstevel@tonic-gate /* parent */
8260Sstevel@tonic-gate nsa.sa_flags = 0;
8270Sstevel@tonic-gate if (sigfillset(&nsa.sa_mask) < 0) {
8280Sstevel@tonic-gate md_perror(gettext("Unable to set signal mask"));
8290Sstevel@tonic-gate md_exit(sp, 1);
8300Sstevel@tonic-gate }
8310Sstevel@tonic-gate
8320Sstevel@tonic-gate nsa.sa_handler = sigalarmhandler;
8330Sstevel@tonic-gate if (sigaction(SIGALRM, &nsa, &osa) == -1) {
8340Sstevel@tonic-gate md_perror(gettext("Unable to set alarm "
8350Sstevel@tonic-gate "handler"));
8360Sstevel@tonic-gate md_exit(sp, 1);
8370Sstevel@tonic-gate }
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate (void) alarm(timeout);
8400Sstevel@tonic-gate
8410Sstevel@tonic-gate /*
8420Sstevel@tonic-gate * wait for child to exit or timeout to expire.
8430Sstevel@tonic-gate * keep retrying if the call is interrupted
8440Sstevel@tonic-gate */
8450Sstevel@tonic-gate while ((ret_val = waitpid(c_pid, &stat_loc, 0)) < 0) {
8460Sstevel@tonic-gate if (errno != EINTR) {
8470Sstevel@tonic-gate break;
8480Sstevel@tonic-gate }
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate if (ret_val == c_pid) {
8510Sstevel@tonic-gate /* exit with the childs exit value */
8520Sstevel@tonic-gate exit(WEXITSTATUS(stat_loc));
8530Sstevel@tonic-gate } else if (errno == ECHILD) {
8540Sstevel@tonic-gate md_exit(sp, 0);
8550Sstevel@tonic-gate } else {
8560Sstevel@tonic-gate perror(myname);
8570Sstevel@tonic-gate md_exit(sp, 1);
8580Sstevel@tonic-gate }
8590Sstevel@tonic-gate }
8600Sstevel@tonic-gate }
8610Sstevel@tonic-gate
8620Sstevel@tonic-gate /*
8630Sstevel@tonic-gate * If a timeout value is given, everything from this point onwards is
8640Sstevel@tonic-gate * executed in the child process.
8650Sstevel@tonic-gate */
8660Sstevel@tonic-gate
8670Sstevel@tonic-gate switch (stepnum) {
8680Sstevel@tonic-gate case MC_START:
8690Sstevel@tonic-gate /*
8700Sstevel@tonic-gate * Start Step
8710Sstevel@tonic-gate *
8720Sstevel@tonic-gate * - Suspend all rpc.mdcommd messages
8730Sstevel@tonic-gate */
8740Sstevel@tonic-gate
8750Sstevel@tonic-gate /* expect the local node id to be given only */
8760Sstevel@tonic-gate if (argc != 1)
8770Sstevel@tonic-gate usage(sp, 1);
8780Sstevel@tonic-gate
8790Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Start step: %s"),
8800Sstevel@tonic-gate meta_print_hrtime(0));
8810Sstevel@tonic-gate
8820Sstevel@tonic-gate /*
883*11684SRay.Hassan@Sun.COM * With multinode disksets configured we need to
884*11684SRay.Hassan@Sun.COM * update all replicas on all cluster nodes to have
885*11684SRay.Hassan@Sun.COM * the same status. If local replicas on a cluster
886*11684SRay.Hassan@Sun.COM * node are not accessible we need to panic this
887*11684SRay.Hassan@Sun.COM * node, otherwise we abort in the reconfig cycle
888*11684SRay.Hassan@Sun.COM * and failfast/reboot the "good" cluster node too.
889*11684SRay.Hassan@Sun.COM * To avoid a total cluster outage in the above case
890*11684SRay.Hassan@Sun.COM * we panic only the failing node via md_exit(.., 1).
8910Sstevel@tonic-gate */
8920Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
893*11684SRay.Hassan@Sun.COM /* panic the node */
894*11684SRay.Hassan@Sun.COM md_exit(local_sp, 1);
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate
8970Sstevel@tonic-gate if ((max_sets = get_max_sets(ep)) == 0) {
8980Sstevel@tonic-gate mde_perror(ep, "");
8990Sstevel@tonic-gate md_exit(sp, 1);
9000Sstevel@tonic-gate }
9010Sstevel@tonic-gate
9020Sstevel@tonic-gate /* start walking through all possible disksets */
9030Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
9040Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
9050Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
9060Sstevel@tonic-gate /* No set for this setno - continue */
9070Sstevel@tonic-gate mdclrerror(ep);
9080Sstevel@tonic-gate continue;
9090Sstevel@tonic-gate } else {
9100Sstevel@tonic-gate mde_perror(ep, gettext("Unable to "
9110Sstevel@tonic-gate "get set %d information"), setno);
9120Sstevel@tonic-gate md_exit(sp, 1);
9130Sstevel@tonic-gate }
9140Sstevel@tonic-gate }
9150Sstevel@tonic-gate
9160Sstevel@tonic-gate /* only check multi-node disksets */
9170Sstevel@tonic-gate if (!meta_is_mn_set(sp, ep)) {
9180Sstevel@tonic-gate mdclrerror(ep);
9190Sstevel@tonic-gate continue;
9200Sstevel@tonic-gate }
9210Sstevel@tonic-gate
9220Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Start - block parse "
9230Sstevel@tonic-gate "messages for set %s: %s"), sp->setname,
9240Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
9250Sstevel@tonic-gate
9260Sstevel@tonic-gate /*
9270Sstevel@tonic-gate * Mddb parse messages are sent amongst the nodes
9280Sstevel@tonic-gate * in a diskset whenever the locator block or
9290Sstevel@tonic-gate * locator names structure has been changed.
9300Sstevel@tonic-gate * A locator block change could occur as a result
9310Sstevel@tonic-gate * of a disk failure during the reconfig cycle,
9320Sstevel@tonic-gate * so block the mddb parse messages while the
9330Sstevel@tonic-gate * rpc.mdcommd is suspended during the reconfig cycle.
9340Sstevel@tonic-gate */
9350Sstevel@tonic-gate if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) {
9360Sstevel@tonic-gate (void) memset(&mbp, 0, sizeof (mbp));
9370Sstevel@tonic-gate mbp.c_setno = setno;
9380Sstevel@tonic-gate mbp.c_blk_flags = MDDB_BLOCK_PARSE;
9390Sstevel@tonic-gate if (metaioctl(MD_MN_MDDB_BLOCK, &mbp,
9400Sstevel@tonic-gate &mbp.c_mde, NULL)) {
94111053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &mbp.c_mde);
9420Sstevel@tonic-gate mde_perror(ep, gettext("Could not "
9430Sstevel@tonic-gate "block set %s"), sp->setname);
9440Sstevel@tonic-gate md_exit(sp, 1);
9450Sstevel@tonic-gate }
9460Sstevel@tonic-gate }
9470Sstevel@tonic-gate
9480Sstevel@tonic-gate /* suspend commd and spin waiting for drain */
9490Sstevel@tonic-gate while ((ret_val = mdmn_suspend(setno,
9503073Sjkennedy MD_COMM_ALL_CLASSES, commd_timeout)) ==
9510Sstevel@tonic-gate MDE_DS_COMMDCTL_SUSPEND_NYD) {
95211053SSurya.Prakki@Sun.COM (void) sleep(1);
9530Sstevel@tonic-gate }
9540Sstevel@tonic-gate
9550Sstevel@tonic-gate if (ret_val) {
9560Sstevel@tonic-gate md_eprintf(gettext("Could not suspend "
9570Sstevel@tonic-gate "rpc.mdcommd for set %s\n"), sp->setname);
9580Sstevel@tonic-gate md_exit(sp, 1);
9590Sstevel@tonic-gate }
9600Sstevel@tonic-gate
9610Sstevel@tonic-gate /*
9620Sstevel@tonic-gate * Set start step flag for set. This is set to indicate
96346Sskamm * that this node entered the reconfig cycle through
96446Sskamm * the start step. This is used during the reconfig
96546Sskamm * cycle to determine whether the node had entered
96646Sskamm * through the start step or the return step.
9670Sstevel@tonic-gate */
9680Sstevel@tonic-gate (void) memset(&sf, 0, sizeof (sf));
9690Sstevel@tonic-gate sf.sf_setno = sp->setno;
9700Sstevel@tonic-gate sf.sf_setflags = MD_SET_MN_START_RC;
9710Sstevel@tonic-gate sf.sf_flags = MDDB_NM_SET;
9720Sstevel@tonic-gate /* Use magic to help protect ioctl against attack. */
9730Sstevel@tonic-gate sf.sf_magic = MDDB_SETFLAGS_MAGIC;
9740Sstevel@tonic-gate if (metaioctl(MD_MN_SET_SETFLAGS, &sf,
9750Sstevel@tonic-gate &sf.sf_mde, NULL)) {
97611053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &sf.sf_mde);
9770Sstevel@tonic-gate mde_perror(ep, gettext("Could not set "
9780Sstevel@tonic-gate "start_step flag for set %s"), sp->setname);
9790Sstevel@tonic-gate md_exit(sp, 1);
9800Sstevel@tonic-gate }
9810Sstevel@tonic-gate
9820Sstevel@tonic-gate }
9830Sstevel@tonic-gate
9840Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Start step completed: %s"),
9850Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
9860Sstevel@tonic-gate
9870Sstevel@tonic-gate break;
9880Sstevel@tonic-gate
9890Sstevel@tonic-gate case MC_STOP:
9900Sstevel@tonic-gate /*
9910Sstevel@tonic-gate * Stop Step
9920Sstevel@tonic-gate *
9930Sstevel@tonic-gate * - ???
9940Sstevel@tonic-gate */
9950Sstevel@tonic-gate
9960Sstevel@tonic-gate /* don't expect any more arguments to follow the step name */
9970Sstevel@tonic-gate if (argc != 0)
9980Sstevel@tonic-gate usage(sp, 1);
9990Sstevel@tonic-gate
10000Sstevel@tonic-gate break;
10010Sstevel@tonic-gate
10020Sstevel@tonic-gate case MC_ABORT:
10030Sstevel@tonic-gate /*
10040Sstevel@tonic-gate * Abort Step
10050Sstevel@tonic-gate *
10060Sstevel@tonic-gate * - Abort rpc.mdcommd
10070Sstevel@tonic-gate */
10080Sstevel@tonic-gate
10090Sstevel@tonic-gate /* don't expect any more arguments to follow the step name */
10100Sstevel@tonic-gate if (argc != 0)
10110Sstevel@tonic-gate usage(sp, 1);
10120Sstevel@tonic-gate
10130Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Abort step: %s"),
10140Sstevel@tonic-gate meta_print_hrtime(0));
10150Sstevel@tonic-gate
10160Sstevel@tonic-gate /*
10170Sstevel@tonic-gate * Does local set exist? If not, exit with 0
10180Sstevel@tonic-gate * since there's no reason to have this node panic if
10190Sstevel@tonic-gate * the local set cannot be started.
10200Sstevel@tonic-gate */
10210Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
10220Sstevel@tonic-gate md_exit(local_sp, 0);
10230Sstevel@tonic-gate }
10240Sstevel@tonic-gate
10250Sstevel@tonic-gate /*
10260Sstevel@tonic-gate * abort the rpc.mdcommd. The abort is only issued on this node
10270Sstevel@tonic-gate * meaning that the abort reconfig step is called on this
10280Sstevel@tonic-gate * node before a panic while the rest of the cluster will
10290Sstevel@tonic-gate * undergo a reconfig cycle.
10300Sstevel@tonic-gate * There is no time relation between this node running a
10310Sstevel@tonic-gate * reconfig abort and the rest of the cluster
10320Sstevel@tonic-gate * running a reconfig cycle meaning that this node may
10330Sstevel@tonic-gate * panic before, during or after the cluster has run
10340Sstevel@tonic-gate * a reconfig cycle.
10350Sstevel@tonic-gate */
10360Sstevel@tonic-gate mdmn_abort();
10370Sstevel@tonic-gate
10380Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Abort step completed: %s"),
10390Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
10400Sstevel@tonic-gate
10410Sstevel@tonic-gate break;
10420Sstevel@tonic-gate
10430Sstevel@tonic-gate case MC_RETURN:
10440Sstevel@tonic-gate /*
10450Sstevel@tonic-gate * Return Step
10460Sstevel@tonic-gate *
10470Sstevel@tonic-gate * - Grab local set lock, issue rpc.mdcommd DRAIN ALL
10480Sstevel@tonic-gate * and release local set lock. Grabbing the local set
10490Sstevel@tonic-gate * lock allows any active metaset/metadb commands to
10500Sstevel@tonic-gate * terminate gracefully and will keep a metaset/metadb
10510Sstevel@tonic-gate * command from starting until the DRAIN ALL is issued.
10520Sstevel@tonic-gate * The metaset/metadb commands can issue
10530Sstevel@tonic-gate * DRAIN ALL/RESUME ALL commands to rpc.mdcommd,
10540Sstevel@tonic-gate * so the return step must not issue the DRAIN ALL command
10550Sstevel@tonic-gate * until metaset/metadb have finished or metaset may issue
10560Sstevel@tonic-gate * a RESUME ALL after this return reconfig step has issued
10570Sstevel@tonic-gate * the DRAIN ALL command.
10580Sstevel@tonic-gate * After this reconfig step has issued the DRAIN_ALL and
10590Sstevel@tonic-gate * released the local set lock, metaset/metadb will fail
10600Sstevel@tonic-gate * when attempting to contact the rpc.mdcommd and will
10610Sstevel@tonic-gate * terminate without making any configuration changes.
10620Sstevel@tonic-gate * The DRAIN ALL command will keep all other meta* commands
10630Sstevel@tonic-gate * from running during the reconfig cycle (these commands
10640Sstevel@tonic-gate * will wait until the rpc.mdcommd is resumed) since the
10650Sstevel@tonic-gate * reconfig cycle may be changing the diskset configuration.
10660Sstevel@tonic-gate */
10670Sstevel@tonic-gate
10680Sstevel@tonic-gate /* expect the nodelist to follow the step name */
10690Sstevel@tonic-gate if (argc < 1)
10700Sstevel@tonic-gate usage(sp, 1);
10710Sstevel@tonic-gate
10720Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Return step: %s"),
10730Sstevel@tonic-gate meta_print_hrtime(0));
10740Sstevel@tonic-gate
10750Sstevel@tonic-gate /*
10760Sstevel@tonic-gate * Does local set exist? If not, exit with 0
10770Sstevel@tonic-gate * since there's no reason to have this node panic if
10780Sstevel@tonic-gate * the local set cannot be started.
10790Sstevel@tonic-gate */
10800Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
10810Sstevel@tonic-gate md_exit(local_sp, 0);
10820Sstevel@tonic-gate }
10830Sstevel@tonic-gate
10840Sstevel@tonic-gate /*
10850Sstevel@tonic-gate * Suspend any mirror resyncs that are in progress. This
10860Sstevel@tonic-gate * stops unnecessary timeouts.
10870Sstevel@tonic-gate */
10880Sstevel@tonic-gate meta_mirror_resync_block_all();
10890Sstevel@tonic-gate
10900Sstevel@tonic-gate if (meta_lock(local_sp, TRUE, ep) != 0) {
10910Sstevel@tonic-gate mde_perror(ep, "");
10920Sstevel@tonic-gate md_exit(local_sp, 1);
10930Sstevel@tonic-gate }
10940Sstevel@tonic-gate
10950Sstevel@tonic-gate /*
10960Sstevel@tonic-gate * All metaset and metadb commands on this node have now
10970Sstevel@tonic-gate * terminated gracefully. Now, issue a drain all to
10980Sstevel@tonic-gate * the rpc.mdcommd. Any meta command issued after the
10990Sstevel@tonic-gate * drain all will either spin sending the command to the
11000Sstevel@tonic-gate * master until after the reconfig cycle has finished OR
11010Sstevel@tonic-gate * will terminate gracefully (metaset/metadb).
11020Sstevel@tonic-gate */
11030Sstevel@tonic-gate if ((max_sets = get_max_sets(ep)) == 0) {
11040Sstevel@tonic-gate mde_perror(ep, "");
11050Sstevel@tonic-gate md_exit(sp, 1);
11060Sstevel@tonic-gate }
11070Sstevel@tonic-gate
11080Sstevel@tonic-gate /* start walking through all possible disksets */
11090Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
11100Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
11110Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
11120Sstevel@tonic-gate /* No set for this setno - continue */
11130Sstevel@tonic-gate mdclrerror(ep);
11140Sstevel@tonic-gate continue;
11150Sstevel@tonic-gate } else {
11160Sstevel@tonic-gate mde_perror(ep, gettext("Unable to "
11170Sstevel@tonic-gate "get set %d information"), setno);
11180Sstevel@tonic-gate md_exit(sp, 1);
11190Sstevel@tonic-gate }
11200Sstevel@tonic-gate }
11210Sstevel@tonic-gate
11220Sstevel@tonic-gate /* only check multi-node disksets */
11230Sstevel@tonic-gate if (!meta_is_mn_set(sp, ep)) {
11240Sstevel@tonic-gate mdclrerror(ep);
11250Sstevel@tonic-gate continue;
11260Sstevel@tonic-gate }
11270Sstevel@tonic-gate
11280Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Return - block parse "
11290Sstevel@tonic-gate "messages for set %s: %s"), sp->setname,
11300Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
11310Sstevel@tonic-gate
11320Sstevel@tonic-gate /*
11330Sstevel@tonic-gate * Mddb parse messages are sent amongst the nodes
11340Sstevel@tonic-gate * in a diskset whenever the locator block or
11350Sstevel@tonic-gate * locator names structure has been changed.
11360Sstevel@tonic-gate * A locator block change could occur as a result
11370Sstevel@tonic-gate * of a disk failure during the reconfig cycle,
11380Sstevel@tonic-gate * so block the mddb parse messages while the
11390Sstevel@tonic-gate * rpc.mdcommd is suspended during the reconfig cycle.
11400Sstevel@tonic-gate */
11410Sstevel@tonic-gate if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) {
11420Sstevel@tonic-gate (void) memset(&mbp, 0, sizeof (mbp));
11430Sstevel@tonic-gate mbp.c_setno = setno;
11440Sstevel@tonic-gate mbp.c_blk_flags = MDDB_BLOCK_PARSE;
11450Sstevel@tonic-gate if (metaioctl(MD_MN_MDDB_BLOCK, &mbp,
11460Sstevel@tonic-gate &mbp.c_mde, NULL)) {
114711053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &mbp.c_mde);
11480Sstevel@tonic-gate mde_perror(ep, gettext("Could not "
11490Sstevel@tonic-gate "block set %s"), sp->setname);
11500Sstevel@tonic-gate md_exit(sp, 1);
11510Sstevel@tonic-gate }
11520Sstevel@tonic-gate }
11530Sstevel@tonic-gate
11540Sstevel@tonic-gate /* suspend commd and spin waiting for drain */
11550Sstevel@tonic-gate while ((ret_val = mdmn_suspend(setno,
11563073Sjkennedy MD_COMM_ALL_CLASSES, commd_timeout)) ==
11570Sstevel@tonic-gate MDE_DS_COMMDCTL_SUSPEND_NYD) {
115811053SSurya.Prakki@Sun.COM (void) sleep(1);
11590Sstevel@tonic-gate }
11600Sstevel@tonic-gate
11610Sstevel@tonic-gate if (ret_val) {
11620Sstevel@tonic-gate md_eprintf(gettext("Could not suspend "
11630Sstevel@tonic-gate "rpc.mdcommd for set %s\n"), sp->setname);
11640Sstevel@tonic-gate md_exit(sp, 1);
11650Sstevel@tonic-gate }
11660Sstevel@tonic-gate }
11670Sstevel@tonic-gate /*
11680Sstevel@tonic-gate * Resume all I/Os for this node for all MN sets in
11690Sstevel@tonic-gate * case master node had suspended I/Os but panic'd
11700Sstevel@tonic-gate * before resuming I/Os. In case of failure, exit
11710Sstevel@tonic-gate * with a 1 since unable to resume I/Os on this node.
11720Sstevel@tonic-gate */
11730Sstevel@tonic-gate if (clnt_mn_susp_res_io(mynode(), 0, MN_RES_IO, ep)) {
11740Sstevel@tonic-gate mde_perror(ep, gettext(
11750Sstevel@tonic-gate "Unable to resume I/O on node %s for all sets"),
11760Sstevel@tonic-gate mynode());
11770Sstevel@tonic-gate md_exit(sp, 1);
11780Sstevel@tonic-gate }
11790Sstevel@tonic-gate
11800Sstevel@tonic-gate
11810Sstevel@tonic-gate /*
11820Sstevel@tonic-gate * Can now unlock local set lock. New metaset/metadb
11830Sstevel@tonic-gate * commands are now held off using drain all.
11840Sstevel@tonic-gate */
11850Sstevel@tonic-gate (void) meta_unlock(local_sp, ep);
11860Sstevel@tonic-gate
11870Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Return step completed: %s"),
11880Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
11890Sstevel@tonic-gate
11900Sstevel@tonic-gate break;
11910Sstevel@tonic-gate
11920Sstevel@tonic-gate case MC_STEP1:
11930Sstevel@tonic-gate /*
11940Sstevel@tonic-gate * Step 1
11950Sstevel@tonic-gate *
11960Sstevel@tonic-gate * - Populate nodelist file if we are on clustering
11970Sstevel@tonic-gate * and pick a master node for each MN diskset.
11980Sstevel@tonic-gate */
11990Sstevel@tonic-gate
12000Sstevel@tonic-gate /* expect the nodelist to follow the step name */
12010Sstevel@tonic-gate if (argc < 1)
12020Sstevel@tonic-gate usage(sp, 1);
12030Sstevel@tonic-gate
12040Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Step1: %s"),
12050Sstevel@tonic-gate meta_print_hrtime(0));
12060Sstevel@tonic-gate
12070Sstevel@tonic-gate /* Always write nodelist file even if no local set exists */
12080Sstevel@tonic-gate if (clust == SDSSC_OKAY) {
12090Sstevel@tonic-gate /* skip to the nodelist args */
12100Sstevel@tonic-gate if (meta_write_nodelist(argc, argv, ep) != 0) {
12110Sstevel@tonic-gate mde_perror(ep, gettext(
12120Sstevel@tonic-gate "Could not populate nodelist file"));
12130Sstevel@tonic-gate md_exit(sp, 1);
12140Sstevel@tonic-gate }
12150Sstevel@tonic-gate }
12160Sstevel@tonic-gate
12170Sstevel@tonic-gate /*
12180Sstevel@tonic-gate * Does local set exist? If not, exit with 0
12190Sstevel@tonic-gate * since there's no reason to have this node panic if
12200Sstevel@tonic-gate * the local set cannot be started.
12210Sstevel@tonic-gate */
12220Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
12230Sstevel@tonic-gate md_exit(local_sp, 0);
12240Sstevel@tonic-gate }
12250Sstevel@tonic-gate
12260Sstevel@tonic-gate /*
12270Sstevel@tonic-gate * At this point, all meta* commands are blocked across
12280Sstevel@tonic-gate * all disksets since the master rpc.mdcommd has drained or
12290Sstevel@tonic-gate * the master node has died.
12300Sstevel@tonic-gate * If a metaset or metadb command had been in progress
12310Sstevel@tonic-gate * at the start of the reconfig cycle, this command has
12320Sstevel@tonic-gate * either completed or it has been terminated due to
12330Sstevel@tonic-gate * the death of the master node.
12340Sstevel@tonic-gate *
12350Sstevel@tonic-gate * This means that it is now ok to remove any
12360Sstevel@tonic-gate * outstanding clnt_locks associated with multinode
12370Sstevel@tonic-gate * disksets on this node due to a node panic during
12380Sstevel@tonic-gate * a metaset operation. This allows the routines that
12390Sstevel@tonic-gate * choose the master to use rpc.metad to determine the
12400Sstevel@tonic-gate * master of the diskset.
12410Sstevel@tonic-gate */
12420Sstevel@tonic-gate if (clnt_clr_mnsetlock(mynode(), ep) != 0) {
12430Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step1 aborted:"
12440Sstevel@tonic-gate "clear locks failed %s"),
12450Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
12460Sstevel@tonic-gate md_exit(local_sp, 1);
12470Sstevel@tonic-gate }
12480Sstevel@tonic-gate
12490Sstevel@tonic-gate /*
12500Sstevel@tonic-gate * Call reconfig_choose_master to choose a master for
12510Sstevel@tonic-gate * each MN diskset, update the nodelist for each diskset
12520Sstevel@tonic-gate * given the member information and send a reinit message
12530Sstevel@tonic-gate * to rpc.mdcommd to reload the nodelist.
12540Sstevel@tonic-gate */
12553073Sjkennedy rval = meta_reconfig_choose_master(commd_timeout, ep);
12560Sstevel@tonic-gate if (rval == 205) {
12570Sstevel@tonic-gate /*
12580Sstevel@tonic-gate * NOTE: Should issue call to reboot remote host that
12590Sstevel@tonic-gate * is causing the RPC failure. Clustering to
12600Sstevel@tonic-gate * provide interface in the future. This should
12610Sstevel@tonic-gate * stop a never-ending set of 205 reconfig cycles.
12620Sstevel@tonic-gate * Remote host causing failure is stored in
12630Sstevel@tonic-gate * ep->host if ep is an RPC error.
12640Sstevel@tonic-gate * if (mdanyrpcerror(ep))
12650Sstevel@tonic-gate * reboot (ep->host);
12660Sstevel@tonic-gate */
12670Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step1 aborted:"
12680Sstevel@tonic-gate "choose master failure of 205 %s"),
12690Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
12700Sstevel@tonic-gate md_exit(local_sp, 205);
12710Sstevel@tonic-gate } else if (rval != 0) {
12720Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step1 failure: "
12730Sstevel@tonic-gate "choose master failure %s"),
12740Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
12750Sstevel@tonic-gate md_exit(local_sp, 1);
12760Sstevel@tonic-gate }
12770Sstevel@tonic-gate
12780Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step1 completed: %s"),
12790Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
12800Sstevel@tonic-gate
12810Sstevel@tonic-gate md_exit(local_sp, rval);
12820Sstevel@tonic-gate break;
12830Sstevel@tonic-gate
12840Sstevel@tonic-gate case MC_STEP2:
12850Sstevel@tonic-gate /*
12860Sstevel@tonic-gate * Step 2
12870Sstevel@tonic-gate *
12880Sstevel@tonic-gate * In Step 2, each node walks the list of disksets. If a
12890Sstevel@tonic-gate * node is a master of a MN diskset, it synchronizes
12900Sstevel@tonic-gate * the local set USER records for that diskset.
12910Sstevel@tonic-gate *
12920Sstevel@tonic-gate * If disks exist in the diskset and there is a joined
12930Sstevel@tonic-gate * (owner) node in the diskset, the master will also:
12940Sstevel@tonic-gate * - synchronize the diskset mddbs to the master
12950Sstevel@tonic-gate * - play the change log
12960Sstevel@tonic-gate *
12970Sstevel@tonic-gate * The master node will now attempt to join any unjoined
12980Sstevel@tonic-gate * nodes that are currently members in the membership list.
12990Sstevel@tonic-gate */
13000Sstevel@tonic-gate
13010Sstevel@tonic-gate /* expect the nodelist to follow the step name */
13020Sstevel@tonic-gate if (argc < 1)
13030Sstevel@tonic-gate usage(sp, 1);
13040Sstevel@tonic-gate
13050Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Step2: %s"),
13060Sstevel@tonic-gate meta_print_hrtime(0));
13070Sstevel@tonic-gate
13080Sstevel@tonic-gate /*
13090Sstevel@tonic-gate * Does local set exist? If not, exit with 0
13100Sstevel@tonic-gate * since there's no reason to have this node panic if
13110Sstevel@tonic-gate * the local set cannot be started.
13120Sstevel@tonic-gate */
13130Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
13140Sstevel@tonic-gate md_exit(local_sp, 0);
13150Sstevel@tonic-gate }
13160Sstevel@tonic-gate
13170Sstevel@tonic-gate if ((max_sets = get_max_sets(ep)) == 0) {
13180Sstevel@tonic-gate mde_perror(ep, "");
13190Sstevel@tonic-gate md_exit(local_sp, 1);
13200Sstevel@tonic-gate }
13210Sstevel@tonic-gate
13220Sstevel@tonic-gate /* start walking through all possible disksets */
13230Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
13240Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
13250Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
13260Sstevel@tonic-gate /* No set for this setno - continue */
13270Sstevel@tonic-gate mdclrerror(ep);
13280Sstevel@tonic-gate continue;
13290Sstevel@tonic-gate } else if (mdanyrpcerror(ep)) {
13300Sstevel@tonic-gate /* Fail on RPC failure to self */
13310Sstevel@tonic-gate mde_perror(ep, gettext(
13320Sstevel@tonic-gate "Unable to get information for "
13330Sstevel@tonic-gate "set number %d"), setno);
13340Sstevel@tonic-gate md_exit(local_sp, 1);
13350Sstevel@tonic-gate } else {
13360Sstevel@tonic-gate mde_perror(ep, gettext(
13370Sstevel@tonic-gate "Unable to get information for "
13380Sstevel@tonic-gate "set number %d"), setno);
13390Sstevel@tonic-gate mdclrerror(ep);
13400Sstevel@tonic-gate continue;
13410Sstevel@tonic-gate }
13420Sstevel@tonic-gate }
13430Sstevel@tonic-gate
13440Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) {
13450Sstevel@tonic-gate if (mdanyrpcerror(ep)) {
13460Sstevel@tonic-gate /* Fail on RPC failure to self */
13470Sstevel@tonic-gate mde_perror(ep, gettext(
13480Sstevel@tonic-gate "Unable to get information for "
13490Sstevel@tonic-gate "set number %d"), setno);
13500Sstevel@tonic-gate md_exit(local_sp, 1);
13510Sstevel@tonic-gate }
13520Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get set "
13530Sstevel@tonic-gate "%s desc information"), sp->setname);
13540Sstevel@tonic-gate mdclrerror(ep);
13550Sstevel@tonic-gate continue;
13560Sstevel@tonic-gate }
13570Sstevel@tonic-gate
13580Sstevel@tonic-gate /* Only check MN disksets */
13590Sstevel@tonic-gate if (!(MD_MNSET_DESC(sd))) {
13600Sstevel@tonic-gate continue;
13610Sstevel@tonic-gate }
13620Sstevel@tonic-gate
13630Sstevel@tonic-gate /* All actions in step 2 are driven by master */
13640Sstevel@tonic-gate if (!(sd->sd_mn_am_i_master)) {
13650Sstevel@tonic-gate continue;
13660Sstevel@tonic-gate }
13670Sstevel@tonic-gate
13680Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - begin record "
13690Sstevel@tonic-gate "synchronization for set %s: %s"), sp->setname,
13700Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
13710Sstevel@tonic-gate
13720Sstevel@tonic-gate /*
13730Sstevel@tonic-gate * Synchronize the USER records in the local mddbs
13740Sstevel@tonic-gate * for hosts that are members. The USER records
13750Sstevel@tonic-gate * contain set, drive and host information.
13760Sstevel@tonic-gate */
13770Sstevel@tonic-gate rval = meta_mnsync_user_records(sp, ep);
13780Sstevel@tonic-gate if (rval != 0) {
13790Sstevel@tonic-gate mde_perror(ep, gettext(
13800Sstevel@tonic-gate "Synchronization of user records "
13810Sstevel@tonic-gate "in set %s failed\n"), sp->setname);
13820Sstevel@tonic-gate if (rval == 205) {
13830Sstevel@tonic-gate /*
13840Sstevel@tonic-gate * NOTE: Should issue call to reboot
13850Sstevel@tonic-gate * remote host that is causing the RPC
13860Sstevel@tonic-gate * failure. Clustering to provide
13870Sstevel@tonic-gate * interface in the future. This
13880Sstevel@tonic-gate * should stop a never-ending set of
13890Sstevel@tonic-gate * 205 reconfig cycles.
13900Sstevel@tonic-gate * Remote host causing failure is
13910Sstevel@tonic-gate * stored in ep->host if ep is an
13920Sstevel@tonic-gate * RPC error.
13930Sstevel@tonic-gate * if (mdanyrpcerror(ep))
13940Sstevel@tonic-gate * reboot (ep->host);
13950Sstevel@tonic-gate */
13960Sstevel@tonic-gate md_exit(local_sp, 205);
13970Sstevel@tonic-gate } else {
13980Sstevel@tonic-gate md_exit(local_sp, 1);
13990Sstevel@tonic-gate }
14000Sstevel@tonic-gate }
14010Sstevel@tonic-gate
14020Sstevel@tonic-gate /* Reget sd since sync_user_recs may have flushed it */
14030Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) {
14040Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get set "
14050Sstevel@tonic-gate "%s desc information"), sp->setname);
14060Sstevel@tonic-gate md_exit(local_sp, 1);
14070Sstevel@tonic-gate }
14080Sstevel@tonic-gate
14090Sstevel@tonic-gate dd = metaget_drivedesc(sp,
14100Sstevel@tonic-gate (MD_BASICNAME_OK | PRINT_FAST), ep);
14110Sstevel@tonic-gate if (! mdisok(ep)) {
14120Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get set "
14130Sstevel@tonic-gate "%s drive information"), sp->setname);
14140Sstevel@tonic-gate md_exit(local_sp, 1);
14150Sstevel@tonic-gate }
14160Sstevel@tonic-gate
14170Sstevel@tonic-gate /*
14180Sstevel@tonic-gate * No drives in set, continue to next set.
14190Sstevel@tonic-gate */
14200Sstevel@tonic-gate if (dd == NULL) {
14210Sstevel@tonic-gate /* Done with this set */
14220Sstevel@tonic-gate continue;
14230Sstevel@tonic-gate }
14240Sstevel@tonic-gate
14250Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - local set user "
14260Sstevel@tonic-gate "records completed for set %s: %s"), sp->setname,
14270Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
14280Sstevel@tonic-gate
14290Sstevel@tonic-gate /*
14300Sstevel@tonic-gate * Synchronize the diskset mddbs for hosts
14310Sstevel@tonic-gate * that are members. This may involve
14320Sstevel@tonic-gate * playing the changelog and writing out
14330Sstevel@tonic-gate * to the diskset mddbs.
14340Sstevel@tonic-gate */
14350Sstevel@tonic-gate rval = meta_mnsync_diskset_mddbs(sp, ep);
14360Sstevel@tonic-gate if (rval != 0) {
14370Sstevel@tonic-gate mde_perror(ep, gettext(
14380Sstevel@tonic-gate "Synchronization of diskset mddbs "
14390Sstevel@tonic-gate "in set %s failed\n"), sp->setname);
14400Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - diskset "
14410Sstevel@tonic-gate "mddb synchronization failed for "
14420Sstevel@tonic-gate "set %s: %s"), sp->setname,
14430Sstevel@tonic-gate meta_print_hrtime(gethrtime() -
14440Sstevel@tonic-gate start_time));
14450Sstevel@tonic-gate if (rval == 205) {
14460Sstevel@tonic-gate /*
14470Sstevel@tonic-gate * NOTE: Should issue call to reboot
14480Sstevel@tonic-gate * remote host that is causing the RPC
14490Sstevel@tonic-gate * failure. Clustering to provide
14500Sstevel@tonic-gate * interface in the future. This
14510Sstevel@tonic-gate * should stop a never-ending set of
14520Sstevel@tonic-gate * 205 reconfig cycles.
14530Sstevel@tonic-gate * Remote host causing failure is
14540Sstevel@tonic-gate * stored in ep->host if ep is an
14550Sstevel@tonic-gate * RPC error.
14560Sstevel@tonic-gate * if (mdanyrpcerror(ep))
14570Sstevel@tonic-gate * reboot (ep->host);
14580Sstevel@tonic-gate */
14590Sstevel@tonic-gate md_exit(local_sp, 205);
14600Sstevel@tonic-gate } else if (rval == 1) {
14610Sstevel@tonic-gate continue;
14620Sstevel@tonic-gate } else {
14630Sstevel@tonic-gate md_exit(local_sp, 1);
14640Sstevel@tonic-gate }
14650Sstevel@tonic-gate }
14660Sstevel@tonic-gate
14670Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - diskset mddb "
14680Sstevel@tonic-gate "synchronization completed for set %s: %s"),
14690Sstevel@tonic-gate sp->setname,
14700Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
14710Sstevel@tonic-gate
14720Sstevel@tonic-gate /* Join the starting nodes to the diskset */
14730Sstevel@tonic-gate rval = meta_mnjoin_all(sp, ep);
14740Sstevel@tonic-gate if (rval != 0) {
14750Sstevel@tonic-gate mde_perror(ep, gettext(
14760Sstevel@tonic-gate "Join of non-owner (starting) nodes "
14770Sstevel@tonic-gate "in set %s failed\n"), sp->setname);
14780Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - non owner"
14790Sstevel@tonic-gate "nodes joined for set %s: %s"),
14800Sstevel@tonic-gate sp->setname,
14810Sstevel@tonic-gate meta_print_hrtime(gethrtime() -
14820Sstevel@tonic-gate start_time));
14830Sstevel@tonic-gate if (rval == 205) {
14840Sstevel@tonic-gate /*
14850Sstevel@tonic-gate * NOTE: Should issue call to reboot
14860Sstevel@tonic-gate * remote host that is causing the RPC
14870Sstevel@tonic-gate * failure. Clustering to provide
14880Sstevel@tonic-gate * interface in the future. This
14890Sstevel@tonic-gate * should stop a never-ending set of
14900Sstevel@tonic-gate * 205 reconfig cycles.
14910Sstevel@tonic-gate * Remote host causing failure is
14920Sstevel@tonic-gate * stored in ep->host if ep is an
14930Sstevel@tonic-gate * RPC error.
14940Sstevel@tonic-gate * if (mdanyrpcerror(ep))
14950Sstevel@tonic-gate * reboot (ep->host);
14960Sstevel@tonic-gate */
14970Sstevel@tonic-gate md_exit(local_sp, 205);
14980Sstevel@tonic-gate } else {
14990Sstevel@tonic-gate md_exit(local_sp, 1);
15000Sstevel@tonic-gate }
15010Sstevel@tonic-gate }
15020Sstevel@tonic-gate
15030Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step2 - non owner nodes "
15040Sstevel@tonic-gate "joined for set %s: %s"), sp->setname,
15050Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
15060Sstevel@tonic-gate
15070Sstevel@tonic-gate }
15080Sstevel@tonic-gate
15090Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step2 completed: %s"),
15100Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
15110Sstevel@tonic-gate
15120Sstevel@tonic-gate break;
15130Sstevel@tonic-gate
15140Sstevel@tonic-gate case MC_STEP3:
15150Sstevel@tonic-gate /*
15160Sstevel@tonic-gate * Step 3
15170Sstevel@tonic-gate *
15180Sstevel@tonic-gate * For all multinode sets do,
15190Sstevel@tonic-gate * - Reinitialise rpc.mdcommd
15200Sstevel@tonic-gate * - Reset mirror owners to null if the current owner is
15210Sstevel@tonic-gate * no longer in the membership list
15220Sstevel@tonic-gate */
15230Sstevel@tonic-gate
15240Sstevel@tonic-gate /* expect the nodelist to follow the step name */
15250Sstevel@tonic-gate if (argc < 1)
15260Sstevel@tonic-gate usage(sp, 1);
15270Sstevel@tonic-gate
15280Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Step3: %s"),
15290Sstevel@tonic-gate meta_print_hrtime(0));
15300Sstevel@tonic-gate
15310Sstevel@tonic-gate /*
15320Sstevel@tonic-gate * Does local set exist? If not, exit with 0
15330Sstevel@tonic-gate * since there's no reason to have this node panic if
15340Sstevel@tonic-gate * the local set cannot be started.
15350Sstevel@tonic-gate */
15360Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
15370Sstevel@tonic-gate md_exit(local_sp, 0);
15380Sstevel@tonic-gate }
15390Sstevel@tonic-gate
15400Sstevel@tonic-gate /*
15410Sstevel@tonic-gate * walk through all sets on this node which could include:
15420Sstevel@tonic-gate * - MN disksets
15430Sstevel@tonic-gate * - traditional disksets
15440Sstevel@tonic-gate * - non-existent disksets
15450Sstevel@tonic-gate * start mirror resync for all MN sets
15460Sstevel@tonic-gate */
15470Sstevel@tonic-gate if ((max_sets = get_max_sets(ep)) == 0) {
15480Sstevel@tonic-gate mde_perror(ep, "");
15490Sstevel@tonic-gate md_exit(local_sp, 1);
15500Sstevel@tonic-gate }
15510Sstevel@tonic-gate
15520Sstevel@tonic-gate /* start walking through all possible disksets */
15530Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
15540Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
15550Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
15560Sstevel@tonic-gate /* No set for this setno - continue */
15570Sstevel@tonic-gate mdclrerror(ep);
15580Sstevel@tonic-gate continue;
15590Sstevel@tonic-gate } else {
15600Sstevel@tonic-gate mde_perror(ep, gettext("Unable to "
15610Sstevel@tonic-gate "get set %d information"), setno);
15620Sstevel@tonic-gate md_exit(local_sp, 1);
15630Sstevel@tonic-gate }
15640Sstevel@tonic-gate }
15650Sstevel@tonic-gate
15660Sstevel@tonic-gate /* only check multi-node disksets */
15670Sstevel@tonic-gate if (!meta_is_mn_set(sp, ep)) {
15680Sstevel@tonic-gate mdclrerror(ep);
15690Sstevel@tonic-gate continue;
15700Sstevel@tonic-gate }
15710Sstevel@tonic-gate
15720Sstevel@tonic-gate if (meta_lock(sp, TRUE, ep) != 0) {
15730Sstevel@tonic-gate mde_perror(ep, "");
15740Sstevel@tonic-gate md_exit(local_sp, 1);
15750Sstevel@tonic-gate }
15760Sstevel@tonic-gate
15770Sstevel@tonic-gate /* If this node isn't joined to set, do nothing */
15780Sstevel@tonic-gate if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES) {
15790Sstevel@tonic-gate if (!mdisok(ep)) {
15800Sstevel@tonic-gate mde_perror(ep, gettext("Could "
15810Sstevel@tonic-gate "not get set %s ownership"),
15820Sstevel@tonic-gate sp->setname);
15830Sstevel@tonic-gate md_exit(sp, 1);
15840Sstevel@tonic-gate }
15850Sstevel@tonic-gate mdclrerror(ep);
158611053SSurya.Prakki@Sun.COM (void) meta_unlock(sp, ep);
15870Sstevel@tonic-gate continue;
15880Sstevel@tonic-gate }
15890Sstevel@tonic-gate
15900Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step3 - begin "
15910Sstevel@tonic-gate "re-initialising rpc.mdcommd and resetting mirror "
15920Sstevel@tonic-gate "owners for set %s: %s"), sp->setname,
15930Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
15940Sstevel@tonic-gate
15950Sstevel@tonic-gate /* reinitialise rpc.mdcommd with new nodelist */
15963073Sjkennedy if (mdmn_reinit_set(setno, commd_timeout)) {
15970Sstevel@tonic-gate md_eprintf(gettext(
15980Sstevel@tonic-gate "Could not re-initialise rpc.mdcommd for "
15990Sstevel@tonic-gate "set %s\n"), sp->setname);
16000Sstevel@tonic-gate md_exit(sp, 1);
16010Sstevel@tonic-gate }
16020Sstevel@tonic-gate
16030Sstevel@tonic-gate (void) memset(&cfg, 0, sizeof (cfg));
16040Sstevel@tonic-gate cfg.c_id = 0;
16050Sstevel@tonic-gate cfg.c_setno = sp->setno;
16060Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde,
16070Sstevel@tonic-gate NULL) != 0) {
160811053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &cfg.c_mde);
16090Sstevel@tonic-gate mde_perror(ep, gettext("Could "
16100Sstevel@tonic-gate "not get set %s information"),
16110Sstevel@tonic-gate sp->setname);
16120Sstevel@tonic-gate md_exit(sp, 1);
16130Sstevel@tonic-gate }
16140Sstevel@tonic-gate
16150Sstevel@tonic-gate /* Don't do anything else if set is stale */
16160Sstevel@tonic-gate if (cfg.c_flags & MDDB_C_STALE) {
161711053SSurya.Prakki@Sun.COM (void) meta_unlock(sp, ep);
16180Sstevel@tonic-gate mdclrerror(ep);
16190Sstevel@tonic-gate continue;
16200Sstevel@tonic-gate }
16210Sstevel@tonic-gate
16220Sstevel@tonic-gate /* reset mirror owners */
16230Sstevel@tonic-gate if (reset_state(RESET_OWNER, sp, MD_MIRROR, ep) == -1) {
16240Sstevel@tonic-gate md_exit(sp, 1);
16250Sstevel@tonic-gate }
16260Sstevel@tonic-gate
162711053SSurya.Prakki@Sun.COM (void) meta_unlock(sp, ep);
16280Sstevel@tonic-gate
16290Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step3 - rpc.mdcommd "
16300Sstevel@tonic-gate "re-initialised and mirror owners reset for "
16310Sstevel@tonic-gate "set %s: %s"), sp->setname,
16320Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
16330Sstevel@tonic-gate }
16340Sstevel@tonic-gate
16350Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step3 completed: %s"),
16360Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
16370Sstevel@tonic-gate
16380Sstevel@tonic-gate break;
16390Sstevel@tonic-gate
16400Sstevel@tonic-gate case MC_STEP4:
16410Sstevel@tonic-gate /*
16420Sstevel@tonic-gate * Step 4
16430Sstevel@tonic-gate *
16440Sstevel@tonic-gate * For all multinode sets do:
16450Sstevel@tonic-gate * - Resume the rpc.mdcommd messages. Must resume all
16460Sstevel@tonic-gate * sets before issuing I/O to any set since an error
16470Sstevel@tonic-gate * encountered in a commd suspended set could be
16480Sstevel@tonic-gate * blocked waiting for commd in another set to resume.
16490Sstevel@tonic-gate * (This happens since the daemon queues service
16500Sstevel@tonic-gate * all sets). An open of a soft partition causes
16510Sstevel@tonic-gate * a read of the watermarks during the open.
16520Sstevel@tonic-gate * - If set is non-writable (not an owner or STALE), then
16530Sstevel@tonic-gate * continue to next set.
16540Sstevel@tonic-gate *
16550Sstevel@tonic-gate * For all multinode sets do,
16560Sstevel@tonic-gate * - Reset ABR states for all mirrors, ie clear ABR if not
16570Sstevel@tonic-gate * open on any node.
16580Sstevel@tonic-gate * - Reset ABR states for all soft partitions, ie clear ABR if
16590Sstevel@tonic-gate * not open on any node.
16600Sstevel@tonic-gate * - For all slave nodes that have entered through the start
16610Sstevel@tonic-gate * step, update the ABR state to that of the master and
16620Sstevel@tonic-gate * get the submirror state from the master
16630Sstevel@tonic-gate * - meta_lock set
16640Sstevel@tonic-gate * - Resync all mirrors
16650Sstevel@tonic-gate * - unlock meta_lock for this set.
16660Sstevel@tonic-gate * - Choose a new owner for any orphaned resyncs
16670Sstevel@tonic-gate *
16680Sstevel@tonic-gate * There is one potential issue here when concurrently
16690Sstevel@tonic-gate * resetting and updating the ABR state. If the master has ABR
16700Sstevel@tonic-gate * set, but should no longer have because the only node that
16710Sstevel@tonic-gate * had the metadevice open and had ABR set has panicked, the
16720Sstevel@tonic-gate * master will send a message to all nodes to clear the ABR
16730Sstevel@tonic-gate * state. Meanwhile any node that has come through the
16740Sstevel@tonic-gate * start step will get tstate from the master and will update
16750Sstevel@tonic-gate * ABR if it was set in tstate. So, we appear to have a problem
16760Sstevel@tonic-gate * if the following sequence occurs:-
16770Sstevel@tonic-gate * - The slave gets tstate with ABR set
16780Sstevel@tonic-gate * - The master sends a message to clear ABR
16790Sstevel@tonic-gate * - The slave updates ABR with the value it got from tstate.
16800Sstevel@tonic-gate * We now have the master with ABR clear and the slave with ABR
16810Sstevel@tonic-gate * set. Fortunately, having set ABR, the slave will close the
16820Sstevel@tonic-gate * metadevice after setting ABR and as there are no nodes with
16830Sstevel@tonic-gate * the device open, the close will send a message to clear ABR
16840Sstevel@tonic-gate * on all nodes. So, the nodes will all have ABR unset.
16850Sstevel@tonic-gate */
16860Sstevel@tonic-gate
16870Sstevel@tonic-gate /* expect the nodelist to follow the step name */
16880Sstevel@tonic-gate if (argc < 1)
16890Sstevel@tonic-gate usage(sp, 1);
16900Sstevel@tonic-gate
16910Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Starting Step4: %s"),
16920Sstevel@tonic-gate meta_print_hrtime(0));
16930Sstevel@tonic-gate
16940Sstevel@tonic-gate /*
16950Sstevel@tonic-gate * Does local set exist? If not, exit with 0
16960Sstevel@tonic-gate * since there's no reason to have this node panic if
16970Sstevel@tonic-gate * the local set cannot be started.
16980Sstevel@tonic-gate */
16990Sstevel@tonic-gate if ((local_sp = load_local_set(ep)) == NULL) {
17000Sstevel@tonic-gate md_exit(local_sp, 0);
17010Sstevel@tonic-gate }
17020Sstevel@tonic-gate
17030Sstevel@tonic-gate /*
17040Sstevel@tonic-gate * walk through all sets on this node which could include:
17050Sstevel@tonic-gate * - MN disksets
17060Sstevel@tonic-gate * - traditional disksets
17070Sstevel@tonic-gate * - non-existent disksets
17080Sstevel@tonic-gate * start mirror resync for all MN sets
17090Sstevel@tonic-gate */
17100Sstevel@tonic-gate if ((max_sets = get_max_sets(ep)) == 0) {
17110Sstevel@tonic-gate mde_perror(ep, "");
17120Sstevel@tonic-gate md_exit(local_sp, 1);
17130Sstevel@tonic-gate }
17140Sstevel@tonic-gate
17150Sstevel@tonic-gate /* Clear set_info structure */
17160Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
17170Sstevel@tonic-gate set_info[setno] = 0;
17180Sstevel@tonic-gate }
17190Sstevel@tonic-gate
17200Sstevel@tonic-gate /* start walking through all possible disksets */
17210Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
17220Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
17230Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
17240Sstevel@tonic-gate /* No set for this setno - continue */
17250Sstevel@tonic-gate mdclrerror(ep);
17260Sstevel@tonic-gate continue;
17270Sstevel@tonic-gate } else {
17280Sstevel@tonic-gate mde_perror(ep, gettext("Unable to "
17290Sstevel@tonic-gate "get set %d information"), setno);
17300Sstevel@tonic-gate md_exit(local_sp, 1);
17310Sstevel@tonic-gate }
17320Sstevel@tonic-gate }
17330Sstevel@tonic-gate
17340Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) {
17350Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get set "
17360Sstevel@tonic-gate "%s desc information"), sp->setname);
17370Sstevel@tonic-gate mdclrerror(ep);
17380Sstevel@tonic-gate continue;
17390Sstevel@tonic-gate }
17400Sstevel@tonic-gate
17410Sstevel@tonic-gate /* only check multi-node disksets */
17420Sstevel@tonic-gate if (!meta_is_mn_set(sp, ep)) {
17430Sstevel@tonic-gate mdclrerror(ep);
17440Sstevel@tonic-gate continue;
17450Sstevel@tonic-gate }
17460Sstevel@tonic-gate
17470Sstevel@tonic-gate set_info[setno] |= SET_INFO_MN;
17480Sstevel@tonic-gate
17490Sstevel@tonic-gate /*
17500Sstevel@tonic-gate * If not an owner (all mddbs failed) or stale
17510Sstevel@tonic-gate * (< 50% mddbs operational), then set is
17520Sstevel@tonic-gate * non-writable so just resume commd and
17530Sstevel@tonic-gate * unblock mddb messages.
17540Sstevel@tonic-gate */
17550Sstevel@tonic-gate mdclrerror(ep);
17560Sstevel@tonic-gate if (s_ownset(sp->setno, ep) != MD_SETOWNER_YES) {
17570Sstevel@tonic-gate set_info[setno] |= SET_INFO_NO_WR;
17580Sstevel@tonic-gate }
17590Sstevel@tonic-gate if (!mdisok(ep)) {
17600Sstevel@tonic-gate mde_perror(ep, gettext("Could "
17610Sstevel@tonic-gate "not get set %s ownership"),
17620Sstevel@tonic-gate sp->setname);
17630Sstevel@tonic-gate md_exit(local_sp, 1);
17640Sstevel@tonic-gate }
17650Sstevel@tonic-gate /* Set is owned - is it stale? */
17660Sstevel@tonic-gate if (!set_info[setno] & SET_INFO_NO_WR) {
17670Sstevel@tonic-gate (void) memset(&cfg, 0, sizeof (cfg));
17680Sstevel@tonic-gate cfg.c_id = 0;
17690Sstevel@tonic-gate cfg.c_setno = sp->setno;
17700Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &cfg, &cfg.c_mde,
17710Sstevel@tonic-gate NULL) != 0) {
177211053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &cfg.c_mde);
17730Sstevel@tonic-gate mde_perror(ep, gettext("Could "
17740Sstevel@tonic-gate "not get set %s information"),
17750Sstevel@tonic-gate sp->setname);
17760Sstevel@tonic-gate md_exit(local_sp, 1);
17770Sstevel@tonic-gate }
17780Sstevel@tonic-gate if (cfg.c_flags & MDDB_C_STALE) {
17790Sstevel@tonic-gate set_info[setno] |= SET_INFO_NO_WR;
17800Sstevel@tonic-gate }
17810Sstevel@tonic-gate }
17820Sstevel@tonic-gate
17830Sstevel@tonic-gate /* resume rpc.mdcommd */
17843073Sjkennedy if (mdmn_resume(setno, MD_COMM_ALL_CLASSES, 0,
17853073Sjkennedy commd_timeout)) {
17860Sstevel@tonic-gate md_eprintf(gettext("Unable to resume "
17870Sstevel@tonic-gate "rpc.mdcommd for set %s\n"), sp->setname);
17880Sstevel@tonic-gate md_exit(local_sp, 1);
17890Sstevel@tonic-gate }
17900Sstevel@tonic-gate
17910Sstevel@tonic-gate /* Unblock mddb parse messages */
17920Sstevel@tonic-gate if (s_ownset(sp->setno, ep) == MD_SETOWNER_YES) {
17930Sstevel@tonic-gate (void) memset(&mbp, 0, sizeof (mbp));
17940Sstevel@tonic-gate mbp.c_setno = setno;
17950Sstevel@tonic-gate mbp.c_blk_flags = MDDB_UNBLOCK_PARSE;
17960Sstevel@tonic-gate if (metaioctl(MD_MN_MDDB_BLOCK, &mbp,
17970Sstevel@tonic-gate &mbp.c_mde, NULL)) {
179811053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &mbp.c_mde);
17990Sstevel@tonic-gate mde_perror(ep, gettext("Could not "
18000Sstevel@tonic-gate "unblock set %s"), sp->setname);
18010Sstevel@tonic-gate md_exit(local_sp, 1);
18020Sstevel@tonic-gate }
18030Sstevel@tonic-gate }
18040Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step4 - rpc.mdcommd "
18050Sstevel@tonic-gate "resumed and messages unblocked for set %s: %s"),
18060Sstevel@tonic-gate sp->setname,
18070Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
18080Sstevel@tonic-gate }
18090Sstevel@tonic-gate
18100Sstevel@tonic-gate for (setno = 1; setno < max_sets; setno++) {
18110Sstevel@tonic-gate int start_step;
18120Sstevel@tonic-gate
18130Sstevel@tonic-gate /* Skip traditional disksets. */
18140Sstevel@tonic-gate if ((set_info[setno] & SET_INFO_MN) == 0)
18150Sstevel@tonic-gate continue;
18160Sstevel@tonic-gate
18170Sstevel@tonic-gate /*
18180Sstevel@tonic-gate * If already determined that this set is
18190Sstevel@tonic-gate * a non-writable set, then just continue
18200Sstevel@tonic-gate * to next set since there's nothing else
18210Sstevel@tonic-gate * to do for a non-writable set.
18220Sstevel@tonic-gate */
18230Sstevel@tonic-gate if (set_info[setno] & SET_INFO_NO_WR)
18240Sstevel@tonic-gate continue;
18250Sstevel@tonic-gate
18260Sstevel@tonic-gate if ((sp = metasetnosetname(setno, ep)) == NULL) {
18270Sstevel@tonic-gate if (mdiserror(ep, MDE_NO_SET)) {
18280Sstevel@tonic-gate /* No set for this setno - continue */
18290Sstevel@tonic-gate mdclrerror(ep);
18300Sstevel@tonic-gate continue;
18310Sstevel@tonic-gate } else {
18320Sstevel@tonic-gate mde_perror(ep, gettext("Unable to "
18330Sstevel@tonic-gate "get set %d information"), setno);
18340Sstevel@tonic-gate md_exit(local_sp, 1);
18350Sstevel@tonic-gate }
18360Sstevel@tonic-gate }
18370Sstevel@tonic-gate
18380Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) {
18390Sstevel@tonic-gate mde_perror(ep, gettext("Unable to get set "
18400Sstevel@tonic-gate "%s desc information"), sp->setname);
18410Sstevel@tonic-gate mdclrerror(ep);
18420Sstevel@tonic-gate continue;
18430Sstevel@tonic-gate }
18440Sstevel@tonic-gate
18450Sstevel@tonic-gate /* See if this node came through the start step */
18460Sstevel@tonic-gate (void) memset(&sf, 0, sizeof (sf));
18470Sstevel@tonic-gate sf.sf_setno = sp->setno;
18480Sstevel@tonic-gate sf.sf_flags = MDDB_NM_GET;
18490Sstevel@tonic-gate /* Use magic to help protect ioctl against attack. */
18500Sstevel@tonic-gate sf.sf_magic = MDDB_SETFLAGS_MAGIC;
18517210Srayh if (metaioctl(MD_MN_GET_SETFLAGS, &sf,
18520Sstevel@tonic-gate &sf.sf_mde, NULL)) {
185311053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &sf.sf_mde);
18540Sstevel@tonic-gate mde_perror(ep, gettext("Could not get "
18550Sstevel@tonic-gate "start_step flag for set %s"), sp->setname);
18560Sstevel@tonic-gate md_exit(local_sp, 1);
18570Sstevel@tonic-gate }
18580Sstevel@tonic-gate start_step =
18590Sstevel@tonic-gate (sf.sf_setflags & MD_SET_MN_START_RC)? 1: 0;
18600Sstevel@tonic-gate
18610Sstevel@tonic-gate /*
18620Sstevel@tonic-gate * We can now reset the start_step flag for the set
18630Sstevel@tonic-gate * if it was already set.
18640Sstevel@tonic-gate */
18650Sstevel@tonic-gate if (start_step) {
18660Sstevel@tonic-gate (void) memset(&sf, 0, sizeof (sf));
18670Sstevel@tonic-gate sf.sf_setno = sp->setno;
18680Sstevel@tonic-gate sf.sf_setflags = MD_SET_MN_START_RC;
18690Sstevel@tonic-gate sf.sf_flags = MDDB_NM_RESET;
18700Sstevel@tonic-gate /*
18710Sstevel@tonic-gate * Use magic to help protect ioctl
18720Sstevel@tonic-gate * against attack.
18730Sstevel@tonic-gate */
18740Sstevel@tonic-gate sf.sf_magic = MDDB_SETFLAGS_MAGIC;
18750Sstevel@tonic-gate if (metaioctl(MD_MN_SET_SETFLAGS, &sf,
18760Sstevel@tonic-gate &sf.sf_mde, NULL)) {
187711053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &sf.sf_mde);
18780Sstevel@tonic-gate mde_perror(ep,
18790Sstevel@tonic-gate gettext("Could not reset "
18800Sstevel@tonic-gate "start_step flag for set %s"),
18810Sstevel@tonic-gate sp->setname);
18820Sstevel@tonic-gate }
18830Sstevel@tonic-gate }
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step4 - begin setting "
18860Sstevel@tonic-gate "ABR state and restarting io's for "
18870Sstevel@tonic-gate "set %s: %s"), sp->setname,
18880Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
18890Sstevel@tonic-gate
18900Sstevel@tonic-gate
18910Sstevel@tonic-gate /*
18920Sstevel@tonic-gate * If we are not the master and we have come through
18930Sstevel@tonic-gate * the start step, we must update the ABR states
18940Sstevel@tonic-gate * for mirrors and soft partitions. Also the submirror
18950Sstevel@tonic-gate * states need to be synchronised so that we see the
18960Sstevel@tonic-gate * same status as other previously joined members.
18970Sstevel@tonic-gate * This _must_ be done before starting the resync.
18980Sstevel@tonic-gate */
18990Sstevel@tonic-gate if (!(sd->sd_mn_am_i_master) && start_step) {
19000Sstevel@tonic-gate if (reset_state(GET_MIRROR_STATE, sp, MD_MIRROR,
19010Sstevel@tonic-gate ep) == -1) {
19020Sstevel@tonic-gate md_exit(local_sp, 1);
19030Sstevel@tonic-gate }
19040Sstevel@tonic-gate if (reset_state(UPDATE_ABR, sp, MD_SP,
19050Sstevel@tonic-gate ep) == -1) {
19060Sstevel@tonic-gate md_exit(local_sp, 1);
19070Sstevel@tonic-gate }
19080Sstevel@tonic-gate /*
19090Sstevel@tonic-gate * Mark the fact that we've got the mirror
19100Sstevel@tonic-gate * state. This allows the resync thread to
19110Sstevel@tonic-gate * determine if _it_ needs to issue this. This
19120Sstevel@tonic-gate * can happen if a node is added to a set after
19130Sstevel@tonic-gate * a reconfig cycle has completed.
19140Sstevel@tonic-gate */
19150Sstevel@tonic-gate (void) memset(&sf, 0, sizeof (sf));
19160Sstevel@tonic-gate sf.sf_setno = sp->setno;
19170Sstevel@tonic-gate sf.sf_setflags = MD_SET_MN_MIR_STATE_RC;
19180Sstevel@tonic-gate sf.sf_flags = MDDB_NM_SET;
19190Sstevel@tonic-gate /*
19200Sstevel@tonic-gate * Use magic to help protect ioctl
19210Sstevel@tonic-gate * against attack.
19220Sstevel@tonic-gate */
19230Sstevel@tonic-gate sf.sf_magic = MDDB_SETFLAGS_MAGIC;
19240Sstevel@tonic-gate if (metaioctl(MD_MN_SET_SETFLAGS, &sf,
19250Sstevel@tonic-gate &sf.sf_mde, NULL)) {
192611053SSurya.Prakki@Sun.COM (void) mdstealerror(ep, &sf.sf_mde);
19270Sstevel@tonic-gate mde_perror(ep,
19280Sstevel@tonic-gate gettext("Could not set "
19290Sstevel@tonic-gate "submirror state flag for set %s"),
19300Sstevel@tonic-gate sp->setname);
19310Sstevel@tonic-gate }
19320Sstevel@tonic-gate }
19330Sstevel@tonic-gate
19340Sstevel@tonic-gate /*
19350Sstevel@tonic-gate * All remaining actions are only performed by the
19360Sstevel@tonic-gate * master
19370Sstevel@tonic-gate */
19380Sstevel@tonic-gate if (!(sd->sd_mn_am_i_master)) {
19390Sstevel@tonic-gate if (meta_lock(sp, TRUE, ep) != 0) {
19400Sstevel@tonic-gate mde_perror(ep, "");
19410Sstevel@tonic-gate md_exit(local_sp, 1);
19420Sstevel@tonic-gate }
19430Sstevel@tonic-gate meta_mirror_resync_unblock(sp);
194411053SSurya.Prakki@Sun.COM (void) meta_unlock(sp, ep);
19450Sstevel@tonic-gate continue;
19460Sstevel@tonic-gate }
19470Sstevel@tonic-gate
19480Sstevel@tonic-gate /*
19490Sstevel@tonic-gate * If the master came through the start step, this
19500Sstevel@tonic-gate * implies that all of the nodes must have done the
19510Sstevel@tonic-gate * same and hence there can be no applications
19520Sstevel@tonic-gate * running. Hence no need to reset ABR
19530Sstevel@tonic-gate */
19540Sstevel@tonic-gate if (!start_step) {
19550Sstevel@tonic-gate /* Reset ABR state for mirrors */
19560Sstevel@tonic-gate if (reset_state(RESET_ABR, sp, MD_MIRROR,
19570Sstevel@tonic-gate ep) == -1) {
19580Sstevel@tonic-gate md_exit(local_sp, 1);
19590Sstevel@tonic-gate }
19600Sstevel@tonic-gate /* ...and now the same for soft partitions */
19610Sstevel@tonic-gate if (reset_state(RESET_ABR, sp, MD_SP,
19620Sstevel@tonic-gate ep) == -1) {
19630Sstevel@tonic-gate md_exit(local_sp, 1);
19640Sstevel@tonic-gate }
19650Sstevel@tonic-gate }
19660Sstevel@tonic-gate
19670Sstevel@tonic-gate /*
19680Sstevel@tonic-gate * choose owners for orphaned resyncs and reset
19690Sstevel@tonic-gate * non-orphaned resyncs so that an owner node that
19700Sstevel@tonic-gate * reboots will restart the resync if needed.
19710Sstevel@tonic-gate */
19720Sstevel@tonic-gate if (reset_state(CHOOSE_OWNER, sp, MD_MIRROR, ep) == -1)
19730Sstevel@tonic-gate md_exit(local_sp, 1);
19740Sstevel@tonic-gate
19750Sstevel@tonic-gate /*
19760Sstevel@tonic-gate * Must unlock set lock before meta_mirror_resync_all
19770Sstevel@tonic-gate * sends a message to run the metasync command
19780Sstevel@tonic-gate * which also grabs the meta_lock.
19790Sstevel@tonic-gate */
19800Sstevel@tonic-gate if (meta_lock(sp, TRUE, ep) != 0) {
19810Sstevel@tonic-gate mde_perror(ep, "");
19820Sstevel@tonic-gate md_exit(local_sp, 1);
19830Sstevel@tonic-gate }
19840Sstevel@tonic-gate meta_mirror_resync_unblock(sp);
198511053SSurya.Prakki@Sun.COM (void) meta_unlock(sp, ep);
19860Sstevel@tonic-gate
19870Sstevel@tonic-gate /* resync all mirrors in set */
19880Sstevel@tonic-gate if (meta_mirror_resync_all(sp, 0, ep) != 0) {
19890Sstevel@tonic-gate mde_perror(ep, gettext("Mirror resyncs "
19900Sstevel@tonic-gate "failed for set %s"), sp->setname);
19910Sstevel@tonic-gate md_exit(local_sp, 1);
19920Sstevel@tonic-gate }
19930Sstevel@tonic-gate
19940Sstevel@tonic-gate meta_mc_log(MC_LOG3, gettext("Step4 - io's restarted "
19950Sstevel@tonic-gate "for set %s: %s"), sp->setname,
19960Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
19970Sstevel@tonic-gate }
19980Sstevel@tonic-gate
19990Sstevel@tonic-gate meta_mc_log(MC_LOG2, gettext("Step4 completed: %s"),
20000Sstevel@tonic-gate meta_print_hrtime(gethrtime() - start_time));
20010Sstevel@tonic-gate
20020Sstevel@tonic-gate break;
20030Sstevel@tonic-gate
20040Sstevel@tonic-gate default:
20050Sstevel@tonic-gate usage(sp, 1);
20060Sstevel@tonic-gate break;
20070Sstevel@tonic-gate }
20080Sstevel@tonic-gate
20090Sstevel@tonic-gate md_exit(sp, 0);
20100Sstevel@tonic-gate /* NOTREACHED */
20110Sstevel@tonic-gate return (0);
20120Sstevel@tonic-gate }
2013