10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51623Stw21770 * Common Development and Distribution License (the "License").
61623Stw21770 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*13018SPeter.Dennis@Oracle.COM * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
259818SAndrew.Balfour@Sun.COM
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that
280Sstevel@tonic-gate * TEXT_DOMAIN gets set to something.
290Sstevel@tonic-gate */
300Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
310Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST"
320Sstevel@tonic-gate #endif
330Sstevel@tonic-gate
340Sstevel@tonic-gate #include <meta.h>
350Sstevel@tonic-gate #include <sdssc.h>
360Sstevel@tonic-gate #include <arpa/inet.h>
370Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
380Sstevel@tonic-gate
/*
 * Size, in bytes, of the fixed buffers this file uses for a line of the
 * nodelist file and for the node name / node address scratch strings.
 */
#define	MAX_LINE_SIZE 1024

/*
 * Upper bound on the time spent waiting for an ownership change to
 * complete.
 * NOTE(review): the unit is not stated anywhere in view - confirm at the
 * point of use.
 */
static const int OWNER_TIMEOUT = 3;
450Sstevel@tonic-gate
460Sstevel@tonic-gate /*
470Sstevel@tonic-gate * FUNCTION: meta_is_mn_set()
480Sstevel@tonic-gate * INPUT: sp - the set name
490Sstevel@tonic-gate * OUTPUT: ep - return error pointer
500Sstevel@tonic-gate * RETURNS: int - 1 if MultiNode set else 0
510Sstevel@tonic-gate * PURPOSE: checks if the set is a MultiNode set
520Sstevel@tonic-gate */
530Sstevel@tonic-gate int
meta_is_mn_set(mdsetname_t * sp,md_error_t * ep)540Sstevel@tonic-gate meta_is_mn_set(
550Sstevel@tonic-gate mdsetname_t *sp,
560Sstevel@tonic-gate md_error_t *ep
570Sstevel@tonic-gate )
580Sstevel@tonic-gate {
590Sstevel@tonic-gate md_set_desc *sd;
600Sstevel@tonic-gate
610Sstevel@tonic-gate /* Local set cannot be MultiNode */
620Sstevel@tonic-gate if ((sp == NULL) || (sp->setname == NULL) ||
638452SJohn.Wren.Kennedy@Sun.COM (strcmp(sp->setname, MD_LOCAL_NAME) == 0))
640Sstevel@tonic-gate return (0);
650Sstevel@tonic-gate sd = metaget_setdesc(sp, ep);
66*13018SPeter.Dennis@Oracle.COM
67*13018SPeter.Dennis@Oracle.COM /*
68*13018SPeter.Dennis@Oracle.COM * sd can be NULL if there is a difference between
69*13018SPeter.Dennis@Oracle.COM * the setrecords and the setlistp caches. This can happen
70*13018SPeter.Dennis@Oracle.COM * if this function is called while a set is being
71*13018SPeter.Dennis@Oracle.COM * removed during a cluster reconfiguration.
72*13018SPeter.Dennis@Oracle.COM */
73*13018SPeter.Dennis@Oracle.COM if (sd == NULL)
74*13018SPeter.Dennis@Oracle.COM return (0);
750Sstevel@tonic-gate if (sd->sd_flags & MD_SR_MN)
760Sstevel@tonic-gate return (1);
770Sstevel@tonic-gate return (0);
780Sstevel@tonic-gate }
790Sstevel@tonic-gate
800Sstevel@tonic-gate /*
810Sstevel@tonic-gate * FUNCTION: meta_is_mn_name()
820Sstevel@tonic-gate * INPUT: spp - ptr to the set name, if NULL the setname is derived
830Sstevel@tonic-gate * from the metadevice name (eg set/d10 )
841623Stw21770 * name - the metadevice/hsp name
850Sstevel@tonic-gate * OUTPUT: ep - return error pointer
860Sstevel@tonic-gate * RETURNS: int - 1 if MultiNode set else 0
870Sstevel@tonic-gate * PURPOSE: checks if the metadevice is in a MultiNode set
880Sstevel@tonic-gate */
890Sstevel@tonic-gate int
meta_is_mn_name(mdsetname_t ** spp,char * name,md_error_t * ep)900Sstevel@tonic-gate meta_is_mn_name(
910Sstevel@tonic-gate mdsetname_t **spp,
920Sstevel@tonic-gate char *name,
930Sstevel@tonic-gate md_error_t *ep
940Sstevel@tonic-gate )
950Sstevel@tonic-gate {
961623Stw21770 if (*spp == NULL) {
971623Stw21770 char *cname;
980Sstevel@tonic-gate
991623Stw21770 /*
1001623Stw21770 * if the setname is specified in uname and *spp is
1011623Stw21770 * not set, then it is setup using that set name value.
1021623Stw21770 * If *spp is set and a setname specified in uname and
1031623Stw21770 * the set names don't agree then cname will be
1041623Stw21770 * returned as NULL
1051623Stw21770 */
1061623Stw21770 cname = meta_canonicalize_check_set(spp, name, ep);
1071623Stw21770 if (cname == NULL) {
1081623Stw21770 mdclrerror(ep);
1091623Stw21770 return (0);
1101623Stw21770 }
1111623Stw21770
1121623Stw21770 Free(cname);
1130Sstevel@tonic-gate }
1140Sstevel@tonic-gate
1150Sstevel@tonic-gate if ((strcmp((*spp)->setname, MD_LOCAL_NAME) != 0) &&
1160Sstevel@tonic-gate (metaget_setdesc(*spp, ep) != NULL) &&
1170Sstevel@tonic-gate ((*spp)->setdesc->sd_flags & MD_SR_MN)) {
1180Sstevel@tonic-gate return (1);
1190Sstevel@tonic-gate }
1200Sstevel@tonic-gate return (0);
1210Sstevel@tonic-gate }
1220Sstevel@tonic-gate
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate * meta_ping_mnset(set_t setno)
1250Sstevel@tonic-gate * Send a test message for this set in order to make commd do some init stuff
1260Sstevel@tonic-gate * Don't bother changelog.
1270Sstevel@tonic-gate * If set is suspended, fail immediately.
1280Sstevel@tonic-gate */
1290Sstevel@tonic-gate void
meta_ping_mnset(set_t setno)1300Sstevel@tonic-gate meta_ping_mnset(set_t setno)
1310Sstevel@tonic-gate {
1320Sstevel@tonic-gate char *data = "test";
1330Sstevel@tonic-gate md_error_t mde = mdnullerror;
1340Sstevel@tonic-gate md_mn_result_t *resp = NULL;
1350Sstevel@tonic-gate
1360Sstevel@tonic-gate (void) mdmn_send_message(setno, MD_MN_MSG_TEST2,
1378452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, 0, data,
1380Sstevel@tonic-gate sizeof (data), &resp, &mde);
1390Sstevel@tonic-gate
1400Sstevel@tonic-gate if (resp != (md_mn_result_t *)NULL) {
1410Sstevel@tonic-gate free_result(resp);
1420Sstevel@tonic-gate }
1430Sstevel@tonic-gate }
1440Sstevel@tonic-gate
1450Sstevel@tonic-gate /*
1460Sstevel@tonic-gate *
1470Sstevel@tonic-gate * FUNCTION: print_stderr
1480Sstevel@tonic-gate * INPUT: errstr - the error message returned by the command
1490Sstevel@tonic-gate * context - the context string from metainit -a
1500Sstevel@tonic-gate * PURPOSE: called from meta_mn_send_command to print the error message
1510Sstevel@tonic-gate * to stderr. When context is NO_CONTEXT_STRING, the errstr string
1520Sstevel@tonic-gate * is output unchanged. When context is a string, it is the context
1530Sstevel@tonic-gate * string for the metainit -a command and in this case the errstr
1540Sstevel@tonic-gate * string has to be parsed to extract the command and node name
1550Sstevel@tonic-gate * and to send a message to stderr in the format
1560Sstevel@tonic-gate * command: node: context: error message
1570Sstevel@tonic-gate */
1580Sstevel@tonic-gate static void
print_stderr(char * errstr,char * context)1590Sstevel@tonic-gate print_stderr(
1600Sstevel@tonic-gate char *errstr,
1610Sstevel@tonic-gate char *context
1620Sstevel@tonic-gate )
1630Sstevel@tonic-gate {
1640Sstevel@tonic-gate char *command;
1650Sstevel@tonic-gate char *node;
1660Sstevel@tonic-gate char *message;
1670Sstevel@tonic-gate int length = strlen(errstr + 1);
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate if (context == NO_CONTEXT_STRING) {
1700Sstevel@tonic-gate (void) fprintf(stderr, "%s", errstr);
1710Sstevel@tonic-gate } else {
1720Sstevel@tonic-gate command = Malloc(length);
1730Sstevel@tonic-gate node = Malloc(length);
1740Sstevel@tonic-gate message = Malloc(length);
1750Sstevel@tonic-gate if (sscanf(errstr, "%[^:]: %[^:]: %[^\n]", command, node,
1760Sstevel@tonic-gate message) == 3) {
1770Sstevel@tonic-gate (void) fprintf(stderr, "%s: %s: %s: %s\n", command,
1780Sstevel@tonic-gate node, context, message);
1790Sstevel@tonic-gate } else {
1800Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1810Sstevel@tonic-gate "%s: Invalid format error message"), errstr);
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate Free(command);
1840Sstevel@tonic-gate Free(node);
1850Sstevel@tonic-gate Free(message);
1860Sstevel@tonic-gate }
1870Sstevel@tonic-gate }
1880Sstevel@tonic-gate
1890Sstevel@tonic-gate /*
1900Sstevel@tonic-gate * FUNCTION: meta_mn_send_command()
1910Sstevel@tonic-gate * INPUT: sp - the set name
1920Sstevel@tonic-gate * argc - number of arguments
1930Sstevel@tonic-gate * argv - arg list
1940Sstevel@tonic-gate * flags - some controlling flags
1950Sstevel@tonic-gate * initall_context - context string for metainit -a
1960Sstevel@tonic-gate * OUTPUT: ep - return error pointer
1970Sstevel@tonic-gate * RETURNS: return exitval from mdmn_send_message
1980Sstevel@tonic-gate * PURPOSE: sends the command to the master node for execution
1990Sstevel@tonic-gate */
2000Sstevel@tonic-gate int
meta_mn_send_command(mdsetname_t * sp,int argc,char * argv[],int flags,char * initall_context,md_error_t * ep)2010Sstevel@tonic-gate meta_mn_send_command(
2020Sstevel@tonic-gate mdsetname_t *sp,
2030Sstevel@tonic-gate int argc,
2040Sstevel@tonic-gate char *argv[],
2050Sstevel@tonic-gate int flags,
2060Sstevel@tonic-gate char *initall_context,
2070Sstevel@tonic-gate md_error_t *ep
2080Sstevel@tonic-gate )
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate int a;
2110Sstevel@tonic-gate int err;
2120Sstevel@tonic-gate int retval;
2130Sstevel@tonic-gate int send_message_flags = MD_MSGF_DEFAULT_FLAGS;
2140Sstevel@tonic-gate int send_message_type;
2150Sstevel@tonic-gate char *cmd;
2160Sstevel@tonic-gate md_mn_result_t *resp = NULL;
2170Sstevel@tonic-gate
2180Sstevel@tonic-gate cmd = Malloc(1024);
2190Sstevel@tonic-gate (void) strlcpy(cmd, argv[0], 1024);
2200Sstevel@tonic-gate for (a = 1; a < argc; a++) {
2210Sstevel@tonic-gate /* don't copy empty arguments */
2220Sstevel@tonic-gate if (*argv[a] == '\0') {
2230Sstevel@tonic-gate continue;
2240Sstevel@tonic-gate }
2250Sstevel@tonic-gate (void) strcat(cmd, " ");
2260Sstevel@tonic-gate (void) strcat(cmd, argv[a]);
2270Sstevel@tonic-gate }
2280Sstevel@tonic-gate /*
2290Sstevel@tonic-gate * in dryrun mode stop on the first error
2300Sstevel@tonic-gate * use the CMD_RETRY message type if RETRY_BUSY flag set
2310Sstevel@tonic-gate */
2320Sstevel@tonic-gate if (flags & MD_DRYRUN)
2330Sstevel@tonic-gate send_message_flags |= MD_MSGF_STOP_ON_ERROR;
2340Sstevel@tonic-gate if (flags & MD_NOLOG)
2350Sstevel@tonic-gate send_message_flags |= MD_MSGF_NO_LOG;
2360Sstevel@tonic-gate if (flags & MD_PANIC_WHEN_INCONSISTENT)
2370Sstevel@tonic-gate send_message_flags |= MD_MSGF_PANIC_WHEN_INCONSISTENT;
2380Sstevel@tonic-gate if (flags & MD_RETRY_BUSY) {
2390Sstevel@tonic-gate send_message_type = MD_MN_MSG_BC_CMD_RETRY;
2400Sstevel@tonic-gate } else {
2410Sstevel@tonic-gate send_message_type = MD_MN_MSG_BC_CMD;
2420Sstevel@tonic-gate }
2439818SAndrew.Balfour@Sun.COM err = mdmn_send_message(
2449818SAndrew.Balfour@Sun.COM sp->setno, send_message_type, send_message_flags, 0,
2459818SAndrew.Balfour@Sun.COM cmd, 1024, &resp, ep);
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate free(cmd);
2480Sstevel@tonic-gate
2490Sstevel@tonic-gate if (err == 0) {
2500Sstevel@tonic-gate /*
2510Sstevel@tonic-gate * stderr may be turned off by IGNORE_STDERR
2520Sstevel@tonic-gate * In dryrun we only print stderr if the exit_val is non-zero
2530Sstevel@tonic-gate */
2540Sstevel@tonic-gate if ((resp->mmr_err_size != 0) &&
2550Sstevel@tonic-gate ((flags & MD_IGNORE_STDERR) == 0)) {
2560Sstevel@tonic-gate if (((flags & MD_DRYRUN) == 0) ||
2570Sstevel@tonic-gate (resp->mmr_exitval != 0)) {
2580Sstevel@tonic-gate print_stderr(resp->mmr_err, initall_context);
2590Sstevel@tonic-gate }
2600Sstevel@tonic-gate }
2610Sstevel@tonic-gate
2620Sstevel@tonic-gate /*
2630Sstevel@tonic-gate * If dryrun is set, we don't display stdout,
2640Sstevel@tonic-gate * because the real run has yet to follow.
2650Sstevel@tonic-gate */
2660Sstevel@tonic-gate if (((flags & MD_DRYRUN) == 0) && (resp->mmr_out_size != 0)) {
2670Sstevel@tonic-gate (void) printf("%s", resp->mmr_out);
2680Sstevel@tonic-gate }
2690Sstevel@tonic-gate retval = resp->mmr_exitval;
2700Sstevel@tonic-gate free_result(resp);
2710Sstevel@tonic-gate return (retval);
2720Sstevel@tonic-gate }
2730Sstevel@tonic-gate if (resp != NULL) {
2740Sstevel@tonic-gate if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
2750Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2760Sstevel@tonic-gate "rpc.mdcommd currently busy. "
2770Sstevel@tonic-gate "Retry operation later.\n"));
2780Sstevel@tonic-gate } else if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
2790Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2800Sstevel@tonic-gate "Node %s must join the %s multi-owner diskset to "
2810Sstevel@tonic-gate "issue commands.\n"
2820Sstevel@tonic-gate "To join, use: metaset -s %s -j\n"),
2830Sstevel@tonic-gate mynode(), sp->setname, sp->setname);
2840Sstevel@tonic-gate } else if (resp->mmr_comm_state == MDMNE_LOG_FAIL) {
2850Sstevel@tonic-gate mddb_config_t c;
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
2880Sstevel@tonic-gate c.c_setno = sp->setno;
2890Sstevel@tonic-gate (void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
2900Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2910Sstevel@tonic-gate "Command not attempted: Unable to log message "
2920Sstevel@tonic-gate "in set %s\n"), sp->setname);
2930Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) {
2948452SJohn.Wren.Kennedy@Sun.COM (void) mdmddberror(ep, MDE_DB_STALE,
2958452SJohn.Wren.Kennedy@Sun.COM (minor_t)NODEV64, sp->setno, 0, NULL);
2968452SJohn.Wren.Kennedy@Sun.COM mde_perror(ep, "");
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate } else {
2990Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3000Sstevel@tonic-gate "Command failed: Commd State %d "
3010Sstevel@tonic-gate "encountered.\n"), resp->mmr_comm_state);
3020Sstevel@tonic-gate }
3030Sstevel@tonic-gate free_result(resp);
3040Sstevel@tonic-gate } else {
3050Sstevel@tonic-gate (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3060Sstevel@tonic-gate "Command failed: mdmn_send_message returned %d.\n"),
3070Sstevel@tonic-gate err);
3080Sstevel@tonic-gate }
3090Sstevel@tonic-gate
3100Sstevel@tonic-gate
3110Sstevel@tonic-gate return (1);
3120Sstevel@tonic-gate }
3130Sstevel@tonic-gate
3140Sstevel@tonic-gate /*
3150Sstevel@tonic-gate * FUNCTION: meta_mn_send_suspend_writes()
3160Sstevel@tonic-gate * INPUT: mnum - minor num of mirror
3170Sstevel@tonic-gate * OUTPUT: ep - return error pointer
3180Sstevel@tonic-gate * RETURNS: return value from mdmn_send_message()
3190Sstevel@tonic-gate * PURPOSE: sends message to all nodes to suspend writes to the mirror.
3200Sstevel@tonic-gate */
3210Sstevel@tonic-gate int
meta_mn_send_suspend_writes(minor_t mnum,md_error_t * ep)3220Sstevel@tonic-gate meta_mn_send_suspend_writes(
3230Sstevel@tonic-gate minor_t mnum,
3240Sstevel@tonic-gate md_error_t *ep
3250Sstevel@tonic-gate )
3260Sstevel@tonic-gate {
3270Sstevel@tonic-gate int result;
3280Sstevel@tonic-gate md_mn_msg_suspwr_t suspwrmsg;
3290Sstevel@tonic-gate md_mn_result_t *resp = NULL;
3300Sstevel@tonic-gate
3310Sstevel@tonic-gate suspwrmsg.msg_suspwr_mnum = mnum;
3320Sstevel@tonic-gate /*
3330Sstevel@tonic-gate * This message is never directly issued.
3340Sstevel@tonic-gate * So we launch it with a suspend override flag.
3350Sstevel@tonic-gate * If the commd is suspended, and this message comes
3360Sstevel@tonic-gate * along it must be sent due to replaying a command or similar.
3370Sstevel@tonic-gate * In that case we don't want this message to be blocked.
3380Sstevel@tonic-gate * If the commd is not suspended, the flag does no harm.
3390Sstevel@tonic-gate */
3400Sstevel@tonic-gate result = mdmn_send_message(MD_MIN2SET(mnum),
3410Sstevel@tonic-gate MD_MN_MSG_SUSPEND_WRITES,
3428452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
3430Sstevel@tonic-gate (char *)&suspwrmsg, sizeof (suspwrmsg), &resp, ep);
3440Sstevel@tonic-gate if (resp != NULL) {
3450Sstevel@tonic-gate free_result(resp);
3460Sstevel@tonic-gate }
3470Sstevel@tonic-gate return (result);
3480Sstevel@tonic-gate }
3490Sstevel@tonic-gate
3500Sstevel@tonic-gate /*
3510Sstevel@tonic-gate * Parse the multi-node list file
3520Sstevel@tonic-gate *
3530Sstevel@tonic-gate * Return Values: Zero - Success
3540Sstevel@tonic-gate * Non Zero - Failure
3550Sstevel@tonic-gate *
3560Sstevel@tonic-gate * File content: The content of the nodelist file should consist of
3570Sstevel@tonic-gate * triplets of nodeid, nodename and private interconnect
3580Sstevel@tonic-gate * address seperated by one or more white space.
3590Sstevel@tonic-gate * e.g.
3600Sstevel@tonic-gate * 1 node_a 192.168.111.3
3610Sstevel@tonic-gate * 2 node_b 192.168.111.5
3620Sstevel@tonic-gate *
3630Sstevel@tonic-gate * Any missing fields will result in an error.
3640Sstevel@tonic-gate */
3650Sstevel@tonic-gate int
meta_read_nodelist(int * nodecnt,mndiskset_membershiplist_t ** nl,md_error_t * ep)3660Sstevel@tonic-gate meta_read_nodelist(
3670Sstevel@tonic-gate int *nodecnt,
3680Sstevel@tonic-gate mndiskset_membershiplist_t **nl,
3690Sstevel@tonic-gate md_error_t *ep
3700Sstevel@tonic-gate )
3710Sstevel@tonic-gate {
3720Sstevel@tonic-gate FILE *fp = NULL;
3730Sstevel@tonic-gate char line[MAX_LINE_SIZE];
3740Sstevel@tonic-gate char *buf;
3750Sstevel@tonic-gate uint_t i;
3760Sstevel@tonic-gate int sz;
3770Sstevel@tonic-gate mndiskset_membershiplist_t **tailp = nl;
3780Sstevel@tonic-gate
3790Sstevel@tonic-gate /* open file */
3800Sstevel@tonic-gate if ((fp = fopen(META_MNSET_NODELIST, "r")) == NULL) {
3810Sstevel@tonic-gate mndiskset_membershiplist_t *nlp;
3820Sstevel@tonic-gate struct hostent *hp;
3839818SAndrew.Balfour@Sun.COM int err = 0;
3840Sstevel@tonic-gate
3850Sstevel@tonic-gate /* return this node with id of 1 */
3860Sstevel@tonic-gate nlp = *tailp = Zalloc(sizeof (*nlp));
3870Sstevel@tonic-gate tailp = &nlp->next;
3880Sstevel@tonic-gate
3890Sstevel@tonic-gate *nodecnt = 1;
3900Sstevel@tonic-gate nlp->msl_node_id = 1;
3910Sstevel@tonic-gate buf = mynode();
3920Sstevel@tonic-gate sz = min(strlen(buf), sizeof (nlp->msl_node_name) - 1);
3930Sstevel@tonic-gate (void) strncpy(nlp->msl_node_name, buf, sz);
3940Sstevel@tonic-gate nlp->msl_node_name[sz] = '\0';
3950Sstevel@tonic-gate
3960Sstevel@tonic-gate /* retrieve info about our host */
3970Sstevel@tonic-gate if ((hp = gethostbyname(buf)) == NULL) {
3989818SAndrew.Balfour@Sun.COM err = EADDRNOTAVAIL;
39911742SAndrew.Balfour@Sun.COM } else if (hp->h_addrtype != AF_INET) {
4009818SAndrew.Balfour@Sun.COM /* We only do IPv4 addresses, for now */
40111742SAndrew.Balfour@Sun.COM err = EPFNOSUPPORT;
40211742SAndrew.Balfour@Sun.COM } else if (*hp->h_addr_list == NULL) {
40311742SAndrew.Balfour@Sun.COM /* No addresses in the list */
40411742SAndrew.Balfour@Sun.COM err = EADDRNOTAVAIL;
40511742SAndrew.Balfour@Sun.COM } else {
4069818SAndrew.Balfour@Sun.COM /* We take the first address only */
40711742SAndrew.Balfour@Sun.COM struct in_addr in;
4089818SAndrew.Balfour@Sun.COM
40911742SAndrew.Balfour@Sun.COM (void) memcpy(&in.s_addr, *hp->h_addr_list,
41011742SAndrew.Balfour@Sun.COM sizeof (struct in_addr));
41111742SAndrew.Balfour@Sun.COM (void) strncpy(nlp->msl_node_addr,
41211742SAndrew.Balfour@Sun.COM inet_ntoa(in), MD_MAX_NODENAME);
4130Sstevel@tonic-gate }
41411742SAndrew.Balfour@Sun.COM
4159818SAndrew.Balfour@Sun.COM if (err) {
4169818SAndrew.Balfour@Sun.COM meta_free_nodelist(*nl);
4179818SAndrew.Balfour@Sun.COM return (mdsyserror(ep, err, buf));
4180Sstevel@tonic-gate }
4190Sstevel@tonic-gate return (0);
4200Sstevel@tonic-gate }
4210Sstevel@tonic-gate
4220Sstevel@tonic-gate *nl = NULL;
4230Sstevel@tonic-gate *nodecnt = 0;
4240Sstevel@tonic-gate
4250Sstevel@tonic-gate while ((fp != NULL) && ((buf = fgets(line, sizeof (line) - 1, fp)) !=
4260Sstevel@tonic-gate NULL)) {
4270Sstevel@tonic-gate mndiskset_membershiplist_t *nlp;
4280Sstevel@tonic-gate
4290Sstevel@tonic-gate /* skip leading spaces */
4300Sstevel@tonic-gate while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4310Sstevel@tonic-gate buf++;
4320Sstevel@tonic-gate
4330Sstevel@tonic-gate /* skip comments and blank lines */
4340Sstevel@tonic-gate if (*buf == '\0' || *buf == '#')
4350Sstevel@tonic-gate continue;
4360Sstevel@tonic-gate
4370Sstevel@tonic-gate /* allocate memory and set tail pointer */
4380Sstevel@tonic-gate nlp = *tailp = Zalloc(sizeof (*nlp));
4390Sstevel@tonic-gate tailp = &nlp->next;
4400Sstevel@tonic-gate
4410Sstevel@tonic-gate /* parse node id */
4420Sstevel@tonic-gate nlp->msl_node_id = strtoul(buf, NULL, 0);
4430Sstevel@tonic-gate buf += i;
4440Sstevel@tonic-gate
4450Sstevel@tonic-gate /* skip leading spaces */
4460Sstevel@tonic-gate while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4470Sstevel@tonic-gate buf++;
4480Sstevel@tonic-gate
4490Sstevel@tonic-gate /* fields missing, return error */
4500Sstevel@tonic-gate if (*buf == '\0' || *buf == '#') {
4510Sstevel@tonic-gate meta_free_nodelist(*nl);
4520Sstevel@tonic-gate *nl = NULL;
4530Sstevel@tonic-gate *nodecnt = 0;
4540Sstevel@tonic-gate
4550Sstevel@tonic-gate /* close file and return */
4560Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
4570Sstevel@tonic-gate return (mdsyserror(ep, errno,
4580Sstevel@tonic-gate META_MNSET_NODELIST));
4590Sstevel@tonic-gate
4600Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
4610Sstevel@tonic-gate }
4620Sstevel@tonic-gate
4630Sstevel@tonic-gate /* parse node name */
4640Sstevel@tonic-gate sz = min(i, sizeof (nlp->msl_node_name) - 1);
4650Sstevel@tonic-gate (void) strncpy(nlp->msl_node_name, buf, sz);
4660Sstevel@tonic-gate nlp->msl_node_name[sz] = '\0';
4670Sstevel@tonic-gate buf += i;
4680Sstevel@tonic-gate
4690Sstevel@tonic-gate /* skip leading spaces */
4700Sstevel@tonic-gate while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4710Sstevel@tonic-gate buf++;
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate /* fields missing, return error */
4740Sstevel@tonic-gate if (*buf == '\0' || *buf == '#') {
4750Sstevel@tonic-gate meta_free_nodelist(*nl);
4760Sstevel@tonic-gate *nl = NULL;
4770Sstevel@tonic-gate *nodecnt = 0;
4780Sstevel@tonic-gate
4790Sstevel@tonic-gate /* close file and return */
4800Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
4810Sstevel@tonic-gate return (mdsyserror(ep, errno,
4820Sstevel@tonic-gate META_MNSET_NODELIST));
4830Sstevel@tonic-gate
4840Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
4850Sstevel@tonic-gate }
4860Sstevel@tonic-gate
4870Sstevel@tonic-gate /* parse node address */
4880Sstevel@tonic-gate sz = min(i, sizeof (nlp->msl_node_addr) - 1);
4890Sstevel@tonic-gate (void) strncpy(nlp->msl_node_addr, buf, sz);
4900Sstevel@tonic-gate nlp->msl_node_addr[sz] = '\0';
4910Sstevel@tonic-gate
4920Sstevel@tonic-gate ++*nodecnt;
4930Sstevel@tonic-gate }
4940Sstevel@tonic-gate
4950Sstevel@tonic-gate /* close file */
4969818SAndrew.Balfour@Sun.COM if ((fp) && (fclose(fp) != 0)) {
4979818SAndrew.Balfour@Sun.COM meta_free_nodelist(*nl);
4980Sstevel@tonic-gate return (mdsyserror(ep, errno, META_MNSET_NODELIST));
4999818SAndrew.Balfour@Sun.COM }
5000Sstevel@tonic-gate return (0);
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate
5030Sstevel@tonic-gate /*
5040Sstevel@tonic-gate * Populate the multi-node list file from a given list of node id's
5050Sstevel@tonic-gate * The nids must have only one node id in each cell. Range of node
5060Sstevel@tonic-gate * id's in the form 1-n are not allowed.
5070Sstevel@tonic-gate *
5080Sstevel@tonic-gate * Return Values: Zero - Success
5090Sstevel@tonic-gate * Non Zero - Failure
5100Sstevel@tonic-gate */
5110Sstevel@tonic-gate int
meta_write_nodelist(int nodecnt,char ** nids,md_error_t * ep)5120Sstevel@tonic-gate meta_write_nodelist(
5130Sstevel@tonic-gate int nodecnt,
5140Sstevel@tonic-gate char **nids,
5150Sstevel@tonic-gate md_error_t *ep
5160Sstevel@tonic-gate )
5170Sstevel@tonic-gate {
5180Sstevel@tonic-gate FILE *fp = NULL;
5190Sstevel@tonic-gate char name[MAX_LINE_SIZE], addr[MAX_LINE_SIZE];
5200Sstevel@tonic-gate uint_t i, nid;
5210Sstevel@tonic-gate struct in_addr ipaddr;
5220Sstevel@tonic-gate int err = 0;
5230Sstevel@tonic-gate
5240Sstevel@tonic-gate /* check if we are running on clustering */
5250Sstevel@tonic-gate if ((err = sdssc_bind_library()) != SDSSC_OKAY) {
5260Sstevel@tonic-gate return (mdsyserror(ep, err, META_MNSET_NODELIST));
5270Sstevel@tonic-gate }
5280Sstevel@tonic-gate
5290Sstevel@tonic-gate /* open file for writing */
5300Sstevel@tonic-gate if ((fp = fopen(META_MNSET_NODELIST, "w")) == NULL) {
5310Sstevel@tonic-gate return (mdsyserror(ep, errno, META_MNSET_NODELIST));
5320Sstevel@tonic-gate }
5330Sstevel@tonic-gate
5340Sstevel@tonic-gate for (i = 0; i < nodecnt; i++) {
5350Sstevel@tonic-gate /* extract the node id */
5360Sstevel@tonic-gate errno = 0;
5370Sstevel@tonic-gate nid = strtoul(nids[i], NULL, 0);
5380Sstevel@tonic-gate if (errno != 0) {
5390Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
5400Sstevel@tonic-gate return (mdsyserror(ep, errno,
5410Sstevel@tonic-gate META_MNSET_NODELIST));
5420Sstevel@tonic-gate
5430Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
5440Sstevel@tonic-gate }
5450Sstevel@tonic-gate
5460Sstevel@tonic-gate /* get node name */
5470Sstevel@tonic-gate (void) snprintf(name, sizeof (name), "%d", nid);
5480Sstevel@tonic-gate sdssc_cm_nid2nm(name);
5490Sstevel@tonic-gate
5500Sstevel@tonic-gate /* finally get the private ip address */
5510Sstevel@tonic-gate (void) snprintf(addr, sizeof (addr), "%s", name);
5520Sstevel@tonic-gate if (sdssc_get_priv_ipaddr(addr, &ipaddr) != SDSSC_OKAY) {
5530Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
5540Sstevel@tonic-gate return (mdsyserror(ep, errno,
5550Sstevel@tonic-gate META_MNSET_NODELIST));
5560Sstevel@tonic-gate
5570Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
5580Sstevel@tonic-gate }
5590Sstevel@tonic-gate
5600Sstevel@tonic-gate (void) fprintf(fp, "%d\t%s\t%s\n", nid, name,
5610Sstevel@tonic-gate inet_ntoa(ipaddr));
5620Sstevel@tonic-gate }
5630Sstevel@tonic-gate
5640Sstevel@tonic-gate /* close file */
5650Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
5660Sstevel@tonic-gate return (mdsyserror(ep, errno, META_MNSET_NODELIST));
5670Sstevel@tonic-gate
5680Sstevel@tonic-gate return (0);
5690Sstevel@tonic-gate }
5700Sstevel@tonic-gate
5710Sstevel@tonic-gate /*
5720Sstevel@tonic-gate * Free node list
5730Sstevel@tonic-gate */
5740Sstevel@tonic-gate void
meta_free_nodelist(mndiskset_membershiplist_t * nl)5750Sstevel@tonic-gate meta_free_nodelist(
5760Sstevel@tonic-gate mndiskset_membershiplist_t *nl
5770Sstevel@tonic-gate )
5780Sstevel@tonic-gate {
5790Sstevel@tonic-gate mndiskset_membershiplist_t *next = NULL;
5800Sstevel@tonic-gate
5810Sstevel@tonic-gate for (/* void */; (nl != NULL); nl = next) {
5820Sstevel@tonic-gate next = nl->next;
5830Sstevel@tonic-gate Free(nl);
5840Sstevel@tonic-gate }
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate * FUNCTION: meta_mn_send_setsync()
5890Sstevel@tonic-gate * INPUT: sp - setname
5900Sstevel@tonic-gate * mirnp - mirror name
5910Sstevel@tonic-gate * size - buffer size, 0 if none
5920Sstevel@tonic-gate * OUTPUT: ep - return error pointer
5930Sstevel@tonic-gate * RETURNS: return value from meta_mn_send_command()
5940Sstevel@tonic-gate * PURPOSE: Send a setsync command to all nodes to set resync status
5950Sstevel@tonic-gate */
5960Sstevel@tonic-gate
5970Sstevel@tonic-gate int
meta_mn_send_setsync(mdsetname_t * sp,mdname_t * mirnp,daddr_t size,md_error_t * ep)5980Sstevel@tonic-gate meta_mn_send_setsync(
5990Sstevel@tonic-gate mdsetname_t *sp,
6000Sstevel@tonic-gate mdname_t *mirnp,
6010Sstevel@tonic-gate daddr_t size,
6020Sstevel@tonic-gate md_error_t *ep
6030Sstevel@tonic-gate )
6040Sstevel@tonic-gate {
6050Sstevel@tonic-gate md_mn_msg_setsync_t setsyncmsg;
6060Sstevel@tonic-gate int ret;
6070Sstevel@tonic-gate md_mn_result_t *resp = NULL;
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate setsyncmsg.setsync_mnum = meta_getminor(mirnp->dev);
6100Sstevel@tonic-gate setsyncmsg.setsync_copysize = size;
6110Sstevel@tonic-gate setsyncmsg.setsync_flags = 0;
6120Sstevel@tonic-gate
6130Sstevel@tonic-gate /*
6140Sstevel@tonic-gate * We do not log the metasync command as it will have no effect on the
6150Sstevel@tonic-gate * underlying metadb state. If we have a master change the
6160Sstevel@tonic-gate * reconfiguration process will issue a new 'metasync' to all affected
6170Sstevel@tonic-gate * mirrors, so we would actually end up sending the message twice.
6180Sstevel@tonic-gate * Removing the logging of the message helps reduce the processing
6190Sstevel@tonic-gate * time required.
6200Sstevel@tonic-gate */
6210Sstevel@tonic-gate ret = mdmn_send_message(sp->setno, MD_MN_MSG_SETSYNC,
6228452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
6230Sstevel@tonic-gate (char *)&setsyncmsg, sizeof (setsyncmsg), &resp, ep);
6240Sstevel@tonic-gate if (resp != NULL) {
6250Sstevel@tonic-gate free_result(resp);
6260Sstevel@tonic-gate }
6270Sstevel@tonic-gate
6280Sstevel@tonic-gate /*
6290Sstevel@tonic-gate * Unlike non-MN sets, the metasync command does not actually
6300Sstevel@tonic-gate * start a resync, it simply updates the state on all of the
6310Sstevel@tonic-gate * nodes. Therefore, to start a resync we send a resync starting
6320Sstevel@tonic-gate * message for the metadevice
6330Sstevel@tonic-gate */
6340Sstevel@tonic-gate if (ret == 0)
6350Sstevel@tonic-gate ret = meta_mn_send_resync_starting(mirnp, ep);
6360Sstevel@tonic-gate return (ret);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate * FUNCTION: meta_mn_send_metaclear_command()
6410Sstevel@tonic-gate * INPUT: sp - setname
6420Sstevel@tonic-gate * name - metadevice name
6430Sstevel@tonic-gate * options - command options
6440Sstevel@tonic-gate * pflag - clear all soft partitions for a given device
6450Sstevel@tonic-gate * OUTPUT: ep - return error pointer
6460Sstevel@tonic-gate * RETURNS: return value from meta_mn_send_command()
6470Sstevel@tonic-gate * PURPOSE: Send a metaclear command to all nodes with force(-f) and
6480Sstevel@tonic-gate * recurse(-r) options set if required. For hotspare pool and
6490Sstevel@tonic-gate * metadevices, the metadevice name is of the form setname/dxx or
6500Sstevel@tonic-gate * setname/hspxxx so a '-s' argument isn't required. If pflag is set
6510Sstevel@tonic-gate * the name refers to a metadevice or component and in the is case
6520Sstevel@tonic-gate * a '-s' argument is required to define the set.
6530Sstevel@tonic-gate */
6540Sstevel@tonic-gate
6550Sstevel@tonic-gate int
meta_mn_send_metaclear_command(mdsetname_t * sp,char * name,mdcmdopts_t options,int pflag,md_error_t * ep)6560Sstevel@tonic-gate meta_mn_send_metaclear_command(
6570Sstevel@tonic-gate mdsetname_t *sp,
6580Sstevel@tonic-gate char *name,
6590Sstevel@tonic-gate mdcmdopts_t options,
6600Sstevel@tonic-gate int pflag,
6610Sstevel@tonic-gate md_error_t *ep
6620Sstevel@tonic-gate )
6630Sstevel@tonic-gate {
6640Sstevel@tonic-gate int newargc;
6650Sstevel@tonic-gate char **newargv;
6660Sstevel@tonic-gate int ret;
6670Sstevel@tonic-gate
6680Sstevel@tonic-gate /*
6690Sstevel@tonic-gate * Allocate an array large enough to hold all of the possible
6700Sstevel@tonic-gate * metaclear arguments
6710Sstevel@tonic-gate */
6720Sstevel@tonic-gate newargv = Calloc(7, sizeof (char *));
6730Sstevel@tonic-gate newargv[0] = "metaclear";
6740Sstevel@tonic-gate newargc = 1;
6750Sstevel@tonic-gate if (pflag) {
6760Sstevel@tonic-gate newargv[newargc] = "-s";
6770Sstevel@tonic-gate newargc++;
6780Sstevel@tonic-gate newargv[newargc] = sp->setname;
6790Sstevel@tonic-gate newargc++;
6800Sstevel@tonic-gate }
6810Sstevel@tonic-gate if (options & MDCMD_FORCE) {
6820Sstevel@tonic-gate newargv[newargc] = "-f";
6830Sstevel@tonic-gate newargc++;
6840Sstevel@tonic-gate }
6850Sstevel@tonic-gate if (options & MDCMD_RECURSE) {
6860Sstevel@tonic-gate newargv[newargc] = "-r";
6870Sstevel@tonic-gate newargc++;
6880Sstevel@tonic-gate }
6890Sstevel@tonic-gate if (pflag) {
6900Sstevel@tonic-gate newargv[newargc] = "-p";
6910Sstevel@tonic-gate newargc++;
6920Sstevel@tonic-gate }
6930Sstevel@tonic-gate newargv[newargc] = name;
6940Sstevel@tonic-gate newargc++;
6950Sstevel@tonic-gate
6960Sstevel@tonic-gate ret = meta_mn_send_command(sp, newargc, newargv,
6970Sstevel@tonic-gate MD_DISP_STDERR, NO_CONTEXT_STRING, ep);
6980Sstevel@tonic-gate
6990Sstevel@tonic-gate free(newargv);
7000Sstevel@tonic-gate return (ret);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate
7030Sstevel@tonic-gate /*
7040Sstevel@tonic-gate * FUNCTION: meta_mn_send_resync_starting()
7050Sstevel@tonic-gate * INPUT: sp - setname
7060Sstevel@tonic-gate * mirnp - mirror name
7070Sstevel@tonic-gate * OUTPUT: ep - return error pointer
7080Sstevel@tonic-gate * RETURNS: return value from mdmn_send_message()
7090Sstevel@tonic-gate * PURPOSE: Send a resync starting message to all nodes.
7100Sstevel@tonic-gate */
7110Sstevel@tonic-gate
7120Sstevel@tonic-gate int
meta_mn_send_resync_starting(mdname_t * mirnp,md_error_t * ep)7130Sstevel@tonic-gate meta_mn_send_resync_starting(
7140Sstevel@tonic-gate mdname_t *mirnp,
7150Sstevel@tonic-gate md_error_t *ep
7160Sstevel@tonic-gate )
7170Sstevel@tonic-gate {
7180Sstevel@tonic-gate int result;
7190Sstevel@tonic-gate md_mn_msg_resync_t resyncmsg;
7200Sstevel@tonic-gate md_mn_result_t *resp = NULL;
7210Sstevel@tonic-gate minor_t mnum = meta_getminor(mirnp->dev);
7220Sstevel@tonic-gate
7230Sstevel@tonic-gate /*
7240Sstevel@tonic-gate * This message is never directly issued.
7250Sstevel@tonic-gate * So we launch it with a suspend override flag.
7260Sstevel@tonic-gate * If the commd is suspended, and this message comes
7270Sstevel@tonic-gate * along it must be sent due to replaying a command or similar.
7280Sstevel@tonic-gate * In that case we don't want this message to be blocked.
7290Sstevel@tonic-gate * If the commd is not suspended, the flag does no harm.
7300Sstevel@tonic-gate */
7310Sstevel@tonic-gate resyncmsg.msg_resync_mnum = mnum;
7320Sstevel@tonic-gate result = mdmn_send_message(MD_MIN2SET(mnum),
7330Sstevel@tonic-gate MD_MN_MSG_RESYNC_STARTING,
7348452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
7350Sstevel@tonic-gate (char *)&resyncmsg, sizeof (resyncmsg), &resp, ep);
7360Sstevel@tonic-gate
7370Sstevel@tonic-gate if (resp != NULL) {
7380Sstevel@tonic-gate free_result(resp);
7390Sstevel@tonic-gate }
7400Sstevel@tonic-gate return (result);
7410Sstevel@tonic-gate }
7420Sstevel@tonic-gate
7430Sstevel@tonic-gate /*
7440Sstevel@tonic-gate * FUNCTION: meta_mn_change_owner()
7450Sstevel@tonic-gate * INPUT: opp - pointer to parameter block
7460Sstevel@tonic-gate * setno - set number of mirror metadevice
7470Sstevel@tonic-gate * mnum - minor number of mirror metadevice
7480Sstevel@tonic-gate * owner - node ID of mirror owner
7490Sstevel@tonic-gate * flags - flag field for ioctl
7500Sstevel@tonic-gate * OUTPUT: opp - parameter block used to send ioctl
7510Sstevel@tonic-gate * RETURNS: int - 0 success, -1 error
7520Sstevel@tonic-gate * PURPOSE: issue an ioctl to change the ownership of the specified mirror
7530Sstevel@tonic-gate * to our node ID. We need to be the owner before any watermarks
7540Sstevel@tonic-gate * are committed to the device otherwise we'll enter a deadly
7550Sstevel@tonic-gate * embrace when attempting to write the watermark.
7560Sstevel@tonic-gate * This function can also be used so set the owner on a node to
7570Sstevel@tonic-gate * NULL. In this case the change is only made on the local node.
7580Sstevel@tonic-gate * In addition by setting the MD_MN_MM_CHOOSE_OWNER flag, the
7590Sstevel@tonic-gate * function can also be used to choose a mirror resync owner. This
7600Sstevel@tonic-gate * function should only be called on the master and it will
7610Sstevel@tonic-gate * select the owner and request it to become the owner.
7620Sstevel@tonic-gate */
7630Sstevel@tonic-gate int
meta_mn_change_owner(md_set_mmown_params_t ** opp,set_t setno,uint_t mnum,uint_t owner,uint_t flags)7640Sstevel@tonic-gate meta_mn_change_owner(
7650Sstevel@tonic-gate md_set_mmown_params_t **opp, /* Returned parameter block */
7660Sstevel@tonic-gate set_t setno, /* Mirror set number */
7670Sstevel@tonic-gate uint_t mnum, /* Minor number */
7680Sstevel@tonic-gate uint_t owner, /* Node ID of mirror owner */
7690Sstevel@tonic-gate uint_t flags /* Flags */
7700Sstevel@tonic-gate )
7710Sstevel@tonic-gate {
7720Sstevel@tonic-gate md_set_mmown_params_t *ownpar = *opp;
7730Sstevel@tonic-gate md_mn_own_status_t *ownstat = NULL;
7740Sstevel@tonic-gate struct timeval tvs, tve;
7750Sstevel@tonic-gate int n = 0;
7760Sstevel@tonic-gate int rval;
7770Sstevel@tonic-gate
7780Sstevel@tonic-gate if (ownpar != NULL) {
7790Sstevel@tonic-gate (void) memset(ownpar, 0, sizeof (*ownpar));
7800Sstevel@tonic-gate } else {
7810Sstevel@tonic-gate ownpar = Zalloc(sizeof (*ownpar));
7820Sstevel@tonic-gate }
7830Sstevel@tonic-gate ownstat = Zalloc(sizeof (*ownstat));
7840Sstevel@tonic-gate
7850Sstevel@tonic-gate ownpar->d.mnum = mnum;
7860Sstevel@tonic-gate ownpar->d.owner = owner;
7870Sstevel@tonic-gate ownpar->d.flags = flags;
7880Sstevel@tonic-gate MD_SETDRIVERNAME(ownpar, MD_MIRROR, setno);
7890Sstevel@tonic-gate MD_SETDRIVERNAME(ownstat, MD_MIRROR, setno);
7900Sstevel@tonic-gate
7910Sstevel@tonic-gate /*
7920Sstevel@tonic-gate * Attempt to change the ownership to the specified node. We retry this
7930Sstevel@tonic-gate * up to 10 times if we receive EAGAIN from the metadevice. This only
7940Sstevel@tonic-gate * happens if the underlying metadevice is busy with outstanding i/o
7950Sstevel@tonic-gate * that requires ownership change.
7960Sstevel@tonic-gate */
7970Sstevel@tonic-gate while ((rval = metaioctl(MD_MN_SET_MM_OWNER, ownpar, &ownpar->mde,
7980Sstevel@tonic-gate NULL)) != 0) {
7990Sstevel@tonic-gate md_sys_error_t *ip =
8000Sstevel@tonic-gate &ownpar->mde.info.md_error_info_t_u.sys_error;
8010Sstevel@tonic-gate if (ip->errnum != EAGAIN)
8020Sstevel@tonic-gate break;
8030Sstevel@tonic-gate if (n++ >= 10)
8040Sstevel@tonic-gate break;
8050Sstevel@tonic-gate (void) sleep(1);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate
8080Sstevel@tonic-gate /*
8090Sstevel@tonic-gate * There is no need to wait for the ioctl completion if we are setting
8100Sstevel@tonic-gate * the owner to NULL or requesting the master to choose the owner
8110Sstevel@tonic-gate */
8120Sstevel@tonic-gate if ((owner == 0) || (flags & MD_MN_MM_CHOOSE_OWNER)) {
8130Sstevel@tonic-gate Free(ownstat);
8140Sstevel@tonic-gate *opp = ownpar;
8150Sstevel@tonic-gate return (0);
8160Sstevel@tonic-gate }
8170Sstevel@tonic-gate
8180Sstevel@tonic-gate /*
8190Sstevel@tonic-gate * Wait for ioctl completion or a timeout to occur. If we
8200Sstevel@tonic-gate * timeout we fail the i/o request.
8210Sstevel@tonic-gate */
8220Sstevel@tonic-gate ownstat->mnum = ownpar->d.mnum;
8230Sstevel@tonic-gate (void) gettimeofday(&tvs, NULL);
8240Sstevel@tonic-gate
8250Sstevel@tonic-gate while ((rval == 0) && !(ownstat->flags & MD_MN_MM_RESULT)) {
8260Sstevel@tonic-gate while ((rval = metaioctl(MD_MN_MM_OWNER_STATUS, ownstat,
8270Sstevel@tonic-gate &ownstat->mde, NULL)) != 0) {
8280Sstevel@tonic-gate (void) gettimeofday(&tve, NULL);
8290Sstevel@tonic-gate if ((tve.tv_sec - tvs.tv_sec) > OWNER_TIMEOUT) {
8300Sstevel@tonic-gate rval = -1;
8310Sstevel@tonic-gate break;
8320Sstevel@tonic-gate }
8330Sstevel@tonic-gate (void) sleep(1);
8340Sstevel@tonic-gate }
8350Sstevel@tonic-gate }
8360Sstevel@tonic-gate
8370Sstevel@tonic-gate /* we did not not timeout but ioctl failed set rval */
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate if (rval == 0) {
8400Sstevel@tonic-gate rval = (ownstat->flags & MD_MN_MM_RES_FAIL) ? -1 : 0;
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate
8430Sstevel@tonic-gate Free(ownstat);
8440Sstevel@tonic-gate *opp = ownpar;
8450Sstevel@tonic-gate return (rval);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate /*
8480Sstevel@tonic-gate * special handling is required when running on a single node
8490Sstevel@tonic-gate * non-SC3.x environment. This function determines tests
8500Sstevel@tonic-gate * for that case.
8510Sstevel@tonic-gate *
8520Sstevel@tonic-gate * Return values:
8530Sstevel@tonic-gate * 0 - no nodes or joined or in a SC3.x env
8540Sstevel@tonic-gate * 1 - 1 node and not in SC3.x env
8550Sstevel@tonic-gate */
8560Sstevel@tonic-gate
8570Sstevel@tonic-gate int
meta_mn_singlenode()8580Sstevel@tonic-gate meta_mn_singlenode()
8590Sstevel@tonic-gate {
8600Sstevel@tonic-gate md_error_t xep = mdnullerror;
8610Sstevel@tonic-gate int nodecnt;
8620Sstevel@tonic-gate int mnset_single_node = 0;
8630Sstevel@tonic-gate mndiskset_membershiplist_t *nl;
8640Sstevel@tonic-gate
8650Sstevel@tonic-gate /*
8660Sstevel@tonic-gate * If running on SunCluster, then don't validate MN sets,
8670Sstevel@tonic-gate * this is done during a reconfig cycle since all nodes must
8680Sstevel@tonic-gate * take the same action.
8690Sstevel@tonic-gate *
8700Sstevel@tonic-gate * Only cleanup in case of a single node situation
8710Sstevel@tonic-gate * when not running on SunCluster. This single node
8720Sstevel@tonic-gate * situation occurs when the nodelist only contains
8730Sstevel@tonic-gate * this node and the MN setrecords only contain this
8740Sstevel@tonic-gate * node.
8750Sstevel@tonic-gate */
8760Sstevel@tonic-gate if (meta_read_nodelist(&nodecnt, &nl, &xep) == -1) {
8770Sstevel@tonic-gate nodecnt = 0; /* no nodes are alive */
8780Sstevel@tonic-gate nl = NULL;
8790Sstevel@tonic-gate mdclrerror(&xep);
8800Sstevel@tonic-gate } else {
8810Sstevel@tonic-gate /*
8820Sstevel@tonic-gate * If only 1 node in nodelist and not running
8830Sstevel@tonic-gate * on SunCluster, set single_node flag.
8840Sstevel@tonic-gate */
8850Sstevel@tonic-gate if ((nodecnt == 1) &&
8860Sstevel@tonic-gate (strcmp(nl->msl_node_name, mynode()) == 0) &&
8870Sstevel@tonic-gate ((sdssc_bind_library()) != SDSSC_OKAY)) {
8880Sstevel@tonic-gate mnset_single_node = 1;
8890Sstevel@tonic-gate }
8900Sstevel@tonic-gate meta_free_nodelist(nl);
8910Sstevel@tonic-gate }
8920Sstevel@tonic-gate return (mnset_single_node);
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate
8950Sstevel@tonic-gate /*
8960Sstevel@tonic-gate * FUNCTION: meta_mn_send_get_tstate()
8970Sstevel@tonic-gate * INPUT: dev - dev_t of device
8980Sstevel@tonic-gate * OUTPUT: tstatep - tstate value
8990Sstevel@tonic-gate * ep - return error pointer
9000Sstevel@tonic-gate * RETURNS: return value from mdmn_send_message()
9010Sstevel@tonic-gate * PURPOSE: Send a message to the master to get ui_tstate for a given device.
9020Sstevel@tonic-gate */
9030Sstevel@tonic-gate
9040Sstevel@tonic-gate int
meta_mn_send_get_tstate(md_dev64_t dev,uint_t * tstatep,md_error_t * ep)9050Sstevel@tonic-gate meta_mn_send_get_tstate(
9060Sstevel@tonic-gate md_dev64_t dev,
9070Sstevel@tonic-gate uint_t *tstatep,
9080Sstevel@tonic-gate md_error_t *ep
9090Sstevel@tonic-gate )
9100Sstevel@tonic-gate {
9110Sstevel@tonic-gate int result;
9120Sstevel@tonic-gate md_mn_msg_gettstate_t tstatemsg;
9130Sstevel@tonic-gate md_mn_result_t *resp = NULL;
9140Sstevel@tonic-gate minor_t mnum = meta_getminor(dev);
9150Sstevel@tonic-gate
9160Sstevel@tonic-gate tstatemsg.gettstate_dev = dev;
9170Sstevel@tonic-gate result = mdmn_send_message(MD_MIN2SET(mnum),
9180Sstevel@tonic-gate MD_MN_MSG_GET_TSTATE,
9198452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, 0,
9200Sstevel@tonic-gate (char *)&tstatemsg, sizeof (tstatemsg), &resp, ep);
9210Sstevel@tonic-gate
9220Sstevel@tonic-gate if (result == 0)
9230Sstevel@tonic-gate *tstatep = resp->mmr_exitval;
9240Sstevel@tonic-gate else
9250Sstevel@tonic-gate /* If some error occurred set tstate to 0 */
9260Sstevel@tonic-gate *tstatep = 0;
9270Sstevel@tonic-gate
9280Sstevel@tonic-gate if (resp != NULL) {
9290Sstevel@tonic-gate free_result(resp);
9300Sstevel@tonic-gate }
9310Sstevel@tonic-gate return (result);
9320Sstevel@tonic-gate }
933