xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_mn_subr.c (revision 13018:1ff0c65b2b90)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51623Stw21770  * Common Development and Distribution License (the "License").
61623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*13018SPeter.Dennis@Oracle.COM  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
259818SAndrew.Balfour@Sun.COM 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
280Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
290Sstevel@tonic-gate  */
300Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
310Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
320Sstevel@tonic-gate #endif
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #include <meta.h>
350Sstevel@tonic-gate #include <sdssc.h>
360Sstevel@tonic-gate #include <arpa/inet.h>
370Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
380Sstevel@tonic-gate 
390Sstevel@tonic-gate #define	MAX_LINE_SIZE 1024
400Sstevel@tonic-gate 
410Sstevel@tonic-gate /*
420Sstevel@tonic-gate  * Maximum amount of time to spend waiting for an ownership change to complete.
430Sstevel@tonic-gate  */
440Sstevel@tonic-gate static const int OWNER_TIMEOUT = 3;
450Sstevel@tonic-gate 
460Sstevel@tonic-gate /*
470Sstevel@tonic-gate  * FUNCTION:	meta_is_mn_set()
480Sstevel@tonic-gate  * INPUT:       sp      - the set name
490Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
500Sstevel@tonic-gate  * RETURNS:	int	- 1 if MultiNode set else 0
510Sstevel@tonic-gate  * PURPOSE:	checks if the set is a MultiNode set
520Sstevel@tonic-gate  */
530Sstevel@tonic-gate int
meta_is_mn_set(mdsetname_t * sp,md_error_t * ep)540Sstevel@tonic-gate meta_is_mn_set(
550Sstevel@tonic-gate 	mdsetname_t	*sp,
560Sstevel@tonic-gate 	md_error_t	*ep
570Sstevel@tonic-gate )
580Sstevel@tonic-gate {
590Sstevel@tonic-gate 	md_set_desc	*sd;
600Sstevel@tonic-gate 
610Sstevel@tonic-gate 	/* Local set cannot be MultiNode */
620Sstevel@tonic-gate 	if ((sp == NULL) || (sp->setname == NULL) ||
638452SJohn.Wren.Kennedy@Sun.COM 	    (strcmp(sp->setname, MD_LOCAL_NAME) == 0))
640Sstevel@tonic-gate 		return (0);
650Sstevel@tonic-gate 	sd = metaget_setdesc(sp, ep);
66*13018SPeter.Dennis@Oracle.COM 
67*13018SPeter.Dennis@Oracle.COM 	/*
68*13018SPeter.Dennis@Oracle.COM 	 * sd can be NULL if there is a difference between
69*13018SPeter.Dennis@Oracle.COM 	 * the setrecords and the setlistp caches. This can happen
70*13018SPeter.Dennis@Oracle.COM 	 * if this function is called while a set is being
71*13018SPeter.Dennis@Oracle.COM 	 * removed during a cluster reconfiguration.
72*13018SPeter.Dennis@Oracle.COM 	 */
73*13018SPeter.Dennis@Oracle.COM 	if (sd == NULL)
74*13018SPeter.Dennis@Oracle.COM 		return (0);
750Sstevel@tonic-gate 	if (sd->sd_flags & MD_SR_MN)
760Sstevel@tonic-gate 		return (1);
770Sstevel@tonic-gate 	return (0);
780Sstevel@tonic-gate }
790Sstevel@tonic-gate 
800Sstevel@tonic-gate /*
810Sstevel@tonic-gate  * FUNCTION:	meta_is_mn_name()
820Sstevel@tonic-gate  * INPUT:       spp     - ptr to the set name, if NULL the setname is derived
830Sstevel@tonic-gate  *			  from the metadevice name (eg set/d10 )
841623Stw21770  *		name	- the metadevice/hsp name
850Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
860Sstevel@tonic-gate  * RETURNS:	int	- 1 if MultiNode set else 0
870Sstevel@tonic-gate  * PURPOSE:	checks if the metadevice is in a MultiNode set
880Sstevel@tonic-gate  */
890Sstevel@tonic-gate int
meta_is_mn_name(mdsetname_t ** spp,char * name,md_error_t * ep)900Sstevel@tonic-gate meta_is_mn_name(
910Sstevel@tonic-gate 	mdsetname_t	**spp,
920Sstevel@tonic-gate 	char		*name,
930Sstevel@tonic-gate 	md_error_t	*ep
940Sstevel@tonic-gate )
950Sstevel@tonic-gate {
961623Stw21770 	if (*spp == NULL) {
971623Stw21770 		char		*cname;
980Sstevel@tonic-gate 
991623Stw21770 		/*
1001623Stw21770 		 * if the setname is specified in uname and *spp is
1011623Stw21770 		 * not set, then it is setup using that set name value.
1021623Stw21770 		 * If *spp is set and a setname specified in uname and
1031623Stw21770 		 * the set names don't agree then cname will be
1041623Stw21770 		 * returned as NULL
1051623Stw21770 		 */
1061623Stw21770 		cname = meta_canonicalize_check_set(spp, name, ep);
1071623Stw21770 		if (cname == NULL) {
1081623Stw21770 			mdclrerror(ep);
1091623Stw21770 			return (0);
1101623Stw21770 		}
1111623Stw21770 
1121623Stw21770 		Free(cname);
1130Sstevel@tonic-gate 	}
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate 	if ((strcmp((*spp)->setname, MD_LOCAL_NAME) != 0) &&
1160Sstevel@tonic-gate 	    (metaget_setdesc(*spp, ep) != NULL) &&
1170Sstevel@tonic-gate 	    ((*spp)->setdesc->sd_flags & MD_SR_MN)) {
1180Sstevel@tonic-gate 		return (1);
1190Sstevel@tonic-gate 	}
1200Sstevel@tonic-gate 	return (0);
1210Sstevel@tonic-gate }
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate  * meta_ping_mnset(set_t setno)
1250Sstevel@tonic-gate  * Send a test message for this set in order to make commd do some init stuff
1260Sstevel@tonic-gate  * Don't bother changelog.
1270Sstevel@tonic-gate  * If set is suspended, fail immediately.
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate void
meta_ping_mnset(set_t setno)1300Sstevel@tonic-gate meta_ping_mnset(set_t setno)
1310Sstevel@tonic-gate {
1320Sstevel@tonic-gate 	char		*data = "test";
1330Sstevel@tonic-gate 	md_error_t	mde = mdnullerror;
1340Sstevel@tonic-gate 	md_mn_result_t	*resp = NULL;
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate 	(void) mdmn_send_message(setno, MD_MN_MSG_TEST2,
1378452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG | MD_MSGF_FAIL_ON_SUSPEND, 0, data,
1380Sstevel@tonic-gate 	    sizeof (data), &resp, &mde);
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate 	if (resp != (md_mn_result_t *)NULL) {
1410Sstevel@tonic-gate 		free_result(resp);
1420Sstevel@tonic-gate 	}
1430Sstevel@tonic-gate }
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate /*
1460Sstevel@tonic-gate  *
1470Sstevel@tonic-gate  * FUNCTION:	print_stderr
1480Sstevel@tonic-gate  * INPUT:	errstr	- the error message returned by the command
1490Sstevel@tonic-gate  *		context	- the context string from metainit -a
1500Sstevel@tonic-gate  * PURPOSE:	called from meta_mn_send_command to print the error message
1510Sstevel@tonic-gate  *		to stderr. When context is NO_CONTEXT_STRING, the errstr string
1520Sstevel@tonic-gate  *		is output unchanged. When context is a string, it is the context
1530Sstevel@tonic-gate  *		string for the metainit -a command and in this case the errstr
1540Sstevel@tonic-gate  *		string has to be parsed to extract the command and node name
1550Sstevel@tonic-gate  *		and to send a message to stderr in the format
1560Sstevel@tonic-gate  *		command: node: context: error message
1570Sstevel@tonic-gate  */
1580Sstevel@tonic-gate static void
print_stderr(char * errstr,char * context)1590Sstevel@tonic-gate print_stderr(
1600Sstevel@tonic-gate 	char	*errstr,
1610Sstevel@tonic-gate 	char	*context
1620Sstevel@tonic-gate )
1630Sstevel@tonic-gate {
1640Sstevel@tonic-gate 	char	*command;
1650Sstevel@tonic-gate 	char	*node;
1660Sstevel@tonic-gate 	char	*message;
1670Sstevel@tonic-gate 	int	length = strlen(errstr + 1);
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 	if (context == NO_CONTEXT_STRING) {
1700Sstevel@tonic-gate 		(void) fprintf(stderr, "%s", errstr);
1710Sstevel@tonic-gate 	} else {
1720Sstevel@tonic-gate 		command = Malloc(length);
1730Sstevel@tonic-gate 		node = Malloc(length);
1740Sstevel@tonic-gate 		message = Malloc(length);
1750Sstevel@tonic-gate 		if (sscanf(errstr, "%[^:]: %[^:]: %[^\n]", command, node,
1760Sstevel@tonic-gate 		    message) == 3) {
1770Sstevel@tonic-gate 			(void) fprintf(stderr, "%s: %s: %s: %s\n", command,
1780Sstevel@tonic-gate 			    node, context, message);
1790Sstevel@tonic-gate 		} else {
1800Sstevel@tonic-gate 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1810Sstevel@tonic-gate 			    "%s: Invalid format error message"), errstr);
1820Sstevel@tonic-gate 		}
1830Sstevel@tonic-gate 		Free(command);
1840Sstevel@tonic-gate 		Free(node);
1850Sstevel@tonic-gate 		Free(message);
1860Sstevel@tonic-gate 	}
1870Sstevel@tonic-gate }
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate /*
1900Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_command()
1910Sstevel@tonic-gate  * INPUT:	sp	- the set name
1920Sstevel@tonic-gate  *		argc	- number of arguments
1930Sstevel@tonic-gate  *		argv	- arg list
1940Sstevel@tonic-gate  *		flags	- some controlling flags
1950Sstevel@tonic-gate  *		initall_context	- context string for metainit -a
1960Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
1970Sstevel@tonic-gate  * RETURNS:	return exitval from mdmn_send_message
1980Sstevel@tonic-gate  * PURPOSE:	sends the command to the master node for execution
1990Sstevel@tonic-gate  */
2000Sstevel@tonic-gate int
meta_mn_send_command(mdsetname_t * sp,int argc,char * argv[],int flags,char * initall_context,md_error_t * ep)2010Sstevel@tonic-gate meta_mn_send_command(
2020Sstevel@tonic-gate 	mdsetname_t	*sp,
2030Sstevel@tonic-gate 	int		argc,
2040Sstevel@tonic-gate 	char		*argv[],
2050Sstevel@tonic-gate 	int		flags,
2060Sstevel@tonic-gate 	char		*initall_context,
2070Sstevel@tonic-gate 	md_error_t	*ep
2080Sstevel@tonic-gate )
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate 	int		a;
2110Sstevel@tonic-gate 	int		err;
2120Sstevel@tonic-gate 	int		retval;
2130Sstevel@tonic-gate 	int		send_message_flags = MD_MSGF_DEFAULT_FLAGS;
2140Sstevel@tonic-gate 	int		send_message_type;
2150Sstevel@tonic-gate 	char		*cmd;
2160Sstevel@tonic-gate 	md_mn_result_t	*resp = NULL;
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate 	cmd = Malloc(1024);
2190Sstevel@tonic-gate 	(void) strlcpy(cmd, argv[0], 1024);
2200Sstevel@tonic-gate 	for (a = 1; a < argc; a++) {
2210Sstevel@tonic-gate 		/* don't copy empty arguments */
2220Sstevel@tonic-gate 		if (*argv[a] == '\0') {
2230Sstevel@tonic-gate 			continue;
2240Sstevel@tonic-gate 		}
2250Sstevel@tonic-gate 		(void) strcat(cmd, " ");
2260Sstevel@tonic-gate 		(void) strcat(cmd, argv[a]);
2270Sstevel@tonic-gate 	}
2280Sstevel@tonic-gate 	/*
2290Sstevel@tonic-gate 	 * in dryrun mode stop on the first error
2300Sstevel@tonic-gate 	 * use the CMD_RETRY message type if RETRY_BUSY flag set
2310Sstevel@tonic-gate 	 */
2320Sstevel@tonic-gate 	if (flags & MD_DRYRUN)
2330Sstevel@tonic-gate 		send_message_flags |= MD_MSGF_STOP_ON_ERROR;
2340Sstevel@tonic-gate 	if (flags & MD_NOLOG)
2350Sstevel@tonic-gate 		send_message_flags |= MD_MSGF_NO_LOG;
2360Sstevel@tonic-gate 	if (flags & MD_PANIC_WHEN_INCONSISTENT)
2370Sstevel@tonic-gate 		send_message_flags |= MD_MSGF_PANIC_WHEN_INCONSISTENT;
2380Sstevel@tonic-gate 	if (flags & MD_RETRY_BUSY)  {
2390Sstevel@tonic-gate 		send_message_type = MD_MN_MSG_BC_CMD_RETRY;
2400Sstevel@tonic-gate 	} else {
2410Sstevel@tonic-gate 		send_message_type = MD_MN_MSG_BC_CMD;
2420Sstevel@tonic-gate 	}
2439818SAndrew.Balfour@Sun.COM 	err = mdmn_send_message(
2449818SAndrew.Balfour@Sun.COM 	    sp->setno, send_message_type, send_message_flags, 0,
2459818SAndrew.Balfour@Sun.COM 	    cmd, 1024, &resp, ep);
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	free(cmd);
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate 	if (err == 0) {
2500Sstevel@tonic-gate 		/*
2510Sstevel@tonic-gate 		 * stderr may be turned off by IGNORE_STDERR
2520Sstevel@tonic-gate 		 * In dryrun we only print stderr if the exit_val is non-zero
2530Sstevel@tonic-gate 		 */
2540Sstevel@tonic-gate 		if ((resp->mmr_err_size != 0) &&
2550Sstevel@tonic-gate 		    ((flags & MD_IGNORE_STDERR) == 0)) {
2560Sstevel@tonic-gate 			if (((flags & MD_DRYRUN) == 0) ||
2570Sstevel@tonic-gate 			    (resp->mmr_exitval != 0)) {
2580Sstevel@tonic-gate 				print_stderr(resp->mmr_err, initall_context);
2590Sstevel@tonic-gate 			}
2600Sstevel@tonic-gate 		}
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate 		/*
2630Sstevel@tonic-gate 		 * If dryrun is set, we don't display stdout,
2640Sstevel@tonic-gate 		 * because the real run has yet to follow.
2650Sstevel@tonic-gate 		 */
2660Sstevel@tonic-gate 		if (((flags & MD_DRYRUN) == 0) && (resp->mmr_out_size != 0)) {
2670Sstevel@tonic-gate 			(void) printf("%s", resp->mmr_out);
2680Sstevel@tonic-gate 		}
2690Sstevel@tonic-gate 		retval = resp->mmr_exitval;
2700Sstevel@tonic-gate 		free_result(resp);
2710Sstevel@tonic-gate 		return (retval);
2720Sstevel@tonic-gate 	}
2730Sstevel@tonic-gate 	if (resp != NULL) {
2740Sstevel@tonic-gate 		if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
2750Sstevel@tonic-gate 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2760Sstevel@tonic-gate 			    "rpc.mdcommd currently busy. "
2770Sstevel@tonic-gate 			    "Retry operation later.\n"));
2780Sstevel@tonic-gate 		} else if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
2790Sstevel@tonic-gate 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2800Sstevel@tonic-gate 			    "Node %s must join the %s multi-owner diskset to "
2810Sstevel@tonic-gate 			    "issue commands.\n"
2820Sstevel@tonic-gate 			    "To join, use: metaset -s %s -j\n"),
2830Sstevel@tonic-gate 			    mynode(), sp->setname, sp->setname);
2840Sstevel@tonic-gate 		} else if (resp->mmr_comm_state == MDMNE_LOG_FAIL) {
2850Sstevel@tonic-gate 			mddb_config_t	c;
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
2880Sstevel@tonic-gate 			c.c_setno = sp->setno;
2890Sstevel@tonic-gate 			(void) metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL);
2900Sstevel@tonic-gate 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2910Sstevel@tonic-gate 			    "Command not attempted: Unable to log message "
2920Sstevel@tonic-gate 			    "in set %s\n"), sp->setname);
2930Sstevel@tonic-gate 			if (c.c_flags & MDDB_C_STALE) {
2948452SJohn.Wren.Kennedy@Sun.COM 				(void) mdmddberror(ep, MDE_DB_STALE,
2958452SJohn.Wren.Kennedy@Sun.COM 				    (minor_t)NODEV64, sp->setno, 0, NULL);
2968452SJohn.Wren.Kennedy@Sun.COM 				mde_perror(ep, "");
2970Sstevel@tonic-gate 			}
2980Sstevel@tonic-gate 		} else {
2990Sstevel@tonic-gate 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3000Sstevel@tonic-gate 			    "Command failed: Commd State %d "
3010Sstevel@tonic-gate 			    "encountered.\n"), resp->mmr_comm_state);
3020Sstevel@tonic-gate 		}
3030Sstevel@tonic-gate 		free_result(resp);
3040Sstevel@tonic-gate 	} else {
3050Sstevel@tonic-gate 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3060Sstevel@tonic-gate 		    "Command failed: mdmn_send_message returned %d.\n"),
3070Sstevel@tonic-gate 		    err);
3080Sstevel@tonic-gate 	}
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	return (1);
3120Sstevel@tonic-gate }
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate /*
3150Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_suspend_writes()
3160Sstevel@tonic-gate  * INPUT:	mnum	- minor num of mirror
3170Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
3180Sstevel@tonic-gate  * RETURNS:	return value from mdmn_send_message()
3190Sstevel@tonic-gate  * PURPOSE:	sends message to all nodes to suspend writes to the mirror.
3200Sstevel@tonic-gate  */
3210Sstevel@tonic-gate int
meta_mn_send_suspend_writes(minor_t mnum,md_error_t * ep)3220Sstevel@tonic-gate meta_mn_send_suspend_writes(
3230Sstevel@tonic-gate 	minor_t		mnum,
3240Sstevel@tonic-gate 	md_error_t	*ep
3250Sstevel@tonic-gate )
3260Sstevel@tonic-gate {
3270Sstevel@tonic-gate 	int			result;
3280Sstevel@tonic-gate 	md_mn_msg_suspwr_t	suspwrmsg;
3290Sstevel@tonic-gate 	md_mn_result_t		*resp = NULL;
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	suspwrmsg.msg_suspwr_mnum =  mnum;
3320Sstevel@tonic-gate 	/*
3330Sstevel@tonic-gate 	 * This message is never directly issued.
3340Sstevel@tonic-gate 	 * So we launch it with a suspend override flag.
3350Sstevel@tonic-gate 	 * If the commd is suspended, and this message comes
3360Sstevel@tonic-gate 	 * along it must be sent due to replaying a command or similar.
3370Sstevel@tonic-gate 	 * In that case we don't want this message to be blocked.
3380Sstevel@tonic-gate 	 * If the commd is not suspended, the flag does no harm.
3390Sstevel@tonic-gate 	 */
3400Sstevel@tonic-gate 	result = mdmn_send_message(MD_MIN2SET(mnum),
3410Sstevel@tonic-gate 	    MD_MN_MSG_SUSPEND_WRITES,
3428452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
3430Sstevel@tonic-gate 	    (char *)&suspwrmsg, sizeof (suspwrmsg), &resp, ep);
3440Sstevel@tonic-gate 	if (resp != NULL) {
3450Sstevel@tonic-gate 		free_result(resp);
3460Sstevel@tonic-gate 	}
3470Sstevel@tonic-gate 	return (result);
3480Sstevel@tonic-gate }
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate /*
3510Sstevel@tonic-gate  * Parse the multi-node list file
3520Sstevel@tonic-gate  *
3530Sstevel@tonic-gate  * Return Values:	Zero	 - Success
3540Sstevel@tonic-gate  *			Non Zero - Failure
3550Sstevel@tonic-gate  *
3560Sstevel@tonic-gate  * File content:	The content of the nodelist file should consist of
3570Sstevel@tonic-gate  *			triplets of nodeid, nodename and private interconnect
3580Sstevel@tonic-gate  *			address seperated by one or more white space.
3590Sstevel@tonic-gate  * e.g.
3600Sstevel@tonic-gate  *			1 node_a 192.168.111.3
3610Sstevel@tonic-gate  *			2 node_b 192.168.111.5
3620Sstevel@tonic-gate  *
3630Sstevel@tonic-gate  *			Any missing fields will result in an error.
3640Sstevel@tonic-gate  */
3650Sstevel@tonic-gate int
meta_read_nodelist(int * nodecnt,mndiskset_membershiplist_t ** nl,md_error_t * ep)3660Sstevel@tonic-gate meta_read_nodelist(
3670Sstevel@tonic-gate 	int				*nodecnt,
3680Sstevel@tonic-gate 	mndiskset_membershiplist_t	**nl,
3690Sstevel@tonic-gate 	md_error_t			*ep
3700Sstevel@tonic-gate )
3710Sstevel@tonic-gate {
3720Sstevel@tonic-gate 	FILE				*fp = NULL;
3730Sstevel@tonic-gate 	char				line[MAX_LINE_SIZE];
3740Sstevel@tonic-gate 	char				*buf;
3750Sstevel@tonic-gate 	uint_t				i;
3760Sstevel@tonic-gate 	int				sz;
3770Sstevel@tonic-gate 	mndiskset_membershiplist_t	**tailp = nl;
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	/* open file */
3800Sstevel@tonic-gate 	if ((fp = fopen(META_MNSET_NODELIST, "r")) == NULL) {
3810Sstevel@tonic-gate 		mndiskset_membershiplist_t	*nlp;
3820Sstevel@tonic-gate 		struct hostent *hp;
3839818SAndrew.Balfour@Sun.COM 		int err = 0;
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 		/* return this node with id of 1 */
3860Sstevel@tonic-gate 		nlp = *tailp = Zalloc(sizeof (*nlp));
3870Sstevel@tonic-gate 		tailp = &nlp->next;
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 		*nodecnt = 1;
3900Sstevel@tonic-gate 		nlp->msl_node_id = 1;
3910Sstevel@tonic-gate 		buf = mynode();
3920Sstevel@tonic-gate 		sz = min(strlen(buf), sizeof (nlp->msl_node_name) - 1);
3930Sstevel@tonic-gate 		(void) strncpy(nlp->msl_node_name, buf, sz);
3940Sstevel@tonic-gate 		nlp->msl_node_name[sz] = '\0';
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 		/* retrieve info about our host */
3970Sstevel@tonic-gate 		if ((hp = gethostbyname(buf)) == NULL) {
3989818SAndrew.Balfour@Sun.COM 			err = EADDRNOTAVAIL;
39911742SAndrew.Balfour@Sun.COM 		} else if (hp->h_addrtype != AF_INET) {
4009818SAndrew.Balfour@Sun.COM 			/* We only do IPv4 addresses, for now */
40111742SAndrew.Balfour@Sun.COM 			err = EPFNOSUPPORT;
40211742SAndrew.Balfour@Sun.COM 		} else if (*hp->h_addr_list == NULL) {
40311742SAndrew.Balfour@Sun.COM 			/* No addresses in the list */
40411742SAndrew.Balfour@Sun.COM 			err = EADDRNOTAVAIL;
40511742SAndrew.Balfour@Sun.COM 		} else {
4069818SAndrew.Balfour@Sun.COM 			/* We take the first address only */
40711742SAndrew.Balfour@Sun.COM 			struct in_addr in;
4089818SAndrew.Balfour@Sun.COM 
40911742SAndrew.Balfour@Sun.COM 			(void) memcpy(&in.s_addr, *hp->h_addr_list,
41011742SAndrew.Balfour@Sun.COM 			    sizeof (struct in_addr));
41111742SAndrew.Balfour@Sun.COM 			(void) strncpy(nlp->msl_node_addr,
41211742SAndrew.Balfour@Sun.COM 			    inet_ntoa(in), MD_MAX_NODENAME);
4130Sstevel@tonic-gate 		}
41411742SAndrew.Balfour@Sun.COM 
4159818SAndrew.Balfour@Sun.COM 		if (err) {
4169818SAndrew.Balfour@Sun.COM 			meta_free_nodelist(*nl);
4179818SAndrew.Balfour@Sun.COM 			return (mdsyserror(ep, err, buf));
4180Sstevel@tonic-gate 		}
4190Sstevel@tonic-gate 		return (0);
4200Sstevel@tonic-gate 	}
4210Sstevel@tonic-gate 
4220Sstevel@tonic-gate 	*nl = NULL;
4230Sstevel@tonic-gate 	*nodecnt = 0;
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate 	while ((fp != NULL) && ((buf = fgets(line, sizeof (line) - 1, fp)) !=
4260Sstevel@tonic-gate 	    NULL)) {
4270Sstevel@tonic-gate 		mndiskset_membershiplist_t	*nlp;
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 		/* skip leading spaces */
4300Sstevel@tonic-gate 		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4310Sstevel@tonic-gate 			buf++;
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate 		/* skip comments and blank lines */
4340Sstevel@tonic-gate 		if (*buf == '\0' || *buf == '#')
4350Sstevel@tonic-gate 			continue;
4360Sstevel@tonic-gate 
4370Sstevel@tonic-gate 		/* allocate memory and set tail pointer */
4380Sstevel@tonic-gate 		nlp = *tailp = Zalloc(sizeof (*nlp));
4390Sstevel@tonic-gate 		tailp = &nlp->next;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 		/* parse node id */
4420Sstevel@tonic-gate 		nlp->msl_node_id = strtoul(buf, NULL, 0);
4430Sstevel@tonic-gate 		buf += i;
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate 		/* skip leading spaces */
4460Sstevel@tonic-gate 		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4470Sstevel@tonic-gate 			buf++;
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate 		/* fields missing, return error */
4500Sstevel@tonic-gate 		if (*buf == '\0' || *buf == '#') {
4510Sstevel@tonic-gate 			meta_free_nodelist(*nl);
4520Sstevel@tonic-gate 			*nl = NULL;
4530Sstevel@tonic-gate 			*nodecnt = 0;
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 			/* close file and return */
4560Sstevel@tonic-gate 			if ((fp) && (fclose(fp) != 0))
4570Sstevel@tonic-gate 				return (mdsyserror(ep, errno,
4580Sstevel@tonic-gate 				    META_MNSET_NODELIST));
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
4610Sstevel@tonic-gate 		}
4620Sstevel@tonic-gate 
4630Sstevel@tonic-gate 		/* parse node name */
4640Sstevel@tonic-gate 		sz = min(i, sizeof (nlp->msl_node_name) - 1);
4650Sstevel@tonic-gate 		(void) strncpy(nlp->msl_node_name, buf, sz);
4660Sstevel@tonic-gate 		nlp->msl_node_name[sz] = '\0';
4670Sstevel@tonic-gate 		buf += i;
4680Sstevel@tonic-gate 
4690Sstevel@tonic-gate 		/* skip leading spaces */
4700Sstevel@tonic-gate 		while ((*buf != '\0') && (i = strcspn(buf, " \t\n")) == 0)
4710Sstevel@tonic-gate 			buf++;
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 		/* fields missing, return error */
4740Sstevel@tonic-gate 		if (*buf == '\0' || *buf == '#') {
4750Sstevel@tonic-gate 			meta_free_nodelist(*nl);
4760Sstevel@tonic-gate 			*nl = NULL;
4770Sstevel@tonic-gate 			*nodecnt = 0;
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate 			/* close file and return */
4800Sstevel@tonic-gate 			if ((fp) && (fclose(fp) != 0))
4810Sstevel@tonic-gate 				return (mdsyserror(ep, errno,
4820Sstevel@tonic-gate 				    META_MNSET_NODELIST));
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
4850Sstevel@tonic-gate 		}
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 		/* parse node address */
4880Sstevel@tonic-gate 		sz = min(i, sizeof (nlp->msl_node_addr) - 1);
4890Sstevel@tonic-gate 		(void) strncpy(nlp->msl_node_addr, buf, sz);
4900Sstevel@tonic-gate 		nlp->msl_node_addr[sz] = '\0';
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate 		++*nodecnt;
4930Sstevel@tonic-gate 	}
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 	/* close file */
4969818SAndrew.Balfour@Sun.COM 	if ((fp) && (fclose(fp) != 0)) {
4979818SAndrew.Balfour@Sun.COM 		meta_free_nodelist(*nl);
4980Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
4999818SAndrew.Balfour@Sun.COM 	}
5000Sstevel@tonic-gate 	return (0);
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate /*
5040Sstevel@tonic-gate  * Populate the multi-node list file from a given list of node id's
5050Sstevel@tonic-gate  * The nids must have only one node id in each cell. Range of node
5060Sstevel@tonic-gate  * id's in the form 1-n are not allowed.
5070Sstevel@tonic-gate  *
5080Sstevel@tonic-gate  * Return Values:	Zero	 - Success
5090Sstevel@tonic-gate  *			Non Zero - Failure
5100Sstevel@tonic-gate  */
5110Sstevel@tonic-gate int
meta_write_nodelist(int nodecnt,char ** nids,md_error_t * ep)5120Sstevel@tonic-gate meta_write_nodelist(
5130Sstevel@tonic-gate 	int		nodecnt,
5140Sstevel@tonic-gate 	char		**nids,
5150Sstevel@tonic-gate 	md_error_t	*ep
5160Sstevel@tonic-gate )
5170Sstevel@tonic-gate {
5180Sstevel@tonic-gate 	FILE		*fp = NULL;
5190Sstevel@tonic-gate 	char		name[MAX_LINE_SIZE], addr[MAX_LINE_SIZE];
5200Sstevel@tonic-gate 	uint_t		i, nid;
5210Sstevel@tonic-gate 	struct in_addr	ipaddr;
5220Sstevel@tonic-gate 	int		err = 0;
5230Sstevel@tonic-gate 
5240Sstevel@tonic-gate 	/* check if we are running on clustering */
5250Sstevel@tonic-gate 	if ((err = sdssc_bind_library()) != SDSSC_OKAY) {
5260Sstevel@tonic-gate 		return (mdsyserror(ep, err, META_MNSET_NODELIST));
5270Sstevel@tonic-gate 	}
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	/* open file for writing */
5300Sstevel@tonic-gate 	if ((fp = fopen(META_MNSET_NODELIST, "w")) == NULL) {
5310Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
5320Sstevel@tonic-gate 	}
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	for (i = 0; i < nodecnt; i++) {
5350Sstevel@tonic-gate 		/* extract the node id */
5360Sstevel@tonic-gate 		errno = 0;
5370Sstevel@tonic-gate 		nid = strtoul(nids[i], NULL, 0);
5380Sstevel@tonic-gate 		if (errno != 0) {
5390Sstevel@tonic-gate 			if ((fp) && (fclose(fp) != 0))
5400Sstevel@tonic-gate 				return (mdsyserror(ep, errno,
5410Sstevel@tonic-gate 				    META_MNSET_NODELIST));
5420Sstevel@tonic-gate 
5430Sstevel@tonic-gate 			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
5440Sstevel@tonic-gate 		}
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 		/* get node name */
5470Sstevel@tonic-gate 		(void) snprintf(name, sizeof (name), "%d", nid);
5480Sstevel@tonic-gate 		sdssc_cm_nid2nm(name);
5490Sstevel@tonic-gate 
5500Sstevel@tonic-gate 		/* finally get the private ip address */
5510Sstevel@tonic-gate 		(void) snprintf(addr, sizeof (addr), "%s", name);
5520Sstevel@tonic-gate 		if (sdssc_get_priv_ipaddr(addr, &ipaddr) != SDSSC_OKAY) {
5530Sstevel@tonic-gate 			if ((fp) && (fclose(fp) != 0))
5540Sstevel@tonic-gate 				return (mdsyserror(ep, errno,
5550Sstevel@tonic-gate 				    META_MNSET_NODELIST));
5560Sstevel@tonic-gate 
5570Sstevel@tonic-gate 			return (mdsyserror(ep, EINVAL, META_MNSET_NODELIST));
5580Sstevel@tonic-gate 		}
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 		(void) fprintf(fp, "%d\t%s\t%s\n", nid, name,
5610Sstevel@tonic-gate 		    inet_ntoa(ipaddr));
5620Sstevel@tonic-gate 	}
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	/* close file */
5650Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
5660Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_MNSET_NODELIST));
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 	return (0);
5690Sstevel@tonic-gate }
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate /*
5720Sstevel@tonic-gate  * Free node list
5730Sstevel@tonic-gate  */
5740Sstevel@tonic-gate void
meta_free_nodelist(mndiskset_membershiplist_t * nl)5750Sstevel@tonic-gate meta_free_nodelist(
5760Sstevel@tonic-gate 	mndiskset_membershiplist_t	*nl
5770Sstevel@tonic-gate )
5780Sstevel@tonic-gate {
5790Sstevel@tonic-gate 	mndiskset_membershiplist_t	*next = NULL;
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate 	for (/* void */; (nl != NULL); nl = next) {
5820Sstevel@tonic-gate 		next = nl->next;
5830Sstevel@tonic-gate 		Free(nl);
5840Sstevel@tonic-gate 	}
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_setsync()
5890Sstevel@tonic-gate  * INPUT:	sp	- setname
5900Sstevel@tonic-gate  *		mirnp	- mirror name
5910Sstevel@tonic-gate  *		size	- buffer size, 0 if none
5920Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
5930Sstevel@tonic-gate  * RETURNS:	return value from meta_mn_send_command()
5940Sstevel@tonic-gate  * PURPOSE:  Send a setsync command to all nodes to set resync status
5950Sstevel@tonic-gate  */
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate int
meta_mn_send_setsync(mdsetname_t * sp,mdname_t * mirnp,daddr_t size,md_error_t * ep)5980Sstevel@tonic-gate meta_mn_send_setsync(
5990Sstevel@tonic-gate 	mdsetname_t		*sp,
6000Sstevel@tonic-gate 	mdname_t		*mirnp,
6010Sstevel@tonic-gate 	daddr_t			size,
6020Sstevel@tonic-gate 	md_error_t		*ep
6030Sstevel@tonic-gate )
6040Sstevel@tonic-gate {
6050Sstevel@tonic-gate 	md_mn_msg_setsync_t	setsyncmsg;
6060Sstevel@tonic-gate 	int			ret;
6070Sstevel@tonic-gate 	md_mn_result_t		*resp = NULL;
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	setsyncmsg.setsync_mnum = meta_getminor(mirnp->dev);
6100Sstevel@tonic-gate 	setsyncmsg.setsync_copysize = size;
6110Sstevel@tonic-gate 	setsyncmsg.setsync_flags = 0;
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate 	/*
6140Sstevel@tonic-gate 	 * We do not log the metasync command as it will have no effect on the
6150Sstevel@tonic-gate 	 * underlying metadb state. If we have a master change the
6160Sstevel@tonic-gate 	 * reconfiguration process will issue a new 'metasync' to all affected
6170Sstevel@tonic-gate 	 * mirrors, so we would actually end up sending the message twice.
6180Sstevel@tonic-gate 	 * Removing the logging of the message helps reduce the processing
6190Sstevel@tonic-gate 	 * time required.
6200Sstevel@tonic-gate 	 */
6210Sstevel@tonic-gate 	ret = mdmn_send_message(sp->setno, MD_MN_MSG_SETSYNC,
6228452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
6230Sstevel@tonic-gate 	    (char *)&setsyncmsg, sizeof (setsyncmsg), &resp, ep);
6240Sstevel@tonic-gate 	if (resp != NULL) {
6250Sstevel@tonic-gate 		free_result(resp);
6260Sstevel@tonic-gate 	}
6270Sstevel@tonic-gate 
6280Sstevel@tonic-gate 	/*
6290Sstevel@tonic-gate 	 * Unlike non-MN sets, the metasync command does not actually
6300Sstevel@tonic-gate 	 * start a resync, it simply updates the state on all of the
6310Sstevel@tonic-gate 	 * nodes. Therefore, to start a resync we send a resync starting
6320Sstevel@tonic-gate 	 * message for the metadevice
6330Sstevel@tonic-gate 	 */
6340Sstevel@tonic-gate 	if (ret == 0)
6350Sstevel@tonic-gate 		ret = meta_mn_send_resync_starting(mirnp, ep);
6360Sstevel@tonic-gate 	return (ret);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_metaclear_command()
6410Sstevel@tonic-gate  * INPUT:	sp	- setname
6420Sstevel@tonic-gate  *		name	- metadevice name
6430Sstevel@tonic-gate  *		options - command options
6440Sstevel@tonic-gate  *		pflag	- clear all soft partitions for a given device
6450Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
6460Sstevel@tonic-gate  * RETURNS:	return value from meta_mn_send_command()
6470Sstevel@tonic-gate  * PURPOSE:  Send a metaclear command to all nodes with force(-f) and
6480Sstevel@tonic-gate  *	     recurse(-r) options set if required. For hotspare pool and
6490Sstevel@tonic-gate  *	     metadevices, the metadevice name is of the form setname/dxx or
6500Sstevel@tonic-gate  *	     setname/hspxxx so a '-s' argument isn't required. If pflag is set
6510Sstevel@tonic-gate  *	     the name refers to a metadevice or component and in the is case
6520Sstevel@tonic-gate  *	     a '-s' argument is required to define the set.
6530Sstevel@tonic-gate  */
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate int
meta_mn_send_metaclear_command(mdsetname_t * sp,char * name,mdcmdopts_t options,int pflag,md_error_t * ep)6560Sstevel@tonic-gate meta_mn_send_metaclear_command(
6570Sstevel@tonic-gate 	mdsetname_t		*sp,
6580Sstevel@tonic-gate 	char			*name,
6590Sstevel@tonic-gate 	mdcmdopts_t		options,
6600Sstevel@tonic-gate 	int			pflag,
6610Sstevel@tonic-gate 	md_error_t		*ep
6620Sstevel@tonic-gate )
6630Sstevel@tonic-gate {
6640Sstevel@tonic-gate 	int	newargc;
6650Sstevel@tonic-gate 	char	**newargv;
6660Sstevel@tonic-gate 	int	ret;
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate 	/*
6690Sstevel@tonic-gate 	 * Allocate an array large enough to hold all of the possible
6700Sstevel@tonic-gate 	 * metaclear arguments
6710Sstevel@tonic-gate 	 */
6720Sstevel@tonic-gate 	newargv = Calloc(7, sizeof (char *));
6730Sstevel@tonic-gate 	newargv[0] = "metaclear";
6740Sstevel@tonic-gate 	newargc = 1;
6750Sstevel@tonic-gate 	if (pflag) {
6760Sstevel@tonic-gate 		newargv[newargc] = "-s";
6770Sstevel@tonic-gate 		newargc++;
6780Sstevel@tonic-gate 		newargv[newargc] = sp->setname;
6790Sstevel@tonic-gate 		newargc++;
6800Sstevel@tonic-gate 	}
6810Sstevel@tonic-gate 	if (options & MDCMD_FORCE) {
6820Sstevel@tonic-gate 		newargv[newargc] = "-f";
6830Sstevel@tonic-gate 		newargc++;
6840Sstevel@tonic-gate 	}
6850Sstevel@tonic-gate 	if (options & MDCMD_RECURSE) {
6860Sstevel@tonic-gate 		newargv[newargc] = "-r";
6870Sstevel@tonic-gate 		newargc++;
6880Sstevel@tonic-gate 	}
6890Sstevel@tonic-gate 	if (pflag) {
6900Sstevel@tonic-gate 		newargv[newargc] = "-p";
6910Sstevel@tonic-gate 		newargc++;
6920Sstevel@tonic-gate 	}
6930Sstevel@tonic-gate 	newargv[newargc] = name;
6940Sstevel@tonic-gate 	newargc++;
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 	ret = meta_mn_send_command(sp, newargc, newargv,
6970Sstevel@tonic-gate 	    MD_DISP_STDERR, NO_CONTEXT_STRING, ep);
6980Sstevel@tonic-gate 
6990Sstevel@tonic-gate 	free(newargv);
7000Sstevel@tonic-gate 	return (ret);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate /*
7040Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_resync_starting()
7050Sstevel@tonic-gate  * INPUT:	sp	- setname
7060Sstevel@tonic-gate  *		mirnp	- mirror name
7070Sstevel@tonic-gate  * OUTPUT:	ep	- return error pointer
7080Sstevel@tonic-gate  * RETURNS:	return value from mdmn_send_message()
7090Sstevel@tonic-gate  * PURPOSE:  Send a resync starting message to all nodes.
7100Sstevel@tonic-gate  */
7110Sstevel@tonic-gate 
7120Sstevel@tonic-gate int
meta_mn_send_resync_starting(mdname_t * mirnp,md_error_t * ep)7130Sstevel@tonic-gate meta_mn_send_resync_starting(
7140Sstevel@tonic-gate 	mdname_t		*mirnp,
7150Sstevel@tonic-gate 	md_error_t		*ep
7160Sstevel@tonic-gate )
7170Sstevel@tonic-gate {
7180Sstevel@tonic-gate 	int			result;
7190Sstevel@tonic-gate 	md_mn_msg_resync_t	resyncmsg;
7200Sstevel@tonic-gate 	md_mn_result_t		*resp = NULL;
7210Sstevel@tonic-gate 	minor_t			mnum = meta_getminor(mirnp->dev);
7220Sstevel@tonic-gate 
7230Sstevel@tonic-gate 	/*
7240Sstevel@tonic-gate 	 * This message is never directly issued.
7250Sstevel@tonic-gate 	 * So we launch it with a suspend override flag.
7260Sstevel@tonic-gate 	 * If the commd is suspended, and this message comes
7270Sstevel@tonic-gate 	 * along it must be sent due to replaying a command or similar.
7280Sstevel@tonic-gate 	 * In that case we don't want this message to be blocked.
7290Sstevel@tonic-gate 	 * If the commd is not suspended, the flag does no harm.
7300Sstevel@tonic-gate 	 */
7310Sstevel@tonic-gate 	resyncmsg.msg_resync_mnum =  mnum;
7320Sstevel@tonic-gate 	result = mdmn_send_message(MD_MIN2SET(mnum),
7330Sstevel@tonic-gate 	    MD_MN_MSG_RESYNC_STARTING,
7348452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG | MD_MSGF_OVERRIDE_SUSPEND, 0,
7350Sstevel@tonic-gate 	    (char *)&resyncmsg, sizeof (resyncmsg), &resp, ep);
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 	if (resp != NULL) {
7380Sstevel@tonic-gate 		free_result(resp);
7390Sstevel@tonic-gate 	}
7400Sstevel@tonic-gate 	return (result);
7410Sstevel@tonic-gate }
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate /*
7440Sstevel@tonic-gate  * FUNCTION:	meta_mn_change_owner()
7450Sstevel@tonic-gate  * INPUT:	opp	- pointer to parameter block
7460Sstevel@tonic-gate  *		setno	- set number of mirror metadevice
7470Sstevel@tonic-gate  *		mnum	- minor number of mirror metadevice
7480Sstevel@tonic-gate  *		owner	- node ID of mirror owner
7490Sstevel@tonic-gate  *		flags	- flag field for ioctl
7500Sstevel@tonic-gate  * OUTPUT:	opp	- parameter block used to send ioctl
7510Sstevel@tonic-gate  * RETURNS:	int	- 0 success, -1 error
7520Sstevel@tonic-gate  * PURPOSE:	issue an ioctl to change the ownership of the specified mirror
7530Sstevel@tonic-gate  *		to our node ID. We need to be the owner before any watermarks
7540Sstevel@tonic-gate  *		are committed to the device otherwise we'll enter a deadly
7550Sstevel@tonic-gate  *		embrace when attempting to write the watermark.
7560Sstevel@tonic-gate  *		This function can also be used so set the owner on a node to
7570Sstevel@tonic-gate  *		NULL. In this case the change is only made on the local node.
7580Sstevel@tonic-gate  *		In addition by setting the MD_MN_MM_CHOOSE_OWNER flag, the
7590Sstevel@tonic-gate  *		function can also be used to choose a mirror resync owner. This
7600Sstevel@tonic-gate  *		function should only be called on the master and it will
7610Sstevel@tonic-gate  *		select the owner and request it to become the owner.
7620Sstevel@tonic-gate  */
7630Sstevel@tonic-gate int
meta_mn_change_owner(md_set_mmown_params_t ** opp,set_t setno,uint_t mnum,uint_t owner,uint_t flags)7640Sstevel@tonic-gate meta_mn_change_owner(
7650Sstevel@tonic-gate 	md_set_mmown_params_t 	**opp,	/* Returned parameter block */
7660Sstevel@tonic-gate 	set_t			setno,	/* Mirror set number */
7670Sstevel@tonic-gate 	uint_t 			mnum,	/* Minor number */
7680Sstevel@tonic-gate 	uint_t			owner,	/* Node ID of mirror owner */
7690Sstevel@tonic-gate 	uint_t			flags	/* Flags */
7700Sstevel@tonic-gate )
7710Sstevel@tonic-gate {
7720Sstevel@tonic-gate 	md_set_mmown_params_t	*ownpar = *opp;
7730Sstevel@tonic-gate 	md_mn_own_status_t	*ownstat = NULL;
7740Sstevel@tonic-gate 	struct timeval tvs, tve;
7750Sstevel@tonic-gate 	int			n = 0;
7760Sstevel@tonic-gate 	int			rval;
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate 	if (ownpar != NULL) {
7790Sstevel@tonic-gate 		(void) memset(ownpar, 0, sizeof (*ownpar));
7800Sstevel@tonic-gate 	} else {
7810Sstevel@tonic-gate 		ownpar = Zalloc(sizeof (*ownpar));
7820Sstevel@tonic-gate 	}
7830Sstevel@tonic-gate 	ownstat = Zalloc(sizeof (*ownstat));
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	ownpar->d.mnum = mnum;
7860Sstevel@tonic-gate 	ownpar->d.owner = owner;
7870Sstevel@tonic-gate 	ownpar->d.flags = flags;
7880Sstevel@tonic-gate 	MD_SETDRIVERNAME(ownpar, MD_MIRROR, setno);
7890Sstevel@tonic-gate 	MD_SETDRIVERNAME(ownstat, MD_MIRROR, setno);
7900Sstevel@tonic-gate 
7910Sstevel@tonic-gate 	/*
7920Sstevel@tonic-gate 	 * Attempt to change the ownership to the specified node. We retry this
7930Sstevel@tonic-gate 	 * up to 10 times if we receive EAGAIN from the metadevice. This only
7940Sstevel@tonic-gate 	 * happens if the underlying metadevice is busy with outstanding i/o
7950Sstevel@tonic-gate 	 * that requires ownership change.
7960Sstevel@tonic-gate 	 */
7970Sstevel@tonic-gate 	while ((rval = metaioctl(MD_MN_SET_MM_OWNER, ownpar, &ownpar->mde,
7980Sstevel@tonic-gate 	    NULL)) != 0) {
7990Sstevel@tonic-gate 		md_sys_error_t	*ip =
8000Sstevel@tonic-gate 		    &ownpar->mde.info.md_error_info_t_u.sys_error;
8010Sstevel@tonic-gate 		if (ip->errnum != EAGAIN)
8020Sstevel@tonic-gate 			break;
8030Sstevel@tonic-gate 		if (n++ >= 10)
8040Sstevel@tonic-gate 			break;
8050Sstevel@tonic-gate 		(void) sleep(1);
8060Sstevel@tonic-gate 	}
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 	/*
8090Sstevel@tonic-gate 	 * There is no need to wait for the ioctl completion if we are setting
8100Sstevel@tonic-gate 	 * the owner to NULL or requesting the master to choose the owner
8110Sstevel@tonic-gate 	 */
8120Sstevel@tonic-gate 	if ((owner == 0) || (flags & MD_MN_MM_CHOOSE_OWNER)) {
8130Sstevel@tonic-gate 		Free(ownstat);
8140Sstevel@tonic-gate 		*opp = ownpar;
8150Sstevel@tonic-gate 		return (0);
8160Sstevel@tonic-gate 	}
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 	/*
8190Sstevel@tonic-gate 	 * Wait for ioctl completion or a timeout to occur. If we
8200Sstevel@tonic-gate 	 * timeout we fail the i/o request.
8210Sstevel@tonic-gate 	 */
8220Sstevel@tonic-gate 	ownstat->mnum = ownpar->d.mnum;
8230Sstevel@tonic-gate 	(void) gettimeofday(&tvs, NULL);
8240Sstevel@tonic-gate 
8250Sstevel@tonic-gate 	while ((rval == 0) && !(ownstat->flags & MD_MN_MM_RESULT)) {
8260Sstevel@tonic-gate 		while ((rval = metaioctl(MD_MN_MM_OWNER_STATUS, ownstat,
8270Sstevel@tonic-gate 		    &ownstat->mde, NULL)) != 0) {
8280Sstevel@tonic-gate 			(void) gettimeofday(&tve, NULL);
8290Sstevel@tonic-gate 			if ((tve.tv_sec - tvs.tv_sec) > OWNER_TIMEOUT) {
8300Sstevel@tonic-gate 				rval = -1;
8310Sstevel@tonic-gate 				break;
8320Sstevel@tonic-gate 			}
8330Sstevel@tonic-gate 			(void) sleep(1);
8340Sstevel@tonic-gate 		}
8350Sstevel@tonic-gate 	}
8360Sstevel@tonic-gate 
8370Sstevel@tonic-gate 	/* we did not not timeout but ioctl failed set rval */
8380Sstevel@tonic-gate 
8390Sstevel@tonic-gate 	if (rval == 0) {
8400Sstevel@tonic-gate 		rval = (ownstat->flags & MD_MN_MM_RES_FAIL) ? -1 : 0;
8410Sstevel@tonic-gate 	}
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 	Free(ownstat);
8440Sstevel@tonic-gate 	*opp = ownpar;
8450Sstevel@tonic-gate 	return (rval);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate /*
8480Sstevel@tonic-gate  * special handling is required when running on a single node
8490Sstevel@tonic-gate  * non-SC3.x environment.  This function determines tests
8500Sstevel@tonic-gate  * for that case.
8510Sstevel@tonic-gate  *
8520Sstevel@tonic-gate  * Return values:
8530Sstevel@tonic-gate  *	0 - no nodes or joined or in a SC3.x env
8540Sstevel@tonic-gate  *	1 - 1 node and not in SC3.x env
8550Sstevel@tonic-gate  */
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate int
meta_mn_singlenode()8580Sstevel@tonic-gate meta_mn_singlenode()
8590Sstevel@tonic-gate {
8600Sstevel@tonic-gate 	md_error_t			xep = mdnullerror;
8610Sstevel@tonic-gate 	int				nodecnt;
8620Sstevel@tonic-gate 	int				mnset_single_node = 0;
8630Sstevel@tonic-gate 	mndiskset_membershiplist_t	*nl;
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate 	/*
8660Sstevel@tonic-gate 	 * If running on SunCluster, then don't validate MN sets,
8670Sstevel@tonic-gate 	 * this is done during a reconfig cycle since all nodes must
8680Sstevel@tonic-gate 	 * take the same action.
8690Sstevel@tonic-gate 	 *
8700Sstevel@tonic-gate 	 * Only cleanup in case of a single node situation
8710Sstevel@tonic-gate 	 * when not running on SunCluster.  This single node
8720Sstevel@tonic-gate 	 * situation occurs when the nodelist only contains
8730Sstevel@tonic-gate 	 * this node and the MN setrecords only contain this
8740Sstevel@tonic-gate 	 * node.
8750Sstevel@tonic-gate 	 */
8760Sstevel@tonic-gate 	if (meta_read_nodelist(&nodecnt, &nl, &xep) == -1) {
8770Sstevel@tonic-gate 		nodecnt = 0;  /* no nodes are alive */
8780Sstevel@tonic-gate 		nl = NULL;
8790Sstevel@tonic-gate 		mdclrerror(&xep);
8800Sstevel@tonic-gate 	} else {
8810Sstevel@tonic-gate 		/*
8820Sstevel@tonic-gate 		 * If only 1 node in nodelist and not running
8830Sstevel@tonic-gate 		 * on SunCluster, set single_node flag.
8840Sstevel@tonic-gate 		 */
8850Sstevel@tonic-gate 		if ((nodecnt == 1) &&
8860Sstevel@tonic-gate 		    (strcmp(nl->msl_node_name, mynode()) == 0) &&
8870Sstevel@tonic-gate 		    ((sdssc_bind_library()) != SDSSC_OKAY)) {
8880Sstevel@tonic-gate 			mnset_single_node = 1;
8890Sstevel@tonic-gate 		}
8900Sstevel@tonic-gate 		meta_free_nodelist(nl);
8910Sstevel@tonic-gate 	}
8920Sstevel@tonic-gate 	return (mnset_single_node);
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate 
8950Sstevel@tonic-gate /*
8960Sstevel@tonic-gate  * FUNCTION:	meta_mn_send_get_tstate()
8970Sstevel@tonic-gate  * INPUT:	dev	- dev_t of device
8980Sstevel@tonic-gate  * OUTPUT:	tstatep - tstate value
8990Sstevel@tonic-gate  *		ep	- return error pointer
9000Sstevel@tonic-gate  * RETURNS:	return value from mdmn_send_message()
9010Sstevel@tonic-gate  * PURPOSE:  Send a message to the master to get ui_tstate for a given device.
9020Sstevel@tonic-gate  */
9030Sstevel@tonic-gate 
9040Sstevel@tonic-gate int
meta_mn_send_get_tstate(md_dev64_t dev,uint_t * tstatep,md_error_t * ep)9050Sstevel@tonic-gate meta_mn_send_get_tstate(
9060Sstevel@tonic-gate 	md_dev64_t		dev,
9070Sstevel@tonic-gate 	uint_t			*tstatep,
9080Sstevel@tonic-gate 	md_error_t		*ep
9090Sstevel@tonic-gate )
9100Sstevel@tonic-gate {
9110Sstevel@tonic-gate 	int			result;
9120Sstevel@tonic-gate 	md_mn_msg_gettstate_t	tstatemsg;
9130Sstevel@tonic-gate 	md_mn_result_t		*resp = NULL;
9140Sstevel@tonic-gate 	minor_t			mnum = meta_getminor(dev);
9150Sstevel@tonic-gate 
9160Sstevel@tonic-gate 	tstatemsg.gettstate_dev = dev;
9170Sstevel@tonic-gate 	result = mdmn_send_message(MD_MIN2SET(mnum),
9180Sstevel@tonic-gate 	    MD_MN_MSG_GET_TSTATE,
9198452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, 0,
9200Sstevel@tonic-gate 	    (char *)&tstatemsg, sizeof (tstatemsg), &resp, ep);
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate 	if (result == 0)
9230Sstevel@tonic-gate 		*tstatep = resp->mmr_exitval;
9240Sstevel@tonic-gate 	else
9250Sstevel@tonic-gate 		/* If some error occurred set tstate to 0 */
9260Sstevel@tonic-gate 		*tstatep = 0;
9270Sstevel@tonic-gate 
9280Sstevel@tonic-gate 	if (resp != NULL) {
9290Sstevel@tonic-gate 		free_result(resp);
9300Sstevel@tonic-gate 	}
9310Sstevel@tonic-gate 	return (result);
9320Sstevel@tonic-gate }
933