xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_mn_changelog.c (revision 8452:89d32dfdae6e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*8452SJohn.Wren.Kennedy@Sun.COM  * Common Development and Distribution License (the "License").
6*8452SJohn.Wren.Kennedy@Sun.COM  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*8452SJohn.Wren.Kennedy@Sun.COM 
220Sstevel@tonic-gate /*
23*8452SJohn.Wren.Kennedy@Sun.COM  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <stdlib.h>
280Sstevel@tonic-gate #include <unistd.h>
290Sstevel@tonic-gate #include <wait.h>
300Sstevel@tonic-gate #include <sys/time.h>
310Sstevel@tonic-gate #include <meta.h>
320Sstevel@tonic-gate #include <metad.h>
330Sstevel@tonic-gate #include <mdmn_changelog.h>
340Sstevel@tonic-gate #include <syslog.h>
350Sstevel@tonic-gate #include <umem.h>
360Sstevel@tonic-gate 
370Sstevel@tonic-gate /*
380Sstevel@tonic-gate  * Number of log entries per set.
390Sstevel@tonic-gate  *
400Sstevel@tonic-gate  * We want at least 4 spares available at all times
410Sstevel@tonic-gate  * in case new classes are added during a live upgrade.
420Sstevel@tonic-gate  *
430Sstevel@tonic-gate  * Allocate the entries in chunks of 16
440Sstevel@tonic-gate  */
450Sstevel@tonic-gate #define	MDMN_LOGRECS_QUANTA	16
460Sstevel@tonic-gate #define	MDMN_LOGRECS_MINSPARES	4
470Sstevel@tonic-gate #define	MDMN_LOGHDR_SIZE	sizeof (mdmn_changelog_record_t)
480Sstevel@tonic-gate #define	MDMN_LOGRECSIZE	(MDMN_LOGHDR_SIZE + MD_MN_MSG_MAXDATALEN)
490Sstevel@tonic-gate #define	MDMN_LOGRECSIZE_OD	sizeof (mdmn_changelog_record_od_t)
500Sstevel@tonic-gate #define	MDMN_LOGRECS_TRIMUP	((MD_MN_NCLASSES % MDMN_LOGRECS_QUANTA) > \
510Sstevel@tonic-gate 				(MDMN_LOGRECS_QUANTA - MDMN_LOGRECS_MINSPARES))
520Sstevel@tonic-gate 
530Sstevel@tonic-gate static int	mdmn_commitlog(md_set_desc *, md_error_t *);
540Sstevel@tonic-gate static int	mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *lr);
550Sstevel@tonic-gate 
560Sstevel@tonic-gate 
570Sstevel@tonic-gate /* Global variables */
580Sstevel@tonic-gate 
590Sstevel@tonic-gate mdmn_changelog_record_t	*mdmn_changelog[MD_MAXSETS];
600Sstevel@tonic-gate int mdmn_changelog_snarfed[MD_MAXSETS];
610Sstevel@tonic-gate 
620Sstevel@tonic-gate /* Total number of log records */
630Sstevel@tonic-gate int mdmn_logrecs = (MDMN_LOGRECS_QUANTA +
640Sstevel@tonic-gate 		((MD_MN_NCLASSES/MDMN_LOGRECS_QUANTA) * MDMN_LOGRECS_QUANTA));
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #ifdef DEBUG
670Sstevel@tonic-gate void
dump_rec(char * fn_name,mdmn_changelog_record_t * lr)680Sstevel@tonic-gate dump_rec(char *fn_name, mdmn_changelog_record_t *lr)
690Sstevel@tonic-gate {
700Sstevel@tonic-gate 	syslog(LOG_DEBUG, "%s incore: selfid 0x%x class %d flags %d "
710Sstevel@tonic-gate 	    "msglen %d\n", fn_name, lr->lr_selfid, lr->lr_class,
720Sstevel@tonic-gate 	    lr->lr_flags, lr->lr_msglen);
730Sstevel@tonic-gate }
740Sstevel@tonic-gate void
dump_rec_od(char * fn_name,mdmn_changelog_record_od_t * lr)750Sstevel@tonic-gate dump_rec_od(char *fn_name, mdmn_changelog_record_od_t *lr)
760Sstevel@tonic-gate {
770Sstevel@tonic-gate 	syslog(LOG_DEBUG, "%s ondisk: selfid 0x%x class %d flags %d "
780Sstevel@tonic-gate 	    "msglen %d\n", fn_name, lr->lr_selfid, lr->lr_class,
790Sstevel@tonic-gate 	    lr->lr_flags, lr->lr_msglen);
800Sstevel@tonic-gate }
810Sstevel@tonic-gate 
820Sstevel@tonic-gate void
dump_array(char * fn_name,set_t setno)830Sstevel@tonic-gate dump_array(char *fn_name, set_t setno)
840Sstevel@tonic-gate {
850Sstevel@tonic-gate 	int i;
860Sstevel@tonic-gate 	char tchar[80];
870Sstevel@tonic-gate 
880Sstevel@tonic-gate 	mdmn_changelog_record_t *tlr;
890Sstevel@tonic-gate 
900Sstevel@tonic-gate 	for (i = 0; i < mdmn_logrecs; i++) {
910Sstevel@tonic-gate 		tlr = &mdmn_changelog[setno][i];
920Sstevel@tonic-gate 		(void) snprintf(tchar, sizeof (tchar), "%s class %d ",
930Sstevel@tonic-gate 		    fn_name, i);
940Sstevel@tonic-gate 		dump_rec(tchar, tlr);
950Sstevel@tonic-gate 	}
960Sstevel@tonic-gate }
970Sstevel@tonic-gate #endif
980Sstevel@tonic-gate 
990Sstevel@tonic-gate /*
1000Sstevel@tonic-gate  * copy_changelog: copies changelog ondisk<->incore records.
1010Sstevel@tonic-gate  * The argument "direction" controls the direction to copy the
1020Sstevel@tonic-gate  * the records. Incore and ondisk changlog structures must be
1030Sstevel@tonic-gate  * allocated when calling this routine.
1040Sstevel@tonic-gate  *
1050Sstevel@tonic-gate  * The purpose of changelog is to store a message that is in progress.
1060Sstevel@tonic-gate  * Therefore the changlog structure embeds the message structure.
1070Sstevel@tonic-gate  * Incore and ondisk changelog structures are created to handle the
1080Sstevel@tonic-gate  * incore and ondisk message formats. The incore message has a pointer
1090Sstevel@tonic-gate  * to the payload. The ondisk message format has payload embedded as
1100Sstevel@tonic-gate  * part of the message.
1110Sstevel@tonic-gate  *
1120Sstevel@tonic-gate  * Caveat Emptor: Incore and ondisk structures have the payload buffers
1130Sstevel@tonic-gate  * correctly allocated.
1140Sstevel@tonic-gate  */
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate static void
copy_changelog(mdmn_changelog_record_t * incp,mdmn_changelog_record_od_t * odp,int direction)1170Sstevel@tonic-gate copy_changelog(mdmn_changelog_record_t *incp,
1180Sstevel@tonic-gate 		mdmn_changelog_record_od_t *odp, int direction)
1190Sstevel@tonic-gate {
1200Sstevel@tonic-gate 	assert(incp != NULL && odp != NULL);
1210Sstevel@tonic-gate 	assert((direction == MD_MN_COPY_TO_ONDISK) ||
1220Sstevel@tonic-gate 	    (direction == MD_MN_COPY_TO_INCORE));
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate 	if (direction == MD_MN_COPY_TO_ONDISK) {
1250Sstevel@tonic-gate 		odp->lr_revision = incp->lr_revision;
1260Sstevel@tonic-gate 		odp->lr_flags = incp->lr_flags;
1270Sstevel@tonic-gate 		odp->lr_selfid = incp->lr_selfid;
1280Sstevel@tonic-gate 		odp->lr_class = incp->lr_class;
1290Sstevel@tonic-gate 		odp->lr_msglen = incp->lr_msglen;
1300Sstevel@tonic-gate 		if (incp->lr_msglen)
131*8452SJohn.Wren.Kennedy@Sun.COM 			copy_msg_2(&incp->lr_msg, &odp->lr_od_msg, direction);
1320Sstevel@tonic-gate 	} else {
1330Sstevel@tonic-gate 		incp->lr_revision = odp->lr_revision;
1340Sstevel@tonic-gate 		incp->lr_flags = odp->lr_flags;
1350Sstevel@tonic-gate 		incp->lr_selfid = odp->lr_selfid;
1360Sstevel@tonic-gate 		incp->lr_class = odp->lr_class;
1370Sstevel@tonic-gate 		incp->lr_msglen = odp->lr_msglen;
1380Sstevel@tonic-gate 		if (odp->lr_msglen)
139*8452SJohn.Wren.Kennedy@Sun.COM 			copy_msg_2(&incp->lr_msg, &odp->lr_od_msg, direction);
1400Sstevel@tonic-gate 	}
1410Sstevel@tonic-gate }
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate /*
1440Sstevel@tonic-gate  * mdmn_allocate_changelog
1450Sstevel@tonic-gate  *
1460Sstevel@tonic-gate  * Changelog records are allocated on a per multi-node basis.
1470Sstevel@tonic-gate  * This routine is called during MN set creation.
1480Sstevel@tonic-gate  * It pre-allocates the changelog, as user records
1490Sstevel@tonic-gate  * one per message class plus some spares.
1500Sstevel@tonic-gate  * Once the records are allocated they are never freed until
1510Sstevel@tonic-gate  * the mddb is deleted. The preallocation ensures that all nodes
1520Sstevel@tonic-gate  * will have a consistent view of the mddb.
1530Sstevel@tonic-gate  *
1540Sstevel@tonic-gate  * Each record is large enough to hold a maximum sized message
1550Sstevel@tonic-gate  * Return Values:
1560Sstevel@tonic-gate  *	0 - success
1570Sstevel@tonic-gate  *	-1 - fail
1580Sstevel@tonic-gate  */
1590Sstevel@tonic-gate int
mdmn_allocate_changelog(mdsetname_t * sp,md_error_t * ep)1600Sstevel@tonic-gate mdmn_allocate_changelog(mdsetname_t *sp, md_error_t *ep)
1610Sstevel@tonic-gate {
1620Sstevel@tonic-gate 	mddb_userreq_t		req;
1630Sstevel@tonic-gate 	md_set_desc		*sd;
1640Sstevel@tonic-gate 	mdmn_changelog_record_t	*tlr;
1650Sstevel@tonic-gate 	int			i;
1660Sstevel@tonic-gate 	set_t			setno;
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 	/* Get a pointer to the incore md_set_desc for this MN set */
1690Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
1700Sstevel@tonic-gate 		return (-1);
1710Sstevel@tonic-gate 	setno = sd->sd_setno;
1720Sstevel@tonic-gate 	/*
1730Sstevel@tonic-gate 	 * Round up the number of changelog records
1740Sstevel@tonic-gate 	 * to the next value of MDMN_LOGRECS_QUANTA
1750Sstevel@tonic-gate 	 *
1760Sstevel@tonic-gate 	 * In all cases, make sure we have at least
1770Sstevel@tonic-gate 	 * four more entries than the number of classes
1780Sstevel@tonic-gate 	 * in order to provide space for live upgrades that
1790Sstevel@tonic-gate 	 * might add classes.
1800Sstevel@tonic-gate 	 */
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 	mdmn_logrecs += (MDMN_LOGRECS_TRIMUP) ? MDMN_LOGRECS_QUANTA : 0;
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate 	mdmn_changelog[setno] = Zalloc(MDMN_LOGHDR_SIZE * mdmn_logrecs);
1850Sstevel@tonic-gate 
1860Sstevel@tonic-gate 	for (i = 0; i < mdmn_logrecs; i++) {
1870Sstevel@tonic-gate 		(void) memset(&req, 0, sizeof (req));
1880Sstevel@tonic-gate 		METAD_SETUP_LR(MD_DB_CREATE, setno,  0);
1890Sstevel@tonic-gate 		/* grab a record big enough for max message size */
1900Sstevel@tonic-gate 		req.ur_size = MDMN_LOGRECSIZE_OD;
1910Sstevel@tonic-gate 
1920Sstevel@tonic-gate 		if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1930Sstevel@tonic-gate 			(void) mdstealerror(ep, &req.ur_mde);
1940Sstevel@tonic-gate #ifdef DEBUG
1950Sstevel@tonic-gate 			syslog(LOG_DEBUG, "allocate_log: %s\n",
196*8452SJohn.Wren.Kennedy@Sun.COM 			    mde_sperror(ep, ""));
1970Sstevel@tonic-gate #endif
1980Sstevel@tonic-gate 			Free(mdmn_changelog[setno]);
1990Sstevel@tonic-gate 			return (-1);
2000Sstevel@tonic-gate 		}
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 		tlr = &mdmn_changelog[setno][i];
2030Sstevel@tonic-gate 		tlr->lr_selfid = req.ur_recid;
2040Sstevel@tonic-gate 		tlr->lr_revision = MD_MN_CHANGELOG_RECORD_REVISION;
2050Sstevel@tonic-gate 		tlr->lr_class = i;
2060Sstevel@tonic-gate 	}
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 	/* commit class, and selfid */
2090Sstevel@tonic-gate 	(void) mdmn_commitlog(sd, ep);
2100Sstevel@tonic-gate 	Free(mdmn_changelog[setno]);
2110Sstevel@tonic-gate 	return (0);
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate /*
2150Sstevel@tonic-gate  * mdmn_reset_changelog
2160Sstevel@tonic-gate  *
2170Sstevel@tonic-gate  * Called during reconfig step 2.
2180Sstevel@tonic-gate  * The only time the changelog is reset is when all nodes in a cluster
2190Sstevel@tonic-gate  * are starting up. In this case changelog must be ignored, therefore
2200Sstevel@tonic-gate  * it is reset.
2210Sstevel@tonic-gate  *
2220Sstevel@tonic-gate  * The function frees the incore data structures and zeros out the
2230Sstevel@tonic-gate  * records. The ondisk records are never freed.
2240Sstevel@tonic-gate  *
2250Sstevel@tonic-gate  * Return Values:
2260Sstevel@tonic-gate  *	0 - success
2270Sstevel@tonic-gate  *	-1 - fail
2280Sstevel@tonic-gate  */
2290Sstevel@tonic-gate int
mdmn_reset_changelog(mdsetname_t * sp,md_error_t * ep,int flag)2300Sstevel@tonic-gate mdmn_reset_changelog(mdsetname_t *sp, md_error_t *ep, int flag)
2310Sstevel@tonic-gate {
2320Sstevel@tonic-gate 	md_set_desc		*sd;
2330Sstevel@tonic-gate 	mdmn_changelog_record_t	*lr;
2340Sstevel@tonic-gate 	set_t			setno;
2350Sstevel@tonic-gate 	int			lrc;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	/* Get a pointer to the incore md_set_desc this MN set */
2380Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
2390Sstevel@tonic-gate 		return (-1);
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 	setno = sd->sd_setno;
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	if (mdmn_snarf_changelog(setno, ep) == 0) {
2440Sstevel@tonic-gate 		return (0);
2450Sstevel@tonic-gate 	}
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	if (flag & MDMN_CLF_RESETLOG) {
2480Sstevel@tonic-gate 		for (lrc = 0; lrc < mdmn_logrecs; lrc++) {
2490Sstevel@tonic-gate 			lr = &mdmn_changelog[setno][lrc];
2500Sstevel@tonic-gate 			Free(lr->lr_msg.msg_event_data);
2510Sstevel@tonic-gate 			(void) memset(&lr->lr_msg, 0, sizeof (md_mn_msg_t));
2520Sstevel@tonic-gate 			lr->lr_msglen = 0;
2530Sstevel@tonic-gate 			lr->lr_flags = 0;
2540Sstevel@tonic-gate 		}
2550Sstevel@tonic-gate 		(void) mdmn_commitlog(sd, ep);
2560Sstevel@tonic-gate #ifdef DEBUG
2570Sstevel@tonic-gate 		syslog(LOG_DEBUG, "reset_changelog: Log reset\n");
2580Sstevel@tonic-gate #endif
2590Sstevel@tonic-gate 	}
2600Sstevel@tonic-gate 	/* now zap the array */
2610Sstevel@tonic-gate 	if (flag & MDMN_CLF_RESETCACHE) {
2620Sstevel@tonic-gate #ifdef DEBUG
2630Sstevel@tonic-gate 		syslog(LOG_DEBUG, "reset_changelog: cache reset\n");
2640Sstevel@tonic-gate #endif
2650Sstevel@tonic-gate 		Free(&mdmn_changelog[setno]);
2660Sstevel@tonic-gate 		mdmn_changelog[setno] = NULL;
2670Sstevel@tonic-gate 		mdmn_changelog_snarfed[setno] = 0;
2680Sstevel@tonic-gate 	}
2690Sstevel@tonic-gate 	return (0);
2700Sstevel@tonic-gate }
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate /*
2730Sstevel@tonic-gate  * Log a given message in the changelog.
2740Sstevel@tonic-gate  * This function is only executed by the master node
2750Sstevel@tonic-gate  * Return Values:
2760Sstevel@tonic-gate  *	MDMNE_NULL:
2770Sstevel@tonic-gate  *	    success, the log slot is free
2780Sstevel@tonic-gate  *
2790Sstevel@tonic-gate  *	MDMNE_ACK:
2800Sstevel@tonic-gate  *	    success,
2810Sstevel@tonic-gate  *	    the log slot is occupied with the same msg from a previous try.
2820Sstevel@tonic-gate  *
2830Sstevel@tonic-gate  *	MDMNE_CLASS_BUSY:
2840Sstevel@tonic-gate  *	    This means the appropriate slot is occupied with a different
2850Sstevel@tonic-gate  *	    message. In that case the stored message needs being replayed,
2860Sstevel@tonic-gate  *	    while the current message will be rejected with MDMNE_CLASS_BUSY
2870Sstevel@tonic-gate  *	    to the initiator.
2880Sstevel@tonic-gate  *
2890Sstevel@tonic-gate  *	MDMNE_LOG_FAIL:
2900Sstevel@tonic-gate  *	    Bad things happend, cannot continue.
2910Sstevel@tonic-gate  */
2920Sstevel@tonic-gate int
mdmn_log_msg(md_mn_msg_t * msg)2930Sstevel@tonic-gate mdmn_log_msg(md_mn_msg_t *msg)
2940Sstevel@tonic-gate {
2950Sstevel@tonic-gate 	set_t		setno;
2960Sstevel@tonic-gate 	md_mn_msgclass_t	class;
2970Sstevel@tonic-gate 	mdmn_changelog_record_t	*lr;
2980Sstevel@tonic-gate 	md_error_t		err = mdnullerror;
2990Sstevel@tonic-gate 	md_error_t		*ep = &err;
3000Sstevel@tonic-gate 	int			retval = 0;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	setno = msg->msg_setno;
3030Sstevel@tonic-gate 	class = mdmn_get_message_class(msg->msg_type);
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	/* if not snarfed, snarf it */
3060Sstevel@tonic-gate 	if (mdmn_snarf_changelog(setno, ep) <= 0) {
3070Sstevel@tonic-gate 		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
3080Sstevel@tonic-gate 		    "log_msg: No records snarfed\n"));
3090Sstevel@tonic-gate 		return (-1);
3100Sstevel@tonic-gate 	}
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	/* log entry for the class */
3140Sstevel@tonic-gate 	lr = &mdmn_changelog[setno][class];
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	/* Check if the class is occupied */
3170Sstevel@tonic-gate 	if (lr->lr_flags & MD_MN_LR_INUSE) {
3180Sstevel@tonic-gate 		if (!MSGID_CMP(&(msg->msg_msgid), &(lr->lr_msg.msg_msgid))) {
3190Sstevel@tonic-gate 			syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
3200Sstevel@tonic-gate 			    "log_msg: id mismatch:\n"
3210Sstevel@tonic-gate 			    " stored    : ID = (%d, 0x%llx-%d)"
3220Sstevel@tonic-gate 			    " setno %d class %d type %d\n"
3230Sstevel@tonic-gate 			    " msg to log: ID = (%d, 0x%llx-%d)"
3240Sstevel@tonic-gate 			    " setno %d class %d type %d.\n"),
3250Sstevel@tonic-gate 			    MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
3260Sstevel@tonic-gate 			    lr->lr_class, lr->lr_msgtype,
3270Sstevel@tonic-gate 			    MSGID_ELEMS(msg->msg_msgid), msg->msg_setno, class,
3280Sstevel@tonic-gate 			    msg->msg_type);
3290Sstevel@tonic-gate 			return (MDMNE_CLASS_BUSY);
3300Sstevel@tonic-gate 		} else {
3310Sstevel@tonic-gate 			syslog(LOG_DAEMON | LOG_DEBUG, dgettext(TEXT_DOMAIN,
3320Sstevel@tonic-gate 			    "log_msg: msgid already logged:\n ID = "
3330Sstevel@tonic-gate 			    " (%d, 0x%llx-%d) setno %d class %d type %d\n"),
3340Sstevel@tonic-gate 			    MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
3350Sstevel@tonic-gate 			    lr->lr_class, lr->lr_msgtype);
3360Sstevel@tonic-gate 			return (MDMNE_ACK);
3370Sstevel@tonic-gate 		}
3380Sstevel@tonic-gate 	}
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	lr->lr_flags |= MD_MN_LR_INUSE;
3410Sstevel@tonic-gate 	lr->lr_msglen = MD_MN_MSG_LEN(msg);
3420Sstevel@tonic-gate 	assert(lr->lr_msg.msg_event_data == NULL);
3430Sstevel@tonic-gate 	if (msg->msg_event_size)
3440Sstevel@tonic-gate 		lr->lr_msg.msg_event_data = Zalloc(msg->msg_event_size);
3450Sstevel@tonic-gate 	(void) copy_msg(msg, &(lr->lr_msg));
3460Sstevel@tonic-gate 	retval = mdmn_log_it(setno, ep, lr);
3470Sstevel@tonic-gate 	if (retval != 0) {
3480Sstevel@tonic-gate 		syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
3490Sstevel@tonic-gate 		    "mdmn_log_msg - failure committing logged msg to disk\n"));
3500Sstevel@tonic-gate 		return (MDMNE_LOG_FAIL);
3510Sstevel@tonic-gate 	}
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	return (MDMNE_NULL); /* this is good */
3540Sstevel@tonic-gate }
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate /*
3570Sstevel@tonic-gate  * mdmn_unlog_msg(md_mn_msg_t *)
3580Sstevel@tonic-gate  *
3590Sstevel@tonic-gate  * Clear the log entry holding the indicated message.
3600Sstevel@tonic-gate  * Only the set master can do this.
3610Sstevel@tonic-gate  *
3620Sstevel@tonic-gate  * Return Values:
3630Sstevel@tonic-gate  *	0 - success
3640Sstevel@tonic-gate  *	-1 - fail
3650Sstevel@tonic-gate  */
3660Sstevel@tonic-gate int
mdmn_unlog_msg(md_mn_msg_t * msg)3670Sstevel@tonic-gate mdmn_unlog_msg(md_mn_msg_t *msg)
3680Sstevel@tonic-gate {
3690Sstevel@tonic-gate 	set_t			setno;
3700Sstevel@tonic-gate 	md_mn_msgclass_t	class;
3710Sstevel@tonic-gate 	md_error_t		err = mdnullerror;
3720Sstevel@tonic-gate 	md_error_t		*ep = &err;
3730Sstevel@tonic-gate 	int			retval = 0;
3740Sstevel@tonic-gate 	mdmn_changelog_record_t	*lr = NULL;
3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate 	setno = msg->msg_setno;
3770Sstevel@tonic-gate 	class = mdmn_get_message_class(msg->msg_type);
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	/* Find the log entry holding the indicated message */
3800Sstevel@tonic-gate 	if (mdmn_snarf_changelog(setno, ep) == 0)
3810Sstevel@tonic-gate 		return (-1);
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 	lr = &mdmn_changelog[setno][class];
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 	/* assert the message is still logged */
3860Sstevel@tonic-gate 	assert(lr != NULL);
3870Sstevel@tonic-gate 	if (!MSGID_CMP(&(msg->msg_msgid), &(lr->lr_msg.msg_msgid))) {
3880Sstevel@tonic-gate 		syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
389*8452SJohn.Wren.Kennedy@Sun.COM 		    "unlog_msg: msgid mismatch\n"
390*8452SJohn.Wren.Kennedy@Sun.COM 		    "\t\tstored: ID = (%d, 0x%llx-%d) setno %d "
391*8452SJohn.Wren.Kennedy@Sun.COM 		    "class %d type %d\n"
392*8452SJohn.Wren.Kennedy@Sun.COM 		    "\t\tattempting to unlog:\n"
393*8452SJohn.Wren.Kennedy@Sun.COM 		    "\t\tID = (%d, 0x%llx-%d) setno %d class %d type %d.\n"),
394*8452SJohn.Wren.Kennedy@Sun.COM 		    MSGID_ELEMS(lr->lr_msg.msg_msgid), lr->lr_setno,
395*8452SJohn.Wren.Kennedy@Sun.COM 		    lr->lr_class, lr->lr_msgtype, MSGID_ELEMS(msg->msg_msgid),
396*8452SJohn.Wren.Kennedy@Sun.COM 		    msg->msg_setno, class, msg->msg_type);
3970Sstevel@tonic-gate 		return (-1);
3980Sstevel@tonic-gate 	}
3990Sstevel@tonic-gate 	lr->lr_msglen = 0;
4000Sstevel@tonic-gate 	lr->lr_flags &= ~(MD_MN_LR_INUSE);
4010Sstevel@tonic-gate 	if (lr->lr_msg.msg_event_data) {
4020Sstevel@tonic-gate 		Free(lr->lr_msg.msg_event_data);
4030Sstevel@tonic-gate 		lr->lr_msg.msg_event_data = NULL;
4040Sstevel@tonic-gate 	}
4050Sstevel@tonic-gate 	/* commit the updated log record to disk */
4060Sstevel@tonic-gate 	retval = mdmn_log_it(setno, ep, lr);
4070Sstevel@tonic-gate #ifdef DEBUG
4080Sstevel@tonic-gate 	dump_rec("mdmn_unlog_msg: ", lr);
4090Sstevel@tonic-gate #endif
4100Sstevel@tonic-gate 	return (retval);
4110Sstevel@tonic-gate }
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate /*
4150Sstevel@tonic-gate  * mdmn_get_changelogrec(set_t , md_mn_msgclass_t)
4160Sstevel@tonic-gate  * Returns a pointer to incore changelog record.
4170Sstevel@tonic-gate  *
4180Sstevel@tonic-gate  * Return Values:
4190Sstevel@tonic-gate  *	non-NULL - success
4200Sstevel@tonic-gate  *	NULL - fail
4210Sstevel@tonic-gate  */
4220Sstevel@tonic-gate mdmn_changelog_record_t *
mdmn_get_changelogrec(set_t setno,md_mn_msgclass_t class)4230Sstevel@tonic-gate mdmn_get_changelogrec(set_t setno, md_mn_msgclass_t class)
4240Sstevel@tonic-gate {
4250Sstevel@tonic-gate 	md_error_t	err = mdnullerror;
4260Sstevel@tonic-gate 
4270Sstevel@tonic-gate 	if (mdmn_snarf_changelog(setno, &err) == 0)
4280Sstevel@tonic-gate 		return (NULL);
4290Sstevel@tonic-gate 	assert(mdmn_changelog[setno] != NULL);
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 	return (&mdmn_changelog[setno][class]);
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate /*
4350Sstevel@tonic-gate  * mdmn_commitlog(md_set_desc *, md_error_t *)
4360Sstevel@tonic-gate  *
4370Sstevel@tonic-gate  * Commit the set record and all of the changelog entry records to disk.
4380Sstevel@tonic-gate  * Don't bother with other stuff hanging off the set record
4390Sstevel@tonic-gate  * (e.g. drive records) since none of that is changing.
4400Sstevel@tonic-gate  * Called only at changelog pre-allocation time or when flushing a log.
4410Sstevel@tonic-gate  *
4420Sstevel@tonic-gate  * Return Values:
4430Sstevel@tonic-gate  *	0 - success
4440Sstevel@tonic-gate  *	errno - fail
4450Sstevel@tonic-gate  */
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate static int
mdmn_commitlog(md_set_desc * sd,md_error_t * ep)4480Sstevel@tonic-gate mdmn_commitlog(md_set_desc *sd, md_error_t *ep)
4490Sstevel@tonic-gate {
4500Sstevel@tonic-gate 	int			lrc;
4510Sstevel@tonic-gate 	int			*recs;
4520Sstevel@tonic-gate 	uint_t			size;
4530Sstevel@tonic-gate 	mdmn_changelog_record_t	*lr;
4540Sstevel@tonic-gate 	mdmn_changelog_record_od_t clodrec; /* changelog ondisk record */
4550Sstevel@tonic-gate 	mddb_userreq_t		req;
4560Sstevel@tonic-gate 	int			retval = 0;
4570Sstevel@tonic-gate 	set_t			setno;
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	/* Check for master and bounce non-master requests */
4600Sstevel@tonic-gate 	if (!(MD_MNSET_DESC(sd)) || !sd->sd_mn_am_i_master) {
4610Sstevel@tonic-gate 		if (!(MD_MNSET_DESC(sd))) {
4620Sstevel@tonic-gate 			syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
463*8452SJohn.Wren.Kennedy@Sun.COM 			    "mdmn_commitlog - Not MN Set\n"));
4640Sstevel@tonic-gate 		} else {
4650Sstevel@tonic-gate 			syslog(LOG_DAEMON | LOG_ERR, dgettext(TEXT_DOMAIN,
466*8452SJohn.Wren.Kennedy@Sun.COM 			    "mdmn_commit_log - Not Master\n"));
4670Sstevel@tonic-gate 		}
4680Sstevel@tonic-gate 		return (-1);
4690Sstevel@tonic-gate 	}
4700Sstevel@tonic-gate 	(void) memset(&req, 0, sizeof (req));
4710Sstevel@tonic-gate 	/* create the records to commit the info to the mddb */
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 	size = (mdmn_logrecs + 1) * sizeof (int);
4740Sstevel@tonic-gate 	recs = Zalloc(size);
4750Sstevel@tonic-gate 	/* Initialize the log entry records for update */
4760Sstevel@tonic-gate 	setno = sd->sd_setno;
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 	for (lrc = 0; lrc < mdmn_logrecs; lrc++) {
4790Sstevel@tonic-gate 		lr = &mdmn_changelog[setno][lrc];
4800Sstevel@tonic-gate 		recs[lrc] = lr->lr_selfid;
4810Sstevel@tonic-gate 		copy_changelog(lr, &clodrec, MD_MN_COPY_TO_ONDISK);
4820Sstevel@tonic-gate 		METAD_SETUP_LR(MD_DB_SETDATA, setno, lr->lr_selfid);
4830Sstevel@tonic-gate 		req.ur_size  = MDMN_LOGRECSIZE_OD;
48462Sjeanm 		req.ur_data = (uintptr_t)&clodrec;
4850Sstevel@tonic-gate 		if ((retval = metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde,
486*8452SJohn.Wren.Kennedy@Sun.COM 		    NULL)) != 0) {
4870Sstevel@tonic-gate 			(void) mdstealerror(ep, &req.ur_mde);
4880Sstevel@tonic-gate #ifdef DEBUG
4890Sstevel@tonic-gate 			syslog(LOG_DAEMON|LOG_DEBUG,
4900Sstevel@tonic-gate 			    "mdmn_commitlog - metaioctl SETDATA failure\n%s",
4910Sstevel@tonic-gate 			    mde_sperror(ep, ""));
4920Sstevel@tonic-gate #endif
4930Sstevel@tonic-gate 			break;
4940Sstevel@tonic-gate 		}
4950Sstevel@tonic-gate 	}
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 	if (retval == 0) {
4980Sstevel@tonic-gate 		/* set last rec to be 0 to indicate completion */
4990Sstevel@tonic-gate 		recs[lrc] = 0;
5000Sstevel@tonic-gate 		/* Commit to mddb  on disk */
5010Sstevel@tonic-gate 		METAD_SETUP_LR(MD_DB_COMMIT_MANY, setno,
502*8452SJohn.Wren.Kennedy@Sun.COM 		    mdmn_changelog[setno][0].lr_selfid);
5030Sstevel@tonic-gate 		req.ur_size = size;
50462Sjeanm 		req.ur_data = (uintptr_t)recs;
5050Sstevel@tonic-gate 		if ((retval = metaioctl(MD_MN_DB_USERREQ, &req,
506*8452SJohn.Wren.Kennedy@Sun.COM 		    &req.ur_mde, NULL)) != 0) {
5070Sstevel@tonic-gate 			(void) mdstealerror(ep, &req.ur_mde);
5080Sstevel@tonic-gate #ifdef DEBUG
5090Sstevel@tonic-gate 			syslog(LOG_DAEMON|LOG_DEBUG,
510*8452SJohn.Wren.Kennedy@Sun.COM 			    "mdmn_commitlog - metaioctl COMMIT_MANY"
511*8452SJohn.Wren.Kennedy@Sun.COM 			    "Failure\n%s",  mde_sperror(ep, ""));
5120Sstevel@tonic-gate #endif
5130Sstevel@tonic-gate 		}
5140Sstevel@tonic-gate 	}
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate 	Free(recs);
5170Sstevel@tonic-gate 	return (retval);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate /*
5210Sstevel@tonic-gate  * mdmn_log_it(set_t, md_error_t *, mdmn_changelog_record_t *)
5220Sstevel@tonic-gate  *
5230Sstevel@tonic-gate  * Commit the changed log record to disk.
5240Sstevel@tonic-gate  *
5250Sstevel@tonic-gate  * Return Values:
5260Sstevel@tonic-gate  *	0 - success
5270Sstevel@tonic-gate  *	-1 - fail
5280Sstevel@tonic-gate  */
5290Sstevel@tonic-gate static int
mdmn_log_it(set_t set,md_error_t * ep,mdmn_changelog_record_t * lr)5300Sstevel@tonic-gate mdmn_log_it(set_t set, md_error_t *ep, mdmn_changelog_record_t *lr)
5310Sstevel@tonic-gate {
5320Sstevel@tonic-gate 	int			*recs;
5330Sstevel@tonic-gate 	uint_t			size;
5340Sstevel@tonic-gate 	mddb_userreq_t		req;
5350Sstevel@tonic-gate 	mdmn_changelog_record_od_t	clodrec;
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	(void) memset(&req, 0, sizeof (req));
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 	/* Initialize the log entry record for update */
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	copy_changelog(lr, &clodrec, MD_MN_COPY_TO_ONDISK);
5420Sstevel@tonic-gate 	METAD_SETUP_LR(MD_DB_SETDATA, set, lr->lr_selfid);
5430Sstevel@tonic-gate 	req.ur_size = MDMN_LOGRECSIZE_OD;
54462Sjeanm 	req.ur_data = (uintptr_t)&clodrec;
5450Sstevel@tonic-gate 	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
5460Sstevel@tonic-gate 		(void) mdstealerror(ep, &req.ur_mde);
5470Sstevel@tonic-gate #ifdef DEBUG
5480Sstevel@tonic-gate 		syslog(LOG_DEBUG, "mdmn_log_it: DB_SETDATA  failed\n"
5490Sstevel@tonic-gate 		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
5500Sstevel@tonic-gate 		    req.ur_size, mde_sperror(ep, ""));
5510Sstevel@tonic-gate #endif
5520Sstevel@tonic-gate 		return (-1);
5530Sstevel@tonic-gate 	}
5540Sstevel@tonic-gate 	/* Set up the recid to be updated */
5550Sstevel@tonic-gate 	size = 2 * sizeof (int); /* the changed record, plus null terminator */
5560Sstevel@tonic-gate 	recs = Zalloc(size);
5570Sstevel@tonic-gate 	recs[0] = lr->lr_selfid;
5580Sstevel@tonic-gate 	recs[1] = 0;
5590Sstevel@tonic-gate 	/* Commit to mddb  on disk */
5600Sstevel@tonic-gate 	METAD_SETUP_LR(MD_DB_COMMIT_ONE, set, lr->lr_selfid);
5610Sstevel@tonic-gate 	req.ur_size = size;
56262Sjeanm 	req.ur_data = (uintptr_t)recs;
5630Sstevel@tonic-gate 	if (metaioctl(MD_MN_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
5640Sstevel@tonic-gate 		(void) mdstealerror(ep, &req.ur_mde);
5650Sstevel@tonic-gate #ifdef DEBUG
5660Sstevel@tonic-gate 		syslog(LOG_DEBUG, "mdmn_log_it: DB_COMMIT_ONE  failed\n"
5670Sstevel@tonic-gate 		    "set %d selfid %d, size %d\n%s", set, lr->lr_selfid,
5680Sstevel@tonic-gate 		    req.ur_size, mde_sperror(ep, ""));
5690Sstevel@tonic-gate #endif
5700Sstevel@tonic-gate 		Free(recs);
5710Sstevel@tonic-gate 		return (-1);
5720Sstevel@tonic-gate 	}
5730Sstevel@tonic-gate 	Free(recs);
5740Sstevel@tonic-gate 	return (0);
5750Sstevel@tonic-gate }
5760Sstevel@tonic-gate 
5770Sstevel@tonic-gate /*
5780Sstevel@tonic-gate  * mdmn_snarf_changelog(set_t, md_error_t *)
5790Sstevel@tonic-gate  *
5800Sstevel@tonic-gate  * snarf in the changelog entries and allocate incore structures
5810Sstevel@tonic-gate  * if required.
5820Sstevel@tonic-gate  * mdmn_changelog_snarfed array if set to MDMN_CLF_SNARFED, then
5830Sstevel@tonic-gate  * then the records are already snarfed.
5840Sstevel@tonic-gate  *
5850Sstevel@tonic-gate  * Called from set_snarf(), mdmn_log_msg(), and mdmn_unlog_msg()
5860Sstevel@tonic-gate  * Return Values:
5870Sstevel@tonic-gate  *	non-zero - success
5880Sstevel@tonic-gate  *	0 - fail
5890Sstevel@tonic-gate  */
5900Sstevel@tonic-gate int
mdmn_snarf_changelog(set_t set,md_error_t * ep)5910Sstevel@tonic-gate mdmn_snarf_changelog(set_t set, md_error_t *ep)
5920Sstevel@tonic-gate {
5930Sstevel@tonic-gate 	mdmn_changelog_record_t	 *tlr;
5940Sstevel@tonic-gate 	mdmn_changelog_record_od_t	 *lr;
5950Sstevel@tonic-gate 	mddb_recid_t		id;
5960Sstevel@tonic-gate 	md_mn_msgclass_t	class;
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate 	if (set == MD_LOCAL_SET)
6000Sstevel@tonic-gate 		return (0);
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate 	id = 0;
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 	if (mdmn_changelog_snarfed[set] & MDMN_CLF_SNARFED) {
6050Sstevel@tonic-gate 		assert(mdmn_changelog[set] != NULL);
6060Sstevel@tonic-gate 		return (mdmn_logrecs);
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	lr = (mdmn_changelog_record_od_t *)get_ur_rec(set, MD_UR_GET_NEXT,
610*8452SJohn.Wren.Kennedy@Sun.COM 	    MDDB_UR_LR, &id, ep);
6110Sstevel@tonic-gate 	if (lr == NULL)
6120Sstevel@tonic-gate 		return (0);
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate 	/* only allocate if Log records exist */
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate 	if (mdmn_changelog[set] == NULL) {
6170Sstevel@tonic-gate 		/* Allocate incore state for the log */
6180Sstevel@tonic-gate 		mdmn_changelog[set] = Zalloc(MDMN_LOGHDR_SIZE *
619*8452SJohn.Wren.Kennedy@Sun.COM 		    mdmn_logrecs);
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	do {
6230Sstevel@tonic-gate 		class = lr->lr_class;
6240Sstevel@tonic-gate 		tlr = &mdmn_changelog[set][class];
6250Sstevel@tonic-gate 		copy_changelog(tlr, lr, MD_MN_COPY_TO_INCORE);
6260Sstevel@tonic-gate 		Free(lr);
6270Sstevel@tonic-gate 		lr = (mdmn_changelog_record_od_t *)get_ur_rec(set,
6280Sstevel@tonic-gate 		    MD_UR_GET_NEXT, MDDB_UR_LR, &id, ep);
6290Sstevel@tonic-gate 	} while (lr != NULL);
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	/* Since log records counts are fixed return that value */
6320Sstevel@tonic-gate 	mdmn_changelog_snarfed[set] |= MDMN_CLF_SNARFED;
6330Sstevel@tonic-gate 	return (mdmn_logrecs);
6340Sstevel@tonic-gate }
635