xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_mirror_resync.c (revision 4492:b01436099eb7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
/*
 * mirror operations
 */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <meta.h>
330Sstevel@tonic-gate #include <sys/lvm/md_mirror.h>
340Sstevel@tonic-gate #include <thread.h>
350Sstevel@tonic-gate 
360Sstevel@tonic-gate extern	int	md_in_daemon;
370Sstevel@tonic-gate extern md_mn_client_list_t *mdmn_clients;
380Sstevel@tonic-gate 
390Sstevel@tonic-gate /*
400Sstevel@tonic-gate  * chain of mirrors
410Sstevel@tonic-gate  */
420Sstevel@tonic-gate typedef struct mm_unit_list {
430Sstevel@tonic-gate 	struct mm_unit_list	*next;	/* next in chain */
440Sstevel@tonic-gate 	mdname_t		*namep;	/* mirror name */
450Sstevel@tonic-gate 	mm_pass_num_t		pass;	/* pass number */
460Sstevel@tonic-gate 	uint_t			done;	/* resync done */
470Sstevel@tonic-gate } mm_unit_list_t;
480Sstevel@tonic-gate 
490Sstevel@tonic-gate /*
500Sstevel@tonic-gate  * resync mirror
510Sstevel@tonic-gate  * meta_lock for this set should be held on entry.
520Sstevel@tonic-gate  */
530Sstevel@tonic-gate int
meta_mirror_resync(mdsetname_t * sp,mdname_t * mirnp,daddr_t size,md_error_t * ep,md_resync_cmd_t cmd)540Sstevel@tonic-gate meta_mirror_resync(
550Sstevel@tonic-gate 	mdsetname_t		*sp,
560Sstevel@tonic-gate 	mdname_t		*mirnp,
570Sstevel@tonic-gate 	daddr_t			size,
580Sstevel@tonic-gate 	md_error_t		*ep,
590Sstevel@tonic-gate 	md_resync_cmd_t		cmd	/* Start/Block/Unblock/Kill */
600Sstevel@tonic-gate )
610Sstevel@tonic-gate {
620Sstevel@tonic-gate 	char			*miscname;
630Sstevel@tonic-gate 	md_resync_ioctl_t	ri;
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 	/* should have a set */
660Sstevel@tonic-gate 	assert(sp != NULL);
670Sstevel@tonic-gate 	assert(sp->setno == MD_MIN2SET(meta_getminor(mirnp->dev)));
680Sstevel@tonic-gate 
690Sstevel@tonic-gate 	/* make sure we have a mirror */
700Sstevel@tonic-gate 	if ((miscname = metagetmiscname(mirnp, ep)) == NULL)
710Sstevel@tonic-gate 		return (-1);
720Sstevel@tonic-gate 	if (strcmp(miscname, MD_MIRROR) != 0) {
730Sstevel@tonic-gate 		return (mdmderror(ep, MDE_NOT_MM, meta_getminor(mirnp->dev),
740Sstevel@tonic-gate 		    mirnp->cname));
750Sstevel@tonic-gate 	}
760Sstevel@tonic-gate 
770Sstevel@tonic-gate 	/* start resync */
780Sstevel@tonic-gate 	(void) memset(&ri, 0, sizeof (ri));
790Sstevel@tonic-gate 	MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
800Sstevel@tonic-gate 	ri.ri_mnum = meta_getminor(mirnp->dev);
810Sstevel@tonic-gate 	ri.ri_copysize = size;
820Sstevel@tonic-gate 	switch (cmd) {
830Sstevel@tonic-gate 	case MD_RESYNC_FORCE_MNSTART:
840Sstevel@tonic-gate 		ri.ri_flags |= MD_RI_RESYNC_FORCE_MNSTART;
850Sstevel@tonic-gate 		break;
860Sstevel@tonic-gate 	case MD_RESYNC_START:
870Sstevel@tonic-gate 		ri.ri_flags = 0;
880Sstevel@tonic-gate 		break;
890Sstevel@tonic-gate 	case MD_RESYNC_BLOCK:
900Sstevel@tonic-gate 		ri.ri_flags = MD_RI_BLOCK;
910Sstevel@tonic-gate 		break;
920Sstevel@tonic-gate 	case MD_RESYNC_UNBLOCK:
930Sstevel@tonic-gate 		ri.ri_flags = MD_RI_UNBLOCK;
940Sstevel@tonic-gate 		break;
950Sstevel@tonic-gate 	case MD_RESYNC_KILL:
960Sstevel@tonic-gate 		ri.ri_flags = MD_RI_KILL;
970Sstevel@tonic-gate 		break;
980Sstevel@tonic-gate 	case MD_RESYNC_KILL_NO_WAIT:
990Sstevel@tonic-gate 		ri.ri_flags = MD_RI_KILL | MD_RI_NO_WAIT;
1000Sstevel@tonic-gate 		break;
1010Sstevel@tonic-gate 	default:
1020Sstevel@tonic-gate 		/* TODO: Add new error MDE_BAD_RESYNC_FLAGS */
1030Sstevel@tonic-gate 		return (mderror(ep, MDE_BAD_RESYNC_OPT, mirnp->cname));
1040Sstevel@tonic-gate 	}
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate 	if (metaioctl(MD_IOCSETSYNC, &ri, &ri.mde, mirnp->cname) != 0)
1070Sstevel@tonic-gate 		return (mdstealerror(ep, &ri.mde));
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate 	/* return success */
1100Sstevel@tonic-gate 	return (0);
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate /*
1140Sstevel@tonic-gate  * free units
1150Sstevel@tonic-gate  */
1160Sstevel@tonic-gate static void
free_units(mm_unit_list_t * mirrors[MD_PASS_MAX+1])1170Sstevel@tonic-gate free_units(
1180Sstevel@tonic-gate 	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1]
1190Sstevel@tonic-gate )
1200Sstevel@tonic-gate {
1210Sstevel@tonic-gate 	uint_t		i;
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate 	for (i = 0; (i < (MD_PASS_MAX + 1)); ++i) {
1240Sstevel@tonic-gate 		mm_unit_list_t	*p, *n;
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate 		for (p = mirrors[i], n = NULL; (p != NULL); p = n) {
1270Sstevel@tonic-gate 			n = p->next;
1280Sstevel@tonic-gate 			Free(p);
1290Sstevel@tonic-gate 		}
1300Sstevel@tonic-gate 		mirrors[i] = NULL;
1310Sstevel@tonic-gate 	}
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate /*
1350Sstevel@tonic-gate  * setup_units:	build lists of units for each pass
1360Sstevel@tonic-gate  */
1370Sstevel@tonic-gate static int
setup_units(mdsetname_t * sp,mm_unit_list_t * mirrors[MD_PASS_MAX+1],md_error_t * ep)1380Sstevel@tonic-gate setup_units(
1390Sstevel@tonic-gate 	mdsetname_t	*sp,
1400Sstevel@tonic-gate 	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1],
1410Sstevel@tonic-gate 	md_error_t	*ep
1420Sstevel@tonic-gate )
1430Sstevel@tonic-gate {
1440Sstevel@tonic-gate 	mdnamelist_t	*mirrornlp = NULL;
1450Sstevel@tonic-gate 	mdnamelist_t	*p;
1460Sstevel@tonic-gate 	int		rval = 0;
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	/* should have a set */
1490Sstevel@tonic-gate 	assert(sp != NULL);
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 	/* for each mirror */
1520Sstevel@tonic-gate 	if (meta_get_mirror_names(sp, &mirrornlp, 0, ep) < 0)
1530Sstevel@tonic-gate 		return (-1);
1540Sstevel@tonic-gate 	for (p = mirrornlp; (p != NULL); p = p->next) {
1550Sstevel@tonic-gate 		md_mirror_t	*mirrorp;
1560Sstevel@tonic-gate 		mm_unit_list_t	*lp;
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 		/* get unit structure */
1590Sstevel@tonic-gate 		if ((mirrorp = meta_get_mirror(sp, p->namep, ep)) == NULL) {
1600Sstevel@tonic-gate 			rval = -1;	/* record, but ignore errors */
1610Sstevel@tonic-gate 			continue;
1620Sstevel@tonic-gate 		}
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 		/* save info */
1650Sstevel@tonic-gate 		lp = Zalloc(sizeof (*lp));
1660Sstevel@tonic-gate 		lp->namep = p->namep;
1670Sstevel@tonic-gate 		lp->pass = mirrorp->pass_num;
1680Sstevel@tonic-gate 		if ((lp->pass < 0) || (lp->pass > MD_PASS_MAX))
1690Sstevel@tonic-gate 			lp->pass = MD_PASS_MAX;
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 		/* put on list */
1720Sstevel@tonic-gate 		lp->next = mirrors[lp->pass];
1730Sstevel@tonic-gate 		mirrors[lp->pass] = lp;
1740Sstevel@tonic-gate 	}
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate 	/* cleanup, return error */
1770Sstevel@tonic-gate 	metafreenamelist(mirrornlp);
1780Sstevel@tonic-gate 	return (rval);
1790Sstevel@tonic-gate }
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate /*
1820Sstevel@tonic-gate  * resync all mirrors (in background)
1830Sstevel@tonic-gate  */
1840Sstevel@tonic-gate int
meta_mirror_resync_all(mdsetname_t * sp,daddr_t size,md_error_t * ep)1850Sstevel@tonic-gate meta_mirror_resync_all(
1860Sstevel@tonic-gate 	mdsetname_t	*sp,
1870Sstevel@tonic-gate 	daddr_t		size,
1880Sstevel@tonic-gate 	md_error_t	*ep
1890Sstevel@tonic-gate )
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate 	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
1920Sstevel@tonic-gate 	mm_pass_num_t	pass, max_pass;
1930Sstevel@tonic-gate 	int		rval = 0, fval;
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 	/* should have a set */
1960Sstevel@tonic-gate 	assert(sp != NULL);
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	/* get mirrors */
1990Sstevel@tonic-gate 	(void) memset(mirrors, 0, sizeof (mirrors));
2000Sstevel@tonic-gate 	if (setup_units(sp, mirrors, ep) != 0)
201*4492Spetede 		return (-1);
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	/* fork a process */
2040Sstevel@tonic-gate 	if ((fval = md_daemonize(sp, ep)) != 0) {
2050Sstevel@tonic-gate 		/*
2060Sstevel@tonic-gate 		 * md_daemonize will fork off a process.  The is the
2070Sstevel@tonic-gate 		 * parent or error.
2080Sstevel@tonic-gate 		 */
2090Sstevel@tonic-gate 		if (fval > 0) {
2100Sstevel@tonic-gate 			free_units(mirrors);
2110Sstevel@tonic-gate 			return (0);
2120Sstevel@tonic-gate 		}
2130Sstevel@tonic-gate 		mdclrerror(ep);
2140Sstevel@tonic-gate 	}
2150Sstevel@tonic-gate 	/*
2160Sstevel@tonic-gate 	 * Closing stdin/out/err here.
2170Sstevel@tonic-gate 	 * In case this was called thru rsh, the calling process on the other
2180Sstevel@tonic-gate 	 * side will know, it doesn't have to wait until all the resyncs have
2190Sstevel@tonic-gate 	 * finished.
2200Sstevel@tonic-gate 	 * Also initialise the rpc client pool so that this process will use
2210Sstevel@tonic-gate 	 * a unique pool of clients. If we don't do this, all of the forked
2220Sstevel@tonic-gate 	 * clients will end up using the same pool of clients which can result
2230Sstevel@tonic-gate 	 * in hung clients.
2240Sstevel@tonic-gate 	 */
2250Sstevel@tonic-gate 	if (meta_is_mn_set(sp, ep)) {
2260Sstevel@tonic-gate 		(void) close(0);
2270Sstevel@tonic-gate 		(void) close(1);
2280Sstevel@tonic-gate 		(void) close(2);
2290Sstevel@tonic-gate 		mdmn_clients = NULL;
2300Sstevel@tonic-gate 	}
2310Sstevel@tonic-gate 	assert((fval == 0) || (fval == -1));
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	/*
2340Sstevel@tonic-gate 	 * Determine which pass level is the highest that contains mirrors to
2350Sstevel@tonic-gate 	 * resync. We only need to wait for completion of earlier levels below
2360Sstevel@tonic-gate 	 * this high watermark. If all mirrors are at the same pass level
2370Sstevel@tonic-gate 	 * there is no requirement to wait for completion.
2380Sstevel@tonic-gate 	 */
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	max_pass = 1;
2410Sstevel@tonic-gate 	for (pass = MD_PASS_MAX; pass > 1; --pass) {
2420Sstevel@tonic-gate 		if (mirrors[pass] != NULL) {
2430Sstevel@tonic-gate 			max_pass = pass;
2440Sstevel@tonic-gate 			break;
2450Sstevel@tonic-gate 		}
2460Sstevel@tonic-gate 	}
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 	/*
2490Sstevel@tonic-gate 	 * max_pass now contains the highest pass-level with resyncable mirrors
2500Sstevel@tonic-gate 	 */
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate 	/* do passes */
2530Sstevel@tonic-gate 	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
2540Sstevel@tonic-gate 		int			dispatched = 0;
2550Sstevel@tonic-gate 		unsigned		howlong = 1;
2560Sstevel@tonic-gate 		mm_unit_list_t		*lp;
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate 		/* skip empty passes */
2590Sstevel@tonic-gate 		if (mirrors[pass] == NULL)
2600Sstevel@tonic-gate 			continue;
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate 		/* dispatch all resyncs in pass */
2630Sstevel@tonic-gate 		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
2640Sstevel@tonic-gate 			if (meta_is_mn_set(sp, ep)) {
2650Sstevel@tonic-gate 				if (meta_mn_send_setsync(sp, lp->namep,
2660Sstevel@tonic-gate 				    size, ep) != 0) {
2670Sstevel@tonic-gate 					rval = -1;
2680Sstevel@tonic-gate 					lp->done = 1;
2690Sstevel@tonic-gate 				} else {
2700Sstevel@tonic-gate 					++dispatched;
2710Sstevel@tonic-gate 				}
2720Sstevel@tonic-gate 			} else {
2730Sstevel@tonic-gate 				if (meta_mirror_resync(sp, lp->namep, size, ep,
2740Sstevel@tonic-gate 				    MD_RESYNC_START) != 0) {
2750Sstevel@tonic-gate 					rval = -1;
2760Sstevel@tonic-gate 					lp->done = 1;
2770Sstevel@tonic-gate 				} else {
2780Sstevel@tonic-gate 					++dispatched;
2790Sstevel@tonic-gate 				}
2800Sstevel@tonic-gate 			}
2810Sstevel@tonic-gate 		}
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 		/*
2840Sstevel@tonic-gate 		 * Wait for them to finish iff we are at a level lower than
2850Sstevel@tonic-gate 		 * max_pass. This orders the resyncs into distinct levels.
2860Sstevel@tonic-gate 		 * I.e. level 2 resyncs won't start until all level 1 ones
2870Sstevel@tonic-gate 		 * have completed.
2880Sstevel@tonic-gate 		 */
2890Sstevel@tonic-gate 		if (pass == max_pass)
2900Sstevel@tonic-gate 			continue;
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate 		howlong = 1;
2930Sstevel@tonic-gate 		while (dispatched > 0) {
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate 			/* wait a while */
2960Sstevel@tonic-gate 			(void) sleep(howlong);
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate 			/* see if any finished */
2990Sstevel@tonic-gate 			for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
3000Sstevel@tonic-gate 				md_resync_ioctl_t	ri;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 				if (lp->done)
3030Sstevel@tonic-gate 					continue;
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 				(void) memset(&ri, '\0', sizeof (ri));
3060Sstevel@tonic-gate 				ri.ri_mnum = meta_getminor(lp->namep->dev);
3070Sstevel@tonic-gate 				MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
3080Sstevel@tonic-gate 				if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
3090Sstevel@tonic-gate 				    lp->namep->cname) != 0) {
3100Sstevel@tonic-gate 					(void) mdstealerror(ep, &ri.mde);
3110Sstevel@tonic-gate 					rval = -1;
3120Sstevel@tonic-gate 					lp->done = 1;
3130Sstevel@tonic-gate 					--dispatched;
3140Sstevel@tonic-gate 				} else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
3150Sstevel@tonic-gate 					lp->done = 1;
3160Sstevel@tonic-gate 					--dispatched;
3170Sstevel@tonic-gate 				}
3180Sstevel@tonic-gate 			}
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate 			/* wait a little longer next time */
3210Sstevel@tonic-gate 			if (howlong < 10)
3220Sstevel@tonic-gate 				++howlong;
3230Sstevel@tonic-gate 		}
3240Sstevel@tonic-gate 	}
3250Sstevel@tonic-gate 
3260Sstevel@tonic-gate 	/* cleanup, return success */
3270Sstevel@tonic-gate 	free_units(mirrors);
3280Sstevel@tonic-gate 	if (fval == 0)  /* we are the child process so exit */
3290Sstevel@tonic-gate 		exit(0);
3300Sstevel@tonic-gate 	return (rval);
3310Sstevel@tonic-gate }
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate /*
3340Sstevel@tonic-gate  * meta_mn_mirror_resync_all:
3350Sstevel@tonic-gate  * -------------------------
3360Sstevel@tonic-gate  * Resync all mirrors associated with given set (arg). Called when master
3370Sstevel@tonic-gate  * node is adding a node to a diskset.  Only want to initiate the resync on
3380Sstevel@tonic-gate  * the current node.
3390Sstevel@tonic-gate  */
3400Sstevel@tonic-gate void *
meta_mn_mirror_resync_all(void * arg)3410Sstevel@tonic-gate meta_mn_mirror_resync_all(void *arg)
3420Sstevel@tonic-gate {
3430Sstevel@tonic-gate 	set_t		setno = *((set_t *)arg);
3440Sstevel@tonic-gate 	mdsetname_t	*sp;
3450Sstevel@tonic-gate 	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
3460Sstevel@tonic-gate 	mm_pass_num_t	pass, max_pass;
3470Sstevel@tonic-gate 	md_error_t	mde = mdnullerror;
3480Sstevel@tonic-gate 	int		fval;
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 
3510Sstevel@tonic-gate 	/* should have a set */
3520Sstevel@tonic-gate 	assert(setno != NULL);
3530Sstevel@tonic-gate 
3540Sstevel@tonic-gate 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
3550Sstevel@tonic-gate 		mde_perror(&mde, "");
3560Sstevel@tonic-gate 		return (NULL);
3570Sstevel@tonic-gate 	}
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate 	if (!(meta_is_mn_set(sp, &mde))) {
3600Sstevel@tonic-gate 		mde_perror(&mde, "");
3610Sstevel@tonic-gate 		return (NULL);
3620Sstevel@tonic-gate 	}
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	/* fork a process */
3650Sstevel@tonic-gate 	if ((fval = md_daemonize(sp, &mde)) != 0) {
3660Sstevel@tonic-gate 		/*
3670Sstevel@tonic-gate 		 * md_daemonize will fork off a process.  The is the
3680Sstevel@tonic-gate 		 * parent or error.
3690Sstevel@tonic-gate 		 */
3700Sstevel@tonic-gate 		if (fval > 0) {
3710Sstevel@tonic-gate 			return (NULL);
3720Sstevel@tonic-gate 		}
3730Sstevel@tonic-gate 		mde_perror(&mde, "");
3740Sstevel@tonic-gate 		return (NULL);
3750Sstevel@tonic-gate 	}
3760Sstevel@tonic-gate 	/*
3770Sstevel@tonic-gate 	 * Child process should never return back to rpc.metad, but
3780Sstevel@tonic-gate 	 * should exit.
3790Sstevel@tonic-gate 	 * Flush all internally cached data inherited from parent process
3800Sstevel@tonic-gate 	 * since cached data will be cleared when parent process RPC request
3810Sstevel@tonic-gate 	 * has completed (which is possibly before this child process
3820Sstevel@tonic-gate 	 * can complete).
3830Sstevel@tonic-gate 	 * Child process can retrieve and cache its own copy of data from
3840Sstevel@tonic-gate 	 * rpc.metad that won't be changed by the parent process.
3850Sstevel@tonic-gate 	 *
3860Sstevel@tonic-gate 	 * Reset md_in_daemon since this child will be a client of rpc.metad
3870Sstevel@tonic-gate 	 * not part of the rpc.metad daemon itself.
3880Sstevel@tonic-gate 	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
3890Sstevel@tonic-gate 	 * this thread is rpc.metad or any other thread.  (If this thread
3900Sstevel@tonic-gate 	 * was rpc.metad it could use some short circuit code to get data
3910Sstevel@tonic-gate 	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
3920Sstevel@tonic-gate 	 */
3930Sstevel@tonic-gate 	md_in_daemon = 0;
3940Sstevel@tonic-gate 	metaflushsetname(sp);
3950Sstevel@tonic-gate 	sr_cache_flush_setno(setno);
3960Sstevel@tonic-gate 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
3970Sstevel@tonic-gate 		mde_perror(&mde, "");
3980Sstevel@tonic-gate 		md_exit(sp, 1);
3990Sstevel@tonic-gate 	}
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	if (meta_lock(sp, TRUE, &mde) != 0) {
4020Sstevel@tonic-gate 		mde_perror(&mde, "");
4030Sstevel@tonic-gate 		md_exit(sp, 1);
4040Sstevel@tonic-gate 	}
4050Sstevel@tonic-gate 
4060Sstevel@tonic-gate 	/*
4070Sstevel@tonic-gate 	 * Closing stdin/out/err here.
4080Sstevel@tonic-gate 	 */
4090Sstevel@tonic-gate 	(void) close(0);
4100Sstevel@tonic-gate 	(void) close(1);
4110Sstevel@tonic-gate 	(void) close(2);
4120Sstevel@tonic-gate 	assert(fval == 0);
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate 	/* get mirrors */
4150Sstevel@tonic-gate 	(void) memset(mirrors, 0, sizeof (mirrors));
4160Sstevel@tonic-gate 	if (setup_units(sp, mirrors, &mde) != 0) {
4170Sstevel@tonic-gate 		(void) meta_unlock(sp, &mde);
4180Sstevel@tonic-gate 		md_exit(sp, 1);
4190Sstevel@tonic-gate 	}
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 	/*
4220Sstevel@tonic-gate 	 * Determine which pass level is the highest that contains mirrors to
4230Sstevel@tonic-gate 	 * resync. We only need to wait for completion of earlier levels below
4240Sstevel@tonic-gate 	 * this high watermark. If all mirrors are at the same pass level
4250Sstevel@tonic-gate 	 * there is no requirement to wait for completion.
4260Sstevel@tonic-gate 	 */
4270Sstevel@tonic-gate 	max_pass = 1;
4280Sstevel@tonic-gate 	for (pass = MD_PASS_MAX; pass > 1; --pass) {
4290Sstevel@tonic-gate 		if (mirrors[pass] != NULL) {
4300Sstevel@tonic-gate 			max_pass = pass;
4310Sstevel@tonic-gate 			break;
4320Sstevel@tonic-gate 		}
4330Sstevel@tonic-gate 	}
4340Sstevel@tonic-gate 
4350Sstevel@tonic-gate 	/*
4360Sstevel@tonic-gate 	 * max_pass now contains the highest pass-level with resyncable mirrors
4370Sstevel@tonic-gate 	 */
4380Sstevel@tonic-gate 	/* do passes */
4390Sstevel@tonic-gate 	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
4400Sstevel@tonic-gate 		int			dispatched = 0;
4410Sstevel@tonic-gate 		unsigned		howlong = 1;
4420Sstevel@tonic-gate 		mm_unit_list_t		*lp;
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 		/* skip empty passes */
4450Sstevel@tonic-gate 		if (mirrors[pass] == NULL)
4460Sstevel@tonic-gate 			continue;
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 		/* dispatch all resyncs in pass */
4490Sstevel@tonic-gate 		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
4500Sstevel@tonic-gate 			if (meta_mirror_resync(sp, lp->namep, 0, &mde,
4510Sstevel@tonic-gate 			    MD_RESYNC_FORCE_MNSTART) != 0) {
4520Sstevel@tonic-gate 				mdclrerror(&mde);
4530Sstevel@tonic-gate 				lp->done = 1;
4540Sstevel@tonic-gate 			} else {
4550Sstevel@tonic-gate 				++dispatched;
4560Sstevel@tonic-gate 			}
4570Sstevel@tonic-gate 		}
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 		/*
4600Sstevel@tonic-gate 		 * Wait for them to finish iff we are at a level lower than
4610Sstevel@tonic-gate 		 * max_pass. This orders the resyncs into distinct levels.
4620Sstevel@tonic-gate 		 * I.e. level 2 resyncs won't start until all level 1 ones
4630Sstevel@tonic-gate 		 * have completed.
4640Sstevel@tonic-gate 		 */
4650Sstevel@tonic-gate 		if (pass == max_pass)
4660Sstevel@tonic-gate 			continue;
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 		howlong = 1;
4690Sstevel@tonic-gate 		while (dispatched > 0) {
4700Sstevel@tonic-gate 
4710Sstevel@tonic-gate 			/* wait a while */
4720Sstevel@tonic-gate 			(void) sleep(howlong);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 			/* see if any finished */
4750Sstevel@tonic-gate 			for (lp = mirrors[pass]; lp != NULL; lp = lp->next) {
4760Sstevel@tonic-gate 				md_resync_ioctl_t	ri;
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 				if (lp->done)
4790Sstevel@tonic-gate 					continue;
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 				(void) memset(&ri, '\0', sizeof (ri));
4820Sstevel@tonic-gate 				ri.ri_mnum = meta_getminor(lp->namep->dev);
4830Sstevel@tonic-gate 				MD_SETDRIVERNAME(&ri, MD_MIRROR, sp->setno);
4840Sstevel@tonic-gate 				if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde,
4850Sstevel@tonic-gate 				    lp->namep->cname) != 0) {
4860Sstevel@tonic-gate 					mdclrerror(&mde);
4870Sstevel@tonic-gate 					lp->done = 1;
4880Sstevel@tonic-gate 					--dispatched;
4890Sstevel@tonic-gate 				} else if (! (ri.ri_flags & MD_RI_INPROGRESS)) {
4900Sstevel@tonic-gate 					lp->done = 1;
4910Sstevel@tonic-gate 					--dispatched;
4920Sstevel@tonic-gate 				}
4930Sstevel@tonic-gate 			}
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 			/* wait a little longer next time */
4960Sstevel@tonic-gate 			if (howlong < 10)
4970Sstevel@tonic-gate 				++howlong;
4980Sstevel@tonic-gate 		}
4990Sstevel@tonic-gate 	}
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	/* cleanup, return success */
5020Sstevel@tonic-gate 	free_units(mirrors);
5030Sstevel@tonic-gate 	(void) meta_unlock(sp, &mde);
5040Sstevel@tonic-gate 	md_exit(sp, 0);
5050Sstevel@tonic-gate 	/*NOTREACHED*/
50662Sjeanm 	return (NULL);
5070Sstevel@tonic-gate }
5080Sstevel@tonic-gate 
5090Sstevel@tonic-gate /*
5100Sstevel@tonic-gate  * meta_mirror_resync_process:
5110Sstevel@tonic-gate  * --------------------------
5120Sstevel@tonic-gate  * Modify any resync that is in progress on this node for the given set.
5130Sstevel@tonic-gate  *
5140Sstevel@tonic-gate  * Input Parameters:
5150Sstevel@tonic-gate  *	sp	setname to scan for mirrors
5160Sstevel@tonic-gate  *	cmd	action to take:
5170Sstevel@tonic-gate  *		MD_RESYNC_KILL	- kill all resync threads
5180Sstevel@tonic-gate  *		MD_RESYNC_BLOCK	- block all resync threads
5190Sstevel@tonic-gate  *		MD_RESYNC_UNBLOCK - resume all resync threads
5200Sstevel@tonic-gate  * Output Parameters
5210Sstevel@tonic-gate  *	ep	error return structure
5220Sstevel@tonic-gate  *
5230Sstevel@tonic-gate  * meta_lock for this set should be held on entry.
5240Sstevel@tonic-gate  */
5250Sstevel@tonic-gate static void
meta_mirror_resync_process(mdsetname_t * sp,md_error_t * ep,md_resync_cmd_t cmd)5260Sstevel@tonic-gate meta_mirror_resync_process(mdsetname_t *sp, md_error_t *ep, md_resync_cmd_t cmd)
5270Sstevel@tonic-gate {
5280Sstevel@tonic-gate 	mm_unit_list_t	*mirrors[MD_PASS_MAX + 1];
5290Sstevel@tonic-gate 	mm_pass_num_t	pass;
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 	/* Grab all the mirrors from the set (if any) */
5320Sstevel@tonic-gate 	(void) memset(mirrors, 0, sizeof (mirrors));
5330Sstevel@tonic-gate 	if (setup_units(sp, mirrors, ep) != 0)
5340Sstevel@tonic-gate 		return;
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 	/* do passes */
5370Sstevel@tonic-gate 	for (pass = 1; (pass <= MD_PASS_MAX); ++pass) {
5380Sstevel@tonic-gate 		mm_unit_list_t		*lp;
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 		/* skip empty passes */
5410Sstevel@tonic-gate 		if (mirrors[pass] == NULL)
5420Sstevel@tonic-gate 			continue;
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 		/* Process all resyncs in pass */
5450Sstevel@tonic-gate 		for (lp = mirrors[pass]; (lp != NULL); lp = lp->next) {
5460Sstevel@tonic-gate 			(void) meta_mirror_resync(sp, lp->namep, 0, ep,
5470Sstevel@tonic-gate 			    cmd);
5480Sstevel@tonic-gate 		}
5490Sstevel@tonic-gate 	}
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate 	/* Clear up mirror units */
5520Sstevel@tonic-gate 	free_units(mirrors);
5530Sstevel@tonic-gate }
5540Sstevel@tonic-gate 
5550Sstevel@tonic-gate /*
5560Sstevel@tonic-gate  * meta_mirror_resync_process_all:
5570Sstevel@tonic-gate  * ------------------------------
5580Sstevel@tonic-gate  * Issue the given resync command to all mirrors contained in all multi-node
5590Sstevel@tonic-gate  * sets.
5600Sstevel@tonic-gate  *
5610Sstevel@tonic-gate  * Input Parameters:
5620Sstevel@tonic-gate  *	cmd	- MD_RESYNC_KILL, MD_RESYNC_BLOCK, MD_RESYNC_UNBLOCK
5630Sstevel@tonic-gate  */
5640Sstevel@tonic-gate static void
meta_mirror_resync_process_all(md_resync_cmd_t cmd)5650Sstevel@tonic-gate meta_mirror_resync_process_all(md_resync_cmd_t cmd)
5660Sstevel@tonic-gate {
5670Sstevel@tonic-gate 	set_t		setno, max_sets;
5680Sstevel@tonic-gate 	md_error_t	mde = mdnullerror;
5690Sstevel@tonic-gate 	mdsetname_t	*this_sp;
5700Sstevel@tonic-gate 	md_set_desc	*sd;
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate 	/*
5730Sstevel@tonic-gate 	 * Traverse all sets looking for multi-node capable ones.
5740Sstevel@tonic-gate 	 */
5750Sstevel@tonic-gate 	max_sets = get_max_sets(&mde);
5760Sstevel@tonic-gate 	for (setno = 1; setno < max_sets; setno++) {
5770Sstevel@tonic-gate 		mde = mdnullerror;
5780Sstevel@tonic-gate 		if (this_sp = metasetnosetname(setno, &mde)) {
5790Sstevel@tonic-gate 			if ((sd = metaget_setdesc(this_sp, &mde)) == NULL)
5800Sstevel@tonic-gate 				continue;
5810Sstevel@tonic-gate 			if (!MD_MNSET_DESC(sd))
5820Sstevel@tonic-gate 				continue;
5830Sstevel@tonic-gate 
5840Sstevel@tonic-gate 			if (meta_lock(this_sp, TRUE, &mde)) {
5850Sstevel@tonic-gate 				continue;
5860Sstevel@tonic-gate 			}
5870Sstevel@tonic-gate 			meta_mirror_resync_process(this_sp, &mde, cmd);
5880Sstevel@tonic-gate 			(void) meta_unlock(this_sp, &mde);
5890Sstevel@tonic-gate 		}
5900Sstevel@tonic-gate 	}
5910Sstevel@tonic-gate }
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate /*
5940Sstevel@tonic-gate  * meta_mirror_resync_kill_all:
5950Sstevel@tonic-gate  * ---------------------------
5960Sstevel@tonic-gate  * Abort any resync that is in progress on this node. Scan all sets for all
5970Sstevel@tonic-gate  * mirrors.
5980Sstevel@tonic-gate  * Note: this routine is provided for future use. For example to kill all
5990Sstevel@tonic-gate  *	 resyncs on a node this could be used as long as the
6000Sstevel@tonic-gate  *	 mddoors / rpc.mdcommd tuple is running on all members of the cluster.
6010Sstevel@tonic-gate  */
6020Sstevel@tonic-gate void
meta_mirror_resync_kill_all(void)6030Sstevel@tonic-gate meta_mirror_resync_kill_all(void)
6040Sstevel@tonic-gate {
6050Sstevel@tonic-gate 	meta_mirror_resync_process_all(MD_RESYNC_KILL);
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate /*
6090Sstevel@tonic-gate  * meta_mirror_resync_block_all:
6100Sstevel@tonic-gate  * ----------------------------
6110Sstevel@tonic-gate  * Block all resyncs that are in progress. This causes the resync state to
6120Sstevel@tonic-gate  * freeze on this machine, and can be resumed by calling
6130Sstevel@tonic-gate  * meta_mirror_resync_unblock_all.
6140Sstevel@tonic-gate  */
6150Sstevel@tonic-gate void
meta_mirror_resync_block_all(void)6160Sstevel@tonic-gate meta_mirror_resync_block_all(void)
6170Sstevel@tonic-gate {
6180Sstevel@tonic-gate 	meta_mirror_resync_process_all(MD_RESYNC_BLOCK);
6190Sstevel@tonic-gate }
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate /*
6220Sstevel@tonic-gate  * meta_mirror_resync_unblock_all:
6230Sstevel@tonic-gate  * ------------------------------
6240Sstevel@tonic-gate  * Unblock all previously blocked resync threads on this node.
6250Sstevel@tonic-gate  */
6260Sstevel@tonic-gate void
meta_mirror_resync_unblock_all(void)6270Sstevel@tonic-gate meta_mirror_resync_unblock_all(void)
6280Sstevel@tonic-gate {
6290Sstevel@tonic-gate 	meta_mirror_resync_process_all(MD_RESYNC_UNBLOCK);
6300Sstevel@tonic-gate }
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate /*
6330Sstevel@tonic-gate  * meta_mirror_resync_unblock:
6340Sstevel@tonic-gate  * --------------------------
6350Sstevel@tonic-gate  * Unblock any previously blocked resync threads for the given set.
6360Sstevel@tonic-gate  * meta_lock for this set should be held on entry.
6370Sstevel@tonic-gate  */
6380Sstevel@tonic-gate void
meta_mirror_resync_unblock(mdsetname_t * sp)6390Sstevel@tonic-gate meta_mirror_resync_unblock(mdsetname_t *sp)
6400Sstevel@tonic-gate {
6410Sstevel@tonic-gate 	md_error_t	mde = mdnullerror;
6420Sstevel@tonic-gate 
6430Sstevel@tonic-gate 	meta_mirror_resync_process(sp, &mde, MD_RESYNC_UNBLOCK);
6440Sstevel@tonic-gate }
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate /*
6470Sstevel@tonic-gate  * meta_mirror_resync_kill:
6480Sstevel@tonic-gate  * -----------------------
6490Sstevel@tonic-gate  * Kill any resync threads running on mirrors in the given set.
6500Sstevel@tonic-gate  * Called when releasing a set (meta_set_prv.c`halt_set)
6510Sstevel@tonic-gate  */
6520Sstevel@tonic-gate void
meta_mirror_resync_kill(mdsetname_t * sp)6530Sstevel@tonic-gate meta_mirror_resync_kill(mdsetname_t *sp)
6540Sstevel@tonic-gate {
6550Sstevel@tonic-gate 	md_error_t	mde = mdnullerror;
6560Sstevel@tonic-gate 
6570Sstevel@tonic-gate 	meta_mirror_resync_process(sp, &mde, MD_RESYNC_KILL);
6580Sstevel@tonic-gate }
659