xref: /onnv-gate/usr/src/uts/common/io/lvm/raid/raid_replay.c (revision 1623:7bac4a816ebe)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1623Stw21770  * Common Development and Distribution License (the "License").
6*1623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*1623Stw21770 /*
22*1623Stw21770  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*1623Stw21770  * Use is subject to license terms.
24*1623Stw21770  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * NAME:	raid_replay.c
300Sstevel@tonic-gate  *
310Sstevel@tonic-gate  * DESCRIPTION: RAID driver source file containing routines related to replay
320Sstevel@tonic-gate  *		operation.
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * ROUTINES PROVIDED FOR EXTERNAL USE:
350Sstevel@tonic-gate  *		raid_replay() - replay all the pre write entries in the unit.
360Sstevel@tonic-gate  */
370Sstevel@tonic-gate 
380Sstevel@tonic-gate #include <sys/param.h>
390Sstevel@tonic-gate #include <sys/systm.h>
400Sstevel@tonic-gate #include <sys/conf.h>
410Sstevel@tonic-gate #include <sys/file.h>
420Sstevel@tonic-gate #include <sys/user.h>
430Sstevel@tonic-gate #include <sys/uio.h>
440Sstevel@tonic-gate #include <sys/t_lock.h>
450Sstevel@tonic-gate #include <sys/buf.h>
460Sstevel@tonic-gate #include <sys/dkio.h>
470Sstevel@tonic-gate #include <sys/vtoc.h>
480Sstevel@tonic-gate #include <sys/kmem.h>
490Sstevel@tonic-gate #include <vm/page.h>
500Sstevel@tonic-gate #include <sys/sysmacros.h>
510Sstevel@tonic-gate #include <sys/types.h>
520Sstevel@tonic-gate #include <sys/mkdev.h>
530Sstevel@tonic-gate #include <sys/stat.h>
540Sstevel@tonic-gate #include <sys/open.h>
550Sstevel@tonic-gate #include <sys/modctl.h>
560Sstevel@tonic-gate #include <sys/ddi.h>
570Sstevel@tonic-gate #include <sys/sunddi.h>
580Sstevel@tonic-gate 
590Sstevel@tonic-gate #include <sys/lvm/md_raid.h>
600Sstevel@tonic-gate 
610Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h>
620Sstevel@tonic-gate #include <sys/sysevent/svm.h>
630Sstevel@tonic-gate 
640Sstevel@tonic-gate /* functions forward declarations */
/*
 * raid_replay_error() is called by raid_pw_read() and raid_pw_write() when
 * biowait() reports a failure; its definition is not in this part of the
 * file.
 */
650Sstevel@tonic-gate static int	raid_replay_error(mr_unit_t *un, int column);
660Sstevel@tonic-gate 
/*
 * Count of replay list entries: incremented by enq_rplylst() each time it
 * allocates a new entry and decremented by rpl_delete() each time it frees
 * one.
 */
670Sstevel@tonic-gate int		raid_total_rply_entries = 0;
680Sstevel@tonic-gate 
/*
 * NAMES:	raid_rply_dealloc, raid_rply_alloc
 * DESCRIPTION: RAID metadevice replay buffer allocation/deallocation routines
 * PARAMETERS:	mr_unit_t *un - pointer to the unit structure
 *		raid_rplybuf_t **bufs - address of the pointer to the array
 *					of per-column replay buffers
 *		raid_rplybuf_t *rwbuf1 - first read/write replay buffer
 *		raid_rplybuf_t *rwbuf2 - second read/write replay buffer
 * RETURNS:
 */
760Sstevel@tonic-gate static void
raid_rply_dealloc(mr_unit_t * un,raid_rplybuf_t ** bufs,raid_rplybuf_t * rwbuf1,raid_rplybuf_t * rwbuf2)770Sstevel@tonic-gate raid_rply_dealloc(mr_unit_t *un,
780Sstevel@tonic-gate 		raid_rplybuf_t **bufs,
790Sstevel@tonic-gate 		raid_rplybuf_t *rwbuf1,
800Sstevel@tonic-gate 		raid_rplybuf_t *rwbuf2)
810Sstevel@tonic-gate {
820Sstevel@tonic-gate 	int	i;
830Sstevel@tonic-gate 	raid_rplybuf_t *tmp;
840Sstevel@tonic-gate 
850Sstevel@tonic-gate 	for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) {
860Sstevel@tonic-gate 		if (tmp->rpl_data) {
870Sstevel@tonic-gate 			kmem_free(tmp->rpl_data, DEV_BSIZE);
880Sstevel@tonic-gate 			tmp->rpl_data = NULL;
890Sstevel@tonic-gate 		}
900Sstevel@tonic-gate 		if (tmp->rpl_buf) {
910Sstevel@tonic-gate 			kmem_free(tmp->rpl_buf, sizeof (buf_t));
920Sstevel@tonic-gate 			tmp->rpl_buf = NULL;
930Sstevel@tonic-gate 		}
940Sstevel@tonic-gate 	}
950Sstevel@tonic-gate 	kmem_free(*bufs, sizeof (raid_rplybuf_t) * un->un_totalcolumncnt);
960Sstevel@tonic-gate 	*bufs = NULL;
970Sstevel@tonic-gate 	if (rwbuf1->rpl_data) {
980Sstevel@tonic-gate 		kmem_free(rwbuf1->rpl_data, dbtob(un->un_iosize));
990Sstevel@tonic-gate 		rwbuf1->rpl_data = NULL;
1000Sstevel@tonic-gate 	}
1010Sstevel@tonic-gate 	if (rwbuf1->rpl_buf) {
1020Sstevel@tonic-gate 		kmem_free((caddr_t)rwbuf1->rpl_buf, sizeof (buf_t));
1030Sstevel@tonic-gate 		rwbuf1->rpl_buf = NULL;
1040Sstevel@tonic-gate 	}
1050Sstevel@tonic-gate 	if (rwbuf2->rpl_data) {
1060Sstevel@tonic-gate 		kmem_free(rwbuf2->rpl_data, dbtob(un->un_iosize));
1070Sstevel@tonic-gate 		rwbuf2->rpl_data = NULL;
1080Sstevel@tonic-gate 	}
1090Sstevel@tonic-gate 	if (rwbuf2->rpl_buf) {
1100Sstevel@tonic-gate 		kmem_free((caddr_t)rwbuf2->rpl_buf, sizeof (buf_t));
1110Sstevel@tonic-gate 		rwbuf2->rpl_buf = NULL;
1120Sstevel@tonic-gate 	}
1130Sstevel@tonic-gate }
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate static void
raid_rply_alloc(mr_unit_t * un,raid_rplybuf_t ** bufs,raid_rplybuf_t * rwbuf1,raid_rplybuf_t * rwbuf2)1160Sstevel@tonic-gate raid_rply_alloc(mr_unit_t *un,
1170Sstevel@tonic-gate 		raid_rplybuf_t **bufs,
1180Sstevel@tonic-gate 		raid_rplybuf_t *rwbuf1,
1190Sstevel@tonic-gate 		raid_rplybuf_t *rwbuf2)
1200Sstevel@tonic-gate {
1210Sstevel@tonic-gate 	int		i;
1220Sstevel@tonic-gate 	raid_rplybuf_t *tmp;
1230Sstevel@tonic-gate 	buf_t		*bp;
1240Sstevel@tonic-gate 
1250Sstevel@tonic-gate 	/* intialization */
1260Sstevel@tonic-gate 	*bufs = kmem_zalloc(sizeof (raid_rplybuf_t) * un->un_totalcolumncnt,
1270Sstevel@tonic-gate 	    KM_SLEEP);
1280Sstevel@tonic-gate 	ASSERT(*bufs != NULL);
1290Sstevel@tonic-gate 	bzero((caddr_t)rwbuf1, sizeof (raid_rplybuf_t));
1300Sstevel@tonic-gate 	bzero((caddr_t)rwbuf2, sizeof (raid_rplybuf_t));
1310Sstevel@tonic-gate 
1320Sstevel@tonic-gate 	/* allocate all the buffers required for the replay processing */
1330Sstevel@tonic-gate 	for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) {
1340Sstevel@tonic-gate 		tmp->rpl_data = kmem_zalloc(DEV_BSIZE, KM_SLEEP);
1350Sstevel@tonic-gate 		ASSERT(tmp->rpl_data != NULL);
1360Sstevel@tonic-gate 		tmp->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
1370Sstevel@tonic-gate 		ASSERT(tmp->rpl_buf != NULL);
1380Sstevel@tonic-gate 		bp = (buf_t *)tmp->rpl_buf;
1390Sstevel@tonic-gate 		bp->b_back = bp;
1400Sstevel@tonic-gate 		bp->b_forw = bp;
1410Sstevel@tonic-gate 		bp->b_flags = B_BUSY;
1420Sstevel@tonic-gate 		bp->b_offset = -1;
1430Sstevel@tonic-gate 		/* Initialize semaphores */
1440Sstevel@tonic-gate 		sema_init(&bp->b_io, 0, NULL,
1450Sstevel@tonic-gate 			SEMA_DEFAULT, NULL);
1460Sstevel@tonic-gate 		sema_init(&bp->b_sem, 0, NULL,
1470Sstevel@tonic-gate 			SEMA_DEFAULT, NULL);
1480Sstevel@tonic-gate 	}
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate 	rwbuf1->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP);
1510Sstevel@tonic-gate 	ASSERT(rwbuf1->rpl_data != NULL);
1520Sstevel@tonic-gate 	rwbuf1->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
1530Sstevel@tonic-gate 	ASSERT(rwbuf1->rpl_buf != NULL);
1540Sstevel@tonic-gate 	rwbuf2->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP);
1550Sstevel@tonic-gate 	ASSERT(rwbuf2->rpl_data != NULL);
1560Sstevel@tonic-gate 	rwbuf2->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
1570Sstevel@tonic-gate 	ASSERT(rwbuf2->rpl_buf != NULL);
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 	bp = (buf_t *)rwbuf1->rpl_buf;
1600Sstevel@tonic-gate 	bp->b_back = bp;
1610Sstevel@tonic-gate 	bp->b_forw = bp;
1620Sstevel@tonic-gate 	bp->b_flags = B_BUSY;
1630Sstevel@tonic-gate 	bp->b_offset = -1;
1640Sstevel@tonic-gate 	/* Initialize semaphores */
1650Sstevel@tonic-gate 	sema_init(&bp->b_io, 0, NULL,
1660Sstevel@tonic-gate 		SEMA_DEFAULT, NULL);
1670Sstevel@tonic-gate 	sema_init(&bp->b_sem, 0, NULL,
1680Sstevel@tonic-gate 		SEMA_DEFAULT, NULL);
1690Sstevel@tonic-gate 	bp = (buf_t *)rwbuf2->rpl_buf;
1700Sstevel@tonic-gate 	bp->b_back = bp;
1710Sstevel@tonic-gate 	bp->b_forw = bp;
1720Sstevel@tonic-gate 	bp->b_flags = B_BUSY;
1730Sstevel@tonic-gate 	bp->b_offset = -1;
1740Sstevel@tonic-gate 	/* Initialize semaphores */
1750Sstevel@tonic-gate 	sema_init(&bp->b_io, 0, NULL,
1760Sstevel@tonic-gate 		SEMA_DEFAULT, NULL);
1770Sstevel@tonic-gate 	sema_init(&bp->b_sem, 0, NULL,
1780Sstevel@tonic-gate 		SEMA_DEFAULT, NULL);
1790Sstevel@tonic-gate }
1800Sstevel@tonic-gate 
/*
 * NAMES:	rpl_insert, rpl_delete, rpl_find
 * DESCRIPTION: RAID metadevice replay list processing APIs
 * PARAMETERS:	raid_rplylst_t **listp - address of the replay list head
 *					 (rpl_insert).
 *		raid_rplylst_t *newp   - entry to insert (rpl_insert).
 *		raid_rplylst_t **prevp - link that points at the entry being
 *					 removed (rpl_delete).
 *		raid_rplylst_t *oldp   - entry to remove and free (rpl_delete).
 *		raid_rplylst_t *list   - pointer to the replay list (rpl_find).
 *		long long pw_id	       - pre-write id to look for (rpl_find).
 * RETURNS:	rpl_find returns the matching entry, or NULL if there is none.
 */
1880Sstevel@tonic-gate static void
rpl_insert(raid_rplylst_t ** listp,raid_rplylst_t * newp)1890Sstevel@tonic-gate rpl_insert(raid_rplylst_t **listp, raid_rplylst_t *newp)
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate 	raid_rplylst_t *tmp, **prevp;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	for (prevp = listp; ((tmp = *prevp) != NULL); prevp = &tmp->rpl_next) {
1940Sstevel@tonic-gate 		if (tmp->rpl_id > newp->rpl_id) {
1950Sstevel@tonic-gate 			break;
1960Sstevel@tonic-gate 		}
1970Sstevel@tonic-gate 	}
1980Sstevel@tonic-gate 	newp->rpl_next = tmp;
1990Sstevel@tonic-gate 	*prevp = newp;
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate static void
rpl_delete(raid_rplylst_t ** prevp,raid_rplylst_t * oldp)2030Sstevel@tonic-gate rpl_delete(raid_rplylst_t **prevp, raid_rplylst_t *oldp)
2040Sstevel@tonic-gate {
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate 	ASSERT((caddr_t)oldp);
2070Sstevel@tonic-gate 	raid_total_rply_entries --;
2080Sstevel@tonic-gate 	*prevp = oldp->rpl_next;
2090Sstevel@tonic-gate 	kmem_free((caddr_t)oldp, sizeof (raid_rplylst_t));
2100Sstevel@tonic-gate }
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate static raid_rplylst_t *
rpl_find(raid_rplylst_t * list,long long pw_id)2130Sstevel@tonic-gate rpl_find(raid_rplylst_t *list, long long pw_id)
2140Sstevel@tonic-gate {
2150Sstevel@tonic-gate 	raid_rplylst_t *tmp;
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 	for (tmp = list; tmp; tmp = tmp->rpl_next) {
2180Sstevel@tonic-gate 		if (pw_id == tmp->rpl_id) {
2190Sstevel@tonic-gate 			return (tmp);
2200Sstevel@tonic-gate 		}
2210Sstevel@tonic-gate 	}
2220Sstevel@tonic-gate 	return ((raid_rplylst_t *)NULL);
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate /*
2260Sstevel@tonic-gate  * NAMES:	enq_rplylst
2270Sstevel@tonic-gate  * DESCRIPTION: Enqueue a pre-write header into the replay list.
2280Sstevel@tonic-gate  * PARAMETERS:	raid_rplylst_t *list - pointer to the replay list.
2290Sstevel@tonic-gate  *		raid_pwhdr_t   *pwptr - pointer to a pre-write header.
2300Sstevel@tonic-gate  * RETURNS:
2310Sstevel@tonic-gate  */
2320Sstevel@tonic-gate static void
enq_rplylst(raid_rplylst_t ** listp,raid_pwhdr_t * pwhp,uint_t slot,int column)2330Sstevel@tonic-gate enq_rplylst(raid_rplylst_t **listp, raid_pwhdr_t *pwhp,
2340Sstevel@tonic-gate 		uint_t slot, int column)
2350Sstevel@tonic-gate {
2360Sstevel@tonic-gate 	raid_rplylst_t *newp, *oldp;
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 	/* check if the pre-write existed in the list */
2390Sstevel@tonic-gate 	if ((pwhp->rpw_colcount <= 2) &&
2400Sstevel@tonic-gate 	    (oldp = rpl_find(*listp, pwhp->rpw_id))) {
2410Sstevel@tonic-gate 		bcopy((caddr_t)pwhp, (caddr_t)&oldp->rpl_pwhdr2,
2420Sstevel@tonic-gate 			sizeof (raid_pwhdr_t));
2430Sstevel@tonic-gate 		oldp->rpl_slot2   = slot;
2440Sstevel@tonic-gate 		oldp->rpl_column2 = column;
2450Sstevel@tonic-gate 	} else {
2460Sstevel@tonic-gate 		raid_total_rply_entries ++;
2470Sstevel@tonic-gate 		newp = (raid_rplylst_t *)kmem_zalloc(sizeof (raid_rplylst_t),
2480Sstevel@tonic-gate 		    KM_SLEEP);
2490Sstevel@tonic-gate 		ASSERT(newp != NULL);
2500Sstevel@tonic-gate 		bcopy((caddr_t)pwhp, (caddr_t)&newp->rpl_pwhdr1,
2510Sstevel@tonic-gate 			sizeof (raid_pwhdr_t));
2520Sstevel@tonic-gate 		bzero((caddr_t)&newp->rpl_pwhdr2, sizeof (raid_pwhdr_t));
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate 		newp->rpl_id = pwhp->rpw_id;
2550Sstevel@tonic-gate 		newp->rpl_column1 = column;
2560Sstevel@tonic-gate 		newp->rpl_slot1 = slot;
2570Sstevel@tonic-gate 		newp->rpl_next = (raid_rplylst_t *)NULL;
2580Sstevel@tonic-gate 		newp->rpl_colcnt = pwhp->rpw_colcount;
2590Sstevel@tonic-gate 		rpl_insert(listp, newp);
2600Sstevel@tonic-gate 	}
2610Sstevel@tonic-gate }
2620Sstevel@tonic-gate 
/*
 * NAMES:	pw_read_done and pw_write_done
 * DESCRIPTION: b_iodone callbacks for the replay bufs.  pw_read_done is
 *		installed by raid_pwhdr_read() and raid_pw_read();
 *		pw_write_done is installed by raid_pw_write().  Each marks
 *		the buf B_DONE, then posts b_sem if the buf is B_ASYNC or
 *		b_io otherwise.
 * PARAMETERS:	buf_t *bp - buf whose I/O has completed
 * RETURNS:	0
 */
2690Sstevel@tonic-gate static int
pw_read_done(buf_t * bp)2700Sstevel@tonic-gate pw_read_done(buf_t *bp)
2710Sstevel@tonic-gate {
2720Sstevel@tonic-gate 	ASSERT(SEMA_HELD(&bp->b_sem));
2730Sstevel@tonic-gate 	ASSERT((bp->b_flags & B_DONE) == 0);
2740Sstevel@tonic-gate 
2750Sstevel@tonic-gate 	bp->b_flags |= B_DONE;
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	if (bp->b_flags & B_ASYNC)
2780Sstevel@tonic-gate 		sema_v(&bp->b_sem);
2790Sstevel@tonic-gate 	else
2800Sstevel@tonic-gate 		/* wakeup the thread waiting on this buf */
2810Sstevel@tonic-gate 		sema_v(&bp->b_io);
2820Sstevel@tonic-gate 	return (0);
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate static int
pw_write_done(buf_t * bp)2860Sstevel@tonic-gate pw_write_done(buf_t *bp)
2870Sstevel@tonic-gate {
2880Sstevel@tonic-gate 	ASSERT(SEMA_HELD(&bp->b_sem));
2890Sstevel@tonic-gate 	ASSERT((bp->b_flags & B_DONE) == 0);
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 	bp->b_flags |= B_DONE;
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	if (bp->b_flags & B_ASYNC)
2940Sstevel@tonic-gate 		sema_v(&bp->b_sem);
2950Sstevel@tonic-gate 	else
2960Sstevel@tonic-gate 		/* wakeup the thread waiting on this buf */
2970Sstevel@tonic-gate 		sema_v(&bp->b_io);
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	return (0);
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate /*
3030Sstevel@tonic-gate  * NAMES:	raid_pwhdr_read
3040Sstevel@tonic-gate  * DESCRIPTION: issue a syncronous read to read a pre-write header
3050Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to the unit structure
3060Sstevel@tonic-gate  *		int	pw_slot - pre-write entry slot number
3070Sstevel@tonic-gate  *		int	column	- column number for the pre-write entry
3080Sstevel@tonic-gate  *		raid_rplybuf_t *bufp - pointer to the replay buffer structure
3090Sstevel@tonic-gate  * RETURNS:
3100Sstevel@tonic-gate  */
3110Sstevel@tonic-gate static void
raid_pwhdr_read(mr_unit_t * un,int pw_slot,int column,raid_rplybuf_t * bufp)3120Sstevel@tonic-gate raid_pwhdr_read(mr_unit_t *un, int pw_slot, int column, raid_rplybuf_t *bufp)
3130Sstevel@tonic-gate {
3140Sstevel@tonic-gate 	buf_t		*bp;
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	/* set up pointers from raid_rplybuf_t *bufp */
3170Sstevel@tonic-gate 	bp = (buf_t *)bufp->rpl_buf;
3180Sstevel@tonic-gate 
3190Sstevel@tonic-gate 	/* calculate the data address or block number */
3200Sstevel@tonic-gate 	bp->b_un.b_addr = bufp->rpl_data;
3210Sstevel@tonic-gate 	bp->b_lblkno = un->un_column[column].un_pwstart +
3220Sstevel@tonic-gate 		pw_slot * un->un_iosize;
3230Sstevel@tonic-gate 	bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev);
3240Sstevel@tonic-gate 	bp->b_bufsize = DEV_BSIZE;
3250Sstevel@tonic-gate 	bp->b_bcount = DEV_BSIZE;
3260Sstevel@tonic-gate 	bp->b_flags  = (B_READ | B_BUSY);
3270Sstevel@tonic-gate 	bp->b_iodone = pw_read_done;
3280Sstevel@tonic-gate 	(void) md_call_strategy(bp, 0, NULL);
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate /*
3320Sstevel@tonic-gate  * NAMES:	raid_pw_read
3330Sstevel@tonic-gate  * DESCRIPTION: issue a syncronous read to read a pre-write entry
3340Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t	*un    - pointer to the unit structure
3350Sstevel@tonic-gate  *		int		column - column number for the pre-write entry
3360Sstevel@tonic-gate  *		u_int		slot   - pre-write entry slot number
3370Sstevel@tonic-gate  *		raid_rplybuf_t	*bufp  - pointer to the replay buffer structure
3380Sstevel@tonic-gate  * RETURNS:
3390Sstevel@tonic-gate  */
3400Sstevel@tonic-gate static int
raid_pw_read(mr_unit_t * un,int column,uint_t slot,raid_rplybuf_t * bufp)3410Sstevel@tonic-gate raid_pw_read(mr_unit_t *un, int column, uint_t slot, raid_rplybuf_t *bufp)
3420Sstevel@tonic-gate {
3430Sstevel@tonic-gate 	buf_t	*bp;
3440Sstevel@tonic-gate 	int	error;
3450Sstevel@tonic-gate 	uint_t	blkcnt  = un->un_iosize;
3460Sstevel@tonic-gate 	uint_t	bytecnt = blkcnt * DEV_BSIZE;
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate 	/* if this column is no longer accessible, return */
3490Sstevel@tonic-gate 	if (!COLUMN_ISUP(un, column))
3500Sstevel@tonic-gate 		return (RAID_RPLY_COMPREPLAY);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	/* set up pointers from raid_rplybuf_t *bufp */
3530Sstevel@tonic-gate 	bp = (buf_t *)bufp->rpl_buf;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	/* calculate the data address or block number */
3560Sstevel@tonic-gate 	bp->b_un.b_addr = bufp->rpl_data;
3570Sstevel@tonic-gate 	bp->b_bufsize = bytecnt;
3580Sstevel@tonic-gate 	bp->b_bcount = bytecnt;
3590Sstevel@tonic-gate 	bp->b_flags = (B_READ | B_BUSY);
3600Sstevel@tonic-gate 	bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev);
3610Sstevel@tonic-gate 	bp->b_lblkno = un->un_column[column].un_pwstart + (slot * blkcnt);
3620Sstevel@tonic-gate 	bp->b_iodone = pw_read_done;
3630Sstevel@tonic-gate 	(void) md_call_strategy(bp, 0, NULL);
3640Sstevel@tonic-gate 	if (biowait(bp)) {
3650Sstevel@tonic-gate 		error = raid_replay_error(un, column);
3660Sstevel@tonic-gate 		return (error);
3670Sstevel@tonic-gate 	}
3680Sstevel@tonic-gate 	return (0);
3690Sstevel@tonic-gate }
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate /*
3720Sstevel@tonic-gate  * NAMES:	raid_pw_write
3730Sstevel@tonic-gate  * DESCRIPTION: issue a syncronous write to write a pre-write entry
3740Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to the unit structure
3750Sstevel@tonic-gate  *		int	column	- column number for the pre-write entry
3760Sstevel@tonic-gate  *		raid_pwhdr_t   *pwhp - needed for some infos about the pw header
3770Sstevel@tonic-gate  *		raid_rplybuf_t *bufp - pointer to the replay buffer structure
3780Sstevel@tonic-gate  * RETURNS:
3790Sstevel@tonic-gate  */
3800Sstevel@tonic-gate static int
raid_pw_write(mr_unit_t * un,int column,raid_pwhdr_t * pwhp,raid_rplybuf_t * bufp)3810Sstevel@tonic-gate raid_pw_write(mr_unit_t *un, int column, raid_pwhdr_t *pwhp,
3820Sstevel@tonic-gate     raid_rplybuf_t *bufp)
3830Sstevel@tonic-gate {
3840Sstevel@tonic-gate 	buf_t	 *bp;
3850Sstevel@tonic-gate 	int	 error;
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate 	/* if this column is no longer accessible, return */
3880Sstevel@tonic-gate 	if (!COLUMN_ISUP(un, column))
3890Sstevel@tonic-gate 		return (RAID_RPLY_COMPREPLAY);
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 	/* set up pointers from raid_rplybuf_t *bufp */
3920Sstevel@tonic-gate 	bp = (buf_t *)bufp->rpl_buf;
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate 	/* calculate the data address or block number */
3950Sstevel@tonic-gate 	bp->b_un.b_addr = bufp->rpl_data + DEV_BSIZE;
3960Sstevel@tonic-gate 	bp->b_bufsize = dbtob(pwhp->rpw_blkcnt);
3970Sstevel@tonic-gate 	bp->b_bcount = dbtob(pwhp->rpw_blkcnt);
3980Sstevel@tonic-gate 	bp->b_flags = (B_WRITE | B_BUSY);
3990Sstevel@tonic-gate 	bp->b_edev  = md_dev64_to_dev(un->un_column[column].un_dev);
4000Sstevel@tonic-gate 	bp->b_lblkno = un->un_column[column].un_devstart + pwhp->rpw_blkno;
4010Sstevel@tonic-gate 	bp->b_iodone = pw_write_done;
4020Sstevel@tonic-gate 	(void) md_call_strategy(bp, 0, NULL);
4030Sstevel@tonic-gate 	if (biowait(bp)) {
4040Sstevel@tonic-gate 		error = raid_replay_error(un, column);
4050Sstevel@tonic-gate 		return (error);
4060Sstevel@tonic-gate 	}
4070Sstevel@tonic-gate 	return (0);
4080Sstevel@tonic-gate }
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate /*
4110Sstevel@tonic-gate  * NAMES:	genchecksum
4120Sstevel@tonic-gate  * DESCRIPTION: generate check sum for a pre-write entry
4130Sstevel@tonic-gate  * PARAMETERS:	caddr_t addr - where the data bytes are
4140Sstevel@tonic-gate  *		int bcount - number of bytes in the pre-write entry
4150Sstevel@tonic-gate  * RETURNS:
4160Sstevel@tonic-gate  */
4170Sstevel@tonic-gate static uint_t
genchecksum(caddr_t addr,size_t bcount)4180Sstevel@tonic-gate genchecksum(caddr_t addr, size_t bcount)
4190Sstevel@tonic-gate {
4200Sstevel@tonic-gate 	uint_t *dbuf;
4210Sstevel@tonic-gate 	size_t wordcnt;
4220Sstevel@tonic-gate 	uint_t dsum = 0;
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate 	wordcnt = bcount / sizeof (uint_t);
4250Sstevel@tonic-gate 	dbuf = (uint_t *)(void *)(addr);
4260Sstevel@tonic-gate 
4270Sstevel@tonic-gate 	while (wordcnt--) {
4280Sstevel@tonic-gate 		dsum ^= *dbuf;
4290Sstevel@tonic-gate 		dbuf++;
4300Sstevel@tonic-gate 	}
4310Sstevel@tonic-gate 	return (dsum);
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate /*
4350Sstevel@tonic-gate  * NAMES:	raid_rply_verify
4360Sstevel@tonic-gate  * DESCRIPTION: verify the pre-write entry for replay
4370Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un	- pointer to unit structure
4380Sstevel@tonic-gate  *		int col1	- column number 1
4390Sstevel@tonic-gate  *		int goodsum1	- flag to indicate good checksum
4400Sstevel@tonic-gate  *		int *do_1	- flag to indicate whether we should replay
4410Sstevel@tonic-gate  *				  the first pre-write
4420Sstevel@tonic-gate  *		int col2	- column number 2
4430Sstevel@tonic-gate  *		int goodsum2	- flag to indicate good checksum
4440Sstevel@tonic-gate  *		int *do_2	- flag to indicate whether we should replay
4450Sstevel@tonic-gate  *				  the first pre-write
4460Sstevel@tonic-gate  * RETURNS:
4470Sstevel@tonic-gate  */
4480Sstevel@tonic-gate static void
raid_rply_verify(mr_unit_t * un,int col1,int goodsum1,int * do_1,int col2,int goodsum2,int * do_2)4490Sstevel@tonic-gate raid_rply_verify(mr_unit_t *un, int col1, int goodsum1, int *do_1,
4500Sstevel@tonic-gate     int col2, int goodsum2, int *do_2)
4510Sstevel@tonic-gate {
4520Sstevel@tonic-gate 	int	good_state1 = 0;
4530Sstevel@tonic-gate 	int	good_state2 = 0;
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	*do_1 = 0; *do_2 = 0;		/* prepare for the worst */
4560Sstevel@tonic-gate 	if (COLUMN_ISUP(un, col1)) {
4570Sstevel@tonic-gate 		good_state1 = 1;
4580Sstevel@tonic-gate 	}
4590Sstevel@tonic-gate 	if (COLUMN_ISUP(un, col2)) {
4600Sstevel@tonic-gate 		good_state2 = 1;
4610Sstevel@tonic-gate 	}
4620Sstevel@tonic-gate 	if ((good_state1 & good_state2) && (goodsum1 & goodsum2)) {
4630Sstevel@tonic-gate 		/* if both columns check out, do it */
4640Sstevel@tonic-gate 		*do_1 = 1; *do_2 = 1;
4650Sstevel@tonic-gate 	} else if ((good_state1 & goodsum1) && !good_state2) {
4660Sstevel@tonic-gate 		/* if one column is okay and the other is errored, do it */
4670Sstevel@tonic-gate 		*do_1 = 1; *do_2 = 0;
4680Sstevel@tonic-gate 	} else if ((good_state2 & goodsum2) && !good_state1) {
4690Sstevel@tonic-gate 		/* if one column is okay and the other is errored, do it */
4700Sstevel@tonic-gate 		*do_2 = 1; *do_1 = 0;
4710Sstevel@tonic-gate 	}
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate /*
4750Sstevel@tonic-gate  * NAMES:	raid_rplyeach
4760Sstevel@tonic-gate  * DESCRIPTION: issue a syncronous read to read a pre-write header
4770Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to the unit structure
4780Sstevel@tonic-gate  *		raid_rplylst_t *eachp - pointer to the replay list entry
4790Sstevel@tonic-gate  *		raid_rplybuf_t *rwbuf1 - pointer to the replay buffer structure
4800Sstevel@tonic-gate  *		raid_rplybuf_t *rwbuf2 - pointer to the replay buffer structure
4810Sstevel@tonic-gate  * RETURNS:
4820Sstevel@tonic-gate  */
4830Sstevel@tonic-gate static int
raid_rplyeach(mr_unit_t * un,raid_rplylst_t * eachp,raid_rplybuf_t * rwbuf1,raid_rplybuf_t * rwbuf2)4840Sstevel@tonic-gate raid_rplyeach(
4850Sstevel@tonic-gate 	mr_unit_t	*un,
4860Sstevel@tonic-gate 	raid_rplylst_t	*eachp,
4870Sstevel@tonic-gate 	raid_rplybuf_t	*rwbuf1,
4880Sstevel@tonic-gate 	raid_rplybuf_t	*rwbuf2
4890Sstevel@tonic-gate )
4900Sstevel@tonic-gate {
4910Sstevel@tonic-gate 	raid_pwhdr_t	*pwhp1;
4920Sstevel@tonic-gate 	raid_pwhdr_t	*pwhp2;
4930Sstevel@tonic-gate 	uint_t		dsum1 = 0;
4940Sstevel@tonic-gate 	uint_t		dsum2 = 0;
4950Sstevel@tonic-gate 	int		good_pw1 = 0;
4960Sstevel@tonic-gate 	int		good_pw2 = 0;
4970Sstevel@tonic-gate 	int		do_1 = 0;
4980Sstevel@tonic-gate 	int		do_2 = 0;
4990Sstevel@tonic-gate 	int		error = 0;
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	/* First verify the normal case - two pre-write entries are all good */
5020Sstevel@tonic-gate 	if ((eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC &&
5030Sstevel@tonic-gate 	    eachp->rpl_pwhdr2.rpw_magic == RAID_PWMAGIC) &&
5040Sstevel@tonic-gate 	    (eachp->rpl_pwhdr1.rpw_blkcnt == eachp->rpl_pwhdr2.rpw_blkcnt)) {
5050Sstevel@tonic-gate 
5060Sstevel@tonic-gate 		ASSERT(eachp->rpl_pwhdr1.rpw_id == eachp->rpl_pwhdr2.rpw_id);
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 		/* read the pre-write entries */
5090Sstevel@tonic-gate 		error = raid_pw_read(un, eachp->rpl_column1,
5100Sstevel@tonic-gate 		    eachp->rpl_slot1, rwbuf1);
5110Sstevel@tonic-gate 		pwhp1 = &eachp->rpl_pwhdr1;
5120Sstevel@tonic-gate 		if (error) {
5130Sstevel@tonic-gate 			if (error != RAID_RPLY_COMPREPLAY)
5140Sstevel@tonic-gate 				return (error);
5150Sstevel@tonic-gate 			good_pw1 = FALSE;
5160Sstevel@tonic-gate 		} else {
5170Sstevel@tonic-gate 			/* generate checksum for each pre-write entry */
5180Sstevel@tonic-gate 			dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE,
5190Sstevel@tonic-gate 						dbtob(pwhp1->rpw_blkcnt));
5200Sstevel@tonic-gate 			good_pw1 = (dsum1 == pwhp1->rpw_sum);
5210Sstevel@tonic-gate 		}
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 		error = raid_pw_read(un, eachp->rpl_column2, eachp->rpl_slot2,
5240Sstevel@tonic-gate 		    rwbuf2);
5250Sstevel@tonic-gate 		pwhp2 = &eachp->rpl_pwhdr2;
5260Sstevel@tonic-gate 		if (error) {
5270Sstevel@tonic-gate 			if (error != RAID_RPLY_COMPREPLAY)
5280Sstevel@tonic-gate 				return (error);
5290Sstevel@tonic-gate 			good_pw2 = FALSE;
5300Sstevel@tonic-gate 		} else {
5310Sstevel@tonic-gate 			/* generate checksum for pre-write entry */
5320Sstevel@tonic-gate 			dsum2 = genchecksum(rwbuf2->rpl_data + DEV_BSIZE,
5330Sstevel@tonic-gate 						dbtob(pwhp2->rpw_blkcnt));
5340Sstevel@tonic-gate 			good_pw2 = (dsum2 == pwhp2->rpw_sum);
5350Sstevel@tonic-gate 		}
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 		/* verify the checksums and states */
5380Sstevel@tonic-gate 		raid_rply_verify(un, eachp->rpl_column1, good_pw1, &do_1,
5390Sstevel@tonic-gate 			eachp->rpl_column2, good_pw2, &do_2);
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 		/* write (replay) the pre-write entries */
5420Sstevel@tonic-gate 		if (do_1) {
5430Sstevel@tonic-gate 			error = raid_pw_write(un, eachp->rpl_column1,
5440Sstevel@tonic-gate 			    &eachp->rpl_pwhdr1, rwbuf1);
5450Sstevel@tonic-gate 			if (error && (error != RAID_RPLY_COMPREPLAY)) {
5460Sstevel@tonic-gate 				return (error);
5470Sstevel@tonic-gate 			}
5480Sstevel@tonic-gate 		}
5490Sstevel@tonic-gate 		if (do_2) {
5500Sstevel@tonic-gate 			error = raid_pw_write(un, eachp->rpl_column2,
5510Sstevel@tonic-gate 			    &eachp->rpl_pwhdr2, rwbuf2);
5520Sstevel@tonic-gate 			if (error && (error != RAID_RPLY_COMPREPLAY)) {
5530Sstevel@tonic-gate 				return (error);
5540Sstevel@tonic-gate 			}
5550Sstevel@tonic-gate 		}
5560Sstevel@tonic-gate 		return (0);
5570Sstevel@tonic-gate 	}
5580Sstevel@tonic-gate 	if (eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC) {
5590Sstevel@tonic-gate 		/*
5600Sstevel@tonic-gate 		 * if partner was errored at time of write
5610Sstevel@tonic-gate 		 * or due to open or replay, replay this entry
5620Sstevel@tonic-gate 		 */
5630Sstevel@tonic-gate 		if ((eachp->rpl_pwhdr1.rpw_columnnum == -1) ||
5640Sstevel@tonic-gate 		    (! COLUMN_ISUP(un, eachp->rpl_pwhdr1.rpw_columnnum))) {
5650Sstevel@tonic-gate 			/* read the pre-write entry */
5660Sstevel@tonic-gate 			error = raid_pw_read(un, eachp->rpl_column1,
5670Sstevel@tonic-gate 			    eachp->rpl_slot1, rwbuf1);
5680Sstevel@tonic-gate 			if (error)
5690Sstevel@tonic-gate 				return (error);
5700Sstevel@tonic-gate 			/* generate checksum for the pre-write entry */
5710Sstevel@tonic-gate 			pwhp1 = &eachp->rpl_pwhdr1;
5720Sstevel@tonic-gate 			dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE,
5730Sstevel@tonic-gate 						dbtob(pwhp1->rpw_blkcnt));
5740Sstevel@tonic-gate 			if (dsum1 == pwhp1->rpw_sum) {
5750Sstevel@tonic-gate 				error = raid_pw_write(un, eachp->rpl_column1,
5760Sstevel@tonic-gate 						&eachp->rpl_pwhdr1, rwbuf1);
5770Sstevel@tonic-gate 				if (error && (error != RAID_RPLY_COMPREPLAY)) {
5780Sstevel@tonic-gate 					return (error);
5790Sstevel@tonic-gate 				}
5800Sstevel@tonic-gate 			}
5810Sstevel@tonic-gate 		}
5820Sstevel@tonic-gate 		return (0);
5830Sstevel@tonic-gate 	}
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	return (0);
5860Sstevel@tonic-gate }
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate static int
replay_line(mr_unit_t * un,raid_rplylst_t * eachp,raid_rplybuf_t * rplybuf)5890Sstevel@tonic-gate replay_line(mr_unit_t *un, raid_rplylst_t *eachp, raid_rplybuf_t *rplybuf)
5900Sstevel@tonic-gate {
5910Sstevel@tonic-gate 	raid_pwhdr_t	*pwhdr1, *pwhdr2;
5920Sstevel@tonic-gate 	raid_rplylst_t	*eachpn;
5930Sstevel@tonic-gate 	int		i;
5940Sstevel@tonic-gate 	int		cnt;
5950Sstevel@tonic-gate 	diskaddr_t	blkno;
5960Sstevel@tonic-gate 	uint_t		blkcnt;
5970Sstevel@tonic-gate 	long long	id;
5980Sstevel@tonic-gate 	int		dsum;
5990Sstevel@tonic-gate 	int		error;
6000Sstevel@tonic-gate 	int		colcnt, col, col2;
6010Sstevel@tonic-gate 	int		down;
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 	if (eachp->rpl_id == 0)
6040Sstevel@tonic-gate 		return (0);
6050Sstevel@tonic-gate 	/*
6060Sstevel@tonic-gate 	 * check: 1 - enough equal ids
6070Sstevel@tonic-gate 	 *	  2 - all have same columncnt
6080Sstevel@tonic-gate 	 *	  3 - all have same blkno
6090Sstevel@tonic-gate 	 *	  4 - all have same blkcnt
6100Sstevel@tonic-gate 	 *
6110Sstevel@tonic-gate 	 * read each and check the checksum
6120Sstevel@tonic-gate 	 * write each
6130Sstevel@tonic-gate 	 */
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	cnt = eachp->rpl_colcnt;
6160Sstevel@tonic-gate 	id = eachp->rpl_id;
6170Sstevel@tonic-gate 	pwhdr1 = &eachp->rpl_pwhdr1;
6180Sstevel@tonic-gate 	blkno = pwhdr1->rpw_blkno;
6190Sstevel@tonic-gate 	blkcnt = pwhdr1->rpw_blkcnt;
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	error = raid_pw_read(un, eachp->rpl_column1, eachp->rpl_slot1, rplybuf);
6220Sstevel@tonic-gate 	dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE,
6230Sstevel@tonic-gate 	    dbtob(pwhdr1->rpw_blkcnt));
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 	if (dsum != pwhdr1->rpw_sum)
6260Sstevel@tonic-gate 		return (0);
6270Sstevel@tonic-gate 
6280Sstevel@tonic-gate 	if (error) {
6290Sstevel@tonic-gate 		if (error == RAID_RPLY_COMPREPLAY)
6300Sstevel@tonic-gate 			return (0);
6310Sstevel@tonic-gate 		else
6320Sstevel@tonic-gate 			return (1);
6330Sstevel@tonic-gate 	}
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 	eachpn = eachp->rpl_next;
6360Sstevel@tonic-gate 	for (i = 1; i < cnt; i++) {
6370Sstevel@tonic-gate 		if (eachpn == NULL)
6380Sstevel@tonic-gate 			break;
6390Sstevel@tonic-gate 		col2 = eachpn->rpl_column1;
6400Sstevel@tonic-gate 		ASSERT(col2 < un->un_totalcolumncnt);
6410Sstevel@tonic-gate 		pwhdr2 = &eachpn->rpl_pwhdr1;
6420Sstevel@tonic-gate 		if ((pwhdr2->rpw_blkno != blkno) ||
6430Sstevel@tonic-gate 		    (pwhdr2->rpw_blkcnt != blkcnt) ||
6440Sstevel@tonic-gate 		    (eachpn->rpl_id != id) ||
6450Sstevel@tonic-gate 		    (pwhdr2->rpw_colcount != cnt)) {
6460Sstevel@tonic-gate 			return (0);
6470Sstevel@tonic-gate 		}
6480Sstevel@tonic-gate 
6490Sstevel@tonic-gate 		error = raid_pw_read(un, col2, eachpn->rpl_slot1, rplybuf);
6500Sstevel@tonic-gate 		dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE,
6510Sstevel@tonic-gate 		    dbtob(pwhdr2->rpw_blkcnt));
6520Sstevel@tonic-gate 		if (dsum != pwhdr2->rpw_sum)
6530Sstevel@tonic-gate 			return (0);
6540Sstevel@tonic-gate 		eachpn = eachpn->rpl_next;
6550Sstevel@tonic-gate 	}
6560Sstevel@tonic-gate 	colcnt = i;
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 	if (error)
6590Sstevel@tonic-gate 		return (0);
6600Sstevel@tonic-gate 
6610Sstevel@tonic-gate 	down = raid_state_cnt(un, RCS_ERRED);
6620Sstevel@tonic-gate 	if ((i != un->un_totalcolumncnt) &&
6630Sstevel@tonic-gate 	    (i != (un->un_totalcolumncnt - down)))
6640Sstevel@tonic-gate 		return (0);
6650Sstevel@tonic-gate 
6660Sstevel@tonic-gate 	/* there ara enough columns to write correctly */
6670Sstevel@tonic-gate 	eachpn = eachp;
6680Sstevel@tonic-gate 	for (i = 0; i < colcnt; i++) {
6690Sstevel@tonic-gate 		col = eachpn->rpl_column1;
6700Sstevel@tonic-gate 		error = raid_pw_read(un, col, eachpn->rpl_slot1, rplybuf);
6710Sstevel@tonic-gate 		error = raid_pw_write(un, col, &eachpn->rpl_pwhdr1, rplybuf);
6720Sstevel@tonic-gate 		eachpn->rpl_id = 0;
6730Sstevel@tonic-gate 		if (error && (error != RAID_RPLY_COMPREPLAY))
6740Sstevel@tonic-gate 			return (1);
6750Sstevel@tonic-gate 		eachpn = eachpn->rpl_next;
6760Sstevel@tonic-gate 	}
6770Sstevel@tonic-gate 	return (0);
6780Sstevel@tonic-gate }
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate /*
6810Sstevel@tonic-gate  * NAMES:	raid_replay_error
6820Sstevel@tonic-gate  * DESCRIPTION: RAID metadevice replay error handling routine (TBD)
6830Sstevel@tonic-gate  * PARAMETERS:
6840Sstevel@tonic-gate  * RETURNS:
6850Sstevel@tonic-gate  */
6860Sstevel@tonic-gate static int
raid_replay_error(mr_unit_t * un,int column)6870Sstevel@tonic-gate raid_replay_error(mr_unit_t *un, int column)
6880Sstevel@tonic-gate {
6890Sstevel@tonic-gate 	int	error = RAID_RPLY_COMPREPLAY;
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate 	raid_set_state(un, column, RCS_ERRED, 0);
6920Sstevel@tonic-gate 	raid_commit(un, NULL);
6930Sstevel@tonic-gate 
6940Sstevel@tonic-gate 	if (UNIT_STATE(un) == RUS_LAST_ERRED) {
6950Sstevel@tonic-gate 		error = RAID_RPLY_READONLY;
6960Sstevel@tonic-gate 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
6970Sstevel@tonic-gate 		    MD_UN2SET(un), MD_SID(un));
6980Sstevel@tonic-gate 	} else if (UNIT_STATE(un) == RUS_ERRED) {
6990Sstevel@tonic-gate 		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
7000Sstevel@tonic-gate 		    MD_UN2SET(un), MD_SID(un));
7010Sstevel@tonic-gate 	}
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate 	return (error);
7040Sstevel@tonic-gate }
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate /*
7070Sstevel@tonic-gate  * NAMES:	raid_replay
7080Sstevel@tonic-gate  * DESCRIPTION: RAID metadevice main replay processing routine
7090Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to an unit structure
7100Sstevel@tonic-gate  * RETURNS:
7110Sstevel@tonic-gate  */
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate int
raid_replay(mr_unit_t * un)7140Sstevel@tonic-gate raid_replay(mr_unit_t *un)
7150Sstevel@tonic-gate {
7160Sstevel@tonic-gate 	raid_rplylst_t	*rplylst = NULL;
7170Sstevel@tonic-gate 	raid_rplylst_t	**prevp, *eachp;
7180Sstevel@tonic-gate 	raid_rplybuf_t	*rplybuf;
7190Sstevel@tonic-gate 	raid_rplybuf_t	rwbuf1;
7200Sstevel@tonic-gate 	raid_rplybuf_t	rwbuf2;
7210Sstevel@tonic-gate 	mr_column_t	*colptr;
7220Sstevel@tonic-gate 	raid_pwhdr_t	pwhdr;
7230Sstevel@tonic-gate 	raid_pwhdr_t	*pwhdrp = &pwhdr;
7240Sstevel@tonic-gate 	int		error = 0;
7250Sstevel@tonic-gate 	int		i, j;
7260Sstevel@tonic-gate 	diskaddr_t	max_blkno = un->un_segsize * un->un_segsincolumn;
7270Sstevel@tonic-gate 	int		totalcolumns = un->un_totalcolumncnt;
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 	raid_rply_alloc(un, &rplybuf, &rwbuf1, &rwbuf2);
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	/* build a replay list based on the order of pre-write id */
7320Sstevel@tonic-gate 	for (i = 0; i < un->un_pwcnt; i++) {
7330Sstevel@tonic-gate 		/* issue a synchronous read for each column */
7340Sstevel@tonic-gate 		for (j = 0; j < un->un_totalcolumncnt; j++) {
7350Sstevel@tonic-gate 			if (COLUMN_ISUP(un, j)) {
7360Sstevel@tonic-gate 				raid_pwhdr_read(un, i, j, &rplybuf[j]);
7370Sstevel@tonic-gate 				/* wait for I/O completion for each column */
7380Sstevel@tonic-gate 				if (biowait((buf_t *)rplybuf[j].rpl_buf)) {
7390Sstevel@tonic-gate 					/* potential state transition */
7400Sstevel@tonic-gate 					error = raid_replay_error(un, j);
7410Sstevel@tonic-gate 					if (error == RAID_RPLY_COMPREPLAY)
7420Sstevel@tonic-gate 						continue;
7430Sstevel@tonic-gate 					else
7440Sstevel@tonic-gate 						goto replay_failed;
7450Sstevel@tonic-gate 				}
746*1623Stw21770 				if (un->c.un_revision & MD_64BIT_META_DEV) {
747*1623Stw21770 					pwhdrp = (raid_pwhdr_t *)
748*1623Stw21770 							rplybuf[j].rpl_data;
749*1623Stw21770 				} else {
7500Sstevel@tonic-gate 					RAID_CONVERT_RPW((raid_pwhdr32_od_t *)
7510Sstevel@tonic-gate 							rplybuf[j].rpl_data,
7520Sstevel@tonic-gate 							pwhdrp);
7530Sstevel@tonic-gate 				}
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate 				/* first check pre-write magic number */
7560Sstevel@tonic-gate 				if (pwhdrp->rpw_magic != RAID_PWMAGIC) {
7570Sstevel@tonic-gate 					continue;
7580Sstevel@tonic-gate 				}
7590Sstevel@tonic-gate 				if (pwhdrp->rpw_column != j) {
7600Sstevel@tonic-gate 					continue;
7610Sstevel@tonic-gate 				}
7620Sstevel@tonic-gate 				if (pwhdrp->rpw_id == (long long) 0) {
7630Sstevel@tonic-gate 					continue;
7640Sstevel@tonic-gate 				}
7650Sstevel@tonic-gate 				if (pwhdrp->rpw_blkcnt > (un->un_iosize - 1)) {
7660Sstevel@tonic-gate 					continue;
7670Sstevel@tonic-gate 				}
7680Sstevel@tonic-gate 				if (pwhdrp->rpw_blkcnt == 0) {
7690Sstevel@tonic-gate 					continue;
7700Sstevel@tonic-gate 				}
7710Sstevel@tonic-gate 				if (pwhdrp->rpw_blkno > max_blkno) {
7720Sstevel@tonic-gate 					continue;
7730Sstevel@tonic-gate 				}
7740Sstevel@tonic-gate 				if ((pwhdrp->rpw_columnnum < 0) ||
7750Sstevel@tonic-gate 				    (pwhdrp->rpw_columnnum > totalcolumns)) {
7760Sstevel@tonic-gate 					continue;
7770Sstevel@tonic-gate 				}
7780Sstevel@tonic-gate 				if (((pwhdrp->rpw_colcount != 1) &&
7790Sstevel@tonic-gate 				    (pwhdrp->rpw_colcount != 2) &&
7800Sstevel@tonic-gate 				    (pwhdrp->rpw_colcount != totalcolumns))) {
7810Sstevel@tonic-gate 					continue;
7820Sstevel@tonic-gate 				}
7830Sstevel@tonic-gate 
7840Sstevel@tonic-gate 				enq_rplylst(&rplylst, pwhdrp, i, j);
7850Sstevel@tonic-gate 			}
7860Sstevel@tonic-gate 		}
7870Sstevel@tonic-gate 	}
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 	/* replay each entry in the replay list */
7900Sstevel@tonic-gate 	prevp = &rplylst;
7910Sstevel@tonic-gate 	while ((eachp = *prevp) != NULL) {
7920Sstevel@tonic-gate 		/* zero out the pre-write headers in the buffer */
7930Sstevel@tonic-gate 		bzero((caddr_t)rwbuf1.rpl_data, sizeof (raid_pwhdr_t));
7940Sstevel@tonic-gate 		bzero((caddr_t)rwbuf2.rpl_data, sizeof (raid_pwhdr_t));
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate 		if (eachp->rpl_colcnt <= 2)
7970Sstevel@tonic-gate 			error = raid_rplyeach(un, eachp, &rwbuf1, &rwbuf2);
7980Sstevel@tonic-gate 		else
7990Sstevel@tonic-gate 			error = replay_line(un, eachp, &rwbuf1);
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 		if (error && (error != RAID_RPLY_COMPREPLAY)) {
8020Sstevel@tonic-gate 			goto replay_failed;
8030Sstevel@tonic-gate 		}
8040Sstevel@tonic-gate 
8050Sstevel@tonic-gate 		/* free the processed replay list entry */
8060Sstevel@tonic-gate 		rpl_delete(prevp, eachp);
8070Sstevel@tonic-gate 		prevp = &rplylst;
8080Sstevel@tonic-gate 	}
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate 	/* zero out all pre-write entries in this unit */
8110Sstevel@tonic-gate 	for (j = 0; j < un->un_totalcolumncnt; j++) {
8120Sstevel@tonic-gate 		if (COLUMN_ISUP(un, j)) {
8130Sstevel@tonic-gate 			colptr = &un->un_column[j];
8140Sstevel@tonic-gate 			if (init_pw_area(un, colptr->un_dev,
8150Sstevel@tonic-gate 						colptr->un_pwstart, j))
8160Sstevel@tonic-gate 				break;
8170Sstevel@tonic-gate 		}
8180Sstevel@tonic-gate 	}
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	/* deallocate all the buffer resource allocated in this routine */
8210Sstevel@tonic-gate 	raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2);
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 	return (RAID_RPLY_SUCCESS);
8240Sstevel@tonic-gate 
8250Sstevel@tonic-gate replay_failed:
8260Sstevel@tonic-gate 
8270Sstevel@tonic-gate 	/* first release the list */
8280Sstevel@tonic-gate 	prevp = &rplylst;
8290Sstevel@tonic-gate 	while ((eachp = *prevp) != NULL) {
8300Sstevel@tonic-gate 		rpl_delete(prevp, eachp);
8310Sstevel@tonic-gate 		prevp = &rplylst;
8320Sstevel@tonic-gate 	}
8330Sstevel@tonic-gate 
8340Sstevel@tonic-gate 	/* then release buffers */
8350Sstevel@tonic-gate 	raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2);
8360Sstevel@tonic-gate 
8370Sstevel@tonic-gate 	/* also reset the pre-write id variable to one */
8380Sstevel@tonic-gate 	un->un_pwid = 1;
8390Sstevel@tonic-gate 	raid_total_rply_entries = 0;
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate 	return (error);
8420Sstevel@tonic-gate }
843