xref: /onnv-gate/usr/src/uts/common/avs/ns/rdc/rdc_dev.c (revision 9093:cd587b0bd19c)
17836SJohn.Forte@Sun.COM /*
27836SJohn.Forte@Sun.COM  * CDDL HEADER START
37836SJohn.Forte@Sun.COM  *
47836SJohn.Forte@Sun.COM  * The contents of this file are subject to the terms of the
57836SJohn.Forte@Sun.COM  * Common Development and Distribution License (the "License").
67836SJohn.Forte@Sun.COM  * You may not use this file except in compliance with the License.
77836SJohn.Forte@Sun.COM  *
87836SJohn.Forte@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97836SJohn.Forte@Sun.COM  * or http://www.opensolaris.org/os/licensing.
107836SJohn.Forte@Sun.COM  * See the License for the specific language governing permissions
117836SJohn.Forte@Sun.COM  * and limitations under the License.
127836SJohn.Forte@Sun.COM  *
137836SJohn.Forte@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
147836SJohn.Forte@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157836SJohn.Forte@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
167836SJohn.Forte@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
177836SJohn.Forte@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
187836SJohn.Forte@Sun.COM  *
197836SJohn.Forte@Sun.COM  * CDDL HEADER END
207836SJohn.Forte@Sun.COM  */
217836SJohn.Forte@Sun.COM /*
22*9093SRamana.Srikanth@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237836SJohn.Forte@Sun.COM  * Use is subject to license terms.
247836SJohn.Forte@Sun.COM  */
257836SJohn.Forte@Sun.COM 
267836SJohn.Forte@Sun.COM 
277836SJohn.Forte@Sun.COM #include <sys/types.h>
287836SJohn.Forte@Sun.COM #include <sys/ksynch.h>
297836SJohn.Forte@Sun.COM #include <sys/kmem.h>
307836SJohn.Forte@Sun.COM #include <sys/errno.h>
317836SJohn.Forte@Sun.COM #include <sys/cmn_err.h>
327836SJohn.Forte@Sun.COM #include <sys/debug.h>
337836SJohn.Forte@Sun.COM #include <sys/cred.h>
347836SJohn.Forte@Sun.COM #include <sys/file.h>
357836SJohn.Forte@Sun.COM #include <sys/ddi.h>
367836SJohn.Forte@Sun.COM #include <sys/nsc_thread.h>
377836SJohn.Forte@Sun.COM #include <sys/unistat/spcs_s.h>
387836SJohn.Forte@Sun.COM #include <sys/unistat/spcs_errors.h>
397836SJohn.Forte@Sun.COM 
407836SJohn.Forte@Sun.COM #include <sys/unistat/spcs_s_k.h>
417836SJohn.Forte@Sun.COM #ifdef DS_DDICT
427836SJohn.Forte@Sun.COM #include "../contract.h"
437836SJohn.Forte@Sun.COM #endif
447836SJohn.Forte@Sun.COM 
457836SJohn.Forte@Sun.COM #include <sys/nsctl/nsctl.h>
467836SJohn.Forte@Sun.COM 
477836SJohn.Forte@Sun.COM #include <sys/sdt.h>		/* dtrace is S10 or later */
487836SJohn.Forte@Sun.COM 
497836SJohn.Forte@Sun.COM #include "rdc.h"
507836SJohn.Forte@Sun.COM #include "rdc_io.h"
517836SJohn.Forte@Sun.COM #include "rdc_bitmap.h"
527836SJohn.Forte@Sun.COM 
537836SJohn.Forte@Sun.COM /*
547836SJohn.Forte@Sun.COM  * Remote Dual Copy
557836SJohn.Forte@Sun.COM  *
567836SJohn.Forte@Sun.COM  * This file contains the nsctl io provider functionality for RDC.
577836SJohn.Forte@Sun.COM  *
587836SJohn.Forte@Sun.COM  * RDC is implemented as a simple filter module that pushes itself between
597836SJohn.Forte@Sun.COM  * user (SIMCKD, STE, etc.) and SDBC.
607836SJohn.Forte@Sun.COM  */
617836SJohn.Forte@Sun.COM 
627836SJohn.Forte@Sun.COM 
637836SJohn.Forte@Sun.COM static int _rdc_open_count;
647836SJohn.Forte@Sun.COM int	rdc_eio_nobmp = 0;
657836SJohn.Forte@Sun.COM 
667836SJohn.Forte@Sun.COM nsc_io_t *_rdc_io_hc;
677836SJohn.Forte@Sun.COM static nsc_io_t *_rdc_io_hr;
687836SJohn.Forte@Sun.COM static nsc_def_t _rdc_fd_def[], _rdc_io_def[], _rdc_ior_def[];
697836SJohn.Forte@Sun.COM 
707836SJohn.Forte@Sun.COM void _rdc_deinit_dev();
717836SJohn.Forte@Sun.COM int rdc_diskq_enqueue(rdc_k_info_t *, rdc_aio_t *);
727836SJohn.Forte@Sun.COM extern void rdc_unintercept_diskq(rdc_group_t *);
737836SJohn.Forte@Sun.COM rdc_aio_t *rdc_aio_tbuf_get(void *, void *, int, int, int, int, int);
747836SJohn.Forte@Sun.COM 
757836SJohn.Forte@Sun.COM static nsc_buf_t *_rdc_alloc_handle(void (*)(), void (*)(),
767836SJohn.Forte@Sun.COM     void (*)(), rdc_fd_t *);
777836SJohn.Forte@Sun.COM static int _rdc_free_handle(rdc_buf_t *, rdc_fd_t *);
787836SJohn.Forte@Sun.COM 
797836SJohn.Forte@Sun.COM #ifdef DEBUG
807836SJohn.Forte@Sun.COM int	rdc_overlap_cnt;
817836SJohn.Forte@Sun.COM int	rdc_overlap_hnd_cnt;
827836SJohn.Forte@Sun.COM #endif
837836SJohn.Forte@Sun.COM 
847836SJohn.Forte@Sun.COM static rdc_info_dev_t *rdc_devices;
857836SJohn.Forte@Sun.COM 
867836SJohn.Forte@Sun.COM extern int _rdc_rsrv_diskq(rdc_group_t *group);
877836SJohn.Forte@Sun.COM extern void _rdc_rlse_diskq(rdc_group_t *group);
887836SJohn.Forte@Sun.COM 
897836SJohn.Forte@Sun.COM /*
907836SJohn.Forte@Sun.COM  * _rdc_init_dev
917836SJohn.Forte@Sun.COM  *	Initialise the io provider.
927836SJohn.Forte@Sun.COM  */
937836SJohn.Forte@Sun.COM 
947836SJohn.Forte@Sun.COM int
_rdc_init_dev()957836SJohn.Forte@Sun.COM _rdc_init_dev()
967836SJohn.Forte@Sun.COM {
977836SJohn.Forte@Sun.COM 	_rdc_io_hc = nsc_register_io("rdc-high-cache",
98*9093SRamana.Srikanth@Sun.COM 	    NSC_RDCH_ID|NSC_REFCNT|NSC_FILTER, _rdc_io_def);
997836SJohn.Forte@Sun.COM 	if (_rdc_io_hc == NULL)
100*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc: nsc_register_io (high, cache) failed.");
1017836SJohn.Forte@Sun.COM 
1027836SJohn.Forte@Sun.COM 	_rdc_io_hr = nsc_register_io("rdc-high-raw",
103*9093SRamana.Srikanth@Sun.COM 	    NSC_RDCHR_ID|NSC_REFCNT|NSC_FILTER, _rdc_ior_def);
1047836SJohn.Forte@Sun.COM 	if (_rdc_io_hr == NULL)
105*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc: nsc_register_io (high, raw) failed.");
1067836SJohn.Forte@Sun.COM 
1077836SJohn.Forte@Sun.COM 	if (!_rdc_io_hc || !_rdc_io_hr) {
1087836SJohn.Forte@Sun.COM 		_rdc_deinit_dev();
1097836SJohn.Forte@Sun.COM 		return (ENOMEM);
1107836SJohn.Forte@Sun.COM 	}
1117836SJohn.Forte@Sun.COM 
1127836SJohn.Forte@Sun.COM 	return (0);
1137836SJohn.Forte@Sun.COM }
1147836SJohn.Forte@Sun.COM 
1157836SJohn.Forte@Sun.COM 
1167836SJohn.Forte@Sun.COM /*
1177836SJohn.Forte@Sun.COM  * _rdc_deinit_dev
1187836SJohn.Forte@Sun.COM  *	De-initialise the io provider.
1197836SJohn.Forte@Sun.COM  *
1207836SJohn.Forte@Sun.COM  */
1217836SJohn.Forte@Sun.COM 
1227836SJohn.Forte@Sun.COM void
_rdc_deinit_dev()1237836SJohn.Forte@Sun.COM _rdc_deinit_dev()
1247836SJohn.Forte@Sun.COM {
1257836SJohn.Forte@Sun.COM 	int rc;
1267836SJohn.Forte@Sun.COM 
1277836SJohn.Forte@Sun.COM 	if (_rdc_io_hc) {
1287836SJohn.Forte@Sun.COM 		if ((rc = nsc_unregister_io(_rdc_io_hc, 0)) != 0)
1297836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
130*9093SRamana.Srikanth@Sun.COM 			    "!rdc: nsc_unregister_io (high, cache) failed: %d",
1317836SJohn.Forte@Sun.COM 			    rc);
1327836SJohn.Forte@Sun.COM 	}
1337836SJohn.Forte@Sun.COM 
1347836SJohn.Forte@Sun.COM 	if (_rdc_io_hr) {
1357836SJohn.Forte@Sun.COM 		if ((rc = nsc_unregister_io(_rdc_io_hr, 0)) != 0)
1367836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
137*9093SRamana.Srikanth@Sun.COM 			    "!rdc: nsc_unregister_io (high, raw) failed: %d",
1387836SJohn.Forte@Sun.COM 			    rc);
1397836SJohn.Forte@Sun.COM 	}
1407836SJohn.Forte@Sun.COM }
1417836SJohn.Forte@Sun.COM 
1427836SJohn.Forte@Sun.COM 
1437836SJohn.Forte@Sun.COM /*
1447836SJohn.Forte@Sun.COM  * rdc_idev_open
1457836SJohn.Forte@Sun.COM  * - Open the nsctl file descriptors for the data devices.
1467836SJohn.Forte@Sun.COM  *
1477836SJohn.Forte@Sun.COM  * Must be called with rdc_conf_lock held.
1487836SJohn.Forte@Sun.COM  * id_sets is protected by rdc_conf_lock.
1497836SJohn.Forte@Sun.COM  */
1507836SJohn.Forte@Sun.COM static rdc_info_dev_t *
rdc_idev_open(rdc_k_info_t * krdc,char * pathname,int * rc)1517836SJohn.Forte@Sun.COM rdc_idev_open(rdc_k_info_t *krdc, char *pathname, int *rc)
1527836SJohn.Forte@Sun.COM {
1537836SJohn.Forte@Sun.COM 	rdc_info_dev_t *dp;
1547836SJohn.Forte@Sun.COM 
1557836SJohn.Forte@Sun.COM 	ASSERT(MUTEX_HELD(&rdc_conf_lock));
1567836SJohn.Forte@Sun.COM 
1577836SJohn.Forte@Sun.COM 	for (dp = rdc_devices; dp; dp = dp->id_next) {
1587836SJohn.Forte@Sun.COM 		if (dp->id_cache_dev.bi_fd &&
1597836SJohn.Forte@Sun.COM 		    strcmp(pathname, nsc_pathname(dp->id_cache_dev.bi_fd)) == 0)
1607836SJohn.Forte@Sun.COM 			break;
1617836SJohn.Forte@Sun.COM 	}
1627836SJohn.Forte@Sun.COM 
1637836SJohn.Forte@Sun.COM 	if (!dp) {
1647836SJohn.Forte@Sun.COM 		dp = kmem_zalloc(sizeof (*dp), KM_SLEEP);
1657836SJohn.Forte@Sun.COM 		if (!dp)
1667836SJohn.Forte@Sun.COM 			return (NULL);
1677836SJohn.Forte@Sun.COM 
1687836SJohn.Forte@Sun.COM 		dp->id_cache_dev.bi_krdc = krdc;
1697836SJohn.Forte@Sun.COM 		dp->id_cache_dev.bi_fd = nsc_open(pathname,
170*9093SRamana.Srikanth@Sun.COM 		    NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE,
171*9093SRamana.Srikanth@Sun.COM 		    _rdc_fd_def, (blind_t)&dp->id_cache_dev, rc);
1727836SJohn.Forte@Sun.COM 		if (!dp->id_cache_dev.bi_fd) {
1737836SJohn.Forte@Sun.COM 			kmem_free(dp, sizeof (*dp));
1747836SJohn.Forte@Sun.COM 			return (NULL);
1757836SJohn.Forte@Sun.COM 		}
1767836SJohn.Forte@Sun.COM 
1777836SJohn.Forte@Sun.COM 		dp->id_raw_dev.bi_krdc = krdc;
1787836SJohn.Forte@Sun.COM 		dp->id_raw_dev.bi_fd = nsc_open(pathname,
179*9093SRamana.Srikanth@Sun.COM 		    NSC_RDCHR_ID|NSC_RDWR|NSC_DEVICE,
180*9093SRamana.Srikanth@Sun.COM 		    _rdc_fd_def, (blind_t)&dp->id_raw_dev, rc);
1817836SJohn.Forte@Sun.COM 		if (!dp->id_raw_dev.bi_fd) {
1827836SJohn.Forte@Sun.COM 			(void) nsc_close(dp->id_cache_dev.bi_fd);
1837836SJohn.Forte@Sun.COM 			kmem_free(dp, sizeof (*dp));
1847836SJohn.Forte@Sun.COM 			return (NULL);
1857836SJohn.Forte@Sun.COM 		}
1867836SJohn.Forte@Sun.COM 
1877836SJohn.Forte@Sun.COM 		mutex_init(&dp->id_rlock, NULL, MUTEX_DRIVER, NULL);
1887836SJohn.Forte@Sun.COM 		cv_init(&dp->id_rcv, NULL, CV_DRIVER, NULL);
1897836SJohn.Forte@Sun.COM 
1907836SJohn.Forte@Sun.COM 		dp->id_next = rdc_devices;
1917836SJohn.Forte@Sun.COM 		rdc_devices = dp;
1927836SJohn.Forte@Sun.COM 	}
1937836SJohn.Forte@Sun.COM 
1947836SJohn.Forte@Sun.COM 	dp->id_sets++;
1957836SJohn.Forte@Sun.COM 	return (dp);
1967836SJohn.Forte@Sun.COM }
1977836SJohn.Forte@Sun.COM 
1987836SJohn.Forte@Sun.COM 
1997836SJohn.Forte@Sun.COM /*
2007836SJohn.Forte@Sun.COM  * rdc_idev_close
2017836SJohn.Forte@Sun.COM  * - Close the nsctl file descriptors for the data devices.
2027836SJohn.Forte@Sun.COM  *
2037836SJohn.Forte@Sun.COM  * Must be called with rdc_conf_lock and dp->id_rlock held.
2047836SJohn.Forte@Sun.COM  * Will release dp->id_rlock before returning.
2057836SJohn.Forte@Sun.COM  *
2067836SJohn.Forte@Sun.COM  * id_sets is protected by rdc_conf_lock.
2077836SJohn.Forte@Sun.COM  */
2087836SJohn.Forte@Sun.COM static void
rdc_idev_close(rdc_k_info_t * krdc,rdc_info_dev_t * dp)2097836SJohn.Forte@Sun.COM rdc_idev_close(rdc_k_info_t *krdc, rdc_info_dev_t *dp)
2107836SJohn.Forte@Sun.COM {
2117836SJohn.Forte@Sun.COM 	rdc_info_dev_t **dpp;
2127836SJohn.Forte@Sun.COM #ifdef DEBUG
2137836SJohn.Forte@Sun.COM 	int count = 0;
2147836SJohn.Forte@Sun.COM #endif
2157836SJohn.Forte@Sun.COM 
2167836SJohn.Forte@Sun.COM 	ASSERT(MUTEX_HELD(&rdc_conf_lock));
2177836SJohn.Forte@Sun.COM 	ASSERT(MUTEX_HELD(&dp->id_rlock));
2187836SJohn.Forte@Sun.COM 
2197836SJohn.Forte@Sun.COM 	dp->id_sets--;
2207836SJohn.Forte@Sun.COM 	if (dp->id_sets > 0) {
2217836SJohn.Forte@Sun.COM 		mutex_exit(&dp->id_rlock);
2227836SJohn.Forte@Sun.COM 		return;
2237836SJohn.Forte@Sun.COM 	}
2247836SJohn.Forte@Sun.COM 
2257836SJohn.Forte@Sun.COM 	/* external references must have gone */
2267836SJohn.Forte@Sun.COM 	ASSERT((krdc->c_ref + krdc->r_ref + krdc->b_ref) == 0);
2277836SJohn.Forte@Sun.COM 
2287836SJohn.Forte@Sun.COM 	/* unlink from chain */
2297836SJohn.Forte@Sun.COM 
2307836SJohn.Forte@Sun.COM 	for (dpp = &rdc_devices; *dpp; dpp = &((*dpp)->id_next)) {
2317836SJohn.Forte@Sun.COM 		if (*dpp == dp) {
2327836SJohn.Forte@Sun.COM 			/* unlink */
2337836SJohn.Forte@Sun.COM 			*dpp = dp->id_next;
2347836SJohn.Forte@Sun.COM 			break;
2357836SJohn.Forte@Sun.COM 		}
2367836SJohn.Forte@Sun.COM 	}
2377836SJohn.Forte@Sun.COM 
2387836SJohn.Forte@Sun.COM 	/*
2397836SJohn.Forte@Sun.COM 	 * Wait for all reserves to go away - the rpc server is
2407836SJohn.Forte@Sun.COM 	 * running asynchronously with this close, and so we
2417836SJohn.Forte@Sun.COM 	 * have to wait for it to spot that the krdc is !IS_ENABLED()
2427836SJohn.Forte@Sun.COM 	 * and throw away the nsc_buf_t's that it has allocated
2437836SJohn.Forte@Sun.COM 	 * and release the device.
2447836SJohn.Forte@Sun.COM 	 */
2457836SJohn.Forte@Sun.COM 
2467836SJohn.Forte@Sun.COM 	while (IS_CRSRV(krdc) || IS_RRSRV(krdc)) {
2477836SJohn.Forte@Sun.COM #ifdef DEBUG
2487836SJohn.Forte@Sun.COM 		if (!(++count % 16)) {
2497836SJohn.Forte@Sun.COM 			cmn_err(CE_NOTE,
250*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_idev_close(%s): waiting for nsc_release",
251*9093SRamana.Srikanth@Sun.COM 			    rdc_u_info[krdc->index].primary.file);
2527836SJohn.Forte@Sun.COM 		}
2537836SJohn.Forte@Sun.COM 		if (count > (16*20)) {
2547836SJohn.Forte@Sun.COM 			/* waited for 20 seconds - too long - panic */
2557836SJohn.Forte@Sun.COM 			cmn_err(CE_PANIC,
256*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_idev_close(%s, %p): lost nsc_release",
257*9093SRamana.Srikanth@Sun.COM 			    rdc_u_info[krdc->index].primary.file, (void *)krdc);
2587836SJohn.Forte@Sun.COM 		}
2597836SJohn.Forte@Sun.COM #endif
2607836SJohn.Forte@Sun.COM 		mutex_exit(&dp->id_rlock);
2617836SJohn.Forte@Sun.COM 		delay(HZ>>4);
2627836SJohn.Forte@Sun.COM 		mutex_enter(&dp->id_rlock);
2637836SJohn.Forte@Sun.COM 	}
2647836SJohn.Forte@Sun.COM 
2657836SJohn.Forte@Sun.COM 	if (dp->id_cache_dev.bi_fd) {
2667836SJohn.Forte@Sun.COM 		(void) nsc_close(dp->id_cache_dev.bi_fd);
2677836SJohn.Forte@Sun.COM 		dp->id_cache_dev.bi_fd = NULL;
2687836SJohn.Forte@Sun.COM 	}
2697836SJohn.Forte@Sun.COM 
2707836SJohn.Forte@Sun.COM 	if (dp->id_raw_dev.bi_fd) {
2717836SJohn.Forte@Sun.COM 		(void) nsc_close(dp->id_raw_dev.bi_fd);
2727836SJohn.Forte@Sun.COM 		dp->id_raw_dev.bi_fd = NULL;
2737836SJohn.Forte@Sun.COM 	}
2747836SJohn.Forte@Sun.COM 
2757836SJohn.Forte@Sun.COM 	mutex_exit(&dp->id_rlock);
2767836SJohn.Forte@Sun.COM 	mutex_destroy(&dp->id_rlock);
2777836SJohn.Forte@Sun.COM 	cv_destroy(&dp->id_rcv);
2787836SJohn.Forte@Sun.COM 
2797836SJohn.Forte@Sun.COM 	kmem_free(dp, sizeof (*dp));
2807836SJohn.Forte@Sun.COM }
2817836SJohn.Forte@Sun.COM 
2827836SJohn.Forte@Sun.COM 
2837836SJohn.Forte@Sun.COM /*
2847836SJohn.Forte@Sun.COM  * This function provokes an nsc_reserve() for the device which
2857836SJohn.Forte@Sun.COM  * if successful will populate krdc->maxfbas and urdc->volume_size
2867836SJohn.Forte@Sun.COM  * via the _rdc_attach_fd() callback.
2877836SJohn.Forte@Sun.COM  */
2887836SJohn.Forte@Sun.COM void
rdc_get_details(rdc_k_info_t * krdc)2897836SJohn.Forte@Sun.COM rdc_get_details(rdc_k_info_t *krdc)
2907836SJohn.Forte@Sun.COM {
2917836SJohn.Forte@Sun.COM 	int rc;
2927836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
2937836SJohn.Forte@Sun.COM 	nsc_size_t vol_size, maxfbas;
2947836SJohn.Forte@Sun.COM 
2957836SJohn.Forte@Sun.COM 	if (_rdc_rsrv_devs(krdc, RDC_RAW, RDC_INTERNAL) == 0) {
2967836SJohn.Forte@Sun.COM 		/*
2977836SJohn.Forte@Sun.COM 		 * if the vol is already reserved,
2987836SJohn.Forte@Sun.COM 		 * volume_size won't be populated on enable because
2997836SJohn.Forte@Sun.COM 		 * it is a *fake* reserve and does not make it to
3007836SJohn.Forte@Sun.COM 		 * _rdc_attach_fd(). So do it here.
3017836SJohn.Forte@Sun.COM 		 */
3027836SJohn.Forte@Sun.COM 		rc = nsc_partsize(RDC_U_FD(krdc), &vol_size);
3037836SJohn.Forte@Sun.COM 		if (rc != 0) {
3047836SJohn.Forte@Sun.COM #ifdef DEBUG
3057836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
306*9093SRamana.Srikanth@Sun.COM 			    "!rdc_get_details: partsize failed (%d)", rc);
3077836SJohn.Forte@Sun.COM #endif /* DEBUG */
3087836SJohn.Forte@Sun.COM 			urdc->volume_size = vol_size = 0;
3097836SJohn.Forte@Sun.COM 		}
3107836SJohn.Forte@Sun.COM 
3117836SJohn.Forte@Sun.COM 		urdc->volume_size = vol_size;
3127836SJohn.Forte@Sun.COM 		rc = nsc_maxfbas(RDC_U_FD(krdc), 0, &maxfbas);
3137836SJohn.Forte@Sun.COM 		if (rc != 0) {
3147836SJohn.Forte@Sun.COM #ifdef DEBUG
3157836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
316*9093SRamana.Srikanth@Sun.COM 			    "!rdc_get_details: maxfbas failed (%d)", rc);
3177836SJohn.Forte@Sun.COM #endif /* DEBUG */
3187836SJohn.Forte@Sun.COM 			maxfbas = 0;
3197836SJohn.Forte@Sun.COM 		}
3207836SJohn.Forte@Sun.COM 		krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas);
3217836SJohn.Forte@Sun.COM 
3227836SJohn.Forte@Sun.COM 		_rdc_rlse_devs(krdc, RDC_RAW);
3237836SJohn.Forte@Sun.COM 	}
3247836SJohn.Forte@Sun.COM }
3257836SJohn.Forte@Sun.COM 
3267836SJohn.Forte@Sun.COM 
3277836SJohn.Forte@Sun.COM /*
3287836SJohn.Forte@Sun.COM  * Should only be used by the config code.
3297836SJohn.Forte@Sun.COM  */
3307836SJohn.Forte@Sun.COM 
3317836SJohn.Forte@Sun.COM int
rdc_dev_open(rdc_set_t * rdc_set,int options)3327836SJohn.Forte@Sun.COM rdc_dev_open(rdc_set_t *rdc_set, int options)
3337836SJohn.Forte@Sun.COM {
3347836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc;
3357836SJohn.Forte@Sun.COM 	int index;
3367836SJohn.Forte@Sun.COM 	int rc;
3377836SJohn.Forte@Sun.COM 	char *pathname;
3387836SJohn.Forte@Sun.COM 
3397836SJohn.Forte@Sun.COM 	ASSERT(MUTEX_HELD(&rdc_conf_lock));
3407836SJohn.Forte@Sun.COM 
3417836SJohn.Forte@Sun.COM 	if (options & RDC_OPT_PRIMARY)
3427836SJohn.Forte@Sun.COM 		pathname = rdc_set->primary.file;
3437836SJohn.Forte@Sun.COM 	else
3447836SJohn.Forte@Sun.COM 		pathname = rdc_set->secondary.file;
3457836SJohn.Forte@Sun.COM 
3467836SJohn.Forte@Sun.COM 	for (index = 0; index < rdc_max_sets; index++) {
3477836SJohn.Forte@Sun.COM 		krdc = &rdc_k_info[index];
3487836SJohn.Forte@Sun.COM 
3497836SJohn.Forte@Sun.COM 		if (!IS_CONFIGURED(krdc))
3507836SJohn.Forte@Sun.COM 			break;
3517836SJohn.Forte@Sun.COM 	}
3527836SJohn.Forte@Sun.COM 
3537836SJohn.Forte@Sun.COM 	if (index == rdc_max_sets) {
3547836SJohn.Forte@Sun.COM #ifdef DEBUG
355*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc_dev_open: out of cd\'s");
3567836SJohn.Forte@Sun.COM #endif
3577836SJohn.Forte@Sun.COM 		index = -EINVAL;
3587836SJohn.Forte@Sun.COM 		goto out;
3597836SJohn.Forte@Sun.COM 	}
3607836SJohn.Forte@Sun.COM 
3617836SJohn.Forte@Sun.COM 	if (krdc->devices && (krdc->c_fd || krdc->r_fd)) {
3627836SJohn.Forte@Sun.COM #ifdef DEBUG
363*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc_dev_open: %s already open", pathname);
3647836SJohn.Forte@Sun.COM #endif
3657836SJohn.Forte@Sun.COM 		index = -EINVAL;
3667836SJohn.Forte@Sun.COM 		goto out;
3677836SJohn.Forte@Sun.COM 	}
3687836SJohn.Forte@Sun.COM 
3697836SJohn.Forte@Sun.COM 	_rdc_open_count++;
3707836SJohn.Forte@Sun.COM 
3717836SJohn.Forte@Sun.COM 	krdc->devices = rdc_idev_open(krdc, pathname, &rc);
3727836SJohn.Forte@Sun.COM 	if (!krdc->devices) {
3737836SJohn.Forte@Sun.COM 		index = -rc;
3747836SJohn.Forte@Sun.COM 		goto open_fail;
3757836SJohn.Forte@Sun.COM 	}
3767836SJohn.Forte@Sun.COM 
3777836SJohn.Forte@Sun.COM 	/*
3787836SJohn.Forte@Sun.COM 	 * Grab the device size and maxfbas now.
3797836SJohn.Forte@Sun.COM 	 */
3807836SJohn.Forte@Sun.COM 
3817836SJohn.Forte@Sun.COM 	rdc_get_details(krdc);
3827836SJohn.Forte@Sun.COM 
3837836SJohn.Forte@Sun.COM out:
3847836SJohn.Forte@Sun.COM 	return (index);
3857836SJohn.Forte@Sun.COM 
3867836SJohn.Forte@Sun.COM open_fail:
3877836SJohn.Forte@Sun.COM 	_rdc_open_count--;
3887836SJohn.Forte@Sun.COM 
3897836SJohn.Forte@Sun.COM 	return (index);
3907836SJohn.Forte@Sun.COM }
3917836SJohn.Forte@Sun.COM 
3927836SJohn.Forte@Sun.COM 
3937836SJohn.Forte@Sun.COM void
rdc_dev_close(rdc_k_info_t * krdc)3947836SJohn.Forte@Sun.COM rdc_dev_close(rdc_k_info_t *krdc)
3957836SJohn.Forte@Sun.COM {
3967836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
3977836SJohn.Forte@Sun.COM 
3987836SJohn.Forte@Sun.COM 	mutex_enter(&rdc_conf_lock);
3997836SJohn.Forte@Sun.COM 
4007836SJohn.Forte@Sun.COM 	if (krdc->devices)
4017836SJohn.Forte@Sun.COM 		mutex_enter(&krdc->devices->id_rlock);
4027836SJohn.Forte@Sun.COM 
4037836SJohn.Forte@Sun.COM #ifdef DEBUG
4047836SJohn.Forte@Sun.COM 	if (!krdc->devices || !krdc->c_fd || !krdc->r_fd) {
4057836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
406*9093SRamana.Srikanth@Sun.COM 		    "!rdc_dev_close(%p): c_fd %p r_fd %p", (void *)krdc,
407*9093SRamana.Srikanth@Sun.COM 		    (void *) (krdc->devices ? krdc->c_fd : 0),
408*9093SRamana.Srikanth@Sun.COM 		    (void *) (krdc->devices ? krdc->r_fd : 0));
4097836SJohn.Forte@Sun.COM 	}
4107836SJohn.Forte@Sun.COM #endif
4117836SJohn.Forte@Sun.COM 
4127836SJohn.Forte@Sun.COM 	if (krdc->devices) {
4137836SJohn.Forte@Sun.COM 		/* rdc_idev_close will release id_rlock */
4147836SJohn.Forte@Sun.COM 		rdc_idev_close(krdc, krdc->devices);
4157836SJohn.Forte@Sun.COM 		krdc->devices = NULL;
4167836SJohn.Forte@Sun.COM 	}
4177836SJohn.Forte@Sun.COM 
4187836SJohn.Forte@Sun.COM 	urdc->primary.file[0] = '\0';
4197836SJohn.Forte@Sun.COM 
4207836SJohn.Forte@Sun.COM 	if (_rdc_open_count <= 0) {
421*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc: _rdc_open_count corrupt: %d",
422*9093SRamana.Srikanth@Sun.COM 		    _rdc_open_count);
4237836SJohn.Forte@Sun.COM 	}
4247836SJohn.Forte@Sun.COM 
4257836SJohn.Forte@Sun.COM 	_rdc_open_count--;
4267836SJohn.Forte@Sun.COM 
4277836SJohn.Forte@Sun.COM 	mutex_exit(&rdc_conf_lock);
4287836SJohn.Forte@Sun.COM }
4297836SJohn.Forte@Sun.COM 
4307836SJohn.Forte@Sun.COM 
4317836SJohn.Forte@Sun.COM /*
4327836SJohn.Forte@Sun.COM  * rdc_intercept
4337836SJohn.Forte@Sun.COM  *
4347836SJohn.Forte@Sun.COM  * Register for IO on this device with nsctl.
4357836SJohn.Forte@Sun.COM  *
4367836SJohn.Forte@Sun.COM  * For a 1-to-many primary we register for each krdc and let nsctl sort
4377836SJohn.Forte@Sun.COM  * out which it wants to be using. This means that we cannot tell which
4387836SJohn.Forte@Sun.COM  * krdc will receive the incoming io from nsctl, though we do know that
4397836SJohn.Forte@Sun.COM  * at any one time only one krdc will be 'attached' and so get io from
4407836SJohn.Forte@Sun.COM  * nsctl.
4417836SJohn.Forte@Sun.COM  *
4427836SJohn.Forte@Sun.COM  * So the krdc->many_next pointer is maintained as a circular list. The
4437836SJohn.Forte@Sun.COM  * result of these multiple nsc_register_paths is that we will see a
4447836SJohn.Forte@Sun.COM  * few more attach and detach io provider calls during enable/resume
4457836SJohn.Forte@Sun.COM  * and disable/suspend of the 1-to-many whilst nsctl settles down to
4467836SJohn.Forte@Sun.COM  * using a single krdc.
4477836SJohn.Forte@Sun.COM  *
4487836SJohn.Forte@Sun.COM  * The major advantage of this scheme is that nsctl sorts out all the
4497836SJohn.Forte@Sun.COM  * rdc_fd_t's so that they can only point to krdc's that are currently
4507836SJohn.Forte@Sun.COM  * active.
4517836SJohn.Forte@Sun.COM  */
4527836SJohn.Forte@Sun.COM int
rdc_intercept(rdc_k_info_t * krdc)4537836SJohn.Forte@Sun.COM rdc_intercept(rdc_k_info_t *krdc)
4547836SJohn.Forte@Sun.COM {
4557836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
4567836SJohn.Forte@Sun.COM 	char *pathname;
4577836SJohn.Forte@Sun.COM 	char *bitmap;
4587836SJohn.Forte@Sun.COM 
4597836SJohn.Forte@Sun.COM 	if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
4607836SJohn.Forte@Sun.COM 		pathname = urdc->primary.file;
4617836SJohn.Forte@Sun.COM 		bitmap = urdc->primary.bitmap;
4627836SJohn.Forte@Sun.COM 	} else {
4637836SJohn.Forte@Sun.COM 		pathname = urdc->secondary.file;
4647836SJohn.Forte@Sun.COM 		bitmap = urdc->secondary.bitmap;
4657836SJohn.Forte@Sun.COM 	}
4667836SJohn.Forte@Sun.COM 
4677836SJohn.Forte@Sun.COM 	if (!krdc->b_tok)
4687836SJohn.Forte@Sun.COM 		krdc->b_tok = nsc_register_path(bitmap, NSC_CACHE | NSC_DEVICE,
4697836SJohn.Forte@Sun.COM 		    _rdc_io_hc);
4707836SJohn.Forte@Sun.COM 
4717836SJohn.Forte@Sun.COM 	if (!krdc->c_tok)
4727836SJohn.Forte@Sun.COM 		krdc->c_tok = nsc_register_path(pathname, NSC_CACHE,
4737836SJohn.Forte@Sun.COM 		    _rdc_io_hc);
4747836SJohn.Forte@Sun.COM 
4757836SJohn.Forte@Sun.COM 	if (!krdc->r_tok)
4767836SJohn.Forte@Sun.COM 		krdc->r_tok = nsc_register_path(pathname, NSC_DEVICE,
4777836SJohn.Forte@Sun.COM 		    _rdc_io_hr);
4787836SJohn.Forte@Sun.COM 
4797836SJohn.Forte@Sun.COM 	if (!krdc->c_tok || !krdc->r_tok) {
4807836SJohn.Forte@Sun.COM 		(void) rdc_unintercept(krdc);
4817836SJohn.Forte@Sun.COM 		return (ENXIO);
4827836SJohn.Forte@Sun.COM 	}
4837836SJohn.Forte@Sun.COM 
4847836SJohn.Forte@Sun.COM 	return (0);
4857836SJohn.Forte@Sun.COM }
4867836SJohn.Forte@Sun.COM 
4877836SJohn.Forte@Sun.COM 
4887836SJohn.Forte@Sun.COM static void
wait_unregistering(rdc_k_info_t * krdc)4897836SJohn.Forte@Sun.COM wait_unregistering(rdc_k_info_t *krdc)
4907836SJohn.Forte@Sun.COM {
4917836SJohn.Forte@Sun.COM 	while (krdc->group->unregistering > 0)
4927836SJohn.Forte@Sun.COM 		(void) cv_wait_sig(&krdc->group->unregistercv, &rdc_conf_lock);
4937836SJohn.Forte@Sun.COM }
4947836SJohn.Forte@Sun.COM 
4957836SJohn.Forte@Sun.COM static void
set_unregistering(rdc_k_info_t * krdc)4967836SJohn.Forte@Sun.COM set_unregistering(rdc_k_info_t *krdc)
4977836SJohn.Forte@Sun.COM {
4987836SJohn.Forte@Sun.COM 	wait_unregistering(krdc);
4997836SJohn.Forte@Sun.COM 
5007836SJohn.Forte@Sun.COM 	krdc->group->unregistering++;
5017836SJohn.Forte@Sun.COM }
5027836SJohn.Forte@Sun.COM 
5037836SJohn.Forte@Sun.COM static void
wakeup_unregistering(rdc_k_info_t * krdc)5047836SJohn.Forte@Sun.COM wakeup_unregistering(rdc_k_info_t *krdc)
5057836SJohn.Forte@Sun.COM {
5067836SJohn.Forte@Sun.COM 	if (krdc->group->unregistering <= 0)
5077836SJohn.Forte@Sun.COM 		return;
5087836SJohn.Forte@Sun.COM 
5097836SJohn.Forte@Sun.COM 	krdc->group->unregistering--;
5107836SJohn.Forte@Sun.COM 	cv_broadcast(&krdc->group->unregistercv);
5117836SJohn.Forte@Sun.COM }
5127836SJohn.Forte@Sun.COM 
5137836SJohn.Forte@Sun.COM 
5147836SJohn.Forte@Sun.COM /*
5157836SJohn.Forte@Sun.COM  * rdc_unintercept
5167836SJohn.Forte@Sun.COM  *
5177836SJohn.Forte@Sun.COM  * Unregister for IO on this device.
5187836SJohn.Forte@Sun.COM  *
5197836SJohn.Forte@Sun.COM  * See comments above rdc_intercept.
5207836SJohn.Forte@Sun.COM  */
5217836SJohn.Forte@Sun.COM int
rdc_unintercept(rdc_k_info_t * krdc)5227836SJohn.Forte@Sun.COM rdc_unintercept(rdc_k_info_t *krdc)
5237836SJohn.Forte@Sun.COM {
5247836SJohn.Forte@Sun.COM 	int err = 0;
5257836SJohn.Forte@Sun.COM 	int rc;
5267836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
5277836SJohn.Forte@Sun.COM 
5287836SJohn.Forte@Sun.COM 	mutex_enter(&rdc_conf_lock);
5297836SJohn.Forte@Sun.COM 	set_unregistering(krdc);
5307836SJohn.Forte@Sun.COM 	krdc->type_flag |= RDC_UNREGISTER;
5317836SJohn.Forte@Sun.COM 	mutex_exit(&rdc_conf_lock);
5327836SJohn.Forte@Sun.COM 
5337836SJohn.Forte@Sun.COM 	if (krdc->r_tok) {
5347836SJohn.Forte@Sun.COM 		rc = nsc_unregister_path(krdc->r_tok, 0);
5357836SJohn.Forte@Sun.COM 		if (rc) {
536*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_WARN, "!rdc: unregister rawfd %d", rc);
5377836SJohn.Forte@Sun.COM 			err = rc;
5387836SJohn.Forte@Sun.COM 		}
5397836SJohn.Forte@Sun.COM 		krdc->r_tok = NULL;
5407836SJohn.Forte@Sun.COM 	}
5417836SJohn.Forte@Sun.COM 
5427836SJohn.Forte@Sun.COM 	if (krdc->c_tok) {
5437836SJohn.Forte@Sun.COM 		rc = nsc_unregister_path(krdc->c_tok, 0);
5447836SJohn.Forte@Sun.COM 		if (rc) {
545*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_WARN, "!rdc: unregister cachefd %d", rc);
5467836SJohn.Forte@Sun.COM 			if (!err)
5477836SJohn.Forte@Sun.COM 				err = rc;
5487836SJohn.Forte@Sun.COM 		}
5497836SJohn.Forte@Sun.COM 		krdc->c_tok = NULL;
5507836SJohn.Forte@Sun.COM 	}
5517836SJohn.Forte@Sun.COM 
5527836SJohn.Forte@Sun.COM 	if (krdc->b_tok) {
5537836SJohn.Forte@Sun.COM 		rc = nsc_unregister_path(krdc->b_tok, 0);
5547836SJohn.Forte@Sun.COM 		if (rc) {
555*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_WARN, "!rdc: unregister bitmap %d", rc);
5567836SJohn.Forte@Sun.COM 			err = rc;
5577836SJohn.Forte@Sun.COM 		}
5587836SJohn.Forte@Sun.COM 		krdc->b_tok = NULL;
5597836SJohn.Forte@Sun.COM 	}
5607836SJohn.Forte@Sun.COM 
5617836SJohn.Forte@Sun.COM 	rdc_group_enter(krdc);
5627836SJohn.Forte@Sun.COM 
5637836SJohn.Forte@Sun.COM 	/* Wait for all necessary _rdc_close() calls to complete */
5647836SJohn.Forte@Sun.COM 	while ((krdc->c_ref + krdc->r_ref + krdc->b_ref) != 0) {
5657836SJohn.Forte@Sun.COM 		krdc->closing++;
5667836SJohn.Forte@Sun.COM 		cv_wait(&krdc->closingcv, &krdc->group->lock);
5677836SJohn.Forte@Sun.COM 		krdc->closing--;
5687836SJohn.Forte@Sun.COM 	}
5697836SJohn.Forte@Sun.COM 
5707836SJohn.Forte@Sun.COM 	rdc_clr_flags(urdc, RDC_ENABLED);
5717836SJohn.Forte@Sun.COM 	rdc_group_exit(krdc);
5727836SJohn.Forte@Sun.COM 
5737836SJohn.Forte@Sun.COM 
5747836SJohn.Forte@Sun.COM 	/*
5757836SJohn.Forte@Sun.COM 	 * Check there are no outstanding writes in progress.
5767836SJohn.Forte@Sun.COM 	 * This can happen when a set is being disabled which
5777836SJohn.Forte@Sun.COM 	 * is one of the 'one_to_many' chain, that did not
5787836SJohn.Forte@Sun.COM 	 * intercept the original write call.
5797836SJohn.Forte@Sun.COM 	 */
5807836SJohn.Forte@Sun.COM 
5817836SJohn.Forte@Sun.COM 	for (;;) {
5827836SJohn.Forte@Sun.COM 		rdc_group_enter(krdc);
5837836SJohn.Forte@Sun.COM 		if (krdc->aux_state & RDC_AUXWRITE) {
5847836SJohn.Forte@Sun.COM 			rdc_group_exit(krdc);
5857836SJohn.Forte@Sun.COM 			/*
5867836SJohn.Forte@Sun.COM 			 * This doesn't happen very often,
5877836SJohn.Forte@Sun.COM 			 * just delay a bit and re-look.
5887836SJohn.Forte@Sun.COM 			 */
5897836SJohn.Forte@Sun.COM 			delay(50);
5907836SJohn.Forte@Sun.COM 		} else {
5917836SJohn.Forte@Sun.COM 			rdc_group_exit(krdc);
5927836SJohn.Forte@Sun.COM 			break;
5937836SJohn.Forte@Sun.COM 		}
5947836SJohn.Forte@Sun.COM 	}
5957836SJohn.Forte@Sun.COM 
5967836SJohn.Forte@Sun.COM 	mutex_enter(&rdc_conf_lock);
5977836SJohn.Forte@Sun.COM 	krdc->type_flag &= ~RDC_UNREGISTER;
5987836SJohn.Forte@Sun.COM 	wakeup_unregistering(krdc);
5997836SJohn.Forte@Sun.COM 	mutex_exit(&rdc_conf_lock);
6007836SJohn.Forte@Sun.COM 
6017836SJohn.Forte@Sun.COM 	return (err);
6027836SJohn.Forte@Sun.COM }
6037836SJohn.Forte@Sun.COM 
6047836SJohn.Forte@Sun.COM 
6057836SJohn.Forte@Sun.COM /*
6067836SJohn.Forte@Sun.COM  * _rdc_rlse_d
6077836SJohn.Forte@Sun.COM  *	Internal version of _rdc_rlse_devs(), only concerned with the
6087836SJohn.Forte@Sun.COM  *	data device, not the bitmap.
6097836SJohn.Forte@Sun.COM  */
6107836SJohn.Forte@Sun.COM 
6117836SJohn.Forte@Sun.COM static void
_rdc_rlse_d(rdc_k_info_t * krdc,int devs)6127836SJohn.Forte@Sun.COM _rdc_rlse_d(rdc_k_info_t *krdc, int devs)
6137836SJohn.Forte@Sun.COM {
6147836SJohn.Forte@Sun.COM 	_rdc_info_dev_t *cip;
6157836SJohn.Forte@Sun.COM 	_rdc_info_dev_t *rip;
6167836SJohn.Forte@Sun.COM 	int raw = (devs & RDC_RAW);
6177836SJohn.Forte@Sun.COM 
6187836SJohn.Forte@Sun.COM 	if (!krdc) {
619*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs null krdc");
6207836SJohn.Forte@Sun.COM 		return;
6217836SJohn.Forte@Sun.COM 	}
6227836SJohn.Forte@Sun.COM 
6237836SJohn.Forte@Sun.COM 	ASSERT((devs & (~RDC_BMP)) != 0);
6247836SJohn.Forte@Sun.COM 
6257836SJohn.Forte@Sun.COM 	cip = &krdc->devices->id_cache_dev;
6267836SJohn.Forte@Sun.COM 	rip = &krdc->devices->id_raw_dev;
6277836SJohn.Forte@Sun.COM 
6287836SJohn.Forte@Sun.COM 	if (IS_RSRV(cip)) {
6297836SJohn.Forte@Sun.COM 		/* decrement count */
6307836SJohn.Forte@Sun.COM 
6317836SJohn.Forte@Sun.COM 		if (raw) {
6327836SJohn.Forte@Sun.COM 			if (cip->bi_ofailed > 0) {
6337836SJohn.Forte@Sun.COM 				cip->bi_ofailed--;
6347836SJohn.Forte@Sun.COM 			} else if (cip->bi_orsrv > 0) {
6357836SJohn.Forte@Sun.COM 				cip->bi_orsrv--;
6367836SJohn.Forte@Sun.COM 			}
6377836SJohn.Forte@Sun.COM 		} else {
6387836SJohn.Forte@Sun.COM 			if (cip->bi_failed > 0) {
6397836SJohn.Forte@Sun.COM 				cip->bi_failed--;
6407836SJohn.Forte@Sun.COM 			} else if (cip->bi_rsrv > 0) {
6417836SJohn.Forte@Sun.COM 				cip->bi_rsrv--;
6427836SJohn.Forte@Sun.COM 			}
6437836SJohn.Forte@Sun.COM 		}
6447836SJohn.Forte@Sun.COM 
6457836SJohn.Forte@Sun.COM 		/*
6467836SJohn.Forte@Sun.COM 		 * reset nsc_fd ownership back link, it is only set if
6477836SJohn.Forte@Sun.COM 		 * we have really done an underlying reserve, not for
6487836SJohn.Forte@Sun.COM 		 * failed (faked) reserves.
6497836SJohn.Forte@Sun.COM 		 */
6507836SJohn.Forte@Sun.COM 
6517836SJohn.Forte@Sun.COM 		if (cip->bi_rsrv > 0 || cip->bi_orsrv > 0) {
6527836SJohn.Forte@Sun.COM 			nsc_set_owner(cip->bi_fd, krdc->iodev);
6537836SJohn.Forte@Sun.COM 		} else {
6547836SJohn.Forte@Sun.COM 			nsc_set_owner(cip->bi_fd, NULL);
6557836SJohn.Forte@Sun.COM 		}
6567836SJohn.Forte@Sun.COM 
6577836SJohn.Forte@Sun.COM 		/* release nsc_fd */
6587836SJohn.Forte@Sun.COM 
6597836SJohn.Forte@Sun.COM 		if (!IS_RSRV(cip)) {
6607836SJohn.Forte@Sun.COM 			nsc_release(cip->bi_fd);
6617836SJohn.Forte@Sun.COM 		}
6627836SJohn.Forte@Sun.COM 	} else if (IS_RSRV(rip)) {
6637836SJohn.Forte@Sun.COM 		/* decrement count */
6647836SJohn.Forte@Sun.COM 
6657836SJohn.Forte@Sun.COM 		if (raw) {
6667836SJohn.Forte@Sun.COM 			if (rip->bi_failed > 0) {
6677836SJohn.Forte@Sun.COM 				rip->bi_failed--;
6687836SJohn.Forte@Sun.COM 			} else if (rip->bi_rsrv > 0) {
6697836SJohn.Forte@Sun.COM 				rip->bi_rsrv--;
6707836SJohn.Forte@Sun.COM 			}
6717836SJohn.Forte@Sun.COM 		} else {
6727836SJohn.Forte@Sun.COM 			if (rip->bi_ofailed > 0) {
6737836SJohn.Forte@Sun.COM 				rip->bi_ofailed--;
6747836SJohn.Forte@Sun.COM 			} else if (rip->bi_orsrv > 0) {
6757836SJohn.Forte@Sun.COM 				rip->bi_orsrv--;
6767836SJohn.Forte@Sun.COM 			}
6777836SJohn.Forte@Sun.COM 		}
6787836SJohn.Forte@Sun.COM 
6797836SJohn.Forte@Sun.COM 		/*
6807836SJohn.Forte@Sun.COM 		 * reset nsc_fd ownership back link, it is only set if
6817836SJohn.Forte@Sun.COM 		 * we have really done an underlying reserve, not for
6827836SJohn.Forte@Sun.COM 		 * failed (faked) reserves.
6837836SJohn.Forte@Sun.COM 		 */
6847836SJohn.Forte@Sun.COM 
6857836SJohn.Forte@Sun.COM 		if (rip->bi_rsrv > 0 || rip->bi_orsrv > 0) {
6867836SJohn.Forte@Sun.COM 			nsc_set_owner(rip->bi_fd, krdc->iodev);
6877836SJohn.Forte@Sun.COM 		} else {
6887836SJohn.Forte@Sun.COM 			nsc_set_owner(rip->bi_fd, NULL);
6897836SJohn.Forte@Sun.COM 		}
6907836SJohn.Forte@Sun.COM 
6917836SJohn.Forte@Sun.COM 		/* release nsc_fd and any waiters */
6927836SJohn.Forte@Sun.COM 
6937836SJohn.Forte@Sun.COM 		if (!IS_RSRV(rip)) {
6947836SJohn.Forte@Sun.COM 			rip->bi_flag = 0;
6957836SJohn.Forte@Sun.COM 			nsc_release(rip->bi_fd);
6967836SJohn.Forte@Sun.COM 			cv_broadcast(&krdc->devices->id_rcv);
6977836SJohn.Forte@Sun.COM 		}
6987836SJohn.Forte@Sun.COM 	} else {
699*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc: _rdc_rlse_devs no reserve? krdc %p",
700*9093SRamana.Srikanth@Sun.COM 		    (void *) krdc);
7017836SJohn.Forte@Sun.COM 	}
7027836SJohn.Forte@Sun.COM }
7037836SJohn.Forte@Sun.COM 
7047836SJohn.Forte@Sun.COM /*
7057836SJohn.Forte@Sun.COM  * _rdc_rlse_devs
7067836SJohn.Forte@Sun.COM  *	Release named underlying devices and take care of setting the
7077836SJohn.Forte@Sun.COM  *	back link on the nsc_fd to the correct parent iodev.
7087836SJohn.Forte@Sun.COM  *
7097836SJohn.Forte@Sun.COM  *	NOTE: the 'devs' argument must be the same as that passed to
7107836SJohn.Forte@Sun.COM  *	the preceding _rdc_rsrv_devs call.
7117836SJohn.Forte@Sun.COM  */
7127836SJohn.Forte@Sun.COM 
7137836SJohn.Forte@Sun.COM void
_rdc_rlse_devs(rdc_k_info_t * krdc,int devs)7147836SJohn.Forte@Sun.COM _rdc_rlse_devs(rdc_k_info_t *krdc, int devs)
7157836SJohn.Forte@Sun.COM {
7167836SJohn.Forte@Sun.COM 
7177836SJohn.Forte@Sun.COM 	DTRACE_PROBE(_rdc_rlse_devs_start);
7187836SJohn.Forte@Sun.COM 	mutex_enter(&krdc->devices->id_rlock);
7197836SJohn.Forte@Sun.COM 
7207836SJohn.Forte@Sun.COM 	ASSERT(!(devs & RDC_CACHE));
7217836SJohn.Forte@Sun.COM 
7227836SJohn.Forte@Sun.COM 	if ((devs & (~RDC_BMP)) != 0) {
7237836SJohn.Forte@Sun.COM 		_rdc_rlse_d(krdc, devs);
7247836SJohn.Forte@Sun.COM 	}
7257836SJohn.Forte@Sun.COM 
7267836SJohn.Forte@Sun.COM 	if ((devs & RDC_BMP) != 0) {
7277836SJohn.Forte@Sun.COM 		if (krdc->bmaprsrv > 0 && --krdc->bmaprsrv == 0) {
7287836SJohn.Forte@Sun.COM 			nsc_release(krdc->bitmapfd);
7297836SJohn.Forte@Sun.COM 		}
7307836SJohn.Forte@Sun.COM 	}
7317836SJohn.Forte@Sun.COM 
7327836SJohn.Forte@Sun.COM 	mutex_exit(&krdc->devices->id_rlock);
7337836SJohn.Forte@Sun.COM 
7347836SJohn.Forte@Sun.COM }
7357836SJohn.Forte@Sun.COM 
7367836SJohn.Forte@Sun.COM /*
7377836SJohn.Forte@Sun.COM  * _rdc_rsrv_d
7387836SJohn.Forte@Sun.COM  *	Reserve device flagged, unless its companion is already reserved,
7397836SJohn.Forte@Sun.COM  *	in that case increase the reserve on the companion.  Take care
7407836SJohn.Forte@Sun.COM  *	of setting the nsc_fd ownership back link to the correct parent
7417836SJohn.Forte@Sun.COM  *	iodev pointer.
7427836SJohn.Forte@Sun.COM  */
7437836SJohn.Forte@Sun.COM 
7447836SJohn.Forte@Sun.COM static int
_rdc_rsrv_d(int raw,_rdc_info_dev_t * rid,_rdc_info_dev_t * cid,int flag,rdc_k_info_t * krdc)7457836SJohn.Forte@Sun.COM _rdc_rsrv_d(int raw, _rdc_info_dev_t *rid, _rdc_info_dev_t *cid, int flag,
7467836SJohn.Forte@Sun.COM     rdc_k_info_t *krdc)
7477836SJohn.Forte@Sun.COM {
7487836SJohn.Forte@Sun.COM 	_rdc_info_dev_t *p = NULL;
7497836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
7507836SJohn.Forte@Sun.COM 	int other = 0;
7517836SJohn.Forte@Sun.COM 	int rc;
7527836SJohn.Forte@Sun.COM 
7537836SJohn.Forte@Sun.COM 
7547836SJohn.Forte@Sun.COM #ifdef DEBUG
7557836SJohn.Forte@Sun.COM 	if ((rid->bi_rsrv < 0) ||
7567836SJohn.Forte@Sun.COM 	    (cid->bi_rsrv < 0) ||
7577836SJohn.Forte@Sun.COM 	    (rid->bi_orsrv < 0) ||
7587836SJohn.Forte@Sun.COM 	    (cid->bi_orsrv < 0) ||
7597836SJohn.Forte@Sun.COM 	    (rid->bi_failed < 0) ||
7607836SJohn.Forte@Sun.COM 	    (cid->bi_failed < 0) ||
7617836SJohn.Forte@Sun.COM 	    (rid->bi_ofailed < 0) ||
7627836SJohn.Forte@Sun.COM 	    (cid->bi_ofailed < 0)) {
7637836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
764*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_rsrv_d: negative counts (rsrv %d %d orsrv %d %d)",
7657836SJohn.Forte@Sun.COM 		    rid->bi_rsrv, cid->bi_rsrv,
7667836SJohn.Forte@Sun.COM 		    rid->bi_orsrv, cid->bi_orsrv);
7677836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
768*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_rsrv_d: negative counts (fail %d %d ofail %d %d)",
7697836SJohn.Forte@Sun.COM 		    rid->bi_failed, cid->bi_failed,
7707836SJohn.Forte@Sun.COM 		    rid->bi_ofailed, cid->bi_ofailed);
7717836SJohn.Forte@Sun.COM 		cmn_err(CE_PANIC, "_rdc_rsrv_d: negative counts (krdc %p)",
7727836SJohn.Forte@Sun.COM 		    (void *) krdc);
7737836SJohn.Forte@Sun.COM 	}
7747836SJohn.Forte@Sun.COM #endif
7757836SJohn.Forte@Sun.COM 
7767836SJohn.Forte@Sun.COM 	/*
7777836SJohn.Forte@Sun.COM 	 * If user wants to do a cache reserve and it's already
7787836SJohn.Forte@Sun.COM 	 * raw reserved internally, we need to do a real nsc_reserve, so wait
7797836SJohn.Forte@Sun.COM 	 * until the release has been done.
7807836SJohn.Forte@Sun.COM 	 */
7817836SJohn.Forte@Sun.COM 	if (IS_RSRV(rid) && (flag == RDC_EXTERNAL) &&
7827836SJohn.Forte@Sun.COM 	    (raw == 0) && (rid->bi_flag != RDC_EXTERNAL)) {
7837836SJohn.Forte@Sun.COM 		krdc->devices->id_release++;
7847836SJohn.Forte@Sun.COM 		while (IS_RSRV(rid))
7857836SJohn.Forte@Sun.COM 			cv_wait(&krdc->devices->id_rcv,
786*9093SRamana.Srikanth@Sun.COM 			    &krdc->devices->id_rlock);
7877836SJohn.Forte@Sun.COM 		krdc->devices->id_release--;
7887836SJohn.Forte@Sun.COM 	}
7897836SJohn.Forte@Sun.COM 
7907836SJohn.Forte@Sun.COM 	/* select underlying device to use */
7917836SJohn.Forte@Sun.COM 
7927836SJohn.Forte@Sun.COM 	if (IS_RSRV(rid)) {
7937836SJohn.Forte@Sun.COM 		p = rid;
7947836SJohn.Forte@Sun.COM 		if (!raw) {
7957836SJohn.Forte@Sun.COM 			other = 1;
7967836SJohn.Forte@Sun.COM 		}
7977836SJohn.Forte@Sun.COM 	} else if (IS_RSRV(cid)) {
7987836SJohn.Forte@Sun.COM 		p = cid;
7997836SJohn.Forte@Sun.COM 		if (raw) {
8007836SJohn.Forte@Sun.COM 			other = 1;
8017836SJohn.Forte@Sun.COM 		}
8027836SJohn.Forte@Sun.COM 	}
8037836SJohn.Forte@Sun.COM 
8047836SJohn.Forte@Sun.COM 	/* just increment count and return if already reserved */
8057836SJohn.Forte@Sun.COM 
8067836SJohn.Forte@Sun.COM 	if (p && !RFAILED(p)) {
8077836SJohn.Forte@Sun.COM 		if (other) {
8087836SJohn.Forte@Sun.COM 			p->bi_orsrv++;
8097836SJohn.Forte@Sun.COM 		} else {
8107836SJohn.Forte@Sun.COM 			p->bi_rsrv++;
8117836SJohn.Forte@Sun.COM 		}
8127836SJohn.Forte@Sun.COM 
8137836SJohn.Forte@Sun.COM 		/* set nsc_fd ownership back link */
8147836SJohn.Forte@Sun.COM 		nsc_set_owner(p->bi_fd, krdc->iodev);
8157836SJohn.Forte@Sun.COM 		return (0);
8167836SJohn.Forte@Sun.COM 	}
8177836SJohn.Forte@Sun.COM 
8187836SJohn.Forte@Sun.COM 	/* attempt reserve */
8197836SJohn.Forte@Sun.COM 
8207836SJohn.Forte@Sun.COM 	if (!p) {
8217836SJohn.Forte@Sun.COM 		p = raw ? rid : cid;
8227836SJohn.Forte@Sun.COM 	}
8237836SJohn.Forte@Sun.COM 
8247836SJohn.Forte@Sun.COM 	if (!p->bi_fd) {
8257836SJohn.Forte@Sun.COM 		/* rpc server raced with rdc_dev_close() */
8267836SJohn.Forte@Sun.COM 		return (EIO);
8277836SJohn.Forte@Sun.COM 	}
8287836SJohn.Forte@Sun.COM 	if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
8297836SJohn.Forte@Sun.COM 		/*
8307836SJohn.Forte@Sun.COM 		 * convert failed counts into reserved counts, and add
8317836SJohn.Forte@Sun.COM 		 * in this reserve.
8327836SJohn.Forte@Sun.COM 		 */
8337836SJohn.Forte@Sun.COM 
8347836SJohn.Forte@Sun.COM 		p->bi_orsrv = p->bi_ofailed;
8357836SJohn.Forte@Sun.COM 		p->bi_rsrv = p->bi_failed;
8367836SJohn.Forte@Sun.COM 
8377836SJohn.Forte@Sun.COM 		if (other) {
8387836SJohn.Forte@Sun.COM 			p->bi_orsrv++;
8397836SJohn.Forte@Sun.COM 		} else {
8407836SJohn.Forte@Sun.COM 			p->bi_rsrv++;
8417836SJohn.Forte@Sun.COM 		}
8427836SJohn.Forte@Sun.COM 
8437836SJohn.Forte@Sun.COM 		p->bi_ofailed = 0;
8447836SJohn.Forte@Sun.COM 		p->bi_failed = 0;
8457836SJohn.Forte@Sun.COM 
8467836SJohn.Forte@Sun.COM 		/* set nsc_fd ownership back link */
8477836SJohn.Forte@Sun.COM 
8487836SJohn.Forte@Sun.COM 		nsc_set_owner(p->bi_fd, krdc->iodev);
8497836SJohn.Forte@Sun.COM 	} else if (rc != EINTR) {
8507836SJohn.Forte@Sun.COM 		/*
8517836SJohn.Forte@Sun.COM 		 * If this is the master, and the secondary is not
8527836SJohn.Forte@Sun.COM 		 * failed, then just fake this external reserve so that
8537836SJohn.Forte@Sun.COM 		 * we can do remote io to the secondary and continue to
8547836SJohn.Forte@Sun.COM 		 * provide service to the client.
8557836SJohn.Forte@Sun.COM 		 *
8567836SJohn.Forte@Sun.COM 		 * Subsequent calls to _rdc_rsrv_d() will re-try the
8577836SJohn.Forte@Sun.COM 		 * nsc_reserve() until it succeeds.
8587836SJohn.Forte@Sun.COM 		 */
8597836SJohn.Forte@Sun.COM 
8607836SJohn.Forte@Sun.COM 		if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
8617836SJohn.Forte@Sun.COM 		    !(rdc_get_vflags(urdc) & RDC_LOGGING) &&
8627836SJohn.Forte@Sun.COM 		    !((rdc_get_vflags(urdc) & RDC_SLAVE) &&
8637836SJohn.Forte@Sun.COM 		    (rdc_get_vflags(urdc) & RDC_SYNCING))) {
8647836SJohn.Forte@Sun.COM 			if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
8657836SJohn.Forte@Sun.COM 				rdc_many_enter(krdc);
8667836SJohn.Forte@Sun.COM 				/* Primary, so reverse sync needed */
8677836SJohn.Forte@Sun.COM 				rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
8687836SJohn.Forte@Sun.COM 				rdc_set_flags_log(urdc, RDC_VOL_FAILED,
8697836SJohn.Forte@Sun.COM 				    "nsc_reserve failed");
8707836SJohn.Forte@Sun.COM 				rdc_many_exit(krdc);
8717836SJohn.Forte@Sun.COM 				rc = -1;
8727836SJohn.Forte@Sun.COM #ifdef DEBUG
873*9093SRamana.Srikanth@Sun.COM 				cmn_err(CE_NOTE, "!nsc_reserve failed "
8747836SJohn.Forte@Sun.COM 				    "with rc == %d\n", rc);
8757836SJohn.Forte@Sun.COM #endif
8767836SJohn.Forte@Sun.COM 			} else {
8777836SJohn.Forte@Sun.COM 				rc = 0;
8787836SJohn.Forte@Sun.COM 			}
8797836SJohn.Forte@Sun.COM 
8807836SJohn.Forte@Sun.COM 			if (other) {
8817836SJohn.Forte@Sun.COM 				p->bi_ofailed++;
8827836SJohn.Forte@Sun.COM 			} else {
8837836SJohn.Forte@Sun.COM 				p->bi_failed++;
8847836SJohn.Forte@Sun.COM 			}
8857836SJohn.Forte@Sun.COM 
8867836SJohn.Forte@Sun.COM 			if (krdc->maxfbas == 0) {
8877836SJohn.Forte@Sun.COM 				/*
8887836SJohn.Forte@Sun.COM 				 * fake a maxfbas value for remote i/o,
8897836SJohn.Forte@Sun.COM 				 * this will get reset when the next
8907836SJohn.Forte@Sun.COM 				 * successful reserve happens as part
8917836SJohn.Forte@Sun.COM 				 * of the rdc_attach_fd() callback.
8927836SJohn.Forte@Sun.COM 				 */
8937836SJohn.Forte@Sun.COM 				krdc->maxfbas = 128;
8947836SJohn.Forte@Sun.COM 			}
8957836SJohn.Forte@Sun.COM 		}
8967836SJohn.Forte@Sun.COM 	}
8977836SJohn.Forte@Sun.COM 
8987836SJohn.Forte@Sun.COM 	if (rc == 0 && raw) {
8997836SJohn.Forte@Sun.COM 		p->bi_flag = flag;
9007836SJohn.Forte@Sun.COM 	}
9017836SJohn.Forte@Sun.COM 
9027836SJohn.Forte@Sun.COM 
9037836SJohn.Forte@Sun.COM 	return (rc);
9047836SJohn.Forte@Sun.COM }
9057836SJohn.Forte@Sun.COM 
9067836SJohn.Forte@Sun.COM /*
9077836SJohn.Forte@Sun.COM  * _rdc_rsrv_devs
9087836SJohn.Forte@Sun.COM  *	Reserve named underlying devices.
9097836SJohn.Forte@Sun.COM  *
9107836SJohn.Forte@Sun.COM  */
9117836SJohn.Forte@Sun.COM 
9127836SJohn.Forte@Sun.COM int
_rdc_rsrv_devs(rdc_k_info_t * krdc,int devs,int flag)9137836SJohn.Forte@Sun.COM _rdc_rsrv_devs(rdc_k_info_t *krdc, int devs, int flag)
9147836SJohn.Forte@Sun.COM {
9157836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
9167836SJohn.Forte@Sun.COM 	int write = 0;
9177836SJohn.Forte@Sun.COM 	int rc = 0;
9187836SJohn.Forte@Sun.COM 	int got = 0;
9197836SJohn.Forte@Sun.COM 
9207836SJohn.Forte@Sun.COM 	if (!krdc) {
9217836SJohn.Forte@Sun.COM 		return (EINVAL);
9227836SJohn.Forte@Sun.COM 	}
9237836SJohn.Forte@Sun.COM 
9247836SJohn.Forte@Sun.COM 	ASSERT(!(devs & RDC_CACHE));
9257836SJohn.Forte@Sun.COM 
9267836SJohn.Forte@Sun.COM 	mutex_enter(&krdc->devices->id_rlock);
9277836SJohn.Forte@Sun.COM 
9287836SJohn.Forte@Sun.COM 	if ((devs & (~RDC_BMP)) != 0) {
9297836SJohn.Forte@Sun.COM 		if ((rc = _rdc_rsrv_d((devs & RDC_CACHE) == 0,
9307836SJohn.Forte@Sun.COM 		    &krdc->devices->id_raw_dev, &krdc->devices->id_cache_dev,
9317836SJohn.Forte@Sun.COM 		    flag, krdc)) != 0) {
9327836SJohn.Forte@Sun.COM 			if (rc == -1) {
9337836SJohn.Forte@Sun.COM 				/*
9347836SJohn.Forte@Sun.COM 				 * we need to call rdc_write_state()
9357836SJohn.Forte@Sun.COM 				 * after we drop the mutex
9367836SJohn.Forte@Sun.COM 				 */
9377836SJohn.Forte@Sun.COM 				write = 1;
9387836SJohn.Forte@Sun.COM 				rc = 0;
9397836SJohn.Forte@Sun.COM 			} else {
9407836SJohn.Forte@Sun.COM 				cmn_err(CE_WARN,
941*9093SRamana.Srikanth@Sun.COM 				    "!rdc: nsc_reserve(%s) failed %d\n",
9427836SJohn.Forte@Sun.COM 				    nsc_pathname(krdc->c_fd), rc);
9437836SJohn.Forte@Sun.COM 			}
9447836SJohn.Forte@Sun.COM 		} else {
9457836SJohn.Forte@Sun.COM 			got |= (devs & (~RDC_BMP));
9467836SJohn.Forte@Sun.COM 		}
9477836SJohn.Forte@Sun.COM 	}
9487836SJohn.Forte@Sun.COM 
9497836SJohn.Forte@Sun.COM 	if (rc == 0 && (devs & RDC_BMP) != 0) {
9507836SJohn.Forte@Sun.COM 		if (krdc->bitmapfd == NULL)
9517836SJohn.Forte@Sun.COM 			rc = EIO;
9527836SJohn.Forte@Sun.COM 		else if ((krdc->bmaprsrv == 0) &&
9537836SJohn.Forte@Sun.COM 		    (rc = nsc_reserve(krdc->bitmapfd, 0)) != 0) {
954*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_WARN, "!rdc: nsc_reserve(%s) failed %d\n",
955*9093SRamana.Srikanth@Sun.COM 			    nsc_pathname(krdc->bitmapfd), rc);
9567836SJohn.Forte@Sun.COM 		} else {
9577836SJohn.Forte@Sun.COM 			krdc->bmaprsrv++;
9587836SJohn.Forte@Sun.COM 			got |= RDC_BMP;
9597836SJohn.Forte@Sun.COM 		}
9607836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc)) {
9617836SJohn.Forte@Sun.COM 			/* Undo any previous reserve */
9627836SJohn.Forte@Sun.COM 			if (got != 0)
9637836SJohn.Forte@Sun.COM 				_rdc_rlse_d(krdc, got);
9647836SJohn.Forte@Sun.COM 		}
9657836SJohn.Forte@Sun.COM 	}
9667836SJohn.Forte@Sun.COM 
9677836SJohn.Forte@Sun.COM 	mutex_exit(&krdc->devices->id_rlock);
9687836SJohn.Forte@Sun.COM 
9697836SJohn.Forte@Sun.COM 	if (write) {
9707836SJohn.Forte@Sun.COM 		rdc_write_state(urdc);
9717836SJohn.Forte@Sun.COM 	}
9727836SJohn.Forte@Sun.COM 
9737836SJohn.Forte@Sun.COM 	return (rc);
9747836SJohn.Forte@Sun.COM }
9757836SJohn.Forte@Sun.COM 
9767836SJohn.Forte@Sun.COM 
9777836SJohn.Forte@Sun.COM /*
9787836SJohn.Forte@Sun.COM  * Read from the remote end, ensuring that if this is a many group in
9797836SJohn.Forte@Sun.COM  * slave mode that we only remote read from the secondary with the
9807836SJohn.Forte@Sun.COM  * valid data.
9817836SJohn.Forte@Sun.COM  */
9827836SJohn.Forte@Sun.COM int
_rdc_remote_read(rdc_k_info_t * krdc,nsc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag)9837836SJohn.Forte@Sun.COM _rdc_remote_read(rdc_k_info_t *krdc, nsc_buf_t *h, nsc_off_t pos,
9847836SJohn.Forte@Sun.COM     nsc_size_t len, int flag)
9857836SJohn.Forte@Sun.COM {
9867836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
9877836SJohn.Forte@Sun.COM 	rdc_k_info_t *this = krdc;	/* krdc that was requested */
9887836SJohn.Forte@Sun.COM 	int rc;
9897836SJohn.Forte@Sun.COM 
9907836SJohn.Forte@Sun.COM 	if (flag & NSC_RDAHEAD) {
9917836SJohn.Forte@Sun.COM 		/*
9927836SJohn.Forte@Sun.COM 		 * no point in doing readahead remotely,
9937836SJohn.Forte@Sun.COM 		 * just say we did it ok - the client is about to
9947836SJohn.Forte@Sun.COM 		 * throw this buffer away as soon as we return.
9957836SJohn.Forte@Sun.COM 		 */
9967836SJohn.Forte@Sun.COM 		return (NSC_DONE);
9977836SJohn.Forte@Sun.COM 	}
9987836SJohn.Forte@Sun.COM 
9997836SJohn.Forte@Sun.COM 	/*
10007836SJohn.Forte@Sun.COM 	 * If this is a many group with a reverse sync in progress and
10017836SJohn.Forte@Sun.COM 	 * this is not the slave krdc/urdc, then search for the slave
10027836SJohn.Forte@Sun.COM 	 * so that we can do the remote io from the correct secondary.
10037836SJohn.Forte@Sun.COM 	 */
10047836SJohn.Forte@Sun.COM 	if ((rdc_get_mflags(urdc) & RDC_SLAVE) &&
10057836SJohn.Forte@Sun.COM 	    !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
10067836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
10077836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
10087836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
10097836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
10107836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
10117836SJohn.Forte@Sun.COM 				continue;
10127836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_SLAVE)
10137836SJohn.Forte@Sun.COM 				break;
10147836SJohn.Forte@Sun.COM 		}
10157836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
10167836SJohn.Forte@Sun.COM 
10177836SJohn.Forte@Sun.COM 		this = krdc;
10187836SJohn.Forte@Sun.COM 	}
10197836SJohn.Forte@Sun.COM 
10207836SJohn.Forte@Sun.COM read1:
10217836SJohn.Forte@Sun.COM 	if (rdc_get_vflags(urdc) & RDC_LOGGING) {
10227836SJohn.Forte@Sun.COM 		/* cannot do remote io without the remote node! */
10237836SJohn.Forte@Sun.COM 		rc = ENETDOWN;
10247836SJohn.Forte@Sun.COM 		goto read2;
10257836SJohn.Forte@Sun.COM 	}
10267836SJohn.Forte@Sun.COM 
10277836SJohn.Forte@Sun.COM 
10287836SJohn.Forte@Sun.COM 	/* wait for the remote end to have the latest data */
10297836SJohn.Forte@Sun.COM 
10307836SJohn.Forte@Sun.COM 	if (IS_ASYNC(urdc)) {
10317836SJohn.Forte@Sun.COM 		while (krdc->group->ra_queue.blocks != 0) {
10327836SJohn.Forte@Sun.COM 			if (!krdc->group->rdc_writer)
10337836SJohn.Forte@Sun.COM 				(void) rdc_writer(krdc->index);
10347836SJohn.Forte@Sun.COM 
10357836SJohn.Forte@Sun.COM 			(void) rdc_drain_queue(krdc->index);
10367836SJohn.Forte@Sun.COM 		}
10377836SJohn.Forte@Sun.COM 	}
10387836SJohn.Forte@Sun.COM 
10397836SJohn.Forte@Sun.COM 	if (krdc->io_kstats) {
10407836SJohn.Forte@Sun.COM 		mutex_enter(krdc->io_kstats->ks_lock);
10417836SJohn.Forte@Sun.COM 		kstat_runq_enter(KSTAT_IO_PTR(krdc->io_kstats));
10427836SJohn.Forte@Sun.COM 		mutex_exit(krdc->io_kstats->ks_lock);
10437836SJohn.Forte@Sun.COM 	}
10447836SJohn.Forte@Sun.COM 
10457836SJohn.Forte@Sun.COM 	rc = rdc_net_read(krdc->index, krdc->remote_index, h, pos, len);
10467836SJohn.Forte@Sun.COM 
10477836SJohn.Forte@Sun.COM 	if (krdc->io_kstats) {
10487836SJohn.Forte@Sun.COM 		mutex_enter(krdc->io_kstats->ks_lock);
10497836SJohn.Forte@Sun.COM 		kstat_runq_exit(KSTAT_IO_PTR(krdc->io_kstats));
10507836SJohn.Forte@Sun.COM 		mutex_exit(krdc->io_kstats->ks_lock);
10517836SJohn.Forte@Sun.COM 	}
10527836SJohn.Forte@Sun.COM 
10537836SJohn.Forte@Sun.COM 	/* If read error keep trying every secondary until no more */
10547836SJohn.Forte@Sun.COM read2:
10557836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc) && IS_MANY(krdc) &&
10567836SJohn.Forte@Sun.COM 	    !(rdc_get_mflags(urdc) & RDC_SLAVE)) {
10577836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
10587836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
10597836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
10607836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
10617836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
10627836SJohn.Forte@Sun.COM 				continue;
10637836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
10647836SJohn.Forte@Sun.COM 			goto read1;
10657836SJohn.Forte@Sun.COM 		}
10667836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
10677836SJohn.Forte@Sun.COM 	}
10687836SJohn.Forte@Sun.COM 
10697836SJohn.Forte@Sun.COM 	return (rc);
10707836SJohn.Forte@Sun.COM }
10717836SJohn.Forte@Sun.COM 
10727836SJohn.Forte@Sun.COM 
10737836SJohn.Forte@Sun.COM /*
10747836SJohn.Forte@Sun.COM  * _rdc_alloc_buf
10757836SJohn.Forte@Sun.COM  *	Allocate a buffer of data
10767836SJohn.Forte@Sun.COM  *
10777836SJohn.Forte@Sun.COM  * Calling/Exit State:
10787836SJohn.Forte@Sun.COM  *	Returns NSC_DONE or NSC_HIT for success, NSC_PENDING for async
10797836SJohn.Forte@Sun.COM  *	I/O, > 0 is an error code.
10807836SJohn.Forte@Sun.COM  *
10817836SJohn.Forte@Sun.COM  * Description:
10827836SJohn.Forte@Sun.COM  */
10837836SJohn.Forte@Sun.COM int rdcbufs = 0;
10847836SJohn.Forte@Sun.COM 
10857836SJohn.Forte@Sun.COM static int
_rdc_alloc_buf(rdc_fd_t * rfd,nsc_off_t pos,nsc_size_t len,int flag,rdc_buf_t ** ptr)10867836SJohn.Forte@Sun.COM _rdc_alloc_buf(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len, int flag,
10877836SJohn.Forte@Sun.COM     rdc_buf_t **ptr)
10887836SJohn.Forte@Sun.COM {
10897836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = rfd->rdc_info;
10907836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
10917836SJohn.Forte@Sun.COM 	nsc_vec_t *vec = NULL;
10927836SJohn.Forte@Sun.COM 	rdc_buf_t *h;
10937836SJohn.Forte@Sun.COM 	size_t size;
10947836SJohn.Forte@Sun.COM 	int ioflag;
10957836SJohn.Forte@Sun.COM 	int rc = 0;
10967836SJohn.Forte@Sun.COM 
10977836SJohn.Forte@Sun.COM 	if (RDC_IS_BMP(rfd) || RDC_IS_QUE(rfd))
10987836SJohn.Forte@Sun.COM 		return (EIO);
10997836SJohn.Forte@Sun.COM 
11007836SJohn.Forte@Sun.COM 	if (len == 0)
11017836SJohn.Forte@Sun.COM 		return (EINVAL);
11027836SJohn.Forte@Sun.COM 
11037836SJohn.Forte@Sun.COM 	if (flag & NSC_WRBUF) {
11047836SJohn.Forte@Sun.COM 
11057836SJohn.Forte@Sun.COM 		if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) &&
11067836SJohn.Forte@Sun.COM 		    !(rdc_get_vflags(urdc) & RDC_LOGGING)) {
11077836SJohn.Forte@Sun.COM 			/*
11087836SJohn.Forte@Sun.COM 			 * Forbid writes to secondary unless logging.
11097836SJohn.Forte@Sun.COM 			 */
11107836SJohn.Forte@Sun.COM 			return (EIO);
11117836SJohn.Forte@Sun.COM 		}
11127836SJohn.Forte@Sun.COM 	}
11137836SJohn.Forte@Sun.COM 
11147836SJohn.Forte@Sun.COM 	if (!(rdc_get_vflags(urdc) & RDC_PRIMARY) &&
11157836SJohn.Forte@Sun.COM 	    (rdc_get_vflags(urdc) & RDC_SYNC_NEEDED)) {
11167836SJohn.Forte@Sun.COM 		/*
11177836SJohn.Forte@Sun.COM 		 * Forbid any io to secondary if it needs a sync.
11187836SJohn.Forte@Sun.COM 		 */
11197836SJohn.Forte@Sun.COM 		return (EIO);
11207836SJohn.Forte@Sun.COM 	}
11217836SJohn.Forte@Sun.COM 
11227836SJohn.Forte@Sun.COM 	if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
11237836SJohn.Forte@Sun.COM 	    (rdc_get_vflags(urdc) & RDC_RSYNC_NEEDED) &&
11247836SJohn.Forte@Sun.COM 	    !(rdc_get_vflags(urdc) & RDC_VOL_FAILED) &&
11257836SJohn.Forte@Sun.COM 	    !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
11267836SJohn.Forte@Sun.COM 		/*
11277836SJohn.Forte@Sun.COM 		 * Forbid any io to primary if it needs a reverse sync
11287836SJohn.Forte@Sun.COM 		 * and is not actively syncing.
11297836SJohn.Forte@Sun.COM 		 */
11307836SJohn.Forte@Sun.COM 		return (EIO);
11317836SJohn.Forte@Sun.COM 	}
11327836SJohn.Forte@Sun.COM 
11337836SJohn.Forte@Sun.COM 	/* Bounds checking */
11347836SJohn.Forte@Sun.COM 	ASSERT(urdc->volume_size != 0);
11357836SJohn.Forte@Sun.COM 	if (pos + len > urdc->volume_size) {
11367836SJohn.Forte@Sun.COM #ifdef DEBUG
11377836SJohn.Forte@Sun.COM 		cmn_err(CE_NOTE,
1138*9093SRamana.Srikanth@Sun.COM 		    "!rdc: Attempt to access beyond end of rdc volume");
11397836SJohn.Forte@Sun.COM #endif
11407836SJohn.Forte@Sun.COM 		return (EIO);
11417836SJohn.Forte@Sun.COM 	}
11427836SJohn.Forte@Sun.COM 
11437836SJohn.Forte@Sun.COM 	h = *ptr;
11447836SJohn.Forte@Sun.COM 	if (h == NULL) {
11457836SJohn.Forte@Sun.COM 		/* should never happen (nsctl does this for us) */
11467836SJohn.Forte@Sun.COM #ifdef DEBUG
1147*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!_rdc_alloc_buf entered without buffer!");
11487836SJohn.Forte@Sun.COM #endif
11497836SJohn.Forte@Sun.COM 		h = (rdc_buf_t *)_rdc_alloc_handle(NULL, NULL, NULL, rfd);
11507836SJohn.Forte@Sun.COM 		if (h == NULL)
11517836SJohn.Forte@Sun.COM 			return (ENOMEM);
11527836SJohn.Forte@Sun.COM 
11537836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_flag &= ~NSC_HALLOCATED;
11547836SJohn.Forte@Sun.COM 		*ptr = h;
11557836SJohn.Forte@Sun.COM 	}
11567836SJohn.Forte@Sun.COM 
11577836SJohn.Forte@Sun.COM 	if (flag & NSC_NOBLOCK) {
11587836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1159*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_alloc_buf: removing unsupported NSC_NOBLOCK flag");
11607836SJohn.Forte@Sun.COM 		flag &= ~(NSC_NOBLOCK);
11617836SJohn.Forte@Sun.COM 	}
11627836SJohn.Forte@Sun.COM 
11637836SJohn.Forte@Sun.COM 	h->rdc_bufh.sb_error = 0;
11647836SJohn.Forte@Sun.COM 	h->rdc_bufh.sb_flag |= flag;
11657836SJohn.Forte@Sun.COM 	h->rdc_bufh.sb_pos = pos;
11667836SJohn.Forte@Sun.COM 	h->rdc_bufh.sb_len = len;
11677836SJohn.Forte@Sun.COM 	ioflag = flag;
11687836SJohn.Forte@Sun.COM 
11697836SJohn.Forte@Sun.COM 	bzero(&h->rdc_sync, sizeof (h->rdc_sync));
11707836SJohn.Forte@Sun.COM 	mutex_init(&h->rdc_sync.lock, NULL, MUTEX_DRIVER, NULL);
11717836SJohn.Forte@Sun.COM 	cv_init(&h->rdc_sync.cv, NULL, CV_DRIVER, NULL);
11727836SJohn.Forte@Sun.COM 
11737836SJohn.Forte@Sun.COM 	if (flag & NSC_WRBUF)
11747836SJohn.Forte@Sun.COM 		_rdc_async_throttle(krdc, len);	/* throttle incoming io */
11757836SJohn.Forte@Sun.COM 
11767836SJohn.Forte@Sun.COM 	/*
11777836SJohn.Forte@Sun.COM 	 * Use remote io when:
11787836SJohn.Forte@Sun.COM 	 * - local volume is failed
11797836SJohn.Forte@Sun.COM 	 * - reserve status is failed
11807836SJohn.Forte@Sun.COM 	 */
11817836SJohn.Forte@Sun.COM 	if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) || IS_RFAILED(krdc)) {
11827836SJohn.Forte@Sun.COM 		rc = EIO;
11837836SJohn.Forte@Sun.COM 	} else {
11847836SJohn.Forte@Sun.COM 		rc = nsc_alloc_buf(RDC_U_FD(krdc), pos, len,
1185*9093SRamana.Srikanth@Sun.COM 		    ioflag, &h->rdc_bufp);
11867836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc)) {
11877836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
11887836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
11897836SJohn.Forte@Sun.COM 				/* Primary, so reverse sync needed */
11907836SJohn.Forte@Sun.COM 				rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
11917836SJohn.Forte@Sun.COM 			} else {
11927836SJohn.Forte@Sun.COM 				/* Secondary, so forward sync needed */
11937836SJohn.Forte@Sun.COM 				rdc_set_flags(urdc, RDC_SYNC_NEEDED);
11947836SJohn.Forte@Sun.COM 			}
11957836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
1196*9093SRamana.Srikanth@Sun.COM 			    "nsc_alloc_buf failed");
11977836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
11987836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
11997836SJohn.Forte@Sun.COM 		}
12007836SJohn.Forte@Sun.COM 	}
12017836SJohn.Forte@Sun.COM 
12027836SJohn.Forte@Sun.COM 	if (RDC_SUCCESS(rc)) {
12037836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_vec = h->rdc_bufp->sb_vec;
12047836SJohn.Forte@Sun.COM 		h->rdc_flags |= RDC_ALLOC;
12057836SJohn.Forte@Sun.COM 
12067836SJohn.Forte@Sun.COM 		/*
12077836SJohn.Forte@Sun.COM 		 * If in slave and reading data, remote read on top of
12087836SJohn.Forte@Sun.COM 		 * the buffer to ensure that we have the latest data.
12097836SJohn.Forte@Sun.COM 		 */
12107836SJohn.Forte@Sun.COM 		if ((flag & NSC_READ) &&
12117836SJohn.Forte@Sun.COM 		    (rdc_get_vflags(urdc) & RDC_PRIMARY) &&
12127836SJohn.Forte@Sun.COM 		    (rdc_get_mflags(urdc) & RDC_SLAVE)) {
12137836SJohn.Forte@Sun.COM 			rc = _rdc_remote_read(krdc, &h->rdc_bufh,
12147836SJohn.Forte@Sun.COM 			    pos, len, flag);
12157836SJohn.Forte@Sun.COM 			/*
12167836SJohn.Forte@Sun.COM 			 * Set NSC_MIXED so that the
12177836SJohn.Forte@Sun.COM 			 * cache will throw away this buffer when we free
12187836SJohn.Forte@Sun.COM 			 * it since we have combined data from multiple
12197836SJohn.Forte@Sun.COM 			 * sources into a single buffer.
12207836SJohn.Forte@Sun.COM 			 */
12217836SJohn.Forte@Sun.COM 			h->rdc_bufp->sb_flag |= NSC_MIXED;
12227836SJohn.Forte@Sun.COM 		}
12237836SJohn.Forte@Sun.COM 	}
12247836SJohn.Forte@Sun.COM 
12257836SJohn.Forte@Sun.COM 	/*
12267836SJohn.Forte@Sun.COM 	 * If nsc_alloc_buf above fails, or local volume is failed or
12277836SJohn.Forte@Sun.COM 	 * bitmap is failed or reserve, then we fill the buf from remote
12287836SJohn.Forte@Sun.COM 	 */
12297836SJohn.Forte@Sun.COM 
12307836SJohn.Forte@Sun.COM 	if ((!RDC_SUCCESS(rc)) && (rdc_get_vflags(urdc) & RDC_PRIMARY) &&
12317836SJohn.Forte@Sun.COM 	    !(rdc_get_vflags(urdc) & RDC_LOGGING)) {
12327836SJohn.Forte@Sun.COM 		if (flag & NSC_NODATA) {
12337836SJohn.Forte@Sun.COM 			ASSERT(!(flag & NSC_READ));
12347836SJohn.Forte@Sun.COM 			h->rdc_flags |= RDC_REMOTE_BUF;
12357836SJohn.Forte@Sun.COM 			h->rdc_bufh.sb_vec = NULL;
12367836SJohn.Forte@Sun.COM 		} else {
12377836SJohn.Forte@Sun.COM 			size = sizeof (nsc_vec_t) * 2;
12387836SJohn.Forte@Sun.COM 			h->rdc_vsize = size + FBA_SIZE(len);
12397836SJohn.Forte@Sun.COM 			vec = kmem_zalloc(h->rdc_vsize, KM_SLEEP);
12407836SJohn.Forte@Sun.COM 
12417836SJohn.Forte@Sun.COM 			if (!vec) {
12427836SJohn.Forte@Sun.COM 				rc = ENOMEM;
12437836SJohn.Forte@Sun.COM 				goto error;
12447836SJohn.Forte@Sun.COM 			}
12457836SJohn.Forte@Sun.COM 
12467836SJohn.Forte@Sun.COM 			/* single flat buffer */
12477836SJohn.Forte@Sun.COM 
12487836SJohn.Forte@Sun.COM 			vec[0].sv_addr = (uchar_t *)vec + size;
12497836SJohn.Forte@Sun.COM 			vec[0].sv_len  = FBA_SIZE(len);
12507836SJohn.Forte@Sun.COM 			vec[0].sv_vme  = 0;
12517836SJohn.Forte@Sun.COM 
12527836SJohn.Forte@Sun.COM 			/* null terminator */
12537836SJohn.Forte@Sun.COM 
12547836SJohn.Forte@Sun.COM 			vec[1].sv_addr = NULL;
12557836SJohn.Forte@Sun.COM 			vec[1].sv_len  = 0;
12567836SJohn.Forte@Sun.COM 			vec[1].sv_vme  = 0;
12577836SJohn.Forte@Sun.COM 
12587836SJohn.Forte@Sun.COM 			h->rdc_bufh.sb_vec = vec;
12597836SJohn.Forte@Sun.COM 			h->rdc_flags |= RDC_REMOTE_BUF;
12607836SJohn.Forte@Sun.COM 			h->rdc_flags |= RDC_VEC_ALLOC;
12617836SJohn.Forte@Sun.COM 		}
12627836SJohn.Forte@Sun.COM 
12637836SJohn.Forte@Sun.COM 		if (flag & NSC_READ) {
12647836SJohn.Forte@Sun.COM 			rc = _rdc_remote_read(krdc, &h->rdc_bufh,
12657836SJohn.Forte@Sun.COM 			    pos, len, flag);
12667836SJohn.Forte@Sun.COM 		} else {
12677836SJohn.Forte@Sun.COM 			rc = NSC_DONE;
12687836SJohn.Forte@Sun.COM 		}
12697836SJohn.Forte@Sun.COM 	}
12707836SJohn.Forte@Sun.COM error:
12717836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc)) {
12727836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc;
12737836SJohn.Forte@Sun.COM 	}
12747836SJohn.Forte@Sun.COM 
12757836SJohn.Forte@Sun.COM 	return (rc);
12767836SJohn.Forte@Sun.COM }
12777836SJohn.Forte@Sun.COM 
12787836SJohn.Forte@Sun.COM 
12797836SJohn.Forte@Sun.COM /*
12807836SJohn.Forte@Sun.COM  * _rdc_free_buf
12817836SJohn.Forte@Sun.COM  */
12827836SJohn.Forte@Sun.COM 
12837836SJohn.Forte@Sun.COM static int
_rdc_free_buf(rdc_buf_t * h)12847836SJohn.Forte@Sun.COM _rdc_free_buf(rdc_buf_t *h)
12857836SJohn.Forte@Sun.COM {
12867836SJohn.Forte@Sun.COM 	int rc = 0;
12877836SJohn.Forte@Sun.COM 
12887836SJohn.Forte@Sun.COM 	if (h->rdc_flags & RDC_ALLOC) {
12897836SJohn.Forte@Sun.COM 		if (h->rdc_bufp) {
12907836SJohn.Forte@Sun.COM 			rc = nsc_free_buf(h->rdc_bufp);
12917836SJohn.Forte@Sun.COM 		}
12927836SJohn.Forte@Sun.COM 		h->rdc_flags &= ~(RDC_ALLOC);
12937836SJohn.Forte@Sun.COM 
12947836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc)) {
12957836SJohn.Forte@Sun.COM #ifdef DEBUG
12967836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
1297*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_free_buf(%p): nsc_free_buf(%p) returned %d",
1298*9093SRamana.Srikanth@Sun.COM 			    (void *) h, (void *) h->rdc_bufp, rc);
12997836SJohn.Forte@Sun.COM #endif
13007836SJohn.Forte@Sun.COM 			return (rc);
13017836SJohn.Forte@Sun.COM 		}
13027836SJohn.Forte@Sun.COM 	}
13037836SJohn.Forte@Sun.COM 
13047836SJohn.Forte@Sun.COM 	if (h->rdc_flags & (RDC_REMOTE_BUF|RDC_VEC_ALLOC)) {
13057836SJohn.Forte@Sun.COM 		if (h->rdc_flags & RDC_VEC_ALLOC) {
13067836SJohn.Forte@Sun.COM 			kmem_free(h->rdc_bufh.sb_vec, h->rdc_vsize);
13077836SJohn.Forte@Sun.COM 		}
13087836SJohn.Forte@Sun.COM 		h->rdc_flags &= ~(RDC_REMOTE_BUF|RDC_VEC_ALLOC);
13097836SJohn.Forte@Sun.COM 	}
13107836SJohn.Forte@Sun.COM 
13117836SJohn.Forte@Sun.COM 	if (h->rdc_anon) {
13127836SJohn.Forte@Sun.COM 		/* anon buffers still pending */
13137836SJohn.Forte@Sun.COM 		DTRACE_PROBE1(rdc_free_buf_err, aio_buf_t, h->rdc_anon);
13147836SJohn.Forte@Sun.COM 	}
13157836SJohn.Forte@Sun.COM 
13167836SJohn.Forte@Sun.COM 	if ((h->rdc_bufh.sb_flag & NSC_HALLOCATED) == 0) {
13177836SJohn.Forte@Sun.COM 		rc = _rdc_free_handle(h, h->rdc_fd);
13187836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc)) {
13197836SJohn.Forte@Sun.COM #ifdef DEBUG
13207836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
1321*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_free_buf(%p): _rdc_free_handle returned %d",
1322*9093SRamana.Srikanth@Sun.COM 			    (void *) h, rc);
13237836SJohn.Forte@Sun.COM #endif
13247836SJohn.Forte@Sun.COM 			return (rc);
13257836SJohn.Forte@Sun.COM 		}
13267836SJohn.Forte@Sun.COM 	} else {
13277836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_flag = NSC_HALLOCATED;
13287836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_vec = NULL;
13297836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = 0;
13307836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_pos = 0;
13317836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_len = 0;
13327836SJohn.Forte@Sun.COM 		h->rdc_anon = NULL;
13337836SJohn.Forte@Sun.COM 		h->rdc_vsize = 0;
13347836SJohn.Forte@Sun.COM 
13357836SJohn.Forte@Sun.COM 		cv_destroy(&h->rdc_sync.cv);
13367836SJohn.Forte@Sun.COM 		mutex_destroy(&h->rdc_sync.lock);
13377836SJohn.Forte@Sun.COM 
13387836SJohn.Forte@Sun.COM 	}
13397836SJohn.Forte@Sun.COM 
13407836SJohn.Forte@Sun.COM 	return (0);
13417836SJohn.Forte@Sun.COM }
13427836SJohn.Forte@Sun.COM 
13437836SJohn.Forte@Sun.COM 
13447836SJohn.Forte@Sun.COM /*
13457836SJohn.Forte@Sun.COM  * _rdc_open
13467836SJohn.Forte@Sun.COM  *	Open a device
13477836SJohn.Forte@Sun.COM  *
13487836SJohn.Forte@Sun.COM  * Calling/Exit State:
13497836SJohn.Forte@Sun.COM  *	Returns a token to identify the device.
13507836SJohn.Forte@Sun.COM  *
13517836SJohn.Forte@Sun.COM  * Description:
13527836SJohn.Forte@Sun.COM  *	Performs the housekeeping operations associated with an upper layer
13537836SJohn.Forte@Sun.COM  *	of the nsctl stack opening a device.
13547836SJohn.Forte@Sun.COM  */
13557836SJohn.Forte@Sun.COM 
13567836SJohn.Forte@Sun.COM /* ARGSUSED */
13577836SJohn.Forte@Sun.COM 
13587836SJohn.Forte@Sun.COM static int
_rdc_open(char * path,int flag,blind_t * cdp,nsc_iodev_t * iodev)13597836SJohn.Forte@Sun.COM _rdc_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
13607836SJohn.Forte@Sun.COM {
13617836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc;
13627836SJohn.Forte@Sun.COM #ifdef DEBUG
13637836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc;
13647836SJohn.Forte@Sun.COM #endif
13657836SJohn.Forte@Sun.COM 	rdc_fd_t *rfd;
13667836SJohn.Forte@Sun.COM 	int raw = ((flag & NSC_CACHE) == 0);
13677836SJohn.Forte@Sun.COM 	int index;
13687836SJohn.Forte@Sun.COM 	int bmp = 0;
13697836SJohn.Forte@Sun.COM 	int queue = 0;
13707836SJohn.Forte@Sun.COM 
13717836SJohn.Forte@Sun.COM 	rfd = kmem_zalloc(sizeof (*rfd), KM_SLEEP);
13727836SJohn.Forte@Sun.COM 	if (!rfd)
13737836SJohn.Forte@Sun.COM 		return (ENOMEM);
13747836SJohn.Forte@Sun.COM 
13757836SJohn.Forte@Sun.COM 	/*
13767836SJohn.Forte@Sun.COM 	 * Take config lock to prevent a race with the
13777836SJohn.Forte@Sun.COM 	 * (de)configuration code.
13787836SJohn.Forte@Sun.COM 	 */
13797836SJohn.Forte@Sun.COM 
13807836SJohn.Forte@Sun.COM 	mutex_enter(&rdc_conf_lock);
13817836SJohn.Forte@Sun.COM 
13827836SJohn.Forte@Sun.COM 	index = rdc_lookup_enabled(path, 0);
13837836SJohn.Forte@Sun.COM 	if (index < 0) {
13847836SJohn.Forte@Sun.COM 		index = rdc_lookup_bitmap(path);
13857836SJohn.Forte@Sun.COM 		if (index >= 0)
13867836SJohn.Forte@Sun.COM 			bmp = 1;
13877836SJohn.Forte@Sun.COM 	}
13887836SJohn.Forte@Sun.COM 	if (index < 0) {
13897836SJohn.Forte@Sun.COM 		index = rdc_lookup_diskq(path);
13907836SJohn.Forte@Sun.COM 		if (index >= 0)
13917836SJohn.Forte@Sun.COM 			queue = 1;
13927836SJohn.Forte@Sun.COM 	}
13937836SJohn.Forte@Sun.COM 	if (index < 0) {
13947836SJohn.Forte@Sun.COM 		/* not found in config */
13957836SJohn.Forte@Sun.COM 		mutex_exit(&rdc_conf_lock);
13967836SJohn.Forte@Sun.COM 		kmem_free(rfd, sizeof (*rfd));
13977836SJohn.Forte@Sun.COM 		return (ENXIO);
13987836SJohn.Forte@Sun.COM 	}
13997836SJohn.Forte@Sun.COM #ifdef DEBUG
14007836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[index];
14017836SJohn.Forte@Sun.COM #endif
14027836SJohn.Forte@Sun.COM 	krdc = &rdc_k_info[index];
14037836SJohn.Forte@Sun.COM 
14047836SJohn.Forte@Sun.COM 	mutex_exit(&rdc_conf_lock);
14057836SJohn.Forte@Sun.COM 
14067836SJohn.Forte@Sun.COM 	rdc_group_enter(krdc);
14077836SJohn.Forte@Sun.COM 
14087836SJohn.Forte@Sun.COM 	ASSERT(IS_ENABLED(urdc));
14097836SJohn.Forte@Sun.COM 
14107836SJohn.Forte@Sun.COM 	if (bmp) {
14117836SJohn.Forte@Sun.COM 		krdc->b_ref++;
14127836SJohn.Forte@Sun.COM 	} else if (raw) {
14137836SJohn.Forte@Sun.COM 		krdc->r_ref++;
14147836SJohn.Forte@Sun.COM 	} else if (!queue) {
14157836SJohn.Forte@Sun.COM 		krdc->c_ref++;
14167836SJohn.Forte@Sun.COM 	}
14177836SJohn.Forte@Sun.COM 
14187836SJohn.Forte@Sun.COM 	rfd->rdc_info = krdc;
14197836SJohn.Forte@Sun.COM 	if (bmp)
14207836SJohn.Forte@Sun.COM 		rfd->rdc_type = RDC_BMP;
14217836SJohn.Forte@Sun.COM 	else if (queue)
14227836SJohn.Forte@Sun.COM 		rfd->rdc_type = RDC_QUE;
14237836SJohn.Forte@Sun.COM 	else
14247836SJohn.Forte@Sun.COM 		rfd->rdc_oflags = flag;
14257836SJohn.Forte@Sun.COM 
14267836SJohn.Forte@Sun.COM 	rdc_group_exit(krdc);
14277836SJohn.Forte@Sun.COM 
14287836SJohn.Forte@Sun.COM 	*cdp = (blind_t)rfd;
14297836SJohn.Forte@Sun.COM 
14307836SJohn.Forte@Sun.COM 	return (0);
14317836SJohn.Forte@Sun.COM }
14327836SJohn.Forte@Sun.COM 
14337836SJohn.Forte@Sun.COM static int
_rdc_openc(char * path,int flag,blind_t * cdp,nsc_iodev_t * iodev)14347836SJohn.Forte@Sun.COM _rdc_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
14357836SJohn.Forte@Sun.COM {
14367836SJohn.Forte@Sun.COM 	return (_rdc_open(path, NSC_CACHE|flag, cdp, iodev));
14377836SJohn.Forte@Sun.COM }
14387836SJohn.Forte@Sun.COM 
14397836SJohn.Forte@Sun.COM static int
_rdc_openr(char * path,int flag,blind_t * cdp,nsc_iodev_t * iodev)14407836SJohn.Forte@Sun.COM _rdc_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
14417836SJohn.Forte@Sun.COM {
14427836SJohn.Forte@Sun.COM 	return (_rdc_open(path, NSC_DEVICE|flag, cdp, iodev));
14437836SJohn.Forte@Sun.COM }
14447836SJohn.Forte@Sun.COM 
14457836SJohn.Forte@Sun.COM 
14467836SJohn.Forte@Sun.COM /*
14477836SJohn.Forte@Sun.COM  * _rdc_close
14487836SJohn.Forte@Sun.COM  *	Close a device
14497836SJohn.Forte@Sun.COM  *
14507836SJohn.Forte@Sun.COM  * Calling/Exit State:
14517836SJohn.Forte@Sun.COM  *	Always succeeds - returns 0
14527836SJohn.Forte@Sun.COM  *
14537836SJohn.Forte@Sun.COM  * Description:
14547836SJohn.Forte@Sun.COM  *	Performs the housekeeping operations associated with an upper layer
14557836SJohn.Forte@Sun.COM  *	of the sd stack closing a shadowed device.
14567836SJohn.Forte@Sun.COM  */
14577836SJohn.Forte@Sun.COM 
14587836SJohn.Forte@Sun.COM static int
_rdc_close(rfd)14597836SJohn.Forte@Sun.COM _rdc_close(rfd)
14607836SJohn.Forte@Sun.COM rdc_fd_t *rfd;
14617836SJohn.Forte@Sun.COM {
14627836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = rfd->rdc_info;
14637836SJohn.Forte@Sun.COM 	int bmp = RDC_IS_BMP(rfd);
14647836SJohn.Forte@Sun.COM 	int raw = RDC_IS_RAW(rfd);
14657836SJohn.Forte@Sun.COM 	int queue = RDC_IS_QUE(rfd);
14667836SJohn.Forte@Sun.COM 
14677836SJohn.Forte@Sun.COM 	/*
14687836SJohn.Forte@Sun.COM 	 * we don't keep ref counts for the queue, so skip this stuff.
14697836SJohn.Forte@Sun.COM 	 * we may not even have a valid krdc at this point
14707836SJohn.Forte@Sun.COM 	 */
14717836SJohn.Forte@Sun.COM 	if (queue)
14727836SJohn.Forte@Sun.COM 		goto queue;
14737836SJohn.Forte@Sun.COM 	rdc_group_enter(krdc);
14747836SJohn.Forte@Sun.COM 
14757836SJohn.Forte@Sun.COM 	if (bmp) {
14767836SJohn.Forte@Sun.COM 		krdc->b_ref--;
14777836SJohn.Forte@Sun.COM 	} else if (raw && !queue) {
14787836SJohn.Forte@Sun.COM 		krdc->r_ref--;
14797836SJohn.Forte@Sun.COM 	} else if (!queue) {
14807836SJohn.Forte@Sun.COM 		krdc->c_ref--;
14817836SJohn.Forte@Sun.COM 	}
14827836SJohn.Forte@Sun.COM 
14837836SJohn.Forte@Sun.COM 	if (krdc->closing) {
14847836SJohn.Forte@Sun.COM 		cv_broadcast(&krdc->closingcv);
14857836SJohn.Forte@Sun.COM 	}
14867836SJohn.Forte@Sun.COM 
14877836SJohn.Forte@Sun.COM 	rdc_group_exit(krdc);
14887836SJohn.Forte@Sun.COM queue:
14897836SJohn.Forte@Sun.COM 	kmem_free(rfd, sizeof (*rfd));
14907836SJohn.Forte@Sun.COM 	return (0);
14917836SJohn.Forte@Sun.COM }
14927836SJohn.Forte@Sun.COM 
14937836SJohn.Forte@Sun.COM /*
14947836SJohn.Forte@Sun.COM  * _rdc_alloc_handle
14957836SJohn.Forte@Sun.COM  *	Allocate a handle
14967836SJohn.Forte@Sun.COM  *
14977836SJohn.Forte@Sun.COM  */
14987836SJohn.Forte@Sun.COM 
14997836SJohn.Forte@Sun.COM static nsc_buf_t *
_rdc_alloc_handle(void (* d_cb)(),void (* r_cb)(),void (* w_cb)(),rdc_fd_t * rfd)15007836SJohn.Forte@Sun.COM _rdc_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), rdc_fd_t *rfd)
15017836SJohn.Forte@Sun.COM {
15027836SJohn.Forte@Sun.COM 	rdc_buf_t *h;
15037836SJohn.Forte@Sun.COM 
15047836SJohn.Forte@Sun.COM 	h = kmem_zalloc(sizeof (*h), KM_SLEEP);
15057836SJohn.Forte@Sun.COM 	if (!h)
15067836SJohn.Forte@Sun.COM 		return (NULL);
15077836SJohn.Forte@Sun.COM 
15087836SJohn.Forte@Sun.COM 	h->rdc_bufp = nsc_alloc_handle(RDC_FD(rfd), d_cb, r_cb, w_cb);
15097836SJohn.Forte@Sun.COM 	if (!h->rdc_bufp) {
15107836SJohn.Forte@Sun.COM 		if (!IS_RFAILED(rfd->rdc_info)) {
15117836SJohn.Forte@Sun.COM 			/*
15127836SJohn.Forte@Sun.COM 			 * This is a real failure from the io provider below.
15137836SJohn.Forte@Sun.COM 			 */
15147836SJohn.Forte@Sun.COM 			kmem_free(h, sizeof (*h));
15157836SJohn.Forte@Sun.COM 			return (NULL);
15167836SJohn.Forte@Sun.COM 		} else {
15177836SJohn.Forte@Sun.COM 			/* EMPTY */
15187836SJohn.Forte@Sun.COM 			/*
15197836SJohn.Forte@Sun.COM 			 * This is just a failed primary device where
15207836SJohn.Forte@Sun.COM 			 * we can do remote io to the secondary.
15217836SJohn.Forte@Sun.COM 			 */
15227836SJohn.Forte@Sun.COM 		}
15237836SJohn.Forte@Sun.COM 	}
15247836SJohn.Forte@Sun.COM 
15257836SJohn.Forte@Sun.COM 	h->rdc_bufh.sb_flag = NSC_HALLOCATED;
15267836SJohn.Forte@Sun.COM 	h->rdc_fd = rfd;
15277836SJohn.Forte@Sun.COM 	mutex_init(&h->aio_lock, NULL, MUTEX_DRIVER, NULL);
15287836SJohn.Forte@Sun.COM 
15297836SJohn.Forte@Sun.COM 	return (&h->rdc_bufh);
15307836SJohn.Forte@Sun.COM }
15317836SJohn.Forte@Sun.COM 
15327836SJohn.Forte@Sun.COM 
15337836SJohn.Forte@Sun.COM /*
15347836SJohn.Forte@Sun.COM  * _rdc_free_handle
15357836SJohn.Forte@Sun.COM  *	Free a handle
15367836SJohn.Forte@Sun.COM  *
15377836SJohn.Forte@Sun.COM  */
15387836SJohn.Forte@Sun.COM 
15397836SJohn.Forte@Sun.COM /* ARGSUSED */
15407836SJohn.Forte@Sun.COM static int
_rdc_free_handle(rdc_buf_t * h,rdc_fd_t * rfd)15417836SJohn.Forte@Sun.COM _rdc_free_handle(rdc_buf_t *h, rdc_fd_t *rfd)
15427836SJohn.Forte@Sun.COM {
15437836SJohn.Forte@Sun.COM 	int rc;
15447836SJohn.Forte@Sun.COM 
15457836SJohn.Forte@Sun.COM 	mutex_destroy(&h->aio_lock);
15467836SJohn.Forte@Sun.COM 	if (h->rdc_bufp) {
15477836SJohn.Forte@Sun.COM 		rc = nsc_free_handle(h->rdc_bufp);
15487836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc))
15497836SJohn.Forte@Sun.COM 			return (rc);
15507836SJohn.Forte@Sun.COM 	}
15517836SJohn.Forte@Sun.COM 	kmem_free(h, sizeof (rdc_buf_t));
15527836SJohn.Forte@Sun.COM 	return (0);
15537836SJohn.Forte@Sun.COM }
15547836SJohn.Forte@Sun.COM 
15557836SJohn.Forte@Sun.COM 
15567836SJohn.Forte@Sun.COM /*
15577836SJohn.Forte@Sun.COM  * _rdc_attach
15587836SJohn.Forte@Sun.COM  *	Attach
15597836SJohn.Forte@Sun.COM  *
15607836SJohn.Forte@Sun.COM  * Calling/Exit State:
15617836SJohn.Forte@Sun.COM  *	Returns 0 for success, errno on failure.
15627836SJohn.Forte@Sun.COM  *
15637836SJohn.Forte@Sun.COM  * Description:
15647836SJohn.Forte@Sun.COM  */
15657836SJohn.Forte@Sun.COM 
15667836SJohn.Forte@Sun.COM static int
_rdc_attach(rdc_fd_t * rfd,nsc_iodev_t * iodev)15677836SJohn.Forte@Sun.COM _rdc_attach(rdc_fd_t *rfd, nsc_iodev_t *iodev)
15687836SJohn.Forte@Sun.COM {
15697836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc;
15707836SJohn.Forte@Sun.COM 	int raw = RDC_IS_RAW(rfd);
15717836SJohn.Forte@Sun.COM 	int rc;
15727836SJohn.Forte@Sun.COM 
15737836SJohn.Forte@Sun.COM 	if ((RDC_IS_BMP(rfd)) || RDC_IS_QUE(rfd))
15747836SJohn.Forte@Sun.COM 		return (EINVAL);
15757836SJohn.Forte@Sun.COM 
15767836SJohn.Forte@Sun.COM 	krdc = rfd->rdc_info;
15777836SJohn.Forte@Sun.COM 	if (krdc == NULL)
15787836SJohn.Forte@Sun.COM 		return (EINVAL);
15797836SJohn.Forte@Sun.COM 
15807836SJohn.Forte@Sun.COM 	mutex_enter(&krdc->devices->id_rlock);
15817836SJohn.Forte@Sun.COM 	krdc->iodev = iodev;
15827836SJohn.Forte@Sun.COM 	mutex_exit(&krdc->devices->id_rlock);
15837836SJohn.Forte@Sun.COM 
15847836SJohn.Forte@Sun.COM 	rc = _rdc_rsrv_devs(krdc, (raw ? RDC_RAW : RDC_CACHE), RDC_EXTERNAL);
15857836SJohn.Forte@Sun.COM 	return (rc);
15867836SJohn.Forte@Sun.COM }
15877836SJohn.Forte@Sun.COM 
15887836SJohn.Forte@Sun.COM 
15897836SJohn.Forte@Sun.COM /*
15907836SJohn.Forte@Sun.COM  * _rdc_detach
15917836SJohn.Forte@Sun.COM  *	Detach
15927836SJohn.Forte@Sun.COM  *
15937836SJohn.Forte@Sun.COM  * Calling/Exit State:
15947836SJohn.Forte@Sun.COM  *	Returns 0 for success, always succeeds
15957836SJohn.Forte@Sun.COM  *
15967836SJohn.Forte@Sun.COM  * Description:
15977836SJohn.Forte@Sun.COM  */
15987836SJohn.Forte@Sun.COM 
15997836SJohn.Forte@Sun.COM static int
_rdc_detach(rdc_fd_t * rfd,nsc_iodev_t * iodev)16007836SJohn.Forte@Sun.COM _rdc_detach(rdc_fd_t *rfd, nsc_iodev_t *iodev)
16017836SJohn.Forte@Sun.COM {
16027836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = rfd->rdc_info;
16037836SJohn.Forte@Sun.COM 	int raw = RDC_IS_RAW(rfd);
16047836SJohn.Forte@Sun.COM 
16057836SJohn.Forte@Sun.COM 	/*
16067836SJohn.Forte@Sun.COM 	 * Flush the async queue if necessary.
16077836SJohn.Forte@Sun.COM 	 */
16087836SJohn.Forte@Sun.COM 
16097836SJohn.Forte@Sun.COM 	if (IS_ASYNC(&rdc_u_info[krdc->index]) && !RDC_IS_DISKQ(krdc->group)) {
16107836SJohn.Forte@Sun.COM 		int tries = 1;
16117836SJohn.Forte@Sun.COM 
16127836SJohn.Forte@Sun.COM 		while (krdc->group->ra_queue.blocks != 0 && tries--) {
16137836SJohn.Forte@Sun.COM 			if (!krdc->group->rdc_writer)
16147836SJohn.Forte@Sun.COM 				(void) rdc_writer(krdc->index);
16157836SJohn.Forte@Sun.COM 
16167836SJohn.Forte@Sun.COM 			(void) rdc_drain_queue(krdc->index);
16177836SJohn.Forte@Sun.COM 		}
16187836SJohn.Forte@Sun.COM 
16197836SJohn.Forte@Sun.COM 		/* force disgard of possibly blocked flusher threads */
16207836SJohn.Forte@Sun.COM 		if (rdc_drain_queue(krdc->index) != 0) {
16217836SJohn.Forte@Sun.COM #ifdef DEBUG
16227836SJohn.Forte@Sun.COM 			net_queue *qp = &krdc->group->ra_queue;
16237836SJohn.Forte@Sun.COM #endif
16247836SJohn.Forte@Sun.COM 			do {
16257836SJohn.Forte@Sun.COM 				mutex_enter(&krdc->group->ra_queue.net_qlock);
16267836SJohn.Forte@Sun.COM 				krdc->group->asyncdis = 1;
16277836SJohn.Forte@Sun.COM 				cv_broadcast(&krdc->group->asyncqcv);
16287836SJohn.Forte@Sun.COM 				mutex_exit(&krdc->group->ra_queue.net_qlock);
16297836SJohn.Forte@Sun.COM 				cmn_err(CE_WARN,
1630*9093SRamana.Srikanth@Sun.COM 				    "!RDC: async I/O pending and not drained "
1631*9093SRamana.Srikanth@Sun.COM 				    "for %s during detach",
1632*9093SRamana.Srikanth@Sun.COM 				    rdc_u_info[krdc->index].primary.file);
16337836SJohn.Forte@Sun.COM #ifdef DEBUG
16347836SJohn.Forte@Sun.COM 				cmn_err(CE_WARN,
1635*9093SRamana.Srikanth@Sun.COM 				    "!nitems: %" NSC_SZFMT " nblocks: %"
1636*9093SRamana.Srikanth@Sun.COM 				    NSC_SZFMT " head: 0x%p tail: 0x%p",
1637*9093SRamana.Srikanth@Sun.COM 				    qp->nitems, qp->blocks,
1638*9093SRamana.Srikanth@Sun.COM 				    (void *)qp->net_qhead,
1639*9093SRamana.Srikanth@Sun.COM 				    (void *)qp->net_qtail);
16407836SJohn.Forte@Sun.COM #endif
16417836SJohn.Forte@Sun.COM 			} while (krdc->group->rdc_thrnum > 0);
16427836SJohn.Forte@Sun.COM 		}
16437836SJohn.Forte@Sun.COM 	}
16447836SJohn.Forte@Sun.COM 
16457836SJohn.Forte@Sun.COM 	mutex_enter(&krdc->devices->id_rlock);
16467836SJohn.Forte@Sun.COM 	if (krdc->iodev != iodev)
1647*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!_rdc_detach: iodev mismatch %p : %p",
16487836SJohn.Forte@Sun.COM 		    (void *) krdc->iodev, (void *) iodev);
16497836SJohn.Forte@Sun.COM 
16507836SJohn.Forte@Sun.COM 	krdc->iodev = NULL;
16517836SJohn.Forte@Sun.COM 	mutex_exit(&krdc->devices->id_rlock);
16527836SJohn.Forte@Sun.COM 
16537836SJohn.Forte@Sun.COM 	_rdc_rlse_devs(krdc, (raw ? RDC_RAW : RDC_CACHE));
16547836SJohn.Forte@Sun.COM 
16557836SJohn.Forte@Sun.COM 	return (0);
16567836SJohn.Forte@Sun.COM }
16577836SJohn.Forte@Sun.COM 
16587836SJohn.Forte@Sun.COM /*
16597836SJohn.Forte@Sun.COM  * _rdc_get_pinned
16607836SJohn.Forte@Sun.COM  *
16617836SJohn.Forte@Sun.COM  * only affects local node.
16627836SJohn.Forte@Sun.COM  */
16637836SJohn.Forte@Sun.COM 
16647836SJohn.Forte@Sun.COM static int
_rdc_get_pinned(rdc_fd_t * rfd)16657836SJohn.Forte@Sun.COM _rdc_get_pinned(rdc_fd_t *rfd)
16667836SJohn.Forte@Sun.COM {
16677836SJohn.Forte@Sun.COM 	return (nsc_get_pinned(RDC_FD(rfd)));
16687836SJohn.Forte@Sun.COM }
16697836SJohn.Forte@Sun.COM 
16707836SJohn.Forte@Sun.COM /*
16717836SJohn.Forte@Sun.COM  * _rdc_discard_pinned
16727836SJohn.Forte@Sun.COM  *
16737836SJohn.Forte@Sun.COM  * only affects local node.
16747836SJohn.Forte@Sun.COM  */
16757836SJohn.Forte@Sun.COM 
16767836SJohn.Forte@Sun.COM static int
_rdc_discard_pinned(rdc_fd_t * rfd,nsc_off_t pos,nsc_size_t len)16777836SJohn.Forte@Sun.COM _rdc_discard_pinned(rdc_fd_t *rfd, nsc_off_t pos, nsc_size_t len)
16787836SJohn.Forte@Sun.COM {
16797836SJohn.Forte@Sun.COM 	return (nsc_discard_pinned(RDC_FD(rfd), pos, len));
16807836SJohn.Forte@Sun.COM }
16817836SJohn.Forte@Sun.COM 
16827836SJohn.Forte@Sun.COM /*
16837836SJohn.Forte@Sun.COM  * _rdc_partsize
16847836SJohn.Forte@Sun.COM  *
16857836SJohn.Forte@Sun.COM  * only affects the local node.
16867836SJohn.Forte@Sun.COM  */
16877836SJohn.Forte@Sun.COM 
16887836SJohn.Forte@Sun.COM static int
_rdc_partsize(rdc_fd_t * rfd,nsc_size_t * ptr)16897836SJohn.Forte@Sun.COM _rdc_partsize(rdc_fd_t *rfd, nsc_size_t *ptr)
16907836SJohn.Forte@Sun.COM {
16917836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc;
16927836SJohn.Forte@Sun.COM 
16937836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[rfd->rdc_info->index];
16947836SJohn.Forte@Sun.COM 	/* Always return saved size */
16957836SJohn.Forte@Sun.COM 	ASSERT(urdc->volume_size != 0);
16967836SJohn.Forte@Sun.COM 	*ptr = urdc->volume_size;
16977836SJohn.Forte@Sun.COM 	return (0);
16987836SJohn.Forte@Sun.COM }
16997836SJohn.Forte@Sun.COM 
17007836SJohn.Forte@Sun.COM /*
17017836SJohn.Forte@Sun.COM  * _rdc_maxfbas
17027836SJohn.Forte@Sun.COM  *
17037836SJohn.Forte@Sun.COM  * only affects local node
17047836SJohn.Forte@Sun.COM  */
17057836SJohn.Forte@Sun.COM 
17067836SJohn.Forte@Sun.COM /* ARGSUSED */
17077836SJohn.Forte@Sun.COM static int
_rdc_maxfbas(rdc_fd_t * rfd,int flag,nsc_size_t * ptr)17087836SJohn.Forte@Sun.COM _rdc_maxfbas(rdc_fd_t *rfd, int flag, nsc_size_t *ptr)
17097836SJohn.Forte@Sun.COM {
17107836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = rfd->rdc_info;
17117836SJohn.Forte@Sun.COM 	int raw = RDC_IS_RAW(rfd);
17127836SJohn.Forte@Sun.COM 	int rtype = raw ? RDC_RAW : RDC_CACHE;
17137836SJohn.Forte@Sun.COM 	int rc = 0;
17147836SJohn.Forte@Sun.COM 
17157836SJohn.Forte@Sun.COM 	if (krdc == NULL)
17167836SJohn.Forte@Sun.COM 		return (EINVAL);
17177836SJohn.Forte@Sun.COM 	if (flag == NSC_RDAHEAD || flag == NSC_CACHEBLK) {
17187836SJohn.Forte@Sun.COM 		rc = _rdc_rsrv_devs(krdc, rtype, RDC_INTERNAL);
17197836SJohn.Forte@Sun.COM 		if (rc == 0) {
17207836SJohn.Forte@Sun.COM 			rc = nsc_maxfbas(RDC_U_FD(krdc), flag, ptr);
17217836SJohn.Forte@Sun.COM 			_rdc_rlse_devs(krdc, rtype);
17227836SJohn.Forte@Sun.COM 		}
17237836SJohn.Forte@Sun.COM 	} else {
17247836SJohn.Forte@Sun.COM 		/* Always return saved size */
17257836SJohn.Forte@Sun.COM 		ASSERT(krdc->maxfbas != 0);
17267836SJohn.Forte@Sun.COM 		*ptr = krdc->maxfbas - 1;
17277836SJohn.Forte@Sun.COM 	}
17287836SJohn.Forte@Sun.COM 
17297836SJohn.Forte@Sun.COM 	return (rc);
17307836SJohn.Forte@Sun.COM }
17317836SJohn.Forte@Sun.COM 
17327836SJohn.Forte@Sun.COM /* ARGSUSED */
17337836SJohn.Forte@Sun.COM static int
_rdc_control(rdc_fd_t * rfd,int cmd,void * ptr,int len)17347836SJohn.Forte@Sun.COM _rdc_control(rdc_fd_t *rfd, int cmd, void *ptr, int len)
17357836SJohn.Forte@Sun.COM {
17367836SJohn.Forte@Sun.COM 	return (nsc_control(RDC_FD(rfd),  cmd, ptr, len));
17377836SJohn.Forte@Sun.COM }
17387836SJohn.Forte@Sun.COM 
17397836SJohn.Forte@Sun.COM /*
17407836SJohn.Forte@Sun.COM  * _rdc_attach_fd
17417836SJohn.Forte@Sun.COM  *
17427836SJohn.Forte@Sun.COM  * called by nsctl as part of nsc_reserve() processing when one of
17437836SJohn.Forte@Sun.COM  * SNDR's underlying file descriptors becomes available and metadata
17447836SJohn.Forte@Sun.COM  * should be re-acquired.
17457836SJohn.Forte@Sun.COM  */
17467836SJohn.Forte@Sun.COM static int
_rdc_attach_fd(blind_t arg)17477836SJohn.Forte@Sun.COM _rdc_attach_fd(blind_t arg)
17487836SJohn.Forte@Sun.COM {
17497836SJohn.Forte@Sun.COM 	_rdc_info_dev_t *dip = (_rdc_info_dev_t *)arg;
17507836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc;
17517836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc;
17527836SJohn.Forte@Sun.COM 	nsc_size_t maxfbas, partsize;
17537836SJohn.Forte@Sun.COM 	int rc;
17547836SJohn.Forte@Sun.COM 
17557836SJohn.Forte@Sun.COM 	krdc = dip->bi_krdc;
17567836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
17577836SJohn.Forte@Sun.COM 
17587836SJohn.Forte@Sun.COM 	if ((rc = nsc_partsize(dip->bi_fd, &partsize)) != 0) {
17597836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1760*9093SRamana.Srikanth@Sun.COM 		    "!SNDR: cannot get volume size of %s, error %d",
17617836SJohn.Forte@Sun.COM 		    nsc_pathname(dip->bi_fd), rc);
17627836SJohn.Forte@Sun.COM 	} else if (urdc->volume_size == 0 && partsize > 0) {
17637836SJohn.Forte@Sun.COM 		/* set volume size for the first time */
17647836SJohn.Forte@Sun.COM 		urdc->volume_size = partsize;
17657836SJohn.Forte@Sun.COM 	} else if (urdc->volume_size != partsize) {
17667836SJohn.Forte@Sun.COM 		/*
17677836SJohn.Forte@Sun.COM 		 * SNDR cannot yet cope with a volume being resized,
17687836SJohn.Forte@Sun.COM 		 * so fail it.
17697836SJohn.Forte@Sun.COM 		 */
17707836SJohn.Forte@Sun.COM 		if (!(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
17717836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
17727836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_PRIMARY)
17737836SJohn.Forte@Sun.COM 				rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
17747836SJohn.Forte@Sun.COM 			else
17757836SJohn.Forte@Sun.COM 				rdc_set_mflags(urdc, RDC_SYNC_NEEDED);
17767836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
17777836SJohn.Forte@Sun.COM 			    "volume resized");
17787836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
17797836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
17807836SJohn.Forte@Sun.COM 		}
17817836SJohn.Forte@Sun.COM 
17827836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1783*9093SRamana.Srikanth@Sun.COM 		    "!SNDR: %s changed size from %" NSC_SZFMT " to %" NSC_SZFMT,
17847836SJohn.Forte@Sun.COM 		    nsc_pathname(dip->bi_fd), urdc->volume_size, partsize);
17857836SJohn.Forte@Sun.COM 	}
17867836SJohn.Forte@Sun.COM 
17877836SJohn.Forte@Sun.COM 	if ((rc = nsc_maxfbas(dip->bi_fd, 0, &maxfbas)) != 0) {
17887836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1789*9093SRamana.Srikanth@Sun.COM 		    "!SNDR: cannot get max transfer size for %s, error %d",
17907836SJohn.Forte@Sun.COM 		    nsc_pathname(dip->bi_fd), rc);
17917836SJohn.Forte@Sun.COM 	} else if (maxfbas > 0) {
17927836SJohn.Forte@Sun.COM 		krdc->maxfbas = min(RDC_MAX_MAXFBAS, maxfbas);
17937836SJohn.Forte@Sun.COM 	}
17947836SJohn.Forte@Sun.COM 
17957836SJohn.Forte@Sun.COM 	return (0);
17967836SJohn.Forte@Sun.COM }
17977836SJohn.Forte@Sun.COM 
17987836SJohn.Forte@Sun.COM 
17997836SJohn.Forte@Sun.COM /*
18007836SJohn.Forte@Sun.COM  * _rdc_pinned
18017836SJohn.Forte@Sun.COM  *
18027836SJohn.Forte@Sun.COM  * only affects local node
18037836SJohn.Forte@Sun.COM  */
18047836SJohn.Forte@Sun.COM 
18057836SJohn.Forte@Sun.COM static void
_rdc_pinned(_rdc_info_dev_t * dip,nsc_off_t pos,nsc_size_t len)18067836SJohn.Forte@Sun.COM _rdc_pinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
18077836SJohn.Forte@Sun.COM {
18087836SJohn.Forte@Sun.COM 	nsc_pinned_data(dip->bi_krdc->iodev, pos, len);
18097836SJohn.Forte@Sun.COM }
18107836SJohn.Forte@Sun.COM 
18117836SJohn.Forte@Sun.COM 
18127836SJohn.Forte@Sun.COM /*
18137836SJohn.Forte@Sun.COM  * _rdc_unpinned
18147836SJohn.Forte@Sun.COM  *
18157836SJohn.Forte@Sun.COM  * only affects local node.
18167836SJohn.Forte@Sun.COM  */
18177836SJohn.Forte@Sun.COM 
18187836SJohn.Forte@Sun.COM static void
_rdc_unpinned(_rdc_info_dev_t * dip,nsc_off_t pos,nsc_size_t len)18197836SJohn.Forte@Sun.COM _rdc_unpinned(_rdc_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
18207836SJohn.Forte@Sun.COM {
18217836SJohn.Forte@Sun.COM 	nsc_unpinned_data(dip->bi_krdc->iodev, pos, len);
18227836SJohn.Forte@Sun.COM }
18237836SJohn.Forte@Sun.COM 
18247836SJohn.Forte@Sun.COM 
18257836SJohn.Forte@Sun.COM /*
18267836SJohn.Forte@Sun.COM  * _rdc_read
18277836SJohn.Forte@Sun.COM  *
18287836SJohn.Forte@Sun.COM  * read the specified data into the buffer - go remote if local down,
18297836SJohn.Forte@Sun.COM  * or the remote end has more recent data because an reverse sync is
18307836SJohn.Forte@Sun.COM  * in progress.
18317836SJohn.Forte@Sun.COM  */
18327836SJohn.Forte@Sun.COM 
18337836SJohn.Forte@Sun.COM static int
_rdc_read(rdc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag)18347836SJohn.Forte@Sun.COM _rdc_read(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
18357836SJohn.Forte@Sun.COM {
18367836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
18377836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
18387836SJohn.Forte@Sun.COM 	int remote = (RDC_REMOTE(h) || (rdc_get_mflags(urdc) & RDC_SLAVE));
18397836SJohn.Forte@Sun.COM 	int rc1, rc2;
18407836SJohn.Forte@Sun.COM 
18417836SJohn.Forte@Sun.COM 	rc1 = rc2 = 0;
18427836SJohn.Forte@Sun.COM 
18437836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
18447836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1845*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_read: bounds check: io(handle) pos %" NSC_XSZFMT
18467836SJohn.Forte@Sun.COM 		    "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
1847*9093SRamana.Srikanth@Sun.COM 		    pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
18487836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = EINVAL;
18497836SJohn.Forte@Sun.COM 		return (h->rdc_bufh.sb_error);
18507836SJohn.Forte@Sun.COM 	}
18517836SJohn.Forte@Sun.COM 
18527836SJohn.Forte@Sun.COM 	if (flag & NSC_NOBLOCK) {
18537836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
1854*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_read: removing unsupported NSC_NOBLOCK flag");
18557836SJohn.Forte@Sun.COM 		flag &= ~(NSC_NOBLOCK);
18567836SJohn.Forte@Sun.COM 	}
18577836SJohn.Forte@Sun.COM 
18587836SJohn.Forte@Sun.COM 
18597836SJohn.Forte@Sun.COM 	if (!remote) {
18607836SJohn.Forte@Sun.COM 		rc1 = nsc_read(h->rdc_bufp, pos, len, flag);
18617836SJohn.Forte@Sun.COM 	}
18627836SJohn.Forte@Sun.COM 
18637836SJohn.Forte@Sun.COM 	if (remote || !RDC_SUCCESS(rc1)) {
18647836SJohn.Forte@Sun.COM 		rc2 = _rdc_remote_read(krdc, &h->rdc_bufh, pos, len, flag);
18657836SJohn.Forte@Sun.COM 	}
18667836SJohn.Forte@Sun.COM 
18677836SJohn.Forte@Sun.COM 	if (remote && !RDC_SUCCESS(rc2))
18687836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc2;
18697836SJohn.Forte@Sun.COM 	else if (!RDC_SUCCESS(rc1) && !RDC_SUCCESS(rc2))
18707836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc1;
18717836SJohn.Forte@Sun.COM 
18727836SJohn.Forte@Sun.COM 	return (h->rdc_bufh.sb_error);
18737836SJohn.Forte@Sun.COM }
18747836SJohn.Forte@Sun.COM 
18757836SJohn.Forte@Sun.COM 
18767836SJohn.Forte@Sun.COM static int
_rdc_remote_write(rdc_k_info_t * krdc,rdc_buf_t * h,nsc_buf_t * nsc_h,nsc_off_t pos,nsc_size_t len,int flag,uint_t bitmask)18777836SJohn.Forte@Sun.COM _rdc_remote_write(rdc_k_info_t *krdc, rdc_buf_t *h, nsc_buf_t *nsc_h,
18787836SJohn.Forte@Sun.COM     nsc_off_t pos, nsc_size_t len, int flag, uint_t bitmask)
18797836SJohn.Forte@Sun.COM {
18807836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
18817836SJohn.Forte@Sun.COM 	int rc = 0;
18827836SJohn.Forte@Sun.COM 	nsc_size_t plen, syncblockpos;
18837836SJohn.Forte@Sun.COM 	aio_buf_t *anon = NULL;
18847836SJohn.Forte@Sun.COM 
18857836SJohn.Forte@Sun.COM 	if (!(rdc_get_vflags(urdc) & RDC_PRIMARY))
18867836SJohn.Forte@Sun.COM 		return (EINVAL);
18877836SJohn.Forte@Sun.COM 
18887836SJohn.Forte@Sun.COM 	if ((rdc_get_vflags(urdc) & RDC_LOGGING) &&
18897836SJohn.Forte@Sun.COM 	    (!IS_STATE(urdc, RDC_QUEUING))) {
18907836SJohn.Forte@Sun.COM 		goto done;
18917836SJohn.Forte@Sun.COM 	}
18927836SJohn.Forte@Sun.COM 
18937836SJohn.Forte@Sun.COM 	/*
18947836SJohn.Forte@Sun.COM 	 * this check for RDC_SYNCING may seem redundant, but there is a window
18957836SJohn.Forte@Sun.COM 	 * in rdc_sync, where an async set has not yet been transformed into a
18967836SJohn.Forte@Sun.COM 	 * sync set.
18977836SJohn.Forte@Sun.COM 	 */
18987836SJohn.Forte@Sun.COM 	if ((!IS_ASYNC(urdc) || IS_STATE(urdc, RDC_SYNCING)) ||
18997836SJohn.Forte@Sun.COM 	    RDC_REMOTE(h) ||
19007836SJohn.Forte@Sun.COM 	    krdc->group->synccount > 0 ||
19017836SJohn.Forte@Sun.COM 	    (rdc_get_vflags(urdc) & RDC_SLAVE) ||
19027836SJohn.Forte@Sun.COM 	    (rdc_get_vflags(urdc) & RDC_VOL_FAILED) ||
19037836SJohn.Forte@Sun.COM 	    (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) {
19047836SJohn.Forte@Sun.COM 
19057836SJohn.Forte@Sun.COM 		/* sync mode, or remote io mode, or local device is dead */
19067836SJohn.Forte@Sun.COM 		rc = rdc_net_write(krdc->index, krdc->remote_index,
19077836SJohn.Forte@Sun.COM 		    nsc_h, pos, len, RDC_NOSEQ, RDC_NOQUE, NULL);
19087836SJohn.Forte@Sun.COM 
19097836SJohn.Forte@Sun.COM 		if ((rc == 0) &&
19107836SJohn.Forte@Sun.COM 		    !(rdc_get_vflags(urdc) & RDC_BMP_FAILED) &&
19117836SJohn.Forte@Sun.COM 		    !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
19127836SJohn.Forte@Sun.COM 			if (IS_STATE(urdc, RDC_SYNCING) &&
19137836SJohn.Forte@Sun.COM 			    !IS_STATE(urdc, RDC_FULL) ||
19147836SJohn.Forte@Sun.COM 			    !IS_STATE(urdc, RDC_SLAVE)) {
19157836SJohn.Forte@Sun.COM 				mutex_enter(&krdc->syncbitmutex);
19167836SJohn.Forte@Sun.COM 
19177836SJohn.Forte@Sun.COM 				syncblockpos = LOG_TO_FBA_NUM(krdc->syncbitpos);
19187836SJohn.Forte@Sun.COM 
19197836SJohn.Forte@Sun.COM 				DTRACE_PROBE4(rdc_remote_write,
1920*9093SRamana.Srikanth@Sun.COM 				    nsc_off_t, krdc->syncbitpos,
1921*9093SRamana.Srikanth@Sun.COM 				    nsc_off_t, syncblockpos,
1922*9093SRamana.Srikanth@Sun.COM 				    nsc_off_t, pos,
1923*9093SRamana.Srikanth@Sun.COM 				    nsc_size_t, len);
19247836SJohn.Forte@Sun.COM 
19257836SJohn.Forte@Sun.COM 				/*
19267836SJohn.Forte@Sun.COM 				 * If the current I/O's position plus length is
19277836SJohn.Forte@Sun.COM 				 * greater than the sync block position, only
19287836SJohn.Forte@Sun.COM 				 * clear those blocks up to the sync block position
19297836SJohn.Forte@Sun.COM 				 */
19307836SJohn.Forte@Sun.COM 				if (pos < syncblockpos) {
19317836SJohn.Forte@Sun.COM 					if ((pos + len) > syncblockpos)
19327836SJohn.Forte@Sun.COM 						plen = syncblockpos - pos;
19337836SJohn.Forte@Sun.COM 					else
19347836SJohn.Forte@Sun.COM 						plen = len;
19357836SJohn.Forte@Sun.COM 					RDC_CLR_BITMAP(krdc, pos, plen, bitmask,
19367836SJohn.Forte@Sun.COM 					    RDC_BIT_BUMP);
19377836SJohn.Forte@Sun.COM 				}
19387836SJohn.Forte@Sun.COM 				mutex_exit(&krdc->syncbitmutex);
19397836SJohn.Forte@Sun.COM 			} else {
19407836SJohn.Forte@Sun.COM 				RDC_CLR_BITMAP(krdc, pos, len, bitmask,
19417836SJohn.Forte@Sun.COM 				    RDC_BIT_BUMP);
19427836SJohn.Forte@Sun.COM 			}
19437836SJohn.Forte@Sun.COM 		} else if (rc != 0) {
19447836SJohn.Forte@Sun.COM 			rdc_group_enter(krdc);
19457836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_LOGGING,
19467836SJohn.Forte@Sun.COM 			    "net write failed");
19477836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
19487836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_SYNCING)
19497836SJohn.Forte@Sun.COM 				krdc->disk_status = 1;
19507836SJohn.Forte@Sun.COM 			rdc_group_exit(krdc);
19517836SJohn.Forte@Sun.COM 		}
19527836SJohn.Forte@Sun.COM 	} else if (!IS_STATE(urdc, RDC_SYNCING)) {
19537836SJohn.Forte@Sun.COM 		DTRACE_PROBE1(async_enque_start, rdc_buf_t *, h);
19547836SJohn.Forte@Sun.COM 
19557836SJohn.Forte@Sun.COM 		ASSERT(krdc->group->synccount == 0);
19567836SJohn.Forte@Sun.COM 		/* async mode */
19577836SJohn.Forte@Sun.COM 		if ((h == NULL) || ((h->rdc_flags & RDC_ASYNC_VEC) == 0)) {
19587836SJohn.Forte@Sun.COM 
19597836SJohn.Forte@Sun.COM 			rc = _rdc_enqueue_write(krdc, pos, len, flag, NULL);
19607836SJohn.Forte@Sun.COM 
19617836SJohn.Forte@Sun.COM 		} else {
19627836SJohn.Forte@Sun.COM 			anon = rdc_aio_buf_get(h, krdc->index);
19637836SJohn.Forte@Sun.COM 			if (anon == NULL) {
19647836SJohn.Forte@Sun.COM #ifdef DEBUG
19657836SJohn.Forte@Sun.COM 				cmn_err(CE_WARN,
1966*9093SRamana.Srikanth@Sun.COM 				    "!enqueue write failed for handle %p",
1967*9093SRamana.Srikanth@Sun.COM 				    (void *) h);
19687836SJohn.Forte@Sun.COM #endif
19697836SJohn.Forte@Sun.COM 				return (EINVAL);
19707836SJohn.Forte@Sun.COM 			}
19717836SJohn.Forte@Sun.COM 			rc = _rdc_enqueue_write(krdc, pos, len, flag,
19727836SJohn.Forte@Sun.COM 			    anon->rdc_abufp);
19737836SJohn.Forte@Sun.COM 
19747836SJohn.Forte@Sun.COM 			/*
19757836SJohn.Forte@Sun.COM 			 * get rid of the aio_buf_t now, as this
19767836SJohn.Forte@Sun.COM 			 * may not be the set that this rdc_buf
19777836SJohn.Forte@Sun.COM 			 * was allocated on, we are done with it anyways
19787836SJohn.Forte@Sun.COM 			 * enqueuing code frees the nsc_abuf
19797836SJohn.Forte@Sun.COM 			 */
19807836SJohn.Forte@Sun.COM 			rdc_aio_buf_del(h, krdc);
19817836SJohn.Forte@Sun.COM 		}
19827836SJohn.Forte@Sun.COM 
19837836SJohn.Forte@Sun.COM 	} else {
19847836SJohn.Forte@Sun.COM 		ASSERT(IS_STATE(urdc, RDC_SYNCING));
19857836SJohn.Forte@Sun.COM 		ASSERT(0);
19867836SJohn.Forte@Sun.COM 	}
19877836SJohn.Forte@Sun.COM 
19887836SJohn.Forte@Sun.COM done:
19897836SJohn.Forte@Sun.COM 	if ((anon == NULL) && h && (h->rdc_flags & RDC_ASYNC_VEC)) {
19907836SJohn.Forte@Sun.COM 		/*
19917836SJohn.Forte@Sun.COM 		 * Toss the anonymous buffer if we have one allocated.
19927836SJohn.Forte@Sun.COM 		 */
19937836SJohn.Forte@Sun.COM 		anon = rdc_aio_buf_get(h, krdc->index);
19947836SJohn.Forte@Sun.COM 		if (anon) {
19957836SJohn.Forte@Sun.COM 			(void) nsc_free_buf(anon->rdc_abufp);
19967836SJohn.Forte@Sun.COM 			rdc_aio_buf_del(h, krdc);
19977836SJohn.Forte@Sun.COM 		}
19987836SJohn.Forte@Sun.COM 	}
19997836SJohn.Forte@Sun.COM 
20007836SJohn.Forte@Sun.COM 	return (rc);
20017836SJohn.Forte@Sun.COM }
20027836SJohn.Forte@Sun.COM 
20037836SJohn.Forte@Sun.COM /*
20047836SJohn.Forte@Sun.COM  * _rdc_multi_write
20057836SJohn.Forte@Sun.COM  *
20067836SJohn.Forte@Sun.COM  * Send to multihop remote. Obeys 1 to many if present and we are crazy
20077836SJohn.Forte@Sun.COM  * enough to support it.
20087836SJohn.Forte@Sun.COM  *
20097836SJohn.Forte@Sun.COM  */
20107836SJohn.Forte@Sun.COM int
_rdc_multi_write(nsc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag,rdc_k_info_t * krdc)20117836SJohn.Forte@Sun.COM _rdc_multi_write(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag,
20127836SJohn.Forte@Sun.COM     rdc_k_info_t *krdc)
20137836SJohn.Forte@Sun.COM {
20147836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
20157836SJohn.Forte@Sun.COM 	rdc_k_info_t *this = krdc;	/* krdc that was requested */
20167836SJohn.Forte@Sun.COM 	int rc, retval;
20177836SJohn.Forte@Sun.COM 	uint_t bitmask;
20187836SJohn.Forte@Sun.COM 
20197836SJohn.Forte@Sun.COM 	retval = rc = 0;
20207836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(h, pos, len)) {
20217836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2022*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_multi_write: bounds check: io(handle) pos %"
2023*9093SRamana.Srikanth@Sun.COM 		    NSC_XSZFMT "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%"
2024*9093SRamana.Srikanth@Sun.COM 		    NSC_XSZFMT ")", pos, h->sb_pos, len, h->sb_len);
20257836SJohn.Forte@Sun.COM 		return (EINVAL);
20267836SJohn.Forte@Sun.COM 	}
20277836SJohn.Forte@Sun.COM 
20287836SJohn.Forte@Sun.COM 	/* if this is a 1 to many, set all the bits for all the sets */
20297836SJohn.Forte@Sun.COM 	do {
20307836SJohn.Forte@Sun.COM 		if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
20317836SJohn.Forte@Sun.COM 			(void) nsc_uncommit(h, pos, len, flag);
20327836SJohn.Forte@Sun.COM 			/* set the error, but try other sets */
20337836SJohn.Forte@Sun.COM 			retval = EIO;
20347836SJohn.Forte@Sun.COM 		}
20357836SJohn.Forte@Sun.COM 		if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) {
20367836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
20377836SJohn.Forte@Sun.COM 			for (krdc = krdc->many_next; krdc != this;
20387836SJohn.Forte@Sun.COM 			    krdc = krdc->many_next) {
20397836SJohn.Forte@Sun.COM 				urdc = &rdc_u_info[krdc->index];
20407836SJohn.Forte@Sun.COM 				if (!IS_ENABLED(urdc))
20417836SJohn.Forte@Sun.COM 					continue;
20427836SJohn.Forte@Sun.COM 				break;
20437836SJohn.Forte@Sun.COM 			}
20447836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
20457836SJohn.Forte@Sun.COM 		}
20467836SJohn.Forte@Sun.COM 	} while (krdc != this);
20477836SJohn.Forte@Sun.COM 
20487836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
20497836SJohn.Forte@Sun.COM 
20507836SJohn.Forte@Sun.COM 	if (flag & NSC_NOBLOCK) {
20517836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2052*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_multi_write: removing unsupported NSC_NOBLOCK flag");
20537836SJohn.Forte@Sun.COM 		flag &= ~(NSC_NOBLOCK);
20547836SJohn.Forte@Sun.COM 	}
20557836SJohn.Forte@Sun.COM 
20567836SJohn.Forte@Sun.COM multiwrite1:
20577836SJohn.Forte@Sun.COM 	if ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
20587836SJohn.Forte@Sun.COM 	    (!IS_STATE(urdc, RDC_LOGGING) ||
20597836SJohn.Forte@Sun.COM 	    (IS_STATE(urdc, RDC_LOGGING) &&
20607836SJohn.Forte@Sun.COM 	    IS_STATE(urdc, RDC_QUEUING)))) {
20617836SJohn.Forte@Sun.COM 		rc = _rdc_remote_write(krdc, NULL, h, pos, len, flag, bitmask);
20627836SJohn.Forte@Sun.COM 	}
20637836SJohn.Forte@Sun.COM 
20647836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc) && retval == 0) {
20657836SJohn.Forte@Sun.COM 		retval = rc;
20667836SJohn.Forte@Sun.COM 	}
20677836SJohn.Forte@Sun.COM 
20687836SJohn.Forte@Sun.COM multiwrite2:
20697836SJohn.Forte@Sun.COM 	if (IS_MANY(krdc) && (rdc_get_vflags(urdc) && RDC_PRIMARY)) {
20707836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
20717836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
20727836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
20737836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
20747836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
20757836SJohn.Forte@Sun.COM 				continue;
20767836SJohn.Forte@Sun.COM 			rc = 0;
20777836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
20787836SJohn.Forte@Sun.COM 
20797836SJohn.Forte@Sun.COM 			goto multiwrite1;
20807836SJohn.Forte@Sun.COM 		}
20817836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
20827836SJohn.Forte@Sun.COM 	}
20837836SJohn.Forte@Sun.COM 
20847836SJohn.Forte@Sun.COM 	return (retval);
20857836SJohn.Forte@Sun.COM }
20867836SJohn.Forte@Sun.COM 
20877836SJohn.Forte@Sun.COM void
_rdc_diskq_enqueue_thr(rdc_aio_t * p)20887836SJohn.Forte@Sun.COM _rdc_diskq_enqueue_thr(rdc_aio_t *p)
20897836SJohn.Forte@Sun.COM {
20907836SJohn.Forte@Sun.COM 	rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next;
20917836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = &rdc_k_info[p->index];
20927836SJohn.Forte@Sun.COM 	int rc2;
20937836SJohn.Forte@Sun.COM 
20947836SJohn.Forte@Sun.COM 
20957836SJohn.Forte@Sun.COM 	rc2 = rdc_diskq_enqueue(krdc, p);
20967836SJohn.Forte@Sun.COM 
20977836SJohn.Forte@Sun.COM 	/*
20987836SJohn.Forte@Sun.COM 	 * overload flag with error return if any
20997836SJohn.Forte@Sun.COM 	 */
21007836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc2)) {
21017836SJohn.Forte@Sun.COM 		p->flag = rc2;
21027836SJohn.Forte@Sun.COM 	} else {
21037836SJohn.Forte@Sun.COM 		p->flag = 0;
21047836SJohn.Forte@Sun.COM 	}
21057836SJohn.Forte@Sun.COM 	mutex_enter(&sync->lock);
21067836SJohn.Forte@Sun.COM 	sync->complete++;
21077836SJohn.Forte@Sun.COM 	cv_broadcast(&sync->cv);
21087836SJohn.Forte@Sun.COM 	mutex_exit(&sync->lock);
21097836SJohn.Forte@Sun.COM }
21107836SJohn.Forte@Sun.COM 
21117836SJohn.Forte@Sun.COM /*
21127836SJohn.Forte@Sun.COM  * _rdc_sync_write_thr
21137836SJohn.Forte@Sun.COM  * syncronous write thread which writes to network while
21147836SJohn.Forte@Sun.COM  * local write is occuring
21157836SJohn.Forte@Sun.COM  */
21167836SJohn.Forte@Sun.COM void
_rdc_sync_write_thr(rdc_aio_t * p)21177836SJohn.Forte@Sun.COM _rdc_sync_write_thr(rdc_aio_t *p)
21187836SJohn.Forte@Sun.COM {
21197836SJohn.Forte@Sun.COM 	rdc_thrsync_t *sync = (rdc_thrsync_t *)p->next;
21207836SJohn.Forte@Sun.COM 	rdc_buf_t *h = (rdc_buf_t *)p->handle;
21217836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = &rdc_k_info[p->index];
21227836SJohn.Forte@Sun.COM #ifdef	DEBUG
21237836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc;
21247836SJohn.Forte@Sun.COM #endif
21257836SJohn.Forte@Sun.COM 	int rc2;
21267836SJohn.Forte@Sun.COM 	int bitmask;
21277836SJohn.Forte@Sun.COM 
21287836SJohn.Forte@Sun.COM 	rdc_group_enter(krdc);
21297836SJohn.Forte@Sun.COM 	krdc->aux_state |= RDC_AUXWRITE;
21307836SJohn.Forte@Sun.COM #ifdef	DEBUG
21317836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
21327836SJohn.Forte@Sun.COM 	if (!IS_ENABLED(urdc)) {
2133*9093SRamana.Srikanth@Sun.COM 		cmn_err(CE_WARN, "!rdc_sync_write_thr: set not enabled %s:%s",
21347836SJohn.Forte@Sun.COM 		    urdc->secondary.file,
21357836SJohn.Forte@Sun.COM 		    urdc->secondary.bitmap);
21367836SJohn.Forte@Sun.COM 	}
21377836SJohn.Forte@Sun.COM #endif
21387836SJohn.Forte@Sun.COM 	rdc_group_exit(krdc);
21397836SJohn.Forte@Sun.COM 	bitmask = p->iostatus;	/* overload */
21407836SJohn.Forte@Sun.COM 	rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh, p->pos, p->len,
2141*9093SRamana.Srikanth@Sun.COM 	    p->flag, bitmask);
21427836SJohn.Forte@Sun.COM 
21437836SJohn.Forte@Sun.COM 
21447836SJohn.Forte@Sun.COM 	/*
21457836SJohn.Forte@Sun.COM 	 * overload flag with error return if any
21467836SJohn.Forte@Sun.COM 	 */
21477836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc2)) {
21487836SJohn.Forte@Sun.COM 		p->flag = rc2;
21497836SJohn.Forte@Sun.COM 	} else {
21507836SJohn.Forte@Sun.COM 		p->flag = 0;
21517836SJohn.Forte@Sun.COM 	}
21527836SJohn.Forte@Sun.COM 
21537836SJohn.Forte@Sun.COM 	rdc_group_enter(krdc);
21547836SJohn.Forte@Sun.COM 	krdc->aux_state &= ~RDC_AUXWRITE;
21557836SJohn.Forte@Sun.COM 	rdc_group_exit(krdc);
21567836SJohn.Forte@Sun.COM 
21577836SJohn.Forte@Sun.COM 	mutex_enter(&sync->lock);
21587836SJohn.Forte@Sun.COM 	sync->complete++;
21597836SJohn.Forte@Sun.COM 	cv_broadcast(&sync->cv);
21607836SJohn.Forte@Sun.COM 	mutex_exit(&sync->lock);
21617836SJohn.Forte@Sun.COM }
21627836SJohn.Forte@Sun.COM 
21637836SJohn.Forte@Sun.COM /*
21647836SJohn.Forte@Sun.COM  * _rdc_write
21657836SJohn.Forte@Sun.COM  *
21667836SJohn.Forte@Sun.COM  * Commit changes to the buffer locally and send remote.
21677836SJohn.Forte@Sun.COM  *
21687836SJohn.Forte@Sun.COM  * If this write is whilst the local primary volume is being synced,
21697836SJohn.Forte@Sun.COM  * then we write the remote end first to ensure that the new data
21707836SJohn.Forte@Sun.COM  * cannot be overwritten by a concurrent sync operation.
21717836SJohn.Forte@Sun.COM  */
21727836SJohn.Forte@Sun.COM 
21737836SJohn.Forte@Sun.COM static int
_rdc_write(rdc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag)21747836SJohn.Forte@Sun.COM _rdc_write(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
21757836SJohn.Forte@Sun.COM {
21767836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
21777836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
21787836SJohn.Forte@Sun.COM 	rdc_k_info_t *this;
21797836SJohn.Forte@Sun.COM 	rdc_k_info_t *multi = NULL;
21807836SJohn.Forte@Sun.COM 	int remote = RDC_REMOTE(h);
21817836SJohn.Forte@Sun.COM 	int rc1, rc2;
21827836SJohn.Forte@Sun.COM 	uint_t bitmask;
21837836SJohn.Forte@Sun.COM 	int first;
21847836SJohn.Forte@Sun.COM 	int rsync;
21857836SJohn.Forte@Sun.COM 	int nthr;
21867836SJohn.Forte@Sun.COM 	int winddown;
21877836SJohn.Forte@Sun.COM 	int thrrc = 0;
21887836SJohn.Forte@Sun.COM 	rdc_aio_t *bp[SNDR_MAXTHREADS];
21897836SJohn.Forte@Sun.COM 	aio_buf_t *anon;
21907836SJohn.Forte@Sun.COM 	nsthread_t  *tp;
21917836SJohn.Forte@Sun.COM 	rdc_thrsync_t *sync = &h->rdc_sync;
21927836SJohn.Forte@Sun.COM 
21937836SJohn.Forte@Sun.COM 	/* If this is the multi-hop secondary, move along to the primary */
21947836SJohn.Forte@Sun.COM 	if (IS_MULTI(krdc) && !IS_PRIMARY(urdc)) {
21957836SJohn.Forte@Sun.COM 		multi = krdc;
21967836SJohn.Forte@Sun.COM 		krdc = krdc->multi_next;
21977836SJohn.Forte@Sun.COM 		urdc = &rdc_u_info[krdc->index];
21987836SJohn.Forte@Sun.COM 
21997836SJohn.Forte@Sun.COM 		if (!IS_ENABLED(urdc)) {
22007836SJohn.Forte@Sun.COM 			krdc = h->rdc_fd->rdc_info;
22017836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
22027836SJohn.Forte@Sun.COM 			multi = NULL;
22037836SJohn.Forte@Sun.COM 		}
22047836SJohn.Forte@Sun.COM 	}
22057836SJohn.Forte@Sun.COM 	this = krdc;
22067836SJohn.Forte@Sun.COM 
22077836SJohn.Forte@Sun.COM 	rsync = (IS_PRIMARY(urdc)) && (IS_SLAVE(urdc));
22087836SJohn.Forte@Sun.COM 
22097836SJohn.Forte@Sun.COM 	/*
22107836SJohn.Forte@Sun.COM 	 * If this is a many group with a reverse sync in progress and
22117836SJohn.Forte@Sun.COM 	 * this is not the slave krdc/urdc, then search for the slave
22127836SJohn.Forte@Sun.COM 	 * so that we can do the remote io to the correct secondary
22137836SJohn.Forte@Sun.COM 	 * before the local io.
22147836SJohn.Forte@Sun.COM 	 */
22157836SJohn.Forte@Sun.COM 	if (rsync && !(IS_SLAVE(urdc))) {
22167836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
22177836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
22187836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
22197836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
22207836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
22217836SJohn.Forte@Sun.COM 				continue;
22227836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_SLAVE)
22237836SJohn.Forte@Sun.COM 				break;
22247836SJohn.Forte@Sun.COM 		}
22257836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
22267836SJohn.Forte@Sun.COM 
22277836SJohn.Forte@Sun.COM 		this = krdc;
22287836SJohn.Forte@Sun.COM 	}
22297836SJohn.Forte@Sun.COM 
22307836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
22317836SJohn.Forte@Sun.COM 
22327836SJohn.Forte@Sun.COM 	rc1 = rc2 = 0;
22337836SJohn.Forte@Sun.COM 	first = 1;
22347836SJohn.Forte@Sun.COM 	nthr = 0;
22357836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
22367836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2237*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_write: bounds check: io(handle) pos %" NSC_XSZFMT
2238*9093SRamana.Srikanth@Sun.COM 		    "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2239*9093SRamana.Srikanth@Sun.COM 		    pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
22407836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = EINVAL;
22417836SJohn.Forte@Sun.COM 		return (h->rdc_bufh.sb_error);
22427836SJohn.Forte@Sun.COM 	}
22437836SJohn.Forte@Sun.COM 
22447836SJohn.Forte@Sun.COM 	DTRACE_PROBE(rdc_write_bitmap_start);
22457836SJohn.Forte@Sun.COM 
22467836SJohn.Forte@Sun.COM 	/* if this is a 1 to many, set all the bits for all the sets */
22477836SJohn.Forte@Sun.COM 	do {
22487836SJohn.Forte@Sun.COM 		if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
22497836SJohn.Forte@Sun.COM 			if (rdc_eio_nobmp) {
2250*9093SRamana.Srikanth@Sun.COM 				(void) nsc_uncommit
2251*9093SRamana.Srikanth@Sun.COM 				    (h->rdc_bufp, pos, len, flag);
2252*9093SRamana.Srikanth@Sun.COM 				/* set the error, but try the other sets */
2253*9093SRamana.Srikanth@Sun.COM 				h->rdc_bufh.sb_error = EIO;
22547836SJohn.Forte@Sun.COM 			}
22557836SJohn.Forte@Sun.COM 		}
22567836SJohn.Forte@Sun.COM 
22577836SJohn.Forte@Sun.COM 		if (IS_MANY(krdc) && IS_STATE(urdc, RDC_PRIMARY)) {
22587836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
22597836SJohn.Forte@Sun.COM 			for (krdc = krdc->many_next; krdc != this;
22607836SJohn.Forte@Sun.COM 			    krdc = krdc->many_next) {
22617836SJohn.Forte@Sun.COM 				urdc = &rdc_u_info[krdc->index];
22627836SJohn.Forte@Sun.COM 				if (!IS_ENABLED(urdc))
22637836SJohn.Forte@Sun.COM 					continue;
22647836SJohn.Forte@Sun.COM 				break;
22657836SJohn.Forte@Sun.COM 			}
22667836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
22677836SJohn.Forte@Sun.COM 		}
22687836SJohn.Forte@Sun.COM 
22697836SJohn.Forte@Sun.COM 	} while (krdc != this);
22707836SJohn.Forte@Sun.COM 
22717836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
22727836SJohn.Forte@Sun.COM 
22737836SJohn.Forte@Sun.COM 	DTRACE_PROBE(rdc_write_bitmap_end);
22747836SJohn.Forte@Sun.COM 
22757836SJohn.Forte@Sun.COM write1:
22767836SJohn.Forte@Sun.COM 	/* just in case we switch mode during write */
22777836SJohn.Forte@Sun.COM 	if (IS_ASYNC(urdc) && (!IS_STATE(urdc, RDC_SYNCING)) &&
22787836SJohn.Forte@Sun.COM 	    (!IS_STATE(urdc, RDC_LOGGING) ||
22797836SJohn.Forte@Sun.COM 	    IS_STATE(urdc, RDC_QUEUING))) {
22807836SJohn.Forte@Sun.COM 		h->rdc_flags |= RDC_ASYNC_BUF;
22817836SJohn.Forte@Sun.COM 	}
22827836SJohn.Forte@Sun.COM 	if (BUF_IS_ASYNC(h)) {
22837836SJohn.Forte@Sun.COM 		/*
22847836SJohn.Forte@Sun.COM 		 * We are async mode
22857836SJohn.Forte@Sun.COM 		 */
22867836SJohn.Forte@Sun.COM 		aio_buf_t *p;
22877836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async_start);
22887836SJohn.Forte@Sun.COM 
22897836SJohn.Forte@Sun.COM 		if ((krdc->type_flag & RDC_DISABLEPEND) ||
22907836SJohn.Forte@Sun.COM 		    ((IS_STATE(urdc, RDC_LOGGING) &&
22917836SJohn.Forte@Sun.COM 		    !IS_STATE(urdc, RDC_QUEUING)))) {
22927836SJohn.Forte@Sun.COM 			goto localwrite;
22937836SJohn.Forte@Sun.COM 		}
22947836SJohn.Forte@Sun.COM 		if (IS_STATE(urdc, RDC_VOL_FAILED)) {
22957836SJohn.Forte@Sun.COM 			/*
22967836SJohn.Forte@Sun.COM 			 * overload remote as we don't want to do local
22977836SJohn.Forte@Sun.COM 			 * IO later. forge ahead with async
22987836SJohn.Forte@Sun.COM 			 */
22997836SJohn.Forte@Sun.COM 			remote++;
23007836SJohn.Forte@Sun.COM 		}
23017836SJohn.Forte@Sun.COM 		if ((IS_STATE(urdc, RDC_SYNCING)) ||
23027836SJohn.Forte@Sun.COM 		    (IS_STATE(urdc, RDC_LOGGING) &&
23037836SJohn.Forte@Sun.COM 		    !IS_STATE(urdc, RDC_QUEUING))) {
23047836SJohn.Forte@Sun.COM 			goto localwrite;
23057836SJohn.Forte@Sun.COM 		}
23067836SJohn.Forte@Sun.COM 
23077836SJohn.Forte@Sun.COM 		p = rdc_aio_buf_add(krdc->index, h);
23087836SJohn.Forte@Sun.COM 		if (p == NULL) {
23097836SJohn.Forte@Sun.COM #ifdef DEBUG
23107836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2311*9093SRamana.Srikanth@Sun.COM 			    "!rdc_alloc_buf  aio_buf allocation failed");
23127836SJohn.Forte@Sun.COM #endif
23137836SJohn.Forte@Sun.COM 			goto localwrite;
23147836SJohn.Forte@Sun.COM 		}
23157836SJohn.Forte@Sun.COM 
23167836SJohn.Forte@Sun.COM 		mutex_enter(&h->aio_lock);
23177836SJohn.Forte@Sun.COM 
23187836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async__allocabuf_start);
23197836SJohn.Forte@Sun.COM 		rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp);
23207836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async__allocabuf_end);
23217836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
23227836SJohn.Forte@Sun.COM #ifdef DEBUG
23237836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2324*9093SRamana.Srikanth@Sun.COM 			    "!rdc_alloc_buf NSC_ANON allocation failed rc %d",
23257836SJohn.Forte@Sun.COM 			    rc1);
23267836SJohn.Forte@Sun.COM #endif
23277836SJohn.Forte@Sun.COM 			mutex_exit(&h->aio_lock);
23287836SJohn.Forte@Sun.COM 			goto localwrite;
23297836SJohn.Forte@Sun.COM 		}
23307836SJohn.Forte@Sun.COM 		h->rdc_flags |= RDC_ASYNC_VEC;
23317836SJohn.Forte@Sun.COM 		mutex_exit(&h->aio_lock);
23327836SJohn.Forte@Sun.COM 
23337836SJohn.Forte@Sun.COM 		/*
23347836SJohn.Forte@Sun.COM 		 * Copy buffer into anonymous buffer
23357836SJohn.Forte@Sun.COM 		 */
23367836SJohn.Forte@Sun.COM 
23377836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async_nsccopy_start);
23387836SJohn.Forte@Sun.COM 		rc1 =
23397836SJohn.Forte@Sun.COM 		    nsc_copy(&h->rdc_bufh, p->rdc_abufp, pos, pos, len);
23407836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async_nsccopy_end);
23417836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
23427836SJohn.Forte@Sun.COM #ifdef DEBUG
23437836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2344*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_write: nsc_copy failed rc=%d state %x",
23457836SJohn.Forte@Sun.COM 			    rc1, rdc_get_vflags(urdc));
23467836SJohn.Forte@Sun.COM #endif
23477836SJohn.Forte@Sun.COM 			rc1 = nsc_free_buf(p->rdc_abufp);
23487836SJohn.Forte@Sun.COM 			rdc_aio_buf_del(h, krdc);
23497836SJohn.Forte@Sun.COM 			rdc_group_enter(krdc);
23507836SJohn.Forte@Sun.COM 			rdc_group_log(krdc, RDC_FLUSH|RDC_OTHERREMOTE,
2351*9093SRamana.Srikanth@Sun.COM 			    "nsc_copy failure");
23527836SJohn.Forte@Sun.COM 			rdc_group_exit(krdc);
23537836SJohn.Forte@Sun.COM 		}
23547836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_async_end);
23557836SJohn.Forte@Sun.COM 
23567836SJohn.Forte@Sun.COM 		/*
23577836SJohn.Forte@Sun.COM 		 * using a diskq, launch a thread to queue it
23587836SJohn.Forte@Sun.COM 		 * and free the aio->h and aio
23597836SJohn.Forte@Sun.COM 		 * if the thread fails, do it the old way (see localwrite)
23607836SJohn.Forte@Sun.COM 		 */
23617836SJohn.Forte@Sun.COM 
23627836SJohn.Forte@Sun.COM 		if (RDC_IS_DISKQ(krdc->group)) {
23637836SJohn.Forte@Sun.COM 
23647836SJohn.Forte@Sun.COM 			if (nthr >= SNDR_MAXTHREADS) {
23657836SJohn.Forte@Sun.COM #ifdef DEBUG
2366*9093SRamana.Srikanth@Sun.COM 				cmn_err(CE_NOTE, "!nthr overrun in _rdc_write");
23677836SJohn.Forte@Sun.COM #endif
23687836SJohn.Forte@Sun.COM 				thrrc = ENOEXEC;
23697836SJohn.Forte@Sun.COM 				goto localwrite;
23707836SJohn.Forte@Sun.COM 			}
23717836SJohn.Forte@Sun.COM 
23727836SJohn.Forte@Sun.COM 			anon = rdc_aio_buf_get(h, krdc->index);
23737836SJohn.Forte@Sun.COM 			if (anon == NULL) {
23747836SJohn.Forte@Sun.COM #ifdef DEBUG
2375*9093SRamana.Srikanth@Sun.COM 				cmn_err(CE_WARN, "!rdc_aio_buf_get failed for "
23767836SJohn.Forte@Sun.COM 				    "%p", (void *)h);
23777836SJohn.Forte@Sun.COM #endif
23787836SJohn.Forte@Sun.COM 				thrrc = ENOEXEC;
23797836SJohn.Forte@Sun.COM 				goto localwrite;
23807836SJohn.Forte@Sun.COM 			}
23817836SJohn.Forte@Sun.COM 
23827836SJohn.Forte@Sun.COM 			/* get a populated rdc_aio_t */
23837836SJohn.Forte@Sun.COM 			bp[nthr] =
23847836SJohn.Forte@Sun.COM 			    rdc_aio_tbuf_get(sync, anon->rdc_abufp, pos, len,
23857836SJohn.Forte@Sun.COM 			    flag, krdc->index, bitmask);
23867836SJohn.Forte@Sun.COM 
23877836SJohn.Forte@Sun.COM 			if (bp[nthr] == NULL) {
23887836SJohn.Forte@Sun.COM #ifdef DEBUG
2389*9093SRamana.Srikanth@Sun.COM 				cmn_err(CE_NOTE, "!_rdcwrite: "
23907836SJohn.Forte@Sun.COM 				    "kmem_alloc failed bp aio (1)");
23917836SJohn.Forte@Sun.COM #endif
23927836SJohn.Forte@Sun.COM 				thrrc = ENOEXEC;
23937836SJohn.Forte@Sun.COM 				goto localwrite;
23947836SJohn.Forte@Sun.COM 			}
23957836SJohn.Forte@Sun.COM 			/* start the queue io */
23967836SJohn.Forte@Sun.COM 			tp = nst_create(_rdc_ioset, _rdc_diskq_enqueue_thr,
2397*9093SRamana.Srikanth@Sun.COM 			    (void *)bp[nthr], NST_SLEEP);
23987836SJohn.Forte@Sun.COM 
23997836SJohn.Forte@Sun.COM 			if (tp == NULL) {
24007836SJohn.Forte@Sun.COM #ifdef DEBUG
24017836SJohn.Forte@Sun.COM 				cmn_err(CE_NOTE,
2402*9093SRamana.Srikanth@Sun.COM 				    "!_rdcwrite: nst_create failure");
24037836SJohn.Forte@Sun.COM #endif
24047836SJohn.Forte@Sun.COM 				thrrc = ENOEXEC;
24057836SJohn.Forte@Sun.COM 			} else {
24067836SJohn.Forte@Sun.COM 				mutex_enter(&(sync->lock));
24077836SJohn.Forte@Sun.COM 				sync->threads++;
24087836SJohn.Forte@Sun.COM 				mutex_exit(&(sync->lock));
24097836SJohn.Forte@Sun.COM 				nthr++;
24107836SJohn.Forte@Sun.COM 
24117836SJohn.Forte@Sun.COM 			}
24127836SJohn.Forte@Sun.COM 			/*
24137836SJohn.Forte@Sun.COM 			 * the handle that is to be enqueued is now in
24147836SJohn.Forte@Sun.COM 			 * the rdc_aio_t, and will be freed there.
24157836SJohn.Forte@Sun.COM 			 * dump the aio_t now. If this is 1 to many
24167836SJohn.Forte@Sun.COM 			 * we may not do this in _rdc_free_buf()
24177836SJohn.Forte@Sun.COM 			 * if this was not the index that the rdc_buf_t
24187836SJohn.Forte@Sun.COM 			 * was allocated on.
24197836SJohn.Forte@Sun.COM 			 */
24207836SJohn.Forte@Sun.COM 			rdc_aio_buf_del(h, krdc);
24217836SJohn.Forte@Sun.COM 
24227836SJohn.Forte@Sun.COM 		}
24237836SJohn.Forte@Sun.COM 	}	/* end of async */
24247836SJohn.Forte@Sun.COM 
24257836SJohn.Forte@Sun.COM 	/*
24267836SJohn.Forte@Sun.COM 	 * We try to overlap local and network IO for the sync case
24277836SJohn.Forte@Sun.COM 	 * (we already do it for async)
24287836SJohn.Forte@Sun.COM 	 * If one to many, we need to track the resulting nst_thread
24297836SJohn.Forte@Sun.COM 	 * so we don't trash the nsc_buf on a free
24307836SJohn.Forte@Sun.COM 	 * Start network IO first then do local (sync only)
24317836SJohn.Forte@Sun.COM 	 */
24327836SJohn.Forte@Sun.COM 
24337836SJohn.Forte@Sun.COM 	if (IS_PRIMARY(urdc) && !IS_STATE(urdc, RDC_LOGGING) &&
2434*9093SRamana.Srikanth@Sun.COM 	    !BUF_IS_ASYNC(h)) {
24357836SJohn.Forte@Sun.COM 		/*
24367836SJohn.Forte@Sun.COM 		 * if forward syncing, we must do local IO first
24377836SJohn.Forte@Sun.COM 		 * then remote io. Don't spawn thread
24387836SJohn.Forte@Sun.COM 		 */
24397836SJohn.Forte@Sun.COM 		if (!rsync && (IS_STATE(urdc, RDC_SYNCING))) {
24407836SJohn.Forte@Sun.COM 			thrrc = ENOEXEC;
24417836SJohn.Forte@Sun.COM 			goto localwrite;
24427836SJohn.Forte@Sun.COM 		}
24437836SJohn.Forte@Sun.COM 		if (IS_MULTI(krdc)) {
24447836SJohn.Forte@Sun.COM 			rdc_k_info_t *ktmp;
24457836SJohn.Forte@Sun.COM 			rdc_u_info_t *utmp;
24467836SJohn.Forte@Sun.COM 
24477836SJohn.Forte@Sun.COM 			ktmp = krdc->multi_next;
24487836SJohn.Forte@Sun.COM 			utmp = &rdc_u_info[ktmp->index];
24497836SJohn.Forte@Sun.COM 			if (IS_ENABLED(utmp))
24507836SJohn.Forte@Sun.COM 				multi = ktmp;
24517836SJohn.Forte@Sun.COM 		}
24527836SJohn.Forte@Sun.COM 		if (nthr >= SNDR_MAXTHREADS) {
24537836SJohn.Forte@Sun.COM #ifdef DEBUG
2454*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_NOTE, "!nthr overrun in _rdc_write");
24557836SJohn.Forte@Sun.COM #endif
24567836SJohn.Forte@Sun.COM 			thrrc = ENOEXEC;
24577836SJohn.Forte@Sun.COM 			goto localwrite;
24587836SJohn.Forte@Sun.COM 		}
24597836SJohn.Forte@Sun.COM 
24607836SJohn.Forte@Sun.COM 		bp[nthr] = rdc_aio_tbuf_get(sync, h, pos, len,
24617836SJohn.Forte@Sun.COM 		    flag, krdc->index, bitmask);
24627836SJohn.Forte@Sun.COM 
24637836SJohn.Forte@Sun.COM 		if (bp[nthr] == NULL) {
24647836SJohn.Forte@Sun.COM 			thrrc = ENOEXEC;
24657836SJohn.Forte@Sun.COM 			goto localwrite;
24667836SJohn.Forte@Sun.COM 		}
24677836SJohn.Forte@Sun.COM 		tp = nst_create(_rdc_ioset, _rdc_sync_write_thr,
2468*9093SRamana.Srikanth@Sun.COM 		    (void *)bp[nthr], NST_SLEEP);
24697836SJohn.Forte@Sun.COM 		if (tp == NULL) {
24707836SJohn.Forte@Sun.COM #ifdef DEBUG
2471*9093SRamana.Srikanth@Sun.COM 			cmn_err(CE_NOTE, "!_rdcwrite: nst_create failure");
24727836SJohn.Forte@Sun.COM #endif
24737836SJohn.Forte@Sun.COM 			thrrc = ENOEXEC;
24747836SJohn.Forte@Sun.COM 		} else {
24757836SJohn.Forte@Sun.COM 			mutex_enter(&(sync->lock));
24767836SJohn.Forte@Sun.COM 			sync->threads++;
24777836SJohn.Forte@Sun.COM 			mutex_exit(&(sync->lock));
24787836SJohn.Forte@Sun.COM 			nthr++;
24797836SJohn.Forte@Sun.COM 		}
24807836SJohn.Forte@Sun.COM 	}
24817836SJohn.Forte@Sun.COM localwrite:
24827836SJohn.Forte@Sun.COM 	if (!remote && !rsync && first) {
24837836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_nscwrite_start);
24847836SJohn.Forte@Sun.COM 		rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
24857836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_nscwrite_end);
24867836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
24877836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
24887836SJohn.Forte@Sun.COM 			if (IS_PRIMARY(urdc))
24897836SJohn.Forte@Sun.COM 				/* Primary, so reverse sync needed */
24907836SJohn.Forte@Sun.COM 				rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
24917836SJohn.Forte@Sun.COM 			else
24927836SJohn.Forte@Sun.COM 				/* Secondary, so sync needed */
24937836SJohn.Forte@Sun.COM 				rdc_set_flags(urdc, RDC_SYNC_NEEDED);
24947836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
24957836SJohn.Forte@Sun.COM 			    "local write failed");
24967836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
24977836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
24987836SJohn.Forte@Sun.COM 		}
24997836SJohn.Forte@Sun.COM 	}
25007836SJohn.Forte@Sun.COM 
25017836SJohn.Forte@Sun.COM 	/*
25027836SJohn.Forte@Sun.COM 	 * This is where we either enqueue async IO for the flusher
25037836SJohn.Forte@Sun.COM 	 * or do sync IO in the case of an error in thread creation
25047836SJohn.Forte@Sun.COM 	 * or we are doing a forward sync
25057836SJohn.Forte@Sun.COM 	 * NOTE: if we are async, and using a diskq, we have
25067836SJohn.Forte@Sun.COM 	 * already enqueued this write.
25077836SJohn.Forte@Sun.COM 	 * _rdc_remote_write will end up enqueueing to memory,
25087836SJohn.Forte@Sun.COM 	 * or in case of a thread creation error above, try again
25097836SJohn.Forte@Sun.COM 	 * enqueue the diskq write if thrrc == ENOEXEC
25107836SJohn.Forte@Sun.COM 	 */
25117836SJohn.Forte@Sun.COM 	if ((IS_PRIMARY(urdc)) && (thrrc == ENOEXEC) ||
25127836SJohn.Forte@Sun.COM 	    (BUF_IS_ASYNC(h) && !RDC_IS_DISKQ(krdc->group))) {
25137836SJohn.Forte@Sun.COM 		thrrc = 0;
25147836SJohn.Forte@Sun.COM 		if (IS_MULTI(krdc)) {
25157836SJohn.Forte@Sun.COM 			rdc_k_info_t *ktmp;
25167836SJohn.Forte@Sun.COM 			rdc_u_info_t *utmp;
25177836SJohn.Forte@Sun.COM 
25187836SJohn.Forte@Sun.COM 			ktmp = krdc->multi_next;
25197836SJohn.Forte@Sun.COM 			utmp = &rdc_u_info[ktmp->index];
25207836SJohn.Forte@Sun.COM 			if (IS_ENABLED(utmp))
25217836SJohn.Forte@Sun.COM 				multi = ktmp;
25227836SJohn.Forte@Sun.COM 		}
25237836SJohn.Forte@Sun.COM 
25247836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_write_remote_start);
25257836SJohn.Forte@Sun.COM 
25267836SJohn.Forte@Sun.COM 		rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh,
25277836SJohn.Forte@Sun.COM 		    pos, len, flag, bitmask);
25287836SJohn.Forte@Sun.COM 
25297836SJohn.Forte@Sun.COM 		DTRACE_PROBE(rdc_rdcwrite_remote_end);
25307836SJohn.Forte@Sun.COM 	}
25317836SJohn.Forte@Sun.COM 
25327836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc1)) {
25337836SJohn.Forte@Sun.COM 		if ((IS_PRIMARY(urdc)) && !RDC_SUCCESS(rc2)) {
25347836SJohn.Forte@Sun.COM 			h->rdc_bufh.sb_error = rc1;
25357836SJohn.Forte@Sun.COM 		}
25367836SJohn.Forte@Sun.COM 	} else if ((remote || rsync) && !RDC_SUCCESS(rc2)) {
25377836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc2;
25387836SJohn.Forte@Sun.COM 	}
25397836SJohn.Forte@Sun.COM write2:
25407836SJohn.Forte@Sun.COM 	/*
25417836SJohn.Forte@Sun.COM 	 * If one to many, jump back into the loop to continue IO
25427836SJohn.Forte@Sun.COM 	 */
25437836SJohn.Forte@Sun.COM 	if (IS_MANY(krdc) && (IS_PRIMARY(urdc))) {
25447836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
25457836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
25467836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
25477836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
25487836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
25497836SJohn.Forte@Sun.COM 				continue;
25507836SJohn.Forte@Sun.COM 			rc2 = first = 0;
25517836SJohn.Forte@Sun.COM 			h->rdc_flags &= ~RDC_ASYNC_BUF;
25527836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
25537836SJohn.Forte@Sun.COM 			goto write1;
25547836SJohn.Forte@Sun.COM 		}
25557836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
25567836SJohn.Forte@Sun.COM 	}
25577836SJohn.Forte@Sun.COM 	urdc = &rdc_u_info[krdc->index];
25587836SJohn.Forte@Sun.COM 
25597836SJohn.Forte@Sun.COM 	/*
25607836SJohn.Forte@Sun.COM 	 * collect all of our threads if any
25617836SJohn.Forte@Sun.COM 	 */
25627836SJohn.Forte@Sun.COM 	if (nthr) {
25637836SJohn.Forte@Sun.COM 
25647836SJohn.Forte@Sun.COM 		mutex_enter(&(sync->lock));
25657836SJohn.Forte@Sun.COM 		/* wait for the threads */
25667836SJohn.Forte@Sun.COM 		while (sync->complete != sync->threads) {
25677836SJohn.Forte@Sun.COM 			cv_wait(&(sync->cv), &(sync->lock));
25687836SJohn.Forte@Sun.COM 		}
25697836SJohn.Forte@Sun.COM 		mutex_exit(&(sync->lock));
25707836SJohn.Forte@Sun.COM 
25717836SJohn.Forte@Sun.COM 		/* collect status */
25727836SJohn.Forte@Sun.COM 
25737836SJohn.Forte@Sun.COM 		winddown = 0;
25747836SJohn.Forte@Sun.COM 		while (winddown < nthr) {
25757836SJohn.Forte@Sun.COM 			/*
25767836SJohn.Forte@Sun.COM 			 * Get any error return from thread
25777836SJohn.Forte@Sun.COM 			 */
25787836SJohn.Forte@Sun.COM 			if ((remote || rsync) && bp[winddown]->flag) {
2579*9093SRamana.Srikanth@Sun.COM 				h->rdc_bufh.sb_error = bp[winddown]->flag;
25807836SJohn.Forte@Sun.COM 			}
25817836SJohn.Forte@Sun.COM 			if (bp[winddown])
25827836SJohn.Forte@Sun.COM 				kmem_free(bp[winddown], sizeof (rdc_aio_t));
25837836SJohn.Forte@Sun.COM 			winddown++;
25847836SJohn.Forte@Sun.COM 		}
25857836SJohn.Forte@Sun.COM 	}
25867836SJohn.Forte@Sun.COM 
25877836SJohn.Forte@Sun.COM 	if (rsync && !(IS_STATE(urdc, RDC_VOL_FAILED))) {
25887836SJohn.Forte@Sun.COM 		rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
25897836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
25907836SJohn.Forte@Sun.COM 			/* rsync, so reverse sync needed already set */
25917836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
25927836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
25937836SJohn.Forte@Sun.COM 			    "rsync local write failed");
25947836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
25957836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
25967836SJohn.Forte@Sun.COM 
25977836SJohn.Forte@Sun.COM 			/*
25987836SJohn.Forte@Sun.COM 			 * only report the error if a remote error
25997836SJohn.Forte@Sun.COM 			 * occurred as well.
26007836SJohn.Forte@Sun.COM 			 */
26017836SJohn.Forte@Sun.COM 			if (h->rdc_bufh.sb_error)
26027836SJohn.Forte@Sun.COM 				h->rdc_bufh.sb_error = rc1;
26037836SJohn.Forte@Sun.COM 		}
26047836SJohn.Forte@Sun.COM 	}
26057836SJohn.Forte@Sun.COM 
26067836SJohn.Forte@Sun.COM 	if (multi) {
26077836SJohn.Forte@Sun.COM 		/* Multi-hop secondary, just set bits in the bitmap */
26087836SJohn.Forte@Sun.COM 		(void) RDC_SET_BITMAP(multi, pos, len, &bitmask);
26097836SJohn.Forte@Sun.COM 	}
26107836SJohn.Forte@Sun.COM 
26117836SJohn.Forte@Sun.COM 	return (h->rdc_bufh.sb_error);
26127836SJohn.Forte@Sun.COM }
26137836SJohn.Forte@Sun.COM 
26147836SJohn.Forte@Sun.COM 
26157836SJohn.Forte@Sun.COM static void
_rdc_bzero(nsc_buf_t * h,nsc_off_t pos,nsc_size_t len)26167836SJohn.Forte@Sun.COM _rdc_bzero(nsc_buf_t *h, nsc_off_t pos, nsc_size_t len)
26177836SJohn.Forte@Sun.COM {
26187836SJohn.Forte@Sun.COM 	nsc_vec_t *v;
26197836SJohn.Forte@Sun.COM 	uchar_t *a;
26207836SJohn.Forte@Sun.COM 	size_t sz;
26217836SJohn.Forte@Sun.COM 	int l;
26227836SJohn.Forte@Sun.COM 
26237836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(h, pos, len)) {
26247836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2625*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_bzero: bounds check: io(handle) pos %" NSC_XSZFMT
2626*9093SRamana.Srikanth@Sun.COM 		    "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2627*9093SRamana.Srikanth@Sun.COM 		    pos, h->sb_pos, len, h->sb_len);
26287836SJohn.Forte@Sun.COM 		return;
26297836SJohn.Forte@Sun.COM 	}
26307836SJohn.Forte@Sun.COM 
26317836SJohn.Forte@Sun.COM 	if (!len)
26327836SJohn.Forte@Sun.COM 		return;
26337836SJohn.Forte@Sun.COM 
26347836SJohn.Forte@Sun.COM 	/* find starting point */
26357836SJohn.Forte@Sun.COM 
26367836SJohn.Forte@Sun.COM 	v = h->sb_vec;
26377836SJohn.Forte@Sun.COM 	pos -= h->sb_pos;
26387836SJohn.Forte@Sun.COM 
26397836SJohn.Forte@Sun.COM 	for (; pos >= FBA_NUM(v->sv_len); v++)
26407836SJohn.Forte@Sun.COM 		pos -= FBA_NUM(v->sv_len);
26417836SJohn.Forte@Sun.COM 
26427836SJohn.Forte@Sun.COM 	a = v->sv_addr + FBA_SIZE(pos);
26437836SJohn.Forte@Sun.COM 	l = v->sv_len - FBA_SIZE(pos);
26447836SJohn.Forte@Sun.COM 
26457836SJohn.Forte@Sun.COM 	/* zero */
26467836SJohn.Forte@Sun.COM 
26477836SJohn.Forte@Sun.COM 	len = FBA_SIZE(len);	/* convert to bytes */
26487836SJohn.Forte@Sun.COM 
26497836SJohn.Forte@Sun.COM 	while (len) {
26507836SJohn.Forte@Sun.COM 		if (!a)		/* end of vec */
26517836SJohn.Forte@Sun.COM 			break;
26527836SJohn.Forte@Sun.COM 
26537836SJohn.Forte@Sun.COM 		sz = (size_t)min((nsc_size_t)l, len);
26547836SJohn.Forte@Sun.COM 
26557836SJohn.Forte@Sun.COM 		bzero(a, sz);
26567836SJohn.Forte@Sun.COM 
26577836SJohn.Forte@Sun.COM 		len -= sz;
26587836SJohn.Forte@Sun.COM 		l -= sz;
26597836SJohn.Forte@Sun.COM 		a += sz;
26607836SJohn.Forte@Sun.COM 
26617836SJohn.Forte@Sun.COM 		if (!l) {
26627836SJohn.Forte@Sun.COM 			v++;
26637836SJohn.Forte@Sun.COM 			a = v->sv_addr;
26647836SJohn.Forte@Sun.COM 			l = v->sv_len;
26657836SJohn.Forte@Sun.COM 		}
26667836SJohn.Forte@Sun.COM 	}
26677836SJohn.Forte@Sun.COM }
26687836SJohn.Forte@Sun.COM 
26697836SJohn.Forte@Sun.COM 
26707836SJohn.Forte@Sun.COM /*
26717836SJohn.Forte@Sun.COM  * _rdc_zero
26727836SJohn.Forte@Sun.COM  *
26737836SJohn.Forte@Sun.COM  * Zero and commit the specified area of the buffer.
26747836SJohn.Forte@Sun.COM  *
26757836SJohn.Forte@Sun.COM  * If this write is whilst the local primary volume is being synced,
26767836SJohn.Forte@Sun.COM  * then we write the remote end first to ensure that the new data
26777836SJohn.Forte@Sun.COM  * cannot be overwritten by a concurrent sync operation.
26787836SJohn.Forte@Sun.COM  */
26797836SJohn.Forte@Sun.COM 
26807836SJohn.Forte@Sun.COM static int
_rdc_zero(rdc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag)26817836SJohn.Forte@Sun.COM _rdc_zero(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
26827836SJohn.Forte@Sun.COM {
26837836SJohn.Forte@Sun.COM 	rdc_k_info_t *krdc = h->rdc_fd->rdc_info;
26847836SJohn.Forte@Sun.COM 	rdc_u_info_t *urdc = &rdc_u_info[krdc->index];
26857836SJohn.Forte@Sun.COM 	rdc_k_info_t *this;
26867836SJohn.Forte@Sun.COM 	rdc_k_info_t *multi = NULL;
26877836SJohn.Forte@Sun.COM 	int remote = RDC_REMOTE(h);
26887836SJohn.Forte@Sun.COM 	int rc1, rc2;
26897836SJohn.Forte@Sun.COM 	uint_t bitmask;
26907836SJohn.Forte@Sun.COM 	int first;
26917836SJohn.Forte@Sun.COM 	int rsync;
26927836SJohn.Forte@Sun.COM 
26937836SJohn.Forte@Sun.COM 	/* If this is the multi-hop secondary, move along to the primary */
26947836SJohn.Forte@Sun.COM 	if (IS_MULTI(krdc) && !(rdc_get_vflags(urdc) & RDC_PRIMARY)) {
26957836SJohn.Forte@Sun.COM 		multi = krdc;
26967836SJohn.Forte@Sun.COM 		krdc = krdc->multi_next;
26977836SJohn.Forte@Sun.COM 		urdc = &rdc_u_info[krdc->index];
26987836SJohn.Forte@Sun.COM 
26997836SJohn.Forte@Sun.COM 		if (!IS_ENABLED(urdc)) {
27007836SJohn.Forte@Sun.COM 			krdc = h->rdc_fd->rdc_info;
27017836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
27027836SJohn.Forte@Sun.COM 			multi = NULL;
27037836SJohn.Forte@Sun.COM 		}
27047836SJohn.Forte@Sun.COM 	}
27057836SJohn.Forte@Sun.COM 	this = krdc;
27067836SJohn.Forte@Sun.COM 
27077836SJohn.Forte@Sun.COM 	rsync = ((rdc_get_vflags(urdc) & RDC_PRIMARY) &&
27087836SJohn.Forte@Sun.COM 	    (rdc_get_mflags(urdc) & RDC_SLAVE));
27097836SJohn.Forte@Sun.COM 
27107836SJohn.Forte@Sun.COM 	/*
27117836SJohn.Forte@Sun.COM 	 * If this is a many group with a reverse sync in progress and
27127836SJohn.Forte@Sun.COM 	 * this is not the slave krdc/urdc, then search for the slave
27137836SJohn.Forte@Sun.COM 	 * so that we can do the remote io to the correct secondary
27147836SJohn.Forte@Sun.COM 	 * before the local io.
27157836SJohn.Forte@Sun.COM 	 */
27167836SJohn.Forte@Sun.COM 	if (rsync && !(rdc_get_vflags(urdc) & RDC_SLAVE)) {
27177836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
27187836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
27197836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
27207836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
27217836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
27227836SJohn.Forte@Sun.COM 				continue;
27237836SJohn.Forte@Sun.COM 			if (rdc_get_vflags(urdc) & RDC_SLAVE)
27247836SJohn.Forte@Sun.COM 				break;
27257836SJohn.Forte@Sun.COM 		}
27267836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
27277836SJohn.Forte@Sun.COM 
27287836SJohn.Forte@Sun.COM 		this = krdc;
27297836SJohn.Forte@Sun.COM 	}
27307836SJohn.Forte@Sun.COM 
27317836SJohn.Forte@Sun.COM 	rc1 = rc2 = 0;
27327836SJohn.Forte@Sun.COM 	first = 1;
27337836SJohn.Forte@Sun.COM 
27347836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
27357836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2736*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_zero: bounds check: io(handle) pos %" NSC_XSZFMT
27377836SJohn.Forte@Sun.COM 		    "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2738*9093SRamana.Srikanth@Sun.COM 		    pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
27397836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = EINVAL;
27407836SJohn.Forte@Sun.COM 		return (h->rdc_bufh.sb_error);
27417836SJohn.Forte@Sun.COM 	}
27427836SJohn.Forte@Sun.COM 
27437836SJohn.Forte@Sun.COM zero1:
27447836SJohn.Forte@Sun.COM 	if (RDC_SET_BITMAP(krdc, pos, len, &bitmask) < 0) {
27457836SJohn.Forte@Sun.COM 		(void) nsc_uncommit(h->rdc_bufp, pos, len, flag);
27467836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = EIO;
27477836SJohn.Forte@Sun.COM 		goto zero2;
27487836SJohn.Forte@Sun.COM 	}
27497836SJohn.Forte@Sun.COM 
27507836SJohn.Forte@Sun.COM 	if (IS_ASYNC(urdc)) {
27517836SJohn.Forte@Sun.COM 		/*
27527836SJohn.Forte@Sun.COM 		 * We are async mode
27537836SJohn.Forte@Sun.COM 		 */
27547836SJohn.Forte@Sun.COM 		aio_buf_t *p;
27557836SJohn.Forte@Sun.COM 
27567836SJohn.Forte@Sun.COM 		if ((krdc->type_flag & RDC_DISABLEPEND) ||
27577836SJohn.Forte@Sun.COM 		    (rdc_get_vflags(urdc) & RDC_LOGGING)) {
27587836SJohn.Forte@Sun.COM 			mutex_exit(&krdc->group->ra_queue.net_qlock);
27597836SJohn.Forte@Sun.COM 			goto localzero;
27607836SJohn.Forte@Sun.COM 		}
27617836SJohn.Forte@Sun.COM 
27627836SJohn.Forte@Sun.COM 		if ((rdc_get_vflags(urdc) & RDC_VOL_FAILED) ||
27637836SJohn.Forte@Sun.COM 		    (rdc_get_vflags(urdc) & RDC_BMP_FAILED)) {
27647836SJohn.Forte@Sun.COM 			mutex_exit(&krdc->group->ra_queue.net_qlock);
27657836SJohn.Forte@Sun.COM 			goto zero2;
27667836SJohn.Forte@Sun.COM 		}
27677836SJohn.Forte@Sun.COM 		if (rdc_get_vflags(urdc) & RDC_LOGGING) {
27687836SJohn.Forte@Sun.COM 			mutex_exit(&krdc->group->ra_queue.net_qlock);
27697836SJohn.Forte@Sun.COM 			goto localzero;
27707836SJohn.Forte@Sun.COM 		}
27717836SJohn.Forte@Sun.COM 		p = rdc_aio_buf_add(krdc->index, h);
27727836SJohn.Forte@Sun.COM 		if (p == NULL) {
27737836SJohn.Forte@Sun.COM #ifdef DEBUG
27747836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2775*9093SRamana.Srikanth@Sun.COM 			    "!rdc_alloc_buf  aio_buf allocation failed");
27767836SJohn.Forte@Sun.COM #endif
27777836SJohn.Forte@Sun.COM 			goto localzero;
27787836SJohn.Forte@Sun.COM 		}
27797836SJohn.Forte@Sun.COM 		mutex_enter(&h->aio_lock);
27807836SJohn.Forte@Sun.COM 		rc1 = nsc_alloc_abuf(pos, len, 0, &p->rdc_abufp);
27817836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
27827836SJohn.Forte@Sun.COM #ifdef DEBUG
27837836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2784*9093SRamana.Srikanth@Sun.COM 			    "!rdc_alloc_buf NSC_ANON allocation failed rc %d",
27857836SJohn.Forte@Sun.COM 			    rc1);
27867836SJohn.Forte@Sun.COM #endif
27877836SJohn.Forte@Sun.COM 			mutex_exit(&h->aio_lock);
27887836SJohn.Forte@Sun.COM 			goto localzero;
27897836SJohn.Forte@Sun.COM 		}
27907836SJohn.Forte@Sun.COM 		h->rdc_flags |= RDC_ASYNC_VEC;
27917836SJohn.Forte@Sun.COM 		mutex_exit(&h->aio_lock);
27927836SJohn.Forte@Sun.COM 
27937836SJohn.Forte@Sun.COM 		/*
27947836SJohn.Forte@Sun.COM 		 * Copy buffer into anonymous buffer
27957836SJohn.Forte@Sun.COM 		 */
27967836SJohn.Forte@Sun.COM 
27977836SJohn.Forte@Sun.COM 		rc1 = nsc_zero(p->rdc_abufp, pos, len, flag);
27987836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
27997836SJohn.Forte@Sun.COM #ifdef DEBUG
28007836SJohn.Forte@Sun.COM 			cmn_err(CE_WARN,
2801*9093SRamana.Srikanth@Sun.COM 			    "!_rdc_zero: nsc_zero failed rc=%d state %x",
28027836SJohn.Forte@Sun.COM 			    rc1, rdc_get_vflags(urdc));
28037836SJohn.Forte@Sun.COM #endif
28047836SJohn.Forte@Sun.COM 			rc1 = nsc_free_buf(p->rdc_abufp);
28057836SJohn.Forte@Sun.COM 			rdc_aio_buf_del(h, krdc);
28067836SJohn.Forte@Sun.COM 			rdc_group_enter(krdc);
28077836SJohn.Forte@Sun.COM 			rdc_group_log(krdc, RDC_FLUSH | RDC_OTHERREMOTE,
2808*9093SRamana.Srikanth@Sun.COM 			    "nsc_zero failed");
28097836SJohn.Forte@Sun.COM 			rdc_group_exit(krdc);
28107836SJohn.Forte@Sun.COM 		}
28117836SJohn.Forte@Sun.COM 	}	/* end of async */
28127836SJohn.Forte@Sun.COM 
28137836SJohn.Forte@Sun.COM localzero:
28147836SJohn.Forte@Sun.COM 
28157836SJohn.Forte@Sun.COM 	if (flag & NSC_NOBLOCK) {
28167836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2817*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_zero: removing unsupported NSC_NOBLOCK flag");
28187836SJohn.Forte@Sun.COM 		flag &= ~(NSC_NOBLOCK);
28197836SJohn.Forte@Sun.COM 	}
28207836SJohn.Forte@Sun.COM 
28217836SJohn.Forte@Sun.COM 	if (!remote && !rsync && first) {
28227836SJohn.Forte@Sun.COM 		rc1 = nsc_zero(h->rdc_bufp, pos, len, flag);
28237836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
28247836SJohn.Forte@Sun.COM 			ASSERT(rdc_get_vflags(urdc) & RDC_PRIMARY);
28257836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
28267836SJohn.Forte@Sun.COM 			/* Primary, so reverse sync needed */
28277836SJohn.Forte@Sun.COM 			rdc_set_mflags(urdc, RDC_RSYNC_NEEDED);
28287836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
28297836SJohn.Forte@Sun.COM 			    "nsc_zero failed");
28307836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
28317836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
28327836SJohn.Forte@Sun.COM 		}
28337836SJohn.Forte@Sun.COM 	}
28347836SJohn.Forte@Sun.COM 
28357836SJohn.Forte@Sun.COM 	/*
28367836SJohn.Forte@Sun.COM 	 * send new data to remote end - nsc_zero has zero'd
28377836SJohn.Forte@Sun.COM 	 * the data in the buffer, or _rdc_bzero will be used below.
28387836SJohn.Forte@Sun.COM 	 */
28397836SJohn.Forte@Sun.COM 
28407836SJohn.Forte@Sun.COM 	if (rdc_get_vflags(urdc) & RDC_PRIMARY) {
28417836SJohn.Forte@Sun.COM 		if (first && (remote || rsync || !RDC_SUCCESS(rc1))) {
28427836SJohn.Forte@Sun.COM 			/* bzero so that we can send new data to remote node */
28437836SJohn.Forte@Sun.COM 			_rdc_bzero(&h->rdc_bufh, pos, len);
28447836SJohn.Forte@Sun.COM 		}
28457836SJohn.Forte@Sun.COM 
28467836SJohn.Forte@Sun.COM 		if (IS_MULTI(krdc)) {
28477836SJohn.Forte@Sun.COM 			rdc_k_info_t *ktmp;
28487836SJohn.Forte@Sun.COM 			rdc_u_info_t *utmp;
28497836SJohn.Forte@Sun.COM 
28507836SJohn.Forte@Sun.COM 			ktmp = krdc->multi_next;
28517836SJohn.Forte@Sun.COM 			utmp = &rdc_u_info[ktmp->index];
28527836SJohn.Forte@Sun.COM 			if (IS_ENABLED(utmp))
28537836SJohn.Forte@Sun.COM 				multi = ktmp;
28547836SJohn.Forte@Sun.COM 		}
28557836SJohn.Forte@Sun.COM 
28567836SJohn.Forte@Sun.COM 		rc2 = _rdc_remote_write(krdc, h, &h->rdc_bufh,
28577836SJohn.Forte@Sun.COM 		    pos, len, flag, bitmask);
28587836SJohn.Forte@Sun.COM 	}
28597836SJohn.Forte@Sun.COM 
28607836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc1)) {
28617836SJohn.Forte@Sun.COM 		if ((rdc_get_vflags(urdc) & RDC_PRIMARY) && !RDC_SUCCESS(rc2)) {
28627836SJohn.Forte@Sun.COM 			h->rdc_bufh.sb_error = rc1;
28637836SJohn.Forte@Sun.COM 		}
28647836SJohn.Forte@Sun.COM 	} else if ((remote || rsync) && !RDC_SUCCESS(rc2)) {
28657836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc2;
28667836SJohn.Forte@Sun.COM 	}
28677836SJohn.Forte@Sun.COM 
28687836SJohn.Forte@Sun.COM zero2:
28697836SJohn.Forte@Sun.COM 	if (IS_MANY(krdc) && (rdc_get_vflags(urdc) && RDC_PRIMARY)) {
28707836SJohn.Forte@Sun.COM 		rdc_many_enter(krdc);
28717836SJohn.Forte@Sun.COM 		for (krdc = krdc->many_next; krdc != this;
28727836SJohn.Forte@Sun.COM 		    krdc = krdc->many_next) {
28737836SJohn.Forte@Sun.COM 			urdc = &rdc_u_info[krdc->index];
28747836SJohn.Forte@Sun.COM 			if (!IS_ENABLED(urdc))
28757836SJohn.Forte@Sun.COM 				continue;
28767836SJohn.Forte@Sun.COM 			rc2 = first = 0;
28777836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
28787836SJohn.Forte@Sun.COM 			goto zero1;
28797836SJohn.Forte@Sun.COM 		}
28807836SJohn.Forte@Sun.COM 		rdc_many_exit(krdc);
28817836SJohn.Forte@Sun.COM 	}
28827836SJohn.Forte@Sun.COM 
28837836SJohn.Forte@Sun.COM 	if (rsync && !(rdc_get_vflags(urdc) & RDC_VOL_FAILED)) {
28847836SJohn.Forte@Sun.COM 		rc1 = nsc_write(h->rdc_bufp, pos, len, flag);
28857836SJohn.Forte@Sun.COM 		if (!RDC_SUCCESS(rc1)) {
28867836SJohn.Forte@Sun.COM 			/* rsync, so reverse sync needed already set */
28877836SJohn.Forte@Sun.COM 			rdc_many_enter(krdc);
28887836SJohn.Forte@Sun.COM 			rdc_set_flags_log(urdc, RDC_VOL_FAILED,
28897836SJohn.Forte@Sun.COM 			    "nsc_write failed");
28907836SJohn.Forte@Sun.COM 			rdc_many_exit(krdc);
28917836SJohn.Forte@Sun.COM 			rdc_write_state(urdc);
28927836SJohn.Forte@Sun.COM 
28937836SJohn.Forte@Sun.COM 			/*
28947836SJohn.Forte@Sun.COM 			 * only report the error if a remote error
28957836SJohn.Forte@Sun.COM 			 * occurred as well.
28967836SJohn.Forte@Sun.COM 			 */
28977836SJohn.Forte@Sun.COM 			if (h->rdc_bufh.sb_error)
28987836SJohn.Forte@Sun.COM 				h->rdc_bufh.sb_error = rc1;
28997836SJohn.Forte@Sun.COM 		}
29007836SJohn.Forte@Sun.COM 	}
29017836SJohn.Forte@Sun.COM 
29027836SJohn.Forte@Sun.COM 	if (multi) {
29037836SJohn.Forte@Sun.COM 		/* Multi-hop secondary, just set bits in the bitmap */
29047836SJohn.Forte@Sun.COM 		(void) RDC_SET_BITMAP(multi, pos, len, &bitmask);
29057836SJohn.Forte@Sun.COM 	}
29067836SJohn.Forte@Sun.COM 
29077836SJohn.Forte@Sun.COM 	return (h->rdc_bufh.sb_error);
29087836SJohn.Forte@Sun.COM }
29097836SJohn.Forte@Sun.COM 
29107836SJohn.Forte@Sun.COM 
29117836SJohn.Forte@Sun.COM /*
29127836SJohn.Forte@Sun.COM  * _rdc_uncommit
29137836SJohn.Forte@Sun.COM  * - refresh specified data region in the buffer to prevent the cache
29147836SJohn.Forte@Sun.COM  *   serving the scribbled on data back to another client.
29157836SJohn.Forte@Sun.COM  *
29167836SJohn.Forte@Sun.COM  * Only needs to happen on the local node.  If in remote io mode, then
29177836SJohn.Forte@Sun.COM  * just return 0 - we do not cache the data on the local node and the
29187836SJohn.Forte@Sun.COM  * changed data will not have made it to the cache on the other node,
29197836SJohn.Forte@Sun.COM  * so it has no need to uncommit.
29207836SJohn.Forte@Sun.COM  */
29217836SJohn.Forte@Sun.COM 
29227836SJohn.Forte@Sun.COM static int
_rdc_uncommit(rdc_buf_t * h,nsc_off_t pos,nsc_size_t len,int flag)29237836SJohn.Forte@Sun.COM _rdc_uncommit(rdc_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
29247836SJohn.Forte@Sun.COM {
29257836SJohn.Forte@Sun.COM 	int remote = RDC_REMOTE(h);
29267836SJohn.Forte@Sun.COM 	int rc = 0;
29277836SJohn.Forte@Sun.COM 
29287836SJohn.Forte@Sun.COM 	if (!RDC_HANDLE_LIMITS(&h->rdc_bufh, pos, len)) {
29297836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2930*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_uncommit: bounds check: io(handle) pos %" NSC_XSZFMT
2931*9093SRamana.Srikanth@Sun.COM 		    "(%" NSC_XSZFMT ") len %" NSC_XSZFMT "(%" NSC_XSZFMT ")",
2932*9093SRamana.Srikanth@Sun.COM 		    pos, h->rdc_bufh.sb_pos, len, h->rdc_bufh.sb_len);
29337836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = EINVAL;
29347836SJohn.Forte@Sun.COM 		return (h->rdc_bufh.sb_error);
29357836SJohn.Forte@Sun.COM 	}
29367836SJohn.Forte@Sun.COM 
29377836SJohn.Forte@Sun.COM 	if (flag & NSC_NOBLOCK) {
29387836SJohn.Forte@Sun.COM 		cmn_err(CE_WARN,
2939*9093SRamana.Srikanth@Sun.COM 		    "!_rdc_uncommit: removing unsupported NSC_NOBLOCK flag");
29407836SJohn.Forte@Sun.COM 		flag &= ~(NSC_NOBLOCK);
29417836SJohn.Forte@Sun.COM 	}
29427836SJohn.Forte@Sun.COM 
29437836SJohn.Forte@Sun.COM 	if (!remote) {
29447836SJohn.Forte@Sun.COM 		rc = nsc_uncommit(h->rdc_bufp, pos, len, flag);
29457836SJohn.Forte@Sun.COM 	}
29467836SJohn.Forte@Sun.COM 
29477836SJohn.Forte@Sun.COM 	if (!RDC_SUCCESS(rc))
29487836SJohn.Forte@Sun.COM 		h->rdc_bufh.sb_error = rc;
29497836SJohn.Forte@Sun.COM 
29507836SJohn.Forte@Sun.COM 	return (rc);
29517836SJohn.Forte@Sun.COM }
29527836SJohn.Forte@Sun.COM 
29537836SJohn.Forte@Sun.COM 
29547836SJohn.Forte@Sun.COM /*
29557836SJohn.Forte@Sun.COM  * _rdc_trksize
29567836SJohn.Forte@Sun.COM  *
29577836SJohn.Forte@Sun.COM  * only needs to happen on local node.
29587836SJohn.Forte@Sun.COM  */
29597836SJohn.Forte@Sun.COM 
29607836SJohn.Forte@Sun.COM static int
_rdc_trksize(rdc_fd_t * rfd,nsc_size_t trksize)29617836SJohn.Forte@Sun.COM _rdc_trksize(rdc_fd_t *rfd, nsc_size_t trksize)
29627836SJohn.Forte@Sun.COM {
29637836SJohn.Forte@Sun.COM 	return (nsc_set_trksize(RDC_FD(rfd), trksize));
29647836SJohn.Forte@Sun.COM }
29657836SJohn.Forte@Sun.COM 
29667836SJohn.Forte@Sun.COM 
29677836SJohn.Forte@Sun.COM static nsc_def_t _rdc_fd_def[] = {
29687836SJohn.Forte@Sun.COM 	"Attach",	(uintptr_t)_rdc_attach_fd,	0,
29697836SJohn.Forte@Sun.COM 	"Pinned",	(uintptr_t)_rdc_pinned,		0,
29707836SJohn.Forte@Sun.COM 	"Unpinned",	(uintptr_t)_rdc_unpinned,	0,
29717836SJohn.Forte@Sun.COM 	0,		0,				0
29727836SJohn.Forte@Sun.COM };
29737836SJohn.Forte@Sun.COM 
29747836SJohn.Forte@Sun.COM 
29757836SJohn.Forte@Sun.COM static nsc_def_t _rdc_io_def[] = {
29767836SJohn.Forte@Sun.COM 	"Open",		(uintptr_t)_rdc_openc,		0,
29777836SJohn.Forte@Sun.COM 	"Close",	(uintptr_t)_rdc_close,		0,
29787836SJohn.Forte@Sun.COM 	"Attach",	(uintptr_t)_rdc_attach,		0,
29797836SJohn.Forte@Sun.COM 	"Detach",	(uintptr_t)_rdc_detach,		0,
29807836SJohn.Forte@Sun.COM 	"AllocHandle",	(uintptr_t)_rdc_alloc_handle,	0,
29817836SJohn.Forte@Sun.COM 	"FreeHandle",	(uintptr_t)_rdc_free_handle,	0,
29827836SJohn.Forte@Sun.COM 	"AllocBuf",	(uintptr_t)_rdc_alloc_buf,	0,
29837836SJohn.Forte@Sun.COM 	"FreeBuf",	(uintptr_t)_rdc_free_buf,	0,
29847836SJohn.Forte@Sun.COM 	"GetPinned",	(uintptr_t)_rdc_get_pinned,	0,
29857836SJohn.Forte@Sun.COM 	"Discard",	(uintptr_t)_rdc_discard_pinned,	0,
29867836SJohn.Forte@Sun.COM 	"PartSize",	(uintptr_t)_rdc_partsize,	0,
29877836SJohn.Forte@Sun.COM 	"MaxFbas",	(uintptr_t)_rdc_maxfbas,	0,
29887836SJohn.Forte@Sun.COM 	"Control",	(uintptr_t)_rdc_control,	0,
29897836SJohn.Forte@Sun.COM 	"Read",		(uintptr_t)_rdc_read,		0,
29907836SJohn.Forte@Sun.COM 	"Write",	(uintptr_t)_rdc_write,		0,
29917836SJohn.Forte@Sun.COM 	"Zero",		(uintptr_t)_rdc_zero,		0,
29927836SJohn.Forte@Sun.COM 	"Uncommit",	(uintptr_t)_rdc_uncommit,	0,
29937836SJohn.Forte@Sun.COM 	"TrackSize",	(uintptr_t)_rdc_trksize,	0,
29947836SJohn.Forte@Sun.COM 	"Provide",	0,				0,
29957836SJohn.Forte@Sun.COM 	0,		0,				0
29967836SJohn.Forte@Sun.COM };
29977836SJohn.Forte@Sun.COM 
29987836SJohn.Forte@Sun.COM static nsc_def_t _rdc_ior_def[] = {
29997836SJohn.Forte@Sun.COM 	"Open",		(uintptr_t)_rdc_openr,		0,
30007836SJohn.Forte@Sun.COM 	"Close",	(uintptr_t)_rdc_close,		0,
30017836SJohn.Forte@Sun.COM 	"Attach",	(uintptr_t)_rdc_attach,		0,
30027836SJohn.Forte@Sun.COM 	"Detach",	(uintptr_t)_rdc_detach,		0,
30037836SJohn.Forte@Sun.COM 	"AllocHandle",	(uintptr_t)_rdc_alloc_handle,	0,
30047836SJohn.Forte@Sun.COM 	"FreeHandle",	(uintptr_t)_rdc_free_handle,	0,
30057836SJohn.Forte@Sun.COM 	"AllocBuf",	(uintptr_t)_rdc_alloc_buf,	0,
30067836SJohn.Forte@Sun.COM 	"FreeBuf",	(uintptr_t)_rdc_free_buf,	0,
30077836SJohn.Forte@Sun.COM 	"GetPinned",	(uintptr_t)_rdc_get_pinned,	0,
30087836SJohn.Forte@Sun.COM 	"Discard",	(uintptr_t)_rdc_discard_pinned,	0,
30097836SJohn.Forte@Sun.COM 	"PartSize",	(uintptr_t)_rdc_partsize,	0,
30107836SJohn.Forte@Sun.COM 	"MaxFbas",	(uintptr_t)_rdc_maxfbas,	0,
30117836SJohn.Forte@Sun.COM 	"Control",	(uintptr_t)_rdc_control,	0,
30127836SJohn.Forte@Sun.COM 	"Read",		(uintptr_t)_rdc_read,		0,
30137836SJohn.Forte@Sun.COM 	"Write",	(uintptr_t)_rdc_write,		0,
30147836SJohn.Forte@Sun.COM 	"Zero",		(uintptr_t)_rdc_zero,		0,
30157836SJohn.Forte@Sun.COM 	"Uncommit",	(uintptr_t)_rdc_uncommit,	0,
30167836SJohn.Forte@Sun.COM 	"TrackSize",	(uintptr_t)_rdc_trksize,	0,
30177836SJohn.Forte@Sun.COM 	"Provide",	0,				0,
30187836SJohn.Forte@Sun.COM 	0,		0,				0
30197836SJohn.Forte@Sun.COM };
3020