xref: /onnv-gate/usr/src/uts/common/io/lvm/md/md_subr.c (revision 11130:ce5c27fd996f)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51623Stw21770  * Common Development and Distribution License (the "License").
61623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
217563SPrasad.Singamsetty@Sun.COM 
220Sstevel@tonic-gate /*
2310549SAchim.Maurer@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate  * Driver for Virtual Disk.
290Sstevel@tonic-gate  */
300Sstevel@tonic-gate #include <sys/param.h>
310Sstevel@tonic-gate #include <sys/systm.h>
320Sstevel@tonic-gate #include <sys/buf.h>
330Sstevel@tonic-gate #include <sys/conf.h>
340Sstevel@tonic-gate #include <sys/user.h>
350Sstevel@tonic-gate #include <sys/uio.h>
360Sstevel@tonic-gate #include <sys/proc.h>
370Sstevel@tonic-gate #include <sys/t_lock.h>
380Sstevel@tonic-gate #include <sys/dkio.h>
390Sstevel@tonic-gate #include <sys/kmem.h>
400Sstevel@tonic-gate #include <sys/debug.h>
410Sstevel@tonic-gate #include <sys/cmn_err.h>
420Sstevel@tonic-gate #include <sys/sysmacros.h>
430Sstevel@tonic-gate #include <sys/types.h>
440Sstevel@tonic-gate #include <sys/mkdev.h>
450Sstevel@tonic-gate #include <sys/vtoc.h>
460Sstevel@tonic-gate #include <sys/open.h>
470Sstevel@tonic-gate #include <sys/file.h>
480Sstevel@tonic-gate #include <vm/page.h>
490Sstevel@tonic-gate #include <sys/callb.h>
500Sstevel@tonic-gate #include <sys/disp.h>
510Sstevel@tonic-gate #include <sys/modctl.h>
520Sstevel@tonic-gate #include <sys/errno.h>
530Sstevel@tonic-gate #include <sys/door.h>
540Sstevel@tonic-gate #include <sys/lvm/mdmn_commd.h>
551623Stw21770 #include <sys/lvm/md_hotspares.h>
560Sstevel@tonic-gate 
570Sstevel@tonic-gate #include <sys/lvm/mdvar.h>
580Sstevel@tonic-gate #include <sys/lvm/md_names.h>
590Sstevel@tonic-gate 
600Sstevel@tonic-gate #include <sys/ddi.h>
610Sstevel@tonic-gate #include <sys/proc.h>
620Sstevel@tonic-gate #include <sys/sunddi.h>
630Sstevel@tonic-gate #include <sys/esunddi.h>
640Sstevel@tonic-gate 
650Sstevel@tonic-gate #include <sys/sysevent.h>
660Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h>
670Sstevel@tonic-gate 
680Sstevel@tonic-gate #include <sys/sysevent/svm.h>
69734Smw145384 #include <sys/lvm/md_basic.h>
70734Smw145384 
710Sstevel@tonic-gate 
/*
 * Machine specific Hertz; only declared here, the definition lives
 * elsewhere.
 */
extern clock_t			md_hz;

/*
 * Externs.
 *
 * Data and entry points defined outside this file.
 */
extern int			(*mdv_strategy_tstpnt)(buf_t *, int, void*);
extern major_t			md_major;
extern unit_t			md_nunits;
extern set_t			md_nsets;
extern md_set_t			md_set[];
extern md_set_io_t		md_set_io[];
extern md_ops_t			**md_ops;
extern md_ops_t			*md_opslist;
extern ddi_modhandle_t		*md_mods;
extern dev_info_t		*md_devinfo;

/*
 * md_mx is held around every md_status / md_set[].s_status access made by
 * the accessor functions in this file; md_cv is the condition variable
 * the global lock routines sleep on while holding md_mx.
 */
extern md_krwlock_t		md_unit_array_rw;
extern kmutex_t			md_mx;
extern kcondvar_t		md_cv;

extern md_krwlock_t		hsp_rwlp;
extern md_krwlock_t		ni_rwlp;

extern int			md_num_daemons;
extern int			md_status;
extern int			md_ioctl_cnt;
extern int			md_mtioctl_cnt;

extern struct metatransops	metatransops;
extern md_event_queue_t		*md_event_queue;
extern md_resync_t		md_cpr_resync;
extern int			md_done_daemon_threads;
extern int			md_ff_daemon_threads;


/* Functions implemented in other files. */
extern mddb_set_t	*mddb_setenter(set_t setno, int flag, int *errorcodep);
extern void		mddb_setexit(mddb_set_t *s);
extern void		*lookup_entry(struct nm_next_hdr *, set_t,
				side_t, mdkey_t, md_dev64_t, int);
extern struct nm_next_hdr	*get_first_record(set_t, int, int);
extern dev_t		getrootdev(void);
1160Sstevel@tonic-gate 
/*
 * Request queue anchors.  Each anchor is paired with a thread count in the
 * md_daemon_queues table.
 */
struct mdq_anchor	md_done_daemon; /* done request queue */
struct mdq_anchor	md_mstr_daemon; /* mirror error, WOW requests */
struct mdq_anchor	md_mhs_daemon;	/* mirror hotspare requests queue */
struct mdq_anchor	md_hs_daemon;	/* raid hotspare requests queue */
struct mdq_anchor	md_ff_daemonq;	/* failfast request queue */
struct mdq_anchor	md_mirror_daemon; /* mirror owner queue */
struct mdq_anchor	md_mirror_io_daemon; /* mirror owner i/o queue */
struct mdq_anchor	md_mirror_rs_daemon; /* mirror resync done queue */
struct mdq_anchor	md_sp_daemon;	/* soft-part error daemon queue */
struct mdq_anchor	md_mto_daemon;	/* mirror timeout daemon queue */

/*
 * Per-queue thread counts.  md_mirror_daemon_threads is shared by the
 * md_mirror_daemon, md_mirror_io_daemon and md_mirror_rs_daemon entries of
 * md_daemon_queues.
 */
int md_done_daemon_threads = 1;	/* threads for md_done_daemon requestq */
int md_mstr_daemon_threads = 1;	/* threads for md_mstr_daemon requestq */
int md_mhs_daemon_threads = 1;	/* threads for md_mhs_daemon requestq */
int md_hs_daemon_threads = 1;	/* threads for md_hs_daemon requestq */
int md_ff_daemon_threads = 3;	/* threads for md_ff_daemonq requestq */
int md_mirror_daemon_threads = 1; /* threads for md_mirror_daemon requestq */
int md_sp_daemon_threads = 1;	/* threads for md_sp_daemon requestq */
int md_mto_daemon_threads = 1;	/* threads for md_mto_daemon requestq */
1360Sstevel@tonic-gate 
#ifdef DEBUG
/* Flag to switch on debug messages; only present in DEBUG builds */
int md_release_reacquire_debug = 0;	/* debug flag */
#endif
1410Sstevel@tonic-gate 
/*
 *
 * md_daemon_queues is a table of pointers to request queues and to the
 * number of threads associated with each request queue.
 * When the number of threads is set to 1, then the order of execution is
 * sequential.
 * The number of threads for all the queues have been defined as global
 * variables to enable kernel tuning.
 *
 */

/* Ten queue entries plus the final all-zero entry. */
#define	MD_DAEMON_QUEUES 11

md_requestq_entry_t md_daemon_queues[MD_DAEMON_QUEUES] = {
	{&md_done_daemon, &md_done_daemon_threads},
	{&md_mstr_daemon, &md_mstr_daemon_threads},
	{&md_hs_daemon, &md_hs_daemon_threads},
	{&md_ff_daemonq, &md_ff_daemon_threads},
	{&md_mirror_daemon, &md_mirror_daemon_threads},
	{&md_mirror_io_daemon, &md_mirror_daemon_threads},
	{&md_mirror_rs_daemon, &md_mirror_daemon_threads},
	{&md_sp_daemon, &md_sp_daemon_threads},
	{&md_mhs_daemon, &md_mhs_daemon_threads},
	{&md_mto_daemon, &md_mto_daemon_threads},
	/* presumably the end-of-table marker; consumers are not in view */
	{0, 0}
};
1680Sstevel@tonic-gate 
/*
 * Number of times a message is retried before a warning is issued to the
 * operator.
 */
#define	MD_MN_WARN_INTVL	10
1730Sstevel@tonic-gate 
/*
 * The retry count is set to one (pre-decremented) so that we actually do no
 * retries when committing/deleting a mddb rec.  The underlying disk driver
 * already does several retries to check whether the disk is really dead,
 * so there is no reason for us to retry on top of the driver's retries.
 */

uint_t			md_retry_cnt = 1; /* global so it can be patched */
1820Sstevel@tonic-gate 
/*
 * How many times to try the door_ki_upcall() in mdmn_ksend_message.
 * Kept as a global so that it can be patched, should that prove useful.
 */
uint_t			md_send_retry_limit = 30;
1888452SJohn.Wren.Kennedy@Sun.COM 
/*
 * Bug # 1212146
 * Before this change the user had to pass in a short aligned buffer because of
 * problems in some underlying device drivers.  This problem seems to have been
 * corrected in the underlying drivers, so we default to not requiring any
 * alignment.  If the user needs to check for a specific alignment,
 * md_uio_alignment_mask may be set in /etc/system to accomplish this:
 *	1	short alignment (the behavior before this fix)
 *	3	word alignment
 *	7	double word alignment
 * and so on.
 *
 * [Other part of fix is in function md_chk_uio()]
 */
static int		md_uio_alignment_mask = 0;
2030Sstevel@tonic-gate 
/*
 * Tables and their lengths used for md_dev64_t translation.
 */
struct md_xlate_table		*md_tuple_table;
struct md_xlate_major_table	*md_major_tuple_table;
int				md_tuple_length;
uint_t				md_majortab_len;
2110Sstevel@tonic-gate 
/* Function declarations */

/* Forward declaration; the definition is not in this part of the file. */
static int md_create_probe_rqlist(md_probedev_impl_t *plist,
			daemon_queue_t **hdr, intptr_t (*probe_test)());
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate /*
2180Sstevel@tonic-gate  * manipulate global status
2190Sstevel@tonic-gate  */
2200Sstevel@tonic-gate void
md_set_status(int bits)2210Sstevel@tonic-gate md_set_status(int bits)
2220Sstevel@tonic-gate {
2230Sstevel@tonic-gate 	mutex_enter(&md_mx);
2240Sstevel@tonic-gate 	md_status |= bits;
2250Sstevel@tonic-gate 	mutex_exit(&md_mx);
2260Sstevel@tonic-gate }
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate void
md_clr_status(int bits)2290Sstevel@tonic-gate md_clr_status(int bits)
2300Sstevel@tonic-gate {
2310Sstevel@tonic-gate 	mutex_enter(&md_mx);
2320Sstevel@tonic-gate 	md_status &= ~bits;
2330Sstevel@tonic-gate 	mutex_exit(&md_mx);
2340Sstevel@tonic-gate }
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate int
md_get_status()2370Sstevel@tonic-gate md_get_status()
2380Sstevel@tonic-gate {
2390Sstevel@tonic-gate 	int result;
2400Sstevel@tonic-gate 	mutex_enter(&md_mx);
2410Sstevel@tonic-gate 	result = md_status;
2420Sstevel@tonic-gate 	mutex_exit(&md_mx);
2430Sstevel@tonic-gate 	return (result);
2440Sstevel@tonic-gate }
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate void
md_set_setstatus(set_t setno,int bits)2470Sstevel@tonic-gate md_set_setstatus(set_t setno, int bits)
2480Sstevel@tonic-gate {
2490Sstevel@tonic-gate 	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	mutex_enter(&md_mx);
2520Sstevel@tonic-gate 	md_set[setno].s_status |= bits;
2530Sstevel@tonic-gate 	mutex_exit(&md_mx);
2540Sstevel@tonic-gate }
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate void
md_clr_setstatus(set_t setno,int bits)2570Sstevel@tonic-gate md_clr_setstatus(set_t setno, int bits)
2580Sstevel@tonic-gate {
2590Sstevel@tonic-gate 	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	mutex_enter(&md_mx);
2620Sstevel@tonic-gate 	md_set[setno].s_status &= ~bits;
2630Sstevel@tonic-gate 	mutex_exit(&md_mx);
2640Sstevel@tonic-gate }
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate uint_t
md_get_setstatus(set_t setno)2670Sstevel@tonic-gate md_get_setstatus(set_t setno)
2680Sstevel@tonic-gate {
2690Sstevel@tonic-gate 	uint_t result;
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate 	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 	mutex_enter(&md_mx);
2740Sstevel@tonic-gate 	result = md_set[setno].s_status;
2750Sstevel@tonic-gate 	mutex_exit(&md_mx);
2760Sstevel@tonic-gate 	return (result);
2770Sstevel@tonic-gate }
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate /*
2800Sstevel@tonic-gate  * md_unit_readerlock_common:
2810Sstevel@tonic-gate  * -------------------------
2820Sstevel@tonic-gate  * Mark the given unit as having a reader reference. Spin waiting for any
2830Sstevel@tonic-gate  * writer references to be released.
2840Sstevel@tonic-gate  *
2850Sstevel@tonic-gate  * Input:
2860Sstevel@tonic-gate  *	ui		unit reference
2870Sstevel@tonic-gate  *	lock_held	0 => ui_mx needs to be grabbed
2880Sstevel@tonic-gate  *			1 => ui_mx already held
2890Sstevel@tonic-gate  * Output:
2900Sstevel@tonic-gate  *	mm_unit_t corresponding to unit structure
2910Sstevel@tonic-gate  *	ui->ui_readercnt incremented
2920Sstevel@tonic-gate  */
2930Sstevel@tonic-gate static void *
md_unit_readerlock_common(mdi_unit_t * ui,int lock_held)2940Sstevel@tonic-gate md_unit_readerlock_common(mdi_unit_t *ui, int lock_held)
2950Sstevel@tonic-gate {
2960Sstevel@tonic-gate 	uint_t	flag = MD_UL_WRITER | MD_UL_WANABEWRITER;
2970Sstevel@tonic-gate 
2980Sstevel@tonic-gate 	if (!lock_held)
2990Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
3000Sstevel@tonic-gate 	while (ui->ui_lock & flag) {
3010Sstevel@tonic-gate 		if (panicstr) {
3020Sstevel@tonic-gate 			if (ui->ui_lock & MD_UL_WRITER)
3030Sstevel@tonic-gate 				panic("md: writer lock is held");
3040Sstevel@tonic-gate 			break;
3050Sstevel@tonic-gate 		}
3060Sstevel@tonic-gate 		cv_wait(&ui->ui_cv, &ui->ui_mx);
3070Sstevel@tonic-gate 	}
3080Sstevel@tonic-gate 	ui->ui_readercnt++;
3090Sstevel@tonic-gate 	if (!lock_held)
3100Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
3110Sstevel@tonic-gate 	return (MD_UNIT(ui->ui_link.ln_id));
3120Sstevel@tonic-gate }
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate void *
md_unit_readerlock(mdi_unit_t * ui)3150Sstevel@tonic-gate md_unit_readerlock(mdi_unit_t *ui)
3160Sstevel@tonic-gate {
3170Sstevel@tonic-gate 	return (md_unit_readerlock_common(ui, 0));
3180Sstevel@tonic-gate }
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate /*
3210Sstevel@tonic-gate  * md_unit_writerlock_common:
3220Sstevel@tonic-gate  * -------------------------
3230Sstevel@tonic-gate  * Acquire a unique writer reference. Causes previous readers to drain.
3240Sstevel@tonic-gate  * Spins if a writer reference already exists or if a previous reader/writer
3250Sstevel@tonic-gate  * dropped the lock to allow a ksend_message to be despatched.
3260Sstevel@tonic-gate  *
3270Sstevel@tonic-gate  * Input:
3280Sstevel@tonic-gate  *	ui		unit reference
3290Sstevel@tonic-gate  *	lock_held	0 => grab ui_mx
3300Sstevel@tonic-gate  *			1 => ui_mx already held on entry
3310Sstevel@tonic-gate  * Output:
3320Sstevel@tonic-gate  *	mm_unit_t reference
3330Sstevel@tonic-gate  */
3340Sstevel@tonic-gate static void *
md_unit_writerlock_common(mdi_unit_t * ui,int lock_held)3350Sstevel@tonic-gate md_unit_writerlock_common(mdi_unit_t *ui, int lock_held)
3360Sstevel@tonic-gate {
3370Sstevel@tonic-gate 	uint_t	flag = MD_UL_WRITER;
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	if (panicstr)
3400Sstevel@tonic-gate 		panic("md: writer lock not allowed");
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	if (!lock_held)
3430Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	while ((ui->ui_lock & flag) || (ui->ui_readercnt != 0)) {
3460Sstevel@tonic-gate 		ui->ui_wanabecnt++;
3470Sstevel@tonic-gate 		ui->ui_lock |= MD_UL_WANABEWRITER;
3480Sstevel@tonic-gate 		cv_wait(&ui->ui_cv, &ui->ui_mx);
3490Sstevel@tonic-gate 		if (--ui->ui_wanabecnt == 0)
3500Sstevel@tonic-gate 			ui->ui_lock &= ~MD_UL_WANABEWRITER;
3510Sstevel@tonic-gate 	}
3520Sstevel@tonic-gate 	ui->ui_lock |= MD_UL_WRITER;
3530Sstevel@tonic-gate 	ui->ui_owner = curthread;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	if (!lock_held)
3560Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
3570Sstevel@tonic-gate 	return (MD_UNIT(ui->ui_link.ln_id));
3580Sstevel@tonic-gate }
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate void *
md_unit_writerlock(mdi_unit_t * ui)3610Sstevel@tonic-gate md_unit_writerlock(mdi_unit_t *ui)
3620Sstevel@tonic-gate {
3630Sstevel@tonic-gate 	return (md_unit_writerlock_common(ui, 0));
3640Sstevel@tonic-gate }
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate /*
3670Sstevel@tonic-gate  * md_unit_readerexit_common:
3680Sstevel@tonic-gate  * -------------------------
3690Sstevel@tonic-gate  * Release the readerlock for the specified unit. If the reader count reaches
3700Sstevel@tonic-gate  * zero and there are waiting writers (MD_UL_WANABEWRITER set) wake them up.
3710Sstevel@tonic-gate  *
3720Sstevel@tonic-gate  * Input:
3730Sstevel@tonic-gate  *	ui		unit reference
3740Sstevel@tonic-gate  *	lock_held	0 => ui_mx needs to be acquired
3750Sstevel@tonic-gate  *			1 => ui_mx already held
3760Sstevel@tonic-gate  */
3770Sstevel@tonic-gate static void
md_unit_readerexit_common(mdi_unit_t * ui,int lock_held)3780Sstevel@tonic-gate md_unit_readerexit_common(mdi_unit_t *ui, int lock_held)
3790Sstevel@tonic-gate {
3800Sstevel@tonic-gate 	if (!lock_held)
3810Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
3820Sstevel@tonic-gate 	ASSERT((ui->ui_lock & MD_UL_WRITER) == 0);
3830Sstevel@tonic-gate 	ASSERT(ui->ui_readercnt != 0);
3840Sstevel@tonic-gate 	ui->ui_readercnt--;
3850Sstevel@tonic-gate 	if ((ui->ui_wanabecnt != 0) && (ui->ui_readercnt == 0))
3860Sstevel@tonic-gate 		cv_broadcast(&ui->ui_cv);
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	if (!lock_held)
3890Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
3900Sstevel@tonic-gate }
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate void
md_unit_readerexit(mdi_unit_t * ui)3930Sstevel@tonic-gate md_unit_readerexit(mdi_unit_t *ui)
3940Sstevel@tonic-gate {
3950Sstevel@tonic-gate 	md_unit_readerexit_common(ui, 0);
3960Sstevel@tonic-gate }
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate /*
3990Sstevel@tonic-gate  * md_unit_writerexit_common:
4000Sstevel@tonic-gate  * -------------------------
4010Sstevel@tonic-gate  * Release the writerlock currently held on the unit. Wake any threads waiting
4020Sstevel@tonic-gate  * on becoming reader or writer (MD_UL_WANABEWRITER set).
4030Sstevel@tonic-gate  *
4040Sstevel@tonic-gate  * Input:
4050Sstevel@tonic-gate  *	ui		unit reference
4060Sstevel@tonic-gate  *	lock_held	0 => ui_mx to be acquired
4070Sstevel@tonic-gate  *			1 => ui_mx already held
4080Sstevel@tonic-gate  */
4090Sstevel@tonic-gate static void
md_unit_writerexit_common(mdi_unit_t * ui,int lock_held)4100Sstevel@tonic-gate md_unit_writerexit_common(mdi_unit_t *ui, int lock_held)
4110Sstevel@tonic-gate {
4120Sstevel@tonic-gate 	if (!lock_held)
4130Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
4140Sstevel@tonic-gate 	ASSERT((ui->ui_lock & MD_UL_WRITER) != 0);
4150Sstevel@tonic-gate 	ASSERT(ui->ui_readercnt == 0);
4160Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_WRITER;
4170Sstevel@tonic-gate 	ui->ui_owner = NULL;
4180Sstevel@tonic-gate 
4190Sstevel@tonic-gate 	cv_broadcast(&ui->ui_cv);
4200Sstevel@tonic-gate 	if (!lock_held)
4210Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
4220Sstevel@tonic-gate }
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate void
md_unit_writerexit(mdi_unit_t * ui)4250Sstevel@tonic-gate md_unit_writerexit(mdi_unit_t *ui)
4260Sstevel@tonic-gate {
4270Sstevel@tonic-gate 	md_unit_writerexit_common(ui, 0);
4280Sstevel@tonic-gate }
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate void *
md_io_readerlock(mdi_unit_t * ui)4310Sstevel@tonic-gate md_io_readerlock(mdi_unit_t *ui)
4320Sstevel@tonic-gate {
4330Sstevel@tonic-gate 	md_io_lock_t	*io = ui->ui_io_lock;
4340Sstevel@tonic-gate 
4350Sstevel@tonic-gate 	ASSERT(io);  /* checks case where no io lock allocated */
4360Sstevel@tonic-gate 	mutex_enter(&io->io_mx);
4370Sstevel@tonic-gate 	while (io->io_lock & (MD_UL_WRITER | MD_UL_WANABEWRITER)) {
4380Sstevel@tonic-gate 		if (panicstr) {
4390Sstevel@tonic-gate 			if (io->io_lock & MD_UL_WRITER)
4400Sstevel@tonic-gate 				panic("md: writer lock is held");
4410Sstevel@tonic-gate 			break;
4420Sstevel@tonic-gate 		}
4430Sstevel@tonic-gate 		cv_wait(&io->io_cv, &io->io_mx);
4440Sstevel@tonic-gate 	}
4450Sstevel@tonic-gate 	io->io_readercnt++;
4460Sstevel@tonic-gate 	mutex_exit(&io->io_mx);
4470Sstevel@tonic-gate 	return (MD_UNIT(ui->ui_link.ln_id));
4480Sstevel@tonic-gate }
4490Sstevel@tonic-gate 
4500Sstevel@tonic-gate void *
md_io_writerlock(mdi_unit_t * ui)4510Sstevel@tonic-gate md_io_writerlock(mdi_unit_t *ui)
4520Sstevel@tonic-gate {
4530Sstevel@tonic-gate 	md_io_lock_t	*io = ui->ui_io_lock;
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	ASSERT(io);  /* checks case where no io lock allocated */
4560Sstevel@tonic-gate 	if (panicstr)
4570Sstevel@tonic-gate 		panic("md: writer lock not allowed");
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	mutex_enter(&io->io_mx);
4600Sstevel@tonic-gate 	while ((io->io_lock & MD_UL_WRITER) || (io->io_readercnt != 0)) {
4610Sstevel@tonic-gate 		io->io_wanabecnt++;
4620Sstevel@tonic-gate 		io->io_lock |= MD_UL_WANABEWRITER;
4630Sstevel@tonic-gate 		cv_wait(&io->io_cv, &io->io_mx);
4640Sstevel@tonic-gate 		if (--io->io_wanabecnt == 0)
4650Sstevel@tonic-gate 			io->io_lock &= ~MD_UL_WANABEWRITER;
4660Sstevel@tonic-gate 	}
4670Sstevel@tonic-gate 	io->io_lock |= MD_UL_WRITER;
4680Sstevel@tonic-gate 	io->io_owner = curthread;
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	mutex_exit(&io->io_mx);
4710Sstevel@tonic-gate 	return (MD_UNIT(ui->ui_link.ln_id));
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate void
md_io_readerexit(mdi_unit_t * ui)4750Sstevel@tonic-gate md_io_readerexit(mdi_unit_t *ui)
4760Sstevel@tonic-gate {
4770Sstevel@tonic-gate 	md_io_lock_t	*io = ui->ui_io_lock;
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate 	mutex_enter(&io->io_mx);
4800Sstevel@tonic-gate 	ASSERT((io->io_lock & MD_UL_WRITER) == 0);
4810Sstevel@tonic-gate 	ASSERT(io->io_readercnt != 0);
4820Sstevel@tonic-gate 	io->io_readercnt--;
4830Sstevel@tonic-gate 	if ((io->io_wanabecnt != 0) && (io->io_readercnt == 0)) {
4840Sstevel@tonic-gate 		cv_broadcast(&io->io_cv);
4850Sstevel@tonic-gate 	}
4860Sstevel@tonic-gate 	mutex_exit(&io->io_mx);
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate void
md_io_writerexit(mdi_unit_t * ui)4900Sstevel@tonic-gate md_io_writerexit(mdi_unit_t *ui)
4910Sstevel@tonic-gate {
4920Sstevel@tonic-gate 	md_io_lock_t	*io = ui->ui_io_lock;
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate 	mutex_enter(&io->io_mx);
4950Sstevel@tonic-gate 	ASSERT((io->io_lock & MD_UL_WRITER) != 0);
4960Sstevel@tonic-gate 	ASSERT(io->io_readercnt == 0);
4970Sstevel@tonic-gate 	io->io_lock &= ~MD_UL_WRITER;
4980Sstevel@tonic-gate 	io->io_owner = NULL;
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate 	cv_broadcast(&io->io_cv);
5010Sstevel@tonic-gate 	mutex_exit(&io->io_mx);
5020Sstevel@tonic-gate }
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate /*
5050Sstevel@tonic-gate  * Attempt to grab that set of locks defined as global.
5060Sstevel@tonic-gate  * A mask containing the set of global locks that are owned upon
5070Sstevel@tonic-gate  * entry is input.  Any additional global locks are then grabbed.
5080Sstevel@tonic-gate  * This keeps the caller from having to know the set of global
5090Sstevel@tonic-gate  * locks.
5100Sstevel@tonic-gate  */
5110Sstevel@tonic-gate static int
md_global_lock_enter(int global_locks_owned_mask)5120Sstevel@tonic-gate md_global_lock_enter(int global_locks_owned_mask)
5130Sstevel@tonic-gate {
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 	/*
5160Sstevel@tonic-gate 	 * The current implementation has been verified by inspection
5170Sstevel@tonic-gate 	 * and test to be deadlock free.  If another global lock is
5180Sstevel@tonic-gate 	 * added, changing the algorithm used by this function should
5190Sstevel@tonic-gate 	 * be considered.  With more than 2 locks it is difficult to
5200Sstevel@tonic-gate 	 * guarantee that locks are being acquired in the correct order.
5210Sstevel@tonic-gate 	 * The safe approach would be to drop all of the locks that are
5220Sstevel@tonic-gate 	 * owned at function entry and then reacquire all of the locks
5230Sstevel@tonic-gate 	 * in the order defined by the lock hierarchy.
5240Sstevel@tonic-gate 	 */
5250Sstevel@tonic-gate 	mutex_enter(&md_mx);
5260Sstevel@tonic-gate 	if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) {
5270Sstevel@tonic-gate 		while ((md_mtioctl_cnt != 0) ||
5280Sstevel@tonic-gate 		    (md_status & MD_GBL_IOCTL_LOCK)) {
5290Sstevel@tonic-gate 			if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) {
5300Sstevel@tonic-gate 				mutex_exit(&md_mx);
5310Sstevel@tonic-gate 				return (EINTR);
5320Sstevel@tonic-gate 			}
5330Sstevel@tonic-gate 		}
5340Sstevel@tonic-gate 		md_status |= MD_GBL_IOCTL_LOCK;
5350Sstevel@tonic-gate 		md_ioctl_cnt++;
5360Sstevel@tonic-gate 	}
5370Sstevel@tonic-gate 	if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) {
5380Sstevel@tonic-gate 		while (md_status & MD_GBL_HS_LOCK) {
5390Sstevel@tonic-gate 			if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) {
5400Sstevel@tonic-gate 				md_status &= ~MD_GBL_IOCTL_LOCK;
5410Sstevel@tonic-gate 				mutex_exit(&md_mx);
5420Sstevel@tonic-gate 				return (EINTR);
5430Sstevel@tonic-gate 			}
5440Sstevel@tonic-gate 		}
5450Sstevel@tonic-gate 		md_status |= MD_GBL_HS_LOCK;
5460Sstevel@tonic-gate 	}
5470Sstevel@tonic-gate 	mutex_exit(&md_mx);
5480Sstevel@tonic-gate 	return (0);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate /*
5520Sstevel@tonic-gate  * Release the set of global locks that were grabbed in md_global_lock_enter
5530Sstevel@tonic-gate  * that were not already owned by the calling thread.  The set of previously
5540Sstevel@tonic-gate  * owned global locks is passed in as a mask parameter.
5550Sstevel@tonic-gate  */
5560Sstevel@tonic-gate static int
md_global_lock_exit(int global_locks_owned_mask,int code,int flags,mdi_unit_t * ui)5570Sstevel@tonic-gate md_global_lock_exit(int global_locks_owned_mask, int code,
5580Sstevel@tonic-gate 	int flags, mdi_unit_t *ui)
5590Sstevel@tonic-gate {
5600Sstevel@tonic-gate 	mutex_enter(&md_mx);
5610Sstevel@tonic-gate 
5620Sstevel@tonic-gate 	/* If MT ioctl decrement mt_ioctl_cnt */
5630Sstevel@tonic-gate 	if ((flags & MD_MT_IOCTL)) {
5640Sstevel@tonic-gate 		md_mtioctl_cnt--;
5650Sstevel@tonic-gate 	} else {
5660Sstevel@tonic-gate 		if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) {
5670Sstevel@tonic-gate 			/* clear the lock and decrement count */
5680Sstevel@tonic-gate 			ASSERT(md_ioctl_cnt == 1);
5690Sstevel@tonic-gate 			md_ioctl_cnt--;
5700Sstevel@tonic-gate 			md_status &= ~MD_GBL_IOCTL_LOCK;
5710Sstevel@tonic-gate 		}
5720Sstevel@tonic-gate 		if (!(global_locks_owned_mask & MD_GBL_HS_LOCK))
5730Sstevel@tonic-gate 			md_status &= ~MD_GBL_HS_LOCK;
5740Sstevel@tonic-gate 	}
5750Sstevel@tonic-gate 	if (flags & MD_READER_HELD)
5760Sstevel@tonic-gate 		md_unit_readerexit(ui);
5770Sstevel@tonic-gate 	if (flags & MD_WRITER_HELD)
5780Sstevel@tonic-gate 		md_unit_writerexit(ui);
5790Sstevel@tonic-gate 	if (flags & MD_IO_HELD)
5800Sstevel@tonic-gate 		md_io_writerexit(ui);
5810Sstevel@tonic-gate 	if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) {
5820Sstevel@tonic-gate 		rw_exit(&md_unit_array_rw.lock);
5830Sstevel@tonic-gate 	}
5840Sstevel@tonic-gate 	cv_broadcast(&md_cv);
5850Sstevel@tonic-gate 	mutex_exit(&md_mx);
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate 	return (code);
5880Sstevel@tonic-gate }
5890Sstevel@tonic-gate 
5900Sstevel@tonic-gate /*
5910Sstevel@tonic-gate  * The two functions, md_ioctl_lock_enter, and md_ioctl_lock_exit make
5920Sstevel@tonic-gate  * use of the md_global_lock_{enter|exit} functions to avoid duplication
5930Sstevel@tonic-gate  * of code.  They rely upon the fact that the locks that are specified in
5940Sstevel@tonic-gate  * the input mask are not acquired or freed.  If this algorithm changes
5950Sstevel@tonic-gate  * as described in the block comment at the beginning of md_global_lock_enter
5960Sstevel@tonic-gate  * then it will be necessary to change these 2 functions.  Otherwise these
5970Sstevel@tonic-gate  * functions will be grabbing and holding global locks unnecessarily.
5980Sstevel@tonic-gate  */
5990Sstevel@tonic-gate int
md_ioctl_lock_enter(void)6000Sstevel@tonic-gate md_ioctl_lock_enter(void)
6010Sstevel@tonic-gate {
6020Sstevel@tonic-gate 	/* grab only the ioctl lock */
6030Sstevel@tonic-gate 	return (md_global_lock_enter(~MD_GBL_IOCTL_LOCK));
6040Sstevel@tonic-gate }
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate /*
6070Sstevel@tonic-gate  * If md_ioctl_lock_exit is being called at the end of an ioctl before
6080Sstevel@tonic-gate  * returning to user space, then ioctl_end is set to 1.
6090Sstevel@tonic-gate  * Otherwise, the ioctl lock is being dropped in the middle of handling
6100Sstevel@tonic-gate  * an ioctl and will be reacquired before the end of the ioctl.
6110Sstevel@tonic-gate  * Do not attempt to process the MN diskset mddb parse flags unless
6120Sstevel@tonic-gate  * ioctl_end is true - otherwise a deadlock situation could arise.
6130Sstevel@tonic-gate  */
6140Sstevel@tonic-gate int
md_ioctl_lock_exit(int code,int flags,mdi_unit_t * ui,int ioctl_end)6150Sstevel@tonic-gate md_ioctl_lock_exit(int code, int flags, mdi_unit_t *ui, int ioctl_end)
6160Sstevel@tonic-gate {
6170Sstevel@tonic-gate 	int				ret_val;
6180Sstevel@tonic-gate 	uint_t				status;
6190Sstevel@tonic-gate 	mddb_set_t			*s;
6200Sstevel@tonic-gate 	int				i;
6210Sstevel@tonic-gate 	int				err;
6220Sstevel@tonic-gate 	md_mn_msg_mddb_parse_t		*mddb_parse_msg;
6230Sstevel@tonic-gate 	md_mn_kresult_t			*kresult;
6240Sstevel@tonic-gate 	mddb_lb_t			*lbp;
6250Sstevel@tonic-gate 	int				rval = 1;
6260Sstevel@tonic-gate 	int				flag;
6270Sstevel@tonic-gate 
6280Sstevel@tonic-gate 	/* release only the ioctl lock */
6290Sstevel@tonic-gate 	ret_val = md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui);
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	/*
6320Sstevel@tonic-gate 	 * If md_ioctl_lock_exit is being called with a possible lock held
6330Sstevel@tonic-gate 	 * (ioctl_end is 0), then don't check the MN disksets since the
6340Sstevel@tonic-gate 	 * call to mddb_setenter may cause a lock ordering deadlock.
6350Sstevel@tonic-gate 	 */
6360Sstevel@tonic-gate 	if (!ioctl_end)
6370Sstevel@tonic-gate 		return (ret_val);
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate 	/*
6400Sstevel@tonic-gate 	 * Walk through disksets to see if there is a MN diskset that
6410Sstevel@tonic-gate 	 * has messages that need to be sent.  Set must be snarfed and
6420Sstevel@tonic-gate 	 * be a MN diskset in order to be checked.
6430Sstevel@tonic-gate 	 *
6440Sstevel@tonic-gate 	 * In a MN diskset, this routine may send messages to the
6450Sstevel@tonic-gate 	 * rpc.mdcommd in order to have the slave nodes re-parse parts
6460Sstevel@tonic-gate 	 * of the mddb.  Messages can only be sent with no locks held,
6470Sstevel@tonic-gate 	 * so if mddb change occurred while the ioctl lock is held, this
6480Sstevel@tonic-gate 	 * routine must send the messages.
6490Sstevel@tonic-gate 	 */
6500Sstevel@tonic-gate 	for (i = 1; i < md_nsets; i++) {
6510Sstevel@tonic-gate 		status = md_get_setstatus(i);
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 		/* Set must be snarfed and be a MN diskset */
6540Sstevel@tonic-gate 		if ((status & (MD_SET_SNARFED | MD_SET_MNSET)) !=
6550Sstevel@tonic-gate 		    (MD_SET_SNARFED | MD_SET_MNSET))
6560Sstevel@tonic-gate 			continue;
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate 		/* Grab set lock so that set can't change */
6590Sstevel@tonic-gate 		if ((s = mddb_setenter(i, MDDB_MUSTEXIST, &err)) == NULL)
6600Sstevel@tonic-gate 			continue;
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate 		lbp = s->s_lbp;
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 		/* Re-get set status now that lock is held */
6650Sstevel@tonic-gate 		status = md_get_setstatus(i);
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate 		/*
6680Sstevel@tonic-gate 		 * If MN parsing block flag is set - continue to next set.
6690Sstevel@tonic-gate 		 *
6700Sstevel@tonic-gate 		 * If s_mn_parseflags_sending is non-zero, then another thread
6710Sstevel@tonic-gate 		 * is already currently sending a parse message, so just
6720Sstevel@tonic-gate 		 * release the set mutex.  If this ioctl had caused an mddb
6730Sstevel@tonic-gate 		 * change that results in a parse message to be generated,
6740Sstevel@tonic-gate 		 * the thread that is currently sending a parse message would
6750Sstevel@tonic-gate 		 * generate the additional parse message.
6760Sstevel@tonic-gate 		 *
6770Sstevel@tonic-gate 		 * If s_mn_parseflags_sending is zero then loop until
6780Sstevel@tonic-gate 		 * s_mn_parseflags is 0 (until there are no more
6790Sstevel@tonic-gate 		 * messages to send).
6800Sstevel@tonic-gate 		 * While s_mn_parseflags is non-zero,
6810Sstevel@tonic-gate 		 *	put snapshot of parse_flags in s_mn_parseflags_sending
6820Sstevel@tonic-gate 		 *	set s_mn_parseflags to zero
6830Sstevel@tonic-gate 		 *	release set mutex
6840Sstevel@tonic-gate 		 *	send message
6850Sstevel@tonic-gate 		 *	re-grab set mutex
6860Sstevel@tonic-gate 		 *	set s_mn_parseflags_sending to zero
6870Sstevel@tonic-gate 		 *
6880Sstevel@tonic-gate 		 * If set is STALE, send message with NO_LOG flag so that
6890Sstevel@tonic-gate 		 * rpc.mdcommd won't attempt to log message to non-writeable
6900Sstevel@tonic-gate 		 * replica.
6910Sstevel@tonic-gate 		 */
6920Sstevel@tonic-gate 		mddb_parse_msg = kmem_zalloc(sizeof (md_mn_msg_mddb_parse_t),
6937563SPrasad.Singamsetty@Sun.COM 		    KM_SLEEP);
6940Sstevel@tonic-gate 		while (((s->s_mn_parseflags_sending & MDDB_PARSE_MASK) == 0) &&
6950Sstevel@tonic-gate 		    (s->s_mn_parseflags & MDDB_PARSE_MASK) &&
6960Sstevel@tonic-gate 		    (!(status & MD_SET_MNPARSE_BLK))) {
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 			/* Grab snapshot of parse flags */
6990Sstevel@tonic-gate 			s->s_mn_parseflags_sending = s->s_mn_parseflags;
7000Sstevel@tonic-gate 			s->s_mn_parseflags = 0;
7010Sstevel@tonic-gate 
7020Sstevel@tonic-gate 			mutex_exit(&md_set[(s)->s_setno].s_dbmx);
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate 			/*
7050Sstevel@tonic-gate 			 * Send the message to the slaves to re-parse
7060Sstevel@tonic-gate 			 * the indicated portions of the mddb. Send the status
7070Sstevel@tonic-gate 			 * of the 50 mddbs in this set so that slaves know
7080Sstevel@tonic-gate 			 * which mddbs that the master node thinks are 'good'.
7090Sstevel@tonic-gate 			 * Otherwise, slave may reparse, but from wrong
7100Sstevel@tonic-gate 			 * replica.
7110Sstevel@tonic-gate 			 */
7120Sstevel@tonic-gate 			mddb_parse_msg->msg_parse_flags =
7137563SPrasad.Singamsetty@Sun.COM 			    s->s_mn_parseflags_sending;
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate 			for (i = 0; i < MDDB_NLB; i++) {
7160Sstevel@tonic-gate 				mddb_parse_msg->msg_lb_flags[i] =
7177563SPrasad.Singamsetty@Sun.COM 				    lbp->lb_locators[i].l_flags;
7180Sstevel@tonic-gate 			}
719*11130SJames.Hall@Sun.COM 			kresult = kmem_alloc(sizeof (md_mn_kresult_t),
7207563SPrasad.Singamsetty@Sun.COM 			    KM_SLEEP);
7210Sstevel@tonic-gate 			while (rval != 0) {
7220Sstevel@tonic-gate 				flag = 0;
7230Sstevel@tonic-gate 				if (status & MD_SET_STALE)
7240Sstevel@tonic-gate 					flag |= MD_MSGF_NO_LOG;
7250Sstevel@tonic-gate 				rval = mdmn_ksend_message(s->s_setno,
7268452SJohn.Wren.Kennedy@Sun.COM 				    MD_MN_MSG_MDDB_PARSE, flag, 0,
7270Sstevel@tonic-gate 				    (char *)mddb_parse_msg,
7288452SJohn.Wren.Kennedy@Sun.COM 				    sizeof (md_mn_msg_mddb_parse_t), kresult);
7290Sstevel@tonic-gate 				/* if the node hasn't yet joined, it's Ok. */
7300Sstevel@tonic-gate 				if ((!MDMN_KSEND_MSG_OK(rval, kresult)) &&
7310Sstevel@tonic-gate 				    (kresult->kmmr_comm_state !=
7327563SPrasad.Singamsetty@Sun.COM 				    MDMNE_NOT_JOINED)) {
7330Sstevel@tonic-gate 					mdmn_ksend_show_error(rval, kresult,
7340Sstevel@tonic-gate 					    "MD_MN_MSG_MDDB_PARSE");
7350Sstevel@tonic-gate 					cmn_err(CE_WARN, "md_ioctl_lock_exit: "
7360Sstevel@tonic-gate 					    "Unable to send mddb update "
7370Sstevel@tonic-gate 					    "message to other nodes in "
7380Sstevel@tonic-gate 					    "diskset %s\n", s->s_setname);
7390Sstevel@tonic-gate 					rval = 1;
7400Sstevel@tonic-gate 				}
7410Sstevel@tonic-gate 			}
7420Sstevel@tonic-gate 			kmem_free(kresult, sizeof (md_mn_kresult_t));
7430Sstevel@tonic-gate 
7440Sstevel@tonic-gate 			/*
7450Sstevel@tonic-gate 			 * Re-grab mutex to clear sending field and to
7460Sstevel@tonic-gate 			 * see if another parse message needs to be generated.
7470Sstevel@tonic-gate 			 */
7480Sstevel@tonic-gate 			mutex_enter(&md_set[(s)->s_setno].s_dbmx);
7490Sstevel@tonic-gate 			s->s_mn_parseflags_sending = 0;
7500Sstevel@tonic-gate 		}
7510Sstevel@tonic-gate 		kmem_free(mddb_parse_msg, sizeof (md_mn_msg_mddb_parse_t));
7520Sstevel@tonic-gate 		mutex_exit(&md_set[(s)->s_setno].s_dbmx);
7530Sstevel@tonic-gate 	}
7540Sstevel@tonic-gate 	return (ret_val);
7550Sstevel@tonic-gate }
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate /*
7580Sstevel@tonic-gate  * Called when in an ioctl and need readerlock.
7590Sstevel@tonic-gate  */
7600Sstevel@tonic-gate void *
md_ioctl_readerlock(IOLOCK * lock,mdi_unit_t * ui)7610Sstevel@tonic-gate md_ioctl_readerlock(IOLOCK *lock, mdi_unit_t *ui)
7620Sstevel@tonic-gate {
7630Sstevel@tonic-gate 	ASSERT(lock != NULL);
7640Sstevel@tonic-gate 	lock->l_ui = ui;
7650Sstevel@tonic-gate 	lock->l_flags |= MD_READER_HELD;
7660Sstevel@tonic-gate 	return (md_unit_readerlock_common(ui, 0));
7670Sstevel@tonic-gate }
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate /*
7700Sstevel@tonic-gate  * Called when in an ioctl and need writerlock.
7710Sstevel@tonic-gate  */
7720Sstevel@tonic-gate void *
md_ioctl_writerlock(IOLOCK * lock,mdi_unit_t * ui)7730Sstevel@tonic-gate md_ioctl_writerlock(IOLOCK *lock, mdi_unit_t *ui)
7740Sstevel@tonic-gate {
7750Sstevel@tonic-gate 	ASSERT(lock != NULL);
7760Sstevel@tonic-gate 	lock->l_ui = ui;
7770Sstevel@tonic-gate 	lock->l_flags |= MD_WRITER_HELD;
7780Sstevel@tonic-gate 	return (md_unit_writerlock_common(ui, 0));
7790Sstevel@tonic-gate }
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate void *
md_ioctl_io_lock(IOLOCK * lock,mdi_unit_t * ui)7820Sstevel@tonic-gate md_ioctl_io_lock(IOLOCK *lock, mdi_unit_t *ui)
7830Sstevel@tonic-gate {
7840Sstevel@tonic-gate 	ASSERT(lock != NULL);
7850Sstevel@tonic-gate 	lock->l_ui = ui;
7860Sstevel@tonic-gate 	lock->l_flags |= MD_IO_HELD;
7870Sstevel@tonic-gate 	return (md_io_writerlock(ui));
7880Sstevel@tonic-gate }
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate void
md_ioctl_readerexit(IOLOCK * lock)7910Sstevel@tonic-gate md_ioctl_readerexit(IOLOCK *lock)
7920Sstevel@tonic-gate {
7930Sstevel@tonic-gate 	ASSERT(lock != NULL);
7940Sstevel@tonic-gate 	lock->l_flags &= ~MD_READER_HELD;
7950Sstevel@tonic-gate 	md_unit_readerexit(lock->l_ui);
7960Sstevel@tonic-gate }
7970Sstevel@tonic-gate 
7980Sstevel@tonic-gate void
md_ioctl_writerexit(IOLOCK * lock)7990Sstevel@tonic-gate md_ioctl_writerexit(IOLOCK *lock)
8000Sstevel@tonic-gate {
8010Sstevel@tonic-gate 	ASSERT(lock != NULL);
8020Sstevel@tonic-gate 	lock->l_flags &= ~MD_WRITER_HELD;
8030Sstevel@tonic-gate 	md_unit_writerexit(lock->l_ui);
8040Sstevel@tonic-gate }
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate void
md_ioctl_io_exit(IOLOCK * lock)8070Sstevel@tonic-gate md_ioctl_io_exit(IOLOCK *lock)
8080Sstevel@tonic-gate {
8090Sstevel@tonic-gate 	ASSERT(lock != NULL);
8100Sstevel@tonic-gate 	lock->l_flags &= ~MD_IO_HELD;
8110Sstevel@tonic-gate 	md_io_writerexit(lock->l_ui);
8120Sstevel@tonic-gate }
8130Sstevel@tonic-gate 
8140Sstevel@tonic-gate /*
8150Sstevel@tonic-gate  * md_ioctl_releaselocks:
8160Sstevel@tonic-gate  * --------------------
8170Sstevel@tonic-gate  * Release the unit locks that are held and stop subsequent
8180Sstevel@tonic-gate  * md_unit_reader/writerlock calls from progressing. This allows the caller
8190Sstevel@tonic-gate  * to send messages across the cluster when running in a multinode
8200Sstevel@tonic-gate  * environment.
8210Sstevel@tonic-gate  * ioctl originated locks (via md_ioctl_readerlock/md_ioctl_writerlock) are
8220Sstevel@tonic-gate  * allowed to progress as normal. This is required as these typically are
8230Sstevel@tonic-gate  * invoked by the message handler that may be called while a unit lock is
8240Sstevel@tonic-gate  * marked as released.
8250Sstevel@tonic-gate  *
8260Sstevel@tonic-gate  * On entry:
8270Sstevel@tonic-gate  *	variety of unit locks may be held including ioctl lock
8280Sstevel@tonic-gate  *
8290Sstevel@tonic-gate  * On exit:
8300Sstevel@tonic-gate  *      locks released and unit structure updated to prevent subsequent reader/
8310Sstevel@tonic-gate  *      writer locks being acquired until md_ioctl_reacquirelocks is called
8320Sstevel@tonic-gate  */
8330Sstevel@tonic-gate void
md_ioctl_releaselocks(int code,int flags,mdi_unit_t * ui)8340Sstevel@tonic-gate md_ioctl_releaselocks(int code, int flags, mdi_unit_t *ui)
8350Sstevel@tonic-gate {
8360Sstevel@tonic-gate 	/* This actually releases the locks. */
8370Sstevel@tonic-gate 	(void) md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui);
8380Sstevel@tonic-gate }
8390Sstevel@tonic-gate 
8400Sstevel@tonic-gate /*
8410Sstevel@tonic-gate  * md_ioctl_reacquirelocks:
8420Sstevel@tonic-gate  * ----------------------
8430Sstevel@tonic-gate  * Reacquire the locks that were held when md_ioctl_releaselocks
8440Sstevel@tonic-gate  * was called.
8450Sstevel@tonic-gate  *
8460Sstevel@tonic-gate  * On entry:
8470Sstevel@tonic-gate  *      No unit locks held
8480Sstevel@tonic-gate  * On exit:
8490Sstevel@tonic-gate  *	locks held that were held at md_ioctl_releaselocks time including
8500Sstevel@tonic-gate  *	the ioctl lock.
8510Sstevel@tonic-gate  */
8520Sstevel@tonic-gate void
md_ioctl_reacquirelocks(int flags,mdi_unit_t * ui)8530Sstevel@tonic-gate md_ioctl_reacquirelocks(int flags, mdi_unit_t *ui)
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate 	if (flags & MD_MT_IOCTL) {
8560Sstevel@tonic-gate 		mutex_enter(&md_mx);
8570Sstevel@tonic-gate 		md_mtioctl_cnt++;
8580Sstevel@tonic-gate 		mutex_exit(&md_mx);
8590Sstevel@tonic-gate 	} else {
8607563SPrasad.Singamsetty@Sun.COM 		while (md_ioctl_lock_enter() == EINTR)
8617563SPrasad.Singamsetty@Sun.COM 			;
8620Sstevel@tonic-gate 	}
8630Sstevel@tonic-gate 	if (flags & MD_ARRAY_WRITER) {
8640Sstevel@tonic-gate 		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
8650Sstevel@tonic-gate 	} else if (flags & MD_ARRAY_READER) {
8660Sstevel@tonic-gate 		rw_enter(&md_unit_array_rw.lock, RW_READER);
8670Sstevel@tonic-gate 	}
8680Sstevel@tonic-gate 	if (ui != (mdi_unit_t *)NULL) {
8690Sstevel@tonic-gate 		if (flags & MD_IO_HELD) {
8700Sstevel@tonic-gate 			(void) md_io_writerlock(ui);
8710Sstevel@tonic-gate 		}
8720Sstevel@tonic-gate 
8730Sstevel@tonic-gate 		mutex_enter(&ui->ui_mx);
8740Sstevel@tonic-gate 		if (flags & MD_READER_HELD) {
8750Sstevel@tonic-gate 			(void) md_unit_readerlock_common(ui, 1);
8760Sstevel@tonic-gate 		} else if (flags & MD_WRITER_HELD) {
8770Sstevel@tonic-gate 			(void) md_unit_writerlock_common(ui, 1);
8780Sstevel@tonic-gate 		}
8790Sstevel@tonic-gate 		/* Wake up any blocked readerlock() calls */
8800Sstevel@tonic-gate 		cv_broadcast(&ui->ui_cv);
8810Sstevel@tonic-gate 		mutex_exit(&ui->ui_mx);
8820Sstevel@tonic-gate 	}
8830Sstevel@tonic-gate }
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate void
md_ioctl_droplocks(IOLOCK * lock)8860Sstevel@tonic-gate md_ioctl_droplocks(IOLOCK *lock)
8870Sstevel@tonic-gate {
8880Sstevel@tonic-gate 	mdi_unit_t	*ui;
8890Sstevel@tonic-gate 	int		flags;
8900Sstevel@tonic-gate 
8910Sstevel@tonic-gate 	ASSERT(lock != NULL);
8920Sstevel@tonic-gate 	ui = lock->l_ui;
8930Sstevel@tonic-gate 	flags = lock->l_flags;
8940Sstevel@tonic-gate 	if (flags & MD_READER_HELD) {
8950Sstevel@tonic-gate 		lock->l_flags &= ~MD_READER_HELD;
8960Sstevel@tonic-gate 		md_unit_readerexit(ui);
8970Sstevel@tonic-gate 	}
8980Sstevel@tonic-gate 	if (flags & MD_WRITER_HELD) {
8990Sstevel@tonic-gate 		lock->l_flags &= ~MD_WRITER_HELD;
9000Sstevel@tonic-gate 		md_unit_writerexit(ui);
9010Sstevel@tonic-gate 	}
9020Sstevel@tonic-gate 	if (flags & MD_IO_HELD) {
9030Sstevel@tonic-gate 		lock->l_flags &= ~MD_IO_HELD;
9040Sstevel@tonic-gate 		md_io_writerexit(ui);
9050Sstevel@tonic-gate 	}
9060Sstevel@tonic-gate 	if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) {
9070Sstevel@tonic-gate 		lock->l_flags &= ~(MD_ARRAY_WRITER | MD_ARRAY_READER);
9080Sstevel@tonic-gate 		rw_exit(&md_unit_array_rw.lock);
9090Sstevel@tonic-gate 	}
9100Sstevel@tonic-gate }
9110Sstevel@tonic-gate 
9120Sstevel@tonic-gate void
md_array_writer(IOLOCK * lock)9130Sstevel@tonic-gate md_array_writer(IOLOCK *lock)
9140Sstevel@tonic-gate {
9150Sstevel@tonic-gate 	ASSERT(lock != NULL);
9160Sstevel@tonic-gate 	lock->l_flags |= MD_ARRAY_WRITER;
9170Sstevel@tonic-gate 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
9180Sstevel@tonic-gate }
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate void
md_array_reader(IOLOCK * lock)9210Sstevel@tonic-gate md_array_reader(IOLOCK *lock)
9220Sstevel@tonic-gate {
9230Sstevel@tonic-gate 	ASSERT(lock != NULL);
9240Sstevel@tonic-gate 	lock->l_flags |= MD_ARRAY_READER;
9250Sstevel@tonic-gate 	rw_enter(&md_unit_array_rw.lock, RW_READER);
9260Sstevel@tonic-gate }
9270Sstevel@tonic-gate 
9280Sstevel@tonic-gate /*
9290Sstevel@tonic-gate  * Called when in an ioctl and need opencloselock.
9300Sstevel@tonic-gate  * Sets flags in lockp for READER_HELD.
9310Sstevel@tonic-gate  */
9320Sstevel@tonic-gate void *
md_ioctl_openclose_enter(IOLOCK * lockp,mdi_unit_t * ui)9330Sstevel@tonic-gate md_ioctl_openclose_enter(IOLOCK *lockp, mdi_unit_t *ui)
9340Sstevel@tonic-gate {
9350Sstevel@tonic-gate 	void	*un;
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate 	ASSERT(lockp != NULL);
9380Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
9390Sstevel@tonic-gate 	while (ui->ui_lock & MD_UL_OPENORCLOSE)
9400Sstevel@tonic-gate 		cv_wait(&ui->ui_cv, &ui->ui_mx);
9410Sstevel@tonic-gate 	ui->ui_lock |= MD_UL_OPENORCLOSE;
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate 	/* Maintain mutex across the readerlock call */
9440Sstevel@tonic-gate 	lockp->l_ui = ui;
9450Sstevel@tonic-gate 	lockp->l_flags |= MD_READER_HELD;
9460Sstevel@tonic-gate 	un = md_unit_readerlock_common(ui, 1);
9470Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
9480Sstevel@tonic-gate 
9490Sstevel@tonic-gate 	return (un);
9500Sstevel@tonic-gate }
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate /*
9530Sstevel@tonic-gate  * Clears reader lock using md_ioctl instead of md_unit
9540Sstevel@tonic-gate  * and updates lockp.
9550Sstevel@tonic-gate  */
9560Sstevel@tonic-gate void
md_ioctl_openclose_exit(IOLOCK * lockp)9570Sstevel@tonic-gate md_ioctl_openclose_exit(IOLOCK *lockp)
9580Sstevel@tonic-gate {
9590Sstevel@tonic-gate 	mdi_unit_t	*ui;
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate 	ASSERT(lockp != NULL);
9620Sstevel@tonic-gate 	ui = lockp->l_ui;
9630Sstevel@tonic-gate 	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
9640Sstevel@tonic-gate 
9650Sstevel@tonic-gate 	md_ioctl_readerexit(lockp);
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
9680Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
9690Sstevel@tonic-gate 
9700Sstevel@tonic-gate 	cv_broadcast(&ui->ui_cv);
9710Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
9720Sstevel@tonic-gate }
9730Sstevel@tonic-gate 
9740Sstevel@tonic-gate /*
9750Sstevel@tonic-gate  * Clears reader lock using md_ioctl instead of md_unit
9760Sstevel@tonic-gate  * and updates lockp.
9770Sstevel@tonic-gate  * Does not acquire or release the ui_mx lock since the calling
9780Sstevel@tonic-gate  * routine has already acquired this lock.
9790Sstevel@tonic-gate  */
9800Sstevel@tonic-gate void
md_ioctl_openclose_exit_lh(IOLOCK * lockp)9810Sstevel@tonic-gate md_ioctl_openclose_exit_lh(IOLOCK *lockp)
9820Sstevel@tonic-gate {
9830Sstevel@tonic-gate 	mdi_unit_t	*ui;
9840Sstevel@tonic-gate 
9850Sstevel@tonic-gate 	ASSERT(lockp != NULL);
9860Sstevel@tonic-gate 	ui = lockp->l_ui;
9870Sstevel@tonic-gate 	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
9880Sstevel@tonic-gate 
9890Sstevel@tonic-gate 	lockp->l_flags &= ~MD_READER_HELD;
9900Sstevel@tonic-gate 	md_unit_readerexit_common(lockp->l_ui, 1);
9910Sstevel@tonic-gate 
9920Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
9930Sstevel@tonic-gate 	cv_broadcast(&ui->ui_cv);
9940Sstevel@tonic-gate }
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate void *
md_unit_openclose_enter(mdi_unit_t * ui)9970Sstevel@tonic-gate md_unit_openclose_enter(mdi_unit_t *ui)
9980Sstevel@tonic-gate {
9990Sstevel@tonic-gate 	void	*un;
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
10020Sstevel@tonic-gate 	while (ui->ui_lock & (MD_UL_OPENORCLOSE))
10030Sstevel@tonic-gate 		cv_wait(&ui->ui_cv, &ui->ui_mx);
10040Sstevel@tonic-gate 	ui->ui_lock |= MD_UL_OPENORCLOSE;
10050Sstevel@tonic-gate 
10060Sstevel@tonic-gate 	/* Maintain mutex across the readerlock call */
10070Sstevel@tonic-gate 	un = md_unit_readerlock_common(ui, 1);
10080Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate 	return (un);
10110Sstevel@tonic-gate }
10120Sstevel@tonic-gate 
10130Sstevel@tonic-gate void
md_unit_openclose_exit(mdi_unit_t * ui)10140Sstevel@tonic-gate md_unit_openclose_exit(mdi_unit_t *ui)
10150Sstevel@tonic-gate {
10160Sstevel@tonic-gate 	md_unit_readerexit(ui);
10170Sstevel@tonic-gate 
10180Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
10190Sstevel@tonic-gate 	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
10200Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 	cv_broadcast(&ui->ui_cv);
10230Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
10240Sstevel@tonic-gate }
10250Sstevel@tonic-gate 
10260Sstevel@tonic-gate /*
10270Sstevel@tonic-gate  * Drop the openclose and readerlocks without acquiring or
10280Sstevel@tonic-gate  * releasing the ui_mx lock since the calling routine has
10290Sstevel@tonic-gate  * already acquired this lock.
10300Sstevel@tonic-gate  */
10310Sstevel@tonic-gate void
md_unit_openclose_exit_lh(mdi_unit_t * ui)10320Sstevel@tonic-gate md_unit_openclose_exit_lh(mdi_unit_t *ui)
10330Sstevel@tonic-gate {
10340Sstevel@tonic-gate 	md_unit_readerexit_common(ui, 1);
10350Sstevel@tonic-gate 	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
10360Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
10370Sstevel@tonic-gate 	cv_broadcast(&ui->ui_cv);
10380Sstevel@tonic-gate }
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate int
md_unit_isopen(mdi_unit_t * ui)10410Sstevel@tonic-gate md_unit_isopen(
10420Sstevel@tonic-gate 	mdi_unit_t	*ui
10430Sstevel@tonic-gate )
10440Sstevel@tonic-gate {
10450Sstevel@tonic-gate 	int		isopen;
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 	/* check status */
10480Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
10490Sstevel@tonic-gate 	isopen = ((ui->ui_lock & MD_UL_OPEN) ? 1 : 0);
10500Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
10510Sstevel@tonic-gate 	return (isopen);
10520Sstevel@tonic-gate }
10530Sstevel@tonic-gate 
10540Sstevel@tonic-gate int
md_unit_incopen(minor_t mnum,int flag,int otyp)10550Sstevel@tonic-gate md_unit_incopen(
10560Sstevel@tonic-gate 	minor_t		mnum,
10570Sstevel@tonic-gate 	int		flag,
10580Sstevel@tonic-gate 	int		otyp
10590Sstevel@tonic-gate )
10600Sstevel@tonic-gate {
10610Sstevel@tonic-gate 	mdi_unit_t	*ui = MDI_UNIT(mnum);
10620Sstevel@tonic-gate 	int		err = 0;
10630Sstevel@tonic-gate 
10640Sstevel@tonic-gate 	/* check type and flags */
10650Sstevel@tonic-gate 	ASSERT(ui != NULL);
10660Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
10670Sstevel@tonic-gate 	if ((otyp < 0) || (otyp >= OTYPCNT)) {
10680Sstevel@tonic-gate 		err = EINVAL;
10690Sstevel@tonic-gate 		goto out;
10700Sstevel@tonic-gate 	}
10710Sstevel@tonic-gate 	if (((flag & FEXCL) && (ui->ui_lock & MD_UL_OPEN)) ||
10720Sstevel@tonic-gate 	    (ui->ui_lock & MD_UL_EXCL)) {
10730Sstevel@tonic-gate 		err = EBUSY;
10740Sstevel@tonic-gate 		goto out;
10750Sstevel@tonic-gate 	}
10760Sstevel@tonic-gate 
10770Sstevel@tonic-gate 	/* count and flag open */
10780Sstevel@tonic-gate 	ui->ui_ocnt[otyp]++;
10790Sstevel@tonic-gate 	ui->ui_lock |= MD_UL_OPEN;
10800Sstevel@tonic-gate 	if (flag & FEXCL)
10810Sstevel@tonic-gate 		ui->ui_lock |= MD_UL_EXCL;
10820Sstevel@tonic-gate 
10830Sstevel@tonic-gate 	/* setup kstat, return success */
10840Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
10850Sstevel@tonic-gate 	md_kstat_init(mnum);
10860Sstevel@tonic-gate 	return (0);
10870Sstevel@tonic-gate 
10880Sstevel@tonic-gate 	/* return error */
10890Sstevel@tonic-gate out:
10900Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
10910Sstevel@tonic-gate 	return (err);
10920Sstevel@tonic-gate }
10930Sstevel@tonic-gate 
10940Sstevel@tonic-gate int
md_unit_decopen(minor_t mnum,int otyp)10950Sstevel@tonic-gate md_unit_decopen(
10960Sstevel@tonic-gate 	minor_t		mnum,
10970Sstevel@tonic-gate 	int		otyp
10980Sstevel@tonic-gate )
10990Sstevel@tonic-gate {
11000Sstevel@tonic-gate 	mdi_unit_t	*ui = MDI_UNIT(mnum);
11010Sstevel@tonic-gate 	int		err = 0;
11020Sstevel@tonic-gate 	unsigned	i;
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate 	/* check type and flags */
11050Sstevel@tonic-gate 	ASSERT(ui != NULL);
11060Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
11070Sstevel@tonic-gate 	if ((otyp < 0) || (otyp >= OTYPCNT)) {
11080Sstevel@tonic-gate 		err = EINVAL;
11090Sstevel@tonic-gate 		goto out;
11100Sstevel@tonic-gate 	} else if (ui->ui_ocnt[otyp] == 0) {
11110Sstevel@tonic-gate 		err = ENXIO;
11120Sstevel@tonic-gate 		goto out;
11130Sstevel@tonic-gate 	}
11140Sstevel@tonic-gate 
11150Sstevel@tonic-gate 	/* count and flag closed */
11160Sstevel@tonic-gate 	if (otyp == OTYP_LYR)
11170Sstevel@tonic-gate 		ui->ui_ocnt[otyp]--;
11180Sstevel@tonic-gate 	else
11190Sstevel@tonic-gate 		ui->ui_ocnt[otyp] = 0;
11200Sstevel@tonic-gate 	ui->ui_lock &= ~MD_UL_OPEN;
11210Sstevel@tonic-gate 	for (i = 0; (i < OTYPCNT); ++i)
11220Sstevel@tonic-gate 		if (ui->ui_ocnt[i] != 0)
11230Sstevel@tonic-gate 			ui->ui_lock |= MD_UL_OPEN;
11240Sstevel@tonic-gate 	if (! (ui->ui_lock & MD_UL_OPEN))
11250Sstevel@tonic-gate 		ui->ui_lock &= ~MD_UL_EXCL;
11260Sstevel@tonic-gate 
11270Sstevel@tonic-gate 	/* teardown kstat, return success */
11280Sstevel@tonic-gate 	if (! (ui->ui_lock & MD_UL_OPEN)) {
112910667SRay.Hassan@Sun.COM 
113010667SRay.Hassan@Sun.COM 		/*
113110667SRay.Hassan@Sun.COM 		 * We have a race condition inherited from specfs between
113210667SRay.Hassan@Sun.COM 		 * open() and close() calls. This results in the kstat
113310667SRay.Hassan@Sun.COM 		 * for a pending I/O being torn down, and then a panic.
113410667SRay.Hassan@Sun.COM 		 * To avoid this, only tear the kstat down if there are
113510667SRay.Hassan@Sun.COM 		 * no other readers on this device.
113610667SRay.Hassan@Sun.COM 		 */
113710667SRay.Hassan@Sun.COM 		if (ui->ui_readercnt > 1) {
113810667SRay.Hassan@Sun.COM 			mutex_exit(&ui->ui_mx);
113910667SRay.Hassan@Sun.COM 		} else {
114010667SRay.Hassan@Sun.COM 			mutex_exit(&ui->ui_mx);
114110667SRay.Hassan@Sun.COM 			md_kstat_destroy(mnum);
114210667SRay.Hassan@Sun.COM 		}
11430Sstevel@tonic-gate 		return (0);
11440Sstevel@tonic-gate 	}
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate 	/* return success */
11470Sstevel@tonic-gate out:
11480Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
11490Sstevel@tonic-gate 	return (err);
11500Sstevel@tonic-gate }
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate md_dev64_t
md_xlate_targ_2_mini(md_dev64_t targ_devt)11530Sstevel@tonic-gate md_xlate_targ_2_mini(md_dev64_t targ_devt)
11540Sstevel@tonic-gate {
11550Sstevel@tonic-gate 	dev32_t		mini_32_devt, targ_32_devt;
11560Sstevel@tonic-gate 	int		i;
11570Sstevel@tonic-gate 
11580Sstevel@tonic-gate 	/*
11590Sstevel@tonic-gate 	 * check to see if we're in an upgrade situation
11600Sstevel@tonic-gate 	 * if we are not in upgrade just return the input device
11610Sstevel@tonic-gate 	 */
11620Sstevel@tonic-gate 
11630Sstevel@tonic-gate 	if (!MD_UPGRADE)
11640Sstevel@tonic-gate 		return (targ_devt);
11650Sstevel@tonic-gate 
11660Sstevel@tonic-gate 	targ_32_devt = md_cmpldev(targ_devt);
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate 	i = 0;
11690Sstevel@tonic-gate 	while (i != md_tuple_length) {
11700Sstevel@tonic-gate 		if (md_tuple_table[i].targ_devt == targ_32_devt) {
11710Sstevel@tonic-gate 			mini_32_devt = md_tuple_table[i].mini_devt;
11720Sstevel@tonic-gate 			return (md_expldev((md_dev64_t)mini_32_devt));
11730Sstevel@tonic-gate 		}
11740Sstevel@tonic-gate 		i++;
11750Sstevel@tonic-gate 	}
11760Sstevel@tonic-gate 	return (NODEV64);
11770Sstevel@tonic-gate }
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate md_dev64_t
md_xlate_mini_2_targ(md_dev64_t mini_devt)11800Sstevel@tonic-gate md_xlate_mini_2_targ(md_dev64_t mini_devt)
11810Sstevel@tonic-gate {
11820Sstevel@tonic-gate 	dev32_t		mini_32_devt, targ_32_devt;
11830Sstevel@tonic-gate 	int		i;
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 	if (!MD_UPGRADE)
11860Sstevel@tonic-gate 		return (mini_devt);
11870Sstevel@tonic-gate 
11880Sstevel@tonic-gate 	mini_32_devt = md_cmpldev(mini_devt);
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 	i = 0;
11910Sstevel@tonic-gate 	while (i != md_tuple_length) {
11920Sstevel@tonic-gate 		if (md_tuple_table[i].mini_devt == mini_32_devt) {
11930Sstevel@tonic-gate 			targ_32_devt = md_tuple_table[i].targ_devt;
11940Sstevel@tonic-gate 			return (md_expldev((md_dev64_t)targ_32_devt));
11950Sstevel@tonic-gate 		}
11960Sstevel@tonic-gate 		i++;
11970Sstevel@tonic-gate 	}
11980Sstevel@tonic-gate 	return (NODEV64);
11990Sstevel@tonic-gate }
12000Sstevel@tonic-gate 
12010Sstevel@tonic-gate void
md_xlate_free(int size)12020Sstevel@tonic-gate md_xlate_free(int size)
12030Sstevel@tonic-gate {
12040Sstevel@tonic-gate 	kmem_free(md_tuple_table, size);
12050Sstevel@tonic-gate }
12060Sstevel@tonic-gate 
12070Sstevel@tonic-gate char *
md_targ_major_to_name(major_t maj)12080Sstevel@tonic-gate md_targ_major_to_name(major_t maj)
12090Sstevel@tonic-gate {
12100Sstevel@tonic-gate 	char *drv_name = NULL;
12110Sstevel@tonic-gate 	int	i;
12120Sstevel@tonic-gate 
12130Sstevel@tonic-gate 	if (!MD_UPGRADE)
12140Sstevel@tonic-gate 		return (ddi_major_to_name(maj));
12150Sstevel@tonic-gate 
12160Sstevel@tonic-gate 	for (i = 0; i < md_majortab_len; i++) {
12170Sstevel@tonic-gate 		if (md_major_tuple_table[i].targ_maj == maj) {
12180Sstevel@tonic-gate 			drv_name = md_major_tuple_table[i].drv_name;
12190Sstevel@tonic-gate 			break;
12200Sstevel@tonic-gate 		}
12210Sstevel@tonic-gate 	}
12220Sstevel@tonic-gate 	return (drv_name);
12230Sstevel@tonic-gate }
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate major_t
md_targ_name_to_major(char * drv_name)12260Sstevel@tonic-gate md_targ_name_to_major(char *drv_name)
12270Sstevel@tonic-gate {
12280Sstevel@tonic-gate 	major_t maj;
12290Sstevel@tonic-gate 	int	i;
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	maj = md_getmajor(NODEV64);
12320Sstevel@tonic-gate 	if (!MD_UPGRADE)
12330Sstevel@tonic-gate 		return (ddi_name_to_major(drv_name));
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate 	for (i = 0; i < md_majortab_len; i++) {
12360Sstevel@tonic-gate 		if ((strcmp(md_major_tuple_table[i].drv_name,
12370Sstevel@tonic-gate 		    drv_name)) == 0) {
12380Sstevel@tonic-gate 			maj = md_major_tuple_table[i].targ_maj;
12390Sstevel@tonic-gate 			break;
12400Sstevel@tonic-gate 		}
12410Sstevel@tonic-gate 	}
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate 	return (maj);
12440Sstevel@tonic-gate }
12450Sstevel@tonic-gate 
12460Sstevel@tonic-gate void
md_majortab_free()12470Sstevel@tonic-gate md_majortab_free()
12480Sstevel@tonic-gate {
12490Sstevel@tonic-gate 	size_t	sz;
12500Sstevel@tonic-gate 	int	i;
12510Sstevel@tonic-gate 
12520Sstevel@tonic-gate 	for (i = 0; i < md_majortab_len; i++) {
12530Sstevel@tonic-gate 		freestr(md_major_tuple_table[i].drv_name);
12540Sstevel@tonic-gate 	}
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 	sz = md_majortab_len * sizeof (struct md_xlate_major_table);
12570Sstevel@tonic-gate 	kmem_free(md_major_tuple_table, sz);
12580Sstevel@tonic-gate }
12590Sstevel@tonic-gate 
/* returns a pointer to a function which returns an intptr_t */
12610Sstevel@tonic-gate 
12620Sstevel@tonic-gate intptr_t (*
md_get_named_service(md_dev64_t dev,int modindex,char * name,intptr_t (* Default)())12630Sstevel@tonic-gate md_get_named_service(md_dev64_t dev, int modindex, char *name,
12640Sstevel@tonic-gate 	intptr_t (*Default)()))()
12650Sstevel@tonic-gate {
12660Sstevel@tonic-gate 	mdi_unit_t		*ui;
12670Sstevel@tonic-gate 	md_named_services_t	*sp;
12680Sstevel@tonic-gate 	int			i;
12690Sstevel@tonic-gate 
12700Sstevel@tonic-gate 	/*
12710Sstevel@tonic-gate 	 * Return the first named service found.
12720Sstevel@tonic-gate 	 * Use this path when it is known that there is only
12730Sstevel@tonic-gate 	 * one named service possible (e.g., hotspare interface)
12740Sstevel@tonic-gate 	 */
12750Sstevel@tonic-gate 	if ((dev == NODEV64) && (modindex == ANY_SERVICE)) {
12760Sstevel@tonic-gate 		for (i = 0; i < MD_NOPS; i++) {
12770Sstevel@tonic-gate 			if (md_ops[i] == NULL) {
12780Sstevel@tonic-gate 				continue;
12790Sstevel@tonic-gate 			}
12800Sstevel@tonic-gate 			sp = md_ops[i]->md_services;
12810Sstevel@tonic-gate 			if (sp == NULL)
12820Sstevel@tonic-gate 				continue;
12830Sstevel@tonic-gate 			while (sp->md_service != NULL) {
12840Sstevel@tonic-gate 				if (strcmp(name, sp->md_name) == 0)
12850Sstevel@tonic-gate 					return (sp->md_service);
12860Sstevel@tonic-gate 				sp++;
12870Sstevel@tonic-gate 			}
12880Sstevel@tonic-gate 		}
12890Sstevel@tonic-gate 		return (Default);
12900Sstevel@tonic-gate 	}
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	/*
12930Sstevel@tonic-gate 	 * Return the named service for the given modindex.
12940Sstevel@tonic-gate 	 * This is used if there are multiple possible named services
12950Sstevel@tonic-gate 	 * and each one needs to be called (e.g., poke hotspares)
12960Sstevel@tonic-gate 	 */
12970Sstevel@tonic-gate 	if (dev == NODEV64) {
12980Sstevel@tonic-gate 		if (modindex >= MD_NOPS)
12990Sstevel@tonic-gate 			return (Default);
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 		if (md_ops[modindex] == NULL)
13020Sstevel@tonic-gate 			return (Default);
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 		sp = md_ops[modindex]->md_services;
13050Sstevel@tonic-gate 		if (sp == NULL)
13060Sstevel@tonic-gate 			return (Default);
13070Sstevel@tonic-gate 
13080Sstevel@tonic-gate 		while (sp->md_service != NULL) {
13090Sstevel@tonic-gate 			if (strcmp(name, sp->md_name) == 0)
13100Sstevel@tonic-gate 				return (sp->md_service);
13110Sstevel@tonic-gate 			sp++;
13120Sstevel@tonic-gate 		}
13130Sstevel@tonic-gate 		return (Default);
13140Sstevel@tonic-gate 	}
13150Sstevel@tonic-gate 
13160Sstevel@tonic-gate 	/*
13170Sstevel@tonic-gate 	 * Return the named service for this md_dev64_t
13180Sstevel@tonic-gate 	 */
13190Sstevel@tonic-gate 	if (md_getmajor(dev) != md_major)
13200Sstevel@tonic-gate 		return (Default);
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate 	if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) ||
13230Sstevel@tonic-gate 	    (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits))
13240Sstevel@tonic-gate 		return (NULL);
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 
13270Sstevel@tonic-gate 	if ((ui = MDI_UNIT(md_getminor(dev))) == NULL)
13280Sstevel@tonic-gate 		return (NULL);
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	sp = md_ops[ui->ui_opsindex]->md_services;
13310Sstevel@tonic-gate 	if (sp == NULL)
13320Sstevel@tonic-gate 		return (Default);
13330Sstevel@tonic-gate 	while (sp->md_service != NULL) {
13340Sstevel@tonic-gate 		if (strcmp(name, sp->md_name) == 0)
13350Sstevel@tonic-gate 			return (sp->md_service);
13360Sstevel@tonic-gate 		sp++;
13370Sstevel@tonic-gate 	}
13380Sstevel@tonic-gate 	return (Default);
13390Sstevel@tonic-gate }
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate /*
13420Sstevel@tonic-gate  * md_daemon callback routine
13430Sstevel@tonic-gate  */
13440Sstevel@tonic-gate boolean_t
callb_md_cpr(void * arg,int code)13450Sstevel@tonic-gate callb_md_cpr(void *arg, int code)
13460Sstevel@tonic-gate {
13470Sstevel@tonic-gate 	callb_cpr_t *cp = (callb_cpr_t *)arg;
13480Sstevel@tonic-gate 	int ret = 0;				/* assume success */
134911066Srafael.vanoni@sun.com 	clock_t delta;
13500Sstevel@tonic-gate 
13510Sstevel@tonic-gate 	mutex_enter(cp->cc_lockp);
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	switch (code) {
13540Sstevel@tonic-gate 	case CB_CODE_CPR_CHKPT:
13550Sstevel@tonic-gate 		/*
13560Sstevel@tonic-gate 		 * Check for active resync threads
13570Sstevel@tonic-gate 		 */
13580Sstevel@tonic-gate 		mutex_enter(&md_cpr_resync.md_resync_mutex);
13590Sstevel@tonic-gate 		if ((md_cpr_resync.md_mirror_resync > 0) ||
13607563SPrasad.Singamsetty@Sun.COM 		    (md_cpr_resync.md_raid_resync > 0)) {
13610Sstevel@tonic-gate 			mutex_exit(&md_cpr_resync.md_resync_mutex);
13620Sstevel@tonic-gate 			cmn_err(CE_WARN, "There are Solaris Volume Manager "
13630Sstevel@tonic-gate 			    "synchronization threads running.");
13640Sstevel@tonic-gate 			cmn_err(CE_WARN, "Please try system suspension at "
13657563SPrasad.Singamsetty@Sun.COM 			    "a later time.");
13660Sstevel@tonic-gate 			ret = -1;
13670Sstevel@tonic-gate 			break;
13680Sstevel@tonic-gate 		}
13690Sstevel@tonic-gate 		mutex_exit(&md_cpr_resync.md_resync_mutex);
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 		cp->cc_events |= CALLB_CPR_START;
137211066Srafael.vanoni@sun.com 		delta = CPR_KTHREAD_TIMEOUT_SEC * hz;
13730Sstevel@tonic-gate 		while (!(cp->cc_events & CALLB_CPR_SAFE))
137411066Srafael.vanoni@sun.com 			/* cv_reltimedwait() returns -1 if it times out. */
137511066Srafael.vanoni@sun.com 			if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
137611066Srafael.vanoni@sun.com 			    cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1)
13770Sstevel@tonic-gate 				break;
13780Sstevel@tonic-gate 			break;
13790Sstevel@tonic-gate 
13800Sstevel@tonic-gate 	case CB_CODE_CPR_RESUME:
13810Sstevel@tonic-gate 		cp->cc_events &= ~CALLB_CPR_START;
13820Sstevel@tonic-gate 		cv_signal(&cp->cc_stop_cv);
13830Sstevel@tonic-gate 		break;
13840Sstevel@tonic-gate 	}
13850Sstevel@tonic-gate 	mutex_exit(cp->cc_lockp);
13860Sstevel@tonic-gate 	return (ret != -1);
13870Sstevel@tonic-gate }
13880Sstevel@tonic-gate 
13890Sstevel@tonic-gate void
md_daemon(int pass_thru,mdq_anchor_t * anchor)13900Sstevel@tonic-gate md_daemon(int pass_thru, mdq_anchor_t *anchor)
13910Sstevel@tonic-gate {
13920Sstevel@tonic-gate 	daemon_queue_t  *dq;
13930Sstevel@tonic-gate 	callb_cpr_t	cprinfo;
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 	if (pass_thru && (md_get_status() & MD_GBL_DAEMONS_LIVE))
13960Sstevel@tonic-gate 		return;
13970Sstevel@tonic-gate 	/*
13980Sstevel@tonic-gate 	 * Register cpr callback
13990Sstevel@tonic-gate 	 */
14000Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &anchor->a_mx, callb_md_cpr, "md_daemon");
14010Sstevel@tonic-gate 
14020Sstevel@tonic-gate 	/*CONSTCOND*/
14030Sstevel@tonic-gate 	while (1) {
14040Sstevel@tonic-gate 		mutex_enter(&anchor->a_mx);
14050Sstevel@tonic-gate 		while ((dq = anchor->dq.dq_next) == &(anchor->dq)) {
14060Sstevel@tonic-gate 			if (pass_thru) {
14070Sstevel@tonic-gate 				/*
14080Sstevel@tonic-gate 				 * CALLB_CPR_EXIT Will do
14090Sstevel@tonic-gate 				 * mutex_exit(&anchor->a_mx)
14100Sstevel@tonic-gate 				 */
14110Sstevel@tonic-gate 				CALLB_CPR_EXIT(&cprinfo);
14120Sstevel@tonic-gate 				return;
14130Sstevel@tonic-gate 			}
14140Sstevel@tonic-gate 			if (md_get_status() & MD_GBL_DAEMONS_DIE) {
14150Sstevel@tonic-gate 				mutex_exit(&anchor->a_mx);
14160Sstevel@tonic-gate 				mutex_enter(&md_mx);
14170Sstevel@tonic-gate 				md_num_daemons--;
14180Sstevel@tonic-gate 				mutex_exit(&md_mx);
14190Sstevel@tonic-gate 				/*
14200Sstevel@tonic-gate 				 * CALLB_CPR_EXIT will do
14210Sstevel@tonic-gate 				 * mutex_exit(&anchor->a_mx)
14220Sstevel@tonic-gate 				 */
14230Sstevel@tonic-gate 				mutex_enter(&anchor->a_mx);
14240Sstevel@tonic-gate 				CALLB_CPR_EXIT(&cprinfo);
14250Sstevel@tonic-gate 				thread_exit();
14260Sstevel@tonic-gate 			}
14270Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
14280Sstevel@tonic-gate 			cv_wait(&anchor->a_cv, &anchor->a_mx);
14290Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &anchor->a_mx);
14300Sstevel@tonic-gate 		}
14310Sstevel@tonic-gate 		dq->dq_prev->dq_next = dq->dq_next;
14320Sstevel@tonic-gate 		dq->dq_next->dq_prev = dq->dq_prev;
14330Sstevel@tonic-gate 		dq->dq_prev = dq->dq_next = NULL;
14340Sstevel@tonic-gate 		anchor->dq.qlen--;
14350Sstevel@tonic-gate 		mutex_exit(&anchor->a_mx);
14360Sstevel@tonic-gate 		(*(dq->dq_call))(dq);
14370Sstevel@tonic-gate 	}
14380Sstevel@tonic-gate 	/*NOTREACHED*/
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate 
14410Sstevel@tonic-gate /*
14420Sstevel@tonic-gate  * daemon_request:
14430Sstevel@tonic-gate  *
14440Sstevel@tonic-gate  * Adds requests to appropriate requestq which is
14450Sstevel@tonic-gate  * anchored by *anchor.
14460Sstevel@tonic-gate  * The request is the first element of a doubly linked circular list.
14470Sstevel@tonic-gate  * When the request is a single element, the forward and backward
14480Sstevel@tonic-gate  * pointers MUST point to the element itself.
14490Sstevel@tonic-gate  */
14500Sstevel@tonic-gate 
14510Sstevel@tonic-gate void
daemon_request(mdq_anchor_t * anchor,void (* func)(),daemon_queue_t * request,callstyle_t style)14520Sstevel@tonic-gate daemon_request(mdq_anchor_t *anchor, void (*func)(),
14530Sstevel@tonic-gate 				daemon_queue_t *request, callstyle_t style)
14540Sstevel@tonic-gate {
14550Sstevel@tonic-gate 	daemon_queue_t *rqtp;
14560Sstevel@tonic-gate 	int i = 0;
14570Sstevel@tonic-gate 
14580Sstevel@tonic-gate 	rqtp = request;
14590Sstevel@tonic-gate 	if (style == REQ_OLD) {
14600Sstevel@tonic-gate 		ASSERT((rqtp->dq_next == NULL) && (rqtp->dq_prev == NULL));
14610Sstevel@tonic-gate 		/* set it to the new style */
14620Sstevel@tonic-gate 		rqtp->dq_prev = rqtp->dq_next = rqtp;
14630Sstevel@tonic-gate 	}
14640Sstevel@tonic-gate 	ASSERT((rqtp->dq_next != NULL) && (rqtp->dq_prev != NULL));
14650Sstevel@tonic-gate 
14660Sstevel@tonic-gate 	/* scan the list and add the function to each element */
14670Sstevel@tonic-gate 
14680Sstevel@tonic-gate 	do {
14690Sstevel@tonic-gate 		rqtp->dq_call = func;
14700Sstevel@tonic-gate 		i++;
14710Sstevel@tonic-gate 		rqtp = rqtp->dq_next;
14720Sstevel@tonic-gate 	} while (rqtp != request);
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 	/* save pointer to tail of the request list */
14750Sstevel@tonic-gate 	rqtp = request->dq_prev;
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate 	mutex_enter(&anchor->a_mx);
14780Sstevel@tonic-gate 	/* stats */
14790Sstevel@tonic-gate 	anchor->dq.qlen += i;
14800Sstevel@tonic-gate 	anchor->dq.treqs += i;
14810Sstevel@tonic-gate 	anchor->dq.maxq_len = (anchor->dq.qlen > anchor->dq.maxq_len) ?
14827563SPrasad.Singamsetty@Sun.COM 	    anchor->dq.qlen : anchor->dq.maxq_len;
14830Sstevel@tonic-gate 
14840Sstevel@tonic-gate 	/* now add the list to request queue */
14850Sstevel@tonic-gate 	request->dq_prev = anchor->dq.dq_prev;
14860Sstevel@tonic-gate 	rqtp->dq_next = &anchor->dq;
14870Sstevel@tonic-gate 	anchor->dq.dq_prev->dq_next = request;
14880Sstevel@tonic-gate 	anchor->dq.dq_prev = rqtp;
14890Sstevel@tonic-gate 	cv_broadcast(&anchor->a_cv);
14900Sstevel@tonic-gate 	mutex_exit(&anchor->a_mx);
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate 
14930Sstevel@tonic-gate void
mddb_commitrec_wrapper(mddb_recid_t recid)14940Sstevel@tonic-gate mddb_commitrec_wrapper(mddb_recid_t recid)
14950Sstevel@tonic-gate {
14960Sstevel@tonic-gate 	int sent_log = 0;
14970Sstevel@tonic-gate 	uint_t retry = md_retry_cnt;
14980Sstevel@tonic-gate 	set_t	setno;
14990Sstevel@tonic-gate 
15000Sstevel@tonic-gate 	while (mddb_commitrec(recid)) {
15010Sstevel@tonic-gate 		if (! sent_log) {
15020Sstevel@tonic-gate 			cmn_err(CE_WARN,
15030Sstevel@tonic-gate 			    "md: state database commit failed");
15040Sstevel@tonic-gate 			sent_log = 1;
15050Sstevel@tonic-gate 		}
15060Sstevel@tonic-gate 		delay(md_hz);
15070Sstevel@tonic-gate 
15080Sstevel@tonic-gate 		/*
15090Sstevel@tonic-gate 		 * Setting retry cnt to one (pre decremented) so that we
15100Sstevel@tonic-gate 		 * actually do no retries when committing/deleting a mddb rec.
15110Sstevel@tonic-gate 		 * The underlying disk driver does several retries to check
15120Sstevel@tonic-gate 		 * if the disk is really dead or not so there
15130Sstevel@tonic-gate 		 * is no reason for us to retry on top of the drivers retries.
15140Sstevel@tonic-gate 		 */
15150Sstevel@tonic-gate 
15160Sstevel@tonic-gate 		if (--retry == 0) {
15170Sstevel@tonic-gate 			setno = mddb_getsetnum(recid);
15180Sstevel@tonic-gate 			if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
15190Sstevel@tonic-gate 				panic(
15200Sstevel@tonic-gate 				    "md: Panic due to lack of DiskSuite state\n"
15210Sstevel@tonic-gate 				    " database replicas. Fewer than 50%% of "
15220Sstevel@tonic-gate 				    "the total were available,\n so panic to "
15230Sstevel@tonic-gate 				    "ensure data integrity.");
15240Sstevel@tonic-gate 			} else {
15250Sstevel@tonic-gate 				panic("md: state database problem");
15260Sstevel@tonic-gate 			}
15270Sstevel@tonic-gate 			/*NOTREACHED*/
15280Sstevel@tonic-gate 		}
15290Sstevel@tonic-gate 	}
15300Sstevel@tonic-gate }
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate void
mddb_commitrecs_wrapper(mddb_recid_t * recids)15330Sstevel@tonic-gate mddb_commitrecs_wrapper(mddb_recid_t *recids)
15340Sstevel@tonic-gate {
15350Sstevel@tonic-gate 	int sent_log = 0;
15360Sstevel@tonic-gate 	uint_t retry = md_retry_cnt;
15370Sstevel@tonic-gate 	set_t	setno;
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate 	while (mddb_commitrecs(recids)) {
15400Sstevel@tonic-gate 		if (! sent_log) {
15410Sstevel@tonic-gate 			cmn_err(CE_WARN,
15420Sstevel@tonic-gate 			    "md: state database commit failed");
15430Sstevel@tonic-gate 			sent_log = 1;
15440Sstevel@tonic-gate 		}
15450Sstevel@tonic-gate 		delay(md_hz);
15460Sstevel@tonic-gate 
15470Sstevel@tonic-gate 		/*
15480Sstevel@tonic-gate 		 * Setting retry cnt to one (pre decremented) so that we
15490Sstevel@tonic-gate 		 * actually do no retries when committing/deleting a mddb rec.
15500Sstevel@tonic-gate 		 * The underlying disk driver does several retries to check
15510Sstevel@tonic-gate 		 * if the disk is really dead or not so there
15520Sstevel@tonic-gate 		 * is no reason for us to retry on top of the drivers retries.
15530Sstevel@tonic-gate 		 */
15540Sstevel@tonic-gate 
15550Sstevel@tonic-gate 		if (--retry == 0) {
15560Sstevel@tonic-gate 			/*
15570Sstevel@tonic-gate 			 * since all the records are part of the same set
15580Sstevel@tonic-gate 			 * use the first one to get setno
15590Sstevel@tonic-gate 			 */
15600Sstevel@tonic-gate 			setno = mddb_getsetnum(*recids);
15610Sstevel@tonic-gate 			if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
15620Sstevel@tonic-gate 				panic(
15630Sstevel@tonic-gate 				    "md: Panic due to lack of DiskSuite state\n"
15640Sstevel@tonic-gate 				    " database replicas. Fewer than 50%% of "
15650Sstevel@tonic-gate 				    "the total were available,\n so panic to "
15660Sstevel@tonic-gate 				    "ensure data integrity.");
15670Sstevel@tonic-gate 			} else {
15680Sstevel@tonic-gate 				panic("md: state database problem");
15690Sstevel@tonic-gate 			}
15700Sstevel@tonic-gate 			/*NOTREACHED*/
15710Sstevel@tonic-gate 		}
15720Sstevel@tonic-gate 	}
15730Sstevel@tonic-gate }
15740Sstevel@tonic-gate 
15750Sstevel@tonic-gate void
mddb_deleterec_wrapper(mddb_recid_t recid)15760Sstevel@tonic-gate mddb_deleterec_wrapper(mddb_recid_t recid)
15770Sstevel@tonic-gate {
15780Sstevel@tonic-gate 	int sent_log = 0;
15790Sstevel@tonic-gate 	uint_t retry = md_retry_cnt;
15800Sstevel@tonic-gate 	set_t	setno;
15810Sstevel@tonic-gate 
15820Sstevel@tonic-gate 	while (mddb_deleterec(recid)) {
15830Sstevel@tonic-gate 		if (! sent_log) {
15840Sstevel@tonic-gate 			cmn_err(CE_WARN,
15850Sstevel@tonic-gate 			    "md: state database delete failed");
15860Sstevel@tonic-gate 			sent_log = 1;
15870Sstevel@tonic-gate 		}
15880Sstevel@tonic-gate 		delay(md_hz);
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 		/*
15910Sstevel@tonic-gate 		 * Setting retry cnt to one (pre decremented) so that we
15920Sstevel@tonic-gate 		 * actually do no retries when committing/deleting a mddb rec.
15930Sstevel@tonic-gate 		 * The underlying disk driver does several retries to check
15940Sstevel@tonic-gate 		 * if the disk is really dead or not so there
15950Sstevel@tonic-gate 		 * is no reason for us to retry on top of the drivers retries.
15960Sstevel@tonic-gate 		 */
15970Sstevel@tonic-gate 
15980Sstevel@tonic-gate 		if (--retry == 0) {
15990Sstevel@tonic-gate 			setno = mddb_getsetnum(recid);
16000Sstevel@tonic-gate 			if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
16010Sstevel@tonic-gate 				panic(
16020Sstevel@tonic-gate 				    "md: Panic due to lack of DiskSuite state\n"
16030Sstevel@tonic-gate 				    " database replicas. Fewer than 50%% of "
16040Sstevel@tonic-gate 				    "the total were available,\n so panic to "
16050Sstevel@tonic-gate 				    "ensure data integrity.");
16060Sstevel@tonic-gate 			} else {
16070Sstevel@tonic-gate 				panic("md: state database problem");
16080Sstevel@tonic-gate 			}
16090Sstevel@tonic-gate 			/*NOTREACHED*/
16100Sstevel@tonic-gate 		}
16110Sstevel@tonic-gate 	}
16120Sstevel@tonic-gate }
16130Sstevel@tonic-gate 
16140Sstevel@tonic-gate /*
16150Sstevel@tonic-gate  * md_holdset_enter is called in order to hold the set in its
16160Sstevel@tonic-gate  * current state (loaded, unloaded, snarfed, unsnarfed, etc)
16170Sstevel@tonic-gate  * until md_holdset_exit is called.  This is used by the mirror
16180Sstevel@tonic-gate  * code to mark the set as HOLD so that the set won't be
16190Sstevel@tonic-gate  * unloaded while hotspares are being allocated in check_4_hotspares.
16200Sstevel@tonic-gate  * The original fix to the mirror code to hold the set was to call
16210Sstevel@tonic-gate  * md_haltsnarf_enter, but this will block all ioctls and ioctls
16220Sstevel@tonic-gate  * must work for a MN diskset while hotspares are allocated.
16230Sstevel@tonic-gate  */
16240Sstevel@tonic-gate void
md_holdset_enter(set_t setno)16250Sstevel@tonic-gate md_holdset_enter(set_t setno)
16260Sstevel@tonic-gate {
16270Sstevel@tonic-gate 	mutex_enter(&md_mx);
16280Sstevel@tonic-gate 	while (md_set[setno].s_status & MD_SET_HOLD)
16290Sstevel@tonic-gate 		cv_wait(&md_cv, &md_mx);
16300Sstevel@tonic-gate 	md_set[setno].s_status |= MD_SET_HOLD;
16310Sstevel@tonic-gate 	mutex_exit(&md_mx);
16320Sstevel@tonic-gate }
16330Sstevel@tonic-gate 
16340Sstevel@tonic-gate void
md_holdset_exit(set_t setno)16350Sstevel@tonic-gate md_holdset_exit(set_t setno)
16360Sstevel@tonic-gate {
16370Sstevel@tonic-gate 	mutex_enter(&md_mx);
16380Sstevel@tonic-gate 	md_set[setno].s_status &= ~MD_SET_HOLD;
16390Sstevel@tonic-gate 	cv_broadcast(&md_cv);
16400Sstevel@tonic-gate 	mutex_exit(&md_mx);
16410Sstevel@tonic-gate }
16420Sstevel@tonic-gate 
16430Sstevel@tonic-gate /*
16440Sstevel@tonic-gate  * Returns a 0 if this thread marked the set as HOLD (success),
16450Sstevel@tonic-gate  * returns a -1 if set was already marked HOLD (failure).
16460Sstevel@tonic-gate  * Used by the release_set code to see if set is marked HOLD.
16470Sstevel@tonic-gate  * HOLD is set by a daemon when hotspares are being allocated
16480Sstevel@tonic-gate  * to mirror units.
16490Sstevel@tonic-gate  */
16500Sstevel@tonic-gate int
md_holdset_testandenter(set_t setno)16510Sstevel@tonic-gate md_holdset_testandenter(set_t setno)
16520Sstevel@tonic-gate {
16530Sstevel@tonic-gate 	mutex_enter(&md_mx);
16540Sstevel@tonic-gate 	if (md_set[setno].s_status & MD_SET_HOLD) {
16550Sstevel@tonic-gate 		mutex_exit(&md_mx);
16560Sstevel@tonic-gate 		return (-1);
16570Sstevel@tonic-gate 	}
16580Sstevel@tonic-gate 	md_set[setno].s_status |= MD_SET_HOLD;
16590Sstevel@tonic-gate 	mutex_exit(&md_mx);
16600Sstevel@tonic-gate 	return (0);
16610Sstevel@tonic-gate }
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate void
md_haltsnarf_enter(set_t setno)16640Sstevel@tonic-gate md_haltsnarf_enter(set_t setno)
16650Sstevel@tonic-gate {
16660Sstevel@tonic-gate 	mutex_enter(&md_mx);
16670Sstevel@tonic-gate 	while (md_set[setno].s_status & MD_SET_SNARFING)
16680Sstevel@tonic-gate 		cv_wait(&md_cv, &md_mx);
16690Sstevel@tonic-gate 
16700Sstevel@tonic-gate 	md_set[setno].s_status |= MD_SET_SNARFING;
16710Sstevel@tonic-gate 	mutex_exit(&md_mx);
16720Sstevel@tonic-gate }
16730Sstevel@tonic-gate 
16740Sstevel@tonic-gate void
md_haltsnarf_exit(set_t setno)16750Sstevel@tonic-gate md_haltsnarf_exit(set_t setno)
16760Sstevel@tonic-gate {
16770Sstevel@tonic-gate 	mutex_enter(&md_mx);
16780Sstevel@tonic-gate 	md_set[setno].s_status &= ~MD_SET_SNARFING;
16790Sstevel@tonic-gate 	cv_broadcast(&md_cv);
16800Sstevel@tonic-gate 	mutex_exit(&md_mx);
16810Sstevel@tonic-gate }
16820Sstevel@tonic-gate 
16830Sstevel@tonic-gate void
md_haltsnarf_wait(set_t setno)16840Sstevel@tonic-gate md_haltsnarf_wait(set_t setno)
16850Sstevel@tonic-gate {
16860Sstevel@tonic-gate 	mutex_enter(&md_mx);
16870Sstevel@tonic-gate 	while (md_set[setno].s_status & MD_SET_SNARFING)
16880Sstevel@tonic-gate 		cv_wait(&md_cv, &md_mx);
16890Sstevel@tonic-gate 	mutex_exit(&md_mx);
16900Sstevel@tonic-gate }
16910Sstevel@tonic-gate 
16920Sstevel@tonic-gate /*
16930Sstevel@tonic-gate  * ASSUMED that the md_unit_array_rw WRITER lock is held.
16940Sstevel@tonic-gate  */
16950Sstevel@tonic-gate int
md_halt_set(set_t setno,enum md_haltcmd cmd)16960Sstevel@tonic-gate md_halt_set(set_t setno, enum md_haltcmd cmd)
16970Sstevel@tonic-gate {
16980Sstevel@tonic-gate 	int	i, err;
16990Sstevel@tonic-gate 
17000Sstevel@tonic-gate 	if (md_set[setno].s_un == NULL || md_set[setno].s_ui == NULL) {
17010Sstevel@tonic-gate 		return (0);
17020Sstevel@tonic-gate 	}
17030Sstevel@tonic-gate 
17040Sstevel@tonic-gate 	if ((cmd == MD_HALT_CHECK) || (cmd == MD_HALT_ALL)) {
17050Sstevel@tonic-gate 		for (i = 0; i < MD_NOPS; i++) {
17060Sstevel@tonic-gate 			if (md_ops[i] == NULL)
17070Sstevel@tonic-gate 				continue;
17080Sstevel@tonic-gate 			if ((*(md_ops[i]->md_halt))(MD_HALT_CLOSE, setno)) {
17090Sstevel@tonic-gate 				for (--i; i > 0; --i) {
17100Sstevel@tonic-gate 					if (md_ops[i] == NULL)
17110Sstevel@tonic-gate 						continue;
17120Sstevel@tonic-gate 					(void) (*(md_ops[i]->md_halt))
17130Sstevel@tonic-gate 					    (MD_HALT_OPEN, setno);
17140Sstevel@tonic-gate 				}
17150Sstevel@tonic-gate 				return (EBUSY);
17160Sstevel@tonic-gate 			}
17170Sstevel@tonic-gate 		}
17180Sstevel@tonic-gate 
17190Sstevel@tonic-gate 		for (i = 0; i < MD_NOPS; i++) {
17200Sstevel@tonic-gate 			if (md_ops[i] == NULL)
17210Sstevel@tonic-gate 				continue;
17220Sstevel@tonic-gate 			if ((*(md_ops[i]->md_halt))(MD_HALT_CHECK, setno)) {
17230Sstevel@tonic-gate 				for (i = 0; i < MD_NOPS; i++) {
17240Sstevel@tonic-gate 					if (md_ops[i] == NULL)
17250Sstevel@tonic-gate 						continue;
17260Sstevel@tonic-gate 					(void) (*(md_ops[i]->md_halt))
17270Sstevel@tonic-gate 					    (MD_HALT_OPEN, setno);
17280Sstevel@tonic-gate 				}
17290Sstevel@tonic-gate 				return (EBUSY);
17300Sstevel@tonic-gate 			}
17310Sstevel@tonic-gate 		}
17320Sstevel@tonic-gate 	}
17330Sstevel@tonic-gate 
17340Sstevel@tonic-gate 	if ((cmd == MD_HALT_DOIT) || (cmd == MD_HALT_ALL)) {
17350Sstevel@tonic-gate 		for (i = 0; i < MD_NOPS; i++) {
17360Sstevel@tonic-gate 			if (md_ops[i] == NULL)
17370Sstevel@tonic-gate 				continue;
17380Sstevel@tonic-gate 			err = (*(md_ops[i]->md_halt))(MD_HALT_DOIT, setno);
17390Sstevel@tonic-gate 			if (err != 0)
17400Sstevel@tonic-gate 				cmn_err(CE_NOTE,
17410Sstevel@tonic-gate 				    "md: halt failed for %s, error %d",
17420Sstevel@tonic-gate 				    md_ops[i]->md_driver.md_drivername, err);
17430Sstevel@tonic-gate 		}
17440Sstevel@tonic-gate 
17450Sstevel@tonic-gate 		/*
17460Sstevel@tonic-gate 		 * Unload the devid namespace if it is loaded
17470Sstevel@tonic-gate 		 */
17480Sstevel@tonic-gate 		md_unload_namespace(setno, NM_DEVID);
17490Sstevel@tonic-gate 		md_unload_namespace(setno, 0L);
17500Sstevel@tonic-gate 		md_clr_setstatus(setno, MD_SET_SNARFED);
17510Sstevel@tonic-gate 	}
17520Sstevel@tonic-gate 
17530Sstevel@tonic-gate 	return (0);
17540Sstevel@tonic-gate }
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate int
md_halt(int global_locks_owned_mask)17570Sstevel@tonic-gate md_halt(int global_locks_owned_mask)
17580Sstevel@tonic-gate {
17590Sstevel@tonic-gate 	set_t			i, j;
17600Sstevel@tonic-gate 	int			err;
17610Sstevel@tonic-gate 	int			init_queues;
17620Sstevel@tonic-gate 	md_requestq_entry_t	*rqp;
17630Sstevel@tonic-gate 	md_ops_t		**pops, *ops, *lops;
17640Sstevel@tonic-gate 	ddi_modhandle_t		mod;
17650Sstevel@tonic-gate 	char			*name;
17660Sstevel@tonic-gate 
17670Sstevel@tonic-gate 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
17680Sstevel@tonic-gate 
17690Sstevel@tonic-gate 	/*
17700Sstevel@tonic-gate 	 * Grab the all of the global locks that are not
17710Sstevel@tonic-gate 	 * already owned to ensure that there isn't another
17720Sstevel@tonic-gate 	 * thread trying to access a global resource
17730Sstevel@tonic-gate 	 * while the halt is in progress
17740Sstevel@tonic-gate 	 */
17750Sstevel@tonic-gate 	if (md_global_lock_enter(global_locks_owned_mask) == EINTR)
17760Sstevel@tonic-gate 		return (EINTR);
17770Sstevel@tonic-gate 
17780Sstevel@tonic-gate 	for (i = 0; i < md_nsets; i++)
17790Sstevel@tonic-gate 		md_haltsnarf_enter(i);
17800Sstevel@tonic-gate 
17810Sstevel@tonic-gate 	/*
17820Sstevel@tonic-gate 	 * Kill the daemon threads.
17830Sstevel@tonic-gate 	 */
17840Sstevel@tonic-gate 	init_queues = ((md_get_status() & MD_GBL_DAEMONS_LIVE) ? FALSE : TRUE);
17850Sstevel@tonic-gate 	md_clr_status(MD_GBL_DAEMONS_LIVE);
17860Sstevel@tonic-gate 	md_set_status(MD_GBL_DAEMONS_DIE);
17870Sstevel@tonic-gate 
17880Sstevel@tonic-gate 	rqp = &md_daemon_queues[0];
17890Sstevel@tonic-gate 	i = 0;
17900Sstevel@tonic-gate 	while (!NULL_REQUESTQ_ENTRY(rqp)) {
17910Sstevel@tonic-gate 		cv_broadcast(&rqp->dispq_headp->a_cv);
17920Sstevel@tonic-gate 		rqp = &md_daemon_queues[++i];
17930Sstevel@tonic-gate 	}
17940Sstevel@tonic-gate 
17950Sstevel@tonic-gate 	mutex_enter(&md_mx);
17960Sstevel@tonic-gate 	while (md_num_daemons != 0) {
17970Sstevel@tonic-gate 		mutex_exit(&md_mx);
17980Sstevel@tonic-gate 		delay(md_hz);
17990Sstevel@tonic-gate 		mutex_enter(&md_mx);
18000Sstevel@tonic-gate 	}
18010Sstevel@tonic-gate 	mutex_exit(&md_mx);
18020Sstevel@tonic-gate 	md_clr_status(MD_GBL_DAEMONS_DIE);
18030Sstevel@tonic-gate 
18040Sstevel@tonic-gate 	for (i = 0; i < md_nsets; i++)
18050Sstevel@tonic-gate 		/*
18060Sstevel@tonic-gate 		 * Only call into md_halt_set if s_un / s_ui are both set.
18070Sstevel@tonic-gate 		 * If they are NULL this set hasn't been accessed, so its
18080Sstevel@tonic-gate 		 * pointless performing the call.
18090Sstevel@tonic-gate 		 */
18100Sstevel@tonic-gate 		if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) {
18110Sstevel@tonic-gate 			if (md_halt_set(i, MD_HALT_CHECK)) {
18120Sstevel@tonic-gate 				if (md_start_daemons(init_queues))
18130Sstevel@tonic-gate 					cmn_err(CE_WARN,
18140Sstevel@tonic-gate 					    "md: restart of daemon threads "
18150Sstevel@tonic-gate 					    "failed");
18160Sstevel@tonic-gate 				for (j = 0; j < md_nsets; j++)
18170Sstevel@tonic-gate 					md_haltsnarf_exit(j);
18180Sstevel@tonic-gate 
18190Sstevel@tonic-gate 				return (md_global_lock_exit(
18200Sstevel@tonic-gate 				    global_locks_owned_mask, EBUSY,
18210Sstevel@tonic-gate 				    MD_ARRAY_WRITER, NULL));
18220Sstevel@tonic-gate 			}
18230Sstevel@tonic-gate 		}
18240Sstevel@tonic-gate 
18250Sstevel@tonic-gate 	/*
18260Sstevel@tonic-gate 	 * if we get here we are going to do it
18270Sstevel@tonic-gate 	 */
18280Sstevel@tonic-gate 	for (i = 0; i < md_nsets; i++) {
18290Sstevel@tonic-gate 		/*
18300Sstevel@tonic-gate 		 * Only call into md_halt_set if s_un / s_ui are both set.
18310Sstevel@tonic-gate 		 * If they are NULL this set hasn't been accessed, so its
18320Sstevel@tonic-gate 		 * pointless performing the call.
18330Sstevel@tonic-gate 		 */
18340Sstevel@tonic-gate 		if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) {
18350Sstevel@tonic-gate 			err = md_halt_set(i, MD_HALT_DOIT);
18360Sstevel@tonic-gate 			if (err != 0)
18370Sstevel@tonic-gate 				cmn_err(CE_NOTE,
18380Sstevel@tonic-gate 				    "md: halt failed set %u, error %d",
18390Sstevel@tonic-gate 				    (unsigned)i, err);
18400Sstevel@tonic-gate 		}
18410Sstevel@tonic-gate 	}
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate 	/*
18440Sstevel@tonic-gate 	 * issue a halt unload to each module to indicate that it
18450Sstevel@tonic-gate 	 * is about to be unloaded.  Each module is called once, set
18460Sstevel@tonic-gate 	 * has no meaning at this point in time.
18470Sstevel@tonic-gate 	 */
18480Sstevel@tonic-gate 	for (i = 0; i < MD_NOPS; i++) {
18490Sstevel@tonic-gate 		if (md_ops[i] == NULL)
18500Sstevel@tonic-gate 			continue;
18510Sstevel@tonic-gate 		err = (*(md_ops[i]->md_halt))(MD_HALT_UNLOAD, 0);
18520Sstevel@tonic-gate 		if (err != 0)
18530Sstevel@tonic-gate 			cmn_err(CE_NOTE,
18540Sstevel@tonic-gate 			    "md: halt failed for %s, error %d",
18550Sstevel@tonic-gate 			    md_ops[i]->md_driver.md_drivername, err);
18560Sstevel@tonic-gate 	}
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 	/* ddi_modclose the submodules */
18590Sstevel@tonic-gate 	for (i = 0; i < MD_NOPS; i++) {
18600Sstevel@tonic-gate 		/* skip if not open */
18610Sstevel@tonic-gate 		if ((md_ops[i] == NULL) || (md_mods[i] == NULL))
18620Sstevel@tonic-gate 			continue;
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 		/* find and unlink from md_opslist */
18650Sstevel@tonic-gate 		ops = md_ops[i];
18660Sstevel@tonic-gate 		mod = md_mods[i];
18670Sstevel@tonic-gate 		pops = &md_opslist;
18680Sstevel@tonic-gate 		for (lops = *pops; lops;
18690Sstevel@tonic-gate 		    pops = &lops->md_next, lops = *pops) {
18700Sstevel@tonic-gate 			if (lops == ops) {
18710Sstevel@tonic-gate 				*pops = ops->md_next;
18720Sstevel@tonic-gate 				ops->md_next = NULL;
18730Sstevel@tonic-gate 				break;
18740Sstevel@tonic-gate 			}
18750Sstevel@tonic-gate 		}
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 		/* uninitialize */
18787563SPrasad.Singamsetty@Sun.COM 		name = ops->md_driver.md_drivername;
18790Sstevel@tonic-gate 		md_ops[i] = NULL;
18800Sstevel@tonic-gate 		md_mods[i] = NULL;
18810Sstevel@tonic-gate 		ops->md_selfindex = 0;
18820Sstevel@tonic-gate 		ops->md_driver.md_drivername[0] = '\0';
18830Sstevel@tonic-gate 		rw_destroy(&ops->md_link_rw.lock);
18840Sstevel@tonic-gate 
18850Sstevel@tonic-gate 		/* close */
18860Sstevel@tonic-gate 		err = ddi_modclose(mod);
18870Sstevel@tonic-gate 		if (err != 0)
18880Sstevel@tonic-gate 			cmn_err(CE_NOTE,
18890Sstevel@tonic-gate 			    "md: halt close failed for %s, error %d",
18900Sstevel@tonic-gate 			    name ? name : "UNKNOWN", err);
18910Sstevel@tonic-gate 	}
18920Sstevel@tonic-gate 
18930Sstevel@tonic-gate 	/* Unload the database */
18940Sstevel@tonic-gate 	mddb_unload();
18950Sstevel@tonic-gate 
18960Sstevel@tonic-gate 	md_set_status(MD_GBL_HALTED);	/* we are ready to be unloaded */
18970Sstevel@tonic-gate 
18980Sstevel@tonic-gate 	for (i = 0; i < md_nsets; i++)
18990Sstevel@tonic-gate 		md_haltsnarf_exit(i);
19000Sstevel@tonic-gate 
19010Sstevel@tonic-gate 	return (md_global_lock_exit(global_locks_owned_mask, 0,
19027563SPrasad.Singamsetty@Sun.COM 	    MD_ARRAY_WRITER, NULL));
19030Sstevel@tonic-gate }
19040Sstevel@tonic-gate 
19050Sstevel@tonic-gate /*
19060Sstevel@tonic-gate  * md_layered_open() is an internal routine only for SVM modules.
19070Sstevel@tonic-gate  * So the input device will be a md_dev64_t, because all SVM modules internally
19080Sstevel@tonic-gate  * work with that device type.
19090Sstevel@tonic-gate  * ddi routines on the other hand work with dev_t. So, if we call any ddi
19100Sstevel@tonic-gate  * routines from here we first have to convert that device into a dev_t.
19110Sstevel@tonic-gate  */
19120Sstevel@tonic-gate 
19130Sstevel@tonic-gate int
md_layered_open(minor_t mnum,md_dev64_t * dev,int md_oflags)19140Sstevel@tonic-gate md_layered_open(
19150Sstevel@tonic-gate 	minor_t		mnum,
19160Sstevel@tonic-gate 	md_dev64_t	*dev,
19170Sstevel@tonic-gate 	int		md_oflags
19180Sstevel@tonic-gate )
19190Sstevel@tonic-gate {
19200Sstevel@tonic-gate 	int		flag = (FREAD | FWRITE);
19210Sstevel@tonic-gate 	cred_t		*cred_p = kcred;
19220Sstevel@tonic-gate 	major_t		major;
19230Sstevel@tonic-gate 	int		err;
19240Sstevel@tonic-gate 	dev_t		ddi_dev = md_dev64_to_dev(*dev);
19250Sstevel@tonic-gate 
19260Sstevel@tonic-gate 	if (ddi_dev == NODEV)
19270Sstevel@tonic-gate 		return (ENODEV);
19280Sstevel@tonic-gate 
19290Sstevel@tonic-gate 	major = getmajor(ddi_dev);
19300Sstevel@tonic-gate 
19310Sstevel@tonic-gate 	/* metadevice */
19320Sstevel@tonic-gate 	if (major == md_major) {
19330Sstevel@tonic-gate 		mdi_unit_t	*ui;
19340Sstevel@tonic-gate 
19350Sstevel@tonic-gate 		/* open underlying driver */
19360Sstevel@tonic-gate 		mnum = getminor(ddi_dev);
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate 		ui = MDI_UNIT(mnum);
19390Sstevel@tonic-gate 		if (md_ops[ui->ui_opsindex]->md_open != NULL) {
19400Sstevel@tonic-gate 			int ret = (*md_ops[ui->ui_opsindex]->md_open)(&ddi_dev,
19417563SPrasad.Singamsetty@Sun.COM 			    flag, OTYP_LYR, cred_p, md_oflags);
19420Sstevel@tonic-gate 			/*
19430Sstevel@tonic-gate 			 * As open() may change the device,
19440Sstevel@tonic-gate 			 * send this info back to the caller.
19450Sstevel@tonic-gate 			 */
19460Sstevel@tonic-gate 			*dev = md_expldev(ddi_dev);
19470Sstevel@tonic-gate 			return (ret);
19480Sstevel@tonic-gate 		}
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate 		/* or do it ourselves */
19510Sstevel@tonic-gate 		(void) md_unit_openclose_enter(ui);
19520Sstevel@tonic-gate 		err = md_unit_incopen(mnum, flag, OTYP_LYR);
19530Sstevel@tonic-gate 		md_unit_openclose_exit(ui);
19540Sstevel@tonic-gate 		/* convert our ddi_dev back to the dev we were given */
19550Sstevel@tonic-gate 		*dev = md_expldev(ddi_dev);
19560Sstevel@tonic-gate 		return (err);
19570Sstevel@tonic-gate 	}
19580Sstevel@tonic-gate 
19590Sstevel@tonic-gate 	/*
19600Sstevel@tonic-gate 	 * Open regular device, since open() may change dev_t give new dev_t
19610Sstevel@tonic-gate 	 * back to the caller.
19620Sstevel@tonic-gate 	 */
19630Sstevel@tonic-gate 	err = dev_lopen(&ddi_dev, flag, OTYP_LYR, cred_p);
19640Sstevel@tonic-gate 	*dev = md_expldev(ddi_dev);
19650Sstevel@tonic-gate 	return (err);
19660Sstevel@tonic-gate }
19670Sstevel@tonic-gate 
19680Sstevel@tonic-gate /*
19690Sstevel@tonic-gate  * md_layered_close() is an internal routine only for SVM modules.
19700Sstevel@tonic-gate  * So the input device will be a md_dev64_t, because all SVM modules internally
19710Sstevel@tonic-gate  * work with that device type.
19720Sstevel@tonic-gate  * ddi routines on the other hand work with dev_t. So, if we call any ddi
19730Sstevel@tonic-gate  * routines from here we first have to convert that device into a dev_t.
19740Sstevel@tonic-gate  */
19750Sstevel@tonic-gate void
md_layered_close(md_dev64_t dev,int md_cflags)19760Sstevel@tonic-gate md_layered_close(
19770Sstevel@tonic-gate 	md_dev64_t	dev,
19780Sstevel@tonic-gate 	int		md_cflags
19790Sstevel@tonic-gate )
19800Sstevel@tonic-gate {
19810Sstevel@tonic-gate 	int		flag = (FREAD | FWRITE);
19820Sstevel@tonic-gate 	cred_t		*cred_p = kcred;
19830Sstevel@tonic-gate 	dev_t		ddi_dev = md_dev64_to_dev(dev);
19840Sstevel@tonic-gate 	major_t		major = getmajor(ddi_dev);
19850Sstevel@tonic-gate 	minor_t		mnum = getminor(ddi_dev);
19860Sstevel@tonic-gate 
19870Sstevel@tonic-gate 	/* metadevice */
19880Sstevel@tonic-gate 	if (major == md_major) {
19890Sstevel@tonic-gate 		mdi_unit_t	*ui = MDI_UNIT(mnum);
19900Sstevel@tonic-gate 
19910Sstevel@tonic-gate 		/* close underlying driver */
19920Sstevel@tonic-gate 		if (md_ops[ui->ui_opsindex]->md_close != NULL) {
19930Sstevel@tonic-gate 			(*md_ops[ui->ui_opsindex]->md_close)
19940Sstevel@tonic-gate 			    (ddi_dev, flag, OTYP_LYR, cred_p, md_cflags);
19950Sstevel@tonic-gate 			return;
19960Sstevel@tonic-gate 		}
19970Sstevel@tonic-gate 
19980Sstevel@tonic-gate 		/* or do it ourselves */
19990Sstevel@tonic-gate 		(void) md_unit_openclose_enter(ui);
20000Sstevel@tonic-gate 		(void) md_unit_decopen(mnum, OTYP_LYR);
20010Sstevel@tonic-gate 		md_unit_openclose_exit(ui);
20020Sstevel@tonic-gate 		return;
20030Sstevel@tonic-gate 	}
20040Sstevel@tonic-gate 
20050Sstevel@tonic-gate 	/* close regular device */
20060Sstevel@tonic-gate 	(void) dev_lclose(ddi_dev, flag, OTYP_LYR, cred_p);
20070Sstevel@tonic-gate }
20080Sstevel@tonic-gate 
20090Sstevel@tonic-gate /*
20100Sstevel@tonic-gate  * saves a little code in mdstrategy
20110Sstevel@tonic-gate  */
20120Sstevel@tonic-gate int
errdone(mdi_unit_t * ui,struct buf * bp,int err)20130Sstevel@tonic-gate errdone(mdi_unit_t *ui, struct buf *bp, int err)
20140Sstevel@tonic-gate {
20150Sstevel@tonic-gate 	if ((bp->b_error = err) != 0)
20160Sstevel@tonic-gate 		bp->b_flags |= B_ERROR;
20170Sstevel@tonic-gate 	else
20180Sstevel@tonic-gate 		bp->b_resid = bp->b_bcount;
20190Sstevel@tonic-gate 	md_unit_readerexit(ui);
20200Sstevel@tonic-gate 	md_biodone(bp);
20210Sstevel@tonic-gate 	return (1);
20220Sstevel@tonic-gate }
20230Sstevel@tonic-gate 
20240Sstevel@tonic-gate static int	md_write_label = 0;
20250Sstevel@tonic-gate 
20260Sstevel@tonic-gate int
md_checkbuf(mdi_unit_t * ui,md_unit_t * un,buf_t * bp)20270Sstevel@tonic-gate md_checkbuf(mdi_unit_t *ui, md_unit_t *un, buf_t *bp)
20280Sstevel@tonic-gate {
20290Sstevel@tonic-gate 	diskaddr_t endblk;
20300Sstevel@tonic-gate 	set_t	setno = MD_UN2SET(un);
20310Sstevel@tonic-gate 
20320Sstevel@tonic-gate 	if ((md_get_setstatus(setno) & MD_SET_STALE) &&
20330Sstevel@tonic-gate 	    (! (bp->b_flags & B_READ)))
20340Sstevel@tonic-gate 		return (errdone(ui, bp, EROFS));
20350Sstevel@tonic-gate 	/*
20360Sstevel@tonic-gate 	 * Check early for unreasonable block number.
20370Sstevel@tonic-gate 	 *
20380Sstevel@tonic-gate 	 * b_blkno is defined as adaddr_t which is typedef'd to a long.
20390Sstevel@tonic-gate 	 * A problem occurs if b_blkno has bit 31 set and un_total_blocks
20400Sstevel@tonic-gate 	 * doesn't, b_blkno is then compared as a negative number which is
20410Sstevel@tonic-gate 	 * always less than a positive.
20420Sstevel@tonic-gate 	 */
20430Sstevel@tonic-gate 	if ((u_longlong_t)bp->b_lblkno > (u_longlong_t)un->c.un_total_blocks)
20440Sstevel@tonic-gate 		return (errdone(ui, bp, EINVAL));
20450Sstevel@tonic-gate 
20460Sstevel@tonic-gate 	if (bp->b_lblkno == un->c.un_total_blocks)
20470Sstevel@tonic-gate 		return (errdone(ui, bp, 0));
20480Sstevel@tonic-gate 
20490Sstevel@tonic-gate 	/*
20500Sstevel@tonic-gate 	 * make sure we don't clobber any labels
20510Sstevel@tonic-gate 	 */
20520Sstevel@tonic-gate 	if ((bp->b_lblkno == 0) && (! (bp->b_flags & B_READ)) &&
20530Sstevel@tonic-gate 	    (un->c.un_flag & MD_LABELED) && (! md_write_label)) {
20540Sstevel@tonic-gate 		cmn_err(CE_NOTE, "md: %s: write to label",
20550Sstevel@tonic-gate 		    md_shortname(getminor(bp->b_edev)));
20560Sstevel@tonic-gate 		return (errdone(ui, bp, EINVAL));
20570Sstevel@tonic-gate 	}
20580Sstevel@tonic-gate 
20590Sstevel@tonic-gate 	bp->b_resid = 0;
20600Sstevel@tonic-gate 	endblk = (diskaddr_t)(bp->b_lblkno +
20617563SPrasad.Singamsetty@Sun.COM 	    howmany(bp->b_bcount, DEV_BSIZE) - 1);
20620Sstevel@tonic-gate 
20630Sstevel@tonic-gate 	if (endblk > (un->c.un_total_blocks - 1)) {
20640Sstevel@tonic-gate 		bp->b_resid = dbtob(endblk - (un->c.un_total_blocks - 1));
20650Sstevel@tonic-gate 		endblk = un->c.un_total_blocks - 1;
20660Sstevel@tonic-gate 		bp->b_bcount -= bp->b_resid;
20670Sstevel@tonic-gate 	}
20680Sstevel@tonic-gate 	return (0);
20690Sstevel@tonic-gate }
20700Sstevel@tonic-gate 
20710Sstevel@tonic-gate /*
20720Sstevel@tonic-gate  * init_request_queue: initializes the request queues and creates the threads.
20730Sstevel@tonic-gate  *	return value =  0  :invalid num_threads
20740Sstevel@tonic-gate  *		     =  n   : n is the number of threads created.
20750Sstevel@tonic-gate  */
20760Sstevel@tonic-gate 
20770Sstevel@tonic-gate int
init_requestq(md_requestq_entry_t * rq,void (* threadfn)(),caddr_t threadfn_args,int pri,int init_queue)20780Sstevel@tonic-gate init_requestq(
20790Sstevel@tonic-gate 	md_requestq_entry_t *rq, /* request queue info */
20800Sstevel@tonic-gate 	void (*threadfn)(),	 /* function to start the thread */
20810Sstevel@tonic-gate 	caddr_t threadfn_args,	 /* args to the function */
20820Sstevel@tonic-gate 	int pri,		 /* thread priority */
20830Sstevel@tonic-gate 	int init_queue)		 /* flag to init queues */
20840Sstevel@tonic-gate {
20850Sstevel@tonic-gate 	struct mdq_anchor *rqhead;
20860Sstevel@tonic-gate 	int	i;
20870Sstevel@tonic-gate 	int	num_threads;
20880Sstevel@tonic-gate 
20890Sstevel@tonic-gate 
20900Sstevel@tonic-gate 	num_threads = *(rq->num_threadsp);
20910Sstevel@tonic-gate 	rqhead = rq->dispq_headp;
20920Sstevel@tonic-gate 
20930Sstevel@tonic-gate 	if (NULL_REQUESTQ_ENTRY(rq) || num_threads == 0)
20940Sstevel@tonic-gate 		return (0);
20950Sstevel@tonic-gate 
20960Sstevel@tonic-gate 	if (init_queue) {
20970Sstevel@tonic-gate 		rqhead->dq.maxq_len = 0;
20980Sstevel@tonic-gate 		rqhead->dq.treqs = 0;
20990Sstevel@tonic-gate 		rqhead->dq.dq_next = &rqhead->dq;
21000Sstevel@tonic-gate 		rqhead->dq.dq_prev = &rqhead->dq;
21010Sstevel@tonic-gate 		cv_init(&rqhead->a_cv, NULL, CV_DEFAULT, NULL);
21020Sstevel@tonic-gate 		mutex_init(&rqhead->a_mx, NULL, MUTEX_DEFAULT, NULL);
21030Sstevel@tonic-gate 	}
21040Sstevel@tonic-gate 	for (i = 0; i < num_threads; i++) {
21050Sstevel@tonic-gate 		(void) thread_create(NULL, 0, threadfn, threadfn_args, 0, &p0,
21060Sstevel@tonic-gate 		    TS_RUN, pri);
21070Sstevel@tonic-gate 	}
21080Sstevel@tonic-gate 	return (i);
21090Sstevel@tonic-gate }
21100Sstevel@tonic-gate 
/*
 * start_daemon() - thread entry point handed to init_requestq() by
 * md_start_daemons(); runs md_daemon() on the given queue anchor.
 *
 * The ASSERT(0) documents that control is not expected to come back
 * from md_daemon().
 */
static void
start_daemon(struct mdq_anchor *q)
{
	md_daemon(0, q);

	/* not expected to be reached */
	ASSERT(0);
}
21170Sstevel@tonic-gate 
21180Sstevel@tonic-gate /*
21190Sstevel@tonic-gate  * Creates all the md daemons.
21200Sstevel@tonic-gate  * Global:
21210Sstevel@tonic-gate  *	md_num_daemons is set to number of daemons.
21220Sstevel@tonic-gate  *	MD_GBL_DAEMONS_LIVE flag set to indicate the daemons are active.
21230Sstevel@tonic-gate  *
21240Sstevel@tonic-gate  * Return value: 0  success
21250Sstevel@tonic-gate  *		 1  failure
21260Sstevel@tonic-gate  */
21270Sstevel@tonic-gate int
md_start_daemons(int init_queue)21280Sstevel@tonic-gate md_start_daemons(int init_queue)
21290Sstevel@tonic-gate {
21300Sstevel@tonic-gate 	md_requestq_entry_t	*rqp;
21310Sstevel@tonic-gate 	int	cnt;
21320Sstevel@tonic-gate 	int	i;
21330Sstevel@tonic-gate 	int	retval = 0;
21340Sstevel@tonic-gate 
21350Sstevel@tonic-gate 
21360Sstevel@tonic-gate 	if (md_get_status() & MD_GBL_DAEMONS_LIVE) {
21370Sstevel@tonic-gate 		return (retval);
21380Sstevel@tonic-gate 	}
21390Sstevel@tonic-gate 	md_clr_status(MD_GBL_DAEMONS_DIE);
21400Sstevel@tonic-gate 
21410Sstevel@tonic-gate 	rqp = &md_daemon_queues[0];
21420Sstevel@tonic-gate 	i = 0;
21430Sstevel@tonic-gate 	while (!NULL_REQUESTQ_ENTRY(rqp)) {
21440Sstevel@tonic-gate 		cnt = init_requestq(rqp, start_daemon,
21457563SPrasad.Singamsetty@Sun.COM 		    (caddr_t)rqp->dispq_headp, minclsyspri, init_queue);
21460Sstevel@tonic-gate 
21470Sstevel@tonic-gate 		if (cnt && cnt != *rqp->num_threadsp) {
21480Sstevel@tonic-gate 			retval = 1;
21490Sstevel@tonic-gate 			break;
21500Sstevel@tonic-gate 		}
21510Sstevel@tonic-gate 		/*
21520Sstevel@tonic-gate 		 * initialize variables
21530Sstevel@tonic-gate 		 */
21540Sstevel@tonic-gate 		md_num_daemons += cnt;
21550Sstevel@tonic-gate 		rqp = &md_daemon_queues[++i];
21560Sstevel@tonic-gate 	}
21570Sstevel@tonic-gate 
21580Sstevel@tonic-gate 	md_set_status(MD_GBL_DAEMONS_LIVE);
21590Sstevel@tonic-gate 	return (retval);
21600Sstevel@tonic-gate }
21610Sstevel@tonic-gate 
21620Sstevel@tonic-gate int
md_loadsubmod(set_t setno,char * name,int drvrid)21630Sstevel@tonic-gate md_loadsubmod(set_t setno, char *name, int drvrid)
21640Sstevel@tonic-gate {
21650Sstevel@tonic-gate 	ddi_modhandle_t	mod;
21660Sstevel@tonic-gate 	md_ops_t	**pops, *ops;
21670Sstevel@tonic-gate 	int		i, err;
21680Sstevel@tonic-gate 
21690Sstevel@tonic-gate 	/*
21700Sstevel@tonic-gate 	 * See if the submodule is mdopened. If not, i is the index of the
21710Sstevel@tonic-gate 	 * next empty slot.
21720Sstevel@tonic-gate 	 */
21730Sstevel@tonic-gate 	for (i = 0; md_ops[i] != NULL; i++) {
21740Sstevel@tonic-gate 		if (strncmp(name, md_ops[i]->md_driver.md_drivername,
21750Sstevel@tonic-gate 		    MD_DRIVERNAMELEN) == 0)
21760Sstevel@tonic-gate 			return (i);
21770Sstevel@tonic-gate 
21780Sstevel@tonic-gate 		if (i == (MD_NOPS - 1))
21790Sstevel@tonic-gate 			return (-1);
21800Sstevel@tonic-gate 	}
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 	if (drvrid < 0) {
21830Sstevel@tonic-gate 		/* Do not try to add any records to the DB when stale. */
21840Sstevel@tonic-gate 		if (md_get_setstatus(setno) & MD_SET_STALE)
21850Sstevel@tonic-gate 			return (-1);
21860Sstevel@tonic-gate 		drvrid = md_setshared_name(setno, name, 0L);
21870Sstevel@tonic-gate 	}
21880Sstevel@tonic-gate 
21890Sstevel@tonic-gate 	if (drvrid < 0)
21900Sstevel@tonic-gate 		return (-1);
21910Sstevel@tonic-gate 
21920Sstevel@tonic-gate 	/* open and import the md_ops of the submodules */
21930Sstevel@tonic-gate 	mod = ddi_modopen(name, KRTLD_MODE_FIRST, &err);
21940Sstevel@tonic-gate 	if (mod == NULL) {
21950Sstevel@tonic-gate 		cmn_err(CE_WARN, "md_loadsubmod: "
21960Sstevel@tonic-gate 		    "unable to ddi_modopen %s, error %d\n", name, err);
21970Sstevel@tonic-gate 		return (-1);
21980Sstevel@tonic-gate 	}
21990Sstevel@tonic-gate 	pops = ddi_modsym(mod, "md_interface_ops", &err);
22000Sstevel@tonic-gate 	if (pops == NULL) {
22010Sstevel@tonic-gate 		cmn_err(CE_WARN, "md_loadsubmod: "
22020Sstevel@tonic-gate 		    "unable to import md_interface_ops from %s, error %d\n",
22030Sstevel@tonic-gate 		    name, err);
22040Sstevel@tonic-gate 		(void) ddi_modclose(mod);
22050Sstevel@tonic-gate 		return (-1);
22060Sstevel@tonic-gate 	}
22070Sstevel@tonic-gate 
22080Sstevel@tonic-gate 	/* ddi_modsym returns pointer to md_interface_ops in submod */
22090Sstevel@tonic-gate 	ops = *pops;
22100Sstevel@tonic-gate 
22110Sstevel@tonic-gate 	/* initialize */
22120Sstevel@tonic-gate 	ops->md_selfindex = i;
22130Sstevel@tonic-gate 	rw_init(&ops->md_link_rw.lock, NULL, RW_DEFAULT, NULL);
22140Sstevel@tonic-gate 	(void) strncpy(ops->md_driver.md_drivername, name,
22150Sstevel@tonic-gate 	    MD_DRIVERNAMELEN);
22160Sstevel@tonic-gate 
22170Sstevel@tonic-gate 	/* plumb */
22180Sstevel@tonic-gate 	md_ops[i] = ops;
22190Sstevel@tonic-gate 	md_mods[i] = mod;
22200Sstevel@tonic-gate 	ops->md_next = md_opslist;
22210Sstevel@tonic-gate 	md_opslist = ops;
22220Sstevel@tonic-gate 
22230Sstevel@tonic-gate 	/* return index */
22240Sstevel@tonic-gate 	return (i);
22250Sstevel@tonic-gate }
22260Sstevel@tonic-gate 
22270Sstevel@tonic-gate int
md_getmodindex(md_driver_t * driver,int dont_load,int db_notrequired)22280Sstevel@tonic-gate md_getmodindex(md_driver_t *driver, int dont_load, int db_notrequired)
22290Sstevel@tonic-gate {
22300Sstevel@tonic-gate 	int	i;
22310Sstevel@tonic-gate 	int	modindex;
22320Sstevel@tonic-gate 	char	*name = driver->md_drivername;
22330Sstevel@tonic-gate 	set_t	setno = driver->md_setno;
22340Sstevel@tonic-gate 	int	drvid;
22350Sstevel@tonic-gate 	int	local_dont_load;
22360Sstevel@tonic-gate 
22370Sstevel@tonic-gate 	if (setno >= md_nsets)
22380Sstevel@tonic-gate 		return (-1);
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate 	for (i = 0; name[i] != 0; i++)
22410Sstevel@tonic-gate 		if (i == (MD_DRIVERNAMELEN -1))
22420Sstevel@tonic-gate 			return (-1);
22430Sstevel@tonic-gate 
22440Sstevel@tonic-gate 	/*
22450Sstevel@tonic-gate 	 * If set is STALE, set local_dont_load to 1 since no records
22460Sstevel@tonic-gate 	 * should be added to DB when stale.
22470Sstevel@tonic-gate 	 */
22480Sstevel@tonic-gate 	if (md_get_setstatus(setno) & MD_SET_STALE) {
22490Sstevel@tonic-gate 		local_dont_load = 1;
22500Sstevel@tonic-gate 	} else {
22510Sstevel@tonic-gate 		local_dont_load = dont_load;
22520Sstevel@tonic-gate 	}
22530Sstevel@tonic-gate 
22540Sstevel@tonic-gate 	/*
22550Sstevel@tonic-gate 	 * Single thread ioctl module binding with respect to
22560Sstevel@tonic-gate 	 * similar code executed in md_loadsubmod that is called
22570Sstevel@tonic-gate 	 * from md_snarf_db_set (which is where that path does
22580Sstevel@tonic-gate 	 * its md_haltsnarf_enter call).
22590Sstevel@tonic-gate 	 */
22600Sstevel@tonic-gate 	md_haltsnarf_enter(setno);
22610Sstevel@tonic-gate 
22620Sstevel@tonic-gate 	/* See if the submodule is already ddi_modopened. */
22630Sstevel@tonic-gate 	for (i = 0; md_ops[i] != NULL; i++) {
22640Sstevel@tonic-gate 		if (strncmp(name, md_ops[i]->md_driver.md_drivername,
22650Sstevel@tonic-gate 		    MD_DRIVERNAMELEN) == 0) {
22660Sstevel@tonic-gate 			if (! local_dont_load &&
22670Sstevel@tonic-gate 			    (md_getshared_key(setno, name) == MD_KEYBAD)) {
22680Sstevel@tonic-gate 				if (md_setshared_name(setno, name, 0L)
22690Sstevel@tonic-gate 				    == MD_KEYBAD) {
22700Sstevel@tonic-gate 					if (!db_notrequired)
22710Sstevel@tonic-gate 						goto err;
22720Sstevel@tonic-gate 				}
22730Sstevel@tonic-gate 			}
22740Sstevel@tonic-gate 			md_haltsnarf_exit(setno);
22750Sstevel@tonic-gate 			return (i);
22760Sstevel@tonic-gate 		}
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 		if (i == (MD_NOPS -1))
22790Sstevel@tonic-gate 			break;
22800Sstevel@tonic-gate 	}
22810Sstevel@tonic-gate 
22820Sstevel@tonic-gate 	if (local_dont_load)
22830Sstevel@tonic-gate 		goto err;
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 	drvid = ((db_notrequired) ? 0 : (int)md_getshared_key(setno, name));
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate 	/* ddi_modopen the submodule */
22880Sstevel@tonic-gate 	modindex = md_loadsubmod(setno, name, drvid);
22890Sstevel@tonic-gate 	if (modindex < 0)
22900Sstevel@tonic-gate 		goto err;
22910Sstevel@tonic-gate 
22920Sstevel@tonic-gate 	if (md_ops[modindex]->md_snarf != NULL)
22930Sstevel@tonic-gate 		(*(md_ops[modindex]->md_snarf))(MD_SNARF_DOIT, setno);
22940Sstevel@tonic-gate 
22950Sstevel@tonic-gate 	md_haltsnarf_exit(setno);
22960Sstevel@tonic-gate 	return (modindex);
22970Sstevel@tonic-gate 
22980Sstevel@tonic-gate err:	md_haltsnarf_exit(setno);
22990Sstevel@tonic-gate 	return (-1);
23000Sstevel@tonic-gate }
23010Sstevel@tonic-gate 
23020Sstevel@tonic-gate void
md_call_strategy(buf_t * bp,int flags,void * private)23030Sstevel@tonic-gate md_call_strategy(buf_t *bp, int flags, void *private)
23040Sstevel@tonic-gate {
23050Sstevel@tonic-gate 	mdi_unit_t	*ui;
23060Sstevel@tonic-gate 
23070Sstevel@tonic-gate 	if (mdv_strategy_tstpnt)
23080Sstevel@tonic-gate 		if ((*mdv_strategy_tstpnt)(bp, flags, private) != 0)
23090Sstevel@tonic-gate 			return;
23100Sstevel@tonic-gate 	if (getmajor(bp->b_edev) != md_major) {
23110Sstevel@tonic-gate 		(void) bdev_strategy(bp);
23120Sstevel@tonic-gate 		return;
23130Sstevel@tonic-gate 	}
23140Sstevel@tonic-gate 
23150Sstevel@tonic-gate 	flags = (flags & MD_STR_PASSEDON) | MD_STR_NOTTOP;
23160Sstevel@tonic-gate 	ui = MDI_UNIT(getminor(bp->b_edev));
23170Sstevel@tonic-gate 	ASSERT(ui != NULL);
23180Sstevel@tonic-gate 	(*md_ops[ui->ui_opsindex]->md_strategy)(bp, flags, private);
23190Sstevel@tonic-gate }
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate /*
23220Sstevel@tonic-gate  * md_call_ioctl:
23230Sstevel@tonic-gate  * -------------
23240Sstevel@tonic-gate  * Issue the specified ioctl to the device associated with the given md_dev64_t
23250Sstevel@tonic-gate  *
23260Sstevel@tonic-gate  * Arguments:
23270Sstevel@tonic-gate  *	dev	- underlying device [md_dev64_t]
23280Sstevel@tonic-gate  *	cmd	- ioctl to perform
23290Sstevel@tonic-gate  *	data	- arguments / result location
23300Sstevel@tonic-gate  *	mode	- read/write/layered ioctl
23310Sstevel@tonic-gate  *	lockp	- lock reference
23320Sstevel@tonic-gate  *
23330Sstevel@tonic-gate  * Returns:
23340Sstevel@tonic-gate  *	0	success
23350Sstevel@tonic-gate  *	!=0	Failure (error code)
23360Sstevel@tonic-gate  */
23370Sstevel@tonic-gate int
md_call_ioctl(md_dev64_t dev,int cmd,void * data,int mode,IOLOCK * lockp)23380Sstevel@tonic-gate md_call_ioctl(md_dev64_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
23390Sstevel@tonic-gate {
23400Sstevel@tonic-gate 	dev_t		device = md_dev64_to_dev(dev);
23410Sstevel@tonic-gate 	int		rval;
23420Sstevel@tonic-gate 	mdi_unit_t	*ui;
23430Sstevel@tonic-gate 
23440Sstevel@tonic-gate 	/*
23450Sstevel@tonic-gate 	 * See if device is a metadevice. If not call cdev_ioctl(), otherwise
23460Sstevel@tonic-gate 	 * call the ioctl entry-point in the metadevice.
23470Sstevel@tonic-gate 	 */
23480Sstevel@tonic-gate 	if (md_getmajor(dev) != md_major) {
23490Sstevel@tonic-gate 		int	rv;
23500Sstevel@tonic-gate 		rval = cdev_ioctl(device, cmd, (intptr_t)data, mode,
23510Sstevel@tonic-gate 		    ddi_get_cred(), &rv);
23520Sstevel@tonic-gate 	} else {
23530Sstevel@tonic-gate 		ui = MDI_UNIT(md_getminor(dev));
23540Sstevel@tonic-gate 		ASSERT(ui != NULL);
23550Sstevel@tonic-gate 		rval = (*md_ops[ui->ui_opsindex]->md_ioctl)(device, cmd, data,
23560Sstevel@tonic-gate 		    mode, lockp);
23570Sstevel@tonic-gate 	}
23580Sstevel@tonic-gate 	return (rval);
23590Sstevel@tonic-gate }
23600Sstevel@tonic-gate 
23610Sstevel@tonic-gate void
md_rem_link(set_t setno,int id,krwlock_t * rw,md_link_t ** head)23620Sstevel@tonic-gate md_rem_link(set_t setno, int id, krwlock_t *rw, md_link_t **head)
23630Sstevel@tonic-gate {
23640Sstevel@tonic-gate 	md_link_t	*next;
23650Sstevel@tonic-gate 	md_link_t	**pprev;
23660Sstevel@tonic-gate 
23670Sstevel@tonic-gate 	rw_enter(rw, RW_WRITER);
23680Sstevel@tonic-gate 
23690Sstevel@tonic-gate 	next = *head;
23700Sstevel@tonic-gate 	pprev = head;
23710Sstevel@tonic-gate 	while (next) {
23720Sstevel@tonic-gate 		if ((next->ln_setno == setno) && (next->ln_id == id)) {
23730Sstevel@tonic-gate 			*pprev = next->ln_next;
23740Sstevel@tonic-gate 			rw_exit(rw);
23750Sstevel@tonic-gate 			return;
23760Sstevel@tonic-gate 		}
23770Sstevel@tonic-gate 		pprev = &next->ln_next;
23780Sstevel@tonic-gate 		next = next->ln_next;
23790Sstevel@tonic-gate 	}
23800Sstevel@tonic-gate 
23810Sstevel@tonic-gate 	rw_exit(rw);
23820Sstevel@tonic-gate }
23830Sstevel@tonic-gate 
23840Sstevel@tonic-gate int
md_dev_exists(md_dev64_t dev)23850Sstevel@tonic-gate md_dev_exists(md_dev64_t dev)
23860Sstevel@tonic-gate {
23870Sstevel@tonic-gate 
23880Sstevel@tonic-gate 	if (dev == NODEV64)
23890Sstevel@tonic-gate 		return (0);
23900Sstevel@tonic-gate 
23910Sstevel@tonic-gate 	if (strcmp(ddi_major_to_name(md_getmajor(dev)), "md") != 0)
23920Sstevel@tonic-gate 		return (1);
23930Sstevel@tonic-gate 
23940Sstevel@tonic-gate 	if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) ||
23950Sstevel@tonic-gate 	    (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits))
23960Sstevel@tonic-gate 		return (0);
23970Sstevel@tonic-gate 
23980Sstevel@tonic-gate 	if (MDI_UNIT(md_getminor(dev)) != NULL)
23990Sstevel@tonic-gate 		return (1);
24000Sstevel@tonic-gate 
24010Sstevel@tonic-gate 	return (0);
24020Sstevel@tonic-gate }
24030Sstevel@tonic-gate 
24040Sstevel@tonic-gate md_parent_t
md_get_parent(md_dev64_t dev)24050Sstevel@tonic-gate md_get_parent(md_dev64_t dev)
24060Sstevel@tonic-gate {
24070Sstevel@tonic-gate 	md_unit_t	*un;
24080Sstevel@tonic-gate 	mdi_unit_t	*ui;
24090Sstevel@tonic-gate 	md_parent_t	parent;
24100Sstevel@tonic-gate 
24110Sstevel@tonic-gate 	if (md_getmajor(dev) != md_major)
24120Sstevel@tonic-gate 		return (MD_NO_PARENT);
24130Sstevel@tonic-gate 
24140Sstevel@tonic-gate 	ui = MDI_UNIT(md_getminor(dev));
24150Sstevel@tonic-gate 
24160Sstevel@tonic-gate 	un = (md_unit_t *)md_unit_readerlock(ui);
24170Sstevel@tonic-gate 	parent = un->c.un_parent;
24180Sstevel@tonic-gate 	md_unit_readerexit(ui);
24190Sstevel@tonic-gate 
24200Sstevel@tonic-gate 	return (parent);
24210Sstevel@tonic-gate }
24220Sstevel@tonic-gate 
24230Sstevel@tonic-gate void
md_set_parent(md_dev64_t dev,md_parent_t parent)24240Sstevel@tonic-gate md_set_parent(md_dev64_t dev, md_parent_t parent)
24250Sstevel@tonic-gate {
24260Sstevel@tonic-gate 	md_unit_t	*un;
24270Sstevel@tonic-gate 	mdi_unit_t	*ui;
24280Sstevel@tonic-gate 
24290Sstevel@tonic-gate 	if (md_getmajor(dev) != md_major)
24300Sstevel@tonic-gate 		return;
24310Sstevel@tonic-gate 
24320Sstevel@tonic-gate 	ui = MDI_UNIT(md_getminor(dev));
24330Sstevel@tonic-gate 
24340Sstevel@tonic-gate 	un = (md_unit_t *)md_unit_readerlock(ui);
24350Sstevel@tonic-gate 	un->c.un_parent = parent;
24360Sstevel@tonic-gate 	md_unit_readerexit(ui);
24370Sstevel@tonic-gate }
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate void
md_reset_parent(md_dev64_t dev)24400Sstevel@tonic-gate md_reset_parent(md_dev64_t dev)
24410Sstevel@tonic-gate {
24420Sstevel@tonic-gate 	md_unit_t	*un;
24430Sstevel@tonic-gate 	mdi_unit_t	*ui;
24440Sstevel@tonic-gate 
24450Sstevel@tonic-gate 	if (md_getmajor(dev) != md_major)
24460Sstevel@tonic-gate 		return;
24470Sstevel@tonic-gate 
24480Sstevel@tonic-gate 	ui = MDI_UNIT(md_getminor(dev));
24490Sstevel@tonic-gate 
24500Sstevel@tonic-gate 	un = (md_unit_t *)md_unit_readerlock(ui);
24510Sstevel@tonic-gate 	un->c.un_parent = MD_NO_PARENT;
24520Sstevel@tonic-gate 	md_unit_readerexit(ui);
24530Sstevel@tonic-gate }
24540Sstevel@tonic-gate 
24550Sstevel@tonic-gate 
24560Sstevel@tonic-gate static intptr_t (*hot_spare_interface)() = (intptr_t (*)())NULL;
24570Sstevel@tonic-gate 
24580Sstevel@tonic-gate int
md_hot_spare_ifc(hs_cmds_t cmd,mddb_recid_t id,u_longlong_t size,int labeled,mddb_recid_t * hs_id,mdkey_t * key,md_dev64_t * dev,diskaddr_t * sblock)24590Sstevel@tonic-gate md_hot_spare_ifc(
24600Sstevel@tonic-gate 	hs_cmds_t	cmd,
24610Sstevel@tonic-gate 	mddb_recid_t	id,
24620Sstevel@tonic-gate 	u_longlong_t	size,
24630Sstevel@tonic-gate 	int		labeled,
24640Sstevel@tonic-gate 	mddb_recid_t	*hs_id,
24650Sstevel@tonic-gate 	mdkey_t		*key,
24660Sstevel@tonic-gate 	md_dev64_t	*dev,
24670Sstevel@tonic-gate 	diskaddr_t	*sblock)
24680Sstevel@tonic-gate {
24690Sstevel@tonic-gate 	int		err;
24700Sstevel@tonic-gate 
24710Sstevel@tonic-gate 	/*
24720Sstevel@tonic-gate 	 * RW lock on hot_spare_interface. We don't want it to change from
24730Sstevel@tonic-gate 	 * underneath us. If hot_spare_interface is NULL we're going to
24740Sstevel@tonic-gate 	 * need to set it. So we need to upgrade to a WRITER lock. If that
24750Sstevel@tonic-gate 	 * doesn't work, we drop the lock and reenter as WRITER. This leaves
24760Sstevel@tonic-gate 	 * a small hole during which hot_spare_interface could be modified
24770Sstevel@tonic-gate 	 * so we check it for NULL again. What a pain. Then if still null
24780Sstevel@tonic-gate 	 * load from md_get_named_service.
24790Sstevel@tonic-gate 	 */
24800Sstevel@tonic-gate 
24810Sstevel@tonic-gate 	rw_enter(&hsp_rwlp.lock, RW_READER);
24820Sstevel@tonic-gate 	if (hot_spare_interface == NULL) {
24830Sstevel@tonic-gate 		if (rw_tryupgrade(&hsp_rwlp.lock) == 0) {
24840Sstevel@tonic-gate 			rw_exit(&hsp_rwlp.lock);
24850Sstevel@tonic-gate 			rw_enter(&hsp_rwlp.lock, RW_WRITER);
24860Sstevel@tonic-gate 			if (hot_spare_interface != NULL) {
24870Sstevel@tonic-gate 				err = ((*hot_spare_interface)
24880Sstevel@tonic-gate 				    (cmd, id, size, labeled, hs_id, key, dev,
24890Sstevel@tonic-gate 				    sblock));
24900Sstevel@tonic-gate 				rw_exit(&hsp_rwlp.lock);
24910Sstevel@tonic-gate 				return (err);
24920Sstevel@tonic-gate 			}
24930Sstevel@tonic-gate 		}
24940Sstevel@tonic-gate 		hot_spare_interface = md_get_named_service(NODEV64, ANY_SERVICE,
24950Sstevel@tonic-gate 		    "hot spare interface", 0);
24960Sstevel@tonic-gate 		rw_downgrade(&hsp_rwlp.lock);
24970Sstevel@tonic-gate 	}
24980Sstevel@tonic-gate 
24990Sstevel@tonic-gate 	if (hot_spare_interface == NULL) {
25000Sstevel@tonic-gate 		cmn_err(CE_WARN, "md: no hotspare interface");
25010Sstevel@tonic-gate 		rw_exit(&hsp_rwlp.lock);
25020Sstevel@tonic-gate 		return (0);
25030Sstevel@tonic-gate 	}
25040Sstevel@tonic-gate 
25050Sstevel@tonic-gate 	err = ((*hot_spare_interface)
25060Sstevel@tonic-gate 	    (cmd, id, size, labeled, hs_id, key, dev, sblock));
25070Sstevel@tonic-gate 	rw_exit(&hsp_rwlp.lock);
25080Sstevel@tonic-gate 	return (err);
25090Sstevel@tonic-gate }
25100Sstevel@tonic-gate 
25110Sstevel@tonic-gate void
md_clear_hot_spare_interface()25120Sstevel@tonic-gate md_clear_hot_spare_interface()
25130Sstevel@tonic-gate {
25140Sstevel@tonic-gate 	rw_enter(&hsp_rwlp.lock, RW_WRITER);
25150Sstevel@tonic-gate 	hot_spare_interface = NULL;
25160Sstevel@tonic-gate 	rw_exit(&hsp_rwlp.lock);
25170Sstevel@tonic-gate }
25180Sstevel@tonic-gate 
25190Sstevel@tonic-gate 
25200Sstevel@tonic-gate static intptr_t (*notify_interface)() = (intptr_t (*)())NULL;
25210Sstevel@tonic-gate 
25220Sstevel@tonic-gate int
md_notify_interface(md_event_cmds_t cmd,md_tags_t tag,set_t set,md_dev64_t dev,md_event_type_t event)25230Sstevel@tonic-gate md_notify_interface(
25240Sstevel@tonic-gate 	md_event_cmds_t cmd,
25250Sstevel@tonic-gate 	md_tags_t	tag,
25260Sstevel@tonic-gate 	set_t		set,
25270Sstevel@tonic-gate 	md_dev64_t	dev,
25280Sstevel@tonic-gate 	md_event_type_t event
25290Sstevel@tonic-gate )
25300Sstevel@tonic-gate {
25310Sstevel@tonic-gate 	int		err;
25320Sstevel@tonic-gate 
25330Sstevel@tonic-gate 	if (md_event_queue == NULL)
25340Sstevel@tonic-gate 		return (0);
25350Sstevel@tonic-gate 	rw_enter(&ni_rwlp.lock, RW_READER);
25360Sstevel@tonic-gate 	if (notify_interface == NULL) {
25370Sstevel@tonic-gate 		if (rw_tryupgrade(&ni_rwlp.lock) == 0) {
25380Sstevel@tonic-gate 			rw_exit(&ni_rwlp.lock);
25390Sstevel@tonic-gate 			rw_enter(&ni_rwlp.lock, RW_WRITER);
25400Sstevel@tonic-gate 			if (notify_interface != NULL) {
25410Sstevel@tonic-gate 				err = ((*notify_interface)
25420Sstevel@tonic-gate 				    (cmd, tag, set, dev, event));
25430Sstevel@tonic-gate 				rw_exit(&ni_rwlp.lock);
25440Sstevel@tonic-gate 				return (err);
25450Sstevel@tonic-gate 			}
25460Sstevel@tonic-gate 		}
25470Sstevel@tonic-gate 		notify_interface = md_get_named_service(NODEV64, ANY_SERVICE,
25480Sstevel@tonic-gate 		    "notify interface", 0);
25490Sstevel@tonic-gate 		rw_downgrade(&ni_rwlp.lock);
25500Sstevel@tonic-gate 	}
25510Sstevel@tonic-gate 	if (notify_interface == NULL) {
25520Sstevel@tonic-gate 		cmn_err(CE_WARN, "md: no notify interface");
25530Sstevel@tonic-gate 		rw_exit(&ni_rwlp.lock);
25540Sstevel@tonic-gate 		return (0);
25550Sstevel@tonic-gate 	}
25560Sstevel@tonic-gate 	err = ((*notify_interface)(cmd, tag, set, dev, event));
25570Sstevel@tonic-gate 	rw_exit(&ni_rwlp.lock);
25580Sstevel@tonic-gate 	return (err);
25590Sstevel@tonic-gate }
25600Sstevel@tonic-gate 
25610Sstevel@tonic-gate char *
obj2devname(uint32_t tag,uint_t setno,md_dev64_t dev)25620Sstevel@tonic-gate obj2devname(uint32_t tag, uint_t setno, md_dev64_t dev)
25630Sstevel@tonic-gate {
25640Sstevel@tonic-gate 	char		*setname;
25650Sstevel@tonic-gate 	char		name[MD_MAX_CTDLEN];
25660Sstevel@tonic-gate 	minor_t		mnum = md_getminor(dev);
25670Sstevel@tonic-gate 	major_t		maj = md_getmajor(dev);
25680Sstevel@tonic-gate 	int		rtn = 0;
25690Sstevel@tonic-gate 
25700Sstevel@tonic-gate 	/*
25710Sstevel@tonic-gate 	 * Verify that the passed dev_t refers to a valid metadevice.
25720Sstevel@tonic-gate 	 * If it doesn't we can make no assumptions as to what the device
25730Sstevel@tonic-gate 	 * name is. Return NULL in these cases.
25740Sstevel@tonic-gate 	 */
25750Sstevel@tonic-gate 	if (((maj != md_major) || (MD_MIN2UNIT(mnum) >= md_nunits)) ||
25760Sstevel@tonic-gate 	    (MD_MIN2SET(mnum) >= md_nsets)) {
25770Sstevel@tonic-gate 		return (NULL);
25780Sstevel@tonic-gate 	}
25790Sstevel@tonic-gate 
25800Sstevel@tonic-gate 	setname = NULL;
25810Sstevel@tonic-gate 	name[0] = '\0';
25820Sstevel@tonic-gate 	switch (tag) {
25830Sstevel@tonic-gate 	case SVM_TAG_HSP:
25840Sstevel@tonic-gate 		if (setno == 0) {
25850Sstevel@tonic-gate 			rtn = snprintf(name, sizeof (name), "hsp%u",
25860Sstevel@tonic-gate 			    (unsigned)MD_MIN2UNIT(mnum));
25870Sstevel@tonic-gate 		} else {
25880Sstevel@tonic-gate 			setname = mddb_getsetname(setno);
25890Sstevel@tonic-gate 			if (setname != NULL) {
25900Sstevel@tonic-gate 				rtn = snprintf(name, sizeof (name), "%s/hsp%u",
25910Sstevel@tonic-gate 				    setname, (unsigned)MD_MIN2UNIT(mnum));
25920Sstevel@tonic-gate 			}
25930Sstevel@tonic-gate 		}
25940Sstevel@tonic-gate 		break;
25950Sstevel@tonic-gate 	case SVM_TAG_DRIVE:
25960Sstevel@tonic-gate 		(void) sprintf(name, "drive");
25970Sstevel@tonic-gate 		break;
25980Sstevel@tonic-gate 	case SVM_TAG_HOST:
25990Sstevel@tonic-gate 		(void) sprintf(name, "host");
26000Sstevel@tonic-gate 		break;
26010Sstevel@tonic-gate 	case SVM_TAG_SET:
26020Sstevel@tonic-gate 		rtn = snprintf(name, sizeof (name), "%s",
26030Sstevel@tonic-gate 		    mddb_getsetname(setno));
26040Sstevel@tonic-gate 		if ((name[0] == '\0') || (rtn >= sizeof (name))) {
26050Sstevel@tonic-gate 			(void) sprintf(name, "diskset");
26060Sstevel@tonic-gate 			rtn = 0;
26070Sstevel@tonic-gate 		}
26080Sstevel@tonic-gate 		break;
26090Sstevel@tonic-gate 	default:
26100Sstevel@tonic-gate 		rtn = snprintf(name, sizeof (name), "%s", md_shortname(mnum));
26110Sstevel@tonic-gate 		break;
26120Sstevel@tonic-gate 	}
26130Sstevel@tonic-gate 
26140Sstevel@tonic-gate 	/* Check if we got any rubbish for any of the snprintf's */
26150Sstevel@tonic-gate 	if ((name[0] == '\0') || (rtn >= sizeof (name))) {
26160Sstevel@tonic-gate 		return (NULL);
26170Sstevel@tonic-gate 	}
26180Sstevel@tonic-gate 
26190Sstevel@tonic-gate 	return (md_strdup(name));
26200Sstevel@tonic-gate }
26210Sstevel@tonic-gate 
26220Sstevel@tonic-gate /*
 * Sysevent subclass and mdnotify event type pairs.
 *
 * One element of ev_table: ev_get() compares a caller-supplied subclass
 * string against se_ev and returns the paired md_ev on a match.
 */
26230Sstevel@tonic-gate struct node {
26240Sstevel@tonic-gate 	char		*se_ev;		/* sysevent subclass string (key) */
26250Sstevel@tonic-gate 	md_event_type_t	md_ev;		/* matching mdnotify event type */
26260Sstevel@tonic-gate };
26270Sstevel@tonic-gate 
26280Sstevel@tonic-gate /*
 * Sysevent subclass -> mdnotify event type lookup table.
 *
 * ev_get() binary-searches this table using strcmp() on se_ev, so the
 * table must be sorted in case sensitive ascending order of the sysevent
 * string values (the strings the ESC_SVM_* macros expand to).  Adding an
 * entry in the wrong position makes lookups fail for some subclasses.
 */
26320Sstevel@tonic-gate static struct node ev_table[] = {
26330Sstevel@tonic-gate 	{ ESC_SVM_ADD,			EQ_ADD },
26340Sstevel@tonic-gate 	{ ESC_SVM_ATTACH,		EQ_ATTACH },
26350Sstevel@tonic-gate 	{ ESC_SVM_ATTACHING,		EQ_ATTACHING },
26360Sstevel@tonic-gate 	{ ESC_SVM_CHANGE,		EQ_CHANGE },
26370Sstevel@tonic-gate 	{ ESC_SVM_CREATE,		EQ_CREATE },
26380Sstevel@tonic-gate 	{ ESC_SVM_DELETE,		EQ_DELETE },
26390Sstevel@tonic-gate 	{ ESC_SVM_DETACH,		EQ_DETACH },
26400Sstevel@tonic-gate 	{ ESC_SVM_DETACHING,		EQ_DETACHING },
26410Sstevel@tonic-gate 	{ ESC_SVM_DRIVE_ADD,		EQ_DRIVE_ADD },
26420Sstevel@tonic-gate 	{ ESC_SVM_DRIVE_DELETE,		EQ_DRIVE_DELETE },
26430Sstevel@tonic-gate 	{ ESC_SVM_ENABLE,		EQ_ENABLE },
26440Sstevel@tonic-gate 	{ ESC_SVM_ERRED,		EQ_ERRED },
26450Sstevel@tonic-gate 	{ ESC_SVM_EXCHANGE,		EQ_EXCHANGE },
26460Sstevel@tonic-gate 	{ ESC_SVM_GROW,			EQ_GROW },
26470Sstevel@tonic-gate 	{ ESC_SVM_HS_CHANGED,		EQ_HS_CHANGED },
26480Sstevel@tonic-gate 	{ ESC_SVM_HS_FREED,		EQ_HS_FREED },
26490Sstevel@tonic-gate 	{ ESC_SVM_HOST_ADD,		EQ_HOST_ADD },
26500Sstevel@tonic-gate 	{ ESC_SVM_HOST_DELETE,		EQ_HOST_DELETE },
26510Sstevel@tonic-gate 	{ ESC_SVM_HOTSPARED,		EQ_HOTSPARED },
26520Sstevel@tonic-gate 	{ ESC_SVM_INIT_FAILED,		EQ_INIT_FAILED },
26530Sstevel@tonic-gate 	{ ESC_SVM_INIT_FATAL,		EQ_INIT_FATAL },
26540Sstevel@tonic-gate 	{ ESC_SVM_INIT_START,		EQ_INIT_START },
26550Sstevel@tonic-gate 	{ ESC_SVM_INIT_SUCCESS,		EQ_INIT_SUCCESS },
26560Sstevel@tonic-gate 	{ ESC_SVM_IOERR,		EQ_IOERR },
26570Sstevel@tonic-gate 	{ ESC_SVM_LASTERRED,		EQ_LASTERRED },
26580Sstevel@tonic-gate 	{ ESC_SVM_MEDIATOR_ADD,		EQ_MEDIATOR_ADD },
26590Sstevel@tonic-gate 	{ ESC_SVM_MEDIATOR_DELETE,	EQ_MEDIATOR_DELETE },
26600Sstevel@tonic-gate 	{ ESC_SVM_OFFLINE,		EQ_OFFLINE },
26610Sstevel@tonic-gate 	{ ESC_SVM_OK,			EQ_OK },
26620Sstevel@tonic-gate 	{ ESC_SVM_ONLINE,		EQ_ONLINE },
26630Sstevel@tonic-gate 	{ ESC_SVM_OPEN_FAIL,		EQ_OPEN_FAIL },
26640Sstevel@tonic-gate 	{ ESC_SVM_REGEN_DONE,		EQ_REGEN_DONE },
26650Sstevel@tonic-gate 	{ ESC_SVM_REGEN_FAILED,		EQ_REGEN_FAILED },
26660Sstevel@tonic-gate 	{ ESC_SVM_REGEN_START,		EQ_REGEN_START },
26670Sstevel@tonic-gate 	{ ESC_SVM_RELEASE,		EQ_RELEASE },
26680Sstevel@tonic-gate 	{ ESC_SVM_REMOVE,		EQ_REMOVE },
26690Sstevel@tonic-gate 	{ ESC_SVM_RENAME_DST,		EQ_RENAME_DST },
26700Sstevel@tonic-gate 	{ ESC_SVM_RENAME_SRC,		EQ_RENAME_SRC },
26710Sstevel@tonic-gate 	{ ESC_SVM_REPLACE,		EQ_REPLACE },
26720Sstevel@tonic-gate 	{ ESC_SVM_RESYNC_DONE,		EQ_RESYNC_DONE },
26730Sstevel@tonic-gate 	{ ESC_SVM_RESYNC_FAILED,	EQ_RESYNC_FAILED },
26740Sstevel@tonic-gate 	{ ESC_SVM_RESYNC_START,		EQ_RESYNC_START },
26750Sstevel@tonic-gate 	{ ESC_SVM_RESYNC_SUCCESS,	EQ_RESYNC_SUCCESS },
26760Sstevel@tonic-gate 	{ ESC_SVM_TAKEOVER,		EQ_TAKEOVER }
26770Sstevel@tonic-gate };
26780Sstevel@tonic-gate 
/*
 * Sysevent tag -> mdnotify tag translation table.
 *
 * do_mdnotify() indexes this array directly with the numeric sysevent tag
 * it is given; tags at or beyond the end of the array are mapped to
 * TAG_UNK by the caller.  The position of each entry is therefore
 * significant.
 */
26790Sstevel@tonic-gate static md_tags_t md_tags[] = {
26800Sstevel@tonic-gate 	TAG_UNK,		/* index 0 */
26810Sstevel@tonic-gate 	TAG_METADEVICE,		/* index 1 */
26820Sstevel@tonic-gate 	TAG_UNK,		/* index 2 */
26830Sstevel@tonic-gate 	TAG_UNK,		/* index 3 */
26840Sstevel@tonic-gate 	TAG_UNK,		/* index 4 */
26850Sstevel@tonic-gate 	TAG_UNK,		/* index 5 */
26860Sstevel@tonic-gate 	TAG_REPLICA,		/* index 6 */
26870Sstevel@tonic-gate 	TAG_HSP,		/* index 7 */
26880Sstevel@tonic-gate 	TAG_HS,			/* index 8 */
26890Sstevel@tonic-gate 	TAG_SET,		/* index 9 */
26900Sstevel@tonic-gate 	TAG_DRIVE,		/* index 10 */
26910Sstevel@tonic-gate 	TAG_HOST,		/* index 11 */
26920Sstevel@tonic-gate 	TAG_MEDIATOR		/* index 12 */
26930Sstevel@tonic-gate };
26940Sstevel@tonic-gate 
26950Sstevel@tonic-gate md_event_type_t
ev_get(char * subclass)26960Sstevel@tonic-gate ev_get(char *subclass)
26970Sstevel@tonic-gate {
26980Sstevel@tonic-gate 	int	high, mid, low, p;
26990Sstevel@tonic-gate 
27000Sstevel@tonic-gate 	low = 0;
27010Sstevel@tonic-gate 	high = (sizeof (ev_table) / sizeof (ev_table[0])) - 1;
27020Sstevel@tonic-gate 	while (low <= high) {
27030Sstevel@tonic-gate 		mid = (high + low) / 2;
27040Sstevel@tonic-gate 		p = strcmp(subclass, ev_table[mid].se_ev);
27050Sstevel@tonic-gate 		if (p == 0) {
27060Sstevel@tonic-gate 			return (ev_table[mid].md_ev);
27070Sstevel@tonic-gate 		} else if (p < 0) {
27080Sstevel@tonic-gate 			high = mid - 1;
27090Sstevel@tonic-gate 		} else {
27100Sstevel@tonic-gate 			low = mid + 1;
27110Sstevel@tonic-gate 		}
27120Sstevel@tonic-gate 	}
27130Sstevel@tonic-gate 
27140Sstevel@tonic-gate 	return (EQ_EMPTY);
27150Sstevel@tonic-gate }
27160Sstevel@tonic-gate 
27170Sstevel@tonic-gate /*
27180Sstevel@tonic-gate  * Log mdnotify event
27190Sstevel@tonic-gate  */
27200Sstevel@tonic-gate void
do_mdnotify(char * se_subclass,uint32_t tag,set_t setno,md_dev64_t devid)27210Sstevel@tonic-gate do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid)
27220Sstevel@tonic-gate {
27230Sstevel@tonic-gate 	md_event_type_t	ev_type;
27240Sstevel@tonic-gate 	md_tags_t	md_tag;
27250Sstevel@tonic-gate 
27260Sstevel@tonic-gate 	/* Translate sysevent into mdnotify event */
27270Sstevel@tonic-gate 	ev_type = ev_get(se_subclass);
27280Sstevel@tonic-gate 
27290Sstevel@tonic-gate 	if (tag >= (sizeof (md_tags) / sizeof (md_tags[0]))) {
27300Sstevel@tonic-gate 		md_tag = TAG_UNK;
27310Sstevel@tonic-gate 	} else {
27320Sstevel@tonic-gate 		md_tag = md_tags[tag];
27330Sstevel@tonic-gate 	}
27340Sstevel@tonic-gate 
27350Sstevel@tonic-gate 	NOTIFY_MD(md_tag, setno, devid, ev_type);
27360Sstevel@tonic-gate }
27370Sstevel@tonic-gate 
27380Sstevel@tonic-gate /*
27390Sstevel@tonic-gate  * Log SVM sys events
27400Sstevel@tonic-gate  */
27410Sstevel@tonic-gate void
svm_gen_sysevent(char * se_class,char * se_subclass,uint32_t tag,set_t setno,md_dev64_t devid)27420Sstevel@tonic-gate svm_gen_sysevent(
27430Sstevel@tonic-gate 	char		*se_class,
27440Sstevel@tonic-gate 	char		*se_subclass,
27450Sstevel@tonic-gate 	uint32_t	tag,
27460Sstevel@tonic-gate 	set_t		setno,
27470Sstevel@tonic-gate 	md_dev64_t	devid
27480Sstevel@tonic-gate )
27490Sstevel@tonic-gate {
27500Sstevel@tonic-gate 	nvlist_t		*attr_list;
27510Sstevel@tonic-gate 	sysevent_id_t		eid;
27520Sstevel@tonic-gate 	int			err = DDI_SUCCESS;
27530Sstevel@tonic-gate 	char			*devname;
27540Sstevel@tonic-gate 	extern dev_info_t	*md_devinfo;
27550Sstevel@tonic-gate 
27560Sstevel@tonic-gate 	/* Raise the mdnotify event before anything else */
27570Sstevel@tonic-gate 	do_mdnotify(se_subclass, tag, setno, devid);
27580Sstevel@tonic-gate 
27590Sstevel@tonic-gate 	if (md_devinfo == NULL) {
27600Sstevel@tonic-gate 		return;
27610Sstevel@tonic-gate 	}
27620Sstevel@tonic-gate 
27630Sstevel@tonic-gate 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_NOSLEEP);
27640Sstevel@tonic-gate 
27650Sstevel@tonic-gate 	if (err == DDI_SUCCESS) {
27660Sstevel@tonic-gate 		/* Add the version numver */
27670Sstevel@tonic-gate 		err = nvlist_add_uint32(attr_list, SVM_VERSION_NO,
27680Sstevel@tonic-gate 		    (uint32_t)SVM_VERSION);
27690Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
27700Sstevel@tonic-gate 			goto fail;
27710Sstevel@tonic-gate 		}
27720Sstevel@tonic-gate 
27730Sstevel@tonic-gate 		/* Add the tag attribute */
27740Sstevel@tonic-gate 		err = nvlist_add_uint32(attr_list, SVM_TAG, (uint32_t)tag);
27750Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
27760Sstevel@tonic-gate 			goto fail;
27770Sstevel@tonic-gate 		}
27780Sstevel@tonic-gate 
27790Sstevel@tonic-gate 		/* Add the set number attribute */
27800Sstevel@tonic-gate 		err = nvlist_add_uint32(attr_list, SVM_SET_NO, (uint32_t)setno);
27810Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
27820Sstevel@tonic-gate 			goto fail;
27830Sstevel@tonic-gate 		}
27840Sstevel@tonic-gate 
27850Sstevel@tonic-gate 		/* Add the device id attribute */
27860Sstevel@tonic-gate 		err = nvlist_add_uint64(attr_list, SVM_DEV_ID, (uint64_t)devid);
27870Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
27880Sstevel@tonic-gate 			goto fail;
27890Sstevel@tonic-gate 		}
27900Sstevel@tonic-gate 
27910Sstevel@tonic-gate 		/* Add the device name attribute */
27920Sstevel@tonic-gate 		devname = obj2devname(tag, setno, devid);
27930Sstevel@tonic-gate 		if (devname != NULL) {
27940Sstevel@tonic-gate 			err = nvlist_add_string(attr_list, SVM_DEV_NAME,
27950Sstevel@tonic-gate 			    devname);
27960Sstevel@tonic-gate 			freestr(devname);
27970Sstevel@tonic-gate 		} else {
27980Sstevel@tonic-gate 			err = nvlist_add_string(attr_list, SVM_DEV_NAME,
27990Sstevel@tonic-gate 			    "unspecified");
28000Sstevel@tonic-gate 		}
28010Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
28020Sstevel@tonic-gate 			goto fail;
28030Sstevel@tonic-gate 		}
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate 		/* Attempt to post event */
28060Sstevel@tonic-gate 		err = ddi_log_sysevent(md_devinfo, DDI_VENDOR_SUNW, se_class,
28070Sstevel@tonic-gate 		    se_subclass, attr_list, &eid, DDI_SLEEP);
28080Sstevel@tonic-gate 
28090Sstevel@tonic-gate 		nvlist_free(attr_list);
28100Sstevel@tonic-gate 		if (err != DDI_SUCCESS) {
28110Sstevel@tonic-gate 			cmn_err(CE_WARN, "Failed to log event for %s, %s,"
28120Sstevel@tonic-gate 			    " err=%x", se_class, se_subclass, err);
28130Sstevel@tonic-gate 		}
28140Sstevel@tonic-gate 	}
28150Sstevel@tonic-gate 
28160Sstevel@tonic-gate 	return;
28170Sstevel@tonic-gate 
28180Sstevel@tonic-gate fail:
28190Sstevel@tonic-gate 	nvlist_free(attr_list);
28200Sstevel@tonic-gate 	cmn_err(CE_WARN, "Failed to setup attributes for event %s, %s, err=%x",
28210Sstevel@tonic-gate 	    se_class, se_subclass, err);
28220Sstevel@tonic-gate }
28230Sstevel@tonic-gate 
28240Sstevel@tonic-gate void
md_clear_named_service()28250Sstevel@tonic-gate md_clear_named_service()
28260Sstevel@tonic-gate {
28270Sstevel@tonic-gate 	rw_enter(&ni_rwlp.lock, RW_WRITER);
28280Sstevel@tonic-gate 	notify_interface = NULL;
28290Sstevel@tonic-gate 	rw_exit(&ni_rwlp.lock);
28300Sstevel@tonic-gate }
28310Sstevel@tonic-gate 
28320Sstevel@tonic-gate void
md_create_unit_incore(minor_t mnum,md_ops_t * ops,int alloc_lock)28330Sstevel@tonic-gate md_create_unit_incore(minor_t mnum, md_ops_t *ops, int alloc_lock)
28340Sstevel@tonic-gate {
28350Sstevel@tonic-gate 	mdi_unit_t	*ui;
28360Sstevel@tonic-gate 	set_t		setno = MD_MIN2SET(mnum);
28370Sstevel@tonic-gate 
28380Sstevel@tonic-gate 	ui = (mdi_unit_t *)kmem_zalloc(sizeof (mdi_unit_t), KM_SLEEP);
28390Sstevel@tonic-gate 	ui->ui_opsindex = ops->md_selfindex;
28400Sstevel@tonic-gate 
28410Sstevel@tonic-gate 	/* initialize all the incore conditional variables */
28420Sstevel@tonic-gate 	mutex_init(&ui->ui_mx, NULL, MUTEX_DEFAULT, NULL);
28430Sstevel@tonic-gate 	cv_init(&ui->ui_cv, NULL, CV_DEFAULT, NULL);
28440Sstevel@tonic-gate 
28458452SJohn.Wren.Kennedy@Sun.COM 	if (alloc_lock) {
28468452SJohn.Wren.Kennedy@Sun.COM 		ui->ui_io_lock = kmem_zalloc(sizeof (md_io_lock_t), KM_SLEEP);
28478452SJohn.Wren.Kennedy@Sun.COM 		mutex_init(&ui->ui_io_lock->io_mx, NULL, MUTEX_DEFAULT, NULL);
28488452SJohn.Wren.Kennedy@Sun.COM 		cv_init(&ui->ui_io_lock->io_cv, NULL, CV_DEFAULT, NULL);
28498452SJohn.Wren.Kennedy@Sun.COM 		mutex_init(&ui->ui_io_lock->io_list_mutex, NULL,
28508452SJohn.Wren.Kennedy@Sun.COM 		    MUTEX_DEFAULT, NULL);
28518452SJohn.Wren.Kennedy@Sun.COM 		ui->ui_io_lock->io_list_front = NULL;
28528452SJohn.Wren.Kennedy@Sun.COM 		ui->ui_io_lock->io_list_back = NULL;
28538452SJohn.Wren.Kennedy@Sun.COM 	}
28540Sstevel@tonic-gate 	if (! (md_get_setstatus(setno) & MD_SET_SNARFING)) {
28550Sstevel@tonic-gate 		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
28560Sstevel@tonic-gate 		MDI_VOIDUNIT(mnum) = (void *) ui;
28570Sstevel@tonic-gate 		rw_exit(&md_unit_array_rw.lock);
28580Sstevel@tonic-gate 	} else
28590Sstevel@tonic-gate 		MDI_VOIDUNIT(mnum) = (void *) ui;
28600Sstevel@tonic-gate 
28610Sstevel@tonic-gate 	rw_enter(&ops->md_link_rw.lock, RW_WRITER);
28620Sstevel@tonic-gate 	ui->ui_link.ln_next = ops->md_head;
28630Sstevel@tonic-gate 	ui->ui_link.ln_setno = setno;
28640Sstevel@tonic-gate 	ui->ui_link.ln_id = mnum;
28650Sstevel@tonic-gate 	ops->md_head = &ui->ui_link;
28660Sstevel@tonic-gate 	/* setup the unavailable field */
28670Sstevel@tonic-gate #if defined(_ILP32)
28681623Stw21770 	if (((md_unit_t *)MD_UNIT(mnum))->c.un_revision & MD_64BIT_META_DEV) {
28690Sstevel@tonic-gate 		ui->ui_tstate |= MD_64MD_ON_32KERNEL;
28700Sstevel@tonic-gate 		cmn_err(CE_NOTE, "d%d is unavailable because 64 bit "
28710Sstevel@tonic-gate 		    "metadevices are not accessible on a 32 bit kernel",
28720Sstevel@tonic-gate 		    mnum);
28730Sstevel@tonic-gate 	}
28740Sstevel@tonic-gate #endif
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate 	rw_exit(&ops->md_link_rw.lock);
28770Sstevel@tonic-gate }
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate void
md_destroy_unit_incore(minor_t mnum,md_ops_t * ops)28800Sstevel@tonic-gate md_destroy_unit_incore(minor_t mnum, md_ops_t *ops)
28810Sstevel@tonic-gate {
28820Sstevel@tonic-gate 	mdi_unit_t	*ui;
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate 	/*
28850Sstevel@tonic-gate 	 * ASSUMPTION: md_unit_array_rw WRITER lock is held.
28860Sstevel@tonic-gate 	 */
28870Sstevel@tonic-gate 	ui = MDI_UNIT(mnum);
28880Sstevel@tonic-gate 	if (ui == NULL)
28890Sstevel@tonic-gate 		return;
28900Sstevel@tonic-gate 
28910Sstevel@tonic-gate 	md_rem_link(MD_MIN2SET(mnum), mnum, &ops->md_link_rw.lock,
28920Sstevel@tonic-gate 	    &ops->md_head);
28930Sstevel@tonic-gate 
28940Sstevel@tonic-gate 	/* destroy the io lock if one is being used */
28950Sstevel@tonic-gate 	if (ui->ui_io_lock) {
28960Sstevel@tonic-gate 		mutex_destroy(&ui->ui_io_lock->io_mx);
28970Sstevel@tonic-gate 		cv_destroy(&ui->ui_io_lock->io_cv);
28980Sstevel@tonic-gate 		kmem_free(ui->ui_io_lock, sizeof (md_io_lock_t));
28990Sstevel@tonic-gate 	}
29000Sstevel@tonic-gate 
29010Sstevel@tonic-gate 	/* teardown kstat */
29020Sstevel@tonic-gate 	md_kstat_destroy(mnum);
29030Sstevel@tonic-gate 
29040Sstevel@tonic-gate 	/* destroy all the incore conditional variables */
29050Sstevel@tonic-gate 	mutex_destroy(&ui->ui_mx);
29060Sstevel@tonic-gate 	cv_destroy(&ui->ui_cv);
29070Sstevel@tonic-gate 
29080Sstevel@tonic-gate 	kmem_free(ui, sizeof (mdi_unit_t));
29090Sstevel@tonic-gate 	MDI_VOIDUNIT(mnum) = (void *) NULL;
29100Sstevel@tonic-gate }
29110Sstevel@tonic-gate 
29120Sstevel@tonic-gate void
md_rem_names(sv_dev_t * sv,int nsv)29130Sstevel@tonic-gate md_rem_names(sv_dev_t *sv, int nsv)
29140Sstevel@tonic-gate {
29150Sstevel@tonic-gate 	int	i, s;
29160Sstevel@tonic-gate 	int	max_sides;
29170Sstevel@tonic-gate 
29180Sstevel@tonic-gate 	if (nsv == 0)
29190Sstevel@tonic-gate 		return;
29200Sstevel@tonic-gate 
29210Sstevel@tonic-gate 	/* All entries removed are in the same diskset */
29220Sstevel@tonic-gate 	if (md_get_setstatus(sv[0].setno) & MD_SET_MNSET)
29230Sstevel@tonic-gate 		max_sides = MD_MNMAXSIDES;
29240Sstevel@tonic-gate 	else
29250Sstevel@tonic-gate 		max_sides = MD_MAXSIDES;
29260Sstevel@tonic-gate 
29270Sstevel@tonic-gate 	for (i = 0; i < nsv; i++)
29280Sstevel@tonic-gate 		for (s = 0; s < max_sides; s++)
29290Sstevel@tonic-gate 			(void) md_remdevname(sv[i].setno, s, sv[i].key);
29300Sstevel@tonic-gate }
29310Sstevel@tonic-gate 
29320Sstevel@tonic-gate /*
29330Sstevel@tonic-gate  * Checking user args before we get into physio - returns 0 for ok, else errno
29340Sstevel@tonic-gate  * We do a lot of checking against illegal arguments here because some of the
29350Sstevel@tonic-gate  * real disk drivers don't like certain kinds of arguments. (e.g xy doesn't
29360Sstevel@tonic-gate  * like odd address user buffer.) Those drivers capture bad arguments in
29370Sstevel@tonic-gate  * xxread and xxwrite. But since meta-driver calls their strategy routines
29380Sstevel@tonic-gate  * directly, two bad scenario might happen:
29390Sstevel@tonic-gate  *	1. the real strategy doesn't like it and panic.
29400Sstevel@tonic-gate  *	2. the real strategy doesn't like it and set B_ERROR.
29410Sstevel@tonic-gate  *
29420Sstevel@tonic-gate  * The second case is no better than the first one, since the meta-driver
29430Sstevel@tonic-gate  * will treat it as a media-error and off line the mirror metapartition.
29440Sstevel@tonic-gate  * (Too bad there is no way to tell what error it is.)
29450Sstevel@tonic-gate  *
29460Sstevel@tonic-gate  */
29470Sstevel@tonic-gate int
md_chk_uio(struct uio * uio)29480Sstevel@tonic-gate md_chk_uio(struct uio *uio)
29490Sstevel@tonic-gate {
29500Sstevel@tonic-gate 	int	i;
29510Sstevel@tonic-gate 	struct iovec *iov;
29520Sstevel@tonic-gate 
29530Sstevel@tonic-gate 	/*
29540Sstevel@tonic-gate 	 * Check for negative or not block-aligned offset
29550Sstevel@tonic-gate 	 */
29560Sstevel@tonic-gate 	if ((uio->uio_loffset < 0) ||
29570Sstevel@tonic-gate 	    ((uio->uio_loffset & (DEV_BSIZE - 1)) != 0)) {
29580Sstevel@tonic-gate 		return (EINVAL);
29590Sstevel@tonic-gate 	}
29600Sstevel@tonic-gate 	iov = uio->uio_iov;
29610Sstevel@tonic-gate 	i = uio->uio_iovcnt;
29620Sstevel@tonic-gate 
29630Sstevel@tonic-gate 	while (i--) {
29640Sstevel@tonic-gate 		if ((iov->iov_len & (DEV_BSIZE - 1)) != 0)
29650Sstevel@tonic-gate 			return (EINVAL);
29660Sstevel@tonic-gate 		/*
29670Sstevel@tonic-gate 		 * Bug # 1212146
29680Sstevel@tonic-gate 		 * The default is to not check alignment, but we can now check
29690Sstevel@tonic-gate 		 * for a larger number of alignments if desired.
29700Sstevel@tonic-gate 		 */
29710Sstevel@tonic-gate 		if ((uintptr_t)(iov->iov_base) & md_uio_alignment_mask)
29720Sstevel@tonic-gate 			return (EINVAL);
29730Sstevel@tonic-gate 		iov++;
29740Sstevel@tonic-gate 	}
29750Sstevel@tonic-gate 	return (0);
29760Sstevel@tonic-gate }
29770Sstevel@tonic-gate 
29780Sstevel@tonic-gate char *
md_shortname(minor_t mnum)29790Sstevel@tonic-gate md_shortname(
29800Sstevel@tonic-gate 	minor_t		mnum
29810Sstevel@tonic-gate )
29820Sstevel@tonic-gate {
29831623Stw21770 	static char	buf[MAXPATHLEN];
29841623Stw21770 	char		*devname;
29851623Stw21770 	char		*invalid = " (Invalid minor number %u) ";
29861623Stw21770 	char		*metaname;
29871623Stw21770 	mdc_unit_t	*un;
29881623Stw21770 	side_t		side;
29890Sstevel@tonic-gate 	set_t		setno = MD_MIN2SET(mnum);
29900Sstevel@tonic-gate 	unit_t		unit = MD_MIN2UNIT(mnum);
29910Sstevel@tonic-gate 
29921623Stw21770 	if ((un = MD_UNIT(mnum)) == NULL) {
29931623Stw21770 		(void) snprintf(buf, sizeof (buf), invalid, mnum);
29941623Stw21770 		return (buf);
29951623Stw21770 	}
29961623Stw21770 
29971623Stw21770 	/*
29981623Stw21770 	 * If unit is not a friendly name unit, derive the name from the
29991623Stw21770 	 * minor number.
30001623Stw21770 	 */
30011623Stw21770 	if ((un->un_revision & MD_FN_META_DEV) == 0) {
30021623Stw21770 		/* This is a traditional metadevice */
30031623Stw21770 		if (setno == MD_LOCAL_SET) {
30041623Stw21770 			(void) snprintf(buf, sizeof (buf), "d%u",
30057563SPrasad.Singamsetty@Sun.COM 			    (unsigned)unit);
30061623Stw21770 		} else {
30071623Stw21770 			(void) snprintf(buf, sizeof (buf), "%s/d%u",
30081623Stw21770 			    mddb_getsetname(setno), (unsigned)unit);
30091623Stw21770 		}
30100Sstevel@tonic-gate 		return (buf);
30110Sstevel@tonic-gate 	}
30120Sstevel@tonic-gate 
30131623Stw21770 	/*
30141623Stw21770 	 * It is a friendly name metadevice, so we need to get its name.
30151623Stw21770 	 */
30161623Stw21770 	side = mddb_getsidenum(setno);
30171623Stw21770 	devname = (char *)kmem_alloc(MAXPATHLEN, KM_SLEEP);
30181623Stw21770 	if (md_getdevname(setno, side, MD_KEYWILD,
30197563SPrasad.Singamsetty@Sun.COM 	    md_makedevice(md_major, mnum), devname, MAXPATHLEN) == 0) {
30201623Stw21770 		/*
30211623Stw21770 		 * md_getdevname has given us either /dev/md/dsk/<metaname>
30221623Stw21770 		 * or /dev/md/<setname>/dsk/<metname> depending on whether
30231623Stw21770 		 * or not we are in the local set.  Thus, we'll pull the
30241623Stw21770 		 * metaname from this string.
30251623Stw21770 		 */
30261623Stw21770 		if ((metaname = strrchr(devname, '/')) == NULL) {
30271623Stw21770 			(void) snprintf(buf, sizeof (buf), invalid, mnum);
30281623Stw21770 			goto out;
30291623Stw21770 		}
30301623Stw21770 		metaname++;	/* move past slash */
30311623Stw21770 		if (setno == MD_LOCAL_SET) {
30321623Stw21770 			/* No set name. */
30331623Stw21770 			(void) snprintf(buf, sizeof (buf), "%s", metaname);
30341623Stw21770 		} else {
30351623Stw21770 			/* Include setname */
30361623Stw21770 			(void) snprintf(buf, sizeof (buf), "%s/%s",
30377563SPrasad.Singamsetty@Sun.COM 			    mddb_getsetname(setno), metaname);
30381623Stw21770 		}
30390Sstevel@tonic-gate 	} else {
30401623Stw21770 		/* We couldn't find the name. */
30411623Stw21770 		(void) snprintf(buf, sizeof (buf), invalid, mnum);
30420Sstevel@tonic-gate 	}
30430Sstevel@tonic-gate 
30441623Stw21770 out:
30451623Stw21770 	kmem_free(devname, MAXPATHLEN);
30460Sstevel@tonic-gate 	return (buf);
30470Sstevel@tonic-gate }
30480Sstevel@tonic-gate 
30490Sstevel@tonic-gate char *
md_devname(set_t setno,md_dev64_t dev,char * buf,size_t size)30500Sstevel@tonic-gate md_devname(
30510Sstevel@tonic-gate 	set_t		setno,
30520Sstevel@tonic-gate 	md_dev64_t	dev,
30530Sstevel@tonic-gate 	char		*buf,
30540Sstevel@tonic-gate 	size_t		size
30550Sstevel@tonic-gate )
30560Sstevel@tonic-gate {
30570Sstevel@tonic-gate 	static char	mybuf[MD_MAX_CTDLEN];
30580Sstevel@tonic-gate 	int		err;
30590Sstevel@tonic-gate 
30600Sstevel@tonic-gate 	if (buf == NULL) {
30610Sstevel@tonic-gate 		buf = mybuf;
30620Sstevel@tonic-gate 		size = sizeof (mybuf);
30630Sstevel@tonic-gate 	} else {
30640Sstevel@tonic-gate 		ASSERT(size >= MD_MAX_CTDLEN);
30650Sstevel@tonic-gate 	}
30660Sstevel@tonic-gate 
30674491Sjmf 	err = md_getdevname_common(setno, mddb_getsidenum(setno),
30687563SPrasad.Singamsetty@Sun.COM 	    0, dev, buf, size, MD_NOWAIT_LOCK);
30690Sstevel@tonic-gate 	if (err) {
30700Sstevel@tonic-gate 		if (err == ENOENT) {
30710Sstevel@tonic-gate 			(void) sprintf(buf, "(Unavailable)");
30720Sstevel@tonic-gate 		} else {
30730Sstevel@tonic-gate 			(void) sprintf(buf, "(%u.%u)",
30740Sstevel@tonic-gate 			    md_getmajor(dev), md_getminor(dev));
30750Sstevel@tonic-gate 		}
30760Sstevel@tonic-gate 	}
30770Sstevel@tonic-gate 
30780Sstevel@tonic-gate 	return (buf);
30790Sstevel@tonic-gate }
30800Sstevel@tonic-gate void
md_minphys(buf_t * pb)30810Sstevel@tonic-gate md_minphys(buf_t *pb)
30820Sstevel@tonic-gate {
30830Sstevel@tonic-gate 	extern unsigned md_maxbcount;
30840Sstevel@tonic-gate 
30850Sstevel@tonic-gate 	if (pb->b_bcount > md_maxbcount)
30860Sstevel@tonic-gate 		pb->b_bcount = md_maxbcount;
30870Sstevel@tonic-gate }
30880Sstevel@tonic-gate 
30890Sstevel@tonic-gate void
md_bioinit(struct buf * bp)30900Sstevel@tonic-gate md_bioinit(struct buf *bp)
30910Sstevel@tonic-gate {
30920Sstevel@tonic-gate 	ASSERT(bp);
30930Sstevel@tonic-gate 
30940Sstevel@tonic-gate 	bioinit(bp);
30950Sstevel@tonic-gate 	bp->b_back = bp;
30960Sstevel@tonic-gate 	bp->b_forw = bp;
30970Sstevel@tonic-gate 	bp->b_flags = B_BUSY;	/* initialize flags */
30980Sstevel@tonic-gate }
30990Sstevel@tonic-gate 
31000Sstevel@tonic-gate void
md_bioreset(struct buf * bp)31010Sstevel@tonic-gate md_bioreset(struct buf *bp)
31020Sstevel@tonic-gate {
31030Sstevel@tonic-gate 	ASSERT(bp);
31040Sstevel@tonic-gate 
31050Sstevel@tonic-gate 	bioreset(bp);
31060Sstevel@tonic-gate 	bp->b_back = bp;
31070Sstevel@tonic-gate 	bp->b_forw = bp;
31080Sstevel@tonic-gate 	bp->b_flags = B_BUSY;	/* initialize flags */
31090Sstevel@tonic-gate }
31100Sstevel@tonic-gate 
31110Sstevel@tonic-gate /*
31120Sstevel@tonic-gate  * md_bioclone is needed as long as the real bioclone only takes a daddr_t
31130Sstevel@tonic-gate  * as block number.
31140Sstevel@tonic-gate  * We simply call bioclone with all input parameters but blkno, and set the
31150Sstevel@tonic-gate  * correct blkno afterwards.
31160Sstevel@tonic-gate  * Caveat Emptor: bp_mem must not be NULL!
31170Sstevel@tonic-gate  */
31180Sstevel@tonic-gate buf_t *
md_bioclone(buf_t * bp,off_t off,size_t len,dev_t dev,diskaddr_t blkno,int (* iodone)(buf_t *),buf_t * bp_mem,int sleep)31190Sstevel@tonic-gate md_bioclone(buf_t *bp, off_t off, size_t len, dev_t dev, diskaddr_t blkno,
31200Sstevel@tonic-gate 		int (*iodone)(buf_t *), buf_t *bp_mem, int sleep)
31210Sstevel@tonic-gate {
31220Sstevel@tonic-gate 	(void) bioclone(bp, off, len, dev, 0, iodone, bp_mem, sleep);
31230Sstevel@tonic-gate 	bp_mem->b_lblkno = blkno;
31240Sstevel@tonic-gate 	return (bp_mem);
31250Sstevel@tonic-gate }
31260Sstevel@tonic-gate 
31270Sstevel@tonic-gate 
31280Sstevel@tonic-gate /*
31290Sstevel@tonic-gate  * kstat stuff
31300Sstevel@tonic-gate  */
31310Sstevel@tonic-gate void
md_kstat_init_ui(minor_t mnum,mdi_unit_t * ui)31320Sstevel@tonic-gate md_kstat_init_ui(
31330Sstevel@tonic-gate 	minor_t		 mnum,
31340Sstevel@tonic-gate 	mdi_unit_t	*ui
31350Sstevel@tonic-gate )
31360Sstevel@tonic-gate {
31370Sstevel@tonic-gate 	if ((ui != NULL) && (ui->ui_kstat == NULL)) {
31380Sstevel@tonic-gate 		set_t	setno = MD_MIN2SET(mnum);
31390Sstevel@tonic-gate 		unit_t  unit = MD_MIN2UNIT(mnum);
31400Sstevel@tonic-gate 		char	module[KSTAT_STRLEN];
31410Sstevel@tonic-gate 		char	*p = module;
31420Sstevel@tonic-gate 
31430Sstevel@tonic-gate 		if (setno != MD_LOCAL_SET) {
31440Sstevel@tonic-gate 			char	buf[64];
31450Sstevel@tonic-gate 			char	*s = buf;
31460Sstevel@tonic-gate 			char	*e = module + sizeof (module) - 4;
31470Sstevel@tonic-gate 
31480Sstevel@tonic-gate 			(void) sprintf(buf, "%u", setno);
31490Sstevel@tonic-gate 			while ((p < e) && (*s != '\0'))
31500Sstevel@tonic-gate 				*p++ = *s++;
31510Sstevel@tonic-gate 			*p++ = '/';
31520Sstevel@tonic-gate 		}
31530Sstevel@tonic-gate 		*p++ = 'm';
31540Sstevel@tonic-gate 		*p++ = 'd';
31550Sstevel@tonic-gate 		*p = '\0';
31560Sstevel@tonic-gate 		if ((ui->ui_kstat = kstat_create(module, unit, NULL, "disk",
31570Sstevel@tonic-gate 		    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) {
31580Sstevel@tonic-gate 			ui->ui_kstat->ks_lock = &ui->ui_mx;
31590Sstevel@tonic-gate 			kstat_install(ui->ui_kstat);
31600Sstevel@tonic-gate 		}
31610Sstevel@tonic-gate 	}
31620Sstevel@tonic-gate }
31630Sstevel@tonic-gate 
31640Sstevel@tonic-gate void
md_kstat_init(minor_t mnum)31650Sstevel@tonic-gate md_kstat_init(
31660Sstevel@tonic-gate 	minor_t		mnum
31670Sstevel@tonic-gate )
31680Sstevel@tonic-gate {
31690Sstevel@tonic-gate 	md_kstat_init_ui(mnum, MDI_UNIT(mnum));
31700Sstevel@tonic-gate }
31710Sstevel@tonic-gate 
31720Sstevel@tonic-gate void
md_kstat_destroy_ui(mdi_unit_t * ui)31730Sstevel@tonic-gate md_kstat_destroy_ui(
31740Sstevel@tonic-gate 	mdi_unit_t	*ui
31750Sstevel@tonic-gate )
31760Sstevel@tonic-gate {
31770Sstevel@tonic-gate 	/*
31780Sstevel@tonic-gate 	 * kstat_delete() interface has it's own locking mechanism and
31790Sstevel@tonic-gate 	 * does not allow holding of kstat lock (ks_lock).
31800Sstevel@tonic-gate 	 * Note: ks_lock == ui_mx from the md_kstat_init_ui().
31810Sstevel@tonic-gate 	 */
31820Sstevel@tonic-gate 	if ((ui != NULL) && (ui->ui_kstat != NULL)) {
31830Sstevel@tonic-gate 		kstat_delete(ui->ui_kstat);
31840Sstevel@tonic-gate 		ui->ui_kstat = NULL;
31850Sstevel@tonic-gate 	}
31860Sstevel@tonic-gate }
31870Sstevel@tonic-gate 
31880Sstevel@tonic-gate void
md_kstat_destroy(minor_t mnum)31890Sstevel@tonic-gate md_kstat_destroy(
31900Sstevel@tonic-gate 	minor_t		mnum
31910Sstevel@tonic-gate )
31920Sstevel@tonic-gate {
31930Sstevel@tonic-gate 	md_kstat_destroy_ui(MDI_UNIT(mnum));
31940Sstevel@tonic-gate }
31950Sstevel@tonic-gate 
31960Sstevel@tonic-gate /*
31970Sstevel@tonic-gate  * In the following subsequent routines, locks are held before checking the
31980Sstevel@tonic-gate  * validity of ui_kstat. This is done to make sure that we don't trip over
31990Sstevel@tonic-gate  * a NULL ui_kstat anymore.
32000Sstevel@tonic-gate  */
32010Sstevel@tonic-gate 
32020Sstevel@tonic-gate void
md_kstat_waitq_enter(mdi_unit_t * ui)32030Sstevel@tonic-gate md_kstat_waitq_enter(
32040Sstevel@tonic-gate 	mdi_unit_t	*ui
32050Sstevel@tonic-gate )
32060Sstevel@tonic-gate {
32070Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32080Sstevel@tonic-gate 	if (ui->ui_kstat != NULL)
32090Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(ui->ui_kstat));
32100Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32110Sstevel@tonic-gate }
32120Sstevel@tonic-gate 
32130Sstevel@tonic-gate void
md_kstat_waitq_to_runq(mdi_unit_t * ui)32140Sstevel@tonic-gate md_kstat_waitq_to_runq(
32150Sstevel@tonic-gate 	mdi_unit_t	*ui
32160Sstevel@tonic-gate )
32170Sstevel@tonic-gate {
32180Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32190Sstevel@tonic-gate 	if (ui->ui_kstat != NULL)
32200Sstevel@tonic-gate 		kstat_waitq_to_runq(KSTAT_IO_PTR(ui->ui_kstat));
32210Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32220Sstevel@tonic-gate }
32230Sstevel@tonic-gate 
32240Sstevel@tonic-gate void
md_kstat_waitq_exit(mdi_unit_t * ui)32250Sstevel@tonic-gate md_kstat_waitq_exit(
32260Sstevel@tonic-gate 	mdi_unit_t	*ui
32270Sstevel@tonic-gate )
32280Sstevel@tonic-gate {
32290Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32300Sstevel@tonic-gate 	if (ui->ui_kstat != NULL)
32310Sstevel@tonic-gate 		kstat_waitq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32320Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32330Sstevel@tonic-gate }
32340Sstevel@tonic-gate 
32350Sstevel@tonic-gate void
md_kstat_runq_enter(mdi_unit_t * ui)32360Sstevel@tonic-gate md_kstat_runq_enter(
32370Sstevel@tonic-gate 	mdi_unit_t	*ui
32380Sstevel@tonic-gate )
32390Sstevel@tonic-gate {
32400Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32410Sstevel@tonic-gate 	if (ui->ui_kstat != NULL)
32420Sstevel@tonic-gate 		kstat_runq_enter(KSTAT_IO_PTR(ui->ui_kstat));
32430Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32440Sstevel@tonic-gate }
32450Sstevel@tonic-gate 
32460Sstevel@tonic-gate void
md_kstat_runq_exit(mdi_unit_t * ui)32470Sstevel@tonic-gate md_kstat_runq_exit(
32480Sstevel@tonic-gate 	mdi_unit_t	*ui
32490Sstevel@tonic-gate )
32500Sstevel@tonic-gate {
32510Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32520Sstevel@tonic-gate 	if (ui->ui_kstat != NULL)
32530Sstevel@tonic-gate 		kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32540Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32550Sstevel@tonic-gate }
32560Sstevel@tonic-gate 
32570Sstevel@tonic-gate void
md_kstat_done(mdi_unit_t * ui,buf_t * bp,int war)32580Sstevel@tonic-gate md_kstat_done(
32590Sstevel@tonic-gate 	mdi_unit_t	*ui,
32600Sstevel@tonic-gate 	buf_t		*bp,
32610Sstevel@tonic-gate 	int		war
32620Sstevel@tonic-gate )
32630Sstevel@tonic-gate {
32640Sstevel@tonic-gate 	size_t  n_done;
32650Sstevel@tonic-gate 
32660Sstevel@tonic-gate 	/* check for end of device */
32670Sstevel@tonic-gate 	if ((bp->b_resid != 0) && (! (bp->b_flags & B_ERROR))) {
32680Sstevel@tonic-gate 		n_done = bp->b_bcount;
32690Sstevel@tonic-gate 	} else if (bp->b_bcount < bp->b_resid) {
32700Sstevel@tonic-gate 		n_done = 0;
32710Sstevel@tonic-gate 	} else {
32720Sstevel@tonic-gate 		n_done = bp->b_bcount - bp->b_resid;
32730Sstevel@tonic-gate 	}
32740Sstevel@tonic-gate 
32750Sstevel@tonic-gate 	/* do accounting */
32760Sstevel@tonic-gate 	mutex_enter(&ui->ui_mx);
32770Sstevel@tonic-gate 	if (ui->ui_kstat != NULL) {
32780Sstevel@tonic-gate 		if ((! war) && (bp->b_flags & B_READ)) {
32790Sstevel@tonic-gate 			KSTAT_IO_PTR(ui->ui_kstat)->reads++;
32800Sstevel@tonic-gate 			KSTAT_IO_PTR(ui->ui_kstat)->nread += n_done;
32810Sstevel@tonic-gate 		} else {
32820Sstevel@tonic-gate 			KSTAT_IO_PTR(ui->ui_kstat)->writes++;
32830Sstevel@tonic-gate 			KSTAT_IO_PTR(ui->ui_kstat)->nwritten += n_done;
32840Sstevel@tonic-gate 		}
32850Sstevel@tonic-gate 		kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32860Sstevel@tonic-gate 	}
32870Sstevel@tonic-gate 	mutex_exit(&ui->ui_mx);
32880Sstevel@tonic-gate }
32890Sstevel@tonic-gate 
32900Sstevel@tonic-gate pid_t
md_getpid()32910Sstevel@tonic-gate md_getpid()
32920Sstevel@tonic-gate {
32930Sstevel@tonic-gate 	pid_t valuep;
32940Sstevel@tonic-gate 	if (drv_getparm(PPID, (pid_t *)&valuep) != 0) {
32950Sstevel@tonic-gate 		ASSERT(0);
32960Sstevel@tonic-gate 		return ((pid_t)0);
32970Sstevel@tonic-gate 	} else {
32980Sstevel@tonic-gate 		ASSERT(valuep);
32990Sstevel@tonic-gate 		return (valuep);
33000Sstevel@tonic-gate 	}
33010Sstevel@tonic-gate }
33020Sstevel@tonic-gate 
33030Sstevel@tonic-gate 
33040Sstevel@tonic-gate proc_t *
md_getproc()33050Sstevel@tonic-gate md_getproc()
33060Sstevel@tonic-gate {
33070Sstevel@tonic-gate 	proc_t  *valuep;
33080Sstevel@tonic-gate 	if (drv_getparm(UPROCP, (proc_t **)&valuep) != 0) {
33090Sstevel@tonic-gate 		ASSERT(0);
33100Sstevel@tonic-gate 		return ((proc_t *)NULL);
33110Sstevel@tonic-gate 	} else {
33120Sstevel@tonic-gate 		ASSERT(valuep);
33130Sstevel@tonic-gate 		return (valuep);
33140Sstevel@tonic-gate 	}
33150Sstevel@tonic-gate }
33160Sstevel@tonic-gate 
33170Sstevel@tonic-gate extern kmutex_t pidlock;
33180Sstevel@tonic-gate 
33190Sstevel@tonic-gate /*
33200Sstevel@tonic-gate  * this check to see if a process pid pair are still running.  For the
33210Sstevel@tonic-gate  * disk set lock when both pid/proc are zero then the locks is not
33220Sstevel@tonic-gate  * currently held.
33230Sstevel@tonic-gate  */
33240Sstevel@tonic-gate int
md_checkpid(pid_t pid,proc_t * proc)33250Sstevel@tonic-gate md_checkpid(pid_t pid, proc_t *proc)
33260Sstevel@tonic-gate {
33270Sstevel@tonic-gate 	int	retval = 1;
33280Sstevel@tonic-gate 
33290Sstevel@tonic-gate 	if (pid == 0 && proc == NULL)
33300Sstevel@tonic-gate 		return (0);
33310Sstevel@tonic-gate 
33320Sstevel@tonic-gate 	mutex_enter(&pidlock);
33330Sstevel@tonic-gate 	if (prfind(pid)  != proc)
33340Sstevel@tonic-gate 		retval = 0;
33350Sstevel@tonic-gate 	mutex_exit(&pidlock);
33360Sstevel@tonic-gate 	return (retval);
33370Sstevel@tonic-gate }
33380Sstevel@tonic-gate 
33390Sstevel@tonic-gate /*
33400Sstevel@tonic-gate  * NAME: md_init_probereq
33410Sstevel@tonic-gate  *
33420Sstevel@tonic-gate  * DESCRIPTION: initializes a probe request. Parcels out the mnums such that
33430Sstevel@tonic-gate  *		they can be dispatched to multiple daemon threads.
33440Sstevel@tonic-gate  *
33450Sstevel@tonic-gate  * PARAMETERS: struct md_probedev *p	pointer ioctl input
33460Sstevel@tonic-gate  *
33470Sstevel@tonic-gate  * RETURN VALUE: Returns errno
33480Sstevel@tonic-gate  *
33490Sstevel@tonic-gate  */
33500Sstevel@tonic-gate 
33510Sstevel@tonic-gate int
md_init_probereq(struct md_probedev_impl * p,daemon_queue_t ** hdrpp)33520Sstevel@tonic-gate md_init_probereq(struct md_probedev_impl *p, daemon_queue_t **hdrpp)
33530Sstevel@tonic-gate {
33540Sstevel@tonic-gate 	int		err = 0;
33550Sstevel@tonic-gate 	int		modindx;
33560Sstevel@tonic-gate 	intptr_t	(*probe_test)();
33570Sstevel@tonic-gate 
33580Sstevel@tonic-gate 	/*
33590Sstevel@tonic-gate 	 * Initialize the semaphores and mutex
33600Sstevel@tonic-gate 	 * for the request
33610Sstevel@tonic-gate 	 */
33620Sstevel@tonic-gate 
33630Sstevel@tonic-gate 	p->probe_sema = kmem_alloc(sizeof (ksema_t), KM_SLEEP);
33640Sstevel@tonic-gate 
33650Sstevel@tonic-gate 	p->probe_mx = kmem_alloc(sizeof (kmutex_t), KM_SLEEP);
33660Sstevel@tonic-gate 	sema_init(PROBE_SEMA(p), 0, NULL, SEMA_DRIVER, NULL);
33670Sstevel@tonic-gate 	mutex_init(PROBE_MX(p), NULL, MUTEX_DEFAULT, NULL);
33680Sstevel@tonic-gate 
33690Sstevel@tonic-gate 	modindx = md_getmodindex(&(p->probe.md_driver), 1, 1);
33700Sstevel@tonic-gate 	probe_test = md_get_named_service(NODEV64, modindx,
33717563SPrasad.Singamsetty@Sun.COM 	    p->probe.test_name, 0);
33720Sstevel@tonic-gate 	if (probe_test == NULL) {
33730Sstevel@tonic-gate 		err = EINVAL;
33740Sstevel@tonic-gate 		goto err_out;
33750Sstevel@tonic-gate 	}
33760Sstevel@tonic-gate 
33770Sstevel@tonic-gate 	err = md_create_probe_rqlist(p, hdrpp, probe_test);
33780Sstevel@tonic-gate err_out:
33790Sstevel@tonic-gate 	return (err);
33800Sstevel@tonic-gate }
33810Sstevel@tonic-gate 
33820Sstevel@tonic-gate /*
33830Sstevel@tonic-gate  * NAME: md_probe_one
33840Sstevel@tonic-gate  *
33850Sstevel@tonic-gate  * DESCRIPTION: Generic routine for probing disks. This is called from the
33860Sstevel@tonic-gate  *		daemon.
33870Sstevel@tonic-gate  *
33880Sstevel@tonic-gate  * PARAMETERS: probe_req_t	*reqp	pointer to the probe request structure.
33890Sstevel@tonic-gate  *
33900Sstevel@tonic-gate  */
33910Sstevel@tonic-gate 
33920Sstevel@tonic-gate void
md_probe_one(probe_req_t * reqp)33930Sstevel@tonic-gate md_probe_one(probe_req_t *reqp)
33940Sstevel@tonic-gate {
33950Sstevel@tonic-gate 	mdi_unit_t		*ui;
33960Sstevel@tonic-gate 	md_probedev_impl_t	*p;
33970Sstevel@tonic-gate 	int			err = 0;
339810549SAchim.Maurer@Sun.COM 	set_t			setno;
33990Sstevel@tonic-gate 
34000Sstevel@tonic-gate 	p = (md_probedev_impl_t *)reqp->private_handle;
34010Sstevel@tonic-gate 	/*
34020Sstevel@tonic-gate 	 * Validate the unit while holding the global ioctl lock, then
34030Sstevel@tonic-gate 	 * obtain the unit_writerlock. Once the writerlock has been obtained
34040Sstevel@tonic-gate 	 * we can release the global lock. As long as we hold one of these
34050Sstevel@tonic-gate 	 * locks this will prevent a metaclear operation being performed
34060Sstevel@tonic-gate 	 * on the metadevice because metaclear takes the readerlock (via
34070Sstevel@tonic-gate 	 * openclose lock).
340810549SAchim.Maurer@Sun.COM 	 * To avoid a potential deadlock with the probe_fcn() causing i/o to
340910549SAchim.Maurer@Sun.COM 	 * be issued to the writerlock'd metadevice we only grab the writerlock
341010549SAchim.Maurer@Sun.COM 	 * if the unit is not an SVM root device.
34110Sstevel@tonic-gate 	 */
34127563SPrasad.Singamsetty@Sun.COM 	while (md_ioctl_lock_enter() == EINTR)
34137563SPrasad.Singamsetty@Sun.COM 		;
341410549SAchim.Maurer@Sun.COM 	setno = MD_MIN2SET(reqp->mnum);
34150Sstevel@tonic-gate 	ui = MDI_UNIT(reqp->mnum);
34160Sstevel@tonic-gate 	if (ui != NULL) {
341710549SAchim.Maurer@Sun.COM 		int	writer_grabbed;
341810549SAchim.Maurer@Sun.COM 		dev_t	svm_root;
341910549SAchim.Maurer@Sun.COM 
342010549SAchim.Maurer@Sun.COM 		if ((setno == MD_LOCAL_SET) && root_is_svm) {
342110549SAchim.Maurer@Sun.COM 			svm_root = getrootdev();
342210549SAchim.Maurer@Sun.COM 
342310549SAchim.Maurer@Sun.COM 			if (getminor(svm_root) == reqp->mnum) {
342410549SAchim.Maurer@Sun.COM 				writer_grabbed = 0;
342510549SAchim.Maurer@Sun.COM 			} else {
342610549SAchim.Maurer@Sun.COM 				writer_grabbed = 1;
342710549SAchim.Maurer@Sun.COM 				(void) md_unit_writerlock_common(ui, 0);
342810549SAchim.Maurer@Sun.COM 			}
342910549SAchim.Maurer@Sun.COM 		} else {
343010549SAchim.Maurer@Sun.COM 			writer_grabbed = 1;
343110549SAchim.Maurer@Sun.COM 			(void) md_unit_writerlock_common(ui, 0);
343210549SAchim.Maurer@Sun.COM 		}
34330Sstevel@tonic-gate 		(void) md_ioctl_lock_exit(0, 0, 0, FALSE);
34340Sstevel@tonic-gate 		err = (*reqp->probe_fcn)(ui, reqp->mnum);
343510549SAchim.Maurer@Sun.COM 		if (writer_grabbed) {
343610549SAchim.Maurer@Sun.COM 			md_unit_writerexit(ui);
343710549SAchim.Maurer@Sun.COM 		}
34380Sstevel@tonic-gate 	} else {
34390Sstevel@tonic-gate 		(void) md_ioctl_lock_exit(0, 0, 0, FALSE);
34400Sstevel@tonic-gate 	}
34410Sstevel@tonic-gate 
344210549SAchim.Maurer@Sun.COM 	/* update the info in the probe structure */
34430Sstevel@tonic-gate 
34440Sstevel@tonic-gate 	mutex_enter(PROBE_MX(p));
34450Sstevel@tonic-gate 	if (err != 0) {
34460Sstevel@tonic-gate 		cmn_err(CE_NOTE, "md_probe_one: err %d mnum %d\n", err,
34477563SPrasad.Singamsetty@Sun.COM 		    reqp->mnum);
34480Sstevel@tonic-gate 		(void) mdsyserror(&(p->probe.mde), err);
34490Sstevel@tonic-gate 	}
34500Sstevel@tonic-gate 
34510Sstevel@tonic-gate 	mutex_exit(PROBE_MX(p));
34520Sstevel@tonic-gate 	sema_v(PROBE_SEMA(p));
34530Sstevel@tonic-gate 
34540Sstevel@tonic-gate 	kmem_free(reqp, sizeof (probe_req_t));
34550Sstevel@tonic-gate }
34560Sstevel@tonic-gate char *
md_strdup(char * cp)34570Sstevel@tonic-gate md_strdup(char *cp)
34580Sstevel@tonic-gate {
34590Sstevel@tonic-gate 	char *new_cp = NULL;
34600Sstevel@tonic-gate 
34610Sstevel@tonic-gate 	new_cp = kmem_alloc(strlen(cp) + 1, KM_SLEEP);
34620Sstevel@tonic-gate 
34630Sstevel@tonic-gate 	return (strcpy(new_cp, cp));
34640Sstevel@tonic-gate }
34650Sstevel@tonic-gate 
34660Sstevel@tonic-gate void
freestr(char * cp)34670Sstevel@tonic-gate freestr(char *cp)
34680Sstevel@tonic-gate {
34690Sstevel@tonic-gate 	kmem_free(cp, strlen(cp) + 1);
34700Sstevel@tonic-gate }
34710Sstevel@tonic-gate 
34720Sstevel@tonic-gate /*
34730Sstevel@tonic-gate  * Validate the list and skip invalid devices. Then create
34740Sstevel@tonic-gate  * a doubly linked circular list of devices to probe.
34750Sstevel@tonic-gate  * The hdr points to the head and tail of this list.
34760Sstevel@tonic-gate  */
34770Sstevel@tonic-gate 
34780Sstevel@tonic-gate static int
md_create_probe_rqlist(md_probedev_impl_t * plist,daemon_queue_t ** hdr,intptr_t (* probe_test)())34790Sstevel@tonic-gate md_create_probe_rqlist(md_probedev_impl_t *plist, daemon_queue_t **hdr,
34800Sstevel@tonic-gate 			intptr_t (*probe_test)())
34810Sstevel@tonic-gate {
34820Sstevel@tonic-gate 	int i, err, nodevcnt;
34830Sstevel@tonic-gate 	probe_req_t *tp;
34840Sstevel@tonic-gate 	daemon_queue_t *hp;
34850Sstevel@tonic-gate 	minor_t mnum;
34860Sstevel@tonic-gate 
34870Sstevel@tonic-gate 	nodevcnt = 0;
34880Sstevel@tonic-gate 
34890Sstevel@tonic-gate 	hp = NULL;
34900Sstevel@tonic-gate 
34910Sstevel@tonic-gate 	for (i = 0; i <  plist->probe.nmdevs; i++) {
34920Sstevel@tonic-gate 		mnum = ((minor_t *)(uintptr_t)(plist->probe.mnum_list))[i];
34930Sstevel@tonic-gate 		if (MDI_UNIT(mnum) == NULL) {
34940Sstevel@tonic-gate 			cmn_err(CE_WARN, "md: Cannot probe %s since it does "
34950Sstevel@tonic-gate 			    "not exist", md_shortname(mnum));
34960Sstevel@tonic-gate 			nodevcnt++;
34970Sstevel@tonic-gate 			continue;
34980Sstevel@tonic-gate 		}
34990Sstevel@tonic-gate 		tp = kmem_alloc(sizeof (probe_req_t), KM_SLEEP);
35000Sstevel@tonic-gate 		tp->mnum = mnum;
35010Sstevel@tonic-gate 		tp->private_handle = (void *)plist;
35020Sstevel@tonic-gate 		tp->probe_fcn = probe_test;
35030Sstevel@tonic-gate 		if (hp == NULL) {
35040Sstevel@tonic-gate 			hp = (daemon_queue_t *)tp;
35050Sstevel@tonic-gate 			hp->dq_prev = hp->dq_next = (daemon_queue_t *)tp;
35060Sstevel@tonic-gate 		} else {
35070Sstevel@tonic-gate 			tp->dq.dq_next = hp;
35080Sstevel@tonic-gate 			tp->dq.dq_prev = hp->dq_prev;
35090Sstevel@tonic-gate 			hp->dq_prev->dq_next = (daemon_queue_t *)tp;
35100Sstevel@tonic-gate 			hp->dq_prev = (daemon_queue_t *)tp;
35110Sstevel@tonic-gate 		}
35120Sstevel@tonic-gate 	}
35130Sstevel@tonic-gate 
35140Sstevel@tonic-gate 	*hdr = hp;
35150Sstevel@tonic-gate 	if (nodevcnt > 0)
35160Sstevel@tonic-gate 		plist->probe.nmdevs -= nodevcnt;
35170Sstevel@tonic-gate 
35180Sstevel@tonic-gate 	/*
35190Sstevel@tonic-gate 	 * If there are no devices to be probed because they were
35200Sstevel@tonic-gate 	 * incorrect, then return an error.
35210Sstevel@tonic-gate 	 */
35220Sstevel@tonic-gate 	err = (plist->probe.nmdevs == 0) ? ENODEV : 0;
35230Sstevel@tonic-gate 
35240Sstevel@tonic-gate 	return (err);
35250Sstevel@tonic-gate }
35260Sstevel@tonic-gate 
35270Sstevel@tonic-gate /*
35280Sstevel@tonic-gate  * This routine increments the I/O count for set I/O operations.  This
35290Sstevel@tonic-gate  * value is used to determine if an I/O can done.  If a release is in
35300Sstevel@tonic-gate  * process this will return an error and cause the I/O to be errored.
35310Sstevel@tonic-gate  */
35320Sstevel@tonic-gate int
md_inc_iocount(set_t setno)35330Sstevel@tonic-gate md_inc_iocount(set_t setno)
35340Sstevel@tonic-gate {
35350Sstevel@tonic-gate 	int	rc = 0;
35360Sstevel@tonic-gate 
35370Sstevel@tonic-gate 	if (setno == 0)
35380Sstevel@tonic-gate 		return (0);
35390Sstevel@tonic-gate 
35400Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
35410Sstevel@tonic-gate 	if (!(md_set_io[setno].io_state & MD_SET_ACTIVE)) {
35420Sstevel@tonic-gate 		rc = EIO;
35430Sstevel@tonic-gate 		goto out;
35440Sstevel@tonic-gate 	}
35450Sstevel@tonic-gate 
35460Sstevel@tonic-gate 	ASSERT(md_set_io[setno].io_cnt >= 0);
35470Sstevel@tonic-gate 	md_set_io[setno].io_cnt++;
35480Sstevel@tonic-gate 
35490Sstevel@tonic-gate out:	mutex_exit(&md_set_io[setno].md_io_mx);
35500Sstevel@tonic-gate 	return (rc);
35510Sstevel@tonic-gate }
35520Sstevel@tonic-gate 
35530Sstevel@tonic-gate void
md_inc_iocount_noblock(set_t setno)35540Sstevel@tonic-gate md_inc_iocount_noblock(set_t setno)
35550Sstevel@tonic-gate {
35560Sstevel@tonic-gate 
35570Sstevel@tonic-gate 	if (setno == 0)
35580Sstevel@tonic-gate 		return;
35590Sstevel@tonic-gate 
35600Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
35610Sstevel@tonic-gate 	md_set_io[setno].io_cnt++;
35620Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
35630Sstevel@tonic-gate }
35640Sstevel@tonic-gate void
md_dec_iocount(set_t setno)35650Sstevel@tonic-gate md_dec_iocount(set_t setno)
35660Sstevel@tonic-gate {
35670Sstevel@tonic-gate 
35680Sstevel@tonic-gate 	if (setno == 0)
35690Sstevel@tonic-gate 		return;
35700Sstevel@tonic-gate 
35710Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
35720Sstevel@tonic-gate 	md_set_io[setno].io_cnt--;
35730Sstevel@tonic-gate 	ASSERT(md_set_io[setno].io_cnt >= 0);
35740Sstevel@tonic-gate 	if ((md_set_io[setno].io_state & MD_SET_RELEASE) &&
35750Sstevel@tonic-gate 	    (md_set_io[setno].io_cnt == 0))
35760Sstevel@tonic-gate 		cv_broadcast(&md_set_io[setno].md_io_cv);
35770Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
35780Sstevel@tonic-gate }
35790Sstevel@tonic-gate 
35800Sstevel@tonic-gate int
md_isblock_setio(set_t setno)35810Sstevel@tonic-gate md_isblock_setio(set_t setno)
35820Sstevel@tonic-gate {
35830Sstevel@tonic-gate 	int	rc = 0;
35840Sstevel@tonic-gate 
35850Sstevel@tonic-gate 	if (setno == 0)
35860Sstevel@tonic-gate 		return (0);
35870Sstevel@tonic-gate 
35880Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
35890Sstevel@tonic-gate 	if (md_set_io[setno].io_state & MD_SET_RELEASE)
35900Sstevel@tonic-gate 		rc = 1;
35910Sstevel@tonic-gate 
35920Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
35930Sstevel@tonic-gate 	return (rc);
35940Sstevel@tonic-gate }
35950Sstevel@tonic-gate 
35960Sstevel@tonic-gate int
md_block_setio(set_t setno)35970Sstevel@tonic-gate md_block_setio(set_t setno)
35980Sstevel@tonic-gate {
35990Sstevel@tonic-gate 	int	rc = 0;
36000Sstevel@tonic-gate 
36010Sstevel@tonic-gate 	if (setno == 0)
36020Sstevel@tonic-gate 		return (1);
36030Sstevel@tonic-gate 
36040Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
36050Sstevel@tonic-gate 	md_set_io[setno].io_state = MD_SET_RELEASE;
36060Sstevel@tonic-gate 
36070Sstevel@tonic-gate 	while (md_set_io[setno].io_cnt > 0) {
36080Sstevel@tonic-gate 		cv_wait(&md_set_io[setno].md_io_cv,
36090Sstevel@tonic-gate 		    &md_set_io[setno].md_io_mx);
36100Sstevel@tonic-gate 	}
36110Sstevel@tonic-gate 	rc = 1;
36120Sstevel@tonic-gate 
36130Sstevel@tonic-gate 
36140Sstevel@tonic-gate 	ASSERT(md_set_io[setno].io_cnt == 0);
36150Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
36160Sstevel@tonic-gate 
36170Sstevel@tonic-gate 	return (rc);
36180Sstevel@tonic-gate }
36190Sstevel@tonic-gate 
36200Sstevel@tonic-gate void
md_clearblock_setio(set_t setno)36210Sstevel@tonic-gate md_clearblock_setio(set_t setno)
36220Sstevel@tonic-gate {
36230Sstevel@tonic-gate 	if (setno == 0)
36240Sstevel@tonic-gate 		return;
36250Sstevel@tonic-gate 
36260Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
36270Sstevel@tonic-gate 	md_set_io[setno].io_state = MD_SET_ACTIVE;
36280Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
36290Sstevel@tonic-gate }
36300Sstevel@tonic-gate 
36310Sstevel@tonic-gate void
md_unblock_setio(set_t setno)36320Sstevel@tonic-gate md_unblock_setio(set_t setno)
36330Sstevel@tonic-gate {
36340Sstevel@tonic-gate 	if (setno == 0)
36350Sstevel@tonic-gate 		return;
36360Sstevel@tonic-gate 
36370Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
36380Sstevel@tonic-gate #ifdef DEBUG
36390Sstevel@tonic-gate 	if (md_set_io[setno].io_cnt != 0) {
36400Sstevel@tonic-gate 		cmn_err(CE_NOTE, "set %d count was %ld at take",
36410Sstevel@tonic-gate 		    setno, md_set_io[setno].io_cnt);
36420Sstevel@tonic-gate 	}
36430Sstevel@tonic-gate #endif /* DEBUG */
36440Sstevel@tonic-gate 
36450Sstevel@tonic-gate 	md_set_io[setno].io_state = MD_SET_ACTIVE;
36460Sstevel@tonic-gate 	md_set_io[setno].io_cnt = 0;
36470Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
36480Sstevel@tonic-gate }
36490Sstevel@tonic-gate 
36500Sstevel@tonic-gate /*
36510Sstevel@tonic-gate  * Test and set version of the md_block_setio.
36520Sstevel@tonic-gate  * Set the io_state to keep new I/O from being issued.
36530Sstevel@tonic-gate  * If there is I/O currently in progress, then set io_state to active
36540Sstevel@tonic-gate  * and return failure.  Otherwise, return a 1 for success.
36550Sstevel@tonic-gate  *
36560Sstevel@tonic-gate  * Used in a MN diskset since the commd must be suspended before
36570Sstevel@tonic-gate  * this node can attempt to withdraw from a diskset.  But, with commd
36580Sstevel@tonic-gate  * suspended, I/O may have been issued that can never finish until
36590Sstevel@tonic-gate  * commd is resumed (allocation of hotspare, etc). So, if I/O is
36600Sstevel@tonic-gate  * outstanding after diskset io_state is marked RELEASE, then set diskset
36610Sstevel@tonic-gate  * io_state back to ACTIVE and return failure.
36620Sstevel@tonic-gate  */
36630Sstevel@tonic-gate int
md_tas_block_setio(set_t setno)36640Sstevel@tonic-gate md_tas_block_setio(set_t setno)
36650Sstevel@tonic-gate {
36660Sstevel@tonic-gate 	int	rc;
36670Sstevel@tonic-gate 
36680Sstevel@tonic-gate 	if (setno == 0)
36690Sstevel@tonic-gate 		return (1);
36700Sstevel@tonic-gate 
36710Sstevel@tonic-gate 	mutex_enter(&md_set_io[setno].md_io_mx);
36720Sstevel@tonic-gate 	md_set_io[setno].io_state = MD_SET_RELEASE;
36730Sstevel@tonic-gate 
36740Sstevel@tonic-gate 	if (md_set_io[setno].io_cnt > 0) {
36750Sstevel@tonic-gate 		md_set_io[setno].io_state = MD_SET_ACTIVE;
36760Sstevel@tonic-gate 		rc = 0;
36770Sstevel@tonic-gate 	} else {
36780Sstevel@tonic-gate 		rc = 1;
36790Sstevel@tonic-gate 	}
36800Sstevel@tonic-gate 
36810Sstevel@tonic-gate 	mutex_exit(&md_set_io[setno].md_io_mx);
36820Sstevel@tonic-gate 
36830Sstevel@tonic-gate 	return (rc);
36840Sstevel@tonic-gate }
36850Sstevel@tonic-gate 
36860Sstevel@tonic-gate void
md_biodone(struct buf * pb)36870Sstevel@tonic-gate md_biodone(struct buf *pb)
36880Sstevel@tonic-gate {
36890Sstevel@tonic-gate 	minor_t	mnum;
36900Sstevel@tonic-gate 	set_t	setno;
36910Sstevel@tonic-gate 	mdi_unit_t	*ui;
36920Sstevel@tonic-gate 
36930Sstevel@tonic-gate 	mnum = getminor(pb->b_edev);
36940Sstevel@tonic-gate 	setno = MD_MIN2SET(mnum);
36950Sstevel@tonic-gate 
36960Sstevel@tonic-gate 	if (setno == 0) {
36970Sstevel@tonic-gate 		biodone(pb);
36980Sstevel@tonic-gate 		return;
36990Sstevel@tonic-gate 	}
37000Sstevel@tonic-gate 
37010Sstevel@tonic-gate #ifdef DEBUG
37020Sstevel@tonic-gate 	ui = MDI_UNIT(mnum);
37030Sstevel@tonic-gate 	if (!md_unit_isopen(ui))
37040Sstevel@tonic-gate 		cmn_err(CE_NOTE, "io after close on %s\n", md_shortname(mnum));
37050Sstevel@tonic-gate #endif /* DEBUG */
37060Sstevel@tonic-gate 
37070Sstevel@tonic-gate 	/*
37080Sstevel@tonic-gate 	 * Handle the local diskset
37090Sstevel@tonic-gate 	 */
37100Sstevel@tonic-gate 	if (md_set_io[setno].io_cnt > 0)
37110Sstevel@tonic-gate 		md_dec_iocount(setno);
37120Sstevel@tonic-gate 
37130Sstevel@tonic-gate #ifdef DEBUG
37140Sstevel@tonic-gate 	/*
37150Sstevel@tonic-gate 	 * this is being done after the lock is dropped so there
37160Sstevel@tonic-gate 	 * are cases it may be invalid.  It is advisory.
37170Sstevel@tonic-gate 	 */
37180Sstevel@tonic-gate 	if (md_set_io[setno].io_state & MD_SET_RELEASE) {
37190Sstevel@tonic-gate 		/* Only display this error once for this metadevice */
37200Sstevel@tonic-gate 		if ((ui->ui_tstate & MD_RELEASE_IOERR_DONE) == 0) {
37210Sstevel@tonic-gate 			cmn_err(CE_NOTE,
37220Sstevel@tonic-gate 			    "I/O to %s attempted during set RELEASE\n",
37230Sstevel@tonic-gate 			    md_shortname(mnum));
37240Sstevel@tonic-gate 			ui->ui_tstate |= MD_RELEASE_IOERR_DONE;
37250Sstevel@tonic-gate 		}
37260Sstevel@tonic-gate 	}
37270Sstevel@tonic-gate #endif /* DEBUG */
37280Sstevel@tonic-gate 
37290Sstevel@tonic-gate 	biodone(pb);
37300Sstevel@tonic-gate }
37310Sstevel@tonic-gate 
37320Sstevel@tonic-gate 
37330Sstevel@tonic-gate /*
37340Sstevel@tonic-gate  * Driver special private devt handling routine
37350Sstevel@tonic-gate  * INPUT:  md_dev64_t
37360Sstevel@tonic-gate  * OUTPUT: dev_t, 32 bit on a 32 bit kernel, 64 bit on a 64 bit kernel.
37370Sstevel@tonic-gate  */
37380Sstevel@tonic-gate dev_t
md_dev64_to_dev(md_dev64_t dev)37390Sstevel@tonic-gate md_dev64_to_dev(md_dev64_t dev)
37400Sstevel@tonic-gate {
37410Sstevel@tonic-gate 	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37420Sstevel@tonic-gate 	minor_t minor = (minor_t)(dev & MAXMIN64);
37430Sstevel@tonic-gate 
37440Sstevel@tonic-gate 	return (makedevice(major, minor));
37450Sstevel@tonic-gate 
37460Sstevel@tonic-gate }
37470Sstevel@tonic-gate 
37480Sstevel@tonic-gate /*
37490Sstevel@tonic-gate  * Driver private makedevice routine
37500Sstevel@tonic-gate  * INPUT:  major_t major, minor_t minor
37510Sstevel@tonic-gate  * OUTPUT: md_dev64_t, no matter if on 32 bit or 64 bit kernel.
37520Sstevel@tonic-gate  */
37530Sstevel@tonic-gate md_dev64_t
md_makedevice(major_t major,minor_t minor)37540Sstevel@tonic-gate md_makedevice(major_t major, minor_t minor)
37550Sstevel@tonic-gate {
37560Sstevel@tonic-gate 	return (((md_dev64_t)major << NBITSMINOR64) | minor);
37570Sstevel@tonic-gate 
37580Sstevel@tonic-gate }
37590Sstevel@tonic-gate 
37600Sstevel@tonic-gate 
37610Sstevel@tonic-gate /*
37620Sstevel@tonic-gate  * Driver private devt md_getmajor routine
37630Sstevel@tonic-gate  * INPUT:  dev	a 64 bit container holding either a 32 bit or a 64 bit device
37640Sstevel@tonic-gate  * OUTPUT: the appropriate major number
37650Sstevel@tonic-gate  */
37660Sstevel@tonic-gate major_t
md_getmajor(md_dev64_t dev)37670Sstevel@tonic-gate md_getmajor(md_dev64_t dev)
37680Sstevel@tonic-gate {
37690Sstevel@tonic-gate 	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37700Sstevel@tonic-gate 
37710Sstevel@tonic-gate 	if (major == 0) {
37720Sstevel@tonic-gate 		/* Here we were given a 32bit dev */
37730Sstevel@tonic-gate 		major = (major_t)(dev >> NBITSMINOR32) & MAXMAJ32;
37740Sstevel@tonic-gate 	}
37750Sstevel@tonic-gate 	return (major);
37760Sstevel@tonic-gate }
37770Sstevel@tonic-gate 
37780Sstevel@tonic-gate /*
37790Sstevel@tonic-gate  * Driver private devt md_getminor routine
37800Sstevel@tonic-gate  * INPUT:  dev	a 64 bit container holding either a 32 bit or a 64 bit device
37810Sstevel@tonic-gate  * OUTPUT: the appropriate minor number
37820Sstevel@tonic-gate  */
37830Sstevel@tonic-gate minor_t
md_getminor(md_dev64_t dev)37840Sstevel@tonic-gate md_getminor(md_dev64_t dev)
37850Sstevel@tonic-gate {
37860Sstevel@tonic-gate 	minor_t minor;
37870Sstevel@tonic-gate 	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37880Sstevel@tonic-gate 
37890Sstevel@tonic-gate 	if (major == 0) {
37900Sstevel@tonic-gate 		/* Here we were given a 32bit dev */
37910Sstevel@tonic-gate 		minor = (minor_t)(dev & MAXMIN32);
37920Sstevel@tonic-gate 	} else {
37930Sstevel@tonic-gate 		minor = (minor_t)(dev & MAXMIN64);
37940Sstevel@tonic-gate 	}
37950Sstevel@tonic-gate 	return (minor);
37960Sstevel@tonic-gate }
37970Sstevel@tonic-gate 
37980Sstevel@tonic-gate int
md_check_ioctl_against_unit(int cmd,mdc_unit_t c)37997563SPrasad.Singamsetty@Sun.COM md_check_ioctl_against_unit(int cmd, mdc_unit_t c)
38000Sstevel@tonic-gate {
38010Sstevel@tonic-gate 	/*
38020Sstevel@tonic-gate 	 * If the metadevice is an old style device, it has a vtoc,
38030Sstevel@tonic-gate 	 *	in that case all reading EFI ioctls are not applicable.
38040Sstevel@tonic-gate 	 * If the metadevice has an EFI label, reading vtoc and geom ioctls
38050Sstevel@tonic-gate 	 *	are not supposed to work.
38060Sstevel@tonic-gate 	 */
38070Sstevel@tonic-gate 	switch (cmd) {
38080Sstevel@tonic-gate 		case DKIOCGGEOM:
38097563SPrasad.Singamsetty@Sun.COM 		case DKIOCGAPART:
38107563SPrasad.Singamsetty@Sun.COM 			/* if > 2 TB then fail */
38117563SPrasad.Singamsetty@Sun.COM 			if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38127563SPrasad.Singamsetty@Sun.COM 				return (ENOTSUP);
38137563SPrasad.Singamsetty@Sun.COM 			}
38147563SPrasad.Singamsetty@Sun.COM 			break;
38150Sstevel@tonic-gate 		case DKIOCGVTOC:
38167563SPrasad.Singamsetty@Sun.COM 			/* if > 2 TB then fail */
38177563SPrasad.Singamsetty@Sun.COM 			if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38187563SPrasad.Singamsetty@Sun.COM 				return (ENOTSUP);
38197563SPrasad.Singamsetty@Sun.COM 			}
38207563SPrasad.Singamsetty@Sun.COM 
38217563SPrasad.Singamsetty@Sun.COM 			/* if > 1 TB but < 2TB return overflow */
38227563SPrasad.Singamsetty@Sun.COM 			if (c.un_revision & MD_64BIT_META_DEV) {
38237563SPrasad.Singamsetty@Sun.COM 				return (EOVERFLOW);
38247563SPrasad.Singamsetty@Sun.COM 			}
38257563SPrasad.Singamsetty@Sun.COM 			break;
38267563SPrasad.Singamsetty@Sun.COM 		case DKIOCGEXTVTOC:
38277563SPrasad.Singamsetty@Sun.COM 			/* if > 2 TB then fail */
38287563SPrasad.Singamsetty@Sun.COM 			if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38290Sstevel@tonic-gate 				return (ENOTSUP);
38300Sstevel@tonic-gate 			}
38310Sstevel@tonic-gate 			break;
38320Sstevel@tonic-gate 		case DKIOCGETEFI:
38330Sstevel@tonic-gate 		case DKIOCPARTITION:
38347563SPrasad.Singamsetty@Sun.COM 			if ((c.un_flag & MD_EFILABEL) == 0) {
38350Sstevel@tonic-gate 				return (ENOTSUP);
38360Sstevel@tonic-gate 			}
38370Sstevel@tonic-gate 			break;
38380Sstevel@tonic-gate 
38390Sstevel@tonic-gate 		case DKIOCSETEFI:
38400Sstevel@tonic-gate 		/* setting an EFI label should always be ok */
38410Sstevel@tonic-gate 			return (0);
38420Sstevel@tonic-gate 
38430Sstevel@tonic-gate 		case DKIOCSVTOC:
38447563SPrasad.Singamsetty@Sun.COM 			/* if > 2 TB then fail */
38457563SPrasad.Singamsetty@Sun.COM 			if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38467563SPrasad.Singamsetty@Sun.COM 				return (ENOTSUP);
38477563SPrasad.Singamsetty@Sun.COM 			}
38487563SPrasad.Singamsetty@Sun.COM 
38497563SPrasad.Singamsetty@Sun.COM 			/* if > 1 TB but < 2TB return overflow */
38507563SPrasad.Singamsetty@Sun.COM 			if (c.un_revision & MD_64BIT_META_DEV) {
38517563SPrasad.Singamsetty@Sun.COM 				return (EOVERFLOW);
38527563SPrasad.Singamsetty@Sun.COM 			}
38537563SPrasad.Singamsetty@Sun.COM 			break;
38547563SPrasad.Singamsetty@Sun.COM 		case DKIOCSEXTVTOC:
38557563SPrasad.Singamsetty@Sun.COM 			if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38567563SPrasad.Singamsetty@Sun.COM 				return (ENOTSUP);
38577563SPrasad.Singamsetty@Sun.COM 			}
38587563SPrasad.Singamsetty@Sun.COM 			break;
38590Sstevel@tonic-gate 	}
38600Sstevel@tonic-gate 	return (0);
38610Sstevel@tonic-gate }
38620Sstevel@tonic-gate 
38630Sstevel@tonic-gate /*
38640Sstevel@tonic-gate  * md_vtoc_to_efi_record()
38650Sstevel@tonic-gate  * Input:  record id of the vtoc record
38660Sstevel@tonic-gate  * Output: record id of the efi record
38670Sstevel@tonic-gate  * Function:
38680Sstevel@tonic-gate  *	- reads the  volume name from the vtoc record
38690Sstevel@tonic-gate  *	- converts the volume name to a format, libefi understands
38700Sstevel@tonic-gate  *	- creates a new record of size MD_EFI_PARTNAME_BYTES
38710Sstevel@tonic-gate  *	- stores the volname in that record,
38720Sstevel@tonic-gate  *	- commits that record
38730Sstevel@tonic-gate  *	- returns the recid of the efi record.
38740Sstevel@tonic-gate  * Caveat Emptor:
38750Sstevel@tonic-gate  *	The calling routine must do something like
38760Sstevel@tonic-gate  *	- un->c.un_vtoc_id = md_vtoc_to_efi_record(vtoc_recid)
38770Sstevel@tonic-gate  *	- commit(un)
38780Sstevel@tonic-gate  *	- delete(vtoc_recid)
38790Sstevel@tonic-gate  *	in order to keep the mddb consistent in case of a panic in the middle.
38800Sstevel@tonic-gate  * Errors:
38810Sstevel@tonic-gate  *	- returns 0 on any error
38820Sstevel@tonic-gate  */
38830Sstevel@tonic-gate mddb_recid_t
md_vtoc_to_efi_record(mddb_recid_t vtoc_recid,set_t setno)38840Sstevel@tonic-gate md_vtoc_to_efi_record(mddb_recid_t vtoc_recid, set_t setno)
38850Sstevel@tonic-gate {
38860Sstevel@tonic-gate 	struct vtoc	*vtoc;
38870Sstevel@tonic-gate 	ushort_t	*v;
38880Sstevel@tonic-gate 	mddb_recid_t	efi_recid;
38890Sstevel@tonic-gate 	int		i;
38900Sstevel@tonic-gate 
38910Sstevel@tonic-gate 	if (mddb_getrecstatus(vtoc_recid) != MDDB_OK) {
38920Sstevel@tonic-gate 		return (0);
38930Sstevel@tonic-gate 	}
38940Sstevel@tonic-gate 	vtoc = (struct vtoc *)mddb_getrecaddr(vtoc_recid);
38950Sstevel@tonic-gate 	efi_recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0,
38967563SPrasad.Singamsetty@Sun.COM 	    MD_CRO_32BIT, setno);
38970Sstevel@tonic-gate 	if (efi_recid < 0) {
38980Sstevel@tonic-gate 		return (0);
38990Sstevel@tonic-gate 	}
39000Sstevel@tonic-gate 	v = (ushort_t *)mddb_getrecaddr(efi_recid);
39010Sstevel@tonic-gate 
39020Sstevel@tonic-gate 	/* This for loop read, converts and writes */
39030Sstevel@tonic-gate 	for (i = 0; i < LEN_DKL_VVOL; i++) {
39040Sstevel@tonic-gate 		v[i] = LE_16((uint16_t)vtoc->v_volume[i]);
39050Sstevel@tonic-gate 	}
39060Sstevel@tonic-gate 	/* commit the new record */
39070Sstevel@tonic-gate 	mddb_commitrec_wrapper(efi_recid);
39080Sstevel@tonic-gate 
39090Sstevel@tonic-gate 	return (efi_recid);
39100Sstevel@tonic-gate }
39110Sstevel@tonic-gate 
39120Sstevel@tonic-gate /*
39130Sstevel@tonic-gate  * Send a kernel message.
39140Sstevel@tonic-gate  * user has to provide for an allocated result structure
39158452SJohn.Wren.Kennedy@Sun.COM  * If the door handler disappears we retry, emitting warnings every so often.
39168452SJohn.Wren.Kennedy@Sun.COM  *
39178452SJohn.Wren.Kennedy@Sun.COM  * The recipient argument is almost always unused, and is therefore typically
39188452SJohn.Wren.Kennedy@Sun.COM  * set to zero, as zero is an invalid cluster nodeid.  The exceptions are the
39198452SJohn.Wren.Kennedy@Sun.COM  * marking and clearing of the DRL from a node that is not currently the
39208452SJohn.Wren.Kennedy@Sun.COM  * owner.  In these cases, the recipient argument will be the nodeid of the
39218452SJohn.Wren.Kennedy@Sun.COM  * mirror owner, and MD_MSGF_DIRECTED will be set in the flags.  Non-owner
39228452SJohn.Wren.Kennedy@Sun.COM  * nodes will not receive these messages.
39238452SJohn.Wren.Kennedy@Sun.COM  *
3924*11130SJames.Hall@Sun.COM  * For the case where md_mn_is_commd_present() is false, we simply pre-set
3925*11130SJames.Hall@Sun.COM  * the result->kmmr_comm_state to MDMNE_RPC_FAIL.
3926*11130SJames.Hall@Sun.COM  * This covers the case where the service mdcommd has been killed and so we do
3927*11130SJames.Hall@Sun.COM  * not get a 'new' result structure copied back. Instead we return with the
3928*11130SJames.Hall@Sun.COM  * supplied result field, and we need to flag a failure to the caller.
39290Sstevel@tonic-gate  */
39300Sstevel@tonic-gate int
mdmn_ksend_message(set_t setno,md_mn_msgtype_t type,uint_t flags,md_mn_nodeid_t recipient,char * data,int size,md_mn_kresult_t * result)39310Sstevel@tonic-gate mdmn_ksend_message(
39320Sstevel@tonic-gate 	set_t		setno,
39330Sstevel@tonic-gate 	md_mn_msgtype_t	type,
39340Sstevel@tonic-gate 	uint_t		flags,
39358452SJohn.Wren.Kennedy@Sun.COM 	md_mn_nodeid_t	recipient,
39360Sstevel@tonic-gate 	char		*data,
39370Sstevel@tonic-gate 	int		size,
39380Sstevel@tonic-gate 	md_mn_kresult_t	*result)
39390Sstevel@tonic-gate {
39400Sstevel@tonic-gate 	door_arg_t	da;
39410Sstevel@tonic-gate 	md_mn_kmsg_t	*kmsg;
39428452SJohn.Wren.Kennedy@Sun.COM 	uint_t		send_try_cnt = 0;
39438452SJohn.Wren.Kennedy@Sun.COM 	uint_t		retry_noise_cnt = 0;
39440Sstevel@tonic-gate 	int		rval;
39458452SJohn.Wren.Kennedy@Sun.COM 	k_sigset_t	oldmask, newmask;
39460Sstevel@tonic-gate 
3947*11130SJames.Hall@Sun.COM 	/*
3948*11130SJames.Hall@Sun.COM 	 * Ensure that we default to a recoverable failure state if the
3949*11130SJames.Hall@Sun.COM 	 * door upcall cannot pass the request on to rpc.mdcommd.
3950*11130SJames.Hall@Sun.COM 	 * This may occur when shutting the node down while there is still
3951*11130SJames.Hall@Sun.COM 	 * a mirror resync or metadevice state update occurring.
3952*11130SJames.Hall@Sun.COM 	 */
3953*11130SJames.Hall@Sun.COM 	result->kmmr_comm_state = MDMNE_RPC_FAIL;
3954*11130SJames.Hall@Sun.COM 	result->kmmr_exitval = ~0;
3955*11130SJames.Hall@Sun.COM 
39560Sstevel@tonic-gate 	if (size > MDMN_MAX_KMSG_DATA)
39570Sstevel@tonic-gate 		return (ENOMEM);
39580Sstevel@tonic-gate 	kmsg = kmem_zalloc(sizeof (md_mn_kmsg_t), KM_SLEEP);
39590Sstevel@tonic-gate 	kmsg->kmsg_flags = flags;
39600Sstevel@tonic-gate 	kmsg->kmsg_setno = setno;
39618452SJohn.Wren.Kennedy@Sun.COM 	kmsg->kmsg_recipient = recipient;
39620Sstevel@tonic-gate 	kmsg->kmsg_type	= type;
39630Sstevel@tonic-gate 	kmsg->kmsg_size	= size;
39640Sstevel@tonic-gate 	bcopy(data, &(kmsg->kmsg_data), size);
39650Sstevel@tonic-gate 
39660Sstevel@tonic-gate 	/*
39670Sstevel@tonic-gate 	 * Wait for the door handle to be established.
39680Sstevel@tonic-gate 	 */
39690Sstevel@tonic-gate 	while (mdmn_door_did == -1) {
39708452SJohn.Wren.Kennedy@Sun.COM 		if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) {
39710Sstevel@tonic-gate 			cmn_err(CE_WARN, "door handle not yet ready. "
39720Sstevel@tonic-gate 			    "Check if /usr/lib/lvm/mddoors is running");
39730Sstevel@tonic-gate 		}
39740Sstevel@tonic-gate 		delay(md_hz);
39750Sstevel@tonic-gate 	}
39760Sstevel@tonic-gate 
39770Sstevel@tonic-gate 	/*
39788452SJohn.Wren.Kennedy@Sun.COM 	 * If MD_MSGF_BLK_SIGNAL is set, mask out all signals so that we
39798452SJohn.Wren.Kennedy@Sun.COM 	 * do not fail if the user process receives a signal while we're
39808452SJohn.Wren.Kennedy@Sun.COM 	 * active in the door interface.
39818452SJohn.Wren.Kennedy@Sun.COM 	 */
39828452SJohn.Wren.Kennedy@Sun.COM 	if (flags & MD_MSGF_BLK_SIGNAL) {
39838452SJohn.Wren.Kennedy@Sun.COM 		sigfillset(&newmask);
39848452SJohn.Wren.Kennedy@Sun.COM 		sigreplace(&newmask, &oldmask);
39858452SJohn.Wren.Kennedy@Sun.COM 	}
39868452SJohn.Wren.Kennedy@Sun.COM 
39878452SJohn.Wren.Kennedy@Sun.COM 	/*
39880Sstevel@tonic-gate 	 * If message failed with an RPC_FAILURE when rpc.mdcommd had
39890Sstevel@tonic-gate 	 * been gracefully shutdown (md_mn_is_commd_present returns FALSE)
39900Sstevel@tonic-gate 	 * then don't retry the message anymore.  If message
39910Sstevel@tonic-gate 	 * failed due to any other reason, then retry up to MD_MN_WARN_INTVL
39920Sstevel@tonic-gate 	 * times which should allow a shutting down system time to
39930Sstevel@tonic-gate 	 * notify the kernel of a graceful shutdown of rpc.mdcommd.
39940Sstevel@tonic-gate 	 *
39950Sstevel@tonic-gate 	 * Caller of this routine will need to check the md_mn_commd_present
39960Sstevel@tonic-gate 	 * flag and the failure error in order to determine whether to panic
39970Sstevel@tonic-gate 	 * or not.  If md_mn_commd_present is set to 0 and failure error
39980Sstevel@tonic-gate 	 * is RPC_FAILURE, the calling routine should not panic since the
39990Sstevel@tonic-gate 	 * system is in the process of being shutdown.
40000Sstevel@tonic-gate 	 *
40010Sstevel@tonic-gate 	 */
40020Sstevel@tonic-gate 
40038452SJohn.Wren.Kennedy@Sun.COM 	retry_noise_cnt = send_try_cnt = 0;
40048452SJohn.Wren.Kennedy@Sun.COM 	while (md_mn_is_commd_present_lite()) {
40058452SJohn.Wren.Kennedy@Sun.COM 		/*
40068452SJohn.Wren.Kennedy@Sun.COM 		 * data_ptr and data_size are initialized here because on
40078452SJohn.Wren.Kennedy@Sun.COM 		 * return from the upcall, they contain data duplicated from
40088452SJohn.Wren.Kennedy@Sun.COM 		 * rbuf and rsize.  This causes subsequent upcalls to fail.
40098452SJohn.Wren.Kennedy@Sun.COM 		 */
40108452SJohn.Wren.Kennedy@Sun.COM 		da.data_ptr = (char *)(kmsg);
40118452SJohn.Wren.Kennedy@Sun.COM 		da.data_size = sizeof (md_mn_kmsg_t);
40128452SJohn.Wren.Kennedy@Sun.COM 		da.desc_ptr = NULL;
40138452SJohn.Wren.Kennedy@Sun.COM 		da.desc_num = 0;
40148452SJohn.Wren.Kennedy@Sun.COM 		da.rbuf = (char *)result;
40158452SJohn.Wren.Kennedy@Sun.COM 		da.rsize = sizeof (*result);
40168452SJohn.Wren.Kennedy@Sun.COM 
40178452SJohn.Wren.Kennedy@Sun.COM 		while ((rval = door_ki_upcall_limited(mdmn_door_handle, &da,
40188452SJohn.Wren.Kennedy@Sun.COM 		    NULL, SIZE_MAX, 0)) != 0) {
40198452SJohn.Wren.Kennedy@Sun.COM 			if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) {
40208452SJohn.Wren.Kennedy@Sun.COM 				if (rval == EAGAIN)  {
40218452SJohn.Wren.Kennedy@Sun.COM 					cmn_err(CE_WARN,
40228452SJohn.Wren.Kennedy@Sun.COM 					    "md: door_upcall failed. "
40238452SJohn.Wren.Kennedy@Sun.COM 					    "Check if mddoors is running.");
40248452SJohn.Wren.Kennedy@Sun.COM 				} else if (rval == EINTR) {
40258452SJohn.Wren.Kennedy@Sun.COM 					cmn_err(CE_WARN,
40268452SJohn.Wren.Kennedy@Sun.COM 					    "md: door_upcall failed. "
40278452SJohn.Wren.Kennedy@Sun.COM 					    "Check if rpc.mdcommd is running.");
40288452SJohn.Wren.Kennedy@Sun.COM 				} else {
40298452SJohn.Wren.Kennedy@Sun.COM 					cmn_err(CE_WARN,
40308452SJohn.Wren.Kennedy@Sun.COM 					    "md: door_upcall failed. "
40318452SJohn.Wren.Kennedy@Sun.COM 					    "Returned %d",
40328452SJohn.Wren.Kennedy@Sun.COM 					    rval);
40338452SJohn.Wren.Kennedy@Sun.COM 				}
40348452SJohn.Wren.Kennedy@Sun.COM 			}
40358452SJohn.Wren.Kennedy@Sun.COM 			if (++send_try_cnt >= md_send_retry_limit)
40360Sstevel@tonic-gate 				break;
40378452SJohn.Wren.Kennedy@Sun.COM 
40380Sstevel@tonic-gate 			delay(md_hz);
40398452SJohn.Wren.Kennedy@Sun.COM 
40408452SJohn.Wren.Kennedy@Sun.COM 			/*
40418452SJohn.Wren.Kennedy@Sun.COM 			 * data_ptr and data_size are re-initialized here
40428452SJohn.Wren.Kennedy@Sun.COM 			 * because on return from the upcall, they contain
40438452SJohn.Wren.Kennedy@Sun.COM 			 * data duplicated from rbuf and rsize.  This causes
40448452SJohn.Wren.Kennedy@Sun.COM 			 * subsequent upcalls to fail.
40458452SJohn.Wren.Kennedy@Sun.COM 			 */
40468452SJohn.Wren.Kennedy@Sun.COM 			da.data_ptr = (char *)(kmsg);
40478452SJohn.Wren.Kennedy@Sun.COM 			da.data_size = sizeof (md_mn_kmsg_t);
40488452SJohn.Wren.Kennedy@Sun.COM 			da.desc_ptr = NULL;
40498452SJohn.Wren.Kennedy@Sun.COM 			da.desc_num = 0;
40508452SJohn.Wren.Kennedy@Sun.COM 			da.rbuf = (char *)result;
40518452SJohn.Wren.Kennedy@Sun.COM 			da.rsize = sizeof (*result);
40520Sstevel@tonic-gate 		}
40538452SJohn.Wren.Kennedy@Sun.COM 
40548452SJohn.Wren.Kennedy@Sun.COM 
40558452SJohn.Wren.Kennedy@Sun.COM 		/*
40568452SJohn.Wren.Kennedy@Sun.COM 		 * If:
40578452SJohn.Wren.Kennedy@Sun.COM 		 * - the send succeeded (MDMNE_ACK)
40588452SJohn.Wren.Kennedy@Sun.COM 		 * - we had an MDMNE_RPC_FAIL and commd is now gone
40598452SJohn.Wren.Kennedy@Sun.COM 		 *   (note: since the outer loop is commd-dependent,
40608452SJohn.Wren.Kennedy@Sun.COM 		 *   checking MDMN_RPC_FAIL here is meaningless)
40618452SJohn.Wren.Kennedy@Sun.COM 		 * - we were told not to retry
40628452SJohn.Wren.Kennedy@Sun.COM 		 * - we exceeded the RPC failure send limit
40638452SJohn.Wren.Kennedy@Sun.COM 		 * punch out of the outer loop prior to the delay()
40648452SJohn.Wren.Kennedy@Sun.COM 		 */
40658452SJohn.Wren.Kennedy@Sun.COM 		if (result->kmmr_comm_state == MDMNE_ACK ||
40668452SJohn.Wren.Kennedy@Sun.COM 		    (flags & MD_MSGF_KSEND_NORETRY) ||
40678452SJohn.Wren.Kennedy@Sun.COM 		    (++send_try_cnt % md_send_retry_limit) == 0 ||
40688452SJohn.Wren.Kennedy@Sun.COM 		    !md_mn_is_commd_present())
40698452SJohn.Wren.Kennedy@Sun.COM 			break;
40708452SJohn.Wren.Kennedy@Sun.COM 		delay(md_hz);
40710Sstevel@tonic-gate 	}
40720Sstevel@tonic-gate 
40738452SJohn.Wren.Kennedy@Sun.COM 	if (flags & MD_MSGF_BLK_SIGNAL) {
40748452SJohn.Wren.Kennedy@Sun.COM 		sigreplace(&oldmask, (k_sigset_t *)NULL);
40758452SJohn.Wren.Kennedy@Sun.COM 	}
40768452SJohn.Wren.Kennedy@Sun.COM 	kmem_free(kmsg, sizeof (md_mn_kmsg_t));
40778452SJohn.Wren.Kennedy@Sun.COM 
40780Sstevel@tonic-gate 	return (0);
40790Sstevel@tonic-gate }
40800Sstevel@tonic-gate 
40810Sstevel@tonic-gate /*
40820Sstevel@tonic-gate  * Called to propagate the capability of a metadevice to all nodes in the set.
40830Sstevel@tonic-gate  *
40840Sstevel@tonic-gate  * On entry, lockp is set if the function has been called from within an ioctl.
40850Sstevel@tonic-gate  *
40860Sstevel@tonic-gate  * IOLOCK_RETURN_RELEASE, which drops the md_ioctl_lock is called in this
40870Sstevel@tonic-gate  * routine to enable other mdioctls to enter the kernel while this
40880Sstevel@tonic-gate  * thread of execution waits on the completion of mdmn_ksend_message. When
40890Sstevel@tonic-gate  * the message is completed the thread continues and md_ioctl_lock must be
40900Sstevel@tonic-gate  * reacquired.  Even though md_ioctl_lock is interruptable, we choose to
40910Sstevel@tonic-gate  * ignore EINTR as we must not return without acquiring md_ioctl_lock.
40920Sstevel@tonic-gate  */
40930Sstevel@tonic-gate 
40940Sstevel@tonic-gate int
mdmn_send_capability_message(minor_t mnum,volcap_t vc,IOLOCK * lockp)40950Sstevel@tonic-gate mdmn_send_capability_message(minor_t mnum, volcap_t vc, IOLOCK *lockp)
40960Sstevel@tonic-gate {
40970Sstevel@tonic-gate 	md_mn_msg_setcap_t	msg;
40980Sstevel@tonic-gate 	md_mn_kresult_t		*kres;
40990Sstevel@tonic-gate 	mdi_unit_t		*ui = MDI_UNIT(mnum);
41000Sstevel@tonic-gate 	int			ret;
41013073Sjkennedy 	k_sigset_t		oldmask, newmask;
41020Sstevel@tonic-gate 
41030Sstevel@tonic-gate 	(void) strncpy((char *)&msg.msg_setcap_driver,
41040Sstevel@tonic-gate 	    md_ops[ui->ui_opsindex]->md_driver.md_drivername, MD_DRIVERNAMELEN);
41050Sstevel@tonic-gate 	msg.msg_setcap_mnum = mnum;
41060Sstevel@tonic-gate 	msg.msg_setcap_set = vc.vc_set;
41070Sstevel@tonic-gate 
41080Sstevel@tonic-gate 	if (lockp)
41090Sstevel@tonic-gate 		IOLOCK_RETURN_RELEASE(0, lockp);
4110*11130SJames.Hall@Sun.COM 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
41113073Sjkennedy 
41123073Sjkennedy 	/*
41133073Sjkennedy 	 * Mask signals for the mdmd_ksend_message call.  This keeps the door
41143073Sjkennedy 	 * interface from failing if the user process receives a signal while
41153073Sjkennedy 	 * in mdmn_ksend_message.
41163073Sjkennedy 	 */
41173073Sjkennedy 	sigfillset(&newmask);
41183073Sjkennedy 	sigreplace(&newmask, &oldmask);
41190Sstevel@tonic-gate 	ret = (mdmn_ksend_message(MD_MIN2SET(mnum), MD_MN_MSG_SET_CAP,
41208452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_NO_LOG, 0, (char *)&msg, sizeof (md_mn_msg_setcap_t),
41210Sstevel@tonic-gate 	    kres));
41223073Sjkennedy 	sigreplace(&oldmask, (k_sigset_t *)NULL);
41233073Sjkennedy 
41240Sstevel@tonic-gate 	if (!MDMN_KSEND_MSG_OK(ret, kres)) {
41250Sstevel@tonic-gate 		mdmn_ksend_show_error(ret, kres, "MD_MN_MSG_SET_CAP");
41260Sstevel@tonic-gate 		ret = EIO;
41270Sstevel@tonic-gate 	}
41280Sstevel@tonic-gate 	kmem_free(kres, sizeof (md_mn_kresult_t));
41290Sstevel@tonic-gate 
41300Sstevel@tonic-gate 	if (lockp) {
41310Sstevel@tonic-gate 		IOLOCK_RETURN_REACQUIRE(lockp);
41320Sstevel@tonic-gate 	}
41330Sstevel@tonic-gate 	return (ret);
41340Sstevel@tonic-gate }
41350Sstevel@tonic-gate 
41360Sstevel@tonic-gate /*
41370Sstevel@tonic-gate  * Called to clear all of the transient capabilities for a metadevice when it is
41380Sstevel@tonic-gate  * not open on any node in the cluster
41390Sstevel@tonic-gate  * Called from close for mirror and sp.
41400Sstevel@tonic-gate  */
41410Sstevel@tonic-gate 
41420Sstevel@tonic-gate void
mdmn_clear_all_capabilities(minor_t mnum)41430Sstevel@tonic-gate mdmn_clear_all_capabilities(minor_t mnum)
41440Sstevel@tonic-gate {
41450Sstevel@tonic-gate 	md_isopen_t	clumsg;
41460Sstevel@tonic-gate 	int		ret;
41470Sstevel@tonic-gate 	md_mn_kresult_t	*kresult;
41480Sstevel@tonic-gate 	volcap_t	vc;
41493073Sjkennedy 	k_sigset_t	oldmask, newmask;
41500Sstevel@tonic-gate 
41510Sstevel@tonic-gate 	clumsg.dev = md_makedevice(md_major, mnum);
41520Sstevel@tonic-gate 	clumsg.mde = mdnullerror;
41530Sstevel@tonic-gate 	/*
41540Sstevel@tonic-gate 	 * The check open message doesn't have to be logged, nor should the
41550Sstevel@tonic-gate 	 * result be stored in the MCT. We want an up-to-date state.
41560Sstevel@tonic-gate 	 */
4157*11130SJames.Hall@Sun.COM 	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
41583073Sjkennedy 
41593073Sjkennedy 	/*
41603073Sjkennedy 	 * Mask signals for the mdmd_ksend_message call.  This keeps the door
41613073Sjkennedy 	 * interface from failing if the user process receives a signal while
41623073Sjkennedy 	 * in mdmn_ksend_message.
41633073Sjkennedy 	 */
41643073Sjkennedy 	sigfillset(&newmask);
41653073Sjkennedy 	sigreplace(&newmask, &oldmask);
41660Sstevel@tonic-gate 	ret = mdmn_ksend_message(MD_MIN2SET(mnum),
41670Sstevel@tonic-gate 	    MD_MN_MSG_CLU_CHECK,
41688452SJohn.Wren.Kennedy@Sun.COM 	    MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG | MD_MSGF_NO_MCT, 0,
41690Sstevel@tonic-gate 	    (char *)&clumsg, sizeof (clumsg), kresult);
41703073Sjkennedy 	sigreplace(&oldmask, (k_sigset_t *)NULL);
41713073Sjkennedy 
41720Sstevel@tonic-gate 	if ((ret == 0) && (kresult->kmmr_exitval == 0)) {
41730Sstevel@tonic-gate 		/*
41740Sstevel@tonic-gate 		 * Not open on any node, clear all capabilities, eg ABR and
41750Sstevel@tonic-gate 		 * DMR
41760Sstevel@tonic-gate 		 */
41770Sstevel@tonic-gate 		vc.vc_set = 0;
41780Sstevel@tonic-gate 		(void) mdmn_send_capability_message(mnum, vc, NULL);
41790Sstevel@tonic-gate 	}
41800Sstevel@tonic-gate 	kmem_free(kresult, sizeof (md_mn_kresult_t));
41810Sstevel@tonic-gate }
41820Sstevel@tonic-gate 
41830Sstevel@tonic-gate /*
41840Sstevel@tonic-gate  * mdmn_ksend_show_error:
41850Sstevel@tonic-gate  * ---------------------
41860Sstevel@tonic-gate  * Called to display the error contents of a failing mdmn_ksend_message() result
41870Sstevel@tonic-gate  *
41880Sstevel@tonic-gate  * Input:
41890Sstevel@tonic-gate  *	rv	- return value from mdmn_ksend_message()
41900Sstevel@tonic-gate  *	kres	- pointer to result structure filled in by mdmn_ksend_message
41910Sstevel@tonic-gate  *	s	- Informative message to identify failing condition (e.g.
41920Sstevel@tonic-gate  *		  "Ownership change") This string will be displayed with
41930Sstevel@tonic-gate  *		  cmn_err(CE_WARN, "%s *FAILED*",...) to alert the system
41940Sstevel@tonic-gate  *		  administrator
41950Sstevel@tonic-gate  */
41960Sstevel@tonic-gate void
mdmn_ksend_show_error(int rv,md_mn_kresult_t * kres,const char * s)41970Sstevel@tonic-gate mdmn_ksend_show_error(int rv, md_mn_kresult_t *kres, const char *s)
41980Sstevel@tonic-gate {
41990Sstevel@tonic-gate 	if (rv == 0) {
42000Sstevel@tonic-gate 		cmn_err(CE_WARN, "%s *FAILED*", s);
42010Sstevel@tonic-gate 		cmn_err(CE_CONT, "exit_val = %d, comm_state = %d, failing_node"
42020Sstevel@tonic-gate 		    " = %d", kres->kmmr_exitval, kres->kmmr_comm_state,
42030Sstevel@tonic-gate 		    kres->kmmr_failing_node);
42040Sstevel@tonic-gate 	} else {
42050Sstevel@tonic-gate 		cmn_err(CE_WARN, "%s *FAILED*, return value = %d", s, rv);
42060Sstevel@tonic-gate 	}
42070Sstevel@tonic-gate }
42080Sstevel@tonic-gate 
42090Sstevel@tonic-gate /*
42100Sstevel@tonic-gate  * Callback routine for resync thread. If requested to suspend we mark the
42110Sstevel@tonic-gate  * commd as not being present.
42120Sstevel@tonic-gate  */
42130Sstevel@tonic-gate boolean_t
callb_md_mrs_cpr(void * arg,int code)42140Sstevel@tonic-gate callb_md_mrs_cpr(void *arg, int code)
42150Sstevel@tonic-gate {
42160Sstevel@tonic-gate 	callb_cpr_t *cp = (callb_cpr_t *)arg;
42170Sstevel@tonic-gate 	int ret = 0;				/* assume success */
421811066Srafael.vanoni@sun.com 	clock_t delta;
42190Sstevel@tonic-gate 
42200Sstevel@tonic-gate 	mutex_enter(cp->cc_lockp);
42210Sstevel@tonic-gate 
42220Sstevel@tonic-gate 	switch (code) {
42230Sstevel@tonic-gate 	case CB_CODE_CPR_CHKPT:
42240Sstevel@tonic-gate 		/*
42250Sstevel@tonic-gate 		 * Mark the rpc.mdcommd as no longer present. We are trying to
42260Sstevel@tonic-gate 		 * suspend the system and so we should expect RPC failures to
42270Sstevel@tonic-gate 		 * occur.
42280Sstevel@tonic-gate 		 */
42290Sstevel@tonic-gate 		md_mn_clear_commd_present();
42300Sstevel@tonic-gate 		cp->cc_events |= CALLB_CPR_START;
423111066Srafael.vanoni@sun.com 		delta = CPR_KTHREAD_TIMEOUT_SEC * hz;
42320Sstevel@tonic-gate 		while (!(cp->cc_events & CALLB_CPR_SAFE))
42330Sstevel@tonic-gate 			/* cv_timedwait() returns -1 if it times out. */
423411066Srafael.vanoni@sun.com 			if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
423511066Srafael.vanoni@sun.com 			    cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1)
42360Sstevel@tonic-gate 				break;
42370Sstevel@tonic-gate 			break;
42380Sstevel@tonic-gate 
42390Sstevel@tonic-gate 	case CB_CODE_CPR_RESUME:
42400Sstevel@tonic-gate 		cp->cc_events &= ~CALLB_CPR_START;
42410Sstevel@tonic-gate 		cv_signal(&cp->cc_stop_cv);
42420Sstevel@tonic-gate 		break;
42430Sstevel@tonic-gate 	}
42440Sstevel@tonic-gate 	mutex_exit(cp->cc_lockp);
42450Sstevel@tonic-gate 	return (ret != -1);
42460Sstevel@tonic-gate }
42471623Stw21770 
42481623Stw21770 
42491623Stw21770 void
md_rem_hspname(set_t setno,mdkey_t n_key)42501623Stw21770 md_rem_hspname(set_t setno, mdkey_t n_key)
42511623Stw21770 {
42521623Stw21770 	int	s;
42531623Stw21770 	int	max_sides;
42541623Stw21770 
42551623Stw21770 
42561623Stw21770 	/* All entries removed are in the same diskset */
42571623Stw21770 	if (md_get_setstatus(setno) & MD_SET_MNSET)
42581623Stw21770 		max_sides = MD_MNMAXSIDES;
42591623Stw21770 	else
42601623Stw21770 		max_sides = MD_MAXSIDES;
42611623Stw21770 
42621623Stw21770 	for (s = 0; s < max_sides; s++)
42631623Stw21770 		(void) md_remdevname(setno, s, n_key);
42641623Stw21770 }
42651623Stw21770 
42661623Stw21770 
42671623Stw21770 int
md_rem_selfname(minor_t selfid)42681623Stw21770 md_rem_selfname(minor_t selfid)
42691623Stw21770 {
42701623Stw21770 	int	s;
42711623Stw21770 	set_t	setno = MD_MIN2SET(selfid);
42721623Stw21770 	int	max_sides;
42731623Stw21770 	md_dev64_t	dev;
42741623Stw21770 	struct nm_next_hdr	*nh;
42751623Stw21770 	struct nm_name	*n;
42761623Stw21770 	mdkey_t key;
42771623Stw21770 
42781623Stw21770 	/*
42791623Stw21770 	 * Get the key since remove routine expects it
42801623Stw21770 	 */
42811623Stw21770 	dev = md_makedevice(md_major, selfid);
42821623Stw21770 	if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) {
42831623Stw21770 		return (ENOENT);
42841623Stw21770 	}
42851623Stw21770 
42861623Stw21770 	if ((n = (struct nm_name *)lookup_entry(nh, setno, MD_SIDEWILD,
42877563SPrasad.Singamsetty@Sun.COM 	    MD_KEYWILD, dev, 0L)) == NULL) {
42881623Stw21770 		return (ENOENT);
42891623Stw21770 	}
42901623Stw21770 
42911623Stw21770 	/* All entries removed are in the same diskset */
42921623Stw21770 	key = n->n_key;
42931623Stw21770 	if (md_get_setstatus(setno) & MD_SET_MNSET)
42941623Stw21770 		max_sides = MD_MNMAXSIDES;
42951623Stw21770 	else
42961623Stw21770 		max_sides = MD_MAXSIDES;
42971623Stw21770 
42981623Stw21770 	for (s = 0; s < max_sides; s++)
42991623Stw21770 		(void) md_remdevname(setno, s, key);
43001623Stw21770 
43011623Stw21770 	return (0);
43021623Stw21770 }
43031623Stw21770 
43041623Stw21770 void
md_upd_set_unnext(set_t setno,unit_t un)43051623Stw21770 md_upd_set_unnext(set_t setno, unit_t un)
43061623Stw21770 {
43071623Stw21770 	if (un < md_set[setno].s_un_next) {
43081623Stw21770 		md_set[setno].s_un_next = un;
43091623Stw21770 	}
43101623Stw21770 }
43111623Stw21770 
43121623Stw21770 struct hot_spare_pool *
find_hot_spare_pool(set_t setno,int hsp_id)43131623Stw21770 find_hot_spare_pool(set_t setno, int hsp_id)
43141623Stw21770 {
43151623Stw21770 	hot_spare_pool_t *hsp;
43161623Stw21770 
43171623Stw21770 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
43181623Stw21770 	while (hsp != NULL) {
43191623Stw21770 		if (hsp->hsp_self_id == hsp_id)
43201623Stw21770 			return (hsp);
43211623Stw21770 		hsp = hsp->hsp_next;
43221623Stw21770 	}
43231623Stw21770 
43241623Stw21770 	return ((hot_spare_pool_t *)0);
43251623Stw21770 }
43268452SJohn.Wren.Kennedy@Sun.COM 
43278452SJohn.Wren.Kennedy@Sun.COM /*
43288452SJohn.Wren.Kennedy@Sun.COM  * md_create_taskq:
43298452SJohn.Wren.Kennedy@Sun.COM  *
43308452SJohn.Wren.Kennedy@Sun.COM  * Create a kernel taskq for the given set/unit combination. This is typically
43318452SJohn.Wren.Kennedy@Sun.COM  * used to complete a RR_CLEAN request when the callee is unable to obtain the
43328452SJohn.Wren.Kennedy@Sun.COM  * mutex / condvar access required to update the DRL safely.
43338452SJohn.Wren.Kennedy@Sun.COM  */
43348452SJohn.Wren.Kennedy@Sun.COM void *
md_create_taskq(set_t setno,minor_t mnum)43358452SJohn.Wren.Kennedy@Sun.COM md_create_taskq(set_t setno, minor_t mnum)
43368452SJohn.Wren.Kennedy@Sun.COM {
43378452SJohn.Wren.Kennedy@Sun.COM 	char			name[20];
43388452SJohn.Wren.Kennedy@Sun.COM 	ddi_taskq_t		*tqp;
43398452SJohn.Wren.Kennedy@Sun.COM 
43408452SJohn.Wren.Kennedy@Sun.COM 	(void) snprintf(name, 20, "%d/d%d", setno, MD_MIN2UNIT(mnum));
43418452SJohn.Wren.Kennedy@Sun.COM 
43428452SJohn.Wren.Kennedy@Sun.COM 	tqp = ddi_taskq_create(md_devinfo, name, 1, TASKQ_DEFAULTPRI, 0);
43438452SJohn.Wren.Kennedy@Sun.COM 
43448452SJohn.Wren.Kennedy@Sun.COM 	return ((void *)tqp);
43458452SJohn.Wren.Kennedy@Sun.COM }
4346