/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Driver for Virtual Disk.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/proc.h>
#include <sys/t_lock.h>
#include <sys/dkio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/vtoc.h>
#include <sys/open.h>
#include <sys/file.h>
#include <vm/page.h>
#include <sys/callb.h>
#include <sys/disp.h>
#include <sys/modctl.h>
#include <sys/errno.h>
#include <sys/door.h>
#include <sys/lvm/mdmn_commd.h>
#include <sys/lvm/md_hotspares.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_names.h>

#include <sys/ddi.h>
#include <sys/proc.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>

#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>

#include <sys/sysevent/svm.h>
#include <sys/lvm/md_basic.h>


/*
 * Machine specific Hertz is kept here
 */
extern clock_t			md_hz;

/*
 * Externs.
 */
extern int			(*mdv_strategy_tstpnt)(buf_t *, int, void *);
extern major_t			md_major;
extern unit_t			md_nunits;
extern set_t			md_nsets;
extern md_set_t			md_set[];
extern md_set_io_t		md_set_io[];
extern md_ops_t			**md_ops;
extern md_ops_t			*md_opslist;
extern ddi_modhandle_t		*md_mods;
extern dev_info_t		*md_devinfo;

extern md_krwlock_t		md_unit_array_rw;
extern kmutex_t			md_mx;
extern kcondvar_t		md_cv;

extern md_krwlock_t		hsp_rwlp;
extern md_krwlock_t		ni_rwlp;

extern int			md_num_daemons;
extern int			md_status;
extern int			md_ioctl_cnt;
extern int			md_mtioctl_cnt;

extern struct metatransops	metatransops;
extern md_event_queue_t		*md_event_queue;
extern md_resync_t		md_cpr_resync;
extern int			md_done_daemon_threads;
extern int			md_ff_daemon_threads;


extern mddb_set_t	*mddb_setenter(set_t setno, int flag, int *errorcodep);
extern void		mddb_setexit(mddb_set_t *s);
extern void		*lookup_entry(struct nm_next_hdr *, set_t,
			    side_t, mdkey_t, md_dev64_t, int);
extern struct nm_next_hdr	*get_first_record(set_t, int, int);
extern dev_t		getrootdev(void);

struct mdq_anchor	md_done_daemon;		/* done request queue */
struct mdq_anchor	md_mstr_daemon;		/* mirror error, WOW requests */
struct mdq_anchor	md_mhs_daemon;		/* mirror hotspare requests queue */
struct mdq_anchor	md_hs_daemon;		/* raid hotspare requests queue */
struct mdq_anchor	md_ff_daemonq;		/* failfast request queue */
struct mdq_anchor	md_mirror_daemon;	/* mirror owner queue */
struct mdq_anchor	md_mirror_io_daemon;	/* mirror owner i/o queue */
struct mdq_anchor	md_mirror_rs_daemon;	/* mirror resync done queue */
struct mdq_anchor	md_sp_daemon;		/* soft-part error daemon queue */
struct mdq_anchor	md_mto_daemon;		/* mirror timeout daemon queue */

int md_done_daemon_threads = 1;	/* threads for md_done_daemon requestq */
int md_mstr_daemon_threads = 1;	/* threads for md_mstr_daemon requestq */
int md_mhs_daemon_threads = 1;	/* threads for md_mhs_daemon requestq */
int md_hs_daemon_threads = 1;	/* threads for md_hs_daemon requestq */
int md_ff_daemon_threads = 3;	/* threads for md_ff_daemon requestq */
int md_mirror_daemon_threads = 1;	/* threads for md_mirror_daemon requestq */
int md_sp_daemon_threads = 1;	/* threads for md_sp_daemon requestq */
int md_mto_daemon_threads = 1;	/* threads for md_mto_daemon requestq */

#ifdef DEBUG
/* Flag to switch on debug messages */
int md_release_reacquire_debug = 0;	/* debug flag */
#endif

/*
 * The md_daemon_queues array below is a table of pointers to the request
 * queues together with the number of service threads associated with each
 * queue.  When the number of threads for a queue is set to 1, requests on
 * that queue are executed sequentially.
 * The thread counts for all of the queues are defined as global variables
 * so that they can be tuned (see the example /etc/system entry following
 * the table).
 */

#define	MD_DAEMON_QUEUES	11

md_requestq_entry_t md_daemon_queues[MD_DAEMON_QUEUES] = {
	{&md_done_daemon, &md_done_daemon_threads},
	{&md_mstr_daemon, &md_mstr_daemon_threads},
	{&md_hs_daemon, &md_hs_daemon_threads},
	{&md_ff_daemonq, &md_ff_daemon_threads},
	{&md_mirror_daemon, &md_mirror_daemon_threads},
	{&md_mirror_io_daemon, &md_mirror_daemon_threads},
	{&md_mirror_rs_daemon, &md_mirror_daemon_threads},
	{&md_sp_daemon, &md_sp_daemon_threads},
	{&md_mhs_daemon, &md_mhs_daemon_threads},
	{&md_mto_daemon, &md_mto_daemon_threads},
	{0, 0}
};
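
/*
 * Illustrative tuning sketch (not part of the driver): assuming the driver
 * module is loaded as "md" and the usual /etc/system module-variable syntax,
 * a thread count from the table above could be raised with an entry such as:
 *
 *	set md:md_ff_daemon_threads = 4
 *
 * /etc/system is processed at boot, before the daemon queues are started.
 */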

/*
 * Number of times a message is retried before issuing a warning to
 * the operator.
 */
#define	MD_MN_WARN_INTVL	10

/*
 * Setting the retry count to one (pre-decremented) so that we actually do
 * no retries when committing/deleting an mddb rec.  The underlying disk
 * driver does several retries to check if the disk is really dead or not,
 * so there is no reason for us to retry on top of the driver's retries.
 */

uint_t md_retry_cnt = 1;	/* global so it can be patched */

/*
 * How many times to try to do the door_ki_upcall() in mdmn_ksend_message.
 * Again, made patchable here should it prove useful.
 */
uint_t md_send_retry_limit = 30;

/*
 * Bug # 1212146
 * Before this change the user had to pass in a short-aligned buffer because
 * of problems in some underlying device drivers.  This problem seems to have
 * been corrected in the underlying drivers, so we default to not requiring
 * any alignment.  If the user needs to check for a specific alignment,
 * md_uio_alignment_mask may be set in /etc/system to accomplish this.  To
 * get the behavior from before this fix, set md_uio_alignment_mask to 1;
 * to check for word alignment set it to 3; for double-word alignment set
 * it to 7; and so on.
 *
 * [Other part of fix is in function md_chk_uio()]
 */
static int md_uio_alignment_mask = 0;
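
/*
 * Illustrative /etc/system sketch (hypothetical, not part of the driver):
 * assuming the module is named "md" and the usual module-variable syntax,
 * requiring short-aligned user buffers again would look like:
 *
 *	set md:md_uio_alignment_mask = 0x1
 */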

/*
 * for md_dev64_t translation
 */
struct md_xlate_table		*md_tuple_table;
struct md_xlate_major_table	*md_major_tuple_table;
int	md_tuple_length;
uint_t	md_majortab_len;

/* Function declarations */

static int md_create_probe_rqlist(md_probedev_impl_t *plist,
		daemon_queue_t **hdr, intptr_t (*probe_test)());

/*
 * manipulate global status
 */
void
md_set_status(int bits)
{
	mutex_enter(&md_mx);
	md_status |= bits;
	mutex_exit(&md_mx);
}

void
md_clr_status(int bits)
{
	mutex_enter(&md_mx);
	md_status &= ~bits;
	mutex_exit(&md_mx);
}

int
md_get_status()
{
	int result;
	mutex_enter(&md_mx);
	result = md_status;
	mutex_exit(&md_mx);
	return (result);
}

void
md_set_setstatus(set_t setno, int bits)
{
	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);

	mutex_enter(&md_mx);
	md_set[setno].s_status |= bits;
	mutex_exit(&md_mx);
}

void
md_clr_setstatus(set_t setno, int bits)
{
	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);

	mutex_enter(&md_mx);
	md_set[setno].s_status &= ~bits;
	mutex_exit(&md_mx);
}

uint_t
md_get_setstatus(set_t setno)
{
	uint_t result;

	ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS);

	mutex_enter(&md_mx);
	result = md_set[setno].s_status;
	mutex_exit(&md_mx);
	return (result);
}
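
/*
 * Minimal usage sketch (illustrative only, hypothetical caller): status
 * bits are read and modified through the helpers above so that md_mx
 * serializes access, never by touching md_status/s_status directly:
 *
 *	if (md_get_setstatus(setno) & MD_SET_STALE)
 *		flag |= MD_MSGF_NO_LOG;
 *	md_set_setstatus(setno, MD_SET_SNARFED);
 */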

/*
 * md_unit_readerlock_common:
 * -------------------------
 * Mark the given unit as having a reader reference. Spin waiting for any
 * writer references to be released.
 *
 * Input:
 *	ui		unit reference
 *	lock_held	0 => ui_mx needs to be grabbed
 *			1 => ui_mx already held
 * Output:
 *	mm_unit_t corresponding to unit structure
 *	ui->ui_readercnt incremented
 */
static void *
md_unit_readerlock_common(mdi_unit_t *ui, int lock_held)
{
	uint_t	flag = MD_UL_WRITER | MD_UL_WANABEWRITER;

	if (!lock_held)
		mutex_enter(&ui->ui_mx);
	while (ui->ui_lock & flag) {
		if (panicstr) {
			if (ui->ui_lock & MD_UL_WRITER)
				panic("md: writer lock is held");
			break;
		}
		cv_wait(&ui->ui_cv, &ui->ui_mx);
	}
	ui->ui_readercnt++;
	if (!lock_held)
		mutex_exit(&ui->ui_mx);
	return (MD_UNIT(ui->ui_link.ln_id));
}

void *
md_unit_readerlock(mdi_unit_t *ui)
{
	return (md_unit_readerlock_common(ui, 0));
}
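
/*
 * Minimal usage sketch (illustrative only, hypothetical caller): a unit
 * readerlock is held across read-only examination of the unit structure
 * and must be balanced by md_unit_readerexit():
 *
 *	md_unit_t	*un;
 *
 *	un = (md_unit_t *)md_unit_readerlock(ui);
 *	... examine un; unit state must not be modified ...
 *	md_unit_readerexit(ui);
 */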

/*
 * md_unit_writerlock_common:
 * -------------------------
 * Acquire a unique writer reference. Causes previous readers to drain.
 * Spins if a writer reference already exists or if a previous reader/writer
 * dropped the lock to allow a ksend_message to be despatched.
 *
 * Input:
 *	ui		unit reference
 *	lock_held	0 => grab ui_mx
 *			1 => ui_mx already held on entry
 * Output:
 *	mm_unit_t reference
 */
static void *
md_unit_writerlock_common(mdi_unit_t *ui, int lock_held)
{
	uint_t	flag = MD_UL_WRITER;

	if (panicstr)
		panic("md: writer lock not allowed");

	if (!lock_held)
		mutex_enter(&ui->ui_mx);

	while ((ui->ui_lock & flag) || (ui->ui_readercnt != 0)) {
		ui->ui_wanabecnt++;
		ui->ui_lock |= MD_UL_WANABEWRITER;
		cv_wait(&ui->ui_cv, &ui->ui_mx);
		if (--ui->ui_wanabecnt == 0)
			ui->ui_lock &= ~MD_UL_WANABEWRITER;
	}
	ui->ui_lock |= MD_UL_WRITER;
	ui->ui_owner = curthread;

	if (!lock_held)
		mutex_exit(&ui->ui_mx);
	return (MD_UNIT(ui->ui_link.ln_id));
}

void *
md_unit_writerlock(mdi_unit_t *ui)
{
	return (md_unit_writerlock_common(ui, 0));
}

/*
 * md_unit_readerexit_common:
 * -------------------------
 * Release the readerlock for the specified unit. If the reader count reaches
 * zero and there are waiting writers (MD_UL_WANABEWRITER set) wake them up.
 *
 * Input:
 *	ui		unit reference
 *	lock_held	0 => ui_mx needs to be acquired
 *			1 => ui_mx already held
 */
static void
md_unit_readerexit_common(mdi_unit_t *ui, int lock_held)
{
	if (!lock_held)
		mutex_enter(&ui->ui_mx);
	ASSERT((ui->ui_lock & MD_UL_WRITER) == 0);
	ASSERT(ui->ui_readercnt != 0);
	ui->ui_readercnt--;
	if ((ui->ui_wanabecnt != 0) && (ui->ui_readercnt == 0))
		cv_broadcast(&ui->ui_cv);

	if (!lock_held)
		mutex_exit(&ui->ui_mx);
}

void
md_unit_readerexit(mdi_unit_t *ui)
{
	md_unit_readerexit_common(ui, 0);
}

/*
 * md_unit_writerexit_common:
 * -------------------------
 * Release the writerlock currently held on the unit. Wake any threads waiting
 * on becoming reader or writer (MD_UL_WANABEWRITER set).
 *
 * Input:
 *	ui		unit reference
 *	lock_held	0 => ui_mx to be acquired
 *			1 => ui_mx already held
 */
static void
md_unit_writerexit_common(mdi_unit_t *ui, int lock_held)
{
	if (!lock_held)
		mutex_enter(&ui->ui_mx);
	ASSERT((ui->ui_lock & MD_UL_WRITER) != 0);
	ASSERT(ui->ui_readercnt == 0);
	ui->ui_lock &= ~MD_UL_WRITER;
	ui->ui_owner = NULL;

	cv_broadcast(&ui->ui_cv);
	if (!lock_held)
		mutex_exit(&ui->ui_mx);
}

void
md_unit_writerexit(mdi_unit_t *ui)
{
	md_unit_writerexit_common(ui, 0);
}

void *
md_io_readerlock(mdi_unit_t *ui)
{
	md_io_lock_t	*io = ui->ui_io_lock;

	ASSERT(io);	/* checks case where no io lock allocated */
	mutex_enter(&io->io_mx);
	while (io->io_lock & (MD_UL_WRITER | MD_UL_WANABEWRITER)) {
		if (panicstr) {
			if (io->io_lock & MD_UL_WRITER)
				panic("md: writer lock is held");
			break;
		}
		cv_wait(&io->io_cv, &io->io_mx);
	}
	io->io_readercnt++;
	mutex_exit(&io->io_mx);
	return (MD_UNIT(ui->ui_link.ln_id));
}

void *
md_io_writerlock(mdi_unit_t *ui)
{
	md_io_lock_t	*io = ui->ui_io_lock;

	ASSERT(io);	/* checks case where no io lock allocated */
	if (panicstr)
		panic("md: writer lock not allowed");

	mutex_enter(&io->io_mx);
	while ((io->io_lock & MD_UL_WRITER) || (io->io_readercnt != 0)) {
		io->io_wanabecnt++;
		io->io_lock |= MD_UL_WANABEWRITER;
		cv_wait(&io->io_cv, &io->io_mx);
		if (--io->io_wanabecnt == 0)
			io->io_lock &= ~MD_UL_WANABEWRITER;
	}
	io->io_lock |= MD_UL_WRITER;
	io->io_owner = curthread;

	mutex_exit(&io->io_mx);
	return (MD_UNIT(ui->ui_link.ln_id));
}

void
md_io_readerexit(mdi_unit_t *ui)
{
	md_io_lock_t	*io = ui->ui_io_lock;

	mutex_enter(&io->io_mx);
	ASSERT((io->io_lock & MD_UL_WRITER) == 0);
	ASSERT(io->io_readercnt != 0);
	io->io_readercnt--;
	if ((io->io_wanabecnt != 0) && (io->io_readercnt == 0)) {
		cv_broadcast(&io->io_cv);
	}
	mutex_exit(&io->io_mx);
}

void
md_io_writerexit(mdi_unit_t *ui)
{
	md_io_lock_t	*io = ui->ui_io_lock;

	mutex_enter(&io->io_mx);
	ASSERT((io->io_lock & MD_UL_WRITER) != 0);
	ASSERT(io->io_readercnt == 0);
	io->io_lock &= ~MD_UL_WRITER;
	io->io_owner = NULL;

	cv_broadcast(&io->io_cv);
	mutex_exit(&io->io_mx);
}

/*
 * Attempt to grab that set of locks defined as global.
 * A mask containing the set of global locks that are owned upon
 * entry is input. Any additional global locks are then grabbed.
 * This keeps the caller from having to know the set of global
 * locks.
 */
static int
md_global_lock_enter(int global_locks_owned_mask)
{

	/*
	 * The current implementation has been verified by inspection
	 * and test to be deadlock free.  If another global lock is
	 * added, changing the algorithm used by this function should
	 * be considered.  With more than 2 locks it is difficult to
	 * guarantee that locks are being acquired in the correct order.
	 * The safe approach would be to drop all of the locks that are
	 * owned at function entry and then reacquire all of the locks
	 * in the order defined by the lock hierarchy.
	 */
	mutex_enter(&md_mx);
	if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) {
		while ((md_mtioctl_cnt != 0) ||
		    (md_status & MD_GBL_IOCTL_LOCK)) {
			if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) {
				mutex_exit(&md_mx);
				return (EINTR);
			}
		}
		md_status |= MD_GBL_IOCTL_LOCK;
		md_ioctl_cnt++;
	}
	if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) {
		while (md_status & MD_GBL_HS_LOCK) {
			if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) {
				md_status &= ~MD_GBL_IOCTL_LOCK;
				mutex_exit(&md_mx);
				return (EINTR);
			}
		}
		md_status |= MD_GBL_HS_LOCK;
	}
	mutex_exit(&md_mx);
	return (0);
}

/*
 * Release the set of global locks that were grabbed in md_global_lock_enter
 * that were not already owned by the calling thread. The set of previously
 * owned global locks is passed in as a mask parameter.
 */
static int
md_global_lock_exit(int global_locks_owned_mask, int code,
    int flags, mdi_unit_t *ui)
{
	mutex_enter(&md_mx);

	/* If this is an MT ioctl, decrement md_mtioctl_cnt */
	if ((flags & MD_MT_IOCTL)) {
		md_mtioctl_cnt--;
	} else {
		if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) {
			/* clear the lock and decrement count */
			ASSERT(md_ioctl_cnt == 1);
			md_ioctl_cnt--;
			md_status &= ~MD_GBL_IOCTL_LOCK;
		}
		if (!(global_locks_owned_mask & MD_GBL_HS_LOCK))
			md_status &= ~MD_GBL_HS_LOCK;
	}
	if (flags & MD_READER_HELD)
		md_unit_readerexit(ui);
	if (flags & MD_WRITER_HELD)
		md_unit_writerexit(ui);
	if (flags & MD_IO_HELD)
		md_io_writerexit(ui);
	if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) {
		rw_exit(&md_unit_array_rw.lock);
	}
	cv_broadcast(&md_cv);
	mutex_exit(&md_mx);

	return (code);
}

/*
 * The two functions md_ioctl_lock_enter and md_ioctl_lock_exit make
 * use of the md_global_lock_{enter|exit} functions to avoid duplication
 * of code.  They rely upon the fact that the locks that are specified in
 * the input mask are not acquired or freed.  If this algorithm changes
 * as described in the block comment at the beginning of md_global_lock_enter
 * then it will be necessary to change these 2 functions.  Otherwise these
 * functions will be grabbing and holding global locks unnecessarily.
 */
int
md_ioctl_lock_enter(void)
{
	/* grab only the ioctl lock */
	return (md_global_lock_enter(~MD_GBL_IOCTL_LOCK));
}

/*
 * If md_ioctl_lock_exit is being called at the end of an ioctl before
 * returning to user space, then ioctl_end is set to 1.
 * Otherwise, the ioctl lock is being dropped in the middle of handling
 * an ioctl and will be reacquired before the end of the ioctl.
 * Do not attempt to process the MN diskset mddb parse flags unless
 * ioctl_end is true - otherwise a deadlock situation could arise.
 */
int
md_ioctl_lock_exit(int code, int flags, mdi_unit_t *ui, int ioctl_end)
{
	int			ret_val;
	uint_t			status;
	mddb_set_t		*s;
	int			i;
	int			j;
	int			err;
	md_mn_msg_mddb_parse_t	*mddb_parse_msg;
	md_mn_kresult_t		*kresult;
	mddb_lb_t		*lbp;
	int			rval = 1;
	int			flag;

	/* release only the ioctl lock */
	ret_val = md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui);

	/*
	 * If md_ioctl_lock_exit is being called with a possible lock held
	 * (ioctl_end is 0), then don't check the MN disksets since the
	 * call to mddb_setenter may cause a lock ordering deadlock.
	 */
	if (!ioctl_end)
		return (ret_val);

	/*
	 * Walk through disksets to see if there is a MN diskset that
	 * has messages that need to be sent.  Set must be snarfed and
	 * be a MN diskset in order to be checked.
	 *
	 * In a MN diskset, this routine may send messages to the
	 * rpc.mdcommd in order to have the slave nodes re-parse parts
	 * of the mddb.  Messages can only be sent with no locks held,
	 * so if an mddb change occurred while the ioctl lock is held, this
	 * routine must send the messages.
	 */
	for (i = 1; i < md_nsets; i++) {
		status = md_get_setstatus(i);

		/* Set must be snarfed and be a MN diskset */
		if ((status & (MD_SET_SNARFED | MD_SET_MNSET)) !=
		    (MD_SET_SNARFED | MD_SET_MNSET))
			continue;

		/* Grab set lock so that set can't change */
		if ((s = mddb_setenter(i, MDDB_MUSTEXIST, &err)) == NULL)
			continue;

		lbp = s->s_lbp;

		/* Re-get set status now that lock is held */
		status = md_get_setstatus(i);

		/*
		 * If MN parsing block flag is set - continue to next set.
		 *
		 * If s_mn_parseflags_sending is non-zero, then another thread
		 * is already currently sending a parse message, so just
		 * release the set mutex.  If this ioctl had caused an mddb
		 * change that results in a parse message to be generated,
		 * the thread that is currently sending a parse message would
		 * generate the additional parse message.
		 *
		 * If s_mn_parseflags_sending is zero then loop until
		 * s_mn_parseflags is 0 (until there are no more
		 * messages to send).
		 * While s_mn_parseflags is non-zero,
		 *	put snapshot of parse_flags in s_mn_parseflags_sending
		 *	set s_mn_parseflags to zero
		 *	release set mutex
		 *	send message
		 *	re-grab set mutex
		 *	set s_mn_parseflags_sending to zero
		 *
		 * If set is STALE, send message with NO_LOG flag so that
		 * rpc.mdcommd won't attempt to log message to non-writeable
		 * replica.
		 */
		mddb_parse_msg = kmem_zalloc(sizeof (md_mn_msg_mddb_parse_t),
		    KM_SLEEP);
		while (((s->s_mn_parseflags_sending & MDDB_PARSE_MASK) == 0) &&
		    (s->s_mn_parseflags & MDDB_PARSE_MASK) &&
		    (!(status & MD_SET_MNPARSE_BLK))) {

			/* Grab snapshot of parse flags */
			s->s_mn_parseflags_sending = s->s_mn_parseflags;
			s->s_mn_parseflags = 0;

			mutex_exit(&md_set[(s)->s_setno].s_dbmx);

			/*
			 * Send the message to the slaves to re-parse
			 * the indicated portions of the mddb. Send the status
			 * of the 50 mddbs in this set so that slaves know
			 * which mddbs that the master node thinks are 'good'.
			 * Otherwise, slave may reparse, but from wrong
			 * replica.
			 */
			mddb_parse_msg->msg_parse_flags =
			    s->s_mn_parseflags_sending;

			for (j = 0; j < MDDB_NLB; j++) {
				mddb_parse_msg->msg_lb_flags[j] =
				    lbp->lb_locators[j].l_flags;
			}
			kresult = kmem_alloc(sizeof (md_mn_kresult_t),
			    KM_SLEEP);
			while (rval != 0) {
				flag = 0;
				if (status & MD_SET_STALE)
					flag |= MD_MSGF_NO_LOG;
				rval = mdmn_ksend_message(s->s_setno,
				    MD_MN_MSG_MDDB_PARSE, flag, 0,
				    (char *)mddb_parse_msg,
				    sizeof (md_mn_msg_mddb_parse_t), kresult);
				/* if the node hasn't yet joined, it's Ok. */
				if ((!MDMN_KSEND_MSG_OK(rval, kresult)) &&
				    (kresult->kmmr_comm_state !=
				    MDMNE_NOT_JOINED)) {
					mdmn_ksend_show_error(rval, kresult,
					    "MD_MN_MSG_MDDB_PARSE");
					cmn_err(CE_WARN, "md_ioctl_lock_exit: "
					    "Unable to send mddb update "
					    "message to other nodes in "
					    "diskset %s\n", s->s_setname);
					rval = 1;
				}
			}
			kmem_free(kresult, sizeof (md_mn_kresult_t));

			/*
			 * Re-grab mutex to clear sending field and to
			 * see if another parse message needs to be generated.
			 */
			mutex_enter(&md_set[(s)->s_setno].s_dbmx);
			s->s_mn_parseflags_sending = 0;
		}
		kmem_free(mddb_parse_msg, sizeof (md_mn_msg_mddb_parse_t));
		mutex_exit(&md_set[(s)->s_setno].s_dbmx);
	}
	return (ret_val);
}

/*
 * Called when in an ioctl and need readerlock.
 */
void *
md_ioctl_readerlock(IOLOCK *lock, mdi_unit_t *ui)
{
	ASSERT(lock != NULL);
	lock->l_ui = ui;
	lock->l_flags |= MD_READER_HELD;
	return (md_unit_readerlock_common(ui, 0));
}

/*
 * Called when in an ioctl and need writerlock.
 */
void *
md_ioctl_writerlock(IOLOCK *lock, mdi_unit_t *ui)
{
	ASSERT(lock != NULL);
	lock->l_ui = ui;
	lock->l_flags |= MD_WRITER_HELD;
	return (md_unit_writerlock_common(ui, 0));
}

void *
md_ioctl_io_lock(IOLOCK *lock, mdi_unit_t *ui)
{
	ASSERT(lock != NULL);
	lock->l_ui = ui;
	lock->l_flags |= MD_IO_HELD;
	return (md_io_writerlock(ui));
}

void
md_ioctl_readerexit(IOLOCK *lock)
{
	ASSERT(lock != NULL);
	lock->l_flags &= ~MD_READER_HELD;
	md_unit_readerexit(lock->l_ui);
}

void
md_ioctl_writerexit(IOLOCK *lock)
{
	ASSERT(lock != NULL);
	lock->l_flags &= ~MD_WRITER_HELD;
	md_unit_writerexit(lock->l_ui);
}

void
md_ioctl_io_exit(IOLOCK *lock)
{
	ASSERT(lock != NULL);
	lock->l_flags &= ~MD_IO_HELD;
	md_io_writerexit(lock->l_ui);
}

/*
 * md_ioctl_releaselocks:
 * --------------------
 * Release the unit locks that are held and stop subsequent
 * md_unit_reader/writerlock calls from progressing. This allows the caller
 * to send messages across the cluster when running in a multinode
 * environment.
 * ioctl originated locks (via md_ioctl_readerlock/md_ioctl_writerlock) are
 * allowed to progress as normal. This is required as these typically are
 * invoked by the message handler that may be called while a unit lock is
 * marked as released.
 *
 * On entry:
 *	variety of unit locks may be held including ioctl lock
 *
 * On exit:
 *	locks released and unit structure updated to prevent subsequent reader/
 *	writer locks being acquired until md_ioctl_reacquirelocks is called
 */
void
md_ioctl_releaselocks(int code, int flags, mdi_unit_t *ui)
{
	/* This actually releases the locks. */
	(void) md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui);
}

/*
 * md_ioctl_reacquirelocks:
 * ----------------------
 * Reacquire the locks that were held when md_ioctl_releaselocks
 * was called.
 *
 * On entry:
 *	No unit locks held
 * On exit:
 *	locks held that were held at md_ioctl_releaselocks time including
 *	the ioctl lock.
 */
void
md_ioctl_reacquirelocks(int flags, mdi_unit_t *ui)
{
	if (flags & MD_MT_IOCTL) {
		mutex_enter(&md_mx);
		md_mtioctl_cnt++;
		mutex_exit(&md_mx);
	} else {
		while (md_ioctl_lock_enter() == EINTR)
			;
	}
	if (flags & MD_ARRAY_WRITER) {
		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
	} else if (flags & MD_ARRAY_READER) {
		rw_enter(&md_unit_array_rw.lock, RW_READER);
	}
	if (ui != (mdi_unit_t *)NULL) {
		if (flags & MD_IO_HELD) {
			(void) md_io_writerlock(ui);
		}

		mutex_enter(&ui->ui_mx);
		if (flags & MD_READER_HELD) {
			(void) md_unit_readerlock_common(ui, 1);
		} else if (flags & MD_WRITER_HELD) {
			(void) md_unit_writerlock_common(ui, 1);
		}
		/* Wake up any blocked readerlock() calls */
		cv_broadcast(&ui->ui_cv);
		mutex_exit(&ui->ui_mx);
	}
}
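
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * multinode ioctl handler that must send a cluster message while it holds
 * unit locks brackets the send with the release/reacquire pair above,
 * since messages can only be sent with no locks held:
 *
 *	md_ioctl_releaselocks(0, MD_WRITER_HELD, ui);
 *	rval = mdmn_ksend_message(setno, msgtype, 0, 0,
 *	    (char *)msgdata, msgsize, kresult);
 *	md_ioctl_reacquirelocks(MD_WRITER_HELD, ui);
 */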

void
md_ioctl_droplocks(IOLOCK *lock)
{
	mdi_unit_t	*ui;
	int		flags;

	ASSERT(lock != NULL);
	ui = lock->l_ui;
	flags = lock->l_flags;
	if (flags & MD_READER_HELD) {
		lock->l_flags &= ~MD_READER_HELD;
		md_unit_readerexit(ui);
	}
	if (flags & MD_WRITER_HELD) {
		lock->l_flags &= ~MD_WRITER_HELD;
		md_unit_writerexit(ui);
	}
	if (flags & MD_IO_HELD) {
		lock->l_flags &= ~MD_IO_HELD;
		md_io_writerexit(ui);
	}
	if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) {
		lock->l_flags &= ~(MD_ARRAY_WRITER | MD_ARRAY_READER);
		rw_exit(&md_unit_array_rw.lock);
	}
}

void
md_array_writer(IOLOCK *lock)
{
	ASSERT(lock != NULL);
	lock->l_flags |= MD_ARRAY_WRITER;
	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
}

void
md_array_reader(IOLOCK *lock)
{
	ASSERT(lock != NULL);
	lock->l_flags |= MD_ARRAY_READER;
	rw_enter(&md_unit_array_rw.lock, RW_READER);
}

/*
 * Called when in an ioctl and need opencloselock.
 * Sets flags in lockp for READER_HELD.
 */
void *
md_ioctl_openclose_enter(IOLOCK *lockp, mdi_unit_t *ui)
{
	void	*un;

	ASSERT(lockp != NULL);
	mutex_enter(&ui->ui_mx);
	while (ui->ui_lock & MD_UL_OPENORCLOSE)
		cv_wait(&ui->ui_cv, &ui->ui_mx);
	ui->ui_lock |= MD_UL_OPENORCLOSE;

	/* Maintain mutex across the readerlock call */
	lockp->l_ui = ui;
	lockp->l_flags |= MD_READER_HELD;
	un = md_unit_readerlock_common(ui, 1);
	mutex_exit(&ui->ui_mx);

	return (un);
}

/*
 * Clears reader lock using md_ioctl instead of md_unit
 * and updates lockp.
 */
void
md_ioctl_openclose_exit(IOLOCK *lockp)
{
	mdi_unit_t	*ui;

	ASSERT(lockp != NULL);
	ui = lockp->l_ui;
	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);

	md_ioctl_readerexit(lockp);

	mutex_enter(&ui->ui_mx);
	ui->ui_lock &= ~MD_UL_OPENORCLOSE;

	cv_broadcast(&ui->ui_cv);
	mutex_exit(&ui->ui_mx);
}

/*
 * Clears reader lock using md_ioctl instead of md_unit
 * and updates lockp.
 * Does not acquire or release the ui_mx lock since the calling
 * routine has already acquired this lock.
 */
void
md_ioctl_openclose_exit_lh(IOLOCK *lockp)
{
	mdi_unit_t	*ui;

	ASSERT(lockp != NULL);
	ui = lockp->l_ui;
	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);

	lockp->l_flags &= ~MD_READER_HELD;
	md_unit_readerexit_common(lockp->l_ui, 1);

	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
	cv_broadcast(&ui->ui_cv);
}

void *
md_unit_openclose_enter(mdi_unit_t *ui)
{
	void	*un;

	mutex_enter(&ui->ui_mx);
	while (ui->ui_lock & (MD_UL_OPENORCLOSE))
		cv_wait(&ui->ui_cv, &ui->ui_mx);
	ui->ui_lock |= MD_UL_OPENORCLOSE;

	/* Maintain mutex across the readerlock call */
	un = md_unit_readerlock_common(ui, 1);
	mutex_exit(&ui->ui_mx);

	return (un);
}

void
md_unit_openclose_exit(mdi_unit_t *ui)
{
	md_unit_readerexit(ui);

	mutex_enter(&ui->ui_mx);
	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
	ui->ui_lock &= ~MD_UL_OPENORCLOSE;

	cv_broadcast(&ui->ui_cv);
	mutex_exit(&ui->ui_mx);
}
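
/*
 * Minimal usage sketch (illustrative only, hypothetical open(9E) path): the
 * open/close lock serializes open and close processing for a unit while the
 * unit readerlock is taken underneath it, e.g.
 *
 *	un = (md_unit_t *)md_unit_openclose_enter(ui);
 *	err = md_unit_incopen(mnum, flag, otyp);
 *	md_unit_openclose_exit(ui);
 */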

/*
 * Drop the openclose and readerlocks without acquiring or
 * releasing the ui_mx lock since the calling routine has
 * already acquired this lock.
 */
void
md_unit_openclose_exit_lh(mdi_unit_t *ui)
{
	md_unit_readerexit_common(ui, 1);
	ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE);
	ui->ui_lock &= ~MD_UL_OPENORCLOSE;
	cv_broadcast(&ui->ui_cv);
}

int
md_unit_isopen(
	mdi_unit_t	*ui
)
{
	int	isopen;

	/* check status */
	mutex_enter(&ui->ui_mx);
	isopen = ((ui->ui_lock & MD_UL_OPEN) ? 1 : 0);
	mutex_exit(&ui->ui_mx);
	return (isopen);
}

int
md_unit_incopen(
	minor_t		mnum,
	int		flag,
	int		otyp
)
{
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	int		err = 0;

	/* check type and flags */
	ASSERT(ui != NULL);
	mutex_enter(&ui->ui_mx);
	if ((otyp < 0) || (otyp >= OTYPCNT)) {
		err = EINVAL;
		goto out;
	}
	if (((flag & FEXCL) && (ui->ui_lock & MD_UL_OPEN)) ||
	    (ui->ui_lock & MD_UL_EXCL)) {
		err = EBUSY;
		goto out;
	}

	/* count and flag open */
	ui->ui_ocnt[otyp]++;
	ui->ui_lock |= MD_UL_OPEN;
	if (flag & FEXCL)
		ui->ui_lock |= MD_UL_EXCL;

	/* setup kstat, return success */
	mutex_exit(&ui->ui_mx);
	md_kstat_init(mnum);
	return (0);

	/* return error */
out:
	mutex_exit(&ui->ui_mx);
	return (err);
}

int
md_unit_decopen(
	minor_t		mnum,
	int		otyp
)
{
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	int		err = 0;
	unsigned	i;

	/* check type and flags */
	ASSERT(ui != NULL);
	mutex_enter(&ui->ui_mx);
	if ((otyp < 0) || (otyp >= OTYPCNT)) {
		err = EINVAL;
		goto out;
	} else if (ui->ui_ocnt[otyp] == 0) {
		err = ENXIO;
		goto out;
	}

	/* count and flag closed */
	if (otyp == OTYP_LYR)
		ui->ui_ocnt[otyp]--;
	else
		ui->ui_ocnt[otyp] = 0;
	ui->ui_lock &= ~MD_UL_OPEN;
	for (i = 0; (i < OTYPCNT); ++i)
		if (ui->ui_ocnt[i] != 0)
			ui->ui_lock |= MD_UL_OPEN;
	if (! (ui->ui_lock & MD_UL_OPEN))
		ui->ui_lock &= ~MD_UL_EXCL;

	/* teardown kstat, return success */
	if (! (ui->ui_lock & MD_UL_OPEN)) {

		/*
		 * We have a race condition inherited from specfs between
		 * open() and close() calls. This results in the kstat
		 * for a pending I/O being torn down, and then a panic.
		 * To avoid this, only tear the kstat down if there are
		 * no other readers on this device.
		 */
		if (ui->ui_readercnt > 1) {
			mutex_exit(&ui->ui_mx);
		} else {
			mutex_exit(&ui->ui_mx);
			md_kstat_destroy(mnum);
		}
		return (0);
	}

	/* return success (unit still open) or the error set above */
out:
	mutex_exit(&ui->ui_mx);
	return (err);
}

md_dev64_t
md_xlate_targ_2_mini(md_dev64_t targ_devt)
{
	dev32_t	mini_32_devt, targ_32_devt;
	int	i;

	/*
	 * Check to see if we're in an upgrade situation; if we are not in
	 * upgrade, just return the input device.
	 */

11630Sstevel@tonic-gate if (!MD_UPGRADE)
11640Sstevel@tonic-gate return (targ_devt);
11650Sstevel@tonic-gate
11660Sstevel@tonic-gate targ_32_devt = md_cmpldev(targ_devt);
11670Sstevel@tonic-gate
11680Sstevel@tonic-gate i = 0;
11690Sstevel@tonic-gate while (i != md_tuple_length) {
11700Sstevel@tonic-gate if (md_tuple_table[i].targ_devt == targ_32_devt) {
11710Sstevel@tonic-gate mini_32_devt = md_tuple_table[i].mini_devt;
11720Sstevel@tonic-gate return (md_expldev((md_dev64_t)mini_32_devt));
11730Sstevel@tonic-gate }
11740Sstevel@tonic-gate i++;
11750Sstevel@tonic-gate }
11760Sstevel@tonic-gate return (NODEV64);
11770Sstevel@tonic-gate }
11780Sstevel@tonic-gate
11790Sstevel@tonic-gate md_dev64_t
11800Sstevel@tonic-gate md_xlate_mini_2_targ(md_dev64_t mini_devt)
11810Sstevel@tonic-gate {
11820Sstevel@tonic-gate dev32_t mini_32_devt, targ_32_devt;
11830Sstevel@tonic-gate int i;
11840Sstevel@tonic-gate
11850Sstevel@tonic-gate if (!MD_UPGRADE)
11860Sstevel@tonic-gate return (mini_devt);
11870Sstevel@tonic-gate
11880Sstevel@tonic-gate mini_32_devt = md_cmpldev(mini_devt);
11890Sstevel@tonic-gate
11900Sstevel@tonic-gate i = 0;
11910Sstevel@tonic-gate while (i != md_tuple_length) {
11920Sstevel@tonic-gate if (md_tuple_table[i].mini_devt == mini_32_devt) {
11930Sstevel@tonic-gate targ_32_devt = md_tuple_table[i].targ_devt;
11940Sstevel@tonic-gate return (md_expldev((md_dev64_t)targ_32_devt));
11950Sstevel@tonic-gate }
11960Sstevel@tonic-gate i++;
11970Sstevel@tonic-gate }
11980Sstevel@tonic-gate return (NODEV64);
11990Sstevel@tonic-gate }
12000Sstevel@tonic-gate
12010Sstevel@tonic-gate void
12020Sstevel@tonic-gate md_xlate_free(int size)
12030Sstevel@tonic-gate {
12040Sstevel@tonic-gate kmem_free(md_tuple_table, size);
12050Sstevel@tonic-gate }
12060Sstevel@tonic-gate
12070Sstevel@tonic-gate char *
12080Sstevel@tonic-gate md_targ_major_to_name(major_t maj)
12090Sstevel@tonic-gate {
12100Sstevel@tonic-gate char *drv_name = NULL;
12110Sstevel@tonic-gate int i;
12120Sstevel@tonic-gate
12130Sstevel@tonic-gate if (!MD_UPGRADE)
12140Sstevel@tonic-gate return (ddi_major_to_name(maj));
12150Sstevel@tonic-gate
12160Sstevel@tonic-gate for (i = 0; i < md_majortab_len; i++) {
12170Sstevel@tonic-gate if (md_major_tuple_table[i].targ_maj == maj) {
12180Sstevel@tonic-gate drv_name = md_major_tuple_table[i].drv_name;
12190Sstevel@tonic-gate break;
12200Sstevel@tonic-gate }
12210Sstevel@tonic-gate }
12220Sstevel@tonic-gate return (drv_name);
12230Sstevel@tonic-gate }
12240Sstevel@tonic-gate
12250Sstevel@tonic-gate major_t
12260Sstevel@tonic-gate md_targ_name_to_major(char *drv_name)
12270Sstevel@tonic-gate {
12280Sstevel@tonic-gate major_t maj;
12290Sstevel@tonic-gate int i;
12300Sstevel@tonic-gate
12310Sstevel@tonic-gate maj = md_getmajor(NODEV64);
12320Sstevel@tonic-gate if (!MD_UPGRADE)
12330Sstevel@tonic-gate return (ddi_name_to_major(drv_name));
12340Sstevel@tonic-gate
12350Sstevel@tonic-gate for (i = 0; i < md_majortab_len; i++) {
12360Sstevel@tonic-gate if ((strcmp(md_major_tuple_table[i].drv_name,
12370Sstevel@tonic-gate drv_name)) == 0) {
12380Sstevel@tonic-gate maj = md_major_tuple_table[i].targ_maj;
12390Sstevel@tonic-gate break;
12400Sstevel@tonic-gate }
12410Sstevel@tonic-gate }
12420Sstevel@tonic-gate
12430Sstevel@tonic-gate return (maj);
12440Sstevel@tonic-gate }
12450Sstevel@tonic-gate
12460Sstevel@tonic-gate void
12470Sstevel@tonic-gate md_majortab_free()
12480Sstevel@tonic-gate {
12490Sstevel@tonic-gate size_t sz;
12500Sstevel@tonic-gate int i;
12510Sstevel@tonic-gate
12520Sstevel@tonic-gate for (i = 0; i < md_majortab_len; i++) {
12530Sstevel@tonic-gate freestr(md_major_tuple_table[i].drv_name);
12540Sstevel@tonic-gate }
12550Sstevel@tonic-gate
12560Sstevel@tonic-gate sz = md_majortab_len * sizeof (struct md_xlate_major_table);
12570Sstevel@tonic-gate kmem_free(md_major_tuple_table, sz);
12580Sstevel@tonic-gate }
12590Sstevel@tonic-gate
12600Sstevel@tonic-gate /* functions return a pointer to a function which returns an int */
12610Sstevel@tonic-gate
12620Sstevel@tonic-gate intptr_t (*
12630Sstevel@tonic-gate md_get_named_service(md_dev64_t dev, int modindex, char *name,
12640Sstevel@tonic-gate intptr_t (*Default)()))()
12650Sstevel@tonic-gate {
12660Sstevel@tonic-gate mdi_unit_t *ui;
12670Sstevel@tonic-gate md_named_services_t *sp;
12680Sstevel@tonic-gate int i;
12690Sstevel@tonic-gate
12700Sstevel@tonic-gate /*
12710Sstevel@tonic-gate * Return the first named service found.
12720Sstevel@tonic-gate * Use this path when it is known that there is only
12730Sstevel@tonic-gate * one named service possible (e.g., hotspare interface)
12740Sstevel@tonic-gate */
12750Sstevel@tonic-gate if ((dev == NODEV64) && (modindex == ANY_SERVICE)) {
12760Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
12770Sstevel@tonic-gate if (md_ops[i] == NULL) {
12780Sstevel@tonic-gate continue;
12790Sstevel@tonic-gate }
12800Sstevel@tonic-gate sp = md_ops[i]->md_services;
12810Sstevel@tonic-gate if (sp == NULL)
12820Sstevel@tonic-gate continue;
12830Sstevel@tonic-gate while (sp->md_service != NULL) {
12840Sstevel@tonic-gate if (strcmp(name, sp->md_name) == 0)
12850Sstevel@tonic-gate return (sp->md_service);
12860Sstevel@tonic-gate sp++;
12870Sstevel@tonic-gate }
12880Sstevel@tonic-gate }
12890Sstevel@tonic-gate return (Default);
12900Sstevel@tonic-gate }
12910Sstevel@tonic-gate
12920Sstevel@tonic-gate /*
12930Sstevel@tonic-gate * Return the named service for the given modindex.
12940Sstevel@tonic-gate * This is used if there are multiple possible named services
12950Sstevel@tonic-gate * and each one needs to be called (e.g., poke hotspares)
12960Sstevel@tonic-gate */
12970Sstevel@tonic-gate if (dev == NODEV64) {
12980Sstevel@tonic-gate if (modindex >= MD_NOPS)
12990Sstevel@tonic-gate return (Default);
13000Sstevel@tonic-gate
13010Sstevel@tonic-gate if (md_ops[modindex] == NULL)
13020Sstevel@tonic-gate return (Default);
13030Sstevel@tonic-gate
13040Sstevel@tonic-gate sp = md_ops[modindex]->md_services;
13050Sstevel@tonic-gate if (sp == NULL)
13060Sstevel@tonic-gate return (Default);
13070Sstevel@tonic-gate
13080Sstevel@tonic-gate while (sp->md_service != NULL) {
13090Sstevel@tonic-gate if (strcmp(name, sp->md_name) == 0)
13100Sstevel@tonic-gate return (sp->md_service);
13110Sstevel@tonic-gate sp++;
13120Sstevel@tonic-gate }
13130Sstevel@tonic-gate return (Default);
13140Sstevel@tonic-gate }
13150Sstevel@tonic-gate
13160Sstevel@tonic-gate /*
13170Sstevel@tonic-gate * Return the named service for this md_dev64_t
13180Sstevel@tonic-gate */
13190Sstevel@tonic-gate if (md_getmajor(dev) != md_major)
13200Sstevel@tonic-gate return (Default);
13210Sstevel@tonic-gate
13220Sstevel@tonic-gate if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) ||
13230Sstevel@tonic-gate (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits))
13240Sstevel@tonic-gate return (NULL);
13250Sstevel@tonic-gate
13260Sstevel@tonic-gate
13270Sstevel@tonic-gate if ((ui = MDI_UNIT(md_getminor(dev))) == NULL)
13280Sstevel@tonic-gate return (NULL);
13290Sstevel@tonic-gate
13300Sstevel@tonic-gate sp = md_ops[ui->ui_opsindex]->md_services;
13310Sstevel@tonic-gate if (sp == NULL)
13320Sstevel@tonic-gate return (Default);
13330Sstevel@tonic-gate while (sp->md_service != NULL) {
13340Sstevel@tonic-gate if (strcmp(name, sp->md_name) == 0)
13350Sstevel@tonic-gate return (sp->md_service);
13360Sstevel@tonic-gate sp++;
13370Sstevel@tonic-gate }
13380Sstevel@tonic-gate return (Default);
13390Sstevel@tonic-gate }
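
/*
 * Illustrative sketch, not part of the driver: the three lookup styles
 * accepted by md_get_named_service().  The service name "example svc",
 * the fallback routine example_default() and the variable xdev are
 * assumed names, used for illustration only.
 *
 *	intptr_t (*svc)();
 *
 *	First match across all loaded submodules:
 *		svc = md_get_named_service(NODEV64, ANY_SERVICE,
 *		    "example svc", example_default);
 *
 *	Match within the submodule at ops index modindex:
 *		svc = md_get_named_service(NODEV64, modindex,
 *		    "example svc", example_default);
 *
 *	Match for the submodule owning the metadevice xdev:
 *		svc = md_get_named_service(xdev, 0, "example svc",
 *		    example_default);
 */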
13400Sstevel@tonic-gate
13410Sstevel@tonic-gate /*
13420Sstevel@tonic-gate * md_daemon callback routine
13430Sstevel@tonic-gate */
13440Sstevel@tonic-gate boolean_t
13450Sstevel@tonic-gate callb_md_cpr(void *arg, int code)
13460Sstevel@tonic-gate {
13470Sstevel@tonic-gate callb_cpr_t *cp = (callb_cpr_t *)arg;
13480Sstevel@tonic-gate int ret = 0; /* assume success */
134911066Srafael.vanoni@sun.com clock_t delta;
13500Sstevel@tonic-gate
13510Sstevel@tonic-gate mutex_enter(cp->cc_lockp);
13520Sstevel@tonic-gate
13530Sstevel@tonic-gate switch (code) {
13540Sstevel@tonic-gate case CB_CODE_CPR_CHKPT:
13550Sstevel@tonic-gate /*
13560Sstevel@tonic-gate * Check for active resync threads
13570Sstevel@tonic-gate */
13580Sstevel@tonic-gate mutex_enter(&md_cpr_resync.md_resync_mutex);
13590Sstevel@tonic-gate if ((md_cpr_resync.md_mirror_resync > 0) ||
13607563SPrasad.Singamsetty@Sun.COM (md_cpr_resync.md_raid_resync > 0)) {
13610Sstevel@tonic-gate mutex_exit(&md_cpr_resync.md_resync_mutex);
13620Sstevel@tonic-gate cmn_err(CE_WARN, "There are Solaris Volume Manager "
13630Sstevel@tonic-gate "synchronization threads running.");
13640Sstevel@tonic-gate cmn_err(CE_WARN, "Please try system suspension at "
13657563SPrasad.Singamsetty@Sun.COM "a later time.");
13660Sstevel@tonic-gate ret = -1;
13670Sstevel@tonic-gate break;
13680Sstevel@tonic-gate }
13690Sstevel@tonic-gate mutex_exit(&md_cpr_resync.md_resync_mutex);
13700Sstevel@tonic-gate
13710Sstevel@tonic-gate cp->cc_events |= CALLB_CPR_START;
137211066Srafael.vanoni@sun.com delta = CPR_KTHREAD_TIMEOUT_SEC * hz;
13730Sstevel@tonic-gate while (!(cp->cc_events & CALLB_CPR_SAFE))
137411066Srafael.vanoni@sun.com /* cv_reltimedwait() returns -1 if it times out. */
137511066Srafael.vanoni@sun.com if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
137611066Srafael.vanoni@sun.com cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1)
13770Sstevel@tonic-gate break;
13780Sstevel@tonic-gate break;
13790Sstevel@tonic-gate
13800Sstevel@tonic-gate case CB_CODE_CPR_RESUME:
13810Sstevel@tonic-gate cp->cc_events &= ~CALLB_CPR_START;
13820Sstevel@tonic-gate cv_signal(&cp->cc_stop_cv);
13830Sstevel@tonic-gate break;
13840Sstevel@tonic-gate }
13850Sstevel@tonic-gate mutex_exit(cp->cc_lockp);
13860Sstevel@tonic-gate return (ret != -1);
13870Sstevel@tonic-gate }
13880Sstevel@tonic-gate
13890Sstevel@tonic-gate void
13900Sstevel@tonic-gate md_daemon(int pass_thru, mdq_anchor_t *anchor)
13910Sstevel@tonic-gate {
13920Sstevel@tonic-gate daemon_queue_t *dq;
13930Sstevel@tonic-gate callb_cpr_t cprinfo;
13940Sstevel@tonic-gate
13950Sstevel@tonic-gate if (pass_thru && (md_get_status() & MD_GBL_DAEMONS_LIVE))
13960Sstevel@tonic-gate return;
13970Sstevel@tonic-gate /*
13980Sstevel@tonic-gate * Register cpr callback
13990Sstevel@tonic-gate */
14000Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &anchor->a_mx, callb_md_cpr, "md_daemon");
14010Sstevel@tonic-gate
14020Sstevel@tonic-gate /*CONSTCOND*/
14030Sstevel@tonic-gate while (1) {
14040Sstevel@tonic-gate mutex_enter(&anchor->a_mx);
14050Sstevel@tonic-gate while ((dq = anchor->dq.dq_next) == &(anchor->dq)) {
14060Sstevel@tonic-gate if (pass_thru) {
14070Sstevel@tonic-gate /*
14080Sstevel@tonic-gate * CALLB_CPR_EXIT Will do
14090Sstevel@tonic-gate * mutex_exit(&anchor->a_mx)
14100Sstevel@tonic-gate */
14110Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
14120Sstevel@tonic-gate return;
14130Sstevel@tonic-gate }
14140Sstevel@tonic-gate if (md_get_status() & MD_GBL_DAEMONS_DIE) {
14150Sstevel@tonic-gate mutex_exit(&anchor->a_mx);
14160Sstevel@tonic-gate mutex_enter(&md_mx);
14170Sstevel@tonic-gate md_num_daemons--;
14180Sstevel@tonic-gate mutex_exit(&md_mx);
14190Sstevel@tonic-gate /*
14200Sstevel@tonic-gate * CALLB_CPR_EXIT will do
14210Sstevel@tonic-gate * mutex_exit(&anchor->a_mx)
14220Sstevel@tonic-gate */
14230Sstevel@tonic-gate mutex_enter(&anchor->a_mx);
14240Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
14250Sstevel@tonic-gate thread_exit();
14260Sstevel@tonic-gate }
14270Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
14280Sstevel@tonic-gate cv_wait(&anchor->a_cv, &anchor->a_mx);
14290Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &anchor->a_mx);
14300Sstevel@tonic-gate }
14310Sstevel@tonic-gate dq->dq_prev->dq_next = dq->dq_next;
14320Sstevel@tonic-gate dq->dq_next->dq_prev = dq->dq_prev;
14330Sstevel@tonic-gate dq->dq_prev = dq->dq_next = NULL;
14340Sstevel@tonic-gate anchor->dq.qlen--;
14350Sstevel@tonic-gate mutex_exit(&anchor->a_mx);
14360Sstevel@tonic-gate (*(dq->dq_call))(dq);
14370Sstevel@tonic-gate }
14380Sstevel@tonic-gate /*NOTREACHED*/
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate
14410Sstevel@tonic-gate /*
14420Sstevel@tonic-gate * daemon_request:
14430Sstevel@tonic-gate *
14440Sstevel@tonic-gate * Adds requests to appropriate requestq which is
14450Sstevel@tonic-gate * anchored by *anchor.
14460Sstevel@tonic-gate * The request is the first element of a doubly linked circular list.
14470Sstevel@tonic-gate * When the request is a single element, the forward and backward
14480Sstevel@tonic-gate * pointers MUST point to the element itself.
14490Sstevel@tonic-gate */
14500Sstevel@tonic-gate
14510Sstevel@tonic-gate void
14520Sstevel@tonic-gate daemon_request(mdq_anchor_t *anchor, void (*func)(),
14530Sstevel@tonic-gate daemon_queue_t *request, callstyle_t style)
14540Sstevel@tonic-gate {
14550Sstevel@tonic-gate daemon_queue_t *rqtp;
14560Sstevel@tonic-gate int i = 0;
14570Sstevel@tonic-gate
14580Sstevel@tonic-gate rqtp = request;
14590Sstevel@tonic-gate if (style == REQ_OLD) {
14600Sstevel@tonic-gate ASSERT((rqtp->dq_next == NULL) && (rqtp->dq_prev == NULL));
14610Sstevel@tonic-gate /* set it to the new style */
14620Sstevel@tonic-gate rqtp->dq_prev = rqtp->dq_next = rqtp;
14630Sstevel@tonic-gate }
14640Sstevel@tonic-gate ASSERT((rqtp->dq_next != NULL) && (rqtp->dq_prev != NULL));
14650Sstevel@tonic-gate
14660Sstevel@tonic-gate /* scan the list and add the function to each element */
14670Sstevel@tonic-gate
14680Sstevel@tonic-gate do {
14690Sstevel@tonic-gate rqtp->dq_call = func;
14700Sstevel@tonic-gate i++;
14710Sstevel@tonic-gate rqtp = rqtp->dq_next;
14720Sstevel@tonic-gate } while (rqtp != request);
14730Sstevel@tonic-gate
14740Sstevel@tonic-gate /* save pointer to tail of the request list */
14750Sstevel@tonic-gate rqtp = request->dq_prev;
14760Sstevel@tonic-gate
14770Sstevel@tonic-gate mutex_enter(&anchor->a_mx);
14780Sstevel@tonic-gate /* stats */
14790Sstevel@tonic-gate anchor->dq.qlen += i;
14800Sstevel@tonic-gate anchor->dq.treqs += i;
14810Sstevel@tonic-gate anchor->dq.maxq_len = (anchor->dq.qlen > anchor->dq.maxq_len) ?
14827563SPrasad.Singamsetty@Sun.COM anchor->dq.qlen : anchor->dq.maxq_len;
14830Sstevel@tonic-gate
14840Sstevel@tonic-gate /* now add the list to request queue */
14850Sstevel@tonic-gate request->dq_prev = anchor->dq.dq_prev;
14860Sstevel@tonic-gate rqtp->dq_next = &anchor->dq;
14870Sstevel@tonic-gate anchor->dq.dq_prev->dq_next = request;
14880Sstevel@tonic-gate anchor->dq.dq_prev = rqtp;
14890Sstevel@tonic-gate cv_broadcast(&anchor->a_cv);
14900Sstevel@tonic-gate mutex_exit(&anchor->a_mx);
14910Sstevel@tonic-gate }
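
/*
 * Illustrative sketch, not part of the driver: queueing a single request
 * in the old style.  With REQ_OLD both link pointers must be NULL on
 * entry; daemon_request() turns the element into a one-entry circular
 * list itself.  Real callers typically embed daemon_queue_t at the start
 * of a larger request structure; "example_queue" (an mdq_anchor_t) and
 * "example_handler" are assumed names, used for illustration only.
 *
 *	daemon_queue_t dq;
 *
 *	bzero(&dq, sizeof (daemon_queue_t));
 *	daemon_request(&example_queue, example_handler, &dq, REQ_OLD);
 */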
14920Sstevel@tonic-gate
14930Sstevel@tonic-gate void
14940Sstevel@tonic-gate mddb_commitrec_wrapper(mddb_recid_t recid)
14950Sstevel@tonic-gate {
14960Sstevel@tonic-gate int sent_log = 0;
14970Sstevel@tonic-gate uint_t retry = md_retry_cnt;
14980Sstevel@tonic-gate set_t setno;
14990Sstevel@tonic-gate
15000Sstevel@tonic-gate while (mddb_commitrec(recid)) {
15010Sstevel@tonic-gate if (! sent_log) {
15020Sstevel@tonic-gate cmn_err(CE_WARN,
15030Sstevel@tonic-gate "md: state database commit failed");
15040Sstevel@tonic-gate sent_log = 1;
15050Sstevel@tonic-gate }
15060Sstevel@tonic-gate delay(md_hz);
15070Sstevel@tonic-gate
15080Sstevel@tonic-gate /*
15090Sstevel@tonic-gate * Setting retry cnt to one (pre-decremented) so that we
15100Sstevel@tonic-gate * actually do no retries when committing/deleting an mddb rec.
15110Sstevel@tonic-gate * The underlying disk driver does several retries to check
15120Sstevel@tonic-gate * whether the disk is really dead or not, so there
15130Sstevel@tonic-gate * is no reason for us to retry on top of the driver's retries.
15140Sstevel@tonic-gate */
15150Sstevel@tonic-gate
15160Sstevel@tonic-gate if (--retry == 0) {
15170Sstevel@tonic-gate setno = mddb_getsetnum(recid);
15180Sstevel@tonic-gate if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
15190Sstevel@tonic-gate panic(
15200Sstevel@tonic-gate "md: Panic due to lack of DiskSuite state\n"
15210Sstevel@tonic-gate " database replicas. Fewer than 50%% of "
15220Sstevel@tonic-gate "the total were available,\n so panic to "
15230Sstevel@tonic-gate "ensure data integrity.");
15240Sstevel@tonic-gate } else {
15250Sstevel@tonic-gate panic("md: state database problem");
15260Sstevel@tonic-gate }
15270Sstevel@tonic-gate /*NOTREACHED*/
15280Sstevel@tonic-gate }
15290Sstevel@tonic-gate }
15300Sstevel@tonic-gate }
15310Sstevel@tonic-gate
15320Sstevel@tonic-gate void
15330Sstevel@tonic-gate mddb_commitrecs_wrapper(mddb_recid_t *recids)
15340Sstevel@tonic-gate {
15350Sstevel@tonic-gate int sent_log = 0;
15360Sstevel@tonic-gate uint_t retry = md_retry_cnt;
15370Sstevel@tonic-gate set_t setno;
15380Sstevel@tonic-gate
15390Sstevel@tonic-gate while (mddb_commitrecs(recids)) {
15400Sstevel@tonic-gate if (! sent_log) {
15410Sstevel@tonic-gate cmn_err(CE_WARN,
15420Sstevel@tonic-gate "md: state database commit failed");
15430Sstevel@tonic-gate sent_log = 1;
15440Sstevel@tonic-gate }
15450Sstevel@tonic-gate delay(md_hz);
15460Sstevel@tonic-gate
15470Sstevel@tonic-gate /*
15480Sstevel@tonic-gate * Setting retry cnt to one (pre-decremented) so that we
15490Sstevel@tonic-gate * actually do no retries when committing/deleting an mddb rec.
15500Sstevel@tonic-gate * The underlying disk driver does several retries to check
15510Sstevel@tonic-gate * whether the disk is really dead or not, so there
15520Sstevel@tonic-gate * is no reason for us to retry on top of the driver's retries.
15530Sstevel@tonic-gate */
15540Sstevel@tonic-gate
15550Sstevel@tonic-gate if (--retry == 0) {
15560Sstevel@tonic-gate /*
15570Sstevel@tonic-gate * since all the records are part of the same set
15580Sstevel@tonic-gate * use the first one to get setno
15590Sstevel@tonic-gate */
15600Sstevel@tonic-gate setno = mddb_getsetnum(*recids);
15610Sstevel@tonic-gate if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
15620Sstevel@tonic-gate panic(
15630Sstevel@tonic-gate "md: Panic due to lack of DiskSuite state\n"
15640Sstevel@tonic-gate " database replicas. Fewer than 50%% of "
15650Sstevel@tonic-gate "the total were available,\n so panic to "
15660Sstevel@tonic-gate "ensure data integrity.");
15670Sstevel@tonic-gate } else {
15680Sstevel@tonic-gate panic("md: state database problem");
15690Sstevel@tonic-gate }
15700Sstevel@tonic-gate /*NOTREACHED*/
15710Sstevel@tonic-gate }
15720Sstevel@tonic-gate }
15730Sstevel@tonic-gate }
15740Sstevel@tonic-gate
15750Sstevel@tonic-gate void
15760Sstevel@tonic-gate mddb_deleterec_wrapper(mddb_recid_t recid)
15770Sstevel@tonic-gate {
15780Sstevel@tonic-gate int sent_log = 0;
15790Sstevel@tonic-gate uint_t retry = md_retry_cnt;
15800Sstevel@tonic-gate set_t setno;
15810Sstevel@tonic-gate
15820Sstevel@tonic-gate while (mddb_deleterec(recid)) {
15830Sstevel@tonic-gate if (! sent_log) {
15840Sstevel@tonic-gate cmn_err(CE_WARN,
15850Sstevel@tonic-gate "md: state database delete failed");
15860Sstevel@tonic-gate sent_log = 1;
15870Sstevel@tonic-gate }
15880Sstevel@tonic-gate delay(md_hz);
15890Sstevel@tonic-gate
15900Sstevel@tonic-gate /*
15910Sstevel@tonic-gate * Setting retry cnt to one (pre-decremented) so that we
15920Sstevel@tonic-gate * actually do no retries when committing/deleting an mddb rec.
15930Sstevel@tonic-gate * The underlying disk driver does several retries to check
15940Sstevel@tonic-gate * whether the disk is really dead or not, so there
15950Sstevel@tonic-gate * is no reason for us to retry on top of the driver's retries.
15960Sstevel@tonic-gate */
15970Sstevel@tonic-gate
15980Sstevel@tonic-gate if (--retry == 0) {
15990Sstevel@tonic-gate setno = mddb_getsetnum(recid);
16000Sstevel@tonic-gate if (md_get_setstatus(setno) & MD_SET_TOOFEW) {
16010Sstevel@tonic-gate panic(
16020Sstevel@tonic-gate "md: Panic due to lack of DiskSuite state\n"
16030Sstevel@tonic-gate " database replicas. Fewer than 50%% of "
16040Sstevel@tonic-gate "the total were available,\n so panic to "
16050Sstevel@tonic-gate "ensure data integrity.");
16060Sstevel@tonic-gate } else {
16070Sstevel@tonic-gate panic("md: state database problem");
16080Sstevel@tonic-gate }
16090Sstevel@tonic-gate /*NOTREACHED*/
16100Sstevel@tonic-gate }
16110Sstevel@tonic-gate }
16120Sstevel@tonic-gate }
16130Sstevel@tonic-gate
16140Sstevel@tonic-gate /*
16150Sstevel@tonic-gate * md_holdset_enter is called in order to hold the set in its
16160Sstevel@tonic-gate * current state (loaded, unloaded, snarfed, unsnarfed, etc)
16170Sstevel@tonic-gate * until md_holdset_exit is called. This is used by the mirror
16180Sstevel@tonic-gate * code to mark the set as HOLD so that the set won't be
16190Sstevel@tonic-gate * unloaded while hotspares are being allocated in check_4_hotspares.
16200Sstevel@tonic-gate * The original fix to the mirror code to hold the set was to call
16210Sstevel@tonic-gate * md_haltsnarf_enter, but this will block all ioctls and ioctls
16220Sstevel@tonic-gate * must work for a MN diskset while hotspares are allocated.
16230Sstevel@tonic-gate */
16240Sstevel@tonic-gate void
16250Sstevel@tonic-gate md_holdset_enter(set_t setno)
16260Sstevel@tonic-gate {
16270Sstevel@tonic-gate mutex_enter(&md_mx);
16280Sstevel@tonic-gate while (md_set[setno].s_status & MD_SET_HOLD)
16290Sstevel@tonic-gate cv_wait(&md_cv, &md_mx);
16300Sstevel@tonic-gate md_set[setno].s_status |= MD_SET_HOLD;
16310Sstevel@tonic-gate mutex_exit(&md_mx);
16320Sstevel@tonic-gate }
16330Sstevel@tonic-gate
16340Sstevel@tonic-gate void
16350Sstevel@tonic-gate md_holdset_exit(set_t setno)
16360Sstevel@tonic-gate {
16370Sstevel@tonic-gate mutex_enter(&md_mx);
16380Sstevel@tonic-gate md_set[setno].s_status &= ~MD_SET_HOLD;
16390Sstevel@tonic-gate cv_broadcast(&md_cv);
16400Sstevel@tonic-gate mutex_exit(&md_mx);
16410Sstevel@tonic-gate }
16420Sstevel@tonic-gate
16430Sstevel@tonic-gate /*
16440Sstevel@tonic-gate * Returns 0 if this thread marked the set as HOLD (success),
16450Sstevel@tonic-gate * returns -1 if the set was already marked HOLD (failure).
16460Sstevel@tonic-gate * Used by the release_set code to see if set is marked HOLD.
16470Sstevel@tonic-gate * HOLD is set by a daemon when hotspares are being allocated
16480Sstevel@tonic-gate * to mirror units.
16490Sstevel@tonic-gate */
16500Sstevel@tonic-gate int
16510Sstevel@tonic-gate md_holdset_testandenter(set_t setno)
16520Sstevel@tonic-gate {
16530Sstevel@tonic-gate mutex_enter(&md_mx);
16540Sstevel@tonic-gate if (md_set[setno].s_status & MD_SET_HOLD) {
16550Sstevel@tonic-gate mutex_exit(&md_mx);
16560Sstevel@tonic-gate return (-1);
16570Sstevel@tonic-gate }
16580Sstevel@tonic-gate md_set[setno].s_status |= MD_SET_HOLD;
16590Sstevel@tonic-gate mutex_exit(&md_mx);
16600Sstevel@tonic-gate return (0);
16610Sstevel@tonic-gate }
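
/*
 * Illustrative sketch, not part of the driver: how a release path might
 * use the test-and-enter variant so that it backs off while a daemon
 * holds the set for hotspare allocation.  The surrounding logic and the
 * EBUSY return are assumptions, used for illustration only.
 *
 *	if (md_holdset_testandenter(setno) != 0)
 *		return (EBUSY);		set is already held by a daemon
 *	... release the set ...
 *	md_holdset_exit(setno);
 */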
16620Sstevel@tonic-gate
16630Sstevel@tonic-gate void
16640Sstevel@tonic-gate md_haltsnarf_enter(set_t setno)
16650Sstevel@tonic-gate {
16660Sstevel@tonic-gate mutex_enter(&md_mx);
16670Sstevel@tonic-gate while (md_set[setno].s_status & MD_SET_SNARFING)
16680Sstevel@tonic-gate cv_wait(&md_cv, &md_mx);
16690Sstevel@tonic-gate
16700Sstevel@tonic-gate md_set[setno].s_status |= MD_SET_SNARFING;
16710Sstevel@tonic-gate mutex_exit(&md_mx);
16720Sstevel@tonic-gate }
16730Sstevel@tonic-gate
16740Sstevel@tonic-gate void
16750Sstevel@tonic-gate md_haltsnarf_exit(set_t setno)
16760Sstevel@tonic-gate {
16770Sstevel@tonic-gate mutex_enter(&md_mx);
16780Sstevel@tonic-gate md_set[setno].s_status &= ~MD_SET_SNARFING;
16790Sstevel@tonic-gate cv_broadcast(&md_cv);
16800Sstevel@tonic-gate mutex_exit(&md_mx);
16810Sstevel@tonic-gate }
16820Sstevel@tonic-gate
16830Sstevel@tonic-gate void
16840Sstevel@tonic-gate md_haltsnarf_wait(set_t setno)
16850Sstevel@tonic-gate {
16860Sstevel@tonic-gate mutex_enter(&md_mx);
16870Sstevel@tonic-gate while (md_set[setno].s_status & MD_SET_SNARFING)
16880Sstevel@tonic-gate cv_wait(&md_cv, &md_mx);
16890Sstevel@tonic-gate mutex_exit(&md_mx);
16900Sstevel@tonic-gate }
16910Sstevel@tonic-gate
16920Sstevel@tonic-gate /*
16930Sstevel@tonic-gate * ASSUMED that the md_unit_array_rw WRITER lock is held.
16940Sstevel@tonic-gate */
16950Sstevel@tonic-gate int
16960Sstevel@tonic-gate md_halt_set(set_t setno, enum md_haltcmd cmd)
16970Sstevel@tonic-gate {
16980Sstevel@tonic-gate int i, err;
16990Sstevel@tonic-gate
17000Sstevel@tonic-gate if (md_set[setno].s_un == NULL || md_set[setno].s_ui == NULL) {
17010Sstevel@tonic-gate return (0);
17020Sstevel@tonic-gate }
17030Sstevel@tonic-gate
17040Sstevel@tonic-gate if ((cmd == MD_HALT_CHECK) || (cmd == MD_HALT_ALL)) {
17050Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
17060Sstevel@tonic-gate if (md_ops[i] == NULL)
17070Sstevel@tonic-gate continue;
17080Sstevel@tonic-gate if ((*(md_ops[i]->md_halt))(MD_HALT_CLOSE, setno)) {
17090Sstevel@tonic-gate for (--i; i > 0; --i) {
17100Sstevel@tonic-gate if (md_ops[i] == NULL)
17110Sstevel@tonic-gate continue;
17120Sstevel@tonic-gate (void) (*(md_ops[i]->md_halt))
17130Sstevel@tonic-gate (MD_HALT_OPEN, setno);
17140Sstevel@tonic-gate }
17150Sstevel@tonic-gate return (EBUSY);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate }
17180Sstevel@tonic-gate
17190Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
17200Sstevel@tonic-gate if (md_ops[i] == NULL)
17210Sstevel@tonic-gate continue;
17220Sstevel@tonic-gate if ((*(md_ops[i]->md_halt))(MD_HALT_CHECK, setno)) {
17230Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
17240Sstevel@tonic-gate if (md_ops[i] == NULL)
17250Sstevel@tonic-gate continue;
17260Sstevel@tonic-gate (void) (*(md_ops[i]->md_halt))
17270Sstevel@tonic-gate (MD_HALT_OPEN, setno);
17280Sstevel@tonic-gate }
17290Sstevel@tonic-gate return (EBUSY);
17300Sstevel@tonic-gate }
17310Sstevel@tonic-gate }
17320Sstevel@tonic-gate }
17330Sstevel@tonic-gate
17340Sstevel@tonic-gate if ((cmd == MD_HALT_DOIT) || (cmd == MD_HALT_ALL)) {
17350Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
17360Sstevel@tonic-gate if (md_ops[i] == NULL)
17370Sstevel@tonic-gate continue;
17380Sstevel@tonic-gate err = (*(md_ops[i]->md_halt))(MD_HALT_DOIT, setno);
17390Sstevel@tonic-gate if (err != 0)
17400Sstevel@tonic-gate cmn_err(CE_NOTE,
17410Sstevel@tonic-gate "md: halt failed for %s, error %d",
17420Sstevel@tonic-gate md_ops[i]->md_driver.md_drivername, err);
17430Sstevel@tonic-gate }
17440Sstevel@tonic-gate
17450Sstevel@tonic-gate /*
17460Sstevel@tonic-gate * Unload the devid namespace if it is loaded
17470Sstevel@tonic-gate */
17480Sstevel@tonic-gate md_unload_namespace(setno, NM_DEVID);
17490Sstevel@tonic-gate md_unload_namespace(setno, 0L);
17500Sstevel@tonic-gate md_clr_setstatus(setno, MD_SET_SNARFED);
17510Sstevel@tonic-gate }
17520Sstevel@tonic-gate
17530Sstevel@tonic-gate return (0);
17540Sstevel@tonic-gate }
17550Sstevel@tonic-gate
17560Sstevel@tonic-gate int
17570Sstevel@tonic-gate md_halt(int global_locks_owned_mask)
17580Sstevel@tonic-gate {
17590Sstevel@tonic-gate set_t i, j;
17600Sstevel@tonic-gate int err;
17610Sstevel@tonic-gate int init_queues;
17620Sstevel@tonic-gate md_requestq_entry_t *rqp;
17630Sstevel@tonic-gate md_ops_t **pops, *ops, *lops;
17640Sstevel@tonic-gate ddi_modhandle_t mod;
17650Sstevel@tonic-gate char *name;
17660Sstevel@tonic-gate
17670Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_WRITER);
17680Sstevel@tonic-gate
17690Sstevel@tonic-gate /*
17700Sstevel@tonic-gate * Grab all of the global locks that are not
17710Sstevel@tonic-gate * already owned to ensure that there isn't another
17720Sstevel@tonic-gate * thread trying to access a global resource
17730Sstevel@tonic-gate * while the halt is in progress
17740Sstevel@tonic-gate */
17750Sstevel@tonic-gate if (md_global_lock_enter(global_locks_owned_mask) == EINTR)
17760Sstevel@tonic-gate return (EINTR);
17770Sstevel@tonic-gate
17780Sstevel@tonic-gate for (i = 0; i < md_nsets; i++)
17790Sstevel@tonic-gate md_haltsnarf_enter(i);
17800Sstevel@tonic-gate
17810Sstevel@tonic-gate /*
17820Sstevel@tonic-gate * Kill the daemon threads.
17830Sstevel@tonic-gate */
17840Sstevel@tonic-gate init_queues = ((md_get_status() & MD_GBL_DAEMONS_LIVE) ? FALSE : TRUE);
17850Sstevel@tonic-gate md_clr_status(MD_GBL_DAEMONS_LIVE);
17860Sstevel@tonic-gate md_set_status(MD_GBL_DAEMONS_DIE);
17870Sstevel@tonic-gate
17880Sstevel@tonic-gate rqp = &md_daemon_queues[0];
17890Sstevel@tonic-gate i = 0;
17900Sstevel@tonic-gate while (!NULL_REQUESTQ_ENTRY(rqp)) {
17910Sstevel@tonic-gate cv_broadcast(&rqp->dispq_headp->a_cv);
17920Sstevel@tonic-gate rqp = &md_daemon_queues[++i];
17930Sstevel@tonic-gate }
17940Sstevel@tonic-gate
17950Sstevel@tonic-gate mutex_enter(&md_mx);
17960Sstevel@tonic-gate while (md_num_daemons != 0) {
17970Sstevel@tonic-gate mutex_exit(&md_mx);
17980Sstevel@tonic-gate delay(md_hz);
17990Sstevel@tonic-gate mutex_enter(&md_mx);
18000Sstevel@tonic-gate }
18010Sstevel@tonic-gate mutex_exit(&md_mx);
18020Sstevel@tonic-gate md_clr_status(MD_GBL_DAEMONS_DIE);
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate for (i = 0; i < md_nsets; i++)
18050Sstevel@tonic-gate /*
18060Sstevel@tonic-gate * Only call into md_halt_set if s_un / s_ui are both set.
18070Sstevel@tonic-gate * If they are NULL this set hasn't been accessed, so it's
18080Sstevel@tonic-gate * pointless performing the call.
18090Sstevel@tonic-gate */
18100Sstevel@tonic-gate if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) {
18110Sstevel@tonic-gate if (md_halt_set(i, MD_HALT_CHECK)) {
18120Sstevel@tonic-gate if (md_start_daemons(init_queues))
18130Sstevel@tonic-gate cmn_err(CE_WARN,
18140Sstevel@tonic-gate "md: restart of daemon threads "
18150Sstevel@tonic-gate "failed");
18160Sstevel@tonic-gate for (j = 0; j < md_nsets; j++)
18170Sstevel@tonic-gate md_haltsnarf_exit(j);
18180Sstevel@tonic-gate
18190Sstevel@tonic-gate return (md_global_lock_exit(
18200Sstevel@tonic-gate global_locks_owned_mask, EBUSY,
18210Sstevel@tonic-gate MD_ARRAY_WRITER, NULL));
18220Sstevel@tonic-gate }
18230Sstevel@tonic-gate }
18240Sstevel@tonic-gate
18250Sstevel@tonic-gate /*
18260Sstevel@tonic-gate * if we get here we are going to do it
18270Sstevel@tonic-gate */
18280Sstevel@tonic-gate for (i = 0; i < md_nsets; i++) {
18290Sstevel@tonic-gate /*
18300Sstevel@tonic-gate * Only call into md_halt_set if s_un / s_ui are both set.
18310Sstevel@tonic-gate * If they are NULL this set hasn't been accessed, so it's
18320Sstevel@tonic-gate * pointless performing the call.
18330Sstevel@tonic-gate */
18340Sstevel@tonic-gate if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) {
18350Sstevel@tonic-gate err = md_halt_set(i, MD_HALT_DOIT);
18360Sstevel@tonic-gate if (err != 0)
18370Sstevel@tonic-gate cmn_err(CE_NOTE,
18380Sstevel@tonic-gate "md: halt failed set %u, error %d",
18390Sstevel@tonic-gate (unsigned)i, err);
18400Sstevel@tonic-gate }
18410Sstevel@tonic-gate }
18420Sstevel@tonic-gate
18430Sstevel@tonic-gate /*
18440Sstevel@tonic-gate * Issue a halt unload to each module to indicate that it
18450Sstevel@tonic-gate * is about to be unloaded. Each module is called once; the set
18460Sstevel@tonic-gate * argument has no meaning at this point in time.
18470Sstevel@tonic-gate */
18480Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
18490Sstevel@tonic-gate if (md_ops[i] == NULL)
18500Sstevel@tonic-gate continue;
18510Sstevel@tonic-gate err = (*(md_ops[i]->md_halt))(MD_HALT_UNLOAD, 0);
18520Sstevel@tonic-gate if (err != 0)
18530Sstevel@tonic-gate cmn_err(CE_NOTE,
18540Sstevel@tonic-gate "md: halt failed for %s, error %d",
18550Sstevel@tonic-gate md_ops[i]->md_driver.md_drivername, err);
18560Sstevel@tonic-gate }
18570Sstevel@tonic-gate
18580Sstevel@tonic-gate /* ddi_modclose the submodules */
18590Sstevel@tonic-gate for (i = 0; i < MD_NOPS; i++) {
18600Sstevel@tonic-gate /* skip if not open */
18610Sstevel@tonic-gate if ((md_ops[i] == NULL) || (md_mods[i] == NULL))
18620Sstevel@tonic-gate continue;
18630Sstevel@tonic-gate
18640Sstevel@tonic-gate /* find and unlink from md_opslist */
18650Sstevel@tonic-gate ops = md_ops[i];
18660Sstevel@tonic-gate mod = md_mods[i];
18670Sstevel@tonic-gate pops = &md_opslist;
18680Sstevel@tonic-gate for (lops = *pops; lops;
18690Sstevel@tonic-gate pops = &lops->md_next, lops = *pops) {
18700Sstevel@tonic-gate if (lops == ops) {
18710Sstevel@tonic-gate *pops = ops->md_next;
18720Sstevel@tonic-gate ops->md_next = NULL;
18730Sstevel@tonic-gate break;
18740Sstevel@tonic-gate }
18750Sstevel@tonic-gate }
18760Sstevel@tonic-gate
18770Sstevel@tonic-gate /* uninitialize */
18787563SPrasad.Singamsetty@Sun.COM name = ops->md_driver.md_drivername;
18790Sstevel@tonic-gate md_ops[i] = NULL;
18800Sstevel@tonic-gate md_mods[i] = NULL;
18810Sstevel@tonic-gate ops->md_selfindex = 0;
18820Sstevel@tonic-gate ops->md_driver.md_drivername[0] = '\0';
18830Sstevel@tonic-gate rw_destroy(&ops->md_link_rw.lock);
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate /* close */
18860Sstevel@tonic-gate err = ddi_modclose(mod);
18870Sstevel@tonic-gate if (err != 0)
18880Sstevel@tonic-gate cmn_err(CE_NOTE,
18890Sstevel@tonic-gate "md: halt close failed for %s, error %d",
18900Sstevel@tonic-gate name ? name : "UNKNOWN", err);
18910Sstevel@tonic-gate }
18920Sstevel@tonic-gate
18930Sstevel@tonic-gate /* Unload the database */
18940Sstevel@tonic-gate mddb_unload();
18950Sstevel@tonic-gate
18960Sstevel@tonic-gate md_set_status(MD_GBL_HALTED); /* we are ready to be unloaded */
18970Sstevel@tonic-gate
18980Sstevel@tonic-gate for (i = 0; i < md_nsets; i++)
18990Sstevel@tonic-gate md_haltsnarf_exit(i);
19000Sstevel@tonic-gate
19010Sstevel@tonic-gate return (md_global_lock_exit(global_locks_owned_mask, 0,
19027563SPrasad.Singamsetty@Sun.COM MD_ARRAY_WRITER, NULL));
19030Sstevel@tonic-gate }
19040Sstevel@tonic-gate
19050Sstevel@tonic-gate /*
19060Sstevel@tonic-gate * md_layered_open() is an internal routine only for SVM modules.
19070Sstevel@tonic-gate * So the input device will be a md_dev64_t, because all SVM modules internally
19080Sstevel@tonic-gate * work with that device type.
19090Sstevel@tonic-gate * ddi routines on the other hand work with dev_t. So, if we call any ddi
19100Sstevel@tonic-gate * routines from here we first have to convert that device into a dev_t.
19110Sstevel@tonic-gate */
19120Sstevel@tonic-gate
19130Sstevel@tonic-gate int
19140Sstevel@tonic-gate md_layered_open(
19150Sstevel@tonic-gate minor_t mnum,
19160Sstevel@tonic-gate md_dev64_t *dev,
19170Sstevel@tonic-gate int md_oflags
19180Sstevel@tonic-gate )
19190Sstevel@tonic-gate {
19200Sstevel@tonic-gate int flag = (FREAD | FWRITE);
19210Sstevel@tonic-gate cred_t *cred_p = kcred;
19220Sstevel@tonic-gate major_t major;
19230Sstevel@tonic-gate int err;
19240Sstevel@tonic-gate dev_t ddi_dev = md_dev64_to_dev(*dev);
19250Sstevel@tonic-gate
19260Sstevel@tonic-gate if (ddi_dev == NODEV)
19270Sstevel@tonic-gate return (ENODEV);
19280Sstevel@tonic-gate
19290Sstevel@tonic-gate major = getmajor(ddi_dev);
19300Sstevel@tonic-gate
19310Sstevel@tonic-gate /* metadevice */
19320Sstevel@tonic-gate if (major == md_major) {
19330Sstevel@tonic-gate mdi_unit_t *ui;
19340Sstevel@tonic-gate
19350Sstevel@tonic-gate /* open underlying driver */
19360Sstevel@tonic-gate mnum = getminor(ddi_dev);
19370Sstevel@tonic-gate
19380Sstevel@tonic-gate ui = MDI_UNIT(mnum);
19390Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_open != NULL) {
19400Sstevel@tonic-gate int ret = (*md_ops[ui->ui_opsindex]->md_open)(&ddi_dev,
19417563SPrasad.Singamsetty@Sun.COM flag, OTYP_LYR, cred_p, md_oflags);
19420Sstevel@tonic-gate /*
19430Sstevel@tonic-gate * As open() may change the device,
19440Sstevel@tonic-gate * send this info back to the caller.
19450Sstevel@tonic-gate */
19460Sstevel@tonic-gate *dev = md_expldev(ddi_dev);
19470Sstevel@tonic-gate return (ret);
19480Sstevel@tonic-gate }
19490Sstevel@tonic-gate
19500Sstevel@tonic-gate /* or do it ourselves */
19510Sstevel@tonic-gate (void) md_unit_openclose_enter(ui);
19520Sstevel@tonic-gate err = md_unit_incopen(mnum, flag, OTYP_LYR);
19530Sstevel@tonic-gate md_unit_openclose_exit(ui);
19540Sstevel@tonic-gate /* convert our ddi_dev back to the dev we were given */
19550Sstevel@tonic-gate *dev = md_expldev(ddi_dev);
19560Sstevel@tonic-gate return (err);
19570Sstevel@tonic-gate }
19580Sstevel@tonic-gate
19590Sstevel@tonic-gate /*
19600Sstevel@tonic-gate * Open regular device, since open() may change dev_t give new dev_t
19610Sstevel@tonic-gate * back to the caller.
19620Sstevel@tonic-gate */
19630Sstevel@tonic-gate err = dev_lopen(&ddi_dev, flag, OTYP_LYR, cred_p);
19640Sstevel@tonic-gate *dev = md_expldev(ddi_dev);
19650Sstevel@tonic-gate return (err);
19660Sstevel@tonic-gate }
19670Sstevel@tonic-gate
19680Sstevel@tonic-gate /*
19690Sstevel@tonic-gate * md_layered_close() is an internal routine only for SVM modules.
19700Sstevel@tonic-gate * So the input device will be a md_dev64_t, because all SVM modules internally
19710Sstevel@tonic-gate * work with that device type.
19720Sstevel@tonic-gate * ddi routines on the other hand work with dev_t. So, if we call any ddi
19730Sstevel@tonic-gate * routines from here we first have to convert that device into a dev_t.
19740Sstevel@tonic-gate */
19750Sstevel@tonic-gate void
19760Sstevel@tonic-gate md_layered_close(
19770Sstevel@tonic-gate md_dev64_t dev,
19780Sstevel@tonic-gate int md_cflags
19790Sstevel@tonic-gate )
19800Sstevel@tonic-gate {
19810Sstevel@tonic-gate int flag = (FREAD | FWRITE);
19820Sstevel@tonic-gate cred_t *cred_p = kcred;
19830Sstevel@tonic-gate dev_t ddi_dev = md_dev64_to_dev(dev);
19840Sstevel@tonic-gate major_t major = getmajor(ddi_dev);
19850Sstevel@tonic-gate minor_t mnum = getminor(ddi_dev);
19860Sstevel@tonic-gate
19870Sstevel@tonic-gate /* metadevice */
19880Sstevel@tonic-gate if (major == md_major) {
19890Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(mnum);
19900Sstevel@tonic-gate
19910Sstevel@tonic-gate /* close underlying driver */
19920Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_close != NULL) {
19930Sstevel@tonic-gate (*md_ops[ui->ui_opsindex]->md_close)
19940Sstevel@tonic-gate (ddi_dev, flag, OTYP_LYR, cred_p, md_cflags);
19950Sstevel@tonic-gate return;
19960Sstevel@tonic-gate }
19970Sstevel@tonic-gate
19980Sstevel@tonic-gate /* or do it ourselves */
19990Sstevel@tonic-gate (void) md_unit_openclose_enter(ui);
20000Sstevel@tonic-gate (void) md_unit_decopen(mnum, OTYP_LYR);
20010Sstevel@tonic-gate md_unit_openclose_exit(ui);
20020Sstevel@tonic-gate return;
20030Sstevel@tonic-gate }
20040Sstevel@tonic-gate
20050Sstevel@tonic-gate /* close regular device */
20060Sstevel@tonic-gate (void) dev_lclose(ddi_dev, flag, OTYP_LYR, cred_p);
20070Sstevel@tonic-gate }
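
/*
 * Illustrative sketch, not part of the driver: how a submodule might
 * open and later close an underlying component.  The opened device is
 * passed by reference because open() may change it.  The variable
 * comp_dev and the zero open/close flags are assumptions, used for
 * illustration only.
 *
 *	md_dev64_t comp_dev = ...;	component in md_dev64_t form
 *	int err;
 *
 *	err = md_layered_open(mnum, &comp_dev, 0);
 *	if (err == 0) {
 *		... do I/O through comp_dev ...
 *		md_layered_close(comp_dev, 0);
 *	}
 */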
20080Sstevel@tonic-gate
20090Sstevel@tonic-gate /*
20100Sstevel@tonic-gate * saves a little code in mdstrategy
20110Sstevel@tonic-gate */
20120Sstevel@tonic-gate int
20130Sstevel@tonic-gate errdone(mdi_unit_t *ui, struct buf *bp, int err)
20140Sstevel@tonic-gate {
20150Sstevel@tonic-gate if ((bp->b_error = err) != 0)
20160Sstevel@tonic-gate bp->b_flags |= B_ERROR;
20170Sstevel@tonic-gate else
20180Sstevel@tonic-gate bp->b_resid = bp->b_bcount;
20190Sstevel@tonic-gate md_unit_readerexit(ui);
20200Sstevel@tonic-gate md_biodone(bp);
20210Sstevel@tonic-gate return (1);
20220Sstevel@tonic-gate }
20230Sstevel@tonic-gate
20240Sstevel@tonic-gate static int md_write_label = 0;
20250Sstevel@tonic-gate
20260Sstevel@tonic-gate int
20270Sstevel@tonic-gate md_checkbuf(mdi_unit_t *ui, md_unit_t *un, buf_t *bp)
20280Sstevel@tonic-gate {
20290Sstevel@tonic-gate diskaddr_t endblk;
20300Sstevel@tonic-gate set_t setno = MD_UN2SET(un);
20310Sstevel@tonic-gate
20320Sstevel@tonic-gate if ((md_get_setstatus(setno) & MD_SET_STALE) &&
20330Sstevel@tonic-gate (! (bp->b_flags & B_READ)))
20340Sstevel@tonic-gate return (errdone(ui, bp, EROFS));
20350Sstevel@tonic-gate /*
20360Sstevel@tonic-gate * Check early for unreasonable block number.
20370Sstevel@tonic-gate *
20380Sstevel@tonic-gate * b_blkno is defined as a daddr_t, which is typedef'd to a long.
20390Sstevel@tonic-gate * A problem occurs if b_blkno has bit 31 set and un_total_blocks
20400Sstevel@tonic-gate * doesn't: b_blkno is then compared as a negative number, which is
20410Sstevel@tonic-gate * always less than a positive one.
20420Sstevel@tonic-gate */
20430Sstevel@tonic-gate if ((u_longlong_t)bp->b_lblkno > (u_longlong_t)un->c.un_total_blocks)
20440Sstevel@tonic-gate return (errdone(ui, bp, EINVAL));
20450Sstevel@tonic-gate
20460Sstevel@tonic-gate if (bp->b_lblkno == un->c.un_total_blocks)
20470Sstevel@tonic-gate return (errdone(ui, bp, 0));
20480Sstevel@tonic-gate
20490Sstevel@tonic-gate /*
20500Sstevel@tonic-gate * make sure we don't clobber any labels
20510Sstevel@tonic-gate */
20520Sstevel@tonic-gate if ((bp->b_lblkno == 0) && (! (bp->b_flags & B_READ)) &&
20530Sstevel@tonic-gate (un->c.un_flag & MD_LABELED) && (! md_write_label)) {
20540Sstevel@tonic-gate cmn_err(CE_NOTE, "md: %s: write to label",
20550Sstevel@tonic-gate md_shortname(getminor(bp->b_edev)));
20560Sstevel@tonic-gate return (errdone(ui, bp, EINVAL));
20570Sstevel@tonic-gate }
20580Sstevel@tonic-gate
20590Sstevel@tonic-gate bp->b_resid = 0;
20600Sstevel@tonic-gate endblk = (diskaddr_t)(bp->b_lblkno +
20617563SPrasad.Singamsetty@Sun.COM howmany(bp->b_bcount, DEV_BSIZE) - 1);
20620Sstevel@tonic-gate
20630Sstevel@tonic-gate if (endblk > (un->c.un_total_blocks - 1)) {
20640Sstevel@tonic-gate bp->b_resid = dbtob(endblk - (un->c.un_total_blocks - 1));
20650Sstevel@tonic-gate endblk = un->c.un_total_blocks - 1;
20660Sstevel@tonic-gate bp->b_bcount -= bp->b_resid;
20670Sstevel@tonic-gate }
20680Sstevel@tonic-gate return (0);
20690Sstevel@tonic-gate }
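
/*
 * Illustrative example, not part of the driver: for a metadevice with
 * un_total_blocks == 4, a write with b_lblkno == 2 and b_bcount ==
 * 3 * DEV_BSIZE gives endblk == 4, one past the last valid block (3).
 * md_checkbuf() then trims the transfer: b_resid becomes dbtob(1) and
 * b_bcount is reduced to 2 * DEV_BSIZE.
 */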
20700Sstevel@tonic-gate
20710Sstevel@tonic-gate /*
20720Sstevel@tonic-gate * init_requestq: initializes the request queue and creates the threads.
20730Sstevel@tonic-gate * return value = 0 : invalid num_threads
20740Sstevel@tonic-gate *              = n : n is the number of threads created.
20750Sstevel@tonic-gate */
20760Sstevel@tonic-gate
20770Sstevel@tonic-gate int
20780Sstevel@tonic-gate init_requestq(
20790Sstevel@tonic-gate md_requestq_entry_t *rq, /* request queue info */
20800Sstevel@tonic-gate void (*threadfn)(), /* function to start the thread */
20810Sstevel@tonic-gate caddr_t threadfn_args, /* args to the function */
20820Sstevel@tonic-gate int pri, /* thread priority */
20830Sstevel@tonic-gate int init_queue) /* flag to init queues */
20840Sstevel@tonic-gate {
20850Sstevel@tonic-gate struct mdq_anchor *rqhead;
20860Sstevel@tonic-gate int i;
20870Sstevel@tonic-gate int num_threads;
20880Sstevel@tonic-gate
20890Sstevel@tonic-gate
20900Sstevel@tonic-gate num_threads = *(rq->num_threadsp);
20910Sstevel@tonic-gate rqhead = rq->dispq_headp;
20920Sstevel@tonic-gate
20930Sstevel@tonic-gate if (NULL_REQUESTQ_ENTRY(rq) || num_threads == 0)
20940Sstevel@tonic-gate return (0);
20950Sstevel@tonic-gate
20960Sstevel@tonic-gate if (init_queue) {
20970Sstevel@tonic-gate rqhead->dq.maxq_len = 0;
20980Sstevel@tonic-gate rqhead->dq.treqs = 0;
20990Sstevel@tonic-gate rqhead->dq.dq_next = &rqhead->dq;
21000Sstevel@tonic-gate rqhead->dq.dq_prev = &rqhead->dq;
21010Sstevel@tonic-gate cv_init(&rqhead->a_cv, NULL, CV_DEFAULT, NULL);
21020Sstevel@tonic-gate mutex_init(&rqhead->a_mx, NULL, MUTEX_DEFAULT, NULL);
21030Sstevel@tonic-gate }
21040Sstevel@tonic-gate for (i = 0; i < num_threads; i++) {
21050Sstevel@tonic-gate (void) thread_create(NULL, 0, threadfn, threadfn_args, 0, &p0,
21060Sstevel@tonic-gate TS_RUN, pri);
21070Sstevel@tonic-gate }
21080Sstevel@tonic-gate return (i);
21090Sstevel@tonic-gate }
21100Sstevel@tonic-gate
21110Sstevel@tonic-gate static void
21120Sstevel@tonic-gate start_daemon(struct mdq_anchor *q)
21130Sstevel@tonic-gate {
21140Sstevel@tonic-gate md_daemon(0, q);
21150Sstevel@tonic-gate ASSERT(0);
21160Sstevel@tonic-gate }
21170Sstevel@tonic-gate
21180Sstevel@tonic-gate /*
21190Sstevel@tonic-gate * Creates all the md daemons.
21200Sstevel@tonic-gate * Global:
21210Sstevel@tonic-gate * md_num_daemons is set to number of daemons.
21220Sstevel@tonic-gate * MD_GBL_DAEMONS_LIVE flag set to indicate the daemons are active.
21230Sstevel@tonic-gate *
21240Sstevel@tonic-gate * Return value: 0 success
21250Sstevel@tonic-gate * 1 failure
21260Sstevel@tonic-gate */
21270Sstevel@tonic-gate int
21280Sstevel@tonic-gate md_start_daemons(int init_queue)
21290Sstevel@tonic-gate {
21300Sstevel@tonic-gate md_requestq_entry_t *rqp;
21310Sstevel@tonic-gate int cnt;
21320Sstevel@tonic-gate int i;
21330Sstevel@tonic-gate int retval = 0;
21340Sstevel@tonic-gate
21350Sstevel@tonic-gate
21360Sstevel@tonic-gate if (md_get_status() & MD_GBL_DAEMONS_LIVE) {
21370Sstevel@tonic-gate return (retval);
21380Sstevel@tonic-gate }
21390Sstevel@tonic-gate md_clr_status(MD_GBL_DAEMONS_DIE);
21400Sstevel@tonic-gate
21410Sstevel@tonic-gate rqp = &md_daemon_queues[0];
21420Sstevel@tonic-gate i = 0;
21430Sstevel@tonic-gate while (!NULL_REQUESTQ_ENTRY(rqp)) {
21440Sstevel@tonic-gate cnt = init_requestq(rqp, start_daemon,
21457563SPrasad.Singamsetty@Sun.COM (caddr_t)rqp->dispq_headp, minclsyspri, init_queue);
21460Sstevel@tonic-gate
21470Sstevel@tonic-gate if (cnt && cnt != *rqp->num_threadsp) {
21480Sstevel@tonic-gate retval = 1;
21490Sstevel@tonic-gate break;
21500Sstevel@tonic-gate }
21510Sstevel@tonic-gate /*
21520Sstevel@tonic-gate * initialize variables
21530Sstevel@tonic-gate */
21540Sstevel@tonic-gate md_num_daemons += cnt;
21550Sstevel@tonic-gate rqp = &md_daemon_queues[++i];
21560Sstevel@tonic-gate }
21570Sstevel@tonic-gate
21580Sstevel@tonic-gate md_set_status(MD_GBL_DAEMONS_LIVE);
21590Sstevel@tonic-gate return (retval);
21600Sstevel@tonic-gate }
21610Sstevel@tonic-gate
21620Sstevel@tonic-gate int
21630Sstevel@tonic-gate md_loadsubmod(set_t setno, char *name, int drvrid)
21640Sstevel@tonic-gate {
21650Sstevel@tonic-gate ddi_modhandle_t mod;
21660Sstevel@tonic-gate md_ops_t **pops, *ops;
21670Sstevel@tonic-gate int i, err;
21680Sstevel@tonic-gate
21690Sstevel@tonic-gate /*
21700Sstevel@tonic-gate * See if the submodule is mdopened. If not, i is the index of the
21710Sstevel@tonic-gate * next empty slot.
21720Sstevel@tonic-gate */
21730Sstevel@tonic-gate for (i = 0; md_ops[i] != NULL; i++) {
21740Sstevel@tonic-gate if (strncmp(name, md_ops[i]->md_driver.md_drivername,
21750Sstevel@tonic-gate MD_DRIVERNAMELEN) == 0)
21760Sstevel@tonic-gate return (i);
21770Sstevel@tonic-gate
21780Sstevel@tonic-gate if (i == (MD_NOPS - 1))
21790Sstevel@tonic-gate return (-1);
21800Sstevel@tonic-gate }
21810Sstevel@tonic-gate
21820Sstevel@tonic-gate if (drvrid < 0) {
21830Sstevel@tonic-gate /* Do not try to add any records to the DB when stale. */
21840Sstevel@tonic-gate if (md_get_setstatus(setno) & MD_SET_STALE)
21850Sstevel@tonic-gate return (-1);
21860Sstevel@tonic-gate drvrid = md_setshared_name(setno, name, 0L);
21870Sstevel@tonic-gate }
21880Sstevel@tonic-gate
21890Sstevel@tonic-gate if (drvrid < 0)
21900Sstevel@tonic-gate return (-1);
21910Sstevel@tonic-gate
21920Sstevel@tonic-gate /* open and import the md_ops of the submodules */
21930Sstevel@tonic-gate mod = ddi_modopen(name, KRTLD_MODE_FIRST, &err);
21940Sstevel@tonic-gate if (mod == NULL) {
21950Sstevel@tonic-gate cmn_err(CE_WARN, "md_loadsubmod: "
21960Sstevel@tonic-gate "unable to ddi_modopen %s, error %d\n", name, err);
21970Sstevel@tonic-gate return (-1);
21980Sstevel@tonic-gate }
21990Sstevel@tonic-gate pops = ddi_modsym(mod, "md_interface_ops", &err);
22000Sstevel@tonic-gate if (pops == NULL) {
22010Sstevel@tonic-gate cmn_err(CE_WARN, "md_loadsubmod: "
22020Sstevel@tonic-gate "unable to import md_interface_ops from %s, error %d\n",
22030Sstevel@tonic-gate name, err);
22040Sstevel@tonic-gate (void) ddi_modclose(mod);
22050Sstevel@tonic-gate return (-1);
22060Sstevel@tonic-gate }
22070Sstevel@tonic-gate
22080Sstevel@tonic-gate /* ddi_modsym returns pointer to md_interface_ops in submod */
22090Sstevel@tonic-gate ops = *pops;
22100Sstevel@tonic-gate
22110Sstevel@tonic-gate /* initialize */
22120Sstevel@tonic-gate ops->md_selfindex = i;
22130Sstevel@tonic-gate rw_init(&ops->md_link_rw.lock, NULL, RW_DEFAULT, NULL);
22140Sstevel@tonic-gate (void) strncpy(ops->md_driver.md_drivername, name,
22150Sstevel@tonic-gate MD_DRIVERNAMELEN);
22160Sstevel@tonic-gate
22170Sstevel@tonic-gate /* plumb */
22180Sstevel@tonic-gate md_ops[i] = ops;
22190Sstevel@tonic-gate md_mods[i] = mod;
22200Sstevel@tonic-gate ops->md_next = md_opslist;
22210Sstevel@tonic-gate md_opslist = ops;
22220Sstevel@tonic-gate
22230Sstevel@tonic-gate /* return index */
22240Sstevel@tonic-gate return (i);
22250Sstevel@tonic-gate }
22260Sstevel@tonic-gate
22270Sstevel@tonic-gate int
22280Sstevel@tonic-gate md_getmodindex(md_driver_t *driver, int dont_load, int db_notrequired)
22290Sstevel@tonic-gate {
22300Sstevel@tonic-gate int i;
22310Sstevel@tonic-gate int modindex;
22320Sstevel@tonic-gate char *name = driver->md_drivername;
22330Sstevel@tonic-gate set_t setno = driver->md_setno;
22340Sstevel@tonic-gate int drvid;
22350Sstevel@tonic-gate int local_dont_load;
22360Sstevel@tonic-gate
22370Sstevel@tonic-gate if (setno >= md_nsets)
22380Sstevel@tonic-gate return (-1);
22390Sstevel@tonic-gate
22400Sstevel@tonic-gate for (i = 0; name[i] != 0; i++)
22410Sstevel@tonic-gate if (i == (MD_DRIVERNAMELEN -1))
22420Sstevel@tonic-gate return (-1);
22430Sstevel@tonic-gate
22440Sstevel@tonic-gate /*
22450Sstevel@tonic-gate * If set is STALE, set local_dont_load to 1 since no records
22460Sstevel@tonic-gate * should be added to DB when stale.
22470Sstevel@tonic-gate */
22480Sstevel@tonic-gate if (md_get_setstatus(setno) & MD_SET_STALE) {
22490Sstevel@tonic-gate local_dont_load = 1;
22500Sstevel@tonic-gate } else {
22510Sstevel@tonic-gate local_dont_load = dont_load;
22520Sstevel@tonic-gate }
22530Sstevel@tonic-gate
22540Sstevel@tonic-gate /*
22550Sstevel@tonic-gate * Single thread ioctl module binding with respect to
22560Sstevel@tonic-gate * similar code executed in md_loadsubmod that is called
22570Sstevel@tonic-gate * from md_snarf_db_set (which is where that path does
22580Sstevel@tonic-gate * its md_haltsnarf_enter call).
22590Sstevel@tonic-gate */
22600Sstevel@tonic-gate md_haltsnarf_enter(setno);
22610Sstevel@tonic-gate
22620Sstevel@tonic-gate /* See if the submodule is already ddi_modopened. */
22630Sstevel@tonic-gate for (i = 0; md_ops[i] != NULL; i++) {
22640Sstevel@tonic-gate if (strncmp(name, md_ops[i]->md_driver.md_drivername,
22650Sstevel@tonic-gate MD_DRIVERNAMELEN) == 0) {
22660Sstevel@tonic-gate if (! local_dont_load &&
22670Sstevel@tonic-gate (md_getshared_key(setno, name) == MD_KEYBAD)) {
22680Sstevel@tonic-gate if (md_setshared_name(setno, name, 0L)
22690Sstevel@tonic-gate == MD_KEYBAD) {
22700Sstevel@tonic-gate if (!db_notrequired)
22710Sstevel@tonic-gate goto err;
22720Sstevel@tonic-gate }
22730Sstevel@tonic-gate }
22740Sstevel@tonic-gate md_haltsnarf_exit(setno);
22750Sstevel@tonic-gate return (i);
22760Sstevel@tonic-gate }
22770Sstevel@tonic-gate
22780Sstevel@tonic-gate if (i == (MD_NOPS -1))
22790Sstevel@tonic-gate break;
22800Sstevel@tonic-gate }
22810Sstevel@tonic-gate
22820Sstevel@tonic-gate if (local_dont_load)
22830Sstevel@tonic-gate goto err;
22840Sstevel@tonic-gate
22850Sstevel@tonic-gate drvid = ((db_notrequired) ? 0 : (int)md_getshared_key(setno, name));
22860Sstevel@tonic-gate
22870Sstevel@tonic-gate /* ddi_modopen the submodule */
22880Sstevel@tonic-gate modindex = md_loadsubmod(setno, name, drvid);
22890Sstevel@tonic-gate if (modindex < 0)
22900Sstevel@tonic-gate goto err;
22910Sstevel@tonic-gate
22920Sstevel@tonic-gate if (md_ops[modindex]->md_snarf != NULL)
22930Sstevel@tonic-gate (*(md_ops[modindex]->md_snarf))(MD_SNARF_DOIT, setno);
22940Sstevel@tonic-gate
22950Sstevel@tonic-gate md_haltsnarf_exit(setno);
22960Sstevel@tonic-gate return (modindex);
22970Sstevel@tonic-gate
22980Sstevel@tonic-gate err: md_haltsnarf_exit(setno);
22990Sstevel@tonic-gate return (-1);
23000Sstevel@tonic-gate }
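
/*
 * Illustrative sketch, not part of the driver: resolving the ops index
 * for a submodule by name.  "md_stripe" is used here as a typical
 * submodule name; the surrounding values are assumptions, used for
 * illustration only.
 *
 *	md_driver_t drv;
 *
 *	bzero(&drv, sizeof (md_driver_t));
 *	(void) strncpy(drv.md_drivername, "md_stripe", MD_DRIVERNAMELEN);
 *	drv.md_setno = setno;
 *	if (md_getmodindex(&drv, 0, 0) < 0)
 *		... submodule could not be loaded ...
 */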
23010Sstevel@tonic-gate
23020Sstevel@tonic-gate void
23030Sstevel@tonic-gate md_call_strategy(buf_t *bp, int flags, void *private)
23040Sstevel@tonic-gate {
23050Sstevel@tonic-gate mdi_unit_t *ui;
23060Sstevel@tonic-gate
23070Sstevel@tonic-gate if (mdv_strategy_tstpnt)
23080Sstevel@tonic-gate if ((*mdv_strategy_tstpnt)(bp, flags, private) != 0)
23090Sstevel@tonic-gate return;
23100Sstevel@tonic-gate if (getmajor(bp->b_edev) != md_major) {
23110Sstevel@tonic-gate (void) bdev_strategy(bp);
23120Sstevel@tonic-gate return;
23130Sstevel@tonic-gate }
23140Sstevel@tonic-gate
23150Sstevel@tonic-gate flags = (flags & MD_STR_PASSEDON) | MD_STR_NOTTOP;
23160Sstevel@tonic-gate ui = MDI_UNIT(getminor(bp->b_edev));
23170Sstevel@tonic-gate ASSERT(ui != NULL);
23180Sstevel@tonic-gate (*md_ops[ui->ui_opsindex]->md_strategy)(bp, flags, private);
23190Sstevel@tonic-gate }
23200Sstevel@tonic-gate
23210Sstevel@tonic-gate /*
23220Sstevel@tonic-gate * md_call_ioctl:
23230Sstevel@tonic-gate * -------------
23240Sstevel@tonic-gate * Issue the specified ioctl to the device associated with the given md_dev64_t
23250Sstevel@tonic-gate *
23260Sstevel@tonic-gate * Arguments:
23270Sstevel@tonic-gate * dev - underlying device [md_dev64_t]
23280Sstevel@tonic-gate * cmd - ioctl to perform
23290Sstevel@tonic-gate * data - arguments / result location
23300Sstevel@tonic-gate * mode - read/write/layered ioctl
23310Sstevel@tonic-gate * lockp - lock reference
23320Sstevel@tonic-gate *
23330Sstevel@tonic-gate * Returns:
23340Sstevel@tonic-gate * 0 success
23350Sstevel@tonic-gate * !=0 Failure (error code)
23360Sstevel@tonic-gate */
23370Sstevel@tonic-gate int
23380Sstevel@tonic-gate md_call_ioctl(md_dev64_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
23390Sstevel@tonic-gate {
23400Sstevel@tonic-gate dev_t device = md_dev64_to_dev(dev);
23410Sstevel@tonic-gate int rval;
23420Sstevel@tonic-gate mdi_unit_t *ui;
23430Sstevel@tonic-gate
23440Sstevel@tonic-gate /*
23450Sstevel@tonic-gate * See if the device is a metadevice. If not, call cdev_ioctl(); otherwise
23460Sstevel@tonic-gate * call the ioctl entry-point in the metadevice.
23470Sstevel@tonic-gate */
23480Sstevel@tonic-gate if (md_getmajor(dev) != md_major) {
23490Sstevel@tonic-gate int rv;
23500Sstevel@tonic-gate rval = cdev_ioctl(device, cmd, (intptr_t)data, mode,
23510Sstevel@tonic-gate ddi_get_cred(), &rv);
23520Sstevel@tonic-gate } else {
23530Sstevel@tonic-gate ui = MDI_UNIT(md_getminor(dev));
23540Sstevel@tonic-gate ASSERT(ui != NULL);
23550Sstevel@tonic-gate rval = (*md_ops[ui->ui_opsindex]->md_ioctl)(device, cmd, data,
23560Sstevel@tonic-gate mode, lockp);
23570Sstevel@tonic-gate }
23580Sstevel@tonic-gate return (rval);
23590Sstevel@tonic-gate }
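
/*
 * Illustrative sketch, not part of the driver: forwarding a DKIOCINFO
 * request to an underlying component, which may itself be a metadevice
 * or a physical device; md_call_ioctl() picks the right entry point.
 * The variables and the mode flags shown are assumptions, used for
 * illustration only.
 *
 *	struct dk_cinfo ci;
 *	int err;
 *
 *	err = md_call_ioctl(comp_dev, DKIOCINFO, &ci,
 *	    FKIOCTL | FREAD, lockp);
 */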
23600Sstevel@tonic-gate
23610Sstevel@tonic-gate void
23620Sstevel@tonic-gate md_rem_link(set_t setno, int id, krwlock_t *rw, md_link_t **head)
23630Sstevel@tonic-gate {
23640Sstevel@tonic-gate md_link_t *next;
23650Sstevel@tonic-gate md_link_t **pprev;
23660Sstevel@tonic-gate
23670Sstevel@tonic-gate rw_enter(rw, RW_WRITER);
23680Sstevel@tonic-gate
23690Sstevel@tonic-gate next = *head;
23700Sstevel@tonic-gate pprev = head;
23710Sstevel@tonic-gate while (next) {
23720Sstevel@tonic-gate if ((next->ln_setno == setno) && (next->ln_id == id)) {
23730Sstevel@tonic-gate *pprev = next->ln_next;
23740Sstevel@tonic-gate rw_exit(rw);
23750Sstevel@tonic-gate return;
23760Sstevel@tonic-gate }
23770Sstevel@tonic-gate pprev = &next->ln_next;
23780Sstevel@tonic-gate next = next->ln_next;
23790Sstevel@tonic-gate }
23800Sstevel@tonic-gate
23810Sstevel@tonic-gate rw_exit(rw);
23820Sstevel@tonic-gate }
23830Sstevel@tonic-gate
23840Sstevel@tonic-gate int
23850Sstevel@tonic-gate md_dev_exists(md_dev64_t dev)
23860Sstevel@tonic-gate {
23870Sstevel@tonic-gate
23880Sstevel@tonic-gate if (dev == NODEV64)
23890Sstevel@tonic-gate return (0);
23900Sstevel@tonic-gate
23910Sstevel@tonic-gate if (strcmp(ddi_major_to_name(md_getmajor(dev)), "md") != 0)
23920Sstevel@tonic-gate return (1);
23930Sstevel@tonic-gate
23940Sstevel@tonic-gate if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) ||
23950Sstevel@tonic-gate (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits))
23960Sstevel@tonic-gate return (0);
23970Sstevel@tonic-gate
23980Sstevel@tonic-gate if (MDI_UNIT(md_getminor(dev)) != NULL)
23990Sstevel@tonic-gate return (1);
24000Sstevel@tonic-gate
24010Sstevel@tonic-gate return (0);
24020Sstevel@tonic-gate }
24030Sstevel@tonic-gate
24040Sstevel@tonic-gate md_parent_t
24050Sstevel@tonic-gate md_get_parent(md_dev64_t dev)
24060Sstevel@tonic-gate {
24070Sstevel@tonic-gate md_unit_t *un;
24080Sstevel@tonic-gate mdi_unit_t *ui;
24090Sstevel@tonic-gate md_parent_t parent;
24100Sstevel@tonic-gate
24110Sstevel@tonic-gate if (md_getmajor(dev) != md_major)
24120Sstevel@tonic-gate return (MD_NO_PARENT);
24130Sstevel@tonic-gate
24140Sstevel@tonic-gate ui = MDI_UNIT(md_getminor(dev));
24150Sstevel@tonic-gate
24160Sstevel@tonic-gate un = (md_unit_t *)md_unit_readerlock(ui);
24170Sstevel@tonic-gate parent = un->c.un_parent;
24180Sstevel@tonic-gate md_unit_readerexit(ui);
24190Sstevel@tonic-gate
24200Sstevel@tonic-gate return (parent);
24210Sstevel@tonic-gate }
24220Sstevel@tonic-gate
24230Sstevel@tonic-gate void
24240Sstevel@tonic-gate md_set_parent(md_dev64_t dev, md_parent_t parent)
24250Sstevel@tonic-gate {
24260Sstevel@tonic-gate md_unit_t *un;
24270Sstevel@tonic-gate mdi_unit_t *ui;
24280Sstevel@tonic-gate
24290Sstevel@tonic-gate if (md_getmajor(dev) != md_major)
24300Sstevel@tonic-gate return;
24310Sstevel@tonic-gate
24320Sstevel@tonic-gate ui = MDI_UNIT(md_getminor(dev));
24330Sstevel@tonic-gate
24340Sstevel@tonic-gate un = (md_unit_t *)md_unit_readerlock(ui);
24350Sstevel@tonic-gate un->c.un_parent = parent;
24360Sstevel@tonic-gate md_unit_readerexit(ui);
24370Sstevel@tonic-gate }
24380Sstevel@tonic-gate
24390Sstevel@tonic-gate void
24400Sstevel@tonic-gate md_reset_parent(md_dev64_t dev)
24410Sstevel@tonic-gate {
24420Sstevel@tonic-gate md_unit_t *un;
24430Sstevel@tonic-gate mdi_unit_t *ui;
24440Sstevel@tonic-gate
24450Sstevel@tonic-gate if (md_getmajor(dev) != md_major)
24460Sstevel@tonic-gate return;
24470Sstevel@tonic-gate
24480Sstevel@tonic-gate ui = MDI_UNIT(md_getminor(dev));
24490Sstevel@tonic-gate
24500Sstevel@tonic-gate un = (md_unit_t *)md_unit_readerlock(ui);
24510Sstevel@tonic-gate un->c.un_parent = MD_NO_PARENT;
24520Sstevel@tonic-gate md_unit_readerexit(ui);
24530Sstevel@tonic-gate }
24540Sstevel@tonic-gate
24550Sstevel@tonic-gate
24560Sstevel@tonic-gate static intptr_t (*hot_spare_interface)() = (intptr_t (*)())NULL;
24570Sstevel@tonic-gate
24580Sstevel@tonic-gate int
24590Sstevel@tonic-gate md_hot_spare_ifc(
24600Sstevel@tonic-gate hs_cmds_t cmd,
24610Sstevel@tonic-gate mddb_recid_t id,
24620Sstevel@tonic-gate u_longlong_t size,
24630Sstevel@tonic-gate int labeled,
24640Sstevel@tonic-gate mddb_recid_t *hs_id,
24650Sstevel@tonic-gate mdkey_t *key,
24660Sstevel@tonic-gate md_dev64_t *dev,
24670Sstevel@tonic-gate diskaddr_t *sblock)
24680Sstevel@tonic-gate {
24690Sstevel@tonic-gate int err;
24700Sstevel@tonic-gate
24710Sstevel@tonic-gate /*
24720Sstevel@tonic-gate * RW lock on hot_spare_interface. We don't want it to change from
24730Sstevel@tonic-gate * underneath us. If hot_spare_interface is NULL we're going to
24740Sstevel@tonic-gate * need to set it. So we need to upgrade to a WRITER lock. If that
24750Sstevel@tonic-gate * doesn't work, we drop the lock and reenter as WRITER. This leaves
24760Sstevel@tonic-gate * a small hole during which hot_spare_interface could be modified
24770Sstevel@tonic-gate  * so we check it for NULL again. What a pain. Then, if it is still
24780Sstevel@tonic-gate  * NULL, load it from md_get_named_service().
24790Sstevel@tonic-gate */
24800Sstevel@tonic-gate
24810Sstevel@tonic-gate rw_enter(&hsp_rwlp.lock, RW_READER);
24820Sstevel@tonic-gate if (hot_spare_interface == NULL) {
24830Sstevel@tonic-gate if (rw_tryupgrade(&hsp_rwlp.lock) == 0) {
24840Sstevel@tonic-gate rw_exit(&hsp_rwlp.lock);
24850Sstevel@tonic-gate rw_enter(&hsp_rwlp.lock, RW_WRITER);
24860Sstevel@tonic-gate if (hot_spare_interface != NULL) {
24870Sstevel@tonic-gate err = ((*hot_spare_interface)
24880Sstevel@tonic-gate (cmd, id, size, labeled, hs_id, key, dev,
24890Sstevel@tonic-gate sblock));
24900Sstevel@tonic-gate rw_exit(&hsp_rwlp.lock);
24910Sstevel@tonic-gate return (err);
24920Sstevel@tonic-gate }
24930Sstevel@tonic-gate }
24940Sstevel@tonic-gate hot_spare_interface = md_get_named_service(NODEV64, ANY_SERVICE,
24950Sstevel@tonic-gate "hot spare interface", 0);
24960Sstevel@tonic-gate rw_downgrade(&hsp_rwlp.lock);
24970Sstevel@tonic-gate }
24980Sstevel@tonic-gate
24990Sstevel@tonic-gate if (hot_spare_interface == NULL) {
25000Sstevel@tonic-gate cmn_err(CE_WARN, "md: no hotspare interface");
25010Sstevel@tonic-gate rw_exit(&hsp_rwlp.lock);
25020Sstevel@tonic-gate return (0);
25030Sstevel@tonic-gate }
25040Sstevel@tonic-gate
25050Sstevel@tonic-gate err = ((*hot_spare_interface)
25060Sstevel@tonic-gate (cmd, id, size, labeled, hs_id, key, dev, sblock));
25070Sstevel@tonic-gate rw_exit(&hsp_rwlp.lock);
25080Sstevel@tonic-gate return (err);
25090Sstevel@tonic-gate }
25100Sstevel@tonic-gate
25110Sstevel@tonic-gate void
25120Sstevel@tonic-gate md_clear_hot_spare_interface()
25130Sstevel@tonic-gate {
25140Sstevel@tonic-gate rw_enter(&hsp_rwlp.lock, RW_WRITER);
25150Sstevel@tonic-gate hot_spare_interface = NULL;
25160Sstevel@tonic-gate rw_exit(&hsp_rwlp.lock);
25170Sstevel@tonic-gate }
25180Sstevel@tonic-gate
25190Sstevel@tonic-gate
25200Sstevel@tonic-gate static intptr_t (*notify_interface)() = (intptr_t (*)())NULL;
25210Sstevel@tonic-gate
25220Sstevel@tonic-gate int
25230Sstevel@tonic-gate md_notify_interface(
25240Sstevel@tonic-gate md_event_cmds_t cmd,
25250Sstevel@tonic-gate md_tags_t tag,
25260Sstevel@tonic-gate set_t set,
25270Sstevel@tonic-gate md_dev64_t dev,
25280Sstevel@tonic-gate md_event_type_t event
25290Sstevel@tonic-gate )
25300Sstevel@tonic-gate {
25310Sstevel@tonic-gate int err;
25320Sstevel@tonic-gate
25330Sstevel@tonic-gate if (md_event_queue == NULL)
25340Sstevel@tonic-gate return (0);
25350Sstevel@tonic-gate rw_enter(&ni_rwlp.lock, RW_READER);
25360Sstevel@tonic-gate if (notify_interface == NULL) {
25370Sstevel@tonic-gate if (rw_tryupgrade(&ni_rwlp.lock) == 0) {
25380Sstevel@tonic-gate rw_exit(&ni_rwlp.lock);
25390Sstevel@tonic-gate rw_enter(&ni_rwlp.lock, RW_WRITER);
25400Sstevel@tonic-gate if (notify_interface != NULL) {
25410Sstevel@tonic-gate err = ((*notify_interface)
25420Sstevel@tonic-gate (cmd, tag, set, dev, event));
25430Sstevel@tonic-gate rw_exit(&ni_rwlp.lock);
25440Sstevel@tonic-gate return (err);
25450Sstevel@tonic-gate }
25460Sstevel@tonic-gate }
25470Sstevel@tonic-gate notify_interface = md_get_named_service(NODEV64, ANY_SERVICE,
25480Sstevel@tonic-gate "notify interface", 0);
25490Sstevel@tonic-gate rw_downgrade(&ni_rwlp.lock);
25500Sstevel@tonic-gate }
25510Sstevel@tonic-gate if (notify_interface == NULL) {
25520Sstevel@tonic-gate cmn_err(CE_WARN, "md: no notify interface");
25530Sstevel@tonic-gate rw_exit(&ni_rwlp.lock);
25540Sstevel@tonic-gate return (0);
25550Sstevel@tonic-gate }
25560Sstevel@tonic-gate err = ((*notify_interface)(cmd, tag, set, dev, event));
25570Sstevel@tonic-gate rw_exit(&ni_rwlp.lock);
25580Sstevel@tonic-gate return (err);
25590Sstevel@tonic-gate }
25600Sstevel@tonic-gate
25610Sstevel@tonic-gate char *
25620Sstevel@tonic-gate obj2devname(uint32_t tag, uint_t setno, md_dev64_t dev)
25630Sstevel@tonic-gate {
25640Sstevel@tonic-gate char *setname;
25650Sstevel@tonic-gate char name[MD_MAX_CTDLEN];
25660Sstevel@tonic-gate minor_t mnum = md_getminor(dev);
25670Sstevel@tonic-gate major_t maj = md_getmajor(dev);
25680Sstevel@tonic-gate int rtn = 0;
25690Sstevel@tonic-gate
25700Sstevel@tonic-gate /*
25710Sstevel@tonic-gate * Verify that the passed dev_t refers to a valid metadevice.
25720Sstevel@tonic-gate  * If it doesn't, we can make no assumptions as to what the device
25730Sstevel@tonic-gate * name is. Return NULL in these cases.
25740Sstevel@tonic-gate */
25750Sstevel@tonic-gate if (((maj != md_major) || (MD_MIN2UNIT(mnum) >= md_nunits)) ||
25760Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets)) {
25770Sstevel@tonic-gate return (NULL);
25780Sstevel@tonic-gate }
25790Sstevel@tonic-gate
25800Sstevel@tonic-gate setname = NULL;
25810Sstevel@tonic-gate name[0] = '\0';
25820Sstevel@tonic-gate switch (tag) {
25830Sstevel@tonic-gate case SVM_TAG_HSP:
25840Sstevel@tonic-gate if (setno == 0) {
25850Sstevel@tonic-gate rtn = snprintf(name, sizeof (name), "hsp%u",
25860Sstevel@tonic-gate (unsigned)MD_MIN2UNIT(mnum));
25870Sstevel@tonic-gate } else {
25880Sstevel@tonic-gate setname = mddb_getsetname(setno);
25890Sstevel@tonic-gate if (setname != NULL) {
25900Sstevel@tonic-gate rtn = snprintf(name, sizeof (name), "%s/hsp%u",
25910Sstevel@tonic-gate setname, (unsigned)MD_MIN2UNIT(mnum));
25920Sstevel@tonic-gate }
25930Sstevel@tonic-gate }
25940Sstevel@tonic-gate break;
25950Sstevel@tonic-gate case SVM_TAG_DRIVE:
25960Sstevel@tonic-gate (void) sprintf(name, "drive");
25970Sstevel@tonic-gate break;
25980Sstevel@tonic-gate case SVM_TAG_HOST:
25990Sstevel@tonic-gate (void) sprintf(name, "host");
26000Sstevel@tonic-gate break;
26010Sstevel@tonic-gate case SVM_TAG_SET:
26020Sstevel@tonic-gate rtn = snprintf(name, sizeof (name), "%s",
26030Sstevel@tonic-gate mddb_getsetname(setno));
26040Sstevel@tonic-gate if ((name[0] == '\0') || (rtn >= sizeof (name))) {
26050Sstevel@tonic-gate (void) sprintf(name, "diskset");
26060Sstevel@tonic-gate rtn = 0;
26070Sstevel@tonic-gate }
26080Sstevel@tonic-gate break;
26090Sstevel@tonic-gate default:
26100Sstevel@tonic-gate rtn = snprintf(name, sizeof (name), "%s", md_shortname(mnum));
26110Sstevel@tonic-gate break;
26120Sstevel@tonic-gate }
26130Sstevel@tonic-gate
26140Sstevel@tonic-gate /* Check whether any of the snprintf calls produced rubbish */
26150Sstevel@tonic-gate if ((name[0] == '\0') || (rtn >= sizeof (name))) {
26160Sstevel@tonic-gate return (NULL);
26170Sstevel@tonic-gate }
26180Sstevel@tonic-gate
26190Sstevel@tonic-gate return (md_strdup(name));
26200Sstevel@tonic-gate }
26210Sstevel@tonic-gate
26220Sstevel@tonic-gate /* Sysevent subclass and mdnotify event type pairs */
26230Sstevel@tonic-gate struct node {
26240Sstevel@tonic-gate char *se_ev;
26250Sstevel@tonic-gate md_event_type_t md_ev;
26260Sstevel@tonic-gate };
26270Sstevel@tonic-gate
26280Sstevel@tonic-gate /*
26290Sstevel@tonic-gate  * Table must be sorted in case-sensitive ascending order of
26300Sstevel@tonic-gate  * the sysevent subclass values
26310Sstevel@tonic-gate */
26320Sstevel@tonic-gate static struct node ev_table[] = {
26330Sstevel@tonic-gate { ESC_SVM_ADD, EQ_ADD },
26340Sstevel@tonic-gate { ESC_SVM_ATTACH, EQ_ATTACH },
26350Sstevel@tonic-gate { ESC_SVM_ATTACHING, EQ_ATTACHING },
26360Sstevel@tonic-gate { ESC_SVM_CHANGE, EQ_CHANGE },
26370Sstevel@tonic-gate { ESC_SVM_CREATE, EQ_CREATE },
26380Sstevel@tonic-gate { ESC_SVM_DELETE, EQ_DELETE },
26390Sstevel@tonic-gate { ESC_SVM_DETACH, EQ_DETACH },
26400Sstevel@tonic-gate { ESC_SVM_DETACHING, EQ_DETACHING },
26410Sstevel@tonic-gate { ESC_SVM_DRIVE_ADD, EQ_DRIVE_ADD },
26420Sstevel@tonic-gate { ESC_SVM_DRIVE_DELETE, EQ_DRIVE_DELETE },
26430Sstevel@tonic-gate { ESC_SVM_ENABLE, EQ_ENABLE },
26440Sstevel@tonic-gate { ESC_SVM_ERRED, EQ_ERRED },
26450Sstevel@tonic-gate { ESC_SVM_EXCHANGE, EQ_EXCHANGE },
26460Sstevel@tonic-gate { ESC_SVM_GROW, EQ_GROW },
26470Sstevel@tonic-gate { ESC_SVM_HS_CHANGED, EQ_HS_CHANGED },
26480Sstevel@tonic-gate { ESC_SVM_HS_FREED, EQ_HS_FREED },
26490Sstevel@tonic-gate { ESC_SVM_HOST_ADD, EQ_HOST_ADD },
26500Sstevel@tonic-gate { ESC_SVM_HOST_DELETE, EQ_HOST_DELETE },
26510Sstevel@tonic-gate { ESC_SVM_HOTSPARED, EQ_HOTSPARED },
26520Sstevel@tonic-gate { ESC_SVM_INIT_FAILED, EQ_INIT_FAILED },
26530Sstevel@tonic-gate { ESC_SVM_INIT_FATAL, EQ_INIT_FATAL },
26540Sstevel@tonic-gate { ESC_SVM_INIT_START, EQ_INIT_START },
26550Sstevel@tonic-gate { ESC_SVM_INIT_SUCCESS, EQ_INIT_SUCCESS },
26560Sstevel@tonic-gate { ESC_SVM_IOERR, EQ_IOERR },
26570Sstevel@tonic-gate { ESC_SVM_LASTERRED, EQ_LASTERRED },
26580Sstevel@tonic-gate { ESC_SVM_MEDIATOR_ADD, EQ_MEDIATOR_ADD },
26590Sstevel@tonic-gate { ESC_SVM_MEDIATOR_DELETE, EQ_MEDIATOR_DELETE },
26600Sstevel@tonic-gate { ESC_SVM_OFFLINE, EQ_OFFLINE },
26610Sstevel@tonic-gate { ESC_SVM_OK, EQ_OK },
26620Sstevel@tonic-gate { ESC_SVM_ONLINE, EQ_ONLINE },
26630Sstevel@tonic-gate { ESC_SVM_OPEN_FAIL, EQ_OPEN_FAIL },
26640Sstevel@tonic-gate { ESC_SVM_REGEN_DONE, EQ_REGEN_DONE },
26650Sstevel@tonic-gate { ESC_SVM_REGEN_FAILED, EQ_REGEN_FAILED },
26660Sstevel@tonic-gate { ESC_SVM_REGEN_START, EQ_REGEN_START },
26670Sstevel@tonic-gate { ESC_SVM_RELEASE, EQ_RELEASE },
26680Sstevel@tonic-gate { ESC_SVM_REMOVE, EQ_REMOVE },
26690Sstevel@tonic-gate { ESC_SVM_RENAME_DST, EQ_RENAME_DST },
26700Sstevel@tonic-gate { ESC_SVM_RENAME_SRC, EQ_RENAME_SRC },
26710Sstevel@tonic-gate { ESC_SVM_REPLACE, EQ_REPLACE },
26720Sstevel@tonic-gate { ESC_SVM_RESYNC_DONE, EQ_RESYNC_DONE },
26730Sstevel@tonic-gate { ESC_SVM_RESYNC_FAILED, EQ_RESYNC_FAILED },
26740Sstevel@tonic-gate { ESC_SVM_RESYNC_START, EQ_RESYNC_START },
26750Sstevel@tonic-gate { ESC_SVM_RESYNC_SUCCESS, EQ_RESYNC_SUCCESS },
26760Sstevel@tonic-gate { ESC_SVM_TAKEOVER, EQ_TAKEOVER }
26770Sstevel@tonic-gate };
26780Sstevel@tonic-gate
26790Sstevel@tonic-gate static md_tags_t md_tags[] = {
26800Sstevel@tonic-gate TAG_UNK,
26810Sstevel@tonic-gate TAG_METADEVICE,
26820Sstevel@tonic-gate TAG_UNK,
26830Sstevel@tonic-gate TAG_UNK,
26840Sstevel@tonic-gate TAG_UNK,
26850Sstevel@tonic-gate TAG_UNK,
26860Sstevel@tonic-gate TAG_REPLICA,
26870Sstevel@tonic-gate TAG_HSP,
26880Sstevel@tonic-gate TAG_HS,
26890Sstevel@tonic-gate TAG_SET,
26900Sstevel@tonic-gate TAG_DRIVE,
26910Sstevel@tonic-gate TAG_HOST,
26920Sstevel@tonic-gate TAG_MEDIATOR
26930Sstevel@tonic-gate };
26940Sstevel@tonic-gate
26950Sstevel@tonic-gate md_event_type_t
26960Sstevel@tonic-gate ev_get(char *subclass)
26970Sstevel@tonic-gate {
26980Sstevel@tonic-gate int high, mid, low, p;
26990Sstevel@tonic-gate
27000Sstevel@tonic-gate low = 0;
27010Sstevel@tonic-gate high = (sizeof (ev_table) / sizeof (ev_table[0])) - 1;
27020Sstevel@tonic-gate while (low <= high) {
27030Sstevel@tonic-gate mid = (high + low) / 2;
27040Sstevel@tonic-gate p = strcmp(subclass, ev_table[mid].se_ev);
27050Sstevel@tonic-gate if (p == 0) {
27060Sstevel@tonic-gate return (ev_table[mid].md_ev);
27070Sstevel@tonic-gate } else if (p < 0) {
27080Sstevel@tonic-gate high = mid - 1;
27090Sstevel@tonic-gate } else {
27100Sstevel@tonic-gate low = mid + 1;
27110Sstevel@tonic-gate }
27120Sstevel@tonic-gate }
27130Sstevel@tonic-gate
27140Sstevel@tonic-gate return (EQ_EMPTY);
27150Sstevel@tonic-gate }
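
/*
 * Note (illustrative only): ev_get() binary-searches ev_table on the
 * sysevent subclass string, so lookups depend on the table above staying
 * sorted. For example, ev_get(ESC_SVM_ADD) maps to EQ_ADD, while an
 * unrecognised subclass maps to EQ_EMPTY.
 */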
27160Sstevel@tonic-gate
27170Sstevel@tonic-gate /*
27180Sstevel@tonic-gate * Log mdnotify event
27190Sstevel@tonic-gate */
27200Sstevel@tonic-gate void
27210Sstevel@tonic-gate do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid)
27220Sstevel@tonic-gate {
27230Sstevel@tonic-gate md_event_type_t ev_type;
27240Sstevel@tonic-gate md_tags_t md_tag;
27250Sstevel@tonic-gate
27260Sstevel@tonic-gate /* Translate sysevent into mdnotify event */
27270Sstevel@tonic-gate ev_type = ev_get(se_subclass);
27280Sstevel@tonic-gate
27290Sstevel@tonic-gate if (tag >= (sizeof (md_tags) / sizeof (md_tags[0]))) {
27300Sstevel@tonic-gate md_tag = TAG_UNK;
27310Sstevel@tonic-gate } else {
27320Sstevel@tonic-gate md_tag = md_tags[tag];
27330Sstevel@tonic-gate }
27340Sstevel@tonic-gate
27350Sstevel@tonic-gate NOTIFY_MD(md_tag, setno, devid, ev_type);
27360Sstevel@tonic-gate }
27370Sstevel@tonic-gate
27380Sstevel@tonic-gate /*
27390Sstevel@tonic-gate * Log SVM sys events
27400Sstevel@tonic-gate */
27410Sstevel@tonic-gate void
27420Sstevel@tonic-gate svm_gen_sysevent(
27430Sstevel@tonic-gate char *se_class,
27440Sstevel@tonic-gate char *se_subclass,
27450Sstevel@tonic-gate uint32_t tag,
27460Sstevel@tonic-gate set_t setno,
27470Sstevel@tonic-gate md_dev64_t devid
27480Sstevel@tonic-gate )
27490Sstevel@tonic-gate {
27500Sstevel@tonic-gate nvlist_t *attr_list;
27510Sstevel@tonic-gate sysevent_id_t eid;
27520Sstevel@tonic-gate int err = DDI_SUCCESS;
27530Sstevel@tonic-gate char *devname;
27540Sstevel@tonic-gate extern dev_info_t *md_devinfo;
27550Sstevel@tonic-gate
27560Sstevel@tonic-gate /* Raise the mdnotify event before anything else */
27570Sstevel@tonic-gate do_mdnotify(se_subclass, tag, setno, devid);
27580Sstevel@tonic-gate
27590Sstevel@tonic-gate if (md_devinfo == NULL) {
27600Sstevel@tonic-gate return;
27610Sstevel@tonic-gate }
27620Sstevel@tonic-gate
27630Sstevel@tonic-gate err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_NOSLEEP);
27640Sstevel@tonic-gate
27650Sstevel@tonic-gate if (err == DDI_SUCCESS) {
27660Sstevel@tonic-gate /* Add the version number */
27670Sstevel@tonic-gate err = nvlist_add_uint32(attr_list, SVM_VERSION_NO,
27680Sstevel@tonic-gate (uint32_t)SVM_VERSION);
27690Sstevel@tonic-gate if (err != DDI_SUCCESS) {
27700Sstevel@tonic-gate goto fail;
27710Sstevel@tonic-gate }
27720Sstevel@tonic-gate
27730Sstevel@tonic-gate /* Add the tag attribute */
27740Sstevel@tonic-gate err = nvlist_add_uint32(attr_list, SVM_TAG, (uint32_t)tag);
27750Sstevel@tonic-gate if (err != DDI_SUCCESS) {
27760Sstevel@tonic-gate goto fail;
27770Sstevel@tonic-gate }
27780Sstevel@tonic-gate
27790Sstevel@tonic-gate /* Add the set number attribute */
27800Sstevel@tonic-gate err = nvlist_add_uint32(attr_list, SVM_SET_NO, (uint32_t)setno);
27810Sstevel@tonic-gate if (err != DDI_SUCCESS) {
27820Sstevel@tonic-gate goto fail;
27830Sstevel@tonic-gate }
27840Sstevel@tonic-gate
27850Sstevel@tonic-gate /* Add the device id attribute */
27860Sstevel@tonic-gate err = nvlist_add_uint64(attr_list, SVM_DEV_ID, (uint64_t)devid);
27870Sstevel@tonic-gate if (err != DDI_SUCCESS) {
27880Sstevel@tonic-gate goto fail;
27890Sstevel@tonic-gate }
27900Sstevel@tonic-gate
27910Sstevel@tonic-gate /* Add the device name attribute */
27920Sstevel@tonic-gate devname = obj2devname(tag, setno, devid);
27930Sstevel@tonic-gate if (devname != NULL) {
27940Sstevel@tonic-gate err = nvlist_add_string(attr_list, SVM_DEV_NAME,
27950Sstevel@tonic-gate devname);
27960Sstevel@tonic-gate freestr(devname);
27970Sstevel@tonic-gate } else {
27980Sstevel@tonic-gate err = nvlist_add_string(attr_list, SVM_DEV_NAME,
27990Sstevel@tonic-gate "unspecified");
28000Sstevel@tonic-gate }
28010Sstevel@tonic-gate if (err != DDI_SUCCESS) {
28020Sstevel@tonic-gate goto fail;
28030Sstevel@tonic-gate }
28040Sstevel@tonic-gate
28050Sstevel@tonic-gate /* Attempt to post event */
28060Sstevel@tonic-gate err = ddi_log_sysevent(md_devinfo, DDI_VENDOR_SUNW, se_class,
28070Sstevel@tonic-gate se_subclass, attr_list, &eid, DDI_SLEEP);
28080Sstevel@tonic-gate
28090Sstevel@tonic-gate nvlist_free(attr_list);
28100Sstevel@tonic-gate if (err != DDI_SUCCESS) {
28110Sstevel@tonic-gate cmn_err(CE_WARN, "Failed to log event for %s, %s,"
28120Sstevel@tonic-gate " err=%x", se_class, se_subclass, err);
28130Sstevel@tonic-gate }
28140Sstevel@tonic-gate }
28150Sstevel@tonic-gate
28160Sstevel@tonic-gate return;
28170Sstevel@tonic-gate
28180Sstevel@tonic-gate fail:
28190Sstevel@tonic-gate nvlist_free(attr_list);
28200Sstevel@tonic-gate cmn_err(CE_WARN, "Failed to setup attributes for event %s, %s, err=%x",
28210Sstevel@tonic-gate se_class, se_subclass, err);
28220Sstevel@tonic-gate }
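
/*
 * Illustrative call (hypothetical values; EC_SVM_CONFIG/ESC_SVM_CREATE are
 * just one plausible class/subclass pairing, not a statement about actual
 * callers):
 *
 *	svm_gen_sysevent(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE,
 *	    setno, md_makedevice(md_major, mnum));
 *
 * This raises the mdnotify event first and then, provided md_devinfo is
 * set, logs a sysevent carrying the version, tag, set number, device id
 * and device name attributes built above.
 */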
28230Sstevel@tonic-gate
28240Sstevel@tonic-gate void
28250Sstevel@tonic-gate md_clear_named_service()
28260Sstevel@tonic-gate {
28270Sstevel@tonic-gate rw_enter(&ni_rwlp.lock, RW_WRITER);
28280Sstevel@tonic-gate notify_interface = NULL;
28290Sstevel@tonic-gate rw_exit(&ni_rwlp.lock);
28300Sstevel@tonic-gate }
28310Sstevel@tonic-gate
28320Sstevel@tonic-gate void
28330Sstevel@tonic-gate md_create_unit_incore(minor_t mnum, md_ops_t *ops, int alloc_lock)
28340Sstevel@tonic-gate {
28350Sstevel@tonic-gate mdi_unit_t *ui;
28360Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum);
28370Sstevel@tonic-gate
28380Sstevel@tonic-gate ui = (mdi_unit_t *)kmem_zalloc(sizeof (mdi_unit_t), KM_SLEEP);
28390Sstevel@tonic-gate ui->ui_opsindex = ops->md_selfindex;
28400Sstevel@tonic-gate
28410Sstevel@tonic-gate /* initialize all the incore conditional variables */
28420Sstevel@tonic-gate mutex_init(&ui->ui_mx, NULL, MUTEX_DEFAULT, NULL);
28430Sstevel@tonic-gate cv_init(&ui->ui_cv, NULL, CV_DEFAULT, NULL);
28440Sstevel@tonic-gate
28458452SJohn.Wren.Kennedy@Sun.COM if (alloc_lock) {
28468452SJohn.Wren.Kennedy@Sun.COM ui->ui_io_lock = kmem_zalloc(sizeof (md_io_lock_t), KM_SLEEP);
28478452SJohn.Wren.Kennedy@Sun.COM mutex_init(&ui->ui_io_lock->io_mx, NULL, MUTEX_DEFAULT, NULL);
28488452SJohn.Wren.Kennedy@Sun.COM cv_init(&ui->ui_io_lock->io_cv, NULL, CV_DEFAULT, NULL);
28498452SJohn.Wren.Kennedy@Sun.COM mutex_init(&ui->ui_io_lock->io_list_mutex, NULL,
28508452SJohn.Wren.Kennedy@Sun.COM MUTEX_DEFAULT, NULL);
28518452SJohn.Wren.Kennedy@Sun.COM ui->ui_io_lock->io_list_front = NULL;
28528452SJohn.Wren.Kennedy@Sun.COM ui->ui_io_lock->io_list_back = NULL;
28538452SJohn.Wren.Kennedy@Sun.COM }
28540Sstevel@tonic-gate if (! (md_get_setstatus(setno) & MD_SET_SNARFING)) {
28550Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_WRITER);
28560Sstevel@tonic-gate MDI_VOIDUNIT(mnum) = (void *) ui;
28570Sstevel@tonic-gate rw_exit(&md_unit_array_rw.lock);
28580Sstevel@tonic-gate } else
28590Sstevel@tonic-gate MDI_VOIDUNIT(mnum) = (void *) ui;
28600Sstevel@tonic-gate
28610Sstevel@tonic-gate rw_enter(&ops->md_link_rw.lock, RW_WRITER);
28620Sstevel@tonic-gate ui->ui_link.ln_next = ops->md_head;
28630Sstevel@tonic-gate ui->ui_link.ln_setno = setno;
28640Sstevel@tonic-gate ui->ui_link.ln_id = mnum;
28650Sstevel@tonic-gate ops->md_head = &ui->ui_link;
28660Sstevel@tonic-gate /* setup the unavailable field */
28670Sstevel@tonic-gate #if defined(_ILP32)
28681623Stw21770 if (((md_unit_t *)MD_UNIT(mnum))->c.un_revision & MD_64BIT_META_DEV) {
28690Sstevel@tonic-gate ui->ui_tstate |= MD_64MD_ON_32KERNEL;
28700Sstevel@tonic-gate cmn_err(CE_NOTE, "d%d is unavailable because 64 bit "
28710Sstevel@tonic-gate "metadevices are not accessible on a 32 bit kernel",
28720Sstevel@tonic-gate mnum);
28730Sstevel@tonic-gate }
28740Sstevel@tonic-gate #endif
28750Sstevel@tonic-gate
28760Sstevel@tonic-gate rw_exit(&ops->md_link_rw.lock);
28770Sstevel@tonic-gate }
28780Sstevel@tonic-gate
28790Sstevel@tonic-gate void
28800Sstevel@tonic-gate md_destroy_unit_incore(minor_t mnum, md_ops_t *ops)
28810Sstevel@tonic-gate {
28820Sstevel@tonic-gate mdi_unit_t *ui;
28830Sstevel@tonic-gate
28840Sstevel@tonic-gate /*
28850Sstevel@tonic-gate * ASSUMPTION: md_unit_array_rw WRITER lock is held.
28860Sstevel@tonic-gate */
28870Sstevel@tonic-gate ui = MDI_UNIT(mnum);
28880Sstevel@tonic-gate if (ui == NULL)
28890Sstevel@tonic-gate return;
28900Sstevel@tonic-gate
28910Sstevel@tonic-gate md_rem_link(MD_MIN2SET(mnum), mnum, &ops->md_link_rw.lock,
28920Sstevel@tonic-gate &ops->md_head);
28930Sstevel@tonic-gate
28940Sstevel@tonic-gate /* destroy the io lock if one is being used */
28950Sstevel@tonic-gate if (ui->ui_io_lock) {
28960Sstevel@tonic-gate mutex_destroy(&ui->ui_io_lock->io_mx);
28970Sstevel@tonic-gate cv_destroy(&ui->ui_io_lock->io_cv);
28980Sstevel@tonic-gate kmem_free(ui->ui_io_lock, sizeof (md_io_lock_t));
28990Sstevel@tonic-gate }
29000Sstevel@tonic-gate
29010Sstevel@tonic-gate /* teardown kstat */
29020Sstevel@tonic-gate md_kstat_destroy(mnum);
29030Sstevel@tonic-gate
29040Sstevel@tonic-gate /* destroy all the incore conditional variables */
29050Sstevel@tonic-gate mutex_destroy(&ui->ui_mx);
29060Sstevel@tonic-gate cv_destroy(&ui->ui_cv);
29070Sstevel@tonic-gate
29080Sstevel@tonic-gate kmem_free(ui, sizeof (mdi_unit_t));
29090Sstevel@tonic-gate MDI_VOIDUNIT(mnum) = (void *) NULL;
29100Sstevel@tonic-gate }
29110Sstevel@tonic-gate
29120Sstevel@tonic-gate void
29130Sstevel@tonic-gate md_rem_names(sv_dev_t *sv, int nsv)
29140Sstevel@tonic-gate {
29150Sstevel@tonic-gate int i, s;
29160Sstevel@tonic-gate int max_sides;
29170Sstevel@tonic-gate
29180Sstevel@tonic-gate if (nsv == 0)
29190Sstevel@tonic-gate return;
29200Sstevel@tonic-gate
29210Sstevel@tonic-gate /* All entries removed are in the same diskset */
29220Sstevel@tonic-gate if (md_get_setstatus(sv[0].setno) & MD_SET_MNSET)
29230Sstevel@tonic-gate max_sides = MD_MNMAXSIDES;
29240Sstevel@tonic-gate else
29250Sstevel@tonic-gate max_sides = MD_MAXSIDES;
29260Sstevel@tonic-gate
29270Sstevel@tonic-gate for (i = 0; i < nsv; i++)
29280Sstevel@tonic-gate for (s = 0; s < max_sides; s++)
29290Sstevel@tonic-gate (void) md_remdevname(sv[i].setno, s, sv[i].key);
29300Sstevel@tonic-gate }
29310Sstevel@tonic-gate
29320Sstevel@tonic-gate /*
29330Sstevel@tonic-gate * Checking user args before we get into physio - returns 0 for ok, else errno
29340Sstevel@tonic-gate * We do a lot of checking against illegal arguments here because some of the
29350Sstevel@tonic-gate  * real disk drivers don't like certain kinds of arguments (e.g. xy doesn't
29360Sstevel@tonic-gate  * like an odd-address user buffer). Those drivers catch bad arguments in
29370Sstevel@tonic-gate  * xxread and xxwrite. But since the meta-driver calls their strategy routines
29380Sstevel@tonic-gate  * directly, two bad scenarios might happen:
29390Sstevel@tonic-gate  *	1. the real strategy doesn't like it and panics.
29400Sstevel@tonic-gate  *	2. the real strategy doesn't like it and sets B_ERROR.
29410Sstevel@tonic-gate *
29420Sstevel@tonic-gate * The second case is no better than the first one, since the meta-driver
29430Sstevel@tonic-gate  * will treat it as a media error and offline the mirror metapartition.
29440Sstevel@tonic-gate * (Too bad there is no way to tell what error it is.)
29450Sstevel@tonic-gate *
29460Sstevel@tonic-gate */
29470Sstevel@tonic-gate int
29480Sstevel@tonic-gate md_chk_uio(struct uio *uio)
29490Sstevel@tonic-gate {
29500Sstevel@tonic-gate int i;
29510Sstevel@tonic-gate struct iovec *iov;
29520Sstevel@tonic-gate
29530Sstevel@tonic-gate /*
29540Sstevel@tonic-gate * Check for negative or not block-aligned offset
29550Sstevel@tonic-gate */
29560Sstevel@tonic-gate if ((uio->uio_loffset < 0) ||
29570Sstevel@tonic-gate ((uio->uio_loffset & (DEV_BSIZE - 1)) != 0)) {
29580Sstevel@tonic-gate return (EINVAL);
29590Sstevel@tonic-gate }
29600Sstevel@tonic-gate iov = uio->uio_iov;
29610Sstevel@tonic-gate i = uio->uio_iovcnt;
29620Sstevel@tonic-gate
29630Sstevel@tonic-gate while (i--) {
29640Sstevel@tonic-gate if ((iov->iov_len & (DEV_BSIZE - 1)) != 0)
29650Sstevel@tonic-gate return (EINVAL);
29660Sstevel@tonic-gate /*
29670Sstevel@tonic-gate * Bug # 1212146
29680Sstevel@tonic-gate * The default is to not check alignment, but we can now check
29690Sstevel@tonic-gate * for a larger number of alignments if desired.
29700Sstevel@tonic-gate */
29710Sstevel@tonic-gate if ((uintptr_t)(iov->iov_base) & md_uio_alignment_mask)
29720Sstevel@tonic-gate return (EINVAL);
29730Sstevel@tonic-gate iov++;
29740Sstevel@tonic-gate }
29750Sstevel@tonic-gate return (0);
29760Sstevel@tonic-gate }
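
/*
 * Illustrative outcomes of the checks above (values are hypothetical):
 *
 *	uio_loffset = 512, every iov_len a multiple of DEV_BSIZE	-> 0
 *	uio_loffset = 100 (not DEV_BSIZE aligned)			-> EINVAL
 *	iov_len = 1000 (not a multiple of DEV_BSIZE)			-> EINVAL
 *
 * The iov_base alignment check is controlled by md_uio_alignment_mask,
 * which by default performs no alignment checking.
 */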
29770Sstevel@tonic-gate
29780Sstevel@tonic-gate char *
29790Sstevel@tonic-gate md_shortname(
29800Sstevel@tonic-gate minor_t mnum
29810Sstevel@tonic-gate )
29820Sstevel@tonic-gate {
29831623Stw21770 static char buf[MAXPATHLEN];
29841623Stw21770 char *devname;
29851623Stw21770 char *invalid = " (Invalid minor number %u) ";
29861623Stw21770 char *metaname;
29871623Stw21770 mdc_unit_t *un;
29881623Stw21770 side_t side;
29890Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum);
29900Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum);
29910Sstevel@tonic-gate
29921623Stw21770 if ((un = MD_UNIT(mnum)) == NULL) {
29931623Stw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum);
29941623Stw21770 return (buf);
29951623Stw21770 }
29961623Stw21770
29971623Stw21770 /*
29981623Stw21770 * If unit is not a friendly name unit, derive the name from the
29991623Stw21770 * minor number.
30001623Stw21770 */
30011623Stw21770 if ((un->un_revision & MD_FN_META_DEV) == 0) {
30021623Stw21770 /* This is a traditional metadevice */
30031623Stw21770 if (setno == MD_LOCAL_SET) {
30041623Stw21770 (void) snprintf(buf, sizeof (buf), "d%u",
30057563SPrasad.Singamsetty@Sun.COM (unsigned)unit);
30061623Stw21770 } else {
30071623Stw21770 (void) snprintf(buf, sizeof (buf), "%s/d%u",
30081623Stw21770 mddb_getsetname(setno), (unsigned)unit);
30091623Stw21770 }
30100Sstevel@tonic-gate return (buf);
30110Sstevel@tonic-gate }
30120Sstevel@tonic-gate
30131623Stw21770 /*
30141623Stw21770 * It is a friendly name metadevice, so we need to get its name.
30151623Stw21770 */
30161623Stw21770 side = mddb_getsidenum(setno);
30171623Stw21770 devname = (char *)kmem_alloc(MAXPATHLEN, KM_SLEEP);
30181623Stw21770 if (md_getdevname(setno, side, MD_KEYWILD,
30197563SPrasad.Singamsetty@Sun.COM md_makedevice(md_major, mnum), devname, MAXPATHLEN) == 0) {
30201623Stw21770 /*
30211623Stw21770 * md_getdevname has given us either /dev/md/dsk/<metaname>
30221623Stw21770  * or /dev/md/<setname>/dsk/<metaname> depending on whether
30231623Stw21770 * or not we are in the local set. Thus, we'll pull the
30241623Stw21770 * metaname from this string.
30251623Stw21770 */
30261623Stw21770 if ((metaname = strrchr(devname, '/')) == NULL) {
30271623Stw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum);
30281623Stw21770 goto out;
30291623Stw21770 }
30301623Stw21770 metaname++; /* move past slash */
30311623Stw21770 if (setno == MD_LOCAL_SET) {
30321623Stw21770 /* No set name. */
30331623Stw21770 (void) snprintf(buf, sizeof (buf), "%s", metaname);
30341623Stw21770 } else {
30351623Stw21770 /* Include setname */
30361623Stw21770 (void) snprintf(buf, sizeof (buf), "%s/%s",
30377563SPrasad.Singamsetty@Sun.COM mddb_getsetname(setno), metaname);
30381623Stw21770 }
30390Sstevel@tonic-gate } else {
30401623Stw21770 /* We couldn't find the name. */
30411623Stw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum);
30420Sstevel@tonic-gate }
30430Sstevel@tonic-gate
30441623Stw21770 out:
30451623Stw21770 kmem_free(devname, MAXPATHLEN);
30460Sstevel@tonic-gate return (buf);
30470Sstevel@tonic-gate }
30480Sstevel@tonic-gate
30490Sstevel@tonic-gate char *
30500Sstevel@tonic-gate md_devname(
30510Sstevel@tonic-gate set_t setno,
30520Sstevel@tonic-gate md_dev64_t dev,
30530Sstevel@tonic-gate char *buf,
30540Sstevel@tonic-gate size_t size
30550Sstevel@tonic-gate )
30560Sstevel@tonic-gate {
30570Sstevel@tonic-gate static char mybuf[MD_MAX_CTDLEN];
30580Sstevel@tonic-gate int err;
30590Sstevel@tonic-gate
30600Sstevel@tonic-gate if (buf == NULL) {
30610Sstevel@tonic-gate buf = mybuf;
30620Sstevel@tonic-gate size = sizeof (mybuf);
30630Sstevel@tonic-gate } else {
30640Sstevel@tonic-gate ASSERT(size >= MD_MAX_CTDLEN);
30650Sstevel@tonic-gate }
30660Sstevel@tonic-gate
30674491Sjmf err = md_getdevname_common(setno, mddb_getsidenum(setno),
30687563SPrasad.Singamsetty@Sun.COM 0, dev, buf, size, MD_NOWAIT_LOCK);
30690Sstevel@tonic-gate if (err) {
30700Sstevel@tonic-gate if (err == ENOENT) {
30710Sstevel@tonic-gate (void) sprintf(buf, "(Unavailable)");
30720Sstevel@tonic-gate } else {
30730Sstevel@tonic-gate (void) sprintf(buf, "(%u.%u)",
30740Sstevel@tonic-gate md_getmajor(dev), md_getminor(dev));
30750Sstevel@tonic-gate }
30760Sstevel@tonic-gate }
30770Sstevel@tonic-gate
30780Sstevel@tonic-gate return (buf);
30790Sstevel@tonic-gate }
30800Sstevel@tonic-gate void
30810Sstevel@tonic-gate md_minphys(buf_t *pb)
30820Sstevel@tonic-gate {
30830Sstevel@tonic-gate extern unsigned md_maxbcount;
30840Sstevel@tonic-gate
30850Sstevel@tonic-gate if (pb->b_bcount > md_maxbcount)
30860Sstevel@tonic-gate pb->b_bcount = md_maxbcount;
30870Sstevel@tonic-gate }
30880Sstevel@tonic-gate
30890Sstevel@tonic-gate void
30900Sstevel@tonic-gate md_bioinit(struct buf *bp)
30910Sstevel@tonic-gate {
30920Sstevel@tonic-gate ASSERT(bp);
30930Sstevel@tonic-gate
30940Sstevel@tonic-gate bioinit(bp);
30950Sstevel@tonic-gate bp->b_back = bp;
30960Sstevel@tonic-gate bp->b_forw = bp;
30970Sstevel@tonic-gate bp->b_flags = B_BUSY; /* initialize flags */
30980Sstevel@tonic-gate }
30990Sstevel@tonic-gate
31000Sstevel@tonic-gate void
31010Sstevel@tonic-gate md_bioreset(struct buf *bp)
31020Sstevel@tonic-gate {
31030Sstevel@tonic-gate ASSERT(bp);
31040Sstevel@tonic-gate
31050Sstevel@tonic-gate bioreset(bp);
31060Sstevel@tonic-gate bp->b_back = bp;
31070Sstevel@tonic-gate bp->b_forw = bp;
31080Sstevel@tonic-gate bp->b_flags = B_BUSY; /* initialize flags */
31090Sstevel@tonic-gate }
31100Sstevel@tonic-gate
31110Sstevel@tonic-gate /*
31120Sstevel@tonic-gate * md_bioclone is needed as long as the real bioclone only takes a daddr_t
31130Sstevel@tonic-gate * as block number.
31140Sstevel@tonic-gate * We simply call bioclone with all input parameters but blkno, and set the
31150Sstevel@tonic-gate * correct blkno afterwards.
31160Sstevel@tonic-gate * Caveat Emptor: bp_mem must not be NULL!
31170Sstevel@tonic-gate */
31180Sstevel@tonic-gate buf_t *
31190Sstevel@tonic-gate md_bioclone(buf_t *bp, off_t off, size_t len, dev_t dev, diskaddr_t blkno,
31200Sstevel@tonic-gate int (*iodone)(buf_t *), buf_t *bp_mem, int sleep)
31210Sstevel@tonic-gate {
31220Sstevel@tonic-gate (void) bioclone(bp, off, len, dev, 0, iodone, bp_mem, sleep);
31230Sstevel@tonic-gate bp_mem->b_lblkno = blkno;
31240Sstevel@tonic-gate return (bp_mem);
31250Sstevel@tonic-gate }
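
/*
 * Minimal usage sketch (hypothetical caller; component_dev, component_blkno
 * and child_done are illustrative names only, not identifiers from this
 * driver):
 *
 *	buf_t	cb;
 *
 *	md_bioinit(&cb);
 *	(void) md_bioclone(pb, 0, pb->b_bcount, component_dev,
 *	    component_blkno, child_done, &cb, KM_NOSLEEP);
 *	(void) bdev_strategy(&cb);
 *
 * The clone maps the same data as the parent buf but carries the 64-bit
 * component block address in b_lblkno.
 */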
31260Sstevel@tonic-gate
31270Sstevel@tonic-gate
31280Sstevel@tonic-gate /*
31290Sstevel@tonic-gate * kstat stuff
31300Sstevel@tonic-gate */
31310Sstevel@tonic-gate void
31320Sstevel@tonic-gate md_kstat_init_ui(
31330Sstevel@tonic-gate minor_t mnum,
31340Sstevel@tonic-gate mdi_unit_t *ui
31350Sstevel@tonic-gate )
31360Sstevel@tonic-gate {
31370Sstevel@tonic-gate if ((ui != NULL) && (ui->ui_kstat == NULL)) {
31380Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum);
31390Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum);
31400Sstevel@tonic-gate char module[KSTAT_STRLEN];
31410Sstevel@tonic-gate char *p = module;
31420Sstevel@tonic-gate
31430Sstevel@tonic-gate if (setno != MD_LOCAL_SET) {
31440Sstevel@tonic-gate char buf[64];
31450Sstevel@tonic-gate char *s = buf;
31460Sstevel@tonic-gate char *e = module + sizeof (module) - 4;
31470Sstevel@tonic-gate
31480Sstevel@tonic-gate (void) sprintf(buf, "%u", setno);
31490Sstevel@tonic-gate while ((p < e) && (*s != '\0'))
31500Sstevel@tonic-gate *p++ = *s++;
31510Sstevel@tonic-gate *p++ = '/';
31520Sstevel@tonic-gate }
31530Sstevel@tonic-gate *p++ = 'm';
31540Sstevel@tonic-gate *p++ = 'd';
31550Sstevel@tonic-gate *p = '\0';
31560Sstevel@tonic-gate if ((ui->ui_kstat = kstat_create(module, unit, NULL, "disk",
31570Sstevel@tonic-gate KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) {
31580Sstevel@tonic-gate ui->ui_kstat->ks_lock = &ui->ui_mx;
31590Sstevel@tonic-gate kstat_install(ui->ui_kstat);
31600Sstevel@tonic-gate }
31610Sstevel@tonic-gate }
31620Sstevel@tonic-gate }
31630Sstevel@tonic-gate
31640Sstevel@tonic-gate void
31650Sstevel@tonic-gate md_kstat_init(
31660Sstevel@tonic-gate minor_t mnum
31670Sstevel@tonic-gate )
31680Sstevel@tonic-gate {
31690Sstevel@tonic-gate md_kstat_init_ui(mnum, MDI_UNIT(mnum));
31700Sstevel@tonic-gate }
31710Sstevel@tonic-gate
31720Sstevel@tonic-gate void
31730Sstevel@tonic-gate md_kstat_destroy_ui(
31740Sstevel@tonic-gate mdi_unit_t *ui
31750Sstevel@tonic-gate )
31760Sstevel@tonic-gate {
31770Sstevel@tonic-gate /*
31780Sstevel@tonic-gate 	 * The kstat_delete() interface has its own locking mechanism and
31790Sstevel@tonic-gate 	 * does not allow the kstat lock (ks_lock) to be held.
31800Sstevel@tonic-gate * Note: ks_lock == ui_mx from the md_kstat_init_ui().
31810Sstevel@tonic-gate */
31820Sstevel@tonic-gate if ((ui != NULL) && (ui->ui_kstat != NULL)) {
31830Sstevel@tonic-gate kstat_delete(ui->ui_kstat);
31840Sstevel@tonic-gate ui->ui_kstat = NULL;
31850Sstevel@tonic-gate }
31860Sstevel@tonic-gate }
31870Sstevel@tonic-gate
31880Sstevel@tonic-gate void
31890Sstevel@tonic-gate md_kstat_destroy(
31900Sstevel@tonic-gate minor_t mnum
31910Sstevel@tonic-gate )
31920Sstevel@tonic-gate {
31930Sstevel@tonic-gate md_kstat_destroy_ui(MDI_UNIT(mnum));
31940Sstevel@tonic-gate }
31950Sstevel@tonic-gate
31960Sstevel@tonic-gate /*
31970Sstevel@tonic-gate  * In the following routines, locks are held before checking the
31980Sstevel@tonic-gate * validity of ui_kstat. This is done to make sure that we don't trip over
31990Sstevel@tonic-gate * a NULL ui_kstat anymore.
32000Sstevel@tonic-gate */
32010Sstevel@tonic-gate
32020Sstevel@tonic-gate void
32030Sstevel@tonic-gate md_kstat_waitq_enter(
32040Sstevel@tonic-gate mdi_unit_t *ui
32050Sstevel@tonic-gate )
32060Sstevel@tonic-gate {
32070Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32080Sstevel@tonic-gate if (ui->ui_kstat != NULL)
32090Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(ui->ui_kstat));
32100Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32110Sstevel@tonic-gate }
32120Sstevel@tonic-gate
32130Sstevel@tonic-gate void
32140Sstevel@tonic-gate md_kstat_waitq_to_runq(
32150Sstevel@tonic-gate mdi_unit_t *ui
32160Sstevel@tonic-gate )
32170Sstevel@tonic-gate {
32180Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32190Sstevel@tonic-gate if (ui->ui_kstat != NULL)
32200Sstevel@tonic-gate kstat_waitq_to_runq(KSTAT_IO_PTR(ui->ui_kstat));
32210Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32220Sstevel@tonic-gate }
32230Sstevel@tonic-gate
32240Sstevel@tonic-gate void
32250Sstevel@tonic-gate md_kstat_waitq_exit(
32260Sstevel@tonic-gate mdi_unit_t *ui
32270Sstevel@tonic-gate )
32280Sstevel@tonic-gate {
32290Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32300Sstevel@tonic-gate if (ui->ui_kstat != NULL)
32310Sstevel@tonic-gate kstat_waitq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32320Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32330Sstevel@tonic-gate }
32340Sstevel@tonic-gate
32350Sstevel@tonic-gate void
32360Sstevel@tonic-gate md_kstat_runq_enter(
32370Sstevel@tonic-gate mdi_unit_t *ui
32380Sstevel@tonic-gate )
32390Sstevel@tonic-gate {
32400Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32410Sstevel@tonic-gate if (ui->ui_kstat != NULL)
32420Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(ui->ui_kstat));
32430Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32440Sstevel@tonic-gate }
32450Sstevel@tonic-gate
32460Sstevel@tonic-gate void
32470Sstevel@tonic-gate md_kstat_runq_exit(
32480Sstevel@tonic-gate mdi_unit_t *ui
32490Sstevel@tonic-gate )
32500Sstevel@tonic-gate {
32510Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32520Sstevel@tonic-gate if (ui->ui_kstat != NULL)
32530Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32540Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32550Sstevel@tonic-gate }
32560Sstevel@tonic-gate
32570Sstevel@tonic-gate void
32580Sstevel@tonic-gate md_kstat_done(
32590Sstevel@tonic-gate mdi_unit_t *ui,
32600Sstevel@tonic-gate buf_t *bp,
32610Sstevel@tonic-gate int war
32620Sstevel@tonic-gate )
32630Sstevel@tonic-gate {
32640Sstevel@tonic-gate size_t n_done;
32650Sstevel@tonic-gate
32660Sstevel@tonic-gate /* check for end of device */
32670Sstevel@tonic-gate if ((bp->b_resid != 0) && (! (bp->b_flags & B_ERROR))) {
32680Sstevel@tonic-gate n_done = bp->b_bcount;
32690Sstevel@tonic-gate } else if (bp->b_bcount < bp->b_resid) {
32700Sstevel@tonic-gate n_done = 0;
32710Sstevel@tonic-gate } else {
32720Sstevel@tonic-gate n_done = bp->b_bcount - bp->b_resid;
32730Sstevel@tonic-gate }
32740Sstevel@tonic-gate
32750Sstevel@tonic-gate /* do accounting */
32760Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
32770Sstevel@tonic-gate if (ui->ui_kstat != NULL) {
32780Sstevel@tonic-gate if ((! war) && (bp->b_flags & B_READ)) {
32790Sstevel@tonic-gate KSTAT_IO_PTR(ui->ui_kstat)->reads++;
32800Sstevel@tonic-gate KSTAT_IO_PTR(ui->ui_kstat)->nread += n_done;
32810Sstevel@tonic-gate } else {
32820Sstevel@tonic-gate KSTAT_IO_PTR(ui->ui_kstat)->writes++;
32830Sstevel@tonic-gate KSTAT_IO_PTR(ui->ui_kstat)->nwritten += n_done;
32840Sstevel@tonic-gate }
32850Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(ui->ui_kstat));
32860Sstevel@tonic-gate }
32870Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
32880Sstevel@tonic-gate }
32890Sstevel@tonic-gate
32900Sstevel@tonic-gate pid_t
32910Sstevel@tonic-gate md_getpid()
32920Sstevel@tonic-gate {
32930Sstevel@tonic-gate pid_t valuep;
32940Sstevel@tonic-gate if (drv_getparm(PPID, (pid_t *)&valuep) != 0) {
32950Sstevel@tonic-gate ASSERT(0);
32960Sstevel@tonic-gate return ((pid_t)0);
32970Sstevel@tonic-gate } else {
32980Sstevel@tonic-gate ASSERT(valuep);
32990Sstevel@tonic-gate return (valuep);
33000Sstevel@tonic-gate }
33010Sstevel@tonic-gate }
33020Sstevel@tonic-gate
33030Sstevel@tonic-gate
33040Sstevel@tonic-gate proc_t *
33050Sstevel@tonic-gate md_getproc()
33060Sstevel@tonic-gate {
33070Sstevel@tonic-gate proc_t *valuep;
33080Sstevel@tonic-gate if (drv_getparm(UPROCP, (proc_t **)&valuep) != 0) {
33090Sstevel@tonic-gate ASSERT(0);
33100Sstevel@tonic-gate return ((proc_t *)NULL);
33110Sstevel@tonic-gate } else {
33120Sstevel@tonic-gate ASSERT(valuep);
33130Sstevel@tonic-gate return (valuep);
33140Sstevel@tonic-gate }
33150Sstevel@tonic-gate }
33160Sstevel@tonic-gate
33170Sstevel@tonic-gate extern kmutex_t pidlock;
33180Sstevel@tonic-gate
33190Sstevel@tonic-gate /*
33200Sstevel@tonic-gate  * This checks whether a process/pid pair is still running. For the
33210Sstevel@tonic-gate  * disk set lock, when both pid and proc are zero the lock is not
33220Sstevel@tonic-gate  * currently held.
33230Sstevel@tonic-gate */
33240Sstevel@tonic-gate int
33250Sstevel@tonic-gate md_checkpid(pid_t pid, proc_t *proc)
33260Sstevel@tonic-gate {
33270Sstevel@tonic-gate int retval = 1;
33280Sstevel@tonic-gate
33290Sstevel@tonic-gate if (pid == 0 && proc == NULL)
33300Sstevel@tonic-gate return (0);
33310Sstevel@tonic-gate
33320Sstevel@tonic-gate mutex_enter(&pidlock);
33330Sstevel@tonic-gate if (prfind(pid) != proc)
33340Sstevel@tonic-gate retval = 0;
33350Sstevel@tonic-gate mutex_exit(&pidlock);
33360Sstevel@tonic-gate return (retval);
33370Sstevel@tonic-gate }
33380Sstevel@tonic-gate
33390Sstevel@tonic-gate /*
33400Sstevel@tonic-gate * NAME: md_init_probereq
33410Sstevel@tonic-gate *
33420Sstevel@tonic-gate * DESCRIPTION: initializes a probe request. Parcels out the mnums such that
33430Sstevel@tonic-gate * they can be dispatched to multiple daemon threads.
33440Sstevel@tonic-gate *
33450Sstevel@tonic-gate  * PARAMETERS: struct md_probedev *p	pointer to the ioctl input
33460Sstevel@tonic-gate *
33470Sstevel@tonic-gate * RETURN VALUE: Returns errno
33480Sstevel@tonic-gate *
33490Sstevel@tonic-gate */
33500Sstevel@tonic-gate
33510Sstevel@tonic-gate int
33520Sstevel@tonic-gate md_init_probereq(struct md_probedev_impl *p, daemon_queue_t **hdrpp)
33530Sstevel@tonic-gate {
33540Sstevel@tonic-gate int err = 0;
33550Sstevel@tonic-gate int modindx;
33560Sstevel@tonic-gate intptr_t (*probe_test)();
33570Sstevel@tonic-gate
33580Sstevel@tonic-gate /*
33590Sstevel@tonic-gate * Initialize the semaphores and mutex
33600Sstevel@tonic-gate * for the request
33610Sstevel@tonic-gate */
33620Sstevel@tonic-gate
33630Sstevel@tonic-gate p->probe_sema = kmem_alloc(sizeof (ksema_t), KM_SLEEP);
33640Sstevel@tonic-gate
33650Sstevel@tonic-gate p->probe_mx = kmem_alloc(sizeof (kmutex_t), KM_SLEEP);
33660Sstevel@tonic-gate sema_init(PROBE_SEMA(p), 0, NULL, SEMA_DRIVER, NULL);
33670Sstevel@tonic-gate mutex_init(PROBE_MX(p), NULL, MUTEX_DEFAULT, NULL);
33680Sstevel@tonic-gate
33690Sstevel@tonic-gate modindx = md_getmodindex(&(p->probe.md_driver), 1, 1);
33700Sstevel@tonic-gate probe_test = md_get_named_service(NODEV64, modindx,
33717563SPrasad.Singamsetty@Sun.COM p->probe.test_name, 0);
33720Sstevel@tonic-gate if (probe_test == NULL) {
33730Sstevel@tonic-gate err = EINVAL;
33740Sstevel@tonic-gate goto err_out;
33750Sstevel@tonic-gate }
33760Sstevel@tonic-gate
33770Sstevel@tonic-gate err = md_create_probe_rqlist(p, hdrpp, probe_test);
33780Sstevel@tonic-gate err_out:
33790Sstevel@tonic-gate return (err);
33800Sstevel@tonic-gate }
33810Sstevel@tonic-gate
33820Sstevel@tonic-gate /*
33830Sstevel@tonic-gate * NAME: md_probe_one
33840Sstevel@tonic-gate *
33850Sstevel@tonic-gate * DESCRIPTION: Generic routine for probing disks. This is called from the
33860Sstevel@tonic-gate * daemon.
33870Sstevel@tonic-gate *
33880Sstevel@tonic-gate * PARAMETERS: probe_req_t *reqp pointer to the probe request structure.
33890Sstevel@tonic-gate *
33900Sstevel@tonic-gate */
33910Sstevel@tonic-gate
33920Sstevel@tonic-gate void
33930Sstevel@tonic-gate md_probe_one(probe_req_t *reqp)
33940Sstevel@tonic-gate {
33950Sstevel@tonic-gate mdi_unit_t *ui;
33960Sstevel@tonic-gate md_probedev_impl_t *p;
33970Sstevel@tonic-gate int err = 0;
339810549SAchim.Maurer@Sun.COM set_t setno;
33990Sstevel@tonic-gate
34000Sstevel@tonic-gate p = (md_probedev_impl_t *)reqp->private_handle;
34010Sstevel@tonic-gate /*
34020Sstevel@tonic-gate * Validate the unit while holding the global ioctl lock, then
34030Sstevel@tonic-gate * obtain the unit_writerlock. Once the writerlock has been obtained
34040Sstevel@tonic-gate * we can release the global lock. As long as we hold one of these
34050Sstevel@tonic-gate  * locks this will prevent a metaclear operation from being performed
34060Sstevel@tonic-gate * on the metadevice because metaclear takes the readerlock (via
34070Sstevel@tonic-gate * openclose lock).
340810549SAchim.Maurer@Sun.COM * To avoid a potential deadlock with the probe_fcn() causing i/o to
340910549SAchim.Maurer@Sun.COM * be issued to the writerlock'd metadevice we only grab the writerlock
341010549SAchim.Maurer@Sun.COM * if the unit is not an SVM root device.
34110Sstevel@tonic-gate */
34127563SPrasad.Singamsetty@Sun.COM while (md_ioctl_lock_enter() == EINTR)
34137563SPrasad.Singamsetty@Sun.COM ;
341410549SAchim.Maurer@Sun.COM setno = MD_MIN2SET(reqp->mnum);
34150Sstevel@tonic-gate ui = MDI_UNIT(reqp->mnum);
34160Sstevel@tonic-gate if (ui != NULL) {
341710549SAchim.Maurer@Sun.COM int writer_grabbed;
341810549SAchim.Maurer@Sun.COM dev_t svm_root;
341910549SAchim.Maurer@Sun.COM
342010549SAchim.Maurer@Sun.COM if ((setno == MD_LOCAL_SET) && root_is_svm) {
342110549SAchim.Maurer@Sun.COM svm_root = getrootdev();
342210549SAchim.Maurer@Sun.COM
342310549SAchim.Maurer@Sun.COM if (getminor(svm_root) == reqp->mnum) {
342410549SAchim.Maurer@Sun.COM writer_grabbed = 0;
342510549SAchim.Maurer@Sun.COM } else {
342610549SAchim.Maurer@Sun.COM writer_grabbed = 1;
342710549SAchim.Maurer@Sun.COM (void) md_unit_writerlock_common(ui, 0);
342810549SAchim.Maurer@Sun.COM }
342910549SAchim.Maurer@Sun.COM } else {
343010549SAchim.Maurer@Sun.COM writer_grabbed = 1;
343110549SAchim.Maurer@Sun.COM (void) md_unit_writerlock_common(ui, 0);
343210549SAchim.Maurer@Sun.COM }
34330Sstevel@tonic-gate (void) md_ioctl_lock_exit(0, 0, 0, FALSE);
34340Sstevel@tonic-gate err = (*reqp->probe_fcn)(ui, reqp->mnum);
343510549SAchim.Maurer@Sun.COM if (writer_grabbed) {
343610549SAchim.Maurer@Sun.COM md_unit_writerexit(ui);
343710549SAchim.Maurer@Sun.COM }
34380Sstevel@tonic-gate } else {
34390Sstevel@tonic-gate (void) md_ioctl_lock_exit(0, 0, 0, FALSE);
34400Sstevel@tonic-gate }
34410Sstevel@tonic-gate
344210549SAchim.Maurer@Sun.COM /* update the info in the probe structure */
34430Sstevel@tonic-gate
34440Sstevel@tonic-gate mutex_enter(PROBE_MX(p));
34450Sstevel@tonic-gate if (err != 0) {
34460Sstevel@tonic-gate cmn_err(CE_NOTE, "md_probe_one: err %d mnum %d\n", err,
34477563SPrasad.Singamsetty@Sun.COM reqp->mnum);
34480Sstevel@tonic-gate (void) mdsyserror(&(p->probe.mde), err);
34490Sstevel@tonic-gate }
34500Sstevel@tonic-gate
34510Sstevel@tonic-gate mutex_exit(PROBE_MX(p));
34520Sstevel@tonic-gate sema_v(PROBE_SEMA(p));
34530Sstevel@tonic-gate
34540Sstevel@tonic-gate kmem_free(reqp, sizeof (probe_req_t));
34550Sstevel@tonic-gate }
34560Sstevel@tonic-gate char *
34570Sstevel@tonic-gate md_strdup(char *cp)
34580Sstevel@tonic-gate {
34590Sstevel@tonic-gate char *new_cp = NULL;
34600Sstevel@tonic-gate
34610Sstevel@tonic-gate new_cp = kmem_alloc(strlen(cp) + 1, KM_SLEEP);
34620Sstevel@tonic-gate
34630Sstevel@tonic-gate return (strcpy(new_cp, cp));
34640Sstevel@tonic-gate }
34650Sstevel@tonic-gate
34660Sstevel@tonic-gate void
34670Sstevel@tonic-gate freestr(char *cp)
34680Sstevel@tonic-gate {
34690Sstevel@tonic-gate kmem_free(cp, strlen(cp) + 1);
34700Sstevel@tonic-gate }
34710Sstevel@tonic-gate
34720Sstevel@tonic-gate /*
34730Sstevel@tonic-gate * Validate the list and skip invalid devices. Then create
34740Sstevel@tonic-gate * a doubly linked circular list of devices to probe.
34750Sstevel@tonic-gate * The hdr points to the head and tail of this list.
34760Sstevel@tonic-gate */
34770Sstevel@tonic-gate
34780Sstevel@tonic-gate static int
34790Sstevel@tonic-gate md_create_probe_rqlist(md_probedev_impl_t *plist, daemon_queue_t **hdr,
34800Sstevel@tonic-gate intptr_t (*probe_test)())
34810Sstevel@tonic-gate {
34820Sstevel@tonic-gate int i, err, nodevcnt;
34830Sstevel@tonic-gate probe_req_t *tp;
34840Sstevel@tonic-gate daemon_queue_t *hp;
34850Sstevel@tonic-gate minor_t mnum;
34860Sstevel@tonic-gate
34870Sstevel@tonic-gate nodevcnt = 0;
34880Sstevel@tonic-gate
34890Sstevel@tonic-gate hp = NULL;
34900Sstevel@tonic-gate
34910Sstevel@tonic-gate for (i = 0; i < plist->probe.nmdevs; i++) {
34920Sstevel@tonic-gate mnum = ((minor_t *)(uintptr_t)(plist->probe.mnum_list))[i];
34930Sstevel@tonic-gate if (MDI_UNIT(mnum) == NULL) {
34940Sstevel@tonic-gate cmn_err(CE_WARN, "md: Cannot probe %s since it does "
34950Sstevel@tonic-gate "not exist", md_shortname(mnum));
34960Sstevel@tonic-gate nodevcnt++;
34970Sstevel@tonic-gate continue;
34980Sstevel@tonic-gate }
34990Sstevel@tonic-gate tp = kmem_alloc(sizeof (probe_req_t), KM_SLEEP);
35000Sstevel@tonic-gate tp->mnum = mnum;
35010Sstevel@tonic-gate tp->private_handle = (void *)plist;
35020Sstevel@tonic-gate tp->probe_fcn = probe_test;
35030Sstevel@tonic-gate if (hp == NULL) {
35040Sstevel@tonic-gate hp = (daemon_queue_t *)tp;
35050Sstevel@tonic-gate hp->dq_prev = hp->dq_next = (daemon_queue_t *)tp;
35060Sstevel@tonic-gate } else {
35070Sstevel@tonic-gate tp->dq.dq_next = hp;
35080Sstevel@tonic-gate tp->dq.dq_prev = hp->dq_prev;
35090Sstevel@tonic-gate hp->dq_prev->dq_next = (daemon_queue_t *)tp;
35100Sstevel@tonic-gate hp->dq_prev = (daemon_queue_t *)tp;
35110Sstevel@tonic-gate }
35120Sstevel@tonic-gate }
35130Sstevel@tonic-gate
35140Sstevel@tonic-gate *hdr = hp;
35150Sstevel@tonic-gate if (nodevcnt > 0)
35160Sstevel@tonic-gate plist->probe.nmdevs -= nodevcnt;
35170Sstevel@tonic-gate
35180Sstevel@tonic-gate /*
35190Sstevel@tonic-gate * If there are no devices to be probed because they were
35200Sstevel@tonic-gate * incorrect, then return an error.
35210Sstevel@tonic-gate */
35220Sstevel@tonic-gate err = (plist->probe.nmdevs == 0) ? ENODEV : 0;
35230Sstevel@tonic-gate
35240Sstevel@tonic-gate return (err);
35250Sstevel@tonic-gate }
35260Sstevel@tonic-gate
35270Sstevel@tonic-gate /*
35280Sstevel@tonic-gate * This routine increments the I/O count for set I/O operations. This
35290Sstevel@tonic-gate  * value is used to determine if an I/O can be done. If a release is in
35300Sstevel@tonic-gate  * progress, this will return an error and cause the I/O to be errored.
35310Sstevel@tonic-gate */
35320Sstevel@tonic-gate int
35330Sstevel@tonic-gate md_inc_iocount(set_t setno)
35340Sstevel@tonic-gate {
35350Sstevel@tonic-gate int rc = 0;
35360Sstevel@tonic-gate
35370Sstevel@tonic-gate if (setno == 0)
35380Sstevel@tonic-gate return (0);
35390Sstevel@tonic-gate
35400Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
35410Sstevel@tonic-gate if (!(md_set_io[setno].io_state & MD_SET_ACTIVE)) {
35420Sstevel@tonic-gate rc = EIO;
35430Sstevel@tonic-gate goto out;
35440Sstevel@tonic-gate }
35450Sstevel@tonic-gate
35460Sstevel@tonic-gate ASSERT(md_set_io[setno].io_cnt >= 0);
35470Sstevel@tonic-gate md_set_io[setno].io_cnt++;
35480Sstevel@tonic-gate
35490Sstevel@tonic-gate out: mutex_exit(&md_set_io[setno].md_io_mx);
35500Sstevel@tonic-gate return (rc);
35510Sstevel@tonic-gate }
35520Sstevel@tonic-gate
35530Sstevel@tonic-gate void
35540Sstevel@tonic-gate md_inc_iocount_noblock(set_t setno)
35550Sstevel@tonic-gate {
35560Sstevel@tonic-gate
35570Sstevel@tonic-gate if (setno == 0)
35580Sstevel@tonic-gate return;
35590Sstevel@tonic-gate
35600Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
35610Sstevel@tonic-gate md_set_io[setno].io_cnt++;
35620Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
35630Sstevel@tonic-gate }
35640Sstevel@tonic-gate void
35650Sstevel@tonic-gate md_dec_iocount(set_t setno)
35660Sstevel@tonic-gate {
35670Sstevel@tonic-gate
35680Sstevel@tonic-gate if (setno == 0)
35690Sstevel@tonic-gate return;
35700Sstevel@tonic-gate
35710Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
35720Sstevel@tonic-gate md_set_io[setno].io_cnt--;
35730Sstevel@tonic-gate ASSERT(md_set_io[setno].io_cnt >= 0);
35740Sstevel@tonic-gate if ((md_set_io[setno].io_state & MD_SET_RELEASE) &&
35750Sstevel@tonic-gate (md_set_io[setno].io_cnt == 0))
35760Sstevel@tonic-gate cv_broadcast(&md_set_io[setno].md_io_cv);
35770Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
35780Sstevel@tonic-gate }
35790Sstevel@tonic-gate
35800Sstevel@tonic-gate int
35810Sstevel@tonic-gate md_isblock_setio(set_t setno)
35820Sstevel@tonic-gate {
35830Sstevel@tonic-gate int rc = 0;
35840Sstevel@tonic-gate
35850Sstevel@tonic-gate if (setno == 0)
35860Sstevel@tonic-gate return (0);
35870Sstevel@tonic-gate
35880Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
35890Sstevel@tonic-gate if (md_set_io[setno].io_state & MD_SET_RELEASE)
35900Sstevel@tonic-gate rc = 1;
35910Sstevel@tonic-gate
35920Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
35930Sstevel@tonic-gate return (rc);
35940Sstevel@tonic-gate }
35950Sstevel@tonic-gate
35960Sstevel@tonic-gate int
35970Sstevel@tonic-gate md_block_setio(set_t setno)
35980Sstevel@tonic-gate {
35990Sstevel@tonic-gate int rc = 0;
36000Sstevel@tonic-gate
36010Sstevel@tonic-gate if (setno == 0)
36020Sstevel@tonic-gate return (1);
36030Sstevel@tonic-gate
36040Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
36050Sstevel@tonic-gate md_set_io[setno].io_state = MD_SET_RELEASE;
36060Sstevel@tonic-gate
36070Sstevel@tonic-gate while (md_set_io[setno].io_cnt > 0) {
36080Sstevel@tonic-gate cv_wait(&md_set_io[setno].md_io_cv,
36090Sstevel@tonic-gate &md_set_io[setno].md_io_mx);
36100Sstevel@tonic-gate }
36110Sstevel@tonic-gate rc = 1;
36120Sstevel@tonic-gate
36130Sstevel@tonic-gate
36140Sstevel@tonic-gate ASSERT(md_set_io[setno].io_cnt == 0);
36150Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
36160Sstevel@tonic-gate
36170Sstevel@tonic-gate return (rc);
36180Sstevel@tonic-gate }
36190Sstevel@tonic-gate
36200Sstevel@tonic-gate void
36210Sstevel@tonic-gate md_clearblock_setio(set_t setno)
36220Sstevel@tonic-gate {
36230Sstevel@tonic-gate if (setno == 0)
36240Sstevel@tonic-gate return;
36250Sstevel@tonic-gate
36260Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
36270Sstevel@tonic-gate md_set_io[setno].io_state = MD_SET_ACTIVE;
36280Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
36290Sstevel@tonic-gate }
36300Sstevel@tonic-gate
36310Sstevel@tonic-gate void
36320Sstevel@tonic-gate md_unblock_setio(set_t setno)
36330Sstevel@tonic-gate {
36340Sstevel@tonic-gate if (setno == 0)
36350Sstevel@tonic-gate return;
36360Sstevel@tonic-gate
36370Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
36380Sstevel@tonic-gate #ifdef DEBUG
36390Sstevel@tonic-gate if (md_set_io[setno].io_cnt != 0) {
36400Sstevel@tonic-gate cmn_err(CE_NOTE, "set %d count was %ld at take",
36410Sstevel@tonic-gate setno, md_set_io[setno].io_cnt);
36420Sstevel@tonic-gate }
36430Sstevel@tonic-gate #endif /* DEBUG */
36440Sstevel@tonic-gate
36450Sstevel@tonic-gate md_set_io[setno].io_state = MD_SET_ACTIVE;
36460Sstevel@tonic-gate md_set_io[setno].io_cnt = 0;
36470Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
36480Sstevel@tonic-gate }
36490Sstevel@tonic-gate
36500Sstevel@tonic-gate /*
36510Sstevel@tonic-gate * Test-and-set version of md_block_setio.
36520Sstevel@tonic-gate * Set the io_state to keep new I/O from being issued.
36530Sstevel@tonic-gate * If there is I/O currently in progress, set io_state back to active
36540Sstevel@tonic-gate * and return failure (0).  Otherwise, return 1 for success.
36550Sstevel@tonic-gate *
36560Sstevel@tonic-gate * Used in a MN diskset since the commd must be suspended before
36570Sstevel@tonic-gate * this node can attempt to withdraw from a diskset. But, with commd
36580Sstevel@tonic-gate * suspended, I/O may have been issued that can never finish until
36590Sstevel@tonic-gate * commd is resumed (allocation of hotspare, etc). So, if I/O is
36600Sstevel@tonic-gate * outstanding after diskset io_state is marked RELEASE, then set diskset
36610Sstevel@tonic-gate * io_state back to ACTIVE and return failure.
36620Sstevel@tonic-gate */
36630Sstevel@tonic-gate int
36640Sstevel@tonic-gate md_tas_block_setio(set_t setno)
36650Sstevel@tonic-gate {
36660Sstevel@tonic-gate int rc;
36670Sstevel@tonic-gate
36680Sstevel@tonic-gate if (setno == 0)
36690Sstevel@tonic-gate return (1);
36700Sstevel@tonic-gate
36710Sstevel@tonic-gate mutex_enter(&md_set_io[setno].md_io_mx);
36720Sstevel@tonic-gate md_set_io[setno].io_state = MD_SET_RELEASE;
36730Sstevel@tonic-gate
36740Sstevel@tonic-gate if (md_set_io[setno].io_cnt > 0) {
36750Sstevel@tonic-gate md_set_io[setno].io_state = MD_SET_ACTIVE;
36760Sstevel@tonic-gate rc = 0;
36770Sstevel@tonic-gate } else {
36780Sstevel@tonic-gate rc = 1;
36790Sstevel@tonic-gate }
36800Sstevel@tonic-gate
36810Sstevel@tonic-gate mutex_exit(&md_set_io[setno].md_io_mx);
36820Sstevel@tonic-gate
36830Sstevel@tonic-gate return (rc);
36840Sstevel@tonic-gate }
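/*
 * Illustrative sketch (not part of the driver): a caller that must
 * quiesce I/O on a multi-node diskset before withdrawing from it could,
 * given the constraints described above, prefer the test-and-set variant
 * so that it can back off instead of waiting while commd is suspended.
 * The error value chosen here is only an example:
 *
 *	if (md_tas_block_setio(setno) == 0)
 *		return (EAGAIN);	(outstanding I/O, retry later)
 *	... perform the withdrawal ...
 *	md_clearblock_setio(setno);
 */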
36850Sstevel@tonic-gate
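/*
 * Wrapper around biodone() for metadevice buffers.  For a non-local
 * diskset the per-set outstanding I/O count is decremented first (via
 * md_dec_iocount(), which wakes any waiter in md_block_setio()), then
 * the buffer is passed to biodone().
 */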
36860Sstevel@tonic-gate void
36870Sstevel@tonic-gate md_biodone(struct buf *pb)
36880Sstevel@tonic-gate {
36890Sstevel@tonic-gate minor_t mnum;
36900Sstevel@tonic-gate set_t setno;
36910Sstevel@tonic-gate mdi_unit_t *ui;
36920Sstevel@tonic-gate
36930Sstevel@tonic-gate mnum = getminor(pb->b_edev);
36940Sstevel@tonic-gate setno = MD_MIN2SET(mnum);
36950Sstevel@tonic-gate
36960Sstevel@tonic-gate if (setno == 0) {
36970Sstevel@tonic-gate biodone(pb);
36980Sstevel@tonic-gate return;
36990Sstevel@tonic-gate }
37000Sstevel@tonic-gate
37010Sstevel@tonic-gate #ifdef DEBUG
37020Sstevel@tonic-gate ui = MDI_UNIT(mnum);
37030Sstevel@tonic-gate if (!md_unit_isopen(ui))
37040Sstevel@tonic-gate cmn_err(CE_NOTE, "io after close on %s\n", md_shortname(mnum));
37050Sstevel@tonic-gate #endif /* DEBUG */
37060Sstevel@tonic-gate
37070Sstevel@tonic-gate /*
37080Sstevel@tonic-gate * Handle the local diskset
37090Sstevel@tonic-gate */
37100Sstevel@tonic-gate if (md_set_io[setno].io_cnt > 0)
37110Sstevel@tonic-gate md_dec_iocount(setno);
37120Sstevel@tonic-gate
37130Sstevel@tonic-gate #ifdef DEBUG
37140Sstevel@tonic-gate /*
37150Sstevel@tonic-gate * This check is done after the lock is dropped, so in some
37160Sstevel@tonic-gate * cases it may be invalid.  It is advisory only.
37170Sstevel@tonic-gate */
37180Sstevel@tonic-gate if (md_set_io[setno].io_state & MD_SET_RELEASE) {
37190Sstevel@tonic-gate /* Only display this error once for this metadevice */
37200Sstevel@tonic-gate if ((ui->ui_tstate & MD_RELEASE_IOERR_DONE) == 0) {
37210Sstevel@tonic-gate cmn_err(CE_NOTE,
37220Sstevel@tonic-gate "I/O to %s attempted during set RELEASE\n",
37230Sstevel@tonic-gate md_shortname(mnum));
37240Sstevel@tonic-gate ui->ui_tstate |= MD_RELEASE_IOERR_DONE;
37250Sstevel@tonic-gate }
37260Sstevel@tonic-gate }
37270Sstevel@tonic-gate #endif /* DEBUG */
37280Sstevel@tonic-gate
37290Sstevel@tonic-gate biodone(pb);
37300Sstevel@tonic-gate }
37310Sstevel@tonic-gate
37320Sstevel@tonic-gate
37330Sstevel@tonic-gate /*
37340Sstevel@tonic-gate * Driver special private devt handling routine
37350Sstevel@tonic-gate * INPUT: md_dev64_t
37360Sstevel@tonic-gate * OUTPUT: dev_t, 32 bit on a 32 bit kernel, 64 bit on a 64 bit kernel.
37370Sstevel@tonic-gate */
37380Sstevel@tonic-gate dev_t
37390Sstevel@tonic-gate md_dev64_to_dev(md_dev64_t dev)
37400Sstevel@tonic-gate {
37410Sstevel@tonic-gate major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37420Sstevel@tonic-gate minor_t minor = (minor_t)(dev & MAXMIN64);
37430Sstevel@tonic-gate
37440Sstevel@tonic-gate return (makedevice(major, minor));
37450Sstevel@tonic-gate
37460Sstevel@tonic-gate }
37470Sstevel@tonic-gate
37480Sstevel@tonic-gate /*
37490Sstevel@tonic-gate * Driver private makedevice routine
37500Sstevel@tonic-gate * INPUT: major_t major, minor_t minor
37510Sstevel@tonic-gate * OUTPUT: md_dev64_t, no matter if on 32 bit or 64 bit kernel.
37520Sstevel@tonic-gate */
37530Sstevel@tonic-gate md_dev64_t
37540Sstevel@tonic-gate md_makedevice(major_t major, minor_t minor)
37550Sstevel@tonic-gate {
37560Sstevel@tonic-gate return (((md_dev64_t)major << NBITSMINOR64) | minor);
37570Sstevel@tonic-gate
37580Sstevel@tonic-gate }
37590Sstevel@tonic-gate
37600Sstevel@tonic-gate
37610Sstevel@tonic-gate /*
37620Sstevel@tonic-gate * Driver private devt md_getmajor routine
37630Sstevel@tonic-gate * INPUT: dev, a 64 bit container holding either a 32 bit or a 64 bit device
37640Sstevel@tonic-gate * OUTPUT: the appropriate major number
37650Sstevel@tonic-gate */
37660Sstevel@tonic-gate major_t
37670Sstevel@tonic-gate md_getmajor(md_dev64_t dev)
37680Sstevel@tonic-gate {
37690Sstevel@tonic-gate major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37700Sstevel@tonic-gate
37710Sstevel@tonic-gate if (major == 0) {
37720Sstevel@tonic-gate /* Here we were given a 32bit dev */
37730Sstevel@tonic-gate major = (major_t)(dev >> NBITSMINOR32) & MAXMAJ32;
37740Sstevel@tonic-gate }
37750Sstevel@tonic-gate return (major);
37760Sstevel@tonic-gate }
37770Sstevel@tonic-gate
37780Sstevel@tonic-gate /*
37790Sstevel@tonic-gate * Driver private devt md_getminor routine
37800Sstevel@tonic-gate * INPUT: dev, a 64 bit container holding either a 32 bit or a 64 bit device
37810Sstevel@tonic-gate * OUTPUT: the appropriate minor number
37820Sstevel@tonic-gate */
37830Sstevel@tonic-gate minor_t
37840Sstevel@tonic-gate md_getminor(md_dev64_t dev)
37850Sstevel@tonic-gate {
37860Sstevel@tonic-gate minor_t minor;
37870Sstevel@tonic-gate major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
37880Sstevel@tonic-gate
37890Sstevel@tonic-gate if (major == 0) {
37900Sstevel@tonic-gate /* Here we were given a 32bit dev */
37910Sstevel@tonic-gate minor = (minor_t)(dev & MAXMIN32);
37920Sstevel@tonic-gate } else {
37930Sstevel@tonic-gate minor = (minor_t)(dev & MAXMIN64);
37940Sstevel@tonic-gate }
37950Sstevel@tonic-gate return (minor);
37960Sstevel@tonic-gate }
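/*
 * Illustrative example (not part of the driver): an md_dev64_t packs the
 * major number above NBITSMINOR64 bits of minor, so the helpers above
 * round-trip cleanly:
 *
 *	md_dev64_t d64 = md_makedevice(md_major, mnum);
 *	ASSERT(md_getmajor(d64) == md_major);
 *	ASSERT(md_getminor(d64) == mnum);
 *	dev_t dev = md_dev64_to_dev(d64);	(native dev_t for DDI use)
 *
 * md_getmajor() and md_getminor() also accept a 32-bit dev stored in the
 * 64-bit container, since the major field of such a value decodes to 0.
 */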
37970Sstevel@tonic-gate
37980Sstevel@tonic-gate int
37997563SPrasad.Singamsetty@Sun.COM md_check_ioctl_against_unit(int cmd, mdc_unit_t c)
38000Sstevel@tonic-gate {
38010Sstevel@tonic-gate /*
38020Sstevel@tonic-gate * If the metadevice is an old-style device, it has a vtoc;
38030Sstevel@tonic-gate * in that case none of the EFI read ioctls are applicable.
38040Sstevel@tonic-gate * If the metadevice has an EFI label, the vtoc and geometry read
38050Sstevel@tonic-gate * ioctls are not supposed to work.
38060Sstevel@tonic-gate */
38070Sstevel@tonic-gate switch (cmd) {
38080Sstevel@tonic-gate case DKIOCGGEOM:
38097563SPrasad.Singamsetty@Sun.COM case DKIOCGAPART:
38107563SPrasad.Singamsetty@Sun.COM /* if > 2 TB then fail */
38117563SPrasad.Singamsetty@Sun.COM if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38127563SPrasad.Singamsetty@Sun.COM return (ENOTSUP);
38137563SPrasad.Singamsetty@Sun.COM }
38147563SPrasad.Singamsetty@Sun.COM break;
38150Sstevel@tonic-gate case DKIOCGVTOC:
38167563SPrasad.Singamsetty@Sun.COM /* if > 2 TB then fail */
38177563SPrasad.Singamsetty@Sun.COM if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38187563SPrasad.Singamsetty@Sun.COM return (ENOTSUP);
38197563SPrasad.Singamsetty@Sun.COM }
38207563SPrasad.Singamsetty@Sun.COM
38217563SPrasad.Singamsetty@Sun.COM /* if > 1 TB but < 2 TB return overflow */
38227563SPrasad.Singamsetty@Sun.COM if (c.un_revision & MD_64BIT_META_DEV) {
38237563SPrasad.Singamsetty@Sun.COM return (EOVERFLOW);
38247563SPrasad.Singamsetty@Sun.COM }
38257563SPrasad.Singamsetty@Sun.COM break;
38267563SPrasad.Singamsetty@Sun.COM case DKIOCGEXTVTOC:
38277563SPrasad.Singamsetty@Sun.COM /* if > 2 TB then fail */
38287563SPrasad.Singamsetty@Sun.COM if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38290Sstevel@tonic-gate return (ENOTSUP);
38300Sstevel@tonic-gate }
38310Sstevel@tonic-gate break;
38320Sstevel@tonic-gate case DKIOCGETEFI:
38330Sstevel@tonic-gate case DKIOCPARTITION:
38347563SPrasad.Singamsetty@Sun.COM if ((c.un_flag & MD_EFILABEL) == 0) {
38350Sstevel@tonic-gate return (ENOTSUP);
38360Sstevel@tonic-gate }
38370Sstevel@tonic-gate break;
38380Sstevel@tonic-gate
38390Sstevel@tonic-gate case DKIOCSETEFI:
38400Sstevel@tonic-gate /* setting an EFI label should always be ok */
38410Sstevel@tonic-gate return (0);
38420Sstevel@tonic-gate
38430Sstevel@tonic-gate case DKIOCSVTOC:
38447563SPrasad.Singamsetty@Sun.COM /* if > 2 TB then fail */
38457563SPrasad.Singamsetty@Sun.COM if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38467563SPrasad.Singamsetty@Sun.COM return (ENOTSUP);
38477563SPrasad.Singamsetty@Sun.COM }
38487563SPrasad.Singamsetty@Sun.COM
38497563SPrasad.Singamsetty@Sun.COM /* if > 1 TB but < 2 TB return overflow */
38507563SPrasad.Singamsetty@Sun.COM if (c.un_revision & MD_64BIT_META_DEV) {
38517563SPrasad.Singamsetty@Sun.COM return (EOVERFLOW);
38527563SPrasad.Singamsetty@Sun.COM }
38537563SPrasad.Singamsetty@Sun.COM break;
38547563SPrasad.Singamsetty@Sun.COM case DKIOCSEXTVTOC:
38557563SPrasad.Singamsetty@Sun.COM if (c.un_total_blocks > MD_MAX_BLKS_FOR_EXTVTOC) {
38567563SPrasad.Singamsetty@Sun.COM return (ENOTSUP);
38577563SPrasad.Singamsetty@Sun.COM }
38587563SPrasad.Singamsetty@Sun.COM break;
38590Sstevel@tonic-gate }
38600Sstevel@tonic-gate return (0);
38610Sstevel@tonic-gate }
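/*
 * Illustrative sketch (hypothetical caller; "un" is a placeholder for
 * the unit being operated on): an ioctl handler would typically consult
 * the check above before servicing a label request, propagating ENOTSUP
 * (wrong label type, or metadevice larger than 2 TB) or EOVERFLOW
 * (1-2 TB device queried with the non-extended vtoc ioctls):
 *
 *	err = md_check_ioctl_against_unit(cmd, un->c);
 *	if (err != 0)
 *		return (err);
 */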
38620Sstevel@tonic-gate
38630Sstevel@tonic-gate /*
38640Sstevel@tonic-gate * md_vtoc_to_efi_record()
38650Sstevel@tonic-gate * Input: record id of the vtoc record
38660Sstevel@tonic-gate * Output: record id of the efi record
38670Sstevel@tonic-gate * Function:
38680Sstevel@tonic-gate * - reads the volume name from the vtoc record
38690Sstevel@tonic-gate * - converts the volume name to a format that libefi understands
38700Sstevel@tonic-gate * - creates a new record of size MD_EFI_PARTNAME_BYTES
38710Sstevel@tonic-gate * - stores the volname in that record,
38720Sstevel@tonic-gate * - commits that record
38730Sstevel@tonic-gate * - returns the recid of the efi record.
38740Sstevel@tonic-gate * Caveat Emptor:
38750Sstevel@tonic-gate * The calling routine must do something like
38760Sstevel@tonic-gate * - un->c.un_vtoc_id = md_vtoc_to_efi_record(vtoc_recid, setno)
38770Sstevel@tonic-gate * - commit(un)
38780Sstevel@tonic-gate * - delete(vtoc_recid)
38790Sstevel@tonic-gate * in order to keep the mddb consistent in case of a panic in the middle.
38800Sstevel@tonic-gate * Errors:
38810Sstevel@tonic-gate * - returns 0 on any error
38820Sstevel@tonic-gate */
38830Sstevel@tonic-gate mddb_recid_t
38840Sstevel@tonic-gate md_vtoc_to_efi_record(mddb_recid_t vtoc_recid, set_t setno)
38850Sstevel@tonic-gate {
38860Sstevel@tonic-gate struct vtoc *vtoc;
38870Sstevel@tonic-gate ushort_t *v;
38880Sstevel@tonic-gate mddb_recid_t efi_recid;
38890Sstevel@tonic-gate int i;
38900Sstevel@tonic-gate
38910Sstevel@tonic-gate if (mddb_getrecstatus(vtoc_recid) != MDDB_OK) {
38920Sstevel@tonic-gate return (0);
38930Sstevel@tonic-gate }
38940Sstevel@tonic-gate vtoc = (struct vtoc *)mddb_getrecaddr(vtoc_recid);
38950Sstevel@tonic-gate efi_recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0,
38967563SPrasad.Singamsetty@Sun.COM MD_CRO_32BIT, setno);
38970Sstevel@tonic-gate if (efi_recid < 0) {
38980Sstevel@tonic-gate return (0);
38990Sstevel@tonic-gate }
39000Sstevel@tonic-gate v = (ushort_t *)mddb_getrecaddr(efi_recid);
39010Sstevel@tonic-gate
39020Sstevel@tonic-gate /* This for loop reads, converts and writes */
39030Sstevel@tonic-gate for (i = 0; i < LEN_DKL_VVOL; i++) {
39040Sstevel@tonic-gate v[i] = LE_16((uint16_t)vtoc->v_volume[i]);
39050Sstevel@tonic-gate }
39060Sstevel@tonic-gate /* commit the new record */
39070Sstevel@tonic-gate mddb_commitrec_wrapper(efi_recid);
39080Sstevel@tonic-gate
39090Sstevel@tonic-gate return (efi_recid);
39100Sstevel@tonic-gate }
39110Sstevel@tonic-gate
39120Sstevel@tonic-gate /*
39130Sstevel@tonic-gate * Send a kernel message.
39140Sstevel@tonic-gate * The caller has to provide an allocated result structure.
39158452SJohn.Wren.Kennedy@Sun.COM * If the door handler disappears we retry, emitting warnings every so often.
39168452SJohn.Wren.Kennedy@Sun.COM *
39178452SJohn.Wren.Kennedy@Sun.COM * The recipient argument is almost always unused, and is therefore typically
39188452SJohn.Wren.Kennedy@Sun.COM * set to zero, as zero is an invalid cluster nodeid. The exceptions are the
39198452SJohn.Wren.Kennedy@Sun.COM * marking and clearing of the DRL from a node that is not currently the
39208452SJohn.Wren.Kennedy@Sun.COM * owner. In these cases, the recipient argument will be the nodeid of the
39218452SJohn.Wren.Kennedy@Sun.COM * mirror owner, and MD_MSGF_DIRECTED will be set in the flags. Non-owner
39228452SJohn.Wren.Kennedy@Sun.COM * nodes will not receive these messages.
39238452SJohn.Wren.Kennedy@Sun.COM *
3924*11130SJames.Hall@Sun.COM * For the case where md_mn_is_commd_present() is false, we simply pre-set
3925*11130SJames.Hall@Sun.COM * the result->kmmr_comm_state to MDMNE_RPC_FAIL.
3926*11130SJames.Hall@Sun.COM * This covers the case where the service mdcommd has been killed and so we do
3927*11130SJames.Hall@Sun.COM * not get a 'new' result structure copied back. Instead we return with the
3928*11130SJames.Hall@Sun.COM * supplied result field, and we need to flag a failure to the caller.
39290Sstevel@tonic-gate */
39300Sstevel@tonic-gate int
39310Sstevel@tonic-gate mdmn_ksend_message(
39320Sstevel@tonic-gate set_t setno,
39330Sstevel@tonic-gate md_mn_msgtype_t type,
39340Sstevel@tonic-gate uint_t flags,
39358452SJohn.Wren.Kennedy@Sun.COM md_mn_nodeid_t recipient,
39360Sstevel@tonic-gate char *data,
39370Sstevel@tonic-gate int size,
39380Sstevel@tonic-gate md_mn_kresult_t *result)
39390Sstevel@tonic-gate {
39400Sstevel@tonic-gate door_arg_t da;
39410Sstevel@tonic-gate md_mn_kmsg_t *kmsg;
39428452SJohn.Wren.Kennedy@Sun.COM uint_t send_try_cnt = 0;
39438452SJohn.Wren.Kennedy@Sun.COM uint_t retry_noise_cnt = 0;
39440Sstevel@tonic-gate int rval;
39458452SJohn.Wren.Kennedy@Sun.COM k_sigset_t oldmask, newmask;
39460Sstevel@tonic-gate
3947*11130SJames.Hall@Sun.COM /*
3948*11130SJames.Hall@Sun.COM * Ensure that we default to a recoverable failure state if the
3949*11130SJames.Hall@Sun.COM * door upcall cannot pass the request on to rpc.mdcommd.
3950*11130SJames.Hall@Sun.COM * This may occur when shutting the node down while there is still
3951*11130SJames.Hall@Sun.COM * a mirror resync or metadevice state update occurring.
3952*11130SJames.Hall@Sun.COM */
3953*11130SJames.Hall@Sun.COM result->kmmr_comm_state = MDMNE_RPC_FAIL;
3954*11130SJames.Hall@Sun.COM result->kmmr_exitval = ~0;
3955*11130SJames.Hall@Sun.COM
39560Sstevel@tonic-gate if (size > MDMN_MAX_KMSG_DATA)
39570Sstevel@tonic-gate return (ENOMEM);
39580Sstevel@tonic-gate kmsg = kmem_zalloc(sizeof (md_mn_kmsg_t), KM_SLEEP);
39590Sstevel@tonic-gate kmsg->kmsg_flags = flags;
39600Sstevel@tonic-gate kmsg->kmsg_setno = setno;
39618452SJohn.Wren.Kennedy@Sun.COM kmsg->kmsg_recipient = recipient;
39620Sstevel@tonic-gate kmsg->kmsg_type = type;
39630Sstevel@tonic-gate kmsg->kmsg_size = size;
39640Sstevel@tonic-gate bcopy(data, &(kmsg->kmsg_data), size);
39650Sstevel@tonic-gate
39660Sstevel@tonic-gate /*
39670Sstevel@tonic-gate * Wait for the door handle to be established.
39680Sstevel@tonic-gate */
39690Sstevel@tonic-gate while (mdmn_door_did == -1) {
39708452SJohn.Wren.Kennedy@Sun.COM if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) {
39710Sstevel@tonic-gate cmn_err(CE_WARN, "door handle not yet ready. "
39720Sstevel@tonic-gate "Check if /usr/lib/lvm/mddoors is running");
39730Sstevel@tonic-gate }
39740Sstevel@tonic-gate delay(md_hz);
39750Sstevel@tonic-gate }
39760Sstevel@tonic-gate
39770Sstevel@tonic-gate /*
39788452SJohn.Wren.Kennedy@Sun.COM * If MD_MSGF_BLK_SIGNAL is set, mask out all signals so that we
39798452SJohn.Wren.Kennedy@Sun.COM * do not fail if the user process receives a signal while we're
39808452SJohn.Wren.Kennedy@Sun.COM * active in the door interface.
39818452SJohn.Wren.Kennedy@Sun.COM */
39828452SJohn.Wren.Kennedy@Sun.COM if (flags & MD_MSGF_BLK_SIGNAL) {
39838452SJohn.Wren.Kennedy@Sun.COM sigfillset(&newmask);
39848452SJohn.Wren.Kennedy@Sun.COM sigreplace(&newmask, &oldmask);
39858452SJohn.Wren.Kennedy@Sun.COM }
39868452SJohn.Wren.Kennedy@Sun.COM
39878452SJohn.Wren.Kennedy@Sun.COM /*
39880Sstevel@tonic-gate * If message failed with an RPC_FAILURE when rpc.mdcommd had
39890Sstevel@tonic-gate * been gracefully shutdown (md_mn_is_commd_present returns FALSE)
39900Sstevel@tonic-gate * then don't retry the message anymore. If message
39910Sstevel@tonic-gate * failed due to any other reason, then retry up to md_send_retry_limit
39920Sstevel@tonic-gate * times which should allow a shutting down system time to
39930Sstevel@tonic-gate * notify the kernel of a graceful shutdown of rpc.mdcommd.
39940Sstevel@tonic-gate *
39950Sstevel@tonic-gate * Caller of this routine will need to check the md_mn_commd_present
39960Sstevel@tonic-gate * flag and the failure error in order to determine whether to panic
39970Sstevel@tonic-gate * or not. If md_mn_commd_present is set to 0 and failure error
39980Sstevel@tonic-gate * is RPC_FAILURE, the calling routine should not panic since the
39990Sstevel@tonic-gate * system is in the process of being shutdown.
40000Sstevel@tonic-gate *
40010Sstevel@tonic-gate */
40020Sstevel@tonic-gate
40038452SJohn.Wren.Kennedy@Sun.COM retry_noise_cnt = send_try_cnt = 0;
40048452SJohn.Wren.Kennedy@Sun.COM while (md_mn_is_commd_present_lite()) {
40058452SJohn.Wren.Kennedy@Sun.COM /*
40068452SJohn.Wren.Kennedy@Sun.COM * data_ptr and data_size are initialized here because on
40078452SJohn.Wren.Kennedy@Sun.COM * return from the upcall, they contain data duplicated from
40088452SJohn.Wren.Kennedy@Sun.COM * rbuf and rsize. This causes subsequent upcalls to fail.
40098452SJohn.Wren.Kennedy@Sun.COM */
40108452SJohn.Wren.Kennedy@Sun.COM da.data_ptr = (char *)(kmsg);
40118452SJohn.Wren.Kennedy@Sun.COM da.data_size = sizeof (md_mn_kmsg_t);
40128452SJohn.Wren.Kennedy@Sun.COM da.desc_ptr = NULL;
40138452SJohn.Wren.Kennedy@Sun.COM da.desc_num = 0;
40148452SJohn.Wren.Kennedy@Sun.COM da.rbuf = (char *)result;
40158452SJohn.Wren.Kennedy@Sun.COM da.rsize = sizeof (*result);
40168452SJohn.Wren.Kennedy@Sun.COM
40178452SJohn.Wren.Kennedy@Sun.COM while ((rval = door_ki_upcall_limited(mdmn_door_handle, &da,
40188452SJohn.Wren.Kennedy@Sun.COM NULL, SIZE_MAX, 0)) != 0) {
40198452SJohn.Wren.Kennedy@Sun.COM if ((++retry_noise_cnt % MD_MN_WARN_INTVL) == 0) {
40208452SJohn.Wren.Kennedy@Sun.COM if (rval == EAGAIN) {
40218452SJohn.Wren.Kennedy@Sun.COM cmn_err(CE_WARN,
40228452SJohn.Wren.Kennedy@Sun.COM "md: door_upcall failed. "
40238452SJohn.Wren.Kennedy@Sun.COM "Check if mddoors is running.");
40248452SJohn.Wren.Kennedy@Sun.COM } else if (rval == EINTR) {
40258452SJohn.Wren.Kennedy@Sun.COM cmn_err(CE_WARN,
40268452SJohn.Wren.Kennedy@Sun.COM "md: door_upcall failed. "
40278452SJohn.Wren.Kennedy@Sun.COM "Check if rpc.mdcommd is running.");
40288452SJohn.Wren.Kennedy@Sun.COM } else {
40298452SJohn.Wren.Kennedy@Sun.COM cmn_err(CE_WARN,
40308452SJohn.Wren.Kennedy@Sun.COM "md: door_upcall failed. "
40318452SJohn.Wren.Kennedy@Sun.COM "Returned %d",
40328452SJohn.Wren.Kennedy@Sun.COM rval);
40338452SJohn.Wren.Kennedy@Sun.COM }
40348452SJohn.Wren.Kennedy@Sun.COM }
40358452SJohn.Wren.Kennedy@Sun.COM if (++send_try_cnt >= md_send_retry_limit)
40360Sstevel@tonic-gate break;
40378452SJohn.Wren.Kennedy@Sun.COM
40380Sstevel@tonic-gate delay(md_hz);
40398452SJohn.Wren.Kennedy@Sun.COM
40408452SJohn.Wren.Kennedy@Sun.COM /*
40418452SJohn.Wren.Kennedy@Sun.COM * data_ptr and data_size are re-initialized here
40428452SJohn.Wren.Kennedy@Sun.COM * because on return from the upcall, they contain
40438452SJohn.Wren.Kennedy@Sun.COM * data duplicated from rbuf and rsize. This causes
40448452SJohn.Wren.Kennedy@Sun.COM * subsequent upcalls to fail.
40458452SJohn.Wren.Kennedy@Sun.COM */
40468452SJohn.Wren.Kennedy@Sun.COM da.data_ptr = (char *)(kmsg);
40478452SJohn.Wren.Kennedy@Sun.COM da.data_size = sizeof (md_mn_kmsg_t);
40488452SJohn.Wren.Kennedy@Sun.COM da.desc_ptr = NULL;
40498452SJohn.Wren.Kennedy@Sun.COM da.desc_num = 0;
40508452SJohn.Wren.Kennedy@Sun.COM da.rbuf = (char *)result;
40518452SJohn.Wren.Kennedy@Sun.COM da.rsize = sizeof (*result);
40520Sstevel@tonic-gate }
40538452SJohn.Wren.Kennedy@Sun.COM
40548452SJohn.Wren.Kennedy@Sun.COM
40558452SJohn.Wren.Kennedy@Sun.COM /*
40568452SJohn.Wren.Kennedy@Sun.COM * If:
40578452SJohn.Wren.Kennedy@Sun.COM * - the send succeeded (MDMNE_ACK)
40588452SJohn.Wren.Kennedy@Sun.COM * - we had an MDMNE_RPC_FAIL and commd is now gone
40598452SJohn.Wren.Kennedy@Sun.COM * (note: since the outer loop is commd-dependent,
40608452SJohn.Wren.Kennedy@Sun.COM * checking MDMNE_RPC_FAIL here is meaningless)
40618452SJohn.Wren.Kennedy@Sun.COM * - we were told not to retry
40628452SJohn.Wren.Kennedy@Sun.COM * - we exceeded the RPC failure send limit
40638452SJohn.Wren.Kennedy@Sun.COM * punch out of the outer loop prior to the delay()
40648452SJohn.Wren.Kennedy@Sun.COM */
40658452SJohn.Wren.Kennedy@Sun.COM if (result->kmmr_comm_state == MDMNE_ACK ||
40668452SJohn.Wren.Kennedy@Sun.COM (flags & MD_MSGF_KSEND_NORETRY) ||
40678452SJohn.Wren.Kennedy@Sun.COM (++send_try_cnt % md_send_retry_limit) == 0 ||
40688452SJohn.Wren.Kennedy@Sun.COM !md_mn_is_commd_present())
40698452SJohn.Wren.Kennedy@Sun.COM break;
40708452SJohn.Wren.Kennedy@Sun.COM delay(md_hz);
40710Sstevel@tonic-gate }
40720Sstevel@tonic-gate
40738452SJohn.Wren.Kennedy@Sun.COM if (flags & MD_MSGF_BLK_SIGNAL) {
40748452SJohn.Wren.Kennedy@Sun.COM sigreplace(&oldmask, (k_sigset_t *)NULL);
40758452SJohn.Wren.Kennedy@Sun.COM }
40768452SJohn.Wren.Kennedy@Sun.COM kmem_free(kmsg, sizeof (md_mn_kmsg_t));
40778452SJohn.Wren.Kennedy@Sun.COM
40780Sstevel@tonic-gate return (0);
40790Sstevel@tonic-gate }
40800Sstevel@tonic-gate
40810Sstevel@tonic-gate /*
40820Sstevel@tonic-gate * Called to propagate the capability of a metadevice to all nodes in the set.
40830Sstevel@tonic-gate *
40840Sstevel@tonic-gate * On entry, lockp is set if the function has been called from within an ioctl.
40850Sstevel@tonic-gate *
40860Sstevel@tonic-gate * IOLOCK_RETURN_RELEASE, which drops the md_ioctl_lock is called in this
40870Sstevel@tonic-gate * routine to enable other mdioctls to enter the kernel while this
40880Sstevel@tonic-gate * thread of execution waits on the completion of mdmn_ksend_message. When
40890Sstevel@tonic-gate * the message is completed the thread continues and md_ioctl_lock must be
40900Sstevel@tonic-gate * reacquired. Even though md_ioctl_lock is interruptible, we choose to
40910Sstevel@tonic-gate * ignore EINTR as we must not return without acquiring md_ioctl_lock.
40920Sstevel@tonic-gate */
40930Sstevel@tonic-gate
40940Sstevel@tonic-gate int
40950Sstevel@tonic-gate mdmn_send_capability_message(minor_t mnum, volcap_t vc, IOLOCK *lockp)
40960Sstevel@tonic-gate {
40970Sstevel@tonic-gate md_mn_msg_setcap_t msg;
40980Sstevel@tonic-gate md_mn_kresult_t *kres;
40990Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(mnum);
41000Sstevel@tonic-gate int ret;
41013073Sjkennedy k_sigset_t oldmask, newmask;
41020Sstevel@tonic-gate
41030Sstevel@tonic-gate (void) strncpy((char *)&msg.msg_setcap_driver,
41040Sstevel@tonic-gate md_ops[ui->ui_opsindex]->md_driver.md_drivername, MD_DRIVERNAMELEN);
41050Sstevel@tonic-gate msg.msg_setcap_mnum = mnum;
41060Sstevel@tonic-gate msg.msg_setcap_set = vc.vc_set;
41070Sstevel@tonic-gate
41080Sstevel@tonic-gate if (lockp)
41090Sstevel@tonic-gate IOLOCK_RETURN_RELEASE(0, lockp);
4110*11130SJames.Hall@Sun.COM kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
41113073Sjkennedy
41123073Sjkennedy /*
41133073Sjkennedy * Mask signals for the mdmd_ksend_message call. This keeps the door
41143073Sjkennedy * interface from failing if the user process receives a signal while
41153073Sjkennedy * in mdmn_ksend_message.
41163073Sjkennedy */
41173073Sjkennedy sigfillset(&newmask);
41183073Sjkennedy sigreplace(&newmask, &oldmask);
41190Sstevel@tonic-gate ret = (mdmn_ksend_message(MD_MIN2SET(mnum), MD_MN_MSG_SET_CAP,
41208452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_NO_LOG, 0, (char *)&msg, sizeof (md_mn_msg_setcap_t),
41210Sstevel@tonic-gate kres));
41223073Sjkennedy sigreplace(&oldmask, (k_sigset_t *)NULL);
41233073Sjkennedy
41240Sstevel@tonic-gate if (!MDMN_KSEND_MSG_OK(ret, kres)) {
41250Sstevel@tonic-gate mdmn_ksend_show_error(ret, kres, "MD_MN_MSG_SET_CAP");
41260Sstevel@tonic-gate ret = EIO;
41270Sstevel@tonic-gate }
41280Sstevel@tonic-gate kmem_free(kres, sizeof (md_mn_kresult_t));
41290Sstevel@tonic-gate
41300Sstevel@tonic-gate if (lockp) {
41310Sstevel@tonic-gate IOLOCK_RETURN_REACQUIRE(lockp);
41320Sstevel@tonic-gate }
41330Sstevel@tonic-gate return (ret);
41340Sstevel@tonic-gate }
41350Sstevel@tonic-gate
41360Sstevel@tonic-gate /*
41370Sstevel@tonic-gate * Called to clear all of the transient capabilities for a metadevice when it is
41380Sstevel@tonic-gate * not open on any node in the cluster.
41390Sstevel@tonic-gate * Called from close for mirror and sp.
41400Sstevel@tonic-gate */
41410Sstevel@tonic-gate
41420Sstevel@tonic-gate void
41430Sstevel@tonic-gate mdmn_clear_all_capabilities(minor_t mnum)
41440Sstevel@tonic-gate {
41450Sstevel@tonic-gate md_isopen_t clumsg;
41460Sstevel@tonic-gate int ret;
41470Sstevel@tonic-gate md_mn_kresult_t *kresult;
41480Sstevel@tonic-gate volcap_t vc;
41493073Sjkennedy k_sigset_t oldmask, newmask;
41500Sstevel@tonic-gate
41510Sstevel@tonic-gate clumsg.dev = md_makedevice(md_major, mnum);
41520Sstevel@tonic-gate clumsg.mde = mdnullerror;
41530Sstevel@tonic-gate /*
41540Sstevel@tonic-gate * The check open message doesn't have to be logged, nor should the
41550Sstevel@tonic-gate * result be stored in the MCT. We want an up-to-date state.
41560Sstevel@tonic-gate */
4157*11130SJames.Hall@Sun.COM kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
41583073Sjkennedy
41593073Sjkennedy /*
41603073Sjkennedy * Mask signals for the mdmd_ksend_message call. This keeps the door
41613073Sjkennedy * interface from failing if the user process receives a signal while
41623073Sjkennedy * in mdmn_ksend_message.
41633073Sjkennedy */
41643073Sjkennedy sigfillset(&newmask);
41653073Sjkennedy sigreplace(&newmask, &oldmask);
41660Sstevel@tonic-gate ret = mdmn_ksend_message(MD_MIN2SET(mnum),
41670Sstevel@tonic-gate MD_MN_MSG_CLU_CHECK,
41688452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_STOP_ON_ERROR | MD_MSGF_NO_LOG | MD_MSGF_NO_MCT, 0,
41690Sstevel@tonic-gate (char *)&clumsg, sizeof (clumsg), kresult);
41703073Sjkennedy sigreplace(&oldmask, (k_sigset_t *)NULL);
41713073Sjkennedy
41720Sstevel@tonic-gate if ((ret == 0) && (kresult->kmmr_exitval == 0)) {
41730Sstevel@tonic-gate /*
41740Sstevel@tonic-gate * Not open on any node, clear all capabilities, eg ABR and
41750Sstevel@tonic-gate * DMR
41760Sstevel@tonic-gate */
41770Sstevel@tonic-gate vc.vc_set = 0;
41780Sstevel@tonic-gate (void) mdmn_send_capability_message(mnum, vc, NULL);
41790Sstevel@tonic-gate }
41800Sstevel@tonic-gate kmem_free(kresult, sizeof (md_mn_kresult_t));
41810Sstevel@tonic-gate }
41820Sstevel@tonic-gate
41830Sstevel@tonic-gate /*
41840Sstevel@tonic-gate * mdmn_ksend_show_error:
41850Sstevel@tonic-gate * ---------------------
41860Sstevel@tonic-gate * Called to display the error contents of a failing mdmn_ksend_message() result
41870Sstevel@tonic-gate *
41880Sstevel@tonic-gate * Input:
41890Sstevel@tonic-gate * rv - return value from mdmn_ksend_message()
41900Sstevel@tonic-gate * kres - pointer to result structure filled in by mdmn_ksend_message
41910Sstevel@tonic-gate * s - Informative message to identify failing condition (e.g.
41920Sstevel@tonic-gate * "Ownership change") This string will be displayed with
41930Sstevel@tonic-gate * cmn_err(CE_WARN, "%s *FAILED*",...) to alert the system
41940Sstevel@tonic-gate * administrator
41950Sstevel@tonic-gate */
41960Sstevel@tonic-gate void
41970Sstevel@tonic-gate mdmn_ksend_show_error(int rv, md_mn_kresult_t *kres, const char *s)
41980Sstevel@tonic-gate {
41990Sstevel@tonic-gate if (rv == 0) {
42000Sstevel@tonic-gate cmn_err(CE_WARN, "%s *FAILED*", s);
42010Sstevel@tonic-gate cmn_err(CE_CONT, "exit_val = %d, comm_state = %d, failing_node"
42020Sstevel@tonic-gate " = %d", kres->kmmr_exitval, kres->kmmr_comm_state,
42030Sstevel@tonic-gate kres->kmmr_failing_node);
42040Sstevel@tonic-gate } else {
42050Sstevel@tonic-gate cmn_err(CE_WARN, "%s *FAILED*, return value = %d", s, rv);
42060Sstevel@tonic-gate }
42070Sstevel@tonic-gate }
42080Sstevel@tonic-gate
42090Sstevel@tonic-gate /*
42100Sstevel@tonic-gate * Callback routine for the resync thread. If requested to suspend, we mark
42110Sstevel@tonic-gate * the commd as not being present.
42120Sstevel@tonic-gate */
42130Sstevel@tonic-gate boolean_t
42140Sstevel@tonic-gate callb_md_mrs_cpr(void *arg, int code)
42150Sstevel@tonic-gate {
42160Sstevel@tonic-gate callb_cpr_t *cp = (callb_cpr_t *)arg;
42170Sstevel@tonic-gate int ret = 0; /* assume success */
421811066Srafael.vanoni@sun.com clock_t delta;
42190Sstevel@tonic-gate
42200Sstevel@tonic-gate mutex_enter(cp->cc_lockp);
42210Sstevel@tonic-gate
42220Sstevel@tonic-gate switch (code) {
42230Sstevel@tonic-gate case CB_CODE_CPR_CHKPT:
42240Sstevel@tonic-gate /*
42250Sstevel@tonic-gate * Mark the rpc.mdcommd as no longer present. We are trying to
42260Sstevel@tonic-gate * suspend the system and so we should expect RPC failures to
42270Sstevel@tonic-gate * occur.
42280Sstevel@tonic-gate */
42290Sstevel@tonic-gate md_mn_clear_commd_present();
42300Sstevel@tonic-gate cp->cc_events |= CALLB_CPR_START;
423111066Srafael.vanoni@sun.com delta = CPR_KTHREAD_TIMEOUT_SEC * hz;
42320Sstevel@tonic-gate while (!(cp->cc_events & CALLB_CPR_SAFE))
42330Sstevel@tonic-gate /* cv_reltimedwait() returns -1 if it times out. */
423411066Srafael.vanoni@sun.com if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
423511066Srafael.vanoni@sun.com cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1)
42360Sstevel@tonic-gate break;
42370Sstevel@tonic-gate break;
42380Sstevel@tonic-gate
42390Sstevel@tonic-gate case CB_CODE_CPR_RESUME:
42400Sstevel@tonic-gate cp->cc_events &= ~CALLB_CPR_START;
42410Sstevel@tonic-gate cv_signal(&cp->cc_stop_cv);
42420Sstevel@tonic-gate break;
42430Sstevel@tonic-gate }
42440Sstevel@tonic-gate mutex_exit(cp->cc_lockp);
42450Sstevel@tonic-gate return (ret != -1);
42460Sstevel@tonic-gate }
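/*
 * Illustrative sketch (assumed usage; the mutex and thread name are
 * placeholders): a resync thread would typically register this callback
 * through the generic CPR callback support before entering its service
 * loop:
 *
 *	callb_cpr_t	cprinfo;
 *
 *	CALLB_CPR_INIT(&cprinfo, &rs_mx, callb_md_mrs_cpr, "md_resync");
 *	...
 *	mutex_enter(&rs_mx);
 *	CALLB_CPR_EXIT(&cprinfo);	(drops rs_mx on the way out)
 */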
42471623Stw21770
42481623Stw21770
42491623Stw21770 void
42501623Stw21770 md_rem_hspname(set_t setno, mdkey_t n_key)
42511623Stw21770 {
42521623Stw21770 int s;
42531623Stw21770 int max_sides;
42541623Stw21770
42551623Stw21770
42561623Stw21770 /* All entries removed are in the same diskset */
42571623Stw21770 if (md_get_setstatus(setno) & MD_SET_MNSET)
42581623Stw21770 max_sides = MD_MNMAXSIDES;
42591623Stw21770 else
42601623Stw21770 max_sides = MD_MAXSIDES;
42611623Stw21770
42621623Stw21770 for (s = 0; s < max_sides; s++)
42631623Stw21770 (void) md_remdevname(setno, s, n_key);
42641623Stw21770 }
42651623Stw21770
42661623Stw21770
42671623Stw21770 int
42681623Stw21770 md_rem_selfname(minor_t selfid)
42691623Stw21770 {
42701623Stw21770 int s;
42711623Stw21770 set_t setno = MD_MIN2SET(selfid);
42721623Stw21770 int max_sides;
42731623Stw21770 md_dev64_t dev;
42741623Stw21770 struct nm_next_hdr *nh;
42751623Stw21770 struct nm_name *n;
42761623Stw21770 mdkey_t key;
42771623Stw21770
42781623Stw21770 /*
42791623Stw21770 * Get the key since the remove routine expects it
42801623Stw21770 */
42811623Stw21770 dev = md_makedevice(md_major, selfid);
42821623Stw21770 if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) == NULL) {
42831623Stw21770 return (ENOENT);
42841623Stw21770 }
42851623Stw21770
42861623Stw21770 if ((n = (struct nm_name *)lookup_entry(nh, setno, MD_SIDEWILD,
42877563SPrasad.Singamsetty@Sun.COM MD_KEYWILD, dev, 0L)) == NULL) {
42881623Stw21770 return (ENOENT);
42891623Stw21770 }
42901623Stw21770
42911623Stw21770 /* All entries removed are in the same diskset */
42921623Stw21770 key = n->n_key;
42931623Stw21770 if (md_get_setstatus(setno) & MD_SET_MNSET)
42941623Stw21770 max_sides = MD_MNMAXSIDES;
42951623Stw21770 else
42961623Stw21770 max_sides = MD_MAXSIDES;
42971623Stw21770
42981623Stw21770 for (s = 0; s < max_sides; s++)
42991623Stw21770 (void) md_remdevname(setno, s, key);
43001623Stw21770
43011623Stw21770 return (0);
43021623Stw21770 }
43031623Stw21770
43041623Stw21770 void
43051623Stw21770 md_upd_set_unnext(set_t setno, unit_t un)
43061623Stw21770 {
43071623Stw21770 if (un < md_set[setno].s_un_next) {
43081623Stw21770 md_set[setno].s_un_next = un;
43091623Stw21770 }
43101623Stw21770 }
43111623Stw21770
43121623Stw21770 struct hot_spare_pool *
43131623Stw21770 find_hot_spare_pool(set_t setno, int hsp_id)
43141623Stw21770 {
43151623Stw21770 hot_spare_pool_t *hsp;
43161623Stw21770
43171623Stw21770 hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
43181623Stw21770 while (hsp != NULL) {
43191623Stw21770 if (hsp->hsp_self_id == hsp_id)
43201623Stw21770 return (hsp);
43211623Stw21770 hsp = hsp->hsp_next;
43221623Stw21770 }
43231623Stw21770
43241623Stw21770 return ((hot_spare_pool_t *)0);
43251623Stw21770 }
43268452SJohn.Wren.Kennedy@Sun.COM
43278452SJohn.Wren.Kennedy@Sun.COM /*
43288452SJohn.Wren.Kennedy@Sun.COM * md_create_taskq:
43298452SJohn.Wren.Kennedy@Sun.COM *
43308452SJohn.Wren.Kennedy@Sun.COM * Create a kernel taskq for the given set/unit combination. This is typically
43318452SJohn.Wren.Kennedy@Sun.COM * used to complete an RR_CLEAN request when the callee is unable to obtain the
43328452SJohn.Wren.Kennedy@Sun.COM * mutex / condvar access required to update the DRL safely.
43338452SJohn.Wren.Kennedy@Sun.COM */
43348452SJohn.Wren.Kennedy@Sun.COM void *
43358452SJohn.Wren.Kennedy@Sun.COM md_create_taskq(set_t setno, minor_t mnum)
43368452SJohn.Wren.Kennedy@Sun.COM {
43378452SJohn.Wren.Kennedy@Sun.COM char name[20];
43388452SJohn.Wren.Kennedy@Sun.COM ddi_taskq_t *tqp;
43398452SJohn.Wren.Kennedy@Sun.COM
43408452SJohn.Wren.Kennedy@Sun.COM (void) snprintf(name, 20, "%d/d%d", setno, MD_MIN2UNIT(mnum));
43418452SJohn.Wren.Kennedy@Sun.COM
43428452SJohn.Wren.Kennedy@Sun.COM tqp = ddi_taskq_create(md_devinfo, name, 1, TASKQ_DEFAULTPRI, 0);
43438452SJohn.Wren.Kennedy@Sun.COM
43448452SJohn.Wren.Kennedy@Sun.COM return ((void *)tqp);
43458452SJohn.Wren.Kennedy@Sun.COM }
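/*
 * Illustrative sketch (assumed usage; rr_clean_task and un are
 * placeholders): the returned handle is an ordinary DDI taskq, so a
 * caller deferring an RR_CLEAN request might dispatch work to it and
 * later tear it down as follows:
 *
 *	ddi_taskq_t *tqp = (ddi_taskq_t *)md_create_taskq(setno, mnum);
 *	if (ddi_taskq_dispatch(tqp, rr_clean_task, un, DDI_NOSLEEP) !=
 *	    DDI_SUCCESS) {
 *		(fall back to handling the request synchronously)
 *	}
 *	...
 *	ddi_taskq_destroy(tqp);
 */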
4346