/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
250Sstevel@tonic-gate
/*
 * Md is the meta-disk driver.  It sits below the UFS file system
 * but above the 'real' disk drivers, xy, id, sd etc.
 *
 * To the UFS software, md looks like a normal driver, since it has
 * the normal kinds of entries in the bdevsw and cdevsw arrays.  So
 * UFS accesses md in the usual ways.  In particular, the strategy
 * routine, mdstrategy(), gets called by fbiwrite(), ufs_getapage(),
 * and ufs_writelbn().
 *
 * Md maintains an array of minor devices (meta-partitions).  Each
 * meta partition stands for a matrix of real partitions, in rows
 * which are not necessarily of equal length.  Md maintains a table,
 * with one entry for each meta-partition, which lists the rows and
 * columns of actual partitions, and the job of the strategy routine
 * is to translate from the meta-partition device and block numbers
 * known to UFS into the actual partitions' device and block numbers.
 *
 * See below, in mdstrategy(), mdreal(), and mddone() for details of
 * this translation.
 */

/*
 * Driver for Virtual Disk.
 */
510Sstevel@tonic-gate
520Sstevel@tonic-gate #include <sys/user.h>
530Sstevel@tonic-gate #include <sys/sysmacros.h>
540Sstevel@tonic-gate #include <sys/conf.h>
550Sstevel@tonic-gate #include <sys/stat.h>
560Sstevel@tonic-gate #include <sys/errno.h>
570Sstevel@tonic-gate #include <sys/param.h>
580Sstevel@tonic-gate #include <sys/systm.h>
590Sstevel@tonic-gate #include <sys/file.h>
600Sstevel@tonic-gate #include <sys/open.h>
610Sstevel@tonic-gate #include <sys/dkio.h>
620Sstevel@tonic-gate #include <sys/vtoc.h>
630Sstevel@tonic-gate #include <sys/cmn_err.h>
640Sstevel@tonic-gate #include <sys/ddi.h>
650Sstevel@tonic-gate #include <sys/sunddi.h>
660Sstevel@tonic-gate #include <sys/debug.h>
670Sstevel@tonic-gate #include <sys/utsname.h>
680Sstevel@tonic-gate #include <sys/lvm/mdvar.h>
690Sstevel@tonic-gate #include <sys/lvm/md_names.h>
700Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
710Sstevel@tonic-gate #include <sys/lvm/md_sp.h>
720Sstevel@tonic-gate #include <sys/types.h>
730Sstevel@tonic-gate #include <sys/kmem.h>
740Sstevel@tonic-gate #include <sys/cladm.h>
750Sstevel@tonic-gate #include <sys/priv_names.h>
767627SChris.Horne@Sun.COM #include <sys/modhash.h>
770Sstevel@tonic-gate
780Sstevel@tonic-gate #ifndef lint
791366Spetede char _depends_on[] = "strmod/rpcmod";
800Sstevel@tonic-gate #endif /* lint */
810Sstevel@tonic-gate int md_init_debug = 0; /* module binding debug */
820Sstevel@tonic-gate
830Sstevel@tonic-gate /*
840Sstevel@tonic-gate * Tunable to turn off the failfast behavior.
850Sstevel@tonic-gate */
860Sstevel@tonic-gate int md_ff_disable = 0;
870Sstevel@tonic-gate
882063Shshaw /*
892063Shshaw * dynamically allocated list of non FF driver names - needs to
902063Shshaw * be freed when md is detached.
912063Shshaw */
922063Shshaw char **non_ff_drivers = NULL;
932063Shshaw
940Sstevel@tonic-gate md_krwlock_t md_unit_array_rw; /* protects all unit arrays */
950Sstevel@tonic-gate md_krwlock_t nm_lock; /* protects all the name spaces */
960Sstevel@tonic-gate
970Sstevel@tonic-gate md_resync_t md_cpr_resync;
980Sstevel@tonic-gate
990Sstevel@tonic-gate extern char svm_bootpath[];
1000Sstevel@tonic-gate #define SVM_PSEUDO_STR "/pseudo/md@0:"
1010Sstevel@tonic-gate
1020Sstevel@tonic-gate #define VERSION_LENGTH 6
1030Sstevel@tonic-gate #define VERSION "1.0"
1040Sstevel@tonic-gate
1050Sstevel@tonic-gate /*
1060Sstevel@tonic-gate * Keep track of possible 'orphan' entries in the name space
1070Sstevel@tonic-gate */
1080Sstevel@tonic-gate int *md_nm_snarfed = NULL;
1090Sstevel@tonic-gate
1100Sstevel@tonic-gate /*
1110Sstevel@tonic-gate * Global tunable giving the percentage of free space left in replica during
1120Sstevel@tonic-gate * conversion of non-devid style replica to devid style replica.
1130Sstevel@tonic-gate */
1140Sstevel@tonic-gate int md_conv_perc = MDDB_DEVID_CONV_PERC;
1150Sstevel@tonic-gate
#ifdef DEBUG
/*
 * Debug code to verify framework exclusion guarantees.
 *
 * md_in is a bitmask of the IN_* flags below; MD_SET_IN() sets a flag and
 * MD_CLR_IN() clears it again.  md_in_mx is held around every access to
 * md_in made by these macros.
 */
int md_in;
kmutex_t md_in_mx;	/* protects md_in */
#define	IN_INIT		0x01
#define	IN_FINI		0x02
#define	IN_ATTACH	0x04
#define	IN_DETACH	0x08
#define	IN_OPEN		0x10

/*
 * Set flag(s) x in md_in.  Enters the debugger if any flag at all is
 * already set (exclusion lost) or if x itself is already set.
 *
 * The argument is parenthesized at every use so that a compound expression
 * (e.g. "A | B") is evaluated as a unit, and the body is wrapped in
 * do/while (0) so the macro behaves as a single statement when followed
 * by a semicolon, including inside an unbraced if/else.
 */
#define	MD_SET_IN(x) do {					\
	mutex_enter(&md_in_mx);					\
	if (md_in)						\
		debug_enter("MD_SET_IN exclusion lost");	\
	if (md_in & (x))					\
		debug_enter("MD_SET_IN already set");		\
	md_in |= (x);						\
	mutex_exit(&md_in_mx);					\
} while (/* CONSTCOND */ 0)

/*
 * Clear flag(s) x in md_in.  Enters the debugger if any flag other than
 * x is set (exclusion lost) or if x is not currently set.
 */
#define	MD_CLR_IN(x) do {					\
	mutex_enter(&md_in_mx);					\
	if (md_in & ~(x))					\
		debug_enter("MD_CLR_IN exclusion lost");	\
	if (!(md_in & (x)))					\
		debug_enter("MD_CLR_IN already clr");		\
	md_in &= ~(x);						\
	mutex_exit(&md_in_mx);					\
} while (/* CONSTCOND */ 0)
#else	/* DEBUG */
/* Non-DEBUG builds compile the exclusion checks away entirely. */
#define	MD_SET_IN(x)
#define	MD_CLR_IN(x)
#endif	/* DEBUG */
1480Sstevel@tonic-gate hrtime_t savetime1, savetime2;
1490Sstevel@tonic-gate
1500Sstevel@tonic-gate
1510Sstevel@tonic-gate /*
1520Sstevel@tonic-gate * list things protected by md_mx even if they aren't
1530Sstevel@tonic-gate * used in this file.
1540Sstevel@tonic-gate */
1550Sstevel@tonic-gate kmutex_t md_mx; /* used to md global stuff */
1560Sstevel@tonic-gate kcondvar_t md_cv; /* md_status events */
1570Sstevel@tonic-gate int md_status = 0; /* global status for the meta-driver */
1580Sstevel@tonic-gate int md_num_daemons = 0;
1590Sstevel@tonic-gate int md_ioctl_cnt = 0;
1600Sstevel@tonic-gate int md_mtioctl_cnt = 0; /* multithreaded ioctl cnt */
1610Sstevel@tonic-gate uint_t md_mdelay = 10; /* variable so can be patched */
1620Sstevel@tonic-gate
1630Sstevel@tonic-gate int (*mdv_strategy_tstpnt)(buf_t *, int, void*);
1640Sstevel@tonic-gate
1650Sstevel@tonic-gate major_t md_major, md_major_targ;
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate unit_t md_nunits = MD_MAXUNITS;
1680Sstevel@tonic-gate set_t md_nsets = MD_MAXSETS;
1690Sstevel@tonic-gate int md_nmedh = 0;
1700Sstevel@tonic-gate char *md_med_trans_lst = NULL;
1710Sstevel@tonic-gate md_set_t md_set[MD_MAXSETS];
1720Sstevel@tonic-gate md_set_io_t md_set_io[MD_MAXSETS];
1730Sstevel@tonic-gate
1740Sstevel@tonic-gate md_krwlock_t hsp_rwlp; /* protects hot_spare_interface */
1750Sstevel@tonic-gate md_krwlock_t ni_rwlp; /* protects notify_interface */
1763036Seota md_ops_t **md_ops = NULL;
1773036Seota ddi_modhandle_t *md_mods = NULL;
1780Sstevel@tonic-gate md_ops_t *md_opslist;
1790Sstevel@tonic-gate clock_t md_hz;
1800Sstevel@tonic-gate md_event_queue_t *md_event_queue = NULL;
1810Sstevel@tonic-gate
1820Sstevel@tonic-gate int md_in_upgrade;
1830Sstevel@tonic-gate int md_keep_repl_state;
1840Sstevel@tonic-gate int md_devid_destroy;
1850Sstevel@tonic-gate
1860Sstevel@tonic-gate /* for sending messages thru a door to userland */
1870Sstevel@tonic-gate door_handle_t mdmn_door_handle = NULL;
1880Sstevel@tonic-gate int mdmn_door_did = -1;
1890Sstevel@tonic-gate
1900Sstevel@tonic-gate dev_info_t *md_devinfo = NULL;
1910Sstevel@tonic-gate
1920Sstevel@tonic-gate md_mn_nodeid_t md_mn_mynode_id = ~0u; /* My node id (for multi-node sets) */
1930Sstevel@tonic-gate
1940Sstevel@tonic-gate static uint_t md_ocnt[OTYPCNT];
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate static int mdinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
1970Sstevel@tonic-gate static int mdattach(dev_info_t *, ddi_attach_cmd_t);
1980Sstevel@tonic-gate static int mddetach(dev_info_t *, ddi_detach_cmd_t);
1990Sstevel@tonic-gate static int mdopen(dev_t *, int, int, cred_t *);
2000Sstevel@tonic-gate static int mdclose(dev_t, int, int, cred_t *);
2010Sstevel@tonic-gate static int mddump(dev_t, caddr_t, daddr_t, int);
2020Sstevel@tonic-gate static int mdread(dev_t, struct uio *, cred_t *);
2030Sstevel@tonic-gate static int mdwrite(dev_t, struct uio *, cred_t *);
2040Sstevel@tonic-gate static int mdaread(dev_t, struct aio_req *, cred_t *);
2050Sstevel@tonic-gate static int mdawrite(dev_t, struct aio_req *, cred_t *);
2060Sstevel@tonic-gate static int mdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
2070Sstevel@tonic-gate static int mdprop_op(dev_t, dev_info_t *,
2080Sstevel@tonic-gate ddi_prop_op_t, int, char *, caddr_t, int *);
2090Sstevel@tonic-gate
2100Sstevel@tonic-gate static struct cb_ops md_cb_ops = {
2110Sstevel@tonic-gate mdopen, /* open */
2120Sstevel@tonic-gate mdclose, /* close */
2130Sstevel@tonic-gate mdstrategy, /* strategy */
2140Sstevel@tonic-gate /* print routine -- none yet */
2150Sstevel@tonic-gate (int(*)(dev_t, char *))nulldev,
2160Sstevel@tonic-gate mddump, /* dump */
2170Sstevel@tonic-gate mdread, /* read */
2180Sstevel@tonic-gate mdwrite, /* write */
2190Sstevel@tonic-gate mdioctl, /* ioctl */
2200Sstevel@tonic-gate /* devmap */
2210Sstevel@tonic-gate (int(*)(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
2220Sstevel@tonic-gate uint_t))nodev,
2230Sstevel@tonic-gate /* mmap */
2240Sstevel@tonic-gate (int(*)(dev_t, off_t, int))nodev,
2250Sstevel@tonic-gate /* segmap */
2260Sstevel@tonic-gate (int(*)(dev_t, off_t, struct as *, caddr_t *, off_t, unsigned,
2270Sstevel@tonic-gate unsigned, unsigned, cred_t *))nodev,
2280Sstevel@tonic-gate nochpoll, /* poll */
2290Sstevel@tonic-gate mdprop_op, /* prop_op */
2300Sstevel@tonic-gate 0, /* streamtab */
2310Sstevel@tonic-gate (D_64BIT|D_MP|D_NEW), /* driver compatibility flag */
2320Sstevel@tonic-gate CB_REV, /* cb_ops version */
2330Sstevel@tonic-gate mdaread, /* aread */
2340Sstevel@tonic-gate mdawrite, /* awrite */
2350Sstevel@tonic-gate };
2360Sstevel@tonic-gate
2370Sstevel@tonic-gate static struct dev_ops md_devops = {
2380Sstevel@tonic-gate DEVO_REV, /* dev_ops version */
2390Sstevel@tonic-gate 0, /* device reference count */
2400Sstevel@tonic-gate mdinfo, /* info routine */
2410Sstevel@tonic-gate nulldev, /* identify routine */
2420Sstevel@tonic-gate nulldev, /* probe - not defined */
2430Sstevel@tonic-gate mdattach, /* attach routine */
2440Sstevel@tonic-gate mddetach, /* detach routine */
2450Sstevel@tonic-gate nodev, /* reset - not defined */
2460Sstevel@tonic-gate &md_cb_ops, /* driver operations */
2470Sstevel@tonic-gate NULL, /* bus operations */
2487656SSherry.Moore@Sun.COM nodev, /* power management */
2497656SSherry.Moore@Sun.COM ddi_quiesce_not_needed, /* quiesce */
2500Sstevel@tonic-gate };
2510Sstevel@tonic-gate
2520Sstevel@tonic-gate /*
2530Sstevel@tonic-gate * loadable module wrapper
2540Sstevel@tonic-gate */
2550Sstevel@tonic-gate #include <sys/modctl.h>
2560Sstevel@tonic-gate
2570Sstevel@tonic-gate static struct modldrv modldrv = {
2580Sstevel@tonic-gate &mod_driverops, /* type of module -- a pseudodriver */
2594932Spetede "Solaris Volume Manager base module", /* name of the module */
2600Sstevel@tonic-gate &md_devops, /* driver ops */
2610Sstevel@tonic-gate };
2620Sstevel@tonic-gate
2630Sstevel@tonic-gate static struct modlinkage modlinkage = {
2640Sstevel@tonic-gate MODREV_1,
2650Sstevel@tonic-gate (void *)&modldrv,
2660Sstevel@tonic-gate NULL
2670Sstevel@tonic-gate };
2680Sstevel@tonic-gate
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate /* md_medd.c */
2710Sstevel@tonic-gate extern void med_init(void);
2720Sstevel@tonic-gate extern void med_fini(void);
2730Sstevel@tonic-gate extern void md_devid_cleanup(set_t, uint_t);
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate /* md_names.c */
2760Sstevel@tonic-gate extern struct nm_next_hdr *get_first_record(set_t, int, int);
2770Sstevel@tonic-gate
2780Sstevel@tonic-gate int md_maxphys = 0; /* maximum io size in bytes */
2790Sstevel@tonic-gate #define MD_MAXBCOUNT (1024 * 1024)
2800Sstevel@tonic-gate unsigned md_maxbcount = 0; /* maximum physio size in bytes */
2810Sstevel@tonic-gate
2827627SChris.Horne@Sun.COM /*
2837627SChris.Horne@Sun.COM * Some md ioctls trigger io framework device tree operations. An
2847627SChris.Horne@Sun.COM * example is md ioctls that call md_resolve_bydevid(): which uses the
2857627SChris.Horne@Sun.COM * io framework to resolve a devid. Such operations result in acquiring
2867627SChris.Horne@Sun.COM * io framework locks (like ndi_devi_enter() of "/") while holding
2877627SChris.Horne@Sun.COM * driver locks (like md_unit_writerlock()).
2887627SChris.Horne@Sun.COM *
2897627SChris.Horne@Sun.COM * The prop_op(9E) entry point is called from the devinfo driver with
2907627SChris.Horne@Sun.COM * an active ndi_devi_enter of "/". To avoid deadlock, md's prop_op
2917627SChris.Horne@Sun.COM * implementation must avoid taking a lock that is held per above md
2927627SChris.Horne@Sun.COM * ioctl description: i.e. mdprop_op(9E) can't call md_unit_readerlock()
2937627SChris.Horne@Sun.COM * without risking deadlock.
2947627SChris.Horne@Sun.COM *
2957627SChris.Horne@Sun.COM * To service "size" requests without risking deadlock, we maintain a
2967627SChris.Horne@Sun.COM * "mnum->nblocks" sizemap (protected by a short-term global mutex).
2977627SChris.Horne@Sun.COM */
2987627SChris.Horne@Sun.COM static kmutex_t md_nblocks_mutex;
2997627SChris.Horne@Sun.COM static mod_hash_t *md_nblocksmap; /* mnum -> nblocks */
3007627SChris.Horne@Sun.COM int md_nblocksmap_size = 512;
3017627SChris.Horne@Sun.COM
3027627SChris.Horne@Sun.COM /*
3037627SChris.Horne@Sun.COM * Maintain "mnum->nblocks" sizemap for mdprop_op use:
3047627SChris.Horne@Sun.COM *
3057627SChris.Horne@Sun.COM * Create: any code that establishes a unit's un_total_blocks needs the
3067627SChris.Horne@Sun.COM * following type of call to establish nblocks for mdprop_op():
3077627SChris.Horne@Sun.COM * md_nblocks_set(mnum, un->c.un_total_blocks);"
3087627SChris.Horne@Sun.COM * NOTE: locate via cscope md_create_minor_node/md_create_unit_incore
3097627SChris.Horne@Sun.COM * ...or "MD_UNIT..*="
3107627SChris.Horne@Sun.COM *
3117627SChris.Horne@Sun.COM * Change: any code that changes a unit's un_total_blocks needs the
3127627SChris.Horne@Sun.COM * following type of call to sync nblocks for mdprop_op():
3137627SChris.Horne@Sun.COM * md_nblocks_set(mnum, un->c.un_total_blocks);"
3147627SChris.Horne@Sun.COM * NOTE: locate via cscope for "un_total_blocks[ \t]*="
3157627SChris.Horne@Sun.COM *
3167627SChris.Horne@Sun.COM * Destroy: any code that deletes a unit needs the following type of call
3177627SChris.Horne@Sun.COM * to sync nblocks for mdprop_op():
3187627SChris.Horne@Sun.COM * md_nblocks_set(mnum, -1ULL);
3197627SChris.Horne@Sun.COM * NOTE: locate via cscope md_remove_minor_node/md_destroy_unit_incore
3207627SChris.Horne@Sun.COM * ...or "MD_UNIT..*="
3217627SChris.Horne@Sun.COM */
3227627SChris.Horne@Sun.COM void
md_nblocks_set(minor_t mnum,uint64_t nblocks)3237627SChris.Horne@Sun.COM md_nblocks_set(minor_t mnum, uint64_t nblocks)
3247627SChris.Horne@Sun.COM {
3257627SChris.Horne@Sun.COM mutex_enter(&md_nblocks_mutex);
3267627SChris.Horne@Sun.COM if (nblocks == -1ULL)
3277627SChris.Horne@Sun.COM (void) mod_hash_destroy(md_nblocksmap,
3287627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum);
3297627SChris.Horne@Sun.COM else
3307627SChris.Horne@Sun.COM (void) mod_hash_replace(md_nblocksmap,
3317627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum,
3327627SChris.Horne@Sun.COM (mod_hash_val_t)(intptr_t)nblocks);
3337627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex);
3347627SChris.Horne@Sun.COM }
3357627SChris.Horne@Sun.COM
3367627SChris.Horne@Sun.COM /* get the size of a mnum from "mnum->nblocks" sizemap */
3377627SChris.Horne@Sun.COM uint64_t
md_nblocks_get(minor_t mnum)3387627SChris.Horne@Sun.COM md_nblocks_get(minor_t mnum)
3397627SChris.Horne@Sun.COM {
3407627SChris.Horne@Sun.COM mod_hash_val_t hv;
3417627SChris.Horne@Sun.COM
3427627SChris.Horne@Sun.COM mutex_enter(&md_nblocks_mutex);
3437627SChris.Horne@Sun.COM if (mod_hash_find(md_nblocksmap,
3447627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum, &hv) == 0) {
3457627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex);
3467627SChris.Horne@Sun.COM return ((uint64_t)(intptr_t)hv);
3477627SChris.Horne@Sun.COM }
3487627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex);
3497627SChris.Horne@Sun.COM return (0);
3507627SChris.Horne@Sun.COM }
3517627SChris.Horne@Sun.COM
3520Sstevel@tonic-gate /* allocate/free dynamic space associated with driver globals */
3530Sstevel@tonic-gate void
md_global_alloc_free(int alloc)3540Sstevel@tonic-gate md_global_alloc_free(int alloc)
3550Sstevel@tonic-gate {
3560Sstevel@tonic-gate set_t s;
3570Sstevel@tonic-gate
3580Sstevel@tonic-gate if (alloc) {
3590Sstevel@tonic-gate /* initialize driver global locks */
3600Sstevel@tonic-gate cv_init(&md_cv, NULL, CV_DEFAULT, NULL);
3610Sstevel@tonic-gate mutex_init(&md_mx, NULL, MUTEX_DEFAULT, NULL);
3620Sstevel@tonic-gate rw_init(&md_unit_array_rw.lock, NULL, RW_DEFAULT, NULL);
3630Sstevel@tonic-gate rw_init(&nm_lock.lock, NULL, RW_DEFAULT, NULL);
3640Sstevel@tonic-gate rw_init(&ni_rwlp.lock, NULL, RW_DRIVER, NULL);
3650Sstevel@tonic-gate rw_init(&hsp_rwlp.lock, NULL, RW_DRIVER, NULL);
3660Sstevel@tonic-gate mutex_init(&md_cpr_resync.md_resync_mutex, NULL,
3677627SChris.Horne@Sun.COM MUTEX_DEFAULT, NULL);
3687627SChris.Horne@Sun.COM mutex_init(&md_nblocks_mutex, NULL, MUTEX_DEFAULT, NULL);
3690Sstevel@tonic-gate
3700Sstevel@tonic-gate /* initialize per set driver global locks */
3710Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++) {
3720Sstevel@tonic-gate /* initialize per set driver globals locks */
3730Sstevel@tonic-gate mutex_init(&md_set[s].s_dbmx,
3740Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL);
3750Sstevel@tonic-gate mutex_init(&md_set_io[s].md_io_mx,
3760Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL);
3770Sstevel@tonic-gate cv_init(&md_set_io[s].md_io_cv,
3780Sstevel@tonic-gate NULL, CV_DEFAULT, NULL);
3790Sstevel@tonic-gate }
3800Sstevel@tonic-gate } else {
3810Sstevel@tonic-gate /* destroy per set driver global locks */
3820Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++) {
3830Sstevel@tonic-gate cv_destroy(&md_set_io[s].md_io_cv);
3840Sstevel@tonic-gate mutex_destroy(&md_set_io[s].md_io_mx);
3850Sstevel@tonic-gate mutex_destroy(&md_set[s].s_dbmx);
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate
3880Sstevel@tonic-gate /* destroy driver global locks */
3897627SChris.Horne@Sun.COM mutex_destroy(&md_nblocks_mutex);
3900Sstevel@tonic-gate mutex_destroy(&md_cpr_resync.md_resync_mutex);
3910Sstevel@tonic-gate rw_destroy(&hsp_rwlp.lock);
3920Sstevel@tonic-gate rw_destroy(&ni_rwlp.lock);
3930Sstevel@tonic-gate rw_destroy(&nm_lock.lock);
3940Sstevel@tonic-gate rw_destroy(&md_unit_array_rw.lock);
3950Sstevel@tonic-gate mutex_destroy(&md_mx);
3960Sstevel@tonic-gate cv_destroy(&md_cv);
3970Sstevel@tonic-gate }
3980Sstevel@tonic-gate }
3990Sstevel@tonic-gate
4000Sstevel@tonic-gate int
_init(void)4010Sstevel@tonic-gate _init(void)
4020Sstevel@tonic-gate {
4030Sstevel@tonic-gate set_t s;
4040Sstevel@tonic-gate int err;
4050Sstevel@tonic-gate
4060Sstevel@tonic-gate MD_SET_IN(IN_INIT);
4070Sstevel@tonic-gate
4080Sstevel@tonic-gate /* allocate dynamic space associated with driver globals */
4090Sstevel@tonic-gate md_global_alloc_free(1);
4100Sstevel@tonic-gate
4110Sstevel@tonic-gate /* initialize driver globals */
4120Sstevel@tonic-gate md_major = ddi_name_to_major("md");
4130Sstevel@tonic-gate md_hz = drv_usectohz(NUM_USEC_IN_SEC);
4140Sstevel@tonic-gate
4150Sstevel@tonic-gate /* initialize tunable globals */
4160Sstevel@tonic-gate if (md_maxphys == 0) /* maximum io size in bytes */
4170Sstevel@tonic-gate md_maxphys = maxphys;
4180Sstevel@tonic-gate if (md_maxbcount == 0) /* maximum physio size in bytes */
4190Sstevel@tonic-gate md_maxbcount = MD_MAXBCOUNT;
4200Sstevel@tonic-gate
4210Sstevel@tonic-gate /* initialize per set driver globals */
4220Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++)
4230Sstevel@tonic-gate md_set_io[s].io_state = MD_SET_ACTIVE;
4240Sstevel@tonic-gate
4250Sstevel@tonic-gate /*
4260Sstevel@tonic-gate * NOTE: the framework does not currently guarantee exclusion
4270Sstevel@tonic-gate * between _init and attach after calling mod_install.
4280Sstevel@tonic-gate */
4290Sstevel@tonic-gate MD_CLR_IN(IN_INIT);
4300Sstevel@tonic-gate if ((err = mod_install(&modlinkage))) {
4310Sstevel@tonic-gate MD_SET_IN(IN_INIT);
4320Sstevel@tonic-gate md_global_alloc_free(0); /* free dynamic space */
4330Sstevel@tonic-gate MD_CLR_IN(IN_INIT);
4340Sstevel@tonic-gate }
4350Sstevel@tonic-gate return (err);
4360Sstevel@tonic-gate }
4370Sstevel@tonic-gate
4380Sstevel@tonic-gate int
_fini(void)4390Sstevel@tonic-gate _fini(void)
4400Sstevel@tonic-gate {
4410Sstevel@tonic-gate int err;
4420Sstevel@tonic-gate
4430Sstevel@tonic-gate /*
4440Sstevel@tonic-gate * NOTE: the framework currently does not guarantee exclusion
4450Sstevel@tonic-gate * with attach until after mod_remove returns 0.
4460Sstevel@tonic-gate */
4470Sstevel@tonic-gate if ((err = mod_remove(&modlinkage)))
4480Sstevel@tonic-gate return (err);
4490Sstevel@tonic-gate
4500Sstevel@tonic-gate MD_SET_IN(IN_FINI);
4510Sstevel@tonic-gate md_global_alloc_free(0); /* free dynamic space */
4520Sstevel@tonic-gate MD_CLR_IN(IN_FINI);
4530Sstevel@tonic-gate return (err);
4540Sstevel@tonic-gate }
4550Sstevel@tonic-gate
4560Sstevel@tonic-gate int
_info(struct modinfo * modinfop)4570Sstevel@tonic-gate _info(struct modinfo *modinfop)
4580Sstevel@tonic-gate {
4590Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop));
4600Sstevel@tonic-gate }
4610Sstevel@tonic-gate
4620Sstevel@tonic-gate /* ARGSUSED */
4630Sstevel@tonic-gate static int
mdattach(dev_info_t * dip,ddi_attach_cmd_t cmd)4640Sstevel@tonic-gate mdattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4650Sstevel@tonic-gate {
4660Sstevel@tonic-gate int len;
4670Sstevel@tonic-gate unit_t i;
4680Sstevel@tonic-gate size_t sz;
4690Sstevel@tonic-gate char ver[VERSION_LENGTH];
4700Sstevel@tonic-gate char **maj_str_array;
4710Sstevel@tonic-gate char *str, *str2;
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate MD_SET_IN(IN_ATTACH);
4740Sstevel@tonic-gate md_in_upgrade = 0;
4750Sstevel@tonic-gate md_keep_repl_state = 0;
4760Sstevel@tonic-gate md_devid_destroy = 0;
4770Sstevel@tonic-gate
4780Sstevel@tonic-gate if (cmd != DDI_ATTACH) {
4790Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH);
4800Sstevel@tonic-gate return (DDI_FAILURE);
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate
4830Sstevel@tonic-gate if (md_devinfo != NULL) {
4840Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH);
4850Sstevel@tonic-gate return (DDI_FAILURE);
4860Sstevel@tonic-gate }
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate mddb_init();
4890Sstevel@tonic-gate
4900Sstevel@tonic-gate if (md_start_daemons(TRUE)) {
4910Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH);
4920Sstevel@tonic-gate mddb_unload(); /* undo mddb_init() allocations */
4930Sstevel@tonic-gate return (DDI_FAILURE);
4940Sstevel@tonic-gate }
4950Sstevel@tonic-gate
4960Sstevel@tonic-gate /* clear the halted state */
4970Sstevel@tonic-gate md_clr_status(MD_GBL_HALTED);
4980Sstevel@tonic-gate
4990Sstevel@tonic-gate /* see if the diagnostic switch is on */
5000Sstevel@tonic-gate if (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
5010Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_init_debug", 0))
5020Sstevel@tonic-gate md_init_debug++;
5030Sstevel@tonic-gate
5040Sstevel@tonic-gate /* see if the failfast disable switch is on */
5050Sstevel@tonic-gate if (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
5060Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_ff_disable", 0))
5070Sstevel@tonic-gate md_ff_disable++;
5080Sstevel@tonic-gate
5090Sstevel@tonic-gate /* try and get the md_nmedh property */
5100Sstevel@tonic-gate md_nmedh = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
5110Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_nmedh", MED_DEF_HOSTS);
5120Sstevel@tonic-gate if ((md_nmedh <= 0) || (md_nmedh > MED_MAX_HOSTS))
5130Sstevel@tonic-gate md_nmedh = MED_DEF_HOSTS;
5140Sstevel@tonic-gate
5150Sstevel@tonic-gate /* try and get the md_med_trans_lst property */
5160Sstevel@tonic-gate len = 0;
5170Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN,
5180Sstevel@tonic-gate 0, "md_med_trans_lst", NULL, &len) != DDI_PROP_SUCCESS ||
5190Sstevel@tonic-gate len == 0) {
5200Sstevel@tonic-gate md_med_trans_lst = md_strdup("tcp");
5210Sstevel@tonic-gate } else {
5220Sstevel@tonic-gate md_med_trans_lst = kmem_zalloc((size_t)len, KM_SLEEP);
5230Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF,
5240Sstevel@tonic-gate 0, "md_med_trans_lst", md_med_trans_lst, &len) !=
5250Sstevel@tonic-gate DDI_PROP_SUCCESS) {
5260Sstevel@tonic-gate kmem_free(md_med_trans_lst, (size_t)len);
5270Sstevel@tonic-gate md_med_trans_lst = md_strdup("tcp");
5280Sstevel@tonic-gate }
5290Sstevel@tonic-gate }
5300Sstevel@tonic-gate
5313036Seota /*
5323036Seota * Must initialize the internal data structures before the
5333036Seota * any possible calls to 'goto attach_failure' as _fini
5343036Seota * routine references them.
5353036Seota */
5363036Seota med_init();
5373036Seota
5383036Seota md_ops = (md_ops_t **)kmem_zalloc(
5393036Seota sizeof (md_ops_t *) * MD_NOPS, KM_SLEEP);
5403036Seota md_mods = (ddi_modhandle_t *)kmem_zalloc(
5413036Seota sizeof (ddi_modhandle_t) * MD_NOPS, KM_SLEEP);
5423036Seota
5430Sstevel@tonic-gate /* try and get the md_xlate property */
5440Sstevel@tonic-gate /* Should we only do this if upgrade? */
5450Sstevel@tonic-gate len = sizeof (char) * 5;
5460Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF,
5470Sstevel@tonic-gate 0, "md_xlate_ver", ver, &len) == DDI_PROP_SUCCESS) {
5480Sstevel@tonic-gate if (strcmp(ver, VERSION) == 0) {
5490Sstevel@tonic-gate len = 0;
5500Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip,
5510Sstevel@tonic-gate PROP_LEN_AND_VAL_ALLOC, 0, "md_xlate",
5520Sstevel@tonic-gate (caddr_t)&md_tuple_table, &len) !=
5530Sstevel@tonic-gate DDI_PROP_SUCCESS) {
5540Sstevel@tonic-gate if (md_init_debug)
5550Sstevel@tonic-gate cmn_err(CE_WARN,
5560Sstevel@tonic-gate "md_xlate ddi_prop_op failed");
5570Sstevel@tonic-gate goto attach_failure;
5580Sstevel@tonic-gate } else {
5590Sstevel@tonic-gate md_tuple_length =
5600Sstevel@tonic-gate len/(2 * ((int)sizeof (dev32_t)));
5610Sstevel@tonic-gate md_in_upgrade = 1;
5620Sstevel@tonic-gate }
5630Sstevel@tonic-gate
5640Sstevel@tonic-gate /* Get target's name to major table */
5650Sstevel@tonic-gate if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
5660Sstevel@tonic-gate dip, DDI_PROP_DONTPASS,
5670Sstevel@tonic-gate "md_targ_nm_table", &maj_str_array,
5680Sstevel@tonic-gate &md_majortab_len) != DDI_PROP_SUCCESS) {
5690Sstevel@tonic-gate md_majortab_len = 0;
5700Sstevel@tonic-gate if (md_init_debug)
5717627SChris.Horne@Sun.COM cmn_err(CE_WARN, "md_targ_nm_table "
5727627SChris.Horne@Sun.COM "ddi_prop_lookup_string_array "
5737627SChris.Horne@Sun.COM "failed");
5740Sstevel@tonic-gate goto attach_failure;
5750Sstevel@tonic-gate }
5760Sstevel@tonic-gate
5770Sstevel@tonic-gate md_major_tuple_table =
5780Sstevel@tonic-gate (struct md_xlate_major_table *)
5790Sstevel@tonic-gate kmem_zalloc(md_majortab_len *
5800Sstevel@tonic-gate sizeof (struct md_xlate_major_table), KM_SLEEP);
5810Sstevel@tonic-gate
5820Sstevel@tonic-gate for (i = 0; i < md_majortab_len; i++) {
5830Sstevel@tonic-gate /* Getting major name */
5840Sstevel@tonic-gate str = strchr(maj_str_array[i], ' ');
5850Sstevel@tonic-gate if (str == NULL)
5860Sstevel@tonic-gate continue;
5870Sstevel@tonic-gate *str = '\0';
5880Sstevel@tonic-gate md_major_tuple_table[i].drv_name =
5890Sstevel@tonic-gate md_strdup(maj_str_array[i]);
5900Sstevel@tonic-gate
5910Sstevel@tonic-gate /* Simplified atoi to get major number */
5920Sstevel@tonic-gate str2 = str + 1;
5930Sstevel@tonic-gate md_major_tuple_table[i].targ_maj = 0;
5940Sstevel@tonic-gate while ((*str2 >= '0') && (*str2 <= '9')) {
5957627SChris.Horne@Sun.COM md_major_tuple_table[i].targ_maj *= 10;
5967627SChris.Horne@Sun.COM md_major_tuple_table[i].targ_maj +=
5977627SChris.Horne@Sun.COM *str2++ - '0';
5980Sstevel@tonic-gate }
5990Sstevel@tonic-gate *str = ' ';
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate ddi_prop_free((void *)maj_str_array);
6020Sstevel@tonic-gate } else {
6030Sstevel@tonic-gate if (md_init_debug)
6040Sstevel@tonic-gate cmn_err(CE_WARN, "md_xlate_ver is incorrect");
6050Sstevel@tonic-gate goto attach_failure;
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate }
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate /*
6100Sstevel@tonic-gate * Check for properties:
6110Sstevel@tonic-gate * md_keep_repl_state and md_devid_destroy
6120Sstevel@tonic-gate * and set globals if these exist.
6130Sstevel@tonic-gate */
6140Sstevel@tonic-gate md_keep_repl_state = ddi_getprop(DDI_DEV_T_ANY, dip,
6157627SChris.Horne@Sun.COM 0, "md_keep_repl_state", 0);
6160Sstevel@tonic-gate
6170Sstevel@tonic-gate md_devid_destroy = ddi_getprop(DDI_DEV_T_ANY, dip,
6187627SChris.Horne@Sun.COM 0, "md_devid_destroy", 0);
6190Sstevel@tonic-gate
6200Sstevel@tonic-gate if (MD_UPGRADE)
6210Sstevel@tonic-gate md_major_targ = md_targ_name_to_major("md");
6220Sstevel@tonic-gate else
6230Sstevel@tonic-gate md_major_targ = 0;
6240Sstevel@tonic-gate
6250Sstevel@tonic-gate /* allocate admin device node */
6260Sstevel@tonic-gate if (ddi_create_priv_minor_node(dip, "admin", S_IFCHR,
6270Sstevel@tonic-gate MD_ADM_MINOR, DDI_PSEUDO, 0, NULL, PRIV_SYS_CONFIG, 0640))
6280Sstevel@tonic-gate goto attach_failure;
6290Sstevel@tonic-gate
6300Sstevel@tonic-gate if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
6310Sstevel@tonic-gate DDI_KERNEL_IOCTL, NULL, 0) != DDI_SUCCESS)
6320Sstevel@tonic-gate goto attach_failure;
6330Sstevel@tonic-gate
6340Sstevel@tonic-gate if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
6350Sstevel@tonic-gate "ddi-abrwrite-supported", 1) != DDI_SUCCESS)
6360Sstevel@tonic-gate goto attach_failure;
6370Sstevel@tonic-gate
6380Sstevel@tonic-gate /* these could have been cleared by a detach */
6390Sstevel@tonic-gate md_nunits = MD_MAXUNITS;
6400Sstevel@tonic-gate md_nsets = MD_MAXSETS;
6410Sstevel@tonic-gate
6420Sstevel@tonic-gate sz = sizeof (void *) * MD_MAXUNITS;
6430Sstevel@tonic-gate if (md_set[0].s_un == NULL)
6440Sstevel@tonic-gate md_set[0].s_un = kmem_zalloc(sz, KM_SLEEP);
6450Sstevel@tonic-gate if (md_set[0].s_ui == NULL)
6460Sstevel@tonic-gate md_set[0].s_ui = kmem_zalloc(sz, KM_SLEEP);
6470Sstevel@tonic-gate
6480Sstevel@tonic-gate md_devinfo = dip;
6490Sstevel@tonic-gate
6500Sstevel@tonic-gate /*
6510Sstevel@tonic-gate * Only allocate device node for root mirror metadevice.
6520Sstevel@tonic-gate * Don't pre-allocate unnecessary device nodes (thus slowing down a
6530Sstevel@tonic-gate * boot when we attach).
6540Sstevel@tonic-gate * We can't read the mddbs in attach. The mddbs will be read
6550Sstevel@tonic-gate * by metainit during the boot process when it is doing the
6560Sstevel@tonic-gate * auto-take processing and any other minor nodes will be
6570Sstevel@tonic-gate * allocated at that point.
6580Sstevel@tonic-gate *
6590Sstevel@tonic-gate * There are two scenarios to be aware of here:
6600Sstevel@tonic-gate * 1) when we are booting from a mirrored root we need the root
6610Sstevel@tonic-gate * metadevice to exist very early (during vfs_mountroot processing)
6620Sstevel@tonic-gate * 2) we need all of the nodes to be created so that any mnttab entries
6630Sstevel@tonic-gate * will succeed (handled by metainit reading the mddb during boot).
6640Sstevel@tonic-gate */
6650Sstevel@tonic-gate if (strncmp(SVM_PSEUDO_STR, svm_bootpath, sizeof (SVM_PSEUDO_STR) - 1)
6660Sstevel@tonic-gate == 0) {
6670Sstevel@tonic-gate char *p;
6680Sstevel@tonic-gate int mnum = 0;
6690Sstevel@tonic-gate
6700Sstevel@tonic-gate /*
6710Sstevel@tonic-gate * The svm_bootpath string looks something like
6720Sstevel@tonic-gate * /pseudo/md@0:0,150,blk where 150 is the minor number
6730Sstevel@tonic-gate * in this example so we need to set the pointer p onto
6740Sstevel@tonic-gate * the first digit of the minor number and convert it
6750Sstevel@tonic-gate * from ascii.
6760Sstevel@tonic-gate */
6770Sstevel@tonic-gate for (p = svm_bootpath + sizeof (SVM_PSEUDO_STR) + 1;
6780Sstevel@tonic-gate *p >= '0' && *p <= '9'; p++) {
6790Sstevel@tonic-gate mnum *= 10;
6800Sstevel@tonic-gate mnum += *p - '0';
6810Sstevel@tonic-gate }
6820Sstevel@tonic-gate
6830Sstevel@tonic-gate if (md_create_minor_node(0, mnum)) {
6840Sstevel@tonic-gate kmem_free(md_set[0].s_un, sz);
6850Sstevel@tonic-gate kmem_free(md_set[0].s_ui, sz);
6860Sstevel@tonic-gate goto attach_failure;
6870Sstevel@tonic-gate }
6880Sstevel@tonic-gate }
6890Sstevel@tonic-gate
6907627SChris.Horne@Sun.COM /* create the hash to store the meta device sizes */
6917627SChris.Horne@Sun.COM md_nblocksmap = mod_hash_create_idhash("md_nblocksmap",
6927627SChris.Horne@Sun.COM md_nblocksmap_size, mod_hash_null_valdtor);
6937627SChris.Horne@Sun.COM
6940Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH);
6950Sstevel@tonic-gate return (DDI_SUCCESS);
6960Sstevel@tonic-gate
6970Sstevel@tonic-gate attach_failure:
6980Sstevel@tonic-gate /*
6990Sstevel@tonic-gate * Use our own detach routine to toss any stuff we allocated above.
7000Sstevel@tonic-gate * NOTE: detach will call md_halt to free the mddb_init allocations.
7010Sstevel@tonic-gate */
7020Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH);
7030Sstevel@tonic-gate if (mddetach(dip, DDI_DETACH) != DDI_SUCCESS)
7040Sstevel@tonic-gate cmn_err(CE_WARN, "detach from attach failed");
7050Sstevel@tonic-gate return (DDI_FAILURE);
7060Sstevel@tonic-gate }
7070Sstevel@tonic-gate
7080Sstevel@tonic-gate /* ARGSUSED */
7090Sstevel@tonic-gate static int
mddetach(dev_info_t * dip,ddi_detach_cmd_t cmd)7100Sstevel@tonic-gate mddetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7110Sstevel@tonic-gate {
7120Sstevel@tonic-gate extern int check_active_locators();
7130Sstevel@tonic-gate set_t s;
7140Sstevel@tonic-gate size_t sz;
7150Sstevel@tonic-gate int len;
7160Sstevel@tonic-gate
7170Sstevel@tonic-gate MD_SET_IN(IN_DETACH);
7180Sstevel@tonic-gate
7190Sstevel@tonic-gate /* check command */
7200Sstevel@tonic-gate if (cmd != DDI_DETACH) {
7210Sstevel@tonic-gate MD_CLR_IN(IN_DETACH);
7220Sstevel@tonic-gate return (DDI_FAILURE);
7230Sstevel@tonic-gate }
7240Sstevel@tonic-gate
7250Sstevel@tonic-gate /*
7260Sstevel@tonic-gate * if we have not already halted yet we have no active config
7270Sstevel@tonic-gate * then automatically initiate a halt so we can detach.
7280Sstevel@tonic-gate */
7290Sstevel@tonic-gate if (!(md_get_status() & MD_GBL_HALTED)) {
7300Sstevel@tonic-gate if (check_active_locators() == 0) {
7310Sstevel@tonic-gate /*
7320Sstevel@tonic-gate * NOTE: a successful md_halt will have done the
7330Sstevel@tonic-gate * mddb_unload to free allocations done in mddb_init
7340Sstevel@tonic-gate */
7350Sstevel@tonic-gate if (md_halt(MD_NO_GBL_LOCKS_HELD)) {
7360Sstevel@tonic-gate cmn_err(CE_NOTE, "md:detach: "
7370Sstevel@tonic-gate "Could not halt Solaris Volume Manager");
7380Sstevel@tonic-gate MD_CLR_IN(IN_DETACH);
7390Sstevel@tonic-gate return (DDI_FAILURE);
7400Sstevel@tonic-gate }
7410Sstevel@tonic-gate }
7420Sstevel@tonic-gate
7430Sstevel@tonic-gate /* fail detach if we have not halted */
7440Sstevel@tonic-gate if (!(md_get_status() & MD_GBL_HALTED)) {
7450Sstevel@tonic-gate MD_CLR_IN(IN_DETACH);
7460Sstevel@tonic-gate return (DDI_FAILURE);
7470Sstevel@tonic-gate }
7480Sstevel@tonic-gate }
7490Sstevel@tonic-gate
7500Sstevel@tonic-gate /* must be in halted state, this will be cleared on next attach */
7510Sstevel@tonic-gate ASSERT(md_get_status() & MD_GBL_HALTED);
7520Sstevel@tonic-gate
7530Sstevel@tonic-gate /* cleanup attach allocations and initializations */
7540Sstevel@tonic-gate md_major_targ = 0;
7550Sstevel@tonic-gate
7560Sstevel@tonic-gate sz = sizeof (void *) * md_nunits;
7570Sstevel@tonic-gate for (s = 0; s < md_nsets; s++) {
7580Sstevel@tonic-gate if (md_set[s].s_un != NULL) {
7590Sstevel@tonic-gate kmem_free(md_set[s].s_un, sz);
7600Sstevel@tonic-gate md_set[s].s_un = NULL;
7610Sstevel@tonic-gate }
7620Sstevel@tonic-gate
7630Sstevel@tonic-gate if (md_set[s].s_ui != NULL) {
7640Sstevel@tonic-gate kmem_free(md_set[s].s_ui, sz);
7650Sstevel@tonic-gate md_set[s].s_ui = NULL;
7660Sstevel@tonic-gate }
7670Sstevel@tonic-gate }
7680Sstevel@tonic-gate md_nunits = 0;
7690Sstevel@tonic-gate md_nsets = 0;
7700Sstevel@tonic-gate md_nmedh = 0;
7710Sstevel@tonic-gate
7722063Shshaw if (non_ff_drivers != NULL) {
7732063Shshaw int i;
7742063Shshaw
7752063Shshaw for (i = 0; non_ff_drivers[i] != NULL; i++)
7767627SChris.Horne@Sun.COM kmem_free(non_ff_drivers[i],
7777627SChris.Horne@Sun.COM strlen(non_ff_drivers[i]) + 1);
7782063Shshaw
7792103Shshaw /* free i+1 entries because there is a null entry at list end */
7802103Shshaw kmem_free(non_ff_drivers, (i + 1) * sizeof (char *));
7812063Shshaw non_ff_drivers = NULL;
7822063Shshaw }
7832063Shshaw
7840Sstevel@tonic-gate if (md_med_trans_lst != NULL) {
7850Sstevel@tonic-gate kmem_free(md_med_trans_lst, strlen(md_med_trans_lst) + 1);
7860Sstevel@tonic-gate md_med_trans_lst = NULL;
7870Sstevel@tonic-gate }
7880Sstevel@tonic-gate
7890Sstevel@tonic-gate if (md_mods != NULL) {
7900Sstevel@tonic-gate kmem_free(md_mods, sizeof (ddi_modhandle_t) * MD_NOPS);
7910Sstevel@tonic-gate md_mods = NULL;
7920Sstevel@tonic-gate }
7930Sstevel@tonic-gate
7940Sstevel@tonic-gate if (md_ops != NULL) {
7950Sstevel@tonic-gate kmem_free(md_ops, sizeof (md_ops_t *) * MD_NOPS);
7960Sstevel@tonic-gate md_ops = NULL;
7970Sstevel@tonic-gate }
7980Sstevel@tonic-gate
7990Sstevel@tonic-gate if (MD_UPGRADE) {
8000Sstevel@tonic-gate len = md_tuple_length * (2 * ((int)sizeof (dev32_t)));
8010Sstevel@tonic-gate md_in_upgrade = 0;
8020Sstevel@tonic-gate md_xlate_free(len);
8030Sstevel@tonic-gate md_majortab_free();
8040Sstevel@tonic-gate }
8050Sstevel@tonic-gate
8060Sstevel@tonic-gate /*
8070Sstevel@tonic-gate * Undo what we did in mdattach, freeing resources
8080Sstevel@tonic-gate * and removing things we installed. The system
8090Sstevel@tonic-gate * framework guarantees we are not active with this devinfo
8100Sstevel@tonic-gate * node in any other entry points at this time.
8110Sstevel@tonic-gate */
8120Sstevel@tonic-gate ddi_prop_remove_all(dip);
8130Sstevel@tonic-gate ddi_remove_minor_node(dip, NULL);
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate med_fini();
8167627SChris.Horne@Sun.COM
8177627SChris.Horne@Sun.COM mod_hash_destroy_idhash(md_nblocksmap);
8187627SChris.Horne@Sun.COM
8190Sstevel@tonic-gate md_devinfo = NULL;
8200Sstevel@tonic-gate
8210Sstevel@tonic-gate MD_CLR_IN(IN_DETACH);
8220Sstevel@tonic-gate return (DDI_SUCCESS);
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate
8250Sstevel@tonic-gate
8260Sstevel@tonic-gate /*
8270Sstevel@tonic-gate * Given the device number return the devinfo pointer
8280Sstevel@tonic-gate * given to md via md_attach
8290Sstevel@tonic-gate */
8300Sstevel@tonic-gate /*ARGSUSED*/
8310Sstevel@tonic-gate static int
mdinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)8320Sstevel@tonic-gate mdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
8330Sstevel@tonic-gate {
8340Sstevel@tonic-gate int error = DDI_FAILURE;
8350Sstevel@tonic-gate
8360Sstevel@tonic-gate switch (infocmd) {
8370Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO:
8380Sstevel@tonic-gate if (md_devinfo) {
8390Sstevel@tonic-gate *result = (void *)md_devinfo;
8400Sstevel@tonic-gate error = DDI_SUCCESS;
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate break;
8430Sstevel@tonic-gate
8440Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE:
8450Sstevel@tonic-gate *result = (void *)0;
8460Sstevel@tonic-gate error = DDI_SUCCESS;
8470Sstevel@tonic-gate break;
8480Sstevel@tonic-gate }
8490Sstevel@tonic-gate return (error);
8500Sstevel@tonic-gate }
8510Sstevel@tonic-gate
8520Sstevel@tonic-gate /*
8530Sstevel@tonic-gate * property operation routine. return the number of blocks for the partition
8540Sstevel@tonic-gate * in question or forward the request to the property facilities.
8550Sstevel@tonic-gate */
8560Sstevel@tonic-gate static int
mdprop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int mod_flags,char * name,caddr_t valuep,int * lengthp)8570Sstevel@tonic-gate mdprop_op(
8580Sstevel@tonic-gate dev_t dev, /* device number associated with device */
8590Sstevel@tonic-gate dev_info_t *dip, /* device info struct for this device */
8600Sstevel@tonic-gate ddi_prop_op_t prop_op, /* property operator */
8610Sstevel@tonic-gate int mod_flags, /* property flags */
8620Sstevel@tonic-gate char *name, /* name of property */
8630Sstevel@tonic-gate caddr_t valuep, /* where to put property value */
8640Sstevel@tonic-gate int *lengthp) /* put length of property here */
8650Sstevel@tonic-gate {
8667627SChris.Horne@Sun.COM return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
8677627SChris.Horne@Sun.COM name, valuep, lengthp, md_nblocks_get(getminor(dev))));
8680Sstevel@tonic-gate }
8690Sstevel@tonic-gate
8700Sstevel@tonic-gate static void
snarf_user_data(set_t setno)8710Sstevel@tonic-gate snarf_user_data(set_t setno)
8720Sstevel@tonic-gate {
8730Sstevel@tonic-gate mddb_recid_t recid;
8740Sstevel@tonic-gate mddb_recstatus_t status;
8750Sstevel@tonic-gate
8760Sstevel@tonic-gate recid = mddb_makerecid(setno, 0);
8770Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_USER, 0)) > 0) {
8780Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
8790Sstevel@tonic-gate continue;
8800Sstevel@tonic-gate
8810Sstevel@tonic-gate status = mddb_getrecstatus(recid);
8820Sstevel@tonic-gate if (status == MDDB_STALE)
8830Sstevel@tonic-gate continue;
8840Sstevel@tonic-gate
8850Sstevel@tonic-gate if (status == MDDB_NODATA) {
8860Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL);
8870Sstevel@tonic-gate continue;
8880Sstevel@tonic-gate }
8890Sstevel@tonic-gate
8900Sstevel@tonic-gate ASSERT(status == MDDB_OK);
8910Sstevel@tonic-gate
8920Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT);
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate }
8950Sstevel@tonic-gate
8960Sstevel@tonic-gate static void
md_print_block_usage(mddb_set_t * s,uint_t blks)8970Sstevel@tonic-gate md_print_block_usage(mddb_set_t *s, uint_t blks)
8980Sstevel@tonic-gate {
8990Sstevel@tonic-gate uint_t ib;
9000Sstevel@tonic-gate int li;
9010Sstevel@tonic-gate mddb_mb_ic_t *mbip;
9020Sstevel@tonic-gate uint_t max_blk_needed;
9030Sstevel@tonic-gate mddb_lb_t *lbp;
9040Sstevel@tonic-gate mddb_sidelocator_t *slp;
9050Sstevel@tonic-gate int drv_index;
9060Sstevel@tonic-gate md_splitname sn;
9070Sstevel@tonic-gate char *name;
9080Sstevel@tonic-gate char *suffix;
9090Sstevel@tonic-gate size_t prefixlen;
9100Sstevel@tonic-gate size_t suffixlen;
9110Sstevel@tonic-gate int alloc_sz;
9120Sstevel@tonic-gate
9130Sstevel@tonic-gate
9140Sstevel@tonic-gate max_blk_needed = s->s_totalblkcnt - s->s_freeblkcnt + blks;
9150Sstevel@tonic-gate
9160Sstevel@tonic-gate cmn_err(CE_WARN, "Blocks in Metadevice State Database: %d\n"
9177627SChris.Horne@Sun.COM " Additional Blocks Needed: %d\n\n"
9187627SChris.Horne@Sun.COM " Increase size of following replicas for\n"
9197627SChris.Horne@Sun.COM " device relocatability by deleting listed\n"
9207627SChris.Horne@Sun.COM " replica and re-adding replica with\n"
9217627SChris.Horne@Sun.COM " increased size (see metadb(1M)):\n"
9227627SChris.Horne@Sun.COM " Replica Increase By",
9237627SChris.Horne@Sun.COM s->s_totalblkcnt, (blks - s->s_freeblkcnt));
9240Sstevel@tonic-gate
9250Sstevel@tonic-gate lbp = s->s_lbp;
9260Sstevel@tonic-gate
9270Sstevel@tonic-gate for (li = 0; li < lbp->lb_loccnt; li++) {
9280Sstevel@tonic-gate if (lbp->lb_locators[li].l_flags & MDDB_F_DELETED)
9290Sstevel@tonic-gate continue;
9300Sstevel@tonic-gate ib = 0;
9310Sstevel@tonic-gate for (mbip = s->s_mbiarray[li]; mbip != NULL;
9320Sstevel@tonic-gate mbip = mbip->mbi_next) {
9330Sstevel@tonic-gate ib += (uint_t)mbip->mbi_mddb_mb.mb_blkcnt;
9340Sstevel@tonic-gate }
9350Sstevel@tonic-gate if (ib == 0)
9360Sstevel@tonic-gate continue;
9370Sstevel@tonic-gate if (ib < max_blk_needed) {
9380Sstevel@tonic-gate slp = &lbp->lb_sidelocators[s->s_sideno][li];
9390Sstevel@tonic-gate drv_index = slp->l_drvnm_index;
9400Sstevel@tonic-gate mddb_locatorblock2splitname(s->s_lnp, li, s->s_sideno,
9417627SChris.Horne@Sun.COM &sn);
9420Sstevel@tonic-gate prefixlen = SPN_PREFIX(&sn).pre_len;
9430Sstevel@tonic-gate suffixlen = SPN_SUFFIX(&sn).suf_len;
9440Sstevel@tonic-gate alloc_sz = (int)(prefixlen + suffixlen + 2);
9450Sstevel@tonic-gate name = (char *)kmem_alloc(alloc_sz, KM_SLEEP);
9460Sstevel@tonic-gate (void) strncpy(name, SPN_PREFIX(&sn).pre_data,
9470Sstevel@tonic-gate prefixlen);
9480Sstevel@tonic-gate name[prefixlen] = '/';
9490Sstevel@tonic-gate suffix = name + (prefixlen + 1);
9500Sstevel@tonic-gate (void) strncpy(suffix, SPN_SUFFIX(&sn).suf_data,
9510Sstevel@tonic-gate suffixlen);
9520Sstevel@tonic-gate name[prefixlen + suffixlen + 1] = '\0';
9530Sstevel@tonic-gate cmn_err(CE_WARN,
9547627SChris.Horne@Sun.COM " %s (%s:%d:%d) %d blocks",
9557627SChris.Horne@Sun.COM name, lbp->lb_drvnm[drv_index].dn_data,
9567627SChris.Horne@Sun.COM slp->l_mnum, lbp->lb_locators[li].l_blkno,
9577627SChris.Horne@Sun.COM (max_blk_needed - ib));
9580Sstevel@tonic-gate kmem_free(name, alloc_sz);
9590Sstevel@tonic-gate }
9600Sstevel@tonic-gate }
9610Sstevel@tonic-gate }
9620Sstevel@tonic-gate
9630Sstevel@tonic-gate /*
9640Sstevel@tonic-gate * md_create_minor_node:
9650Sstevel@tonic-gate * Create the minor device for the given set and un_self_id.
9660Sstevel@tonic-gate *
9670Sstevel@tonic-gate * Input:
9680Sstevel@tonic-gate * setno - set number
9690Sstevel@tonic-gate * mnum - selfID of unit
9700Sstevel@tonic-gate *
9710Sstevel@tonic-gate * Output:
9720Sstevel@tonic-gate * None.
9730Sstevel@tonic-gate *
9740Sstevel@tonic-gate * Returns 0 for success, 1 for failure.
9750Sstevel@tonic-gate *
9760Sstevel@tonic-gate * Side-effects:
9770Sstevel@tonic-gate * None.
9780Sstevel@tonic-gate */
9790Sstevel@tonic-gate int
md_create_minor_node(set_t setno,minor_t mnum)9800Sstevel@tonic-gate md_create_minor_node(set_t setno, minor_t mnum)
9810Sstevel@tonic-gate {
9820Sstevel@tonic-gate char name[20];
9830Sstevel@tonic-gate
9840Sstevel@tonic-gate /* Check for valid arguments */
9850Sstevel@tonic-gate if (setno >= MD_MAXSETS || MD_MIN2UNIT(mnum) >= MD_MAXUNITS)
9860Sstevel@tonic-gate return (1);
9870Sstevel@tonic-gate
9880Sstevel@tonic-gate (void) snprintf(name, 20, "%u,%u,blk",
9897627SChris.Horne@Sun.COM (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum));
9900Sstevel@tonic-gate
9910Sstevel@tonic-gate if (ddi_create_minor_node(md_devinfo, name, S_IFBLK,
9920Sstevel@tonic-gate MD_MKMIN(setno, mnum), DDI_PSEUDO, 0))
9930Sstevel@tonic-gate return (1);
9940Sstevel@tonic-gate
9950Sstevel@tonic-gate (void) snprintf(name, 20, "%u,%u,raw",
9967627SChris.Horne@Sun.COM (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum));
9970Sstevel@tonic-gate
9980Sstevel@tonic-gate if (ddi_create_minor_node(md_devinfo, name, S_IFCHR,
9990Sstevel@tonic-gate MD_MKMIN(setno, mnum), DDI_PSEUDO, 0))
10000Sstevel@tonic-gate return (1);
10010Sstevel@tonic-gate
10020Sstevel@tonic-gate return (0);
10030Sstevel@tonic-gate }
10040Sstevel@tonic-gate
10050Sstevel@tonic-gate /*
10060Sstevel@tonic-gate * For a given key check if it is an orphaned record.
10070Sstevel@tonic-gate * The following conditions are used to determine an orphan.
10080Sstevel@tonic-gate * 1. The device associated with that key is not a metadevice.
10090Sstevel@tonic-gate * 2. If DEVID_STYLE then the physical device does not have a device Id
10100Sstevel@tonic-gate * associated with it.
10110Sstevel@tonic-gate *
10120Sstevel@tonic-gate * If a key does not have an entry in the devid namespace it could be
10130Sstevel@tonic-gate * a device that does not support device ids. Hence the record is not
10140Sstevel@tonic-gate * deleted.
10150Sstevel@tonic-gate */
10160Sstevel@tonic-gate
10170Sstevel@tonic-gate static int
md_verify_orphaned_record(set_t setno,mdkey_t key)10180Sstevel@tonic-gate md_verify_orphaned_record(set_t setno, mdkey_t key)
10190Sstevel@tonic-gate {
10200Sstevel@tonic-gate md_dev64_t odev; /* orphaned dev */
10210Sstevel@tonic-gate mddb_set_t *s;
10220Sstevel@tonic-gate side_t side = 0;
10230Sstevel@tonic-gate struct nm_next_hdr *did_nh = NULL;
10240Sstevel@tonic-gate
10250Sstevel@tonic-gate s = (mddb_set_t *)md_set[setno].s_db;
10260Sstevel@tonic-gate if ((did_nh = get_first_record(setno, 1, (NM_DEVID | NM_NOTSHARED)))
10270Sstevel@tonic-gate == NULL)
10280Sstevel@tonic-gate return (0);
10290Sstevel@tonic-gate /*
10300Sstevel@tonic-gate * If devid style is set then get the dev_t using MD_NOTRUST_DEVT
10310Sstevel@tonic-gate */
10320Sstevel@tonic-gate if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE) {
10330Sstevel@tonic-gate odev = md_getdevnum(setno, side, key, MD_NOTRUST_DEVT);
10340Sstevel@tonic-gate if ((odev == NODEV64) || (md_getmajor(odev) == md_major))
10350Sstevel@tonic-gate return (0);
10360Sstevel@tonic-gate if (lookup_entry(did_nh, setno, side, key, odev, NM_DEVID) ==
10377627SChris.Horne@Sun.COM NULL)
10380Sstevel@tonic-gate return (1);
10390Sstevel@tonic-gate }
10400Sstevel@tonic-gate return (0);
10410Sstevel@tonic-gate }
10420Sstevel@tonic-gate
10430Sstevel@tonic-gate int
md_snarf_db_set(set_t setno,md_error_t * ep)10440Sstevel@tonic-gate md_snarf_db_set(set_t setno, md_error_t *ep)
10450Sstevel@tonic-gate {
10460Sstevel@tonic-gate int err = 0;
10470Sstevel@tonic-gate int i;
10480Sstevel@tonic-gate mddb_recid_t recid;
10490Sstevel@tonic-gate mddb_type_t drvrid;
10500Sstevel@tonic-gate mddb_recstatus_t status;
10510Sstevel@tonic-gate md_ops_t *ops;
10520Sstevel@tonic-gate uint_t privat;
10530Sstevel@tonic-gate mddb_set_t *s;
10540Sstevel@tonic-gate uint_t cvt_blks;
10550Sstevel@tonic-gate struct nm_next_hdr *nh;
10560Sstevel@tonic-gate mdkey_t key = MD_KEYWILD;
10570Sstevel@tonic-gate side_t side = 0;
10580Sstevel@tonic-gate int size;
10590Sstevel@tonic-gate int devid_flag;
10600Sstevel@tonic-gate int retval;
10611623Stw21770 uint_t un;
10621623Stw21770 int un_next_set = 0;
10630Sstevel@tonic-gate
10640Sstevel@tonic-gate md_haltsnarf_enter(setno);
10650Sstevel@tonic-gate
10660Sstevel@tonic-gate mutex_enter(&md_mx);
10670Sstevel@tonic-gate if (md_set[setno].s_status & MD_SET_SNARFED) {
10680Sstevel@tonic-gate mutex_exit(&md_mx);
10690Sstevel@tonic-gate md_haltsnarf_exit(setno);
10700Sstevel@tonic-gate return (0);
10710Sstevel@tonic-gate }
10720Sstevel@tonic-gate mutex_exit(&md_mx);
10730Sstevel@tonic-gate
10740Sstevel@tonic-gate if (! (md_get_status() & MD_GBL_DAEMONS_LIVE)) {
10750Sstevel@tonic-gate if (md_start_daemons(TRUE)) {
10760Sstevel@tonic-gate if (ep != NULL)
10770Sstevel@tonic-gate (void) mdsyserror(ep, ENXIO);
10780Sstevel@tonic-gate err = -1;
10790Sstevel@tonic-gate goto out;
10800Sstevel@tonic-gate }
10810Sstevel@tonic-gate }
10820Sstevel@tonic-gate
10830Sstevel@tonic-gate
10840Sstevel@tonic-gate /*
10850Sstevel@tonic-gate * Load the devid name space if it exists
10860Sstevel@tonic-gate */
10870Sstevel@tonic-gate (void) md_load_namespace(setno, NULL, NM_DEVID);
10880Sstevel@tonic-gate if (!md_load_namespace(setno, ep, 0L)) {
10890Sstevel@tonic-gate /*
10900Sstevel@tonic-gate * Unload the devid namespace
10910Sstevel@tonic-gate */
10920Sstevel@tonic-gate (void) md_unload_namespace(setno, NM_DEVID);
10930Sstevel@tonic-gate err = -1;
10940Sstevel@tonic-gate goto out;
10950Sstevel@tonic-gate }
10960Sstevel@tonic-gate
10970Sstevel@tonic-gate /*
10980Sstevel@tonic-gate * If replica is in non-devid state, convert if:
10990Sstevel@tonic-gate * - not in probe during upgrade (md_keep_repl_state = 0)
11000Sstevel@tonic-gate * - enough space available in replica
11010Sstevel@tonic-gate * - local set
11020Sstevel@tonic-gate * - not a multi-node diskset
11030Sstevel@tonic-gate * - clustering is not present (for non-local set)
11040Sstevel@tonic-gate */
11050Sstevel@tonic-gate s = (mddb_set_t *)md_set[setno].s_db;
11060Sstevel@tonic-gate devid_flag = 0;
11070Sstevel@tonic-gate if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE) && !md_keep_repl_state)
11080Sstevel@tonic-gate devid_flag = 1;
11090Sstevel@tonic-gate if (cluster_bootflags & CLUSTER_CONFIGURED)
11100Sstevel@tonic-gate if (setno != MD_LOCAL_SET)
11110Sstevel@tonic-gate devid_flag = 0;
11120Sstevel@tonic-gate if (MD_MNSET_SETNO(setno))
11130Sstevel@tonic-gate devid_flag = 0;
11140Sstevel@tonic-gate if ((md_devid_destroy == 1) && (md_keep_repl_state == 1))
11150Sstevel@tonic-gate devid_flag = 0;
11160Sstevel@tonic-gate
11170Sstevel@tonic-gate /*
11180Sstevel@tonic-gate * if we weren't devid style before and md_keep_repl_state=1
11190Sstevel@tonic-gate * we need to stay non-devid
11200Sstevel@tonic-gate */
11210Sstevel@tonic-gate if ((md_keep_repl_state == 1) &&
11220Sstevel@tonic-gate ((s->s_lbp->lb_flags & MDDB_DEVID_STYLE) == 0))
11230Sstevel@tonic-gate devid_flag = 0;
11240Sstevel@tonic-gate if (devid_flag) {
11250Sstevel@tonic-gate /*
11260Sstevel@tonic-gate * Determine number of free blocks needed to convert
11270Sstevel@tonic-gate * entire replica to device id format - locator blocks
11280Sstevel@tonic-gate * and namespace.
11290Sstevel@tonic-gate */
11300Sstevel@tonic-gate cvt_blks = 0;
11310Sstevel@tonic-gate if (mddb_lb_did_convert(s, 0, &cvt_blks) != 0) {
11320Sstevel@tonic-gate if (ep != NULL)
11330Sstevel@tonic-gate (void) mdsyserror(ep, EIO);
11340Sstevel@tonic-gate err = -1;
11350Sstevel@tonic-gate goto out;
11360Sstevel@tonic-gate
11370Sstevel@tonic-gate }
11380Sstevel@tonic-gate cvt_blks += md_nm_did_chkspace(setno);
11390Sstevel@tonic-gate
11400Sstevel@tonic-gate /* add MDDB_DEVID_CONV_PERC% */
11410Sstevel@tonic-gate if ((md_conv_perc > 0) && (md_conv_perc <= 100)) {
11420Sstevel@tonic-gate cvt_blks = cvt_blks * (100 + md_conv_perc) / 100;
11430Sstevel@tonic-gate }
11440Sstevel@tonic-gate
11450Sstevel@tonic-gate if (cvt_blks <= s->s_freeblkcnt) {
11460Sstevel@tonic-gate if (mddb_lb_did_convert(s, 1, &cvt_blks) != 0) {
11470Sstevel@tonic-gate if (ep != NULL)
11480Sstevel@tonic-gate (void) mdsyserror(ep, EIO);
11490Sstevel@tonic-gate err = -1;
11500Sstevel@tonic-gate goto out;
11510Sstevel@tonic-gate }
11520Sstevel@tonic-gate
11530Sstevel@tonic-gate } else {
11540Sstevel@tonic-gate /*
11550Sstevel@tonic-gate * Print message that replica can't be converted for
11560Sstevel@tonic-gate * lack of space. No failure - just continue to
11570Sstevel@tonic-gate * run without device ids.
11580Sstevel@tonic-gate */
11590Sstevel@tonic-gate cmn_err(CE_WARN,
11600Sstevel@tonic-gate "Unable to add Solaris Volume Manager device "
11610Sstevel@tonic-gate "relocation data.\n"
11620Sstevel@tonic-gate " To use device relocation feature:\n"
11630Sstevel@tonic-gate " - Increase size of listed replicas\n"
11640Sstevel@tonic-gate " - Reboot");
11650Sstevel@tonic-gate md_print_block_usage(s, cvt_blks);
11660Sstevel@tonic-gate cmn_err(CE_WARN,
11670Sstevel@tonic-gate "Loading set without device relocation data.\n"
11680Sstevel@tonic-gate " Solaris Volume Manager disk movement "
11690Sstevel@tonic-gate "not tracked in local set.");
11700Sstevel@tonic-gate }
11710Sstevel@tonic-gate }
11720Sstevel@tonic-gate
11730Sstevel@tonic-gate /*
11740Sstevel@tonic-gate * go through and load any modules referenced in
11750Sstevel@tonic-gate * data base
11760Sstevel@tonic-gate */
11770Sstevel@tonic-gate recid = mddb_makerecid(setno, 0);
11780Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
11790Sstevel@tonic-gate status = mddb_getrecstatus(recid);
11800Sstevel@tonic-gate if (status == MDDB_STALE) {
11810Sstevel@tonic-gate if (! (md_get_setstatus(setno) & MD_SET_STALE)) {
11820Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE);
11830Sstevel@tonic-gate cmn_err(CE_WARN,
11840Sstevel@tonic-gate "md: state database is stale");
11850Sstevel@tonic-gate }
11860Sstevel@tonic-gate } else if (status == MDDB_NODATA) {
11870Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL);
11880Sstevel@tonic-gate continue;
11890Sstevel@tonic-gate }
11900Sstevel@tonic-gate drvrid = mddb_getrectype1(recid);
11910Sstevel@tonic-gate if (drvrid < MDDB_FIRST_MODID)
11920Sstevel@tonic-gate continue;
11930Sstevel@tonic-gate if (md_loadsubmod(setno, md_getshared_name(setno, drvrid),
11940Sstevel@tonic-gate drvrid) < 0) {
11950Sstevel@tonic-gate cmn_err(CE_NOTE, "md: could not load misc/%s",
11967627SChris.Horne@Sun.COM md_getshared_name(setno, drvrid));
11970Sstevel@tonic-gate }
11980Sstevel@tonic-gate }
11990Sstevel@tonic-gate
12000Sstevel@tonic-gate if (recid < 0)
12010Sstevel@tonic-gate goto out;
12020Sstevel@tonic-gate
12030Sstevel@tonic-gate snarf_user_data(setno);
12040Sstevel@tonic-gate
12050Sstevel@tonic-gate /*
12060Sstevel@tonic-gate * Initialize the md_nm_snarfed array
12070Sstevel@tonic-gate * this array is indexed by the key and
12080Sstevel@tonic-gate * is set by md_getdevnum during the snarf time
12090Sstevel@tonic-gate */
12100Sstevel@tonic-gate if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) != NULL) {
12110Sstevel@tonic-gate size = (int)((((struct nm_rec_hdr *)nh->nmn_record)->
12120Sstevel@tonic-gate r_next_key) * (sizeof (int)));
12130Sstevel@tonic-gate md_nm_snarfed = (int *)kmem_zalloc(size, KM_SLEEP);
12140Sstevel@tonic-gate }
12150Sstevel@tonic-gate
12160Sstevel@tonic-gate /*
12170Sstevel@tonic-gate * go through and snarf until nothing gets added
12180Sstevel@tonic-gate */
12190Sstevel@tonic-gate do {
12200Sstevel@tonic-gate i = 0;
12210Sstevel@tonic-gate for (ops = md_opslist; ops != NULL; ops = ops->md_next) {
12220Sstevel@tonic-gate if (ops->md_snarf != NULL) {
12230Sstevel@tonic-gate retval = ops->md_snarf(MD_SNARF_DOIT, setno);
12240Sstevel@tonic-gate if (retval == -1) {
12250Sstevel@tonic-gate err = -1;
12260Sstevel@tonic-gate /* Don't know the failed unit */
12270Sstevel@tonic-gate (void) mdmderror(ep, MDE_RR_ALLOC_ERROR,
12280Sstevel@tonic-gate 0);
12290Sstevel@tonic-gate (void) md_halt_set(setno, MD_HALT_ALL);
12300Sstevel@tonic-gate (void) mddb_unload_set(setno);
12310Sstevel@tonic-gate md_haltsnarf_exit(setno);
12320Sstevel@tonic-gate return (err);
12330Sstevel@tonic-gate } else {
12340Sstevel@tonic-gate i += retval;
12350Sstevel@tonic-gate }
12360Sstevel@tonic-gate }
12370Sstevel@tonic-gate }
12380Sstevel@tonic-gate } while (i);
12390Sstevel@tonic-gate
12401623Stw21770 /*
12411623Stw21770 * Set the first available slot and availability
12421623Stw21770 */
12431623Stw21770 md_set[setno].s_un_avail = 0;
12441623Stw21770 for (un = 0; un < MD_MAXUNITS; un++) {
12451623Stw21770 if (md_set[setno].s_un[un] != NULL) {
12461623Stw21770 continue;
12471623Stw21770 } else {
12481623Stw21770 if (!un_next_set) {
12491623Stw21770 md_set[setno].s_un_next = un;
12501623Stw21770 un_next_set = 1;
12511623Stw21770 }
12521623Stw21770 md_set[setno].s_un_avail++;
12531623Stw21770 }
12541623Stw21770 }
12551623Stw21770
12560Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_SNARFED);
12570Sstevel@tonic-gate
12580Sstevel@tonic-gate recid = mddb_makerecid(setno, 0);
12590Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
12600Sstevel@tonic-gate privat = mddb_getrecprivate(recid);
12610Sstevel@tonic-gate if (privat & MD_PRV_COMMIT) {
12620Sstevel@tonic-gate if (mddb_commitrec(recid)) {
12630Sstevel@tonic-gate if (!(md_get_setstatus(setno) & MD_SET_STALE)) {
12640Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE);
12650Sstevel@tonic-gate cmn_err(CE_WARN,
12660Sstevel@tonic-gate "md: state database is stale");
12670Sstevel@tonic-gate }
12680Sstevel@tonic-gate }
12690Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT);
12700Sstevel@tonic-gate }
12710Sstevel@tonic-gate }
12720Sstevel@tonic-gate
12730Sstevel@tonic-gate /* Deletes must happen after all the commits */
12740Sstevel@tonic-gate recid = mddb_makerecid(setno, 0);
12750Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) {
12760Sstevel@tonic-gate privat = mddb_getrecprivate(recid);
12770Sstevel@tonic-gate if (privat & MD_PRV_DELETE) {
12780Sstevel@tonic-gate if (mddb_deleterec(recid)) {
12790Sstevel@tonic-gate if (!(md_get_setstatus(setno) & MD_SET_STALE)) {
12800Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE);
12810Sstevel@tonic-gate cmn_err(CE_WARN,
12820Sstevel@tonic-gate "md: state database is stale");
12830Sstevel@tonic-gate }
12840Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT);
12850Sstevel@tonic-gate }
12860Sstevel@tonic-gate recid = mddb_makerecid(setno, 0);
12870Sstevel@tonic-gate }
12880Sstevel@tonic-gate }
12890Sstevel@tonic-gate
12900Sstevel@tonic-gate /*
12910Sstevel@tonic-gate * go through and clean up records until nothing gets cleaned up.
12920Sstevel@tonic-gate */
12930Sstevel@tonic-gate do {
12940Sstevel@tonic-gate i = 0;
12950Sstevel@tonic-gate for (ops = md_opslist; ops != NULL; ops = ops->md_next)
12960Sstevel@tonic-gate if (ops->md_snarf != NULL)
12970Sstevel@tonic-gate i += ops->md_snarf(MD_SNARF_CLEANUP, setno);
12980Sstevel@tonic-gate } while (i);
12990Sstevel@tonic-gate
13000Sstevel@tonic-gate if (md_nm_snarfed != NULL &&
13010Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_STALE)) {
13020Sstevel@tonic-gate /*
13030Sstevel@tonic-gate * go thru and cleanup the namespace and the device id
13040Sstevel@tonic-gate * name space
13050Sstevel@tonic-gate */
13060Sstevel@tonic-gate for (key = 1;
13070Sstevel@tonic-gate key < ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key;
13080Sstevel@tonic-gate key++) {
13090Sstevel@tonic-gate /*
13100Sstevel@tonic-gate * Is the entry an 'orphan'?
13110Sstevel@tonic-gate */
13120Sstevel@tonic-gate if (lookup_entry(nh, setno, side, key, NODEV64, 0L) !=
13130Sstevel@tonic-gate NULL) {
13140Sstevel@tonic-gate /*
13150Sstevel@tonic-gate * If the value is not set then apparently
13160Sstevel@tonic-gate * it is not part of the current configuration,
13170Sstevel@tonic-gate * remove it this can happen when system panic
13180Sstevel@tonic-gate * between the primary name space update and
13190Sstevel@tonic-gate * the device id name space update
13200Sstevel@tonic-gate */
13210Sstevel@tonic-gate if (md_nm_snarfed[key] == 0) {
13220Sstevel@tonic-gate if (md_verify_orphaned_record(setno,
13230Sstevel@tonic-gate key) == 1)
13240Sstevel@tonic-gate (void) remove_entry(nh,
13250Sstevel@tonic-gate side, key, 0L);
13260Sstevel@tonic-gate }
13270Sstevel@tonic-gate }
13280Sstevel@tonic-gate }
13290Sstevel@tonic-gate }
13300Sstevel@tonic-gate
13310Sstevel@tonic-gate if (md_nm_snarfed != NULL) {
13320Sstevel@tonic-gate /*
13330Sstevel@tonic-gate * Done and free the memory
13340Sstevel@tonic-gate */
13350Sstevel@tonic-gate kmem_free(md_nm_snarfed, size);
13360Sstevel@tonic-gate md_nm_snarfed = NULL;
13370Sstevel@tonic-gate }
13380Sstevel@tonic-gate
13390Sstevel@tonic-gate if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE &&
13400Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_STALE)) {
13410Sstevel@tonic-gate /*
13420Sstevel@tonic-gate * if the destroy flag has been set and
13430Sstevel@tonic-gate * the MD_SET_DIDCLUP bit is not set in
13440Sstevel@tonic-gate * the set's status field, cleanup the
13450Sstevel@tonic-gate * entire device id namespace
13460Sstevel@tonic-gate */
13470Sstevel@tonic-gate if (md_devid_destroy &&
13480Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_DIDCLUP)) {
13490Sstevel@tonic-gate (void) md_devid_cleanup(setno, 1);
13500Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_DIDCLUP);
13510Sstevel@tonic-gate } else
13520Sstevel@tonic-gate (void) md_devid_cleanup(setno, 0);
13530Sstevel@tonic-gate }
13540Sstevel@tonic-gate
13550Sstevel@tonic-gate /*
13560Sstevel@tonic-gate * clear single threading on snarf, return success or error
13570Sstevel@tonic-gate */
13580Sstevel@tonic-gate out:
13590Sstevel@tonic-gate md_haltsnarf_exit(setno);
13600Sstevel@tonic-gate return (err);
13610Sstevel@tonic-gate }
13620Sstevel@tonic-gate
13630Sstevel@tonic-gate void
get_minfo(struct dk_minfo * info,minor_t mnum)13640Sstevel@tonic-gate get_minfo(struct dk_minfo *info, minor_t mnum)
13650Sstevel@tonic-gate {
13660Sstevel@tonic-gate md_unit_t *un;
13670Sstevel@tonic-gate mdi_unit_t *ui;
13680Sstevel@tonic-gate
13690Sstevel@tonic-gate info->dki_capacity = 0;
13700Sstevel@tonic-gate info->dki_lbsize = 0;
13710Sstevel@tonic-gate info->dki_media_type = 0;
13720Sstevel@tonic-gate
13730Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) {
13740Sstevel@tonic-gate return;
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate un = (md_unit_t *)md_unit_readerlock(ui);
13770Sstevel@tonic-gate info->dki_capacity = un->c.un_total_blocks;
13780Sstevel@tonic-gate md_unit_readerexit(ui);
13790Sstevel@tonic-gate info->dki_lbsize = DEV_BSIZE;
13800Sstevel@tonic-gate info->dki_media_type = DK_UNKNOWN;
13810Sstevel@tonic-gate }
13820Sstevel@tonic-gate
13830Sstevel@tonic-gate
13840Sstevel@tonic-gate void
get_info(struct dk_cinfo * info,minor_t mnum)13850Sstevel@tonic-gate get_info(struct dk_cinfo *info, minor_t mnum)
13860Sstevel@tonic-gate {
13870Sstevel@tonic-gate /*
13880Sstevel@tonic-gate * Controller Information
13890Sstevel@tonic-gate */
13900Sstevel@tonic-gate info->dki_ctype = DKC_MD;
13910Sstevel@tonic-gate info->dki_cnum = ddi_get_instance(ddi_get_parent(md_devinfo));
13920Sstevel@tonic-gate (void) strcpy(info->dki_cname,
13930Sstevel@tonic-gate ddi_get_name(ddi_get_parent(md_devinfo)));
13940Sstevel@tonic-gate /*
13950Sstevel@tonic-gate * Unit Information
13960Sstevel@tonic-gate */
13970Sstevel@tonic-gate info->dki_unit = mnum;
13980Sstevel@tonic-gate info->dki_slave = 0;
13990Sstevel@tonic-gate (void) strcpy(info->dki_dname, ddi_driver_name(md_devinfo));
14000Sstevel@tonic-gate info->dki_flags = 0;
14010Sstevel@tonic-gate info->dki_partition = 0;
14020Sstevel@tonic-gate info->dki_maxtransfer = (ushort_t)(md_maxphys / DEV_BSIZE);
14030Sstevel@tonic-gate
14040Sstevel@tonic-gate /*
14050Sstevel@tonic-gate * We can't get from here to there yet
14060Sstevel@tonic-gate */
14070Sstevel@tonic-gate info->dki_addr = 0;
14080Sstevel@tonic-gate info->dki_space = 0;
14090Sstevel@tonic-gate info->dki_prio = 0;
14100Sstevel@tonic-gate info->dki_vec = 0;
14110Sstevel@tonic-gate }
14120Sstevel@tonic-gate
14130Sstevel@tonic-gate /*
14140Sstevel@tonic-gate * open admin device
14150Sstevel@tonic-gate */
14160Sstevel@tonic-gate static int
mdadminopen(int flag,int otyp)14170Sstevel@tonic-gate mdadminopen(
14180Sstevel@tonic-gate int flag,
14190Sstevel@tonic-gate int otyp)
14200Sstevel@tonic-gate {
14210Sstevel@tonic-gate int err = 0;
14220Sstevel@tonic-gate
14230Sstevel@tonic-gate /* single thread */
14240Sstevel@tonic-gate mutex_enter(&md_mx);
14250Sstevel@tonic-gate
14260Sstevel@tonic-gate /* check type and flags */
14270Sstevel@tonic-gate if ((otyp != OTYP_CHR) && (otyp != OTYP_LYR)) {
14280Sstevel@tonic-gate err = EINVAL;
14290Sstevel@tonic-gate goto out;
14300Sstevel@tonic-gate }
14310Sstevel@tonic-gate if (((flag & FEXCL) && (md_status & MD_GBL_OPEN)) ||
14320Sstevel@tonic-gate (md_status & MD_GBL_EXCL)) {
14330Sstevel@tonic-gate err = EBUSY;
14340Sstevel@tonic-gate goto out;
14350Sstevel@tonic-gate }
14360Sstevel@tonic-gate
14370Sstevel@tonic-gate /* count and flag open */
14380Sstevel@tonic-gate md_ocnt[otyp]++;
14390Sstevel@tonic-gate md_status |= MD_GBL_OPEN;
14400Sstevel@tonic-gate if (flag & FEXCL)
14410Sstevel@tonic-gate md_status |= MD_GBL_EXCL;
14420Sstevel@tonic-gate
14430Sstevel@tonic-gate /* unlock return success */
14440Sstevel@tonic-gate out:
14450Sstevel@tonic-gate mutex_exit(&md_mx);
14460Sstevel@tonic-gate return (err);
14470Sstevel@tonic-gate }
14480Sstevel@tonic-gate
14490Sstevel@tonic-gate /*
14500Sstevel@tonic-gate * open entry point
14510Sstevel@tonic-gate */
14520Sstevel@tonic-gate static int
mdopen(dev_t * dev,int flag,int otyp,cred_t * cred_p)14530Sstevel@tonic-gate mdopen(
14540Sstevel@tonic-gate dev_t *dev,
14550Sstevel@tonic-gate int flag,
14560Sstevel@tonic-gate int otyp,
14570Sstevel@tonic-gate cred_t *cred_p)
14580Sstevel@tonic-gate {
14590Sstevel@tonic-gate minor_t mnum = getminor(*dev);
14600Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum);
14610Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum);
14620Sstevel@tonic-gate mdi_unit_t *ui = NULL;
14630Sstevel@tonic-gate int err = 0;
14640Sstevel@tonic-gate md_parent_t parent;
14650Sstevel@tonic-gate
14660Sstevel@tonic-gate /* dispatch admin device opens */
14670Sstevel@tonic-gate if (mnum == MD_ADM_MINOR)
14680Sstevel@tonic-gate return (mdadminopen(flag, otyp));
14690Sstevel@tonic-gate
14700Sstevel@tonic-gate /* lock, check status */
14710Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_READER);
14720Sstevel@tonic-gate
14730Sstevel@tonic-gate tryagain:
14740Sstevel@tonic-gate if (md_get_status() & MD_GBL_HALTED) {
14750Sstevel@tonic-gate err = ENODEV;
14760Sstevel@tonic-gate goto out;
14770Sstevel@tonic-gate }
14780Sstevel@tonic-gate
14790Sstevel@tonic-gate /* check minor */
14800Sstevel@tonic-gate if ((setno >= md_nsets) || (unit >= md_nunits)) {
14810Sstevel@tonic-gate err = ENXIO;
14820Sstevel@tonic-gate goto out;
14830Sstevel@tonic-gate }
14840Sstevel@tonic-gate
14850Sstevel@tonic-gate /* make sure we're snarfed */
14860Sstevel@tonic-gate if ((md_get_setstatus(MD_LOCAL_SET) & MD_SET_SNARFED) == 0) {
14870Sstevel@tonic-gate if (md_snarf_db_set(MD_LOCAL_SET, NULL) != 0) {
14880Sstevel@tonic-gate err = ENODEV;
14890Sstevel@tonic-gate goto out;
14900Sstevel@tonic-gate }
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) {
14930Sstevel@tonic-gate err = ENODEV;
14940Sstevel@tonic-gate goto out;
14950Sstevel@tonic-gate }
14960Sstevel@tonic-gate
14970Sstevel@tonic-gate /* check unit */
14980Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) {
14990Sstevel@tonic-gate err = ENXIO;
15000Sstevel@tonic-gate goto out;
15010Sstevel@tonic-gate }
15020Sstevel@tonic-gate
15030Sstevel@tonic-gate /*
15040Sstevel@tonic-gate * The softpart open routine may do an I/O during the open, in
15050Sstevel@tonic-gate * which case the open routine will set the OPENINPROGRESS flag
15060Sstevel@tonic-gate * and drop all locks during the I/O. If this thread sees
15070Sstevel@tonic-gate * the OPENINPROGRESS flag set, if should wait until the flag
15080Sstevel@tonic-gate * is reset before calling the driver's open routine. It must
15090Sstevel@tonic-gate * also revalidate the world after it grabs the unit_array lock
15100Sstevel@tonic-gate * since the set may have been released or the metadevice cleared
15110Sstevel@tonic-gate * during the sleep.
15120Sstevel@tonic-gate */
15130Sstevel@tonic-gate if (MD_MNSET_SETNO(setno)) {
15140Sstevel@tonic-gate mutex_enter(&ui->ui_mx);
15150Sstevel@tonic-gate if (ui->ui_lock & MD_UL_OPENINPROGRESS) {
15160Sstevel@tonic-gate rw_exit(&md_unit_array_rw.lock);
15170Sstevel@tonic-gate cv_wait(&ui->ui_cv, &ui->ui_mx);
15180Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_READER);
15190Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
15200Sstevel@tonic-gate goto tryagain;
15210Sstevel@tonic-gate }
15220Sstevel@tonic-gate mutex_exit(&ui->ui_mx);
15230Sstevel@tonic-gate }
15240Sstevel@tonic-gate
15250Sstevel@tonic-gate /* Test if device is openable */
15260Sstevel@tonic-gate if ((ui->ui_tstate & MD_NOTOPENABLE) != 0) {
15270Sstevel@tonic-gate err = ENXIO;
15280Sstevel@tonic-gate goto out;
15290Sstevel@tonic-gate }
15300Sstevel@tonic-gate
15310Sstevel@tonic-gate /* don't allow opens w/WRITE flag if stale */
15320Sstevel@tonic-gate if ((flag & FWRITE) && (md_get_setstatus(setno) & MD_SET_STALE)) {
15330Sstevel@tonic-gate err = EROFS;
15340Sstevel@tonic-gate goto out;
15350Sstevel@tonic-gate }
15360Sstevel@tonic-gate
15370Sstevel@tonic-gate /* don't allow writes to subdevices */
15380Sstevel@tonic-gate parent = md_get_parent(md_expldev(*dev));
15390Sstevel@tonic-gate if ((flag & FWRITE) && MD_HAS_PARENT(parent)) {
15400Sstevel@tonic-gate err = EROFS;
15410Sstevel@tonic-gate goto out;
15420Sstevel@tonic-gate }
15430Sstevel@tonic-gate
15440Sstevel@tonic-gate /* open underlying driver */
15450Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_open != NULL) {
15460Sstevel@tonic-gate if ((err = (*md_ops[ui->ui_opsindex]->md_open)
15470Sstevel@tonic-gate (dev, flag, otyp, cred_p, 0)) != 0)
15480Sstevel@tonic-gate goto out;
15490Sstevel@tonic-gate }
15500Sstevel@tonic-gate
15510Sstevel@tonic-gate /* or do it ourselves */
15520Sstevel@tonic-gate else {
15530Sstevel@tonic-gate /* single thread */
15540Sstevel@tonic-gate (void) md_unit_openclose_enter(ui);
15550Sstevel@tonic-gate err = md_unit_incopen(mnum, flag, otyp);
15560Sstevel@tonic-gate md_unit_openclose_exit(ui);
15570Sstevel@tonic-gate if (err != 0)
15580Sstevel@tonic-gate goto out;
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate
15610Sstevel@tonic-gate /* unlock, return status */
15620Sstevel@tonic-gate out:
15630Sstevel@tonic-gate rw_exit(&md_unit_array_rw.lock);
15640Sstevel@tonic-gate return (err);
15650Sstevel@tonic-gate }
15660Sstevel@tonic-gate
15670Sstevel@tonic-gate /*
15680Sstevel@tonic-gate * close admin device
15690Sstevel@tonic-gate */
15700Sstevel@tonic-gate static int
mdadminclose(int otyp)15710Sstevel@tonic-gate mdadminclose(
15720Sstevel@tonic-gate int otyp)
15730Sstevel@tonic-gate {
15740Sstevel@tonic-gate int i;
15750Sstevel@tonic-gate int err = 0;
15760Sstevel@tonic-gate
15770Sstevel@tonic-gate /* single thread */
15780Sstevel@tonic-gate mutex_enter(&md_mx);
15790Sstevel@tonic-gate
15800Sstevel@tonic-gate /* check type and flags */
15810Sstevel@tonic-gate if ((otyp < 0) || (otyp >= OTYPCNT)) {
15820Sstevel@tonic-gate err = EINVAL;
15830Sstevel@tonic-gate goto out;
15840Sstevel@tonic-gate } else if (md_ocnt[otyp] == 0) {
15850Sstevel@tonic-gate err = ENXIO;
15860Sstevel@tonic-gate goto out;
15870Sstevel@tonic-gate }
15880Sstevel@tonic-gate
15890Sstevel@tonic-gate /* count and flag closed */
15900Sstevel@tonic-gate if (otyp == OTYP_LYR)
15910Sstevel@tonic-gate md_ocnt[otyp]--;
15920Sstevel@tonic-gate else
15930Sstevel@tonic-gate md_ocnt[otyp] = 0;
15940Sstevel@tonic-gate md_status &= ~MD_GBL_OPEN;
15950Sstevel@tonic-gate for (i = 0; (i < OTYPCNT); ++i)
15960Sstevel@tonic-gate if (md_ocnt[i] != 0)
15970Sstevel@tonic-gate md_status |= MD_GBL_OPEN;
15980Sstevel@tonic-gate if (! (md_status & MD_GBL_OPEN))
15990Sstevel@tonic-gate md_status &= ~MD_GBL_EXCL;
16000Sstevel@tonic-gate
16010Sstevel@tonic-gate /* unlock return success */
16020Sstevel@tonic-gate out:
16030Sstevel@tonic-gate mutex_exit(&md_mx);
16040Sstevel@tonic-gate return (err);
16050Sstevel@tonic-gate }
16060Sstevel@tonic-gate
16070Sstevel@tonic-gate /*
16080Sstevel@tonic-gate * close entry point
16090Sstevel@tonic-gate */
16100Sstevel@tonic-gate static int
mdclose(dev_t dev,int flag,int otyp,cred_t * cred_p)16110Sstevel@tonic-gate mdclose(
16120Sstevel@tonic-gate dev_t dev,
16130Sstevel@tonic-gate int flag,
16140Sstevel@tonic-gate int otyp,
16150Sstevel@tonic-gate cred_t *cred_p)
16160Sstevel@tonic-gate {
16170Sstevel@tonic-gate minor_t mnum = getminor(dev);
16180Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum);
16190Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum);
16200Sstevel@tonic-gate mdi_unit_t *ui = NULL;
16210Sstevel@tonic-gate int err = 0;
16220Sstevel@tonic-gate
16230Sstevel@tonic-gate /* dispatch admin device closes */
16240Sstevel@tonic-gate if (mnum == MD_ADM_MINOR)
16250Sstevel@tonic-gate return (mdadminclose(otyp));
16260Sstevel@tonic-gate
16270Sstevel@tonic-gate /* check minor */
16280Sstevel@tonic-gate if ((setno >= md_nsets) || (unit >= md_nunits) ||
16290Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) {
16300Sstevel@tonic-gate err = ENXIO;
16310Sstevel@tonic-gate goto out;
16320Sstevel@tonic-gate }
16330Sstevel@tonic-gate
16340Sstevel@tonic-gate /* close underlying driver */
16350Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_close != NULL) {
16360Sstevel@tonic-gate if ((err = (*md_ops[ui->ui_opsindex]->md_close)
16370Sstevel@tonic-gate (dev, flag, otyp, cred_p, 0)) != 0)
16380Sstevel@tonic-gate goto out;
16390Sstevel@tonic-gate }
16400Sstevel@tonic-gate
16410Sstevel@tonic-gate /* or do it ourselves */
16420Sstevel@tonic-gate else {
16430Sstevel@tonic-gate /* single thread */
16440Sstevel@tonic-gate (void) md_unit_openclose_enter(ui);
16450Sstevel@tonic-gate err = md_unit_decopen(mnum, otyp);
16460Sstevel@tonic-gate md_unit_openclose_exit(ui);
16470Sstevel@tonic-gate if (err != 0)
16480Sstevel@tonic-gate goto out;
16490Sstevel@tonic-gate }
16500Sstevel@tonic-gate
16510Sstevel@tonic-gate /* return success */
16520Sstevel@tonic-gate out:
16530Sstevel@tonic-gate return (err);
16540Sstevel@tonic-gate }
16550Sstevel@tonic-gate
16560Sstevel@tonic-gate
16570Sstevel@tonic-gate /*
16580Sstevel@tonic-gate * This routine performs raw read operations. It is called from the
16590Sstevel@tonic-gate * device switch at normal priority.
16600Sstevel@tonic-gate *
16610Sstevel@tonic-gate * The main catch is that the *uio struct which is passed to us may
16620Sstevel@tonic-gate * specify a read which spans two buffers, which would be contiguous
16630Sstevel@tonic-gate * on a single partition, but not on a striped partition. This will
16640Sstevel@tonic-gate * be handled by mdstrategy.
16650Sstevel@tonic-gate */
16660Sstevel@tonic-gate /*ARGSUSED*/
16670Sstevel@tonic-gate static int
mdread(dev_t dev,struct uio * uio,cred_t * credp)16680Sstevel@tonic-gate mdread(dev_t dev, struct uio *uio, cred_t *credp)
16690Sstevel@tonic-gate {
16700Sstevel@tonic-gate minor_t mnum;
16710Sstevel@tonic-gate mdi_unit_t *ui;
16720Sstevel@tonic-gate int error;
16730Sstevel@tonic-gate
16740Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
16750Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) ||
16760Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
16770Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL))
16780Sstevel@tonic-gate return (ENXIO);
16790Sstevel@tonic-gate
16800Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_read != NULL)
16810Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_read)
16820Sstevel@tonic-gate (dev, uio, credp));
16830Sstevel@tonic-gate
16840Sstevel@tonic-gate if ((error = md_chk_uio(uio)) != 0)
16850Sstevel@tonic-gate return (error);
16860Sstevel@tonic-gate
16870Sstevel@tonic-gate return (physio(mdstrategy, NULL, dev, B_READ, md_minphys, uio));
16880Sstevel@tonic-gate }
16890Sstevel@tonic-gate
16900Sstevel@tonic-gate /*
16910Sstevel@tonic-gate * This routine performs async raw read operations. It is called from the
16920Sstevel@tonic-gate * device switch at normal priority.
16930Sstevel@tonic-gate *
16940Sstevel@tonic-gate * The main catch is that the *aio struct which is passed to us may
16950Sstevel@tonic-gate * specify a read which spans two buffers, which would be contiguous
16960Sstevel@tonic-gate * on a single partition, but not on a striped partition. This will
16970Sstevel@tonic-gate * be handled by mdstrategy.
16980Sstevel@tonic-gate */
16990Sstevel@tonic-gate /*ARGSUSED*/
17000Sstevel@tonic-gate static int
mdaread(dev_t dev,struct aio_req * aio,cred_t * credp)17010Sstevel@tonic-gate mdaread(dev_t dev, struct aio_req *aio, cred_t *credp)
17020Sstevel@tonic-gate {
17030Sstevel@tonic-gate minor_t mnum;
17040Sstevel@tonic-gate mdi_unit_t *ui;
17050Sstevel@tonic-gate int error;
17060Sstevel@tonic-gate
17070Sstevel@tonic-gate
17080Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
17090Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) ||
17100Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
17110Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL))
17120Sstevel@tonic-gate return (ENXIO);
17130Sstevel@tonic-gate
17140Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_aread != NULL)
17150Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_aread)
17160Sstevel@tonic-gate (dev, aio, credp));
17170Sstevel@tonic-gate
17180Sstevel@tonic-gate if ((error = md_chk_uio(aio->aio_uio)) != 0)
17190Sstevel@tonic-gate return (error);
17200Sstevel@tonic-gate
17210Sstevel@tonic-gate return (aphysio(mdstrategy, anocancel, dev, B_READ, md_minphys, aio));
17220Sstevel@tonic-gate }
17230Sstevel@tonic-gate
17240Sstevel@tonic-gate /*
17250Sstevel@tonic-gate * This routine performs raw write operations. It is called from the
17260Sstevel@tonic-gate * device switch at normal priority.
17270Sstevel@tonic-gate *
17280Sstevel@tonic-gate * The main catch is that the *uio struct which is passed to us may
17290Sstevel@tonic-gate * specify a write which spans two buffers, which would be contiguous
17300Sstevel@tonic-gate * on a single partition, but not on a striped partition. This is
17310Sstevel@tonic-gate * handled by mdstrategy.
17320Sstevel@tonic-gate *
17330Sstevel@tonic-gate */
17340Sstevel@tonic-gate /*ARGSUSED*/
17350Sstevel@tonic-gate static int
mdwrite(dev_t dev,struct uio * uio,cred_t * credp)17360Sstevel@tonic-gate mdwrite(dev_t dev, struct uio *uio, cred_t *credp)
17370Sstevel@tonic-gate {
17380Sstevel@tonic-gate minor_t mnum;
17390Sstevel@tonic-gate mdi_unit_t *ui;
17400Sstevel@tonic-gate int error;
17410Sstevel@tonic-gate
17420Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
17430Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) ||
17440Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
17450Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL))
17460Sstevel@tonic-gate return (ENXIO);
17470Sstevel@tonic-gate
17480Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_write != NULL)
17490Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_write)
17500Sstevel@tonic-gate (dev, uio, credp));
17510Sstevel@tonic-gate
17520Sstevel@tonic-gate if ((error = md_chk_uio(uio)) != 0)
17530Sstevel@tonic-gate return (error);
17540Sstevel@tonic-gate
17550Sstevel@tonic-gate return (physio(mdstrategy, NULL, dev, B_WRITE, md_minphys, uio));
17560Sstevel@tonic-gate }
17570Sstevel@tonic-gate
17580Sstevel@tonic-gate /*
17590Sstevel@tonic-gate * This routine performs async raw write operations. It is called from the
17600Sstevel@tonic-gate * device switch at normal priority.
17610Sstevel@tonic-gate *
17620Sstevel@tonic-gate * The main catch is that the *aio struct which is passed to us may
17630Sstevel@tonic-gate * specify a write which spans two buffers, which would be contiguous
17640Sstevel@tonic-gate * on a single partition, but not on a striped partition. This is
17650Sstevel@tonic-gate * handled by mdstrategy.
17660Sstevel@tonic-gate *
17670Sstevel@tonic-gate */
17680Sstevel@tonic-gate /*ARGSUSED*/
17690Sstevel@tonic-gate static int
mdawrite(dev_t dev,struct aio_req * aio,cred_t * credp)17700Sstevel@tonic-gate mdawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
17710Sstevel@tonic-gate {
17720Sstevel@tonic-gate minor_t mnum;
17730Sstevel@tonic-gate mdi_unit_t *ui;
17740Sstevel@tonic-gate int error;
17750Sstevel@tonic-gate
17760Sstevel@tonic-gate
17770Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) ||
17780Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) ||
17790Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
17800Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL))
17810Sstevel@tonic-gate return (ENXIO);
17820Sstevel@tonic-gate
17830Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_awrite != NULL)
17840Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_awrite)
17850Sstevel@tonic-gate (dev, aio, credp));
17860Sstevel@tonic-gate
17870Sstevel@tonic-gate if ((error = md_chk_uio(aio->aio_uio)) != 0)
17880Sstevel@tonic-gate return (error);
17890Sstevel@tonic-gate
17900Sstevel@tonic-gate return (aphysio(mdstrategy, anocancel, dev, B_WRITE, md_minphys, aio));
17910Sstevel@tonic-gate }
17920Sstevel@tonic-gate
17930Sstevel@tonic-gate int
mdstrategy(struct buf * bp)17940Sstevel@tonic-gate mdstrategy(struct buf *bp)
17950Sstevel@tonic-gate {
17960Sstevel@tonic-gate minor_t mnum;
17970Sstevel@tonic-gate mdi_unit_t *ui;
17980Sstevel@tonic-gate
17990Sstevel@tonic-gate ASSERT((bp->b_flags & B_DONE) == 0);
18000Sstevel@tonic-gate
18010Sstevel@tonic-gate if (panicstr)
18020Sstevel@tonic-gate md_clr_status(MD_GBL_DAEMONS_LIVE);
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate if (((mnum = getminor(bp->b_edev)) == MD_ADM_MINOR) ||
18050Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) ||
18060Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
18070Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) {
18080Sstevel@tonic-gate bp->b_flags |= B_ERROR;
18090Sstevel@tonic-gate bp->b_error = ENXIO;
18100Sstevel@tonic-gate bp->b_resid = bp->b_bcount;
18110Sstevel@tonic-gate biodone(bp);
18120Sstevel@tonic-gate return (0);
18130Sstevel@tonic-gate }
18140Sstevel@tonic-gate
18150Sstevel@tonic-gate bp->b_flags &= ~(B_ERROR | B_DONE);
18160Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_strategy != NULL) {
18170Sstevel@tonic-gate (*md_ops[ui->ui_opsindex]->md_strategy) (bp, 0, NULL);
18180Sstevel@tonic-gate } else {
18190Sstevel@tonic-gate (void) errdone(ui, bp, ENXIO);
18200Sstevel@tonic-gate }
18210Sstevel@tonic-gate return (0);
18220Sstevel@tonic-gate }
18230Sstevel@tonic-gate
18240Sstevel@tonic-gate /*
18250Sstevel@tonic-gate * Return true if the ioctl is allowed to be multithreaded.
18260Sstevel@tonic-gate * All the ioctls with MN are sent only from the message handlers through
18270Sstevel@tonic-gate * rpc.mdcommd, which (via it's own locking mechanism) takes care that not two
18280Sstevel@tonic-gate * ioctl for the same metadevice are issued at the same time.
18290Sstevel@tonic-gate * So we are safe here.
18300Sstevel@tonic-gate * The other ioctls do not mess with any metadevice structures and therefor
18310Sstevel@tonic-gate * are harmless too, if called multiple times at the same time.
18320Sstevel@tonic-gate */
18330Sstevel@tonic-gate static boolean_t
is_mt_ioctl(int cmd)18340Sstevel@tonic-gate is_mt_ioctl(int cmd) {
18350Sstevel@tonic-gate
18360Sstevel@tonic-gate switch (cmd) {
18370Sstevel@tonic-gate case MD_IOCGUNIQMSGID:
18380Sstevel@tonic-gate case MD_IOCGVERSION:
18390Sstevel@tonic-gate case MD_IOCISOPEN:
18400Sstevel@tonic-gate case MD_MN_SET_MM_OWNER:
18410Sstevel@tonic-gate case MD_MN_SET_STATE:
18420Sstevel@tonic-gate case MD_MN_SUSPEND_WRITES:
18430Sstevel@tonic-gate case MD_MN_ALLOCATE_HOTSPARE:
18440Sstevel@tonic-gate case MD_MN_SET_SETFLAGS:
18450Sstevel@tonic-gate case MD_MN_GET_SETFLAGS:
18460Sstevel@tonic-gate case MD_MN_MDDB_OPTRECFIX:
18470Sstevel@tonic-gate case MD_MN_MDDB_PARSE:
18480Sstevel@tonic-gate case MD_MN_MDDB_BLOCK:
18490Sstevel@tonic-gate case MD_MN_DB_USERREQ:
18500Sstevel@tonic-gate case MD_IOC_SPSTATUS:
18510Sstevel@tonic-gate case MD_MN_COMMD_ERR:
18520Sstevel@tonic-gate case MD_MN_SET_COMMD_RUNNING:
18530Sstevel@tonic-gate case MD_MN_RESYNC:
18540Sstevel@tonic-gate case MD_MN_SETSYNC:
18550Sstevel@tonic-gate case MD_MN_POKE_HOTSPARES:
18568452SJohn.Wren.Kennedy@Sun.COM case MD_MN_RR_DIRTY:
18578452SJohn.Wren.Kennedy@Sun.COM case MD_MN_RR_CLEAN:
18588452SJohn.Wren.Kennedy@Sun.COM case MD_MN_IOC_SPUPDATEWM:
18590Sstevel@tonic-gate return (1);
18600Sstevel@tonic-gate default:
18610Sstevel@tonic-gate return (0);
18620Sstevel@tonic-gate }
18630Sstevel@tonic-gate }
18640Sstevel@tonic-gate
18650Sstevel@tonic-gate /*
18660Sstevel@tonic-gate * This routine implements the ioctl calls for the Virtual Disk System.
18670Sstevel@tonic-gate * It is called from the device switch at normal priority.
18680Sstevel@tonic-gate */
18690Sstevel@tonic-gate /* ARGSUSED */
18700Sstevel@tonic-gate static int
mdioctl(dev_t dev,int cmd,intptr_t data,int mode,cred_t * cred_p,int * rval_p)18710Sstevel@tonic-gate mdioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cred_p,
18720Sstevel@tonic-gate int *rval_p)
18730Sstevel@tonic-gate {
18740Sstevel@tonic-gate minor_t mnum = getminor(dev);
18750Sstevel@tonic-gate mdi_unit_t *ui;
18760Sstevel@tonic-gate IOLOCK lock;
18770Sstevel@tonic-gate int err;
18780Sstevel@tonic-gate
18790Sstevel@tonic-gate /*
18800Sstevel@tonic-gate * For multinode disksets number of ioctls are allowed to be
18810Sstevel@tonic-gate * multithreaded.
18820Sstevel@tonic-gate * A fundamental assumption made in this implementation is that
18830Sstevel@tonic-gate * ioctls either do not interact with other md structures or the
18840Sstevel@tonic-gate * ioctl to the admin device can only occur if the metadevice
18850Sstevel@tonic-gate * device is open. i.e. avoid a race between metaclear and the
18860Sstevel@tonic-gate * progress of a multithreaded ioctl.
18870Sstevel@tonic-gate */
18880Sstevel@tonic-gate
18890Sstevel@tonic-gate if (!is_mt_ioctl(cmd) && md_ioctl_lock_enter() == EINTR) {
18900Sstevel@tonic-gate return (EINTR);
18910Sstevel@tonic-gate }
18920Sstevel@tonic-gate
18930Sstevel@tonic-gate /*
18940Sstevel@tonic-gate * initialize lock tracker
18950Sstevel@tonic-gate */
18960Sstevel@tonic-gate IOLOCK_INIT(&lock);
18970Sstevel@tonic-gate
18980Sstevel@tonic-gate /* Flag to indicate that MD_GBL_IOCTL_LOCK is not acquired */
18990Sstevel@tonic-gate
19000Sstevel@tonic-gate if (is_mt_ioctl(cmd)) {
19010Sstevel@tonic-gate /* increment the md_mtioctl_cnt */
19020Sstevel@tonic-gate mutex_enter(&md_mx);
19030Sstevel@tonic-gate md_mtioctl_cnt++;
19040Sstevel@tonic-gate mutex_exit(&md_mx);
19050Sstevel@tonic-gate lock.l_flags |= MD_MT_IOCTL;
19060Sstevel@tonic-gate }
19070Sstevel@tonic-gate
19080Sstevel@tonic-gate /*
19090Sstevel@tonic-gate * this has been added to prevent notification from re-snarfing
19100Sstevel@tonic-gate * so metaunload will work. It may interfere with other modules
19110Sstevel@tonic-gate * halt process.
19120Sstevel@tonic-gate */
19130Sstevel@tonic-gate if (md_get_status() & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE))
19140Sstevel@tonic-gate return (IOLOCK_RETURN(ENXIO, &lock));
19150Sstevel@tonic-gate
19160Sstevel@tonic-gate /*
19170Sstevel@tonic-gate * admin device ioctls
19180Sstevel@tonic-gate */
19190Sstevel@tonic-gate if (mnum == MD_ADM_MINOR) {
19200Sstevel@tonic-gate err = md_admin_ioctl(md_expldev(dev), cmd, (void *) data,
19217627SChris.Horne@Sun.COM mode, &lock);
19220Sstevel@tonic-gate }
19230Sstevel@tonic-gate
19240Sstevel@tonic-gate /*
19250Sstevel@tonic-gate * metadevice ioctls
19260Sstevel@tonic-gate */
19270Sstevel@tonic-gate else if ((MD_MIN2SET(mnum) >= md_nsets) ||
19280Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) ||
19299753SAndrew.Balfour@Sun.COM (md_set[MD_MIN2SET(mnum)].s_ui == NULL) ||
19300Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) {
19310Sstevel@tonic-gate err = ENXIO;
19320Sstevel@tonic-gate } else if (md_ops[ui->ui_opsindex]->md_ioctl == NULL) {
19330Sstevel@tonic-gate err = ENOTTY;
19340Sstevel@tonic-gate } else {
19350Sstevel@tonic-gate err = (*md_ops[ui->ui_opsindex]->md_ioctl)
19360Sstevel@tonic-gate (dev, cmd, (void *) data, mode, &lock);
19370Sstevel@tonic-gate }
19380Sstevel@tonic-gate
19390Sstevel@tonic-gate /*
19400Sstevel@tonic-gate * drop any locks we grabbed
19410Sstevel@tonic-gate */
19420Sstevel@tonic-gate return (IOLOCK_RETURN_IOCTLEND(err, &lock));
19430Sstevel@tonic-gate }
19440Sstevel@tonic-gate
19450Sstevel@tonic-gate static int
mddump(dev_t dev,caddr_t addr,daddr_t blkno,int nblk)19460Sstevel@tonic-gate mddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
19470Sstevel@tonic-gate {
19480Sstevel@tonic-gate minor_t mnum;
19490Sstevel@tonic-gate set_t setno;
19500Sstevel@tonic-gate mdi_unit_t *ui;
19510Sstevel@tonic-gate
19520Sstevel@tonic-gate if ((mnum = getminor(dev)) == MD_ADM_MINOR)
19530Sstevel@tonic-gate return (ENXIO);
19540Sstevel@tonic-gate
19550Sstevel@tonic-gate setno = MD_MIN2SET(mnum);
19560Sstevel@tonic-gate
19570Sstevel@tonic-gate if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits) ||
19580Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL))
19590Sstevel@tonic-gate return (ENXIO);
19600Sstevel@tonic-gate
19610Sstevel@tonic-gate
19620Sstevel@tonic-gate if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0)
19630Sstevel@tonic-gate return (ENXIO);
19640Sstevel@tonic-gate
19650Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_dump != NULL)
19660Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_dump)
19670Sstevel@tonic-gate (dev, addr, blkno, nblk));
19680Sstevel@tonic-gate
19690Sstevel@tonic-gate return (ENXIO);
19700Sstevel@tonic-gate }
19711623Stw21770
19721623Stw21770 /*
19731623Stw21770 * Metadevice unit number dispatcher
19741623Stw21770 * When this routine is called it will scan the
19751623Stw21770 * incore unit array and return the avail slot
19761623Stw21770 * hence the unit number to the caller
19771623Stw21770 *
19781623Stw21770 * Return -1 if there is nothing available
19791623Stw21770 */
19801623Stw21770 unit_t
md_get_nextunit(set_t setno)19811623Stw21770 md_get_nextunit(set_t setno)
19821623Stw21770 {
19831623Stw21770 unit_t un, start;
19841623Stw21770
19851623Stw21770 /*
19861623Stw21770 * If nothing available
19871623Stw21770 */
19881623Stw21770 if (md_set[setno].s_un_avail == 0) {
19891623Stw21770 return (MD_UNITBAD);
19901623Stw21770 }
19911623Stw21770
19921623Stw21770 mutex_enter(&md_mx);
19931623Stw21770 start = un = md_set[setno].s_un_next;
19941623Stw21770
19951623Stw21770 /* LINTED: E_CONSTANT_CONDITION */
19961623Stw21770 while (1) {
19971623Stw21770 if (md_set[setno].s_un[un] == NULL) {
19981623Stw21770 /*
19991623Stw21770 * Advance the starting index for the next
20001623Stw21770 * md_get_nextunit call
20011623Stw21770 */
20021623Stw21770 if (un == MD_MAXUNITS - 1) {
20031623Stw21770 md_set[setno].s_un_next = 0;
20041623Stw21770 } else {
20051623Stw21770 md_set[setno].s_un_next = un + 1;
20061623Stw21770 }
20071623Stw21770 break;
20081623Stw21770 }
20091623Stw21770
20101623Stw21770 un = ((un == MD_MAXUNITS - 1) ? 0 : un + 1);
20111623Stw21770
20121623Stw21770 if (un == start) {
20131623Stw21770 un = MD_UNITBAD;
20141623Stw21770 break;
20151623Stw21770 }
20161623Stw21770
20171623Stw21770 }
20181623Stw21770
20191623Stw21770 mutex_exit(&md_mx);
20201623Stw21770 return (un);
20211623Stw21770 }
2022