/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Md - is the meta-disk driver.   It sits below the UFS file system
 * but above the 'real' disk drivers, xy, id, sd etc.
 *
 * To the UFS software, md looks like a normal driver, since it has
 * the normal kinds of entries in the bdevsw and cdevsw arrays. So
 * UFS accesses md in the usual ways.  In particular, the strategy
 * routine, mdstrategy(), gets called by fbiwrite(), ufs_getapage(),
 * and ufs_writelbn().
 *
 * Md maintains an array of minor devices (meta-partitions).   Each
 * meta partition stands for a matrix of real partitions, in rows
 * which are not necessarily of equal length.  Md maintains a table,
 * with one entry for each meta-partition, which lists the rows and
 * columns of actual partitions, and the job of the strategy routine
 * is to translate from the meta-partition device and block numbers
 * known to UFS into the actual partitions' device and block numbers.
 *
 * See below, in mdstrategy(), mdreal(), and mddone() for details of
 * this translation.
 */

/*
 * Driver for Virtual Disk.
510Sstevel@tonic-gate */ 520Sstevel@tonic-gate 530Sstevel@tonic-gate #include <sys/user.h> 540Sstevel@tonic-gate #include <sys/sysmacros.h> 550Sstevel@tonic-gate #include <sys/conf.h> 560Sstevel@tonic-gate #include <sys/stat.h> 570Sstevel@tonic-gate #include <sys/errno.h> 580Sstevel@tonic-gate #include <sys/param.h> 590Sstevel@tonic-gate #include <sys/systm.h> 600Sstevel@tonic-gate #include <sys/file.h> 610Sstevel@tonic-gate #include <sys/open.h> 620Sstevel@tonic-gate #include <sys/dkio.h> 630Sstevel@tonic-gate #include <sys/vtoc.h> 640Sstevel@tonic-gate #include <sys/cmn_err.h> 650Sstevel@tonic-gate #include <sys/ddi.h> 660Sstevel@tonic-gate #include <sys/sunddi.h> 670Sstevel@tonic-gate #include <sys/debug.h> 680Sstevel@tonic-gate #include <sys/utsname.h> 690Sstevel@tonic-gate #include <sys/lvm/mdvar.h> 700Sstevel@tonic-gate #include <sys/lvm/md_names.h> 710Sstevel@tonic-gate #include <sys/lvm/md_mddb.h> 720Sstevel@tonic-gate #include <sys/lvm/md_sp.h> 730Sstevel@tonic-gate #include <sys/types.h> 740Sstevel@tonic-gate #include <sys/kmem.h> 750Sstevel@tonic-gate #include <sys/cladm.h> 760Sstevel@tonic-gate #include <sys/priv_names.h> 777627SChris.Horne@Sun.COM #include <sys/modhash.h> 780Sstevel@tonic-gate 790Sstevel@tonic-gate #ifndef lint 801366Spetede char _depends_on[] = "strmod/rpcmod"; 810Sstevel@tonic-gate #endif /* lint */ 820Sstevel@tonic-gate int md_init_debug = 0; /* module binding debug */ 830Sstevel@tonic-gate 840Sstevel@tonic-gate /* 850Sstevel@tonic-gate * Tunable to turn off the failfast behavior. 860Sstevel@tonic-gate */ 870Sstevel@tonic-gate int md_ff_disable = 0; 880Sstevel@tonic-gate 892063Shshaw /* 902063Shshaw * dynamically allocated list of non FF driver names - needs to 912063Shshaw * be freed when md is detached. 
922063Shshaw */ 932063Shshaw char **non_ff_drivers = NULL; 942063Shshaw 950Sstevel@tonic-gate md_krwlock_t md_unit_array_rw; /* protects all unit arrays */ 960Sstevel@tonic-gate md_krwlock_t nm_lock; /* protects all the name spaces */ 970Sstevel@tonic-gate 980Sstevel@tonic-gate md_resync_t md_cpr_resync; 990Sstevel@tonic-gate 1000Sstevel@tonic-gate extern char svm_bootpath[]; 1010Sstevel@tonic-gate #define SVM_PSEUDO_STR "/pseudo/md@0:" 1020Sstevel@tonic-gate 1030Sstevel@tonic-gate #define VERSION_LENGTH 6 1040Sstevel@tonic-gate #define VERSION "1.0" 1050Sstevel@tonic-gate 1060Sstevel@tonic-gate /* 1070Sstevel@tonic-gate * Keep track of possible 'orphan' entries in the name space 1080Sstevel@tonic-gate */ 1090Sstevel@tonic-gate int *md_nm_snarfed = NULL; 1100Sstevel@tonic-gate 1110Sstevel@tonic-gate /* 1120Sstevel@tonic-gate * Global tunable giving the percentage of free space left in replica during 1130Sstevel@tonic-gate * conversion of non-devid style replica to devid style replica. 1140Sstevel@tonic-gate */ 1150Sstevel@tonic-gate int md_conv_perc = MDDB_DEVID_CONV_PERC; 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate #ifdef DEBUG 1180Sstevel@tonic-gate /* debug code to verify framework exclusion guarantees */ 1190Sstevel@tonic-gate int md_in; 1200Sstevel@tonic-gate kmutex_t md_in_mx; /* used to md global stuff */ 1210Sstevel@tonic-gate #define IN_INIT 0x01 1220Sstevel@tonic-gate #define IN_FINI 0x02 1230Sstevel@tonic-gate #define IN_ATTACH 0x04 1240Sstevel@tonic-gate #define IN_DETACH 0x08 1250Sstevel@tonic-gate #define IN_OPEN 0x10 1260Sstevel@tonic-gate #define MD_SET_IN(x) { \ 1270Sstevel@tonic-gate mutex_enter(&md_in_mx); \ 1280Sstevel@tonic-gate if (md_in) \ 1290Sstevel@tonic-gate debug_enter("MD_SET_IN exclusion lost"); \ 1300Sstevel@tonic-gate if (md_in & x) \ 1310Sstevel@tonic-gate debug_enter("MD_SET_IN already set"); \ 1320Sstevel@tonic-gate md_in |= x; \ 1330Sstevel@tonic-gate mutex_exit(&md_in_mx); \ 1340Sstevel@tonic-gate } 1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate #define MD_CLR_IN(x) { \ 1370Sstevel@tonic-gate mutex_enter(&md_in_mx); \ 1380Sstevel@tonic-gate if (md_in & ~(x)) \ 1390Sstevel@tonic-gate debug_enter("MD_CLR_IN exclusion lost"); \ 1400Sstevel@tonic-gate if (!(md_in & x)) \ 1410Sstevel@tonic-gate debug_enter("MD_CLR_IN already clr"); \ 1420Sstevel@tonic-gate md_in &= ~x; \ 1430Sstevel@tonic-gate mutex_exit(&md_in_mx); \ 1440Sstevel@tonic-gate } 1450Sstevel@tonic-gate #else /* DEBUG */ 1460Sstevel@tonic-gate #define MD_SET_IN(x) 1470Sstevel@tonic-gate #define MD_CLR_IN(x) 1480Sstevel@tonic-gate #endif /* DEBUG */ 1490Sstevel@tonic-gate hrtime_t savetime1, savetime2; 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate 1520Sstevel@tonic-gate /* 1530Sstevel@tonic-gate * list things protected by md_mx even if they aren't 1540Sstevel@tonic-gate * used in this file. 1550Sstevel@tonic-gate */ 1560Sstevel@tonic-gate kmutex_t md_mx; /* used to md global stuff */ 1570Sstevel@tonic-gate kcondvar_t md_cv; /* md_status events */ 1580Sstevel@tonic-gate int md_status = 0; /* global status for the meta-driver */ 1590Sstevel@tonic-gate int md_num_daemons = 0; 1600Sstevel@tonic-gate int md_ioctl_cnt = 0; 1610Sstevel@tonic-gate int md_mtioctl_cnt = 0; /* multithreaded ioctl cnt */ 1620Sstevel@tonic-gate uint_t md_mdelay = 10; /* variable so can be patched */ 1630Sstevel@tonic-gate 1640Sstevel@tonic-gate int (*mdv_strategy_tstpnt)(buf_t *, int, void*); 1650Sstevel@tonic-gate 1660Sstevel@tonic-gate major_t md_major, md_major_targ; 1670Sstevel@tonic-gate 1680Sstevel@tonic-gate unit_t md_nunits = MD_MAXUNITS; 1690Sstevel@tonic-gate set_t md_nsets = MD_MAXSETS; 1700Sstevel@tonic-gate int md_nmedh = 0; 1710Sstevel@tonic-gate char *md_med_trans_lst = NULL; 1720Sstevel@tonic-gate md_set_t md_set[MD_MAXSETS]; 1730Sstevel@tonic-gate md_set_io_t md_set_io[MD_MAXSETS]; 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate md_krwlock_t hsp_rwlp; /* protects hot_spare_interface */ 1760Sstevel@tonic-gate md_krwlock_t ni_rwlp; /* protects 
notify_interface */ 1773036Seota md_ops_t **md_ops = NULL; 1783036Seota ddi_modhandle_t *md_mods = NULL; 1790Sstevel@tonic-gate md_ops_t *md_opslist; 1800Sstevel@tonic-gate clock_t md_hz; 1810Sstevel@tonic-gate md_event_queue_t *md_event_queue = NULL; 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate int md_in_upgrade; 1840Sstevel@tonic-gate int md_keep_repl_state; 1850Sstevel@tonic-gate int md_devid_destroy; 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate /* for sending messages thru a door to userland */ 1880Sstevel@tonic-gate door_handle_t mdmn_door_handle = NULL; 1890Sstevel@tonic-gate int mdmn_door_did = -1; 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate dev_info_t *md_devinfo = NULL; 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate md_mn_nodeid_t md_mn_mynode_id = ~0u; /* My node id (for multi-node sets) */ 1940Sstevel@tonic-gate 1950Sstevel@tonic-gate static uint_t md_ocnt[OTYPCNT]; 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate static int mdinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 1980Sstevel@tonic-gate static int mdattach(dev_info_t *, ddi_attach_cmd_t); 1990Sstevel@tonic-gate static int mddetach(dev_info_t *, ddi_detach_cmd_t); 2000Sstevel@tonic-gate static int mdopen(dev_t *, int, int, cred_t *); 2010Sstevel@tonic-gate static int mdclose(dev_t, int, int, cred_t *); 2020Sstevel@tonic-gate static int mddump(dev_t, caddr_t, daddr_t, int); 2030Sstevel@tonic-gate static int mdread(dev_t, struct uio *, cred_t *); 2040Sstevel@tonic-gate static int mdwrite(dev_t, struct uio *, cred_t *); 2050Sstevel@tonic-gate static int mdaread(dev_t, struct aio_req *, cred_t *); 2060Sstevel@tonic-gate static int mdawrite(dev_t, struct aio_req *, cred_t *); 2070Sstevel@tonic-gate static int mdioctl(dev_t, int, intptr_t, int, cred_t *, int *); 2080Sstevel@tonic-gate static int mdprop_op(dev_t, dev_info_t *, 2090Sstevel@tonic-gate ddi_prop_op_t, int, char *, caddr_t, int *); 2100Sstevel@tonic-gate 2110Sstevel@tonic-gate static struct cb_ops md_cb_ops = { 2120Sstevel@tonic-gate 
mdopen, /* open */ 2130Sstevel@tonic-gate mdclose, /* close */ 2140Sstevel@tonic-gate mdstrategy, /* strategy */ 2150Sstevel@tonic-gate /* print routine -- none yet */ 2160Sstevel@tonic-gate (int(*)(dev_t, char *))nulldev, 2170Sstevel@tonic-gate mddump, /* dump */ 2180Sstevel@tonic-gate mdread, /* read */ 2190Sstevel@tonic-gate mdwrite, /* write */ 2200Sstevel@tonic-gate mdioctl, /* ioctl */ 2210Sstevel@tonic-gate /* devmap */ 2220Sstevel@tonic-gate (int(*)(dev_t, devmap_cookie_t, offset_t, size_t, size_t *, 2230Sstevel@tonic-gate uint_t))nodev, 2240Sstevel@tonic-gate /* mmap */ 2250Sstevel@tonic-gate (int(*)(dev_t, off_t, int))nodev, 2260Sstevel@tonic-gate /* segmap */ 2270Sstevel@tonic-gate (int(*)(dev_t, off_t, struct as *, caddr_t *, off_t, unsigned, 2280Sstevel@tonic-gate unsigned, unsigned, cred_t *))nodev, 2290Sstevel@tonic-gate nochpoll, /* poll */ 2300Sstevel@tonic-gate mdprop_op, /* prop_op */ 2310Sstevel@tonic-gate 0, /* streamtab */ 2320Sstevel@tonic-gate (D_64BIT|D_MP|D_NEW), /* driver compatibility flag */ 2330Sstevel@tonic-gate CB_REV, /* cb_ops version */ 2340Sstevel@tonic-gate mdaread, /* aread */ 2350Sstevel@tonic-gate mdawrite, /* awrite */ 2360Sstevel@tonic-gate }; 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate static struct dev_ops md_devops = { 2390Sstevel@tonic-gate DEVO_REV, /* dev_ops version */ 2400Sstevel@tonic-gate 0, /* device reference count */ 2410Sstevel@tonic-gate mdinfo, /* info routine */ 2420Sstevel@tonic-gate nulldev, /* identify routine */ 2430Sstevel@tonic-gate nulldev, /* probe - not defined */ 2440Sstevel@tonic-gate mdattach, /* attach routine */ 2450Sstevel@tonic-gate mddetach, /* detach routine */ 2460Sstevel@tonic-gate nodev, /* reset - not defined */ 2470Sstevel@tonic-gate &md_cb_ops, /* driver operations */ 2480Sstevel@tonic-gate NULL, /* bus operations */ 2497656SSherry.Moore@Sun.COM nodev, /* power management */ 2507656SSherry.Moore@Sun.COM ddi_quiesce_not_needed, /* quiesce */ 2510Sstevel@tonic-gate }; 
2520Sstevel@tonic-gate 2530Sstevel@tonic-gate /* 2540Sstevel@tonic-gate * loadable module wrapper 2550Sstevel@tonic-gate */ 2560Sstevel@tonic-gate #include <sys/modctl.h> 2570Sstevel@tonic-gate 2580Sstevel@tonic-gate static struct modldrv modldrv = { 2590Sstevel@tonic-gate &mod_driverops, /* type of module -- a pseudodriver */ 2604932Spetede "Solaris Volume Manager base module", /* name of the module */ 2610Sstevel@tonic-gate &md_devops, /* driver ops */ 2620Sstevel@tonic-gate }; 2630Sstevel@tonic-gate 2640Sstevel@tonic-gate static struct modlinkage modlinkage = { 2650Sstevel@tonic-gate MODREV_1, 2660Sstevel@tonic-gate (void *)&modldrv, 2670Sstevel@tonic-gate NULL 2680Sstevel@tonic-gate }; 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate /* md_medd.c */ 2720Sstevel@tonic-gate extern void med_init(void); 2730Sstevel@tonic-gate extern void med_fini(void); 2740Sstevel@tonic-gate extern void md_devid_cleanup(set_t, uint_t); 2750Sstevel@tonic-gate 2760Sstevel@tonic-gate /* md_names.c */ 2770Sstevel@tonic-gate extern void *lookup_entry(struct nm_next_hdr *, set_t, 2780Sstevel@tonic-gate side_t, mdkey_t, md_dev64_t, int); 2790Sstevel@tonic-gate extern struct nm_next_hdr *get_first_record(set_t, int, int); 2800Sstevel@tonic-gate extern int remove_entry(struct nm_next_hdr *, 2810Sstevel@tonic-gate side_t, mdkey_t, int); 2820Sstevel@tonic-gate 2830Sstevel@tonic-gate int md_maxphys = 0; /* maximum io size in bytes */ 2840Sstevel@tonic-gate #define MD_MAXBCOUNT (1024 * 1024) 2850Sstevel@tonic-gate unsigned md_maxbcount = 0; /* maximum physio size in bytes */ 2860Sstevel@tonic-gate 2877627SChris.Horne@Sun.COM /* 2887627SChris.Horne@Sun.COM * Some md ioctls trigger io framework device tree operations. An 2897627SChris.Horne@Sun.COM * example is md ioctls that call md_resolve_bydevid(): which uses the 2907627SChris.Horne@Sun.COM * io framework to resolve a devid. 
Such operations result in acquiring 2917627SChris.Horne@Sun.COM * io framework locks (like ndi_devi_enter() of "/") while holding 2927627SChris.Horne@Sun.COM * driver locks (like md_unit_writerlock()). 2937627SChris.Horne@Sun.COM * 2947627SChris.Horne@Sun.COM * The prop_op(9E) entry point is called from the devinfo driver with 2957627SChris.Horne@Sun.COM * an active ndi_devi_enter of "/". To avoid deadlock, md's prop_op 2967627SChris.Horne@Sun.COM * implementation must avoid taking a lock that is held per above md 2977627SChris.Horne@Sun.COM * ioctl description: i.e. mdprop_op(9E) can't call md_unit_readerlock() 2987627SChris.Horne@Sun.COM * without risking deadlock. 2997627SChris.Horne@Sun.COM * 3007627SChris.Horne@Sun.COM * To service "size" requests without risking deadlock, we maintain a 3017627SChris.Horne@Sun.COM * "mnum->nblocks" sizemap (protected by a short-term global mutex). 3027627SChris.Horne@Sun.COM */ 3037627SChris.Horne@Sun.COM static kmutex_t md_nblocks_mutex; 3047627SChris.Horne@Sun.COM static mod_hash_t *md_nblocksmap; /* mnum -> nblocks */ 3057627SChris.Horne@Sun.COM int md_nblocksmap_size = 512; 3067627SChris.Horne@Sun.COM 3077627SChris.Horne@Sun.COM /* 3087627SChris.Horne@Sun.COM * Maintain "mnum->nblocks" sizemap for mdprop_op use: 3097627SChris.Horne@Sun.COM * 3107627SChris.Horne@Sun.COM * Create: any code that establishes a unit's un_total_blocks needs the 3117627SChris.Horne@Sun.COM * following type of call to establish nblocks for mdprop_op(): 3127627SChris.Horne@Sun.COM * md_nblocks_set(mnum, un->c.un_total_blocks);" 3137627SChris.Horne@Sun.COM * NOTE: locate via cscope md_create_minor_node/md_create_unit_incore 3147627SChris.Horne@Sun.COM * ...or "MD_UNIT..*=" 3157627SChris.Horne@Sun.COM * 3167627SChris.Horne@Sun.COM * Change: any code that changes a unit's un_total_blocks needs the 3177627SChris.Horne@Sun.COM * following type of call to sync nblocks for mdprop_op(): 3187627SChris.Horne@Sun.COM * md_nblocks_set(mnum, 
un->c.un_total_blocks);" 3197627SChris.Horne@Sun.COM * NOTE: locate via cscope for "un_total_blocks[ \t]*=" 3207627SChris.Horne@Sun.COM * 3217627SChris.Horne@Sun.COM * Destroy: any code that deletes a unit needs the following type of call 3227627SChris.Horne@Sun.COM * to sync nblocks for mdprop_op(): 3237627SChris.Horne@Sun.COM * md_nblocks_set(mnum, -1ULL); 3247627SChris.Horne@Sun.COM * NOTE: locate via cscope md_remove_minor_node/md_destroy_unit_incore 3257627SChris.Horne@Sun.COM * ...or "MD_UNIT..*=" 3267627SChris.Horne@Sun.COM */ 3277627SChris.Horne@Sun.COM void 3287627SChris.Horne@Sun.COM md_nblocks_set(minor_t mnum, uint64_t nblocks) 3297627SChris.Horne@Sun.COM { 3307627SChris.Horne@Sun.COM mutex_enter(&md_nblocks_mutex); 3317627SChris.Horne@Sun.COM if (nblocks == -1ULL) 3327627SChris.Horne@Sun.COM (void) mod_hash_destroy(md_nblocksmap, 3337627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum); 3347627SChris.Horne@Sun.COM else 3357627SChris.Horne@Sun.COM (void) mod_hash_replace(md_nblocksmap, 3367627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum, 3377627SChris.Horne@Sun.COM (mod_hash_val_t)(intptr_t)nblocks); 3387627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex); 3397627SChris.Horne@Sun.COM } 3407627SChris.Horne@Sun.COM 3417627SChris.Horne@Sun.COM /* get the size of a mnum from "mnum->nblocks" sizemap */ 3427627SChris.Horne@Sun.COM uint64_t 3437627SChris.Horne@Sun.COM md_nblocks_get(minor_t mnum) 3447627SChris.Horne@Sun.COM { 3457627SChris.Horne@Sun.COM mod_hash_val_t hv; 3467627SChris.Horne@Sun.COM 3477627SChris.Horne@Sun.COM mutex_enter(&md_nblocks_mutex); 3487627SChris.Horne@Sun.COM if (mod_hash_find(md_nblocksmap, 3497627SChris.Horne@Sun.COM (mod_hash_key_t)(intptr_t)mnum, &hv) == 0) { 3507627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex); 3517627SChris.Horne@Sun.COM return ((uint64_t)(intptr_t)hv); 3527627SChris.Horne@Sun.COM } 3537627SChris.Horne@Sun.COM mutex_exit(&md_nblocks_mutex); 3547627SChris.Horne@Sun.COM return (0); 
3557627SChris.Horne@Sun.COM } 3567627SChris.Horne@Sun.COM 3570Sstevel@tonic-gate /* allocate/free dynamic space associated with driver globals */ 3580Sstevel@tonic-gate void 3590Sstevel@tonic-gate md_global_alloc_free(int alloc) 3600Sstevel@tonic-gate { 3610Sstevel@tonic-gate set_t s; 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate if (alloc) { 3640Sstevel@tonic-gate /* initialize driver global locks */ 3650Sstevel@tonic-gate cv_init(&md_cv, NULL, CV_DEFAULT, NULL); 3660Sstevel@tonic-gate mutex_init(&md_mx, NULL, MUTEX_DEFAULT, NULL); 3670Sstevel@tonic-gate rw_init(&md_unit_array_rw.lock, NULL, RW_DEFAULT, NULL); 3680Sstevel@tonic-gate rw_init(&nm_lock.lock, NULL, RW_DEFAULT, NULL); 3690Sstevel@tonic-gate rw_init(&ni_rwlp.lock, NULL, RW_DRIVER, NULL); 3700Sstevel@tonic-gate rw_init(&hsp_rwlp.lock, NULL, RW_DRIVER, NULL); 3710Sstevel@tonic-gate mutex_init(&md_cpr_resync.md_resync_mutex, NULL, 3727627SChris.Horne@Sun.COM MUTEX_DEFAULT, NULL); 3737627SChris.Horne@Sun.COM mutex_init(&md_nblocks_mutex, NULL, MUTEX_DEFAULT, NULL); 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate /* initialize per set driver global locks */ 3760Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++) { 3770Sstevel@tonic-gate /* initialize per set driver globals locks */ 3780Sstevel@tonic-gate mutex_init(&md_set[s].s_dbmx, 3790Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 3800Sstevel@tonic-gate mutex_init(&md_set_io[s].md_io_mx, 3810Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 3820Sstevel@tonic-gate cv_init(&md_set_io[s].md_io_cv, 3830Sstevel@tonic-gate NULL, CV_DEFAULT, NULL); 3840Sstevel@tonic-gate } 3850Sstevel@tonic-gate } else { 3860Sstevel@tonic-gate /* destroy per set driver global locks */ 3870Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++) { 3880Sstevel@tonic-gate cv_destroy(&md_set_io[s].md_io_cv); 3890Sstevel@tonic-gate mutex_destroy(&md_set_io[s].md_io_mx); 3900Sstevel@tonic-gate mutex_destroy(&md_set[s].s_dbmx); 3910Sstevel@tonic-gate } 3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate /* destroy driver global locks */ 3947627SChris.Horne@Sun.COM mutex_destroy(&md_nblocks_mutex); 3950Sstevel@tonic-gate mutex_destroy(&md_cpr_resync.md_resync_mutex); 3960Sstevel@tonic-gate rw_destroy(&hsp_rwlp.lock); 3970Sstevel@tonic-gate rw_destroy(&ni_rwlp.lock); 3980Sstevel@tonic-gate rw_destroy(&nm_lock.lock); 3990Sstevel@tonic-gate rw_destroy(&md_unit_array_rw.lock); 4000Sstevel@tonic-gate mutex_destroy(&md_mx); 4010Sstevel@tonic-gate cv_destroy(&md_cv); 4020Sstevel@tonic-gate } 4030Sstevel@tonic-gate } 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate int 4060Sstevel@tonic-gate _init(void) 4070Sstevel@tonic-gate { 4080Sstevel@tonic-gate set_t s; 4090Sstevel@tonic-gate int err; 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate MD_SET_IN(IN_INIT); 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate /* allocate dynamic space associated with driver globals */ 4140Sstevel@tonic-gate md_global_alloc_free(1); 4150Sstevel@tonic-gate 4160Sstevel@tonic-gate /* initialize driver globals */ 4170Sstevel@tonic-gate md_major = ddi_name_to_major("md"); 4180Sstevel@tonic-gate md_hz = drv_usectohz(NUM_USEC_IN_SEC); 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate /* initialize tunable globals */ 4210Sstevel@tonic-gate if (md_maxphys == 0) /* maximum io size in bytes */ 4220Sstevel@tonic-gate md_maxphys = maxphys; 4230Sstevel@tonic-gate if (md_maxbcount == 0) /* maximum physio size in bytes */ 4240Sstevel@tonic-gate md_maxbcount = MD_MAXBCOUNT; 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate /* initialize per set driver globals */ 4270Sstevel@tonic-gate for (s = 0; s < MD_MAXSETS; s++) 4280Sstevel@tonic-gate md_set_io[s].io_state = MD_SET_ACTIVE; 4290Sstevel@tonic-gate 4300Sstevel@tonic-gate /* 4310Sstevel@tonic-gate * NOTE: the framework does not currently guarantee exclusion 4320Sstevel@tonic-gate * between _init and attach after calling mod_install. 
4330Sstevel@tonic-gate */ 4340Sstevel@tonic-gate MD_CLR_IN(IN_INIT); 4350Sstevel@tonic-gate if ((err = mod_install(&modlinkage))) { 4360Sstevel@tonic-gate MD_SET_IN(IN_INIT); 4370Sstevel@tonic-gate md_global_alloc_free(0); /* free dynamic space */ 4380Sstevel@tonic-gate MD_CLR_IN(IN_INIT); 4390Sstevel@tonic-gate } 4400Sstevel@tonic-gate return (err); 4410Sstevel@tonic-gate } 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate int 4440Sstevel@tonic-gate _fini(void) 4450Sstevel@tonic-gate { 4460Sstevel@tonic-gate int err; 4470Sstevel@tonic-gate 4480Sstevel@tonic-gate /* 4490Sstevel@tonic-gate * NOTE: the framework currently does not guarantee exclusion 4500Sstevel@tonic-gate * with attach until after mod_remove returns 0. 4510Sstevel@tonic-gate */ 4520Sstevel@tonic-gate if ((err = mod_remove(&modlinkage))) 4530Sstevel@tonic-gate return (err); 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate MD_SET_IN(IN_FINI); 4560Sstevel@tonic-gate md_global_alloc_free(0); /* free dynamic space */ 4570Sstevel@tonic-gate MD_CLR_IN(IN_FINI); 4580Sstevel@tonic-gate return (err); 4590Sstevel@tonic-gate } 4600Sstevel@tonic-gate 4610Sstevel@tonic-gate int 4620Sstevel@tonic-gate _info(struct modinfo *modinfop) 4630Sstevel@tonic-gate { 4640Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 4650Sstevel@tonic-gate } 4660Sstevel@tonic-gate 4670Sstevel@tonic-gate /* ARGSUSED */ 4680Sstevel@tonic-gate static int 4690Sstevel@tonic-gate mdattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4700Sstevel@tonic-gate { 4710Sstevel@tonic-gate int len; 4720Sstevel@tonic-gate unit_t i; 4730Sstevel@tonic-gate size_t sz; 4740Sstevel@tonic-gate char ver[VERSION_LENGTH]; 4750Sstevel@tonic-gate char **maj_str_array; 4760Sstevel@tonic-gate char *str, *str2; 4770Sstevel@tonic-gate 4780Sstevel@tonic-gate MD_SET_IN(IN_ATTACH); 4790Sstevel@tonic-gate md_in_upgrade = 0; 4800Sstevel@tonic-gate md_keep_repl_state = 0; 4810Sstevel@tonic-gate md_devid_destroy = 0; 4820Sstevel@tonic-gate 4830Sstevel@tonic-gate if (cmd != 
DDI_ATTACH) { 4840Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH); 4850Sstevel@tonic-gate return (DDI_FAILURE); 4860Sstevel@tonic-gate } 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate if (md_devinfo != NULL) { 4890Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH); 4900Sstevel@tonic-gate return (DDI_FAILURE); 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate 4930Sstevel@tonic-gate mddb_init(); 4940Sstevel@tonic-gate 4950Sstevel@tonic-gate if (md_start_daemons(TRUE)) { 4960Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH); 4970Sstevel@tonic-gate mddb_unload(); /* undo mddb_init() allocations */ 4980Sstevel@tonic-gate return (DDI_FAILURE); 4990Sstevel@tonic-gate } 5000Sstevel@tonic-gate 5010Sstevel@tonic-gate /* clear the halted state */ 5020Sstevel@tonic-gate md_clr_status(MD_GBL_HALTED); 5030Sstevel@tonic-gate 5040Sstevel@tonic-gate /* see if the diagnostic switch is on */ 5050Sstevel@tonic-gate if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, 5060Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_init_debug", 0)) 5070Sstevel@tonic-gate md_init_debug++; 5080Sstevel@tonic-gate 5090Sstevel@tonic-gate /* see if the failfast disable switch is on */ 5100Sstevel@tonic-gate if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, 5110Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_ff_disable", 0)) 5120Sstevel@tonic-gate md_ff_disable++; 5130Sstevel@tonic-gate 5140Sstevel@tonic-gate /* try and get the md_nmedh property */ 5150Sstevel@tonic-gate md_nmedh = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 5160Sstevel@tonic-gate DDI_PROP_DONTPASS, "md_nmedh", MED_DEF_HOSTS); 5170Sstevel@tonic-gate if ((md_nmedh <= 0) || (md_nmedh > MED_MAX_HOSTS)) 5180Sstevel@tonic-gate md_nmedh = MED_DEF_HOSTS; 5190Sstevel@tonic-gate 5200Sstevel@tonic-gate /* try and get the md_med_trans_lst property */ 5210Sstevel@tonic-gate len = 0; 5220Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN, 5230Sstevel@tonic-gate 0, "md_med_trans_lst", NULL, &len) != DDI_PROP_SUCCESS || 5240Sstevel@tonic-gate len == 0) { 5250Sstevel@tonic-gate md_med_trans_lst = 
md_strdup("tcp"); 5260Sstevel@tonic-gate } else { 5270Sstevel@tonic-gate md_med_trans_lst = kmem_zalloc((size_t)len, KM_SLEEP); 5280Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF, 5290Sstevel@tonic-gate 0, "md_med_trans_lst", md_med_trans_lst, &len) != 5300Sstevel@tonic-gate DDI_PROP_SUCCESS) { 5310Sstevel@tonic-gate kmem_free(md_med_trans_lst, (size_t)len); 5320Sstevel@tonic-gate md_med_trans_lst = md_strdup("tcp"); 5330Sstevel@tonic-gate } 5340Sstevel@tonic-gate } 5350Sstevel@tonic-gate 5363036Seota /* 5373036Seota * Must initialize the internal data structures before the 5383036Seota * any possible calls to 'goto attach_failure' as _fini 5393036Seota * routine references them. 5403036Seota */ 5413036Seota med_init(); 5423036Seota 5433036Seota md_ops = (md_ops_t **)kmem_zalloc( 5443036Seota sizeof (md_ops_t *) * MD_NOPS, KM_SLEEP); 5453036Seota md_mods = (ddi_modhandle_t *)kmem_zalloc( 5463036Seota sizeof (ddi_modhandle_t) * MD_NOPS, KM_SLEEP); 5473036Seota 5480Sstevel@tonic-gate /* try and get the md_xlate property */ 5490Sstevel@tonic-gate /* Should we only do this if upgrade? 
*/ 5500Sstevel@tonic-gate len = sizeof (char) * 5; 5510Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, PROP_LEN_AND_VAL_BUF, 5520Sstevel@tonic-gate 0, "md_xlate_ver", ver, &len) == DDI_PROP_SUCCESS) { 5530Sstevel@tonic-gate if (strcmp(ver, VERSION) == 0) { 5540Sstevel@tonic-gate len = 0; 5550Sstevel@tonic-gate if (ddi_prop_op(DDI_DEV_T_ANY, dip, 5560Sstevel@tonic-gate PROP_LEN_AND_VAL_ALLOC, 0, "md_xlate", 5570Sstevel@tonic-gate (caddr_t)&md_tuple_table, &len) != 5580Sstevel@tonic-gate DDI_PROP_SUCCESS) { 5590Sstevel@tonic-gate if (md_init_debug) 5600Sstevel@tonic-gate cmn_err(CE_WARN, 5610Sstevel@tonic-gate "md_xlate ddi_prop_op failed"); 5620Sstevel@tonic-gate goto attach_failure; 5630Sstevel@tonic-gate } else { 5640Sstevel@tonic-gate md_tuple_length = 5650Sstevel@tonic-gate len/(2 * ((int)sizeof (dev32_t))); 5660Sstevel@tonic-gate md_in_upgrade = 1; 5670Sstevel@tonic-gate } 5680Sstevel@tonic-gate 5690Sstevel@tonic-gate /* Get target's name to major table */ 5700Sstevel@tonic-gate if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, 5710Sstevel@tonic-gate dip, DDI_PROP_DONTPASS, 5720Sstevel@tonic-gate "md_targ_nm_table", &maj_str_array, 5730Sstevel@tonic-gate &md_majortab_len) != DDI_PROP_SUCCESS) { 5740Sstevel@tonic-gate md_majortab_len = 0; 5750Sstevel@tonic-gate if (md_init_debug) 5767627SChris.Horne@Sun.COM cmn_err(CE_WARN, "md_targ_nm_table " 5777627SChris.Horne@Sun.COM "ddi_prop_lookup_string_array " 5787627SChris.Horne@Sun.COM "failed"); 5790Sstevel@tonic-gate goto attach_failure; 5800Sstevel@tonic-gate } 5810Sstevel@tonic-gate 5820Sstevel@tonic-gate md_major_tuple_table = 5830Sstevel@tonic-gate (struct md_xlate_major_table *) 5840Sstevel@tonic-gate kmem_zalloc(md_majortab_len * 5850Sstevel@tonic-gate sizeof (struct md_xlate_major_table), KM_SLEEP); 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate for (i = 0; i < md_majortab_len; i++) { 5880Sstevel@tonic-gate /* Getting major name */ 5890Sstevel@tonic-gate str = strchr(maj_str_array[i], ' '); 
5900Sstevel@tonic-gate if (str == NULL) 5910Sstevel@tonic-gate continue; 5920Sstevel@tonic-gate *str = '\0'; 5930Sstevel@tonic-gate md_major_tuple_table[i].drv_name = 5940Sstevel@tonic-gate md_strdup(maj_str_array[i]); 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate /* Simplified atoi to get major number */ 5970Sstevel@tonic-gate str2 = str + 1; 5980Sstevel@tonic-gate md_major_tuple_table[i].targ_maj = 0; 5990Sstevel@tonic-gate while ((*str2 >= '0') && (*str2 <= '9')) { 6007627SChris.Horne@Sun.COM md_major_tuple_table[i].targ_maj *= 10; 6017627SChris.Horne@Sun.COM md_major_tuple_table[i].targ_maj += 6027627SChris.Horne@Sun.COM *str2++ - '0'; 6030Sstevel@tonic-gate } 6040Sstevel@tonic-gate *str = ' '; 6050Sstevel@tonic-gate } 6060Sstevel@tonic-gate ddi_prop_free((void *)maj_str_array); 6070Sstevel@tonic-gate } else { 6080Sstevel@tonic-gate if (md_init_debug) 6090Sstevel@tonic-gate cmn_err(CE_WARN, "md_xlate_ver is incorrect"); 6100Sstevel@tonic-gate goto attach_failure; 6110Sstevel@tonic-gate } 6120Sstevel@tonic-gate } 6130Sstevel@tonic-gate 6140Sstevel@tonic-gate /* 6150Sstevel@tonic-gate * Check for properties: 6160Sstevel@tonic-gate * md_keep_repl_state and md_devid_destroy 6170Sstevel@tonic-gate * and set globals if these exist. 
6180Sstevel@tonic-gate */ 6190Sstevel@tonic-gate md_keep_repl_state = ddi_getprop(DDI_DEV_T_ANY, dip, 6207627SChris.Horne@Sun.COM 0, "md_keep_repl_state", 0); 6210Sstevel@tonic-gate 6220Sstevel@tonic-gate md_devid_destroy = ddi_getprop(DDI_DEV_T_ANY, dip, 6237627SChris.Horne@Sun.COM 0, "md_devid_destroy", 0); 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate if (MD_UPGRADE) 6260Sstevel@tonic-gate md_major_targ = md_targ_name_to_major("md"); 6270Sstevel@tonic-gate else 6280Sstevel@tonic-gate md_major_targ = 0; 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate /* allocate admin device node */ 6310Sstevel@tonic-gate if (ddi_create_priv_minor_node(dip, "admin", S_IFCHR, 6320Sstevel@tonic-gate MD_ADM_MINOR, DDI_PSEUDO, 0, NULL, PRIV_SYS_CONFIG, 0640)) 6330Sstevel@tonic-gate goto attach_failure; 6340Sstevel@tonic-gate 6350Sstevel@tonic-gate if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 6360Sstevel@tonic-gate DDI_KERNEL_IOCTL, NULL, 0) != DDI_SUCCESS) 6370Sstevel@tonic-gate goto attach_failure; 6380Sstevel@tonic-gate 6390Sstevel@tonic-gate if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, 6400Sstevel@tonic-gate "ddi-abrwrite-supported", 1) != DDI_SUCCESS) 6410Sstevel@tonic-gate goto attach_failure; 6420Sstevel@tonic-gate 6430Sstevel@tonic-gate /* these could have been cleared by a detach */ 6440Sstevel@tonic-gate md_nunits = MD_MAXUNITS; 6450Sstevel@tonic-gate md_nsets = MD_MAXSETS; 6460Sstevel@tonic-gate 6470Sstevel@tonic-gate sz = sizeof (void *) * MD_MAXUNITS; 6480Sstevel@tonic-gate if (md_set[0].s_un == NULL) 6490Sstevel@tonic-gate md_set[0].s_un = kmem_zalloc(sz, KM_SLEEP); 6500Sstevel@tonic-gate if (md_set[0].s_ui == NULL) 6510Sstevel@tonic-gate md_set[0].s_ui = kmem_zalloc(sz, KM_SLEEP); 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate md_devinfo = dip; 6540Sstevel@tonic-gate 6550Sstevel@tonic-gate /* 6560Sstevel@tonic-gate * Only allocate device node for root mirror metadevice. 
6570Sstevel@tonic-gate * Don't pre-allocate unnecessary device nodes (thus slowing down a 6580Sstevel@tonic-gate * boot when we attach). 6590Sstevel@tonic-gate * We can't read the mddbs in attach. The mddbs will be read 6600Sstevel@tonic-gate * by metainit during the boot process when it is doing the 6610Sstevel@tonic-gate * auto-take processing and any other minor nodes will be 6620Sstevel@tonic-gate * allocated at that point. 6630Sstevel@tonic-gate * 6640Sstevel@tonic-gate * There are two scenarios to be aware of here: 6650Sstevel@tonic-gate * 1) when we are booting from a mirrored root we need the root 6660Sstevel@tonic-gate * metadevice to exist very early (during vfs_mountroot processing) 6670Sstevel@tonic-gate * 2) we need all of the nodes to be created so that any mnttab entries 6680Sstevel@tonic-gate * will succeed (handled by metainit reading the mddb during boot). 6690Sstevel@tonic-gate */ 6700Sstevel@tonic-gate if (strncmp(SVM_PSEUDO_STR, svm_bootpath, sizeof (SVM_PSEUDO_STR) - 1) 6710Sstevel@tonic-gate == 0) { 6720Sstevel@tonic-gate char *p; 6730Sstevel@tonic-gate int mnum = 0; 6740Sstevel@tonic-gate 6750Sstevel@tonic-gate /* 6760Sstevel@tonic-gate * The svm_bootpath string looks something like 6770Sstevel@tonic-gate * /pseudo/md@0:0,150,blk where 150 is the minor number 6780Sstevel@tonic-gate * in this example so we need to set the pointer p onto 6790Sstevel@tonic-gate * the first digit of the minor number and convert it 6800Sstevel@tonic-gate * from ascii. 
6810Sstevel@tonic-gate */ 6820Sstevel@tonic-gate for (p = svm_bootpath + sizeof (SVM_PSEUDO_STR) + 1; 6830Sstevel@tonic-gate *p >= '0' && *p <= '9'; p++) { 6840Sstevel@tonic-gate mnum *= 10; 6850Sstevel@tonic-gate mnum += *p - '0'; 6860Sstevel@tonic-gate } 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate if (md_create_minor_node(0, mnum)) { 6890Sstevel@tonic-gate kmem_free(md_set[0].s_un, sz); 6900Sstevel@tonic-gate kmem_free(md_set[0].s_ui, sz); 6910Sstevel@tonic-gate goto attach_failure; 6920Sstevel@tonic-gate } 6930Sstevel@tonic-gate } 6940Sstevel@tonic-gate 6957627SChris.Horne@Sun.COM /* create the hash to store the meta device sizes */ 6967627SChris.Horne@Sun.COM md_nblocksmap = mod_hash_create_idhash("md_nblocksmap", 6977627SChris.Horne@Sun.COM md_nblocksmap_size, mod_hash_null_valdtor); 6987627SChris.Horne@Sun.COM 6990Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH); 7000Sstevel@tonic-gate return (DDI_SUCCESS); 7010Sstevel@tonic-gate 7020Sstevel@tonic-gate attach_failure: 7030Sstevel@tonic-gate /* 7040Sstevel@tonic-gate * Use our own detach routine to toss any stuff we allocated above. 7050Sstevel@tonic-gate * NOTE: detach will call md_halt to free the mddb_init allocations. 
7060Sstevel@tonic-gate */ 7070Sstevel@tonic-gate MD_CLR_IN(IN_ATTACH); 7080Sstevel@tonic-gate if (mddetach(dip, DDI_DETACH) != DDI_SUCCESS) 7090Sstevel@tonic-gate cmn_err(CE_WARN, "detach from attach failed"); 7100Sstevel@tonic-gate return (DDI_FAILURE); 7110Sstevel@tonic-gate } 7120Sstevel@tonic-gate 7130Sstevel@tonic-gate /* ARGSUSED */ 7140Sstevel@tonic-gate static int 7150Sstevel@tonic-gate mddetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 7160Sstevel@tonic-gate { 7170Sstevel@tonic-gate extern int check_active_locators(); 7180Sstevel@tonic-gate set_t s; 7190Sstevel@tonic-gate size_t sz; 7200Sstevel@tonic-gate int len; 7210Sstevel@tonic-gate 7220Sstevel@tonic-gate MD_SET_IN(IN_DETACH); 7230Sstevel@tonic-gate 7240Sstevel@tonic-gate /* check command */ 7250Sstevel@tonic-gate if (cmd != DDI_DETACH) { 7260Sstevel@tonic-gate MD_CLR_IN(IN_DETACH); 7270Sstevel@tonic-gate return (DDI_FAILURE); 7280Sstevel@tonic-gate } 7290Sstevel@tonic-gate 7300Sstevel@tonic-gate /* 7310Sstevel@tonic-gate * if we have not already halted yet we have no active config 7320Sstevel@tonic-gate * then automatically initiate a halt so we can detach. 
7330Sstevel@tonic-gate */ 7340Sstevel@tonic-gate if (!(md_get_status() & MD_GBL_HALTED)) { 7350Sstevel@tonic-gate if (check_active_locators() == 0) { 7360Sstevel@tonic-gate /* 7370Sstevel@tonic-gate * NOTE: a successful md_halt will have done the 7380Sstevel@tonic-gate * mddb_unload to free allocations done in mddb_init 7390Sstevel@tonic-gate */ 7400Sstevel@tonic-gate if (md_halt(MD_NO_GBL_LOCKS_HELD)) { 7410Sstevel@tonic-gate cmn_err(CE_NOTE, "md:detach: " 7420Sstevel@tonic-gate "Could not halt Solaris Volume Manager"); 7430Sstevel@tonic-gate MD_CLR_IN(IN_DETACH); 7440Sstevel@tonic-gate return (DDI_FAILURE); 7450Sstevel@tonic-gate } 7460Sstevel@tonic-gate } 7470Sstevel@tonic-gate 7480Sstevel@tonic-gate /* fail detach if we have not halted */ 7490Sstevel@tonic-gate if (!(md_get_status() & MD_GBL_HALTED)) { 7500Sstevel@tonic-gate MD_CLR_IN(IN_DETACH); 7510Sstevel@tonic-gate return (DDI_FAILURE); 7520Sstevel@tonic-gate } 7530Sstevel@tonic-gate } 7540Sstevel@tonic-gate 7550Sstevel@tonic-gate /* must be in halted state, this will be cleared on next attach */ 7560Sstevel@tonic-gate ASSERT(md_get_status() & MD_GBL_HALTED); 7570Sstevel@tonic-gate 7580Sstevel@tonic-gate /* cleanup attach allocations and initializations */ 7590Sstevel@tonic-gate md_major_targ = 0; 7600Sstevel@tonic-gate 7610Sstevel@tonic-gate sz = sizeof (void *) * md_nunits; 7620Sstevel@tonic-gate for (s = 0; s < md_nsets; s++) { 7630Sstevel@tonic-gate if (md_set[s].s_un != NULL) { 7640Sstevel@tonic-gate kmem_free(md_set[s].s_un, sz); 7650Sstevel@tonic-gate md_set[s].s_un = NULL; 7660Sstevel@tonic-gate } 7670Sstevel@tonic-gate 7680Sstevel@tonic-gate if (md_set[s].s_ui != NULL) { 7690Sstevel@tonic-gate kmem_free(md_set[s].s_ui, sz); 7700Sstevel@tonic-gate md_set[s].s_ui = NULL; 7710Sstevel@tonic-gate } 7720Sstevel@tonic-gate } 7730Sstevel@tonic-gate md_nunits = 0; 7740Sstevel@tonic-gate md_nsets = 0; 7750Sstevel@tonic-gate md_nmedh = 0; 7760Sstevel@tonic-gate 7772063Shshaw if (non_ff_drivers != NULL) { 
7782063Shshaw int i; 7792063Shshaw 7802063Shshaw for (i = 0; non_ff_drivers[i] != NULL; i++) 7817627SChris.Horne@Sun.COM kmem_free(non_ff_drivers[i], 7827627SChris.Horne@Sun.COM strlen(non_ff_drivers[i]) + 1); 7832063Shshaw 7842103Shshaw /* free i+1 entries because there is a null entry at list end */ 7852103Shshaw kmem_free(non_ff_drivers, (i + 1) * sizeof (char *)); 7862063Shshaw non_ff_drivers = NULL; 7872063Shshaw } 7882063Shshaw 7890Sstevel@tonic-gate if (md_med_trans_lst != NULL) { 7900Sstevel@tonic-gate kmem_free(md_med_trans_lst, strlen(md_med_trans_lst) + 1); 7910Sstevel@tonic-gate md_med_trans_lst = NULL; 7920Sstevel@tonic-gate } 7930Sstevel@tonic-gate 7940Sstevel@tonic-gate if (md_mods != NULL) { 7950Sstevel@tonic-gate kmem_free(md_mods, sizeof (ddi_modhandle_t) * MD_NOPS); 7960Sstevel@tonic-gate md_mods = NULL; 7970Sstevel@tonic-gate } 7980Sstevel@tonic-gate 7990Sstevel@tonic-gate if (md_ops != NULL) { 8000Sstevel@tonic-gate kmem_free(md_ops, sizeof (md_ops_t *) * MD_NOPS); 8010Sstevel@tonic-gate md_ops = NULL; 8020Sstevel@tonic-gate } 8030Sstevel@tonic-gate 8040Sstevel@tonic-gate if (MD_UPGRADE) { 8050Sstevel@tonic-gate len = md_tuple_length * (2 * ((int)sizeof (dev32_t))); 8060Sstevel@tonic-gate md_in_upgrade = 0; 8070Sstevel@tonic-gate md_xlate_free(len); 8080Sstevel@tonic-gate md_majortab_free(); 8090Sstevel@tonic-gate } 8100Sstevel@tonic-gate 8110Sstevel@tonic-gate /* 8120Sstevel@tonic-gate * Undo what we did in mdattach, freeing resources 8130Sstevel@tonic-gate * and removing things we installed. The system 8140Sstevel@tonic-gate * framework guarantees we are not active with this devinfo 8150Sstevel@tonic-gate * node in any other entry points at this time. 
8160Sstevel@tonic-gate */ 8170Sstevel@tonic-gate ddi_prop_remove_all(dip); 8180Sstevel@tonic-gate ddi_remove_minor_node(dip, NULL); 8190Sstevel@tonic-gate 8200Sstevel@tonic-gate med_fini(); 8217627SChris.Horne@Sun.COM 8227627SChris.Horne@Sun.COM mod_hash_destroy_idhash(md_nblocksmap); 8237627SChris.Horne@Sun.COM 8240Sstevel@tonic-gate md_devinfo = NULL; 8250Sstevel@tonic-gate 8260Sstevel@tonic-gate MD_CLR_IN(IN_DETACH); 8270Sstevel@tonic-gate return (DDI_SUCCESS); 8280Sstevel@tonic-gate } 8290Sstevel@tonic-gate 8300Sstevel@tonic-gate 8310Sstevel@tonic-gate /* 8320Sstevel@tonic-gate * Given the device number return the devinfo pointer 8330Sstevel@tonic-gate * given to md via md_attach 8340Sstevel@tonic-gate */ 8350Sstevel@tonic-gate /*ARGSUSED*/ 8360Sstevel@tonic-gate static int 8370Sstevel@tonic-gate mdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 8380Sstevel@tonic-gate { 8390Sstevel@tonic-gate int error = DDI_FAILURE; 8400Sstevel@tonic-gate 8410Sstevel@tonic-gate switch (infocmd) { 8420Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 8430Sstevel@tonic-gate if (md_devinfo) { 8440Sstevel@tonic-gate *result = (void *)md_devinfo; 8450Sstevel@tonic-gate error = DDI_SUCCESS; 8460Sstevel@tonic-gate } 8470Sstevel@tonic-gate break; 8480Sstevel@tonic-gate 8490Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 8500Sstevel@tonic-gate *result = (void *)0; 8510Sstevel@tonic-gate error = DDI_SUCCESS; 8520Sstevel@tonic-gate break; 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate return (error); 8550Sstevel@tonic-gate } 8560Sstevel@tonic-gate 8570Sstevel@tonic-gate /* 8580Sstevel@tonic-gate * property operation routine. return the number of blocks for the partition 8590Sstevel@tonic-gate * in question or forward the request to the property facilities. 
8600Sstevel@tonic-gate */ 8610Sstevel@tonic-gate static int 8620Sstevel@tonic-gate mdprop_op( 8630Sstevel@tonic-gate dev_t dev, /* device number associated with device */ 8640Sstevel@tonic-gate dev_info_t *dip, /* device info struct for this device */ 8650Sstevel@tonic-gate ddi_prop_op_t prop_op, /* property operator */ 8660Sstevel@tonic-gate int mod_flags, /* property flags */ 8670Sstevel@tonic-gate char *name, /* name of property */ 8680Sstevel@tonic-gate caddr_t valuep, /* where to put property value */ 8690Sstevel@tonic-gate int *lengthp) /* put length of property here */ 8700Sstevel@tonic-gate { 8717627SChris.Horne@Sun.COM return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags, 8727627SChris.Horne@Sun.COM name, valuep, lengthp, md_nblocks_get(getminor(dev)))); 8730Sstevel@tonic-gate } 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate static void 8760Sstevel@tonic-gate snarf_user_data(set_t setno) 8770Sstevel@tonic-gate { 8780Sstevel@tonic-gate mddb_recid_t recid; 8790Sstevel@tonic-gate mddb_recstatus_t status; 8800Sstevel@tonic-gate 8810Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 8820Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_USER, 0)) > 0) { 8830Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 8840Sstevel@tonic-gate continue; 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate status = mddb_getrecstatus(recid); 8870Sstevel@tonic-gate if (status == MDDB_STALE) 8880Sstevel@tonic-gate continue; 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate if (status == MDDB_NODATA) { 8910Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL); 8920Sstevel@tonic-gate continue; 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate ASSERT(status == MDDB_OK); 8960Sstevel@tonic-gate 8970Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 8980Sstevel@tonic-gate } 8990Sstevel@tonic-gate } 9000Sstevel@tonic-gate 9010Sstevel@tonic-gate static void 9020Sstevel@tonic-gate md_print_block_usage(mddb_set_t *s, uint_t blks) 
9030Sstevel@tonic-gate { 9040Sstevel@tonic-gate uint_t ib; 9050Sstevel@tonic-gate int li; 9060Sstevel@tonic-gate mddb_mb_ic_t *mbip; 9070Sstevel@tonic-gate uint_t max_blk_needed; 9080Sstevel@tonic-gate mddb_lb_t *lbp; 9090Sstevel@tonic-gate mddb_sidelocator_t *slp; 9100Sstevel@tonic-gate int drv_index; 9110Sstevel@tonic-gate md_splitname sn; 9120Sstevel@tonic-gate char *name; 9130Sstevel@tonic-gate char *suffix; 9140Sstevel@tonic-gate size_t prefixlen; 9150Sstevel@tonic-gate size_t suffixlen; 9160Sstevel@tonic-gate int alloc_sz; 9170Sstevel@tonic-gate 9180Sstevel@tonic-gate 9190Sstevel@tonic-gate max_blk_needed = s->s_totalblkcnt - s->s_freeblkcnt + blks; 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate cmn_err(CE_WARN, "Blocks in Metadevice State Database: %d\n" 9227627SChris.Horne@Sun.COM " Additional Blocks Needed: %d\n\n" 9237627SChris.Horne@Sun.COM " Increase size of following replicas for\n" 9247627SChris.Horne@Sun.COM " device relocatability by deleting listed\n" 9257627SChris.Horne@Sun.COM " replica and re-adding replica with\n" 9267627SChris.Horne@Sun.COM " increased size (see metadb(1M)):\n" 9277627SChris.Horne@Sun.COM " Replica Increase By", 9287627SChris.Horne@Sun.COM s->s_totalblkcnt, (blks - s->s_freeblkcnt)); 9290Sstevel@tonic-gate 9300Sstevel@tonic-gate lbp = s->s_lbp; 9310Sstevel@tonic-gate 9320Sstevel@tonic-gate for (li = 0; li < lbp->lb_loccnt; li++) { 9330Sstevel@tonic-gate if (lbp->lb_locators[li].l_flags & MDDB_F_DELETED) 9340Sstevel@tonic-gate continue; 9350Sstevel@tonic-gate ib = 0; 9360Sstevel@tonic-gate for (mbip = s->s_mbiarray[li]; mbip != NULL; 9370Sstevel@tonic-gate mbip = mbip->mbi_next) { 9380Sstevel@tonic-gate ib += (uint_t)mbip->mbi_mddb_mb.mb_blkcnt; 9390Sstevel@tonic-gate } 9400Sstevel@tonic-gate if (ib == 0) 9410Sstevel@tonic-gate continue; 9420Sstevel@tonic-gate if (ib < max_blk_needed) { 9430Sstevel@tonic-gate slp = &lbp->lb_sidelocators[s->s_sideno][li]; 9440Sstevel@tonic-gate drv_index = slp->l_drvnm_index; 
9450Sstevel@tonic-gate mddb_locatorblock2splitname(s->s_lnp, li, s->s_sideno, 9467627SChris.Horne@Sun.COM &sn); 9470Sstevel@tonic-gate prefixlen = SPN_PREFIX(&sn).pre_len; 9480Sstevel@tonic-gate suffixlen = SPN_SUFFIX(&sn).suf_len; 9490Sstevel@tonic-gate alloc_sz = (int)(prefixlen + suffixlen + 2); 9500Sstevel@tonic-gate name = (char *)kmem_alloc(alloc_sz, KM_SLEEP); 9510Sstevel@tonic-gate (void) strncpy(name, SPN_PREFIX(&sn).pre_data, 9520Sstevel@tonic-gate prefixlen); 9530Sstevel@tonic-gate name[prefixlen] = '/'; 9540Sstevel@tonic-gate suffix = name + (prefixlen + 1); 9550Sstevel@tonic-gate (void) strncpy(suffix, SPN_SUFFIX(&sn).suf_data, 9560Sstevel@tonic-gate suffixlen); 9570Sstevel@tonic-gate name[prefixlen + suffixlen + 1] = '\0'; 9580Sstevel@tonic-gate cmn_err(CE_WARN, 9597627SChris.Horne@Sun.COM " %s (%s:%d:%d) %d blocks", 9607627SChris.Horne@Sun.COM name, lbp->lb_drvnm[drv_index].dn_data, 9617627SChris.Horne@Sun.COM slp->l_mnum, lbp->lb_locators[li].l_blkno, 9627627SChris.Horne@Sun.COM (max_blk_needed - ib)); 9630Sstevel@tonic-gate kmem_free(name, alloc_sz); 9640Sstevel@tonic-gate } 9650Sstevel@tonic-gate } 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate 9680Sstevel@tonic-gate /* 9690Sstevel@tonic-gate * md_create_minor_node: 9700Sstevel@tonic-gate * Create the minor device for the given set and un_self_id. 9710Sstevel@tonic-gate * 9720Sstevel@tonic-gate * Input: 9730Sstevel@tonic-gate * setno - set number 9740Sstevel@tonic-gate * mnum - selfID of unit 9750Sstevel@tonic-gate * 9760Sstevel@tonic-gate * Output: 9770Sstevel@tonic-gate * None. 9780Sstevel@tonic-gate * 9790Sstevel@tonic-gate * Returns 0 for success, 1 for failure. 9800Sstevel@tonic-gate * 9810Sstevel@tonic-gate * Side-effects: 9820Sstevel@tonic-gate * None. 
9830Sstevel@tonic-gate */ 9840Sstevel@tonic-gate int 9850Sstevel@tonic-gate md_create_minor_node(set_t setno, minor_t mnum) 9860Sstevel@tonic-gate { 9870Sstevel@tonic-gate char name[20]; 9880Sstevel@tonic-gate 9890Sstevel@tonic-gate /* Check for valid arguments */ 9900Sstevel@tonic-gate if (setno >= MD_MAXSETS || MD_MIN2UNIT(mnum) >= MD_MAXUNITS) 9910Sstevel@tonic-gate return (1); 9920Sstevel@tonic-gate 9930Sstevel@tonic-gate (void) snprintf(name, 20, "%u,%u,blk", 9947627SChris.Horne@Sun.COM (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum)); 9950Sstevel@tonic-gate 9960Sstevel@tonic-gate if (ddi_create_minor_node(md_devinfo, name, S_IFBLK, 9970Sstevel@tonic-gate MD_MKMIN(setno, mnum), DDI_PSEUDO, 0)) 9980Sstevel@tonic-gate return (1); 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate (void) snprintf(name, 20, "%u,%u,raw", 10017627SChris.Horne@Sun.COM (unsigned)setno, (unsigned)MD_MIN2UNIT(mnum)); 10020Sstevel@tonic-gate 10030Sstevel@tonic-gate if (ddi_create_minor_node(md_devinfo, name, S_IFCHR, 10040Sstevel@tonic-gate MD_MKMIN(setno, mnum), DDI_PSEUDO, 0)) 10050Sstevel@tonic-gate return (1); 10060Sstevel@tonic-gate 10070Sstevel@tonic-gate return (0); 10080Sstevel@tonic-gate } 10090Sstevel@tonic-gate 10100Sstevel@tonic-gate /* 10110Sstevel@tonic-gate * For a given key check if it is an orphaned record. 10120Sstevel@tonic-gate * The following conditions are used to determine an orphan. 10130Sstevel@tonic-gate * 1. The device associated with that key is not a metadevice. 10140Sstevel@tonic-gate * 2. If DEVID_STYLE then the physical device does not have a device Id 10150Sstevel@tonic-gate * associated with it. 10160Sstevel@tonic-gate * 10170Sstevel@tonic-gate * If a key does not have an entry in the devid namespace it could be 10180Sstevel@tonic-gate * a device that does not support device ids. Hence the record is not 10190Sstevel@tonic-gate * deleted. 
10200Sstevel@tonic-gate */ 10210Sstevel@tonic-gate 10220Sstevel@tonic-gate static int 10230Sstevel@tonic-gate md_verify_orphaned_record(set_t setno, mdkey_t key) 10240Sstevel@tonic-gate { 10250Sstevel@tonic-gate md_dev64_t odev; /* orphaned dev */ 10260Sstevel@tonic-gate mddb_set_t *s; 10270Sstevel@tonic-gate side_t side = 0; 10280Sstevel@tonic-gate struct nm_next_hdr *did_nh = NULL; 10290Sstevel@tonic-gate 10300Sstevel@tonic-gate s = (mddb_set_t *)md_set[setno].s_db; 10310Sstevel@tonic-gate if ((did_nh = get_first_record(setno, 1, (NM_DEVID | NM_NOTSHARED))) 10320Sstevel@tonic-gate == NULL) 10330Sstevel@tonic-gate return (0); 10340Sstevel@tonic-gate /* 10350Sstevel@tonic-gate * If devid style is set then get the dev_t using MD_NOTRUST_DEVT 10360Sstevel@tonic-gate */ 10370Sstevel@tonic-gate if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE) { 10380Sstevel@tonic-gate odev = md_getdevnum(setno, side, key, MD_NOTRUST_DEVT); 10390Sstevel@tonic-gate if ((odev == NODEV64) || (md_getmajor(odev) == md_major)) 10400Sstevel@tonic-gate return (0); 10410Sstevel@tonic-gate if (lookup_entry(did_nh, setno, side, key, odev, NM_DEVID) == 10427627SChris.Horne@Sun.COM NULL) 10430Sstevel@tonic-gate return (1); 10440Sstevel@tonic-gate } 10450Sstevel@tonic-gate return (0); 10460Sstevel@tonic-gate } 10470Sstevel@tonic-gate 10480Sstevel@tonic-gate int 10490Sstevel@tonic-gate md_snarf_db_set(set_t setno, md_error_t *ep) 10500Sstevel@tonic-gate { 10510Sstevel@tonic-gate int err = 0; 10520Sstevel@tonic-gate int i; 10530Sstevel@tonic-gate mddb_recid_t recid; 10540Sstevel@tonic-gate mddb_type_t drvrid; 10550Sstevel@tonic-gate mddb_recstatus_t status; 10560Sstevel@tonic-gate md_ops_t *ops; 10570Sstevel@tonic-gate uint_t privat; 10580Sstevel@tonic-gate mddb_set_t *s; 10590Sstevel@tonic-gate uint_t cvt_blks; 10600Sstevel@tonic-gate struct nm_next_hdr *nh; 10610Sstevel@tonic-gate mdkey_t key = MD_KEYWILD; 10620Sstevel@tonic-gate side_t side = 0; 10630Sstevel@tonic-gate int size; 10640Sstevel@tonic-gate 
int devid_flag; 10650Sstevel@tonic-gate int retval; 10661623Stw21770 uint_t un; 10671623Stw21770 int un_next_set = 0; 10680Sstevel@tonic-gate 10690Sstevel@tonic-gate md_haltsnarf_enter(setno); 10700Sstevel@tonic-gate 10710Sstevel@tonic-gate mutex_enter(&md_mx); 10720Sstevel@tonic-gate if (md_set[setno].s_status & MD_SET_SNARFED) { 10730Sstevel@tonic-gate mutex_exit(&md_mx); 10740Sstevel@tonic-gate md_haltsnarf_exit(setno); 10750Sstevel@tonic-gate return (0); 10760Sstevel@tonic-gate } 10770Sstevel@tonic-gate mutex_exit(&md_mx); 10780Sstevel@tonic-gate 10790Sstevel@tonic-gate if (! (md_get_status() & MD_GBL_DAEMONS_LIVE)) { 10800Sstevel@tonic-gate if (md_start_daemons(TRUE)) { 10810Sstevel@tonic-gate if (ep != NULL) 10820Sstevel@tonic-gate (void) mdsyserror(ep, ENXIO); 10830Sstevel@tonic-gate err = -1; 10840Sstevel@tonic-gate goto out; 10850Sstevel@tonic-gate } 10860Sstevel@tonic-gate } 10870Sstevel@tonic-gate 10880Sstevel@tonic-gate 10890Sstevel@tonic-gate /* 10900Sstevel@tonic-gate * Load the devid name space if it exists 10910Sstevel@tonic-gate */ 10920Sstevel@tonic-gate (void) md_load_namespace(setno, NULL, NM_DEVID); 10930Sstevel@tonic-gate if (!md_load_namespace(setno, ep, 0L)) { 10940Sstevel@tonic-gate /* 10950Sstevel@tonic-gate * Unload the devid namespace 10960Sstevel@tonic-gate */ 10970Sstevel@tonic-gate (void) md_unload_namespace(setno, NM_DEVID); 10980Sstevel@tonic-gate err = -1; 10990Sstevel@tonic-gate goto out; 11000Sstevel@tonic-gate } 11010Sstevel@tonic-gate 11020Sstevel@tonic-gate /* 11030Sstevel@tonic-gate * If replica is in non-devid state, convert if: 11040Sstevel@tonic-gate * - not in probe during upgrade (md_keep_repl_state = 0) 11050Sstevel@tonic-gate * - enough space available in replica 11060Sstevel@tonic-gate * - local set 11070Sstevel@tonic-gate * - not a multi-node diskset 11080Sstevel@tonic-gate * - clustering is not present (for non-local set) 11090Sstevel@tonic-gate */ 11100Sstevel@tonic-gate s = (mddb_set_t *)md_set[setno].s_db; 
11110Sstevel@tonic-gate devid_flag = 0; 11120Sstevel@tonic-gate if (!(s->s_lbp->lb_flags & MDDB_DEVID_STYLE) && !md_keep_repl_state) 11130Sstevel@tonic-gate devid_flag = 1; 11140Sstevel@tonic-gate if (cluster_bootflags & CLUSTER_CONFIGURED) 11150Sstevel@tonic-gate if (setno != MD_LOCAL_SET) 11160Sstevel@tonic-gate devid_flag = 0; 11170Sstevel@tonic-gate if (MD_MNSET_SETNO(setno)) 11180Sstevel@tonic-gate devid_flag = 0; 11190Sstevel@tonic-gate if ((md_devid_destroy == 1) && (md_keep_repl_state == 1)) 11200Sstevel@tonic-gate devid_flag = 0; 11210Sstevel@tonic-gate 11220Sstevel@tonic-gate /* 11230Sstevel@tonic-gate * if we weren't devid style before and md_keep_repl_state=1 11240Sstevel@tonic-gate * we need to stay non-devid 11250Sstevel@tonic-gate */ 11260Sstevel@tonic-gate if ((md_keep_repl_state == 1) && 11270Sstevel@tonic-gate ((s->s_lbp->lb_flags & MDDB_DEVID_STYLE) == 0)) 11280Sstevel@tonic-gate devid_flag = 0; 11290Sstevel@tonic-gate if (devid_flag) { 11300Sstevel@tonic-gate /* 11310Sstevel@tonic-gate * Determine number of free blocks needed to convert 11320Sstevel@tonic-gate * entire replica to device id format - locator blocks 11330Sstevel@tonic-gate * and namespace. 
11340Sstevel@tonic-gate */ 11350Sstevel@tonic-gate cvt_blks = 0; 11360Sstevel@tonic-gate if (mddb_lb_did_convert(s, 0, &cvt_blks) != 0) { 11370Sstevel@tonic-gate if (ep != NULL) 11380Sstevel@tonic-gate (void) mdsyserror(ep, EIO); 11390Sstevel@tonic-gate err = -1; 11400Sstevel@tonic-gate goto out; 11410Sstevel@tonic-gate 11420Sstevel@tonic-gate } 11430Sstevel@tonic-gate cvt_blks += md_nm_did_chkspace(setno); 11440Sstevel@tonic-gate 11450Sstevel@tonic-gate /* add MDDB_DEVID_CONV_PERC% */ 11460Sstevel@tonic-gate if ((md_conv_perc > 0) && (md_conv_perc <= 100)) { 11470Sstevel@tonic-gate cvt_blks = cvt_blks * (100 + md_conv_perc) / 100; 11480Sstevel@tonic-gate } 11490Sstevel@tonic-gate 11500Sstevel@tonic-gate if (cvt_blks <= s->s_freeblkcnt) { 11510Sstevel@tonic-gate if (mddb_lb_did_convert(s, 1, &cvt_blks) != 0) { 11520Sstevel@tonic-gate if (ep != NULL) 11530Sstevel@tonic-gate (void) mdsyserror(ep, EIO); 11540Sstevel@tonic-gate err = -1; 11550Sstevel@tonic-gate goto out; 11560Sstevel@tonic-gate } 11570Sstevel@tonic-gate 11580Sstevel@tonic-gate } else { 11590Sstevel@tonic-gate /* 11600Sstevel@tonic-gate * Print message that replica can't be converted for 11610Sstevel@tonic-gate * lack of space. No failure - just continue to 11620Sstevel@tonic-gate * run without device ids. 
11630Sstevel@tonic-gate */ 11640Sstevel@tonic-gate cmn_err(CE_WARN, 11650Sstevel@tonic-gate "Unable to add Solaris Volume Manager device " 11660Sstevel@tonic-gate "relocation data.\n" 11670Sstevel@tonic-gate " To use device relocation feature:\n" 11680Sstevel@tonic-gate " - Increase size of listed replicas\n" 11690Sstevel@tonic-gate " - Reboot"); 11700Sstevel@tonic-gate md_print_block_usage(s, cvt_blks); 11710Sstevel@tonic-gate cmn_err(CE_WARN, 11720Sstevel@tonic-gate "Loading set without device relocation data.\n" 11730Sstevel@tonic-gate " Solaris Volume Manager disk movement " 11740Sstevel@tonic-gate "not tracked in local set."); 11750Sstevel@tonic-gate } 11760Sstevel@tonic-gate } 11770Sstevel@tonic-gate 11780Sstevel@tonic-gate /* 11790Sstevel@tonic-gate * go through and load any modules referenced in 11800Sstevel@tonic-gate * data base 11810Sstevel@tonic-gate */ 11820Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 11830Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { 11840Sstevel@tonic-gate status = mddb_getrecstatus(recid); 11850Sstevel@tonic-gate if (status == MDDB_STALE) { 11860Sstevel@tonic-gate if (! 
(md_get_setstatus(setno) & MD_SET_STALE)) { 11870Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE); 11880Sstevel@tonic-gate cmn_err(CE_WARN, 11890Sstevel@tonic-gate "md: state database is stale"); 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate } else if (status == MDDB_NODATA) { 11920Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL); 11930Sstevel@tonic-gate continue; 11940Sstevel@tonic-gate } 11950Sstevel@tonic-gate drvrid = mddb_getrectype1(recid); 11960Sstevel@tonic-gate if (drvrid < MDDB_FIRST_MODID) 11970Sstevel@tonic-gate continue; 11980Sstevel@tonic-gate if (md_loadsubmod(setno, md_getshared_name(setno, drvrid), 11990Sstevel@tonic-gate drvrid) < 0) { 12000Sstevel@tonic-gate cmn_err(CE_NOTE, "md: could not load misc/%s", 12017627SChris.Horne@Sun.COM md_getshared_name(setno, drvrid)); 12020Sstevel@tonic-gate } 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate 12050Sstevel@tonic-gate if (recid < 0) 12060Sstevel@tonic-gate goto out; 12070Sstevel@tonic-gate 12080Sstevel@tonic-gate snarf_user_data(setno); 12090Sstevel@tonic-gate 12100Sstevel@tonic-gate /* 12110Sstevel@tonic-gate * Initialize the md_nm_snarfed array 12120Sstevel@tonic-gate * this array is indexed by the key and 12130Sstevel@tonic-gate * is set by md_getdevnum during the snarf time 12140Sstevel@tonic-gate */ 12150Sstevel@tonic-gate if ((nh = get_first_record(setno, 0, NM_NOTSHARED)) != NULL) { 12160Sstevel@tonic-gate size = (int)((((struct nm_rec_hdr *)nh->nmn_record)-> 12170Sstevel@tonic-gate r_next_key) * (sizeof (int))); 12180Sstevel@tonic-gate md_nm_snarfed = (int *)kmem_zalloc(size, KM_SLEEP); 12190Sstevel@tonic-gate } 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate /* 12220Sstevel@tonic-gate * go through and snarf until nothing gets added 12230Sstevel@tonic-gate */ 12240Sstevel@tonic-gate do { 12250Sstevel@tonic-gate i = 0; 12260Sstevel@tonic-gate for (ops = md_opslist; ops != NULL; ops = ops->md_next) { 12270Sstevel@tonic-gate if (ops->md_snarf != NULL) { 
12280Sstevel@tonic-gate retval = ops->md_snarf(MD_SNARF_DOIT, setno); 12290Sstevel@tonic-gate if (retval == -1) { 12300Sstevel@tonic-gate err = -1; 12310Sstevel@tonic-gate /* Don't know the failed unit */ 12320Sstevel@tonic-gate (void) mdmderror(ep, MDE_RR_ALLOC_ERROR, 12330Sstevel@tonic-gate 0); 12340Sstevel@tonic-gate (void) md_halt_set(setno, MD_HALT_ALL); 12350Sstevel@tonic-gate (void) mddb_unload_set(setno); 12360Sstevel@tonic-gate md_haltsnarf_exit(setno); 12370Sstevel@tonic-gate return (err); 12380Sstevel@tonic-gate } else { 12390Sstevel@tonic-gate i += retval; 12400Sstevel@tonic-gate } 12410Sstevel@tonic-gate } 12420Sstevel@tonic-gate } 12430Sstevel@tonic-gate } while (i); 12440Sstevel@tonic-gate 12451623Stw21770 /* 12461623Stw21770 * Set the first available slot and availability 12471623Stw21770 */ 12481623Stw21770 md_set[setno].s_un_avail = 0; 12491623Stw21770 for (un = 0; un < MD_MAXUNITS; un++) { 12501623Stw21770 if (md_set[setno].s_un[un] != NULL) { 12511623Stw21770 continue; 12521623Stw21770 } else { 12531623Stw21770 if (!un_next_set) { 12541623Stw21770 md_set[setno].s_un_next = un; 12551623Stw21770 un_next_set = 1; 12561623Stw21770 } 12571623Stw21770 md_set[setno].s_un_avail++; 12581623Stw21770 } 12591623Stw21770 } 12601623Stw21770 12610Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_SNARFED); 12620Sstevel@tonic-gate 12630Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 12640Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { 12650Sstevel@tonic-gate privat = mddb_getrecprivate(recid); 12660Sstevel@tonic-gate if (privat & MD_PRV_COMMIT) { 12670Sstevel@tonic-gate if (mddb_commitrec(recid)) { 12680Sstevel@tonic-gate if (!(md_get_setstatus(setno) & MD_SET_STALE)) { 12690Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE); 12700Sstevel@tonic-gate cmn_err(CE_WARN, 12710Sstevel@tonic-gate "md: state database is stale"); 12720Sstevel@tonic-gate } 12730Sstevel@tonic-gate } 12740Sstevel@tonic-gate mddb_setrecprivate(recid, 
MD_PRV_GOTIT); 12750Sstevel@tonic-gate } 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate 12780Sstevel@tonic-gate /* Deletes must happen after all the commits */ 12790Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 12800Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, MDDB_ALL, 0)) > 0) { 12810Sstevel@tonic-gate privat = mddb_getrecprivate(recid); 12820Sstevel@tonic-gate if (privat & MD_PRV_DELETE) { 12830Sstevel@tonic-gate if (mddb_deleterec(recid)) { 12840Sstevel@tonic-gate if (!(md_get_setstatus(setno) & MD_SET_STALE)) { 12850Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_STALE); 12860Sstevel@tonic-gate cmn_err(CE_WARN, 12870Sstevel@tonic-gate "md: state database is stale"); 12880Sstevel@tonic-gate } 12890Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 12900Sstevel@tonic-gate } 12910Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 12920Sstevel@tonic-gate } 12930Sstevel@tonic-gate } 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate /* 12960Sstevel@tonic-gate * go through and clean up records until nothing gets cleaned up. 12970Sstevel@tonic-gate */ 12980Sstevel@tonic-gate do { 12990Sstevel@tonic-gate i = 0; 13000Sstevel@tonic-gate for (ops = md_opslist; ops != NULL; ops = ops->md_next) 13010Sstevel@tonic-gate if (ops->md_snarf != NULL) 13020Sstevel@tonic-gate i += ops->md_snarf(MD_SNARF_CLEANUP, setno); 13030Sstevel@tonic-gate } while (i); 13040Sstevel@tonic-gate 13050Sstevel@tonic-gate if (md_nm_snarfed != NULL && 13060Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_STALE)) { 13070Sstevel@tonic-gate /* 13080Sstevel@tonic-gate * go thru and cleanup the namespace and the device id 13090Sstevel@tonic-gate * name space 13100Sstevel@tonic-gate */ 13110Sstevel@tonic-gate for (key = 1; 13120Sstevel@tonic-gate key < ((struct nm_rec_hdr *)nh->nmn_record)->r_next_key; 13130Sstevel@tonic-gate key++) { 13140Sstevel@tonic-gate /* 13150Sstevel@tonic-gate * Is the entry an 'orphan'? 
13160Sstevel@tonic-gate */ 13170Sstevel@tonic-gate if (lookup_entry(nh, setno, side, key, NODEV64, 0L) != 13180Sstevel@tonic-gate NULL) { 13190Sstevel@tonic-gate /* 13200Sstevel@tonic-gate * If the value is not set then apparently 13210Sstevel@tonic-gate * it is not part of the current configuration, 13220Sstevel@tonic-gate * remove it this can happen when system panic 13230Sstevel@tonic-gate * between the primary name space update and 13240Sstevel@tonic-gate * the device id name space update 13250Sstevel@tonic-gate */ 13260Sstevel@tonic-gate if (md_nm_snarfed[key] == 0) { 13270Sstevel@tonic-gate if (md_verify_orphaned_record(setno, 13280Sstevel@tonic-gate key) == 1) 13290Sstevel@tonic-gate (void) remove_entry(nh, 13300Sstevel@tonic-gate side, key, 0L); 13310Sstevel@tonic-gate } 13320Sstevel@tonic-gate } 13330Sstevel@tonic-gate } 13340Sstevel@tonic-gate } 13350Sstevel@tonic-gate 13360Sstevel@tonic-gate if (md_nm_snarfed != NULL) { 13370Sstevel@tonic-gate /* 13380Sstevel@tonic-gate * Done and free the memory 13390Sstevel@tonic-gate */ 13400Sstevel@tonic-gate kmem_free(md_nm_snarfed, size); 13410Sstevel@tonic-gate md_nm_snarfed = NULL; 13420Sstevel@tonic-gate } 13430Sstevel@tonic-gate 13440Sstevel@tonic-gate if (s->s_lbp->lb_flags & MDDB_DEVID_STYLE && 13450Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_STALE)) { 13460Sstevel@tonic-gate /* 13470Sstevel@tonic-gate * if the destroy flag has been set and 13480Sstevel@tonic-gate * the MD_SET_DIDCLUP bit is not set in 13490Sstevel@tonic-gate * the set's status field, cleanup the 13500Sstevel@tonic-gate * entire device id namespace 13510Sstevel@tonic-gate */ 13520Sstevel@tonic-gate if (md_devid_destroy && 13530Sstevel@tonic-gate !(md_get_setstatus(setno) & MD_SET_DIDCLUP)) { 13540Sstevel@tonic-gate (void) md_devid_cleanup(setno, 1); 13550Sstevel@tonic-gate md_set_setstatus(setno, MD_SET_DIDCLUP); 13560Sstevel@tonic-gate } else 13570Sstevel@tonic-gate (void) md_devid_cleanup(setno, 0); 13580Sstevel@tonic-gate } 
13590Sstevel@tonic-gate 13600Sstevel@tonic-gate /* 13610Sstevel@tonic-gate * clear single threading on snarf, return success or error 13620Sstevel@tonic-gate */ 13630Sstevel@tonic-gate out: 13640Sstevel@tonic-gate md_haltsnarf_exit(setno); 13650Sstevel@tonic-gate return (err); 13660Sstevel@tonic-gate } 13670Sstevel@tonic-gate 13680Sstevel@tonic-gate void 13690Sstevel@tonic-gate get_minfo(struct dk_minfo *info, minor_t mnum) 13700Sstevel@tonic-gate { 13710Sstevel@tonic-gate md_unit_t *un; 13720Sstevel@tonic-gate mdi_unit_t *ui; 13730Sstevel@tonic-gate 13740Sstevel@tonic-gate info->dki_capacity = 0; 13750Sstevel@tonic-gate info->dki_lbsize = 0; 13760Sstevel@tonic-gate info->dki_media_type = 0; 13770Sstevel@tonic-gate 13780Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) { 13790Sstevel@tonic-gate return; 13800Sstevel@tonic-gate } 13810Sstevel@tonic-gate un = (md_unit_t *)md_unit_readerlock(ui); 13820Sstevel@tonic-gate info->dki_capacity = un->c.un_total_blocks; 13830Sstevel@tonic-gate md_unit_readerexit(ui); 13840Sstevel@tonic-gate info->dki_lbsize = DEV_BSIZE; 13850Sstevel@tonic-gate info->dki_media_type = DK_UNKNOWN; 13860Sstevel@tonic-gate } 13870Sstevel@tonic-gate 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate void 13900Sstevel@tonic-gate get_info(struct dk_cinfo *info, minor_t mnum) 13910Sstevel@tonic-gate { 13920Sstevel@tonic-gate /* 13930Sstevel@tonic-gate * Controller Information 13940Sstevel@tonic-gate */ 13950Sstevel@tonic-gate info->dki_ctype = DKC_MD; 13960Sstevel@tonic-gate info->dki_cnum = ddi_get_instance(ddi_get_parent(md_devinfo)); 13970Sstevel@tonic-gate (void) strcpy(info->dki_cname, 13980Sstevel@tonic-gate ddi_get_name(ddi_get_parent(md_devinfo))); 13990Sstevel@tonic-gate /* 14000Sstevel@tonic-gate * Unit Information 14010Sstevel@tonic-gate */ 14020Sstevel@tonic-gate info->dki_unit = mnum; 14030Sstevel@tonic-gate info->dki_slave = 0; 14040Sstevel@tonic-gate (void) strcpy(info->dki_dname, ddi_driver_name(md_devinfo)); 14050Sstevel@tonic-gate 
info->dki_flags = 0; 14060Sstevel@tonic-gate info->dki_partition = 0; 14070Sstevel@tonic-gate info->dki_maxtransfer = (ushort_t)(md_maxphys / DEV_BSIZE); 14080Sstevel@tonic-gate 14090Sstevel@tonic-gate /* 14100Sstevel@tonic-gate * We can't get from here to there yet 14110Sstevel@tonic-gate */ 14120Sstevel@tonic-gate info->dki_addr = 0; 14130Sstevel@tonic-gate info->dki_space = 0; 14140Sstevel@tonic-gate info->dki_prio = 0; 14150Sstevel@tonic-gate info->dki_vec = 0; 14160Sstevel@tonic-gate } 14170Sstevel@tonic-gate 14180Sstevel@tonic-gate /* 14190Sstevel@tonic-gate * open admin device 14200Sstevel@tonic-gate */ 14210Sstevel@tonic-gate static int 14220Sstevel@tonic-gate mdadminopen( 14230Sstevel@tonic-gate int flag, 14240Sstevel@tonic-gate int otyp) 14250Sstevel@tonic-gate { 14260Sstevel@tonic-gate int err = 0; 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate /* single thread */ 14290Sstevel@tonic-gate mutex_enter(&md_mx); 14300Sstevel@tonic-gate 14310Sstevel@tonic-gate /* check type and flags */ 14320Sstevel@tonic-gate if ((otyp != OTYP_CHR) && (otyp != OTYP_LYR)) { 14330Sstevel@tonic-gate err = EINVAL; 14340Sstevel@tonic-gate goto out; 14350Sstevel@tonic-gate } 14360Sstevel@tonic-gate if (((flag & FEXCL) && (md_status & MD_GBL_OPEN)) || 14370Sstevel@tonic-gate (md_status & MD_GBL_EXCL)) { 14380Sstevel@tonic-gate err = EBUSY; 14390Sstevel@tonic-gate goto out; 14400Sstevel@tonic-gate } 14410Sstevel@tonic-gate 14420Sstevel@tonic-gate /* count and flag open */ 14430Sstevel@tonic-gate md_ocnt[otyp]++; 14440Sstevel@tonic-gate md_status |= MD_GBL_OPEN; 14450Sstevel@tonic-gate if (flag & FEXCL) 14460Sstevel@tonic-gate md_status |= MD_GBL_EXCL; 14470Sstevel@tonic-gate 14480Sstevel@tonic-gate /* unlock return success */ 14490Sstevel@tonic-gate out: 14500Sstevel@tonic-gate mutex_exit(&md_mx); 14510Sstevel@tonic-gate return (err); 14520Sstevel@tonic-gate } 14530Sstevel@tonic-gate 14540Sstevel@tonic-gate /* 14550Sstevel@tonic-gate * open entry point 14560Sstevel@tonic-gate */ 
14570Sstevel@tonic-gate static int 14580Sstevel@tonic-gate mdopen( 14590Sstevel@tonic-gate dev_t *dev, 14600Sstevel@tonic-gate int flag, 14610Sstevel@tonic-gate int otyp, 14620Sstevel@tonic-gate cred_t *cred_p) 14630Sstevel@tonic-gate { 14640Sstevel@tonic-gate minor_t mnum = getminor(*dev); 14650Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum); 14660Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum); 14670Sstevel@tonic-gate mdi_unit_t *ui = NULL; 14680Sstevel@tonic-gate int err = 0; 14690Sstevel@tonic-gate md_parent_t parent; 14700Sstevel@tonic-gate 14710Sstevel@tonic-gate /* dispatch admin device opens */ 14720Sstevel@tonic-gate if (mnum == MD_ADM_MINOR) 14730Sstevel@tonic-gate return (mdadminopen(flag, otyp)); 14740Sstevel@tonic-gate 14750Sstevel@tonic-gate /* lock, check status */ 14760Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_READER); 14770Sstevel@tonic-gate 14780Sstevel@tonic-gate tryagain: 14790Sstevel@tonic-gate if (md_get_status() & MD_GBL_HALTED) { 14800Sstevel@tonic-gate err = ENODEV; 14810Sstevel@tonic-gate goto out; 14820Sstevel@tonic-gate } 14830Sstevel@tonic-gate 14840Sstevel@tonic-gate /* check minor */ 14850Sstevel@tonic-gate if ((setno >= md_nsets) || (unit >= md_nunits)) { 14860Sstevel@tonic-gate err = ENXIO; 14870Sstevel@tonic-gate goto out; 14880Sstevel@tonic-gate } 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate /* make sure we're snarfed */ 14910Sstevel@tonic-gate if ((md_get_setstatus(MD_LOCAL_SET) & MD_SET_SNARFED) == 0) { 14920Sstevel@tonic-gate if (md_snarf_db_set(MD_LOCAL_SET, NULL) != 0) { 14930Sstevel@tonic-gate err = ENODEV; 14940Sstevel@tonic-gate goto out; 14950Sstevel@tonic-gate } 14960Sstevel@tonic-gate } 14970Sstevel@tonic-gate if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) { 14980Sstevel@tonic-gate err = ENODEV; 14990Sstevel@tonic-gate goto out; 15000Sstevel@tonic-gate } 15010Sstevel@tonic-gate 15020Sstevel@tonic-gate /* check unit */ 15030Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) { 
15040Sstevel@tonic-gate err = ENXIO; 15050Sstevel@tonic-gate goto out; 15060Sstevel@tonic-gate } 15070Sstevel@tonic-gate 15080Sstevel@tonic-gate /* 15090Sstevel@tonic-gate * The softpart open routine may do an I/O during the open, in 15100Sstevel@tonic-gate * which case the open routine will set the OPENINPROGRESS flag 15110Sstevel@tonic-gate * and drop all locks during the I/O. If this thread sees 15120Sstevel@tonic-gate * the OPENINPROGRESS flag set, if should wait until the flag 15130Sstevel@tonic-gate * is reset before calling the driver's open routine. It must 15140Sstevel@tonic-gate * also revalidate the world after it grabs the unit_array lock 15150Sstevel@tonic-gate * since the set may have been released or the metadevice cleared 15160Sstevel@tonic-gate * during the sleep. 15170Sstevel@tonic-gate */ 15180Sstevel@tonic-gate if (MD_MNSET_SETNO(setno)) { 15190Sstevel@tonic-gate mutex_enter(&ui->ui_mx); 15200Sstevel@tonic-gate if (ui->ui_lock & MD_UL_OPENINPROGRESS) { 15210Sstevel@tonic-gate rw_exit(&md_unit_array_rw.lock); 15220Sstevel@tonic-gate cv_wait(&ui->ui_cv, &ui->ui_mx); 15230Sstevel@tonic-gate rw_enter(&md_unit_array_rw.lock, RW_READER); 15240Sstevel@tonic-gate mutex_exit(&ui->ui_mx); 15250Sstevel@tonic-gate goto tryagain; 15260Sstevel@tonic-gate } 15270Sstevel@tonic-gate mutex_exit(&ui->ui_mx); 15280Sstevel@tonic-gate } 15290Sstevel@tonic-gate 15300Sstevel@tonic-gate /* Test if device is openable */ 15310Sstevel@tonic-gate if ((ui->ui_tstate & MD_NOTOPENABLE) != 0) { 15320Sstevel@tonic-gate err = ENXIO; 15330Sstevel@tonic-gate goto out; 15340Sstevel@tonic-gate } 15350Sstevel@tonic-gate 15360Sstevel@tonic-gate /* don't allow opens w/WRITE flag if stale */ 15370Sstevel@tonic-gate if ((flag & FWRITE) && (md_get_setstatus(setno) & MD_SET_STALE)) { 15380Sstevel@tonic-gate err = EROFS; 15390Sstevel@tonic-gate goto out; 15400Sstevel@tonic-gate } 15410Sstevel@tonic-gate 15420Sstevel@tonic-gate /* don't allow writes to subdevices */ 15430Sstevel@tonic-gate 
parent = md_get_parent(md_expldev(*dev)); 15440Sstevel@tonic-gate if ((flag & FWRITE) && MD_HAS_PARENT(parent)) { 15450Sstevel@tonic-gate err = EROFS; 15460Sstevel@tonic-gate goto out; 15470Sstevel@tonic-gate } 15480Sstevel@tonic-gate 15490Sstevel@tonic-gate /* open underlying driver */ 15500Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_open != NULL) { 15510Sstevel@tonic-gate if ((err = (*md_ops[ui->ui_opsindex]->md_open) 15520Sstevel@tonic-gate (dev, flag, otyp, cred_p, 0)) != 0) 15530Sstevel@tonic-gate goto out; 15540Sstevel@tonic-gate } 15550Sstevel@tonic-gate 15560Sstevel@tonic-gate /* or do it ourselves */ 15570Sstevel@tonic-gate else { 15580Sstevel@tonic-gate /* single thread */ 15590Sstevel@tonic-gate (void) md_unit_openclose_enter(ui); 15600Sstevel@tonic-gate err = md_unit_incopen(mnum, flag, otyp); 15610Sstevel@tonic-gate md_unit_openclose_exit(ui); 15620Sstevel@tonic-gate if (err != 0) 15630Sstevel@tonic-gate goto out; 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate /* unlock, return status */ 15670Sstevel@tonic-gate out: 15680Sstevel@tonic-gate rw_exit(&md_unit_array_rw.lock); 15690Sstevel@tonic-gate return (err); 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate 15720Sstevel@tonic-gate /* 15730Sstevel@tonic-gate * close admin device 15740Sstevel@tonic-gate */ 15750Sstevel@tonic-gate static int 15760Sstevel@tonic-gate mdadminclose( 15770Sstevel@tonic-gate int otyp) 15780Sstevel@tonic-gate { 15790Sstevel@tonic-gate int i; 15800Sstevel@tonic-gate int err = 0; 15810Sstevel@tonic-gate 15820Sstevel@tonic-gate /* single thread */ 15830Sstevel@tonic-gate mutex_enter(&md_mx); 15840Sstevel@tonic-gate 15850Sstevel@tonic-gate /* check type and flags */ 15860Sstevel@tonic-gate if ((otyp < 0) || (otyp >= OTYPCNT)) { 15870Sstevel@tonic-gate err = EINVAL; 15880Sstevel@tonic-gate goto out; 15890Sstevel@tonic-gate } else if (md_ocnt[otyp] == 0) { 15900Sstevel@tonic-gate err = ENXIO; 15910Sstevel@tonic-gate goto out; 15920Sstevel@tonic-gate 
} 15930Sstevel@tonic-gate 15940Sstevel@tonic-gate /* count and flag closed */ 15950Sstevel@tonic-gate if (otyp == OTYP_LYR) 15960Sstevel@tonic-gate md_ocnt[otyp]--; 15970Sstevel@tonic-gate else 15980Sstevel@tonic-gate md_ocnt[otyp] = 0; 15990Sstevel@tonic-gate md_status &= ~MD_GBL_OPEN; 16000Sstevel@tonic-gate for (i = 0; (i < OTYPCNT); ++i) 16010Sstevel@tonic-gate if (md_ocnt[i] != 0) 16020Sstevel@tonic-gate md_status |= MD_GBL_OPEN; 16030Sstevel@tonic-gate if (! (md_status & MD_GBL_OPEN)) 16040Sstevel@tonic-gate md_status &= ~MD_GBL_EXCL; 16050Sstevel@tonic-gate 16060Sstevel@tonic-gate /* unlock return success */ 16070Sstevel@tonic-gate out: 16080Sstevel@tonic-gate mutex_exit(&md_mx); 16090Sstevel@tonic-gate return (err); 16100Sstevel@tonic-gate } 16110Sstevel@tonic-gate 16120Sstevel@tonic-gate /* 16130Sstevel@tonic-gate * close entry point 16140Sstevel@tonic-gate */ 16150Sstevel@tonic-gate static int 16160Sstevel@tonic-gate mdclose( 16170Sstevel@tonic-gate dev_t dev, 16180Sstevel@tonic-gate int flag, 16190Sstevel@tonic-gate int otyp, 16200Sstevel@tonic-gate cred_t *cred_p) 16210Sstevel@tonic-gate { 16220Sstevel@tonic-gate minor_t mnum = getminor(dev); 16230Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum); 16240Sstevel@tonic-gate unit_t unit = MD_MIN2UNIT(mnum); 16250Sstevel@tonic-gate mdi_unit_t *ui = NULL; 16260Sstevel@tonic-gate int err = 0; 16270Sstevel@tonic-gate 16280Sstevel@tonic-gate /* dispatch admin device closes */ 16290Sstevel@tonic-gate if (mnum == MD_ADM_MINOR) 16300Sstevel@tonic-gate return (mdadminclose(otyp)); 16310Sstevel@tonic-gate 16320Sstevel@tonic-gate /* check minor */ 16330Sstevel@tonic-gate if ((setno >= md_nsets) || (unit >= md_nunits) || 16340Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) { 16350Sstevel@tonic-gate err = ENXIO; 16360Sstevel@tonic-gate goto out; 16370Sstevel@tonic-gate } 16380Sstevel@tonic-gate 16390Sstevel@tonic-gate /* close underlying driver */ 16400Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_close != 
NULL) { 16410Sstevel@tonic-gate if ((err = (*md_ops[ui->ui_opsindex]->md_close) 16420Sstevel@tonic-gate (dev, flag, otyp, cred_p, 0)) != 0) 16430Sstevel@tonic-gate goto out; 16440Sstevel@tonic-gate } 16450Sstevel@tonic-gate 16460Sstevel@tonic-gate /* or do it ourselves */ 16470Sstevel@tonic-gate else { 16480Sstevel@tonic-gate /* single thread */ 16490Sstevel@tonic-gate (void) md_unit_openclose_enter(ui); 16500Sstevel@tonic-gate err = md_unit_decopen(mnum, otyp); 16510Sstevel@tonic-gate md_unit_openclose_exit(ui); 16520Sstevel@tonic-gate if (err != 0) 16530Sstevel@tonic-gate goto out; 16540Sstevel@tonic-gate } 16550Sstevel@tonic-gate 16560Sstevel@tonic-gate /* return success */ 16570Sstevel@tonic-gate out: 16580Sstevel@tonic-gate return (err); 16590Sstevel@tonic-gate } 16600Sstevel@tonic-gate 16610Sstevel@tonic-gate 16620Sstevel@tonic-gate /* 16630Sstevel@tonic-gate * This routine performs raw read operations. It is called from the 16640Sstevel@tonic-gate * device switch at normal priority. 16650Sstevel@tonic-gate * 16660Sstevel@tonic-gate * The main catch is that the *uio struct which is passed to us may 16670Sstevel@tonic-gate * specify a read which spans two buffers, which would be contiguous 16680Sstevel@tonic-gate * on a single partition, but not on a striped partition. This will 16690Sstevel@tonic-gate * be handled by mdstrategy. 
16700Sstevel@tonic-gate */ 16710Sstevel@tonic-gate /*ARGSUSED*/ 16720Sstevel@tonic-gate static int 16730Sstevel@tonic-gate mdread(dev_t dev, struct uio *uio, cred_t *credp) 16740Sstevel@tonic-gate { 16750Sstevel@tonic-gate minor_t mnum; 16760Sstevel@tonic-gate mdi_unit_t *ui; 16770Sstevel@tonic-gate int error; 16780Sstevel@tonic-gate 16790Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) || 16800Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) || 16810Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 16820Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) 16830Sstevel@tonic-gate return (ENXIO); 16840Sstevel@tonic-gate 16850Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_read != NULL) 16860Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_read) 16870Sstevel@tonic-gate (dev, uio, credp)); 16880Sstevel@tonic-gate 16890Sstevel@tonic-gate if ((error = md_chk_uio(uio)) != 0) 16900Sstevel@tonic-gate return (error); 16910Sstevel@tonic-gate 16920Sstevel@tonic-gate return (physio(mdstrategy, NULL, dev, B_READ, md_minphys, uio)); 16930Sstevel@tonic-gate } 16940Sstevel@tonic-gate 16950Sstevel@tonic-gate /* 16960Sstevel@tonic-gate * This routine performs async raw read operations. It is called from the 16970Sstevel@tonic-gate * device switch at normal priority. 16980Sstevel@tonic-gate * 16990Sstevel@tonic-gate * The main catch is that the *aio struct which is passed to us may 17000Sstevel@tonic-gate * specify a read which spans two buffers, which would be contiguous 17010Sstevel@tonic-gate * on a single partition, but not on a striped partition. This will 17020Sstevel@tonic-gate * be handled by mdstrategy. 
17030Sstevel@tonic-gate */ 17040Sstevel@tonic-gate /*ARGSUSED*/ 17050Sstevel@tonic-gate static int 17060Sstevel@tonic-gate mdaread(dev_t dev, struct aio_req *aio, cred_t *credp) 17070Sstevel@tonic-gate { 17080Sstevel@tonic-gate minor_t mnum; 17090Sstevel@tonic-gate mdi_unit_t *ui; 17100Sstevel@tonic-gate int error; 17110Sstevel@tonic-gate 17120Sstevel@tonic-gate 17130Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) || 17140Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) || 17150Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 17160Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) 17170Sstevel@tonic-gate return (ENXIO); 17180Sstevel@tonic-gate 17190Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_aread != NULL) 17200Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_aread) 17210Sstevel@tonic-gate (dev, aio, credp)); 17220Sstevel@tonic-gate 17230Sstevel@tonic-gate if ((error = md_chk_uio(aio->aio_uio)) != 0) 17240Sstevel@tonic-gate return (error); 17250Sstevel@tonic-gate 17260Sstevel@tonic-gate return (aphysio(mdstrategy, anocancel, dev, B_READ, md_minphys, aio)); 17270Sstevel@tonic-gate } 17280Sstevel@tonic-gate 17290Sstevel@tonic-gate /* 17300Sstevel@tonic-gate * This routine performs raw write operations. It is called from the 17310Sstevel@tonic-gate * device switch at normal priority. 17320Sstevel@tonic-gate * 17330Sstevel@tonic-gate * The main catch is that the *uio struct which is passed to us may 17340Sstevel@tonic-gate * specify a write which spans two buffers, which would be contiguous 17350Sstevel@tonic-gate * on a single partition, but not on a striped partition. This is 17360Sstevel@tonic-gate * handled by mdstrategy. 
17370Sstevel@tonic-gate * 17380Sstevel@tonic-gate */ 17390Sstevel@tonic-gate /*ARGSUSED*/ 17400Sstevel@tonic-gate static int 17410Sstevel@tonic-gate mdwrite(dev_t dev, struct uio *uio, cred_t *credp) 17420Sstevel@tonic-gate { 17430Sstevel@tonic-gate minor_t mnum; 17440Sstevel@tonic-gate mdi_unit_t *ui; 17450Sstevel@tonic-gate int error; 17460Sstevel@tonic-gate 17470Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) || 17480Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) || 17490Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 17500Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) 17510Sstevel@tonic-gate return (ENXIO); 17520Sstevel@tonic-gate 17530Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_write != NULL) 17540Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_write) 17550Sstevel@tonic-gate (dev, uio, credp)); 17560Sstevel@tonic-gate 17570Sstevel@tonic-gate if ((error = md_chk_uio(uio)) != 0) 17580Sstevel@tonic-gate return (error); 17590Sstevel@tonic-gate 17600Sstevel@tonic-gate return (physio(mdstrategy, NULL, dev, B_WRITE, md_minphys, uio)); 17610Sstevel@tonic-gate } 17620Sstevel@tonic-gate 17630Sstevel@tonic-gate /* 17640Sstevel@tonic-gate * This routine performs async raw write operations. It is called from the 17650Sstevel@tonic-gate * device switch at normal priority. 17660Sstevel@tonic-gate * 17670Sstevel@tonic-gate * The main catch is that the *aio struct which is passed to us may 17680Sstevel@tonic-gate * specify a write which spans two buffers, which would be contiguous 17690Sstevel@tonic-gate * on a single partition, but not on a striped partition. This is 17700Sstevel@tonic-gate * handled by mdstrategy. 
17710Sstevel@tonic-gate * 17720Sstevel@tonic-gate */ 17730Sstevel@tonic-gate /*ARGSUSED*/ 17740Sstevel@tonic-gate static int 17750Sstevel@tonic-gate mdawrite(dev_t dev, struct aio_req *aio, cred_t *credp) 17760Sstevel@tonic-gate { 17770Sstevel@tonic-gate minor_t mnum; 17780Sstevel@tonic-gate mdi_unit_t *ui; 17790Sstevel@tonic-gate int error; 17800Sstevel@tonic-gate 17810Sstevel@tonic-gate 17820Sstevel@tonic-gate if (((mnum = getminor(dev)) == MD_ADM_MINOR) || 17830Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) || 17840Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 17850Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) 17860Sstevel@tonic-gate return (ENXIO); 17870Sstevel@tonic-gate 17880Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_awrite != NULL) 17890Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_awrite) 17900Sstevel@tonic-gate (dev, aio, credp)); 17910Sstevel@tonic-gate 17920Sstevel@tonic-gate if ((error = md_chk_uio(aio->aio_uio)) != 0) 17930Sstevel@tonic-gate return (error); 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate return (aphysio(mdstrategy, anocancel, dev, B_WRITE, md_minphys, aio)); 17960Sstevel@tonic-gate } 17970Sstevel@tonic-gate 17980Sstevel@tonic-gate int 17990Sstevel@tonic-gate mdstrategy(struct buf *bp) 18000Sstevel@tonic-gate { 18010Sstevel@tonic-gate minor_t mnum; 18020Sstevel@tonic-gate mdi_unit_t *ui; 18030Sstevel@tonic-gate 18040Sstevel@tonic-gate ASSERT((bp->b_flags & B_DONE) == 0); 18050Sstevel@tonic-gate 18060Sstevel@tonic-gate if (panicstr) 18070Sstevel@tonic-gate md_clr_status(MD_GBL_DAEMONS_LIVE); 18080Sstevel@tonic-gate 18090Sstevel@tonic-gate if (((mnum = getminor(bp->b_edev)) == MD_ADM_MINOR) || 18100Sstevel@tonic-gate (MD_MIN2SET(mnum) >= md_nsets) || 18110Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 18120Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) { 18130Sstevel@tonic-gate bp->b_flags |= B_ERROR; 18140Sstevel@tonic-gate bp->b_error = ENXIO; 18150Sstevel@tonic-gate bp->b_resid = 
bp->b_bcount; 18160Sstevel@tonic-gate biodone(bp); 18170Sstevel@tonic-gate return (0); 18180Sstevel@tonic-gate } 18190Sstevel@tonic-gate 18200Sstevel@tonic-gate bp->b_flags &= ~(B_ERROR | B_DONE); 18210Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_strategy != NULL) { 18220Sstevel@tonic-gate (*md_ops[ui->ui_opsindex]->md_strategy) (bp, 0, NULL); 18230Sstevel@tonic-gate } else { 18240Sstevel@tonic-gate (void) errdone(ui, bp, ENXIO); 18250Sstevel@tonic-gate } 18260Sstevel@tonic-gate return (0); 18270Sstevel@tonic-gate } 18280Sstevel@tonic-gate 18290Sstevel@tonic-gate /* 18300Sstevel@tonic-gate * Return true if the ioctl is allowed to be multithreaded. 18310Sstevel@tonic-gate * All the ioctls with MN are sent only from the message handlers through 18320Sstevel@tonic-gate * rpc.mdcommd, which (via it's own locking mechanism) takes care that not two 18330Sstevel@tonic-gate * ioctl for the same metadevice are issued at the same time. 18340Sstevel@tonic-gate * So we are safe here. 18350Sstevel@tonic-gate * The other ioctls do not mess with any metadevice structures and therefor 18360Sstevel@tonic-gate * are harmless too, if called multiple times at the same time. 
18370Sstevel@tonic-gate */ 18380Sstevel@tonic-gate static boolean_t 18390Sstevel@tonic-gate is_mt_ioctl(int cmd) { 18400Sstevel@tonic-gate 18410Sstevel@tonic-gate switch (cmd) { 18420Sstevel@tonic-gate case MD_IOCGUNIQMSGID: 18430Sstevel@tonic-gate case MD_IOCGVERSION: 18440Sstevel@tonic-gate case MD_IOCISOPEN: 18450Sstevel@tonic-gate case MD_MN_SET_MM_OWNER: 18460Sstevel@tonic-gate case MD_MN_SET_STATE: 18470Sstevel@tonic-gate case MD_MN_SUSPEND_WRITES: 18480Sstevel@tonic-gate case MD_MN_ALLOCATE_HOTSPARE: 18490Sstevel@tonic-gate case MD_MN_SET_SETFLAGS: 18500Sstevel@tonic-gate case MD_MN_GET_SETFLAGS: 18510Sstevel@tonic-gate case MD_MN_MDDB_OPTRECFIX: 18520Sstevel@tonic-gate case MD_MN_MDDB_PARSE: 18530Sstevel@tonic-gate case MD_MN_MDDB_BLOCK: 18540Sstevel@tonic-gate case MD_MN_DB_USERREQ: 18550Sstevel@tonic-gate case MD_IOC_SPSTATUS: 18560Sstevel@tonic-gate case MD_MN_COMMD_ERR: 18570Sstevel@tonic-gate case MD_MN_SET_COMMD_RUNNING: 18580Sstevel@tonic-gate case MD_MN_RESYNC: 18590Sstevel@tonic-gate case MD_MN_SETSYNC: 18600Sstevel@tonic-gate case MD_MN_POKE_HOTSPARES: 1861*8452SJohn.Wren.Kennedy@Sun.COM case MD_MN_RR_DIRTY: 1862*8452SJohn.Wren.Kennedy@Sun.COM case MD_MN_RR_CLEAN: 1863*8452SJohn.Wren.Kennedy@Sun.COM case MD_MN_IOC_SPUPDATEWM: 18640Sstevel@tonic-gate return (1); 18650Sstevel@tonic-gate default: 18660Sstevel@tonic-gate return (0); 18670Sstevel@tonic-gate } 18680Sstevel@tonic-gate } 18690Sstevel@tonic-gate 18700Sstevel@tonic-gate /* 18710Sstevel@tonic-gate * This routine implements the ioctl calls for the Virtual Disk System. 18720Sstevel@tonic-gate * It is called from the device switch at normal priority. 
18730Sstevel@tonic-gate */ 18740Sstevel@tonic-gate /* ARGSUSED */ 18750Sstevel@tonic-gate static int 18760Sstevel@tonic-gate mdioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *cred_p, 18770Sstevel@tonic-gate int *rval_p) 18780Sstevel@tonic-gate { 18790Sstevel@tonic-gate minor_t mnum = getminor(dev); 18800Sstevel@tonic-gate mdi_unit_t *ui; 18810Sstevel@tonic-gate IOLOCK lock; 18820Sstevel@tonic-gate int err; 18830Sstevel@tonic-gate 18840Sstevel@tonic-gate /* 18850Sstevel@tonic-gate * For multinode disksets number of ioctls are allowed to be 18860Sstevel@tonic-gate * multithreaded. 18870Sstevel@tonic-gate * A fundamental assumption made in this implementation is that 18880Sstevel@tonic-gate * ioctls either do not interact with other md structures or the 18890Sstevel@tonic-gate * ioctl to the admin device can only occur if the metadevice 18900Sstevel@tonic-gate * device is open. i.e. avoid a race between metaclear and the 18910Sstevel@tonic-gate * progress of a multithreaded ioctl. 
18920Sstevel@tonic-gate */ 18930Sstevel@tonic-gate 18940Sstevel@tonic-gate if (!is_mt_ioctl(cmd) && md_ioctl_lock_enter() == EINTR) { 18950Sstevel@tonic-gate return (EINTR); 18960Sstevel@tonic-gate } 18970Sstevel@tonic-gate 18980Sstevel@tonic-gate /* 18990Sstevel@tonic-gate * initialize lock tracker 19000Sstevel@tonic-gate */ 19010Sstevel@tonic-gate IOLOCK_INIT(&lock); 19020Sstevel@tonic-gate 19030Sstevel@tonic-gate /* Flag to indicate that MD_GBL_IOCTL_LOCK is not acquired */ 19040Sstevel@tonic-gate 19050Sstevel@tonic-gate if (is_mt_ioctl(cmd)) { 19060Sstevel@tonic-gate /* increment the md_mtioctl_cnt */ 19070Sstevel@tonic-gate mutex_enter(&md_mx); 19080Sstevel@tonic-gate md_mtioctl_cnt++; 19090Sstevel@tonic-gate mutex_exit(&md_mx); 19100Sstevel@tonic-gate lock.l_flags |= MD_MT_IOCTL; 19110Sstevel@tonic-gate } 19120Sstevel@tonic-gate 19130Sstevel@tonic-gate /* 19140Sstevel@tonic-gate * this has been added to prevent notification from re-snarfing 19150Sstevel@tonic-gate * so metaunload will work. It may interfere with other modules 19160Sstevel@tonic-gate * halt process. 
19170Sstevel@tonic-gate */ 19180Sstevel@tonic-gate if (md_get_status() & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) 19190Sstevel@tonic-gate return (IOLOCK_RETURN(ENXIO, &lock)); 19200Sstevel@tonic-gate 19210Sstevel@tonic-gate /* 19220Sstevel@tonic-gate * admin device ioctls 19230Sstevel@tonic-gate */ 19240Sstevel@tonic-gate if (mnum == MD_ADM_MINOR) { 19250Sstevel@tonic-gate err = md_admin_ioctl(md_expldev(dev), cmd, (void *) data, 19267627SChris.Horne@Sun.COM mode, &lock); 19270Sstevel@tonic-gate } 19280Sstevel@tonic-gate 19290Sstevel@tonic-gate /* 19300Sstevel@tonic-gate * metadevice ioctls 19310Sstevel@tonic-gate */ 19320Sstevel@tonic-gate else if ((MD_MIN2SET(mnum) >= md_nsets) || 19330Sstevel@tonic-gate (MD_MIN2UNIT(mnum) >= md_nunits) || 19340Sstevel@tonic-gate ((ui = MDI_UNIT(mnum)) == NULL)) { 19350Sstevel@tonic-gate err = ENXIO; 19360Sstevel@tonic-gate } else if (md_ops[ui->ui_opsindex]->md_ioctl == NULL) { 19370Sstevel@tonic-gate err = ENOTTY; 19380Sstevel@tonic-gate } else { 19390Sstevel@tonic-gate err = (*md_ops[ui->ui_opsindex]->md_ioctl) 19400Sstevel@tonic-gate (dev, cmd, (void *) data, mode, &lock); 19410Sstevel@tonic-gate } 19420Sstevel@tonic-gate 19430Sstevel@tonic-gate /* 19440Sstevel@tonic-gate * drop any locks we grabbed 19450Sstevel@tonic-gate */ 19460Sstevel@tonic-gate return (IOLOCK_RETURN_IOCTLEND(err, &lock)); 19470Sstevel@tonic-gate } 19480Sstevel@tonic-gate 19490Sstevel@tonic-gate static int 19500Sstevel@tonic-gate mddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 19510Sstevel@tonic-gate { 19520Sstevel@tonic-gate minor_t mnum; 19530Sstevel@tonic-gate set_t setno; 19540Sstevel@tonic-gate mdi_unit_t *ui; 19550Sstevel@tonic-gate 19560Sstevel@tonic-gate if ((mnum = getminor(dev)) == MD_ADM_MINOR) 19570Sstevel@tonic-gate return (ENXIO); 19580Sstevel@tonic-gate 19590Sstevel@tonic-gate setno = MD_MIN2SET(mnum); 19600Sstevel@tonic-gate 19610Sstevel@tonic-gate if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits) || 19620Sstevel@tonic-gate 
((ui = MDI_UNIT(mnum)) == NULL)) 19630Sstevel@tonic-gate return (ENXIO); 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate 19660Sstevel@tonic-gate if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) 19670Sstevel@tonic-gate return (ENXIO); 19680Sstevel@tonic-gate 19690Sstevel@tonic-gate if (md_ops[ui->ui_opsindex]->md_dump != NULL) 19700Sstevel@tonic-gate return ((*md_ops[ui->ui_opsindex]->md_dump) 19710Sstevel@tonic-gate (dev, addr, blkno, nblk)); 19720Sstevel@tonic-gate 19730Sstevel@tonic-gate return (ENXIO); 19740Sstevel@tonic-gate } 19751623Stw21770 19761623Stw21770 /* 19771623Stw21770 * Metadevice unit number dispatcher 19781623Stw21770 * When this routine is called it will scan the 19791623Stw21770 * incore unit array and return the avail slot 19801623Stw21770 * hence the unit number to the caller 19811623Stw21770 * 19821623Stw21770 * Return -1 if there is nothing available 19831623Stw21770 */ 19841623Stw21770 unit_t 19851623Stw21770 md_get_nextunit(set_t setno) 19861623Stw21770 { 19871623Stw21770 unit_t un, start; 19881623Stw21770 19891623Stw21770 /* 19901623Stw21770 * If nothing available 19911623Stw21770 */ 19921623Stw21770 if (md_set[setno].s_un_avail == 0) { 19931623Stw21770 return (MD_UNITBAD); 19941623Stw21770 } 19951623Stw21770 19961623Stw21770 mutex_enter(&md_mx); 19971623Stw21770 start = un = md_set[setno].s_un_next; 19981623Stw21770 19991623Stw21770 /* LINTED: E_CONSTANT_CONDITION */ 20001623Stw21770 while (1) { 20011623Stw21770 if (md_set[setno].s_un[un] == NULL) { 20021623Stw21770 /* 20031623Stw21770 * Advance the starting index for the next 20041623Stw21770 * md_get_nextunit call 20051623Stw21770 */ 20061623Stw21770 if (un == MD_MAXUNITS - 1) { 20071623Stw21770 md_set[setno].s_un_next = 0; 20081623Stw21770 } else { 20091623Stw21770 md_set[setno].s_un_next = un + 1; 20101623Stw21770 } 20111623Stw21770 break; 20121623Stw21770 } 20131623Stw21770 20141623Stw21770 un = ((un == MD_MAXUNITS - 1) ? 
0 : un + 1); 20151623Stw21770 20161623Stw21770 if (un == start) { 20171623Stw21770 un = MD_UNITBAD; 20181623Stw21770 break; 20191623Stw21770 } 20201623Stw21770 20211623Stw21770 } 20221623Stw21770 20231623Stw21770 mutex_exit(&md_mx); 20241623Stw21770 return (un); 20251623Stw21770 } 2026