10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51623Stw21770 * Common Development and Distribution License (the "License"). 61623Stw21770 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221623Stw21770 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate /* 290Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that 300Sstevel@tonic-gate * TEXT_DOMAIN gets set to something. 310Sstevel@tonic-gate */ 320Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 330Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 340Sstevel@tonic-gate #endif 350Sstevel@tonic-gate 360Sstevel@tonic-gate /* 370Sstevel@tonic-gate * Metadevice database interfaces. 
380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate #define MDDB 410Sstevel@tonic-gate 420Sstevel@tonic-gate #include <meta.h> 430Sstevel@tonic-gate #include <sys/lvm/md_mddb.h> 440Sstevel@tonic-gate #include <sys/lvm/md_crc.h> 450Sstevel@tonic-gate #include <sys/lvm/mdio.h> 460Sstevel@tonic-gate #include <string.h> 470Sstevel@tonic-gate #include <strings.h> 480Sstevel@tonic-gate #include <ctype.h> 490Sstevel@tonic-gate 500Sstevel@tonic-gate struct svm_daemon { 510Sstevel@tonic-gate char *svmd_name; 520Sstevel@tonic-gate char *svmd_kill_val; 530Sstevel@tonic-gate }; 540Sstevel@tonic-gate 55*2614Spetede /* 56*2614Spetede * This is a list of the daemons that are not stopped by the SVM smf(5) 57*2614Spetede * services. The mdmonitord is started via svc:/system/mdmonitor:default 58*2614Spetede * but no contract(4) is constructed and so it is not stopped by smf(5). 59*2614Spetede */ 600Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = { 610Sstevel@tonic-gate {"mdmonitord", "HUP"}, 620Sstevel@tonic-gate {"mddoors", "KILL"}, 630Sstevel@tonic-gate }; 640Sstevel@tonic-gate 650Sstevel@tonic-gate #define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon)) 660Sstevel@tonic-gate 670Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); 680Sstevel@tonic-gate 690Sstevel@tonic-gate /* 700Sstevel@tonic-gate * meta_get_lb_inittime sends a request for the lb_inittime to the kernel 710Sstevel@tonic-gate */ 720Sstevel@tonic-gate md_timeval32_t 730Sstevel@tonic-gate meta_get_lb_inittime( 740Sstevel@tonic-gate mdsetname_t *sp, 750Sstevel@tonic-gate md_error_t *ep 760Sstevel@tonic-gate ) 770Sstevel@tonic-gate { 780Sstevel@tonic-gate mddb_config_t c; 790Sstevel@tonic-gate 800Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 810Sstevel@tonic-gate 820Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 830Sstevel@tonic-gate c.c_setno = sp->setno; 840Sstevel@tonic-gate 850Sstevel@tonic-gate if 
(metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) { 860Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 870Sstevel@tonic-gate } 880Sstevel@tonic-gate 890Sstevel@tonic-gate return (c.c_timestamp); 900Sstevel@tonic-gate } 910Sstevel@tonic-gate 920Sstevel@tonic-gate /* 930Sstevel@tonic-gate * mkmasterblks writes out the master blocks of the mddb to the replica. 940Sstevel@tonic-gate * 950Sstevel@tonic-gate * In a MN diskset, this is called by the node that is adding this replica 960Sstevel@tonic-gate * to the diskset. 970Sstevel@tonic-gate */ 980Sstevel@tonic-gate 990Sstevel@tonic-gate #define MDDB_VERIFY_SIZE 8192 1000Sstevel@tonic-gate 1010Sstevel@tonic-gate static int 1020Sstevel@tonic-gate mkmasterblks( 1030Sstevel@tonic-gate mdsetname_t *sp, 1040Sstevel@tonic-gate mdname_t *np, 1050Sstevel@tonic-gate int fd, 1060Sstevel@tonic-gate daddr_t firstblk, 1070Sstevel@tonic-gate int dbsize, 1080Sstevel@tonic-gate md_timeval32_t inittime, 1090Sstevel@tonic-gate md_error_t *ep 1100Sstevel@tonic-gate ) 1110Sstevel@tonic-gate { 1120Sstevel@tonic-gate int consecutive; 1130Sstevel@tonic-gate md_timeval32_t tp; 1140Sstevel@tonic-gate struct mddb_mb *mb; 1150Sstevel@tonic-gate char *buffer; 1160Sstevel@tonic-gate int iosize; 1170Sstevel@tonic-gate md_set_desc *sd; 1180Sstevel@tonic-gate int mn_set = 0; 1190Sstevel@tonic-gate daddr_t startblk; 1200Sstevel@tonic-gate int cnt; 1210Sstevel@tonic-gate ddi_devid_t devid; 1220Sstevel@tonic-gate 1230Sstevel@tonic-gate if (! metaislocalset(sp)) { 1240Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 1250Sstevel@tonic-gate return (-1); 1260Sstevel@tonic-gate 1270Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 1280Sstevel@tonic-gate mn_set = 1; /* Used later */ 1290Sstevel@tonic-gate } 1300Sstevel@tonic-gate } 1310Sstevel@tonic-gate 1320Sstevel@tonic-gate /* 1330Sstevel@tonic-gate * Loop to verify the entire mddb region on disk is read/writable. 
1340Sstevel@tonic-gate * buffer is used to write/read in at most MDDB_VERIFY_SIZE block 1350Sstevel@tonic-gate * chunks. 1360Sstevel@tonic-gate * 1370Sstevel@tonic-gate * A side-effect of this loop is to zero out the entire mddb region 1380Sstevel@tonic-gate */ 1390Sstevel@tonic-gate if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL) 1400Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate startblk = firstblk; 1430Sstevel@tonic-gate for (cnt = dbsize; cnt > 0; cnt -= consecutive) { 1440Sstevel@tonic-gate 1450Sstevel@tonic-gate if (cnt > MDDB_VERIFY_SIZE) 1460Sstevel@tonic-gate consecutive = MDDB_VERIFY_SIZE; 1470Sstevel@tonic-gate else 1480Sstevel@tonic-gate consecutive = cnt; 1490Sstevel@tonic-gate 1500Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 1510Sstevel@tonic-gate Free(buffer); 1520Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1530Sstevel@tonic-gate } 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate iosize = DEV_BSIZE * consecutive; 1560Sstevel@tonic-gate if (write(fd, buffer, iosize) != iosize) { 1570Sstevel@tonic-gate Free(buffer); 1580Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1590Sstevel@tonic-gate } 1600Sstevel@tonic-gate 1610Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 1620Sstevel@tonic-gate Free(buffer); 1630Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1640Sstevel@tonic-gate } 1650Sstevel@tonic-gate 1660Sstevel@tonic-gate if (read(fd, buffer, iosize) != iosize) { 1670Sstevel@tonic-gate Free(buffer); 1680Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1690Sstevel@tonic-gate } 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate startblk += consecutive; 1720Sstevel@tonic-gate } 1730Sstevel@tonic-gate 1740Sstevel@tonic-gate Free(buffer); 1750Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 1760Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 
1770Sstevel@tonic-gate 1780Sstevel@tonic-gate if (meta_gettimeofday(&tp) == -1) { 1790Sstevel@tonic-gate Free(mb); 1800Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1810Sstevel@tonic-gate } 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_MB; 1840Sstevel@tonic-gate /* 1850Sstevel@tonic-gate * If a MN diskset, set master block revision for a MN set. 1860Sstevel@tonic-gate * Even though the master block structure is no different 1870Sstevel@tonic-gate * for a MN set, setting the revision field to a different 1880Sstevel@tonic-gate * number keeps any pre-MN_diskset code from accessing 1890Sstevel@tonic-gate * this diskset. It also allows for an early determination 1900Sstevel@tonic-gate * of a MN diskset when reading in from disk so that the 1910Sstevel@tonic-gate * proper size locator block and locator names structure 1920Sstevel@tonic-gate * can be read in thus saving time on diskset startup. 1930Sstevel@tonic-gate */ 1940Sstevel@tonic-gate if (mn_set) 1950Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MNMB; 1960Sstevel@tonic-gate else 1970Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 1980Sstevel@tonic-gate mb->mb_timestamp = tp; 1990Sstevel@tonic-gate mb->mb_setno = sp->setno; 2000Sstevel@tonic-gate mb->mb_blkcnt = dbsize - 1; 2010Sstevel@tonic-gate mb->mb_blkno = firstblk; 2020Sstevel@tonic-gate mb->mb_nextblk = 0; 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate mb->mb_blkmap.m_firstblk = firstblk + 1; 2050Sstevel@tonic-gate mb->mb_blkmap.m_consecutive = dbsize - 1; 2060Sstevel@tonic-gate if (! metaislocalset(sp)) { 2070Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 2080Sstevel@tonic-gate } 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate /* 2110Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 2120Sstevel@tonic-gate * the master block. 
The saved devid is used to provide a mapping 2130Sstevel@tonic-gate * between this disk's devid and the devid stored into the master 2140Sstevel@tonic-gate * block. This allows the disk image to be self-identifying 2150Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 2160Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 2170Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 2180Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 2190Sstevel@tonic-gate * in the remote copy scenario. 2200Sstevel@tonic-gate */ 2210Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 2220Sstevel@tonic-gate size_t len; 2230Sstevel@tonic-gate 2240Sstevel@tonic-gate len = devid_sizeof(devid); 2250Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 2260Sstevel@tonic-gate /* there is enough space to store the devid */ 2270Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 2280Sstevel@tonic-gate mb->mb_devid_len = len; 2290Sstevel@tonic-gate (void) memcpy(mb->mb_devid, devid, len); 2300Sstevel@tonic-gate } 2310Sstevel@tonic-gate devid_free(devid); 2320Sstevel@tonic-gate } 2330Sstevel@tonic-gate 2340Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 2350Sstevel@tonic-gate (crc_skip_t *)NULL); 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 2380Sstevel@tonic-gate Free(mb); 2390Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2400Sstevel@tonic-gate } 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 2430Sstevel@tonic-gate Free(mb); 2440Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2450Sstevel@tonic-gate } 2460Sstevel@tonic-gate 2470Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 2480Sstevel@tonic-gate Free(mb); 2490Sstevel@tonic-gate return 
(mdsyserror(ep, errno, np->rname)); 2500Sstevel@tonic-gate } 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 2530Sstevel@tonic-gate Free(mb); 2540Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2550Sstevel@tonic-gate } 2560Sstevel@tonic-gate 2570Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 2580Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) { 2590Sstevel@tonic-gate Free(mb); 2600Sstevel@tonic-gate return (mdmddberror(ep, MDE_NOTVERIFIED, 2610Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, 0, np->rname)); 2620Sstevel@tonic-gate } 2630Sstevel@tonic-gate 2640Sstevel@tonic-gate Free(mb); 2650Sstevel@tonic-gate return (0); 2660Sstevel@tonic-gate } 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate void 2690Sstevel@tonic-gate meta_mkdummymaster( 2700Sstevel@tonic-gate mdsetname_t *sp, 2710Sstevel@tonic-gate int fd, 2720Sstevel@tonic-gate daddr_t firstblk 2730Sstevel@tonic-gate ) 2740Sstevel@tonic-gate { 2750Sstevel@tonic-gate md_timeval32_t tp; 2760Sstevel@tonic-gate struct mddb_mb *mb; 2770Sstevel@tonic-gate ddi_devid_t devid; 2780Sstevel@tonic-gate md_set_desc *sd; 2790Sstevel@tonic-gate md_error_t ep = mdnullerror; 2800Sstevel@tonic-gate md_timeval32_t inittime; 2810Sstevel@tonic-gate 2820Sstevel@tonic-gate /* 2830Sstevel@tonic-gate * No dummy master blocks are written for a MN diskset since devids 2840Sstevel@tonic-gate * are not supported in MN disksets. 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 2870Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, &ep)) == NULL) 2880Sstevel@tonic-gate return; 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) 2910Sstevel@tonic-gate return; 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate 2940Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 2950Sstevel@tonic-gate return; 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_DU; 2980Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 2990Sstevel@tonic-gate mb->mb_setno = sp->setno; 3000Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, &ep); 3010Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate if (meta_gettimeofday(&tp) != -1) 3040Sstevel@tonic-gate mb->mb_timestamp = tp; 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate /* 3070Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 3080Sstevel@tonic-gate * the master block. This allows the disk image to be self-identifying 3090Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 3100Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 3110Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 3120Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 3130Sstevel@tonic-gate * in the remote copy scenario. 
3140Sstevel@tonic-gate */ 3150Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 3160Sstevel@tonic-gate int len; 3170Sstevel@tonic-gate 3180Sstevel@tonic-gate len = devid_sizeof(devid); 3190Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 3200Sstevel@tonic-gate /* there is enough space to store the devid */ 3210Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 3220Sstevel@tonic-gate mb->mb_devid_len = len; 3230Sstevel@tonic-gate (void) memcpy(mb->mb_devid, (char *)devid, len); 3240Sstevel@tonic-gate } 3250Sstevel@tonic-gate devid_free(devid); 3260Sstevel@tonic-gate } 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 3290Sstevel@tonic-gate (crc_skip_t *)NULL); 3300Sstevel@tonic-gate 3310Sstevel@tonic-gate /* 3320Sstevel@tonic-gate * If any of these operations fail, we need to inform the 3330Sstevel@tonic-gate * user that the disk won't be self identifying. When support 3340Sstevel@tonic-gate * for importing remotely replicated disksets is added, we 3350Sstevel@tonic-gate * want to add the error messages here. 
3360Sstevel@tonic-gate */ 3370Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 3380Sstevel@tonic-gate goto out; 3390Sstevel@tonic-gate 3400Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) 3410Sstevel@tonic-gate goto out; 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 3440Sstevel@tonic-gate goto out; 3450Sstevel@tonic-gate 3460Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) 3470Sstevel@tonic-gate goto out; 3480Sstevel@tonic-gate 3490Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 3500Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) 3510Sstevel@tonic-gate goto out; 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate out: 3540Sstevel@tonic-gate Free(mb); 3550Sstevel@tonic-gate } 3560Sstevel@tonic-gate 3570Sstevel@tonic-gate static int 3580Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep) 3590Sstevel@tonic-gate { 3600Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 3610Sstevel@tonic-gate md_replicalist_t *rl; 3620Sstevel@tonic-gate FILE *cfp = NULL; 3630Sstevel@tonic-gate FILE *mfp = NULL; 3640Sstevel@tonic-gate struct stat sbuf; 3650Sstevel@tonic-gate int rval = 0; 3660Sstevel@tonic-gate int in_miniroot = 0; 3670Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 3680Sstevel@tonic-gate char *tname = NULL; 3690Sstevel@tonic-gate 3700Sstevel@tonic-gate /* get list of local replicas */ 3710Sstevel@tonic-gate if (! metaislocalset(sp)) 3720Sstevel@tonic-gate return (0); 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 3750Sstevel@tonic-gate return (-1); 3760Sstevel@tonic-gate 3770Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 3780Sstevel@tonic-gate if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) { 3790Sstevel@tonic-gate /* 3800Sstevel@tonic-gate * On the miniroot tmp files must be created in /var/tmp. 
3810Sstevel@tonic-gate * If we get a EROFS error, we assume that we are in the 3820Sstevel@tonic-gate * miniroot. 3830Sstevel@tonic-gate */ 3840Sstevel@tonic-gate if (errno != EROFS) 3850Sstevel@tonic-gate goto error; 3860Sstevel@tonic-gate in_miniroot = 1; 3870Sstevel@tonic-gate errno = 0; 3880Sstevel@tonic-gate tname = tempnam("/var/tmp", "slvm_"); 3890Sstevel@tonic-gate if (tname == NULL && errno == EROFS) { 3900Sstevel@tonic-gate /* 3910Sstevel@tonic-gate * If we are booted on a read-only root because 3920Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 3930Sstevel@tonic-gate * any scary error messages. 3940Sstevel@tonic-gate */ 3950Sstevel@tonic-gate errno = 0; 3960Sstevel@tonic-gate goto out; 3970Sstevel@tonic-gate } 3980Sstevel@tonic-gate 3990Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 4000Sstevel@tonic-gate if ((cfp = fopen(tname, "w+")) == NULL) 4010Sstevel@tonic-gate goto error; 4020Sstevel@tonic-gate } 4030Sstevel@tonic-gate if (stat(META_DBCONF, &sbuf) == 0) { 4040Sstevel@tonic-gate if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0) 4050Sstevel@tonic-gate goto error; 4060Sstevel@tonic-gate if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0) 4070Sstevel@tonic-gate goto error; 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate /* print header */ 4110Sstevel@tonic-gate if (fprintf(cfp, "#metadevice database location file ") == EOF) 4120Sstevel@tonic-gate goto error; 4130Sstevel@tonic-gate if (fprintf(cfp, "do not hand edit\n") < 0) 4140Sstevel@tonic-gate goto error; 4150Sstevel@tonic-gate if (fprintf(cfp, 4160Sstevel@tonic-gate "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0) 4170Sstevel@tonic-gate goto error; 4180Sstevel@tonic-gate 4190Sstevel@tonic-gate /* dump replicas */ 4200Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 4210Sstevel@tonic-gate md_replica_t *r = rl->rl_repp; 4220Sstevel@tonic-gate int checksum = 42; 4230Sstevel@tonic-gate int i; 
4240Sstevel@tonic-gate char *devidp; 4250Sstevel@tonic-gate minor_t min; 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate devidp = devid_str_encode(r->r_devid, r->r_minor_name); 4280Sstevel@tonic-gate /* If devid code can't encode devidp - skip entry */ 4290Sstevel@tonic-gate if (devidp == NULL) { 4300Sstevel@tonic-gate continue; 4310Sstevel@tonic-gate } 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate /* compute checksum */ 4340Sstevel@tonic-gate for (i = 0; ((r->r_driver_name[i] != '\0') && 4350Sstevel@tonic-gate (i < sizeof (r->r_driver_name))); i++) { 4360Sstevel@tonic-gate checksum -= r->r_driver_name[i]; 4370Sstevel@tonic-gate } 4380Sstevel@tonic-gate min = meta_getminor(r->r_namep->dev); 4390Sstevel@tonic-gate checksum -= min; 4400Sstevel@tonic-gate checksum -= r->r_blkno; 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate for (i = 0; i < strlen(devidp); i++) { 4430Sstevel@tonic-gate checksum -= devidp[i]; 4440Sstevel@tonic-gate } 4450Sstevel@tonic-gate /* print info */ 4460Sstevel@tonic-gate if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n", 4470Sstevel@tonic-gate r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) { 4480Sstevel@tonic-gate goto error; 4490Sstevel@tonic-gate } 4500Sstevel@tonic-gate 4510Sstevel@tonic-gate devid_str_free(devidp); 4520Sstevel@tonic-gate } 4530Sstevel@tonic-gate 4540Sstevel@tonic-gate /* close and rename to real file */ 4550Sstevel@tonic-gate if (fflush(cfp) != 0) 4560Sstevel@tonic-gate goto error; 4570Sstevel@tonic-gate if (fsync(fileno(cfp)) != 0) 4580Sstevel@tonic-gate goto error; 4590Sstevel@tonic-gate if (fclose(cfp) != 0) { 4600Sstevel@tonic-gate cfp = NULL; 4610Sstevel@tonic-gate goto error; 4620Sstevel@tonic-gate } 4630Sstevel@tonic-gate cfp = NULL; 4640Sstevel@tonic-gate 4650Sstevel@tonic-gate /* 4660Sstevel@tonic-gate * Renames don't work in the miniroot since tmpfiles are 4670Sstevel@tonic-gate * created in /var/tmp. Hence we copy the data out. 4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate if (! 
in_miniroot) { 4710Sstevel@tonic-gate if (rename(META_DBCONFTMP, META_DBCONF) != 0) 4720Sstevel@tonic-gate goto error; 4730Sstevel@tonic-gate } else { 4740Sstevel@tonic-gate if ((cfp = fopen(tname, "r")) == NULL) 4750Sstevel@tonic-gate goto error; 4760Sstevel@tonic-gate if ((mfp = fopen(META_DBCONF, "w+")) == NULL) 4770Sstevel@tonic-gate goto error; 4780Sstevel@tonic-gate while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) { 4790Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 4800Sstevel@tonic-gate goto error; 4810Sstevel@tonic-gate } 4820Sstevel@tonic-gate (void) fclose(cfp); 4830Sstevel@tonic-gate cfp = NULL; 4840Sstevel@tonic-gate if (fflush(mfp) != 0) 4850Sstevel@tonic-gate goto error; 4860Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 4870Sstevel@tonic-gate goto error; 4880Sstevel@tonic-gate if (fclose(mfp) != 0) { 4890Sstevel@tonic-gate mfp = NULL; 4900Sstevel@tonic-gate goto error; 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate /* delete the tempfile */ 4930Sstevel@tonic-gate (void) unlink(tname); 4940Sstevel@tonic-gate } 4950Sstevel@tonic-gate /* success */ 4960Sstevel@tonic-gate rval = 0; 4970Sstevel@tonic-gate goto out; 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate /* tempfile error */ 5000Sstevel@tonic-gate error: 5010Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname): 5020Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 5030Sstevel@tonic-gate 5040Sstevel@tonic-gate 5050Sstevel@tonic-gate /* cleanup, return success */ 5060Sstevel@tonic-gate out: 5070Sstevel@tonic-gate if (rlp != NULL) 5080Sstevel@tonic-gate metafreereplicalist(rlp); 5090Sstevel@tonic-gate if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) { 5100Sstevel@tonic-gate rval = (in_miniroot) ? 
mdsyserror(ep, errno, tname): 5110Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 5120Sstevel@tonic-gate } 5130Sstevel@tonic-gate free(tname); 5140Sstevel@tonic-gate return (rval); 5150Sstevel@tonic-gate } 5160Sstevel@tonic-gate 5170Sstevel@tonic-gate /* 5180Sstevel@tonic-gate * check replica for dev 5190Sstevel@tonic-gate */ 5200Sstevel@tonic-gate static int 5210Sstevel@tonic-gate in_replica( 5220Sstevel@tonic-gate mdsetname_t *sp, 5230Sstevel@tonic-gate md_replica_t *rp, 5240Sstevel@tonic-gate mdname_t *np, 5250Sstevel@tonic-gate diskaddr_t slblk, 5260Sstevel@tonic-gate diskaddr_t nblks, 5270Sstevel@tonic-gate md_error_t *ep 5280Sstevel@tonic-gate ) 5290Sstevel@tonic-gate { 5300Sstevel@tonic-gate mdname_t *repnp = rp->r_namep; 5310Sstevel@tonic-gate diskaddr_t rep_sblk = rp->r_blkno; 5320Sstevel@tonic-gate diskaddr_t rep_nblks = rp->r_nblk; 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate /* should be in the same set */ 5350Sstevel@tonic-gate assert(sp != NULL); 5360Sstevel@tonic-gate 5370Sstevel@tonic-gate /* if error in master block, assume whole partition */ 5380Sstevel@tonic-gate if ((rep_sblk == MD_DISKADDR_ERROR) || 5390Sstevel@tonic-gate (rep_nblks == MD_DISKADDR_ERROR)) { 5400Sstevel@tonic-gate rep_sblk = 0; 5410Sstevel@tonic-gate rep_nblks = MD_DISKADDR_ERROR; 5420Sstevel@tonic-gate } 5430Sstevel@tonic-gate 5440Sstevel@tonic-gate /* check overlap */ 5450Sstevel@tonic-gate if (meta_check_overlap( 5460Sstevel@tonic-gate MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) { 5470Sstevel@tonic-gate return (-1); 5480Sstevel@tonic-gate } 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate /* return success */ 5510Sstevel@tonic-gate return (0); 5520Sstevel@tonic-gate } 5530Sstevel@tonic-gate 5540Sstevel@tonic-gate /* 5550Sstevel@tonic-gate * check to see if we're in a replica 5560Sstevel@tonic-gate */ 5570Sstevel@tonic-gate int 5580Sstevel@tonic-gate meta_check_inreplica( 5590Sstevel@tonic-gate mdsetname_t *sp, 5600Sstevel@tonic-gate mdname_t *np, 
5610Sstevel@tonic-gate diskaddr_t slblk, 5620Sstevel@tonic-gate diskaddr_t nblks, 5630Sstevel@tonic-gate md_error_t *ep 5640Sstevel@tonic-gate ) 5650Sstevel@tonic-gate { 5660Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 5670Sstevel@tonic-gate md_replicalist_t *rl; 5680Sstevel@tonic-gate int rval = 0; 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate /* should have a set */ 5710Sstevel@tonic-gate assert(sp != NULL); 5720Sstevel@tonic-gate 5730Sstevel@tonic-gate /* for each replica */ 5740Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 5750Sstevel@tonic-gate return (-1); 5760Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 5770Sstevel@tonic-gate md_replica_t *rp = rl->rl_repp; 5780Sstevel@tonic-gate 5790Sstevel@tonic-gate /* check replica */ 5800Sstevel@tonic-gate if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) { 5810Sstevel@tonic-gate rval = -1; 5820Sstevel@tonic-gate break; 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate } 5850Sstevel@tonic-gate 5860Sstevel@tonic-gate /* cleanup, return success */ 5870Sstevel@tonic-gate metafreereplicalist(rlp); 5880Sstevel@tonic-gate return (rval); 5890Sstevel@tonic-gate } 5900Sstevel@tonic-gate 5910Sstevel@tonic-gate /* 5920Sstevel@tonic-gate * check replica 5930Sstevel@tonic-gate */ 5940Sstevel@tonic-gate int 5950Sstevel@tonic-gate meta_check_replica( 5960Sstevel@tonic-gate mdsetname_t *sp, /* set to check against */ 5970Sstevel@tonic-gate mdname_t *np, /* component to check against */ 5980Sstevel@tonic-gate mdchkopts_t options, /* option flags */ 5990Sstevel@tonic-gate diskaddr_t slblk, /* start logical block */ 6000Sstevel@tonic-gate diskaddr_t nblks, /* number of blocks (-1,rest of them) */ 6010Sstevel@tonic-gate md_error_t *ep /* error packet */ 6020Sstevel@tonic-gate ) 6030Sstevel@tonic-gate { 6040Sstevel@tonic-gate mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE; 6050Sstevel@tonic-gate 6060Sstevel@tonic-gate /* make sure we have a disk */ 6070Sstevel@tonic-gate if 
(metachkcomp(np, ep) != 0) 6080Sstevel@tonic-gate return (-1); 6090Sstevel@tonic-gate 6100Sstevel@tonic-gate /* check to ensure that it is not already in use */ 6110Sstevel@tonic-gate if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 6120Sstevel@tonic-gate return (-1); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate if (options & MDCHK_ALLOW_NODBS) 6160Sstevel@tonic-gate return (0); 6170Sstevel@tonic-gate 6180Sstevel@tonic-gate if (options & MDCHK_DRVINSET) 6190Sstevel@tonic-gate return (0); 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate /* make sure it is in the set */ 6220Sstevel@tonic-gate if (meta_check_inset(sp, np, ep) != 0) 6230Sstevel@tonic-gate return (-1); 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate /* make sure its not in a metadevice */ 6260Sstevel@tonic-gate if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0) 6270Sstevel@tonic-gate return (-1); 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate /* return success */ 6300Sstevel@tonic-gate return (0); 6310Sstevel@tonic-gate } 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate static int 6340Sstevel@tonic-gate update_dbinfo_on_drives( 6350Sstevel@tonic-gate mdsetname_t *sp, 6360Sstevel@tonic-gate md_drive_desc *dd, 6370Sstevel@tonic-gate int set_locked, 6380Sstevel@tonic-gate int force, 6390Sstevel@tonic-gate md_error_t *ep 6400Sstevel@tonic-gate ) 6410Sstevel@tonic-gate { 6420Sstevel@tonic-gate md_set_desc *sd; 6430Sstevel@tonic-gate int i; 6440Sstevel@tonic-gate md_setkey_t *cl_sk; 6450Sstevel@tonic-gate int rval = 0; 6460Sstevel@tonic-gate md_mnnode_desc *nd; 6470Sstevel@tonic-gate 6480Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 6490Sstevel@tonic-gate return (-1); 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate if (! 
set_locked) { 6520Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 6530Sstevel@tonic-gate md_error_t xep = mdnullerror; 6540Sstevel@tonic-gate sigset_t sigs; 6550Sstevel@tonic-gate /* Make sure we are blocking all signals */ 6560Sstevel@tonic-gate if (procsigs(TRUE, &sigs, &xep) < 0) 6570Sstevel@tonic-gate mdclrerror(&xep); 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate nd = sd->sd_nodelist; 6600Sstevel@tonic-gate while (nd) { 6610Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, 6620Sstevel@tonic-gate mynode()) != 0) { 6630Sstevel@tonic-gate nd = nd->nd_next; 6640Sstevel@tonic-gate continue; 6650Sstevel@tonic-gate } 6660Sstevel@tonic-gate 6670Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 6680Sstevel@tonic-gate nd = nd->nd_next; 6690Sstevel@tonic-gate continue; 6700Sstevel@tonic-gate } 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) 6730Sstevel@tonic-gate return (-1); 6740Sstevel@tonic-gate nd = nd->nd_next; 6750Sstevel@tonic-gate } 6760Sstevel@tonic-gate } else { 6770Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 6780Sstevel@tonic-gate /* Skip empty slots */ 6790Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 6800Sstevel@tonic-gate continue; 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], 6830Sstevel@tonic-gate mynode()) != 0) 6840Sstevel@tonic-gate continue; 6850Sstevel@tonic-gate 6860Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) 6870Sstevel@tonic-gate return (-1); 6880Sstevel@tonic-gate } 6890Sstevel@tonic-gate } 6900Sstevel@tonic-gate } 6910Sstevel@tonic-gate 6920Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 6930Sstevel@tonic-gate nd = sd->sd_nodelist; 6940Sstevel@tonic-gate while (nd) { 6950Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, mynode()) != 0) { 6960Sstevel@tonic-gate nd = nd->nd_next; 6970Sstevel@tonic-gate continue; 6980Sstevel@tonic-gate } 6990Sstevel@tonic-gate 7000Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 
{ 7010Sstevel@tonic-gate nd = nd->nd_next; 7020Sstevel@tonic-gate continue; 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate 7050Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep) 7060Sstevel@tonic-gate == -1) { 7070Sstevel@tonic-gate rval = -1; 7080Sstevel@tonic-gate break; 7090Sstevel@tonic-gate } 7100Sstevel@tonic-gate nd = nd->nd_next; 7110Sstevel@tonic-gate } 7120Sstevel@tonic-gate } else { 7130Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7140Sstevel@tonic-gate /* Skip empty slots */ 7150Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7160Sstevel@tonic-gate continue; 7170Sstevel@tonic-gate 7180Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], mynode()) != 0) 7190Sstevel@tonic-gate continue; 7200Sstevel@tonic-gate 7210Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep) 7220Sstevel@tonic-gate == -1) { 7230Sstevel@tonic-gate rval = -1; 7240Sstevel@tonic-gate break; 7250Sstevel@tonic-gate } 7260Sstevel@tonic-gate } 7270Sstevel@tonic-gate } 7280Sstevel@tonic-gate 7290Sstevel@tonic-gate if (! 
set_locked) { 7300Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname); 7310Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 7320Sstevel@tonic-gate nd = sd->sd_nodelist; 7330Sstevel@tonic-gate while (nd) { 7340Sstevel@tonic-gate if (force && 7350Sstevel@tonic-gate strcmp(nd->nd_nodename, mynode()) != 0) { 7360Sstevel@tonic-gate nd = nd->nd_next; 7370Sstevel@tonic-gate continue; 7380Sstevel@tonic-gate } 7390Sstevel@tonic-gate 7400Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 7410Sstevel@tonic-gate nd = nd->nd_next; 7420Sstevel@tonic-gate continue; 7430Sstevel@tonic-gate } 7440Sstevel@tonic-gate 7450Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, 7460Sstevel@tonic-gate ep)) { 7470Sstevel@tonic-gate rval = -1; 7480Sstevel@tonic-gate break; 7490Sstevel@tonic-gate } 7500Sstevel@tonic-gate nd = nd->nd_next; 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate } else { 7530Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7540Sstevel@tonic-gate /* Skip empty slots */ 7550Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7560Sstevel@tonic-gate continue; 7570Sstevel@tonic-gate 7580Sstevel@tonic-gate if (force && 7590Sstevel@tonic-gate strcmp(sd->sd_nodes[i], mynode()) != 0) 7600Sstevel@tonic-gate continue; 7610Sstevel@tonic-gate 7620Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, 7630Sstevel@tonic-gate ep)) { 7640Sstevel@tonic-gate rval = -1; 7650Sstevel@tonic-gate break; 7660Sstevel@tonic-gate } 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate } 7700Sstevel@tonic-gate cl_set_setkey(NULL); 7710Sstevel@tonic-gate } 7720Sstevel@tonic-gate 7730Sstevel@tonic-gate return (rval); 7740Sstevel@tonic-gate } 7750Sstevel@tonic-gate 7760Sstevel@tonic-gate int 7770Sstevel@tonic-gate meta_db_addsidenms( 7780Sstevel@tonic-gate mdsetname_t *sp, 7790Sstevel@tonic-gate mdname_t *np, 7800Sstevel@tonic-gate daddr_t blkno, 7810Sstevel@tonic-gate int bcast, 7820Sstevel@tonic-gate md_error_t *ep 7830Sstevel@tonic-gate ) 
7840Sstevel@tonic-gate { 7850Sstevel@tonic-gate side_t sideno; 7860Sstevel@tonic-gate char *bname = NULL; 7870Sstevel@tonic-gate char *dname = NULL; 7880Sstevel@tonic-gate minor_t mnum; 7890Sstevel@tonic-gate mddb_config_t c; 7900Sstevel@tonic-gate int done; 7910Sstevel@tonic-gate int rval = 0; 7920Sstevel@tonic-gate md_set_desc *sd; 7930Sstevel@tonic-gate 7940Sstevel@tonic-gate sideno = MD_SIDEWILD; 7950Sstevel@tonic-gate /*CONSTCOND*/ 7960Sstevel@tonic-gate while (1) { 7970Sstevel@tonic-gate if (bname != NULL) { 7980Sstevel@tonic-gate Free(bname); 7990Sstevel@tonic-gate bname = NULL; 8000Sstevel@tonic-gate } 8010Sstevel@tonic-gate if (dname != NULL) { 8020Sstevel@tonic-gate Free(dname); 8030Sstevel@tonic-gate dname = NULL; 8040Sstevel@tonic-gate } 8050Sstevel@tonic-gate if ((done = meta_getnextside_devinfo(sp, np->bname, 8060Sstevel@tonic-gate &sideno, &bname, &dname, &mnum, ep)) == -1) { 8070Sstevel@tonic-gate rval = -1; 8080Sstevel@tonic-gate break; 8090Sstevel@tonic-gate } 8100Sstevel@tonic-gate 8110Sstevel@tonic-gate if (done == 0) 8120Sstevel@tonic-gate break; 8130Sstevel@tonic-gate 8140Sstevel@tonic-gate if (! metaislocalset(sp)) { 8150Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) { 8160Sstevel@tonic-gate rval = -1; 8170Sstevel@tonic-gate break; 8180Sstevel@tonic-gate } 8190Sstevel@tonic-gate } 8200Sstevel@tonic-gate 8210Sstevel@tonic-gate /* 8220Sstevel@tonic-gate * Send addsidenms to all nodes using rpc.mdcommd if 8230Sstevel@tonic-gate * sidename is being added to MN diskset. 8240Sstevel@tonic-gate * 8250Sstevel@tonic-gate * It's ok to broadcast this call to other nodes. 8260Sstevel@tonic-gate * 8270Sstevel@tonic-gate * Note: The broadcast to other nodes isn't needed during 8280Sstevel@tonic-gate * the addition of the first mddbs to the set since the 8290Sstevel@tonic-gate * other nodes haven't been joined to the set yet. 
All 8300Sstevel@tonic-gate * nodes in a MN diskset are (implicitly) joined to the set 8310Sstevel@tonic-gate * on the addition of the first mddb. 8320Sstevel@tonic-gate */ 8330Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 8340Sstevel@tonic-gate (bcast == DB_ADDSIDENMS_BCAST)) { 8350Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 8360Sstevel@tonic-gate md_mn_msg_meta_db_newside_t db_ns; 8370Sstevel@tonic-gate int send_rval; 8380Sstevel@tonic-gate 8390Sstevel@tonic-gate db_ns.msg_l_dev = np->dev; 8400Sstevel@tonic-gate db_ns.msg_sideno = sideno; 8410Sstevel@tonic-gate db_ns.msg_blkno = blkno; 8420Sstevel@tonic-gate (void) strncpy(db_ns.msg_dname, dname, 8430Sstevel@tonic-gate sizeof (db_ns.msg_dname)); 8440Sstevel@tonic-gate (void) splitname(np->bname, &db_ns.msg_splitname); 8450Sstevel@tonic-gate db_ns.msg_mnum = mnum; 8460Sstevel@tonic-gate 8470Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 8480Sstevel@tonic-gate db_ns.msg_devid[0] = NULL; 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate /* 8510Sstevel@tonic-gate * If reconfig cycle has been started, this node is 8520Sstevel@tonic-gate * stuck in in the return step until this command has 8530Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 8540Sstevel@tonic-gate * send_message to fail (instead of retrying) 8550Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 8560Sstevel@tonic-gate * cycle to proceed. 
8570Sstevel@tonic-gate */ 8580Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 8590Sstevel@tonic-gate MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND | 8600Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns, 8610Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_newside_t), 8620Sstevel@tonic-gate &resultp, ep); 8630Sstevel@tonic-gate if (send_rval != 0) { 8640Sstevel@tonic-gate rval = -1; 8650Sstevel@tonic-gate if (resultp == NULL) 8660Sstevel@tonic-gate (void) mddserror(ep, 8670Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 8680Sstevel@tonic-gate sp->setno, NULL, NULL, 8690Sstevel@tonic-gate sp->setname); 8700Sstevel@tonic-gate else { 8710Sstevel@tonic-gate (void) mdstealerror(ep, 8720Sstevel@tonic-gate &(resultp->mmr_ep)); 8730Sstevel@tonic-gate if (mdisok(ep)) { 8740Sstevel@tonic-gate (void) mddserror(ep, 8750Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 8760Sstevel@tonic-gate sp->setno, NULL, NULL, 8770Sstevel@tonic-gate sp->setname); 8780Sstevel@tonic-gate } 8790Sstevel@tonic-gate free_result(resultp); 8800Sstevel@tonic-gate } 8810Sstevel@tonic-gate break; 8820Sstevel@tonic-gate } 8830Sstevel@tonic-gate if (resultp) 8840Sstevel@tonic-gate free_result(resultp); 8850Sstevel@tonic-gate } else { 8860Sstevel@tonic-gate /* 8870Sstevel@tonic-gate * Let this side's device name, minor # and driver name 8880Sstevel@tonic-gate * be known to the database replica. 
8890Sstevel@tonic-gate */ 8900Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 8910Sstevel@tonic-gate 8920Sstevel@tonic-gate /* Fill in device/replica info */ 8930Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 8940Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 8950Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, dname, 8960Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 8970Sstevel@tonic-gate (void) splitname(bname, &c.c_devname); 8980Sstevel@tonic-gate c.c_locator.l_mnum = mnum; 8990Sstevel@tonic-gate 9000Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 9010Sstevel@tonic-gate c.c_setno = sp->setno; 9020Sstevel@tonic-gate (void) strncpy(c.c_setname, sp->setname, 9030Sstevel@tonic-gate sizeof (c.c_setname)); 9040Sstevel@tonic-gate c.c_sideno = sideno; 9050Sstevel@tonic-gate 9060Sstevel@tonic-gate /* 9070Sstevel@tonic-gate * Don't need device id information from this ioctl 9080Sstevel@tonic-gate * Kernel determines device id from dev_t, which 9090Sstevel@tonic-gate * is just what this code would do. 
9100Sstevel@tonic-gate */ 9110Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 9120Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) { 9150Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 9160Sstevel@tonic-gate break; 9170Sstevel@tonic-gate } 9180Sstevel@tonic-gate } 9190Sstevel@tonic-gate } 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate /* cleanup, return success */ 9220Sstevel@tonic-gate if (bname != NULL) { 9230Sstevel@tonic-gate Free(bname); 9240Sstevel@tonic-gate bname = NULL; 9250Sstevel@tonic-gate } 9260Sstevel@tonic-gate if (dname != NULL) { 9270Sstevel@tonic-gate Free(dname); 9280Sstevel@tonic-gate dname = NULL; 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate return (rval); 9310Sstevel@tonic-gate } 9320Sstevel@tonic-gate 9330Sstevel@tonic-gate 9340Sstevel@tonic-gate int 9350Sstevel@tonic-gate meta_db_delsidenm( 9360Sstevel@tonic-gate mdsetname_t *sp, 9370Sstevel@tonic-gate side_t sideno, 9380Sstevel@tonic-gate mdname_t *np, 9390Sstevel@tonic-gate daddr_t blkno, 9400Sstevel@tonic-gate md_error_t *ep 9410Sstevel@tonic-gate ) 9420Sstevel@tonic-gate { 9430Sstevel@tonic-gate mddb_config_t c; 9440Sstevel@tonic-gate md_set_desc *sd; 9450Sstevel@tonic-gate 9460Sstevel@tonic-gate if (! metaislocalset(sp)) { 9470Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 9480Sstevel@tonic-gate return (-1); 9490Sstevel@tonic-gate } 9500Sstevel@tonic-gate /* Use rpc.mdcommd to delete mddb side from all nodes */ 9510Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 9520Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 9530Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 9540Sstevel@tonic-gate md_mn_msg_meta_db_delside_t db_ds; 9550Sstevel@tonic-gate int send_rval; 9560Sstevel@tonic-gate 9570Sstevel@tonic-gate db_ds.msg_l_dev = np->dev; 9580Sstevel@tonic-gate db_ds.msg_blkno = blkno; 9590Sstevel@tonic-gate db_ds.msg_sideno = sideno; 9600Sstevel@tonic-gate 9610Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 9620Sstevel@tonic-gate db_ds.msg_devid[0] = NULL; 9630Sstevel@tonic-gate 9640Sstevel@tonic-gate /* 9650Sstevel@tonic-gate * If reconfig cycle has been started, this node is 9660Sstevel@tonic-gate * stuck in in the return step until this command has 9670Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 9680Sstevel@tonic-gate * send_message to fail (instead of retrying) 9690Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 9700Sstevel@tonic-gate * cycle to proceed. 
9710Sstevel@tonic-gate */ 9720Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 9730Sstevel@tonic-gate MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND | 9740Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds, 9750Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep); 9760Sstevel@tonic-gate if (send_rval != 0) { 9770Sstevel@tonic-gate if (resultp == NULL) 9780Sstevel@tonic-gate (void) mddserror(ep, 9790Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9800Sstevel@tonic-gate sp->setno, NULL, NULL, 9810Sstevel@tonic-gate sp->setname); 9820Sstevel@tonic-gate else { 9830Sstevel@tonic-gate (void) mdstealerror(ep, &(resultp->mmr_ep)); 9840Sstevel@tonic-gate if (mdisok(ep)) { 9850Sstevel@tonic-gate (void) mddserror(ep, 9860Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9870Sstevel@tonic-gate sp->setno, NULL, NULL, 9880Sstevel@tonic-gate sp->setname); 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate free_result(resultp); 9910Sstevel@tonic-gate } 9920Sstevel@tonic-gate return (-1); 9930Sstevel@tonic-gate } 9940Sstevel@tonic-gate if (resultp) 9950Sstevel@tonic-gate free_result(resultp); 9960Sstevel@tonic-gate 9970Sstevel@tonic-gate } else { 9980Sstevel@tonic-gate /* 9990Sstevel@tonic-gate * Let this side's device name, minor # and driver name 10000Sstevel@tonic-gate * be known to the database replica. 
10010Sstevel@tonic-gate */ 10020Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 10030Sstevel@tonic-gate 10040Sstevel@tonic-gate /* Fill in device/replica info */ 10050Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 10060Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 10070Sstevel@tonic-gate 10080Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 10090Sstevel@tonic-gate c.c_setno = sp->setno; 10100Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 10110Sstevel@tonic-gate c.c_sideno = sideno; 10120Sstevel@tonic-gate 10130Sstevel@tonic-gate /* 10140Sstevel@tonic-gate * Don't need device id information from this ioctl 10150Sstevel@tonic-gate * Kernel determines device id from dev_t, which 10160Sstevel@tonic-gate * is just what this code would do. 10170Sstevel@tonic-gate */ 10180Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 10190Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 10200Sstevel@tonic-gate 10210Sstevel@tonic-gate if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0) 10220Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 10230Sstevel@tonic-gate } 10240Sstevel@tonic-gate return (0); 10250Sstevel@tonic-gate } 10260Sstevel@tonic-gate 10270Sstevel@tonic-gate 10280Sstevel@tonic-gate static int 10290Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep) 10300Sstevel@tonic-gate { 10310Sstevel@tonic-gate mdnamelist_t *dnp1, *dnp2; 10320Sstevel@tonic-gate 10330Sstevel@tonic-gate for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) { 10340Sstevel@tonic-gate for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) { 10350Sstevel@tonic-gate if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0) 10360Sstevel@tonic-gate return (mderror(ep, MDE_DUPDRIVE, 10370Sstevel@tonic-gate dnp1->namep->cname)); 10380Sstevel@tonic-gate } 10390Sstevel@tonic-gate } 10400Sstevel@tonic-gate return (0); 10410Sstevel@tonic-gate } 10420Sstevel@tonic-gate 10430Sstevel@tonic-gate 10440Sstevel@tonic-gate /* 10450Sstevel@tonic-gate 
/*
 * Return 1 if files are different, else return 0
 *
 * A file that cannot be stat'ed, opened or read in full, and a failed
 * memory allocation, are all reported as "different" (1), so a caller
 * never skips an update because of an error here.
 */

/*
 * Read exactly len bytes from the start of path into buf.
 * Returns 0 on success, -1 if the file cannot be opened or is shorter.
 */
static int
filediff_readall(const char *path, char *buf, size_t len)
{
	size_t	done = 0;
	int	fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		return (-1);

	/* read() may legitimately return less than asked for; keep going */
	while (done < len) {
		ssize_t	got = read(fd, buf + done, len - done);

		if (got <= 0)
			break;
		done += (size_t)got;
	}
	(void) close(fd);

	return (done == len ? 0 : -1);
}

static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		tsz;
	struct stat	sbuf;
	char		*tbuf, *buf;

	/* files of different size can not be identical */
	if (stat(tsname, &sbuf) != 0)
		return (1);
	tsz = (size_t)sbuf.st_size;
	if (stat(sname, &sbuf) != 0)
		return (1);
	if (tsz != (size_t)sbuf.st_size)
		return (1);

	/* two empty files are identical; malloc(0) may return NULL */
	if (tsz == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(tsz);
	buf = malloc(tsz);
	if (tbuf != NULL && buf != NULL &&
	    filediff_readall(tsname, tbuf, tsz) == 0 &&
	    filediff_readall(sname, buf, tsz) == 0) {
		/* compare content, normalised to the documented 0/1 */
		ret = (memcmp(tbuf, buf, tsz) != 0);
	}

	free(tbuf);
	free(buf);
	return (ret);
}
10950Sstevel@tonic-gate 10960Sstevel@tonic-gate /* 10970Sstevel@tonic-gate * patch md.conf file with mddb locations 10980Sstevel@tonic-gate */ 10990Sstevel@tonic-gate int 11000Sstevel@tonic-gate meta_db_patch( 11010Sstevel@tonic-gate char *sname, /* system file name */ 11020Sstevel@tonic-gate char *cname, /* mddb.cf file name */ 11030Sstevel@tonic-gate int patch, /* patching locally */ 11040Sstevel@tonic-gate md_error_t *ep 11050Sstevel@tonic-gate ) 11060Sstevel@tonic-gate { 11070Sstevel@tonic-gate char *tsname = NULL; 11080Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 11090Sstevel@tonic-gate FILE *tsfp = NULL; 11100Sstevel@tonic-gate FILE *mfp = NULL; 11110Sstevel@tonic-gate int rval = -1; 11120Sstevel@tonic-gate 11130Sstevel@tonic-gate /* check names */ 11140Sstevel@tonic-gate if (sname == NULL) { 11150Sstevel@tonic-gate if (patch) 11160Sstevel@tonic-gate sname = "md.conf"; 11170Sstevel@tonic-gate else 11180Sstevel@tonic-gate sname = "/kernel/drv/md.conf"; 11190Sstevel@tonic-gate } 11200Sstevel@tonic-gate if (cname == NULL) 11210Sstevel@tonic-gate cname = META_DBCONF; 11220Sstevel@tonic-gate 11230Sstevel@tonic-gate /* 11240Sstevel@tonic-gate * edit file 11250Sstevel@tonic-gate */ 11260Sstevel@tonic-gate if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) { 11270Sstevel@tonic-gate if (mdissyserror(ep, EROFS)) { 11280Sstevel@tonic-gate /* 11290Sstevel@tonic-gate * If we are booted on a read-only root because 11300Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 11310Sstevel@tonic-gate * any scary error messages. 
11320Sstevel@tonic-gate */ 11330Sstevel@tonic-gate mdclrerror(ep); 11340Sstevel@tonic-gate rval = 0; 11350Sstevel@tonic-gate } 11360Sstevel@tonic-gate goto out; 11370Sstevel@tonic-gate } 11380Sstevel@tonic-gate 11392063Shshaw if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0, 11400Sstevel@tonic-gate ep) != 0) 11410Sstevel@tonic-gate goto out; 11420Sstevel@tonic-gate 11430Sstevel@tonic-gate /* if file content is identical, skip rename */ 11440Sstevel@tonic-gate if (filediff(tsname, sname) == 0) { 11450Sstevel@tonic-gate rval = 0; 11460Sstevel@tonic-gate goto out; 11470Sstevel@tonic-gate } 11480Sstevel@tonic-gate 11490Sstevel@tonic-gate if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) || 11500Sstevel@tonic-gate (fclose(tsfp) != 0)) { 11510Sstevel@tonic-gate (void) mdsyserror(ep, errno, tsname); 11520Sstevel@tonic-gate goto out; 11530Sstevel@tonic-gate } 11540Sstevel@tonic-gate 11550Sstevel@tonic-gate tsfp = NULL; 11560Sstevel@tonic-gate 11570Sstevel@tonic-gate /* 11580Sstevel@tonic-gate * rename file. If we get a Cross Device error then it 11590Sstevel@tonic-gate * is because we are in the miniroot. 
11600Sstevel@tonic-gate */ 11610Sstevel@tonic-gate if (rename(tsname, sname) != 0 && errno != EXDEV) { 11620Sstevel@tonic-gate (void) mdsyserror(ep, errno, sname); 11630Sstevel@tonic-gate goto out; 11640Sstevel@tonic-gate } 11650Sstevel@tonic-gate 11660Sstevel@tonic-gate if (errno == EXDEV) { 11670Sstevel@tonic-gate if ((tsfp = fopen(tsname, "r")) == NULL) 11680Sstevel@tonic-gate goto out; 11690Sstevel@tonic-gate if ((mfp = fopen(sname, "w+")) == NULL) 11700Sstevel@tonic-gate goto out; 11710Sstevel@tonic-gate while (fgets(line, sizeof (line), tsfp) != NULL) { 11720Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 11730Sstevel@tonic-gate goto out; 11740Sstevel@tonic-gate } 11750Sstevel@tonic-gate (void) fclose(tsfp); 11760Sstevel@tonic-gate tsfp = NULL; 11770Sstevel@tonic-gate if (fflush(mfp) != 0) 11780Sstevel@tonic-gate goto out; 11790Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 11800Sstevel@tonic-gate goto out; 11810Sstevel@tonic-gate if (fclose(mfp) != 0) { 11820Sstevel@tonic-gate mfp = NULL; 11830Sstevel@tonic-gate goto out; 11840Sstevel@tonic-gate } 11850Sstevel@tonic-gate } 11860Sstevel@tonic-gate 11870Sstevel@tonic-gate Free(tsname); 11880Sstevel@tonic-gate tsname = NULL; 11890Sstevel@tonic-gate rval = 0; 11900Sstevel@tonic-gate 11910Sstevel@tonic-gate /* cleanup, return error */ 11920Sstevel@tonic-gate out: 11930Sstevel@tonic-gate if (tsfp != NULL) 11940Sstevel@tonic-gate (void) fclose(tsfp); 11950Sstevel@tonic-gate if (tsname != NULL) { 11960Sstevel@tonic-gate (void) unlink(tsname); 11970Sstevel@tonic-gate Free(tsname); 11980Sstevel@tonic-gate } 11990Sstevel@tonic-gate return (rval); 12000Sstevel@tonic-gate } 12010Sstevel@tonic-gate 12020Sstevel@tonic-gate /* 12030Sstevel@tonic-gate * Add replicas to set. 
This happens as a result of: 12040Sstevel@tonic-gate * - metadb [-s set_name] -a 12050Sstevel@tonic-gate * - metaset -s set_name -a disk 12060Sstevel@tonic-gate * - metaset -s set_name -d disk (causes a rebalance of mddbs) 12070Sstevel@tonic-gate * - metaset -s set_name -b 12080Sstevel@tonic-gate * 12090Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 12100Sstevel@tonic-gate * 12110Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 12120Sstevel@tonic-gate * is running the metaset command. 12130Sstevel@tonic-gate * 12140Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 12150Sstevel@tonic-gate * running the metaset command. If this is the first mddb added to 12160Sstevel@tonic-gate * the MN diskset, then no communication is made to other nodes via commd 12170Sstevel@tonic-gate * since the other nodes will be in-sync with respect to the mddbs when 12180Sstevel@tonic-gate * those other nodes join the set and snarf in the newly created mddb. 12190Sstevel@tonic-gate * If this is not the first mddb added to the MN diskset, then this 12200Sstevel@tonic-gate * attach command is sent to all of the nodes using commd. This keeps 12210Sstevel@tonic-gate * the nodes in-sync. 
12220Sstevel@tonic-gate */ 12230Sstevel@tonic-gate int 12240Sstevel@tonic-gate meta_db_attach( 12250Sstevel@tonic-gate mdsetname_t *sp, 12260Sstevel@tonic-gate mdnamelist_t *db_nlp, 12270Sstevel@tonic-gate mdchkopts_t options, 12280Sstevel@tonic-gate md_timeval32_t *timeval, 12290Sstevel@tonic-gate int dbcnt, 12300Sstevel@tonic-gate int dbsize, 12310Sstevel@tonic-gate char *sysfilename, 12320Sstevel@tonic-gate md_error_t *ep 12330Sstevel@tonic-gate ) 12340Sstevel@tonic-gate { 12350Sstevel@tonic-gate struct mddb_config c; 12360Sstevel@tonic-gate mdnamelist_t *nlp; 12370Sstevel@tonic-gate mdname_t *np; 12380Sstevel@tonic-gate md_drive_desc *dd = NULL; 12390Sstevel@tonic-gate md_drive_desc *p; 12400Sstevel@tonic-gate int i; 12410Sstevel@tonic-gate int fd; 12420Sstevel@tonic-gate side_t sideno; 12430Sstevel@tonic-gate daddr_t blkno; 12440Sstevel@tonic-gate int replicacount = 0; 1245*2614Spetede int start_svmdaemons = 0; 12460Sstevel@tonic-gate int rval = 0; 12470Sstevel@tonic-gate md_error_t status = mdnullerror; 12480Sstevel@tonic-gate md_set_desc *sd; 12490Sstevel@tonic-gate int stale_bool = FALSE; 12500Sstevel@tonic-gate int flags; 12510Sstevel@tonic-gate int firstmddb = 1; 12520Sstevel@tonic-gate md_timeval32_t inittime = {0, 0}; 12530Sstevel@tonic-gate 12540Sstevel@tonic-gate /* 12550Sstevel@tonic-gate * Error if we don't get some work to do. 
12560Sstevel@tonic-gate */ 12570Sstevel@tonic-gate if (db_nlp == NULL) 12580Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 12590Sstevel@tonic-gate 12600Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 12610Sstevel@tonic-gate return (-1); 12620Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 12630Sstevel@tonic-gate c.c_id = 0; 12640Sstevel@tonic-gate c.c_setno = sp->setno; 12650Sstevel@tonic-gate 12660Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 12670Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 12680Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 12690Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 12700Sstevel@tonic-gate if (metaislocalset(sp)) { 12710Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) 12720Sstevel@tonic-gate mdclrerror(&c.c_mde); 12730Sstevel@tonic-gate else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) || 12740Sstevel@tonic-gate (! (options & MDCHK_ALLOW_NODBS))) 12750Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 12760Sstevel@tonic-gate } else { 12770Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER)) 12780Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 12790Sstevel@tonic-gate } 12800Sstevel@tonic-gate mdclrerror(&c.c_mde); 12810Sstevel@tonic-gate } 12820Sstevel@tonic-gate /* 12830Sstevel@tonic-gate * Is current set STALE? 
12840Sstevel@tonic-gate */ 12850Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 12860Sstevel@tonic-gate stale_bool = TRUE; 12870Sstevel@tonic-gate } 12880Sstevel@tonic-gate 12890Sstevel@tonic-gate assert(db_nlp != NULL); 12900Sstevel@tonic-gate 1291*2614Spetede /* if these are the first replicas then the SVM daemons need to run */ 12920Sstevel@tonic-gate if (c.c_dbcnt == 0) 1293*2614Spetede start_svmdaemons = 1; 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate /* 12960Sstevel@tonic-gate * check to see if we will go over the total possible number 12970Sstevel@tonic-gate * of data bases 12980Sstevel@tonic-gate */ 12990Sstevel@tonic-gate nlp = db_nlp; 13000Sstevel@tonic-gate while (nlp) { 13010Sstevel@tonic-gate replicacount += dbcnt; 13020Sstevel@tonic-gate nlp = nlp->next; 13030Sstevel@tonic-gate } 13040Sstevel@tonic-gate 13050Sstevel@tonic-gate if ((replicacount + c.c_dbcnt) > c.c_dbmax) 13060Sstevel@tonic-gate return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32, 13070Sstevel@tonic-gate sp->setno, c.c_dbcnt + replicacount, NULL)); 13080Sstevel@tonic-gate 13090Sstevel@tonic-gate /* 13100Sstevel@tonic-gate * go through and check to make sure all locations specified 13110Sstevel@tonic-gate * are legal also pick out driver name; 13120Sstevel@tonic-gate */ 13130Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13140Sstevel@tonic-gate diskaddr_t devsize; 13150Sstevel@tonic-gate 13160Sstevel@tonic-gate np = nlp->namep; 13170Sstevel@tonic-gate 13180Sstevel@tonic-gate if (! metaislocalset(sp)) { 13190Sstevel@tonic-gate uint_t partno; 13200Sstevel@tonic-gate uint_t rep_partno; 13210Sstevel@tonic-gate mddrivename_t *dnp = np->drivenamep; 13220Sstevel@tonic-gate 13230Sstevel@tonic-gate /* 13240Sstevel@tonic-gate * make sure that non-local database replicas 13250Sstevel@tonic-gate * are always on the replica slice. 
13260Sstevel@tonic-gate */ 13270Sstevel@tonic-gate if (meta_replicaslice(dnp, 13280Sstevel@tonic-gate &rep_partno, ep) != 0) 13290Sstevel@tonic-gate return (-1); 13300Sstevel@tonic-gate if (metagetvtoc(np, FALSE, &partno, ep) == NULL) 13310Sstevel@tonic-gate return (-1); 13320Sstevel@tonic-gate if (partno != rep_partno) 13330Sstevel@tonic-gate return (mddeverror(ep, MDE_REPCOMP_ONLY, 13340Sstevel@tonic-gate np->dev, sp->setname)); 13350Sstevel@tonic-gate } 13360Sstevel@tonic-gate 13370Sstevel@tonic-gate if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize), 13380Sstevel@tonic-gate ep)) { 13390Sstevel@tonic-gate return (-1); 13400Sstevel@tonic-gate } 13410Sstevel@tonic-gate 13420Sstevel@tonic-gate if ((devsize = metagetsize(np, ep)) == -1) 13430Sstevel@tonic-gate return (-1); 13440Sstevel@tonic-gate 13450Sstevel@tonic-gate if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16)) 13460Sstevel@tonic-gate return (mdmddberror(ep, MDE_REPLICA_TOOSMALL, 13470Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, devsize, 13480Sstevel@tonic-gate np->cname)); 13490Sstevel@tonic-gate } 13500Sstevel@tonic-gate 13510Sstevel@tonic-gate /* 13520Sstevel@tonic-gate * If first disk in set we don't have lb_inittime yet for use as 13530Sstevel@tonic-gate * mb_setcreatetime so don't go looking for it. WE'll come back 13540Sstevel@tonic-gate * later and update after the locator block has been created. 13550Sstevel@tonic-gate * If this isn't the first disk in the set, we have a locator 13560Sstevel@tonic-gate * block and thus we have lb_inittime. Set mb_setcreatetime to 13570Sstevel@tonic-gate * lb_inittime. 13580Sstevel@tonic-gate */ 13590Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 13600Sstevel@tonic-gate if (c.c_dbcnt != 0) { 13610Sstevel@tonic-gate firstmddb = 0; 13620Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, ep); 13630Sstevel@tonic-gate } 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate /* 13670Sstevel@tonic-gate * go through and write all master blocks 13680Sstevel@tonic-gate */ 13690Sstevel@tonic-gate 13700Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13710Sstevel@tonic-gate np = nlp->namep; 13720Sstevel@tonic-gate 13730Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) < 0) 13740Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 13750Sstevel@tonic-gate 13760Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 13770Sstevel@tonic-gate if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize, 13780Sstevel@tonic-gate inittime, ep)) { 13790Sstevel@tonic-gate (void) close(fd); 13800Sstevel@tonic-gate return (-1); 13810Sstevel@tonic-gate } 13820Sstevel@tonic-gate } 13830Sstevel@tonic-gate (void) close(fd); 13840Sstevel@tonic-gate } 13850Sstevel@tonic-gate 13860Sstevel@tonic-gate if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) 13870Sstevel@tonic-gate return (-1); 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate if (! metaislocalset(sp)) { 13900Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 13910Sstevel@tonic-gate if (! 
mdisok(ep)) 13920Sstevel@tonic-gate return (-1); 13930Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 13940Sstevel@tonic-gate return (-1); 13950Sstevel@tonic-gate 13960Sstevel@tonic-gate } 13970Sstevel@tonic-gate 13980Sstevel@tonic-gate /* 13990Sstevel@tonic-gate * go through and tell kernel to add them 14000Sstevel@tonic-gate */ 14010Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 14020Sstevel@tonic-gate mdcinfo_t *cinfo; 14030Sstevel@tonic-gate 14040Sstevel@tonic-gate np = nlp->namep; 14050Sstevel@tonic-gate 14060Sstevel@tonic-gate if ((cinfo = metagetcinfo(np, ep)) == NULL) { 14070Sstevel@tonic-gate rval = -1; 14080Sstevel@tonic-gate goto out; 14090Sstevel@tonic-gate } 14100Sstevel@tonic-gate 14110Sstevel@tonic-gate /* 14120Sstevel@tonic-gate * If mddb is being added to MN diskset and there already 14130Sstevel@tonic-gate * exists a valid mddb in the set (which equates to this 14140Sstevel@tonic-gate * node being an owner of the set) then use rpc.mdcommd 14150Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 14160Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 14170Sstevel@tonic-gate * can't write the message to the mddb. 14180Sstevel@tonic-gate * 14190Sstevel@tonic-gate * Otherwise, just add mddb to this node. 14200Sstevel@tonic-gate */ 14210Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 14220Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 14230Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 14240Sstevel@tonic-gate md_mn_msg_meta_db_attach_t attach; 14250Sstevel@tonic-gate int send_rval; 14260Sstevel@tonic-gate 14270Sstevel@tonic-gate /* 14280Sstevel@tonic-gate * In a scenario where new replicas had been added on 14290Sstevel@tonic-gate * the master, and then all of the old replicas failed 14300Sstevel@tonic-gate * before the slaves had knowledge of the new replicas, 14310Sstevel@tonic-gate * the slaves are unable to re-parse in the mddb 14320Sstevel@tonic-gate * from the new replicas since the slaves have no 14330Sstevel@tonic-gate * knowledge of the new replicas. The following 14340Sstevel@tonic-gate * algorithm solves this problem: 14350Sstevel@tonic-gate * - META_DB_ATTACH message generates submsgs 14360Sstevel@tonic-gate * - BLOCK parse (master) 14370Sstevel@tonic-gate * - MDDB_ATTACH new replicas 14380Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 14390Sstevel@tonic-gate * information to be sent from master 14400Sstevel@tonic-gate * to slaves at a higher class than the 14410Sstevel@tonic-gate * unblock so the parse message will 14420Sstevel@tonic-gate * reach slaves before unblock message. 
14430Sstevel@tonic-gate */ 14440Sstevel@tonic-gate attach.msg_l_dev = np->dev; 14450Sstevel@tonic-gate attach.msg_cnt = dbcnt; 14460Sstevel@tonic-gate attach.msg_dbsize = dbsize; 14470Sstevel@tonic-gate (void) strncpy(attach.msg_dname, cinfo->dname, 14480Sstevel@tonic-gate sizeof (attach.msg_dname)); 14490Sstevel@tonic-gate (void) splitname(np->bname, &attach.msg_splitname); 14500Sstevel@tonic-gate attach.msg_options = options; 14510Sstevel@tonic-gate 14520Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 14530Sstevel@tonic-gate attach.msg_devid[0] = NULL; 14540Sstevel@tonic-gate 14550Sstevel@tonic-gate /* 14560Sstevel@tonic-gate * If reconfig cycle has been started, this node is 14570Sstevel@tonic-gate * stuck in in the return step until this command has 14580Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 14590Sstevel@tonic-gate * send_message to fail (instead of retrying) 14600Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 14610Sstevel@tonic-gate * cycle to proceed. 
14620Sstevel@tonic-gate */ 14630Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 14640Sstevel@tonic-gate if (stale_bool == TRUE) 14650Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 14660Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 14670Sstevel@tonic-gate MD_MN_MSG_META_DB_ATTACH, 14680Sstevel@tonic-gate flags, (char *)&attach, 14690Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_attach_t), 14700Sstevel@tonic-gate &resultp, ep); 14710Sstevel@tonic-gate if (send_rval != 0) { 14720Sstevel@tonic-gate rval = -1; 14730Sstevel@tonic-gate if (resultp == NULL) 14740Sstevel@tonic-gate (void) mddserror(ep, 14750Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 14760Sstevel@tonic-gate sp->setno, NULL, NULL, 14770Sstevel@tonic-gate sp->setname); 14780Sstevel@tonic-gate else { 14790Sstevel@tonic-gate (void) mdstealerror(ep, 14800Sstevel@tonic-gate &(resultp->mmr_ep)); 14810Sstevel@tonic-gate if (mdisok(ep)) { 14820Sstevel@tonic-gate (void) mddserror(ep, 14830Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 14840Sstevel@tonic-gate sp->setno, NULL, NULL, 14850Sstevel@tonic-gate sp->setname); 14860Sstevel@tonic-gate } 14870Sstevel@tonic-gate free_result(resultp); 14880Sstevel@tonic-gate } 14890Sstevel@tonic-gate goto out; 14900Sstevel@tonic-gate } 14910Sstevel@tonic-gate if (resultp) 14920Sstevel@tonic-gate free_result(resultp); 14930Sstevel@tonic-gate } else { 14940Sstevel@tonic-gate /* Adding mddb(s) to just this node */ 14950Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 14960Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 14970Sstevel@tonic-gate /* Fill in device/replica info */ 14980Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 14990Sstevel@tonic-gate c.c_locator.l_blkno = i * dbsize + 16; 15000Sstevel@tonic-gate blkno = c.c_locator.l_blkno; 15010Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, cinfo->dname, 15020Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 15030Sstevel@tonic-gate (void) splitname(np->bname, &c.c_devname); 
15040Sstevel@tonic-gate c.c_locator.l_mnum = meta_getminor(np->dev); 15050Sstevel@tonic-gate 15060Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 15070Sstevel@tonic-gate c.c_setno = sp->setno; 15080Sstevel@tonic-gate if (! metaislocalset(sp)) { 15090Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 15100Sstevel@tonic-gate c.c_multi_node = 1; 15110Sstevel@tonic-gate } 15120Sstevel@tonic-gate } 15130Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 15140Sstevel@tonic-gate c.c_sideno = sideno; 15150Sstevel@tonic-gate 15160Sstevel@tonic-gate /* 15170Sstevel@tonic-gate * Don't need device id information from this ioctl 15180Sstevel@tonic-gate * Kernel determines device id from dev_t, which 15190Sstevel@tonic-gate * is just what this code would do. 15200Sstevel@tonic-gate */ 15210Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 15220Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 15230Sstevel@tonic-gate 15240Sstevel@tonic-gate if (timeval != NULL) 15250Sstevel@tonic-gate c.c_timestamp = *timeval; 15260Sstevel@tonic-gate 15270Sstevel@tonic-gate if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE), 15280Sstevel@tonic-gate ep)) { 15290Sstevel@tonic-gate rval = -1; 15300Sstevel@tonic-gate goto out; 15310Sstevel@tonic-gate } 15320Sstevel@tonic-gate 15330Sstevel@tonic-gate if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) { 15340Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 15350Sstevel@tonic-gate goto out; 15360Sstevel@tonic-gate } 15370Sstevel@tonic-gate /* 15380Sstevel@tonic-gate * This is either a traditional diskset OR this 15390Sstevel@tonic-gate * is the first replica added to a MN diskset. 15400Sstevel@tonic-gate * In either case, set broadcast to NO_BCAST so 15410Sstevel@tonic-gate * that message won't go through rpc.mdcommd. 15420Sstevel@tonic-gate * If this is a traditional diskset, the bcast 15430Sstevel@tonic-gate * flag is ignored since traditional disksets 15440Sstevel@tonic-gate * don't use the rpc.mdcommd. 
15450Sstevel@tonic-gate */ 15460Sstevel@tonic-gate if (meta_db_addsidenms(sp, np, blkno, 15470Sstevel@tonic-gate DB_ADDSIDENMS_NO_BCAST, ep)) 15480Sstevel@tonic-gate goto out; 15490Sstevel@tonic-gate } 15500Sstevel@tonic-gate } 15510Sstevel@tonic-gate if (! metaislocalset(sp)) { 15520Sstevel@tonic-gate /* update the dbcnt and size in dd */ 15530Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) 15540Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 15550Sstevel@tonic-gate p->dd_dbcnt = dbcnt; 15560Sstevel@tonic-gate p->dd_dbsize = dbsize; 15570Sstevel@tonic-gate break; 15580Sstevel@tonic-gate } 15590Sstevel@tonic-gate } 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate /* 15620Sstevel@tonic-gate * If this was the first addition of disks to the 15630Sstevel@tonic-gate * diskset you now need to update the mb_setcreatetime 15640Sstevel@tonic-gate * which needed lb_inittime which wasn't there until now. 15650Sstevel@tonic-gate */ 15660Sstevel@tonic-gate if (firstmddb) { 15670Sstevel@tonic-gate if (meta_update_mb(sp, dd, ep) != 0) { 15680Sstevel@tonic-gate return (-1); 15690Sstevel@tonic-gate } 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate (void) close(fd); 15720Sstevel@tonic-gate } 15730Sstevel@tonic-gate 15740Sstevel@tonic-gate out: 15750Sstevel@tonic-gate if (metaislocalset(sp)) { 15760Sstevel@tonic-gate 15770Sstevel@tonic-gate /* everything looks fine. 
Start mdmonitord */ 1578*2614Spetede if (rval == 0 && start_svmdaemons == 1) { 15790Sstevel@tonic-gate if (meta_smf_enable(META_SMF_CORE, &status) == -1) { 15800Sstevel@tonic-gate mde_perror(&status, ""); 15810Sstevel@tonic-gate mdclrerror(&status); 15820Sstevel@tonic-gate } 15830Sstevel@tonic-gate } 15840Sstevel@tonic-gate 15850Sstevel@tonic-gate if (buildconf(sp, &status)) { 15860Sstevel@tonic-gate /* Don't mask any previous errors */ 15870Sstevel@tonic-gate if (rval == 0) 15880Sstevel@tonic-gate rval = mdstealerror(ep, &status); 15890Sstevel@tonic-gate return (rval); 15900Sstevel@tonic-gate } 15910Sstevel@tonic-gate 15920Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 15930Sstevel@tonic-gate /* Don't mask any previous errors */ 15940Sstevel@tonic-gate if (rval == 0) 15950Sstevel@tonic-gate rval = mdstealerror(ep, &status); 15960Sstevel@tonic-gate } 15970Sstevel@tonic-gate } else { 15980Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 15990Sstevel@tonic-gate (options & MDCHK_SET_LOCKED), 16000Sstevel@tonic-gate (options & MDCHK_SET_FORCE), 16010Sstevel@tonic-gate &status)) { 16020Sstevel@tonic-gate /* Don't mask any previous errors */ 16030Sstevel@tonic-gate if (rval == 0) 16040Sstevel@tonic-gate rval = mdstealerror(ep, &status); 16050Sstevel@tonic-gate else 16060Sstevel@tonic-gate mdclrerror(&status); 16070Sstevel@tonic-gate } 16080Sstevel@tonic-gate metafreedrivedesc(&dd); 16090Sstevel@tonic-gate } 16100Sstevel@tonic-gate /* 16110Sstevel@tonic-gate * For MN disksets that already had already had nodes joined 16120Sstevel@tonic-gate * before the attach of this mddb(s), the name invalidation is 16130Sstevel@tonic-gate * done by the commd handler routine. Otherwise, if this 16140Sstevel@tonic-gate * is the first attach of a MN diskset mddb, the invalidation 16150Sstevel@tonic-gate * must be done here since the first attach cannot be sent 16160Sstevel@tonic-gate * via the commd since there are no nodes joined to the set yet. 
16170Sstevel@tonic-gate */ 16180Sstevel@tonic-gate if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) || 16190Sstevel@tonic-gate (MD_MNSET_DESC(sd) && 16200Sstevel@tonic-gate (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) { 16210Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 16220Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 16230Sstevel@tonic-gate } 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate return (rval); 16260Sstevel@tonic-gate } 16270Sstevel@tonic-gate 16280Sstevel@tonic-gate /* 16290Sstevel@tonic-gate * deletelist_length 16300Sstevel@tonic-gate * 16310Sstevel@tonic-gate * return the number of slices that have been specified for deletion 16320Sstevel@tonic-gate * on the metadb command line. This does not calculate the number 16330Sstevel@tonic-gate * of replicas because there may be multiple replicas per slice. 16340Sstevel@tonic-gate */ 16350Sstevel@tonic-gate static int 16360Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp) 16370Sstevel@tonic-gate { 16380Sstevel@tonic-gate 16390Sstevel@tonic-gate mdnamelist_t *nlp; 16400Sstevel@tonic-gate int list_length = 0; 16410Sstevel@tonic-gate 16420Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 16430Sstevel@tonic-gate list_length++; 16440Sstevel@tonic-gate } 16450Sstevel@tonic-gate 16460Sstevel@tonic-gate return (list_length); 16470Sstevel@tonic-gate } 16480Sstevel@tonic-gate 16490Sstevel@tonic-gate static int 16500Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp) 16510Sstevel@tonic-gate { 16520Sstevel@tonic-gate 16530Sstevel@tonic-gate mdnamelist_t *nlp; 16540Sstevel@tonic-gate mdname_t *np; 16550Sstevel@tonic-gate int index = 0; 16560Sstevel@tonic-gate 16570Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 16580Sstevel@tonic-gate np = nlp->namep; 16590Sstevel@tonic-gate 16600Sstevel@tonic-gate if (strcmp(devname, np->bname) == 0) 16610Sstevel@tonic-gate return (index); 16620Sstevel@tonic-gate index++; 
16630Sstevel@tonic-gate } 16640Sstevel@tonic-gate 16650Sstevel@tonic-gate return (-1); 16660Sstevel@tonic-gate } 16670Sstevel@tonic-gate 16680Sstevel@tonic-gate /* 16690Sstevel@tonic-gate * Delete replicas from set. This happens as a result of: 16700Sstevel@tonic-gate * - metadb [-s set_name] -d 16710Sstevel@tonic-gate * - metaset -s set_name -a disk (causes a rebalance of mddbs) 16720Sstevel@tonic-gate * - metaset -s set_name -d disk 16730Sstevel@tonic-gate * - metaset -s set_name -b 16740Sstevel@tonic-gate * 16750Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 16760Sstevel@tonic-gate * 16770Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 16780Sstevel@tonic-gate * is running the metaset command. 16790Sstevel@tonic-gate * 16800Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 16810Sstevel@tonic-gate * running the metaset command. This detach routine is sent to all 16820Sstevel@tonic-gate * of the joined nodes in the diskset using commd. This keeps 16830Sstevel@tonic-gate * the nodes in-sync. 
16840Sstevel@tonic-gate */ 16850Sstevel@tonic-gate int 16860Sstevel@tonic-gate meta_db_detach( 16870Sstevel@tonic-gate mdsetname_t *sp, 16880Sstevel@tonic-gate mdnamelist_t *db_nlp, 16890Sstevel@tonic-gate mdforceopts_t force_option, 16900Sstevel@tonic-gate char *sysfilename, 16910Sstevel@tonic-gate md_error_t *ep 16920Sstevel@tonic-gate ) 16930Sstevel@tonic-gate { 16940Sstevel@tonic-gate struct mddb_config c; 16950Sstevel@tonic-gate mdnamelist_t *nlp; 16960Sstevel@tonic-gate mdname_t *np; 16970Sstevel@tonic-gate md_drive_desc *dd = NULL; 16980Sstevel@tonic-gate md_drive_desc *p; 16990Sstevel@tonic-gate int replicacount; 17000Sstevel@tonic-gate int replica_delete_count; 17010Sstevel@tonic-gate int nr_replica_slices; 17020Sstevel@tonic-gate int i; 17030Sstevel@tonic-gate int stop_svmdaemons = 0; 17040Sstevel@tonic-gate int rval = 0; 17050Sstevel@tonic-gate int index; 17060Sstevel@tonic-gate int valid_replicas_nottodelete = 0; 17070Sstevel@tonic-gate int invalid_replicas_nottodelete = 0; 17080Sstevel@tonic-gate int invalid_replicas_todelete = 0; 17090Sstevel@tonic-gate int errored = 0; 17100Sstevel@tonic-gate int *tag_array; 17110Sstevel@tonic-gate int fd = -1; 17120Sstevel@tonic-gate md_error_t status = mdnullerror; 17130Sstevel@tonic-gate md_set_desc *sd; 17140Sstevel@tonic-gate int stale_bool = FALSE; 17150Sstevel@tonic-gate int flags; 17160Sstevel@tonic-gate 17170Sstevel@tonic-gate /* 17180Sstevel@tonic-gate * Error if we don't get some work to do. 
17190Sstevel@tonic-gate */ 17200Sstevel@tonic-gate if (db_nlp == NULL) 17210Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 17220Sstevel@tonic-gate 17230Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 17240Sstevel@tonic-gate return (-1); 17250Sstevel@tonic-gate 17260Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 17270Sstevel@tonic-gate c.c_id = 0; 17280Sstevel@tonic-gate c.c_setno = sp->setno; 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 17310Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 17320Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 17330Sstevel@tonic-gate 17340Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 17350Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 17360Sstevel@tonic-gate 17370Sstevel@tonic-gate /* 17380Sstevel@tonic-gate * Is current set STALE? 17390Sstevel@tonic-gate */ 17400Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 17410Sstevel@tonic-gate stale_bool = TRUE; 17420Sstevel@tonic-gate } 17430Sstevel@tonic-gate 17440Sstevel@tonic-gate replicacount = c.c_dbcnt; 17450Sstevel@tonic-gate 17460Sstevel@tonic-gate assert(db_nlp != NULL); 17470Sstevel@tonic-gate 17480Sstevel@tonic-gate /* 17490Sstevel@tonic-gate * go through and gather how many data bases are on each 17500Sstevel@tonic-gate * device specified. 
17510Sstevel@tonic-gate */ 17520Sstevel@tonic-gate 17530Sstevel@tonic-gate nr_replica_slices = deletelist_length(db_nlp); 17540Sstevel@tonic-gate tag_array = (int *)calloc(nr_replica_slices, sizeof (int)); 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate replica_delete_count = 0; 17570Sstevel@tonic-gate for (i = 0; i < replicacount; i++) { 17580Sstevel@tonic-gate char *devname; 17590Sstevel@tonic-gate int found = 0; 17600Sstevel@tonic-gate 17610Sstevel@tonic-gate c.c_id = i; 17620Sstevel@tonic-gate 17630Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 17640Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 17650Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 17660Sstevel@tonic-gate 17670Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 17680Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 17690Sstevel@tonic-gate 17700Sstevel@tonic-gate devname = splicename(&c.c_devname); 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate if ((index = in_deletelist(devname, db_nlp)) != -1) { 17730Sstevel@tonic-gate found = 1; 17740Sstevel@tonic-gate tag_array[index] = 1; 17750Sstevel@tonic-gate replica_delete_count++; 17760Sstevel@tonic-gate } 17770Sstevel@tonic-gate 17780Sstevel@tonic-gate errored = c.c_locator.l_flags & (MDDB_F_EREAD | 17790Sstevel@tonic-gate MDDB_F_EWRITE | MDDB_F_TOOSMALL | 17800Sstevel@tonic-gate MDDB_F_EFMT | MDDB_F_EDATA | 17810Sstevel@tonic-gate MDDB_F_EMASTER); 17820Sstevel@tonic-gate 17830Sstevel@tonic-gate /* 17840Sstevel@tonic-gate * There are four combinations of "errored" and "found" 17850Sstevel@tonic-gate * and they are used to find the number of 17860Sstevel@tonic-gate * (a) valid/invalid replicas that are not in the delete 17870Sstevel@tonic-gate * list and are available in the system. 17880Sstevel@tonic-gate * (b) valid/invalid replicas that are to be deleted. 
17890Sstevel@tonic-gate */ 17900Sstevel@tonic-gate 17910Sstevel@tonic-gate if (errored && !found) /* errored and !found */ 17920Sstevel@tonic-gate invalid_replicas_nottodelete++; 17930Sstevel@tonic-gate else if (!found) /* !errored and !found */ 17940Sstevel@tonic-gate valid_replicas_nottodelete++; 17950Sstevel@tonic-gate else if (errored) /* errored and found */ 17960Sstevel@tonic-gate invalid_replicas_todelete++; 17970Sstevel@tonic-gate /* 17980Sstevel@tonic-gate * else it is !errored and found. This means 17990Sstevel@tonic-gate * valid_replicas_todelete++; But this variable will not 18000Sstevel@tonic-gate * be used anywhere 18010Sstevel@tonic-gate */ 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate Free(devname); 18040Sstevel@tonic-gate } 18050Sstevel@tonic-gate 18060Sstevel@tonic-gate index = 0; 18070Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 18080Sstevel@tonic-gate np = nlp->namep; 18090Sstevel@tonic-gate if (tag_array[index++] != 1) { 18100Sstevel@tonic-gate Free(tag_array); 18110Sstevel@tonic-gate return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname)); 18120Sstevel@tonic-gate } 18130Sstevel@tonic-gate } 18140Sstevel@tonic-gate 18150Sstevel@tonic-gate Free(tag_array); 18160Sstevel@tonic-gate 18170Sstevel@tonic-gate 18180Sstevel@tonic-gate /* if all replicas are deleted stop mdmonitord */ 18190Sstevel@tonic-gate if ((replicacount - replica_delete_count) == 0) 18200Sstevel@tonic-gate stop_svmdaemons = 1; 18210Sstevel@tonic-gate 18220Sstevel@tonic-gate if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) { 18230Sstevel@tonic-gate if (force_option & MDFORCE_NONE) 18240Sstevel@tonic-gate return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname)); 18250Sstevel@tonic-gate if (! metaislocalset(sp) && ! 
(force_option & MDFORCE_DS)) 18260Sstevel@tonic-gate return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname)); 18270Sstevel@tonic-gate } 18280Sstevel@tonic-gate 18290Sstevel@tonic-gate /* 18300Sstevel@tonic-gate * The following algorithms are followed to check for deletion: 18310Sstevel@tonic-gate * (a) If the delete list(db_nlp) has all invalid replicas and no valid 18320Sstevel@tonic-gate * replicas, then deletion should be allowed. 18330Sstevel@tonic-gate * (b) Deletion should be allowed only if valid replicas that are "not" 18340Sstevel@tonic-gate * to be deleted is always greater than the invalid replicas that 18350Sstevel@tonic-gate * are "not" to be deleted. 18360Sstevel@tonic-gate * (c) If the user uses -f option, then deletion should be allowed. 18370Sstevel@tonic-gate */ 18380Sstevel@tonic-gate 18390Sstevel@tonic-gate if ((invalid_replicas_todelete != replica_delete_count) && 18400Sstevel@tonic-gate (invalid_replicas_nottodelete > valid_replicas_nottodelete) && 18410Sstevel@tonic-gate (force_option != MDFORCE_LOCAL)) 18420Sstevel@tonic-gate return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname)); 18430Sstevel@tonic-gate 18440Sstevel@tonic-gate /* 18450Sstevel@tonic-gate * go through and tell kernel to delete them 18460Sstevel@tonic-gate */ 18470Sstevel@tonic-gate 18480Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 18490Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 18500Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 18510Sstevel@tonic-gate 18520Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 18530Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate if (! metaislocalset(sp)) { 18560Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 18570Sstevel@tonic-gate if (! 
mdisok(ep)) 18580Sstevel@tonic-gate return (-1); 18590Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 18600Sstevel@tonic-gate return (-1); 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate 18630Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 18640Sstevel@tonic-gate np = nlp->namep; 18650Sstevel@tonic-gate 18660Sstevel@tonic-gate /* 18670Sstevel@tonic-gate * If mddb is being deleted from MN diskset and node is 18680Sstevel@tonic-gate * an owner of the diskset then use rpc.mdcommd 18690Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 18700Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 18710Sstevel@tonic-gate * can't write the message to the mddb. 18720Sstevel@tonic-gate * 18730Sstevel@tonic-gate * When mddbs are first being added to set, a detach can 18740Sstevel@tonic-gate * be called before any node has joined the diskset, so 18750Sstevel@tonic-gate * must check to see if node is an owner of the diskset. 18760Sstevel@tonic-gate * 18770Sstevel@tonic-gate * Otherwise, just delete mddb from this node. 18780Sstevel@tonic-gate */ 18790Sstevel@tonic-gate 18800Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 18810Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 18820Sstevel@tonic-gate md_mn_result_t *resultp; 18830Sstevel@tonic-gate md_mn_msg_meta_db_detach_t detach; 18840Sstevel@tonic-gate int send_rval; 18850Sstevel@tonic-gate 18860Sstevel@tonic-gate /* 18870Sstevel@tonic-gate * The following algorithm is used to detach replicas. 
18880Sstevel@tonic-gate * - META_DB_DETACH message generates submsgs 18890Sstevel@tonic-gate * - BLOCK parse (master) 18900Sstevel@tonic-gate * - MDDB_DETACH replicas 18910Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 18920Sstevel@tonic-gate * information to be sent from master 18930Sstevel@tonic-gate * to slaves at a higher class than the 18940Sstevel@tonic-gate * unblock so the parse message will 18950Sstevel@tonic-gate * reach slaves before unblock message. 18960Sstevel@tonic-gate */ 18970Sstevel@tonic-gate (void) splitname(np->bname, &detach.msg_splitname); 18980Sstevel@tonic-gate 18990Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 19000Sstevel@tonic-gate detach.msg_devid[0] = NULL; 19010Sstevel@tonic-gate 19020Sstevel@tonic-gate /* 19030Sstevel@tonic-gate * If reconfig cycle has been started, this node is 19040Sstevel@tonic-gate * stuck in in the return step until this command has 19050Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 19060Sstevel@tonic-gate * send_message to fail (instead of retrying) 19070Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 19080Sstevel@tonic-gate * cycle to proceed. 
19090Sstevel@tonic-gate */ 19100Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 19110Sstevel@tonic-gate if (stale_bool == TRUE) 19120Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 19130Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 19140Sstevel@tonic-gate MD_MN_MSG_META_DB_DETACH, 19150Sstevel@tonic-gate flags, (char *)&detach, 19160Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_detach_t), 19170Sstevel@tonic-gate &resultp, ep); 19180Sstevel@tonic-gate if (send_rval != 0) { 19190Sstevel@tonic-gate rval = -1; 19200Sstevel@tonic-gate if (resultp == NULL) 19210Sstevel@tonic-gate (void) mddserror(ep, 19220Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 19230Sstevel@tonic-gate sp->setno, NULL, NULL, 19240Sstevel@tonic-gate sp->setname); 19250Sstevel@tonic-gate else { 19260Sstevel@tonic-gate (void) mdstealerror(ep, 19270Sstevel@tonic-gate &(resultp->mmr_ep)); 19280Sstevel@tonic-gate if (mdisok(ep)) { 19290Sstevel@tonic-gate (void) mddserror(ep, 19300Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 19310Sstevel@tonic-gate sp->setno, NULL, NULL, 19320Sstevel@tonic-gate sp->setname); 19330Sstevel@tonic-gate } 19340Sstevel@tonic-gate free_result(resultp); 19350Sstevel@tonic-gate } 19360Sstevel@tonic-gate goto out; 19370Sstevel@tonic-gate } 19380Sstevel@tonic-gate if (resultp) 19390Sstevel@tonic-gate free_result(resultp); 19400Sstevel@tonic-gate } else { 19410Sstevel@tonic-gate i = 0; 19420Sstevel@tonic-gate while (i < c.c_dbcnt) { 19430Sstevel@tonic-gate char *devname; 19440Sstevel@tonic-gate 19450Sstevel@tonic-gate c.c_id = i; 19460Sstevel@tonic-gate 19470Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 19480Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 19490Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 19500Sstevel@tonic-gate 19510Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, 19520Sstevel@tonic-gate &c.c_mde, NULL)) { 19530Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 19540Sstevel@tonic-gate goto out; 19550Sstevel@tonic-gate } 
19560Sstevel@tonic-gate 19570Sstevel@tonic-gate devname = splicename(&c.c_devname); 19580Sstevel@tonic-gate if (strcmp(devname, np->bname) != 0) { 19590Sstevel@tonic-gate Free(devname); 19600Sstevel@tonic-gate i++; 19610Sstevel@tonic-gate continue; 19620Sstevel@tonic-gate } 19630Sstevel@tonic-gate Free(devname); 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 19660Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 19670Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 19680Sstevel@tonic-gate 19690Sstevel@tonic-gate if (metaioctl(MD_DB_DELDEV, &c, 19700Sstevel@tonic-gate &c.c_mde, NULL) != 0) { 19710Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 19720Sstevel@tonic-gate goto out; 19730Sstevel@tonic-gate } 19740Sstevel@tonic-gate 19750Sstevel@tonic-gate /* Not incrementing "i" intentionally */ 19760Sstevel@tonic-gate } 19770Sstevel@tonic-gate } 19780Sstevel@tonic-gate if (! metaislocalset(sp)) { 19790Sstevel@tonic-gate /* update the dbcnt and size in dd */ 19800Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) { 19810Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 19820Sstevel@tonic-gate p->dd_dbcnt = 0; 19830Sstevel@tonic-gate p->dd_dbsize = 0; 19840Sstevel@tonic-gate break; 19850Sstevel@tonic-gate } 19860Sstevel@tonic-gate } 19870Sstevel@tonic-gate 19880Sstevel@tonic-gate /* 19890Sstevel@tonic-gate * Slam a dummy master block and make it self 19900Sstevel@tonic-gate * identifying 19910Sstevel@tonic-gate */ 19920Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) >= 0) { 19930Sstevel@tonic-gate meta_mkdummymaster(sp, fd, 16); 19940Sstevel@tonic-gate (void) close(fd); 19950Sstevel@tonic-gate } 19960Sstevel@tonic-gate } 19970Sstevel@tonic-gate } 19980Sstevel@tonic-gate out: 19990Sstevel@tonic-gate if (metaislocalset(sp)) { 20000Sstevel@tonic-gate /* 20010Sstevel@tonic-gate * Stop all the daemons if there are 20020Sstevel@tonic-gate * no more replicas so that the module can be 20030Sstevel@tonic-gate 
* unloaded. 20040Sstevel@tonic-gate */ 20050Sstevel@tonic-gate if (rval == 0 && stop_svmdaemons == 1) { 20060Sstevel@tonic-gate char buf[MAXPATHLEN]; 20070Sstevel@tonic-gate int i; 20080Sstevel@tonic-gate 20090Sstevel@tonic-gate for (i = 0; i < DAEMON_COUNT; i++) { 20100Sstevel@tonic-gate (void) snprintf(buf, MAXPATHLEN, 20110Sstevel@tonic-gate "/usr/bin/pkill -%s -x %s", 20120Sstevel@tonic-gate svmd_kill_list[i].svmd_kill_val, 20130Sstevel@tonic-gate svmd_kill_list[i].svmd_name); 20140Sstevel@tonic-gate if (pclose(popen(buf, "w")) == -1) 20150Sstevel@tonic-gate md_perror(buf); 20160Sstevel@tonic-gate } 20170Sstevel@tonic-gate 20180Sstevel@tonic-gate if (meta_smf_disable(META_SMF_ALL, &status) == -1) { 20190Sstevel@tonic-gate mde_perror(&status, ""); 20200Sstevel@tonic-gate mdclrerror(&status); 20210Sstevel@tonic-gate } 20220Sstevel@tonic-gate } 20230Sstevel@tonic-gate if (buildconf(sp, &status)) { 20240Sstevel@tonic-gate /* Don't mask any previous errors */ 20250Sstevel@tonic-gate if (rval == 0) 20260Sstevel@tonic-gate rval = mdstealerror(ep, &status); 20270Sstevel@tonic-gate else 20280Sstevel@tonic-gate mdclrerror(&status); 20290Sstevel@tonic-gate return (rval); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate 20320Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 20330Sstevel@tonic-gate /* Don't mask any previous errors */ 20340Sstevel@tonic-gate if (rval == 0) 20350Sstevel@tonic-gate rval = mdstealerror(ep, &status); 20360Sstevel@tonic-gate else 20370Sstevel@tonic-gate mdclrerror(&status); 20380Sstevel@tonic-gate } 20390Sstevel@tonic-gate } else { 20400Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 20410Sstevel@tonic-gate (force_option & MDFORCE_SET_LOCKED), 20420Sstevel@tonic-gate ((force_option & MDFORCE_LOCAL) | 20430Sstevel@tonic-gate (force_option & MDFORCE_DS)), &status)) { 20440Sstevel@tonic-gate /* Don't mask any previous errors */ 20450Sstevel@tonic-gate if (rval == 0) 20460Sstevel@tonic-gate rval = mdstealerror(ep, 
&status); 20470Sstevel@tonic-gate else 20480Sstevel@tonic-gate mdclrerror(&status); 20490Sstevel@tonic-gate } 20500Sstevel@tonic-gate metafreedrivedesc(&dd); 20510Sstevel@tonic-gate } 20520Sstevel@tonic-gate if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) { 20530Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 20540Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 20550Sstevel@tonic-gate } 20560Sstevel@tonic-gate } 20570Sstevel@tonic-gate return (rval); 20580Sstevel@tonic-gate } 20590Sstevel@tonic-gate 20600Sstevel@tonic-gate static md_replica_t * 20610Sstevel@tonic-gate metareplicaname( 20620Sstevel@tonic-gate mdsetname_t *sp, 20630Sstevel@tonic-gate int flags, 20640Sstevel@tonic-gate struct mddb_config *c, 20650Sstevel@tonic-gate md_error_t *ep 20660Sstevel@tonic-gate ) 20670Sstevel@tonic-gate { 20680Sstevel@tonic-gate md_replica_t *rp; 20690Sstevel@tonic-gate char *devname; 20700Sstevel@tonic-gate size_t sz; 20710Sstevel@tonic-gate 20720Sstevel@tonic-gate /* allocate replicaname */ 20730Sstevel@tonic-gate rp = Zalloc(sizeof (*rp)); 20740Sstevel@tonic-gate 20750Sstevel@tonic-gate /* get device name */ 20760Sstevel@tonic-gate devname = splicename(&c->c_devname); 20770Sstevel@tonic-gate if (flags & PRINT_FAST) { 20781623Stw21770 if ((rp->r_namep = metaname_fast(&sp, devname, 20791623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 20800Sstevel@tonic-gate Free(devname); 20810Sstevel@tonic-gate Free(rp); 20820Sstevel@tonic-gate return (NULL); 20830Sstevel@tonic-gate } 20840Sstevel@tonic-gate } else { 20851623Stw21770 if ((rp->r_namep = metaname(&sp, devname, 20861623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 20870Sstevel@tonic-gate Free(devname); 20880Sstevel@tonic-gate Free(rp); 20890Sstevel@tonic-gate return (NULL); 20900Sstevel@tonic-gate } 20910Sstevel@tonic-gate } 20920Sstevel@tonic-gate Free(devname); 20930Sstevel@tonic-gate 20940Sstevel@tonic-gate /* make sure it's OK */ 20950Sstevel@tonic-gate if ((! 
(flags & MD_BASICNAME_OK)) && 20960Sstevel@tonic-gate (metachkcomp(rp->r_namep, ep) != 0)) { 20970Sstevel@tonic-gate Free(rp); 20980Sstevel@tonic-gate return (NULL); 20990Sstevel@tonic-gate } 21000Sstevel@tonic-gate 210162Sjeanm rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR; 210262Sjeanm rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR; 21030Sstevel@tonic-gate rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID; 21040Sstevel@tonic-gate if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) { 210562Sjeanm sz = devid_sizeof((ddi_devid_t)(uintptr_t) 210662Sjeanm (c->c_locator.l_devid)); 21070Sstevel@tonic-gate if ((rp->r_devid = (ddi_devid_t)malloc(sz)) == 21080Sstevel@tonic-gate (ddi_devid_t)NULL) { 21090Sstevel@tonic-gate Free(rp); 21100Sstevel@tonic-gate return (NULL); 21110Sstevel@tonic-gate } 21120Sstevel@tonic-gate (void) memcpy((void *)rp->r_devid, 211362Sjeanm (void *)(uintptr_t)c->c_locator.l_devid, sz); 21140Sstevel@tonic-gate (void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name); 21150Sstevel@tonic-gate rp->r_flags &= ~MDDB_F_NODEVID; 21160Sstevel@tonic-gate /* Overwrite dev derived from name with dev from devid */ 21170Sstevel@tonic-gate rp->r_namep->dev = meta_expldev(c->c_locator.l_dev); 21180Sstevel@tonic-gate } 21190Sstevel@tonic-gate (void) strcpy(rp->r_driver_name, c->c_locator.l_driver); 21200Sstevel@tonic-gate 21210Sstevel@tonic-gate rp->r_blkno = c->c_locator.l_blkno; 21220Sstevel@tonic-gate if (c->c_dbend != 0) 21230Sstevel@tonic-gate rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1; 21240Sstevel@tonic-gate 21250Sstevel@tonic-gate /* return replica */ 21260Sstevel@tonic-gate return (rp); 21270Sstevel@tonic-gate } 21280Sstevel@tonic-gate 21290Sstevel@tonic-gate /* 21300Sstevel@tonic-gate * free replica list 21310Sstevel@tonic-gate */ 21320Sstevel@tonic-gate void 21330Sstevel@tonic-gate metafreereplicalist( 21340Sstevel@tonic-gate md_replicalist_t *rlp 21350Sstevel@tonic-gate ) 21360Sstevel@tonic-gate { 21370Sstevel@tonic-gate md_replicalist_t *rl = NULL; 
21380Sstevel@tonic-gate 21390Sstevel@tonic-gate for (/* void */; (rlp != NULL); rlp = rl) { 21400Sstevel@tonic-gate rl = rlp->rl_next; 21410Sstevel@tonic-gate if (rlp->rl_repp->r_devid != (ddi_devid_t)0) { 21420Sstevel@tonic-gate free(rlp->rl_repp->r_devid); 21430Sstevel@tonic-gate } 21440Sstevel@tonic-gate Free(rlp->rl_repp); 21450Sstevel@tonic-gate Free(rlp); 21460Sstevel@tonic-gate } 21470Sstevel@tonic-gate } 21480Sstevel@tonic-gate 21490Sstevel@tonic-gate /* 21500Sstevel@tonic-gate * return list of all replicas in set 21510Sstevel@tonic-gate */ 21520Sstevel@tonic-gate int 21530Sstevel@tonic-gate metareplicalist( 21540Sstevel@tonic-gate mdsetname_t *sp, 21550Sstevel@tonic-gate int flags, 21560Sstevel@tonic-gate md_replicalist_t **rlpp, 21570Sstevel@tonic-gate md_error_t *ep 21580Sstevel@tonic-gate ) 21590Sstevel@tonic-gate { 21600Sstevel@tonic-gate md_replicalist_t **tail = rlpp; 21610Sstevel@tonic-gate int count = 0; 21620Sstevel@tonic-gate struct mddb_config c; 21630Sstevel@tonic-gate int i; 21640Sstevel@tonic-gate char *devid; 21650Sstevel@tonic-gate 21660Sstevel@tonic-gate /* for each replica */ 21670Sstevel@tonic-gate i = 0; 21680Sstevel@tonic-gate do { 21690Sstevel@tonic-gate md_replica_t *rp; 21700Sstevel@tonic-gate 21710Sstevel@tonic-gate /* get next replica */ 21720Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 21730Sstevel@tonic-gate c.c_id = i; 21740Sstevel@tonic-gate c.c_setno = sp->setno; 21750Sstevel@tonic-gate 21760Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 21770Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 21780Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 21790Sstevel@tonic-gate mdclrerror(&c.c_mde); 21800Sstevel@tonic-gate break; /* handle none at all */ 21810Sstevel@tonic-gate } 21820Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 21830Sstevel@tonic-gate goto out; 21840Sstevel@tonic-gate } 21850Sstevel@tonic-gate 21860Sstevel@tonic-gate if 
(c.c_locator.l_devid_flags & MDDB_DEVID_SZ) { 21870Sstevel@tonic-gate if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) { 21880Sstevel@tonic-gate (void) mdsyserror(ep, ENOMEM, META_DBCONF); 21890Sstevel@tonic-gate goto out; 21900Sstevel@tonic-gate } 21910Sstevel@tonic-gate c.c_locator.l_devid = (uintptr_t)devid; 21920Sstevel@tonic-gate /* 21930Sstevel@tonic-gate * Turn on space and sz flags since 'sz' amount of 21940Sstevel@tonic-gate * space has been alloc'd. 21950Sstevel@tonic-gate */ 21960Sstevel@tonic-gate c.c_locator.l_devid_flags = 21970Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 21980Sstevel@tonic-gate } 21990Sstevel@tonic-gate 22000Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 22010Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 22020Sstevel@tonic-gate mdclrerror(&c.c_mde); 22030Sstevel@tonic-gate break; /* handle none at all */ 22040Sstevel@tonic-gate } 22050Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 22060Sstevel@tonic-gate goto out; 22070Sstevel@tonic-gate } 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* 22100Sstevel@tonic-gate * Paranoid check - shouldn't happen, but is left as 22110Sstevel@tonic-gate * a place holder for changes that will be needed after 22120Sstevel@tonic-gate * dynamic reconfiguration changes are added to SVM (to 22130Sstevel@tonic-gate * support movement of disks at any point in time). 
22140Sstevel@tonic-gate */ 22150Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) { 22160Sstevel@tonic-gate (void) fprintf(stderr, 22170Sstevel@tonic-gate dgettext(TEXT_DOMAIN, 22180Sstevel@tonic-gate "Error: Relocation Information " 22190Sstevel@tonic-gate "(drvnm=%s, mnum=0x%lx) \n" 22200Sstevel@tonic-gate "relocation information size changed - \n" 22210Sstevel@tonic-gate "rerun command\n"), 22220Sstevel@tonic-gate c.c_locator.l_driver, c.c_locator.l_mnum); 22230Sstevel@tonic-gate (void) mderror(ep, MDE_DEVID_TOOBIG, NULL); 22240Sstevel@tonic-gate goto out; 22250Sstevel@tonic-gate } 22260Sstevel@tonic-gate 22270Sstevel@tonic-gate if (c.c_dbcnt == 0) 22280Sstevel@tonic-gate break; /* handle none at all */ 22290Sstevel@tonic-gate 22300Sstevel@tonic-gate /* get info */ 22310Sstevel@tonic-gate if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL) 22320Sstevel@tonic-gate goto out; 22330Sstevel@tonic-gate 22340Sstevel@tonic-gate /* append to list */ 22350Sstevel@tonic-gate *tail = Zalloc(sizeof (**tail)); 22360Sstevel@tonic-gate (*tail)->rl_repp = rp; 22370Sstevel@tonic-gate tail = &(*tail)->rl_next; 22380Sstevel@tonic-gate ++count; 22390Sstevel@tonic-gate 22400Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22410Sstevel@tonic-gate free(devid); 22420Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 22430Sstevel@tonic-gate } 22440Sstevel@tonic-gate 22450Sstevel@tonic-gate } while (++i < c.c_dbcnt); 22460Sstevel@tonic-gate 22470Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22480Sstevel@tonic-gate free(devid); 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate 22510Sstevel@tonic-gate /* return count */ 22520Sstevel@tonic-gate return (count); 22530Sstevel@tonic-gate 22540Sstevel@tonic-gate /* cleanup, return error */ 22550Sstevel@tonic-gate out: 22560Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22570Sstevel@tonic-gate free(devid); 22580Sstevel@tonic-gate } 22590Sstevel@tonic-gate 
metafreereplicalist(*rlpp); 22600Sstevel@tonic-gate *rlpp = NULL; 22610Sstevel@tonic-gate return (-1); 22620Sstevel@tonic-gate } 22630Sstevel@tonic-gate 22640Sstevel@tonic-gate /* 22650Sstevel@tonic-gate * meta_sync_db_locations - get list of replicas from kernel and write 22660Sstevel@tonic-gate * out to mddb.cf and md.conf. 'Syncs up' the replica list in 22670Sstevel@tonic-gate * the kernel with the replica list in the conf files. 22680Sstevel@tonic-gate * 22690Sstevel@tonic-gate */ 22700Sstevel@tonic-gate void 22710Sstevel@tonic-gate meta_sync_db_locations( 22720Sstevel@tonic-gate mdsetname_t *sp, 22730Sstevel@tonic-gate md_error_t *ep 22740Sstevel@tonic-gate ) 22750Sstevel@tonic-gate { 22760Sstevel@tonic-gate char *sname = 0; /* system file name */ 22770Sstevel@tonic-gate char *cname = 0; /* config file name */ 22780Sstevel@tonic-gate 22790Sstevel@tonic-gate if (!metaislocalset(sp)) 22800Sstevel@tonic-gate return; 22810Sstevel@tonic-gate 22820Sstevel@tonic-gate /* Updates backup of configuration file (aka mddb.cf) */ 22830Sstevel@tonic-gate if (buildconf(sp, ep) != 0) 22840Sstevel@tonic-gate return; 22850Sstevel@tonic-gate 22860Sstevel@tonic-gate /* Updates system configuration file (aka md.conf) */ 22870Sstevel@tonic-gate (void) meta_db_patch(sname, cname, 0, ep); 22880Sstevel@tonic-gate } 22890Sstevel@tonic-gate 22900Sstevel@tonic-gate /* 22910Sstevel@tonic-gate * setup_db_locations - parse the mddb.cf file and 22920Sstevel@tonic-gate * tells the driver which db locations to use. 
22930Sstevel@tonic-gate */ 22940Sstevel@tonic-gate int 22950Sstevel@tonic-gate meta_setup_db_locations( 22960Sstevel@tonic-gate md_error_t *ep 22970Sstevel@tonic-gate ) 22980Sstevel@tonic-gate { 22990Sstevel@tonic-gate mddb_config_t c; 23000Sstevel@tonic-gate FILE *fp; 23010Sstevel@tonic-gate char inbuff[1024]; 23020Sstevel@tonic-gate char *buff; 23030Sstevel@tonic-gate uint_t i; 23040Sstevel@tonic-gate size_t sz; 23050Sstevel@tonic-gate int rval = 0; 23060Sstevel@tonic-gate char *devidp; 23070Sstevel@tonic-gate uint_t devid_size; 23080Sstevel@tonic-gate char *minor_name = NULL; 23090Sstevel@tonic-gate ddi_devid_t devid_decode; 23100Sstevel@tonic-gate int checksum; 23110Sstevel@tonic-gate 23120Sstevel@tonic-gate /* do mddb.cf file */ 23130Sstevel@tonic-gate (void) memset(&c, '\0', sizeof (c)); 23140Sstevel@tonic-gate if ((fp = fopen(META_DBCONF, "r")) == NULL) { 23150Sstevel@tonic-gate if (errno != ENOENT) 23160Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 23170Sstevel@tonic-gate } 23180Sstevel@tonic-gate while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1), 23190Sstevel@tonic-gate fp)) != NULL)) { 23200Sstevel@tonic-gate 23210Sstevel@tonic-gate /* ignore comments */ 23220Sstevel@tonic-gate if (*buff == '#') 23230Sstevel@tonic-gate continue; 23240Sstevel@tonic-gate 23250Sstevel@tonic-gate /* parse locator */ 23260Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 23270Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 23280Sstevel@tonic-gate i = strcspn(buff, " \t"); 23290Sstevel@tonic-gate if (i > sizeof (c.c_locator.l_driver)) 23300Sstevel@tonic-gate i = sizeof (c.c_locator.l_driver); 23310Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, buff, i); 23320Sstevel@tonic-gate buff += i; 23330Sstevel@tonic-gate c.c_locator.l_dev = 23340Sstevel@tonic-gate makedev((major_t)0, (minor_t)strtol(buff, &buff, 10)); 23350Sstevel@tonic-gate c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10); 23360Sstevel@tonic-gate c.c_locator.l_mnum = 
minor(c.c_locator.l_dev); 23370Sstevel@tonic-gate 23380Sstevel@tonic-gate /* parse out devid */ 23390Sstevel@tonic-gate while (isspace((int)(*buff))) 23400Sstevel@tonic-gate buff += 1; 23410Sstevel@tonic-gate i = strcspn(buff, " \t"); 23420Sstevel@tonic-gate if ((devidp = (char *)malloc(i+1)) == NULL) 23430Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 23440Sstevel@tonic-gate 23450Sstevel@tonic-gate (void) strncpy(devidp, buff, i); 23460Sstevel@tonic-gate devidp[i] = '\0'; 23470Sstevel@tonic-gate if (devid_str_decode(devidp, &devid_decode, 23480Sstevel@tonic-gate &minor_name) == -1) { 23490Sstevel@tonic-gate free(devidp); 23500Sstevel@tonic-gate continue; 23510Sstevel@tonic-gate } 23520Sstevel@tonic-gate 23530Sstevel@tonic-gate /* Conf file must have minor name associated with devid */ 23540Sstevel@tonic-gate if (minor_name == NULL) { 23550Sstevel@tonic-gate free(devidp); 23560Sstevel@tonic-gate devid_free(devid_decode); 23570Sstevel@tonic-gate continue; 23580Sstevel@tonic-gate } 23590Sstevel@tonic-gate 23600Sstevel@tonic-gate sz = devid_sizeof(devid_decode); 23610Sstevel@tonic-gate /* Copy to devid size buffer that ioctl expects */ 23620Sstevel@tonic-gate if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) { 23630Sstevel@tonic-gate devid_free(devid_decode); 23640Sstevel@tonic-gate free(minor_name); 23650Sstevel@tonic-gate free(devidp); 23660Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 23670Sstevel@tonic-gate } 23680Sstevel@tonic-gate 236962Sjeanm (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 23700Sstevel@tonic-gate (void *)devid_decode, sz); 23710Sstevel@tonic-gate 23720Sstevel@tonic-gate devid_free(devid_decode); 23730Sstevel@tonic-gate 23740Sstevel@tonic-gate if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) { 23750Sstevel@tonic-gate free(minor_name); 23760Sstevel@tonic-gate free(devidp); 237762Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 23780Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, 
META_DBCONF)); 23790Sstevel@tonic-gate } 23800Sstevel@tonic-gate (void) strcpy(c.c_locator.l_minor_name, minor_name); 23810Sstevel@tonic-gate free(minor_name); 23820Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_VALID | 23830Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 23840Sstevel@tonic-gate c.c_locator.l_devid_sz = sz; 23850Sstevel@tonic-gate 23860Sstevel@tonic-gate devid_size = strlen(devidp); 23870Sstevel@tonic-gate buff += devid_size; 23880Sstevel@tonic-gate 23890Sstevel@tonic-gate checksum = strtol(buff, &buff, 10); 23900Sstevel@tonic-gate for (i = 0; c.c_locator.l_driver[i] != 0; i++) 23910Sstevel@tonic-gate checksum += c.c_locator.l_driver[i]; 23920Sstevel@tonic-gate for (i = 0; i < devid_size; i++) { 23930Sstevel@tonic-gate checksum += devidp[i]; 23940Sstevel@tonic-gate } 23950Sstevel@tonic-gate free(devidp); 23960Sstevel@tonic-gate 23970Sstevel@tonic-gate checksum += minor(c.c_locator.l_dev); 23980Sstevel@tonic-gate checksum += c.c_locator.l_blkno; 23990Sstevel@tonic-gate if (checksum != 42) { 24000Sstevel@tonic-gate /* overwritten later for more serious problems */ 24010Sstevel@tonic-gate rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF); 240262Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24030Sstevel@tonic-gate continue; 24040Sstevel@tonic-gate } 24050Sstevel@tonic-gate c.c_locator.l_flags = 0; 24060Sstevel@tonic-gate 24070Sstevel@tonic-gate /* use db location */ 24080Sstevel@tonic-gate if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 240962Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24100Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 24110Sstevel@tonic-gate } 24120Sstevel@tonic-gate 24130Sstevel@tonic-gate /* free up devid if in use */ 241462Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24150Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 24160Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 24170Sstevel@tonic-gate } 24180Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0)) 
24190Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 24200Sstevel@tonic-gate 24210Sstevel@tonic-gate /* check for stale database */ 24220Sstevel@tonic-gate (void) memset((char *)&c, 0, sizeof (struct mddb_config)); 24230Sstevel@tonic-gate c.c_id = 0; 24240Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 24270Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 24280Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 24290Sstevel@tonic-gate 24300Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 24310Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_INVALID)) 24320Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 24330Sstevel@tonic-gate mdclrerror(&c.c_mde); 24340Sstevel@tonic-gate } 24350Sstevel@tonic-gate 24360Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) 24370Sstevel@tonic-gate return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET, 24380Sstevel@tonic-gate 0, NULL)); 24390Sstevel@tonic-gate 24400Sstevel@tonic-gate /* success */ 24410Sstevel@tonic-gate return (rval); 24420Sstevel@tonic-gate } 24430Sstevel@tonic-gate 24440Sstevel@tonic-gate /* 24450Sstevel@tonic-gate * meta_db_minreplica - returns the minimum size replica currently in use. 
24460Sstevel@tonic-gate */ 24470Sstevel@tonic-gate daddr_t 24480Sstevel@tonic-gate meta_db_minreplica( 24490Sstevel@tonic-gate mdsetname_t *sp, 24500Sstevel@tonic-gate md_error_t *ep 24510Sstevel@tonic-gate ) 24520Sstevel@tonic-gate { 24530Sstevel@tonic-gate md_replica_t *r; 24540Sstevel@tonic-gate md_replicalist_t *rl, *rlp = NULL; 24550Sstevel@tonic-gate daddr_t nblks = 0; 24560Sstevel@tonic-gate 24570Sstevel@tonic-gate if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) 24580Sstevel@tonic-gate return (-1); 24590Sstevel@tonic-gate 24600Sstevel@tonic-gate if (rlp == NULL) 24610Sstevel@tonic-gate return (-1); 24620Sstevel@tonic-gate 24630Sstevel@tonic-gate /* find the smallest existing replica */ 24640Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) { 24650Sstevel@tonic-gate r = rl->rl_repp; 24660Sstevel@tonic-gate nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); 24670Sstevel@tonic-gate } 24680Sstevel@tonic-gate 24690Sstevel@tonic-gate metafreereplicalist(rlp); 24700Sstevel@tonic-gate return (nblks); 24710Sstevel@tonic-gate } 24720Sstevel@tonic-gate 24730Sstevel@tonic-gate /* 24740Sstevel@tonic-gate * meta_get_replica_names 24750Sstevel@tonic-gate * returns an mdnamelist_t of replica slices 24760Sstevel@tonic-gate */ 24770Sstevel@tonic-gate /*ARGSUSED*/ 24780Sstevel@tonic-gate int 24790Sstevel@tonic-gate meta_get_replica_names( 24800Sstevel@tonic-gate mdsetname_t *sp, 24810Sstevel@tonic-gate mdnamelist_t **nlpp, 24820Sstevel@tonic-gate int options, 24830Sstevel@tonic-gate md_error_t *ep 24840Sstevel@tonic-gate ) 24850Sstevel@tonic-gate { 24860Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 24870Sstevel@tonic-gate md_replicalist_t *rl; 24880Sstevel@tonic-gate mdnamelist_t **tailpp = nlpp; 24890Sstevel@tonic-gate int cnt = 0; 24900Sstevel@tonic-gate 24910Sstevel@tonic-gate assert(nlpp != NULL); 24920Sstevel@tonic-gate 24930Sstevel@tonic-gate if (!metaislocalset(sp)) 24940Sstevel@tonic-gate goto out; 
24950Sstevel@tonic-gate 24960Sstevel@tonic-gate /* get replicas */ 24970Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) { 24980Sstevel@tonic-gate cnt = -1; 24990Sstevel@tonic-gate goto out; 25000Sstevel@tonic-gate } 25010Sstevel@tonic-gate 25020Sstevel@tonic-gate /* build name list */ 25030Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 25040Sstevel@tonic-gate /* 25050Sstevel@tonic-gate * Add the name struct to the end of the 25060Sstevel@tonic-gate * namelist but keep a pointer to the last 25070Sstevel@tonic-gate * element so that we don't incur the overhead 25080Sstevel@tonic-gate * of traversing the list each time 25090Sstevel@tonic-gate */ 25100Sstevel@tonic-gate tailpp = meta_namelist_append_wrapper( 25110Sstevel@tonic-gate tailpp, rl->rl_repp->r_namep); 25120Sstevel@tonic-gate ++cnt; 25130Sstevel@tonic-gate } 25140Sstevel@tonic-gate 25150Sstevel@tonic-gate /* cleanup, return count or error */ 25160Sstevel@tonic-gate out: 25170Sstevel@tonic-gate metafreereplicalist(rlp); 25180Sstevel@tonic-gate return (cnt); 25190Sstevel@tonic-gate } 2520