10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*1623Stw21770 * Common Development and Distribution License (the "License"). 6*1623Stw21770 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*1623Stw21770 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate /* 290Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that 300Sstevel@tonic-gate * TEXT_DOMAIN gets set to something. 310Sstevel@tonic-gate */ 320Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 330Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 340Sstevel@tonic-gate #endif 350Sstevel@tonic-gate 360Sstevel@tonic-gate /* 370Sstevel@tonic-gate * Metadevice database interfaces. 
380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate #define MDDB 410Sstevel@tonic-gate 420Sstevel@tonic-gate #include <meta.h> 430Sstevel@tonic-gate #include <sys/lvm/md_mddb.h> 440Sstevel@tonic-gate #include <sys/lvm/md_crc.h> 450Sstevel@tonic-gate #include <sys/lvm/mdio.h> 460Sstevel@tonic-gate #include <string.h> 470Sstevel@tonic-gate #include <strings.h> 480Sstevel@tonic-gate #include <ctype.h> 490Sstevel@tonic-gate 500Sstevel@tonic-gate struct svm_daemon { 510Sstevel@tonic-gate char *svmd_name; 520Sstevel@tonic-gate char *svmd_kill_val; 530Sstevel@tonic-gate }; 540Sstevel@tonic-gate 550Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = { 560Sstevel@tonic-gate {"mdmonitord", "HUP"}, 570Sstevel@tonic-gate {"mddoors", "KILL"}, 580Sstevel@tonic-gate }; 590Sstevel@tonic-gate 600Sstevel@tonic-gate #define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon)) 610Sstevel@tonic-gate #define MDMONITORD "/usr/sbin/mdmonitord" 620Sstevel@tonic-gate 630Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); 640Sstevel@tonic-gate 650Sstevel@tonic-gate /* 660Sstevel@tonic-gate * meta_get_lb_inittime sends a request for the lb_inittime to the kernel 670Sstevel@tonic-gate */ 680Sstevel@tonic-gate md_timeval32_t 690Sstevel@tonic-gate meta_get_lb_inittime( 700Sstevel@tonic-gate mdsetname_t *sp, 710Sstevel@tonic-gate md_error_t *ep 720Sstevel@tonic-gate ) 730Sstevel@tonic-gate { 740Sstevel@tonic-gate mddb_config_t c; 750Sstevel@tonic-gate 760Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 770Sstevel@tonic-gate 780Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 790Sstevel@tonic-gate c.c_setno = sp->setno; 800Sstevel@tonic-gate 810Sstevel@tonic-gate if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) { 820Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 830Sstevel@tonic-gate } 840Sstevel@tonic-gate 850Sstevel@tonic-gate return (c.c_timestamp); 860Sstevel@tonic-gate } 870Sstevel@tonic-gate 
880Sstevel@tonic-gate /* 890Sstevel@tonic-gate * mkmasterblks writes out the master blocks of the mddb to the replica. 900Sstevel@tonic-gate * 910Sstevel@tonic-gate * In a MN diskset, this is called by the node that is adding this replica 920Sstevel@tonic-gate * to the diskset. 930Sstevel@tonic-gate */ 940Sstevel@tonic-gate 950Sstevel@tonic-gate #define MDDB_VERIFY_SIZE 8192 960Sstevel@tonic-gate 970Sstevel@tonic-gate static int 980Sstevel@tonic-gate mkmasterblks( 990Sstevel@tonic-gate mdsetname_t *sp, 1000Sstevel@tonic-gate mdname_t *np, 1010Sstevel@tonic-gate int fd, 1020Sstevel@tonic-gate daddr_t firstblk, 1030Sstevel@tonic-gate int dbsize, 1040Sstevel@tonic-gate md_timeval32_t inittime, 1050Sstevel@tonic-gate md_error_t *ep 1060Sstevel@tonic-gate ) 1070Sstevel@tonic-gate { 1080Sstevel@tonic-gate int consecutive; 1090Sstevel@tonic-gate md_timeval32_t tp; 1100Sstevel@tonic-gate struct mddb_mb *mb; 1110Sstevel@tonic-gate char *buffer; 1120Sstevel@tonic-gate int iosize; 1130Sstevel@tonic-gate md_set_desc *sd; 1140Sstevel@tonic-gate int mn_set = 0; 1150Sstevel@tonic-gate daddr_t startblk; 1160Sstevel@tonic-gate int cnt; 1170Sstevel@tonic-gate ddi_devid_t devid; 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate if (! metaislocalset(sp)) { 1200Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 1210Sstevel@tonic-gate return (-1); 1220Sstevel@tonic-gate 1230Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 1240Sstevel@tonic-gate mn_set = 1; /* Used later */ 1250Sstevel@tonic-gate } 1260Sstevel@tonic-gate } 1270Sstevel@tonic-gate 1280Sstevel@tonic-gate /* 1290Sstevel@tonic-gate * Loop to verify the entire mddb region on disk is read/writable. 1300Sstevel@tonic-gate * buffer is used to write/read in at most MDDB_VERIFY_SIZE block 1310Sstevel@tonic-gate * chunks. 
1320Sstevel@tonic-gate * 1330Sstevel@tonic-gate * A side-effect of this loop is to zero out the entire mddb region 1340Sstevel@tonic-gate */ 1350Sstevel@tonic-gate if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL) 1360Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 1370Sstevel@tonic-gate 1380Sstevel@tonic-gate startblk = firstblk; 1390Sstevel@tonic-gate for (cnt = dbsize; cnt > 0; cnt -= consecutive) { 1400Sstevel@tonic-gate 1410Sstevel@tonic-gate if (cnt > MDDB_VERIFY_SIZE) 1420Sstevel@tonic-gate consecutive = MDDB_VERIFY_SIZE; 1430Sstevel@tonic-gate else 1440Sstevel@tonic-gate consecutive = cnt; 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 1470Sstevel@tonic-gate Free(buffer); 1480Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1490Sstevel@tonic-gate } 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate iosize = DEV_BSIZE * consecutive; 1520Sstevel@tonic-gate if (write(fd, buffer, iosize) != iosize) { 1530Sstevel@tonic-gate Free(buffer); 1540Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1550Sstevel@tonic-gate } 1560Sstevel@tonic-gate 1570Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 1580Sstevel@tonic-gate Free(buffer); 1590Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1600Sstevel@tonic-gate } 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate if (read(fd, buffer, iosize) != iosize) { 1630Sstevel@tonic-gate Free(buffer); 1640Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1650Sstevel@tonic-gate } 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate startblk += consecutive; 1680Sstevel@tonic-gate } 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate Free(buffer); 1710Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 1720Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 1730Sstevel@tonic-gate 1740Sstevel@tonic-gate if (meta_gettimeofday(&tp) == -1) { 1750Sstevel@tonic-gate Free(mb); 
1760Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1770Sstevel@tonic-gate } 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_MB; 1800Sstevel@tonic-gate /* 1810Sstevel@tonic-gate * If a MN diskset, set master block revision for a MN set. 1820Sstevel@tonic-gate * Even though the master block structure is no different 1830Sstevel@tonic-gate * for a MN set, setting the revision field to a different 1840Sstevel@tonic-gate * number keeps any pre-MN_diskset code from accessing 1850Sstevel@tonic-gate * this diskset. It also allows for an early determination 1860Sstevel@tonic-gate * of a MN diskset when reading in from disk so that the 1870Sstevel@tonic-gate * proper size locator block and locator names structure 1880Sstevel@tonic-gate * can be read in thus saving time on diskset startup. 1890Sstevel@tonic-gate */ 1900Sstevel@tonic-gate if (mn_set) 1910Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MNMB; 1920Sstevel@tonic-gate else 1930Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 1940Sstevel@tonic-gate mb->mb_timestamp = tp; 1950Sstevel@tonic-gate mb->mb_setno = sp->setno; 1960Sstevel@tonic-gate mb->mb_blkcnt = dbsize - 1; 1970Sstevel@tonic-gate mb->mb_blkno = firstblk; 1980Sstevel@tonic-gate mb->mb_nextblk = 0; 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate mb->mb_blkmap.m_firstblk = firstblk + 1; 2010Sstevel@tonic-gate mb->mb_blkmap.m_consecutive = dbsize - 1; 2020Sstevel@tonic-gate if (! metaislocalset(sp)) { 2030Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 2040Sstevel@tonic-gate } 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate /* 2070Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 2080Sstevel@tonic-gate * the master block. The saved devid is used to provide a mapping 2090Sstevel@tonic-gate * between this disk's devid and the devid stored into the master 2100Sstevel@tonic-gate * block. This allows the disk image to be self-identifying 2110Sstevel@tonic-gate * if it gets copied (e.g. 
SNDR, True Copy, etc.). This is used 2120Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 2130Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 2140Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 2150Sstevel@tonic-gate * in the remote copy scenario. 2160Sstevel@tonic-gate */ 2170Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 2180Sstevel@tonic-gate size_t len; 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate len = devid_sizeof(devid); 2210Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 2220Sstevel@tonic-gate /* there is enough space to store the devid */ 2230Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 2240Sstevel@tonic-gate mb->mb_devid_len = len; 2250Sstevel@tonic-gate (void) memcpy(mb->mb_devid, devid, len); 2260Sstevel@tonic-gate } 2270Sstevel@tonic-gate devid_free(devid); 2280Sstevel@tonic-gate } 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 2310Sstevel@tonic-gate (crc_skip_t *)NULL); 2320Sstevel@tonic-gate 2330Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 2340Sstevel@tonic-gate Free(mb); 2350Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2360Sstevel@tonic-gate } 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 2390Sstevel@tonic-gate Free(mb); 2400Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2410Sstevel@tonic-gate } 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 2440Sstevel@tonic-gate Free(mb); 2450Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2460Sstevel@tonic-gate } 2470Sstevel@tonic-gate 2480Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 2490Sstevel@tonic-gate Free(mb); 2500Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2510Sstevel@tonic-gate } 
2520Sstevel@tonic-gate 2530Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 2540Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) { 2550Sstevel@tonic-gate Free(mb); 2560Sstevel@tonic-gate return (mdmddberror(ep, MDE_NOTVERIFIED, 2570Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, 0, np->rname)); 2580Sstevel@tonic-gate } 2590Sstevel@tonic-gate 2600Sstevel@tonic-gate Free(mb); 2610Sstevel@tonic-gate return (0); 2620Sstevel@tonic-gate } 2630Sstevel@tonic-gate 2640Sstevel@tonic-gate void 2650Sstevel@tonic-gate meta_mkdummymaster( 2660Sstevel@tonic-gate mdsetname_t *sp, 2670Sstevel@tonic-gate int fd, 2680Sstevel@tonic-gate daddr_t firstblk 2690Sstevel@tonic-gate ) 2700Sstevel@tonic-gate { 2710Sstevel@tonic-gate md_timeval32_t tp; 2720Sstevel@tonic-gate struct mddb_mb *mb; 2730Sstevel@tonic-gate ddi_devid_t devid; 2740Sstevel@tonic-gate md_set_desc *sd; 2750Sstevel@tonic-gate md_error_t ep = mdnullerror; 2760Sstevel@tonic-gate md_timeval32_t inittime; 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate /* 2790Sstevel@tonic-gate * No dummy master blocks are written for a MN diskset since devids 2800Sstevel@tonic-gate * are not supported in MN disksets. 2810Sstevel@tonic-gate */ 2820Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 2830Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, &ep)) == NULL) 2840Sstevel@tonic-gate return; 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) 2870Sstevel@tonic-gate return; 2880Sstevel@tonic-gate } 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 2910Sstevel@tonic-gate return; 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_DU; 2940Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 2950Sstevel@tonic-gate mb->mb_setno = sp->setno; 2960Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, &ep); 2970Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate if (meta_gettimeofday(&tp) != -1) 3000Sstevel@tonic-gate mb->mb_timestamp = tp; 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate /* 3030Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 3040Sstevel@tonic-gate * the master block. This allows the disk image to be self-identifying 3050Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 3060Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 3070Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 3080Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 3090Sstevel@tonic-gate * in the remote copy scenario. 
3100Sstevel@tonic-gate */ 3110Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 3120Sstevel@tonic-gate int len; 3130Sstevel@tonic-gate 3140Sstevel@tonic-gate len = devid_sizeof(devid); 3150Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 3160Sstevel@tonic-gate /* there is enough space to store the devid */ 3170Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 3180Sstevel@tonic-gate mb->mb_devid_len = len; 3190Sstevel@tonic-gate (void) memcpy(mb->mb_devid, (char *)devid, len); 3200Sstevel@tonic-gate } 3210Sstevel@tonic-gate devid_free(devid); 3220Sstevel@tonic-gate } 3230Sstevel@tonic-gate 3240Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 3250Sstevel@tonic-gate (crc_skip_t *)NULL); 3260Sstevel@tonic-gate 3270Sstevel@tonic-gate /* 3280Sstevel@tonic-gate * If any of these operations fail, we need to inform the 3290Sstevel@tonic-gate * user that the disk won't be self identifying. When support 3300Sstevel@tonic-gate * for importing remotely replicated disksets is added, we 3310Sstevel@tonic-gate * want to add the error messages here. 
3320Sstevel@tonic-gate */ 3330Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 3340Sstevel@tonic-gate goto out; 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) 3370Sstevel@tonic-gate goto out; 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 3400Sstevel@tonic-gate goto out; 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) 3430Sstevel@tonic-gate goto out; 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 3460Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) 3470Sstevel@tonic-gate goto out; 3480Sstevel@tonic-gate 3490Sstevel@tonic-gate out: 3500Sstevel@tonic-gate Free(mb); 3510Sstevel@tonic-gate } 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate static int 3540Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep) 3550Sstevel@tonic-gate { 3560Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 3570Sstevel@tonic-gate md_replicalist_t *rl; 3580Sstevel@tonic-gate FILE *cfp = NULL; 3590Sstevel@tonic-gate FILE *mfp = NULL; 3600Sstevel@tonic-gate struct stat sbuf; 3610Sstevel@tonic-gate int rval = 0; 3620Sstevel@tonic-gate int in_miniroot = 0; 3630Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 3640Sstevel@tonic-gate char *tname = NULL; 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate /* get list of local replicas */ 3670Sstevel@tonic-gate if (! metaislocalset(sp)) 3680Sstevel@tonic-gate return (0); 3690Sstevel@tonic-gate 3700Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 3710Sstevel@tonic-gate return (-1); 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 3740Sstevel@tonic-gate if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) { 3750Sstevel@tonic-gate /* 3760Sstevel@tonic-gate * On the miniroot tmp files must be created in /var/tmp. 
3770Sstevel@tonic-gate * If we get a EROFS error, we assume that we are in the 3780Sstevel@tonic-gate * miniroot. 3790Sstevel@tonic-gate */ 3800Sstevel@tonic-gate if (errno != EROFS) 3810Sstevel@tonic-gate goto error; 3820Sstevel@tonic-gate in_miniroot = 1; 3830Sstevel@tonic-gate errno = 0; 3840Sstevel@tonic-gate tname = tempnam("/var/tmp", "slvm_"); 3850Sstevel@tonic-gate if (tname == NULL && errno == EROFS) { 3860Sstevel@tonic-gate /* 3870Sstevel@tonic-gate * If we are booted on a read-only root because 3880Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 3890Sstevel@tonic-gate * any scary error messages. 3900Sstevel@tonic-gate */ 3910Sstevel@tonic-gate errno = 0; 3920Sstevel@tonic-gate goto out; 3930Sstevel@tonic-gate } 3940Sstevel@tonic-gate 3950Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 3960Sstevel@tonic-gate if ((cfp = fopen(tname, "w+")) == NULL) 3970Sstevel@tonic-gate goto error; 3980Sstevel@tonic-gate } 3990Sstevel@tonic-gate if (stat(META_DBCONF, &sbuf) == 0) { 4000Sstevel@tonic-gate if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0) 4010Sstevel@tonic-gate goto error; 4020Sstevel@tonic-gate if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0) 4030Sstevel@tonic-gate goto error; 4040Sstevel@tonic-gate } 4050Sstevel@tonic-gate 4060Sstevel@tonic-gate /* print header */ 4070Sstevel@tonic-gate if (fprintf(cfp, "#metadevice database location file ") == EOF) 4080Sstevel@tonic-gate goto error; 4090Sstevel@tonic-gate if (fprintf(cfp, "do not hand edit\n") < 0) 4100Sstevel@tonic-gate goto error; 4110Sstevel@tonic-gate if (fprintf(cfp, 4120Sstevel@tonic-gate "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0) 4130Sstevel@tonic-gate goto error; 4140Sstevel@tonic-gate 4150Sstevel@tonic-gate /* dump replicas */ 4160Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 4170Sstevel@tonic-gate md_replica_t *r = rl->rl_repp; 4180Sstevel@tonic-gate int checksum = 42; 4190Sstevel@tonic-gate int i; 
4200Sstevel@tonic-gate char *devidp; 4210Sstevel@tonic-gate minor_t min; 4220Sstevel@tonic-gate 4230Sstevel@tonic-gate devidp = devid_str_encode(r->r_devid, r->r_minor_name); 4240Sstevel@tonic-gate /* If devid code can't encode devidp - skip entry */ 4250Sstevel@tonic-gate if (devidp == NULL) { 4260Sstevel@tonic-gate continue; 4270Sstevel@tonic-gate } 4280Sstevel@tonic-gate 4290Sstevel@tonic-gate /* compute checksum */ 4300Sstevel@tonic-gate for (i = 0; ((r->r_driver_name[i] != '\0') && 4310Sstevel@tonic-gate (i < sizeof (r->r_driver_name))); i++) { 4320Sstevel@tonic-gate checksum -= r->r_driver_name[i]; 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate min = meta_getminor(r->r_namep->dev); 4350Sstevel@tonic-gate checksum -= min; 4360Sstevel@tonic-gate checksum -= r->r_blkno; 4370Sstevel@tonic-gate 4380Sstevel@tonic-gate for (i = 0; i < strlen(devidp); i++) { 4390Sstevel@tonic-gate checksum -= devidp[i]; 4400Sstevel@tonic-gate } 4410Sstevel@tonic-gate /* print info */ 4420Sstevel@tonic-gate if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n", 4430Sstevel@tonic-gate r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) { 4440Sstevel@tonic-gate goto error; 4450Sstevel@tonic-gate } 4460Sstevel@tonic-gate 4470Sstevel@tonic-gate devid_str_free(devidp); 4480Sstevel@tonic-gate } 4490Sstevel@tonic-gate 4500Sstevel@tonic-gate /* close and rename to real file */ 4510Sstevel@tonic-gate if (fflush(cfp) != 0) 4520Sstevel@tonic-gate goto error; 4530Sstevel@tonic-gate if (fsync(fileno(cfp)) != 0) 4540Sstevel@tonic-gate goto error; 4550Sstevel@tonic-gate if (fclose(cfp) != 0) { 4560Sstevel@tonic-gate cfp = NULL; 4570Sstevel@tonic-gate goto error; 4580Sstevel@tonic-gate } 4590Sstevel@tonic-gate cfp = NULL; 4600Sstevel@tonic-gate 4610Sstevel@tonic-gate /* 4620Sstevel@tonic-gate * Renames don't work in the miniroot since tmpfiles are 4630Sstevel@tonic-gate * created in /var/tmp. Hence we copy the data out. 4640Sstevel@tonic-gate */ 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate if (! 
in_miniroot) { 4670Sstevel@tonic-gate if (rename(META_DBCONFTMP, META_DBCONF) != 0) 4680Sstevel@tonic-gate goto error; 4690Sstevel@tonic-gate } else { 4700Sstevel@tonic-gate if ((cfp = fopen(tname, "r")) == NULL) 4710Sstevel@tonic-gate goto error; 4720Sstevel@tonic-gate if ((mfp = fopen(META_DBCONF, "w+")) == NULL) 4730Sstevel@tonic-gate goto error; 4740Sstevel@tonic-gate while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) { 4750Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 4760Sstevel@tonic-gate goto error; 4770Sstevel@tonic-gate } 4780Sstevel@tonic-gate (void) fclose(cfp); 4790Sstevel@tonic-gate cfp = NULL; 4800Sstevel@tonic-gate if (fflush(mfp) != 0) 4810Sstevel@tonic-gate goto error; 4820Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 4830Sstevel@tonic-gate goto error; 4840Sstevel@tonic-gate if (fclose(mfp) != 0) { 4850Sstevel@tonic-gate mfp = NULL; 4860Sstevel@tonic-gate goto error; 4870Sstevel@tonic-gate } 4880Sstevel@tonic-gate /* delete the tempfile */ 4890Sstevel@tonic-gate (void) unlink(tname); 4900Sstevel@tonic-gate } 4910Sstevel@tonic-gate /* success */ 4920Sstevel@tonic-gate rval = 0; 4930Sstevel@tonic-gate goto out; 4940Sstevel@tonic-gate 4950Sstevel@tonic-gate /* tempfile error */ 4960Sstevel@tonic-gate error: 4970Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname): 4980Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate 5010Sstevel@tonic-gate /* cleanup, return success */ 5020Sstevel@tonic-gate out: 5030Sstevel@tonic-gate if (rlp != NULL) 5040Sstevel@tonic-gate metafreereplicalist(rlp); 5050Sstevel@tonic-gate if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) { 5060Sstevel@tonic-gate rval = (in_miniroot) ? 
mdsyserror(ep, errno, tname): 5070Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate free(tname); 5100Sstevel@tonic-gate return (rval); 5110Sstevel@tonic-gate } 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate /* 5140Sstevel@tonic-gate * check replica for dev 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate static int 5170Sstevel@tonic-gate in_replica( 5180Sstevel@tonic-gate mdsetname_t *sp, 5190Sstevel@tonic-gate md_replica_t *rp, 5200Sstevel@tonic-gate mdname_t *np, 5210Sstevel@tonic-gate diskaddr_t slblk, 5220Sstevel@tonic-gate diskaddr_t nblks, 5230Sstevel@tonic-gate md_error_t *ep 5240Sstevel@tonic-gate ) 5250Sstevel@tonic-gate { 5260Sstevel@tonic-gate mdname_t *repnp = rp->r_namep; 5270Sstevel@tonic-gate diskaddr_t rep_sblk = rp->r_blkno; 5280Sstevel@tonic-gate diskaddr_t rep_nblks = rp->r_nblk; 5290Sstevel@tonic-gate 5300Sstevel@tonic-gate /* should be in the same set */ 5310Sstevel@tonic-gate assert(sp != NULL); 5320Sstevel@tonic-gate 5330Sstevel@tonic-gate /* if error in master block, assume whole partition */ 5340Sstevel@tonic-gate if ((rep_sblk == MD_DISKADDR_ERROR) || 5350Sstevel@tonic-gate (rep_nblks == MD_DISKADDR_ERROR)) { 5360Sstevel@tonic-gate rep_sblk = 0; 5370Sstevel@tonic-gate rep_nblks = MD_DISKADDR_ERROR; 5380Sstevel@tonic-gate } 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate /* check overlap */ 5410Sstevel@tonic-gate if (meta_check_overlap( 5420Sstevel@tonic-gate MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) { 5430Sstevel@tonic-gate return (-1); 5440Sstevel@tonic-gate } 5450Sstevel@tonic-gate 5460Sstevel@tonic-gate /* return success */ 5470Sstevel@tonic-gate return (0); 5480Sstevel@tonic-gate } 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate /* 5510Sstevel@tonic-gate * check to see if we're in a replica 5520Sstevel@tonic-gate */ 5530Sstevel@tonic-gate int 5540Sstevel@tonic-gate meta_check_inreplica( 5550Sstevel@tonic-gate mdsetname_t *sp, 5560Sstevel@tonic-gate mdname_t *np, 
5570Sstevel@tonic-gate diskaddr_t slblk, 5580Sstevel@tonic-gate diskaddr_t nblks, 5590Sstevel@tonic-gate md_error_t *ep 5600Sstevel@tonic-gate ) 5610Sstevel@tonic-gate { 5620Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 5630Sstevel@tonic-gate md_replicalist_t *rl; 5640Sstevel@tonic-gate int rval = 0; 5650Sstevel@tonic-gate 5660Sstevel@tonic-gate /* should have a set */ 5670Sstevel@tonic-gate assert(sp != NULL); 5680Sstevel@tonic-gate 5690Sstevel@tonic-gate /* for each replica */ 5700Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 5710Sstevel@tonic-gate return (-1); 5720Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 5730Sstevel@tonic-gate md_replica_t *rp = rl->rl_repp; 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate /* check replica */ 5760Sstevel@tonic-gate if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) { 5770Sstevel@tonic-gate rval = -1; 5780Sstevel@tonic-gate break; 5790Sstevel@tonic-gate } 5800Sstevel@tonic-gate } 5810Sstevel@tonic-gate 5820Sstevel@tonic-gate /* cleanup, return success */ 5830Sstevel@tonic-gate metafreereplicalist(rlp); 5840Sstevel@tonic-gate return (rval); 5850Sstevel@tonic-gate } 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate /* 5880Sstevel@tonic-gate * check replica 5890Sstevel@tonic-gate */ 5900Sstevel@tonic-gate int 5910Sstevel@tonic-gate meta_check_replica( 5920Sstevel@tonic-gate mdsetname_t *sp, /* set to check against */ 5930Sstevel@tonic-gate mdname_t *np, /* component to check against */ 5940Sstevel@tonic-gate mdchkopts_t options, /* option flags */ 5950Sstevel@tonic-gate diskaddr_t slblk, /* start logical block */ 5960Sstevel@tonic-gate diskaddr_t nblks, /* number of blocks (-1,rest of them) */ 5970Sstevel@tonic-gate md_error_t *ep /* error packet */ 5980Sstevel@tonic-gate ) 5990Sstevel@tonic-gate { 6000Sstevel@tonic-gate mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE; 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate /* make sure we have a disk */ 6030Sstevel@tonic-gate if 
(metachkcomp(np, ep) != 0) 6040Sstevel@tonic-gate return (-1); 6050Sstevel@tonic-gate 6060Sstevel@tonic-gate /* check to ensure that it is not already in use */ 6070Sstevel@tonic-gate if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 6080Sstevel@tonic-gate return (-1); 6090Sstevel@tonic-gate } 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate if (options & MDCHK_ALLOW_NODBS) 6120Sstevel@tonic-gate return (0); 6130Sstevel@tonic-gate 6140Sstevel@tonic-gate if (options & MDCHK_DRVINSET) 6150Sstevel@tonic-gate return (0); 6160Sstevel@tonic-gate 6170Sstevel@tonic-gate /* make sure it is in the set */ 6180Sstevel@tonic-gate if (meta_check_inset(sp, np, ep) != 0) 6190Sstevel@tonic-gate return (-1); 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate /* make sure its not in a metadevice */ 6220Sstevel@tonic-gate if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0) 6230Sstevel@tonic-gate return (-1); 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate /* return success */ 6260Sstevel@tonic-gate return (0); 6270Sstevel@tonic-gate } 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate static int 6300Sstevel@tonic-gate update_dbinfo_on_drives( 6310Sstevel@tonic-gate mdsetname_t *sp, 6320Sstevel@tonic-gate md_drive_desc *dd, 6330Sstevel@tonic-gate int set_locked, 6340Sstevel@tonic-gate int force, 6350Sstevel@tonic-gate md_error_t *ep 6360Sstevel@tonic-gate ) 6370Sstevel@tonic-gate { 6380Sstevel@tonic-gate md_set_desc *sd; 6390Sstevel@tonic-gate int i; 6400Sstevel@tonic-gate md_setkey_t *cl_sk; 6410Sstevel@tonic-gate int rval = 0; 6420Sstevel@tonic-gate md_mnnode_desc *nd; 6430Sstevel@tonic-gate 6440Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 6450Sstevel@tonic-gate return (-1); 6460Sstevel@tonic-gate 6470Sstevel@tonic-gate if (! 
set_locked) { 6480Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 6490Sstevel@tonic-gate md_error_t xep = mdnullerror; 6500Sstevel@tonic-gate sigset_t sigs; 6510Sstevel@tonic-gate /* Make sure we are blocking all signals */ 6520Sstevel@tonic-gate if (procsigs(TRUE, &sigs, &xep) < 0) 6530Sstevel@tonic-gate mdclrerror(&xep); 6540Sstevel@tonic-gate 6550Sstevel@tonic-gate nd = sd->sd_nodelist; 6560Sstevel@tonic-gate while (nd) { 6570Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, 6580Sstevel@tonic-gate mynode()) != 0) { 6590Sstevel@tonic-gate nd = nd->nd_next; 6600Sstevel@tonic-gate continue; 6610Sstevel@tonic-gate } 6620Sstevel@tonic-gate 6630Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 6640Sstevel@tonic-gate nd = nd->nd_next; 6650Sstevel@tonic-gate continue; 6660Sstevel@tonic-gate } 6670Sstevel@tonic-gate 6680Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) 6690Sstevel@tonic-gate return (-1); 6700Sstevel@tonic-gate nd = nd->nd_next; 6710Sstevel@tonic-gate } 6720Sstevel@tonic-gate } else { 6730Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 6740Sstevel@tonic-gate /* Skip empty slots */ 6750Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 6760Sstevel@tonic-gate continue; 6770Sstevel@tonic-gate 6780Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], 6790Sstevel@tonic-gate mynode()) != 0) 6800Sstevel@tonic-gate continue; 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) 6830Sstevel@tonic-gate return (-1); 6840Sstevel@tonic-gate } 6850Sstevel@tonic-gate } 6860Sstevel@tonic-gate } 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 6890Sstevel@tonic-gate nd = sd->sd_nodelist; 6900Sstevel@tonic-gate while (nd) { 6910Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, mynode()) != 0) { 6920Sstevel@tonic-gate nd = nd->nd_next; 6930Sstevel@tonic-gate continue; 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 
{ 6970Sstevel@tonic-gate nd = nd->nd_next; 6980Sstevel@tonic-gate continue; 6990Sstevel@tonic-gate } 7000Sstevel@tonic-gate 7010Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep) 7020Sstevel@tonic-gate == -1) { 7030Sstevel@tonic-gate rval = -1; 7040Sstevel@tonic-gate break; 7050Sstevel@tonic-gate } 7060Sstevel@tonic-gate nd = nd->nd_next; 7070Sstevel@tonic-gate } 7080Sstevel@tonic-gate } else { 7090Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7100Sstevel@tonic-gate /* Skip empty slots */ 7110Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7120Sstevel@tonic-gate continue; 7130Sstevel@tonic-gate 7140Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], mynode()) != 0) 7150Sstevel@tonic-gate continue; 7160Sstevel@tonic-gate 7170Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep) 7180Sstevel@tonic-gate == -1) { 7190Sstevel@tonic-gate rval = -1; 7200Sstevel@tonic-gate break; 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate } 7230Sstevel@tonic-gate } 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate if (! 
set_locked) { 7260Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname); 7270Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 7280Sstevel@tonic-gate nd = sd->sd_nodelist; 7290Sstevel@tonic-gate while (nd) { 7300Sstevel@tonic-gate if (force && 7310Sstevel@tonic-gate strcmp(nd->nd_nodename, mynode()) != 0) { 7320Sstevel@tonic-gate nd = nd->nd_next; 7330Sstevel@tonic-gate continue; 7340Sstevel@tonic-gate } 7350Sstevel@tonic-gate 7360Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 7370Sstevel@tonic-gate nd = nd->nd_next; 7380Sstevel@tonic-gate continue; 7390Sstevel@tonic-gate } 7400Sstevel@tonic-gate 7410Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, 7420Sstevel@tonic-gate ep)) { 7430Sstevel@tonic-gate rval = -1; 7440Sstevel@tonic-gate break; 7450Sstevel@tonic-gate } 7460Sstevel@tonic-gate nd = nd->nd_next; 7470Sstevel@tonic-gate } 7480Sstevel@tonic-gate } else { 7490Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7500Sstevel@tonic-gate /* Skip empty slots */ 7510Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7520Sstevel@tonic-gate continue; 7530Sstevel@tonic-gate 7540Sstevel@tonic-gate if (force && 7550Sstevel@tonic-gate strcmp(sd->sd_nodes[i], mynode()) != 0) 7560Sstevel@tonic-gate continue; 7570Sstevel@tonic-gate 7580Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, 7590Sstevel@tonic-gate ep)) { 7600Sstevel@tonic-gate rval = -1; 7610Sstevel@tonic-gate break; 7620Sstevel@tonic-gate } 7630Sstevel@tonic-gate } 7640Sstevel@tonic-gate 7650Sstevel@tonic-gate } 7660Sstevel@tonic-gate cl_set_setkey(NULL); 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate return (rval); 7700Sstevel@tonic-gate } 7710Sstevel@tonic-gate 7720Sstevel@tonic-gate int 7730Sstevel@tonic-gate meta_db_addsidenms( 7740Sstevel@tonic-gate mdsetname_t *sp, 7750Sstevel@tonic-gate mdname_t *np, 7760Sstevel@tonic-gate daddr_t blkno, 7770Sstevel@tonic-gate int bcast, 7780Sstevel@tonic-gate md_error_t *ep 7790Sstevel@tonic-gate ) 
7800Sstevel@tonic-gate { 7810Sstevel@tonic-gate side_t sideno; 7820Sstevel@tonic-gate char *bname = NULL; 7830Sstevel@tonic-gate char *dname = NULL; 7840Sstevel@tonic-gate minor_t mnum; 7850Sstevel@tonic-gate mddb_config_t c; 7860Sstevel@tonic-gate int done; 7870Sstevel@tonic-gate int rval = 0; 7880Sstevel@tonic-gate md_set_desc *sd; 7890Sstevel@tonic-gate 7900Sstevel@tonic-gate sideno = MD_SIDEWILD; 7910Sstevel@tonic-gate /*CONSTCOND*/ 7920Sstevel@tonic-gate while (1) { 7930Sstevel@tonic-gate if (bname != NULL) { 7940Sstevel@tonic-gate Free(bname); 7950Sstevel@tonic-gate bname = NULL; 7960Sstevel@tonic-gate } 7970Sstevel@tonic-gate if (dname != NULL) { 7980Sstevel@tonic-gate Free(dname); 7990Sstevel@tonic-gate dname = NULL; 8000Sstevel@tonic-gate } 8010Sstevel@tonic-gate if ((done = meta_getnextside_devinfo(sp, np->bname, 8020Sstevel@tonic-gate &sideno, &bname, &dname, &mnum, ep)) == -1) { 8030Sstevel@tonic-gate rval = -1; 8040Sstevel@tonic-gate break; 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate if (done == 0) 8080Sstevel@tonic-gate break; 8090Sstevel@tonic-gate 8100Sstevel@tonic-gate if (! metaislocalset(sp)) { 8110Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) { 8120Sstevel@tonic-gate rval = -1; 8130Sstevel@tonic-gate break; 8140Sstevel@tonic-gate } 8150Sstevel@tonic-gate } 8160Sstevel@tonic-gate 8170Sstevel@tonic-gate /* 8180Sstevel@tonic-gate * Send addsidenms to all nodes using rpc.mdcommd if 8190Sstevel@tonic-gate * sidename is being added to MN diskset. 8200Sstevel@tonic-gate * 8210Sstevel@tonic-gate * It's ok to broadcast this call to other nodes. 8220Sstevel@tonic-gate * 8230Sstevel@tonic-gate * Note: The broadcast to other nodes isn't needed during 8240Sstevel@tonic-gate * the addition of the first mddbs to the set since the 8250Sstevel@tonic-gate * other nodes haven't been joined to the set yet. 
All 8260Sstevel@tonic-gate * nodes in a MN diskset are (implicitly) joined to the set 8270Sstevel@tonic-gate * on the addition of the first mddb. 8280Sstevel@tonic-gate */ 8290Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 8300Sstevel@tonic-gate (bcast == DB_ADDSIDENMS_BCAST)) { 8310Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 8320Sstevel@tonic-gate md_mn_msg_meta_db_newside_t db_ns; 8330Sstevel@tonic-gate int send_rval; 8340Sstevel@tonic-gate 8350Sstevel@tonic-gate db_ns.msg_l_dev = np->dev; 8360Sstevel@tonic-gate db_ns.msg_sideno = sideno; 8370Sstevel@tonic-gate db_ns.msg_blkno = blkno; 8380Sstevel@tonic-gate (void) strncpy(db_ns.msg_dname, dname, 8390Sstevel@tonic-gate sizeof (db_ns.msg_dname)); 8400Sstevel@tonic-gate (void) splitname(np->bname, &db_ns.msg_splitname); 8410Sstevel@tonic-gate db_ns.msg_mnum = mnum; 8420Sstevel@tonic-gate 8430Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 8440Sstevel@tonic-gate db_ns.msg_devid[0] = NULL; 8450Sstevel@tonic-gate 8460Sstevel@tonic-gate /* 8470Sstevel@tonic-gate * If reconfig cycle has been started, this node is 8480Sstevel@tonic-gate * stuck in in the return step until this command has 8490Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 8500Sstevel@tonic-gate * send_message to fail (instead of retrying) 8510Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 8520Sstevel@tonic-gate * cycle to proceed. 
8530Sstevel@tonic-gate */ 8540Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 8550Sstevel@tonic-gate MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND | 8560Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns, 8570Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_newside_t), 8580Sstevel@tonic-gate &resultp, ep); 8590Sstevel@tonic-gate if (send_rval != 0) { 8600Sstevel@tonic-gate rval = -1; 8610Sstevel@tonic-gate if (resultp == NULL) 8620Sstevel@tonic-gate (void) mddserror(ep, 8630Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 8640Sstevel@tonic-gate sp->setno, NULL, NULL, 8650Sstevel@tonic-gate sp->setname); 8660Sstevel@tonic-gate else { 8670Sstevel@tonic-gate (void) mdstealerror(ep, 8680Sstevel@tonic-gate &(resultp->mmr_ep)); 8690Sstevel@tonic-gate if (mdisok(ep)) { 8700Sstevel@tonic-gate (void) mddserror(ep, 8710Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 8720Sstevel@tonic-gate sp->setno, NULL, NULL, 8730Sstevel@tonic-gate sp->setname); 8740Sstevel@tonic-gate } 8750Sstevel@tonic-gate free_result(resultp); 8760Sstevel@tonic-gate } 8770Sstevel@tonic-gate break; 8780Sstevel@tonic-gate } 8790Sstevel@tonic-gate if (resultp) 8800Sstevel@tonic-gate free_result(resultp); 8810Sstevel@tonic-gate } else { 8820Sstevel@tonic-gate /* 8830Sstevel@tonic-gate * Let this side's device name, minor # and driver name 8840Sstevel@tonic-gate * be known to the database replica. 
8850Sstevel@tonic-gate */ 8860Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 8870Sstevel@tonic-gate 8880Sstevel@tonic-gate /* Fill in device/replica info */ 8890Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 8900Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 8910Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, dname, 8920Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 8930Sstevel@tonic-gate (void) splitname(bname, &c.c_devname); 8940Sstevel@tonic-gate c.c_locator.l_mnum = mnum; 8950Sstevel@tonic-gate 8960Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 8970Sstevel@tonic-gate c.c_setno = sp->setno; 8980Sstevel@tonic-gate (void) strncpy(c.c_setname, sp->setname, 8990Sstevel@tonic-gate sizeof (c.c_setname)); 9000Sstevel@tonic-gate c.c_sideno = sideno; 9010Sstevel@tonic-gate 9020Sstevel@tonic-gate /* 9030Sstevel@tonic-gate * Don't need device id information from this ioctl 9040Sstevel@tonic-gate * Kernel determines device id from dev_t, which 9050Sstevel@tonic-gate * is just what this code would do. 
9060Sstevel@tonic-gate */ 9070Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 9080Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 9090Sstevel@tonic-gate 9100Sstevel@tonic-gate if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) { 9110Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 9120Sstevel@tonic-gate break; 9130Sstevel@tonic-gate } 9140Sstevel@tonic-gate } 9150Sstevel@tonic-gate } 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate /* cleanup, return success */ 9180Sstevel@tonic-gate if (bname != NULL) { 9190Sstevel@tonic-gate Free(bname); 9200Sstevel@tonic-gate bname = NULL; 9210Sstevel@tonic-gate } 9220Sstevel@tonic-gate if (dname != NULL) { 9230Sstevel@tonic-gate Free(dname); 9240Sstevel@tonic-gate dname = NULL; 9250Sstevel@tonic-gate } 9260Sstevel@tonic-gate return (rval); 9270Sstevel@tonic-gate } 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate 9300Sstevel@tonic-gate int 9310Sstevel@tonic-gate meta_db_delsidenm( 9320Sstevel@tonic-gate mdsetname_t *sp, 9330Sstevel@tonic-gate side_t sideno, 9340Sstevel@tonic-gate mdname_t *np, 9350Sstevel@tonic-gate daddr_t blkno, 9360Sstevel@tonic-gate md_error_t *ep 9370Sstevel@tonic-gate ) 9380Sstevel@tonic-gate { 9390Sstevel@tonic-gate mddb_config_t c; 9400Sstevel@tonic-gate md_set_desc *sd; 9410Sstevel@tonic-gate 9420Sstevel@tonic-gate if (! metaislocalset(sp)) { 9430Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 9440Sstevel@tonic-gate return (-1); 9450Sstevel@tonic-gate } 9460Sstevel@tonic-gate /* Use rpc.mdcommd to delete mddb side from all nodes */ 9470Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 9480Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 9490Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 9500Sstevel@tonic-gate md_mn_msg_meta_db_delside_t db_ds; 9510Sstevel@tonic-gate int send_rval; 9520Sstevel@tonic-gate 9530Sstevel@tonic-gate db_ds.msg_l_dev = np->dev; 9540Sstevel@tonic-gate db_ds.msg_blkno = blkno; 9550Sstevel@tonic-gate db_ds.msg_sideno = sideno; 9560Sstevel@tonic-gate 9570Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 9580Sstevel@tonic-gate db_ds.msg_devid[0] = NULL; 9590Sstevel@tonic-gate 9600Sstevel@tonic-gate /* 9610Sstevel@tonic-gate * If reconfig cycle has been started, this node is 9620Sstevel@tonic-gate * stuck in in the return step until this command has 9630Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 9640Sstevel@tonic-gate * send_message to fail (instead of retrying) 9650Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 9660Sstevel@tonic-gate * cycle to proceed. 
9670Sstevel@tonic-gate */ 9680Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 9690Sstevel@tonic-gate MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND | 9700Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds, 9710Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep); 9720Sstevel@tonic-gate if (send_rval != 0) { 9730Sstevel@tonic-gate if (resultp == NULL) 9740Sstevel@tonic-gate (void) mddserror(ep, 9750Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9760Sstevel@tonic-gate sp->setno, NULL, NULL, 9770Sstevel@tonic-gate sp->setname); 9780Sstevel@tonic-gate else { 9790Sstevel@tonic-gate (void) mdstealerror(ep, &(resultp->mmr_ep)); 9800Sstevel@tonic-gate if (mdisok(ep)) { 9810Sstevel@tonic-gate (void) mddserror(ep, 9820Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9830Sstevel@tonic-gate sp->setno, NULL, NULL, 9840Sstevel@tonic-gate sp->setname); 9850Sstevel@tonic-gate } 9860Sstevel@tonic-gate free_result(resultp); 9870Sstevel@tonic-gate } 9880Sstevel@tonic-gate return (-1); 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate if (resultp) 9910Sstevel@tonic-gate free_result(resultp); 9920Sstevel@tonic-gate 9930Sstevel@tonic-gate } else { 9940Sstevel@tonic-gate /* 9950Sstevel@tonic-gate * Let this side's device name, minor # and driver name 9960Sstevel@tonic-gate * be known to the database replica. 
9970Sstevel@tonic-gate */ 9980Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate /* Fill in device/replica info */ 10010Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 10020Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 10030Sstevel@tonic-gate 10040Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 10050Sstevel@tonic-gate c.c_setno = sp->setno; 10060Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 10070Sstevel@tonic-gate c.c_sideno = sideno; 10080Sstevel@tonic-gate 10090Sstevel@tonic-gate /* 10100Sstevel@tonic-gate * Don't need device id information from this ioctl 10110Sstevel@tonic-gate * Kernel determines device id from dev_t, which 10120Sstevel@tonic-gate * is just what this code would do. 10130Sstevel@tonic-gate */ 10140Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 10150Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0) 10180Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 10190Sstevel@tonic-gate } 10200Sstevel@tonic-gate return (0); 10210Sstevel@tonic-gate } 10220Sstevel@tonic-gate 10230Sstevel@tonic-gate 10240Sstevel@tonic-gate static int 10250Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep) 10260Sstevel@tonic-gate { 10270Sstevel@tonic-gate mdnamelist_t *dnp1, *dnp2; 10280Sstevel@tonic-gate 10290Sstevel@tonic-gate for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) { 10300Sstevel@tonic-gate for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) { 10310Sstevel@tonic-gate if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0) 10320Sstevel@tonic-gate return (mderror(ep, MDE_DUPDRIVE, 10330Sstevel@tonic-gate dnp1->namep->cname)); 10340Sstevel@tonic-gate } 10350Sstevel@tonic-gate } 10360Sstevel@tonic-gate return (0); 10370Sstevel@tonic-gate } 10380Sstevel@tonic-gate 10390Sstevel@tonic-gate 10400Sstevel@tonic-gate /* 10410Sstevel@tonic-gate * 
/*
 * Read exactly len bytes from fd into buf, retrying after short reads
 * and after EINTR.  Returns 0 when the buffer has been filled, -1 on a
 * read error or when end-of-file is reached first.
 */
static int
filediff_readn(int fd, char *buf, size_t len)
{
	size_t	got = 0;
	ssize_t	n;

	while (got < len) {
		n = read(fd, buf + got, len - got);
		if (n > 0) {
			got += (size_t)n;
		} else if (n == 0 || errno != EINTR) {
			return (-1);
		}
	}
	return (0);
}

/*
 * Return 1 if files are different, else return 0
 *
 * Any failure to open, stat or read either file is reported as
 * "different" (1).  Two empty files are identical (0).
 *
 * The files are compared chunk by chunk through two fixed buffers, so
 * memory use does not grow with file size, and the sizes are taken from
 * the open descriptors so both checks refer to the same files.
 */
static int
filediff(char *tsname, char *sname)
{
	char		tbuf[8192];	/* current chunk of tsname */
	char		buf[8192];	/* matching chunk of sname */
	struct stat	tsbuf, sbuf;
	off_t		left;
	size_t		chunk;
	int		tfd, fd;
	int		ret = 1;

	if ((tfd = open(tsname, O_RDONLY)) == -1)
		return (1);
	if ((fd = open(sname, O_RDONLY)) == -1) {
		(void) close(tfd);
		return (1);
	}

	/* files of different length can never match */
	if (fstat(tfd, &tsbuf) != 0 || fstat(fd, &sbuf) != 0 ||
	    tsbuf.st_size != sbuf.st_size)
		goto out;

	/* compare content */
	for (left = tsbuf.st_size; left > 0; left -= (off_t)chunk) {
		chunk = sizeof (buf);
		if (left < (off_t)chunk)
			chunk = (size_t)left;

		if (filediff_readn(tfd, tbuf, chunk) != 0 ||
		    filediff_readn(fd, buf, chunk) != 0 ||
		    memcmp(tbuf, buf, chunk) != 0)
			goto out;
	}
	ret = 0;

out:
	(void) close(fd);
	(void) close(tfd);
	return (ret);
}
10910Sstevel@tonic-gate 10920Sstevel@tonic-gate /* 10930Sstevel@tonic-gate * patch md.conf file with mddb locations 10940Sstevel@tonic-gate */ 10950Sstevel@tonic-gate int 10960Sstevel@tonic-gate meta_db_patch( 10970Sstevel@tonic-gate char *sname, /* system file name */ 10980Sstevel@tonic-gate char *cname, /* mddb.cf file name */ 10990Sstevel@tonic-gate int patch, /* patching locally */ 11000Sstevel@tonic-gate md_error_t *ep 11010Sstevel@tonic-gate ) 11020Sstevel@tonic-gate { 11030Sstevel@tonic-gate char *tsname = NULL; 11040Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 11050Sstevel@tonic-gate FILE *tsfp = NULL; 11060Sstevel@tonic-gate FILE *mfp = NULL; 11070Sstevel@tonic-gate int rval = -1; 11080Sstevel@tonic-gate 11090Sstevel@tonic-gate /* check names */ 11100Sstevel@tonic-gate if (sname == NULL) { 11110Sstevel@tonic-gate if (patch) 11120Sstevel@tonic-gate sname = "md.conf"; 11130Sstevel@tonic-gate else 11140Sstevel@tonic-gate sname = "/kernel/drv/md.conf"; 11150Sstevel@tonic-gate } 11160Sstevel@tonic-gate if (cname == NULL) 11170Sstevel@tonic-gate cname = META_DBCONF; 11180Sstevel@tonic-gate 11190Sstevel@tonic-gate /* 11200Sstevel@tonic-gate * edit file 11210Sstevel@tonic-gate */ 11220Sstevel@tonic-gate if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) { 11230Sstevel@tonic-gate if (mdissyserror(ep, EROFS)) { 11240Sstevel@tonic-gate /* 11250Sstevel@tonic-gate * If we are booted on a read-only root because 11260Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 11270Sstevel@tonic-gate * any scary error messages. 
11280Sstevel@tonic-gate */ 11290Sstevel@tonic-gate mdclrerror(ep); 11300Sstevel@tonic-gate rval = 0; 11310Sstevel@tonic-gate } 11320Sstevel@tonic-gate goto out; 11330Sstevel@tonic-gate } 11340Sstevel@tonic-gate 11350Sstevel@tonic-gate if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 11360Sstevel@tonic-gate ep) != 0) 11370Sstevel@tonic-gate goto out; 11380Sstevel@tonic-gate 11390Sstevel@tonic-gate /* if file content is identical, skip rename */ 11400Sstevel@tonic-gate if (filediff(tsname, sname) == 0) { 11410Sstevel@tonic-gate rval = 0; 11420Sstevel@tonic-gate goto out; 11430Sstevel@tonic-gate } 11440Sstevel@tonic-gate 11450Sstevel@tonic-gate if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) || 11460Sstevel@tonic-gate (fclose(tsfp) != 0)) { 11470Sstevel@tonic-gate (void) mdsyserror(ep, errno, tsname); 11480Sstevel@tonic-gate goto out; 11490Sstevel@tonic-gate } 11500Sstevel@tonic-gate 11510Sstevel@tonic-gate tsfp = NULL; 11520Sstevel@tonic-gate 11530Sstevel@tonic-gate /* 11540Sstevel@tonic-gate * rename file. If we get a Cross Device error then it 11550Sstevel@tonic-gate * is because we are in the miniroot. 
11560Sstevel@tonic-gate */ 11570Sstevel@tonic-gate if (rename(tsname, sname) != 0 && errno != EXDEV) { 11580Sstevel@tonic-gate (void) mdsyserror(ep, errno, sname); 11590Sstevel@tonic-gate goto out; 11600Sstevel@tonic-gate } 11610Sstevel@tonic-gate 11620Sstevel@tonic-gate if (errno == EXDEV) { 11630Sstevel@tonic-gate if ((tsfp = fopen(tsname, "r")) == NULL) 11640Sstevel@tonic-gate goto out; 11650Sstevel@tonic-gate if ((mfp = fopen(sname, "w+")) == NULL) 11660Sstevel@tonic-gate goto out; 11670Sstevel@tonic-gate while (fgets(line, sizeof (line), tsfp) != NULL) { 11680Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 11690Sstevel@tonic-gate goto out; 11700Sstevel@tonic-gate } 11710Sstevel@tonic-gate (void) fclose(tsfp); 11720Sstevel@tonic-gate tsfp = NULL; 11730Sstevel@tonic-gate if (fflush(mfp) != 0) 11740Sstevel@tonic-gate goto out; 11750Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 11760Sstevel@tonic-gate goto out; 11770Sstevel@tonic-gate if (fclose(mfp) != 0) { 11780Sstevel@tonic-gate mfp = NULL; 11790Sstevel@tonic-gate goto out; 11800Sstevel@tonic-gate } 11810Sstevel@tonic-gate } 11820Sstevel@tonic-gate 11830Sstevel@tonic-gate Free(tsname); 11840Sstevel@tonic-gate tsname = NULL; 11850Sstevel@tonic-gate rval = 0; 11860Sstevel@tonic-gate 11870Sstevel@tonic-gate /* cleanup, return error */ 11880Sstevel@tonic-gate out: 11890Sstevel@tonic-gate if (tsfp != NULL) 11900Sstevel@tonic-gate (void) fclose(tsfp); 11910Sstevel@tonic-gate if (tsname != NULL) { 11920Sstevel@tonic-gate (void) unlink(tsname); 11930Sstevel@tonic-gate Free(tsname); 11940Sstevel@tonic-gate } 11950Sstevel@tonic-gate return (rval); 11960Sstevel@tonic-gate } 11970Sstevel@tonic-gate 11980Sstevel@tonic-gate /* 11990Sstevel@tonic-gate * Add replicas to set. 
This happens as a result of: 12000Sstevel@tonic-gate * - metadb [-s set_name] -a 12010Sstevel@tonic-gate * - metaset -s set_name -a disk 12020Sstevel@tonic-gate * - metaset -s set_name -d disk (causes a rebalance of mddbs) 12030Sstevel@tonic-gate * - metaset -s set_name -b 12040Sstevel@tonic-gate * 12050Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 12060Sstevel@tonic-gate * 12070Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 12080Sstevel@tonic-gate * is running the metaset command. 12090Sstevel@tonic-gate * 12100Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 12110Sstevel@tonic-gate * running the metaset command. If this is the first mddb added to 12120Sstevel@tonic-gate * the MN diskset, then no communication is made to other nodes via commd 12130Sstevel@tonic-gate * since the other nodes will be in-sync with respect to the mddbs when 12140Sstevel@tonic-gate * those other nodes join the set and snarf in the newly created mddb. 12150Sstevel@tonic-gate * If this is not the first mddb added to the MN diskset, then this 12160Sstevel@tonic-gate * attach command is sent to all of the nodes using commd. This keeps 12170Sstevel@tonic-gate * the nodes in-sync. 
12180Sstevel@tonic-gate */ 12190Sstevel@tonic-gate int 12200Sstevel@tonic-gate meta_db_attach( 12210Sstevel@tonic-gate mdsetname_t *sp, 12220Sstevel@tonic-gate mdnamelist_t *db_nlp, 12230Sstevel@tonic-gate mdchkopts_t options, 12240Sstevel@tonic-gate md_timeval32_t *timeval, 12250Sstevel@tonic-gate int dbcnt, 12260Sstevel@tonic-gate int dbsize, 12270Sstevel@tonic-gate char *sysfilename, 12280Sstevel@tonic-gate md_error_t *ep 12290Sstevel@tonic-gate ) 12300Sstevel@tonic-gate { 12310Sstevel@tonic-gate struct mddb_config c; 12320Sstevel@tonic-gate mdnamelist_t *nlp; 12330Sstevel@tonic-gate mdname_t *np; 12340Sstevel@tonic-gate md_drive_desc *dd = NULL; 12350Sstevel@tonic-gate md_drive_desc *p; 12360Sstevel@tonic-gate int i; 12370Sstevel@tonic-gate int fd; 12380Sstevel@tonic-gate side_t sideno; 12390Sstevel@tonic-gate daddr_t blkno; 12400Sstevel@tonic-gate int replicacount = 0; 12410Sstevel@tonic-gate int start_mdmonitord = 0; 12420Sstevel@tonic-gate int rval = 0; 12430Sstevel@tonic-gate md_error_t status = mdnullerror; 12440Sstevel@tonic-gate md_set_desc *sd; 12450Sstevel@tonic-gate int stale_bool = FALSE; 12460Sstevel@tonic-gate int flags; 12470Sstevel@tonic-gate int firstmddb = 1; 12480Sstevel@tonic-gate md_timeval32_t inittime = {0, 0}; 12490Sstevel@tonic-gate 12500Sstevel@tonic-gate /* 12510Sstevel@tonic-gate * Error if we don't get some work to do. 
12520Sstevel@tonic-gate */ 12530Sstevel@tonic-gate if (db_nlp == NULL) 12540Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 12550Sstevel@tonic-gate 12560Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 12570Sstevel@tonic-gate return (-1); 12580Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 12590Sstevel@tonic-gate c.c_id = 0; 12600Sstevel@tonic-gate c.c_setno = sp->setno; 12610Sstevel@tonic-gate 12620Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 12630Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 12640Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 12650Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 12660Sstevel@tonic-gate if (metaislocalset(sp)) { 12670Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) 12680Sstevel@tonic-gate mdclrerror(&c.c_mde); 12690Sstevel@tonic-gate else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) || 12700Sstevel@tonic-gate (! (options & MDCHK_ALLOW_NODBS))) 12710Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 12720Sstevel@tonic-gate } else { 12730Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER)) 12740Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 12750Sstevel@tonic-gate } 12760Sstevel@tonic-gate mdclrerror(&c.c_mde); 12770Sstevel@tonic-gate } 12780Sstevel@tonic-gate /* 12790Sstevel@tonic-gate * Is current set STALE? 
12800Sstevel@tonic-gate */ 12810Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 12820Sstevel@tonic-gate stale_bool = TRUE; 12830Sstevel@tonic-gate } 12840Sstevel@tonic-gate 12850Sstevel@tonic-gate assert(db_nlp != NULL); 12860Sstevel@tonic-gate 12870Sstevel@tonic-gate /* if creating the metadbs for the first time start mdmonitord */ 12880Sstevel@tonic-gate if (c.c_dbcnt == 0) 12890Sstevel@tonic-gate start_mdmonitord = 1; 12900Sstevel@tonic-gate 12910Sstevel@tonic-gate /* 12920Sstevel@tonic-gate * check to see if we will go over the total possible number 12930Sstevel@tonic-gate * of data bases 12940Sstevel@tonic-gate */ 12950Sstevel@tonic-gate nlp = db_nlp; 12960Sstevel@tonic-gate while (nlp) { 12970Sstevel@tonic-gate replicacount += dbcnt; 12980Sstevel@tonic-gate nlp = nlp->next; 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate 13010Sstevel@tonic-gate if ((replicacount + c.c_dbcnt) > c.c_dbmax) 13020Sstevel@tonic-gate return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32, 13030Sstevel@tonic-gate sp->setno, c.c_dbcnt + replicacount, NULL)); 13040Sstevel@tonic-gate 13050Sstevel@tonic-gate /* 13060Sstevel@tonic-gate * go through and check to make sure all locations specified 13070Sstevel@tonic-gate * are legal also pick out driver name; 13080Sstevel@tonic-gate */ 13090Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13100Sstevel@tonic-gate diskaddr_t devsize; 13110Sstevel@tonic-gate 13120Sstevel@tonic-gate np = nlp->namep; 13130Sstevel@tonic-gate 13140Sstevel@tonic-gate if (! metaislocalset(sp)) { 13150Sstevel@tonic-gate uint_t partno; 13160Sstevel@tonic-gate uint_t rep_partno; 13170Sstevel@tonic-gate mddrivename_t *dnp = np->drivenamep; 13180Sstevel@tonic-gate 13190Sstevel@tonic-gate /* 13200Sstevel@tonic-gate * make sure that non-local database replicas 13210Sstevel@tonic-gate * are always on the replica slice. 
13220Sstevel@tonic-gate */ 13230Sstevel@tonic-gate if (meta_replicaslice(dnp, 13240Sstevel@tonic-gate &rep_partno, ep) != 0) 13250Sstevel@tonic-gate return (-1); 13260Sstevel@tonic-gate if (metagetvtoc(np, FALSE, &partno, ep) == NULL) 13270Sstevel@tonic-gate return (-1); 13280Sstevel@tonic-gate if (partno != rep_partno) 13290Sstevel@tonic-gate return (mddeverror(ep, MDE_REPCOMP_ONLY, 13300Sstevel@tonic-gate np->dev, sp->setname)); 13310Sstevel@tonic-gate } 13320Sstevel@tonic-gate 13330Sstevel@tonic-gate if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize), 13340Sstevel@tonic-gate ep)) { 13350Sstevel@tonic-gate return (-1); 13360Sstevel@tonic-gate } 13370Sstevel@tonic-gate 13380Sstevel@tonic-gate if ((devsize = metagetsize(np, ep)) == -1) 13390Sstevel@tonic-gate return (-1); 13400Sstevel@tonic-gate 13410Sstevel@tonic-gate if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16)) 13420Sstevel@tonic-gate return (mdmddberror(ep, MDE_REPLICA_TOOSMALL, 13430Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, devsize, 13440Sstevel@tonic-gate np->cname)); 13450Sstevel@tonic-gate } 13460Sstevel@tonic-gate 13470Sstevel@tonic-gate /* 13480Sstevel@tonic-gate * If first disk in set we don't have lb_inittime yet for use as 13490Sstevel@tonic-gate * mb_setcreatetime so don't go looking for it. WE'll come back 13500Sstevel@tonic-gate * later and update after the locator block has been created. 13510Sstevel@tonic-gate * If this isn't the first disk in the set, we have a locator 13520Sstevel@tonic-gate * block and thus we have lb_inittime. Set mb_setcreatetime to 13530Sstevel@tonic-gate * lb_inittime. 13540Sstevel@tonic-gate */ 13550Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 13560Sstevel@tonic-gate if (c.c_dbcnt != 0) { 13570Sstevel@tonic-gate firstmddb = 0; 13580Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, ep); 13590Sstevel@tonic-gate } 13600Sstevel@tonic-gate } 13610Sstevel@tonic-gate 13620Sstevel@tonic-gate /* 13630Sstevel@tonic-gate * go through and write all master blocks 13640Sstevel@tonic-gate */ 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13670Sstevel@tonic-gate np = nlp->namep; 13680Sstevel@tonic-gate 13690Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) < 0) 13700Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 13710Sstevel@tonic-gate 13720Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 13730Sstevel@tonic-gate if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize, 13740Sstevel@tonic-gate inittime, ep)) { 13750Sstevel@tonic-gate (void) close(fd); 13760Sstevel@tonic-gate return (-1); 13770Sstevel@tonic-gate } 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate (void) close(fd); 13800Sstevel@tonic-gate } 13810Sstevel@tonic-gate 13820Sstevel@tonic-gate if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) 13830Sstevel@tonic-gate return (-1); 13840Sstevel@tonic-gate 13850Sstevel@tonic-gate if (! metaislocalset(sp)) { 13860Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 13870Sstevel@tonic-gate if (! 
mdisok(ep)) 13880Sstevel@tonic-gate return (-1); 13890Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 13900Sstevel@tonic-gate return (-1); 13910Sstevel@tonic-gate 13920Sstevel@tonic-gate } 13930Sstevel@tonic-gate 13940Sstevel@tonic-gate /* 13950Sstevel@tonic-gate * go through and tell kernel to add them 13960Sstevel@tonic-gate */ 13970Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13980Sstevel@tonic-gate mdcinfo_t *cinfo; 13990Sstevel@tonic-gate 14000Sstevel@tonic-gate np = nlp->namep; 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate if ((cinfo = metagetcinfo(np, ep)) == NULL) { 14030Sstevel@tonic-gate rval = -1; 14040Sstevel@tonic-gate goto out; 14050Sstevel@tonic-gate } 14060Sstevel@tonic-gate 14070Sstevel@tonic-gate /* 14080Sstevel@tonic-gate * If mddb is being added to MN diskset and there already 14090Sstevel@tonic-gate * exists a valid mddb in the set (which equates to this 14100Sstevel@tonic-gate * node being an owner of the set) then use rpc.mdcommd 14110Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 14120Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 14130Sstevel@tonic-gate * can't write the message to the mddb. 14140Sstevel@tonic-gate * 14150Sstevel@tonic-gate * Otherwise, just add mddb to this node. 14160Sstevel@tonic-gate */ 14170Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 14180Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 14190Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 14200Sstevel@tonic-gate md_mn_msg_meta_db_attach_t attach; 14210Sstevel@tonic-gate int send_rval; 14220Sstevel@tonic-gate 14230Sstevel@tonic-gate /* 14240Sstevel@tonic-gate * In a scenario where new replicas had been added on 14250Sstevel@tonic-gate * the master, and then all of the old replicas failed 14260Sstevel@tonic-gate * before the slaves had knowledge of the new replicas, 14270Sstevel@tonic-gate * the slaves are unable to re-parse in the mddb 14280Sstevel@tonic-gate * from the new replicas since the slaves have no 14290Sstevel@tonic-gate * knowledge of the new replicas. The following 14300Sstevel@tonic-gate * algorithm solves this problem: 14310Sstevel@tonic-gate * - META_DB_ATTACH message generates submsgs 14320Sstevel@tonic-gate * - BLOCK parse (master) 14330Sstevel@tonic-gate * - MDDB_ATTACH new replicas 14340Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 14350Sstevel@tonic-gate * information to be sent from master 14360Sstevel@tonic-gate * to slaves at a higher class than the 14370Sstevel@tonic-gate * unblock so the parse message will 14380Sstevel@tonic-gate * reach slaves before unblock message. 
14390Sstevel@tonic-gate */ 14400Sstevel@tonic-gate attach.msg_l_dev = np->dev; 14410Sstevel@tonic-gate attach.msg_cnt = dbcnt; 14420Sstevel@tonic-gate attach.msg_dbsize = dbsize; 14430Sstevel@tonic-gate (void) strncpy(attach.msg_dname, cinfo->dname, 14440Sstevel@tonic-gate sizeof (attach.msg_dname)); 14450Sstevel@tonic-gate (void) splitname(np->bname, &attach.msg_splitname); 14460Sstevel@tonic-gate attach.msg_options = options; 14470Sstevel@tonic-gate 14480Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 14490Sstevel@tonic-gate attach.msg_devid[0] = NULL; 14500Sstevel@tonic-gate 14510Sstevel@tonic-gate /* 14520Sstevel@tonic-gate * If reconfig cycle has been started, this node is 14530Sstevel@tonic-gate * stuck in in the return step until this command has 14540Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 14550Sstevel@tonic-gate * send_message to fail (instead of retrying) 14560Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 14570Sstevel@tonic-gate * cycle to proceed. 
14580Sstevel@tonic-gate */ 14590Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 14600Sstevel@tonic-gate if (stale_bool == TRUE) 14610Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 14620Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 14630Sstevel@tonic-gate MD_MN_MSG_META_DB_ATTACH, 14640Sstevel@tonic-gate flags, (char *)&attach, 14650Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_attach_t), 14660Sstevel@tonic-gate &resultp, ep); 14670Sstevel@tonic-gate if (send_rval != 0) { 14680Sstevel@tonic-gate rval = -1; 14690Sstevel@tonic-gate if (resultp == NULL) 14700Sstevel@tonic-gate (void) mddserror(ep, 14710Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 14720Sstevel@tonic-gate sp->setno, NULL, NULL, 14730Sstevel@tonic-gate sp->setname); 14740Sstevel@tonic-gate else { 14750Sstevel@tonic-gate (void) mdstealerror(ep, 14760Sstevel@tonic-gate &(resultp->mmr_ep)); 14770Sstevel@tonic-gate if (mdisok(ep)) { 14780Sstevel@tonic-gate (void) mddserror(ep, 14790Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 14800Sstevel@tonic-gate sp->setno, NULL, NULL, 14810Sstevel@tonic-gate sp->setname); 14820Sstevel@tonic-gate } 14830Sstevel@tonic-gate free_result(resultp); 14840Sstevel@tonic-gate } 14850Sstevel@tonic-gate goto out; 14860Sstevel@tonic-gate } 14870Sstevel@tonic-gate if (resultp) 14880Sstevel@tonic-gate free_result(resultp); 14890Sstevel@tonic-gate } else { 14900Sstevel@tonic-gate /* Adding mddb(s) to just this node */ 14910Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 14920Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 14930Sstevel@tonic-gate /* Fill in device/replica info */ 14940Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 14950Sstevel@tonic-gate c.c_locator.l_blkno = i * dbsize + 16; 14960Sstevel@tonic-gate blkno = c.c_locator.l_blkno; 14970Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, cinfo->dname, 14980Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 14990Sstevel@tonic-gate (void) splitname(np->bname, &c.c_devname); 
15000Sstevel@tonic-gate c.c_locator.l_mnum = meta_getminor(np->dev); 15010Sstevel@tonic-gate 15020Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 15030Sstevel@tonic-gate c.c_setno = sp->setno; 15040Sstevel@tonic-gate if (! metaislocalset(sp)) { 15050Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 15060Sstevel@tonic-gate c.c_multi_node = 1; 15070Sstevel@tonic-gate } 15080Sstevel@tonic-gate } 15090Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 15100Sstevel@tonic-gate c.c_sideno = sideno; 15110Sstevel@tonic-gate 15120Sstevel@tonic-gate /* 15130Sstevel@tonic-gate * Don't need device id information from this ioctl 15140Sstevel@tonic-gate * Kernel determines device id from dev_t, which 15150Sstevel@tonic-gate * is just what this code would do. 15160Sstevel@tonic-gate */ 15170Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 15180Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 15190Sstevel@tonic-gate 15200Sstevel@tonic-gate if (timeval != NULL) 15210Sstevel@tonic-gate c.c_timestamp = *timeval; 15220Sstevel@tonic-gate 15230Sstevel@tonic-gate if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE), 15240Sstevel@tonic-gate ep)) { 15250Sstevel@tonic-gate rval = -1; 15260Sstevel@tonic-gate goto out; 15270Sstevel@tonic-gate } 15280Sstevel@tonic-gate 15290Sstevel@tonic-gate if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) { 15300Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 15310Sstevel@tonic-gate goto out; 15320Sstevel@tonic-gate } 15330Sstevel@tonic-gate /* 15340Sstevel@tonic-gate * This is either a traditional diskset OR this 15350Sstevel@tonic-gate * is the first replica added to a MN diskset. 15360Sstevel@tonic-gate * In either case, set broadcast to NO_BCAST so 15370Sstevel@tonic-gate * that message won't go through rpc.mdcommd. 15380Sstevel@tonic-gate * If this is a traditional diskset, the bcast 15390Sstevel@tonic-gate * flag is ignored since traditional disksets 15400Sstevel@tonic-gate * don't use the rpc.mdcommd. 
15410Sstevel@tonic-gate */ 15420Sstevel@tonic-gate if (meta_db_addsidenms(sp, np, blkno, 15430Sstevel@tonic-gate DB_ADDSIDENMS_NO_BCAST, ep)) 15440Sstevel@tonic-gate goto out; 15450Sstevel@tonic-gate } 15460Sstevel@tonic-gate } 15470Sstevel@tonic-gate if (! metaislocalset(sp)) { 15480Sstevel@tonic-gate /* update the dbcnt and size in dd */ 15490Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) 15500Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 15510Sstevel@tonic-gate p->dd_dbcnt = dbcnt; 15520Sstevel@tonic-gate p->dd_dbsize = dbsize; 15530Sstevel@tonic-gate break; 15540Sstevel@tonic-gate } 15550Sstevel@tonic-gate } 15560Sstevel@tonic-gate 15570Sstevel@tonic-gate /* 15580Sstevel@tonic-gate * If this was the first addition of disks to the 15590Sstevel@tonic-gate * diskset you now need to update the mb_setcreatetime 15600Sstevel@tonic-gate * which needed lb_inittime which wasn't there until now. 15610Sstevel@tonic-gate */ 15620Sstevel@tonic-gate if (firstmddb) { 15630Sstevel@tonic-gate if (meta_update_mb(sp, dd, ep) != 0) { 15640Sstevel@tonic-gate return (-1); 15650Sstevel@tonic-gate } 15660Sstevel@tonic-gate } 15670Sstevel@tonic-gate (void) close(fd); 15680Sstevel@tonic-gate } 15690Sstevel@tonic-gate 15700Sstevel@tonic-gate out: 15710Sstevel@tonic-gate if (metaislocalset(sp)) { 15720Sstevel@tonic-gate 15730Sstevel@tonic-gate /* everything looks fine. 
Start mdmonitord */ 15740Sstevel@tonic-gate /* Note: popen/pclose is the MT-safe replacement for system */ 15750Sstevel@tonic-gate if (rval == 0 && start_mdmonitord == 1) { 15760Sstevel@tonic-gate if (pclose(popen(MDMONITORD, "w")) == -1) 15770Sstevel@tonic-gate md_perror(MDMONITORD); 15780Sstevel@tonic-gate 15790Sstevel@tonic-gate if (meta_smf_enable(META_SMF_CORE, &status) == -1) { 15800Sstevel@tonic-gate mde_perror(&status, ""); 15810Sstevel@tonic-gate mdclrerror(&status); 15820Sstevel@tonic-gate } 15830Sstevel@tonic-gate } 15840Sstevel@tonic-gate 15850Sstevel@tonic-gate if (buildconf(sp, &status)) { 15860Sstevel@tonic-gate /* Don't mask any previous errors */ 15870Sstevel@tonic-gate if (rval == 0) 15880Sstevel@tonic-gate rval = mdstealerror(ep, &status); 15890Sstevel@tonic-gate return (rval); 15900Sstevel@tonic-gate } 15910Sstevel@tonic-gate 15920Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 15930Sstevel@tonic-gate /* Don't mask any previous errors */ 15940Sstevel@tonic-gate if (rval == 0) 15950Sstevel@tonic-gate rval = mdstealerror(ep, &status); 15960Sstevel@tonic-gate } 15970Sstevel@tonic-gate } else { 15980Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 15990Sstevel@tonic-gate (options & MDCHK_SET_LOCKED), 16000Sstevel@tonic-gate (options & MDCHK_SET_FORCE), 16010Sstevel@tonic-gate &status)) { 16020Sstevel@tonic-gate /* Don't mask any previous errors */ 16030Sstevel@tonic-gate if (rval == 0) 16040Sstevel@tonic-gate rval = mdstealerror(ep, &status); 16050Sstevel@tonic-gate else 16060Sstevel@tonic-gate mdclrerror(&status); 16070Sstevel@tonic-gate } 16080Sstevel@tonic-gate metafreedrivedesc(&dd); 16090Sstevel@tonic-gate } 16100Sstevel@tonic-gate /* 16110Sstevel@tonic-gate * For MN disksets that already had already had nodes joined 16120Sstevel@tonic-gate * before the attach of this mddb(s), the name invalidation is 16130Sstevel@tonic-gate * done by the commd handler routine. 
Otherwise, if this 16140Sstevel@tonic-gate * is the first attach of a MN diskset mddb, the invalidation 16150Sstevel@tonic-gate * must be done here since the first attach cannot be sent 16160Sstevel@tonic-gate * via the commd since there are no nodes joined to the set yet. 16170Sstevel@tonic-gate */ 16180Sstevel@tonic-gate if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) || 16190Sstevel@tonic-gate (MD_MNSET_DESC(sd) && 16200Sstevel@tonic-gate (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) { 16210Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 16220Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 16230Sstevel@tonic-gate } 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate return (rval); 16260Sstevel@tonic-gate } 16270Sstevel@tonic-gate 16280Sstevel@tonic-gate /* 16290Sstevel@tonic-gate * deletelist_length 16300Sstevel@tonic-gate * 16310Sstevel@tonic-gate * return the number of slices that have been specified for deletion 16320Sstevel@tonic-gate * on the metadb command line. This does not calculate the number 16330Sstevel@tonic-gate * of replicas because there may be multiple replicas per slice. 
16340Sstevel@tonic-gate */ 16350Sstevel@tonic-gate static int 16360Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp) 16370Sstevel@tonic-gate { 16380Sstevel@tonic-gate 16390Sstevel@tonic-gate mdnamelist_t *nlp; 16400Sstevel@tonic-gate int list_length = 0; 16410Sstevel@tonic-gate 16420Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 16430Sstevel@tonic-gate list_length++; 16440Sstevel@tonic-gate } 16450Sstevel@tonic-gate 16460Sstevel@tonic-gate return (list_length); 16470Sstevel@tonic-gate } 16480Sstevel@tonic-gate 16490Sstevel@tonic-gate static int 16500Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp) 16510Sstevel@tonic-gate { 16520Sstevel@tonic-gate 16530Sstevel@tonic-gate mdnamelist_t *nlp; 16540Sstevel@tonic-gate mdname_t *np; 16550Sstevel@tonic-gate int index = 0; 16560Sstevel@tonic-gate 16570Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 16580Sstevel@tonic-gate np = nlp->namep; 16590Sstevel@tonic-gate 16600Sstevel@tonic-gate if (strcmp(devname, np->bname) == 0) 16610Sstevel@tonic-gate return (index); 16620Sstevel@tonic-gate index++; 16630Sstevel@tonic-gate } 16640Sstevel@tonic-gate 16650Sstevel@tonic-gate return (-1); 16660Sstevel@tonic-gate } 16670Sstevel@tonic-gate 16680Sstevel@tonic-gate /* 16690Sstevel@tonic-gate * Delete replicas from set. This happens as a result of: 16700Sstevel@tonic-gate * - metadb [-s set_name] -d 16710Sstevel@tonic-gate * - metaset -s set_name -a disk (causes a rebalance of mddbs) 16720Sstevel@tonic-gate * - metaset -s set_name -d disk 16730Sstevel@tonic-gate * - metaset -s set_name -b 16740Sstevel@tonic-gate * 16750Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 16760Sstevel@tonic-gate * 16770Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 16780Sstevel@tonic-gate * is running the metaset command. 
16790Sstevel@tonic-gate * 16800Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 16810Sstevel@tonic-gate * running the metaset command. This detach routine is sent to all 16820Sstevel@tonic-gate * of the joined nodes in the diskset using commd. This keeps 16830Sstevel@tonic-gate * the nodes in-sync. 16840Sstevel@tonic-gate */ 16850Sstevel@tonic-gate int 16860Sstevel@tonic-gate meta_db_detach( 16870Sstevel@tonic-gate mdsetname_t *sp, 16880Sstevel@tonic-gate mdnamelist_t *db_nlp, 16890Sstevel@tonic-gate mdforceopts_t force_option, 16900Sstevel@tonic-gate char *sysfilename, 16910Sstevel@tonic-gate md_error_t *ep 16920Sstevel@tonic-gate ) 16930Sstevel@tonic-gate { 16940Sstevel@tonic-gate struct mddb_config c; 16950Sstevel@tonic-gate mdnamelist_t *nlp; 16960Sstevel@tonic-gate mdname_t *np; 16970Sstevel@tonic-gate md_drive_desc *dd = NULL; 16980Sstevel@tonic-gate md_drive_desc *p; 16990Sstevel@tonic-gate int replicacount; 17000Sstevel@tonic-gate int replica_delete_count; 17010Sstevel@tonic-gate int nr_replica_slices; 17020Sstevel@tonic-gate int i; 17030Sstevel@tonic-gate int stop_svmdaemons = 0; 17040Sstevel@tonic-gate int rval = 0; 17050Sstevel@tonic-gate int index; 17060Sstevel@tonic-gate int valid_replicas_nottodelete = 0; 17070Sstevel@tonic-gate int invalid_replicas_nottodelete = 0; 17080Sstevel@tonic-gate int invalid_replicas_todelete = 0; 17090Sstevel@tonic-gate int errored = 0; 17100Sstevel@tonic-gate int *tag_array; 17110Sstevel@tonic-gate int fd = -1; 17120Sstevel@tonic-gate md_error_t status = mdnullerror; 17130Sstevel@tonic-gate md_set_desc *sd; 17140Sstevel@tonic-gate int stale_bool = FALSE; 17150Sstevel@tonic-gate int flags; 17160Sstevel@tonic-gate 17170Sstevel@tonic-gate /* 17180Sstevel@tonic-gate * Error if we don't get some work to do. 
17190Sstevel@tonic-gate */ 17200Sstevel@tonic-gate if (db_nlp == NULL) 17210Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 17220Sstevel@tonic-gate 17230Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 17240Sstevel@tonic-gate return (-1); 17250Sstevel@tonic-gate 17260Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 17270Sstevel@tonic-gate c.c_id = 0; 17280Sstevel@tonic-gate c.c_setno = sp->setno; 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 17310Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 17320Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 17330Sstevel@tonic-gate 17340Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 17350Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 17360Sstevel@tonic-gate 17370Sstevel@tonic-gate /* 17380Sstevel@tonic-gate * Is current set STALE? 17390Sstevel@tonic-gate */ 17400Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 17410Sstevel@tonic-gate stale_bool = TRUE; 17420Sstevel@tonic-gate } 17430Sstevel@tonic-gate 17440Sstevel@tonic-gate replicacount = c.c_dbcnt; 17450Sstevel@tonic-gate 17460Sstevel@tonic-gate assert(db_nlp != NULL); 17470Sstevel@tonic-gate 17480Sstevel@tonic-gate /* 17490Sstevel@tonic-gate * go through and gather how many data bases are on each 17500Sstevel@tonic-gate * device specified. 
17510Sstevel@tonic-gate */ 17520Sstevel@tonic-gate 17530Sstevel@tonic-gate nr_replica_slices = deletelist_length(db_nlp); 17540Sstevel@tonic-gate tag_array = (int *)calloc(nr_replica_slices, sizeof (int)); 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate replica_delete_count = 0; 17570Sstevel@tonic-gate for (i = 0; i < replicacount; i++) { 17580Sstevel@tonic-gate char *devname; 17590Sstevel@tonic-gate int found = 0; 17600Sstevel@tonic-gate 17610Sstevel@tonic-gate c.c_id = i; 17620Sstevel@tonic-gate 17630Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 17640Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 17650Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 17660Sstevel@tonic-gate 17670Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 17680Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 17690Sstevel@tonic-gate 17700Sstevel@tonic-gate devname = splicename(&c.c_devname); 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate if ((index = in_deletelist(devname, db_nlp)) != -1) { 17730Sstevel@tonic-gate found = 1; 17740Sstevel@tonic-gate tag_array[index] = 1; 17750Sstevel@tonic-gate replica_delete_count++; 17760Sstevel@tonic-gate } 17770Sstevel@tonic-gate 17780Sstevel@tonic-gate errored = c.c_locator.l_flags & (MDDB_F_EREAD | 17790Sstevel@tonic-gate MDDB_F_EWRITE | MDDB_F_TOOSMALL | 17800Sstevel@tonic-gate MDDB_F_EFMT | MDDB_F_EDATA | 17810Sstevel@tonic-gate MDDB_F_EMASTER); 17820Sstevel@tonic-gate 17830Sstevel@tonic-gate /* 17840Sstevel@tonic-gate * There are four combinations of "errored" and "found" 17850Sstevel@tonic-gate * and they are used to find the number of 17860Sstevel@tonic-gate * (a) valid/invalid replicas that are not in the delete 17870Sstevel@tonic-gate * list and are available in the system. 17880Sstevel@tonic-gate * (b) valid/invalid replicas that are to be deleted. 
17890Sstevel@tonic-gate */ 17900Sstevel@tonic-gate 17910Sstevel@tonic-gate if (errored && !found) /* errored and !found */ 17920Sstevel@tonic-gate invalid_replicas_nottodelete++; 17930Sstevel@tonic-gate else if (!found) /* !errored and !found */ 17940Sstevel@tonic-gate valid_replicas_nottodelete++; 17950Sstevel@tonic-gate else if (errored) /* errored and found */ 17960Sstevel@tonic-gate invalid_replicas_todelete++; 17970Sstevel@tonic-gate /* 17980Sstevel@tonic-gate * else it is !errored and found. This means 17990Sstevel@tonic-gate * valid_replicas_todelete++; But this variable will not 18000Sstevel@tonic-gate * be used anywhere 18010Sstevel@tonic-gate */ 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate Free(devname); 18040Sstevel@tonic-gate } 18050Sstevel@tonic-gate 18060Sstevel@tonic-gate index = 0; 18070Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 18080Sstevel@tonic-gate np = nlp->namep; 18090Sstevel@tonic-gate if (tag_array[index++] != 1) { 18100Sstevel@tonic-gate Free(tag_array); 18110Sstevel@tonic-gate return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname)); 18120Sstevel@tonic-gate } 18130Sstevel@tonic-gate } 18140Sstevel@tonic-gate 18150Sstevel@tonic-gate Free(tag_array); 18160Sstevel@tonic-gate 18170Sstevel@tonic-gate 18180Sstevel@tonic-gate /* if all replicas are deleted stop mdmonitord */ 18190Sstevel@tonic-gate if ((replicacount - replica_delete_count) == 0) 18200Sstevel@tonic-gate stop_svmdaemons = 1; 18210Sstevel@tonic-gate 18220Sstevel@tonic-gate if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) { 18230Sstevel@tonic-gate if (force_option & MDFORCE_NONE) 18240Sstevel@tonic-gate return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname)); 18250Sstevel@tonic-gate if (! metaislocalset(sp) && ! 
(force_option & MDFORCE_DS)) 18260Sstevel@tonic-gate return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname)); 18270Sstevel@tonic-gate } 18280Sstevel@tonic-gate 18290Sstevel@tonic-gate /* 18300Sstevel@tonic-gate * The following algorithms are followed to check for deletion: 18310Sstevel@tonic-gate * (a) If the delete list(db_nlp) has all invalid replicas and no valid 18320Sstevel@tonic-gate * replicas, then deletion should be allowed. 18330Sstevel@tonic-gate * (b) Deletion should be allowed only if valid replicas that are "not" 18340Sstevel@tonic-gate * to be deleted is always greater than the invalid replicas that 18350Sstevel@tonic-gate * are "not" to be deleted. 18360Sstevel@tonic-gate * (c) If the user uses -f option, then deletion should be allowed. 18370Sstevel@tonic-gate */ 18380Sstevel@tonic-gate 18390Sstevel@tonic-gate if ((invalid_replicas_todelete != replica_delete_count) && 18400Sstevel@tonic-gate (invalid_replicas_nottodelete > valid_replicas_nottodelete) && 18410Sstevel@tonic-gate (force_option != MDFORCE_LOCAL)) 18420Sstevel@tonic-gate return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname)); 18430Sstevel@tonic-gate 18440Sstevel@tonic-gate /* 18450Sstevel@tonic-gate * go through and tell kernel to delete them 18460Sstevel@tonic-gate */ 18470Sstevel@tonic-gate 18480Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 18490Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 18500Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 18510Sstevel@tonic-gate 18520Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 18530Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate if (! metaislocalset(sp)) { 18560Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 18570Sstevel@tonic-gate if (! 
mdisok(ep)) 18580Sstevel@tonic-gate return (-1); 18590Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 18600Sstevel@tonic-gate return (-1); 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate 18630Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 18640Sstevel@tonic-gate np = nlp->namep; 18650Sstevel@tonic-gate 18660Sstevel@tonic-gate /* 18670Sstevel@tonic-gate * If mddb is being deleted from MN diskset and node is 18680Sstevel@tonic-gate * an owner of the diskset then use rpc.mdcommd 18690Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 18700Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 18710Sstevel@tonic-gate * can't write the message to the mddb. 18720Sstevel@tonic-gate * 18730Sstevel@tonic-gate * When mddbs are first being added to set, a detach can 18740Sstevel@tonic-gate * be called before any node has joined the diskset, so 18750Sstevel@tonic-gate * must check to see if node is an owner of the diskset. 18760Sstevel@tonic-gate * 18770Sstevel@tonic-gate * Otherwise, just delete mddb from this node. 18780Sstevel@tonic-gate */ 18790Sstevel@tonic-gate 18800Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 18810Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 18820Sstevel@tonic-gate md_mn_result_t *resultp; 18830Sstevel@tonic-gate md_mn_msg_meta_db_detach_t detach; 18840Sstevel@tonic-gate int send_rval; 18850Sstevel@tonic-gate 18860Sstevel@tonic-gate /* 18870Sstevel@tonic-gate * The following algorithm is used to detach replicas. 
18880Sstevel@tonic-gate * - META_DB_DETACH message generates submsgs 18890Sstevel@tonic-gate * - BLOCK parse (master) 18900Sstevel@tonic-gate * - MDDB_DETACH replicas 18910Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 18920Sstevel@tonic-gate * information to be sent from master 18930Sstevel@tonic-gate * to slaves at a higher class than the 18940Sstevel@tonic-gate * unblock so the parse message will 18950Sstevel@tonic-gate * reach slaves before unblock message. 18960Sstevel@tonic-gate */ 18970Sstevel@tonic-gate (void) splitname(np->bname, &detach.msg_splitname); 18980Sstevel@tonic-gate 18990Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 19000Sstevel@tonic-gate detach.msg_devid[0] = NULL; 19010Sstevel@tonic-gate 19020Sstevel@tonic-gate /* 19030Sstevel@tonic-gate * If reconfig cycle has been started, this node is 19040Sstevel@tonic-gate * stuck in in the return step until this command has 19050Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 19060Sstevel@tonic-gate * send_message to fail (instead of retrying) 19070Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 19080Sstevel@tonic-gate * cycle to proceed. 
19090Sstevel@tonic-gate */ 19100Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 19110Sstevel@tonic-gate if (stale_bool == TRUE) 19120Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 19130Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 19140Sstevel@tonic-gate MD_MN_MSG_META_DB_DETACH, 19150Sstevel@tonic-gate flags, (char *)&detach, 19160Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_detach_t), 19170Sstevel@tonic-gate &resultp, ep); 19180Sstevel@tonic-gate if (send_rval != 0) { 19190Sstevel@tonic-gate rval = -1; 19200Sstevel@tonic-gate if (resultp == NULL) 19210Sstevel@tonic-gate (void) mddserror(ep, 19220Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 19230Sstevel@tonic-gate sp->setno, NULL, NULL, 19240Sstevel@tonic-gate sp->setname); 19250Sstevel@tonic-gate else { 19260Sstevel@tonic-gate (void) mdstealerror(ep, 19270Sstevel@tonic-gate &(resultp->mmr_ep)); 19280Sstevel@tonic-gate if (mdisok(ep)) { 19290Sstevel@tonic-gate (void) mddserror(ep, 19300Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 19310Sstevel@tonic-gate sp->setno, NULL, NULL, 19320Sstevel@tonic-gate sp->setname); 19330Sstevel@tonic-gate } 19340Sstevel@tonic-gate free_result(resultp); 19350Sstevel@tonic-gate } 19360Sstevel@tonic-gate goto out; 19370Sstevel@tonic-gate } 19380Sstevel@tonic-gate if (resultp) 19390Sstevel@tonic-gate free_result(resultp); 19400Sstevel@tonic-gate } else { 19410Sstevel@tonic-gate i = 0; 19420Sstevel@tonic-gate while (i < c.c_dbcnt) { 19430Sstevel@tonic-gate char *devname; 19440Sstevel@tonic-gate 19450Sstevel@tonic-gate c.c_id = i; 19460Sstevel@tonic-gate 19470Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 19480Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 19490Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 19500Sstevel@tonic-gate 19510Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, 19520Sstevel@tonic-gate &c.c_mde, NULL)) { 19530Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 19540Sstevel@tonic-gate goto out; 19550Sstevel@tonic-gate } 
19560Sstevel@tonic-gate 19570Sstevel@tonic-gate devname = splicename(&c.c_devname); 19580Sstevel@tonic-gate if (strcmp(devname, np->bname) != 0) { 19590Sstevel@tonic-gate Free(devname); 19600Sstevel@tonic-gate i++; 19610Sstevel@tonic-gate continue; 19620Sstevel@tonic-gate } 19630Sstevel@tonic-gate Free(devname); 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 19660Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 19670Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 19680Sstevel@tonic-gate 19690Sstevel@tonic-gate if (metaioctl(MD_DB_DELDEV, &c, 19700Sstevel@tonic-gate &c.c_mde, NULL) != 0) { 19710Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 19720Sstevel@tonic-gate goto out; 19730Sstevel@tonic-gate } 19740Sstevel@tonic-gate 19750Sstevel@tonic-gate /* Not incrementing "i" intentionally */ 19760Sstevel@tonic-gate } 19770Sstevel@tonic-gate } 19780Sstevel@tonic-gate if (! metaislocalset(sp)) { 19790Sstevel@tonic-gate /* update the dbcnt and size in dd */ 19800Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) { 19810Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 19820Sstevel@tonic-gate p->dd_dbcnt = 0; 19830Sstevel@tonic-gate p->dd_dbsize = 0; 19840Sstevel@tonic-gate break; 19850Sstevel@tonic-gate } 19860Sstevel@tonic-gate } 19870Sstevel@tonic-gate 19880Sstevel@tonic-gate /* 19890Sstevel@tonic-gate * Slam a dummy master block and make it self 19900Sstevel@tonic-gate * identifying 19910Sstevel@tonic-gate */ 19920Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) >= 0) { 19930Sstevel@tonic-gate meta_mkdummymaster(sp, fd, 16); 19940Sstevel@tonic-gate (void) close(fd); 19950Sstevel@tonic-gate } 19960Sstevel@tonic-gate } 19970Sstevel@tonic-gate } 19980Sstevel@tonic-gate out: 19990Sstevel@tonic-gate if (metaislocalset(sp)) { 20000Sstevel@tonic-gate /* 20010Sstevel@tonic-gate * Stop all the daemons if there are 20020Sstevel@tonic-gate * no more replicas so that the module can be 20030Sstevel@tonic-gate 
* unloaded. 20040Sstevel@tonic-gate */ 20050Sstevel@tonic-gate if (rval == 0 && stop_svmdaemons == 1) { 20060Sstevel@tonic-gate char buf[MAXPATHLEN]; 20070Sstevel@tonic-gate int i; 20080Sstevel@tonic-gate 20090Sstevel@tonic-gate for (i = 0; i < DAEMON_COUNT; i++) { 20100Sstevel@tonic-gate (void) snprintf(buf, MAXPATHLEN, 20110Sstevel@tonic-gate "/usr/bin/pkill -%s -x %s", 20120Sstevel@tonic-gate svmd_kill_list[i].svmd_kill_val, 20130Sstevel@tonic-gate svmd_kill_list[i].svmd_name); 20140Sstevel@tonic-gate if (pclose(popen(buf, "w")) == -1) 20150Sstevel@tonic-gate md_perror(buf); 20160Sstevel@tonic-gate } 20170Sstevel@tonic-gate 20180Sstevel@tonic-gate if (meta_smf_disable(META_SMF_ALL, &status) == -1) { 20190Sstevel@tonic-gate mde_perror(&status, ""); 20200Sstevel@tonic-gate mdclrerror(&status); 20210Sstevel@tonic-gate } 20220Sstevel@tonic-gate } 20230Sstevel@tonic-gate if (buildconf(sp, &status)) { 20240Sstevel@tonic-gate /* Don't mask any previous errors */ 20250Sstevel@tonic-gate if (rval == 0) 20260Sstevel@tonic-gate rval = mdstealerror(ep, &status); 20270Sstevel@tonic-gate else 20280Sstevel@tonic-gate mdclrerror(&status); 20290Sstevel@tonic-gate return (rval); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate 20320Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 20330Sstevel@tonic-gate /* Don't mask any previous errors */ 20340Sstevel@tonic-gate if (rval == 0) 20350Sstevel@tonic-gate rval = mdstealerror(ep, &status); 20360Sstevel@tonic-gate else 20370Sstevel@tonic-gate mdclrerror(&status); 20380Sstevel@tonic-gate } 20390Sstevel@tonic-gate } else { 20400Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 20410Sstevel@tonic-gate (force_option & MDFORCE_SET_LOCKED), 20420Sstevel@tonic-gate ((force_option & MDFORCE_LOCAL) | 20430Sstevel@tonic-gate (force_option & MDFORCE_DS)), &status)) { 20440Sstevel@tonic-gate /* Don't mask any previous errors */ 20450Sstevel@tonic-gate if (rval == 0) 20460Sstevel@tonic-gate rval = mdstealerror(ep, 
&status); 20470Sstevel@tonic-gate else 20480Sstevel@tonic-gate mdclrerror(&status); 20490Sstevel@tonic-gate } 20500Sstevel@tonic-gate metafreedrivedesc(&dd); 20510Sstevel@tonic-gate } 20520Sstevel@tonic-gate if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) { 20530Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 20540Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 20550Sstevel@tonic-gate } 20560Sstevel@tonic-gate } 20570Sstevel@tonic-gate return (rval); 20580Sstevel@tonic-gate } 20590Sstevel@tonic-gate 20600Sstevel@tonic-gate static md_replica_t * 20610Sstevel@tonic-gate metareplicaname( 20620Sstevel@tonic-gate mdsetname_t *sp, 20630Sstevel@tonic-gate int flags, 20640Sstevel@tonic-gate struct mddb_config *c, 20650Sstevel@tonic-gate md_error_t *ep 20660Sstevel@tonic-gate ) 20670Sstevel@tonic-gate { 20680Sstevel@tonic-gate md_replica_t *rp; 20690Sstevel@tonic-gate char *devname; 20700Sstevel@tonic-gate size_t sz; 20710Sstevel@tonic-gate 20720Sstevel@tonic-gate /* allocate replicaname */ 20730Sstevel@tonic-gate rp = Zalloc(sizeof (*rp)); 20740Sstevel@tonic-gate 20750Sstevel@tonic-gate /* get device name */ 20760Sstevel@tonic-gate devname = splicename(&c->c_devname); 20770Sstevel@tonic-gate if (flags & PRINT_FAST) { 2078*1623Stw21770 if ((rp->r_namep = metaname_fast(&sp, devname, 2079*1623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 20800Sstevel@tonic-gate Free(devname); 20810Sstevel@tonic-gate Free(rp); 20820Sstevel@tonic-gate return (NULL); 20830Sstevel@tonic-gate } 20840Sstevel@tonic-gate } else { 2085*1623Stw21770 if ((rp->r_namep = metaname(&sp, devname, 2086*1623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 20870Sstevel@tonic-gate Free(devname); 20880Sstevel@tonic-gate Free(rp); 20890Sstevel@tonic-gate return (NULL); 20900Sstevel@tonic-gate } 20910Sstevel@tonic-gate } 20920Sstevel@tonic-gate Free(devname); 20930Sstevel@tonic-gate 20940Sstevel@tonic-gate /* make sure it's OK */ 20950Sstevel@tonic-gate if ((! 
(flags & MD_BASICNAME_OK)) && 20960Sstevel@tonic-gate (metachkcomp(rp->r_namep, ep) != 0)) { 20970Sstevel@tonic-gate Free(rp); 20980Sstevel@tonic-gate return (NULL); 20990Sstevel@tonic-gate } 21000Sstevel@tonic-gate 210162Sjeanm rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR; 210262Sjeanm rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR; 21030Sstevel@tonic-gate rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID; 21040Sstevel@tonic-gate if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) { 210562Sjeanm sz = devid_sizeof((ddi_devid_t)(uintptr_t) 210662Sjeanm (c->c_locator.l_devid)); 21070Sstevel@tonic-gate if ((rp->r_devid = (ddi_devid_t)malloc(sz)) == 21080Sstevel@tonic-gate (ddi_devid_t)NULL) { 21090Sstevel@tonic-gate Free(rp); 21100Sstevel@tonic-gate return (NULL); 21110Sstevel@tonic-gate } 21120Sstevel@tonic-gate (void) memcpy((void *)rp->r_devid, 211362Sjeanm (void *)(uintptr_t)c->c_locator.l_devid, sz); 21140Sstevel@tonic-gate (void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name); 21150Sstevel@tonic-gate rp->r_flags &= ~MDDB_F_NODEVID; 21160Sstevel@tonic-gate /* Overwrite dev derived from name with dev from devid */ 21170Sstevel@tonic-gate rp->r_namep->dev = meta_expldev(c->c_locator.l_dev); 21180Sstevel@tonic-gate } 21190Sstevel@tonic-gate (void) strcpy(rp->r_driver_name, c->c_locator.l_driver); 21200Sstevel@tonic-gate 21210Sstevel@tonic-gate rp->r_blkno = c->c_locator.l_blkno; 21220Sstevel@tonic-gate if (c->c_dbend != 0) 21230Sstevel@tonic-gate rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1; 21240Sstevel@tonic-gate 21250Sstevel@tonic-gate /* return replica */ 21260Sstevel@tonic-gate return (rp); 21270Sstevel@tonic-gate } 21280Sstevel@tonic-gate 21290Sstevel@tonic-gate /* 21300Sstevel@tonic-gate * free replica list 21310Sstevel@tonic-gate */ 21320Sstevel@tonic-gate void 21330Sstevel@tonic-gate metafreereplicalist( 21340Sstevel@tonic-gate md_replicalist_t *rlp 21350Sstevel@tonic-gate ) 21360Sstevel@tonic-gate { 21370Sstevel@tonic-gate md_replicalist_t *rl = NULL; 
21380Sstevel@tonic-gate 21390Sstevel@tonic-gate for (/* void */; (rlp != NULL); rlp = rl) { 21400Sstevel@tonic-gate rl = rlp->rl_next; 21410Sstevel@tonic-gate if (rlp->rl_repp->r_devid != (ddi_devid_t)0) { 21420Sstevel@tonic-gate free(rlp->rl_repp->r_devid); 21430Sstevel@tonic-gate } 21440Sstevel@tonic-gate Free(rlp->rl_repp); 21450Sstevel@tonic-gate Free(rlp); 21460Sstevel@tonic-gate } 21470Sstevel@tonic-gate } 21480Sstevel@tonic-gate 21490Sstevel@tonic-gate /* 21500Sstevel@tonic-gate * return list of all replicas in set 21510Sstevel@tonic-gate */ 21520Sstevel@tonic-gate int 21530Sstevel@tonic-gate metareplicalist( 21540Sstevel@tonic-gate mdsetname_t *sp, 21550Sstevel@tonic-gate int flags, 21560Sstevel@tonic-gate md_replicalist_t **rlpp, 21570Sstevel@tonic-gate md_error_t *ep 21580Sstevel@tonic-gate ) 21590Sstevel@tonic-gate { 21600Sstevel@tonic-gate md_replicalist_t **tail = rlpp; 21610Sstevel@tonic-gate int count = 0; 21620Sstevel@tonic-gate struct mddb_config c; 21630Sstevel@tonic-gate int i; 21640Sstevel@tonic-gate char *devid; 21650Sstevel@tonic-gate 21660Sstevel@tonic-gate /* for each replica */ 21670Sstevel@tonic-gate i = 0; 21680Sstevel@tonic-gate do { 21690Sstevel@tonic-gate md_replica_t *rp; 21700Sstevel@tonic-gate 21710Sstevel@tonic-gate /* get next replica */ 21720Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 21730Sstevel@tonic-gate c.c_id = i; 21740Sstevel@tonic-gate c.c_setno = sp->setno; 21750Sstevel@tonic-gate 21760Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 21770Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 21780Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 21790Sstevel@tonic-gate mdclrerror(&c.c_mde); 21800Sstevel@tonic-gate break; /* handle none at all */ 21810Sstevel@tonic-gate } 21820Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 21830Sstevel@tonic-gate goto out; 21840Sstevel@tonic-gate } 21850Sstevel@tonic-gate 21860Sstevel@tonic-gate if 
(c.c_locator.l_devid_flags & MDDB_DEVID_SZ) { 21870Sstevel@tonic-gate if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) { 21880Sstevel@tonic-gate (void) mdsyserror(ep, ENOMEM, META_DBCONF); 21890Sstevel@tonic-gate goto out; 21900Sstevel@tonic-gate } 21910Sstevel@tonic-gate c.c_locator.l_devid = (uintptr_t)devid; 21920Sstevel@tonic-gate /* 21930Sstevel@tonic-gate * Turn on space and sz flags since 'sz' amount of 21940Sstevel@tonic-gate * space has been alloc'd. 21950Sstevel@tonic-gate */ 21960Sstevel@tonic-gate c.c_locator.l_devid_flags = 21970Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 21980Sstevel@tonic-gate } 21990Sstevel@tonic-gate 22000Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 22010Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 22020Sstevel@tonic-gate mdclrerror(&c.c_mde); 22030Sstevel@tonic-gate break; /* handle none at all */ 22040Sstevel@tonic-gate } 22050Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 22060Sstevel@tonic-gate goto out; 22070Sstevel@tonic-gate } 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* 22100Sstevel@tonic-gate * Paranoid check - shouldn't happen, but is left as 22110Sstevel@tonic-gate * a place holder for changes that will be needed after 22120Sstevel@tonic-gate * dynamic reconfiguration changes are added to SVM (to 22130Sstevel@tonic-gate * support movement of disks at any point in time). 
22140Sstevel@tonic-gate */ 22150Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) { 22160Sstevel@tonic-gate (void) fprintf(stderr, 22170Sstevel@tonic-gate dgettext(TEXT_DOMAIN, 22180Sstevel@tonic-gate "Error: Relocation Information " 22190Sstevel@tonic-gate "(drvnm=%s, mnum=0x%lx) \n" 22200Sstevel@tonic-gate "relocation information size changed - \n" 22210Sstevel@tonic-gate "rerun command\n"), 22220Sstevel@tonic-gate c.c_locator.l_driver, c.c_locator.l_mnum); 22230Sstevel@tonic-gate (void) mderror(ep, MDE_DEVID_TOOBIG, NULL); 22240Sstevel@tonic-gate goto out; 22250Sstevel@tonic-gate } 22260Sstevel@tonic-gate 22270Sstevel@tonic-gate if (c.c_dbcnt == 0) 22280Sstevel@tonic-gate break; /* handle none at all */ 22290Sstevel@tonic-gate 22300Sstevel@tonic-gate /* get info */ 22310Sstevel@tonic-gate if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL) 22320Sstevel@tonic-gate goto out; 22330Sstevel@tonic-gate 22340Sstevel@tonic-gate /* append to list */ 22350Sstevel@tonic-gate *tail = Zalloc(sizeof (**tail)); 22360Sstevel@tonic-gate (*tail)->rl_repp = rp; 22370Sstevel@tonic-gate tail = &(*tail)->rl_next; 22380Sstevel@tonic-gate ++count; 22390Sstevel@tonic-gate 22400Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22410Sstevel@tonic-gate free(devid); 22420Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 22430Sstevel@tonic-gate } 22440Sstevel@tonic-gate 22450Sstevel@tonic-gate } while (++i < c.c_dbcnt); 22460Sstevel@tonic-gate 22470Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22480Sstevel@tonic-gate free(devid); 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate 22510Sstevel@tonic-gate /* return count */ 22520Sstevel@tonic-gate return (count); 22530Sstevel@tonic-gate 22540Sstevel@tonic-gate /* cleanup, return error */ 22550Sstevel@tonic-gate out: 22560Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 22570Sstevel@tonic-gate free(devid); 22580Sstevel@tonic-gate } 22590Sstevel@tonic-gate 
metafreereplicalist(*rlpp); 22600Sstevel@tonic-gate *rlpp = NULL; 22610Sstevel@tonic-gate return (-1); 22620Sstevel@tonic-gate } 22630Sstevel@tonic-gate 22640Sstevel@tonic-gate /* 22650Sstevel@tonic-gate * meta_sync_db_locations - get list of replicas from kernel and write 22660Sstevel@tonic-gate * out to mddb.cf and md.conf. 'Syncs up' the replica list in 22670Sstevel@tonic-gate * the kernel with the replica list in the conf files. 22680Sstevel@tonic-gate * 22690Sstevel@tonic-gate */ 22700Sstevel@tonic-gate void 22710Sstevel@tonic-gate meta_sync_db_locations( 22720Sstevel@tonic-gate mdsetname_t *sp, 22730Sstevel@tonic-gate md_error_t *ep 22740Sstevel@tonic-gate ) 22750Sstevel@tonic-gate { 22760Sstevel@tonic-gate char *sname = 0; /* system file name */ 22770Sstevel@tonic-gate char *cname = 0; /* config file name */ 22780Sstevel@tonic-gate 22790Sstevel@tonic-gate if (!metaislocalset(sp)) 22800Sstevel@tonic-gate return; 22810Sstevel@tonic-gate 22820Sstevel@tonic-gate /* Updates backup of configuration file (aka mddb.cf) */ 22830Sstevel@tonic-gate if (buildconf(sp, ep) != 0) 22840Sstevel@tonic-gate return; 22850Sstevel@tonic-gate 22860Sstevel@tonic-gate /* Updates system configuration file (aka md.conf) */ 22870Sstevel@tonic-gate (void) meta_db_patch(sname, cname, 0, ep); 22880Sstevel@tonic-gate } 22890Sstevel@tonic-gate 22900Sstevel@tonic-gate /* 22910Sstevel@tonic-gate * setup_db_locations - parse the mddb.cf file and 22920Sstevel@tonic-gate * tells the driver which db locations to use. 
22930Sstevel@tonic-gate */ 22940Sstevel@tonic-gate int 22950Sstevel@tonic-gate meta_setup_db_locations( 22960Sstevel@tonic-gate md_error_t *ep 22970Sstevel@tonic-gate ) 22980Sstevel@tonic-gate { 22990Sstevel@tonic-gate mddb_config_t c; 23000Sstevel@tonic-gate FILE *fp; 23010Sstevel@tonic-gate char inbuff[1024]; 23020Sstevel@tonic-gate char *buff; 23030Sstevel@tonic-gate uint_t i; 23040Sstevel@tonic-gate size_t sz; 23050Sstevel@tonic-gate int rval = 0; 23060Sstevel@tonic-gate char *devidp; 23070Sstevel@tonic-gate uint_t devid_size; 23080Sstevel@tonic-gate char *minor_name = NULL; 23090Sstevel@tonic-gate ddi_devid_t devid_decode; 23100Sstevel@tonic-gate int checksum; 23110Sstevel@tonic-gate 23120Sstevel@tonic-gate /* do mddb.cf file */ 23130Sstevel@tonic-gate (void) memset(&c, '\0', sizeof (c)); 23140Sstevel@tonic-gate if ((fp = fopen(META_DBCONF, "r")) == NULL) { 23150Sstevel@tonic-gate if (errno != ENOENT) 23160Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 23170Sstevel@tonic-gate } 23180Sstevel@tonic-gate while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1), 23190Sstevel@tonic-gate fp)) != NULL)) { 23200Sstevel@tonic-gate 23210Sstevel@tonic-gate /* ignore comments */ 23220Sstevel@tonic-gate if (*buff == '#') 23230Sstevel@tonic-gate continue; 23240Sstevel@tonic-gate 23250Sstevel@tonic-gate /* parse locator */ 23260Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 23270Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 23280Sstevel@tonic-gate i = strcspn(buff, " \t"); 23290Sstevel@tonic-gate if (i > sizeof (c.c_locator.l_driver)) 23300Sstevel@tonic-gate i = sizeof (c.c_locator.l_driver); 23310Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, buff, i); 23320Sstevel@tonic-gate buff += i; 23330Sstevel@tonic-gate c.c_locator.l_dev = 23340Sstevel@tonic-gate makedev((major_t)0, (minor_t)strtol(buff, &buff, 10)); 23350Sstevel@tonic-gate c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10); 23360Sstevel@tonic-gate c.c_locator.l_mnum = 
minor(c.c_locator.l_dev); 23370Sstevel@tonic-gate 23380Sstevel@tonic-gate /* parse out devid */ 23390Sstevel@tonic-gate while (isspace((int)(*buff))) 23400Sstevel@tonic-gate buff += 1; 23410Sstevel@tonic-gate i = strcspn(buff, " \t"); 23420Sstevel@tonic-gate if ((devidp = (char *)malloc(i+1)) == NULL) 23430Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 23440Sstevel@tonic-gate 23450Sstevel@tonic-gate (void) strncpy(devidp, buff, i); 23460Sstevel@tonic-gate devidp[i] = '\0'; 23470Sstevel@tonic-gate if (devid_str_decode(devidp, &devid_decode, 23480Sstevel@tonic-gate &minor_name) == -1) { 23490Sstevel@tonic-gate free(devidp); 23500Sstevel@tonic-gate continue; 23510Sstevel@tonic-gate } 23520Sstevel@tonic-gate 23530Sstevel@tonic-gate /* Conf file must have minor name associated with devid */ 23540Sstevel@tonic-gate if (minor_name == NULL) { 23550Sstevel@tonic-gate free(devidp); 23560Sstevel@tonic-gate devid_free(devid_decode); 23570Sstevel@tonic-gate continue; 23580Sstevel@tonic-gate } 23590Sstevel@tonic-gate 23600Sstevel@tonic-gate sz = devid_sizeof(devid_decode); 23610Sstevel@tonic-gate /* Copy to devid size buffer that ioctl expects */ 23620Sstevel@tonic-gate if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) { 23630Sstevel@tonic-gate devid_free(devid_decode); 23640Sstevel@tonic-gate free(minor_name); 23650Sstevel@tonic-gate free(devidp); 23660Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 23670Sstevel@tonic-gate } 23680Sstevel@tonic-gate 236962Sjeanm (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 23700Sstevel@tonic-gate (void *)devid_decode, sz); 23710Sstevel@tonic-gate 23720Sstevel@tonic-gate devid_free(devid_decode); 23730Sstevel@tonic-gate 23740Sstevel@tonic-gate if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) { 23750Sstevel@tonic-gate free(minor_name); 23760Sstevel@tonic-gate free(devidp); 237762Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 23780Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, 
META_DBCONF)); 23790Sstevel@tonic-gate } 23800Sstevel@tonic-gate (void) strcpy(c.c_locator.l_minor_name, minor_name); 23810Sstevel@tonic-gate free(minor_name); 23820Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_VALID | 23830Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 23840Sstevel@tonic-gate c.c_locator.l_devid_sz = sz; 23850Sstevel@tonic-gate 23860Sstevel@tonic-gate devid_size = strlen(devidp); 23870Sstevel@tonic-gate buff += devid_size; 23880Sstevel@tonic-gate 23890Sstevel@tonic-gate checksum = strtol(buff, &buff, 10); 23900Sstevel@tonic-gate for (i = 0; c.c_locator.l_driver[i] != 0; i++) 23910Sstevel@tonic-gate checksum += c.c_locator.l_driver[i]; 23920Sstevel@tonic-gate for (i = 0; i < devid_size; i++) { 23930Sstevel@tonic-gate checksum += devidp[i]; 23940Sstevel@tonic-gate } 23950Sstevel@tonic-gate free(devidp); 23960Sstevel@tonic-gate 23970Sstevel@tonic-gate checksum += minor(c.c_locator.l_dev); 23980Sstevel@tonic-gate checksum += c.c_locator.l_blkno; 23990Sstevel@tonic-gate if (checksum != 42) { 24000Sstevel@tonic-gate /* overwritten later for more serious problems */ 24010Sstevel@tonic-gate rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF); 240262Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24030Sstevel@tonic-gate continue; 24040Sstevel@tonic-gate } 24050Sstevel@tonic-gate c.c_locator.l_flags = 0; 24060Sstevel@tonic-gate 24070Sstevel@tonic-gate /* use db location */ 24080Sstevel@tonic-gate if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 240962Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24100Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 24110Sstevel@tonic-gate } 24120Sstevel@tonic-gate 24130Sstevel@tonic-gate /* free up devid if in use */ 241462Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 24150Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 24160Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 24170Sstevel@tonic-gate } 24180Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0)) 
24190Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 24200Sstevel@tonic-gate 24210Sstevel@tonic-gate /* check for stale database */ 24220Sstevel@tonic-gate (void) memset((char *)&c, 0, sizeof (struct mddb_config)); 24230Sstevel@tonic-gate c.c_id = 0; 24240Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 24270Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 24280Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 24290Sstevel@tonic-gate 24300Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 24310Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_INVALID)) 24320Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 24330Sstevel@tonic-gate mdclrerror(&c.c_mde); 24340Sstevel@tonic-gate } 24350Sstevel@tonic-gate 24360Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) 24370Sstevel@tonic-gate return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET, 24380Sstevel@tonic-gate 0, NULL)); 24390Sstevel@tonic-gate 24400Sstevel@tonic-gate /* success */ 24410Sstevel@tonic-gate return (rval); 24420Sstevel@tonic-gate } 24430Sstevel@tonic-gate 24440Sstevel@tonic-gate /* 24450Sstevel@tonic-gate * meta_db_minreplica - returns the minimum size replica currently in use. 
24460Sstevel@tonic-gate */ 24470Sstevel@tonic-gate daddr_t 24480Sstevel@tonic-gate meta_db_minreplica( 24490Sstevel@tonic-gate mdsetname_t *sp, 24500Sstevel@tonic-gate md_error_t *ep 24510Sstevel@tonic-gate ) 24520Sstevel@tonic-gate { 24530Sstevel@tonic-gate md_replica_t *r; 24540Sstevel@tonic-gate md_replicalist_t *rl, *rlp = NULL; 24550Sstevel@tonic-gate daddr_t nblks = 0; 24560Sstevel@tonic-gate 24570Sstevel@tonic-gate if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) 24580Sstevel@tonic-gate return (-1); 24590Sstevel@tonic-gate 24600Sstevel@tonic-gate if (rlp == NULL) 24610Sstevel@tonic-gate return (-1); 24620Sstevel@tonic-gate 24630Sstevel@tonic-gate /* find the smallest existing replica */ 24640Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) { 24650Sstevel@tonic-gate r = rl->rl_repp; 24660Sstevel@tonic-gate nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); 24670Sstevel@tonic-gate } 24680Sstevel@tonic-gate 24690Sstevel@tonic-gate metafreereplicalist(rlp); 24700Sstevel@tonic-gate return (nblks); 24710Sstevel@tonic-gate } 24720Sstevel@tonic-gate 24730Sstevel@tonic-gate /* 24740Sstevel@tonic-gate * meta_get_replica_names 24750Sstevel@tonic-gate * returns an mdnamelist_t of replica slices 24760Sstevel@tonic-gate */ 24770Sstevel@tonic-gate /*ARGSUSED*/ 24780Sstevel@tonic-gate int 24790Sstevel@tonic-gate meta_get_replica_names( 24800Sstevel@tonic-gate mdsetname_t *sp, 24810Sstevel@tonic-gate mdnamelist_t **nlpp, 24820Sstevel@tonic-gate int options, 24830Sstevel@tonic-gate md_error_t *ep 24840Sstevel@tonic-gate ) 24850Sstevel@tonic-gate { 24860Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 24870Sstevel@tonic-gate md_replicalist_t *rl; 24880Sstevel@tonic-gate mdnamelist_t **tailpp = nlpp; 24890Sstevel@tonic-gate int cnt = 0; 24900Sstevel@tonic-gate 24910Sstevel@tonic-gate assert(nlpp != NULL); 24920Sstevel@tonic-gate 24930Sstevel@tonic-gate if (!metaislocalset(sp)) 24940Sstevel@tonic-gate goto out; 
24950Sstevel@tonic-gate 24960Sstevel@tonic-gate /* get replicas */ 24970Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) { 24980Sstevel@tonic-gate cnt = -1; 24990Sstevel@tonic-gate goto out; 25000Sstevel@tonic-gate } 25010Sstevel@tonic-gate 25020Sstevel@tonic-gate /* build name list */ 25030Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 25040Sstevel@tonic-gate /* 25050Sstevel@tonic-gate * Add the name struct to the end of the 25060Sstevel@tonic-gate * namelist but keep a pointer to the last 25070Sstevel@tonic-gate * element so that we don't incur the overhead 25080Sstevel@tonic-gate * of traversing the list each time 25090Sstevel@tonic-gate */ 25100Sstevel@tonic-gate tailpp = meta_namelist_append_wrapper( 25110Sstevel@tonic-gate tailpp, rl->rl_repp->r_namep); 25120Sstevel@tonic-gate ++cnt; 25130Sstevel@tonic-gate } 25140Sstevel@tonic-gate 25150Sstevel@tonic-gate /* cleanup, return count or error */ 25160Sstevel@tonic-gate out: 25170Sstevel@tonic-gate metafreereplicalist(rlp); 25180Sstevel@tonic-gate return (cnt); 25190Sstevel@tonic-gate } 2520