10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51623Stw21770 * Common Development and Distribution License (the "License"). 61623Stw21770 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*5109Spetede * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate /* 290Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that 300Sstevel@tonic-gate * TEXT_DOMAIN gets set to something. 310Sstevel@tonic-gate */ 320Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 330Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 340Sstevel@tonic-gate #endif 350Sstevel@tonic-gate 360Sstevel@tonic-gate /* 370Sstevel@tonic-gate * Metadevice database interfaces. 
380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate #define MDDB 410Sstevel@tonic-gate 420Sstevel@tonic-gate #include <meta.h> 430Sstevel@tonic-gate #include <sys/lvm/md_mddb.h> 440Sstevel@tonic-gate #include <sys/lvm/md_crc.h> 450Sstevel@tonic-gate #include <sys/lvm/mdio.h> 460Sstevel@tonic-gate #include <string.h> 470Sstevel@tonic-gate #include <strings.h> 480Sstevel@tonic-gate #include <ctype.h> 490Sstevel@tonic-gate 500Sstevel@tonic-gate struct svm_daemon { 510Sstevel@tonic-gate char *svmd_name; 520Sstevel@tonic-gate char *svmd_kill_val; 530Sstevel@tonic-gate }; 540Sstevel@tonic-gate 552614Spetede /* 562614Spetede * This is a list of the daemons that are not stopped by the SVM smf(5) 572614Spetede * services. The mdmonitord is started via svc:/system/mdmonitor:default 582614Spetede * but no contract(4) is constructed and so it is not stopped by smf(5). 592614Spetede */ 600Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = { 610Sstevel@tonic-gate {"mdmonitord", "HUP"}, 620Sstevel@tonic-gate {"mddoors", "KILL"}, 630Sstevel@tonic-gate }; 640Sstevel@tonic-gate 650Sstevel@tonic-gate #define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon)) 660Sstevel@tonic-gate 670Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); 680Sstevel@tonic-gate 690Sstevel@tonic-gate /* 70*5109Spetede * Are the locator blocks for the replicas using devids 71*5109Spetede */ 72*5109Spetede static int devid_in_use = FALSE; 73*5109Spetede 74*5109Spetede static char * 75*5109Spetede getlongname( 76*5109Spetede struct mddb_config *c, 77*5109Spetede md_error_t *ep 78*5109Spetede ) 79*5109Spetede { 80*5109Spetede char *diskname = NULL; 81*5109Spetede char *devid_str; 82*5109Spetede devid_nmlist_t *disklist = NULL; 83*5109Spetede 84*5109Spetede c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 85*5109Spetede if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) { 86*5109Spetede (void) mdstealerror(ep, &c->c_mde); 87*5109Spetede 
return (NULL); 88*5109Spetede } 89*5109Spetede 90*5109Spetede if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) { 91*5109Spetede c->c_locator.l_devid = (uintptr_t) 92*5109Spetede Malloc(c->c_locator.l_devid_sz); 93*5109Spetede c->c_locator.l_devid_flags = 94*5109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 95*5109Spetede } else { 96*5109Spetede (void) mderror(ep, MDE_NODEVID, ""); 97*5109Spetede goto out; 98*5109Spetede } 99*5109Spetede 100*5109Spetede if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) { 101*5109Spetede (void) mdstealerror(ep, &c->c_mde); 102*5109Spetede goto out; 103*5109Spetede } 104*5109Spetede 105*5109Spetede if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) { 106*5109Spetede (void) mderror(ep, MDE_NODEVID, ""); 107*5109Spetede goto out; 108*5109Spetede } 109*5109Spetede 110*5109Spetede if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) { 111*5109Spetede (void) mdstealerror(ep, &c->c_mde); 112*5109Spetede goto out; 113*5109Spetede } 114*5109Spetede 115*5109Spetede if (c->c_locator.l_devid != NULL) { 116*5109Spetede if (meta_deviceid_to_nmlist("/dev/dsk", 117*5109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, 118*5109Spetede c->c_locator.l_minor_name, &disklist) != 0) { 119*5109Spetede devid_str = devid_str_encode( 120*5109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL); 121*5109Spetede (void) mderror(ep, MDE_MISSING_DEVID_DISK, ""); 122*5109Spetede mderrorextra(ep, devid_str); 123*5109Spetede if (devid_str != NULL) 124*5109Spetede devid_str_free(devid_str); 125*5109Spetede goto out; 126*5109Spetede } 127*5109Spetede diskname = Strdup(disklist[0].devname); 128*5109Spetede } 129*5109Spetede 130*5109Spetede out: 131*5109Spetede if (disklist != NULL) 132*5109Spetede devid_free_nmlist(disklist); 133*5109Spetede 134*5109Spetede if (c->c_locator.l_devid != NULL) 135*5109Spetede Free((void *)(uintptr_t)c->c_locator.l_devid); 136*5109Spetede 137*5109Spetede return (diskname); 138*5109Spetede } 139*5109Spetede 140*5109Spetede /* 
1410Sstevel@tonic-gate * meta_get_lb_inittime sends a request for the lb_inittime to the kernel 1420Sstevel@tonic-gate */ 1430Sstevel@tonic-gate md_timeval32_t 1440Sstevel@tonic-gate meta_get_lb_inittime( 1450Sstevel@tonic-gate mdsetname_t *sp, 1460Sstevel@tonic-gate md_error_t *ep 1470Sstevel@tonic-gate ) 1480Sstevel@tonic-gate { 1490Sstevel@tonic-gate mddb_config_t c; 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 1540Sstevel@tonic-gate c.c_setno = sp->setno; 1550Sstevel@tonic-gate 1560Sstevel@tonic-gate if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) { 1570Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 1580Sstevel@tonic-gate } 1590Sstevel@tonic-gate 1600Sstevel@tonic-gate return (c.c_timestamp); 1610Sstevel@tonic-gate } 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate /* 1640Sstevel@tonic-gate * mkmasterblks writes out the master blocks of the mddb to the replica. 1650Sstevel@tonic-gate * 1660Sstevel@tonic-gate * In a MN diskset, this is called by the node that is adding this replica 1670Sstevel@tonic-gate * to the diskset. 
1680Sstevel@tonic-gate */ 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate #define MDDB_VERIFY_SIZE 8192 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate static int 1730Sstevel@tonic-gate mkmasterblks( 1740Sstevel@tonic-gate mdsetname_t *sp, 1750Sstevel@tonic-gate mdname_t *np, 1760Sstevel@tonic-gate int fd, 1770Sstevel@tonic-gate daddr_t firstblk, 1780Sstevel@tonic-gate int dbsize, 1790Sstevel@tonic-gate md_timeval32_t inittime, 1800Sstevel@tonic-gate md_error_t *ep 1810Sstevel@tonic-gate ) 1820Sstevel@tonic-gate { 1830Sstevel@tonic-gate int consecutive; 1840Sstevel@tonic-gate md_timeval32_t tp; 1850Sstevel@tonic-gate struct mddb_mb *mb; 1860Sstevel@tonic-gate char *buffer; 1870Sstevel@tonic-gate int iosize; 1880Sstevel@tonic-gate md_set_desc *sd; 1890Sstevel@tonic-gate int mn_set = 0; 1900Sstevel@tonic-gate daddr_t startblk; 1910Sstevel@tonic-gate int cnt; 1920Sstevel@tonic-gate ddi_devid_t devid; 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate if (! metaislocalset(sp)) { 1950Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 1960Sstevel@tonic-gate return (-1); 1970Sstevel@tonic-gate 1980Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 1990Sstevel@tonic-gate mn_set = 1; /* Used later */ 2000Sstevel@tonic-gate } 2010Sstevel@tonic-gate } 2020Sstevel@tonic-gate 2030Sstevel@tonic-gate /* 2040Sstevel@tonic-gate * Loop to verify the entire mddb region on disk is read/writable. 2050Sstevel@tonic-gate * buffer is used to write/read in at most MDDB_VERIFY_SIZE block 2060Sstevel@tonic-gate * chunks. 
2070Sstevel@tonic-gate * 2080Sstevel@tonic-gate * A side-effect of this loop is to zero out the entire mddb region 2090Sstevel@tonic-gate */ 2100Sstevel@tonic-gate if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL) 2110Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate startblk = firstblk; 2140Sstevel@tonic-gate for (cnt = dbsize; cnt > 0; cnt -= consecutive) { 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate if (cnt > MDDB_VERIFY_SIZE) 2170Sstevel@tonic-gate consecutive = MDDB_VERIFY_SIZE; 2180Sstevel@tonic-gate else 2190Sstevel@tonic-gate consecutive = cnt; 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 2220Sstevel@tonic-gate Free(buffer); 2230Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2240Sstevel@tonic-gate } 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate iosize = DEV_BSIZE * consecutive; 2270Sstevel@tonic-gate if (write(fd, buffer, iosize) != iosize) { 2280Sstevel@tonic-gate Free(buffer); 2290Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2300Sstevel@tonic-gate } 2310Sstevel@tonic-gate 2320Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 2330Sstevel@tonic-gate Free(buffer); 2340Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate if (read(fd, buffer, iosize) != iosize) { 2380Sstevel@tonic-gate Free(buffer); 2390Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2400Sstevel@tonic-gate } 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate startblk += consecutive; 2430Sstevel@tonic-gate } 2440Sstevel@tonic-gate 2450Sstevel@tonic-gate Free(buffer); 2460Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 2470Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 2480Sstevel@tonic-gate 2490Sstevel@tonic-gate if (meta_gettimeofday(&tp) == -1) { 2500Sstevel@tonic-gate Free(mb); 
2510Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 2520Sstevel@tonic-gate } 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_MB; 2550Sstevel@tonic-gate /* 2560Sstevel@tonic-gate * If a MN diskset, set master block revision for a MN set. 2570Sstevel@tonic-gate * Even though the master block structure is no different 2580Sstevel@tonic-gate * for a MN set, setting the revision field to a different 2590Sstevel@tonic-gate * number keeps any pre-MN_diskset code from accessing 2600Sstevel@tonic-gate * this diskset. It also allows for an early determination 2610Sstevel@tonic-gate * of a MN diskset when reading in from disk so that the 2620Sstevel@tonic-gate * proper size locator block and locator names structure 2630Sstevel@tonic-gate * can be read in thus saving time on diskset startup. 2640Sstevel@tonic-gate */ 2650Sstevel@tonic-gate if (mn_set) 2660Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MNMB; 2670Sstevel@tonic-gate else 2680Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 2690Sstevel@tonic-gate mb->mb_timestamp = tp; 2700Sstevel@tonic-gate mb->mb_setno = sp->setno; 2710Sstevel@tonic-gate mb->mb_blkcnt = dbsize - 1; 2720Sstevel@tonic-gate mb->mb_blkno = firstblk; 2730Sstevel@tonic-gate mb->mb_nextblk = 0; 2740Sstevel@tonic-gate 2750Sstevel@tonic-gate mb->mb_blkmap.m_firstblk = firstblk + 1; 2760Sstevel@tonic-gate mb->mb_blkmap.m_consecutive = dbsize - 1; 2770Sstevel@tonic-gate if (! metaislocalset(sp)) { 2780Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 2790Sstevel@tonic-gate } 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate /* 2820Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 2830Sstevel@tonic-gate * the master block. The saved devid is used to provide a mapping 2840Sstevel@tonic-gate * between this disk's devid and the devid stored into the master 2850Sstevel@tonic-gate * block. This allows the disk image to be self-identifying 2860Sstevel@tonic-gate * if it gets copied (e.g. 
SNDR, True Copy, etc.). This is used 2870Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 2880Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 2890Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 2900Sstevel@tonic-gate * in the remote copy scenario. 2910Sstevel@tonic-gate */ 2920Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 2930Sstevel@tonic-gate size_t len; 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate len = devid_sizeof(devid); 2960Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 2970Sstevel@tonic-gate /* there is enough space to store the devid */ 2980Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 2990Sstevel@tonic-gate mb->mb_devid_len = len; 3000Sstevel@tonic-gate (void) memcpy(mb->mb_devid, devid, len); 3010Sstevel@tonic-gate } 3020Sstevel@tonic-gate devid_free(devid); 3030Sstevel@tonic-gate } 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 3060Sstevel@tonic-gate (crc_skip_t *)NULL); 3070Sstevel@tonic-gate 3080Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 3090Sstevel@tonic-gate Free(mb); 3100Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 3110Sstevel@tonic-gate } 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 3140Sstevel@tonic-gate Free(mb); 3150Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 3160Sstevel@tonic-gate } 3170Sstevel@tonic-gate 3180Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 3190Sstevel@tonic-gate Free(mb); 3200Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 3210Sstevel@tonic-gate } 3220Sstevel@tonic-gate 3230Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 3240Sstevel@tonic-gate Free(mb); 3250Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 3260Sstevel@tonic-gate } 
3270Sstevel@tonic-gate 3280Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 329*5109Spetede (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) { 3300Sstevel@tonic-gate Free(mb); 3310Sstevel@tonic-gate return (mdmddberror(ep, MDE_NOTVERIFIED, 332*5109Spetede meta_getminor(np->dev), sp->setno, 0, np->rname)); 3330Sstevel@tonic-gate } 3340Sstevel@tonic-gate 3350Sstevel@tonic-gate Free(mb); 3360Sstevel@tonic-gate return (0); 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate void 3400Sstevel@tonic-gate meta_mkdummymaster( 3410Sstevel@tonic-gate mdsetname_t *sp, 3420Sstevel@tonic-gate int fd, 3430Sstevel@tonic-gate daddr_t firstblk 3440Sstevel@tonic-gate ) 3450Sstevel@tonic-gate { 3460Sstevel@tonic-gate md_timeval32_t tp; 3470Sstevel@tonic-gate struct mddb_mb *mb; 3480Sstevel@tonic-gate ddi_devid_t devid; 3490Sstevel@tonic-gate md_set_desc *sd; 3500Sstevel@tonic-gate md_error_t ep = mdnullerror; 3510Sstevel@tonic-gate md_timeval32_t inittime; 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate /* 3540Sstevel@tonic-gate * No dummy master blocks are written for a MN diskset since devids 3550Sstevel@tonic-gate * are not supported in MN disksets. 3560Sstevel@tonic-gate */ 3570Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 3580Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, &ep)) == NULL) 3590Sstevel@tonic-gate return; 3600Sstevel@tonic-gate 3610Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) 3620Sstevel@tonic-gate return; 3630Sstevel@tonic-gate } 3640Sstevel@tonic-gate 3650Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 3660Sstevel@tonic-gate return; 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_DU; 3690Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 3700Sstevel@tonic-gate mb->mb_setno = sp->setno; 3710Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, &ep); 3720Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate if (meta_gettimeofday(&tp) != -1) 3750Sstevel@tonic-gate mb->mb_timestamp = tp; 3760Sstevel@tonic-gate 3770Sstevel@tonic-gate /* 3780Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 3790Sstevel@tonic-gate * the master block. This allows the disk image to be self-identifying 3800Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 3810Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 3820Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 3830Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 3840Sstevel@tonic-gate * in the remote copy scenario. 
3850Sstevel@tonic-gate */ 3860Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 3870Sstevel@tonic-gate int len; 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate len = devid_sizeof(devid); 3900Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 3910Sstevel@tonic-gate /* there is enough space to store the devid */ 3920Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 3930Sstevel@tonic-gate mb->mb_devid_len = len; 3940Sstevel@tonic-gate (void) memcpy(mb->mb_devid, (char *)devid, len); 3950Sstevel@tonic-gate } 3960Sstevel@tonic-gate devid_free(devid); 3970Sstevel@tonic-gate } 3980Sstevel@tonic-gate 3990Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 4000Sstevel@tonic-gate (crc_skip_t *)NULL); 4010Sstevel@tonic-gate 4020Sstevel@tonic-gate /* 4030Sstevel@tonic-gate * If any of these operations fail, we need to inform the 4040Sstevel@tonic-gate * user that the disk won't be self identifying. When support 4050Sstevel@tonic-gate * for importing remotely replicated disksets is added, we 4060Sstevel@tonic-gate * want to add the error messages here. 
4070Sstevel@tonic-gate */ 4080Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 4090Sstevel@tonic-gate goto out; 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) 4120Sstevel@tonic-gate goto out; 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 4150Sstevel@tonic-gate goto out; 4160Sstevel@tonic-gate 4170Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) 4180Sstevel@tonic-gate goto out; 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 4210Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) 4220Sstevel@tonic-gate goto out; 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate out: 4250Sstevel@tonic-gate Free(mb); 4260Sstevel@tonic-gate } 4270Sstevel@tonic-gate 4280Sstevel@tonic-gate static int 4290Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep) 4300Sstevel@tonic-gate { 4310Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 4320Sstevel@tonic-gate md_replicalist_t *rl; 4330Sstevel@tonic-gate FILE *cfp = NULL; 4340Sstevel@tonic-gate FILE *mfp = NULL; 4350Sstevel@tonic-gate struct stat sbuf; 4360Sstevel@tonic-gate int rval = 0; 4370Sstevel@tonic-gate int in_miniroot = 0; 4380Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 4390Sstevel@tonic-gate char *tname = NULL; 4400Sstevel@tonic-gate 4410Sstevel@tonic-gate /* get list of local replicas */ 4420Sstevel@tonic-gate if (! metaislocalset(sp)) 4430Sstevel@tonic-gate return (0); 4440Sstevel@tonic-gate 4450Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 4460Sstevel@tonic-gate return (-1); 4470Sstevel@tonic-gate 4480Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 4490Sstevel@tonic-gate if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) { 4500Sstevel@tonic-gate /* 4510Sstevel@tonic-gate * On the miniroot tmp files must be created in /var/tmp. 
4520Sstevel@tonic-gate * If we get a EROFS error, we assume that we are in the 4530Sstevel@tonic-gate * miniroot. 4540Sstevel@tonic-gate */ 4550Sstevel@tonic-gate if (errno != EROFS) 4560Sstevel@tonic-gate goto error; 4570Sstevel@tonic-gate in_miniroot = 1; 4580Sstevel@tonic-gate errno = 0; 4590Sstevel@tonic-gate tname = tempnam("/var/tmp", "slvm_"); 4600Sstevel@tonic-gate if (tname == NULL && errno == EROFS) { 4610Sstevel@tonic-gate /* 4620Sstevel@tonic-gate * If we are booted on a read-only root because 4630Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 4640Sstevel@tonic-gate * any scary error messages. 4650Sstevel@tonic-gate */ 4660Sstevel@tonic-gate errno = 0; 4670Sstevel@tonic-gate goto out; 4680Sstevel@tonic-gate } 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 4710Sstevel@tonic-gate if ((cfp = fopen(tname, "w+")) == NULL) 4720Sstevel@tonic-gate goto error; 4730Sstevel@tonic-gate } 4740Sstevel@tonic-gate if (stat(META_DBCONF, &sbuf) == 0) { 4750Sstevel@tonic-gate if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0) 4760Sstevel@tonic-gate goto error; 4770Sstevel@tonic-gate if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0) 4780Sstevel@tonic-gate goto error; 4790Sstevel@tonic-gate } 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate /* print header */ 4820Sstevel@tonic-gate if (fprintf(cfp, "#metadevice database location file ") == EOF) 4830Sstevel@tonic-gate goto error; 4840Sstevel@tonic-gate if (fprintf(cfp, "do not hand edit\n") < 0) 4850Sstevel@tonic-gate goto error; 4860Sstevel@tonic-gate if (fprintf(cfp, 487*5109Spetede "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0) 4880Sstevel@tonic-gate goto error; 4890Sstevel@tonic-gate 4900Sstevel@tonic-gate /* dump replicas */ 4910Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 4920Sstevel@tonic-gate md_replica_t *r = rl->rl_repp; 4930Sstevel@tonic-gate int checksum = 42; 4940Sstevel@tonic-gate int i; 
4950Sstevel@tonic-gate char *devidp; 4960Sstevel@tonic-gate minor_t min; 4970Sstevel@tonic-gate 4980Sstevel@tonic-gate devidp = devid_str_encode(r->r_devid, r->r_minor_name); 4990Sstevel@tonic-gate /* If devid code can't encode devidp - skip entry */ 5000Sstevel@tonic-gate if (devidp == NULL) { 5010Sstevel@tonic-gate continue; 5020Sstevel@tonic-gate } 5030Sstevel@tonic-gate 5040Sstevel@tonic-gate /* compute checksum */ 5050Sstevel@tonic-gate for (i = 0; ((r->r_driver_name[i] != '\0') && 5060Sstevel@tonic-gate (i < sizeof (r->r_driver_name))); i++) { 5070Sstevel@tonic-gate checksum -= r->r_driver_name[i]; 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate min = meta_getminor(r->r_namep->dev); 5100Sstevel@tonic-gate checksum -= min; 5110Sstevel@tonic-gate checksum -= r->r_blkno; 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate for (i = 0; i < strlen(devidp); i++) { 5140Sstevel@tonic-gate checksum -= devidp[i]; 5150Sstevel@tonic-gate } 5160Sstevel@tonic-gate /* print info */ 5170Sstevel@tonic-gate if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n", 5180Sstevel@tonic-gate r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) { 5190Sstevel@tonic-gate goto error; 5200Sstevel@tonic-gate } 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate devid_str_free(devidp); 5230Sstevel@tonic-gate } 5240Sstevel@tonic-gate 5250Sstevel@tonic-gate /* close and rename to real file */ 5260Sstevel@tonic-gate if (fflush(cfp) != 0) 5270Sstevel@tonic-gate goto error; 5280Sstevel@tonic-gate if (fsync(fileno(cfp)) != 0) 5290Sstevel@tonic-gate goto error; 5300Sstevel@tonic-gate if (fclose(cfp) != 0) { 5310Sstevel@tonic-gate cfp = NULL; 5320Sstevel@tonic-gate goto error; 5330Sstevel@tonic-gate } 5340Sstevel@tonic-gate cfp = NULL; 5350Sstevel@tonic-gate 5360Sstevel@tonic-gate /* 5370Sstevel@tonic-gate * Renames don't work in the miniroot since tmpfiles are 5380Sstevel@tonic-gate * created in /var/tmp. Hence we copy the data out. 5390Sstevel@tonic-gate */ 5400Sstevel@tonic-gate 5410Sstevel@tonic-gate if (! 
in_miniroot) { 5420Sstevel@tonic-gate if (rename(META_DBCONFTMP, META_DBCONF) != 0) 5430Sstevel@tonic-gate goto error; 5440Sstevel@tonic-gate } else { 5450Sstevel@tonic-gate if ((cfp = fopen(tname, "r")) == NULL) 5460Sstevel@tonic-gate goto error; 5470Sstevel@tonic-gate if ((mfp = fopen(META_DBCONF, "w+")) == NULL) 5480Sstevel@tonic-gate goto error; 5490Sstevel@tonic-gate while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) { 5500Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 5510Sstevel@tonic-gate goto error; 5520Sstevel@tonic-gate } 5530Sstevel@tonic-gate (void) fclose(cfp); 5540Sstevel@tonic-gate cfp = NULL; 5550Sstevel@tonic-gate if (fflush(mfp) != 0) 5560Sstevel@tonic-gate goto error; 5570Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 5580Sstevel@tonic-gate goto error; 5590Sstevel@tonic-gate if (fclose(mfp) != 0) { 5600Sstevel@tonic-gate mfp = NULL; 5610Sstevel@tonic-gate goto error; 5620Sstevel@tonic-gate } 5630Sstevel@tonic-gate /* delete the tempfile */ 5640Sstevel@tonic-gate (void) unlink(tname); 5650Sstevel@tonic-gate } 5660Sstevel@tonic-gate /* success */ 5670Sstevel@tonic-gate rval = 0; 5680Sstevel@tonic-gate goto out; 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate /* tempfile error */ 5710Sstevel@tonic-gate error: 5720Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname): 573*5109Spetede mdsyserror(ep, errno, META_DBCONFTMP); 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate 5760Sstevel@tonic-gate /* cleanup, return success */ 5770Sstevel@tonic-gate out: 5780Sstevel@tonic-gate if (rlp != NULL) 5790Sstevel@tonic-gate metafreereplicalist(rlp); 5800Sstevel@tonic-gate if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) { 5810Sstevel@tonic-gate rval = (in_miniroot) ? 
mdsyserror(ep, errno, tname): 582*5109Spetede mdsyserror(ep, errno, META_DBCONFTMP); 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate free(tname); 5850Sstevel@tonic-gate return (rval); 5860Sstevel@tonic-gate } 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate /* 5890Sstevel@tonic-gate * check replica for dev 5900Sstevel@tonic-gate */ 5910Sstevel@tonic-gate static int 5920Sstevel@tonic-gate in_replica( 5930Sstevel@tonic-gate mdsetname_t *sp, 5940Sstevel@tonic-gate md_replica_t *rp, 5950Sstevel@tonic-gate mdname_t *np, 5960Sstevel@tonic-gate diskaddr_t slblk, 5970Sstevel@tonic-gate diskaddr_t nblks, 5980Sstevel@tonic-gate md_error_t *ep 5990Sstevel@tonic-gate ) 6000Sstevel@tonic-gate { 6010Sstevel@tonic-gate mdname_t *repnp = rp->r_namep; 6020Sstevel@tonic-gate diskaddr_t rep_sblk = rp->r_blkno; 6030Sstevel@tonic-gate diskaddr_t rep_nblks = rp->r_nblk; 6040Sstevel@tonic-gate 6050Sstevel@tonic-gate /* should be in the same set */ 6060Sstevel@tonic-gate assert(sp != NULL); 6070Sstevel@tonic-gate 6080Sstevel@tonic-gate /* if error in master block, assume whole partition */ 6090Sstevel@tonic-gate if ((rep_sblk == MD_DISKADDR_ERROR) || 6100Sstevel@tonic-gate (rep_nblks == MD_DISKADDR_ERROR)) { 6110Sstevel@tonic-gate rep_sblk = 0; 6120Sstevel@tonic-gate rep_nblks = MD_DISKADDR_ERROR; 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate /* check overlap */ 6160Sstevel@tonic-gate if (meta_check_overlap( 6170Sstevel@tonic-gate MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) { 6180Sstevel@tonic-gate return (-1); 6190Sstevel@tonic-gate } 6200Sstevel@tonic-gate 6210Sstevel@tonic-gate /* return success */ 6220Sstevel@tonic-gate return (0); 6230Sstevel@tonic-gate } 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate /* 6260Sstevel@tonic-gate * check to see if we're in a replica 6270Sstevel@tonic-gate */ 6280Sstevel@tonic-gate int 6290Sstevel@tonic-gate meta_check_inreplica( 6300Sstevel@tonic-gate mdsetname_t *sp, 6310Sstevel@tonic-gate mdname_t *np, 
6320Sstevel@tonic-gate diskaddr_t slblk, 6330Sstevel@tonic-gate diskaddr_t nblks, 6340Sstevel@tonic-gate md_error_t *ep 6350Sstevel@tonic-gate ) 6360Sstevel@tonic-gate { 6370Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 6380Sstevel@tonic-gate md_replicalist_t *rl; 6390Sstevel@tonic-gate int rval = 0; 6400Sstevel@tonic-gate 6410Sstevel@tonic-gate /* should have a set */ 6420Sstevel@tonic-gate assert(sp != NULL); 6430Sstevel@tonic-gate 6440Sstevel@tonic-gate /* for each replica */ 6450Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 6460Sstevel@tonic-gate return (-1); 6470Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 6480Sstevel@tonic-gate md_replica_t *rp = rl->rl_repp; 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate /* check replica */ 6510Sstevel@tonic-gate if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) { 6520Sstevel@tonic-gate rval = -1; 6530Sstevel@tonic-gate break; 6540Sstevel@tonic-gate } 6550Sstevel@tonic-gate } 6560Sstevel@tonic-gate 6570Sstevel@tonic-gate /* cleanup, return success */ 6580Sstevel@tonic-gate metafreereplicalist(rlp); 6590Sstevel@tonic-gate return (rval); 6600Sstevel@tonic-gate } 6610Sstevel@tonic-gate 6620Sstevel@tonic-gate /* 6630Sstevel@tonic-gate * check replica 6640Sstevel@tonic-gate */ 6650Sstevel@tonic-gate int 6660Sstevel@tonic-gate meta_check_replica( 6670Sstevel@tonic-gate mdsetname_t *sp, /* set to check against */ 6680Sstevel@tonic-gate mdname_t *np, /* component to check against */ 6690Sstevel@tonic-gate mdchkopts_t options, /* option flags */ 6700Sstevel@tonic-gate diskaddr_t slblk, /* start logical block */ 6710Sstevel@tonic-gate diskaddr_t nblks, /* number of blocks (-1,rest of them) */ 6720Sstevel@tonic-gate md_error_t *ep /* error packet */ 6730Sstevel@tonic-gate ) 6740Sstevel@tonic-gate { 6750Sstevel@tonic-gate mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE; 6760Sstevel@tonic-gate 6770Sstevel@tonic-gate /* make sure we have a disk */ 6780Sstevel@tonic-gate if 
(metachkcomp(np, ep) != 0) 6790Sstevel@tonic-gate return (-1); 6800Sstevel@tonic-gate 6810Sstevel@tonic-gate /* check to ensure that it is not already in use */ 6820Sstevel@tonic-gate if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 6830Sstevel@tonic-gate return (-1); 6840Sstevel@tonic-gate } 6850Sstevel@tonic-gate 6860Sstevel@tonic-gate if (options & MDCHK_ALLOW_NODBS) 6870Sstevel@tonic-gate return (0); 6880Sstevel@tonic-gate 6890Sstevel@tonic-gate if (options & MDCHK_DRVINSET) 6900Sstevel@tonic-gate return (0); 6910Sstevel@tonic-gate 6920Sstevel@tonic-gate /* make sure it is in the set */ 6930Sstevel@tonic-gate if (meta_check_inset(sp, np, ep) != 0) 6940Sstevel@tonic-gate return (-1); 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate /* make sure its not in a metadevice */ 6970Sstevel@tonic-gate if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0) 6980Sstevel@tonic-gate return (-1); 6990Sstevel@tonic-gate 7000Sstevel@tonic-gate /* return success */ 7010Sstevel@tonic-gate return (0); 7020Sstevel@tonic-gate } 7030Sstevel@tonic-gate 7040Sstevel@tonic-gate static int 7050Sstevel@tonic-gate update_dbinfo_on_drives( 7060Sstevel@tonic-gate mdsetname_t *sp, 7070Sstevel@tonic-gate md_drive_desc *dd, 7080Sstevel@tonic-gate int set_locked, 7090Sstevel@tonic-gate int force, 7100Sstevel@tonic-gate md_error_t *ep 7110Sstevel@tonic-gate ) 7120Sstevel@tonic-gate { 7130Sstevel@tonic-gate md_set_desc *sd; 7140Sstevel@tonic-gate int i; 7150Sstevel@tonic-gate md_setkey_t *cl_sk; 7160Sstevel@tonic-gate int rval = 0; 7170Sstevel@tonic-gate md_mnnode_desc *nd; 7180Sstevel@tonic-gate 7190Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 7200Sstevel@tonic-gate return (-1); 7210Sstevel@tonic-gate 7220Sstevel@tonic-gate if (! 
set_locked) { 7230Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 7240Sstevel@tonic-gate md_error_t xep = mdnullerror; 7250Sstevel@tonic-gate sigset_t sigs; 7260Sstevel@tonic-gate /* Make sure we are blocking all signals */ 7270Sstevel@tonic-gate if (procsigs(TRUE, &sigs, &xep) < 0) 7280Sstevel@tonic-gate mdclrerror(&xep); 7290Sstevel@tonic-gate 7300Sstevel@tonic-gate nd = sd->sd_nodelist; 7310Sstevel@tonic-gate while (nd) { 7320Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, 7330Sstevel@tonic-gate mynode()) != 0) { 7340Sstevel@tonic-gate nd = nd->nd_next; 7350Sstevel@tonic-gate continue; 7360Sstevel@tonic-gate } 7370Sstevel@tonic-gate 7380Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 7390Sstevel@tonic-gate nd = nd->nd_next; 7400Sstevel@tonic-gate continue; 7410Sstevel@tonic-gate } 7420Sstevel@tonic-gate 7430Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) 7440Sstevel@tonic-gate return (-1); 7450Sstevel@tonic-gate nd = nd->nd_next; 7460Sstevel@tonic-gate } 7470Sstevel@tonic-gate } else { 7480Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7490Sstevel@tonic-gate /* Skip empty slots */ 7500Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7510Sstevel@tonic-gate continue; 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], 7540Sstevel@tonic-gate mynode()) != 0) 7550Sstevel@tonic-gate continue; 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) 7580Sstevel@tonic-gate return (-1); 7590Sstevel@tonic-gate } 7600Sstevel@tonic-gate } 7610Sstevel@tonic-gate } 7620Sstevel@tonic-gate 7630Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 7640Sstevel@tonic-gate nd = sd->sd_nodelist; 7650Sstevel@tonic-gate while (nd) { 7660Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, mynode()) != 0) { 7670Sstevel@tonic-gate nd = nd->nd_next; 7680Sstevel@tonic-gate continue; 7690Sstevel@tonic-gate } 7700Sstevel@tonic-gate 7710Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 
{ 7720Sstevel@tonic-gate nd = nd->nd_next; 7730Sstevel@tonic-gate continue; 7740Sstevel@tonic-gate } 7750Sstevel@tonic-gate 7760Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep) 7770Sstevel@tonic-gate == -1) { 7780Sstevel@tonic-gate rval = -1; 7790Sstevel@tonic-gate break; 7800Sstevel@tonic-gate } 7810Sstevel@tonic-gate nd = nd->nd_next; 7820Sstevel@tonic-gate } 7830Sstevel@tonic-gate } else { 7840Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 7850Sstevel@tonic-gate /* Skip empty slots */ 7860Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 7870Sstevel@tonic-gate continue; 7880Sstevel@tonic-gate 7890Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], mynode()) != 0) 7900Sstevel@tonic-gate continue; 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep) 7930Sstevel@tonic-gate == -1) { 7940Sstevel@tonic-gate rval = -1; 7950Sstevel@tonic-gate break; 7960Sstevel@tonic-gate } 7970Sstevel@tonic-gate } 7980Sstevel@tonic-gate } 7990Sstevel@tonic-gate 8000Sstevel@tonic-gate if (! 
set_locked) { 8010Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname); 8020Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 8030Sstevel@tonic-gate nd = sd->sd_nodelist; 8040Sstevel@tonic-gate while (nd) { 8050Sstevel@tonic-gate if (force && 8060Sstevel@tonic-gate strcmp(nd->nd_nodename, mynode()) != 0) { 8070Sstevel@tonic-gate nd = nd->nd_next; 8080Sstevel@tonic-gate continue; 8090Sstevel@tonic-gate } 8100Sstevel@tonic-gate 8110Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 8120Sstevel@tonic-gate nd = nd->nd_next; 8130Sstevel@tonic-gate continue; 8140Sstevel@tonic-gate } 8150Sstevel@tonic-gate 8160Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, 8170Sstevel@tonic-gate ep)) { 8180Sstevel@tonic-gate rval = -1; 8190Sstevel@tonic-gate break; 8200Sstevel@tonic-gate } 8210Sstevel@tonic-gate nd = nd->nd_next; 8220Sstevel@tonic-gate } 8230Sstevel@tonic-gate } else { 8240Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 8250Sstevel@tonic-gate /* Skip empty slots */ 8260Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 8270Sstevel@tonic-gate continue; 8280Sstevel@tonic-gate 8290Sstevel@tonic-gate if (force && 8300Sstevel@tonic-gate strcmp(sd->sd_nodes[i], mynode()) != 0) 8310Sstevel@tonic-gate continue; 8320Sstevel@tonic-gate 8330Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, 8340Sstevel@tonic-gate ep)) { 8350Sstevel@tonic-gate rval = -1; 8360Sstevel@tonic-gate break; 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate } 8390Sstevel@tonic-gate 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate cl_set_setkey(NULL); 8420Sstevel@tonic-gate } 8430Sstevel@tonic-gate 8440Sstevel@tonic-gate return (rval); 8450Sstevel@tonic-gate } 8460Sstevel@tonic-gate 8470Sstevel@tonic-gate int 8480Sstevel@tonic-gate meta_db_addsidenms( 8490Sstevel@tonic-gate mdsetname_t *sp, 8500Sstevel@tonic-gate mdname_t *np, 8510Sstevel@tonic-gate daddr_t blkno, 8520Sstevel@tonic-gate int bcast, 8530Sstevel@tonic-gate md_error_t *ep 8540Sstevel@tonic-gate ) 
8550Sstevel@tonic-gate { 8560Sstevel@tonic-gate side_t sideno; 8570Sstevel@tonic-gate char *bname = NULL; 8580Sstevel@tonic-gate char *dname = NULL; 8590Sstevel@tonic-gate minor_t mnum; 8600Sstevel@tonic-gate mddb_config_t c; 8610Sstevel@tonic-gate int done; 8620Sstevel@tonic-gate int rval = 0; 8630Sstevel@tonic-gate md_set_desc *sd; 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate sideno = MD_SIDEWILD; 8660Sstevel@tonic-gate /*CONSTCOND*/ 8670Sstevel@tonic-gate while (1) { 8680Sstevel@tonic-gate if (bname != NULL) { 8690Sstevel@tonic-gate Free(bname); 8700Sstevel@tonic-gate bname = NULL; 8710Sstevel@tonic-gate } 8720Sstevel@tonic-gate if (dname != NULL) { 8730Sstevel@tonic-gate Free(dname); 8740Sstevel@tonic-gate dname = NULL; 8750Sstevel@tonic-gate } 8760Sstevel@tonic-gate if ((done = meta_getnextside_devinfo(sp, np->bname, 8770Sstevel@tonic-gate &sideno, &bname, &dname, &mnum, ep)) == -1) { 8780Sstevel@tonic-gate rval = -1; 8790Sstevel@tonic-gate break; 8800Sstevel@tonic-gate } 8810Sstevel@tonic-gate 8820Sstevel@tonic-gate if (done == 0) 8830Sstevel@tonic-gate break; 8840Sstevel@tonic-gate 8850Sstevel@tonic-gate if (! metaislocalset(sp)) { 8860Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) { 8870Sstevel@tonic-gate rval = -1; 8880Sstevel@tonic-gate break; 8890Sstevel@tonic-gate } 8900Sstevel@tonic-gate } 8910Sstevel@tonic-gate 8920Sstevel@tonic-gate /* 8930Sstevel@tonic-gate * Send addsidenms to all nodes using rpc.mdcommd if 8940Sstevel@tonic-gate * sidename is being added to MN diskset. 8950Sstevel@tonic-gate * 8960Sstevel@tonic-gate * It's ok to broadcast this call to other nodes. 8970Sstevel@tonic-gate * 8980Sstevel@tonic-gate * Note: The broadcast to other nodes isn't needed during 8990Sstevel@tonic-gate * the addition of the first mddbs to the set since the 9000Sstevel@tonic-gate * other nodes haven't been joined to the set yet. 
All 9010Sstevel@tonic-gate * nodes in a MN diskset are (implicitly) joined to the set 9020Sstevel@tonic-gate * on the addition of the first mddb. 9030Sstevel@tonic-gate */ 9040Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 9050Sstevel@tonic-gate (bcast == DB_ADDSIDENMS_BCAST)) { 9060Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 9070Sstevel@tonic-gate md_mn_msg_meta_db_newside_t db_ns; 9080Sstevel@tonic-gate int send_rval; 9090Sstevel@tonic-gate 9100Sstevel@tonic-gate db_ns.msg_l_dev = np->dev; 9110Sstevel@tonic-gate db_ns.msg_sideno = sideno; 9120Sstevel@tonic-gate db_ns.msg_blkno = blkno; 9130Sstevel@tonic-gate (void) strncpy(db_ns.msg_dname, dname, 9140Sstevel@tonic-gate sizeof (db_ns.msg_dname)); 9150Sstevel@tonic-gate (void) splitname(np->bname, &db_ns.msg_splitname); 9160Sstevel@tonic-gate db_ns.msg_mnum = mnum; 9170Sstevel@tonic-gate 9180Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 9190Sstevel@tonic-gate db_ns.msg_devid[0] = NULL; 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate /* 9220Sstevel@tonic-gate * If reconfig cycle has been started, this node is 9230Sstevel@tonic-gate * stuck in in the return step until this command has 9240Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 9250Sstevel@tonic-gate * send_message to fail (instead of retrying) 9260Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 9270Sstevel@tonic-gate * cycle to proceed. 
9280Sstevel@tonic-gate */ 9290Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 9300Sstevel@tonic-gate MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND | 9310Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns, 9320Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_newside_t), 9330Sstevel@tonic-gate &resultp, ep); 9340Sstevel@tonic-gate if (send_rval != 0) { 9350Sstevel@tonic-gate rval = -1; 9360Sstevel@tonic-gate if (resultp == NULL) 9370Sstevel@tonic-gate (void) mddserror(ep, 9380Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9390Sstevel@tonic-gate sp->setno, NULL, NULL, 9400Sstevel@tonic-gate sp->setname); 9410Sstevel@tonic-gate else { 9420Sstevel@tonic-gate (void) mdstealerror(ep, 9430Sstevel@tonic-gate &(resultp->mmr_ep)); 9440Sstevel@tonic-gate if (mdisok(ep)) { 9450Sstevel@tonic-gate (void) mddserror(ep, 9460Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 9470Sstevel@tonic-gate sp->setno, NULL, NULL, 9480Sstevel@tonic-gate sp->setname); 9490Sstevel@tonic-gate } 9500Sstevel@tonic-gate free_result(resultp); 9510Sstevel@tonic-gate } 9520Sstevel@tonic-gate break; 9530Sstevel@tonic-gate } 9540Sstevel@tonic-gate if (resultp) 9550Sstevel@tonic-gate free_result(resultp); 9560Sstevel@tonic-gate } else { 9570Sstevel@tonic-gate /* 9580Sstevel@tonic-gate * Let this side's device name, minor # and driver name 9590Sstevel@tonic-gate * be known to the database replica. 
9600Sstevel@tonic-gate */ 9610Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 9620Sstevel@tonic-gate 9630Sstevel@tonic-gate /* Fill in device/replica info */ 9640Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 9650Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 9660Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, dname, 9670Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 968*5109Spetede if (splitname(bname, &c.c_devname) == 969*5109Spetede METASPLIT_LONGDISKNAME && devid_in_use == FALSE) { 970*5109Spetede rval = mddeverror(ep, MDE_DISKNAMETOOLONG, 971*5109Spetede NODEV64, np->rname); 972*5109Spetede break; 973*5109Spetede } 974*5109Spetede 9750Sstevel@tonic-gate c.c_locator.l_mnum = mnum; 9760Sstevel@tonic-gate 9770Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 9780Sstevel@tonic-gate c.c_setno = sp->setno; 9790Sstevel@tonic-gate (void) strncpy(c.c_setname, sp->setname, 980*5109Spetede sizeof (c.c_setname)); 9810Sstevel@tonic-gate c.c_sideno = sideno; 9820Sstevel@tonic-gate 9830Sstevel@tonic-gate /* 9840Sstevel@tonic-gate * Don't need device id information from this ioctl 9850Sstevel@tonic-gate * Kernel determines device id from dev_t, which 9860Sstevel@tonic-gate * is just what this code would do. 
9870Sstevel@tonic-gate */ 9880Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 9890Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 9900Sstevel@tonic-gate 9910Sstevel@tonic-gate if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) { 9920Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 9930Sstevel@tonic-gate break; 9940Sstevel@tonic-gate } 9950Sstevel@tonic-gate } 9960Sstevel@tonic-gate } 9970Sstevel@tonic-gate 9980Sstevel@tonic-gate /* cleanup, return success */ 9990Sstevel@tonic-gate if (bname != NULL) { 10000Sstevel@tonic-gate Free(bname); 10010Sstevel@tonic-gate bname = NULL; 10020Sstevel@tonic-gate } 10030Sstevel@tonic-gate if (dname != NULL) { 10040Sstevel@tonic-gate Free(dname); 10050Sstevel@tonic-gate dname = NULL; 10060Sstevel@tonic-gate } 10070Sstevel@tonic-gate return (rval); 10080Sstevel@tonic-gate } 10090Sstevel@tonic-gate 10100Sstevel@tonic-gate 10110Sstevel@tonic-gate int 10120Sstevel@tonic-gate meta_db_delsidenm( 10130Sstevel@tonic-gate mdsetname_t *sp, 10140Sstevel@tonic-gate side_t sideno, 10150Sstevel@tonic-gate mdname_t *np, 10160Sstevel@tonic-gate daddr_t blkno, 10170Sstevel@tonic-gate md_error_t *ep 10180Sstevel@tonic-gate ) 10190Sstevel@tonic-gate { 10200Sstevel@tonic-gate mddb_config_t c; 10210Sstevel@tonic-gate md_set_desc *sd; 10220Sstevel@tonic-gate 10230Sstevel@tonic-gate if (! metaislocalset(sp)) { 10240Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 10250Sstevel@tonic-gate return (-1); 10260Sstevel@tonic-gate } 10270Sstevel@tonic-gate /* Use rpc.mdcommd to delete mddb side from all nodes */ 10280Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 10290Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 10300Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 10310Sstevel@tonic-gate md_mn_msg_meta_db_delside_t db_ds; 10320Sstevel@tonic-gate int send_rval; 10330Sstevel@tonic-gate 10340Sstevel@tonic-gate db_ds.msg_l_dev = np->dev; 10350Sstevel@tonic-gate db_ds.msg_blkno = blkno; 10360Sstevel@tonic-gate db_ds.msg_sideno = sideno; 10370Sstevel@tonic-gate 10380Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 10390Sstevel@tonic-gate db_ds.msg_devid[0] = NULL; 10400Sstevel@tonic-gate 10410Sstevel@tonic-gate /* 10420Sstevel@tonic-gate * If reconfig cycle has been started, this node is 10430Sstevel@tonic-gate * stuck in in the return step until this command has 10440Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 10450Sstevel@tonic-gate * send_message to fail (instead of retrying) 10460Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 10470Sstevel@tonic-gate * cycle to proceed. 
10480Sstevel@tonic-gate */ 10490Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 10500Sstevel@tonic-gate MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND | 10510Sstevel@tonic-gate MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds, 10520Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep); 10530Sstevel@tonic-gate if (send_rval != 0) { 10540Sstevel@tonic-gate if (resultp == NULL) 10550Sstevel@tonic-gate (void) mddserror(ep, 10560Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 10570Sstevel@tonic-gate sp->setno, NULL, NULL, 10580Sstevel@tonic-gate sp->setname); 10590Sstevel@tonic-gate else { 10600Sstevel@tonic-gate (void) mdstealerror(ep, &(resultp->mmr_ep)); 10610Sstevel@tonic-gate if (mdisok(ep)) { 10620Sstevel@tonic-gate (void) mddserror(ep, 10630Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 10640Sstevel@tonic-gate sp->setno, NULL, NULL, 10650Sstevel@tonic-gate sp->setname); 10660Sstevel@tonic-gate } 10670Sstevel@tonic-gate free_result(resultp); 10680Sstevel@tonic-gate } 10690Sstevel@tonic-gate return (-1); 10700Sstevel@tonic-gate } 10710Sstevel@tonic-gate if (resultp) 10720Sstevel@tonic-gate free_result(resultp); 10730Sstevel@tonic-gate 10740Sstevel@tonic-gate } else { 10750Sstevel@tonic-gate /* 10760Sstevel@tonic-gate * Let this side's device name, minor # and driver name 10770Sstevel@tonic-gate * be known to the database replica. 
10780Sstevel@tonic-gate */ 10790Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 10800Sstevel@tonic-gate 10810Sstevel@tonic-gate /* Fill in device/replica info */ 10820Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 10830Sstevel@tonic-gate c.c_locator.l_blkno = blkno; 10840Sstevel@tonic-gate 10850Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 10860Sstevel@tonic-gate c.c_setno = sp->setno; 10870Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 10880Sstevel@tonic-gate c.c_sideno = sideno; 10890Sstevel@tonic-gate 10900Sstevel@tonic-gate /* 10910Sstevel@tonic-gate * Don't need device id information from this ioctl 10920Sstevel@tonic-gate * Kernel determines device id from dev_t, which 10930Sstevel@tonic-gate * is just what this code would do. 10940Sstevel@tonic-gate */ 10950Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 10960Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 10970Sstevel@tonic-gate 10980Sstevel@tonic-gate if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0) 10990Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 11000Sstevel@tonic-gate } 11010Sstevel@tonic-gate return (0); 11020Sstevel@tonic-gate } 11030Sstevel@tonic-gate 11040Sstevel@tonic-gate 11050Sstevel@tonic-gate static int 11060Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep) 11070Sstevel@tonic-gate { 11080Sstevel@tonic-gate mdnamelist_t *dnp1, *dnp2; 11090Sstevel@tonic-gate 11100Sstevel@tonic-gate for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) { 11110Sstevel@tonic-gate for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) { 11120Sstevel@tonic-gate if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0) 11130Sstevel@tonic-gate return (mderror(ep, MDE_DUPDRIVE, 11140Sstevel@tonic-gate dnp1->namep->cname)); 11150Sstevel@tonic-gate } 11160Sstevel@tonic-gate } 11170Sstevel@tonic-gate return (0); 11180Sstevel@tonic-gate } 11190Sstevel@tonic-gate 11200Sstevel@tonic-gate 11210Sstevel@tonic-gate /* 11220Sstevel@tonic-gate 
/*
 * Return 1 if files are different, else return 0
 *
 * Two files are reported as identical only when both can be opened,
 * have the same size, and can be read in full with matching contents.
 * Any failure along the way (open, fstat, allocation, read) is reported
 * as "different", so a caller that rewrites on difference errs on the
 * side of rewriting.  Two empty files are identical.
 */

/*
 * Read exactly len bytes from fd into buf, continuing after short reads
 * and retrying when interrupted by a signal.
 * Returns 0 on success, -1 on a read error or premature end of file.
 */
static int
filediff_readall(int fd, char *buf, size_t len)
{
	size_t	off = 0;

	while (off < len) {
		ssize_t	n = read(fd, buf + off, len - off);

		if (n > 0)
			off += (size_t)n;
		else if (n == 0)
			return (-1);	/* file is shorter than fstat said */
		else if (errno != EINTR)
			return (-1);
	}
	return (0);
}

static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	int		tfd = -1;
	int		sfd = -1;
	size_t		sz;
	struct stat	tsbuf, sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	/*
	 * Open first and size the open descriptors, so that the size
	 * checked is the size of the very file that gets read.
	 */
	if ((tfd = open(tsname, O_RDONLY)) == -1)
		goto out;
	if ((sfd = open(sname, O_RDONLY)) == -1)
		goto out;
	if (fstat(tfd, &tsbuf) != 0 || fstat(sfd, &sbuf) != 0)
		goto out;
	if (tsbuf.st_size != sbuf.st_size)
		goto out;
	sz = (size_t)tsbuf.st_size;

	/*
	 * Nothing to compare for two empty files.  Handling this here also
	 * avoids depending on whether malloc(0) returns NULL.
	 */
	if (sz == 0) {
		ret = 0;
		goto out;
	}

	/* allocate memory and read both files into buffer */
	tbuf = malloc(sz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_readall(tfd, tbuf, sz) != 0 ||
	    filediff_readall(sfd, buf, sz) != 0)
		goto out;

	/* compare content */
	ret = (memcmp(tbuf, buf, sz) != 0);
out:
	if (tfd != -1)
		(void) close(tfd);
	if (sfd != -1)
		(void) close(sfd);
	free(tbuf);
	free(buf);
	return (ret);
}
11720Sstevel@tonic-gate 11730Sstevel@tonic-gate /* 11740Sstevel@tonic-gate * patch md.conf file with mddb locations 11750Sstevel@tonic-gate */ 11760Sstevel@tonic-gate int 11770Sstevel@tonic-gate meta_db_patch( 11780Sstevel@tonic-gate char *sname, /* system file name */ 11790Sstevel@tonic-gate char *cname, /* mddb.cf file name */ 11800Sstevel@tonic-gate int patch, /* patching locally */ 11810Sstevel@tonic-gate md_error_t *ep 11820Sstevel@tonic-gate ) 11830Sstevel@tonic-gate { 11840Sstevel@tonic-gate char *tsname = NULL; 11850Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 11860Sstevel@tonic-gate FILE *tsfp = NULL; 11870Sstevel@tonic-gate FILE *mfp = NULL; 11880Sstevel@tonic-gate int rval = -1; 11890Sstevel@tonic-gate 11900Sstevel@tonic-gate /* check names */ 11910Sstevel@tonic-gate if (sname == NULL) { 11920Sstevel@tonic-gate if (patch) 11930Sstevel@tonic-gate sname = "md.conf"; 11940Sstevel@tonic-gate else 11950Sstevel@tonic-gate sname = "/kernel/drv/md.conf"; 11960Sstevel@tonic-gate } 11970Sstevel@tonic-gate if (cname == NULL) 11980Sstevel@tonic-gate cname = META_DBCONF; 11990Sstevel@tonic-gate 12000Sstevel@tonic-gate /* 12010Sstevel@tonic-gate * edit file 12020Sstevel@tonic-gate */ 12030Sstevel@tonic-gate if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) { 12040Sstevel@tonic-gate if (mdissyserror(ep, EROFS)) { 12050Sstevel@tonic-gate /* 12060Sstevel@tonic-gate * If we are booted on a read-only root because 12070Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 12080Sstevel@tonic-gate * any scary error messages. 
12090Sstevel@tonic-gate */ 12100Sstevel@tonic-gate mdclrerror(ep); 12110Sstevel@tonic-gate rval = 0; 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate goto out; 12140Sstevel@tonic-gate } 12150Sstevel@tonic-gate 12162063Shshaw if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0, 12170Sstevel@tonic-gate ep) != 0) 12180Sstevel@tonic-gate goto out; 12190Sstevel@tonic-gate 12200Sstevel@tonic-gate /* if file content is identical, skip rename */ 12210Sstevel@tonic-gate if (filediff(tsname, sname) == 0) { 12220Sstevel@tonic-gate rval = 0; 12230Sstevel@tonic-gate goto out; 12240Sstevel@tonic-gate } 12250Sstevel@tonic-gate 12260Sstevel@tonic-gate if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) || 1227*5109Spetede (fclose(tsfp) != 0)) { 12280Sstevel@tonic-gate (void) mdsyserror(ep, errno, tsname); 12290Sstevel@tonic-gate goto out; 12300Sstevel@tonic-gate } 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate tsfp = NULL; 12330Sstevel@tonic-gate 12340Sstevel@tonic-gate /* 12350Sstevel@tonic-gate * rename file. If we get a Cross Device error then it 12360Sstevel@tonic-gate * is because we are in the miniroot. 
12370Sstevel@tonic-gate */ 12380Sstevel@tonic-gate if (rename(tsname, sname) != 0 && errno != EXDEV) { 12390Sstevel@tonic-gate (void) mdsyserror(ep, errno, sname); 12400Sstevel@tonic-gate goto out; 12410Sstevel@tonic-gate } 12420Sstevel@tonic-gate 12430Sstevel@tonic-gate if (errno == EXDEV) { 12440Sstevel@tonic-gate if ((tsfp = fopen(tsname, "r")) == NULL) 12450Sstevel@tonic-gate goto out; 12460Sstevel@tonic-gate if ((mfp = fopen(sname, "w+")) == NULL) 12470Sstevel@tonic-gate goto out; 12480Sstevel@tonic-gate while (fgets(line, sizeof (line), tsfp) != NULL) { 12490Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 12500Sstevel@tonic-gate goto out; 12510Sstevel@tonic-gate } 12520Sstevel@tonic-gate (void) fclose(tsfp); 12530Sstevel@tonic-gate tsfp = NULL; 12540Sstevel@tonic-gate if (fflush(mfp) != 0) 12550Sstevel@tonic-gate goto out; 12560Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 12570Sstevel@tonic-gate goto out; 12580Sstevel@tonic-gate if (fclose(mfp) != 0) { 12590Sstevel@tonic-gate mfp = NULL; 12600Sstevel@tonic-gate goto out; 12610Sstevel@tonic-gate } 12620Sstevel@tonic-gate } 12630Sstevel@tonic-gate 12640Sstevel@tonic-gate Free(tsname); 12650Sstevel@tonic-gate tsname = NULL; 12660Sstevel@tonic-gate rval = 0; 12670Sstevel@tonic-gate 12680Sstevel@tonic-gate /* cleanup, return error */ 12690Sstevel@tonic-gate out: 12700Sstevel@tonic-gate if (tsfp != NULL) 12710Sstevel@tonic-gate (void) fclose(tsfp); 12720Sstevel@tonic-gate if (tsname != NULL) { 12730Sstevel@tonic-gate (void) unlink(tsname); 12740Sstevel@tonic-gate Free(tsname); 12750Sstevel@tonic-gate } 12760Sstevel@tonic-gate return (rval); 12770Sstevel@tonic-gate } 12780Sstevel@tonic-gate 12790Sstevel@tonic-gate /* 12800Sstevel@tonic-gate * Add replicas to set. 
This happens as a result of: 12810Sstevel@tonic-gate * - metadb [-s set_name] -a 12820Sstevel@tonic-gate * - metaset -s set_name -a disk 12830Sstevel@tonic-gate * - metaset -s set_name -d disk (causes a rebalance of mddbs) 12840Sstevel@tonic-gate * - metaset -s set_name -b 12850Sstevel@tonic-gate * 12860Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 12870Sstevel@tonic-gate * 12880Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 12890Sstevel@tonic-gate * is running the metaset command. 12900Sstevel@tonic-gate * 12910Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 12920Sstevel@tonic-gate * running the metaset command. If this is the first mddb added to 12930Sstevel@tonic-gate * the MN diskset, then no communication is made to other nodes via commd 12940Sstevel@tonic-gate * since the other nodes will be in-sync with respect to the mddbs when 12950Sstevel@tonic-gate * those other nodes join the set and snarf in the newly created mddb. 12960Sstevel@tonic-gate * If this is not the first mddb added to the MN diskset, then this 12970Sstevel@tonic-gate * attach command is sent to all of the nodes using commd. This keeps 12980Sstevel@tonic-gate * the nodes in-sync. 
12990Sstevel@tonic-gate */ 13000Sstevel@tonic-gate int 13010Sstevel@tonic-gate meta_db_attach( 13020Sstevel@tonic-gate mdsetname_t *sp, 13030Sstevel@tonic-gate mdnamelist_t *db_nlp, 13040Sstevel@tonic-gate mdchkopts_t options, 13050Sstevel@tonic-gate md_timeval32_t *timeval, 13060Sstevel@tonic-gate int dbcnt, 13070Sstevel@tonic-gate int dbsize, 13080Sstevel@tonic-gate char *sysfilename, 13090Sstevel@tonic-gate md_error_t *ep 13100Sstevel@tonic-gate ) 13110Sstevel@tonic-gate { 13120Sstevel@tonic-gate struct mddb_config c; 13130Sstevel@tonic-gate mdnamelist_t *nlp; 13140Sstevel@tonic-gate mdname_t *np; 13150Sstevel@tonic-gate md_drive_desc *dd = NULL; 13160Sstevel@tonic-gate md_drive_desc *p; 13170Sstevel@tonic-gate int i; 13180Sstevel@tonic-gate int fd; 13190Sstevel@tonic-gate side_t sideno; 13200Sstevel@tonic-gate daddr_t blkno; 13210Sstevel@tonic-gate int replicacount = 0; 13222614Spetede int start_svmdaemons = 0; 13230Sstevel@tonic-gate int rval = 0; 13240Sstevel@tonic-gate md_error_t status = mdnullerror; 13250Sstevel@tonic-gate md_set_desc *sd; 13260Sstevel@tonic-gate int stale_bool = FALSE; 13270Sstevel@tonic-gate int flags; 13280Sstevel@tonic-gate int firstmddb = 1; 13290Sstevel@tonic-gate md_timeval32_t inittime = {0, 0}; 13300Sstevel@tonic-gate 13310Sstevel@tonic-gate /* 13320Sstevel@tonic-gate * Error if we don't get some work to do. 
13330Sstevel@tonic-gate */ 13340Sstevel@tonic-gate if (db_nlp == NULL) 13350Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 13360Sstevel@tonic-gate 13370Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 13380Sstevel@tonic-gate return (-1); 13390Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 13400Sstevel@tonic-gate c.c_id = 0; 13410Sstevel@tonic-gate c.c_setno = sp->setno; 13420Sstevel@tonic-gate 13430Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 13440Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 13450Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 13460Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 13470Sstevel@tonic-gate if (metaislocalset(sp)) { 13480Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) 13490Sstevel@tonic-gate mdclrerror(&c.c_mde); 13500Sstevel@tonic-gate else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) || 13510Sstevel@tonic-gate (! (options & MDCHK_ALLOW_NODBS))) 13520Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 13530Sstevel@tonic-gate } else { 13540Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER)) 13550Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 13560Sstevel@tonic-gate } 13570Sstevel@tonic-gate mdclrerror(&c.c_mde); 13580Sstevel@tonic-gate } 13590Sstevel@tonic-gate /* 13600Sstevel@tonic-gate * Is current set STALE? 
13610Sstevel@tonic-gate */ 13620Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 13630Sstevel@tonic-gate stale_bool = TRUE; 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate assert(db_nlp != NULL); 13670Sstevel@tonic-gate 13682614Spetede /* if these are the first replicas then the SVM daemons need to run */ 13690Sstevel@tonic-gate if (c.c_dbcnt == 0) 13702614Spetede start_svmdaemons = 1; 13710Sstevel@tonic-gate 13720Sstevel@tonic-gate /* 13730Sstevel@tonic-gate * check to see if we will go over the total possible number 13740Sstevel@tonic-gate * of data bases 13750Sstevel@tonic-gate */ 13760Sstevel@tonic-gate nlp = db_nlp; 13770Sstevel@tonic-gate while (nlp) { 13780Sstevel@tonic-gate replicacount += dbcnt; 13790Sstevel@tonic-gate nlp = nlp->next; 13800Sstevel@tonic-gate } 13810Sstevel@tonic-gate 13820Sstevel@tonic-gate if ((replicacount + c.c_dbcnt) > c.c_dbmax) 13830Sstevel@tonic-gate return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32, 13840Sstevel@tonic-gate sp->setno, c.c_dbcnt + replicacount, NULL)); 13850Sstevel@tonic-gate 13860Sstevel@tonic-gate /* 13870Sstevel@tonic-gate * go through and check to make sure all locations specified 13880Sstevel@tonic-gate * are legal also pick out driver name; 13890Sstevel@tonic-gate */ 13900Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 13910Sstevel@tonic-gate diskaddr_t devsize; 13920Sstevel@tonic-gate 13930Sstevel@tonic-gate np = nlp->namep; 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate if (! metaislocalset(sp)) { 13960Sstevel@tonic-gate uint_t partno; 13970Sstevel@tonic-gate uint_t rep_partno; 13980Sstevel@tonic-gate mddrivename_t *dnp = np->drivenamep; 13990Sstevel@tonic-gate 14000Sstevel@tonic-gate /* 14010Sstevel@tonic-gate * make sure that non-local database replicas 14020Sstevel@tonic-gate * are always on the replica slice. 
14030Sstevel@tonic-gate */ 14040Sstevel@tonic-gate if (meta_replicaslice(dnp, 14050Sstevel@tonic-gate &rep_partno, ep) != 0) 14060Sstevel@tonic-gate return (-1); 14070Sstevel@tonic-gate if (metagetvtoc(np, FALSE, &partno, ep) == NULL) 14080Sstevel@tonic-gate return (-1); 14090Sstevel@tonic-gate if (partno != rep_partno) 14100Sstevel@tonic-gate return (mddeverror(ep, MDE_REPCOMP_ONLY, 14110Sstevel@tonic-gate np->dev, sp->setname)); 14120Sstevel@tonic-gate } 14130Sstevel@tonic-gate 14140Sstevel@tonic-gate if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize), 14150Sstevel@tonic-gate ep)) { 14160Sstevel@tonic-gate return (-1); 14170Sstevel@tonic-gate } 14180Sstevel@tonic-gate 14190Sstevel@tonic-gate if ((devsize = metagetsize(np, ep)) == -1) 14200Sstevel@tonic-gate return (-1); 14210Sstevel@tonic-gate 14220Sstevel@tonic-gate if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16)) 14230Sstevel@tonic-gate return (mdmddberror(ep, MDE_REPLICA_TOOSMALL, 14240Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, devsize, 14250Sstevel@tonic-gate np->cname)); 14260Sstevel@tonic-gate } 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate /* 14290Sstevel@tonic-gate * If first disk in set we don't have lb_inittime yet for use as 14300Sstevel@tonic-gate * mb_setcreatetime so don't go looking for it. WE'll come back 14310Sstevel@tonic-gate * later and update after the locator block has been created. 14320Sstevel@tonic-gate * If this isn't the first disk in the set, we have a locator 14330Sstevel@tonic-gate * block and thus we have lb_inittime. Set mb_setcreatetime to 14340Sstevel@tonic-gate * lb_inittime. 14350Sstevel@tonic-gate */ 14360Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 14370Sstevel@tonic-gate if (c.c_dbcnt != 0) { 14380Sstevel@tonic-gate firstmddb = 0; 14390Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, ep); 14400Sstevel@tonic-gate } 14410Sstevel@tonic-gate } 14420Sstevel@tonic-gate 14430Sstevel@tonic-gate /* 14440Sstevel@tonic-gate * go through and write all master blocks 14450Sstevel@tonic-gate */ 14460Sstevel@tonic-gate 14470Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 14480Sstevel@tonic-gate np = nlp->namep; 14490Sstevel@tonic-gate 14500Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) < 0) 14510Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 14520Sstevel@tonic-gate 14530Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 14540Sstevel@tonic-gate if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize, 14550Sstevel@tonic-gate inittime, ep)) { 14560Sstevel@tonic-gate (void) close(fd); 14570Sstevel@tonic-gate return (-1); 14580Sstevel@tonic-gate } 14590Sstevel@tonic-gate } 14600Sstevel@tonic-gate (void) close(fd); 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate 14630Sstevel@tonic-gate if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) 14640Sstevel@tonic-gate return (-1); 14650Sstevel@tonic-gate 14660Sstevel@tonic-gate if (! metaislocalset(sp)) { 14670Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 14680Sstevel@tonic-gate if (! 
mdisok(ep)) 14690Sstevel@tonic-gate return (-1); 14700Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 14710Sstevel@tonic-gate return (-1); 14720Sstevel@tonic-gate 14730Sstevel@tonic-gate } 14740Sstevel@tonic-gate 14750Sstevel@tonic-gate /* 14760Sstevel@tonic-gate * go through and tell kernel to add them 14770Sstevel@tonic-gate */ 14780Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 14790Sstevel@tonic-gate mdcinfo_t *cinfo; 14800Sstevel@tonic-gate 14810Sstevel@tonic-gate np = nlp->namep; 14820Sstevel@tonic-gate 14830Sstevel@tonic-gate if ((cinfo = metagetcinfo(np, ep)) == NULL) { 14840Sstevel@tonic-gate rval = -1; 14850Sstevel@tonic-gate goto out; 14860Sstevel@tonic-gate } 14870Sstevel@tonic-gate 14880Sstevel@tonic-gate /* 14890Sstevel@tonic-gate * If mddb is being added to MN diskset and there already 14900Sstevel@tonic-gate * exists a valid mddb in the set (which equates to this 14910Sstevel@tonic-gate * node being an owner of the set) then use rpc.mdcommd 14920Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 14930Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 14940Sstevel@tonic-gate * can't write the message to the mddb. 14950Sstevel@tonic-gate * 14960Sstevel@tonic-gate * Otherwise, just add mddb to this node. 14970Sstevel@tonic-gate */ 14980Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 14990Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 15000Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 15010Sstevel@tonic-gate md_mn_msg_meta_db_attach_t attach; 15020Sstevel@tonic-gate int send_rval; 15030Sstevel@tonic-gate 15040Sstevel@tonic-gate /* 15050Sstevel@tonic-gate * In a scenario where new replicas had been added on 15060Sstevel@tonic-gate * the master, and then all of the old replicas failed 15070Sstevel@tonic-gate * before the slaves had knowledge of the new replicas, 15080Sstevel@tonic-gate * the slaves are unable to re-parse in the mddb 15090Sstevel@tonic-gate * from the new replicas since the slaves have no 15100Sstevel@tonic-gate * knowledge of the new replicas. The following 15110Sstevel@tonic-gate * algorithm solves this problem: 15120Sstevel@tonic-gate * - META_DB_ATTACH message generates submsgs 15130Sstevel@tonic-gate * - BLOCK parse (master) 15140Sstevel@tonic-gate * - MDDB_ATTACH new replicas 15150Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 15160Sstevel@tonic-gate * information to be sent from master 15170Sstevel@tonic-gate * to slaves at a higher class than the 15180Sstevel@tonic-gate * unblock so the parse message will 15190Sstevel@tonic-gate * reach slaves before unblock message. 
15200Sstevel@tonic-gate */ 15210Sstevel@tonic-gate attach.msg_l_dev = np->dev; 15220Sstevel@tonic-gate attach.msg_cnt = dbcnt; 15230Sstevel@tonic-gate attach.msg_dbsize = dbsize; 15240Sstevel@tonic-gate (void) strncpy(attach.msg_dname, cinfo->dname, 15250Sstevel@tonic-gate sizeof (attach.msg_dname)); 15260Sstevel@tonic-gate (void) splitname(np->bname, &attach.msg_splitname); 15270Sstevel@tonic-gate attach.msg_options = options; 15280Sstevel@tonic-gate 15290Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 15300Sstevel@tonic-gate attach.msg_devid[0] = NULL; 15310Sstevel@tonic-gate 15320Sstevel@tonic-gate /* 15330Sstevel@tonic-gate * If reconfig cycle has been started, this node is 15340Sstevel@tonic-gate * stuck in in the return step until this command has 15350Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 15360Sstevel@tonic-gate * send_message to fail (instead of retrying) 15370Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 15380Sstevel@tonic-gate * cycle to proceed. 
15390Sstevel@tonic-gate */ 15400Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 15410Sstevel@tonic-gate if (stale_bool == TRUE) 15420Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 15430Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 1544*5109Spetede MD_MN_MSG_META_DB_ATTACH, 1545*5109Spetede flags, (char *)&attach, 1546*5109Spetede sizeof (md_mn_msg_meta_db_attach_t), 1547*5109Spetede &resultp, ep); 15480Sstevel@tonic-gate if (send_rval != 0) { 15490Sstevel@tonic-gate rval = -1; 15500Sstevel@tonic-gate if (resultp == NULL) 15510Sstevel@tonic-gate (void) mddserror(ep, 15520Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 15530Sstevel@tonic-gate sp->setno, NULL, NULL, 15540Sstevel@tonic-gate sp->setname); 15550Sstevel@tonic-gate else { 15560Sstevel@tonic-gate (void) mdstealerror(ep, 15570Sstevel@tonic-gate &(resultp->mmr_ep)); 15580Sstevel@tonic-gate if (mdisok(ep)) { 15590Sstevel@tonic-gate (void) mddserror(ep, 15600Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 15610Sstevel@tonic-gate sp->setno, NULL, NULL, 15620Sstevel@tonic-gate sp->setname); 15630Sstevel@tonic-gate } 15640Sstevel@tonic-gate free_result(resultp); 15650Sstevel@tonic-gate } 15660Sstevel@tonic-gate goto out; 15670Sstevel@tonic-gate } 15680Sstevel@tonic-gate if (resultp) 15690Sstevel@tonic-gate free_result(resultp); 15700Sstevel@tonic-gate } else { 1571*5109Spetede /* Adding mddb(s) to just this node */ 1572*5109Spetede for (i = 0; i < dbcnt; i++) { 1573*5109Spetede (void) memset(&c, 0, sizeof (c)); 1574*5109Spetede /* Fill in device/replica info */ 1575*5109Spetede c.c_locator.l_dev = meta_cmpldev(np->dev); 1576*5109Spetede c.c_locator.l_blkno = i * dbsize + 16; 1577*5109Spetede blkno = c.c_locator.l_blkno; 1578*5109Spetede (void) strncpy(c.c_locator.l_driver, 1579*5109Spetede cinfo->dname, 1580*5109Spetede sizeof (c.c_locator.l_driver)); 15810Sstevel@tonic-gate 1582*5109Spetede if (splitname(np->bname, &c.c_devname) == 1583*5109Spetede METASPLIT_LONGDISKNAME && devid_in_use == 1584*5109Spetede 
FALSE) { 1585*5109Spetede rval = mddeverror(ep, 1586*5109Spetede MDE_DISKNAMETOOLONG, 1587*5109Spetede NODEV64, np->rname); 1588*5109Spetede goto out; 15890Sstevel@tonic-gate } 1590*5109Spetede 1591*5109Spetede c.c_locator.l_mnum = meta_getminor(np->dev); 1592*5109Spetede 1593*5109Spetede /* Fill in setno, setname, and sideno */ 1594*5109Spetede c.c_setno = sp->setno; 1595*5109Spetede if (! metaislocalset(sp)) { 1596*5109Spetede if (MD_MNSET_DESC(sd)) { 1597*5109Spetede c.c_multi_node = 1; 1598*5109Spetede } 1599*5109Spetede } 1600*5109Spetede (void) strcpy(c.c_setname, sp->setname); 1601*5109Spetede c.c_sideno = sideno; 16020Sstevel@tonic-gate 1603*5109Spetede /* 1604*5109Spetede * Don't need device id information from this 1605*5109Spetede * ioctl Kernel determines device id from 1606*5109Spetede * dev_t, which is just what this code would do. 1607*5109Spetede */ 1608*5109Spetede c.c_locator.l_devid = (uint64_t)0; 1609*5109Spetede c.c_locator.l_devid_flags = 0; 16100Sstevel@tonic-gate 1611*5109Spetede if (timeval != NULL) 1612*5109Spetede c.c_timestamp = *timeval; 16130Sstevel@tonic-gate 1614*5109Spetede if (setup_med_cfg(sp, &c, 1615*5109Spetede (options & MDCHK_SET_FORCE), ep)) { 1616*5109Spetede rval = -1; 1617*5109Spetede goto out; 1618*5109Spetede } 16190Sstevel@tonic-gate 1620*5109Spetede if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, 1621*5109Spetede NULL) != 0) { 1622*5109Spetede rval = mdstealerror(ep, &c.c_mde); 1623*5109Spetede goto out; 1624*5109Spetede } 1625*5109Spetede /* 1626*5109Spetede * This is either a traditional diskset OR this 1627*5109Spetede * is the first replica added to a MN diskset. 1628*5109Spetede * In either case, set broadcast to NO_BCAST so 1629*5109Spetede * that message won't go through rpc.mdcommd. 1630*5109Spetede * If this is a traditional diskset, the bcast 1631*5109Spetede * flag is ignored since traditional disksets 1632*5109Spetede * don't use the rpc.mdcommd. 
1633*5109Spetede */ 1634*5109Spetede if (meta_db_addsidenms(sp, np, blkno, 1635*5109Spetede DB_ADDSIDENMS_NO_BCAST, ep)) 1636*5109Spetede goto out; 16370Sstevel@tonic-gate } 16380Sstevel@tonic-gate } 16390Sstevel@tonic-gate if (! metaislocalset(sp)) { 16400Sstevel@tonic-gate /* update the dbcnt and size in dd */ 16410Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) 16420Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 16430Sstevel@tonic-gate p->dd_dbcnt = dbcnt; 16440Sstevel@tonic-gate p->dd_dbsize = dbsize; 16450Sstevel@tonic-gate break; 16460Sstevel@tonic-gate } 16470Sstevel@tonic-gate } 16480Sstevel@tonic-gate 16490Sstevel@tonic-gate /* 16500Sstevel@tonic-gate * If this was the first addition of disks to the 16510Sstevel@tonic-gate * diskset you now need to update the mb_setcreatetime 16520Sstevel@tonic-gate * which needed lb_inittime which wasn't there until now. 16530Sstevel@tonic-gate */ 16540Sstevel@tonic-gate if (firstmddb) { 16550Sstevel@tonic-gate if (meta_update_mb(sp, dd, ep) != 0) { 16560Sstevel@tonic-gate return (-1); 16570Sstevel@tonic-gate } 16580Sstevel@tonic-gate } 16590Sstevel@tonic-gate (void) close(fd); 16600Sstevel@tonic-gate } 16610Sstevel@tonic-gate 16620Sstevel@tonic-gate out: 16630Sstevel@tonic-gate if (metaislocalset(sp)) { 16640Sstevel@tonic-gate 16650Sstevel@tonic-gate /* everything looks fine. 
Start mdmonitord */ 16662614Spetede if (rval == 0 && start_svmdaemons == 1) { 16670Sstevel@tonic-gate if (meta_smf_enable(META_SMF_CORE, &status) == -1) { 16680Sstevel@tonic-gate mde_perror(&status, ""); 16690Sstevel@tonic-gate mdclrerror(&status); 16700Sstevel@tonic-gate } 16710Sstevel@tonic-gate } 16720Sstevel@tonic-gate 16730Sstevel@tonic-gate if (buildconf(sp, &status)) { 16740Sstevel@tonic-gate /* Don't mask any previous errors */ 16750Sstevel@tonic-gate if (rval == 0) 16760Sstevel@tonic-gate rval = mdstealerror(ep, &status); 16770Sstevel@tonic-gate return (rval); 16780Sstevel@tonic-gate } 16790Sstevel@tonic-gate 16800Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 16810Sstevel@tonic-gate /* Don't mask any previous errors */ 16820Sstevel@tonic-gate if (rval == 0) 16830Sstevel@tonic-gate rval = mdstealerror(ep, &status); 16840Sstevel@tonic-gate } 16850Sstevel@tonic-gate } else { 16860Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 16870Sstevel@tonic-gate (options & MDCHK_SET_LOCKED), 16880Sstevel@tonic-gate (options & MDCHK_SET_FORCE), 16890Sstevel@tonic-gate &status)) { 16900Sstevel@tonic-gate /* Don't mask any previous errors */ 16910Sstevel@tonic-gate if (rval == 0) 16920Sstevel@tonic-gate rval = mdstealerror(ep, &status); 16930Sstevel@tonic-gate else 16940Sstevel@tonic-gate mdclrerror(&status); 16950Sstevel@tonic-gate } 16960Sstevel@tonic-gate metafreedrivedesc(&dd); 16970Sstevel@tonic-gate } 16980Sstevel@tonic-gate /* 16990Sstevel@tonic-gate * For MN disksets that already had already had nodes joined 17000Sstevel@tonic-gate * before the attach of this mddb(s), the name invalidation is 17010Sstevel@tonic-gate * done by the commd handler routine. Otherwise, if this 17020Sstevel@tonic-gate * is the first attach of a MN diskset mddb, the invalidation 17030Sstevel@tonic-gate * must be done here since the first attach cannot be sent 17040Sstevel@tonic-gate * via the commd since there are no nodes joined to the set yet. 
17050Sstevel@tonic-gate */ 17060Sstevel@tonic-gate if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) || 17070Sstevel@tonic-gate (MD_MNSET_DESC(sd) && 17080Sstevel@tonic-gate (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) { 17090Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 17100Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 17110Sstevel@tonic-gate } 17120Sstevel@tonic-gate } 17130Sstevel@tonic-gate return (rval); 17140Sstevel@tonic-gate } 17150Sstevel@tonic-gate 17160Sstevel@tonic-gate /* 17170Sstevel@tonic-gate * deletelist_length 17180Sstevel@tonic-gate * 17190Sstevel@tonic-gate * return the number of slices that have been specified for deletion 17200Sstevel@tonic-gate * on the metadb command line. This does not calculate the number 17210Sstevel@tonic-gate * of replicas because there may be multiple replicas per slice. 17220Sstevel@tonic-gate */ 17230Sstevel@tonic-gate static int 17240Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp) 17250Sstevel@tonic-gate { 17260Sstevel@tonic-gate 17270Sstevel@tonic-gate mdnamelist_t *nlp; 17280Sstevel@tonic-gate int list_length = 0; 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 17310Sstevel@tonic-gate list_length++; 17320Sstevel@tonic-gate } 17330Sstevel@tonic-gate 17340Sstevel@tonic-gate return (list_length); 17350Sstevel@tonic-gate } 17360Sstevel@tonic-gate 17370Sstevel@tonic-gate static int 17380Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp) 17390Sstevel@tonic-gate { 17400Sstevel@tonic-gate 17410Sstevel@tonic-gate mdnamelist_t *nlp; 17420Sstevel@tonic-gate mdname_t *np; 17430Sstevel@tonic-gate int index = 0; 17440Sstevel@tonic-gate 17450Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 17460Sstevel@tonic-gate np = nlp->namep; 17470Sstevel@tonic-gate 17480Sstevel@tonic-gate if (strcmp(devname, np->bname) == 0) 17490Sstevel@tonic-gate return (index); 17500Sstevel@tonic-gate index++; 
17510Sstevel@tonic-gate } 17520Sstevel@tonic-gate 17530Sstevel@tonic-gate return (-1); 17540Sstevel@tonic-gate } 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate /* 17570Sstevel@tonic-gate * Delete replicas from set. This happens as a result of: 17580Sstevel@tonic-gate * - metadb [-s set_name] -d 17590Sstevel@tonic-gate * - metaset -s set_name -a disk (causes a rebalance of mddbs) 17600Sstevel@tonic-gate * - metaset -s set_name -d disk 17610Sstevel@tonic-gate * - metaset -s set_name -b 17620Sstevel@tonic-gate * 17630Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 17640Sstevel@tonic-gate * 17650Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 17660Sstevel@tonic-gate * is running the metaset command. 17670Sstevel@tonic-gate * 17680Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 17690Sstevel@tonic-gate * running the metaset command. This detach routine is sent to all 17700Sstevel@tonic-gate * of the joined nodes in the diskset using commd. This keeps 17710Sstevel@tonic-gate * the nodes in-sync. 
17720Sstevel@tonic-gate */ 17730Sstevel@tonic-gate int 17740Sstevel@tonic-gate meta_db_detach( 17750Sstevel@tonic-gate mdsetname_t *sp, 17760Sstevel@tonic-gate mdnamelist_t *db_nlp, 17770Sstevel@tonic-gate mdforceopts_t force_option, 17780Sstevel@tonic-gate char *sysfilename, 17790Sstevel@tonic-gate md_error_t *ep 17800Sstevel@tonic-gate ) 17810Sstevel@tonic-gate { 17820Sstevel@tonic-gate struct mddb_config c; 17830Sstevel@tonic-gate mdnamelist_t *nlp; 17840Sstevel@tonic-gate mdname_t *np; 17850Sstevel@tonic-gate md_drive_desc *dd = NULL; 17860Sstevel@tonic-gate md_drive_desc *p; 17870Sstevel@tonic-gate int replicacount; 17880Sstevel@tonic-gate int replica_delete_count; 17890Sstevel@tonic-gate int nr_replica_slices; 17900Sstevel@tonic-gate int i; 17910Sstevel@tonic-gate int stop_svmdaemons = 0; 17920Sstevel@tonic-gate int rval = 0; 17930Sstevel@tonic-gate int index; 17940Sstevel@tonic-gate int valid_replicas_nottodelete = 0; 17950Sstevel@tonic-gate int invalid_replicas_nottodelete = 0; 17960Sstevel@tonic-gate int invalid_replicas_todelete = 0; 17970Sstevel@tonic-gate int errored = 0; 17980Sstevel@tonic-gate int *tag_array; 17990Sstevel@tonic-gate int fd = -1; 18000Sstevel@tonic-gate md_error_t status = mdnullerror; 18010Sstevel@tonic-gate md_set_desc *sd; 18020Sstevel@tonic-gate int stale_bool = FALSE; 18030Sstevel@tonic-gate int flags; 18040Sstevel@tonic-gate 18050Sstevel@tonic-gate /* 18060Sstevel@tonic-gate * Error if we don't get some work to do. 
18070Sstevel@tonic-gate */ 18080Sstevel@tonic-gate if (db_nlp == NULL) 18090Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 18100Sstevel@tonic-gate 18110Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 18120Sstevel@tonic-gate return (-1); 18130Sstevel@tonic-gate 18140Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 18150Sstevel@tonic-gate c.c_id = 0; 18160Sstevel@tonic-gate c.c_setno = sp->setno; 18170Sstevel@tonic-gate 18180Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 18190Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 18200Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 18210Sstevel@tonic-gate 18220Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 18230Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 18240Sstevel@tonic-gate 18250Sstevel@tonic-gate /* 18260Sstevel@tonic-gate * Is current set STALE? 18270Sstevel@tonic-gate */ 18280Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 18290Sstevel@tonic-gate stale_bool = TRUE; 18300Sstevel@tonic-gate } 18310Sstevel@tonic-gate 18320Sstevel@tonic-gate replicacount = c.c_dbcnt; 18330Sstevel@tonic-gate 18340Sstevel@tonic-gate assert(db_nlp != NULL); 18350Sstevel@tonic-gate 18360Sstevel@tonic-gate /* 18370Sstevel@tonic-gate * go through and gather how many data bases are on each 18380Sstevel@tonic-gate * device specified. 
18390Sstevel@tonic-gate */ 18400Sstevel@tonic-gate 18410Sstevel@tonic-gate nr_replica_slices = deletelist_length(db_nlp); 18420Sstevel@tonic-gate tag_array = (int *)calloc(nr_replica_slices, sizeof (int)); 18430Sstevel@tonic-gate 18440Sstevel@tonic-gate replica_delete_count = 0; 18450Sstevel@tonic-gate for (i = 0; i < replicacount; i++) { 18460Sstevel@tonic-gate char *devname; 18470Sstevel@tonic-gate int found = 0; 18480Sstevel@tonic-gate 18490Sstevel@tonic-gate c.c_id = i; 18500Sstevel@tonic-gate 18510Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 18520Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 18530Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 18560Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 18570Sstevel@tonic-gate 18580Sstevel@tonic-gate devname = splicename(&c.c_devname); 18590Sstevel@tonic-gate 1860*5109Spetede if (strstr(devname, META_LONGDISKNAME_STR) != NULL) { 1861*5109Spetede Free(devname); 1862*5109Spetede devname = getlongname(&c, ep); 1863*5109Spetede if (devname == NULL) { 1864*5109Spetede return (-1); 1865*5109Spetede } 1866*5109Spetede } 1867*5109Spetede 18680Sstevel@tonic-gate if ((index = in_deletelist(devname, db_nlp)) != -1) { 18690Sstevel@tonic-gate found = 1; 18700Sstevel@tonic-gate tag_array[index] = 1; 18710Sstevel@tonic-gate replica_delete_count++; 18720Sstevel@tonic-gate } 18730Sstevel@tonic-gate 18740Sstevel@tonic-gate errored = c.c_locator.l_flags & (MDDB_F_EREAD | 1875*5109Spetede MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT | 1876*5109Spetede MDDB_F_EDATA | MDDB_F_EMASTER); 18770Sstevel@tonic-gate 18780Sstevel@tonic-gate /* 18790Sstevel@tonic-gate * There are four combinations of "errored" and "found" 18800Sstevel@tonic-gate * and they are used to find the number of 18810Sstevel@tonic-gate * (a) valid/invalid replicas that are not in the delete 18820Sstevel@tonic-gate * list and 
are available in the system. 18830Sstevel@tonic-gate * (b) valid/invalid replicas that are to be deleted. 18840Sstevel@tonic-gate */ 18850Sstevel@tonic-gate 18860Sstevel@tonic-gate if (errored && !found) /* errored and !found */ 18870Sstevel@tonic-gate invalid_replicas_nottodelete++; 18880Sstevel@tonic-gate else if (!found) /* !errored and !found */ 18890Sstevel@tonic-gate valid_replicas_nottodelete++; 18900Sstevel@tonic-gate else if (errored) /* errored and found */ 18910Sstevel@tonic-gate invalid_replicas_todelete++; 18920Sstevel@tonic-gate /* 18930Sstevel@tonic-gate * else it is !errored and found. This means 18940Sstevel@tonic-gate * valid_replicas_todelete++; But this variable will not 18950Sstevel@tonic-gate * be used anywhere 18960Sstevel@tonic-gate */ 18970Sstevel@tonic-gate 18980Sstevel@tonic-gate Free(devname); 18990Sstevel@tonic-gate } 19000Sstevel@tonic-gate 19010Sstevel@tonic-gate index = 0; 19020Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 19030Sstevel@tonic-gate np = nlp->namep; 19040Sstevel@tonic-gate if (tag_array[index++] != 1) { 19050Sstevel@tonic-gate Free(tag_array); 19060Sstevel@tonic-gate return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname)); 19070Sstevel@tonic-gate } 19080Sstevel@tonic-gate } 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate Free(tag_array); 19110Sstevel@tonic-gate 19120Sstevel@tonic-gate 19130Sstevel@tonic-gate /* if all replicas are deleted stop mdmonitord */ 19140Sstevel@tonic-gate if ((replicacount - replica_delete_count) == 0) 19150Sstevel@tonic-gate stop_svmdaemons = 1; 19160Sstevel@tonic-gate 19170Sstevel@tonic-gate if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) { 19180Sstevel@tonic-gate if (force_option & MDFORCE_NONE) 19190Sstevel@tonic-gate return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname)); 19200Sstevel@tonic-gate if (! metaislocalset(sp) && ! 
(force_option & MDFORCE_DS)) 19210Sstevel@tonic-gate return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname)); 19220Sstevel@tonic-gate } 19230Sstevel@tonic-gate 19240Sstevel@tonic-gate /* 19250Sstevel@tonic-gate * The following algorithms are followed to check for deletion: 19260Sstevel@tonic-gate * (a) If the delete list(db_nlp) has all invalid replicas and no valid 19270Sstevel@tonic-gate * replicas, then deletion should be allowed. 19280Sstevel@tonic-gate * (b) Deletion should be allowed only if valid replicas that are "not" 19290Sstevel@tonic-gate * to be deleted is always greater than the invalid replicas that 19300Sstevel@tonic-gate * are "not" to be deleted. 19310Sstevel@tonic-gate * (c) If the user uses -f option, then deletion should be allowed. 19320Sstevel@tonic-gate */ 19330Sstevel@tonic-gate 19340Sstevel@tonic-gate if ((invalid_replicas_todelete != replica_delete_count) && 1935*5109Spetede (invalid_replicas_nottodelete > valid_replicas_nottodelete) && 1936*5109Spetede (force_option != MDFORCE_LOCAL)) 19370Sstevel@tonic-gate return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname)); 19380Sstevel@tonic-gate 19390Sstevel@tonic-gate /* 19400Sstevel@tonic-gate * go through and tell kernel to delete them 19410Sstevel@tonic-gate */ 19420Sstevel@tonic-gate 19430Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 19440Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 19450Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 19460Sstevel@tonic-gate 19470Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 19480Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 19490Sstevel@tonic-gate 19500Sstevel@tonic-gate if (! metaislocalset(sp)) { 19510Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 19520Sstevel@tonic-gate if (! 
mdisok(ep)) 19530Sstevel@tonic-gate return (-1); 19540Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 19550Sstevel@tonic-gate return (-1); 19560Sstevel@tonic-gate } 19570Sstevel@tonic-gate 19580Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 19590Sstevel@tonic-gate np = nlp->namep; 19600Sstevel@tonic-gate 19610Sstevel@tonic-gate /* 19620Sstevel@tonic-gate * If mddb is being deleted from MN diskset and node is 19630Sstevel@tonic-gate * an owner of the diskset then use rpc.mdcommd 19640Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 19650Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 19660Sstevel@tonic-gate * can't write the message to the mddb. 19670Sstevel@tonic-gate * 19680Sstevel@tonic-gate * When mddbs are first being added to set, a detach can 19690Sstevel@tonic-gate * be called before any node has joined the diskset, so 19700Sstevel@tonic-gate * must check to see if node is an owner of the diskset. 19710Sstevel@tonic-gate * 19720Sstevel@tonic-gate * Otherwise, just delete mddb from this node. 19730Sstevel@tonic-gate */ 19740Sstevel@tonic-gate 19750Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 19760Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 19770Sstevel@tonic-gate md_mn_result_t *resultp; 19780Sstevel@tonic-gate md_mn_msg_meta_db_detach_t detach; 19790Sstevel@tonic-gate int send_rval; 19800Sstevel@tonic-gate 19810Sstevel@tonic-gate /* 19820Sstevel@tonic-gate * The following algorithm is used to detach replicas. 
19830Sstevel@tonic-gate * - META_DB_DETACH message generates submsgs 19840Sstevel@tonic-gate * - BLOCK parse (master) 19850Sstevel@tonic-gate * - MDDB_DETACH replicas 19860Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 19870Sstevel@tonic-gate * information to be sent from master 19880Sstevel@tonic-gate * to slaves at a higher class than the 19890Sstevel@tonic-gate * unblock so the parse message will 19900Sstevel@tonic-gate * reach slaves before unblock message. 19910Sstevel@tonic-gate */ 19920Sstevel@tonic-gate (void) splitname(np->bname, &detach.msg_splitname); 19930Sstevel@tonic-gate 19940Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 19950Sstevel@tonic-gate detach.msg_devid[0] = NULL; 19960Sstevel@tonic-gate 19970Sstevel@tonic-gate /* 19980Sstevel@tonic-gate * If reconfig cycle has been started, this node is 19990Sstevel@tonic-gate * stuck in in the return step until this command has 20000Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 20010Sstevel@tonic-gate * send_message to fail (instead of retrying) 20020Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 20030Sstevel@tonic-gate * cycle to proceed. 
20040Sstevel@tonic-gate */ 20050Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 20060Sstevel@tonic-gate if (stale_bool == TRUE) 20070Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 20080Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 2009*5109Spetede MD_MN_MSG_META_DB_DETACH, 2010*5109Spetede flags, (char *)&detach, 2011*5109Spetede sizeof (md_mn_msg_meta_db_detach_t), 2012*5109Spetede &resultp, ep); 20130Sstevel@tonic-gate if (send_rval != 0) { 20140Sstevel@tonic-gate rval = -1; 20150Sstevel@tonic-gate if (resultp == NULL) 20160Sstevel@tonic-gate (void) mddserror(ep, 20170Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 20180Sstevel@tonic-gate sp->setno, NULL, NULL, 20190Sstevel@tonic-gate sp->setname); 20200Sstevel@tonic-gate else { 20210Sstevel@tonic-gate (void) mdstealerror(ep, 20220Sstevel@tonic-gate &(resultp->mmr_ep)); 20230Sstevel@tonic-gate if (mdisok(ep)) { 20240Sstevel@tonic-gate (void) mddserror(ep, 20250Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 20260Sstevel@tonic-gate sp->setno, NULL, NULL, 20270Sstevel@tonic-gate sp->setname); 20280Sstevel@tonic-gate } 20290Sstevel@tonic-gate free_result(resultp); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate goto out; 20320Sstevel@tonic-gate } 20330Sstevel@tonic-gate if (resultp) 20340Sstevel@tonic-gate free_result(resultp); 20350Sstevel@tonic-gate } else { 20360Sstevel@tonic-gate i = 0; 20370Sstevel@tonic-gate while (i < c.c_dbcnt) { 20380Sstevel@tonic-gate char *devname; 20390Sstevel@tonic-gate 20400Sstevel@tonic-gate c.c_id = i; 20410Sstevel@tonic-gate 20420Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 20430Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 20440Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 20450Sstevel@tonic-gate 20460Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, 20470Sstevel@tonic-gate &c.c_mde, NULL)) { 20480Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 20490Sstevel@tonic-gate goto out; 20500Sstevel@tonic-gate } 20510Sstevel@tonic-gate 
20520Sstevel@tonic-gate devname = splicename(&c.c_devname); 2053*5109Spetede 2054*5109Spetede if (strstr(devname, META_LONGDISKNAME_STR) 2055*5109Spetede != NULL) { 2056*5109Spetede Free(devname); 2057*5109Spetede devname = getlongname(&c, ep); 2058*5109Spetede if (devname == NULL) { 2059*5109Spetede return (-1); 2060*5109Spetede } 2061*5109Spetede } 2062*5109Spetede 20630Sstevel@tonic-gate if (strcmp(devname, np->bname) != 0) { 20640Sstevel@tonic-gate Free(devname); 20650Sstevel@tonic-gate i++; 20660Sstevel@tonic-gate continue; 20670Sstevel@tonic-gate } 20680Sstevel@tonic-gate Free(devname); 20690Sstevel@tonic-gate 20700Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 20710Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 20720Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 20730Sstevel@tonic-gate 20740Sstevel@tonic-gate if (metaioctl(MD_DB_DELDEV, &c, 20750Sstevel@tonic-gate &c.c_mde, NULL) != 0) { 20760Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 20770Sstevel@tonic-gate goto out; 20780Sstevel@tonic-gate } 20790Sstevel@tonic-gate 20800Sstevel@tonic-gate /* Not incrementing "i" intentionally */ 20810Sstevel@tonic-gate } 20820Sstevel@tonic-gate } 20830Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 20840Sstevel@tonic-gate /* update the dbcnt and size in dd */ 20850Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) { 20860Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 20870Sstevel@tonic-gate p->dd_dbcnt = 0; 20880Sstevel@tonic-gate p->dd_dbsize = 0; 20890Sstevel@tonic-gate break; 20900Sstevel@tonic-gate } 20910Sstevel@tonic-gate } 20920Sstevel@tonic-gate 20930Sstevel@tonic-gate /* 20940Sstevel@tonic-gate * Slam a dummy master block and make it self 20950Sstevel@tonic-gate * identifying 20960Sstevel@tonic-gate */ 20970Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) >= 0) { 20980Sstevel@tonic-gate meta_mkdummymaster(sp, fd, 16); 20990Sstevel@tonic-gate (void) close(fd); 21000Sstevel@tonic-gate } 21010Sstevel@tonic-gate } 21020Sstevel@tonic-gate } 21030Sstevel@tonic-gate out: 21040Sstevel@tonic-gate if (metaislocalset(sp)) { 21050Sstevel@tonic-gate /* 21060Sstevel@tonic-gate * Stop all the daemons if there are 21070Sstevel@tonic-gate * no more replicas so that the module can be 21080Sstevel@tonic-gate * unloaded. 
21090Sstevel@tonic-gate */ 21100Sstevel@tonic-gate if (rval == 0 && stop_svmdaemons == 1) { 21110Sstevel@tonic-gate char buf[MAXPATHLEN]; 21120Sstevel@tonic-gate int i; 21130Sstevel@tonic-gate 21140Sstevel@tonic-gate for (i = 0; i < DAEMON_COUNT; i++) { 21150Sstevel@tonic-gate (void) snprintf(buf, MAXPATHLEN, 2116*5109Spetede "/usr/bin/pkill -%s -x %s", 2117*5109Spetede svmd_kill_list[i].svmd_kill_val, 2118*5109Spetede svmd_kill_list[i].svmd_name); 21190Sstevel@tonic-gate if (pclose(popen(buf, "w")) == -1) 21200Sstevel@tonic-gate md_perror(buf); 21210Sstevel@tonic-gate } 21220Sstevel@tonic-gate 21230Sstevel@tonic-gate if (meta_smf_disable(META_SMF_ALL, &status) == -1) { 21240Sstevel@tonic-gate mde_perror(&status, ""); 21250Sstevel@tonic-gate mdclrerror(&status); 21260Sstevel@tonic-gate } 21270Sstevel@tonic-gate } 21280Sstevel@tonic-gate if (buildconf(sp, &status)) { 21290Sstevel@tonic-gate /* Don't mask any previous errors */ 21300Sstevel@tonic-gate if (rval == 0) 21310Sstevel@tonic-gate rval = mdstealerror(ep, &status); 21320Sstevel@tonic-gate else 21330Sstevel@tonic-gate mdclrerror(&status); 21340Sstevel@tonic-gate return (rval); 21350Sstevel@tonic-gate } 21360Sstevel@tonic-gate 21370Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 21380Sstevel@tonic-gate /* Don't mask any previous errors */ 21390Sstevel@tonic-gate if (rval == 0) 21400Sstevel@tonic-gate rval = mdstealerror(ep, &status); 21410Sstevel@tonic-gate else 21420Sstevel@tonic-gate mdclrerror(&status); 21430Sstevel@tonic-gate } 21440Sstevel@tonic-gate } else { 21450Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 21460Sstevel@tonic-gate (force_option & MDFORCE_SET_LOCKED), 21470Sstevel@tonic-gate ((force_option & MDFORCE_LOCAL) | 21480Sstevel@tonic-gate (force_option & MDFORCE_DS)), &status)) { 21490Sstevel@tonic-gate /* Don't mask any previous errors */ 21500Sstevel@tonic-gate if (rval == 0) 21510Sstevel@tonic-gate rval = mdstealerror(ep, &status); 21520Sstevel@tonic-gate else 
21530Sstevel@tonic-gate mdclrerror(&status); 21540Sstevel@tonic-gate } 21550Sstevel@tonic-gate metafreedrivedesc(&dd); 21560Sstevel@tonic-gate } 21570Sstevel@tonic-gate if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) { 21580Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 21590Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 21600Sstevel@tonic-gate } 21610Sstevel@tonic-gate } 21620Sstevel@tonic-gate return (rval); 21630Sstevel@tonic-gate } 21640Sstevel@tonic-gate 21650Sstevel@tonic-gate static md_replica_t * 21660Sstevel@tonic-gate metareplicaname( 21670Sstevel@tonic-gate mdsetname_t *sp, 21680Sstevel@tonic-gate int flags, 21690Sstevel@tonic-gate struct mddb_config *c, 21700Sstevel@tonic-gate md_error_t *ep 21710Sstevel@tonic-gate ) 21720Sstevel@tonic-gate { 21730Sstevel@tonic-gate md_replica_t *rp; 21740Sstevel@tonic-gate char *devname; 21750Sstevel@tonic-gate size_t sz; 2176*5109Spetede devid_nmlist_t *disklist = NULL; 2177*5109Spetede char *devid_str; 21780Sstevel@tonic-gate 21790Sstevel@tonic-gate /* allocate replicaname */ 21800Sstevel@tonic-gate rp = Zalloc(sizeof (*rp)); 21810Sstevel@tonic-gate 21820Sstevel@tonic-gate /* get device name */ 21830Sstevel@tonic-gate devname = splicename(&c->c_devname); 2184*5109Spetede 2185*5109Spetede /* 2186*5109Spetede * Check if the device has a long name (>40 characters) and 2187*5109Spetede * if so then we have to use devids to get the device name. 2188*5109Spetede * If this cannot be done then we have to fail the request. 
2189*5109Spetede */ 2190*5109Spetede if (strstr(devname, META_LONGDISKNAME_STR) != NULL) { 2191*5109Spetede if (c->c_locator.l_devid != NULL) { 2192*5109Spetede if (meta_deviceid_to_nmlist("/dev/dsk", 2193*5109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, 2194*5109Spetede c->c_locator.l_minor_name, &disklist) != 0) { 2195*5109Spetede devid_str = devid_str_encode( 2196*5109Spetede (ddi_devid_t)(uintptr_t) 2197*5109Spetede c->c_locator.l_devid, NULL); 2198*5109Spetede (void) mderror(ep, MDE_MISSING_DEVID_DISK, ""); 2199*5109Spetede mderrorextra(ep, devid_str); 2200*5109Spetede if (devid_str != NULL) 2201*5109Spetede devid_str_free(devid_str); 2202*5109Spetede Free(rp); 2203*5109Spetede Free(devname); 2204*5109Spetede return (NULL); 2205*5109Spetede } 2206*5109Spetede } else { 2207*5109Spetede (void) mderror(ep, MDE_NODEVID, ""); 2208*5109Spetede Free(rp); 2209*5109Spetede Free(devname); 2210*5109Spetede return (NULL); 2211*5109Spetede } 2212*5109Spetede Free(devname); 2213*5109Spetede devname = disklist[0].devname; 2214*5109Spetede } 2215*5109Spetede 22160Sstevel@tonic-gate if (flags & PRINT_FAST) { 22171623Stw21770 if ((rp->r_namep = metaname_fast(&sp, devname, 22181623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 22190Sstevel@tonic-gate Free(devname); 22200Sstevel@tonic-gate Free(rp); 22210Sstevel@tonic-gate return (NULL); 22220Sstevel@tonic-gate } 22230Sstevel@tonic-gate } else { 22241623Stw21770 if ((rp->r_namep = metaname(&sp, devname, 22251623Stw21770 LOGICAL_DEVICE, ep)) == NULL) { 22260Sstevel@tonic-gate Free(devname); 22270Sstevel@tonic-gate Free(rp); 22280Sstevel@tonic-gate return (NULL); 22290Sstevel@tonic-gate } 22300Sstevel@tonic-gate } 22310Sstevel@tonic-gate Free(devname); 22320Sstevel@tonic-gate 22330Sstevel@tonic-gate /* make sure it's OK */ 22340Sstevel@tonic-gate if ((! 
(flags & MD_BASICNAME_OK)) && 22350Sstevel@tonic-gate (metachkcomp(rp->r_namep, ep) != 0)) { 22360Sstevel@tonic-gate Free(rp); 22370Sstevel@tonic-gate return (NULL); 22380Sstevel@tonic-gate } 22390Sstevel@tonic-gate 224062Sjeanm rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR; 224162Sjeanm rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR; 22420Sstevel@tonic-gate rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID; 22430Sstevel@tonic-gate if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) { 224462Sjeanm sz = devid_sizeof((ddi_devid_t)(uintptr_t) 224562Sjeanm (c->c_locator.l_devid)); 22460Sstevel@tonic-gate if ((rp->r_devid = (ddi_devid_t)malloc(sz)) == 22470Sstevel@tonic-gate (ddi_devid_t)NULL) { 22480Sstevel@tonic-gate Free(rp); 22490Sstevel@tonic-gate return (NULL); 22500Sstevel@tonic-gate } 22510Sstevel@tonic-gate (void) memcpy((void *)rp->r_devid, 225262Sjeanm (void *)(uintptr_t)c->c_locator.l_devid, sz); 22530Sstevel@tonic-gate (void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name); 22540Sstevel@tonic-gate rp->r_flags &= ~MDDB_F_NODEVID; 22550Sstevel@tonic-gate /* Overwrite dev derived from name with dev from devid */ 22560Sstevel@tonic-gate rp->r_namep->dev = meta_expldev(c->c_locator.l_dev); 22570Sstevel@tonic-gate } 22580Sstevel@tonic-gate (void) strcpy(rp->r_driver_name, c->c_locator.l_driver); 22590Sstevel@tonic-gate 22600Sstevel@tonic-gate rp->r_blkno = c->c_locator.l_blkno; 22610Sstevel@tonic-gate if (c->c_dbend != 0) 22620Sstevel@tonic-gate rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1; 22630Sstevel@tonic-gate 22640Sstevel@tonic-gate /* return replica */ 22650Sstevel@tonic-gate return (rp); 22660Sstevel@tonic-gate } 22670Sstevel@tonic-gate 22680Sstevel@tonic-gate /* 22690Sstevel@tonic-gate * free replica list 22700Sstevel@tonic-gate */ 22710Sstevel@tonic-gate void 22720Sstevel@tonic-gate metafreereplicalist( 22730Sstevel@tonic-gate md_replicalist_t *rlp 22740Sstevel@tonic-gate ) 22750Sstevel@tonic-gate { 22760Sstevel@tonic-gate md_replicalist_t *rl = NULL; 
22770Sstevel@tonic-gate 22780Sstevel@tonic-gate for (/* void */; (rlp != NULL); rlp = rl) { 22790Sstevel@tonic-gate rl = rlp->rl_next; 22800Sstevel@tonic-gate if (rlp->rl_repp->r_devid != (ddi_devid_t)0) { 22810Sstevel@tonic-gate free(rlp->rl_repp->r_devid); 22820Sstevel@tonic-gate } 22830Sstevel@tonic-gate Free(rlp->rl_repp); 22840Sstevel@tonic-gate Free(rlp); 22850Sstevel@tonic-gate } 22860Sstevel@tonic-gate } 22870Sstevel@tonic-gate 22880Sstevel@tonic-gate /* 22890Sstevel@tonic-gate * return list of all replicas in set 22900Sstevel@tonic-gate */ 22910Sstevel@tonic-gate int 22920Sstevel@tonic-gate metareplicalist( 22930Sstevel@tonic-gate mdsetname_t *sp, 22940Sstevel@tonic-gate int flags, 22950Sstevel@tonic-gate md_replicalist_t **rlpp, 22960Sstevel@tonic-gate md_error_t *ep 22970Sstevel@tonic-gate ) 22980Sstevel@tonic-gate { 22990Sstevel@tonic-gate md_replicalist_t **tail = rlpp; 23000Sstevel@tonic-gate int count = 0; 23010Sstevel@tonic-gate struct mddb_config c; 23020Sstevel@tonic-gate int i; 23030Sstevel@tonic-gate char *devid; 23040Sstevel@tonic-gate 23050Sstevel@tonic-gate /* for each replica */ 23060Sstevel@tonic-gate i = 0; 23070Sstevel@tonic-gate do { 23080Sstevel@tonic-gate md_replica_t *rp; 23090Sstevel@tonic-gate 23100Sstevel@tonic-gate /* get next replica */ 23110Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 23120Sstevel@tonic-gate c.c_id = i; 23130Sstevel@tonic-gate c.c_setno = sp->setno; 23140Sstevel@tonic-gate 23150Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 23160Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 23170Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 23180Sstevel@tonic-gate mdclrerror(&c.c_mde); 23190Sstevel@tonic-gate break; /* handle none at all */ 23200Sstevel@tonic-gate } 23210Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 23220Sstevel@tonic-gate goto out; 23230Sstevel@tonic-gate } 23240Sstevel@tonic-gate 23250Sstevel@tonic-gate if 
(c.c_locator.l_devid_flags & MDDB_DEVID_SZ) { 23260Sstevel@tonic-gate if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) { 23270Sstevel@tonic-gate (void) mdsyserror(ep, ENOMEM, META_DBCONF); 23280Sstevel@tonic-gate goto out; 23290Sstevel@tonic-gate } 23300Sstevel@tonic-gate c.c_locator.l_devid = (uintptr_t)devid; 23310Sstevel@tonic-gate /* 23320Sstevel@tonic-gate * Turn on space and sz flags since 'sz' amount of 23330Sstevel@tonic-gate * space has been alloc'd. 23340Sstevel@tonic-gate */ 23350Sstevel@tonic-gate c.c_locator.l_devid_flags = 2336*5109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 23370Sstevel@tonic-gate } 23380Sstevel@tonic-gate 23390Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 23400Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 23410Sstevel@tonic-gate mdclrerror(&c.c_mde); 23420Sstevel@tonic-gate break; /* handle none at all */ 23430Sstevel@tonic-gate } 23440Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 23450Sstevel@tonic-gate goto out; 23460Sstevel@tonic-gate } 23470Sstevel@tonic-gate 23480Sstevel@tonic-gate /* 23490Sstevel@tonic-gate * Paranoid check - shouldn't happen, but is left as 23500Sstevel@tonic-gate * a place holder for changes that will be needed after 23510Sstevel@tonic-gate * dynamic reconfiguration changes are added to SVM (to 23520Sstevel@tonic-gate * support movement of disks at any point in time). 
23530Sstevel@tonic-gate */ 23540Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) { 23550Sstevel@tonic-gate (void) fprintf(stderr, 23560Sstevel@tonic-gate dgettext(TEXT_DOMAIN, 2357*5109Spetede "Error: Relocation Information " 2358*5109Spetede "(drvnm=%s, mnum=0x%lx) \n" 2359*5109Spetede "relocation information size changed - \n" 2360*5109Spetede "rerun command\n"), 23610Sstevel@tonic-gate c.c_locator.l_driver, c.c_locator.l_mnum); 23620Sstevel@tonic-gate (void) mderror(ep, MDE_DEVID_TOOBIG, NULL); 23630Sstevel@tonic-gate goto out; 23640Sstevel@tonic-gate } 23650Sstevel@tonic-gate 23660Sstevel@tonic-gate if (c.c_dbcnt == 0) 23670Sstevel@tonic-gate break; /* handle none at all */ 23680Sstevel@tonic-gate 23690Sstevel@tonic-gate /* get info */ 23700Sstevel@tonic-gate if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL) 23710Sstevel@tonic-gate goto out; 23720Sstevel@tonic-gate 23730Sstevel@tonic-gate /* append to list */ 23740Sstevel@tonic-gate *tail = Zalloc(sizeof (**tail)); 23750Sstevel@tonic-gate (*tail)->rl_repp = rp; 23760Sstevel@tonic-gate tail = &(*tail)->rl_next; 23770Sstevel@tonic-gate ++count; 23780Sstevel@tonic-gate 23790Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 23800Sstevel@tonic-gate free(devid); 23810Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 23820Sstevel@tonic-gate } 23830Sstevel@tonic-gate 23840Sstevel@tonic-gate } while (++i < c.c_dbcnt); 23850Sstevel@tonic-gate 23860Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 23870Sstevel@tonic-gate free(devid); 23880Sstevel@tonic-gate } 23890Sstevel@tonic-gate 23900Sstevel@tonic-gate /* return count */ 23910Sstevel@tonic-gate return (count); 23920Sstevel@tonic-gate 23930Sstevel@tonic-gate /* cleanup, return error */ 23940Sstevel@tonic-gate out: 23950Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 23960Sstevel@tonic-gate free(devid); 23970Sstevel@tonic-gate } 23980Sstevel@tonic-gate metafreereplicalist(*rlpp); 
23990Sstevel@tonic-gate *rlpp = NULL; 24000Sstevel@tonic-gate return (-1); 24010Sstevel@tonic-gate } 24020Sstevel@tonic-gate 24030Sstevel@tonic-gate /* 24040Sstevel@tonic-gate * meta_sync_db_locations - get list of replicas from kernel and write 24050Sstevel@tonic-gate * out to mddb.cf and md.conf. 'Syncs up' the replica list in 24060Sstevel@tonic-gate * the kernel with the replica list in the conf files. 24070Sstevel@tonic-gate * 24080Sstevel@tonic-gate */ 24090Sstevel@tonic-gate void 24100Sstevel@tonic-gate meta_sync_db_locations( 24110Sstevel@tonic-gate mdsetname_t *sp, 24120Sstevel@tonic-gate md_error_t *ep 24130Sstevel@tonic-gate ) 24140Sstevel@tonic-gate { 24150Sstevel@tonic-gate char *sname = 0; /* system file name */ 24160Sstevel@tonic-gate char *cname = 0; /* config file name */ 24170Sstevel@tonic-gate 24180Sstevel@tonic-gate if (!metaislocalset(sp)) 24190Sstevel@tonic-gate return; 24200Sstevel@tonic-gate 24210Sstevel@tonic-gate /* Updates backup of configuration file (aka mddb.cf) */ 24220Sstevel@tonic-gate if (buildconf(sp, ep) != 0) 24230Sstevel@tonic-gate return; 24240Sstevel@tonic-gate 24250Sstevel@tonic-gate /* Updates system configuration file (aka md.conf) */ 24260Sstevel@tonic-gate (void) meta_db_patch(sname, cname, 0, ep); 24270Sstevel@tonic-gate } 24280Sstevel@tonic-gate 24290Sstevel@tonic-gate /* 24300Sstevel@tonic-gate * setup_db_locations - parse the mddb.cf file and 24310Sstevel@tonic-gate * tells the driver which db locations to use. 
24320Sstevel@tonic-gate */ 24330Sstevel@tonic-gate int 24340Sstevel@tonic-gate meta_setup_db_locations( 24350Sstevel@tonic-gate md_error_t *ep 24360Sstevel@tonic-gate ) 24370Sstevel@tonic-gate { 24380Sstevel@tonic-gate mddb_config_t c; 24390Sstevel@tonic-gate FILE *fp; 24400Sstevel@tonic-gate char inbuff[1024]; 24410Sstevel@tonic-gate char *buff; 24420Sstevel@tonic-gate uint_t i; 24430Sstevel@tonic-gate size_t sz; 24440Sstevel@tonic-gate int rval = 0; 24450Sstevel@tonic-gate char *devidp; 24460Sstevel@tonic-gate uint_t devid_size; 24470Sstevel@tonic-gate char *minor_name = NULL; 24480Sstevel@tonic-gate ddi_devid_t devid_decode; 24490Sstevel@tonic-gate int checksum; 24500Sstevel@tonic-gate 24510Sstevel@tonic-gate /* do mddb.cf file */ 24520Sstevel@tonic-gate (void) memset(&c, '\0', sizeof (c)); 24530Sstevel@tonic-gate if ((fp = fopen(META_DBCONF, "r")) == NULL) { 24540Sstevel@tonic-gate if (errno != ENOENT) 24550Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 24560Sstevel@tonic-gate } 24570Sstevel@tonic-gate while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1), 24580Sstevel@tonic-gate fp)) != NULL)) { 24590Sstevel@tonic-gate 24600Sstevel@tonic-gate /* ignore comments */ 24610Sstevel@tonic-gate if (*buff == '#') 24620Sstevel@tonic-gate continue; 24630Sstevel@tonic-gate 24640Sstevel@tonic-gate /* parse locator */ 24650Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 24660Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 24670Sstevel@tonic-gate i = strcspn(buff, " \t"); 24680Sstevel@tonic-gate if (i > sizeof (c.c_locator.l_driver)) 24690Sstevel@tonic-gate i = sizeof (c.c_locator.l_driver); 24700Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, buff, i); 24710Sstevel@tonic-gate buff += i; 24720Sstevel@tonic-gate c.c_locator.l_dev = 24730Sstevel@tonic-gate makedev((major_t)0, (minor_t)strtol(buff, &buff, 10)); 24740Sstevel@tonic-gate c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10); 24750Sstevel@tonic-gate c.c_locator.l_mnum = 
minor(c.c_locator.l_dev); 24760Sstevel@tonic-gate 24770Sstevel@tonic-gate /* parse out devid */ 24780Sstevel@tonic-gate while (isspace((int)(*buff))) 24790Sstevel@tonic-gate buff += 1; 24800Sstevel@tonic-gate i = strcspn(buff, " \t"); 24810Sstevel@tonic-gate if ((devidp = (char *)malloc(i+1)) == NULL) 24820Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 24830Sstevel@tonic-gate 24840Sstevel@tonic-gate (void) strncpy(devidp, buff, i); 24850Sstevel@tonic-gate devidp[i] = '\0'; 24860Sstevel@tonic-gate if (devid_str_decode(devidp, &devid_decode, 24870Sstevel@tonic-gate &minor_name) == -1) { 24880Sstevel@tonic-gate free(devidp); 24890Sstevel@tonic-gate continue; 24900Sstevel@tonic-gate } 24910Sstevel@tonic-gate 24920Sstevel@tonic-gate /* Conf file must have minor name associated with devid */ 24930Sstevel@tonic-gate if (minor_name == NULL) { 24940Sstevel@tonic-gate free(devidp); 24950Sstevel@tonic-gate devid_free(devid_decode); 24960Sstevel@tonic-gate continue; 24970Sstevel@tonic-gate } 24980Sstevel@tonic-gate 24990Sstevel@tonic-gate sz = devid_sizeof(devid_decode); 25000Sstevel@tonic-gate /* Copy to devid size buffer that ioctl expects */ 25010Sstevel@tonic-gate if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) { 25020Sstevel@tonic-gate devid_free(devid_decode); 25030Sstevel@tonic-gate free(minor_name); 25040Sstevel@tonic-gate free(devidp); 25050Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 25060Sstevel@tonic-gate } 25070Sstevel@tonic-gate 250862Sjeanm (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 25090Sstevel@tonic-gate (void *)devid_decode, sz); 25100Sstevel@tonic-gate 25110Sstevel@tonic-gate devid_free(devid_decode); 25120Sstevel@tonic-gate 25130Sstevel@tonic-gate if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) { 25140Sstevel@tonic-gate free(minor_name); 25150Sstevel@tonic-gate free(devidp); 251662Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 25170Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, 
META_DBCONF)); 25180Sstevel@tonic-gate } 25190Sstevel@tonic-gate (void) strcpy(c.c_locator.l_minor_name, minor_name); 25200Sstevel@tonic-gate free(minor_name); 25210Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_VALID | 2522*5109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 25230Sstevel@tonic-gate c.c_locator.l_devid_sz = sz; 25240Sstevel@tonic-gate 25250Sstevel@tonic-gate devid_size = strlen(devidp); 25260Sstevel@tonic-gate buff += devid_size; 25270Sstevel@tonic-gate 25280Sstevel@tonic-gate checksum = strtol(buff, &buff, 10); 25290Sstevel@tonic-gate for (i = 0; c.c_locator.l_driver[i] != 0; i++) 25300Sstevel@tonic-gate checksum += c.c_locator.l_driver[i]; 25310Sstevel@tonic-gate for (i = 0; i < devid_size; i++) { 25320Sstevel@tonic-gate checksum += devidp[i]; 25330Sstevel@tonic-gate } 25340Sstevel@tonic-gate free(devidp); 25350Sstevel@tonic-gate 25360Sstevel@tonic-gate checksum += minor(c.c_locator.l_dev); 25370Sstevel@tonic-gate checksum += c.c_locator.l_blkno; 25380Sstevel@tonic-gate if (checksum != 42) { 25390Sstevel@tonic-gate /* overwritten later for more serious problems */ 25400Sstevel@tonic-gate rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF); 254162Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 25420Sstevel@tonic-gate continue; 25430Sstevel@tonic-gate } 25440Sstevel@tonic-gate c.c_locator.l_flags = 0; 25450Sstevel@tonic-gate 25460Sstevel@tonic-gate /* use db location */ 25470Sstevel@tonic-gate if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 254862Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 25490Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 25500Sstevel@tonic-gate } 25510Sstevel@tonic-gate 25520Sstevel@tonic-gate /* free up devid if in use */ 255362Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid); 25540Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 25550Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 25560Sstevel@tonic-gate } 25570Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0)) 25580Sstevel@tonic-gate 
return (mdsyserror(ep, errno, META_DBCONF)); 25590Sstevel@tonic-gate 25600Sstevel@tonic-gate /* check for stale database */ 25610Sstevel@tonic-gate (void) memset((char *)&c, 0, sizeof (struct mddb_config)); 25620Sstevel@tonic-gate c.c_id = 0; 25630Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 25640Sstevel@tonic-gate 2565*5109Spetede /* 2566*5109Spetede * While we do not need the devid here we may need to 2567*5109Spetede * know if devid's are being used by the kernel for 2568*5109Spetede * the replicas. This is because under some circumstances 2569*5109Spetede * we can only manipulate the SVM configuration if the 2570*5109Spetede * kernel is using devid's. 2571*5109Spetede */ 25720Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 2573*5109Spetede c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 2574*5109Spetede c.c_locator.l_devid_sz = 0; 25750Sstevel@tonic-gate 25760Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 25770Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_INVALID)) 25780Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 25790Sstevel@tonic-gate mdclrerror(&c.c_mde); 25800Sstevel@tonic-gate } 25810Sstevel@tonic-gate 25820Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) 25830Sstevel@tonic-gate return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET, 25840Sstevel@tonic-gate 0, NULL)); 25850Sstevel@tonic-gate 2586*5109Spetede if (c.c_locator.l_devid_sz != 0) { 2587*5109Spetede /* 2588*5109Spetede * Devid's are being used to track the replicas because 2589*5109Spetede * there is space for a devid. 2590*5109Spetede */ 2591*5109Spetede devid_in_use = TRUE; 2592*5109Spetede } 2593*5109Spetede 25940Sstevel@tonic-gate /* success */ 25950Sstevel@tonic-gate return (rval); 25960Sstevel@tonic-gate } 25970Sstevel@tonic-gate 25980Sstevel@tonic-gate /* 25990Sstevel@tonic-gate * meta_db_minreplica - returns the minimum size replica currently in use. 
26000Sstevel@tonic-gate */ 26010Sstevel@tonic-gate daddr_t 26020Sstevel@tonic-gate meta_db_minreplica( 26030Sstevel@tonic-gate mdsetname_t *sp, 26040Sstevel@tonic-gate md_error_t *ep 26050Sstevel@tonic-gate ) 26060Sstevel@tonic-gate { 26070Sstevel@tonic-gate md_replica_t *r; 26080Sstevel@tonic-gate md_replicalist_t *rl, *rlp = NULL; 26090Sstevel@tonic-gate daddr_t nblks = 0; 26100Sstevel@tonic-gate 26110Sstevel@tonic-gate if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) 26120Sstevel@tonic-gate return (-1); 26130Sstevel@tonic-gate 26140Sstevel@tonic-gate if (rlp == NULL) 26150Sstevel@tonic-gate return (-1); 26160Sstevel@tonic-gate 26170Sstevel@tonic-gate /* find the smallest existing replica */ 26180Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) { 26190Sstevel@tonic-gate r = rl->rl_repp; 26200Sstevel@tonic-gate nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); 26210Sstevel@tonic-gate } 26220Sstevel@tonic-gate 26230Sstevel@tonic-gate metafreereplicalist(rlp); 26240Sstevel@tonic-gate return (nblks); 26250Sstevel@tonic-gate } 26260Sstevel@tonic-gate 26270Sstevel@tonic-gate /* 26280Sstevel@tonic-gate * meta_get_replica_names 26290Sstevel@tonic-gate * returns an mdnamelist_t of replica slices 26300Sstevel@tonic-gate */ 26310Sstevel@tonic-gate /*ARGSUSED*/ 26320Sstevel@tonic-gate int 26330Sstevel@tonic-gate meta_get_replica_names( 26340Sstevel@tonic-gate mdsetname_t *sp, 26350Sstevel@tonic-gate mdnamelist_t **nlpp, 26360Sstevel@tonic-gate int options, 26370Sstevel@tonic-gate md_error_t *ep 26380Sstevel@tonic-gate ) 26390Sstevel@tonic-gate { 26400Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 26410Sstevel@tonic-gate md_replicalist_t *rl; 26420Sstevel@tonic-gate mdnamelist_t **tailpp = nlpp; 26430Sstevel@tonic-gate int cnt = 0; 26440Sstevel@tonic-gate 26450Sstevel@tonic-gate assert(nlpp != NULL); 26460Sstevel@tonic-gate 26470Sstevel@tonic-gate if (!metaislocalset(sp)) 26480Sstevel@tonic-gate goto out; 
26490Sstevel@tonic-gate 26500Sstevel@tonic-gate /* get replicas */ 26510Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) { 26520Sstevel@tonic-gate cnt = -1; 26530Sstevel@tonic-gate goto out; 26540Sstevel@tonic-gate } 26550Sstevel@tonic-gate 26560Sstevel@tonic-gate /* build name list */ 26570Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 26580Sstevel@tonic-gate /* 26590Sstevel@tonic-gate * Add the name struct to the end of the 26600Sstevel@tonic-gate * namelist but keep a pointer to the last 26610Sstevel@tonic-gate * element so that we don't incur the overhead 26620Sstevel@tonic-gate * of traversing the list each time 26630Sstevel@tonic-gate */ 26640Sstevel@tonic-gate tailpp = meta_namelist_append_wrapper( 2665*5109Spetede tailpp, rl->rl_repp->r_namep); 26660Sstevel@tonic-gate ++cnt; 26670Sstevel@tonic-gate } 26680Sstevel@tonic-gate 26690Sstevel@tonic-gate /* cleanup, return count or error */ 26700Sstevel@tonic-gate out: 26710Sstevel@tonic-gate metafreereplicalist(rlp); 26720Sstevel@tonic-gate return (cnt); 26730Sstevel@tonic-gate } 2674