10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51623Stw21770 * Common Development and Distribution License (the "License").
61623Stw21770 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
21*8452SJohn.Wren.Kennedy@Sun.COM
220Sstevel@tonic-gate /*
236195Sachimm * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that
290Sstevel@tonic-gate * TEXT_DOMAIN gets set to something.
300Sstevel@tonic-gate */
310Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
320Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST"
330Sstevel@tonic-gate #endif
340Sstevel@tonic-gate
350Sstevel@tonic-gate /*
360Sstevel@tonic-gate * Metadevice database interfaces.
370Sstevel@tonic-gate */
380Sstevel@tonic-gate
390Sstevel@tonic-gate #define MDDB
400Sstevel@tonic-gate
410Sstevel@tonic-gate #include <meta.h>
420Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
430Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
440Sstevel@tonic-gate #include <sys/lvm/mdio.h>
450Sstevel@tonic-gate #include <string.h>
460Sstevel@tonic-gate #include <strings.h>
470Sstevel@tonic-gate #include <ctype.h>
480Sstevel@tonic-gate
/*
 * Pairs the name of an SVM daemon with the kill value ("HUP", "KILL", ...)
 * recorded for it in svmd_kill_list.
 */
struct svm_daemon {
	char	*svmd_name;	/* daemon name, e.g. "mdmonitord" */
	char	*svmd_kill_val;	/* kill value string, e.g. "HUP" */
};

/*
 * This is a list of the daemons that are not stopped by the SVM smf(5)
 * services.  The mdmonitord is started via svc:/system/mdmonitor:default
 * but no contract(4) is constructed and so it is not stopped by smf(5).
 */
struct svm_daemon svmd_kill_list[] = {
	{ "mdmonitord",	"HUP" },
	{ "mddoors",	"KILL" },
};

/* number of entries in svmd_kill_list */
#define	DAEMON_COUNT	(sizeof (svmd_kill_list) / sizeof (svmd_kill_list[0]))
650Sstevel@tonic-gate
660Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
670Sstevel@tonic-gate
/*
 * Are the locator blocks for the replicas using devids?
 */
715109Spetede static int devid_in_use = FALSE;
725109Spetede
735109Spetede static char *
getlongname(struct mddb_config * c,md_error_t * ep)745109Spetede getlongname(
755109Spetede struct mddb_config *c,
765109Spetede md_error_t *ep
775109Spetede )
785109Spetede {
795109Spetede char *diskname = NULL;
805109Spetede char *devid_str;
815109Spetede devid_nmlist_t *disklist = NULL;
825109Spetede
835109Spetede c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
845109Spetede if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
855109Spetede (void) mdstealerror(ep, &c->c_mde);
865109Spetede return (NULL);
875109Spetede }
885109Spetede
895109Spetede if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) {
905109Spetede c->c_locator.l_devid = (uintptr_t)
915109Spetede Malloc(c->c_locator.l_devid_sz);
925109Spetede c->c_locator.l_devid_flags =
935109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
945109Spetede } else {
955109Spetede (void) mderror(ep, MDE_NODEVID, "");
965109Spetede goto out;
975109Spetede }
985109Spetede
995109Spetede if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
1005109Spetede (void) mdstealerror(ep, &c->c_mde);
1015109Spetede goto out;
1025109Spetede }
1035109Spetede
1045109Spetede if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
1055109Spetede (void) mderror(ep, MDE_NODEVID, "");
1065109Spetede goto out;
1075109Spetede }
1085109Spetede
1095109Spetede if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
1105109Spetede (void) mdstealerror(ep, &c->c_mde);
1115109Spetede goto out;
1125109Spetede }
1135109Spetede
1145109Spetede if (c->c_locator.l_devid != NULL) {
1155109Spetede if (meta_deviceid_to_nmlist("/dev/dsk",
1165109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
1175109Spetede c->c_locator.l_minor_name, &disklist) != 0) {
1185109Spetede devid_str = devid_str_encode(
1195109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
1205109Spetede (void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
1215109Spetede mderrorextra(ep, devid_str);
1225109Spetede if (devid_str != NULL)
1235109Spetede devid_str_free(devid_str);
1245109Spetede goto out;
1255109Spetede }
1265109Spetede diskname = Strdup(disklist[0].devname);
1275109Spetede }
1285109Spetede
1295109Spetede out:
1305109Spetede if (disklist != NULL)
1315109Spetede devid_free_nmlist(disklist);
1325109Spetede
1335109Spetede if (c->c_locator.l_devid != NULL)
1345109Spetede Free((void *)(uintptr_t)c->c_locator.l_devid);
1355109Spetede
1365109Spetede return (diskname);
1375109Spetede }
1385109Spetede
1395109Spetede /*
1400Sstevel@tonic-gate * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
1410Sstevel@tonic-gate */
1420Sstevel@tonic-gate md_timeval32_t
meta_get_lb_inittime(mdsetname_t * sp,md_error_t * ep)1430Sstevel@tonic-gate meta_get_lb_inittime(
1440Sstevel@tonic-gate mdsetname_t *sp,
1450Sstevel@tonic-gate md_error_t *ep
1460Sstevel@tonic-gate )
1470Sstevel@tonic-gate {
1480Sstevel@tonic-gate mddb_config_t c;
1490Sstevel@tonic-gate
1500Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
1510Sstevel@tonic-gate
1520Sstevel@tonic-gate /* Fill in setno, setname, and sideno */
1530Sstevel@tonic-gate c.c_setno = sp->setno;
1540Sstevel@tonic-gate
1550Sstevel@tonic-gate if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
1560Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde);
1570Sstevel@tonic-gate }
1580Sstevel@tonic-gate
1590Sstevel@tonic-gate return (c.c_timestamp);
1600Sstevel@tonic-gate }
1610Sstevel@tonic-gate
1620Sstevel@tonic-gate /*
1630Sstevel@tonic-gate * mkmasterblks writes out the master blocks of the mddb to the replica.
1640Sstevel@tonic-gate *
1650Sstevel@tonic-gate * In a MN diskset, this is called by the node that is adding this replica
1660Sstevel@tonic-gate * to the diskset.
1670Sstevel@tonic-gate */
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate #define MDDB_VERIFY_SIZE 8192
1700Sstevel@tonic-gate
1710Sstevel@tonic-gate static int
mkmasterblks(mdsetname_t * sp,mdname_t * np,int fd,daddr_t firstblk,int dbsize,md_timeval32_t inittime,md_error_t * ep)1720Sstevel@tonic-gate mkmasterblks(
1730Sstevel@tonic-gate mdsetname_t *sp,
1740Sstevel@tonic-gate mdname_t *np,
1750Sstevel@tonic-gate int fd,
1760Sstevel@tonic-gate daddr_t firstblk,
1770Sstevel@tonic-gate int dbsize,
1780Sstevel@tonic-gate md_timeval32_t inittime,
1790Sstevel@tonic-gate md_error_t *ep
1800Sstevel@tonic-gate )
1810Sstevel@tonic-gate {
1820Sstevel@tonic-gate int consecutive;
1830Sstevel@tonic-gate md_timeval32_t tp;
1840Sstevel@tonic-gate struct mddb_mb *mb;
1850Sstevel@tonic-gate char *buffer;
1860Sstevel@tonic-gate int iosize;
1870Sstevel@tonic-gate md_set_desc *sd;
1880Sstevel@tonic-gate int mn_set = 0;
1890Sstevel@tonic-gate daddr_t startblk;
1900Sstevel@tonic-gate int cnt;
1910Sstevel@tonic-gate ddi_devid_t devid;
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate if (! metaislocalset(sp)) {
1940Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
1950Sstevel@tonic-gate return (-1);
1960Sstevel@tonic-gate
1970Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
1980Sstevel@tonic-gate mn_set = 1; /* Used later */
1990Sstevel@tonic-gate }
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate
2020Sstevel@tonic-gate /*
2030Sstevel@tonic-gate * Loop to verify the entire mddb region on disk is read/writable.
2040Sstevel@tonic-gate * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
2050Sstevel@tonic-gate * chunks.
2060Sstevel@tonic-gate *
2070Sstevel@tonic-gate * A side-effect of this loop is to zero out the entire mddb region
2080Sstevel@tonic-gate */
2090Sstevel@tonic-gate if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
2100Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname));
2110Sstevel@tonic-gate
2120Sstevel@tonic-gate startblk = firstblk;
2130Sstevel@tonic-gate for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
2140Sstevel@tonic-gate
2150Sstevel@tonic-gate if (cnt > MDDB_VERIFY_SIZE)
2160Sstevel@tonic-gate consecutive = MDDB_VERIFY_SIZE;
2170Sstevel@tonic-gate else
2180Sstevel@tonic-gate consecutive = cnt;
2190Sstevel@tonic-gate
2200Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2210Sstevel@tonic-gate Free(buffer);
2220Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate
2250Sstevel@tonic-gate iosize = DEV_BSIZE * consecutive;
2260Sstevel@tonic-gate if (write(fd, buffer, iosize) != iosize) {
2270Sstevel@tonic-gate Free(buffer);
2280Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate
2310Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2320Sstevel@tonic-gate Free(buffer);
2330Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
2340Sstevel@tonic-gate }
2350Sstevel@tonic-gate
2360Sstevel@tonic-gate if (read(fd, buffer, iosize) != iosize) {
2370Sstevel@tonic-gate Free(buffer);
2380Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate
2410Sstevel@tonic-gate startblk += consecutive;
2420Sstevel@tonic-gate }
2430Sstevel@tonic-gate
2440Sstevel@tonic-gate Free(buffer);
2450Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL)
2460Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname));
2470Sstevel@tonic-gate
2480Sstevel@tonic-gate if (meta_gettimeofday(&tp) == -1) {
2490Sstevel@tonic-gate Free(mb);
2500Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
2510Sstevel@tonic-gate }
2520Sstevel@tonic-gate
2530Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_MB;
2540Sstevel@tonic-gate /*
2550Sstevel@tonic-gate * If a MN diskset, set master block revision for a MN set.
2560Sstevel@tonic-gate * Even though the master block structure is no different
2570Sstevel@tonic-gate * for a MN set, setting the revision field to a different
2580Sstevel@tonic-gate * number keeps any pre-MN_diskset code from accessing
2590Sstevel@tonic-gate * this diskset. It also allows for an early determination
2600Sstevel@tonic-gate * of a MN diskset when reading in from disk so that the
2610Sstevel@tonic-gate * proper size locator block and locator names structure
2620Sstevel@tonic-gate * can be read in thus saving time on diskset startup.
2630Sstevel@tonic-gate */
2640Sstevel@tonic-gate if (mn_set)
2650Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MNMB;
2660Sstevel@tonic-gate else
2670Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB;
2680Sstevel@tonic-gate mb->mb_timestamp = tp;
2690Sstevel@tonic-gate mb->mb_setno = sp->setno;
2700Sstevel@tonic-gate mb->mb_blkcnt = dbsize - 1;
2710Sstevel@tonic-gate mb->mb_blkno = firstblk;
2720Sstevel@tonic-gate mb->mb_nextblk = 0;
2730Sstevel@tonic-gate
2740Sstevel@tonic-gate mb->mb_blkmap.m_firstblk = firstblk + 1;
2750Sstevel@tonic-gate mb->mb_blkmap.m_consecutive = dbsize - 1;
2760Sstevel@tonic-gate if (! metaislocalset(sp)) {
2770Sstevel@tonic-gate mb->mb_setcreatetime = inittime;
2780Sstevel@tonic-gate }
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate /*
2810Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in
2820Sstevel@tonic-gate * the master block. The saved devid is used to provide a mapping
2830Sstevel@tonic-gate * between this disk's devid and the devid stored into the master
2840Sstevel@tonic-gate * block. This allows the disk image to be self-identifying
2850Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used
2860Sstevel@tonic-gate * when we try to import these disks on the remote copied image.
2870Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is
2880Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable
2890Sstevel@tonic-gate * in the remote copy scenario.
2900Sstevel@tonic-gate */
2910Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) {
2920Sstevel@tonic-gate size_t len;
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate len = devid_sizeof(devid);
2950Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) {
2960Sstevel@tonic-gate /* there is enough space to store the devid */
2970Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE;
2980Sstevel@tonic-gate mb->mb_devid_len = len;
2990Sstevel@tonic-gate (void) memcpy(mb->mb_devid, devid, len);
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate devid_free(devid);
3020Sstevel@tonic-gate }
3030Sstevel@tonic-gate
3040Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3050Sstevel@tonic-gate (crc_skip_t *)NULL);
3060Sstevel@tonic-gate
3070Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3080Sstevel@tonic-gate Free(mb);
3090Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
3100Sstevel@tonic-gate }
3110Sstevel@tonic-gate
3120Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3130Sstevel@tonic-gate Free(mb);
3140Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
3150Sstevel@tonic-gate }
3160Sstevel@tonic-gate
3170Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3180Sstevel@tonic-gate Free(mb);
3190Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
3200Sstevel@tonic-gate }
3210Sstevel@tonic-gate
3220Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3230Sstevel@tonic-gate Free(mb);
3240Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
3250Sstevel@tonic-gate }
3260Sstevel@tonic-gate
3270Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
3285109Spetede (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
3290Sstevel@tonic-gate Free(mb);
3300Sstevel@tonic-gate return (mdmddberror(ep, MDE_NOTVERIFIED,
3315109Spetede meta_getminor(np->dev), sp->setno, 0, np->rname));
3320Sstevel@tonic-gate }
3330Sstevel@tonic-gate
3340Sstevel@tonic-gate Free(mb);
3350Sstevel@tonic-gate return (0);
3360Sstevel@tonic-gate }
3370Sstevel@tonic-gate
3380Sstevel@tonic-gate void
meta_mkdummymaster(mdsetname_t * sp,int fd,daddr_t firstblk)3390Sstevel@tonic-gate meta_mkdummymaster(
3400Sstevel@tonic-gate mdsetname_t *sp,
3410Sstevel@tonic-gate int fd,
3420Sstevel@tonic-gate daddr_t firstblk
3430Sstevel@tonic-gate )
3440Sstevel@tonic-gate {
3450Sstevel@tonic-gate md_timeval32_t tp;
3460Sstevel@tonic-gate struct mddb_mb *mb;
3470Sstevel@tonic-gate ddi_devid_t devid;
3480Sstevel@tonic-gate md_set_desc *sd;
3490Sstevel@tonic-gate md_error_t ep = mdnullerror;
3500Sstevel@tonic-gate md_timeval32_t inittime;
3510Sstevel@tonic-gate
3520Sstevel@tonic-gate /*
3530Sstevel@tonic-gate * No dummy master blocks are written for a MN diskset since devids
3540Sstevel@tonic-gate * are not supported in MN disksets.
3550Sstevel@tonic-gate */
3560Sstevel@tonic-gate if (! metaislocalset(sp)) {
3570Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, &ep)) == NULL)
3580Sstevel@tonic-gate return;
3590Sstevel@tonic-gate
3600Sstevel@tonic-gate if (MD_MNSET_DESC(sd))
3610Sstevel@tonic-gate return;
3620Sstevel@tonic-gate }
3630Sstevel@tonic-gate
3640Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL)
3650Sstevel@tonic-gate return;
3660Sstevel@tonic-gate
3670Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_DU;
3680Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB;
3690Sstevel@tonic-gate mb->mb_setno = sp->setno;
3700Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, &ep);
3710Sstevel@tonic-gate mb->mb_setcreatetime = inittime;
3720Sstevel@tonic-gate
3730Sstevel@tonic-gate if (meta_gettimeofday(&tp) != -1)
3740Sstevel@tonic-gate mb->mb_timestamp = tp;
3750Sstevel@tonic-gate
3760Sstevel@tonic-gate /*
3770Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in
3780Sstevel@tonic-gate * the master block. This allows the disk image to be self-identifying
3790Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used
3800Sstevel@tonic-gate * when we try to import these disks on the remote copied image.
3810Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is
3820Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable
3830Sstevel@tonic-gate * in the remote copy scenario.
3840Sstevel@tonic-gate */
3850Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) {
3860Sstevel@tonic-gate int len;
3870Sstevel@tonic-gate
3880Sstevel@tonic-gate len = devid_sizeof(devid);
3890Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) {
3900Sstevel@tonic-gate /* there is enough space to store the devid */
3910Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE;
3920Sstevel@tonic-gate mb->mb_devid_len = len;
3930Sstevel@tonic-gate (void) memcpy(mb->mb_devid, (char *)devid, len);
3940Sstevel@tonic-gate }
3950Sstevel@tonic-gate devid_free(devid);
3960Sstevel@tonic-gate }
3970Sstevel@tonic-gate
3980Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3990Sstevel@tonic-gate (crc_skip_t *)NULL);
4000Sstevel@tonic-gate
4010Sstevel@tonic-gate /*
4020Sstevel@tonic-gate * If any of these operations fail, we need to inform the
4030Sstevel@tonic-gate * user that the disk won't be self identifying. When support
4040Sstevel@tonic-gate * for importing remotely replicated disksets is added, we
4050Sstevel@tonic-gate * want to add the error messages here.
4060Sstevel@tonic-gate */
4070Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4080Sstevel@tonic-gate goto out;
4090Sstevel@tonic-gate
4100Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4110Sstevel@tonic-gate goto out;
4120Sstevel@tonic-gate
4130Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4140Sstevel@tonic-gate goto out;
4150Sstevel@tonic-gate
4160Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4170Sstevel@tonic-gate goto out;
4180Sstevel@tonic-gate
4190Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
4200Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
4210Sstevel@tonic-gate goto out;
4220Sstevel@tonic-gate
4230Sstevel@tonic-gate out:
4240Sstevel@tonic-gate Free(mb);
4250Sstevel@tonic-gate }
4260Sstevel@tonic-gate
4270Sstevel@tonic-gate static int
buildconf(mdsetname_t * sp,md_error_t * ep)4280Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
4290Sstevel@tonic-gate {
4300Sstevel@tonic-gate md_replicalist_t *rlp = NULL;
4310Sstevel@tonic-gate md_replicalist_t *rl;
4320Sstevel@tonic-gate FILE *cfp = NULL;
4330Sstevel@tonic-gate FILE *mfp = NULL;
4340Sstevel@tonic-gate struct stat sbuf;
4350Sstevel@tonic-gate int rval = 0;
4360Sstevel@tonic-gate int in_miniroot = 0;
4370Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN];
4380Sstevel@tonic-gate char *tname = NULL;
4390Sstevel@tonic-gate
4400Sstevel@tonic-gate /* get list of local replicas */
4410Sstevel@tonic-gate if (! metaislocalset(sp))
4420Sstevel@tonic-gate return (0);
4430Sstevel@tonic-gate
4440Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
4450Sstevel@tonic-gate return (-1);
4460Sstevel@tonic-gate
4470Sstevel@tonic-gate /* open tempfile, copy permissions of original file */
4480Sstevel@tonic-gate if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
4490Sstevel@tonic-gate /*
4500Sstevel@tonic-gate * On the miniroot tmp files must be created in /var/tmp.
4510Sstevel@tonic-gate * If we get a EROFS error, we assume that we are in the
4520Sstevel@tonic-gate * miniroot.
4530Sstevel@tonic-gate */
4540Sstevel@tonic-gate if (errno != EROFS)
4550Sstevel@tonic-gate goto error;
4560Sstevel@tonic-gate in_miniroot = 1;
4570Sstevel@tonic-gate errno = 0;
4580Sstevel@tonic-gate tname = tempnam("/var/tmp", "slvm_");
4590Sstevel@tonic-gate if (tname == NULL && errno == EROFS) {
4600Sstevel@tonic-gate /*
4610Sstevel@tonic-gate * If we are booted on a read-only root because
4620Sstevel@tonic-gate * of mddb quorum problems we don't want to emit
4630Sstevel@tonic-gate * any scary error messages.
4640Sstevel@tonic-gate */
4650Sstevel@tonic-gate errno = 0;
4660Sstevel@tonic-gate goto out;
4670Sstevel@tonic-gate }
4680Sstevel@tonic-gate
4690Sstevel@tonic-gate /* open tempfile, copy permissions of original file */
4700Sstevel@tonic-gate if ((cfp = fopen(tname, "w+")) == NULL)
4710Sstevel@tonic-gate goto error;
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate if (stat(META_DBCONF, &sbuf) == 0) {
4740Sstevel@tonic-gate if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
4750Sstevel@tonic-gate goto error;
4760Sstevel@tonic-gate if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
4770Sstevel@tonic-gate goto error;
4780Sstevel@tonic-gate }
4790Sstevel@tonic-gate
4800Sstevel@tonic-gate /* print header */
4810Sstevel@tonic-gate if (fprintf(cfp, "#metadevice database location file ") == EOF)
4820Sstevel@tonic-gate goto error;
4830Sstevel@tonic-gate if (fprintf(cfp, "do not hand edit\n") < 0)
4840Sstevel@tonic-gate goto error;
4850Sstevel@tonic-gate if (fprintf(cfp,
4865109Spetede "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
4870Sstevel@tonic-gate goto error;
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate /* dump replicas */
4900Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
4910Sstevel@tonic-gate md_replica_t *r = rl->rl_repp;
4920Sstevel@tonic-gate int checksum = 42;
4930Sstevel@tonic-gate int i;
4940Sstevel@tonic-gate char *devidp;
4950Sstevel@tonic-gate minor_t min;
4960Sstevel@tonic-gate
4970Sstevel@tonic-gate devidp = devid_str_encode(r->r_devid, r->r_minor_name);
4980Sstevel@tonic-gate /* If devid code can't encode devidp - skip entry */
4990Sstevel@tonic-gate if (devidp == NULL) {
5000Sstevel@tonic-gate continue;
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate
5030Sstevel@tonic-gate /* compute checksum */
5040Sstevel@tonic-gate for (i = 0; ((r->r_driver_name[i] != '\0') &&
5050Sstevel@tonic-gate (i < sizeof (r->r_driver_name))); i++) {
5060Sstevel@tonic-gate checksum -= r->r_driver_name[i];
5070Sstevel@tonic-gate }
5080Sstevel@tonic-gate min = meta_getminor(r->r_namep->dev);
5090Sstevel@tonic-gate checksum -= min;
5100Sstevel@tonic-gate checksum -= r->r_blkno;
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate for (i = 0; i < strlen(devidp); i++) {
5130Sstevel@tonic-gate checksum -= devidp[i];
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate /* print info */
5160Sstevel@tonic-gate if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
5170Sstevel@tonic-gate r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
5180Sstevel@tonic-gate goto error;
5190Sstevel@tonic-gate }
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate devid_str_free(devidp);
5220Sstevel@tonic-gate }
5230Sstevel@tonic-gate
5240Sstevel@tonic-gate /* close and rename to real file */
5250Sstevel@tonic-gate if (fflush(cfp) != 0)
5260Sstevel@tonic-gate goto error;
5270Sstevel@tonic-gate if (fsync(fileno(cfp)) != 0)
5280Sstevel@tonic-gate goto error;
5290Sstevel@tonic-gate if (fclose(cfp) != 0) {
5300Sstevel@tonic-gate cfp = NULL;
5310Sstevel@tonic-gate goto error;
5320Sstevel@tonic-gate }
5330Sstevel@tonic-gate cfp = NULL;
5340Sstevel@tonic-gate
5350Sstevel@tonic-gate /*
5360Sstevel@tonic-gate * Renames don't work in the miniroot since tmpfiles are
5370Sstevel@tonic-gate * created in /var/tmp. Hence we copy the data out.
5380Sstevel@tonic-gate */
5390Sstevel@tonic-gate
5400Sstevel@tonic-gate if (! in_miniroot) {
5410Sstevel@tonic-gate if (rename(META_DBCONFTMP, META_DBCONF) != 0)
5420Sstevel@tonic-gate goto error;
5430Sstevel@tonic-gate } else {
5440Sstevel@tonic-gate if ((cfp = fopen(tname, "r")) == NULL)
5450Sstevel@tonic-gate goto error;
5460Sstevel@tonic-gate if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
5470Sstevel@tonic-gate goto error;
5480Sstevel@tonic-gate while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
5490Sstevel@tonic-gate if (fputs(line, mfp) == NULL)
5500Sstevel@tonic-gate goto error;
5510Sstevel@tonic-gate }
5520Sstevel@tonic-gate (void) fclose(cfp);
5530Sstevel@tonic-gate cfp = NULL;
5540Sstevel@tonic-gate if (fflush(mfp) != 0)
5550Sstevel@tonic-gate goto error;
5560Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0)
5570Sstevel@tonic-gate goto error;
5580Sstevel@tonic-gate if (fclose(mfp) != 0) {
5590Sstevel@tonic-gate mfp = NULL;
5600Sstevel@tonic-gate goto error;
5610Sstevel@tonic-gate }
5620Sstevel@tonic-gate /* delete the tempfile */
5630Sstevel@tonic-gate (void) unlink(tname);
5640Sstevel@tonic-gate }
5650Sstevel@tonic-gate /* success */
5660Sstevel@tonic-gate rval = 0;
5670Sstevel@tonic-gate goto out;
5680Sstevel@tonic-gate
5690Sstevel@tonic-gate /* tempfile error */
5700Sstevel@tonic-gate error:
5710Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5725109Spetede mdsyserror(ep, errno, META_DBCONFTMP);
5730Sstevel@tonic-gate
5740Sstevel@tonic-gate
5750Sstevel@tonic-gate /* cleanup, return success */
5760Sstevel@tonic-gate out:
5770Sstevel@tonic-gate if (rlp != NULL)
5780Sstevel@tonic-gate metafreereplicalist(rlp);
5790Sstevel@tonic-gate if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
5800Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5815109Spetede mdsyserror(ep, errno, META_DBCONFTMP);
5820Sstevel@tonic-gate }
5830Sstevel@tonic-gate free(tname);
5840Sstevel@tonic-gate return (rval);
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate * check replica for dev
5890Sstevel@tonic-gate */
5900Sstevel@tonic-gate static int
in_replica(mdsetname_t * sp,md_replica_t * rp,mdname_t * np,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)5910Sstevel@tonic-gate in_replica(
5920Sstevel@tonic-gate mdsetname_t *sp,
5930Sstevel@tonic-gate md_replica_t *rp,
5940Sstevel@tonic-gate mdname_t *np,
5950Sstevel@tonic-gate diskaddr_t slblk,
5960Sstevel@tonic-gate diskaddr_t nblks,
5970Sstevel@tonic-gate md_error_t *ep
5980Sstevel@tonic-gate )
5990Sstevel@tonic-gate {
6000Sstevel@tonic-gate mdname_t *repnp = rp->r_namep;
6010Sstevel@tonic-gate diskaddr_t rep_sblk = rp->r_blkno;
6020Sstevel@tonic-gate diskaddr_t rep_nblks = rp->r_nblk;
6030Sstevel@tonic-gate
6040Sstevel@tonic-gate /* should be in the same set */
6050Sstevel@tonic-gate assert(sp != NULL);
6060Sstevel@tonic-gate
6070Sstevel@tonic-gate /* if error in master block, assume whole partition */
6080Sstevel@tonic-gate if ((rep_sblk == MD_DISKADDR_ERROR) ||
6090Sstevel@tonic-gate (rep_nblks == MD_DISKADDR_ERROR)) {
6100Sstevel@tonic-gate rep_sblk = 0;
6110Sstevel@tonic-gate rep_nblks = MD_DISKADDR_ERROR;
6120Sstevel@tonic-gate }
6130Sstevel@tonic-gate
6140Sstevel@tonic-gate /* check overlap */
6150Sstevel@tonic-gate if (meta_check_overlap(
6160Sstevel@tonic-gate MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
6170Sstevel@tonic-gate return (-1);
6180Sstevel@tonic-gate }
6190Sstevel@tonic-gate
6200Sstevel@tonic-gate /* return success */
6210Sstevel@tonic-gate return (0);
6220Sstevel@tonic-gate }
6230Sstevel@tonic-gate
6240Sstevel@tonic-gate /*
6250Sstevel@tonic-gate * check to see if we're in a replica
6260Sstevel@tonic-gate */
6270Sstevel@tonic-gate int
meta_check_inreplica(mdsetname_t * sp,mdname_t * np,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)6280Sstevel@tonic-gate meta_check_inreplica(
6290Sstevel@tonic-gate mdsetname_t *sp,
6300Sstevel@tonic-gate mdname_t *np,
6310Sstevel@tonic-gate diskaddr_t slblk,
6320Sstevel@tonic-gate diskaddr_t nblks,
6330Sstevel@tonic-gate md_error_t *ep
6340Sstevel@tonic-gate )
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate md_replicalist_t *rlp = NULL;
6370Sstevel@tonic-gate md_replicalist_t *rl;
6380Sstevel@tonic-gate int rval = 0;
6390Sstevel@tonic-gate
6400Sstevel@tonic-gate /* should have a set */
6410Sstevel@tonic-gate assert(sp != NULL);
6420Sstevel@tonic-gate
6430Sstevel@tonic-gate /* for each replica */
6440Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
6450Sstevel@tonic-gate return (-1);
6460Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
6470Sstevel@tonic-gate md_replica_t *rp = rl->rl_repp;
6480Sstevel@tonic-gate
6490Sstevel@tonic-gate /* check replica */
6500Sstevel@tonic-gate if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
6510Sstevel@tonic-gate rval = -1;
6520Sstevel@tonic-gate break;
6530Sstevel@tonic-gate }
6540Sstevel@tonic-gate }
6550Sstevel@tonic-gate
6560Sstevel@tonic-gate /* cleanup, return success */
6570Sstevel@tonic-gate metafreereplicalist(rlp);
6580Sstevel@tonic-gate return (rval);
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate * check replica
6630Sstevel@tonic-gate */
6640Sstevel@tonic-gate int
meta_check_replica(mdsetname_t * sp,mdname_t * np,mdchkopts_t options,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)6650Sstevel@tonic-gate meta_check_replica(
6660Sstevel@tonic-gate mdsetname_t *sp, /* set to check against */
6670Sstevel@tonic-gate mdname_t *np, /* component to check against */
6680Sstevel@tonic-gate mdchkopts_t options, /* option flags */
6690Sstevel@tonic-gate diskaddr_t slblk, /* start logical block */
6700Sstevel@tonic-gate diskaddr_t nblks, /* number of blocks (-1,rest of them) */
6710Sstevel@tonic-gate md_error_t *ep /* error packet */
6720Sstevel@tonic-gate )
6730Sstevel@tonic-gate {
6740Sstevel@tonic-gate mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE;
6750Sstevel@tonic-gate
6760Sstevel@tonic-gate /* make sure we have a disk */
6770Sstevel@tonic-gate if (metachkcomp(np, ep) != 0)
6780Sstevel@tonic-gate return (-1);
6790Sstevel@tonic-gate
6800Sstevel@tonic-gate /* check to ensure that it is not already in use */
6810Sstevel@tonic-gate if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
6820Sstevel@tonic-gate return (-1);
6830Sstevel@tonic-gate }
6840Sstevel@tonic-gate
6850Sstevel@tonic-gate if (options & MDCHK_ALLOW_NODBS)
6860Sstevel@tonic-gate return (0);
6870Sstevel@tonic-gate
6880Sstevel@tonic-gate if (options & MDCHK_DRVINSET)
6890Sstevel@tonic-gate return (0);
6900Sstevel@tonic-gate
6910Sstevel@tonic-gate /* make sure it is in the set */
6920Sstevel@tonic-gate if (meta_check_inset(sp, np, ep) != 0)
6930Sstevel@tonic-gate return (-1);
6940Sstevel@tonic-gate
6950Sstevel@tonic-gate /* make sure its not in a metadevice */
6960Sstevel@tonic-gate if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
6970Sstevel@tonic-gate return (-1);
6980Sstevel@tonic-gate
6990Sstevel@tonic-gate /* return success */
7000Sstevel@tonic-gate return (0);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate
7030Sstevel@tonic-gate static int
update_dbinfo_on_drives(mdsetname_t * sp,md_drive_desc * dd,int set_locked,int force,md_error_t * ep)7040Sstevel@tonic-gate update_dbinfo_on_drives(
7050Sstevel@tonic-gate mdsetname_t *sp,
7060Sstevel@tonic-gate md_drive_desc *dd,
7070Sstevel@tonic-gate int set_locked,
7080Sstevel@tonic-gate int force,
7090Sstevel@tonic-gate md_error_t *ep
7100Sstevel@tonic-gate )
7110Sstevel@tonic-gate {
7120Sstevel@tonic-gate md_set_desc *sd;
7130Sstevel@tonic-gate int i;
7140Sstevel@tonic-gate md_setkey_t *cl_sk;
7150Sstevel@tonic-gate int rval = 0;
7160Sstevel@tonic-gate md_mnnode_desc *nd;
7170Sstevel@tonic-gate
7180Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
7190Sstevel@tonic-gate return (-1);
7200Sstevel@tonic-gate
7210Sstevel@tonic-gate if (! set_locked) {
7220Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
7230Sstevel@tonic-gate md_error_t xep = mdnullerror;
7240Sstevel@tonic-gate sigset_t sigs;
7250Sstevel@tonic-gate /* Make sure we are blocking all signals */
7260Sstevel@tonic-gate if (procsigs(TRUE, &sigs, &xep) < 0)
7270Sstevel@tonic-gate mdclrerror(&xep);
7280Sstevel@tonic-gate
7290Sstevel@tonic-gate nd = sd->sd_nodelist;
7300Sstevel@tonic-gate while (nd) {
7310Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename,
7320Sstevel@tonic-gate mynode()) != 0) {
7330Sstevel@tonic-gate nd = nd->nd_next;
7340Sstevel@tonic-gate continue;
7350Sstevel@tonic-gate }
7360Sstevel@tonic-gate
7370Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7380Sstevel@tonic-gate nd = nd->nd_next;
7390Sstevel@tonic-gate continue;
7400Sstevel@tonic-gate }
7410Sstevel@tonic-gate
7420Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep))
7430Sstevel@tonic-gate return (-1);
7440Sstevel@tonic-gate nd = nd->nd_next;
7450Sstevel@tonic-gate }
7460Sstevel@tonic-gate } else {
7470Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
7480Sstevel@tonic-gate /* Skip empty slots */
7490Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
7500Sstevel@tonic-gate continue;
7510Sstevel@tonic-gate
7520Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i],
7530Sstevel@tonic-gate mynode()) != 0)
7540Sstevel@tonic-gate continue;
7550Sstevel@tonic-gate
7560Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
7570Sstevel@tonic-gate return (-1);
7580Sstevel@tonic-gate }
7590Sstevel@tonic-gate }
7600Sstevel@tonic-gate }
7610Sstevel@tonic-gate
7620Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
7630Sstevel@tonic-gate nd = sd->sd_nodelist;
7640Sstevel@tonic-gate while (nd) {
7650Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
7660Sstevel@tonic-gate nd = nd->nd_next;
7670Sstevel@tonic-gate continue;
7680Sstevel@tonic-gate }
7690Sstevel@tonic-gate
7700Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7710Sstevel@tonic-gate nd = nd->nd_next;
7720Sstevel@tonic-gate continue;
7730Sstevel@tonic-gate }
7740Sstevel@tonic-gate
7750Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
7760Sstevel@tonic-gate == -1) {
7770Sstevel@tonic-gate rval = -1;
7780Sstevel@tonic-gate break;
7790Sstevel@tonic-gate }
7800Sstevel@tonic-gate nd = nd->nd_next;
7810Sstevel@tonic-gate }
7820Sstevel@tonic-gate } else {
7830Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
7840Sstevel@tonic-gate /* Skip empty slots */
7850Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
7860Sstevel@tonic-gate continue;
7870Sstevel@tonic-gate
7880Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
7890Sstevel@tonic-gate continue;
7900Sstevel@tonic-gate
7910Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
7920Sstevel@tonic-gate == -1) {
7930Sstevel@tonic-gate rval = -1;
7940Sstevel@tonic-gate break;
7950Sstevel@tonic-gate }
7960Sstevel@tonic-gate }
7970Sstevel@tonic-gate }
7980Sstevel@tonic-gate
7990Sstevel@tonic-gate if (! set_locked) {
8000Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname);
8010Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) {
8020Sstevel@tonic-gate nd = sd->sd_nodelist;
8030Sstevel@tonic-gate while (nd) {
8040Sstevel@tonic-gate if (force &&
8050Sstevel@tonic-gate strcmp(nd->nd_nodename, mynode()) != 0) {
8060Sstevel@tonic-gate nd = nd->nd_next;
8070Sstevel@tonic-gate continue;
8080Sstevel@tonic-gate }
8090Sstevel@tonic-gate
8100Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
8110Sstevel@tonic-gate nd = nd->nd_next;
8120Sstevel@tonic-gate continue;
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk,
8160Sstevel@tonic-gate ep)) {
8170Sstevel@tonic-gate rval = -1;
8180Sstevel@tonic-gate break;
8190Sstevel@tonic-gate }
8200Sstevel@tonic-gate nd = nd->nd_next;
8210Sstevel@tonic-gate }
8220Sstevel@tonic-gate } else {
8230Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) {
8240Sstevel@tonic-gate /* Skip empty slots */
8250Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0')
8260Sstevel@tonic-gate continue;
8270Sstevel@tonic-gate
8280Sstevel@tonic-gate if (force &&
8290Sstevel@tonic-gate strcmp(sd->sd_nodes[i], mynode()) != 0)
8300Sstevel@tonic-gate continue;
8310Sstevel@tonic-gate
8320Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
8330Sstevel@tonic-gate ep)) {
8340Sstevel@tonic-gate rval = -1;
8350Sstevel@tonic-gate break;
8360Sstevel@tonic-gate }
8370Sstevel@tonic-gate }
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate }
8400Sstevel@tonic-gate cl_set_setkey(NULL);
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate
8430Sstevel@tonic-gate return (rval);
8440Sstevel@tonic-gate }
8450Sstevel@tonic-gate
8460Sstevel@tonic-gate int
meta_db_addsidenms(mdsetname_t * sp,mdname_t * np,daddr_t blkno,int bcast,md_error_t * ep)8470Sstevel@tonic-gate meta_db_addsidenms(
8480Sstevel@tonic-gate mdsetname_t *sp,
8490Sstevel@tonic-gate mdname_t *np,
8500Sstevel@tonic-gate daddr_t blkno,
8510Sstevel@tonic-gate int bcast,
8520Sstevel@tonic-gate md_error_t *ep
8530Sstevel@tonic-gate )
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate side_t sideno;
8560Sstevel@tonic-gate char *bname = NULL;
8570Sstevel@tonic-gate char *dname = NULL;
8580Sstevel@tonic-gate minor_t mnum;
8590Sstevel@tonic-gate mddb_config_t c;
8600Sstevel@tonic-gate int done;
8610Sstevel@tonic-gate int rval = 0;
8620Sstevel@tonic-gate md_set_desc *sd;
8630Sstevel@tonic-gate
8640Sstevel@tonic-gate sideno = MD_SIDEWILD;
8650Sstevel@tonic-gate /*CONSTCOND*/
8660Sstevel@tonic-gate while (1) {
8670Sstevel@tonic-gate if (bname != NULL) {
8680Sstevel@tonic-gate Free(bname);
8690Sstevel@tonic-gate bname = NULL;
8700Sstevel@tonic-gate }
8710Sstevel@tonic-gate if (dname != NULL) {
8720Sstevel@tonic-gate Free(dname);
8730Sstevel@tonic-gate dname = NULL;
8740Sstevel@tonic-gate }
8750Sstevel@tonic-gate if ((done = meta_getnextside_devinfo(sp, np->bname,
8760Sstevel@tonic-gate &sideno, &bname, &dname, &mnum, ep)) == -1) {
8770Sstevel@tonic-gate rval = -1;
8780Sstevel@tonic-gate break;
8790Sstevel@tonic-gate }
8800Sstevel@tonic-gate
8810Sstevel@tonic-gate if (done == 0)
8820Sstevel@tonic-gate break;
8830Sstevel@tonic-gate
8840Sstevel@tonic-gate if (! metaislocalset(sp)) {
8850Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) {
8860Sstevel@tonic-gate rval = -1;
8870Sstevel@tonic-gate break;
8880Sstevel@tonic-gate }
8890Sstevel@tonic-gate }
8900Sstevel@tonic-gate
8910Sstevel@tonic-gate /*
8920Sstevel@tonic-gate * Send addsidenms to all nodes using rpc.mdcommd if
8930Sstevel@tonic-gate * sidename is being added to MN diskset.
8940Sstevel@tonic-gate *
8950Sstevel@tonic-gate * It's ok to broadcast this call to other nodes.
8960Sstevel@tonic-gate *
8970Sstevel@tonic-gate * Note: The broadcast to other nodes isn't needed during
8980Sstevel@tonic-gate * the addition of the first mddbs to the set since the
8990Sstevel@tonic-gate * other nodes haven't been joined to the set yet. All
9000Sstevel@tonic-gate * nodes in a MN diskset are (implicitly) joined to the set
9010Sstevel@tonic-gate * on the addition of the first mddb.
9020Sstevel@tonic-gate */
9030Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
9040Sstevel@tonic-gate (bcast == DB_ADDSIDENMS_BCAST)) {
9050Sstevel@tonic-gate md_mn_result_t *resultp = NULL;
9060Sstevel@tonic-gate md_mn_msg_meta_db_newside_t db_ns;
9070Sstevel@tonic-gate int send_rval;
9080Sstevel@tonic-gate
9090Sstevel@tonic-gate db_ns.msg_l_dev = np->dev;
9100Sstevel@tonic-gate db_ns.msg_sideno = sideno;
9110Sstevel@tonic-gate db_ns.msg_blkno = blkno;
9120Sstevel@tonic-gate (void) strncpy(db_ns.msg_dname, dname,
9130Sstevel@tonic-gate sizeof (db_ns.msg_dname));
9140Sstevel@tonic-gate (void) splitname(np->bname, &db_ns.msg_splitname);
9150Sstevel@tonic-gate db_ns.msg_mnum = mnum;
9160Sstevel@tonic-gate
9170Sstevel@tonic-gate /* Set devid to NULL until devids are supported */
9180Sstevel@tonic-gate db_ns.msg_devid[0] = NULL;
9190Sstevel@tonic-gate
9200Sstevel@tonic-gate /*
9210Sstevel@tonic-gate * If reconfig cycle has been started, this node is
9220Sstevel@tonic-gate * stuck in in the return step until this command has
9230Sstevel@tonic-gate * completed. If mdcommd is suspended, ask
9240Sstevel@tonic-gate * send_message to fail (instead of retrying)
9250Sstevel@tonic-gate * so that metaset can finish allowing the reconfig
9260Sstevel@tonic-gate * cycle to proceed.
9270Sstevel@tonic-gate */
9280Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno,
9290Sstevel@tonic-gate MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
930*8452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ns,
9310Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_newside_t),
9320Sstevel@tonic-gate &resultp, ep);
9330Sstevel@tonic-gate if (send_rval != 0) {
9340Sstevel@tonic-gate rval = -1;
9350Sstevel@tonic-gate if (resultp == NULL)
9360Sstevel@tonic-gate (void) mddserror(ep,
9370Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
9380Sstevel@tonic-gate sp->setno, NULL, NULL,
9390Sstevel@tonic-gate sp->setname);
9400Sstevel@tonic-gate else {
9410Sstevel@tonic-gate (void) mdstealerror(ep,
9420Sstevel@tonic-gate &(resultp->mmr_ep));
9430Sstevel@tonic-gate if (mdisok(ep)) {
9440Sstevel@tonic-gate (void) mddserror(ep,
9450Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
9460Sstevel@tonic-gate sp->setno, NULL, NULL,
9470Sstevel@tonic-gate sp->setname);
9480Sstevel@tonic-gate }
9490Sstevel@tonic-gate free_result(resultp);
9500Sstevel@tonic-gate }
9510Sstevel@tonic-gate break;
9520Sstevel@tonic-gate }
9530Sstevel@tonic-gate if (resultp)
9540Sstevel@tonic-gate free_result(resultp);
9550Sstevel@tonic-gate } else {
9560Sstevel@tonic-gate /*
9570Sstevel@tonic-gate * Let this side's device name, minor # and driver name
9580Sstevel@tonic-gate * be known to the database replica.
9590Sstevel@tonic-gate */
9600Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
9610Sstevel@tonic-gate
9620Sstevel@tonic-gate /* Fill in device/replica info */
9630Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev);
9640Sstevel@tonic-gate c.c_locator.l_blkno = blkno;
9650Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, dname,
9660Sstevel@tonic-gate sizeof (c.c_locator.l_driver));
9676195Sachimm if (splitname(np->bname, &c.c_devname) ==
9685109Spetede METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
9695109Spetede rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
9705109Spetede NODEV64, np->rname);
9715109Spetede break;
9725109Spetede }
9735109Spetede
9740Sstevel@tonic-gate c.c_locator.l_mnum = mnum;
9750Sstevel@tonic-gate
9760Sstevel@tonic-gate /* Fill in setno, setname, and sideno */
9770Sstevel@tonic-gate c.c_setno = sp->setno;
9780Sstevel@tonic-gate (void) strncpy(c.c_setname, sp->setname,
9795109Spetede sizeof (c.c_setname));
9800Sstevel@tonic-gate c.c_sideno = sideno;
9810Sstevel@tonic-gate
9820Sstevel@tonic-gate /*
9830Sstevel@tonic-gate * Don't need device id information from this ioctl
9840Sstevel@tonic-gate * Kernel determines device id from dev_t, which
9850Sstevel@tonic-gate * is just what this code would do.
9860Sstevel@tonic-gate */
9870Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
9880Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
9890Sstevel@tonic-gate
9900Sstevel@tonic-gate if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
9910Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde);
9920Sstevel@tonic-gate break;
9930Sstevel@tonic-gate }
9940Sstevel@tonic-gate }
9950Sstevel@tonic-gate }
9960Sstevel@tonic-gate
9970Sstevel@tonic-gate /* cleanup, return success */
9980Sstevel@tonic-gate if (bname != NULL) {
9990Sstevel@tonic-gate Free(bname);
10000Sstevel@tonic-gate bname = NULL;
10010Sstevel@tonic-gate }
10020Sstevel@tonic-gate if (dname != NULL) {
10030Sstevel@tonic-gate Free(dname);
10040Sstevel@tonic-gate dname = NULL;
10050Sstevel@tonic-gate }
10060Sstevel@tonic-gate return (rval);
10070Sstevel@tonic-gate }
10080Sstevel@tonic-gate
10090Sstevel@tonic-gate
10100Sstevel@tonic-gate int
meta_db_delsidenm(mdsetname_t * sp,side_t sideno,mdname_t * np,daddr_t blkno,md_error_t * ep)10110Sstevel@tonic-gate meta_db_delsidenm(
10120Sstevel@tonic-gate mdsetname_t *sp,
10130Sstevel@tonic-gate side_t sideno,
10140Sstevel@tonic-gate mdname_t *np,
10150Sstevel@tonic-gate daddr_t blkno,
10160Sstevel@tonic-gate md_error_t *ep
10170Sstevel@tonic-gate )
10180Sstevel@tonic-gate {
10190Sstevel@tonic-gate mddb_config_t c;
10200Sstevel@tonic-gate md_set_desc *sd;
10210Sstevel@tonic-gate
10220Sstevel@tonic-gate if (! metaislocalset(sp)) {
10230Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
10240Sstevel@tonic-gate return (-1);
10250Sstevel@tonic-gate }
10260Sstevel@tonic-gate /* Use rpc.mdcommd to delete mddb side from all nodes */
10270Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
10280Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
10290Sstevel@tonic-gate md_mn_result_t *resultp = NULL;
10300Sstevel@tonic-gate md_mn_msg_meta_db_delside_t db_ds;
10310Sstevel@tonic-gate int send_rval;
10320Sstevel@tonic-gate
10330Sstevel@tonic-gate db_ds.msg_l_dev = np->dev;
10340Sstevel@tonic-gate db_ds.msg_blkno = blkno;
10350Sstevel@tonic-gate db_ds.msg_sideno = sideno;
10360Sstevel@tonic-gate
10370Sstevel@tonic-gate /* Set devid to NULL until devids are supported */
10380Sstevel@tonic-gate db_ds.msg_devid[0] = NULL;
10390Sstevel@tonic-gate
10400Sstevel@tonic-gate /*
10410Sstevel@tonic-gate * If reconfig cycle has been started, this node is
10420Sstevel@tonic-gate * stuck in in the return step until this command has
10430Sstevel@tonic-gate * completed. If mdcommd is suspended, ask
10440Sstevel@tonic-gate * send_message to fail (instead of retrying)
10450Sstevel@tonic-gate * so that metaset can finish allowing the reconfig
10460Sstevel@tonic-gate * cycle to proceed.
10470Sstevel@tonic-gate */
10480Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno,
10490Sstevel@tonic-gate MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
1050*8452SJohn.Wren.Kennedy@Sun.COM MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ds,
10510Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
10520Sstevel@tonic-gate if (send_rval != 0) {
10530Sstevel@tonic-gate if (resultp == NULL)
10540Sstevel@tonic-gate (void) mddserror(ep,
10550Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
10560Sstevel@tonic-gate sp->setno, NULL, NULL,
10570Sstevel@tonic-gate sp->setname);
10580Sstevel@tonic-gate else {
10590Sstevel@tonic-gate (void) mdstealerror(ep, &(resultp->mmr_ep));
10600Sstevel@tonic-gate if (mdisok(ep)) {
10610Sstevel@tonic-gate (void) mddserror(ep,
10620Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
10630Sstevel@tonic-gate sp->setno, NULL, NULL,
10640Sstevel@tonic-gate sp->setname);
10650Sstevel@tonic-gate }
10660Sstevel@tonic-gate free_result(resultp);
10670Sstevel@tonic-gate }
10680Sstevel@tonic-gate return (-1);
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate if (resultp)
10710Sstevel@tonic-gate free_result(resultp);
10720Sstevel@tonic-gate
10730Sstevel@tonic-gate } else {
10740Sstevel@tonic-gate /*
10750Sstevel@tonic-gate * Let this side's device name, minor # and driver name
10760Sstevel@tonic-gate * be known to the database replica.
10770Sstevel@tonic-gate */
10780Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
10790Sstevel@tonic-gate
10800Sstevel@tonic-gate /* Fill in device/replica info */
10810Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev);
10820Sstevel@tonic-gate c.c_locator.l_blkno = blkno;
10830Sstevel@tonic-gate
10840Sstevel@tonic-gate /* Fill in setno, setname, and sideno */
10850Sstevel@tonic-gate c.c_setno = sp->setno;
10860Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname);
10870Sstevel@tonic-gate c.c_sideno = sideno;
10880Sstevel@tonic-gate
10890Sstevel@tonic-gate /*
10900Sstevel@tonic-gate * Don't need device id information from this ioctl
10910Sstevel@tonic-gate * Kernel determines device id from dev_t, which
10920Sstevel@tonic-gate * is just what this code would do.
10930Sstevel@tonic-gate */
10940Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
10950Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
10960Sstevel@tonic-gate
10970Sstevel@tonic-gate if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
10980Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
10990Sstevel@tonic-gate }
11000Sstevel@tonic-gate return (0);
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate
11030Sstevel@tonic-gate
11040Sstevel@tonic-gate static int
mdnamesareunique(mdnamelist_t * nlp,md_error_t * ep)11050Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
11060Sstevel@tonic-gate {
11070Sstevel@tonic-gate mdnamelist_t *dnp1, *dnp2;
11080Sstevel@tonic-gate
11090Sstevel@tonic-gate for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
11100Sstevel@tonic-gate for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
11110Sstevel@tonic-gate if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
11120Sstevel@tonic-gate return (mderror(ep, MDE_DUPDRIVE,
11130Sstevel@tonic-gate dnp1->namep->cname));
11140Sstevel@tonic-gate }
11150Sstevel@tonic-gate }
11160Sstevel@tonic-gate return (0);
11170Sstevel@tonic-gate }
11180Sstevel@tonic-gate
11190Sstevel@tonic-gate
/*
 * Read exactly len bytes from the beginning of file name into buf,
 * retrying after short reads.
 * Return 0 on success, -1 if the file cannot be opened, a read fails,
 * or the file ends before len bytes were read.
 */
static int
filediff_read(const char *name, char *buf, size_t len)
{
	size_t	done = 0;
	int	fd;

	if ((fd = open(name, O_RDONLY)) == -1)
		return (-1);

	while (done < len) {
		ssize_t	cnt = read(fd, buf + done, len - done);

		if (cnt <= 0)
			break;
		done += (size_t)cnt;
	}
	(void) close(fd);

	return ((done == len) ? 0 : -1);
}

/*
 * Return 1 if files are different, else return 0
 *
 * Two files are the same when both can be stat'ed, have the same size
 * and hold the same bytes.  Any failure along the way (stat, allocation,
 * open, read) is reported as "different" (1), so that a caller which only
 * skips work on "identical" stays on the safe side.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		sz;
	struct stat	tsbuf, sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	if (stat(tsname, &tsbuf) != 0 || stat(sname, &sbuf) != 0)
		return (1);
	if (tsbuf.st_size != sbuf.st_size)
		return (1);
	sz = (size_t)sbuf.st_size;

	/*
	 * Allocate memory and read both files into buffers.  Never ask
	 * for zero bytes: malloc(0) may legitimately return NULL, which
	 * would make two empty files look different.
	 */
	tbuf = malloc((sz != 0) ? sz : 1);
	buf = malloc((sz != 0) ? sz : 1);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_read(tsname, tbuf, sz) != 0)
		goto out;
	if (filediff_read(sname, buf, sz) != 0)
		goto out;

	/* compare content, normalizing the result to 0 or 1 */
	ret = (memcmp(tbuf, buf, sz) != 0);
out:
	free(tbuf);
	free(buf);
	return (ret);
}
11710Sstevel@tonic-gate
11720Sstevel@tonic-gate /*
11730Sstevel@tonic-gate * patch md.conf file with mddb locations
11740Sstevel@tonic-gate */
11750Sstevel@tonic-gate int
meta_db_patch(char * sname,char * cname,int patch,md_error_t * ep)11760Sstevel@tonic-gate meta_db_patch(
11770Sstevel@tonic-gate char *sname, /* system file name */
11780Sstevel@tonic-gate char *cname, /* mddb.cf file name */
11790Sstevel@tonic-gate int patch, /* patching locally */
11800Sstevel@tonic-gate md_error_t *ep
11810Sstevel@tonic-gate )
11820Sstevel@tonic-gate {
11830Sstevel@tonic-gate char *tsname = NULL;
11840Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN];
11850Sstevel@tonic-gate FILE *tsfp = NULL;
11860Sstevel@tonic-gate FILE *mfp = NULL;
11870Sstevel@tonic-gate int rval = -1;
11880Sstevel@tonic-gate
11890Sstevel@tonic-gate /* check names */
11900Sstevel@tonic-gate if (sname == NULL) {
11910Sstevel@tonic-gate if (patch)
11920Sstevel@tonic-gate sname = "md.conf";
11930Sstevel@tonic-gate else
11940Sstevel@tonic-gate sname = "/kernel/drv/md.conf";
11950Sstevel@tonic-gate }
11960Sstevel@tonic-gate if (cname == NULL)
11970Sstevel@tonic-gate cname = META_DBCONF;
11980Sstevel@tonic-gate
11990Sstevel@tonic-gate /*
12000Sstevel@tonic-gate * edit file
12010Sstevel@tonic-gate */
12020Sstevel@tonic-gate if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
12030Sstevel@tonic-gate if (mdissyserror(ep, EROFS)) {
12040Sstevel@tonic-gate /*
12050Sstevel@tonic-gate * If we are booted on a read-only root because
12060Sstevel@tonic-gate * of mddb quorum problems we don't want to emit
12070Sstevel@tonic-gate * any scary error messages.
12080Sstevel@tonic-gate */
12090Sstevel@tonic-gate mdclrerror(ep);
12100Sstevel@tonic-gate rval = 0;
12110Sstevel@tonic-gate }
12120Sstevel@tonic-gate goto out;
12130Sstevel@tonic-gate }
12140Sstevel@tonic-gate
12152063Shshaw if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
12160Sstevel@tonic-gate ep) != 0)
12170Sstevel@tonic-gate goto out;
12180Sstevel@tonic-gate
12190Sstevel@tonic-gate /* if file content is identical, skip rename */
12200Sstevel@tonic-gate if (filediff(tsname, sname) == 0) {
12210Sstevel@tonic-gate rval = 0;
12220Sstevel@tonic-gate goto out;
12230Sstevel@tonic-gate }
12240Sstevel@tonic-gate
12250Sstevel@tonic-gate if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
12265109Spetede (fclose(tsfp) != 0)) {
12270Sstevel@tonic-gate (void) mdsyserror(ep, errno, tsname);
12280Sstevel@tonic-gate goto out;
12290Sstevel@tonic-gate }
12300Sstevel@tonic-gate
12310Sstevel@tonic-gate tsfp = NULL;
12320Sstevel@tonic-gate
12330Sstevel@tonic-gate /*
12340Sstevel@tonic-gate * rename file. If we get a Cross Device error then it
12350Sstevel@tonic-gate * is because we are in the miniroot.
12360Sstevel@tonic-gate */
12370Sstevel@tonic-gate if (rename(tsname, sname) != 0 && errno != EXDEV) {
12380Sstevel@tonic-gate (void) mdsyserror(ep, errno, sname);
12390Sstevel@tonic-gate goto out;
12400Sstevel@tonic-gate }
12410Sstevel@tonic-gate
12420Sstevel@tonic-gate if (errno == EXDEV) {
12430Sstevel@tonic-gate if ((tsfp = fopen(tsname, "r")) == NULL)
12440Sstevel@tonic-gate goto out;
12450Sstevel@tonic-gate if ((mfp = fopen(sname, "w+")) == NULL)
12460Sstevel@tonic-gate goto out;
12470Sstevel@tonic-gate while (fgets(line, sizeof (line), tsfp) != NULL) {
12480Sstevel@tonic-gate if (fputs(line, mfp) == NULL)
12490Sstevel@tonic-gate goto out;
12500Sstevel@tonic-gate }
12510Sstevel@tonic-gate (void) fclose(tsfp);
12520Sstevel@tonic-gate tsfp = NULL;
12530Sstevel@tonic-gate if (fflush(mfp) != 0)
12540Sstevel@tonic-gate goto out;
12550Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0)
12560Sstevel@tonic-gate goto out;
12570Sstevel@tonic-gate if (fclose(mfp) != 0) {
12580Sstevel@tonic-gate mfp = NULL;
12590Sstevel@tonic-gate goto out;
12600Sstevel@tonic-gate }
12610Sstevel@tonic-gate }
12620Sstevel@tonic-gate
12630Sstevel@tonic-gate Free(tsname);
12640Sstevel@tonic-gate tsname = NULL;
12650Sstevel@tonic-gate rval = 0;
12660Sstevel@tonic-gate
12670Sstevel@tonic-gate /* cleanup, return error */
12680Sstevel@tonic-gate out:
12690Sstevel@tonic-gate if (tsfp != NULL)
12700Sstevel@tonic-gate (void) fclose(tsfp);
12710Sstevel@tonic-gate if (tsname != NULL) {
12720Sstevel@tonic-gate (void) unlink(tsname);
12730Sstevel@tonic-gate Free(tsname);
12740Sstevel@tonic-gate }
12750Sstevel@tonic-gate return (rval);
12760Sstevel@tonic-gate }
12770Sstevel@tonic-gate
12780Sstevel@tonic-gate /*
12790Sstevel@tonic-gate * Add replicas to set. This happens as a result of:
12800Sstevel@tonic-gate * - metadb [-s set_name] -a
12810Sstevel@tonic-gate * - metaset -s set_name -a disk
12820Sstevel@tonic-gate * - metaset -s set_name -d disk (causes a rebalance of mddbs)
12830Sstevel@tonic-gate * - metaset -s set_name -b
12840Sstevel@tonic-gate *
12850Sstevel@tonic-gate * For a local set, this routine is run on the local set host.
12860Sstevel@tonic-gate *
12870Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that
12880Sstevel@tonic-gate * is running the metaset command.
12890Sstevel@tonic-gate *
12900Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is
12910Sstevel@tonic-gate * running the metaset command. If this is the first mddb added to
12920Sstevel@tonic-gate * the MN diskset, then no communication is made to other nodes via commd
12930Sstevel@tonic-gate * since the other nodes will be in-sync with respect to the mddbs when
12940Sstevel@tonic-gate * those other nodes join the set and snarf in the newly created mddb.
12950Sstevel@tonic-gate * If this is not the first mddb added to the MN diskset, then this
12960Sstevel@tonic-gate * attach command is sent to all of the nodes using commd. This keeps
12970Sstevel@tonic-gate * the nodes in-sync.
12980Sstevel@tonic-gate */
12990Sstevel@tonic-gate int
meta_db_attach(mdsetname_t * sp,mdnamelist_t * db_nlp,mdchkopts_t options,md_timeval32_t * timeval,int dbcnt,int dbsize,char * sysfilename,md_error_t * ep)13000Sstevel@tonic-gate meta_db_attach(
13010Sstevel@tonic-gate mdsetname_t *sp,
13020Sstevel@tonic-gate mdnamelist_t *db_nlp,
13030Sstevel@tonic-gate mdchkopts_t options,
13040Sstevel@tonic-gate md_timeval32_t *timeval,
13050Sstevel@tonic-gate int dbcnt,
13060Sstevel@tonic-gate int dbsize,
13070Sstevel@tonic-gate char *sysfilename,
13080Sstevel@tonic-gate md_error_t *ep
13090Sstevel@tonic-gate )
13100Sstevel@tonic-gate {
13110Sstevel@tonic-gate struct mddb_config c;
13120Sstevel@tonic-gate mdnamelist_t *nlp;
13130Sstevel@tonic-gate mdname_t *np;
13140Sstevel@tonic-gate md_drive_desc *dd = NULL;
13150Sstevel@tonic-gate md_drive_desc *p;
13160Sstevel@tonic-gate int i;
13170Sstevel@tonic-gate int fd;
13180Sstevel@tonic-gate side_t sideno;
13190Sstevel@tonic-gate daddr_t blkno;
13200Sstevel@tonic-gate int replicacount = 0;
13212614Spetede int start_svmdaemons = 0;
13220Sstevel@tonic-gate int rval = 0;
13230Sstevel@tonic-gate md_error_t status = mdnullerror;
13240Sstevel@tonic-gate md_set_desc *sd;
13250Sstevel@tonic-gate int stale_bool = FALSE;
13260Sstevel@tonic-gate int flags;
13270Sstevel@tonic-gate int firstmddb = 1;
13280Sstevel@tonic-gate md_timeval32_t inittime = {0, 0};
13290Sstevel@tonic-gate
13300Sstevel@tonic-gate /*
13310Sstevel@tonic-gate * Error if we don't get some work to do.
13320Sstevel@tonic-gate */
13330Sstevel@tonic-gate if (db_nlp == NULL)
13340Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL));
13350Sstevel@tonic-gate
13360Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0)
13370Sstevel@tonic-gate return (-1);
13380Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
13390Sstevel@tonic-gate c.c_id = 0;
13400Sstevel@tonic-gate c.c_setno = sp->setno;
13410Sstevel@tonic-gate
13420Sstevel@tonic-gate /* Don't need device id information from this ioctl */
13430Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
13440Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
13450Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
13460Sstevel@tonic-gate if (metaislocalset(sp)) {
13470Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
13480Sstevel@tonic-gate mdclrerror(&c.c_mde);
13490Sstevel@tonic-gate else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
13500Sstevel@tonic-gate (! (options & MDCHK_ALLOW_NODBS)))
13510Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
13520Sstevel@tonic-gate } else {
13530Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
13540Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
13550Sstevel@tonic-gate }
13560Sstevel@tonic-gate mdclrerror(&c.c_mde);
13570Sstevel@tonic-gate }
13580Sstevel@tonic-gate /*
13590Sstevel@tonic-gate * Is current set STALE?
13600Sstevel@tonic-gate */
13610Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) {
13620Sstevel@tonic-gate stale_bool = TRUE;
13630Sstevel@tonic-gate }
13640Sstevel@tonic-gate
13650Sstevel@tonic-gate assert(db_nlp != NULL);
13660Sstevel@tonic-gate
13672614Spetede /* if these are the first replicas then the SVM daemons need to run */
13680Sstevel@tonic-gate if (c.c_dbcnt == 0)
13692614Spetede start_svmdaemons = 1;
13700Sstevel@tonic-gate
13710Sstevel@tonic-gate /*
13720Sstevel@tonic-gate * check to see if we will go over the total possible number
13730Sstevel@tonic-gate * of data bases
13740Sstevel@tonic-gate */
13750Sstevel@tonic-gate nlp = db_nlp;
13760Sstevel@tonic-gate while (nlp) {
13770Sstevel@tonic-gate replicacount += dbcnt;
13780Sstevel@tonic-gate nlp = nlp->next;
13790Sstevel@tonic-gate }
13800Sstevel@tonic-gate
13810Sstevel@tonic-gate if ((replicacount + c.c_dbcnt) > c.c_dbmax)
13820Sstevel@tonic-gate return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
13830Sstevel@tonic-gate sp->setno, c.c_dbcnt + replicacount, NULL));
13840Sstevel@tonic-gate
13850Sstevel@tonic-gate /*
13860Sstevel@tonic-gate * go through and check to make sure all locations specified
13870Sstevel@tonic-gate * are legal also pick out driver name;
13880Sstevel@tonic-gate */
13890Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13900Sstevel@tonic-gate diskaddr_t devsize;
13910Sstevel@tonic-gate
13920Sstevel@tonic-gate np = nlp->namep;
13930Sstevel@tonic-gate
13940Sstevel@tonic-gate if (! metaislocalset(sp)) {
13950Sstevel@tonic-gate uint_t partno;
13960Sstevel@tonic-gate uint_t rep_partno;
13970Sstevel@tonic-gate mddrivename_t *dnp = np->drivenamep;
13980Sstevel@tonic-gate
13990Sstevel@tonic-gate /*
14000Sstevel@tonic-gate * make sure that non-local database replicas
14010Sstevel@tonic-gate * are always on the replica slice.
14020Sstevel@tonic-gate */
14030Sstevel@tonic-gate if (meta_replicaslice(dnp,
14040Sstevel@tonic-gate &rep_partno, ep) != 0)
14050Sstevel@tonic-gate return (-1);
14060Sstevel@tonic-gate if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
14070Sstevel@tonic-gate return (-1);
14080Sstevel@tonic-gate if (partno != rep_partno)
14090Sstevel@tonic-gate return (mddeverror(ep, MDE_REPCOMP_ONLY,
14100Sstevel@tonic-gate np->dev, sp->setname));
14110Sstevel@tonic-gate }
14120Sstevel@tonic-gate
14130Sstevel@tonic-gate if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
14140Sstevel@tonic-gate ep)) {
14150Sstevel@tonic-gate return (-1);
14160Sstevel@tonic-gate }
14170Sstevel@tonic-gate
14180Sstevel@tonic-gate if ((devsize = metagetsize(np, ep)) == -1)
14190Sstevel@tonic-gate return (-1);
14200Sstevel@tonic-gate
14210Sstevel@tonic-gate if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
14220Sstevel@tonic-gate return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
14230Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, devsize,
14240Sstevel@tonic-gate np->cname));
14250Sstevel@tonic-gate }
14260Sstevel@tonic-gate
14270Sstevel@tonic-gate /*
14280Sstevel@tonic-gate * If first disk in set we don't have lb_inittime yet for use as
14290Sstevel@tonic-gate * mb_setcreatetime so don't go looking for it. WE'll come back
14300Sstevel@tonic-gate * later and update after the locator block has been created.
14310Sstevel@tonic-gate * If this isn't the first disk in the set, we have a locator
14320Sstevel@tonic-gate * block and thus we have lb_inittime. Set mb_setcreatetime to
14330Sstevel@tonic-gate * lb_inittime.
14340Sstevel@tonic-gate */
14350Sstevel@tonic-gate if (! metaislocalset(sp)) {
14360Sstevel@tonic-gate if (c.c_dbcnt != 0) {
14370Sstevel@tonic-gate firstmddb = 0;
14380Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, ep);
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate }
14410Sstevel@tonic-gate
14420Sstevel@tonic-gate /*
14430Sstevel@tonic-gate * go through and write all master blocks
14440Sstevel@tonic-gate */
14450Sstevel@tonic-gate
14460Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14470Sstevel@tonic-gate np = nlp->namep;
14480Sstevel@tonic-gate
14490Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) < 0)
14500Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname));
14510Sstevel@tonic-gate
14520Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) {
14530Sstevel@tonic-gate if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
14540Sstevel@tonic-gate inittime, ep)) {
14550Sstevel@tonic-gate (void) close(fd);
14560Sstevel@tonic-gate return (-1);
14570Sstevel@tonic-gate }
14580Sstevel@tonic-gate }
14590Sstevel@tonic-gate (void) close(fd);
14600Sstevel@tonic-gate }
14610Sstevel@tonic-gate
14620Sstevel@tonic-gate if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
14630Sstevel@tonic-gate return (-1);
14640Sstevel@tonic-gate
14650Sstevel@tonic-gate if (! metaislocalset(sp)) {
14660Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
14670Sstevel@tonic-gate if (! mdisok(ep))
14680Sstevel@tonic-gate return (-1);
14690Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
14700Sstevel@tonic-gate return (-1);
14710Sstevel@tonic-gate
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate
14740Sstevel@tonic-gate /*
14750Sstevel@tonic-gate * go through and tell kernel to add them
14760Sstevel@tonic-gate */
14770Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14780Sstevel@tonic-gate mdcinfo_t *cinfo;
14790Sstevel@tonic-gate
14800Sstevel@tonic-gate np = nlp->namep;
14810Sstevel@tonic-gate
14820Sstevel@tonic-gate if ((cinfo = metagetcinfo(np, ep)) == NULL) {
14830Sstevel@tonic-gate rval = -1;
14840Sstevel@tonic-gate goto out;
14850Sstevel@tonic-gate }
14860Sstevel@tonic-gate
14870Sstevel@tonic-gate /*
14880Sstevel@tonic-gate * If mddb is being added to MN diskset and there already
14890Sstevel@tonic-gate * exists a valid mddb in the set (which equates to this
14900Sstevel@tonic-gate * node being an owner of the set) then use rpc.mdcommd
14910Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync.
14920Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd
14930Sstevel@tonic-gate * can't write the message to the mddb.
14940Sstevel@tonic-gate *
14950Sstevel@tonic-gate * Otherwise, just add mddb to this node.
14960Sstevel@tonic-gate */
14970Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
14980Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
14990Sstevel@tonic-gate md_mn_result_t *resultp = NULL;
15000Sstevel@tonic-gate md_mn_msg_meta_db_attach_t attach;
15010Sstevel@tonic-gate int send_rval;
15020Sstevel@tonic-gate
15030Sstevel@tonic-gate /*
15040Sstevel@tonic-gate * In a scenario where new replicas had been added on
15050Sstevel@tonic-gate * the master, and then all of the old replicas failed
15060Sstevel@tonic-gate * before the slaves had knowledge of the new replicas,
15070Sstevel@tonic-gate * the slaves are unable to re-parse in the mddb
15080Sstevel@tonic-gate * from the new replicas since the slaves have no
15090Sstevel@tonic-gate * knowledge of the new replicas. The following
15100Sstevel@tonic-gate * algorithm solves this problem:
15110Sstevel@tonic-gate * - META_DB_ATTACH message generates submsgs
15120Sstevel@tonic-gate * - BLOCK parse (master)
15130Sstevel@tonic-gate * - MDDB_ATTACH new replicas
15140Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse
15150Sstevel@tonic-gate * information to be sent from master
15160Sstevel@tonic-gate * to slaves at a higher class than the
15170Sstevel@tonic-gate * unblock so the parse message will
15180Sstevel@tonic-gate * reach slaves before unblock message.
15190Sstevel@tonic-gate */
15200Sstevel@tonic-gate attach.msg_l_dev = np->dev;
15210Sstevel@tonic-gate attach.msg_cnt = dbcnt;
15220Sstevel@tonic-gate attach.msg_dbsize = dbsize;
15230Sstevel@tonic-gate (void) strncpy(attach.msg_dname, cinfo->dname,
15240Sstevel@tonic-gate sizeof (attach.msg_dname));
15250Sstevel@tonic-gate (void) splitname(np->bname, &attach.msg_splitname);
15260Sstevel@tonic-gate attach.msg_options = options;
15270Sstevel@tonic-gate
15280Sstevel@tonic-gate /* Set devid to NULL until devids are supported */
15290Sstevel@tonic-gate attach.msg_devid[0] = NULL;
15300Sstevel@tonic-gate
15310Sstevel@tonic-gate /*
15320Sstevel@tonic-gate * If reconfig cycle has been started, this node is
15330Sstevel@tonic-gate * stuck in in the return step until this command has
15340Sstevel@tonic-gate * completed. If mdcommd is suspended, ask
15350Sstevel@tonic-gate * send_message to fail (instead of retrying)
15360Sstevel@tonic-gate * so that metaset can finish allowing the reconfig
15370Sstevel@tonic-gate * cycle to proceed.
15380Sstevel@tonic-gate */
15390Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND;
15400Sstevel@tonic-gate if (stale_bool == TRUE)
15410Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG;
15420Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno,
15435109Spetede MD_MN_MSG_META_DB_ATTACH,
1544*8452SJohn.Wren.Kennedy@Sun.COM flags, 0, (char *)&attach,
15455109Spetede sizeof (md_mn_msg_meta_db_attach_t),
15465109Spetede &resultp, ep);
15470Sstevel@tonic-gate if (send_rval != 0) {
15480Sstevel@tonic-gate rval = -1;
15490Sstevel@tonic-gate if (resultp == NULL)
15500Sstevel@tonic-gate (void) mddserror(ep,
15510Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
15520Sstevel@tonic-gate sp->setno, NULL, NULL,
15530Sstevel@tonic-gate sp->setname);
15540Sstevel@tonic-gate else {
15550Sstevel@tonic-gate (void) mdstealerror(ep,
15560Sstevel@tonic-gate &(resultp->mmr_ep));
15570Sstevel@tonic-gate if (mdisok(ep)) {
15580Sstevel@tonic-gate (void) mddserror(ep,
15590Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
15600Sstevel@tonic-gate sp->setno, NULL, NULL,
15610Sstevel@tonic-gate sp->setname);
15620Sstevel@tonic-gate }
15630Sstevel@tonic-gate free_result(resultp);
15640Sstevel@tonic-gate }
15650Sstevel@tonic-gate goto out;
15660Sstevel@tonic-gate }
15670Sstevel@tonic-gate if (resultp)
15680Sstevel@tonic-gate free_result(resultp);
15690Sstevel@tonic-gate } else {
15705109Spetede /* Adding mddb(s) to just this node */
15715109Spetede for (i = 0; i < dbcnt; i++) {
15725109Spetede (void) memset(&c, 0, sizeof (c));
15735109Spetede /* Fill in device/replica info */
15745109Spetede c.c_locator.l_dev = meta_cmpldev(np->dev);
15755109Spetede c.c_locator.l_blkno = i * dbsize + 16;
15765109Spetede blkno = c.c_locator.l_blkno;
15775109Spetede (void) strncpy(c.c_locator.l_driver,
15785109Spetede cinfo->dname,
15795109Spetede sizeof (c.c_locator.l_driver));
15800Sstevel@tonic-gate
15815109Spetede if (splitname(np->bname, &c.c_devname) ==
15825109Spetede METASPLIT_LONGDISKNAME && devid_in_use ==
15835109Spetede FALSE) {
15845109Spetede rval = mddeverror(ep,
15855109Spetede MDE_DISKNAMETOOLONG,
15865109Spetede NODEV64, np->rname);
15875109Spetede goto out;
15880Sstevel@tonic-gate }
15895109Spetede
15905109Spetede c.c_locator.l_mnum = meta_getminor(np->dev);
15915109Spetede
15925109Spetede /* Fill in setno, setname, and sideno */
15935109Spetede c.c_setno = sp->setno;
15945109Spetede if (! metaislocalset(sp)) {
15955109Spetede if (MD_MNSET_DESC(sd)) {
15965109Spetede c.c_multi_node = 1;
15975109Spetede }
15985109Spetede }
15995109Spetede (void) strcpy(c.c_setname, sp->setname);
16005109Spetede c.c_sideno = sideno;
16010Sstevel@tonic-gate
16025109Spetede /*
16035109Spetede * Don't need device id information from this
16045109Spetede * ioctl Kernel determines device id from
16055109Spetede * dev_t, which is just what this code would do.
16065109Spetede */
16075109Spetede c.c_locator.l_devid = (uint64_t)0;
16085109Spetede c.c_locator.l_devid_flags = 0;
16090Sstevel@tonic-gate
16105109Spetede if (timeval != NULL)
16115109Spetede c.c_timestamp = *timeval;
16120Sstevel@tonic-gate
16135109Spetede if (setup_med_cfg(sp, &c,
16145109Spetede (options & MDCHK_SET_FORCE), ep)) {
16155109Spetede rval = -1;
16165109Spetede goto out;
16175109Spetede }
16180Sstevel@tonic-gate
16195109Spetede if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
16205109Spetede NULL) != 0) {
16215109Spetede rval = mdstealerror(ep, &c.c_mde);
16225109Spetede goto out;
16235109Spetede }
16245109Spetede /*
16255109Spetede * This is either a traditional diskset OR this
16265109Spetede * is the first replica added to a MN diskset.
16275109Spetede * In either case, set broadcast to NO_BCAST so
16285109Spetede * that message won't go through rpc.mdcommd.
16295109Spetede * If this is a traditional diskset, the bcast
16305109Spetede * flag is ignored since traditional disksets
16315109Spetede * don't use the rpc.mdcommd.
16325109Spetede */
16335109Spetede if (meta_db_addsidenms(sp, np, blkno,
16345109Spetede DB_ADDSIDENMS_NO_BCAST, ep))
16355109Spetede goto out;
16360Sstevel@tonic-gate }
16370Sstevel@tonic-gate }
16380Sstevel@tonic-gate if (! metaislocalset(sp)) {
16390Sstevel@tonic-gate /* update the dbcnt and size in dd */
16400Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next)
16410Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) {
16420Sstevel@tonic-gate p->dd_dbcnt = dbcnt;
16430Sstevel@tonic-gate p->dd_dbsize = dbsize;
16440Sstevel@tonic-gate break;
16450Sstevel@tonic-gate }
16460Sstevel@tonic-gate }
16470Sstevel@tonic-gate
16480Sstevel@tonic-gate /*
16490Sstevel@tonic-gate * If this was the first addition of disks to the
16500Sstevel@tonic-gate * diskset you now need to update the mb_setcreatetime
16510Sstevel@tonic-gate * which needed lb_inittime which wasn't there until now.
16520Sstevel@tonic-gate */
16530Sstevel@tonic-gate if (firstmddb) {
16540Sstevel@tonic-gate if (meta_update_mb(sp, dd, ep) != 0) {
16550Sstevel@tonic-gate return (-1);
16560Sstevel@tonic-gate }
16570Sstevel@tonic-gate }
16580Sstevel@tonic-gate (void) close(fd);
16590Sstevel@tonic-gate }
16600Sstevel@tonic-gate
16610Sstevel@tonic-gate out:
16620Sstevel@tonic-gate if (metaislocalset(sp)) {
16630Sstevel@tonic-gate
16640Sstevel@tonic-gate /* everything looks fine. Start mdmonitord */
16652614Spetede if (rval == 0 && start_svmdaemons == 1) {
16660Sstevel@tonic-gate if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
16670Sstevel@tonic-gate mde_perror(&status, "");
16680Sstevel@tonic-gate mdclrerror(&status);
16690Sstevel@tonic-gate }
16700Sstevel@tonic-gate }
16710Sstevel@tonic-gate
16720Sstevel@tonic-gate if (buildconf(sp, &status)) {
16730Sstevel@tonic-gate /* Don't mask any previous errors */
16740Sstevel@tonic-gate if (rval == 0)
16750Sstevel@tonic-gate rval = mdstealerror(ep, &status);
16760Sstevel@tonic-gate return (rval);
16770Sstevel@tonic-gate }
16780Sstevel@tonic-gate
16790Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) {
16800Sstevel@tonic-gate /* Don't mask any previous errors */
16810Sstevel@tonic-gate if (rval == 0)
16820Sstevel@tonic-gate rval = mdstealerror(ep, &status);
16830Sstevel@tonic-gate }
16840Sstevel@tonic-gate } else {
16850Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd,
16860Sstevel@tonic-gate (options & MDCHK_SET_LOCKED),
16870Sstevel@tonic-gate (options & MDCHK_SET_FORCE),
16880Sstevel@tonic-gate &status)) {
16890Sstevel@tonic-gate /* Don't mask any previous errors */
16900Sstevel@tonic-gate if (rval == 0)
16910Sstevel@tonic-gate rval = mdstealerror(ep, &status);
16920Sstevel@tonic-gate else
16930Sstevel@tonic-gate mdclrerror(&status);
16940Sstevel@tonic-gate }
16950Sstevel@tonic-gate metafreedrivedesc(&dd);
16960Sstevel@tonic-gate }
16970Sstevel@tonic-gate /*
16980Sstevel@tonic-gate * For MN disksets that already had already had nodes joined
16990Sstevel@tonic-gate * before the attach of this mddb(s), the name invalidation is
17000Sstevel@tonic-gate * done by the commd handler routine. Otherwise, if this
17010Sstevel@tonic-gate * is the first attach of a MN diskset mddb, the invalidation
17020Sstevel@tonic-gate * must be done here since the first attach cannot be sent
17030Sstevel@tonic-gate * via the commd since there are no nodes joined to the set yet.
17040Sstevel@tonic-gate */
17050Sstevel@tonic-gate if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
17060Sstevel@tonic-gate (MD_MNSET_DESC(sd) &&
17070Sstevel@tonic-gate (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
17080Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
17090Sstevel@tonic-gate meta_invalidate_name(nlp->namep);
17100Sstevel@tonic-gate }
17110Sstevel@tonic-gate }
17120Sstevel@tonic-gate return (rval);
17130Sstevel@tonic-gate }
17140Sstevel@tonic-gate
17150Sstevel@tonic-gate /*
17160Sstevel@tonic-gate * deletelist_length
17170Sstevel@tonic-gate *
17180Sstevel@tonic-gate * return the number of slices that have been specified for deletion
17190Sstevel@tonic-gate * on the metadb command line. This does not calculate the number
17200Sstevel@tonic-gate * of replicas because there may be multiple replicas per slice.
17210Sstevel@tonic-gate */
17220Sstevel@tonic-gate static int
deletelist_length(mdnamelist_t * db_nlp)17230Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
17240Sstevel@tonic-gate {
17250Sstevel@tonic-gate
17260Sstevel@tonic-gate mdnamelist_t *nlp;
17270Sstevel@tonic-gate int list_length = 0;
17280Sstevel@tonic-gate
17290Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17300Sstevel@tonic-gate list_length++;
17310Sstevel@tonic-gate }
17320Sstevel@tonic-gate
17330Sstevel@tonic-gate return (list_length);
17340Sstevel@tonic-gate }
17350Sstevel@tonic-gate
17360Sstevel@tonic-gate static int
in_deletelist(char * devname,mdnamelist_t * db_nlp)17370Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
17380Sstevel@tonic-gate {
17390Sstevel@tonic-gate
17400Sstevel@tonic-gate mdnamelist_t *nlp;
17410Sstevel@tonic-gate mdname_t *np;
17420Sstevel@tonic-gate int index = 0;
17430Sstevel@tonic-gate
17440Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17450Sstevel@tonic-gate np = nlp->namep;
17460Sstevel@tonic-gate
17470Sstevel@tonic-gate if (strcmp(devname, np->bname) == 0)
17480Sstevel@tonic-gate return (index);
17490Sstevel@tonic-gate index++;
17500Sstevel@tonic-gate }
17510Sstevel@tonic-gate
17520Sstevel@tonic-gate return (-1);
17530Sstevel@tonic-gate }
17540Sstevel@tonic-gate
17550Sstevel@tonic-gate /*
17560Sstevel@tonic-gate * Delete replicas from set. This happens as a result of:
17570Sstevel@tonic-gate * - metadb [-s set_name] -d
17580Sstevel@tonic-gate * - metaset -s set_name -a disk (causes a rebalance of mddbs)
17590Sstevel@tonic-gate * - metaset -s set_name -d disk
17600Sstevel@tonic-gate * - metaset -s set_name -b
17610Sstevel@tonic-gate *
17620Sstevel@tonic-gate * For a local set, this routine is run on the local set host.
17630Sstevel@tonic-gate *
17640Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that
17650Sstevel@tonic-gate * is running the metaset command.
17660Sstevel@tonic-gate *
17670Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is
17680Sstevel@tonic-gate * running the metaset command. This detach routine is sent to all
17690Sstevel@tonic-gate * of the joined nodes in the diskset using commd. This keeps
17700Sstevel@tonic-gate * the nodes in-sync.
17710Sstevel@tonic-gate */
17720Sstevel@tonic-gate int
meta_db_detach(mdsetname_t * sp,mdnamelist_t * db_nlp,mdforceopts_t force_option,char * sysfilename,md_error_t * ep)17730Sstevel@tonic-gate meta_db_detach(
17740Sstevel@tonic-gate mdsetname_t *sp,
17750Sstevel@tonic-gate mdnamelist_t *db_nlp,
17760Sstevel@tonic-gate mdforceopts_t force_option,
17770Sstevel@tonic-gate char *sysfilename,
17780Sstevel@tonic-gate md_error_t *ep
17790Sstevel@tonic-gate )
17800Sstevel@tonic-gate {
17810Sstevel@tonic-gate struct mddb_config c;
17820Sstevel@tonic-gate mdnamelist_t *nlp;
17830Sstevel@tonic-gate mdname_t *np;
17840Sstevel@tonic-gate md_drive_desc *dd = NULL;
17850Sstevel@tonic-gate md_drive_desc *p;
17860Sstevel@tonic-gate int replicacount;
17870Sstevel@tonic-gate int replica_delete_count;
17880Sstevel@tonic-gate int nr_replica_slices;
17890Sstevel@tonic-gate int i;
17900Sstevel@tonic-gate int stop_svmdaemons = 0;
17910Sstevel@tonic-gate int rval = 0;
17920Sstevel@tonic-gate int index;
17930Sstevel@tonic-gate int valid_replicas_nottodelete = 0;
17940Sstevel@tonic-gate int invalid_replicas_nottodelete = 0;
17950Sstevel@tonic-gate int invalid_replicas_todelete = 0;
17960Sstevel@tonic-gate int errored = 0;
17970Sstevel@tonic-gate int *tag_array;
17980Sstevel@tonic-gate int fd = -1;
17990Sstevel@tonic-gate md_error_t status = mdnullerror;
18000Sstevel@tonic-gate md_set_desc *sd;
18010Sstevel@tonic-gate int stale_bool = FALSE;
18020Sstevel@tonic-gate int flags;
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate /*
18050Sstevel@tonic-gate * Error if we don't get some work to do.
18060Sstevel@tonic-gate */
18070Sstevel@tonic-gate if (db_nlp == NULL)
18080Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL));
18090Sstevel@tonic-gate
18100Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0)
18110Sstevel@tonic-gate return (-1);
18120Sstevel@tonic-gate
18130Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
18140Sstevel@tonic-gate c.c_id = 0;
18150Sstevel@tonic-gate c.c_setno = sp->setno;
18160Sstevel@tonic-gate
18170Sstevel@tonic-gate /* Don't need device id information from this ioctl */
18180Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
18190Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
18200Sstevel@tonic-gate
18210Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18220Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
18230Sstevel@tonic-gate
18240Sstevel@tonic-gate /*
18250Sstevel@tonic-gate * Is current set STALE?
18260Sstevel@tonic-gate */
18270Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) {
18280Sstevel@tonic-gate stale_bool = TRUE;
18290Sstevel@tonic-gate }
18300Sstevel@tonic-gate
18310Sstevel@tonic-gate replicacount = c.c_dbcnt;
18320Sstevel@tonic-gate
18330Sstevel@tonic-gate assert(db_nlp != NULL);
18340Sstevel@tonic-gate
18350Sstevel@tonic-gate /*
18360Sstevel@tonic-gate * go through and gather how many data bases are on each
18370Sstevel@tonic-gate * device specified.
18380Sstevel@tonic-gate */
18390Sstevel@tonic-gate
18400Sstevel@tonic-gate nr_replica_slices = deletelist_length(db_nlp);
18410Sstevel@tonic-gate tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
18420Sstevel@tonic-gate
18430Sstevel@tonic-gate replica_delete_count = 0;
18440Sstevel@tonic-gate for (i = 0; i < replicacount; i++) {
18450Sstevel@tonic-gate char *devname;
18460Sstevel@tonic-gate int found = 0;
18470Sstevel@tonic-gate
18480Sstevel@tonic-gate c.c_id = i;
18490Sstevel@tonic-gate
18500Sstevel@tonic-gate /* Don't need device id information from this ioctl */
18510Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
18520Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
18530Sstevel@tonic-gate
18540Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18550Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
18560Sstevel@tonic-gate
18570Sstevel@tonic-gate devname = splicename(&c.c_devname);
18580Sstevel@tonic-gate
18595109Spetede if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
18605109Spetede Free(devname);
18615109Spetede devname = getlongname(&c, ep);
18625109Spetede if (devname == NULL) {
18635109Spetede return (-1);
18645109Spetede }
18655109Spetede }
18665109Spetede
18670Sstevel@tonic-gate if ((index = in_deletelist(devname, db_nlp)) != -1) {
18680Sstevel@tonic-gate found = 1;
18690Sstevel@tonic-gate tag_array[index] = 1;
18700Sstevel@tonic-gate replica_delete_count++;
18710Sstevel@tonic-gate }
18720Sstevel@tonic-gate
18730Sstevel@tonic-gate errored = c.c_locator.l_flags & (MDDB_F_EREAD |
18745109Spetede MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
18755109Spetede MDDB_F_EDATA | MDDB_F_EMASTER);
18760Sstevel@tonic-gate
18770Sstevel@tonic-gate /*
18780Sstevel@tonic-gate * There are four combinations of "errored" and "found"
18790Sstevel@tonic-gate * and they are used to find the number of
18800Sstevel@tonic-gate * (a) valid/invalid replicas that are not in the delete
18810Sstevel@tonic-gate * list and are available in the system.
18820Sstevel@tonic-gate * (b) valid/invalid replicas that are to be deleted.
18830Sstevel@tonic-gate */
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate if (errored && !found) /* errored and !found */
18860Sstevel@tonic-gate invalid_replicas_nottodelete++;
18870Sstevel@tonic-gate else if (!found) /* !errored and !found */
18880Sstevel@tonic-gate valid_replicas_nottodelete++;
18890Sstevel@tonic-gate else if (errored) /* errored and found */
18900Sstevel@tonic-gate invalid_replicas_todelete++;
18910Sstevel@tonic-gate /*
18920Sstevel@tonic-gate * else it is !errored and found. This means
18930Sstevel@tonic-gate * valid_replicas_todelete++; But this variable will not
18940Sstevel@tonic-gate * be used anywhere
18950Sstevel@tonic-gate */
18960Sstevel@tonic-gate
18970Sstevel@tonic-gate Free(devname);
18980Sstevel@tonic-gate }
18990Sstevel@tonic-gate
19000Sstevel@tonic-gate index = 0;
19010Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19020Sstevel@tonic-gate np = nlp->namep;
19030Sstevel@tonic-gate if (tag_array[index++] != 1) {
19040Sstevel@tonic-gate Free(tag_array);
19050Sstevel@tonic-gate return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
19060Sstevel@tonic-gate }
19070Sstevel@tonic-gate }
19080Sstevel@tonic-gate
19090Sstevel@tonic-gate Free(tag_array);
19100Sstevel@tonic-gate
19110Sstevel@tonic-gate
19120Sstevel@tonic-gate /* if all replicas are deleted stop mdmonitord */
19130Sstevel@tonic-gate if ((replicacount - replica_delete_count) == 0)
19140Sstevel@tonic-gate stop_svmdaemons = 1;
19150Sstevel@tonic-gate
19160Sstevel@tonic-gate if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
19170Sstevel@tonic-gate if (force_option & MDFORCE_NONE)
19180Sstevel@tonic-gate return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
19190Sstevel@tonic-gate if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
19200Sstevel@tonic-gate return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
19210Sstevel@tonic-gate }
19220Sstevel@tonic-gate
19230Sstevel@tonic-gate /*
19240Sstevel@tonic-gate * The following algorithms are followed to check for deletion:
19250Sstevel@tonic-gate * (a) If the delete list(db_nlp) has all invalid replicas and no valid
19260Sstevel@tonic-gate * replicas, then deletion should be allowed.
19270Sstevel@tonic-gate * (b) Deletion should be allowed only if valid replicas that are "not"
19280Sstevel@tonic-gate * to be deleted is always greater than the invalid replicas that
19290Sstevel@tonic-gate * are "not" to be deleted.
19300Sstevel@tonic-gate * (c) If the user uses -f option, then deletion should be allowed.
19310Sstevel@tonic-gate */
19320Sstevel@tonic-gate
19330Sstevel@tonic-gate if ((invalid_replicas_todelete != replica_delete_count) &&
19345109Spetede (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
19355109Spetede (force_option != MDFORCE_LOCAL))
19360Sstevel@tonic-gate return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
19370Sstevel@tonic-gate
19380Sstevel@tonic-gate /*
19390Sstevel@tonic-gate * go through and tell kernel to delete them
19400Sstevel@tonic-gate */
19410Sstevel@tonic-gate
19420Sstevel@tonic-gate /* Don't need device id information from this ioctl */
19430Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
19440Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
19450Sstevel@tonic-gate
19460Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
19470Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
19480Sstevel@tonic-gate
19490Sstevel@tonic-gate if (! metaislocalset(sp)) {
19500Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
19510Sstevel@tonic-gate if (! mdisok(ep))
19520Sstevel@tonic-gate return (-1);
19530Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL)
19540Sstevel@tonic-gate return (-1);
19550Sstevel@tonic-gate }
19560Sstevel@tonic-gate
19570Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19580Sstevel@tonic-gate np = nlp->namep;
19590Sstevel@tonic-gate
19600Sstevel@tonic-gate /*
19610Sstevel@tonic-gate * If mddb is being deleted from MN diskset and node is
19620Sstevel@tonic-gate * an owner of the diskset then use rpc.mdcommd
19630Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync.
19640Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd
19650Sstevel@tonic-gate * can't write the message to the mddb.
19660Sstevel@tonic-gate *
19670Sstevel@tonic-gate * When mddbs are first being added to set, a detach can
19680Sstevel@tonic-gate * be called before any node has joined the diskset, so
19690Sstevel@tonic-gate * must check to see if node is an owner of the diskset.
19700Sstevel@tonic-gate *
19710Sstevel@tonic-gate * Otherwise, just delete mddb from this node.
19720Sstevel@tonic-gate */
19730Sstevel@tonic-gate
19740Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
19750Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
19760Sstevel@tonic-gate md_mn_result_t *resultp;
19770Sstevel@tonic-gate md_mn_msg_meta_db_detach_t detach;
19780Sstevel@tonic-gate int send_rval;
19790Sstevel@tonic-gate
19800Sstevel@tonic-gate /*
19810Sstevel@tonic-gate * The following algorithm is used to detach replicas.
19820Sstevel@tonic-gate * - META_DB_DETACH message generates submsgs
19830Sstevel@tonic-gate * - BLOCK parse (master)
19840Sstevel@tonic-gate * - MDDB_DETACH replicas
19850Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse
19860Sstevel@tonic-gate * information to be sent from master
19870Sstevel@tonic-gate * to slaves at a higher class than the
19880Sstevel@tonic-gate * unblock so the parse message will
19890Sstevel@tonic-gate * reach slaves before unblock message.
19900Sstevel@tonic-gate */
19910Sstevel@tonic-gate (void) splitname(np->bname, &detach.msg_splitname);
19920Sstevel@tonic-gate
19930Sstevel@tonic-gate /* Set devid to NULL until devids are supported */
19940Sstevel@tonic-gate detach.msg_devid[0] = NULL;
19950Sstevel@tonic-gate
19960Sstevel@tonic-gate /*
19970Sstevel@tonic-gate * If reconfig cycle has been started, this node is
19980Sstevel@tonic-gate * stuck in in the return step until this command has
19990Sstevel@tonic-gate * completed. If mdcommd is suspended, ask
20000Sstevel@tonic-gate * send_message to fail (instead of retrying)
20010Sstevel@tonic-gate * so that metaset can finish allowing the reconfig
20020Sstevel@tonic-gate * cycle to proceed.
20030Sstevel@tonic-gate */
20040Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND;
20050Sstevel@tonic-gate if (stale_bool == TRUE)
20060Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG;
20070Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno,
20085109Spetede MD_MN_MSG_META_DB_DETACH,
2009*8452SJohn.Wren.Kennedy@Sun.COM flags, 0, (char *)&detach,
20105109Spetede sizeof (md_mn_msg_meta_db_detach_t),
20115109Spetede &resultp, ep);
20120Sstevel@tonic-gate if (send_rval != 0) {
20130Sstevel@tonic-gate rval = -1;
20140Sstevel@tonic-gate if (resultp == NULL)
20150Sstevel@tonic-gate (void) mddserror(ep,
20160Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
20170Sstevel@tonic-gate sp->setno, NULL, NULL,
20180Sstevel@tonic-gate sp->setname);
20190Sstevel@tonic-gate else {
20200Sstevel@tonic-gate (void) mdstealerror(ep,
20210Sstevel@tonic-gate &(resultp->mmr_ep));
20220Sstevel@tonic-gate if (mdisok(ep)) {
20230Sstevel@tonic-gate (void) mddserror(ep,
20240Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL,
20250Sstevel@tonic-gate sp->setno, NULL, NULL,
20260Sstevel@tonic-gate sp->setname);
20270Sstevel@tonic-gate }
20280Sstevel@tonic-gate free_result(resultp);
20290Sstevel@tonic-gate }
20300Sstevel@tonic-gate goto out;
20310Sstevel@tonic-gate }
20320Sstevel@tonic-gate if (resultp)
20330Sstevel@tonic-gate free_result(resultp);
20340Sstevel@tonic-gate } else {
20350Sstevel@tonic-gate i = 0;
20360Sstevel@tonic-gate while (i < c.c_dbcnt) {
20370Sstevel@tonic-gate char *devname;
20380Sstevel@tonic-gate
20390Sstevel@tonic-gate c.c_id = i;
20400Sstevel@tonic-gate
20410Sstevel@tonic-gate /* Don't need devid info from this ioctl */
20420Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
20430Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
20440Sstevel@tonic-gate
20450Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c,
20460Sstevel@tonic-gate &c.c_mde, NULL)) {
20470Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde);
20480Sstevel@tonic-gate goto out;
20490Sstevel@tonic-gate }
20500Sstevel@tonic-gate
20510Sstevel@tonic-gate devname = splicename(&c.c_devname);
20525109Spetede
20535109Spetede if (strstr(devname, META_LONGDISKNAME_STR)
20545109Spetede != NULL) {
20555109Spetede Free(devname);
20565109Spetede devname = getlongname(&c, ep);
20575109Spetede if (devname == NULL) {
20585109Spetede return (-1);
20595109Spetede }
20605109Spetede }
20615109Spetede
20620Sstevel@tonic-gate if (strcmp(devname, np->bname) != 0) {
20630Sstevel@tonic-gate Free(devname);
20640Sstevel@tonic-gate i++;
20650Sstevel@tonic-gate continue;
20660Sstevel@tonic-gate }
20670Sstevel@tonic-gate Free(devname);
20680Sstevel@tonic-gate
20690Sstevel@tonic-gate /* Don't need devid info from this ioctl */
20700Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
20710Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
20720Sstevel@tonic-gate
20730Sstevel@tonic-gate if (metaioctl(MD_DB_DELDEV, &c,
20740Sstevel@tonic-gate &c.c_mde, NULL) != 0) {
20750Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde);
20760Sstevel@tonic-gate goto out;
20770Sstevel@tonic-gate }
20780Sstevel@tonic-gate
20790Sstevel@tonic-gate /* Not incrementing "i" intentionally */
20800Sstevel@tonic-gate }
20810Sstevel@tonic-gate }
20820Sstevel@tonic-gate if (! metaislocalset(sp)) {
20830Sstevel@tonic-gate /* update the dbcnt and size in dd */
20840Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) {
20850Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) {
20860Sstevel@tonic-gate p->dd_dbcnt = 0;
20870Sstevel@tonic-gate p->dd_dbsize = 0;
20880Sstevel@tonic-gate break;
20890Sstevel@tonic-gate }
20900Sstevel@tonic-gate }
20910Sstevel@tonic-gate
20920Sstevel@tonic-gate /*
20930Sstevel@tonic-gate * Slam a dummy master block and make it self
20940Sstevel@tonic-gate * identifying
20950Sstevel@tonic-gate */
20960Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) >= 0) {
20970Sstevel@tonic-gate meta_mkdummymaster(sp, fd, 16);
20980Sstevel@tonic-gate (void) close(fd);
20990Sstevel@tonic-gate }
21000Sstevel@tonic-gate }
21010Sstevel@tonic-gate }
21020Sstevel@tonic-gate out:
21030Sstevel@tonic-gate if (metaislocalset(sp)) {
21040Sstevel@tonic-gate /*
21050Sstevel@tonic-gate * Stop all the daemons if there are
21060Sstevel@tonic-gate * no more replicas so that the module can be
21070Sstevel@tonic-gate * unloaded.
21080Sstevel@tonic-gate */
21090Sstevel@tonic-gate if (rval == 0 && stop_svmdaemons == 1) {
21100Sstevel@tonic-gate char buf[MAXPATHLEN];
21110Sstevel@tonic-gate int i;
21120Sstevel@tonic-gate
21130Sstevel@tonic-gate for (i = 0; i < DAEMON_COUNT; i++) {
21140Sstevel@tonic-gate (void) snprintf(buf, MAXPATHLEN,
21155109Spetede "/usr/bin/pkill -%s -x %s",
21165109Spetede svmd_kill_list[i].svmd_kill_val,
21175109Spetede svmd_kill_list[i].svmd_name);
21180Sstevel@tonic-gate if (pclose(popen(buf, "w")) == -1)
21190Sstevel@tonic-gate md_perror(buf);
21200Sstevel@tonic-gate }
21210Sstevel@tonic-gate
21220Sstevel@tonic-gate if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
21230Sstevel@tonic-gate mde_perror(&status, "");
21240Sstevel@tonic-gate mdclrerror(&status);
21250Sstevel@tonic-gate }
21260Sstevel@tonic-gate }
21270Sstevel@tonic-gate if (buildconf(sp, &status)) {
21280Sstevel@tonic-gate /* Don't mask any previous errors */
21290Sstevel@tonic-gate if (rval == 0)
21300Sstevel@tonic-gate rval = mdstealerror(ep, &status);
21310Sstevel@tonic-gate else
21320Sstevel@tonic-gate mdclrerror(&status);
21330Sstevel@tonic-gate return (rval);
21340Sstevel@tonic-gate }
21350Sstevel@tonic-gate
21360Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) {
21370Sstevel@tonic-gate /* Don't mask any previous errors */
21380Sstevel@tonic-gate if (rval == 0)
21390Sstevel@tonic-gate rval = mdstealerror(ep, &status);
21400Sstevel@tonic-gate else
21410Sstevel@tonic-gate mdclrerror(&status);
21420Sstevel@tonic-gate }
21430Sstevel@tonic-gate } else {
21440Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd,
21450Sstevel@tonic-gate (force_option & MDFORCE_SET_LOCKED),
21460Sstevel@tonic-gate ((force_option & MDFORCE_LOCAL) |
21470Sstevel@tonic-gate (force_option & MDFORCE_DS)), &status)) {
21480Sstevel@tonic-gate /* Don't mask any previous errors */
21490Sstevel@tonic-gate if (rval == 0)
21500Sstevel@tonic-gate rval = mdstealerror(ep, &status);
21510Sstevel@tonic-gate else
21520Sstevel@tonic-gate mdclrerror(&status);
21530Sstevel@tonic-gate }
21540Sstevel@tonic-gate metafreedrivedesc(&dd);
21550Sstevel@tonic-gate }
21560Sstevel@tonic-gate if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
21570Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
21580Sstevel@tonic-gate meta_invalidate_name(nlp->namep);
21590Sstevel@tonic-gate }
21600Sstevel@tonic-gate }
21610Sstevel@tonic-gate return (rval);
21620Sstevel@tonic-gate }
21630Sstevel@tonic-gate
21640Sstevel@tonic-gate static md_replica_t *
metareplicaname(mdsetname_t * sp,int flags,struct mddb_config * c,md_error_t * ep)21650Sstevel@tonic-gate metareplicaname(
21660Sstevel@tonic-gate mdsetname_t *sp,
21670Sstevel@tonic-gate int flags,
21680Sstevel@tonic-gate struct mddb_config *c,
21690Sstevel@tonic-gate md_error_t *ep
21700Sstevel@tonic-gate )
21710Sstevel@tonic-gate {
21720Sstevel@tonic-gate md_replica_t *rp;
21730Sstevel@tonic-gate char *devname;
21740Sstevel@tonic-gate size_t sz;
21755109Spetede devid_nmlist_t *disklist = NULL;
21765109Spetede char *devid_str;
21770Sstevel@tonic-gate
21780Sstevel@tonic-gate /* allocate replicaname */
21790Sstevel@tonic-gate rp = Zalloc(sizeof (*rp));
21800Sstevel@tonic-gate
21810Sstevel@tonic-gate /* get device name */
21820Sstevel@tonic-gate devname = splicename(&c->c_devname);
21835109Spetede
21845109Spetede /*
21855109Spetede * Check if the device has a long name (>40 characters) and
21865109Spetede * if so then we have to use devids to get the device name.
21875109Spetede * If this cannot be done then we have to fail the request.
21885109Spetede */
21895109Spetede if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
21905109Spetede if (c->c_locator.l_devid != NULL) {
21915109Spetede if (meta_deviceid_to_nmlist("/dev/dsk",
21925109Spetede (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
21935109Spetede c->c_locator.l_minor_name, &disklist) != 0) {
21945109Spetede devid_str = devid_str_encode(
21955109Spetede (ddi_devid_t)(uintptr_t)
21965109Spetede c->c_locator.l_devid, NULL);
21975109Spetede (void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
21985109Spetede mderrorextra(ep, devid_str);
21995109Spetede if (devid_str != NULL)
22005109Spetede devid_str_free(devid_str);
22015109Spetede Free(rp);
22025109Spetede Free(devname);
22035109Spetede return (NULL);
22045109Spetede }
22055109Spetede } else {
22065109Spetede (void) mderror(ep, MDE_NODEVID, "");
22075109Spetede Free(rp);
22085109Spetede Free(devname);
22095109Spetede return (NULL);
22105109Spetede }
22115109Spetede Free(devname);
22125109Spetede devname = disklist[0].devname;
22135109Spetede }
22145109Spetede
22150Sstevel@tonic-gate if (flags & PRINT_FAST) {
22161623Stw21770 if ((rp->r_namep = metaname_fast(&sp, devname,
22171623Stw21770 LOGICAL_DEVICE, ep)) == NULL) {
22180Sstevel@tonic-gate Free(devname);
22190Sstevel@tonic-gate Free(rp);
22200Sstevel@tonic-gate return (NULL);
22210Sstevel@tonic-gate }
22220Sstevel@tonic-gate } else {
22231623Stw21770 if ((rp->r_namep = metaname(&sp, devname,
22241623Stw21770 LOGICAL_DEVICE, ep)) == NULL) {
22250Sstevel@tonic-gate Free(devname);
22260Sstevel@tonic-gate Free(rp);
22270Sstevel@tonic-gate return (NULL);
22280Sstevel@tonic-gate }
22290Sstevel@tonic-gate }
22300Sstevel@tonic-gate Free(devname);
22310Sstevel@tonic-gate
22320Sstevel@tonic-gate /* make sure it's OK */
22330Sstevel@tonic-gate if ((! (flags & MD_BASICNAME_OK)) &&
22340Sstevel@tonic-gate (metachkcomp(rp->r_namep, ep) != 0)) {
22350Sstevel@tonic-gate Free(rp);
22360Sstevel@tonic-gate return (NULL);
22370Sstevel@tonic-gate }
22380Sstevel@tonic-gate
223962Sjeanm rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
224062Sjeanm rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
22410Sstevel@tonic-gate rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
22420Sstevel@tonic-gate if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
224362Sjeanm sz = devid_sizeof((ddi_devid_t)(uintptr_t)
224462Sjeanm (c->c_locator.l_devid));
22450Sstevel@tonic-gate if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
22460Sstevel@tonic-gate (ddi_devid_t)NULL) {
22470Sstevel@tonic-gate Free(rp);
22480Sstevel@tonic-gate return (NULL);
22490Sstevel@tonic-gate }
22500Sstevel@tonic-gate (void) memcpy((void *)rp->r_devid,
225162Sjeanm (void *)(uintptr_t)c->c_locator.l_devid, sz);
22520Sstevel@tonic-gate (void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
22530Sstevel@tonic-gate rp->r_flags &= ~MDDB_F_NODEVID;
22540Sstevel@tonic-gate /* Overwrite dev derived from name with dev from devid */
22550Sstevel@tonic-gate rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
22560Sstevel@tonic-gate }
22570Sstevel@tonic-gate (void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
22580Sstevel@tonic-gate
22590Sstevel@tonic-gate rp->r_blkno = c->c_locator.l_blkno;
22600Sstevel@tonic-gate if (c->c_dbend != 0)
22610Sstevel@tonic-gate rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
22620Sstevel@tonic-gate
22630Sstevel@tonic-gate /* return replica */
22640Sstevel@tonic-gate return (rp);
22650Sstevel@tonic-gate }
22660Sstevel@tonic-gate
22670Sstevel@tonic-gate /*
22680Sstevel@tonic-gate * free replica list
22690Sstevel@tonic-gate */
22700Sstevel@tonic-gate void
metafreereplicalist(md_replicalist_t * rlp)22710Sstevel@tonic-gate metafreereplicalist(
22720Sstevel@tonic-gate md_replicalist_t *rlp
22730Sstevel@tonic-gate )
22740Sstevel@tonic-gate {
22750Sstevel@tonic-gate md_replicalist_t *rl = NULL;
22760Sstevel@tonic-gate
22770Sstevel@tonic-gate for (/* void */; (rlp != NULL); rlp = rl) {
22780Sstevel@tonic-gate rl = rlp->rl_next;
22790Sstevel@tonic-gate if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
22800Sstevel@tonic-gate free(rlp->rl_repp->r_devid);
22810Sstevel@tonic-gate }
22820Sstevel@tonic-gate Free(rlp->rl_repp);
22830Sstevel@tonic-gate Free(rlp);
22840Sstevel@tonic-gate }
22850Sstevel@tonic-gate }
22860Sstevel@tonic-gate
22870Sstevel@tonic-gate /*
22880Sstevel@tonic-gate * return list of all replicas in set
22890Sstevel@tonic-gate */
22900Sstevel@tonic-gate int
metareplicalist(mdsetname_t * sp,int flags,md_replicalist_t ** rlpp,md_error_t * ep)22910Sstevel@tonic-gate metareplicalist(
22920Sstevel@tonic-gate mdsetname_t *sp,
22930Sstevel@tonic-gate int flags,
22940Sstevel@tonic-gate md_replicalist_t **rlpp,
22950Sstevel@tonic-gate md_error_t *ep
22960Sstevel@tonic-gate )
22970Sstevel@tonic-gate {
22980Sstevel@tonic-gate md_replicalist_t **tail = rlpp;
22990Sstevel@tonic-gate int count = 0;
23000Sstevel@tonic-gate struct mddb_config c;
23010Sstevel@tonic-gate int i;
23020Sstevel@tonic-gate char *devid;
23030Sstevel@tonic-gate
23040Sstevel@tonic-gate /* for each replica */
23050Sstevel@tonic-gate i = 0;
23060Sstevel@tonic-gate do {
23070Sstevel@tonic-gate md_replica_t *rp;
23080Sstevel@tonic-gate
23090Sstevel@tonic-gate /* get next replica */
23100Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
23110Sstevel@tonic-gate c.c_id = i;
23120Sstevel@tonic-gate c.c_setno = sp->setno;
23130Sstevel@tonic-gate
23140Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
23150Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23160Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23170Sstevel@tonic-gate mdclrerror(&c.c_mde);
23180Sstevel@tonic-gate break; /* handle none at all */
23190Sstevel@tonic-gate }
23200Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde);
23210Sstevel@tonic-gate goto out;
23220Sstevel@tonic-gate }
23230Sstevel@tonic-gate
23240Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
23250Sstevel@tonic-gate if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
23260Sstevel@tonic-gate (void) mdsyserror(ep, ENOMEM, META_DBCONF);
23270Sstevel@tonic-gate goto out;
23280Sstevel@tonic-gate }
23290Sstevel@tonic-gate c.c_locator.l_devid = (uintptr_t)devid;
23300Sstevel@tonic-gate /*
23310Sstevel@tonic-gate * Turn on space and sz flags since 'sz' amount of
23320Sstevel@tonic-gate * space has been alloc'd.
23330Sstevel@tonic-gate */
23340Sstevel@tonic-gate c.c_locator.l_devid_flags =
23355109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
23360Sstevel@tonic-gate }
23370Sstevel@tonic-gate
23380Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23390Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23400Sstevel@tonic-gate mdclrerror(&c.c_mde);
23410Sstevel@tonic-gate break; /* handle none at all */
23420Sstevel@tonic-gate }
23430Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde);
23440Sstevel@tonic-gate goto out;
23450Sstevel@tonic-gate }
23460Sstevel@tonic-gate
23470Sstevel@tonic-gate /*
23480Sstevel@tonic-gate * Paranoid check - shouldn't happen, but is left as
23490Sstevel@tonic-gate * a place holder for changes that will be needed after
23500Sstevel@tonic-gate * dynamic reconfiguration changes are added to SVM (to
23510Sstevel@tonic-gate * support movement of disks at any point in time).
23520Sstevel@tonic-gate */
23530Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
23540Sstevel@tonic-gate (void) fprintf(stderr,
23550Sstevel@tonic-gate dgettext(TEXT_DOMAIN,
23565109Spetede "Error: Relocation Information "
23575109Spetede "(drvnm=%s, mnum=0x%lx) \n"
23585109Spetede "relocation information size changed - \n"
23595109Spetede "rerun command\n"),
23600Sstevel@tonic-gate c.c_locator.l_driver, c.c_locator.l_mnum);
23610Sstevel@tonic-gate (void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
23620Sstevel@tonic-gate goto out;
23630Sstevel@tonic-gate }
23640Sstevel@tonic-gate
23650Sstevel@tonic-gate if (c.c_dbcnt == 0)
23660Sstevel@tonic-gate break; /* handle none at all */
23670Sstevel@tonic-gate
23680Sstevel@tonic-gate /* get info */
23690Sstevel@tonic-gate if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
23700Sstevel@tonic-gate goto out;
23710Sstevel@tonic-gate
23720Sstevel@tonic-gate /* append to list */
23730Sstevel@tonic-gate *tail = Zalloc(sizeof (**tail));
23740Sstevel@tonic-gate (*tail)->rl_repp = rp;
23750Sstevel@tonic-gate tail = &(*tail)->rl_next;
23760Sstevel@tonic-gate ++count;
23770Sstevel@tonic-gate
23780Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23790Sstevel@tonic-gate free(devid);
23800Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
23810Sstevel@tonic-gate }
23820Sstevel@tonic-gate
23830Sstevel@tonic-gate } while (++i < c.c_dbcnt);
23840Sstevel@tonic-gate
23850Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23860Sstevel@tonic-gate free(devid);
23870Sstevel@tonic-gate }
23880Sstevel@tonic-gate
23890Sstevel@tonic-gate /* return count */
23900Sstevel@tonic-gate return (count);
23910Sstevel@tonic-gate
23920Sstevel@tonic-gate /* cleanup, return error */
23930Sstevel@tonic-gate out:
23940Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23950Sstevel@tonic-gate free(devid);
23960Sstevel@tonic-gate }
23970Sstevel@tonic-gate metafreereplicalist(*rlpp);
23980Sstevel@tonic-gate *rlpp = NULL;
23990Sstevel@tonic-gate return (-1);
24000Sstevel@tonic-gate }
24010Sstevel@tonic-gate
24020Sstevel@tonic-gate /*
24030Sstevel@tonic-gate * meta_sync_db_locations - get list of replicas from kernel and write
24040Sstevel@tonic-gate * out to mddb.cf and md.conf. 'Syncs up' the replica list in
24050Sstevel@tonic-gate * the kernel with the replica list in the conf files.
24060Sstevel@tonic-gate *
24070Sstevel@tonic-gate */
24080Sstevel@tonic-gate void
meta_sync_db_locations(mdsetname_t * sp,md_error_t * ep)24090Sstevel@tonic-gate meta_sync_db_locations(
24100Sstevel@tonic-gate mdsetname_t *sp,
24110Sstevel@tonic-gate md_error_t *ep
24120Sstevel@tonic-gate )
24130Sstevel@tonic-gate {
24140Sstevel@tonic-gate char *sname = 0; /* system file name */
24150Sstevel@tonic-gate char *cname = 0; /* config file name */
24160Sstevel@tonic-gate
24170Sstevel@tonic-gate if (!metaislocalset(sp))
24180Sstevel@tonic-gate return;
24190Sstevel@tonic-gate
24200Sstevel@tonic-gate /* Updates backup of configuration file (aka mddb.cf) */
24210Sstevel@tonic-gate if (buildconf(sp, ep) != 0)
24220Sstevel@tonic-gate return;
24230Sstevel@tonic-gate
24240Sstevel@tonic-gate /* Updates system configuration file (aka md.conf) */
24250Sstevel@tonic-gate (void) meta_db_patch(sname, cname, 0, ep);
24260Sstevel@tonic-gate }
24270Sstevel@tonic-gate
24280Sstevel@tonic-gate /*
24290Sstevel@tonic-gate * setup_db_locations - parse the mddb.cf file and
24300Sstevel@tonic-gate * tells the driver which db locations to use.
24310Sstevel@tonic-gate */
24320Sstevel@tonic-gate int
meta_setup_db_locations(md_error_t * ep)24330Sstevel@tonic-gate meta_setup_db_locations(
24340Sstevel@tonic-gate md_error_t *ep
24350Sstevel@tonic-gate )
24360Sstevel@tonic-gate {
24370Sstevel@tonic-gate mddb_config_t c;
24380Sstevel@tonic-gate FILE *fp;
24390Sstevel@tonic-gate char inbuff[1024];
24400Sstevel@tonic-gate char *buff;
24410Sstevel@tonic-gate uint_t i;
24420Sstevel@tonic-gate size_t sz;
24430Sstevel@tonic-gate int rval = 0;
24440Sstevel@tonic-gate char *devidp;
24450Sstevel@tonic-gate uint_t devid_size;
24460Sstevel@tonic-gate char *minor_name = NULL;
24470Sstevel@tonic-gate ddi_devid_t devid_decode;
24480Sstevel@tonic-gate int checksum;
24490Sstevel@tonic-gate
24500Sstevel@tonic-gate /* do mddb.cf file */
24510Sstevel@tonic-gate (void) memset(&c, '\0', sizeof (c));
24520Sstevel@tonic-gate if ((fp = fopen(META_DBCONF, "r")) == NULL) {
24530Sstevel@tonic-gate if (errno != ENOENT)
24540Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF));
24550Sstevel@tonic-gate }
24560Sstevel@tonic-gate while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
24570Sstevel@tonic-gate fp)) != NULL)) {
24580Sstevel@tonic-gate
24590Sstevel@tonic-gate /* ignore comments */
24600Sstevel@tonic-gate if (*buff == '#')
24610Sstevel@tonic-gate continue;
24620Sstevel@tonic-gate
24630Sstevel@tonic-gate /* parse locator */
24640Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c));
24650Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET;
24660Sstevel@tonic-gate i = strcspn(buff, " \t");
24670Sstevel@tonic-gate if (i > sizeof (c.c_locator.l_driver))
24680Sstevel@tonic-gate i = sizeof (c.c_locator.l_driver);
24690Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, buff, i);
24700Sstevel@tonic-gate buff += i;
24710Sstevel@tonic-gate c.c_locator.l_dev =
24720Sstevel@tonic-gate makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
24730Sstevel@tonic-gate c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
24740Sstevel@tonic-gate c.c_locator.l_mnum = minor(c.c_locator.l_dev);
24750Sstevel@tonic-gate
24760Sstevel@tonic-gate /* parse out devid */
24770Sstevel@tonic-gate while (isspace((int)(*buff)))
24780Sstevel@tonic-gate buff += 1;
24790Sstevel@tonic-gate i = strcspn(buff, " \t");
24800Sstevel@tonic-gate if ((devidp = (char *)malloc(i+1)) == NULL)
24810Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF));
24820Sstevel@tonic-gate
24830Sstevel@tonic-gate (void) strncpy(devidp, buff, i);
24840Sstevel@tonic-gate devidp[i] = '\0';
24850Sstevel@tonic-gate if (devid_str_decode(devidp, &devid_decode,
24860Sstevel@tonic-gate &minor_name) == -1) {
24870Sstevel@tonic-gate free(devidp);
24880Sstevel@tonic-gate continue;
24890Sstevel@tonic-gate }
24900Sstevel@tonic-gate
24910Sstevel@tonic-gate /* Conf file must have minor name associated with devid */
24920Sstevel@tonic-gate if (minor_name == NULL) {
24930Sstevel@tonic-gate free(devidp);
24940Sstevel@tonic-gate devid_free(devid_decode);
24950Sstevel@tonic-gate continue;
24960Sstevel@tonic-gate }
24970Sstevel@tonic-gate
24980Sstevel@tonic-gate sz = devid_sizeof(devid_decode);
24990Sstevel@tonic-gate /* Copy to devid size buffer that ioctl expects */
25000Sstevel@tonic-gate if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
25010Sstevel@tonic-gate devid_free(devid_decode);
25020Sstevel@tonic-gate free(minor_name);
25030Sstevel@tonic-gate free(devidp);
25040Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF));
25050Sstevel@tonic-gate }
25060Sstevel@tonic-gate
250762Sjeanm (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
25080Sstevel@tonic-gate (void *)devid_decode, sz);
25090Sstevel@tonic-gate
25100Sstevel@tonic-gate devid_free(devid_decode);
25110Sstevel@tonic-gate
25120Sstevel@tonic-gate if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
25130Sstevel@tonic-gate free(minor_name);
25140Sstevel@tonic-gate free(devidp);
251562Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid);
25160Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF));
25170Sstevel@tonic-gate }
25180Sstevel@tonic-gate (void) strcpy(c.c_locator.l_minor_name, minor_name);
25190Sstevel@tonic-gate free(minor_name);
25200Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
25215109Spetede MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
25220Sstevel@tonic-gate c.c_locator.l_devid_sz = sz;
25230Sstevel@tonic-gate
25240Sstevel@tonic-gate devid_size = strlen(devidp);
25250Sstevel@tonic-gate buff += devid_size;
25260Sstevel@tonic-gate
25270Sstevel@tonic-gate checksum = strtol(buff, &buff, 10);
25280Sstevel@tonic-gate for (i = 0; c.c_locator.l_driver[i] != 0; i++)
25290Sstevel@tonic-gate checksum += c.c_locator.l_driver[i];
25300Sstevel@tonic-gate for (i = 0; i < devid_size; i++) {
25310Sstevel@tonic-gate checksum += devidp[i];
25320Sstevel@tonic-gate }
25330Sstevel@tonic-gate free(devidp);
25340Sstevel@tonic-gate
25350Sstevel@tonic-gate checksum += minor(c.c_locator.l_dev);
25360Sstevel@tonic-gate checksum += c.c_locator.l_blkno;
25370Sstevel@tonic-gate if (checksum != 42) {
25380Sstevel@tonic-gate /* overwritten later for more serious problems */
25390Sstevel@tonic-gate rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
254062Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid);
25410Sstevel@tonic-gate continue;
25420Sstevel@tonic-gate }
25430Sstevel@tonic-gate c.c_locator.l_flags = 0;
25440Sstevel@tonic-gate
25450Sstevel@tonic-gate /* use db location */
25460Sstevel@tonic-gate if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
254762Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid);
25480Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
25490Sstevel@tonic-gate }
25500Sstevel@tonic-gate
25510Sstevel@tonic-gate /* free up devid if in use */
255262Sjeanm free((void *)(uintptr_t)c.c_locator.l_devid);
25530Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
25540Sstevel@tonic-gate c.c_locator.l_devid_flags = 0;
25550Sstevel@tonic-gate }
25560Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0))
25570Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF));
25580Sstevel@tonic-gate
25590Sstevel@tonic-gate /* check for stale database */
25600Sstevel@tonic-gate (void) memset((char *)&c, 0, sizeof (struct mddb_config));
25610Sstevel@tonic-gate c.c_id = 0;
25620Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET;
25630Sstevel@tonic-gate
25645109Spetede /*
25655109Spetede * While we do not need the devid here we may need to
25665109Spetede * know if devid's are being used by the kernel for
25675109Spetede * the replicas. This is because under some circumstances
25685109Spetede * we can only manipulate the SVM configuration if the
25695109Spetede * kernel is using devid's.
25705109Spetede */
25710Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0;
25725109Spetede c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
25735109Spetede c.c_locator.l_devid_sz = 0;
25740Sstevel@tonic-gate
25750Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
25760Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
25770Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde));
25780Sstevel@tonic-gate mdclrerror(&c.c_mde);
25790Sstevel@tonic-gate }
25800Sstevel@tonic-gate
25810Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE)
25820Sstevel@tonic-gate return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
25830Sstevel@tonic-gate 0, NULL));
25840Sstevel@tonic-gate
25855109Spetede if (c.c_locator.l_devid_sz != 0) {
25865109Spetede /*
25875109Spetede * Devid's are being used to track the replicas because
25885109Spetede * there is space for a devid.
25895109Spetede */
25905109Spetede devid_in_use = TRUE;
25915109Spetede }
25925109Spetede
25930Sstevel@tonic-gate /* success */
25940Sstevel@tonic-gate return (rval);
25950Sstevel@tonic-gate }
25960Sstevel@tonic-gate
25970Sstevel@tonic-gate /*
25980Sstevel@tonic-gate * meta_db_minreplica - returns the minimum size replica currently in use.
25990Sstevel@tonic-gate */
26000Sstevel@tonic-gate daddr_t
meta_db_minreplica(mdsetname_t * sp,md_error_t * ep)26010Sstevel@tonic-gate meta_db_minreplica(
26020Sstevel@tonic-gate mdsetname_t *sp,
26030Sstevel@tonic-gate md_error_t *ep
26040Sstevel@tonic-gate )
26050Sstevel@tonic-gate {
26060Sstevel@tonic-gate md_replica_t *r;
26070Sstevel@tonic-gate md_replicalist_t *rl, *rlp = NULL;
26080Sstevel@tonic-gate daddr_t nblks = 0;
26090Sstevel@tonic-gate
26100Sstevel@tonic-gate if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
26110Sstevel@tonic-gate return (-1);
26120Sstevel@tonic-gate
26130Sstevel@tonic-gate if (rlp == NULL)
26140Sstevel@tonic-gate return (-1);
26150Sstevel@tonic-gate
26160Sstevel@tonic-gate /* find the smallest existing replica */
26170Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) {
26180Sstevel@tonic-gate r = rl->rl_repp;
26190Sstevel@tonic-gate nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
26200Sstevel@tonic-gate }
26210Sstevel@tonic-gate
26220Sstevel@tonic-gate metafreereplicalist(rlp);
26230Sstevel@tonic-gate return (nblks);
26240Sstevel@tonic-gate }
26250Sstevel@tonic-gate
26260Sstevel@tonic-gate /*
26270Sstevel@tonic-gate * meta_get_replica_names
26280Sstevel@tonic-gate * returns an mdnamelist_t of replica slices
26290Sstevel@tonic-gate */
26300Sstevel@tonic-gate /*ARGSUSED*/
26310Sstevel@tonic-gate int
meta_get_replica_names(mdsetname_t * sp,mdnamelist_t ** nlpp,int options,md_error_t * ep)26320Sstevel@tonic-gate meta_get_replica_names(
26330Sstevel@tonic-gate mdsetname_t *sp,
26340Sstevel@tonic-gate mdnamelist_t **nlpp,
26350Sstevel@tonic-gate int options,
26360Sstevel@tonic-gate md_error_t *ep
26370Sstevel@tonic-gate )
26380Sstevel@tonic-gate {
26390Sstevel@tonic-gate md_replicalist_t *rlp = NULL;
26400Sstevel@tonic-gate md_replicalist_t *rl;
26410Sstevel@tonic-gate mdnamelist_t **tailpp = nlpp;
26420Sstevel@tonic-gate int cnt = 0;
26430Sstevel@tonic-gate
26440Sstevel@tonic-gate assert(nlpp != NULL);
26450Sstevel@tonic-gate
26460Sstevel@tonic-gate if (!metaislocalset(sp))
26470Sstevel@tonic-gate goto out;
26480Sstevel@tonic-gate
26490Sstevel@tonic-gate /* get replicas */
26500Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
26510Sstevel@tonic-gate cnt = -1;
26520Sstevel@tonic-gate goto out;
26530Sstevel@tonic-gate }
26540Sstevel@tonic-gate
26550Sstevel@tonic-gate /* build name list */
26560Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
26570Sstevel@tonic-gate /*
26580Sstevel@tonic-gate * Add the name struct to the end of the
26590Sstevel@tonic-gate * namelist but keep a pointer to the last
26600Sstevel@tonic-gate * element so that we don't incur the overhead
26610Sstevel@tonic-gate * of traversing the list each time
26620Sstevel@tonic-gate */
26630Sstevel@tonic-gate tailpp = meta_namelist_append_wrapper(
26645109Spetede tailpp, rl->rl_repp->r_namep);
26650Sstevel@tonic-gate ++cnt;
26660Sstevel@tonic-gate }
26670Sstevel@tonic-gate
26680Sstevel@tonic-gate /* cleanup, return count or error */
26690Sstevel@tonic-gate out:
26700Sstevel@tonic-gate metafreereplicalist(rlp);
26710Sstevel@tonic-gate return (cnt);
26720Sstevel@tonic-gate }
2673