1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate /* 30*0Sstevel@tonic-gate * Just in case we're not in a build environment, make sure that 31*0Sstevel@tonic-gate * TEXT_DOMAIN gets set to something. 
32*0Sstevel@tonic-gate */ 33*0Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) 34*0Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" 35*0Sstevel@tonic-gate #endif 36*0Sstevel@tonic-gate 37*0Sstevel@tonic-gate /* 38*0Sstevel@tonic-gate * Metadevice database interfaces. 39*0Sstevel@tonic-gate */ 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate #define MDDB 42*0Sstevel@tonic-gate 43*0Sstevel@tonic-gate #include <meta.h> 44*0Sstevel@tonic-gate #include <sys/lvm/md_mddb.h> 45*0Sstevel@tonic-gate #include <sys/lvm/md_crc.h> 46*0Sstevel@tonic-gate #include <sys/lvm/mdio.h> 47*0Sstevel@tonic-gate #include <string.h> 48*0Sstevel@tonic-gate #include <strings.h> 49*0Sstevel@tonic-gate #include <ctype.h> 50*0Sstevel@tonic-gate 51*0Sstevel@tonic-gate struct svm_daemon { 52*0Sstevel@tonic-gate char *svmd_name; 53*0Sstevel@tonic-gate char *svmd_kill_val; 54*0Sstevel@tonic-gate }; 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = { 57*0Sstevel@tonic-gate {"mdmonitord", "HUP"}, 58*0Sstevel@tonic-gate {"mddoors", "KILL"}, 59*0Sstevel@tonic-gate }; 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate #define DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon)) 62*0Sstevel@tonic-gate #define MDMONITORD "/usr/sbin/mdmonitord" 63*0Sstevel@tonic-gate 64*0Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep); 65*0Sstevel@tonic-gate 66*0Sstevel@tonic-gate /* 67*0Sstevel@tonic-gate * meta_get_lb_inittime sends a request for the lb_inittime to the kernel 68*0Sstevel@tonic-gate */ 69*0Sstevel@tonic-gate md_timeval32_t 70*0Sstevel@tonic-gate meta_get_lb_inittime( 71*0Sstevel@tonic-gate mdsetname_t *sp, 72*0Sstevel@tonic-gate md_error_t *ep 73*0Sstevel@tonic-gate ) 74*0Sstevel@tonic-gate { 75*0Sstevel@tonic-gate mddb_config_t c; 76*0Sstevel@tonic-gate 77*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 78*0Sstevel@tonic-gate 79*0Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 80*0Sstevel@tonic-gate c.c_setno = 
sp->setno; 81*0Sstevel@tonic-gate 82*0Sstevel@tonic-gate if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) { 83*0Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 84*0Sstevel@tonic-gate } 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate return (c.c_timestamp); 87*0Sstevel@tonic-gate } 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gate /* 90*0Sstevel@tonic-gate * mkmasterblks writes out the master blocks of the mddb to the replica. 91*0Sstevel@tonic-gate * 92*0Sstevel@tonic-gate * In a MN diskset, this is called by the node that is adding this replica 93*0Sstevel@tonic-gate * to the diskset. 94*0Sstevel@tonic-gate */ 95*0Sstevel@tonic-gate 96*0Sstevel@tonic-gate #define MDDB_VERIFY_SIZE 8192 97*0Sstevel@tonic-gate 98*0Sstevel@tonic-gate static int 99*0Sstevel@tonic-gate mkmasterblks( 100*0Sstevel@tonic-gate mdsetname_t *sp, 101*0Sstevel@tonic-gate mdname_t *np, 102*0Sstevel@tonic-gate int fd, 103*0Sstevel@tonic-gate daddr_t firstblk, 104*0Sstevel@tonic-gate int dbsize, 105*0Sstevel@tonic-gate md_timeval32_t inittime, 106*0Sstevel@tonic-gate md_error_t *ep 107*0Sstevel@tonic-gate ) 108*0Sstevel@tonic-gate { 109*0Sstevel@tonic-gate int consecutive; 110*0Sstevel@tonic-gate md_timeval32_t tp; 111*0Sstevel@tonic-gate struct mddb_mb *mb; 112*0Sstevel@tonic-gate char *buffer; 113*0Sstevel@tonic-gate int iosize; 114*0Sstevel@tonic-gate md_set_desc *sd; 115*0Sstevel@tonic-gate int mn_set = 0; 116*0Sstevel@tonic-gate daddr_t startblk; 117*0Sstevel@tonic-gate int cnt; 118*0Sstevel@tonic-gate ddi_devid_t devid; 119*0Sstevel@tonic-gate 120*0Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 121*0Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 122*0Sstevel@tonic-gate return (-1); 123*0Sstevel@tonic-gate 124*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 125*0Sstevel@tonic-gate mn_set = 1; /* Used later */ 126*0Sstevel@tonic-gate } 127*0Sstevel@tonic-gate } 128*0Sstevel@tonic-gate 129*0Sstevel@tonic-gate /* 130*0Sstevel@tonic-gate * Loop to verify the entire mddb region on disk is read/writable. 131*0Sstevel@tonic-gate * buffer is used to write/read in at most MDDB_VERIFY_SIZE block 132*0Sstevel@tonic-gate * chunks. 133*0Sstevel@tonic-gate * 134*0Sstevel@tonic-gate * A side-effect of this loop is to zero out the entire mddb region 135*0Sstevel@tonic-gate */ 136*0Sstevel@tonic-gate if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL) 137*0Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate startblk = firstblk; 140*0Sstevel@tonic-gate for (cnt = dbsize; cnt > 0; cnt -= consecutive) { 141*0Sstevel@tonic-gate 142*0Sstevel@tonic-gate if (cnt > MDDB_VERIFY_SIZE) 143*0Sstevel@tonic-gate consecutive = MDDB_VERIFY_SIZE; 144*0Sstevel@tonic-gate else 145*0Sstevel@tonic-gate consecutive = cnt; 146*0Sstevel@tonic-gate 147*0Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 148*0Sstevel@tonic-gate Free(buffer); 149*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 150*0Sstevel@tonic-gate } 151*0Sstevel@tonic-gate 152*0Sstevel@tonic-gate iosize = DEV_BSIZE * consecutive; 153*0Sstevel@tonic-gate if (write(fd, buffer, iosize) != iosize) { 154*0Sstevel@tonic-gate Free(buffer); 155*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 156*0Sstevel@tonic-gate } 157*0Sstevel@tonic-gate 158*0Sstevel@tonic-gate if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) { 159*0Sstevel@tonic-gate Free(buffer); 160*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 161*0Sstevel@tonic-gate } 162*0Sstevel@tonic-gate 
163*0Sstevel@tonic-gate if (read(fd, buffer, iosize) != iosize) { 164*0Sstevel@tonic-gate Free(buffer); 165*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 166*0Sstevel@tonic-gate } 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gate startblk += consecutive; 169*0Sstevel@tonic-gate } 170*0Sstevel@tonic-gate 171*0Sstevel@tonic-gate Free(buffer); 172*0Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 173*0Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, np->rname)); 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate if (meta_gettimeofday(&tp) == -1) { 176*0Sstevel@tonic-gate Free(mb); 177*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 178*0Sstevel@tonic-gate } 179*0Sstevel@tonic-gate 180*0Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_MB; 181*0Sstevel@tonic-gate /* 182*0Sstevel@tonic-gate * If a MN diskset, set master block revision for a MN set. 183*0Sstevel@tonic-gate * Even though the master block structure is no different 184*0Sstevel@tonic-gate * for a MN set, setting the revision field to a different 185*0Sstevel@tonic-gate * number keeps any pre-MN_diskset code from accessing 186*0Sstevel@tonic-gate * this diskset. It also allows for an early determination 187*0Sstevel@tonic-gate * of a MN diskset when reading in from disk so that the 188*0Sstevel@tonic-gate * proper size locator block and locator names structure 189*0Sstevel@tonic-gate * can be read in thus saving time on diskset startup. 
190*0Sstevel@tonic-gate */ 191*0Sstevel@tonic-gate if (mn_set) 192*0Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MNMB; 193*0Sstevel@tonic-gate else 194*0Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 195*0Sstevel@tonic-gate mb->mb_timestamp = tp; 196*0Sstevel@tonic-gate mb->mb_setno = sp->setno; 197*0Sstevel@tonic-gate mb->mb_blkcnt = dbsize - 1; 198*0Sstevel@tonic-gate mb->mb_blkno = firstblk; 199*0Sstevel@tonic-gate mb->mb_nextblk = 0; 200*0Sstevel@tonic-gate 201*0Sstevel@tonic-gate mb->mb_blkmap.m_firstblk = firstblk + 1; 202*0Sstevel@tonic-gate mb->mb_blkmap.m_consecutive = dbsize - 1; 203*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 204*0Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 205*0Sstevel@tonic-gate } 206*0Sstevel@tonic-gate 207*0Sstevel@tonic-gate /* 208*0Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 209*0Sstevel@tonic-gate * the master block. The saved devid is used to provide a mapping 210*0Sstevel@tonic-gate * between this disk's devid and the devid stored into the master 211*0Sstevel@tonic-gate * block. This allows the disk image to be self-identifying 212*0Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 213*0Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 214*0Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 215*0Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 216*0Sstevel@tonic-gate * in the remote copy scenario. 
217*0Sstevel@tonic-gate */ 218*0Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 219*0Sstevel@tonic-gate size_t len; 220*0Sstevel@tonic-gate 221*0Sstevel@tonic-gate len = devid_sizeof(devid); 222*0Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 223*0Sstevel@tonic-gate /* there is enough space to store the devid */ 224*0Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 225*0Sstevel@tonic-gate mb->mb_devid_len = len; 226*0Sstevel@tonic-gate (void) memcpy(mb->mb_devid, devid, len); 227*0Sstevel@tonic-gate } 228*0Sstevel@tonic-gate devid_free(devid); 229*0Sstevel@tonic-gate } 230*0Sstevel@tonic-gate 231*0Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 232*0Sstevel@tonic-gate (crc_skip_t *)NULL); 233*0Sstevel@tonic-gate 234*0Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 235*0Sstevel@tonic-gate Free(mb); 236*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 237*0Sstevel@tonic-gate } 238*0Sstevel@tonic-gate 239*0Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 240*0Sstevel@tonic-gate Free(mb); 241*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 242*0Sstevel@tonic-gate } 243*0Sstevel@tonic-gate 244*0Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) { 245*0Sstevel@tonic-gate Free(mb); 246*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 247*0Sstevel@tonic-gate } 248*0Sstevel@tonic-gate 249*0Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) { 250*0Sstevel@tonic-gate Free(mb); 251*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 252*0Sstevel@tonic-gate } 253*0Sstevel@tonic-gate 254*0Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 255*0Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) { 256*0Sstevel@tonic-gate Free(mb); 257*0Sstevel@tonic-gate return (mdmddberror(ep, MDE_NOTVERIFIED, 258*0Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, 
0, np->rname)); 259*0Sstevel@tonic-gate } 260*0Sstevel@tonic-gate 261*0Sstevel@tonic-gate Free(mb); 262*0Sstevel@tonic-gate return (0); 263*0Sstevel@tonic-gate } 264*0Sstevel@tonic-gate 265*0Sstevel@tonic-gate void 266*0Sstevel@tonic-gate meta_mkdummymaster( 267*0Sstevel@tonic-gate mdsetname_t *sp, 268*0Sstevel@tonic-gate int fd, 269*0Sstevel@tonic-gate daddr_t firstblk 270*0Sstevel@tonic-gate ) 271*0Sstevel@tonic-gate { 272*0Sstevel@tonic-gate md_timeval32_t tp; 273*0Sstevel@tonic-gate struct mddb_mb *mb; 274*0Sstevel@tonic-gate ddi_devid_t devid; 275*0Sstevel@tonic-gate md_set_desc *sd; 276*0Sstevel@tonic-gate md_error_t ep = mdnullerror; 277*0Sstevel@tonic-gate md_timeval32_t inittime; 278*0Sstevel@tonic-gate 279*0Sstevel@tonic-gate /* 280*0Sstevel@tonic-gate * No dummy master blocks are written for a MN diskset since devids 281*0Sstevel@tonic-gate * are not supported in MN disksets. 282*0Sstevel@tonic-gate */ 283*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 284*0Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, &ep)) == NULL) 285*0Sstevel@tonic-gate return; 286*0Sstevel@tonic-gate 287*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) 288*0Sstevel@tonic-gate return; 289*0Sstevel@tonic-gate } 290*0Sstevel@tonic-gate 291*0Sstevel@tonic-gate if ((mb = Zalloc(DEV_BSIZE)) == NULL) 292*0Sstevel@tonic-gate return; 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate mb->mb_magic = MDDB_MAGIC_DU; 295*0Sstevel@tonic-gate mb->mb_revision = MDDB_REV_MB; 296*0Sstevel@tonic-gate mb->mb_setno = sp->setno; 297*0Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, &ep); 298*0Sstevel@tonic-gate mb->mb_setcreatetime = inittime; 299*0Sstevel@tonic-gate 300*0Sstevel@tonic-gate if (meta_gettimeofday(&tp) != -1) 301*0Sstevel@tonic-gate mb->mb_timestamp = tp; 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate /* 304*0Sstevel@tonic-gate * We try to save the disks device ID into the remaining bytes in 305*0Sstevel@tonic-gate * the master block. 
This allows the disk image to be self-identifying 306*0Sstevel@tonic-gate * if it gets copied (e.g. SNDR, True Copy, etc.). This is used 307*0Sstevel@tonic-gate * when we try to import these disks on the remote copied image. 308*0Sstevel@tonic-gate * If we cannot save the disks device ID onto the master block that is 309*0Sstevel@tonic-gate * ok. The disk is just not self-identifying and won't be importable 310*0Sstevel@tonic-gate * in the remote copy scenario. 311*0Sstevel@tonic-gate */ 312*0Sstevel@tonic-gate if (devid_get(fd, &devid) == 0) { 313*0Sstevel@tonic-gate int len; 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate len = devid_sizeof(devid); 316*0Sstevel@tonic-gate if (len <= DEV_BSIZE - sizeof (*mb)) { 317*0Sstevel@tonic-gate /* there is enough space to store the devid */ 318*0Sstevel@tonic-gate mb->mb_devid_magic = MDDB_MAGIC_DE; 319*0Sstevel@tonic-gate mb->mb_devid_len = len; 320*0Sstevel@tonic-gate (void) memcpy(mb->mb_devid, (char *)devid, len); 321*0Sstevel@tonic-gate } 322*0Sstevel@tonic-gate devid_free(devid); 323*0Sstevel@tonic-gate } 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE, 326*0Sstevel@tonic-gate (crc_skip_t *)NULL); 327*0Sstevel@tonic-gate 328*0Sstevel@tonic-gate /* 329*0Sstevel@tonic-gate * If any of these operations fail, we need to inform the 330*0Sstevel@tonic-gate * user that the disk won't be self identifying. When support 331*0Sstevel@tonic-gate * for importing remotely replicated disksets is added, we 332*0Sstevel@tonic-gate * want to add the error messages here. 
333*0Sstevel@tonic-gate */ 334*0Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 335*0Sstevel@tonic-gate goto out; 336*0Sstevel@tonic-gate 337*0Sstevel@tonic-gate if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) 338*0Sstevel@tonic-gate goto out; 339*0Sstevel@tonic-gate 340*0Sstevel@tonic-gate if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) 341*0Sstevel@tonic-gate goto out; 342*0Sstevel@tonic-gate 343*0Sstevel@tonic-gate if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) 344*0Sstevel@tonic-gate goto out; 345*0Sstevel@tonic-gate 346*0Sstevel@tonic-gate if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum, 347*0Sstevel@tonic-gate (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) 348*0Sstevel@tonic-gate goto out; 349*0Sstevel@tonic-gate 350*0Sstevel@tonic-gate out: 351*0Sstevel@tonic-gate Free(mb); 352*0Sstevel@tonic-gate } 353*0Sstevel@tonic-gate 354*0Sstevel@tonic-gate static int 355*0Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep) 356*0Sstevel@tonic-gate { 357*0Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 358*0Sstevel@tonic-gate md_replicalist_t *rl; 359*0Sstevel@tonic-gate FILE *cfp = NULL; 360*0Sstevel@tonic-gate FILE *mfp = NULL; 361*0Sstevel@tonic-gate struct stat sbuf; 362*0Sstevel@tonic-gate int rval = 0; 363*0Sstevel@tonic-gate int in_miniroot = 0; 364*0Sstevel@tonic-gate char line[MDDB_BOOTLIST_MAX_LEN]; 365*0Sstevel@tonic-gate char *tname = NULL; 366*0Sstevel@tonic-gate 367*0Sstevel@tonic-gate /* get list of local replicas */ 368*0Sstevel@tonic-gate if (! 
metaislocalset(sp)) 369*0Sstevel@tonic-gate return (0); 370*0Sstevel@tonic-gate 371*0Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 372*0Sstevel@tonic-gate return (-1); 373*0Sstevel@tonic-gate 374*0Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 375*0Sstevel@tonic-gate if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) { 376*0Sstevel@tonic-gate /* 377*0Sstevel@tonic-gate * On the miniroot tmp files must be created in /var/tmp. 378*0Sstevel@tonic-gate * If we get a EROFS error, we assume that we are in the 379*0Sstevel@tonic-gate * miniroot. 380*0Sstevel@tonic-gate */ 381*0Sstevel@tonic-gate if (errno != EROFS) 382*0Sstevel@tonic-gate goto error; 383*0Sstevel@tonic-gate in_miniroot = 1; 384*0Sstevel@tonic-gate errno = 0; 385*0Sstevel@tonic-gate tname = tempnam("/var/tmp", "slvm_"); 386*0Sstevel@tonic-gate if (tname == NULL && errno == EROFS) { 387*0Sstevel@tonic-gate /* 388*0Sstevel@tonic-gate * If we are booted on a read-only root because 389*0Sstevel@tonic-gate * of mddb quorum problems we don't want to emit 390*0Sstevel@tonic-gate * any scary error messages. 
391*0Sstevel@tonic-gate */ 392*0Sstevel@tonic-gate errno = 0; 393*0Sstevel@tonic-gate goto out; 394*0Sstevel@tonic-gate } 395*0Sstevel@tonic-gate 396*0Sstevel@tonic-gate /* open tempfile, copy permissions of original file */ 397*0Sstevel@tonic-gate if ((cfp = fopen(tname, "w+")) == NULL) 398*0Sstevel@tonic-gate goto error; 399*0Sstevel@tonic-gate } 400*0Sstevel@tonic-gate if (stat(META_DBCONF, &sbuf) == 0) { 401*0Sstevel@tonic-gate if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0) 402*0Sstevel@tonic-gate goto error; 403*0Sstevel@tonic-gate if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0) 404*0Sstevel@tonic-gate goto error; 405*0Sstevel@tonic-gate } 406*0Sstevel@tonic-gate 407*0Sstevel@tonic-gate /* print header */ 408*0Sstevel@tonic-gate if (fprintf(cfp, "#metadevice database location file ") == EOF) 409*0Sstevel@tonic-gate goto error; 410*0Sstevel@tonic-gate if (fprintf(cfp, "do not hand edit\n") < 0) 411*0Sstevel@tonic-gate goto error; 412*0Sstevel@tonic-gate if (fprintf(cfp, 413*0Sstevel@tonic-gate "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0) 414*0Sstevel@tonic-gate goto error; 415*0Sstevel@tonic-gate 416*0Sstevel@tonic-gate /* dump replicas */ 417*0Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 418*0Sstevel@tonic-gate md_replica_t *r = rl->rl_repp; 419*0Sstevel@tonic-gate int checksum = 42; 420*0Sstevel@tonic-gate int i; 421*0Sstevel@tonic-gate char *devidp; 422*0Sstevel@tonic-gate minor_t min; 423*0Sstevel@tonic-gate 424*0Sstevel@tonic-gate devidp = devid_str_encode(r->r_devid, r->r_minor_name); 425*0Sstevel@tonic-gate /* If devid code can't encode devidp - skip entry */ 426*0Sstevel@tonic-gate if (devidp == NULL) { 427*0Sstevel@tonic-gate continue; 428*0Sstevel@tonic-gate } 429*0Sstevel@tonic-gate 430*0Sstevel@tonic-gate /* compute checksum */ 431*0Sstevel@tonic-gate for (i = 0; ((r->r_driver_name[i] != '\0') && 432*0Sstevel@tonic-gate (i < sizeof (r->r_driver_name))); i++) { 433*0Sstevel@tonic-gate checksum -= 
r->r_driver_name[i]; 434*0Sstevel@tonic-gate } 435*0Sstevel@tonic-gate min = meta_getminor(r->r_namep->dev); 436*0Sstevel@tonic-gate checksum -= min; 437*0Sstevel@tonic-gate checksum -= r->r_blkno; 438*0Sstevel@tonic-gate 439*0Sstevel@tonic-gate for (i = 0; i < strlen(devidp); i++) { 440*0Sstevel@tonic-gate checksum -= devidp[i]; 441*0Sstevel@tonic-gate } 442*0Sstevel@tonic-gate /* print info */ 443*0Sstevel@tonic-gate if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n", 444*0Sstevel@tonic-gate r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) { 445*0Sstevel@tonic-gate goto error; 446*0Sstevel@tonic-gate } 447*0Sstevel@tonic-gate 448*0Sstevel@tonic-gate devid_str_free(devidp); 449*0Sstevel@tonic-gate } 450*0Sstevel@tonic-gate 451*0Sstevel@tonic-gate /* close and rename to real file */ 452*0Sstevel@tonic-gate if (fflush(cfp) != 0) 453*0Sstevel@tonic-gate goto error; 454*0Sstevel@tonic-gate if (fsync(fileno(cfp)) != 0) 455*0Sstevel@tonic-gate goto error; 456*0Sstevel@tonic-gate if (fclose(cfp) != 0) { 457*0Sstevel@tonic-gate cfp = NULL; 458*0Sstevel@tonic-gate goto error; 459*0Sstevel@tonic-gate } 460*0Sstevel@tonic-gate cfp = NULL; 461*0Sstevel@tonic-gate 462*0Sstevel@tonic-gate /* 463*0Sstevel@tonic-gate * Renames don't work in the miniroot since tmpfiles are 464*0Sstevel@tonic-gate * created in /var/tmp. Hence we copy the data out. 465*0Sstevel@tonic-gate */ 466*0Sstevel@tonic-gate 467*0Sstevel@tonic-gate if (! 
in_miniroot) { 468*0Sstevel@tonic-gate if (rename(META_DBCONFTMP, META_DBCONF) != 0) 469*0Sstevel@tonic-gate goto error; 470*0Sstevel@tonic-gate } else { 471*0Sstevel@tonic-gate if ((cfp = fopen(tname, "r")) == NULL) 472*0Sstevel@tonic-gate goto error; 473*0Sstevel@tonic-gate if ((mfp = fopen(META_DBCONF, "w+")) == NULL) 474*0Sstevel@tonic-gate goto error; 475*0Sstevel@tonic-gate while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) { 476*0Sstevel@tonic-gate if (fputs(line, mfp) == NULL) 477*0Sstevel@tonic-gate goto error; 478*0Sstevel@tonic-gate } 479*0Sstevel@tonic-gate (void) fclose(cfp); 480*0Sstevel@tonic-gate cfp = NULL; 481*0Sstevel@tonic-gate if (fflush(mfp) != 0) 482*0Sstevel@tonic-gate goto error; 483*0Sstevel@tonic-gate if (fsync(fileno(mfp)) != 0) 484*0Sstevel@tonic-gate goto error; 485*0Sstevel@tonic-gate if (fclose(mfp) != 0) { 486*0Sstevel@tonic-gate mfp = NULL; 487*0Sstevel@tonic-gate goto error; 488*0Sstevel@tonic-gate } 489*0Sstevel@tonic-gate /* delete the tempfile */ 490*0Sstevel@tonic-gate (void) unlink(tname); 491*0Sstevel@tonic-gate } 492*0Sstevel@tonic-gate /* success */ 493*0Sstevel@tonic-gate rval = 0; 494*0Sstevel@tonic-gate goto out; 495*0Sstevel@tonic-gate 496*0Sstevel@tonic-gate /* tempfile error */ 497*0Sstevel@tonic-gate error: 498*0Sstevel@tonic-gate rval = (in_miniroot) ? mdsyserror(ep, errno, tname): 499*0Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 500*0Sstevel@tonic-gate 501*0Sstevel@tonic-gate 502*0Sstevel@tonic-gate /* cleanup, return success */ 503*0Sstevel@tonic-gate out: 504*0Sstevel@tonic-gate if (rlp != NULL) 505*0Sstevel@tonic-gate metafreereplicalist(rlp); 506*0Sstevel@tonic-gate if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) { 507*0Sstevel@tonic-gate rval = (in_miniroot) ? 
mdsyserror(ep, errno, tname): 508*0Sstevel@tonic-gate mdsyserror(ep, errno, META_DBCONFTMP); 509*0Sstevel@tonic-gate } 510*0Sstevel@tonic-gate free(tname); 511*0Sstevel@tonic-gate return (rval); 512*0Sstevel@tonic-gate } 513*0Sstevel@tonic-gate 514*0Sstevel@tonic-gate /* 515*0Sstevel@tonic-gate * check replica for dev 516*0Sstevel@tonic-gate */ 517*0Sstevel@tonic-gate static int 518*0Sstevel@tonic-gate in_replica( 519*0Sstevel@tonic-gate mdsetname_t *sp, 520*0Sstevel@tonic-gate md_replica_t *rp, 521*0Sstevel@tonic-gate mdname_t *np, 522*0Sstevel@tonic-gate diskaddr_t slblk, 523*0Sstevel@tonic-gate diskaddr_t nblks, 524*0Sstevel@tonic-gate md_error_t *ep 525*0Sstevel@tonic-gate ) 526*0Sstevel@tonic-gate { 527*0Sstevel@tonic-gate mdname_t *repnp = rp->r_namep; 528*0Sstevel@tonic-gate diskaddr_t rep_sblk = rp->r_blkno; 529*0Sstevel@tonic-gate diskaddr_t rep_nblks = rp->r_nblk; 530*0Sstevel@tonic-gate 531*0Sstevel@tonic-gate /* should be in the same set */ 532*0Sstevel@tonic-gate assert(sp != NULL); 533*0Sstevel@tonic-gate 534*0Sstevel@tonic-gate /* if error in master block, assume whole partition */ 535*0Sstevel@tonic-gate if ((rep_sblk == MD_DISKADDR_ERROR) || 536*0Sstevel@tonic-gate (rep_nblks == MD_DISKADDR_ERROR)) { 537*0Sstevel@tonic-gate rep_sblk = 0; 538*0Sstevel@tonic-gate rep_nblks = MD_DISKADDR_ERROR; 539*0Sstevel@tonic-gate } 540*0Sstevel@tonic-gate 541*0Sstevel@tonic-gate /* check overlap */ 542*0Sstevel@tonic-gate if (meta_check_overlap( 543*0Sstevel@tonic-gate MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) { 544*0Sstevel@tonic-gate return (-1); 545*0Sstevel@tonic-gate } 546*0Sstevel@tonic-gate 547*0Sstevel@tonic-gate /* return success */ 548*0Sstevel@tonic-gate return (0); 549*0Sstevel@tonic-gate } 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate /* 552*0Sstevel@tonic-gate * check to see if we're in a replica 553*0Sstevel@tonic-gate */ 554*0Sstevel@tonic-gate int 555*0Sstevel@tonic-gate meta_check_inreplica( 556*0Sstevel@tonic-gate 
mdsetname_t *sp, 557*0Sstevel@tonic-gate mdname_t *np, 558*0Sstevel@tonic-gate diskaddr_t slblk, 559*0Sstevel@tonic-gate diskaddr_t nblks, 560*0Sstevel@tonic-gate md_error_t *ep 561*0Sstevel@tonic-gate ) 562*0Sstevel@tonic-gate { 563*0Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 564*0Sstevel@tonic-gate md_replicalist_t *rl; 565*0Sstevel@tonic-gate int rval = 0; 566*0Sstevel@tonic-gate 567*0Sstevel@tonic-gate /* should have a set */ 568*0Sstevel@tonic-gate assert(sp != NULL); 569*0Sstevel@tonic-gate 570*0Sstevel@tonic-gate /* for each replica */ 571*0Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 572*0Sstevel@tonic-gate return (-1); 573*0Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 574*0Sstevel@tonic-gate md_replica_t *rp = rl->rl_repp; 575*0Sstevel@tonic-gate 576*0Sstevel@tonic-gate /* check replica */ 577*0Sstevel@tonic-gate if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) { 578*0Sstevel@tonic-gate rval = -1; 579*0Sstevel@tonic-gate break; 580*0Sstevel@tonic-gate } 581*0Sstevel@tonic-gate } 582*0Sstevel@tonic-gate 583*0Sstevel@tonic-gate /* cleanup, return success */ 584*0Sstevel@tonic-gate metafreereplicalist(rlp); 585*0Sstevel@tonic-gate return (rval); 586*0Sstevel@tonic-gate } 587*0Sstevel@tonic-gate 588*0Sstevel@tonic-gate /* 589*0Sstevel@tonic-gate * check replica 590*0Sstevel@tonic-gate */ 591*0Sstevel@tonic-gate int 592*0Sstevel@tonic-gate meta_check_replica( 593*0Sstevel@tonic-gate mdsetname_t *sp, /* set to check against */ 594*0Sstevel@tonic-gate mdname_t *np, /* component to check against */ 595*0Sstevel@tonic-gate mdchkopts_t options, /* option flags */ 596*0Sstevel@tonic-gate diskaddr_t slblk, /* start logical block */ 597*0Sstevel@tonic-gate diskaddr_t nblks, /* number of blocks (-1,rest of them) */ 598*0Sstevel@tonic-gate md_error_t *ep /* error packet */ 599*0Sstevel@tonic-gate ) 600*0Sstevel@tonic-gate { 601*0Sstevel@tonic-gate mdchkopts_t chkoptions = MDCHK_ALLOW_REPSLICE; 
602*0Sstevel@tonic-gate 603*0Sstevel@tonic-gate /* make sure we have a disk */ 604*0Sstevel@tonic-gate if (metachkcomp(np, ep) != 0) 605*0Sstevel@tonic-gate return (-1); 606*0Sstevel@tonic-gate 607*0Sstevel@tonic-gate /* check to ensure that it is not already in use */ 608*0Sstevel@tonic-gate if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 609*0Sstevel@tonic-gate return (-1); 610*0Sstevel@tonic-gate } 611*0Sstevel@tonic-gate 612*0Sstevel@tonic-gate if (options & MDCHK_ALLOW_NODBS) 613*0Sstevel@tonic-gate return (0); 614*0Sstevel@tonic-gate 615*0Sstevel@tonic-gate if (options & MDCHK_DRVINSET) 616*0Sstevel@tonic-gate return (0); 617*0Sstevel@tonic-gate 618*0Sstevel@tonic-gate /* make sure it is in the set */ 619*0Sstevel@tonic-gate if (meta_check_inset(sp, np, ep) != 0) 620*0Sstevel@tonic-gate return (-1); 621*0Sstevel@tonic-gate 622*0Sstevel@tonic-gate /* make sure its not in a metadevice */ 623*0Sstevel@tonic-gate if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0) 624*0Sstevel@tonic-gate return (-1); 625*0Sstevel@tonic-gate 626*0Sstevel@tonic-gate /* return success */ 627*0Sstevel@tonic-gate return (0); 628*0Sstevel@tonic-gate } 629*0Sstevel@tonic-gate 630*0Sstevel@tonic-gate static int 631*0Sstevel@tonic-gate update_dbinfo_on_drives( 632*0Sstevel@tonic-gate mdsetname_t *sp, 633*0Sstevel@tonic-gate md_drive_desc *dd, 634*0Sstevel@tonic-gate int set_locked, 635*0Sstevel@tonic-gate int force, 636*0Sstevel@tonic-gate md_error_t *ep 637*0Sstevel@tonic-gate ) 638*0Sstevel@tonic-gate { 639*0Sstevel@tonic-gate md_set_desc *sd; 640*0Sstevel@tonic-gate int i; 641*0Sstevel@tonic-gate md_setkey_t *cl_sk; 642*0Sstevel@tonic-gate int rval = 0; 643*0Sstevel@tonic-gate md_mnnode_desc *nd; 644*0Sstevel@tonic-gate 645*0Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 646*0Sstevel@tonic-gate return (-1); 647*0Sstevel@tonic-gate 648*0Sstevel@tonic-gate if (! 
set_locked) { 649*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 650*0Sstevel@tonic-gate md_error_t xep = mdnullerror; 651*0Sstevel@tonic-gate sigset_t sigs; 652*0Sstevel@tonic-gate /* Make sure we are blocking all signals */ 653*0Sstevel@tonic-gate if (procsigs(TRUE, &sigs, &xep) < 0) 654*0Sstevel@tonic-gate mdclrerror(&xep); 655*0Sstevel@tonic-gate 656*0Sstevel@tonic-gate nd = sd->sd_nodelist; 657*0Sstevel@tonic-gate while (nd) { 658*0Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, 659*0Sstevel@tonic-gate mynode()) != 0) { 660*0Sstevel@tonic-gate nd = nd->nd_next; 661*0Sstevel@tonic-gate continue; 662*0Sstevel@tonic-gate } 663*0Sstevel@tonic-gate 664*0Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 665*0Sstevel@tonic-gate nd = nd->nd_next; 666*0Sstevel@tonic-gate continue; 667*0Sstevel@tonic-gate } 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gate if (clnt_lock_set(nd->nd_nodename, sp, ep)) 670*0Sstevel@tonic-gate return (-1); 671*0Sstevel@tonic-gate nd = nd->nd_next; 672*0Sstevel@tonic-gate } 673*0Sstevel@tonic-gate } else { 674*0Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 675*0Sstevel@tonic-gate /* Skip empty slots */ 676*0Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 677*0Sstevel@tonic-gate continue; 678*0Sstevel@tonic-gate 679*0Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], 680*0Sstevel@tonic-gate mynode()) != 0) 681*0Sstevel@tonic-gate continue; 682*0Sstevel@tonic-gate 683*0Sstevel@tonic-gate if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) 684*0Sstevel@tonic-gate return (-1); 685*0Sstevel@tonic-gate } 686*0Sstevel@tonic-gate } 687*0Sstevel@tonic-gate } 688*0Sstevel@tonic-gate 689*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 690*0Sstevel@tonic-gate nd = sd->sd_nodelist; 691*0Sstevel@tonic-gate while (nd) { 692*0Sstevel@tonic-gate if (force && strcmp(nd->nd_nodename, mynode()) != 0) { 693*0Sstevel@tonic-gate nd = nd->nd_next; 694*0Sstevel@tonic-gate continue; 695*0Sstevel@tonic-gate } 696*0Sstevel@tonic-gate 
697*0Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 698*0Sstevel@tonic-gate nd = nd->nd_next; 699*0Sstevel@tonic-gate continue; 700*0Sstevel@tonic-gate } 701*0Sstevel@tonic-gate 702*0Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep) 703*0Sstevel@tonic-gate == -1) { 704*0Sstevel@tonic-gate rval = -1; 705*0Sstevel@tonic-gate break; 706*0Sstevel@tonic-gate } 707*0Sstevel@tonic-gate nd = nd->nd_next; 708*0Sstevel@tonic-gate } 709*0Sstevel@tonic-gate } else { 710*0Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 711*0Sstevel@tonic-gate /* Skip empty slots */ 712*0Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 713*0Sstevel@tonic-gate continue; 714*0Sstevel@tonic-gate 715*0Sstevel@tonic-gate if (force && strcmp(sd->sd_nodes[i], mynode()) != 0) 716*0Sstevel@tonic-gate continue; 717*0Sstevel@tonic-gate 718*0Sstevel@tonic-gate if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep) 719*0Sstevel@tonic-gate == -1) { 720*0Sstevel@tonic-gate rval = -1; 721*0Sstevel@tonic-gate break; 722*0Sstevel@tonic-gate } 723*0Sstevel@tonic-gate } 724*0Sstevel@tonic-gate } 725*0Sstevel@tonic-gate 726*0Sstevel@tonic-gate if (! 
set_locked) { 727*0Sstevel@tonic-gate cl_sk = cl_get_setkey(sp->setno, sp->setname); 728*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 729*0Sstevel@tonic-gate nd = sd->sd_nodelist; 730*0Sstevel@tonic-gate while (nd) { 731*0Sstevel@tonic-gate if (force && 732*0Sstevel@tonic-gate strcmp(nd->nd_nodename, mynode()) != 0) { 733*0Sstevel@tonic-gate nd = nd->nd_next; 734*0Sstevel@tonic-gate continue; 735*0Sstevel@tonic-gate } 736*0Sstevel@tonic-gate 737*0Sstevel@tonic-gate if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 738*0Sstevel@tonic-gate nd = nd->nd_next; 739*0Sstevel@tonic-gate continue; 740*0Sstevel@tonic-gate } 741*0Sstevel@tonic-gate 742*0Sstevel@tonic-gate if (clnt_unlock_set(nd->nd_nodename, cl_sk, 743*0Sstevel@tonic-gate ep)) { 744*0Sstevel@tonic-gate rval = -1; 745*0Sstevel@tonic-gate break; 746*0Sstevel@tonic-gate } 747*0Sstevel@tonic-gate nd = nd->nd_next; 748*0Sstevel@tonic-gate } 749*0Sstevel@tonic-gate } else { 750*0Sstevel@tonic-gate for (i = 0; i < MD_MAXSIDES; i++) { 751*0Sstevel@tonic-gate /* Skip empty slots */ 752*0Sstevel@tonic-gate if (sd->sd_nodes[i][0] == '\0') 753*0Sstevel@tonic-gate continue; 754*0Sstevel@tonic-gate 755*0Sstevel@tonic-gate if (force && 756*0Sstevel@tonic-gate strcmp(sd->sd_nodes[i], mynode()) != 0) 757*0Sstevel@tonic-gate continue; 758*0Sstevel@tonic-gate 759*0Sstevel@tonic-gate if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, 760*0Sstevel@tonic-gate ep)) { 761*0Sstevel@tonic-gate rval = -1; 762*0Sstevel@tonic-gate break; 763*0Sstevel@tonic-gate } 764*0Sstevel@tonic-gate } 765*0Sstevel@tonic-gate 766*0Sstevel@tonic-gate } 767*0Sstevel@tonic-gate cl_set_setkey(NULL); 768*0Sstevel@tonic-gate } 769*0Sstevel@tonic-gate 770*0Sstevel@tonic-gate return (rval); 771*0Sstevel@tonic-gate }

/*
 * Record the reason a mdmn_send_message() call failed and release its result.
 *
 * If no result came back, MDE_DS_COMMD_SEND_FAIL is stored in ep.  If a
 * result came back, its error is moved into ep; should that leave ep clean,
 * MDE_DS_COMMD_SEND_FAIL is stored instead.  The result is then freed, so
 * the caller must not touch resultp afterwards.
 */
static void
db_commd_send_failed(
	mdsetname_t	*sp,
	md_mn_result_t	*resultp,
	md_error_t	*ep
)
{
	if (resultp == NULL) {
		(void) mddserror(ep, MDE_DS_COMMD_SEND_FAIL,
		    sp->setno, NULL, NULL, sp->setname);
		return;
	}

	(void) mdstealerror(ep, &(resultp->mmr_ep));
	if (mdisok(ep)) {
		(void) mddserror(ep, MDE_DS_COMMD_SEND_FAIL,
		    sp->setno, NULL, NULL, sp->setname);
	}
	free_result(resultp);
}

/*
 * Add the side names for the replica at block blkno of np.
 *
 * The sides are enumerated with meta_getnextside_devinfo(), starting from
 * MD_SIDEWILD, until it returns 0 (no more sides) or -1 (error).  For every
 * side returned, the block name, driver name and minor number are either
 *   - broadcast with a MD_MN_MSG_META_DB_NEWSIDE message, when sp is not the
 *     local set, its set descriptor satisfies MD_MNSET_DESC() and bcast is
 *     DB_ADDSIDENMS_BCAST, or
 *   - handed to the MD_DB_NEWSIDE ioctl in every other case.
 *
 * Returns 0 when every side was processed.  On failure the loop stops at the
 * first error, ep describes it, and the return value is -1 (or whatever
 * mdstealerror() returned for a failed ioctl).
 */
int
meta_db_addsidenms(
	mdsetname_t	*sp,
	mdname_t	*np,
	daddr_t		blkno,
	int		bcast,
	md_error_t	*ep
)
{
	side_t		sideno = MD_SIDEWILD;
	char		*bname = NULL;
	char		*dname = NULL;
	minor_t		mnum;
	mddb_config_t	c;
	int		done;
	int		rval = 0;
	md_set_desc	*sd = NULL;	/* only looked up for non-local sets */

	/*CONSTCOND*/
	while (1) {
		/* release the names handed back for the previous side */
		if (bname != NULL) {
			Free(bname);
			bname = NULL;
		}
		if (dname != NULL) {
			Free(dname);
			dname = NULL;
		}
		if ((done = meta_getnextside_devinfo(sp, np->bname,
		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
			rval = -1;
			break;
		}

		if (done == 0)
			break;

		if (! metaislocalset(sp)) {
			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
				rval = -1;
				break;
			}
		}

		/*
		 * Send addsidenms to all nodes using rpc.mdcommd if
		 * sidename is being added to MN diskset.
		 *
		 * It's ok to broadcast this call to other nodes.
		 *
		 * Note: The broadcast to other nodes isn't needed during
		 * the addition of the first mddbs to the set since the
		 * other nodes haven't been joined to the set yet.  All
		 * nodes in a MN diskset are (implicitly) joined to the set
		 * on the addition of the first mddb.
		 */
		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
		    (bcast == DB_ADDSIDENMS_BCAST)) {
			md_mn_result_t			*resultp = NULL;
			md_mn_msg_meta_db_newside_t	db_ns;
			int				send_rval;

			/*
			 * Start from a zeroed message so that no
			 * uninitialised stack bytes are handed to
			 * mdmn_send_message().
			 */
			(void) memset(&db_ns, 0, sizeof (db_ns));
			db_ns.msg_l_dev = np->dev;
			db_ns.msg_sideno = sideno;
			db_ns.msg_blkno = blkno;
			(void) strncpy(db_ns.msg_dname, dname,
			    sizeof (db_ns.msg_dname));
			(void) splitname(np->bname, &db_ns.msg_splitname);
			db_ns.msg_mnum = mnum;

			/* Leave devid empty until devids are supported */
			db_ns.msg_devid[0] = 0;

			/*
			 * If reconfig cycle has been started, this node is
			 * stuck in the return step until this command has
			 * completed.  If mdcommd is suspended, ask
			 * send_message to fail (instead of retrying)
			 * so that metaset can finish allowing the reconfig
			 * cycle to proceed.
			 */
			send_rval = mdmn_send_message(sp->setno,
			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
			    sizeof (md_mn_msg_meta_db_newside_t),
			    &resultp, ep);
			if (send_rval != 0) {
				db_commd_send_failed(sp, resultp, ep);
				rval = -1;
				break;
			}
			if (resultp)
				free_result(resultp);
		} else {
			/*
			 * Let this side's device name, minor # and driver name
			 * be known to the database replica.
			 */
			(void) memset(&c, 0, sizeof (c));

			/* Fill in device/replica info */
			c.c_locator.l_dev = meta_cmpldev(np->dev);
			c.c_locator.l_blkno = blkno;
			(void) strncpy(c.c_locator.l_driver, dname,
			    sizeof (c.c_locator.l_driver));
			(void) splitname(bname, &c.c_devname);
			c.c_locator.l_mnum = mnum;

			/* Fill in setno, setname, and sideno */
			c.c_setno = sp->setno;
			(void) strncpy(c.c_setname, sp->setname,
			    sizeof (c.c_setname));
			c.c_sideno = sideno;

			/*
			 * Don't need device id information from this ioctl
			 * Kernel determines device id from dev_t, which
			 * is just what this code would do.
			 */
			c.c_locator.l_devid = (uint64_t)0;
			c.c_locator.l_devid_flags = 0;

			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
				rval = mdstealerror(ep, &c.c_mde);
				break;
			}
		}
	}

	/* cleanup, return rval (0 unless the loop stopped on an error) */
	if (bname != NULL) {
		Free(bname);
		bname = NULL;
	}
	if (dname != NULL) {
		Free(dname);
		dname = NULL;
	}
	return (rval);
}


/*
 * Delete the side name of side sideno for the replica at block blkno of np.
 *
 * When sp is not the local set, its set descriptor satisfies MD_MNSET_DESC()
 * and this node has MD_MN_NODE_OWN set, the request is sent as a
 * MD_MN_MSG_META_DB_DELSIDE message; otherwise the MD_DB_DELSIDE ioctl is
 * issued directly.
 *
 * Returns 0 on success.  On failure ep describes the error and the return
 * value is -1 (or whatever mdstealerror() returned for a failed ioctl).
 */
int
meta_db_delsidenm(
	mdsetname_t	*sp,
	side_t		sideno,
	mdname_t	*np,
	daddr_t		blkno,
	md_error_t	*ep
)
{
	mddb_config_t	c;
	md_set_desc	*sd = NULL;	/* only looked up for non-local sets */

	if (! metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);
	}
	/* Use rpc.mdcommd to delete mddb side from all nodes */
	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
		md_mn_result_t			*resultp = NULL;
		md_mn_msg_meta_db_delside_t	db_ds;
		int				send_rval;

		/*
		 * Start from a zeroed message so that no uninitialised
		 * stack bytes are handed to mdmn_send_message().
		 */
		(void) memset(&db_ds, 0, sizeof (db_ds));
		db_ds.msg_l_dev = np->dev;
		db_ds.msg_blkno = blkno;
		db_ds.msg_sideno = sideno;

		/* Leave devid empty until devids are supported */
		db_ds.msg_devid[0] = 0;

		/*
		 * If reconfig cycle has been started, this node is
		 * stuck in the return step until this command has
		 * completed.  If mdcommd is suspended, ask
		 * send_message to fail (instead of retrying)
		 * so that metaset can finish allowing the reconfig
		 * cycle to proceed.
		 */
		send_rval = mdmn_send_message(sp->setno,
		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
		if (send_rval != 0) {
			db_commd_send_failed(sp, resultp, ep);
			return (-1);
		}
		if (resultp)
			free_result(resultp);

	} else {
		/*
		 * Identify the replica (device and block) and the side
		 * whose name is to be deleted.
		 */
		(void) memset(&c, 0, sizeof (c));

		/* Fill in device/replica info */
		c.c_locator.l_dev = meta_cmpldev(np->dev);
		c.c_locator.l_blkno = blkno;

		/* Fill in setno, setname, and sideno */
		c.c_setno = sp->setno;
		/* bounded copy, as in meta_db_addsidenms() */
		(void) strncpy(c.c_setname, sp->setname,
		    sizeof (c.c_setname));
		c.c_sideno = sideno;

		/*
		 * Don't need device id information from this ioctl
		 * Kernel determines device id from dev_t, which
		 * is just what this code would do.
		 */
		c.c_locator.l_devid = (uint64_t)0;
		c.c_locator.l_devid_flags = 0;

		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
			return (mdstealerror(ep, &c.c_mde));
	}
	return (0);
}


/*
 * Check that no two entries of nlp have the same cname.
 *
 * Returns 0 when all names differ.  For the first duplicate found, stores
 * MDE_DUPDRIVE for that name in ep and returns the value of mderror().
 */
static int
mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
{
	mdnamelist_t	*dnp1, *dnp2;

	/* compare every entry with each entry that follows it */
	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
				return (mderror(ep, MDE_DUPDRIVE,
				    dnp1->namep->cname));
		}
	}
	return (0);
}


/*
 * Read exactly sz bytes from the start of file name into buf.
 * Return 0 on success, -1 if the file cannot be opened or yields fewer
 * than sz bytes in a single read().
 */
static int
filediff_read(char *name, char *buf, size_t sz)
{
	int	fd;
	ssize_t	cnt;

	if ((fd = open(name, O_RDONLY)) == -1)
		return (-1);
	cnt = read(fd, buf, sz);
	(void) close(fd);

	if (cnt < 0 || (size_t)cnt != sz)
		return (-1);
	return (0);
}

/*
 * Return 1 if files are different, else return 0
 *
 * Files that cannot be examined or read (stat, allocation, open or read
 * failure) are reported as different.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		sz;
	struct stat	tsbuf;
	struct stat	sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	if (stat(tsname, &tsbuf) != 0)
		return (1);
	if (stat(sname, &sbuf) != 0)
		return (1);
	if (tsbuf.st_size != sbuf.st_size)
		return (1);
	sz = (size_t)sbuf.st_size;

	/*
	 * Two empty files are identical.  Deciding that here also keeps
	 * malloc(0), which may legitimately return NULL, from being
	 * mistaken for an allocation failure.
	 */
	if (sz == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(sz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_read(tsname, tbuf, sz) != 0)
		goto out;
	if (filediff_read(sname, buf, sz) != 0)
		goto out;

	/* compare content; bcmp() only promises zero/non-zero */
	ret = (bcmp(tbuf, buf, sz) != 0) ? 1 : 0;
out:
	free(tbuf);
	free(buf);
	return (ret);
}

/*
 * patch md.conf file with mddb locations
 *
 * A temporary copy of the system file sname is made with
 * meta_systemfile_copy(), the mddb information from cname is appended to it
 * with meta_systemfile_append_mddb(), and the copy then replaces sname.  If
 * sname is NULL it defaults to "md.conf" when patch is set and to
 * "/kernel/drv/md.conf" otherwise; a NULL cname defaults to META_DBCONF.
 *
 * Returns 0 on success.  That includes the case where the copy fails with
 * EROFS and the case where the new content is identical to sname, both of
 * which leave sname untouched.  Returns -1 on failure, with ep describing
 * the error.
 */
int
meta_db_patch(
	char		*sname,		/* system file name */
	char		*cname,		/* mddb.cf file name */
	int		patch,		/* patching locally */
	md_error_t	*ep
)
{
	char		*tsname = NULL;
	char		line[MDDB_BOOTLIST_MAX_LEN];
	FILE		*tsfp = NULL;
	FILE		*mfp = NULL;
	int		closerr;
	int		rval = -1;

	/* check names */
	if (sname == NULL) {
		if (patch)
			sname = "md.conf";
		else
			sname = "/kernel/drv/md.conf";
	}
	if (cname == NULL)
		cname = META_DBCONF;

	/*
	 * edit file
	 */
	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
		if (mdissyserror(ep, EROFS)) {
			/*
			 * If we are booted on a read-only root because
			 * of mddb quorum problems we don't want to emit
			 * any scary error messages.
			 */
			mdclrerror(ep);
			rval = 0;
		}
		goto out;
	}

	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
	    ep) != 0)
		goto out;

	/*
	 * filediff() looks at the files on disk, so push out anything
	 * still held in the stdio buffer before comparing.
	 */
	if (fflush(tsfp) != 0) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}

	/* if file content is identical, skip rename */
	if (filediff(tsname, sname) == 0) {
		rval = 0;
		goto out;
	}

	if (fsync(fileno(tsfp)) != 0) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}

	/*
	 * The stream is gone after fclose() whether or not it succeeded,
	 * so forget it first to keep the cleanup code from closing it again.
	 */
	closerr = fclose(tsfp);
	tsfp = NULL;
	if (closerr != 0) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}

	/*
	 * rename file.  If we get a Cross Device error then it
	 * is because we are in the miniroot.
	 */
	if (rename(tsname, sname) == 0) {
		/* the temporary name no longer exists; nothing to unlink */
		Free(tsname);
		tsname = NULL;
		rval = 0;
		goto out;
	}
	if (errno != EXDEV) {
		(void) mdsyserror(ep, errno, sname);
		goto out;
	}

	/*
	 * Cross-device: copy the temporary file over the system file
	 * line by line instead.
	 */
	if ((tsfp = fopen(tsname, "r")) == NULL) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}
	if ((mfp = fopen(sname, "w+")) == NULL) {
		(void) mdsyserror(ep, errno, sname);
		goto out;
	}
	while (fgets(line, sizeof (line), tsfp) != NULL) {
		/* fputs() reports failure with EOF, not 0 */
		if (fputs(line, mfp) == EOF) {
			(void) mdsyserror(ep, errno, sname);
			goto out;
		}
	}
	if (ferror(tsfp)) {
		(void) mdsyserror(ep, errno, tsname);
		goto out;
	}
	if ((fflush(mfp) != 0) || (fsync(fileno(mfp)) != 0)) {
		(void) mdsyserror(ep, errno, sname);
		goto out;
	}
	closerr = fclose(mfp);
	mfp = NULL;
	if (closerr != 0) {
		(void) mdsyserror(ep, errno, sname);
		goto out;
	}

	/* copied; the temporary file is removed by the cleanup below */
	rval = 0;

	/* cleanup, return rval */
out:
	if (tsfp != NULL)
		(void) fclose(tsfp);
	if (mfp != NULL)
		(void) fclose(mfp);
	if (tsname != NULL) {
		(void) unlink(tsname);
		Free(tsname);
	}
	return (rval);
}

1199*0Sstevel@tonic-gate /* 1200*0Sstevel@tonic-gate * Add replicas to set. 
This happens as a result of: 1201*0Sstevel@tonic-gate * - metadb [-s set_name] -a 1202*0Sstevel@tonic-gate * - metaset -s set_name -a disk 1203*0Sstevel@tonic-gate * - metaset -s set_name -d disk (causes a rebalance of mddbs) 1204*0Sstevel@tonic-gate * - metaset -s set_name -b 1205*0Sstevel@tonic-gate * 1206*0Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 1207*0Sstevel@tonic-gate * 1208*0Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 1209*0Sstevel@tonic-gate * is running the metaset command. 1210*0Sstevel@tonic-gate * 1211*0Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 1212*0Sstevel@tonic-gate * running the metaset command. If this is the first mddb added to 1213*0Sstevel@tonic-gate * the MN diskset, then no communication is made to other nodes via commd 1214*0Sstevel@tonic-gate * since the other nodes will be in-sync with respect to the mddbs when 1215*0Sstevel@tonic-gate * those other nodes join the set and snarf in the newly created mddb. 1216*0Sstevel@tonic-gate * If this is not the first mddb added to the MN diskset, then this 1217*0Sstevel@tonic-gate * attach command is sent to all of the nodes using commd. This keeps 1218*0Sstevel@tonic-gate * the nodes in-sync. 
1219*0Sstevel@tonic-gate */ 1220*0Sstevel@tonic-gate int 1221*0Sstevel@tonic-gate meta_db_attach( 1222*0Sstevel@tonic-gate mdsetname_t *sp, 1223*0Sstevel@tonic-gate mdnamelist_t *db_nlp, 1224*0Sstevel@tonic-gate mdchkopts_t options, 1225*0Sstevel@tonic-gate md_timeval32_t *timeval, 1226*0Sstevel@tonic-gate int dbcnt, 1227*0Sstevel@tonic-gate int dbsize, 1228*0Sstevel@tonic-gate char *sysfilename, 1229*0Sstevel@tonic-gate md_error_t *ep 1230*0Sstevel@tonic-gate ) 1231*0Sstevel@tonic-gate { 1232*0Sstevel@tonic-gate struct mddb_config c; 1233*0Sstevel@tonic-gate mdnamelist_t *nlp; 1234*0Sstevel@tonic-gate mdname_t *np; 1235*0Sstevel@tonic-gate md_drive_desc *dd = NULL; 1236*0Sstevel@tonic-gate md_drive_desc *p; 1237*0Sstevel@tonic-gate int i; 1238*0Sstevel@tonic-gate int fd; 1239*0Sstevel@tonic-gate side_t sideno; 1240*0Sstevel@tonic-gate daddr_t blkno; 1241*0Sstevel@tonic-gate int replicacount = 0; 1242*0Sstevel@tonic-gate int start_mdmonitord = 0; 1243*0Sstevel@tonic-gate int rval = 0; 1244*0Sstevel@tonic-gate md_error_t status = mdnullerror; 1245*0Sstevel@tonic-gate md_set_desc *sd; 1246*0Sstevel@tonic-gate int stale_bool = FALSE; 1247*0Sstevel@tonic-gate int flags; 1248*0Sstevel@tonic-gate int firstmddb = 1; 1249*0Sstevel@tonic-gate md_timeval32_t inittime = {0, 0}; 1250*0Sstevel@tonic-gate 1251*0Sstevel@tonic-gate /* 1252*0Sstevel@tonic-gate * Error if we don't get some work to do. 
1253*0Sstevel@tonic-gate */ 1254*0Sstevel@tonic-gate if (db_nlp == NULL) 1255*0Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 1256*0Sstevel@tonic-gate 1257*0Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 1258*0Sstevel@tonic-gate return (-1); 1259*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 1260*0Sstevel@tonic-gate c.c_id = 0; 1261*0Sstevel@tonic-gate c.c_setno = sp->setno; 1262*0Sstevel@tonic-gate 1263*0Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 1264*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1265*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1266*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1267*0Sstevel@tonic-gate if (metaislocalset(sp)) { 1268*0Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) 1269*0Sstevel@tonic-gate mdclrerror(&c.c_mde); 1270*0Sstevel@tonic-gate else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) || 1271*0Sstevel@tonic-gate (! (options & MDCHK_ALLOW_NODBS))) 1272*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 1273*0Sstevel@tonic-gate } else { 1274*0Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER)) 1275*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 1276*0Sstevel@tonic-gate } 1277*0Sstevel@tonic-gate mdclrerror(&c.c_mde); 1278*0Sstevel@tonic-gate } 1279*0Sstevel@tonic-gate /* 1280*0Sstevel@tonic-gate * Is current set STALE? 
1281*0Sstevel@tonic-gate */ 1282*0Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 1283*0Sstevel@tonic-gate stale_bool = TRUE; 1284*0Sstevel@tonic-gate } 1285*0Sstevel@tonic-gate 1286*0Sstevel@tonic-gate assert(db_nlp != NULL); 1287*0Sstevel@tonic-gate 1288*0Sstevel@tonic-gate /* if creating the metadbs for the first time start mdmonitord */ 1289*0Sstevel@tonic-gate if (c.c_dbcnt == 0) 1290*0Sstevel@tonic-gate start_mdmonitord = 1; 1291*0Sstevel@tonic-gate 1292*0Sstevel@tonic-gate /* 1293*0Sstevel@tonic-gate * check to see if we will go over the total possible number 1294*0Sstevel@tonic-gate * of data bases 1295*0Sstevel@tonic-gate */ 1296*0Sstevel@tonic-gate nlp = db_nlp; 1297*0Sstevel@tonic-gate while (nlp) { 1298*0Sstevel@tonic-gate replicacount += dbcnt; 1299*0Sstevel@tonic-gate nlp = nlp->next; 1300*0Sstevel@tonic-gate } 1301*0Sstevel@tonic-gate 1302*0Sstevel@tonic-gate if ((replicacount + c.c_dbcnt) > c.c_dbmax) 1303*0Sstevel@tonic-gate return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32, 1304*0Sstevel@tonic-gate sp->setno, c.c_dbcnt + replicacount, NULL)); 1305*0Sstevel@tonic-gate 1306*0Sstevel@tonic-gate /* 1307*0Sstevel@tonic-gate * go through and check to make sure all locations specified 1308*0Sstevel@tonic-gate * are legal also pick out driver name; 1309*0Sstevel@tonic-gate */ 1310*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1311*0Sstevel@tonic-gate diskaddr_t devsize; 1312*0Sstevel@tonic-gate 1313*0Sstevel@tonic-gate np = nlp->namep; 1314*0Sstevel@tonic-gate 1315*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 1316*0Sstevel@tonic-gate uint_t partno; 1317*0Sstevel@tonic-gate uint_t rep_partno; 1318*0Sstevel@tonic-gate mddrivename_t *dnp = np->drivenamep; 1319*0Sstevel@tonic-gate 1320*0Sstevel@tonic-gate /* 1321*0Sstevel@tonic-gate * make sure that non-local database replicas 1322*0Sstevel@tonic-gate * are always on the replica slice. 
1323*0Sstevel@tonic-gate */ 1324*0Sstevel@tonic-gate if (meta_replicaslice(dnp, 1325*0Sstevel@tonic-gate &rep_partno, ep) != 0) 1326*0Sstevel@tonic-gate return (-1); 1327*0Sstevel@tonic-gate if (metagetvtoc(np, FALSE, &partno, ep) == NULL) 1328*0Sstevel@tonic-gate return (-1); 1329*0Sstevel@tonic-gate if (partno != rep_partno) 1330*0Sstevel@tonic-gate return (mddeverror(ep, MDE_REPCOMP_ONLY, 1331*0Sstevel@tonic-gate np->dev, sp->setname)); 1332*0Sstevel@tonic-gate } 1333*0Sstevel@tonic-gate 1334*0Sstevel@tonic-gate if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize), 1335*0Sstevel@tonic-gate ep)) { 1336*0Sstevel@tonic-gate return (-1); 1337*0Sstevel@tonic-gate } 1338*0Sstevel@tonic-gate 1339*0Sstevel@tonic-gate if ((devsize = metagetsize(np, ep)) == -1) 1340*0Sstevel@tonic-gate return (-1); 1341*0Sstevel@tonic-gate 1342*0Sstevel@tonic-gate if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16)) 1343*0Sstevel@tonic-gate return (mdmddberror(ep, MDE_REPLICA_TOOSMALL, 1344*0Sstevel@tonic-gate meta_getminor(np->dev), sp->setno, devsize, 1345*0Sstevel@tonic-gate np->cname)); 1346*0Sstevel@tonic-gate } 1347*0Sstevel@tonic-gate 1348*0Sstevel@tonic-gate /* 1349*0Sstevel@tonic-gate * If first disk in set we don't have lb_inittime yet for use as 1350*0Sstevel@tonic-gate * mb_setcreatetime so don't go looking for it. WE'll come back 1351*0Sstevel@tonic-gate * later and update after the locator block has been created. 1352*0Sstevel@tonic-gate * If this isn't the first disk in the set, we have a locator 1353*0Sstevel@tonic-gate * block and thus we have lb_inittime. Set mb_setcreatetime to 1354*0Sstevel@tonic-gate * lb_inittime. 1355*0Sstevel@tonic-gate */ 1356*0Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 1357*0Sstevel@tonic-gate if (c.c_dbcnt != 0) { 1358*0Sstevel@tonic-gate firstmddb = 0; 1359*0Sstevel@tonic-gate inittime = meta_get_lb_inittime(sp, ep); 1360*0Sstevel@tonic-gate } 1361*0Sstevel@tonic-gate } 1362*0Sstevel@tonic-gate 1363*0Sstevel@tonic-gate /* 1364*0Sstevel@tonic-gate * go through and write all master blocks 1365*0Sstevel@tonic-gate */ 1366*0Sstevel@tonic-gate 1367*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1368*0Sstevel@tonic-gate np = nlp->namep; 1369*0Sstevel@tonic-gate 1370*0Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) < 0) 1371*0Sstevel@tonic-gate return (mdsyserror(ep, errno, np->rname)); 1372*0Sstevel@tonic-gate 1373*0Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 1374*0Sstevel@tonic-gate if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize, 1375*0Sstevel@tonic-gate inittime, ep)) { 1376*0Sstevel@tonic-gate (void) close(fd); 1377*0Sstevel@tonic-gate return (-1); 1378*0Sstevel@tonic-gate } 1379*0Sstevel@tonic-gate } 1380*0Sstevel@tonic-gate (void) close(fd); 1381*0Sstevel@tonic-gate } 1382*0Sstevel@tonic-gate 1383*0Sstevel@tonic-gate if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD) 1384*0Sstevel@tonic-gate return (-1); 1385*0Sstevel@tonic-gate 1386*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 1387*0Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 1388*0Sstevel@tonic-gate if (! 
mdisok(ep)) 1389*0Sstevel@tonic-gate return (-1); 1390*0Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 1391*0Sstevel@tonic-gate return (-1); 1392*0Sstevel@tonic-gate 1393*0Sstevel@tonic-gate } 1394*0Sstevel@tonic-gate 1395*0Sstevel@tonic-gate /* 1396*0Sstevel@tonic-gate * go through and tell kernel to add them 1397*0Sstevel@tonic-gate */ 1398*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1399*0Sstevel@tonic-gate mdcinfo_t *cinfo; 1400*0Sstevel@tonic-gate 1401*0Sstevel@tonic-gate np = nlp->namep; 1402*0Sstevel@tonic-gate 1403*0Sstevel@tonic-gate if ((cinfo = metagetcinfo(np, ep)) == NULL) { 1404*0Sstevel@tonic-gate rval = -1; 1405*0Sstevel@tonic-gate goto out; 1406*0Sstevel@tonic-gate } 1407*0Sstevel@tonic-gate 1408*0Sstevel@tonic-gate /* 1409*0Sstevel@tonic-gate * If mddb is being added to MN diskset and there already 1410*0Sstevel@tonic-gate * exists a valid mddb in the set (which equates to this 1411*0Sstevel@tonic-gate * node being an owner of the set) then use rpc.mdcommd 1412*0Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 1413*0Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 1414*0Sstevel@tonic-gate * can't write the message to the mddb. 1415*0Sstevel@tonic-gate * 1416*0Sstevel@tonic-gate * Otherwise, just add mddb to this node. 1417*0Sstevel@tonic-gate */ 1418*0Sstevel@tonic-gate if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 1419*0Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 1420*0Sstevel@tonic-gate md_mn_result_t *resultp = NULL; 1421*0Sstevel@tonic-gate md_mn_msg_meta_db_attach_t attach; 1422*0Sstevel@tonic-gate int send_rval; 1423*0Sstevel@tonic-gate 1424*0Sstevel@tonic-gate /* 1425*0Sstevel@tonic-gate * In a scenario where new replicas had been added on 1426*0Sstevel@tonic-gate * the master, and then all of the old replicas failed 1427*0Sstevel@tonic-gate * before the slaves had knowledge of the new replicas, 1428*0Sstevel@tonic-gate * the slaves are unable to re-parse in the mddb 1429*0Sstevel@tonic-gate * from the new replicas since the slaves have no 1430*0Sstevel@tonic-gate * knowledge of the new replicas. The following 1431*0Sstevel@tonic-gate * algorithm solves this problem: 1432*0Sstevel@tonic-gate * - META_DB_ATTACH message generates submsgs 1433*0Sstevel@tonic-gate * - BLOCK parse (master) 1434*0Sstevel@tonic-gate * - MDDB_ATTACH new replicas 1435*0Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 1436*0Sstevel@tonic-gate * information to be sent from master 1437*0Sstevel@tonic-gate * to slaves at a higher class than the 1438*0Sstevel@tonic-gate * unblock so the parse message will 1439*0Sstevel@tonic-gate * reach slaves before unblock message. 
1440*0Sstevel@tonic-gate */ 1441*0Sstevel@tonic-gate attach.msg_l_dev = np->dev; 1442*0Sstevel@tonic-gate attach.msg_cnt = dbcnt; 1443*0Sstevel@tonic-gate attach.msg_dbsize = dbsize; 1444*0Sstevel@tonic-gate (void) strncpy(attach.msg_dname, cinfo->dname, 1445*0Sstevel@tonic-gate sizeof (attach.msg_dname)); 1446*0Sstevel@tonic-gate (void) splitname(np->bname, &attach.msg_splitname); 1447*0Sstevel@tonic-gate attach.msg_options = options; 1448*0Sstevel@tonic-gate 1449*0Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 1450*0Sstevel@tonic-gate attach.msg_devid[0] = NULL; 1451*0Sstevel@tonic-gate 1452*0Sstevel@tonic-gate /* 1453*0Sstevel@tonic-gate * If reconfig cycle has been started, this node is 1454*0Sstevel@tonic-gate * stuck in in the return step until this command has 1455*0Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 1456*0Sstevel@tonic-gate * send_message to fail (instead of retrying) 1457*0Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 1458*0Sstevel@tonic-gate * cycle to proceed. 
1459*0Sstevel@tonic-gate */ 1460*0Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 1461*0Sstevel@tonic-gate if (stale_bool == TRUE) 1462*0Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 1463*0Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 1464*0Sstevel@tonic-gate MD_MN_MSG_META_DB_ATTACH, 1465*0Sstevel@tonic-gate flags, (char *)&attach, 1466*0Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_attach_t), 1467*0Sstevel@tonic-gate &resultp, ep); 1468*0Sstevel@tonic-gate if (send_rval != 0) { 1469*0Sstevel@tonic-gate rval = -1; 1470*0Sstevel@tonic-gate if (resultp == NULL) 1471*0Sstevel@tonic-gate (void) mddserror(ep, 1472*0Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 1473*0Sstevel@tonic-gate sp->setno, NULL, NULL, 1474*0Sstevel@tonic-gate sp->setname); 1475*0Sstevel@tonic-gate else { 1476*0Sstevel@tonic-gate (void) mdstealerror(ep, 1477*0Sstevel@tonic-gate &(resultp->mmr_ep)); 1478*0Sstevel@tonic-gate if (mdisok(ep)) { 1479*0Sstevel@tonic-gate (void) mddserror(ep, 1480*0Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 1481*0Sstevel@tonic-gate sp->setno, NULL, NULL, 1482*0Sstevel@tonic-gate sp->setname); 1483*0Sstevel@tonic-gate } 1484*0Sstevel@tonic-gate free_result(resultp); 1485*0Sstevel@tonic-gate } 1486*0Sstevel@tonic-gate goto out; 1487*0Sstevel@tonic-gate } 1488*0Sstevel@tonic-gate if (resultp) 1489*0Sstevel@tonic-gate free_result(resultp); 1490*0Sstevel@tonic-gate } else { 1491*0Sstevel@tonic-gate /* Adding mddb(s) to just this node */ 1492*0Sstevel@tonic-gate for (i = 0; i < dbcnt; i++) { 1493*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 1494*0Sstevel@tonic-gate /* Fill in device/replica info */ 1495*0Sstevel@tonic-gate c.c_locator.l_dev = meta_cmpldev(np->dev); 1496*0Sstevel@tonic-gate c.c_locator.l_blkno = i * dbsize + 16; 1497*0Sstevel@tonic-gate blkno = c.c_locator.l_blkno; 1498*0Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, cinfo->dname, 1499*0Sstevel@tonic-gate sizeof (c.c_locator.l_driver)); 1500*0Sstevel@tonic-gate (void) 
splitname(np->bname, &c.c_devname); 1501*0Sstevel@tonic-gate c.c_locator.l_mnum = meta_getminor(np->dev); 1502*0Sstevel@tonic-gate 1503*0Sstevel@tonic-gate /* Fill in setno, setname, and sideno */ 1504*0Sstevel@tonic-gate c.c_setno = sp->setno; 1505*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 1506*0Sstevel@tonic-gate if (MD_MNSET_DESC(sd)) { 1507*0Sstevel@tonic-gate c.c_multi_node = 1; 1508*0Sstevel@tonic-gate } 1509*0Sstevel@tonic-gate } 1510*0Sstevel@tonic-gate (void) strcpy(c.c_setname, sp->setname); 1511*0Sstevel@tonic-gate c.c_sideno = sideno; 1512*0Sstevel@tonic-gate 1513*0Sstevel@tonic-gate /* 1514*0Sstevel@tonic-gate * Don't need device id information from this ioctl 1515*0Sstevel@tonic-gate * Kernel determines device id from dev_t, which 1516*0Sstevel@tonic-gate * is just what this code would do. 1517*0Sstevel@tonic-gate */ 1518*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1519*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1520*0Sstevel@tonic-gate 1521*0Sstevel@tonic-gate if (timeval != NULL) 1522*0Sstevel@tonic-gate c.c_timestamp = *timeval; 1523*0Sstevel@tonic-gate 1524*0Sstevel@tonic-gate if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE), 1525*0Sstevel@tonic-gate ep)) { 1526*0Sstevel@tonic-gate rval = -1; 1527*0Sstevel@tonic-gate goto out; 1528*0Sstevel@tonic-gate } 1529*0Sstevel@tonic-gate 1530*0Sstevel@tonic-gate if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) { 1531*0Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 1532*0Sstevel@tonic-gate goto out; 1533*0Sstevel@tonic-gate } 1534*0Sstevel@tonic-gate /* 1535*0Sstevel@tonic-gate * This is either a traditional diskset OR this 1536*0Sstevel@tonic-gate * is the first replica added to a MN diskset. 1537*0Sstevel@tonic-gate * In either case, set broadcast to NO_BCAST so 1538*0Sstevel@tonic-gate * that message won't go through rpc.mdcommd. 
1539*0Sstevel@tonic-gate * If this is a traditional diskset, the bcast 1540*0Sstevel@tonic-gate * flag is ignored since traditional disksets 1541*0Sstevel@tonic-gate * don't use the rpc.mdcommd. 1542*0Sstevel@tonic-gate */ 1543*0Sstevel@tonic-gate if (meta_db_addsidenms(sp, np, blkno, 1544*0Sstevel@tonic-gate DB_ADDSIDENMS_NO_BCAST, ep)) 1545*0Sstevel@tonic-gate goto out; 1546*0Sstevel@tonic-gate } 1547*0Sstevel@tonic-gate } 1548*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 1549*0Sstevel@tonic-gate /* update the dbcnt and size in dd */ 1550*0Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) 1551*0Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 1552*0Sstevel@tonic-gate p->dd_dbcnt = dbcnt; 1553*0Sstevel@tonic-gate p->dd_dbsize = dbsize; 1554*0Sstevel@tonic-gate break; 1555*0Sstevel@tonic-gate } 1556*0Sstevel@tonic-gate } 1557*0Sstevel@tonic-gate 1558*0Sstevel@tonic-gate /* 1559*0Sstevel@tonic-gate * If this was the first addition of disks to the 1560*0Sstevel@tonic-gate * diskset you now need to update the mb_setcreatetime 1561*0Sstevel@tonic-gate * which needed lb_inittime which wasn't there until now. 1562*0Sstevel@tonic-gate */ 1563*0Sstevel@tonic-gate if (firstmddb) { 1564*0Sstevel@tonic-gate if (meta_update_mb(sp, dd, ep) != 0) { 1565*0Sstevel@tonic-gate return (-1); 1566*0Sstevel@tonic-gate } 1567*0Sstevel@tonic-gate } 1568*0Sstevel@tonic-gate (void) close(fd); 1569*0Sstevel@tonic-gate } 1570*0Sstevel@tonic-gate 1571*0Sstevel@tonic-gate out: 1572*0Sstevel@tonic-gate if (metaislocalset(sp)) { 1573*0Sstevel@tonic-gate 1574*0Sstevel@tonic-gate /* everything looks fine. 
Start mdmonitord */ 1575*0Sstevel@tonic-gate /* Note: popen/pclose is the MT-safe replacement for system */ 1576*0Sstevel@tonic-gate if (rval == 0 && start_mdmonitord == 1) { 1577*0Sstevel@tonic-gate if (pclose(popen(MDMONITORD, "w")) == -1) 1578*0Sstevel@tonic-gate md_perror(MDMONITORD); 1579*0Sstevel@tonic-gate 1580*0Sstevel@tonic-gate if (meta_smf_enable(META_SMF_CORE, &status) == -1) { 1581*0Sstevel@tonic-gate mde_perror(&status, ""); 1582*0Sstevel@tonic-gate mdclrerror(&status); 1583*0Sstevel@tonic-gate } 1584*0Sstevel@tonic-gate } 1585*0Sstevel@tonic-gate 1586*0Sstevel@tonic-gate if (buildconf(sp, &status)) { 1587*0Sstevel@tonic-gate /* Don't mask any previous errors */ 1588*0Sstevel@tonic-gate if (rval == 0) 1589*0Sstevel@tonic-gate rval = mdstealerror(ep, &status); 1590*0Sstevel@tonic-gate return (rval); 1591*0Sstevel@tonic-gate } 1592*0Sstevel@tonic-gate 1593*0Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 1594*0Sstevel@tonic-gate /* Don't mask any previous errors */ 1595*0Sstevel@tonic-gate if (rval == 0) 1596*0Sstevel@tonic-gate rval = mdstealerror(ep, &status); 1597*0Sstevel@tonic-gate } 1598*0Sstevel@tonic-gate } else { 1599*0Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 1600*0Sstevel@tonic-gate (options & MDCHK_SET_LOCKED), 1601*0Sstevel@tonic-gate (options & MDCHK_SET_FORCE), 1602*0Sstevel@tonic-gate &status)) { 1603*0Sstevel@tonic-gate /* Don't mask any previous errors */ 1604*0Sstevel@tonic-gate if (rval == 0) 1605*0Sstevel@tonic-gate rval = mdstealerror(ep, &status); 1606*0Sstevel@tonic-gate else 1607*0Sstevel@tonic-gate mdclrerror(&status); 1608*0Sstevel@tonic-gate } 1609*0Sstevel@tonic-gate metafreedrivedesc(&dd); 1610*0Sstevel@tonic-gate } 1611*0Sstevel@tonic-gate /* 1612*0Sstevel@tonic-gate * For MN disksets that already had already had nodes joined 1613*0Sstevel@tonic-gate * before the attach of this mddb(s), the name invalidation is 1614*0Sstevel@tonic-gate * done by the commd handler routine. 
Otherwise, if this 1615*0Sstevel@tonic-gate * is the first attach of a MN diskset mddb, the invalidation 1616*0Sstevel@tonic-gate * must be done here since the first attach cannot be sent 1617*0Sstevel@tonic-gate * via the commd since there are no nodes joined to the set yet. 1618*0Sstevel@tonic-gate */ 1619*0Sstevel@tonic-gate if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) || 1620*0Sstevel@tonic-gate (MD_MNSET_DESC(sd) && 1621*0Sstevel@tonic-gate (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) { 1622*0Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 1623*0Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 1624*0Sstevel@tonic-gate } 1625*0Sstevel@tonic-gate } 1626*0Sstevel@tonic-gate return (rval); 1627*0Sstevel@tonic-gate } 1628*0Sstevel@tonic-gate 1629*0Sstevel@tonic-gate /* 1630*0Sstevel@tonic-gate * deletelist_length 1631*0Sstevel@tonic-gate * 1632*0Sstevel@tonic-gate * return the number of slices that have been specified for deletion 1633*0Sstevel@tonic-gate * on the metadb command line. This does not calculate the number 1634*0Sstevel@tonic-gate * of replicas because there may be multiple replicas per slice. 
1635*0Sstevel@tonic-gate */ 1636*0Sstevel@tonic-gate static int 1637*0Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp) 1638*0Sstevel@tonic-gate { 1639*0Sstevel@tonic-gate 1640*0Sstevel@tonic-gate mdnamelist_t *nlp; 1641*0Sstevel@tonic-gate int list_length = 0; 1642*0Sstevel@tonic-gate 1643*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1644*0Sstevel@tonic-gate list_length++; 1645*0Sstevel@tonic-gate } 1646*0Sstevel@tonic-gate 1647*0Sstevel@tonic-gate return (list_length); 1648*0Sstevel@tonic-gate } 1649*0Sstevel@tonic-gate 1650*0Sstevel@tonic-gate static int 1651*0Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp) 1652*0Sstevel@tonic-gate { 1653*0Sstevel@tonic-gate 1654*0Sstevel@tonic-gate mdnamelist_t *nlp; 1655*0Sstevel@tonic-gate mdname_t *np; 1656*0Sstevel@tonic-gate int index = 0; 1657*0Sstevel@tonic-gate 1658*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1659*0Sstevel@tonic-gate np = nlp->namep; 1660*0Sstevel@tonic-gate 1661*0Sstevel@tonic-gate if (strcmp(devname, np->bname) == 0) 1662*0Sstevel@tonic-gate return (index); 1663*0Sstevel@tonic-gate index++; 1664*0Sstevel@tonic-gate } 1665*0Sstevel@tonic-gate 1666*0Sstevel@tonic-gate return (-1); 1667*0Sstevel@tonic-gate } 1668*0Sstevel@tonic-gate 1669*0Sstevel@tonic-gate /* 1670*0Sstevel@tonic-gate * Delete replicas from set. This happens as a result of: 1671*0Sstevel@tonic-gate * - metadb [-s set_name] -d 1672*0Sstevel@tonic-gate * - metaset -s set_name -a disk (causes a rebalance of mddbs) 1673*0Sstevel@tonic-gate * - metaset -s set_name -d disk 1674*0Sstevel@tonic-gate * - metaset -s set_name -b 1675*0Sstevel@tonic-gate * 1676*0Sstevel@tonic-gate * For a local set, this routine is run on the local set host. 1677*0Sstevel@tonic-gate * 1678*0Sstevel@tonic-gate * For a traditional diskset, this routine is run on the node that 1679*0Sstevel@tonic-gate * is running the metaset command. 
1680*0Sstevel@tonic-gate * 1681*0Sstevel@tonic-gate * For a multinode diskset, this routine is run by the node that is 1682*0Sstevel@tonic-gate * running the metaset command. This detach routine is sent to all 1683*0Sstevel@tonic-gate * of the joined nodes in the diskset using commd. This keeps 1684*0Sstevel@tonic-gate * the nodes in-sync. 1685*0Sstevel@tonic-gate */ 1686*0Sstevel@tonic-gate int 1687*0Sstevel@tonic-gate meta_db_detach( 1688*0Sstevel@tonic-gate mdsetname_t *sp, 1689*0Sstevel@tonic-gate mdnamelist_t *db_nlp, 1690*0Sstevel@tonic-gate mdforceopts_t force_option, 1691*0Sstevel@tonic-gate char *sysfilename, 1692*0Sstevel@tonic-gate md_error_t *ep 1693*0Sstevel@tonic-gate ) 1694*0Sstevel@tonic-gate { 1695*0Sstevel@tonic-gate struct mddb_config c; 1696*0Sstevel@tonic-gate mdnamelist_t *nlp; 1697*0Sstevel@tonic-gate mdname_t *np; 1698*0Sstevel@tonic-gate md_drive_desc *dd = NULL; 1699*0Sstevel@tonic-gate md_drive_desc *p; 1700*0Sstevel@tonic-gate int replicacount; 1701*0Sstevel@tonic-gate int replica_delete_count; 1702*0Sstevel@tonic-gate int nr_replica_slices; 1703*0Sstevel@tonic-gate int i; 1704*0Sstevel@tonic-gate int stop_svmdaemons = 0; 1705*0Sstevel@tonic-gate int rval = 0; 1706*0Sstevel@tonic-gate int index; 1707*0Sstevel@tonic-gate int valid_replicas_nottodelete = 0; 1708*0Sstevel@tonic-gate int invalid_replicas_nottodelete = 0; 1709*0Sstevel@tonic-gate int invalid_replicas_todelete = 0; 1710*0Sstevel@tonic-gate int errored = 0; 1711*0Sstevel@tonic-gate int *tag_array; 1712*0Sstevel@tonic-gate int fd = -1; 1713*0Sstevel@tonic-gate md_error_t status = mdnullerror; 1714*0Sstevel@tonic-gate md_set_desc *sd; 1715*0Sstevel@tonic-gate int stale_bool = FALSE; 1716*0Sstevel@tonic-gate int flags; 1717*0Sstevel@tonic-gate 1718*0Sstevel@tonic-gate /* 1719*0Sstevel@tonic-gate * Error if we don't get some work to do. 
1720*0Sstevel@tonic-gate */ 1721*0Sstevel@tonic-gate if (db_nlp == NULL) 1722*0Sstevel@tonic-gate return (mdsyserror(ep, EINVAL, NULL)); 1723*0Sstevel@tonic-gate 1724*0Sstevel@tonic-gate if (mdnamesareunique(db_nlp, ep) != 0) 1725*0Sstevel@tonic-gate return (-1); 1726*0Sstevel@tonic-gate 1727*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 1728*0Sstevel@tonic-gate c.c_id = 0; 1729*0Sstevel@tonic-gate c.c_setno = sp->setno; 1730*0Sstevel@tonic-gate 1731*0Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 1732*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1733*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1734*0Sstevel@tonic-gate 1735*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 1736*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 1737*0Sstevel@tonic-gate 1738*0Sstevel@tonic-gate /* 1739*0Sstevel@tonic-gate * Is current set STALE? 1740*0Sstevel@tonic-gate */ 1741*0Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) { 1742*0Sstevel@tonic-gate stale_bool = TRUE; 1743*0Sstevel@tonic-gate } 1744*0Sstevel@tonic-gate 1745*0Sstevel@tonic-gate replicacount = c.c_dbcnt; 1746*0Sstevel@tonic-gate 1747*0Sstevel@tonic-gate assert(db_nlp != NULL); 1748*0Sstevel@tonic-gate 1749*0Sstevel@tonic-gate /* 1750*0Sstevel@tonic-gate * go through and gather how many data bases are on each 1751*0Sstevel@tonic-gate * device specified. 
1752*0Sstevel@tonic-gate */ 1753*0Sstevel@tonic-gate 1754*0Sstevel@tonic-gate nr_replica_slices = deletelist_length(db_nlp); 1755*0Sstevel@tonic-gate tag_array = (int *)calloc(nr_replica_slices, sizeof (int)); 1756*0Sstevel@tonic-gate 1757*0Sstevel@tonic-gate replica_delete_count = 0; 1758*0Sstevel@tonic-gate for (i = 0; i < replicacount; i++) { 1759*0Sstevel@tonic-gate char *devname; 1760*0Sstevel@tonic-gate int found = 0; 1761*0Sstevel@tonic-gate 1762*0Sstevel@tonic-gate c.c_id = i; 1763*0Sstevel@tonic-gate 1764*0Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 1765*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1766*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1767*0Sstevel@tonic-gate 1768*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 1769*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 1770*0Sstevel@tonic-gate 1771*0Sstevel@tonic-gate devname = splicename(&c.c_devname); 1772*0Sstevel@tonic-gate 1773*0Sstevel@tonic-gate if ((index = in_deletelist(devname, db_nlp)) != -1) { 1774*0Sstevel@tonic-gate found = 1; 1775*0Sstevel@tonic-gate tag_array[index] = 1; 1776*0Sstevel@tonic-gate replica_delete_count++; 1777*0Sstevel@tonic-gate } 1778*0Sstevel@tonic-gate 1779*0Sstevel@tonic-gate errored = c.c_locator.l_flags & (MDDB_F_EREAD | 1780*0Sstevel@tonic-gate MDDB_F_EWRITE | MDDB_F_TOOSMALL | 1781*0Sstevel@tonic-gate MDDB_F_EFMT | MDDB_F_EDATA | 1782*0Sstevel@tonic-gate MDDB_F_EMASTER); 1783*0Sstevel@tonic-gate 1784*0Sstevel@tonic-gate /* 1785*0Sstevel@tonic-gate * There are four combinations of "errored" and "found" 1786*0Sstevel@tonic-gate * and they are used to find the number of 1787*0Sstevel@tonic-gate * (a) valid/invalid replicas that are not in the delete 1788*0Sstevel@tonic-gate * list and are available in the system. 1789*0Sstevel@tonic-gate * (b) valid/invalid replicas that are to be deleted. 
1790*0Sstevel@tonic-gate */ 1791*0Sstevel@tonic-gate 1792*0Sstevel@tonic-gate if (errored && !found) /* errored and !found */ 1793*0Sstevel@tonic-gate invalid_replicas_nottodelete++; 1794*0Sstevel@tonic-gate else if (!found) /* !errored and !found */ 1795*0Sstevel@tonic-gate valid_replicas_nottodelete++; 1796*0Sstevel@tonic-gate else if (errored) /* errored and found */ 1797*0Sstevel@tonic-gate invalid_replicas_todelete++; 1798*0Sstevel@tonic-gate /* 1799*0Sstevel@tonic-gate * else it is !errored and found. This means 1800*0Sstevel@tonic-gate * valid_replicas_todelete++; But this variable will not 1801*0Sstevel@tonic-gate * be used anywhere 1802*0Sstevel@tonic-gate */ 1803*0Sstevel@tonic-gate 1804*0Sstevel@tonic-gate Free(devname); 1805*0Sstevel@tonic-gate } 1806*0Sstevel@tonic-gate 1807*0Sstevel@tonic-gate index = 0; 1808*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1809*0Sstevel@tonic-gate np = nlp->namep; 1810*0Sstevel@tonic-gate if (tag_array[index++] != 1) { 1811*0Sstevel@tonic-gate Free(tag_array); 1812*0Sstevel@tonic-gate return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname)); 1813*0Sstevel@tonic-gate } 1814*0Sstevel@tonic-gate } 1815*0Sstevel@tonic-gate 1816*0Sstevel@tonic-gate Free(tag_array); 1817*0Sstevel@tonic-gate 1818*0Sstevel@tonic-gate 1819*0Sstevel@tonic-gate /* if all replicas are deleted stop mdmonitord */ 1820*0Sstevel@tonic-gate if ((replicacount - replica_delete_count) == 0) 1821*0Sstevel@tonic-gate stop_svmdaemons = 1; 1822*0Sstevel@tonic-gate 1823*0Sstevel@tonic-gate if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) { 1824*0Sstevel@tonic-gate if (force_option & MDFORCE_NONE) 1825*0Sstevel@tonic-gate return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname)); 1826*0Sstevel@tonic-gate if (! metaislocalset(sp) && ! 
(force_option & MDFORCE_DS)) 1827*0Sstevel@tonic-gate return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname)); 1828*0Sstevel@tonic-gate } 1829*0Sstevel@tonic-gate 1830*0Sstevel@tonic-gate /* 1831*0Sstevel@tonic-gate * The following algorithms are followed to check for deletion: 1832*0Sstevel@tonic-gate * (a) If the delete list(db_nlp) has all invalid replicas and no valid 1833*0Sstevel@tonic-gate * replicas, then deletion should be allowed. 1834*0Sstevel@tonic-gate * (b) Deletion should be allowed only if valid replicas that are "not" 1835*0Sstevel@tonic-gate * to be deleted is always greater than the invalid replicas that 1836*0Sstevel@tonic-gate * are "not" to be deleted. 1837*0Sstevel@tonic-gate * (c) If the user uses -f option, then deletion should be allowed. 1838*0Sstevel@tonic-gate */ 1839*0Sstevel@tonic-gate 1840*0Sstevel@tonic-gate if ((invalid_replicas_todelete != replica_delete_count) && 1841*0Sstevel@tonic-gate (invalid_replicas_nottodelete > valid_replicas_nottodelete) && 1842*0Sstevel@tonic-gate (force_option != MDFORCE_LOCAL)) 1843*0Sstevel@tonic-gate return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname)); 1844*0Sstevel@tonic-gate 1845*0Sstevel@tonic-gate /* 1846*0Sstevel@tonic-gate * go through and tell kernel to delete them 1847*0Sstevel@tonic-gate */ 1848*0Sstevel@tonic-gate 1849*0Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 1850*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1851*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1852*0Sstevel@tonic-gate 1853*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) 1854*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 1855*0Sstevel@tonic-gate 1856*0Sstevel@tonic-gate if (! metaislocalset(sp)) { 1857*0Sstevel@tonic-gate dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep); 1858*0Sstevel@tonic-gate if (! 
mdisok(ep)) 1859*0Sstevel@tonic-gate return (-1); 1860*0Sstevel@tonic-gate if ((sd = metaget_setdesc(sp, ep)) == NULL) 1861*0Sstevel@tonic-gate return (-1); 1862*0Sstevel@tonic-gate } 1863*0Sstevel@tonic-gate 1864*0Sstevel@tonic-gate for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) { 1865*0Sstevel@tonic-gate np = nlp->namep; 1866*0Sstevel@tonic-gate 1867*0Sstevel@tonic-gate /* 1868*0Sstevel@tonic-gate * If mddb is being deleted from MN diskset and node is 1869*0Sstevel@tonic-gate * an owner of the diskset then use rpc.mdcommd 1870*0Sstevel@tonic-gate * mechanism to add mddb(s) so that all nodes stay in sync. 1871*0Sstevel@tonic-gate * If set is stale, don't log the message since rpc.mdcommd 1872*0Sstevel@tonic-gate * can't write the message to the mddb. 1873*0Sstevel@tonic-gate * 1874*0Sstevel@tonic-gate * When mddbs are first being added to set, a detach can 1875*0Sstevel@tonic-gate * be called before any node has joined the diskset, so 1876*0Sstevel@tonic-gate * must check to see if node is an owner of the diskset. 1877*0Sstevel@tonic-gate * 1878*0Sstevel@tonic-gate * Otherwise, just delete mddb from this node. 1879*0Sstevel@tonic-gate */ 1880*0Sstevel@tonic-gate 1881*0Sstevel@tonic-gate if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 1882*0Sstevel@tonic-gate (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 1883*0Sstevel@tonic-gate md_mn_result_t *resultp; 1884*0Sstevel@tonic-gate md_mn_msg_meta_db_detach_t detach; 1885*0Sstevel@tonic-gate int send_rval; 1886*0Sstevel@tonic-gate 1887*0Sstevel@tonic-gate /* 1888*0Sstevel@tonic-gate * The following algorithm is used to detach replicas. 
1889*0Sstevel@tonic-gate * - META_DB_DETACH message generates submsgs 1890*0Sstevel@tonic-gate * - BLOCK parse (master) 1891*0Sstevel@tonic-gate * - MDDB_DETACH replicas 1892*0Sstevel@tonic-gate * - UNBLOCK parse (master) causing parse 1893*0Sstevel@tonic-gate * information to be sent from master 1894*0Sstevel@tonic-gate * to slaves at a higher class than the 1895*0Sstevel@tonic-gate * unblock so the parse message will 1896*0Sstevel@tonic-gate * reach slaves before unblock message. 1897*0Sstevel@tonic-gate */ 1898*0Sstevel@tonic-gate (void) splitname(np->bname, &detach.msg_splitname); 1899*0Sstevel@tonic-gate 1900*0Sstevel@tonic-gate /* Set devid to NULL until devids are supported */ 1901*0Sstevel@tonic-gate detach.msg_devid[0] = NULL; 1902*0Sstevel@tonic-gate 1903*0Sstevel@tonic-gate /* 1904*0Sstevel@tonic-gate * If reconfig cycle has been started, this node is 1905*0Sstevel@tonic-gate * stuck in in the return step until this command has 1906*0Sstevel@tonic-gate * completed. If mdcommd is suspended, ask 1907*0Sstevel@tonic-gate * send_message to fail (instead of retrying) 1908*0Sstevel@tonic-gate * so that metaset can finish allowing the reconfig 1909*0Sstevel@tonic-gate * cycle to proceed. 
1910*0Sstevel@tonic-gate */ 1911*0Sstevel@tonic-gate flags = MD_MSGF_FAIL_ON_SUSPEND; 1912*0Sstevel@tonic-gate if (stale_bool == TRUE) 1913*0Sstevel@tonic-gate flags |= MD_MSGF_NO_LOG; 1914*0Sstevel@tonic-gate send_rval = mdmn_send_message(sp->setno, 1915*0Sstevel@tonic-gate MD_MN_MSG_META_DB_DETACH, 1916*0Sstevel@tonic-gate flags, (char *)&detach, 1917*0Sstevel@tonic-gate sizeof (md_mn_msg_meta_db_detach_t), 1918*0Sstevel@tonic-gate &resultp, ep); 1919*0Sstevel@tonic-gate if (send_rval != 0) { 1920*0Sstevel@tonic-gate rval = -1; 1921*0Sstevel@tonic-gate if (resultp == NULL) 1922*0Sstevel@tonic-gate (void) mddserror(ep, 1923*0Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 1924*0Sstevel@tonic-gate sp->setno, NULL, NULL, 1925*0Sstevel@tonic-gate sp->setname); 1926*0Sstevel@tonic-gate else { 1927*0Sstevel@tonic-gate (void) mdstealerror(ep, 1928*0Sstevel@tonic-gate &(resultp->mmr_ep)); 1929*0Sstevel@tonic-gate if (mdisok(ep)) { 1930*0Sstevel@tonic-gate (void) mddserror(ep, 1931*0Sstevel@tonic-gate MDE_DS_COMMD_SEND_FAIL, 1932*0Sstevel@tonic-gate sp->setno, NULL, NULL, 1933*0Sstevel@tonic-gate sp->setname); 1934*0Sstevel@tonic-gate } 1935*0Sstevel@tonic-gate free_result(resultp); 1936*0Sstevel@tonic-gate } 1937*0Sstevel@tonic-gate goto out; 1938*0Sstevel@tonic-gate } 1939*0Sstevel@tonic-gate if (resultp) 1940*0Sstevel@tonic-gate free_result(resultp); 1941*0Sstevel@tonic-gate } else { 1942*0Sstevel@tonic-gate i = 0; 1943*0Sstevel@tonic-gate while (i < c.c_dbcnt) { 1944*0Sstevel@tonic-gate char *devname; 1945*0Sstevel@tonic-gate 1946*0Sstevel@tonic-gate c.c_id = i; 1947*0Sstevel@tonic-gate 1948*0Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 1949*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1950*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1951*0Sstevel@tonic-gate 1952*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, 1953*0Sstevel@tonic-gate &c.c_mde, NULL)) { 1954*0Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 
1955*0Sstevel@tonic-gate goto out; 1956*0Sstevel@tonic-gate } 1957*0Sstevel@tonic-gate 1958*0Sstevel@tonic-gate devname = splicename(&c.c_devname); 1959*0Sstevel@tonic-gate if (strcmp(devname, np->bname) != 0) { 1960*0Sstevel@tonic-gate Free(devname); 1961*0Sstevel@tonic-gate i++; 1962*0Sstevel@tonic-gate continue; 1963*0Sstevel@tonic-gate } 1964*0Sstevel@tonic-gate Free(devname); 1965*0Sstevel@tonic-gate 1966*0Sstevel@tonic-gate /* Don't need devid info from this ioctl */ 1967*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 1968*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 1969*0Sstevel@tonic-gate 1970*0Sstevel@tonic-gate if (metaioctl(MD_DB_DELDEV, &c, 1971*0Sstevel@tonic-gate &c.c_mde, NULL) != 0) { 1972*0Sstevel@tonic-gate rval = mdstealerror(ep, &c.c_mde); 1973*0Sstevel@tonic-gate goto out; 1974*0Sstevel@tonic-gate } 1975*0Sstevel@tonic-gate 1976*0Sstevel@tonic-gate /* Not incrementing "i" intentionally */ 1977*0Sstevel@tonic-gate } 1978*0Sstevel@tonic-gate } 1979*0Sstevel@tonic-gate if (! 
metaislocalset(sp)) { 1980*0Sstevel@tonic-gate /* update the dbcnt and size in dd */ 1981*0Sstevel@tonic-gate for (p = dd; p != NULL; p = p->dd_next) { 1982*0Sstevel@tonic-gate if (p->dd_dnp == np->drivenamep) { 1983*0Sstevel@tonic-gate p->dd_dbcnt = 0; 1984*0Sstevel@tonic-gate p->dd_dbsize = 0; 1985*0Sstevel@tonic-gate break; 1986*0Sstevel@tonic-gate } 1987*0Sstevel@tonic-gate } 1988*0Sstevel@tonic-gate 1989*0Sstevel@tonic-gate /* 1990*0Sstevel@tonic-gate * Slam a dummy master block and make it self 1991*0Sstevel@tonic-gate * identifying 1992*0Sstevel@tonic-gate */ 1993*0Sstevel@tonic-gate if ((fd = open(np->rname, O_RDWR)) >= 0) { 1994*0Sstevel@tonic-gate meta_mkdummymaster(sp, fd, 16); 1995*0Sstevel@tonic-gate (void) close(fd); 1996*0Sstevel@tonic-gate } 1997*0Sstevel@tonic-gate } 1998*0Sstevel@tonic-gate } 1999*0Sstevel@tonic-gate out: 2000*0Sstevel@tonic-gate if (metaislocalset(sp)) { 2001*0Sstevel@tonic-gate /* 2002*0Sstevel@tonic-gate * Stop all the daemons if there are 2003*0Sstevel@tonic-gate * no more replicas so that the module can be 2004*0Sstevel@tonic-gate * unloaded. 
2005*0Sstevel@tonic-gate */ 2006*0Sstevel@tonic-gate if (rval == 0 && stop_svmdaemons == 1) { 2007*0Sstevel@tonic-gate char buf[MAXPATHLEN]; 2008*0Sstevel@tonic-gate int i; 2009*0Sstevel@tonic-gate 2010*0Sstevel@tonic-gate for (i = 0; i < DAEMON_COUNT; i++) { 2011*0Sstevel@tonic-gate (void) snprintf(buf, MAXPATHLEN, 2012*0Sstevel@tonic-gate "/usr/bin/pkill -%s -x %s", 2013*0Sstevel@tonic-gate svmd_kill_list[i].svmd_kill_val, 2014*0Sstevel@tonic-gate svmd_kill_list[i].svmd_name); 2015*0Sstevel@tonic-gate if (pclose(popen(buf, "w")) == -1) 2016*0Sstevel@tonic-gate md_perror(buf); 2017*0Sstevel@tonic-gate } 2018*0Sstevel@tonic-gate 2019*0Sstevel@tonic-gate if (meta_smf_disable(META_SMF_ALL, &status) == -1) { 2020*0Sstevel@tonic-gate mde_perror(&status, ""); 2021*0Sstevel@tonic-gate mdclrerror(&status); 2022*0Sstevel@tonic-gate } 2023*0Sstevel@tonic-gate } 2024*0Sstevel@tonic-gate if (buildconf(sp, &status)) { 2025*0Sstevel@tonic-gate /* Don't mask any previous errors */ 2026*0Sstevel@tonic-gate if (rval == 0) 2027*0Sstevel@tonic-gate rval = mdstealerror(ep, &status); 2028*0Sstevel@tonic-gate else 2029*0Sstevel@tonic-gate mdclrerror(&status); 2030*0Sstevel@tonic-gate return (rval); 2031*0Sstevel@tonic-gate } 2032*0Sstevel@tonic-gate 2033*0Sstevel@tonic-gate if (meta_db_patch(sysfilename, NULL, 0, &status)) { 2034*0Sstevel@tonic-gate /* Don't mask any previous errors */ 2035*0Sstevel@tonic-gate if (rval == 0) 2036*0Sstevel@tonic-gate rval = mdstealerror(ep, &status); 2037*0Sstevel@tonic-gate else 2038*0Sstevel@tonic-gate mdclrerror(&status); 2039*0Sstevel@tonic-gate } 2040*0Sstevel@tonic-gate } else { 2041*0Sstevel@tonic-gate if (update_dbinfo_on_drives(sp, dd, 2042*0Sstevel@tonic-gate (force_option & MDFORCE_SET_LOCKED), 2043*0Sstevel@tonic-gate ((force_option & MDFORCE_LOCAL) | 2044*0Sstevel@tonic-gate (force_option & MDFORCE_DS)), &status)) { 2045*0Sstevel@tonic-gate /* Don't mask any previous errors */ 2046*0Sstevel@tonic-gate if (rval == 0) 2047*0Sstevel@tonic-gate 
rval = mdstealerror(ep, &status); 2048*0Sstevel@tonic-gate else 2049*0Sstevel@tonic-gate mdclrerror(&status); 2050*0Sstevel@tonic-gate } 2051*0Sstevel@tonic-gate metafreedrivedesc(&dd); 2052*0Sstevel@tonic-gate } 2053*0Sstevel@tonic-gate if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) { 2054*0Sstevel@tonic-gate for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) { 2055*0Sstevel@tonic-gate meta_invalidate_name(nlp->namep); 2056*0Sstevel@tonic-gate } 2057*0Sstevel@tonic-gate } 2058*0Sstevel@tonic-gate return (rval); 2059*0Sstevel@tonic-gate } 2060*0Sstevel@tonic-gate 2061*0Sstevel@tonic-gate static md_replica_t * 2062*0Sstevel@tonic-gate metareplicaname( 2063*0Sstevel@tonic-gate mdsetname_t *sp, 2064*0Sstevel@tonic-gate int flags, 2065*0Sstevel@tonic-gate struct mddb_config *c, 2066*0Sstevel@tonic-gate md_error_t *ep 2067*0Sstevel@tonic-gate ) 2068*0Sstevel@tonic-gate { 2069*0Sstevel@tonic-gate md_replica_t *rp; 2070*0Sstevel@tonic-gate char *devname; 2071*0Sstevel@tonic-gate size_t sz; 2072*0Sstevel@tonic-gate 2073*0Sstevel@tonic-gate /* allocate replicaname */ 2074*0Sstevel@tonic-gate rp = Zalloc(sizeof (*rp)); 2075*0Sstevel@tonic-gate 2076*0Sstevel@tonic-gate /* get device name */ 2077*0Sstevel@tonic-gate devname = splicename(&c->c_devname); 2078*0Sstevel@tonic-gate if (flags & PRINT_FAST) { 2079*0Sstevel@tonic-gate if ((rp->r_namep = metaname_fast(&sp, devname, ep)) == NULL) { 2080*0Sstevel@tonic-gate Free(devname); 2081*0Sstevel@tonic-gate Free(rp); 2082*0Sstevel@tonic-gate return (NULL); 2083*0Sstevel@tonic-gate } 2084*0Sstevel@tonic-gate } else { 2085*0Sstevel@tonic-gate if ((rp->r_namep = metaname(&sp, devname, ep)) == NULL) { 2086*0Sstevel@tonic-gate Free(devname); 2087*0Sstevel@tonic-gate Free(rp); 2088*0Sstevel@tonic-gate return (NULL); 2089*0Sstevel@tonic-gate } 2090*0Sstevel@tonic-gate } 2091*0Sstevel@tonic-gate Free(devname); 2092*0Sstevel@tonic-gate 2093*0Sstevel@tonic-gate /* make sure it's OK */ 2094*0Sstevel@tonic-gate if ((! 
(flags & MD_BASICNAME_OK)) && 2095*0Sstevel@tonic-gate (metachkcomp(rp->r_namep, ep) != 0)) { 2096*0Sstevel@tonic-gate Free(rp); 2097*0Sstevel@tonic-gate return (NULL); 2098*0Sstevel@tonic-gate } 2099*0Sstevel@tonic-gate 2100*0Sstevel@tonic-gate rp->r_blkno = MD_DISKADDR_ERROR; 2101*0Sstevel@tonic-gate rp->r_nblk = MD_DISKADDR_ERROR; 2102*0Sstevel@tonic-gate rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID; 2103*0Sstevel@tonic-gate if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) { 2104*0Sstevel@tonic-gate sz = devid_sizeof((ddi_devid_t)(c->c_locator.l_devid)); 2105*0Sstevel@tonic-gate if ((rp->r_devid = (ddi_devid_t)malloc(sz)) == 2106*0Sstevel@tonic-gate (ddi_devid_t)NULL) { 2107*0Sstevel@tonic-gate Free(rp); 2108*0Sstevel@tonic-gate return (NULL); 2109*0Sstevel@tonic-gate } 2110*0Sstevel@tonic-gate (void) memcpy((void *)rp->r_devid, 2111*0Sstevel@tonic-gate (void *)c->c_locator.l_devid, sz); 2112*0Sstevel@tonic-gate (void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name); 2113*0Sstevel@tonic-gate rp->r_flags &= ~MDDB_F_NODEVID; 2114*0Sstevel@tonic-gate /* Overwrite dev derived from name with dev from devid */ 2115*0Sstevel@tonic-gate rp->r_namep->dev = meta_expldev(c->c_locator.l_dev); 2116*0Sstevel@tonic-gate } 2117*0Sstevel@tonic-gate (void) strcpy(rp->r_driver_name, c->c_locator.l_driver); 2118*0Sstevel@tonic-gate 2119*0Sstevel@tonic-gate rp->r_blkno = c->c_locator.l_blkno; 2120*0Sstevel@tonic-gate if (c->c_dbend != 0) 2121*0Sstevel@tonic-gate rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1; 2122*0Sstevel@tonic-gate 2123*0Sstevel@tonic-gate /* return replica */ 2124*0Sstevel@tonic-gate return (rp); 2125*0Sstevel@tonic-gate } 2126*0Sstevel@tonic-gate 2127*0Sstevel@tonic-gate /* 2128*0Sstevel@tonic-gate * free replica list 2129*0Sstevel@tonic-gate */ 2130*0Sstevel@tonic-gate void 2131*0Sstevel@tonic-gate metafreereplicalist( 2132*0Sstevel@tonic-gate md_replicalist_t *rlp 2133*0Sstevel@tonic-gate ) 2134*0Sstevel@tonic-gate { 2135*0Sstevel@tonic-gate 
md_replicalist_t *rl = NULL; 2136*0Sstevel@tonic-gate 2137*0Sstevel@tonic-gate for (/* void */; (rlp != NULL); rlp = rl) { 2138*0Sstevel@tonic-gate rl = rlp->rl_next; 2139*0Sstevel@tonic-gate if (rlp->rl_repp->r_devid != (ddi_devid_t)0) { 2140*0Sstevel@tonic-gate free(rlp->rl_repp->r_devid); 2141*0Sstevel@tonic-gate } 2142*0Sstevel@tonic-gate Free(rlp->rl_repp); 2143*0Sstevel@tonic-gate Free(rlp); 2144*0Sstevel@tonic-gate } 2145*0Sstevel@tonic-gate } 2146*0Sstevel@tonic-gate 2147*0Sstevel@tonic-gate /* 2148*0Sstevel@tonic-gate * return list of all replicas in set 2149*0Sstevel@tonic-gate */ 2150*0Sstevel@tonic-gate int 2151*0Sstevel@tonic-gate metareplicalist( 2152*0Sstevel@tonic-gate mdsetname_t *sp, 2153*0Sstevel@tonic-gate int flags, 2154*0Sstevel@tonic-gate md_replicalist_t **rlpp, 2155*0Sstevel@tonic-gate md_error_t *ep 2156*0Sstevel@tonic-gate ) 2157*0Sstevel@tonic-gate { 2158*0Sstevel@tonic-gate md_replicalist_t **tail = rlpp; 2159*0Sstevel@tonic-gate int count = 0; 2160*0Sstevel@tonic-gate struct mddb_config c; 2161*0Sstevel@tonic-gate int i; 2162*0Sstevel@tonic-gate char *devid; 2163*0Sstevel@tonic-gate 2164*0Sstevel@tonic-gate /* for each replica */ 2165*0Sstevel@tonic-gate i = 0; 2166*0Sstevel@tonic-gate do { 2167*0Sstevel@tonic-gate md_replica_t *rp; 2168*0Sstevel@tonic-gate 2169*0Sstevel@tonic-gate /* get next replica */ 2170*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 2171*0Sstevel@tonic-gate c.c_id = i; 2172*0Sstevel@tonic-gate c.c_setno = sp->setno; 2173*0Sstevel@tonic-gate 2174*0Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ; 2175*0Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 2176*0Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 2177*0Sstevel@tonic-gate mdclrerror(&c.c_mde); 2178*0Sstevel@tonic-gate break; /* handle none at all */ 2179*0Sstevel@tonic-gate } 2180*0Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 2181*0Sstevel@tonic-gate goto out; 2182*0Sstevel@tonic-gate 
} 2183*0Sstevel@tonic-gate 2184*0Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) { 2185*0Sstevel@tonic-gate if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) { 2186*0Sstevel@tonic-gate (void) mdsyserror(ep, ENOMEM, META_DBCONF); 2187*0Sstevel@tonic-gate goto out; 2188*0Sstevel@tonic-gate } 2189*0Sstevel@tonic-gate c.c_locator.l_devid = (uintptr_t)devid; 2190*0Sstevel@tonic-gate /* 2191*0Sstevel@tonic-gate * Turn on space and sz flags since 'sz' amount of 2192*0Sstevel@tonic-gate * space has been alloc'd. 2193*0Sstevel@tonic-gate */ 2194*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 2195*0Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 2196*0Sstevel@tonic-gate } 2197*0Sstevel@tonic-gate 2198*0Sstevel@tonic-gate if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) { 2199*0Sstevel@tonic-gate if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) { 2200*0Sstevel@tonic-gate mdclrerror(&c.c_mde); 2201*0Sstevel@tonic-gate break; /* handle none at all */ 2202*0Sstevel@tonic-gate } 2203*0Sstevel@tonic-gate (void) mdstealerror(ep, &c.c_mde); 2204*0Sstevel@tonic-gate goto out; 2205*0Sstevel@tonic-gate } 2206*0Sstevel@tonic-gate 2207*0Sstevel@tonic-gate /* 2208*0Sstevel@tonic-gate * Paranoid check - shouldn't happen, but is left as 2209*0Sstevel@tonic-gate * a place holder for changes that will be needed after 2210*0Sstevel@tonic-gate * dynamic reconfiguration changes are added to SVM (to 2211*0Sstevel@tonic-gate * support movement of disks at any point in time). 
2212*0Sstevel@tonic-gate */ 2213*0Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) { 2214*0Sstevel@tonic-gate (void) fprintf(stderr, 2215*0Sstevel@tonic-gate dgettext(TEXT_DOMAIN, 2216*0Sstevel@tonic-gate "Error: Relocation Information " 2217*0Sstevel@tonic-gate "(drvnm=%s, mnum=0x%lx) \n" 2218*0Sstevel@tonic-gate "relocation information size changed - \n" 2219*0Sstevel@tonic-gate "rerun command\n"), 2220*0Sstevel@tonic-gate c.c_locator.l_driver, c.c_locator.l_mnum); 2221*0Sstevel@tonic-gate (void) mderror(ep, MDE_DEVID_TOOBIG, NULL); 2222*0Sstevel@tonic-gate goto out; 2223*0Sstevel@tonic-gate } 2224*0Sstevel@tonic-gate 2225*0Sstevel@tonic-gate if (c.c_dbcnt == 0) 2226*0Sstevel@tonic-gate break; /* handle none at all */ 2227*0Sstevel@tonic-gate 2228*0Sstevel@tonic-gate /* get info */ 2229*0Sstevel@tonic-gate if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL) 2230*0Sstevel@tonic-gate goto out; 2231*0Sstevel@tonic-gate 2232*0Sstevel@tonic-gate /* append to list */ 2233*0Sstevel@tonic-gate *tail = Zalloc(sizeof (**tail)); 2234*0Sstevel@tonic-gate (*tail)->rl_repp = rp; 2235*0Sstevel@tonic-gate tail = &(*tail)->rl_next; 2236*0Sstevel@tonic-gate ++count; 2237*0Sstevel@tonic-gate 2238*0Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 2239*0Sstevel@tonic-gate free(devid); 2240*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 2241*0Sstevel@tonic-gate } 2242*0Sstevel@tonic-gate 2243*0Sstevel@tonic-gate } while (++i < c.c_dbcnt); 2244*0Sstevel@tonic-gate 2245*0Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 2246*0Sstevel@tonic-gate free(devid); 2247*0Sstevel@tonic-gate } 2248*0Sstevel@tonic-gate 2249*0Sstevel@tonic-gate /* return count */ 2250*0Sstevel@tonic-gate return (count); 2251*0Sstevel@tonic-gate 2252*0Sstevel@tonic-gate /* cleanup, return error */ 2253*0Sstevel@tonic-gate out: 2254*0Sstevel@tonic-gate if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) { 2255*0Sstevel@tonic-gate free(devid); 
2256*0Sstevel@tonic-gate } 2257*0Sstevel@tonic-gate metafreereplicalist(*rlpp); 2258*0Sstevel@tonic-gate *rlpp = NULL; 2259*0Sstevel@tonic-gate return (-1); 2260*0Sstevel@tonic-gate } 2261*0Sstevel@tonic-gate 2262*0Sstevel@tonic-gate /* 2263*0Sstevel@tonic-gate * meta_sync_db_locations - get list of replicas from kernel and write 2264*0Sstevel@tonic-gate * out to mddb.cf and md.conf. 'Syncs up' the replica list in 2265*0Sstevel@tonic-gate * the kernel with the replica list in the conf files. 2266*0Sstevel@tonic-gate * 2267*0Sstevel@tonic-gate */ 2268*0Sstevel@tonic-gate void 2269*0Sstevel@tonic-gate meta_sync_db_locations( 2270*0Sstevel@tonic-gate mdsetname_t *sp, 2271*0Sstevel@tonic-gate md_error_t *ep 2272*0Sstevel@tonic-gate ) 2273*0Sstevel@tonic-gate { 2274*0Sstevel@tonic-gate char *sname = 0; /* system file name */ 2275*0Sstevel@tonic-gate char *cname = 0; /* config file name */ 2276*0Sstevel@tonic-gate 2277*0Sstevel@tonic-gate if (!metaislocalset(sp)) 2278*0Sstevel@tonic-gate return; 2279*0Sstevel@tonic-gate 2280*0Sstevel@tonic-gate /* Updates backup of configuration file (aka mddb.cf) */ 2281*0Sstevel@tonic-gate if (buildconf(sp, ep) != 0) 2282*0Sstevel@tonic-gate return; 2283*0Sstevel@tonic-gate 2284*0Sstevel@tonic-gate /* Updates system configuration file (aka md.conf) */ 2285*0Sstevel@tonic-gate (void) meta_db_patch(sname, cname, 0, ep); 2286*0Sstevel@tonic-gate } 2287*0Sstevel@tonic-gate 2288*0Sstevel@tonic-gate /* 2289*0Sstevel@tonic-gate * setup_db_locations - parse the mddb.cf file and 2290*0Sstevel@tonic-gate * tells the driver which db locations to use. 
2291*0Sstevel@tonic-gate */ 2292*0Sstevel@tonic-gate int 2293*0Sstevel@tonic-gate meta_setup_db_locations( 2294*0Sstevel@tonic-gate md_error_t *ep 2295*0Sstevel@tonic-gate ) 2296*0Sstevel@tonic-gate { 2297*0Sstevel@tonic-gate mddb_config_t c; 2298*0Sstevel@tonic-gate FILE *fp; 2299*0Sstevel@tonic-gate char inbuff[1024]; 2300*0Sstevel@tonic-gate char *buff; 2301*0Sstevel@tonic-gate uint_t i; 2302*0Sstevel@tonic-gate size_t sz; 2303*0Sstevel@tonic-gate int rval = 0; 2304*0Sstevel@tonic-gate char *devidp; 2305*0Sstevel@tonic-gate uint_t devid_size; 2306*0Sstevel@tonic-gate char *minor_name = NULL; 2307*0Sstevel@tonic-gate ddi_devid_t devid_decode; 2308*0Sstevel@tonic-gate int checksum; 2309*0Sstevel@tonic-gate 2310*0Sstevel@tonic-gate /* do mddb.cf file */ 2311*0Sstevel@tonic-gate (void) memset(&c, '\0', sizeof (c)); 2312*0Sstevel@tonic-gate if ((fp = fopen(META_DBCONF, "r")) == NULL) { 2313*0Sstevel@tonic-gate if (errno != ENOENT) 2314*0Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 2315*0Sstevel@tonic-gate } 2316*0Sstevel@tonic-gate while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1), 2317*0Sstevel@tonic-gate fp)) != NULL)) { 2318*0Sstevel@tonic-gate 2319*0Sstevel@tonic-gate /* ignore comments */ 2320*0Sstevel@tonic-gate if (*buff == '#') 2321*0Sstevel@tonic-gate continue; 2322*0Sstevel@tonic-gate 2323*0Sstevel@tonic-gate /* parse locator */ 2324*0Sstevel@tonic-gate (void) memset(&c, 0, sizeof (c)); 2325*0Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 2326*0Sstevel@tonic-gate i = strcspn(buff, " \t"); 2327*0Sstevel@tonic-gate if (i > sizeof (c.c_locator.l_driver)) 2328*0Sstevel@tonic-gate i = sizeof (c.c_locator.l_driver); 2329*0Sstevel@tonic-gate (void) strncpy(c.c_locator.l_driver, buff, i); 2330*0Sstevel@tonic-gate buff += i; 2331*0Sstevel@tonic-gate c.c_locator.l_dev = 2332*0Sstevel@tonic-gate makedev((major_t)0, (minor_t)strtol(buff, &buff, 10)); 2333*0Sstevel@tonic-gate c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10); 
2334*0Sstevel@tonic-gate c.c_locator.l_mnum = minor(c.c_locator.l_dev); 2335*0Sstevel@tonic-gate 2336*0Sstevel@tonic-gate /* parse out devid */ 2337*0Sstevel@tonic-gate while (isspace((int)(*buff))) 2338*0Sstevel@tonic-gate buff += 1; 2339*0Sstevel@tonic-gate i = strcspn(buff, " \t"); 2340*0Sstevel@tonic-gate if ((devidp = (char *)malloc(i+1)) == NULL) 2341*0Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 2342*0Sstevel@tonic-gate 2343*0Sstevel@tonic-gate (void) strncpy(devidp, buff, i); 2344*0Sstevel@tonic-gate devidp[i] = '\0'; 2345*0Sstevel@tonic-gate if (devid_str_decode(devidp, &devid_decode, 2346*0Sstevel@tonic-gate &minor_name) == -1) { 2347*0Sstevel@tonic-gate free(devidp); 2348*0Sstevel@tonic-gate continue; 2349*0Sstevel@tonic-gate } 2350*0Sstevel@tonic-gate 2351*0Sstevel@tonic-gate /* Conf file must have minor name associated with devid */ 2352*0Sstevel@tonic-gate if (minor_name == NULL) { 2353*0Sstevel@tonic-gate free(devidp); 2354*0Sstevel@tonic-gate devid_free(devid_decode); 2355*0Sstevel@tonic-gate continue; 2356*0Sstevel@tonic-gate } 2357*0Sstevel@tonic-gate 2358*0Sstevel@tonic-gate sz = devid_sizeof(devid_decode); 2359*0Sstevel@tonic-gate /* Copy to devid size buffer that ioctl expects */ 2360*0Sstevel@tonic-gate if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) { 2361*0Sstevel@tonic-gate devid_free(devid_decode); 2362*0Sstevel@tonic-gate free(minor_name); 2363*0Sstevel@tonic-gate free(devidp); 2364*0Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 2365*0Sstevel@tonic-gate } 2366*0Sstevel@tonic-gate 2367*0Sstevel@tonic-gate (void) memcpy((void *)c.c_locator.l_devid, 2368*0Sstevel@tonic-gate (void *)devid_decode, sz); 2369*0Sstevel@tonic-gate 2370*0Sstevel@tonic-gate devid_free(devid_decode); 2371*0Sstevel@tonic-gate 2372*0Sstevel@tonic-gate if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) { 2373*0Sstevel@tonic-gate free(minor_name); 2374*0Sstevel@tonic-gate free(devidp); 2375*0Sstevel@tonic-gate free((void 
*)c.c_locator.l_devid); 2376*0Sstevel@tonic-gate return (mdsyserror(ep, ENOMEM, META_DBCONF)); 2377*0Sstevel@tonic-gate } 2378*0Sstevel@tonic-gate (void) strcpy(c.c_locator.l_minor_name, minor_name); 2379*0Sstevel@tonic-gate free(minor_name); 2380*0Sstevel@tonic-gate c.c_locator.l_devid_flags = MDDB_DEVID_VALID | 2381*0Sstevel@tonic-gate MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 2382*0Sstevel@tonic-gate c.c_locator.l_devid_sz = sz; 2383*0Sstevel@tonic-gate 2384*0Sstevel@tonic-gate devid_size = strlen(devidp); 2385*0Sstevel@tonic-gate buff += devid_size; 2386*0Sstevel@tonic-gate 2387*0Sstevel@tonic-gate checksum = strtol(buff, &buff, 10); 2388*0Sstevel@tonic-gate for (i = 0; c.c_locator.l_driver[i] != 0; i++) 2389*0Sstevel@tonic-gate checksum += c.c_locator.l_driver[i]; 2390*0Sstevel@tonic-gate for (i = 0; i < devid_size; i++) { 2391*0Sstevel@tonic-gate checksum += devidp[i]; 2392*0Sstevel@tonic-gate } 2393*0Sstevel@tonic-gate free(devidp); 2394*0Sstevel@tonic-gate 2395*0Sstevel@tonic-gate checksum += minor(c.c_locator.l_dev); 2396*0Sstevel@tonic-gate checksum += c.c_locator.l_blkno; 2397*0Sstevel@tonic-gate if (checksum != 42) { 2398*0Sstevel@tonic-gate /* overwritten later for more serious problems */ 2399*0Sstevel@tonic-gate rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF); 2400*0Sstevel@tonic-gate free((void *)c.c_locator.l_devid); 2401*0Sstevel@tonic-gate continue; 2402*0Sstevel@tonic-gate } 2403*0Sstevel@tonic-gate c.c_locator.l_flags = 0; 2404*0Sstevel@tonic-gate 2405*0Sstevel@tonic-gate /* use db location */ 2406*0Sstevel@tonic-gate if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 2407*0Sstevel@tonic-gate free((void *)c.c_locator.l_devid); 2408*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 2409*0Sstevel@tonic-gate } 2410*0Sstevel@tonic-gate 2411*0Sstevel@tonic-gate /* free up devid if in use */ 2412*0Sstevel@tonic-gate free((void *)c.c_locator.l_devid); 2413*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 2414*0Sstevel@tonic-gate 
c.c_locator.l_devid_flags = 0; 2415*0Sstevel@tonic-gate } 2416*0Sstevel@tonic-gate if ((fp) && (fclose(fp) != 0)) 2417*0Sstevel@tonic-gate return (mdsyserror(ep, errno, META_DBCONF)); 2418*0Sstevel@tonic-gate 2419*0Sstevel@tonic-gate /* check for stale database */ 2420*0Sstevel@tonic-gate (void) memset((char *)&c, 0, sizeof (struct mddb_config)); 2421*0Sstevel@tonic-gate c.c_id = 0; 2422*0Sstevel@tonic-gate c.c_setno = MD_LOCAL_SET; 2423*0Sstevel@tonic-gate 2424*0Sstevel@tonic-gate /* Don't need device id information from this ioctl */ 2425*0Sstevel@tonic-gate c.c_locator.l_devid = (uint64_t)0; 2426*0Sstevel@tonic-gate c.c_locator.l_devid_flags = 0; 2427*0Sstevel@tonic-gate 2428*0Sstevel@tonic-gate if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 2429*0Sstevel@tonic-gate if (! mdismddberror(&c.c_mde, MDE_DB_INVALID)) 2430*0Sstevel@tonic-gate return (mdstealerror(ep, &c.c_mde)); 2431*0Sstevel@tonic-gate mdclrerror(&c.c_mde); 2432*0Sstevel@tonic-gate } 2433*0Sstevel@tonic-gate 2434*0Sstevel@tonic-gate if (c.c_flags & MDDB_C_STALE) 2435*0Sstevel@tonic-gate return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET, 2436*0Sstevel@tonic-gate 0, NULL)); 2437*0Sstevel@tonic-gate 2438*0Sstevel@tonic-gate /* success */ 2439*0Sstevel@tonic-gate return (rval); 2440*0Sstevel@tonic-gate } 2441*0Sstevel@tonic-gate 2442*0Sstevel@tonic-gate /* 2443*0Sstevel@tonic-gate * meta_db_minreplica - returns the minimum size replica currently in use. 
2444*0Sstevel@tonic-gate */ 2445*0Sstevel@tonic-gate daddr_t 2446*0Sstevel@tonic-gate meta_db_minreplica( 2447*0Sstevel@tonic-gate mdsetname_t *sp, 2448*0Sstevel@tonic-gate md_error_t *ep 2449*0Sstevel@tonic-gate ) 2450*0Sstevel@tonic-gate { 2451*0Sstevel@tonic-gate md_replica_t *r; 2452*0Sstevel@tonic-gate md_replicalist_t *rl, *rlp = NULL; 2453*0Sstevel@tonic-gate daddr_t nblks = 0; 2454*0Sstevel@tonic-gate 2455*0Sstevel@tonic-gate if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) 2456*0Sstevel@tonic-gate return (-1); 2457*0Sstevel@tonic-gate 2458*0Sstevel@tonic-gate if (rlp == NULL) 2459*0Sstevel@tonic-gate return (-1); 2460*0Sstevel@tonic-gate 2461*0Sstevel@tonic-gate /* find the smallest existing replica */ 2462*0Sstevel@tonic-gate for (rl = rlp; rl != NULL; rl = rl->rl_next) { 2463*0Sstevel@tonic-gate r = rl->rl_repp; 2464*0Sstevel@tonic-gate nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); 2465*0Sstevel@tonic-gate } 2466*0Sstevel@tonic-gate 2467*0Sstevel@tonic-gate metafreereplicalist(rlp); 2468*0Sstevel@tonic-gate return (nblks); 2469*0Sstevel@tonic-gate } 2470*0Sstevel@tonic-gate 2471*0Sstevel@tonic-gate /* 2472*0Sstevel@tonic-gate * meta_get_replica_names 2473*0Sstevel@tonic-gate * returns an mdnamelist_t of replica slices 2474*0Sstevel@tonic-gate */ 2475*0Sstevel@tonic-gate /*ARGSUSED*/ 2476*0Sstevel@tonic-gate int 2477*0Sstevel@tonic-gate meta_get_replica_names( 2478*0Sstevel@tonic-gate mdsetname_t *sp, 2479*0Sstevel@tonic-gate mdnamelist_t **nlpp, 2480*0Sstevel@tonic-gate int options, 2481*0Sstevel@tonic-gate md_error_t *ep 2482*0Sstevel@tonic-gate ) 2483*0Sstevel@tonic-gate { 2484*0Sstevel@tonic-gate md_replicalist_t *rlp = NULL; 2485*0Sstevel@tonic-gate md_replicalist_t *rl; 2486*0Sstevel@tonic-gate mdnamelist_t **tailpp = nlpp; 2487*0Sstevel@tonic-gate int cnt = 0; 2488*0Sstevel@tonic-gate 2489*0Sstevel@tonic-gate assert(nlpp != NULL); 2490*0Sstevel@tonic-gate 2491*0Sstevel@tonic-gate if (!metaislocalset(sp)) 
2492*0Sstevel@tonic-gate goto out; 2493*0Sstevel@tonic-gate 2494*0Sstevel@tonic-gate /* get replicas */ 2495*0Sstevel@tonic-gate if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) { 2496*0Sstevel@tonic-gate cnt = -1; 2497*0Sstevel@tonic-gate goto out; 2498*0Sstevel@tonic-gate } 2499*0Sstevel@tonic-gate 2500*0Sstevel@tonic-gate /* build name list */ 2501*0Sstevel@tonic-gate for (rl = rlp; (rl != NULL); rl = rl->rl_next) { 2502*0Sstevel@tonic-gate /* 2503*0Sstevel@tonic-gate * Add the name struct to the end of the 2504*0Sstevel@tonic-gate * namelist but keep a pointer to the last 2505*0Sstevel@tonic-gate * element so that we don't incur the overhead 2506*0Sstevel@tonic-gate * of traversing the list each time 2507*0Sstevel@tonic-gate */ 2508*0Sstevel@tonic-gate tailpp = meta_namelist_append_wrapper( 2509*0Sstevel@tonic-gate tailpp, rl->rl_repp->r_namep); 2510*0Sstevel@tonic-gate ++cnt; 2511*0Sstevel@tonic-gate } 2512*0Sstevel@tonic-gate 2513*0Sstevel@tonic-gate /* cleanup, return count or error */ 2514*0Sstevel@tonic-gate out: 2515*0Sstevel@tonic-gate metafreereplicalist(rlp); 2516*0Sstevel@tonic-gate return (cnt); 2517*0Sstevel@tonic-gate } 2518