1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdlib.h> 29 #include <unistd.h> 30 #include <wait.h> 31 #include <sys/time.h> 32 #include <syslog.h> 33 34 #include <meta.h> 35 #include <sys/lvm/mdio.h> 36 #include <sys/lvm/md_mddb.h> 37 #include <sys/lvm/md_mirror.h> 38 39 #define MAX_N_ARGS 64 40 #define MAX_ARG_LEN 1024 41 42 /* we reserve 1024 bytes for stdout and the same for stderr */ 43 #define MAX_OUT 1024 44 #define MAX_ERR 1024 45 #define JUNK 128 /* used to flush stdout and stderr */ 46 47 48 /*ARGSUSED*/ 49 void 50 mdmn_do_cmd(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 51 { 52 53 /* 54 * We are given one string containing all the arguments 55 * For execvp() we have to regenerate the arguments again 56 */ 57 int arg; /* argument that is currently been built */ 58 int index; /* runs through arg above */ 59 int i; /* helper for for loop */ 60 char *argv[MAX_N_ARGS]; /* argument array for execvp */ 61 char *cp; /* runs through the given command line string */ 62 char *command = NULL; /* the 
command we call locally */ 63 int pout[2]; /* pipe for stdout */ 64 int perr[2]; /* pipe for stderr */ 65 pid_t pid; /* process id */ 66 67 cp = msg->msg_event_data; 68 arg = 0; 69 index = 0; 70 71 /* init the args array alloc the first one and null out the rest */ 72 argv[0] = Malloc(MAX_ARG_LEN); 73 for (i = 1; i < MAX_N_ARGS; i++) { 74 argv[i] = NULL; 75 } 76 77 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 78 79 while (*cp != '\0') { 80 if (arg == MAX_N_ARGS) { 81 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 82 "PANIC: too many arguments specified\n")); 83 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 84 goto out; 85 } 86 if (index == MAX_ARG_LEN) { 87 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 88 "PANIC: argument too long\n")); 89 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 90 goto out; 91 } 92 93 if ((*cp != ' ') && (*cp != '\t')) { 94 /* 95 * No space or tab: copy char into current 96 * argv and advance both pointers 97 */ 98 99 argv[arg][index] = *cp; 100 cp++; /* next char in command line */ 101 index++; /* next char in argument */ 102 } else { 103 /* 104 * space or tab: terminate current argv, 105 * advance arg, reset pointer into arg, 106 * advance pointer in command line 107 */ 108 argv[arg][index] = '\0'; 109 arg++; /* next argument */ 110 argv[arg] = Malloc(MAX_ARG_LEN); 111 cp++; /* next char in command line */ 112 index = 0; /* starts at char 0 */ 113 } 114 } 115 /* terminate the last real argument */ 116 argv[arg][index] = '\0'; 117 /* the last argument is an NULL pointer */ 118 argv[++arg] = NULL; 119 if (pipe(pout) < 0) { 120 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 121 "PANIC: pipe failed\n")); 122 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 123 goto out; 124 } 125 if (pipe(perr) < 0) { 126 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 127 "PANIC: pipe failed\n")); 128 (void) close(pout[0]); 129 (void) close(pout[1]); 130 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 131 goto out; 132 } 133 command = Strdup(argv[0]); 134 (void) 
strcat(argv[0], ".rpc_call"); 135 pid = fork1(); 136 if (pid == (pid_t)-1) { 137 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 138 "PANIC: fork failed\n")); 139 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 140 (void) close(pout[0]); 141 (void) close(pout[1]); 142 (void) close(perr[0]); 143 (void) close(perr[1]); 144 goto out; 145 } else if (pid == (pid_t)0) { 146 /* child */ 147 (void) close(0); 148 /* close the reading channels of pout and perr */ 149 (void) close(pout[0]); 150 (void) close(perr[0]); 151 /* redirect stdout */ 152 if (dup2(pout[1], 1) < 0) { 153 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 154 "PANIC: dup2 failed\n")); 155 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 156 return; 157 } 158 159 /* redirect stderr */ 160 if (dup2(perr[1], 2) < 0) { 161 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 162 "PANIC: dup2 failed\n")); 163 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 164 return; 165 } 166 167 (void) execvp(command, (char *const *)argv); 168 perror("execvp"); 169 _exit(1); 170 } else { 171 /* parent process */ 172 int stat_loc; 173 char *out, *err; /* for stdout and stderr of child */ 174 int i; /* index into the aboves */ 175 char junk[JUNK]; 176 int out_done = 0; 177 int err_done = 0; 178 int out_read = 0; 179 int err_read = 0; 180 int maxfd; 181 fd_set rset; 182 183 184 /* close the writing channels of pout and perr */ 185 (void) close(pout[1]); 186 (void) close(perr[1]); 187 resp->mmr_out = Malloc(MAX_OUT); 188 resp->mmr_err = Malloc(MAX_ERR); 189 resp->mmr_out_size = MAX_OUT; 190 resp->mmr_err_size = MAX_ERR; 191 out = resp->mmr_out; 192 err = resp->mmr_err; 193 FD_ZERO(&rset); 194 while ((out_done == 0) || (err_done == 0)) { 195 FD_SET(pout[0], &rset); 196 FD_SET(perr[0], &rset); 197 maxfd = max(pout[0], perr[0]) + 1; 198 (void) select(maxfd, &rset, NULL, NULL, NULL); 199 200 /* 201 * Did the child produce some output to stdout? 202 * If so, read it until we either reach the end of the 203 * output or until we read MAX_OUT bytes. 
204 * Whatever comes first. 205 * In case we already read MAX_OUT bytes we simply 206 * read away the output into a junk buffer. 207 * Just to make the child happy 208 */ 209 if (FD_ISSET(pout[0], &rset)) { 210 if (MAX_OUT - out_read - 1 > 0) { 211 i = read(pout[0], out, 212 MAX_OUT - out_read); 213 out_read += i; 214 out += i; 215 } else { 216 /* buffer full, empty stdout */ 217 i = read(pout[0], junk, JUNK); 218 } 219 if (i == 0) { 220 /* stdout is closed by child */ 221 out_done++; 222 } 223 } 224 /* same comment as above | sed -e 's/stdout/stderr/' */ 225 if (FD_ISSET(perr[0], &rset)) { 226 if (MAX_ERR - err_read - 1 > 0) { 227 i = read(perr[0], err, 228 MAX_ERR - err_read); 229 err_read += i; 230 err += i; 231 } else { 232 /* buffer full, empty stderr */ 233 i = read(perr[0], junk, JUNK); 234 } 235 if (i == 0) { 236 /* stderr is closed by child */ 237 err_done++; 238 } 239 } 240 } 241 resp->mmr_out[out_read] = '\0'; 242 resp->mmr_err[err_read] = '\0'; 243 244 while (waitpid(pid, &stat_loc, 0) < 0) { 245 if (errno != EINTR) { 246 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 247 break; 248 } 249 } 250 if (errno == 0) 251 resp->mmr_exitval = WEXITSTATUS(stat_loc); 252 253 (void) close(pout[0]); 254 (void) close(perr[0]); 255 } 256 out: 257 for (i = 0; i < MAX_N_ARGS; i++) { 258 if (argv[i] != NULL) { 259 free(argv[i]); 260 } 261 } 262 if (command != NULL) { 263 Free(command); 264 } 265 } 266 267 /* 268 * This is for checking if a metadevice is opened, and for 269 * locking in case it is not and for 270 * unlocking a locked device 271 */ 272 /*ARGSUSED*/ 273 void 274 mdmn_do_clu(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 275 { 276 if (msg->msg_type == MD_MN_MSG_CLU_CHECK) { 277 md_isopen_t *d; 278 int ret; 279 280 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 281 resp->mmr_out_size = 0; 282 resp->mmr_err_size = 0; 283 resp->mmr_out = NULL; 284 resp->mmr_err = NULL; 285 d = (md_isopen_t *)(void *)msg->msg_event_data; 286 ret = metaioctl(MD_IOCISOPEN, 
d, &(d->mde), NULL); 287 /* 288 * In case the ioctl succeeded, return the open state of 289 * the metadevice. Otherwise we return the error the ioctl 290 * produced. As this is not zero, no attempt is made to 291 * remove/rename the metadevice later 292 */ 293 294 if (ret == 0) { 295 resp->mmr_exitval = d->isopen; 296 } else { 297 /* 298 * When doing a metaclear, one node after the other 299 * does the two steps: 300 * - check on all nodes if this md is opened. 301 * - remove the md locally. 302 * When the 2nd node asks all nodes if the md is 303 * open it starts with the first node. 304 * As this already removed the md, the check 305 * returns MDE_UNIT_NOT_SETUP. 306 * In order to not keep the 2nd node from proceeding, 307 * we map this to an Ok. 308 */ 309 if (mdismderror(&(d->mde), MDE_UNIT_NOT_SETUP)) { 310 mdclrerror(&(d->mde)); 311 ret = 0; 312 } 313 314 resp->mmr_exitval = ret; 315 } 316 } 317 } 318 319 /* handler for MD_MN_MSG_REQUIRE_OWNER */ 320 /*ARGSUSED*/ 321 void 322 mdmn_do_req_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 323 { 324 md_set_mmown_params_t setown; 325 md_mn_req_owner_t *d; 326 int ret, n = 0; 327 328 resp->mmr_out_size = 0; 329 resp->mmr_err_size = 0; 330 resp->mmr_out = NULL; 331 resp->mmr_err = NULL; 332 resp->mmr_comm_state = MDMNE_ACK; 333 d = (md_mn_req_owner_t *)(void *)msg->msg_event_data; 334 335 (void) memset(&setown, 0, sizeof (setown)); 336 MD_SETDRIVERNAME(&setown, MD_MIRROR, MD_MIN2SET(d->mnum)) 337 setown.d.mnum = d->mnum; 338 setown.d.owner = d->owner; 339 340 /* Retry ownership change if we get EAGAIN returned */ 341 while ((ret = metaioctl(MD_MN_SET_MM_OWNER, &setown, &setown.mde, NULL)) 342 != 0) { 343 md_sys_error_t *ip = 344 &setown.mde.info.md_error_info_t_u.sys_error; 345 if (ip->errnum != EAGAIN) { 346 break; 347 } 348 if (n++ >= 10) { 349 break; 350 } 351 (void) sleep(1); 352 } 353 354 resp->mmr_exitval = ret; 355 } 356 357 /* 358 * handler for MD_MN_MSG_CHOOSE_OWNER 359 * This is called when a 
mirror resync has no owner. The master node generates 360 * this message which is not broadcast to the other nodes. The message is 361 * required as the kernel does not have access to the nodelist for the set. 362 */ 363 /*ARGSUSED*/ 364 void 365 mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 366 { 367 md_mn_msg_chowner_t chownermsg; 368 md_mn_msg_chooseid_t *d; 369 int ret = 0; 370 int nodecnt; 371 int nodeno; 372 uint_t nodeid; 373 uint_t myflags; 374 set_t setno; 375 mdsetname_t *sp; 376 md_set_desc *sd; 377 md_mnnode_desc *nd; 378 md_error_t mde = mdnullerror; 379 md_mn_result_t *resp1 = NULL; 380 381 resp->mmr_out_size = 0; 382 resp->mmr_err_size = 0; 383 resp->mmr_out = NULL; 384 resp->mmr_err = NULL; 385 resp->mmr_comm_state = MDMNE_ACK; 386 d = (md_mn_msg_chooseid_t *)(void *)msg->msg_event_data; 387 388 /* 389 * The node to be chosen will be the resync count for the set 390 * modulo the number of live nodes in the set 391 */ 392 setno = MD_MIN2SET(d->msg_chooseid_mnum); 393 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 394 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 395 "MD_MN_MSG_CHOOSE_OWNER: Invalid setno %d\n"), setno); 396 resp->mmr_exitval = 1; 397 return; 398 } 399 if ((sd = metaget_setdesc(sp, &mde)) == NULL) { 400 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 401 "MD_MN_MSG_CHOOSE_OWNER: Invalid set pointer\n")); 402 resp->mmr_exitval = 1; 403 return; 404 } 405 406 /* Count the number of live nodes */ 407 nodecnt = 0; 408 nd = sd->sd_nodelist; 409 while (nd) { 410 if (nd->nd_flags & MD_MN_NODE_ALIVE) 411 nodecnt++; 412 nd = nd->nd_next; 413 } 414 nodeno = (d->msg_chooseid_rcnt%nodecnt); 415 416 /* 417 * If we've been called with msg_chooseid_set_node set TRUE then we 418 * are simply re-setting the owner id to ensure consistency across 419 * the cluster. 420 * If the flag is reset (B_FALSE) we are requesting a new owner to be 421 * determined. 
422 */ 423 if (d->msg_chooseid_set_node) { 424 nodeid = d->msg_chooseid_rcnt; 425 } else { 426 /* scan the nodelist looking for the required node */ 427 nodecnt = 0; 428 nd = sd->sd_nodelist; 429 while (nd) { 430 if (nd->nd_flags & MD_MN_NODE_ALIVE) { 431 if (nodecnt == nodeno) 432 break; 433 nodecnt++; 434 } 435 nd = nd->nd_next; 436 } 437 nodeid = nd->nd_nodeid; 438 } 439 440 /* Send message to all nodes to make ownership change */ 441 chownermsg.msg_chowner_mnum = d->msg_chooseid_mnum; 442 chownermsg.msg_chowner_nodeid = nodeid; 443 myflags = MD_MSGF_NO_LOG; 444 445 /* inherit some flags from the parent message */ 446 myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS; 447 448 ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum), 449 MD_MN_MSG_CHANGE_OWNER, myflags, (char *)&chownermsg, 450 sizeof (chownermsg), &resp1, &mde); 451 if (resp1 != NULL) 452 free_result(resp1); 453 resp->mmr_exitval = ret; 454 } 455 456 /* 457 * Handler for MD_MN_MSG_CHANGE_OWNER 458 * This is called when we are perfoming a resync and wish to change from 459 * no mirror owner to an owner chosen by the master. 
460 * This mesage is only relevant for the new owner, the message will be 461 * ignored by all other nodes 462 */ 463 /*ARGSUSED*/ 464 void 465 mdmn_do_change_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 466 { 467 md_set_mmown_params_t setown; 468 md_mn_msg_chowner_t *d; 469 int ret = 0; 470 set_t setno; 471 mdsetname_t *sp; 472 md_set_desc *sd; 473 md_error_t mde = mdnullerror; 474 475 resp->mmr_out_size = 0; 476 resp->mmr_err_size = 0; 477 resp->mmr_out = NULL; 478 resp->mmr_err = NULL; 479 resp->mmr_comm_state = MDMNE_ACK; 480 d = (md_mn_msg_chowner_t *)(void *)msg->msg_event_data; 481 482 setno = MD_MIN2SET(d->msg_chowner_mnum); 483 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 484 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 485 "MD_MN_MSG_CHANGE_OWNER: Invalid setno %d\n"), setno); 486 resp->mmr_exitval = 1; 487 return; 488 } 489 if ((sd = metaget_setdesc(sp, &mde)) == NULL) { 490 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 491 "MD_MN_MSG_CHANGE_OWNER: Invalid set pointer\n")); 492 resp->mmr_exitval = 1; 493 return; 494 } 495 496 if (d->msg_chowner_nodeid == sd->sd_mn_mynode->nd_nodeid) { 497 /* 498 * If we are the chosen owner, issue ioctl to make the 499 * ownership change 500 */ 501 (void) memset(&setown, 0, sizeof (md_set_mmown_params_t)); 502 setown.d.mnum = d->msg_chowner_mnum; 503 setown.d.owner = d->msg_chowner_nodeid; 504 setown.d.flags = MD_MN_MM_SPAWN_THREAD; 505 MD_SETDRIVERNAME(&setown, MD_MIRROR, 506 MD_MIN2SET(d->msg_chowner_mnum)); 507 508 /* 509 * Single shot at changing the the owner, if it fails EAGAIN, 510 * another node must have become the owner while we are in the 511 * process of making this choice. 
512 */ 513 514 ret = metaioctl(MD_MN_SET_MM_OWNER, &setown, 515 &(setown.mde), NULL); 516 if (ret == EAGAIN) 517 ret = 0; 518 } 519 resp->mmr_exitval = ret; 520 } 521 522 /* handler for MD_MN_MSG_SUSPEND_WRITES */ 523 /*ARGSUSED*/ 524 void 525 mdmn_do_susp_write(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 526 { 527 /* Suspend writes to a region of a mirror */ 528 md_suspend_wr_params_t suspwr_ioc; 529 md_mn_msg_suspwr_t *d; 530 int ret; 531 532 resp->mmr_out_size = 0; 533 resp->mmr_err_size = 0; 534 resp->mmr_out = NULL; 535 resp->mmr_err = NULL; 536 resp->mmr_comm_state = MDMNE_ACK; 537 d = (md_mn_msg_suspwr_t *)(void *)msg->msg_event_data; 538 539 (void) memset(&suspwr_ioc, 0, sizeof (md_suspend_wr_params_t)); 540 MD_SETDRIVERNAME(&suspwr_ioc, MD_MIRROR, 541 MD_MIN2SET(d->msg_suspwr_mnum)); 542 suspwr_ioc.mnum = d->msg_suspwr_mnum; 543 ret = metaioctl(MD_MN_SUSPEND_WRITES, &suspwr_ioc, 544 &(suspwr_ioc.mde), NULL); 545 resp->mmr_exitval = ret; 546 } 547 548 /* 549 * handler for MD_MN_MSG_STATE_UPDATE_RESWR 550 * This functions update a submirror component state and then resumes writes 551 * to the mirror 552 */ 553 /*ARGSUSED*/ 554 void 555 mdmn_do_state_upd_reswr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 556 { 557 /* Update the state of the component of a mirror */ 558 md_set_state_params_t setstate_ioc; 559 md_mn_msg_stch_t *d; 560 int ret; 561 562 resp->mmr_out_size = 0; 563 resp->mmr_err_size = 0; 564 resp->mmr_out = NULL; 565 resp->mmr_err = NULL; 566 resp->mmr_comm_state = MDMNE_ACK; 567 d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data; 568 569 (void) memset(&setstate_ioc, 0, sizeof (md_set_state_params_t)); 570 MD_SETDRIVERNAME(&setstate_ioc, MD_MIRROR, 571 MD_MIN2SET(d->msg_stch_mnum)); 572 setstate_ioc.mnum = d->msg_stch_mnum; 573 setstate_ioc.sm = d->msg_stch_sm; 574 setstate_ioc.comp = d->msg_stch_comp; 575 setstate_ioc.state = d->msg_stch_new_state; 576 setstate_ioc.hs_id = d->msg_stch_hs_id; 577 ret = 
metaioctl(MD_MN_SET_STATE, &setstate_ioc, 578 &(setstate_ioc.mde), NULL); 579 resp->mmr_exitval = ret; 580 } 581 582 /* 583 * submessage generator for MD_MN_MSG_STATE_UPDATE and MD_MN_MSG_STATE_UPDATE2 584 * This generates 2 messages, the first is SUSPEND_WRITES and 585 * depending on the type of the original message the second one is 586 * either STATE_UPDATE_RESWR or STATE_UPDATE_RESWR2 which actually does 587 * the same, but runs on a higher class. 588 */ 589 int 590 mdmn_smgen_state_upd(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 591 { 592 md_mn_msg_t *nmsg; 593 md_mn_msg_stch_t *d; 594 md_mn_msg_stch_t *stch_data; 595 md_mn_msg_suspwr_t *suspwr_data; 596 597 d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data; 598 599 nmsg = Zalloc(sizeof (md_mn_msg_t)); 600 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 601 602 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 603 nmsg->msg_setno = msg->msg_setno; 604 nmsg->msg_type = MD_MN_MSG_SUSPEND_WRITES; 605 nmsg->msg_event_size = sizeof (md_mn_msg_suspwr_t); 606 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_suspwr_t)); 607 suspwr_data = (md_mn_msg_suspwr_t *)(void *)nmsg->msg_event_data; 608 suspwr_data->msg_suspwr_mnum = d->msg_stch_mnum; 609 msglist[0] = nmsg; 610 611 nmsg = Zalloc(sizeof (md_mn_msg_t)); 612 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 613 614 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 615 nmsg->msg_setno = msg->msg_setno; 616 if (msg->msg_type == MD_MN_MSG_STATE_UPDATE2) { 617 nmsg->msg_type = MD_MN_MSG_STATE_UPDATE_RESWR2; 618 } else { 619 nmsg->msg_type = MD_MN_MSG_STATE_UPDATE_RESWR; 620 } 621 nmsg->msg_event_size = sizeof (md_mn_msg_stch_t); 622 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_stch_t)); 623 stch_data = (md_mn_msg_stch_t *)(void *)nmsg->msg_event_data; 624 stch_data->msg_stch_mnum = d->msg_stch_mnum; 625 stch_data->msg_stch_sm = d->msg_stch_sm; 626 stch_data->msg_stch_comp = d->msg_stch_comp; 627 stch_data->msg_stch_new_state = 
d->msg_stch_new_state; 628 stch_data->msg_stch_hs_id = d->msg_stch_hs_id; 629 msglist[1] = nmsg; 630 return (2); /* Return the number of submessages generated */ 631 } 632 633 /* 634 * handler for MD_MN_MSG_ALLOCATE_HOTSPARE and MD_MN_MSG_ALLOCATE_HOTSPARE2 635 * This sends a message to all nodes requesting them to allocate a hotspare 636 * for the specified component. The component is specified by the mnum of 637 * the mirror, the submirror index and the component index. 638 */ 639 /*ARGSUSED*/ 640 void 641 mdmn_do_allocate_hotspare(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 642 { 643 /* Allocate a hotspare for a mirror component */ 644 md_alloc_hotsp_params_t allochsp_ioc; 645 md_mn_msg_allochsp_t *d; 646 int ret; 647 648 resp->mmr_out_size = 0; 649 resp->mmr_err_size = 0; 650 resp->mmr_out = NULL; 651 resp->mmr_err = NULL; 652 resp->mmr_comm_state = MDMNE_ACK; 653 d = (md_mn_msg_allochsp_t *)((void *)(msg->msg_event_data)); 654 655 (void) memset(&allochsp_ioc, 0, 656 sizeof (md_alloc_hotsp_params_t)); 657 MD_SETDRIVERNAME(&allochsp_ioc, MD_MIRROR, 658 MD_MIN2SET(d->msg_allochsp_mnum)); 659 allochsp_ioc.mnum = d->msg_allochsp_mnum; 660 allochsp_ioc.sm = d->msg_allochsp_sm; 661 allochsp_ioc.comp = d->msg_allochsp_comp; 662 allochsp_ioc.hs_id = d->msg_allochsp_hs_id; 663 ret = metaioctl(MD_MN_ALLOCATE_HOTSPARE, &allochsp_ioc, 664 &(allochsp_ioc.mde), NULL); 665 resp->mmr_exitval = ret; 666 } 667 668 /* 669 * handler for MD_MN_MSG_RESYNC_STARTING,MD_MN_MSG_RESYNC_FIRST, 670 * MD_MN_MSG_RESYNC_NEXT, MD_MN_MSG_RESYNC_FINISH, MD_MN_MSG_RESYNC_PHASE_DONE 671 */ 672 /*ARGSUSED*/ 673 void 674 mdmn_do_resync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 675 { 676 md_mn_msg_resync_t *d; 677 md_mn_rs_params_t respar; 678 int ret; 679 int smi; 680 681 resp->mmr_out_size = 0; 682 resp->mmr_err_size = 0; 683 resp->mmr_out = NULL; 684 resp->mmr_err = NULL; 685 resp->mmr_comm_state = MDMNE_ACK; 686 d = (md_mn_msg_resync_t *)((void *)(msg->msg_event_data)); 687 
688 (void) memset(&respar, 0, sizeof (respar)); 689 MD_SETDRIVERNAME(&respar, MD_MIRROR, 690 MD_MIN2SET(d->msg_resync_mnum)) 691 respar.msg_type = (int)msg->msg_type; 692 respar.mnum = d->msg_resync_mnum; 693 respar.rs_type = d->msg_resync_type; 694 respar.rs_start = d->msg_resync_start; 695 respar.rs_size = d->msg_resync_rsize; 696 respar.rs_done = d->msg_resync_done; 697 respar.rs_2_do = d->msg_resync_2_do; 698 respar.rs_originator = d->msg_originator; 699 respar.rs_flags = d->msg_resync_flags; 700 701 for (smi = 0; smi < NMIRROR; smi++) { 702 respar.rs_sm_state[smi] = d->msg_sm_state[smi]; 703 respar.rs_sm_flags[smi] = d->msg_sm_flags[smi]; 704 } 705 706 ret = metaioctl(MD_MN_RESYNC, &respar, &respar.mde, NULL); 707 708 resp->mmr_exitval = ret; 709 } 710 711 /* 712 * handler for MD_MN_MSG_SETSYNC 713 */ 714 /*ARGSUSED*/ 715 void 716 mdmn_do_setsync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 717 { 718 md_mn_msg_setsync_t *d; 719 md_resync_ioctl_t ri; 720 int ret; 721 722 resp->mmr_out_size = 0; 723 resp->mmr_err_size = 0; 724 resp->mmr_out = NULL; 725 resp->mmr_err = NULL; 726 resp->mmr_comm_state = MDMNE_ACK; 727 d = (md_mn_msg_setsync_t *)((void *)(msg->msg_event_data)); 728 729 (void) memset(&ri, 0, sizeof (ri)); 730 MD_SETDRIVERNAME(&ri, MD_MIRROR, MD_MIN2SET(d->setsync_mnum)) 731 ri.ri_mnum = d->setsync_mnum; 732 ri.ri_copysize = d->setsync_copysize; 733 ri.ri_flags = d->setsync_flags; 734 735 ret = metaioctl(MD_MN_SETSYNC, &ri, &ri.mde, NULL); 736 737 resp->mmr_exitval = ret; 738 } 739 740 /* 741 * handler for MD_MN_MSG_SET_CAP. As this handler can deal with both mirrors 742 * and soft partitions, the driver name that is required for the ioctl call 743 * is included in the message. 
744 */ 745 /*ARGSUSED*/ 746 void 747 mdmn_do_set_cap(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 748 { 749 md_mn_msg_setcap_t *d; 750 md_mn_setcap_params_t setcap_ioc; 751 minor_t mnum; 752 int ret; 753 754 resp->mmr_out_size = 0; 755 resp->mmr_err_size = 0; 756 resp->mmr_out = NULL; 757 resp->mmr_err = NULL; 758 resp->mmr_comm_state = MDMNE_ACK; 759 d = (md_mn_msg_setcap_t *)((void *)(msg->msg_event_data)); 760 mnum = d->msg_setcap_mnum; 761 762 (void) memset(&setcap_ioc, 0, sizeof (setcap_ioc)); 763 764 MD_SETDRIVERNAME(&setcap_ioc, d->msg_setcap_driver, MD_MIN2SET(mnum)); 765 setcap_ioc.mnum = mnum; 766 setcap_ioc.sc_set = d->msg_setcap_set; 767 768 ret = metaioctl(MD_MN_SET_CAP, &setcap_ioc, &setcap_ioc.mde, NULL); 769 770 resp->mmr_exitval = ret; 771 } 772 773 /* 774 * Dummy handler for various CLASS0 messages like 775 * MD_MN_MSG_VERBOSITY / MD_MN_MSG_RESUME / MD_MN_MSG_SUSPEND ... 776 */ 777 /*ARGSUSED*/ 778 void 779 mdmn_do_dummy(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 780 { 781 resp->mmr_out_size = 0; 782 resp->mmr_err_size = 0; 783 resp->mmr_out = NULL; 784 resp->mmr_err = NULL; 785 resp->mmr_exitval = 0; 786 resp->mmr_comm_state = MDMNE_ACK; 787 } 788 789 /* 790 * Overall description of mdcommd support that keeps all nodes in-sync 791 * with the ondisk diskset mddbs. 792 * 793 * All configuration changes to the mddb - addition/deletion of metadevices 794 * or replicas must use a CLASS1 message to block out these changes. 795 * Changes to the state of existing replicas do not need to block CLASS1 796 * since there is no conflict when just updating the state of a replica. 797 * 798 * Error encountered when master writes to mddbs: 799 * As the master updates parts of the mddbs, flags are updated describing 800 * what has been written. When all locks are dropped (either in 801 * mddb_setexit or mdioctl), a PARSE message will be generated to all 802 * nodes with an index list of known good mddbs and the parse flags. 
 * The master node ignores the parse message since it sent it.
 * The slave nodes re-read in the changed part of the mddb using the list
 * of known good replicas that was passed.
 * PARSE message does not block CLASS1.
 * The PARSE message must be the highest class message. Since this
 * message could be sent on any ioctl, this PARSE message class must
 * be higher than any other class message that could issue an ioctl.
 *
 * Master Slave1 Slave2
 * Handles_error
 * PARSE PARSE PARSE
 *
 *
 * Add/Delete mddbs can occur from the following commands:
 * metadb -s set_name -a/-d
 * metaset -s set_name -a/-d disk
 * metaset -s set_name -b
 *
 * The metadb/metaset command is run on the node executing the command
 * and sends an ATTACH/DETACH message to the master node blocking CLASS1
 * messages on all nodes until this message is finished. The master
 * node generates 3 submessages of BLOCK, SM_ATTACH/SM_DETACH, UNBLOCK.
 * The BLOCK message is only run on the master node and will BLOCK
 * the PARSE messages from being sent to the nodes.
 * The SM_ATTACH/SM_DETACH message is run on all nodes and actually adds or
 * removes the replica(s) from the given disk slice.
 * The UNBLOCK message is only run on the master node and allows the
 * sending of PARSE messages.
 *
 * Master Slave1 Slave2
 * Add mddb cmd
 * ATTACH msg to master
 * BLOCK
 * ATTACH ATTACH ATTACH
 * UNBLOCK
 * PARSE PARSE PARSE
 * ATTACH msg finished
 *
 * Add/Delete host side information from the following commands:
 * metaset -s set_name -a/-d -h
 *
 * The metaset command is run on the node executing the command and
 * sends a DB_NEWSIDE/DB_DELSIDE message and a MD_NEWSIDE/MD_DELSIDE
 * message whenever a host is added to or deleted from the diskset.
 *
 * The side information contains the major name and minor number
 * associated with a disk slice from a certain node's perspective
 * in a (failed) effort to support clustered systems that don't have the
 * same device name for a physical device. (The original designers of
 * SVM eventually took the shortcut of assuming that all device names
 * are the same on all systems, but left the side information in the
 * mddb and namespace.) The side information is used for disk slices
 * that contain mddbs and/or are components for metadevices.
 *
 * The DB_NEWSIDE/DELSIDE command adds or deletes the side information
 * for each mddb for the host being added or deleted.
 * The MD_ADDSIDE/MD_DELSIDE command adds or deletes the side information
 * for all disk slice components that are in the namespace records for
 * the host being added or deleted.
 *
 * The DB_NEWSIDE/DB_DELSIDE message does not change any mddb records
 * and only needs to be executed on the master node since the slave
 * nodes will be brought up to date by the PARSE message that is
 * generated as a result of a change to the mddb.
 * The MD_ADDSIDE/MD_DELSIDE message does modify the records in the mddb
 * and needs to be run on all nodes. The message must block class1
 * messages so that record changing commands don't interfere.
 *
 * Master Slave1 Slave2
 * Add host
 * DB_NEWSIDE msg to master
 * DB_NEWSIDE
 * PARSE PARSE PARSE
 * DB_NEWSIDE msg finished
 * MD_NEWSIDE msg to master
 * MD_NEWSIDE MD_NEWSIDE MD_NEWSIDE
 * MD_NEWSIDE msg finished
 *
 *
 * Optimized resync record failure:
 * When any node sees a failure to write an optimized resync record
 * that node notifies the master node of the replica that failed.
 * The master node handles the error and updates the rest of the
 * nodes using a PARSE message.
The PARSE message also calls 887 * fixoptrecord on each slave node causing each node to fix up 888 * the optimized resync records that are owned by that node (the mirror 889 * owner code also sets the optimized resync record owner). The master 890 * node will fix up all optimized resync records that have no owner or 891 * are owned by the master node. 892 * 893 * Master Slave1 Slave2 894 * Optimized Record Failure 895 * OPTRECERR msg to master 896 * Master handles opt rec failure 897 * PARSE PARSE PARSE 898 * OPTRECERR msg finished 899 * Slave rewrites optimized record 900 * 901 */ 902 903 /* 904 * Handler for MD_MN_MSG_MDDB_PARSE which send parse messages to the 905 * slave nodes in order to keep the incore view of the mddbs the 906 * same on all nodes. 907 * 908 * Since master node generated the mddb parse message, do nothing 909 * if this is the master node. 910 * 911 * If this is a slave node, send the parse message down to the kernel 912 * where this node will re-read in parts of the mddbs. 913 * 914 */ 915 void 916 mdmn_do_mddb_parse(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 917 { 918 md_mn_msg_mddb_parse_t *d; 919 mddb_parse_parm_t mpp; 920 int ret = 0; 921 int i; 922 923 resp->mmr_out_size = 0; 924 resp->mmr_err_size = 0; 925 resp->mmr_out = NULL; 926 resp->mmr_err = NULL; 927 resp->mmr_comm_state = MDMNE_ACK; 928 d = (md_mn_msg_mddb_parse_t *)((void *)(msg->msg_event_data)); 929 930 if (flags & MD_MSGF_ON_MASTER) 931 return; 932 933 (void) memset(&mpp, 0, sizeof (mpp)); 934 mpp.c_setno = msg->msg_setno; 935 mpp.c_parse_flags = d->msg_parse_flags; 936 for (i = 0; i < MDDB_NLB; i++) { 937 mpp.c_lb_flags[i] = d->msg_lb_flags[i]; 938 } 939 ret = metaioctl(MD_MN_MDDB_PARSE, &mpp, &mpp.c_mde, NULL); 940 if (ret) 941 (void) mdstealerror(&(resp->mmr_ep), &mpp.c_mde); 942 943 resp->mmr_exitval = ret; 944 } 945 946 /* 947 * Handler for MD_MN_MSG_MDDB_BLOCK which blocks the generation 948 * of parse messages from this node. 
949 * 950 * This is needed when attaching/detaching mddbs on the master and the 951 * slave node is unable to handle a parse message until the slave node 952 * has done the attach/detach of the mddbs. So, master node will block 953 * the parse messages, execute the attach/detach on all nodes and 954 * then unblock the parse messages which causes the parse message to 955 * be sent to all nodes. 956 */ 957 /*ARGSUSED*/ 958 void 959 mdmn_do_mddb_block(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 960 { 961 md_mn_msg_mddb_block_t *d; 962 mddb_block_parm_t mbp; 963 int ret; 964 965 resp->mmr_out_size = 0; 966 resp->mmr_err_size = 0; 967 resp->mmr_out = NULL; 968 resp->mmr_err = NULL; 969 resp->mmr_comm_state = MDMNE_ACK; 970 d = (md_mn_msg_mddb_block_t *)((void *)(msg->msg_event_data)); 971 972 (void) memset(&mbp, 0, sizeof (mbp)); 973 mbp.c_setno = msg->msg_setno; 974 mbp.c_blk_flags = d->msg_block_flags; 975 ret = metaioctl(MD_MN_MDDB_BLOCK, &mbp, &mbp.c_mde, NULL); 976 if (ret) 977 (void) mdstealerror(&(resp->mmr_ep), &mbp.c_mde); 978 979 resp->mmr_exitval = ret; 980 } 981 982 /* 983 * Submessage generator for MD_MN_MSG_META_DB_ATTACH which generates 984 * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_ATTACH 985 * message on all nodes and then an UNBLOCK message on the master only. 
986 */ 987 int 988 mdmn_smgen_mddb_attach(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 989 { 990 md_mn_msg_t *nmsg; 991 md_mn_msg_meta_db_attach_t *d; 992 md_mn_msg_meta_db_attach_t *attach_d; 993 md_mn_msg_mddb_block_t *block_d; 994 995 d = (md_mn_msg_meta_db_attach_t *)(void *)msg->msg_event_data; 996 997 nmsg = Zalloc(sizeof (md_mn_msg_t)); 998 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 999 1000 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1001 nmsg->msg_setno = msg->msg_setno; 1002 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1003 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1004 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1005 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1006 block_d->msg_block_flags = MDDB_BLOCK_PARSE; 1007 msglist[0] = nmsg; 1008 1009 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1010 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1011 1012 /* Don't log submessages and panic on inconsistent results */ 1013 nmsg->msg_flags = MD_MSGF_NO_LOG | 1014 MD_MSGF_PANIC_WHEN_INCONSISTENT; 1015 nmsg->msg_setno = msg->msg_setno; 1016 nmsg->msg_type = MD_MN_MSG_SM_MDDB_ATTACH; 1017 nmsg->msg_event_size = sizeof (md_mn_msg_meta_db_attach_t); 1018 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_attach_t)); 1019 attach_d = (md_mn_msg_meta_db_attach_t *) 1020 (void *)nmsg->msg_event_data; 1021 attach_d->msg_l_dev = d->msg_l_dev; 1022 attach_d->msg_cnt = d->msg_cnt; 1023 attach_d->msg_dbsize = d->msg_dbsize; 1024 (void) strncpy(attach_d->msg_dname, d->msg_dname, 16); 1025 attach_d->msg_splitname = d->msg_splitname; 1026 attach_d->msg_options = d->msg_options; 1027 msglist[1] = nmsg; 1028 1029 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1030 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1031 1032 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1033 nmsg->msg_setno = msg->msg_setno; 1034 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1035 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1036 
nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1037 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1038 block_d->msg_block_flags = MDDB_UNBLOCK_PARSE; 1039 msglist[2] = nmsg; 1040 1041 return (3); /* Return the number of submessages generated */ 1042 } 1043 1044 /* 1045 * Submessage generator for MD_MN_MSG_META_DB_DETACH which generates 1046 * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_DETACH 1047 * message on all nodes and then an UNBLOCK message on the master only. 1048 */ 1049 int 1050 mdmn_smgen_mddb_detach(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 1051 { 1052 md_mn_msg_t *nmsg; 1053 md_mn_msg_meta_db_detach_t *d; 1054 md_mn_msg_meta_db_detach_t *detach_d; 1055 md_mn_msg_mddb_block_t *block_d; 1056 1057 d = (md_mn_msg_meta_db_detach_t *)(void *)msg->msg_event_data; 1058 1059 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1060 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1061 1062 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1063 nmsg->msg_setno = msg->msg_setno; 1064 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1065 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1066 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1067 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1068 block_d->msg_block_flags = MDDB_BLOCK_PARSE; 1069 msglist[0] = nmsg; 1070 1071 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1072 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1073 1074 /* Don't log submessages and panic on inconsistent results */ 1075 nmsg->msg_flags = MD_MSGF_NO_LOG | 1076 MD_MSGF_PANIC_WHEN_INCONSISTENT; 1077 nmsg->msg_setno = msg->msg_setno; 1078 nmsg->msg_type = MD_MN_MSG_SM_MDDB_DETACH; 1079 nmsg->msg_event_size = sizeof (md_mn_msg_meta_db_detach_t); 1080 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_detach_t)); 1081 detach_d = (md_mn_msg_meta_db_detach_t *) 1082 (void *)nmsg->msg_event_data; 1083 detach_d->msg_splitname = d->msg_splitname; 1084 msglist[1] = nmsg; 1085 
1086 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1087 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1088 1089 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1090 nmsg->msg_setno = msg->msg_setno; 1091 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1092 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1093 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1094 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1095 block_d->msg_block_flags = MDDB_UNBLOCK_PARSE; 1096 msglist[2] = nmsg; 1097 1098 return (3); /* Return the number of submessages generated */ 1099 } 1100 1101 /* 1102 * Handler for MD_MN_MSG_SM_MDDB_ATTACH which is used to attach mddbs. 1103 * 1104 * Used when running: 1105 * metadb -s set_name -a 1106 * metaset -s set_name -a/-d disk 1107 * metaset -s set_name -b 1108 */ 1109 /*ARGSUSED*/ 1110 void 1111 mdmn_do_sm_mddb_attach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1112 { 1113 md_mn_msg_meta_db_attach_t *d; 1114 struct mddb_config c; 1115 int i; 1116 int ret = 0; 1117 md_error_t ep = mdnullerror; 1118 char *name, *add_name; 1119 mdname_t *np; 1120 mdsetname_t *sp; 1121 1122 resp->mmr_out_size = 0; 1123 resp->mmr_err_size = 0; 1124 resp->mmr_out = NULL; 1125 resp->mmr_err = NULL; 1126 resp->mmr_comm_state = MDMNE_ACK; 1127 d = (md_mn_msg_meta_db_attach_t *)((void *)(msg->msg_event_data)); 1128 1129 (void) memset(&c, 0, sizeof (c)); 1130 c.c_setno = msg->msg_setno; 1131 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1132 (void) strncpy(c.c_locator.l_driver, d->msg_dname, 1133 sizeof (c.c_locator.l_driver)); 1134 c.c_devname = d->msg_splitname; 1135 c.c_locator.l_mnum = meta_getminor(d->msg_l_dev); 1136 c.c_multi_node = 1; 1137 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1138 (void) mdstealerror(&(resp->mmr_ep), &ep); 1139 resp->mmr_exitval = -1; 1140 return; 1141 } 1142 (void) strcpy(c.c_setname, sp->setname); 1143 c.c_sideno = getmyside(sp, &ep); 1144 if (c.c_sideno == MD_SIDEWILD) { 1145 (void) 
mdstealerror(&(resp->mmr_ep), &ep); 1146 resp->mmr_exitval = -1; 1147 return; 1148 } 1149 1150 name = splicename(&d->msg_splitname); 1151 np = metaname(&sp, name, LOGICAL_DEVICE, &ep); 1152 Free(name); 1153 if (np == NULL) { 1154 (void) mdstealerror(&(resp->mmr_ep), &ep); 1155 resp->mmr_exitval = -1; 1156 return; 1157 } 1158 /* 1159 * All nodes in MN diskset must do meta_check_replica 1160 * since this causes the shared namespace to be 1161 * populated by the md driver names while checking 1162 * to see if this device is already in use as a 1163 * metadevice. 1164 */ 1165 if (meta_check_replica(sp, np, d->msg_options, 0, 1166 (d->msg_cnt * d->msg_dbsize), &ep)) { 1167 (void) mdstealerror(&(resp->mmr_ep), &ep); 1168 resp->mmr_exitval = -1; 1169 return; 1170 } 1171 1172 for (i = 0; i < d->msg_cnt; i++) { 1173 c.c_locator.l_blkno = i * d->msg_dbsize + 16; 1174 if (setup_med_cfg(sp, &c, 1175 (d->msg_options & MDCHK_SET_FORCE), &ep)) { 1176 ret = -1; 1177 (void) mdstealerror(&(resp->mmr_ep), &ep); 1178 break; 1179 } 1180 ret = metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL); 1181 /* If newdev was successful, continue with attach */ 1182 if (ret == 0) { 1183 if (meta_db_addsidenms(sp, np, c.c_locator.l_blkno, 1184 DB_ADDSIDENMS_NO_BCAST, &ep)) { 1185 ret = -1; 1186 (void) mdstealerror(&(resp->mmr_ep), &ep); 1187 break; 1188 } 1189 } else { 1190 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1191 break; 1192 } 1193 } 1194 add_name = splicename(&d->msg_splitname); 1195 if ((np = metaname(&sp, add_name, LOGICAL_DEVICE, &ep)) != NULL) { 1196 meta_invalidate_name(np); 1197 } else { 1198 ret = -1; 1199 (void) mdstealerror(&(resp->mmr_ep), &ep); 1200 } 1201 Free(add_name); 1202 1203 resp->mmr_exitval = ret; 1204 } 1205 1206 /* 1207 * Handler for MD_MN_MSG_SM_MDDB_DETACH which is used to detach mddbs. 
1208 * 1209 * Used when running: 1210 * metadb -s set_name -d 1211 * metaset -s set_name -a/-d disk 1212 * metaset -s set_name -b 1213 */ 1214 /*ARGSUSED*/ 1215 void 1216 mdmn_do_sm_mddb_detach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1217 { 1218 md_mn_msg_meta_db_detach_t *d; 1219 struct mddb_config c; 1220 int i; 1221 int ret = 0; 1222 md_error_t ep = mdnullerror; 1223 char *name, *del_name; 1224 mdname_t *np; 1225 mdsetname_t *sp; 1226 1227 resp->mmr_out_size = 0; 1228 resp->mmr_err_size = 0; 1229 resp->mmr_out = NULL; 1230 resp->mmr_err = NULL; 1231 resp->mmr_comm_state = MDMNE_ACK; 1232 d = (md_mn_msg_meta_db_detach_t *)((void *)(msg->msg_event_data)); 1233 1234 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1235 (void) mdstealerror(&(resp->mmr_ep), &ep); 1236 resp->mmr_exitval = -1; 1237 return; 1238 } 1239 1240 (void) memset(&c, 0, sizeof (c)); 1241 c.c_setno = msg->msg_setno; 1242 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1243 resp->mmr_exitval = -1; 1244 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1245 return; 1246 } 1247 i = 0; 1248 del_name = splicename(&d->msg_splitname); 1249 while (i < c.c_dbcnt) { 1250 c.c_id = i; 1251 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1252 ret = -1; 1253 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1254 break; 1255 } 1256 name = splicename(&c.c_devname); 1257 if (strcmp(name, del_name) != 0) { 1258 Free(name); 1259 i++; 1260 continue; 1261 } 1262 Free(name); 1263 /* Found a match - delete mddb */ 1264 if (metaioctl(MD_DB_DELDEV, &c, &c.c_mde, NULL) != 0) { 1265 ret = -1; 1266 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1267 break; 1268 } 1269 /* Not incrementing "i" intentionally (dbcnt is changed) */ 1270 } 1271 if ((np = metaname(&sp, del_name, LOGICAL_DEVICE, &ep)) != NULL) { 1272 meta_invalidate_name(np); 1273 } else { 1274 ret = -1; 1275 (void) mdstealerror(&(resp->mmr_ep), &ep); 1276 } 1277 Free(del_name); 1278 1279 resp->mmr_exitval = ret; 1280 } 1281 
1282 /* 1283 * Handler for MD_MN_MSG_META_DB_NEWSIDE which is used to update the 1284 * side information for each diskset mddb when a new host has been 1285 * added to the diskset. The side information is the /dev/dsk/ctds name 1286 * that the new node would use to access each mddb. 1287 * 1288 * Since this routine makes no changes to the records in the diskset mddb, 1289 * this routine only needs to be run on the master node. The master node's 1290 * kernel code will detect that portions of the mddb have changed and 1291 * will send a parse message to all nodes to re-parse parts of the mddb. 1292 * 1293 * Used when running: 1294 * metaset -s set_name -a -h new_hostname 1295 */ 1296 /*ARGSUSED*/ 1297 void 1298 mdmn_do_meta_db_newside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1299 { 1300 md_mn_msg_meta_db_newside_t *d; 1301 struct mddb_config c; 1302 int ret = 0; 1303 mdsetname_t *sp; 1304 md_error_t ep = mdnullerror; 1305 1306 resp->mmr_out_size = 0; 1307 resp->mmr_err_size = 0; 1308 resp->mmr_out = NULL; 1309 resp->mmr_err = NULL; 1310 resp->mmr_comm_state = MDMNE_ACK; 1311 d = (md_mn_msg_meta_db_newside_t *)((void *)(msg->msg_event_data)); 1312 1313 (void) memset(&c, 0, sizeof (c)); 1314 c.c_setno = msg->msg_setno; 1315 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1316 c.c_locator.l_blkno = d->msg_blkno; 1317 (void) strncpy(c.c_locator.l_driver, d->msg_dname, 1318 sizeof (c.c_locator.l_driver)); 1319 c.c_devname = d->msg_splitname; 1320 c.c_locator.l_mnum = d->msg_mnum; 1321 c.c_multi_node = 1; 1322 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1323 (void) mdstealerror(&(resp->mmr_ep), &ep); 1324 resp->mmr_exitval = -1; 1325 return; 1326 } 1327 (void) strcpy(c.c_setname, sp->setname); 1328 c.c_sideno = d->msg_sideno; 1329 1330 if ((ret = metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL)) != 0) { 1331 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1332 } 1333 resp->mmr_exitval = ret; 1334 } 1335 1336 /* 1337 * Handler for 
MD_MN_MSG_META_DB_DELSIDE which is used to remove the 1338 * side information for each diskset mddb when a host has been 1339 * deleted from the diskset. The side information is the /dev/dsk/ctds name 1340 * that the node would use to access each mddb. 1341 * 1342 * Since this routine makes no changes to the records in the diskset mddb, 1343 * this routine only needs to be run on the master node. The master node's 1344 * kernel code will detect that portions of the mddb have changed and 1345 * will send a parse message to all nodes to re-parse parts of the mddb. 1346 * 1347 * Used when running: 1348 * metaset -s set_name -d -h hostname 1349 */ 1350 /*ARGSUSED*/ 1351 void 1352 mdmn_do_meta_db_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1353 { 1354 md_mn_msg_meta_db_delside_t *d; 1355 mddb_config_t c; 1356 int ret = 0; 1357 mdsetname_t *sp; 1358 md_error_t ep = mdnullerror; 1359 1360 resp->mmr_out_size = 0; 1361 resp->mmr_err_size = 0; 1362 resp->mmr_out = NULL; 1363 resp->mmr_err = NULL; 1364 resp->mmr_comm_state = MDMNE_ACK; 1365 d = (md_mn_msg_meta_db_delside_t *)((void *)(msg->msg_event_data)); 1366 1367 (void) memset(&c, 0, sizeof (c)); 1368 c.c_setno = msg->msg_setno; 1369 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1370 c.c_locator.l_blkno = d->msg_blkno; 1371 c.c_multi_node = 1; 1372 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1373 (void) mdstealerror(&(resp->mmr_ep), &ep); 1374 resp->mmr_exitval = -1; 1375 return; 1376 } 1377 (void) strcpy(c.c_setname, sp->setname); 1378 c.c_sideno = d->msg_sideno; 1379 1380 if ((ret = metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL)) != 0) { 1381 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1382 } 1383 resp->mmr_exitval = ret; 1384 } 1385 1386 /* 1387 * Handler for MD_MN_MSG_META_MD_ADDSIDE which is used to add the 1388 * side information for each diskset metadevice component (if that 1389 * component is a disk) when a host has been added to the diskset. 
1390 * The side information is the /dev/dsk/ctds name that the node would 1391 * use to access the metadevice component. 1392 * 1393 * This routine makes changes to the mddb records and must be run 1394 * on all nodes. 1395 * 1396 * Used when running: 1397 * metaset -s set_name -a -h new_hostname 1398 */ 1399 /*ARGSUSED*/ 1400 void 1401 mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1402 { 1403 md_mn_msg_meta_md_addside_t *d; 1404 mdnm_params_t nm; 1405 mdsetname_t *sp; 1406 char *cname, *dname; 1407 minor_t mnum; 1408 int done, i; 1409 md_error_t ep = mdnullerror; 1410 1411 resp->mmr_out_size = 0; 1412 resp->mmr_err_size = 0; 1413 resp->mmr_out = NULL; 1414 resp->mmr_err = NULL; 1415 resp->mmr_comm_state = MDMNE_ACK; 1416 d = (md_mn_msg_meta_md_addside_t *)((void *)(msg->msg_event_data)); 1417 1418 (void) memset(&nm, 0, sizeof (nm)); 1419 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1420 (void) mdstealerror(&(resp->mmr_ep), &ep); 1421 resp->mmr_exitval = -1; 1422 return; 1423 } 1424 /* While loop continues until IOCNXTKEY_NM gives nm.key of KEYWILD */ 1425 /*CONSTCOND*/ 1426 while (1) { 1427 nm.mde = mdnullerror; 1428 nm.setno = msg->msg_setno; 1429 nm.side = d->msg_otherside; 1430 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) { 1431 (void) mdstealerror(&(resp->mmr_ep), &nm.mde); 1432 resp->mmr_exitval = -1; 1433 return; 1434 } 1435 1436 /* Normal exit path is to eventually get a KEYWILD */ 1437 if (nm.key == MD_KEYWILD) { 1438 resp->mmr_exitval = 0; 1439 return; 1440 } 1441 1442 nm.devname = (uintptr_t)meta_getnmbykey(msg->msg_setno, 1443 d->msg_otherside, nm.key, &ep); 1444 if (nm.devname == NULL) { 1445 (void) mdstealerror(&(resp->mmr_ep), &ep); 1446 resp->mmr_exitval = -1; 1447 return; 1448 } 1449 nm.side = d->msg_sideno; 1450 if ((done = meta_getside_devinfo(sp, 1451 (char *)(uintptr_t)nm.devname, 1452 d->msg_sideno, &cname, &dname, &mnum, &ep)) == -1) { 1453 (void) mdstealerror(&(resp->mmr_ep), &ep); 
1454 Free((void *)(uintptr_t)nm.devname); 1455 resp->mmr_exitval = -1; 1456 return; 1457 } 1458 Free((void *)(uintptr_t)nm.devname); 1459 if (done != 1) { 1460 Free(cname); 1461 Free(dname); 1462 resp->mmr_exitval = -1; 1463 return; 1464 } 1465 1466 /* 1467 * The device reference count can be greater than 1 if 1468 * more than one softpart is configured on top of the 1469 * same device. If this is the case then we want to 1470 * increment the count to sync up with the other sides. 1471 */ 1472 for (i = 0; i < nm.ref_count; i++) { 1473 if (add_name(sp, d->msg_sideno, nm.key, dname, mnum, 1474 cname, &ep) == -1) { 1475 (void) mdstealerror(&(resp->mmr_ep), &ep); 1476 Free(cname); 1477 Free(dname); 1478 resp->mmr_exitval = -1; 1479 return; 1480 } 1481 } 1482 Free(cname); 1483 Free(dname); 1484 } 1485 1486 /*NOTREACHED*/ 1487 } 1488 /* 1489 * Handler for MD_MN_MSG_META_MD_DELSIDE which is used to delete the 1490 * side information for each diskset metadevice component (if that 1491 * component is a disk) when a host has been removed from the diskset. 1492 * The side information is the /dev/dsk/ctds name that the node would 1493 * use to access the metadevice component. 1494 * 1495 * This routine makes changes to the mddb records and must be run 1496 * on all nodes. 
1497 * 1498 * Used when running: 1499 * metaset -s set_name -d -h hostname 1500 */ 1501 /*ARGSUSED*/ 1502 void 1503 mdmn_do_meta_md_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1504 { 1505 md_mn_msg_meta_md_delside_t *d; 1506 mdnm_params_t nm; 1507 mdsetname_t *sp; 1508 md_error_t ep = mdnullerror; 1509 int i; 1510 1511 resp->mmr_out_size = 0; 1512 resp->mmr_err_size = 0; 1513 resp->mmr_out = NULL; 1514 resp->mmr_err = NULL; 1515 resp->mmr_comm_state = MDMNE_ACK; 1516 d = (md_mn_msg_meta_md_delside_t *)((void *)(msg->msg_event_data)); 1517 1518 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1519 (void) mdstealerror(&(resp->mmr_ep), &ep); 1520 resp->mmr_exitval = -1; 1521 return; 1522 } 1523 1524 (void) memset(&nm, 0, sizeof (nm)); 1525 nm.key = MD_KEYWILD; 1526 /*CONSTCOND*/ 1527 while (1) { 1528 nm.mde = mdnullerror; 1529 nm.setno = msg->msg_setno; 1530 nm.side = MD_SIDEWILD; 1531 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) { 1532 (void) mdstealerror(&(resp->mmr_ep), &nm.mde); 1533 resp->mmr_exitval = -1; 1534 return; 1535 } 1536 1537 /* Normal exit path is to eventually get a KEYWILD */ 1538 if (nm.key == MD_KEYWILD) { 1539 resp->mmr_exitval = 0; 1540 return; 1541 } 1542 1543 /* 1544 * The device reference count can be greater than 1 if 1545 * more than one softpart is configured on top of the 1546 * same device. If this is the case then we want to 1547 * decrement the count to zero so the entry can be 1548 * actually removed. 1549 */ 1550 for (i = 0; i < nm.ref_count; i++) { 1551 if (del_name(sp, d->msg_sideno, nm.key, &ep) == -1) { 1552 (void) mdstealerror(&(resp->mmr_ep), &ep); 1553 resp->mmr_exitval = -1; 1554 return; 1555 } 1556 } 1557 } 1558 1559 /*NOTREACHED*/ 1560 } 1561 1562 /* 1563 * Handler for MD_MN_MSG_MDDB_OPTRECERR which is used to notify 1564 * the master node that a node has seen an error when attempting to 1565 * write to the optimized resync records that reside on 2 of the diskset 1566 * mddbs. 
Master node will mark the failed replica in error and this 1567 * will send a parse message to all nodes to re-read parts of the mddb 1568 * and to fix their optimized resync records based on this information. 1569 */ 1570 /*ARGSUSED*/ 1571 void 1572 mdmn_do_mddb_optrecerr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1573 { 1574 md_mn_msg_mddb_optrecerr_t *d; 1575 mddb_optrec_parm_t mop; 1576 int ret; 1577 int i; 1578 1579 resp->mmr_out_size = 0; 1580 resp->mmr_err_size = 0; 1581 resp->mmr_out = NULL; 1582 resp->mmr_err = NULL; 1583 resp->mmr_comm_state = MDMNE_ACK; 1584 d = (md_mn_msg_mddb_optrecerr_t *)((void *)(msg->msg_event_data)); 1585 1586 (void) memset(&mop, 0, sizeof (mop)); 1587 mop.c_setno = msg->msg_setno; 1588 for (i = 0; i < 2; i++) { 1589 mop.c_recerr[i] = d->msg_recerr[i]; 1590 } 1591 ret = metaioctl(MD_MN_MDDB_OPTRECFIX, &mop, &mop.c_mde, NULL); 1592 if (ret) 1593 (void) mdstealerror(&(resp->mmr_ep), &mop.c_mde); 1594 1595 resp->mmr_exitval = ret; 1596 } 1597 1598 int 1599 mdmn_smgen_test6(md_mn_msg_t *msg, md_mn_msg_t **msglist) 1600 { 1601 md_mn_msg_t *nmsg; 1602 1603 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1604 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1605 1606 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1607 nmsg->msg_setno = msg->msg_setno; 1608 nmsg->msg_type = MD_MN_MSG_TEST2; 1609 nmsg->msg_event_size = sizeof ("test2"); 1610 nmsg->msg_event_data = Strdup("test2"); 1611 msglist[0] = nmsg; 1612 1613 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1614 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1615 1616 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1617 nmsg->msg_setno = msg->msg_setno; 1618 nmsg->msg_type = MD_MN_MSG_TEST2; 1619 nmsg->msg_event_size = sizeof ("test2"); 1620 nmsg->msg_event_data = Strdup("test2"); 1621 msglist[1] = nmsg; 1622 1623 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1624 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1625 1626 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log 
submessages */ 1627 nmsg->msg_setno = msg->msg_setno; 1628 nmsg->msg_type = MD_MN_MSG_TEST3; 1629 nmsg->msg_event_size = sizeof ("test3"); 1630 nmsg->msg_event_data = Strdup("test3"); 1631 msglist[2] = nmsg; 1632 1633 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1634 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1635 1636 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1637 nmsg->msg_setno = msg->msg_setno; 1638 nmsg->msg_type = MD_MN_MSG_TEST4; 1639 nmsg->msg_event_size = sizeof ("test4"); 1640 nmsg->msg_event_data = Strdup("test4"); 1641 msglist[3] = nmsg; 1642 1643 return (4); /* Return the number of submessages generated */ 1644 } 1645 1646 /* 1647 * This is to send an MD_IOCSET ioctl to all nodes to create a soft 1648 * partition. 1649 */ 1650 /*ARGSUSED*/ 1651 void 1652 mdmn_do_iocset(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1653 { 1654 md_mn_msg_iocset_t *d; 1655 int ret; 1656 set_t setno; 1657 mdsetname_t *sp; 1658 mdname_t *np; 1659 md_error_t mde = mdnullerror; 1660 1661 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1662 resp->mmr_out_size = 0; 1663 resp->mmr_err_size = 0; 1664 resp->mmr_out = NULL; 1665 resp->mmr_err = NULL; 1666 d = (md_mn_msg_iocset_t *)(void *)msg->msg_event_data; 1667 1668 setno = MD_MIN2SET(d->iocset_params.mnum); 1669 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1670 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1671 "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno); 1672 resp->mmr_exitval = 1; 1673 return; 1674 } 1675 1676 /* 1677 * Device should be in the namespace already 1678 */ 1679 if ((np = metamnumname(&sp, d->iocset_params.mnum, 1, &mde)) == NULL) { 1680 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1681 "MD_MN_MSG_IOCSET: Invalid mnum %d\n"), 1682 d->iocset_params.mnum); 1683 resp->mmr_exitval = 1; 1684 return; 1685 } 1686 1687 /* 1688 * Create unit structure 1689 */ 1690 d->iocset_params.mdp = (uintptr_t)&d->unit; /* set pointer to unit */ 1691 ret = metaioctl(MD_IOCSET, &(d->iocset_params), &mde, 
np->cname); 1692 resp->mmr_exitval = ret; 1693 } 1694 1695 /* 1696 * This is to update the status of a softpart 1697 */ 1698 /*ARGSUSED*/ 1699 void 1700 mdmn_do_sp_setstat(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1701 { 1702 md_mn_msg_sp_setstat_t *d; 1703 int ret; 1704 set_t setno; 1705 mdsetname_t *sp; 1706 minor_t mnum; 1707 md_error_t mde = mdnullerror; 1708 1709 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1710 resp->mmr_out_size = 0; 1711 resp->mmr_err_size = 0; 1712 resp->mmr_out = NULL; 1713 resp->mmr_err = NULL; 1714 d = (md_mn_msg_sp_setstat_t *)(void *)msg->msg_event_data; 1715 1716 mnum = d->sp_setstat_mnum; 1717 setno = MD_MIN2SET(mnum); 1718 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1719 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1720 "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno); 1721 resp->mmr_exitval = 1; 1722 return; 1723 } 1724 1725 ret = meta_sp_setstatus(sp, &mnum, 1, d->sp_setstat_status, &mde); 1726 resp->mmr_exitval = ret; 1727 } 1728 1729 /* 1730 * This is to add a key to the namespace 1731 */ 1732 /*ARGSUSED*/ 1733 void 1734 mdmn_do_addkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1735 { 1736 md_mn_msg_addkeyname_t *d; 1737 int ret; 1738 set_t setno; 1739 mdsetname_t *sp; 1740 md_error_t mde = mdnullerror; 1741 mdname_t *compnp; 1742 1743 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1744 resp->mmr_out_size = 0; 1745 resp->mmr_err_size = 0; 1746 resp->mmr_out = NULL; 1747 resp->mmr_err = NULL; 1748 d = (md_mn_msg_addkeyname_t *)(void *)msg->msg_event_data; 1749 1750 setno = d->addkeyname_setno; 1751 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1752 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1753 "MD_MN_ADDKEYNAME: Invalid setno %d\n"), setno); 1754 resp->mmr_exitval = -1; 1755 return; 1756 } 1757 1758 compnp = metaname(&sp, d->addkeyname_name, UNKNOWN, &mde); 1759 if (compnp != NULL) { 1760 ret = add_key_name(sp, compnp, NULL, &mde); 1761 if (ret < 0) 1762 resp->mmr_exitval = -1; 1763 else 
1764 resp->mmr_exitval = compnp->key; 1765 } else { 1766 resp->mmr_exitval = -1; 1767 } 1768 } 1769 1770 /* 1771 * This is to delete a key from the namespace 1772 */ 1773 /*ARGSUSED*/ 1774 void 1775 mdmn_do_delkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1776 { 1777 md_mn_msg_delkeyname_t *d; 1778 int ret; 1779 set_t setno; 1780 mdsetname_t *sp; 1781 md_error_t mde = mdnullerror; 1782 mdname_t *compnp; 1783 1784 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1785 resp->mmr_out_size = 0; 1786 resp->mmr_err_size = 0; 1787 resp->mmr_out = NULL; 1788 resp->mmr_err = NULL; 1789 d = (md_mn_msg_delkeyname_t *)(void *)msg->msg_event_data; 1790 1791 setno = d->delkeyname_setno; 1792 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1793 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1794 "MD_MN_DELKEYNAME: Invalid setno %d\n"), setno); 1795 resp->mmr_exitval = -1; 1796 return; 1797 } 1798 1799 compnp = metadevname(&sp, d->delkeyname_dev, &mde); 1800 if (compnp != NULL) { 1801 /* 1802 * Reset the key value for the name. This is required because 1803 * any previous call of del_key_name for the same component 1804 * will have resulted in the key value being reset to MD_KEYBAD 1805 * even though there may still be references to this component. 1806 */ 1807 compnp->key = d->delkeyname_key; 1808 ret = del_key_name(sp, compnp, &mde); 1809 resp->mmr_exitval = ret; 1810 } else { 1811 resp->mmr_exitval = -1; 1812 } 1813 } 1814 1815 /* 1816 * This is to get the value of tstate from the master node. We use this 1817 * to get the ABR state of a metadevice from the master. 
1818 */ 1819 /*ARGSUSED*/ 1820 void 1821 mdmn_do_get_tstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1822 { 1823 md_mn_msg_gettstate_t *d; 1824 int ret; 1825 uint_t tstate; 1826 md_error_t mde = mdnullerror; 1827 1828 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1829 resp->mmr_out_size = 0; 1830 resp->mmr_err_size = 0; 1831 resp->mmr_out = NULL; 1832 resp->mmr_err = NULL; 1833 d = (md_mn_msg_gettstate_t *)(void *)msg->msg_event_data; 1834 1835 ret = meta_get_tstate(d->gettstate_dev, &tstate, &mde); 1836 if (ret != 0) { 1837 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1838 "MD_MN_GET_TSTATE: Invalid dev %llx\n"), d->gettstate_dev); 1839 tstate = 0; 1840 } 1841 resp->mmr_exitval = tstate; 1842 } 1843 1844 /* 1845 * This is to get the mirror ABR state and the state of its submirrors from 1846 * the master node. We need this to ensure consistent output from metastat 1847 * when a new node joins the cluster during a resync. Without this the 1848 * submirror status will be incorrect until the whole resync is complete which 1849 * may take days for very large metadevices. 
1850 */ 1851 /*ARGSUSED*/ 1852 void 1853 mdmn_do_get_mirstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1854 { 1855 md_mn_msg_mir_state_t *d; 1856 md_mn_msg_mir_state_res_t *res; /* Results */ 1857 set_t setno; 1858 mdsetname_t *sp; /* Set name */ 1859 mdname_t *mirnp; /* Mirror name */ 1860 md_error_t mde = mdnullerror; 1861 mm_unit_t *mm; /* Mirror */ 1862 int smi; 1863 uint_t tstate; 1864 1865 resp->mmr_comm_state = MDMNE_ACK; 1866 resp->mmr_out_size = sizeof (md_mn_msg_mir_state_res_t); 1867 resp->mmr_err_size = 0; 1868 resp->mmr_out = Malloc(resp->mmr_out_size); 1869 resp->mmr_err = NULL; 1870 d = (md_mn_msg_mir_state_t *)(void *)msg->msg_event_data; 1871 res = (md_mn_msg_mir_state_res_t *)(void *)resp->mmr_out; 1872 1873 /* Validate set information from minor number */ 1874 setno = MD_MIN2SET(d->mir_state_mnum); 1875 sp = metasetnosetname(setno, &mde); 1876 if (sp == NULL) { 1877 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1878 "MD_MN_GET_MIRROR_STATE: Invalid set %d\n"), setno); 1879 resp->mmr_exitval = 1; /* Failure */ 1880 Free(resp->mmr_out); 1881 resp->mmr_out_size = 0; 1882 return; 1883 } 1884 1885 /* Construct mirror name from minor number */ 1886 mirnp = metamnumname(&sp, d->mir_state_mnum, 0, &mde); 1887 if (mirnp == NULL) { 1888 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1889 "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"), 1890 d->mir_state_mnum); 1891 resp->mmr_exitval = 2; /* Failure */ 1892 Free(resp->mmr_out); 1893 resp->mmr_out_size = 0; 1894 return; 1895 } 1896 1897 /* Get common mirror structure */ 1898 mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, &mde); 1899 if (mm == NULL) { 1900 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1901 "MD_MN_GET_MIRROR_STATE: Invalid mirror minor %x\n"), 1902 d->mir_state_mnum); 1903 resp->mmr_exitval = 3; /* Failure */ 1904 Free(resp->mmr_out); 1905 resp->mmr_out_size = 0; 1906 return; 1907 } 1908 1909 if (meta_get_tstate(d->mir_state_mnum, &tstate, &mde) != 0) { 1910 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1911 
"MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"), 1912 d->mir_state_mnum); 1913 resp->mmr_exitval = 4; /* Failure */ 1914 Free(resp->mmr_out); 1915 resp->mmr_out_size = 0; 1916 return; 1917 } 1918 /* 1919 * Fill in the sm_state/sm_flags value in the results structure which 1920 * gets passed back to the message originator 1921 */ 1922 resp->mmr_exitval = 0; 1923 for (smi = 0; (smi < NMIRROR); smi++) { 1924 mm_submirror_t *mmsp = &mm->un_sm[smi]; 1925 res->sm_state[smi] = mmsp->sm_state; 1926 res->sm_flags[smi] = mmsp->sm_flags; 1927 } 1928 /* Returm value of tstate for mirror */ 1929 res->mir_tstate = tstate; 1930 } 1931 1932 /* 1933 * This is to issue an ioctl to call poke_hotspares 1934 */ 1935 /*ARGSUSED*/ 1936 void 1937 mdmn_do_poke_hotspares(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1938 { 1939 1940 md_mn_poke_hotspares_t pokehsp; 1941 md_mn_msg_pokehsp_t *d; 1942 1943 resp->mmr_out_size = 0; 1944 resp->mmr_err_size = 0; 1945 resp->mmr_out = NULL; 1946 resp->mmr_err = NULL; 1947 resp->mmr_comm_state = MDMNE_ACK; 1948 d = (md_mn_msg_pokehsp_t *)(void *)msg->msg_event_data; 1949 1950 (void) memset(&pokehsp, 0, sizeof (pokehsp)); 1951 MD_SETDRIVERNAME(&pokehsp, MD_MIRROR, d->pokehsp_setno); 1952 1953 resp->mmr_exitval = metaioctl(MD_MN_POKE_HOTSPARES, &pokehsp, 1954 &pokehsp.mde, NULL); 1955 } 1956 1957 /* 1958 * Called to create a softpart during a metarecover operation 1959 */ 1960 /*ARGSUSED*/ 1961 void 1962 mdmn_do_addmdname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1963 { 1964 md_mn_msg_addmdname_t *d; 1965 md_error_t mde = mdnullerror; 1966 mdsetname_t *sp; 1967 int init = 0; 1968 mdkey_t key; 1969 minor_t mnum; 1970 1971 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1972 resp->mmr_out_size = 0; 1973 resp->mmr_err_size = 0; 1974 resp->mmr_out = NULL; 1975 resp->mmr_err = NULL; 1976 d = (md_mn_msg_addmdname_t *)(void *)msg->msg_event_data; 1977 1978 if ((sp = metasetnosetname(d->addmdname_setno, &mde)) == NULL) { 1979 
syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1980 "MD_MN_MSG_ADDMDNAME: Invalid setno %d\n"), 1981 d->addmdname_setno); 1982 resp->mmr_exitval = 1; 1983 return; 1984 } 1985 1986 /* 1987 * If device node does not exist then init it 1988 */ 1989 if (!is_existing_meta_hsp(sp, d->addmdname_name)) { 1990 if ((key = meta_init_make_device(&sp, d->addmdname_name, 1991 &mde)) <= 0) { 1992 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1993 "MD_MN_MSG_ADDMDNAME: Invalid name %s\n"), 1994 d->addmdname_name); 1995 resp->mmr_exitval = 1; 1996 return; 1997 } 1998 1999 init = 1; 2000 } 2001 2002 /* 2003 * We should have it 2004 */ 2005 if (metaname(&sp, d->addmdname_name, META_DEVICE, &mde) == NULL) { 2006 2007 if (init) { 2008 if (meta_getnmentbykey(sp->setno, MD_SIDEWILD, 2009 key, NULL, &mnum, NULL, &mde) != NULL) { 2010 (void) metaioctl(MD_IOCREM_DEV, &mnum, 2011 &mde, NULL); 2012 } 2013 (void) del_self_name(sp, key, &mde); 2014 } 2015 2016 resp->mmr_exitval = 1; 2017 return; 2018 } 2019 2020 resp->mmr_exitval = 0; 2021 } 2022