1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <wait.h> 32 #include <sys/time.h> 33 #include <syslog.h> 34 35 #include <meta.h> 36 #include <sys/lvm/mdio.h> 37 #include <sys/lvm/md_mddb.h> 38 #include <sys/lvm/md_mirror.h> 39 40 #define MAX_N_ARGS 64 41 #define MAX_ARG_LEN 1024 42 43 /* we reserve 1024 bytes for stdout and the same for stderr */ 44 #define MAX_OUT 1024 45 #define MAX_ERR 1024 46 #define JUNK 128 /* used to flush stdout and stderr */ 47 48 49 /*ARGSUSED*/ 50 void 51 mdmn_do_cmd(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 52 { 53 54 /* 55 * We are given one string containing all the arguments 56 * For execvp() we have to regenerate the arguments again 57 */ 58 int arg; /* argument that is currently been built */ 59 int index; /* runs through arg above */ 60 int i; /* helper for for loop */ 61 char *argv[MAX_N_ARGS]; /* argument array for execvp */ 62 char *cp; /* runs through the given command line string */ 63 char 
*command = NULL; /* the command we call locally */ 64 int pout[2]; /* pipe for stdout */ 65 int perr[2]; /* pipe for stderr */ 66 pid_t pid; /* process id */ 67 68 cp = msg->msg_event_data; 69 arg = 0; 70 index = 0; 71 72 /* init the args array alloc the first one and null out the rest */ 73 argv[0] = Malloc(MAX_ARG_LEN); 74 for (i = 1; i < MAX_N_ARGS; i++) { 75 argv[i] = NULL; 76 } 77 78 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 79 80 while (*cp != '\0') { 81 if (arg == MAX_N_ARGS) { 82 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 83 "PANIC: too many arguments specified\n")); 84 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 85 goto out; 86 } 87 if (index == MAX_ARG_LEN) { 88 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 89 "PANIC: argument too long\n")); 90 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 91 goto out; 92 } 93 94 if ((*cp != ' ') && (*cp != '\t')) { 95 /* 96 * No space or tab: copy char into current 97 * argv and advance both pointers 98 */ 99 100 argv[arg][index] = *cp; 101 cp++; /* next char in command line */ 102 index++; /* next char in argument */ 103 } else { 104 /* 105 * space or tab: terminate current argv, 106 * advance arg, reset pointer into arg, 107 * advance pointer in command line 108 */ 109 argv[arg][index] = '\0'; 110 arg++; /* next argument */ 111 argv[arg] = Malloc(MAX_ARG_LEN); 112 cp++; /* next char in command line */ 113 index = 0; /* starts at char 0 */ 114 } 115 } 116 /* terminate the last real argument */ 117 argv[arg][index] = '\0'; 118 /* the last argument is an NULL pointer */ 119 argv[++arg] = NULL; 120 if (pipe(pout) < 0) { 121 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 122 "PANIC: pipe failed\n")); 123 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 124 goto out; 125 } 126 if (pipe(perr) < 0) { 127 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 128 "PANIC: pipe failed\n")); 129 (void) close(pout[0]); 130 (void) close(pout[1]); 131 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 132 goto out; 133 } 134 command = 
Strdup(argv[0]); 135 (void) strcat(argv[0], ".rpc_call"); 136 pid = fork1(); 137 if (pid == (pid_t)-1) { 138 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 139 "PANIC: fork failed\n")); 140 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 141 (void) close(pout[0]); 142 (void) close(pout[1]); 143 (void) close(perr[0]); 144 (void) close(perr[1]); 145 goto out; 146 } else if (pid == (pid_t)0) { 147 /* child */ 148 (void) close(0); 149 /* close the reading channels of pout and perr */ 150 (void) close(pout[0]); 151 (void) close(perr[0]); 152 /* redirect stdout */ 153 if (dup2(pout[1], 1) < 0) { 154 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 155 "PANIC: dup2 failed\n")); 156 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 157 return; 158 } 159 160 /* redirect stderr */ 161 if (dup2(perr[1], 2) < 0) { 162 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, 163 "PANIC: dup2 failed\n")); 164 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 165 return; 166 } 167 168 (void) execvp(command, (char *const *)argv); 169 perror("execvp"); 170 _exit(1); 171 } else { 172 /* parent process */ 173 int stat_loc; 174 char *out, *err; /* for stdout and stderr of child */ 175 int i; /* index into the aboves */ 176 char junk[JUNK]; 177 int out_done = 0; 178 int err_done = 0; 179 int out_read = 0; 180 int err_read = 0; 181 int maxfd; 182 fd_set rset; 183 184 185 /* close the writing channels of pout and perr */ 186 (void) close(pout[1]); 187 (void) close(perr[1]); 188 resp->mmr_out = Malloc(MAX_OUT); 189 resp->mmr_err = Malloc(MAX_ERR); 190 resp->mmr_out_size = MAX_OUT; 191 resp->mmr_err_size = MAX_ERR; 192 out = resp->mmr_out; 193 err = resp->mmr_err; 194 FD_ZERO(&rset); 195 while ((out_done == 0) || (err_done == 0)) { 196 FD_SET(pout[0], &rset); 197 FD_SET(perr[0], &rset); 198 maxfd = max(pout[0], perr[0]) + 1; 199 (void) select(maxfd, &rset, NULL, NULL, NULL); 200 201 /* 202 * Did the child produce some output to stdout? 
203 * If so, read it until we either reach the end of the 204 * output or until we read MAX_OUT bytes. 205 * Whatever comes first. 206 * In case we already read MAX_OUT bytes we simply 207 * read away the output into a junk buffer. 208 * Just to make the child happy 209 */ 210 if (FD_ISSET(pout[0], &rset)) { 211 if (MAX_OUT - out_read - 1 > 0) { 212 i = read(pout[0], out, 213 MAX_OUT - out_read); 214 out_read += i; 215 out += i; 216 } else { 217 /* buffer full, empty stdout */ 218 i = read(pout[0], junk, JUNK); 219 } 220 if (i == 0) { 221 /* stdout is closed by child */ 222 out_done++; 223 } 224 } 225 /* same comment as above | sed -e 's/stdout/stderr/' */ 226 if (FD_ISSET(perr[0], &rset)) { 227 if (MAX_ERR - err_read - 1 > 0) { 228 i = read(perr[0], err, 229 MAX_ERR - err_read); 230 err_read += i; 231 err += i; 232 } else { 233 /* buffer full, empty stderr */ 234 i = read(perr[0], junk, JUNK); 235 } 236 if (i == 0) { 237 /* stderr is closed by child */ 238 err_done++; 239 } 240 } 241 } 242 resp->mmr_out[out_read] = '\0'; 243 resp->mmr_err[err_read] = '\0'; 244 245 while (waitpid(pid, &stat_loc, 0) < 0) { 246 if (errno != EINTR) { 247 resp->mmr_comm_state = MDMNE_HANDLER_FAILED; 248 break; 249 } 250 } 251 if (errno == 0) 252 resp->mmr_exitval = WEXITSTATUS(stat_loc); 253 254 (void) close(pout[0]); 255 (void) close(perr[0]); 256 } 257 out: 258 for (i = 0; i < MAX_N_ARGS; i++) { 259 if (argv[i] != NULL) { 260 free(argv[i]); 261 } 262 } 263 if (command != NULL) { 264 Free(command); 265 } 266 } 267 268 /* 269 * This is for checking if a metadevice is opened, and for 270 * locking in case it is not and for 271 * unlocking a locked device 272 */ 273 /*ARGSUSED*/ 274 void 275 mdmn_do_clu(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 276 { 277 if (msg->msg_type == MD_MN_MSG_CLU_CHECK) { 278 md_isopen_t *d; 279 int ret; 280 281 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 282 resp->mmr_out_size = 0; 283 resp->mmr_err_size = 0; 284 resp->mmr_out = NULL; 285 
resp->mmr_err = NULL; 286 d = (md_isopen_t *)(void *)msg->msg_event_data; 287 ret = metaioctl(MD_IOCISOPEN, d, &(d->mde), NULL); 288 /* 289 * In case the ioctl succeeded, return the open state of 290 * the metadevice. Otherwise we return the error the ioctl 291 * produced. As this is not zero, no attempt is made to 292 * remove/rename the metadevice later 293 */ 294 295 if (ret == 0) { 296 resp->mmr_exitval = d->isopen; 297 } else { 298 /* 299 * When doing a metaclear, one node after the other 300 * does the two steps: 301 * - check on all nodes if this md is opened. 302 * - remove the md locally. 303 * When the 2nd node asks all nodes if the md is 304 * open it starts with the first node. 305 * As this already removed the md, the check 306 * returns MDE_UNIT_NOT_SETUP. 307 * In order to not keep the 2nd node from proceeding, 308 * we map this to an Ok. 309 */ 310 if (mdismderror(&(d->mde), MDE_UNIT_NOT_SETUP)) { 311 mdclrerror(&(d->mde)); 312 ret = 0; 313 } 314 315 resp->mmr_exitval = ret; 316 } 317 } 318 } 319 320 /* handler for MD_MN_MSG_REQUIRE_OWNER */ 321 /*ARGSUSED*/ 322 void 323 mdmn_do_req_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 324 { 325 md_set_mmown_params_t setown; 326 md_mn_req_owner_t *d; 327 int ret, n = 0; 328 329 resp->mmr_out_size = 0; 330 resp->mmr_err_size = 0; 331 resp->mmr_out = NULL; 332 resp->mmr_err = NULL; 333 resp->mmr_comm_state = MDMNE_ACK; 334 d = (md_mn_req_owner_t *)(void *)msg->msg_event_data; 335 336 (void) memset(&setown, 0, sizeof (setown)); 337 MD_SETDRIVERNAME(&setown, MD_MIRROR, MD_MIN2SET(d->mnum)) 338 setown.d.mnum = d->mnum; 339 setown.d.owner = d->owner; 340 341 /* Retry ownership change if we get EAGAIN returned */ 342 while ((ret = metaioctl(MD_MN_SET_MM_OWNER, &setown, &setown.mde, NULL)) 343 != 0) { 344 md_sys_error_t *ip = 345 &setown.mde.info.md_error_info_t_u.sys_error; 346 if (ip->errnum != EAGAIN) { 347 break; 348 } 349 if (n++ >= 10) { 350 break; 351 } 352 (void) sleep(1); 353 } 354 355 
resp->mmr_exitval = ret; 356 } 357 358 /* 359 * handler for MD_MN_MSG_CHOOSE_OWNER 360 * This is called when a mirror resync has no owner. The master node generates 361 * this message which is not broadcast to the other nodes. The message is 362 * required as the kernel does not have access to the nodelist for the set. 363 */ 364 /*ARGSUSED*/ 365 void 366 mdmn_do_choose_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 367 { 368 md_mn_msg_chowner_t chownermsg; 369 md_mn_msg_chooseid_t *d; 370 int ret = 0; 371 int nodecnt; 372 int nodeno; 373 uint_t nodeid; 374 uint_t myflags; 375 set_t setno; 376 mdsetname_t *sp; 377 md_set_desc *sd; 378 md_mnnode_desc *nd; 379 md_error_t mde = mdnullerror; 380 md_mn_result_t *resp1 = NULL; 381 382 resp->mmr_out_size = 0; 383 resp->mmr_err_size = 0; 384 resp->mmr_out = NULL; 385 resp->mmr_err = NULL; 386 resp->mmr_comm_state = MDMNE_ACK; 387 d = (md_mn_msg_chooseid_t *)(void *)msg->msg_event_data; 388 389 /* 390 * The node to be chosen will be the resync count for the set 391 * modulo the number of live nodes in the set 392 */ 393 setno = MD_MIN2SET(d->msg_chooseid_mnum); 394 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 395 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 396 "MD_MN_MSG_CHOOSE_OWNER: Invalid setno %d\n"), setno); 397 resp->mmr_exitval = 1; 398 return; 399 } 400 if ((sd = metaget_setdesc(sp, &mde)) == NULL) { 401 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 402 "MD_MN_MSG_CHOOSE_OWNER: Invalid set pointer\n")); 403 resp->mmr_exitval = 1; 404 return; 405 } 406 407 /* Count the number of live nodes */ 408 nodecnt = 0; 409 nd = sd->sd_nodelist; 410 while (nd) { 411 if (nd->nd_flags & MD_MN_NODE_ALIVE) 412 nodecnt++; 413 nd = nd->nd_next; 414 } 415 nodeno = (d->msg_chooseid_rcnt%nodecnt); 416 417 /* 418 * If we've been called with msg_chooseid_set_node set TRUE then we 419 * are simply re-setting the owner id to ensure consistency across 420 * the cluster. 
421 * If the flag is reset (B_FALSE) we are requesting a new owner to be 422 * determined. 423 */ 424 if (d->msg_chooseid_set_node) { 425 nodeid = d->msg_chooseid_rcnt; 426 } else { 427 /* scan the nodelist looking for the required node */ 428 nodecnt = 0; 429 nd = sd->sd_nodelist; 430 while (nd) { 431 if (nd->nd_flags & MD_MN_NODE_ALIVE) { 432 if (nodecnt == nodeno) 433 break; 434 nodecnt++; 435 } 436 nd = nd->nd_next; 437 } 438 nodeid = nd->nd_nodeid; 439 } 440 441 /* Send message to all nodes to make ownership change */ 442 chownermsg.msg_chowner_mnum = d->msg_chooseid_mnum; 443 chownermsg.msg_chowner_nodeid = nodeid; 444 myflags = MD_MSGF_NO_LOG; 445 446 /* inherit some flags from the parent message */ 447 myflags |= msg->msg_flags & MD_MSGF_INHERIT_BITS; 448 449 ret = mdmn_send_message(MD_MIN2SET(d->msg_chooseid_mnum), 450 MD_MN_MSG_CHANGE_OWNER, myflags, (char *)&chownermsg, 451 sizeof (chownermsg), &resp1, &mde); 452 if (resp1 != NULL) 453 free_result(resp1); 454 resp->mmr_exitval = ret; 455 } 456 457 /* 458 * Handler for MD_MN_MSG_CHANGE_OWNER 459 * This is called when we are perfoming a resync and wish to change from 460 * no mirror owner to an owner chosen by the master. 
461 * This mesage is only relevant for the new owner, the message will be 462 * ignored by all other nodes 463 */ 464 /*ARGSUSED*/ 465 void 466 mdmn_do_change_owner(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 467 { 468 md_set_mmown_params_t setown; 469 md_mn_msg_chowner_t *d; 470 int ret = 0; 471 set_t setno; 472 mdsetname_t *sp; 473 md_set_desc *sd; 474 md_error_t mde = mdnullerror; 475 476 resp->mmr_out_size = 0; 477 resp->mmr_err_size = 0; 478 resp->mmr_out = NULL; 479 resp->mmr_err = NULL; 480 resp->mmr_comm_state = MDMNE_ACK; 481 d = (md_mn_msg_chowner_t *)(void *)msg->msg_event_data; 482 483 setno = MD_MIN2SET(d->msg_chowner_mnum); 484 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 485 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 486 "MD_MN_MSG_CHANGE_OWNER: Invalid setno %d\n"), setno); 487 resp->mmr_exitval = 1; 488 return; 489 } 490 if ((sd = metaget_setdesc(sp, &mde)) == NULL) { 491 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 492 "MD_MN_MSG_CHANGE_OWNER: Invalid set pointer\n")); 493 resp->mmr_exitval = 1; 494 return; 495 } 496 497 if (d->msg_chowner_nodeid == sd->sd_mn_mynode->nd_nodeid) { 498 /* 499 * If we are the chosen owner, issue ioctl to make the 500 * ownership change 501 */ 502 (void) memset(&setown, 0, sizeof (md_set_mmown_params_t)); 503 setown.d.mnum = d->msg_chowner_mnum; 504 setown.d.owner = d->msg_chowner_nodeid; 505 setown.d.flags = MD_MN_MM_SPAWN_THREAD; 506 MD_SETDRIVERNAME(&setown, MD_MIRROR, 507 MD_MIN2SET(d->msg_chowner_mnum)); 508 509 /* 510 * Single shot at changing the the owner, if it fails EAGAIN, 511 * another node must have become the owner while we are in the 512 * process of making this choice. 
513 */ 514 515 ret = metaioctl(MD_MN_SET_MM_OWNER, &setown, 516 &(setown.mde), NULL); 517 if (ret == EAGAIN) 518 ret = 0; 519 } 520 resp->mmr_exitval = ret; 521 } 522 523 /* handler for MD_MN_MSG_SUSPEND_WRITES */ 524 /*ARGSUSED*/ 525 void 526 mdmn_do_susp_write(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 527 { 528 /* Suspend writes to a region of a mirror */ 529 md_suspend_wr_params_t suspwr_ioc; 530 md_mn_msg_suspwr_t *d; 531 int ret; 532 533 resp->mmr_out_size = 0; 534 resp->mmr_err_size = 0; 535 resp->mmr_out = NULL; 536 resp->mmr_err = NULL; 537 resp->mmr_comm_state = MDMNE_ACK; 538 d = (md_mn_msg_suspwr_t *)(void *)msg->msg_event_data; 539 540 (void) memset(&suspwr_ioc, 0, sizeof (md_suspend_wr_params_t)); 541 MD_SETDRIVERNAME(&suspwr_ioc, MD_MIRROR, 542 MD_MIN2SET(d->msg_suspwr_mnum)); 543 suspwr_ioc.mnum = d->msg_suspwr_mnum; 544 ret = metaioctl(MD_MN_SUSPEND_WRITES, &suspwr_ioc, 545 &(suspwr_ioc.mde), NULL); 546 resp->mmr_exitval = ret; 547 } 548 549 /* 550 * handler for MD_MN_MSG_STATE_UPDATE_RESWR 551 * This functions update a submirror component state and then resumes writes 552 * to the mirror 553 */ 554 /*ARGSUSED*/ 555 void 556 mdmn_do_state_upd_reswr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 557 { 558 /* Update the state of the component of a mirror */ 559 md_set_state_params_t setstate_ioc; 560 md_mn_msg_stch_t *d; 561 int ret; 562 563 resp->mmr_out_size = 0; 564 resp->mmr_err_size = 0; 565 resp->mmr_out = NULL; 566 resp->mmr_err = NULL; 567 resp->mmr_comm_state = MDMNE_ACK; 568 d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data; 569 570 (void) memset(&setstate_ioc, 0, sizeof (md_set_state_params_t)); 571 MD_SETDRIVERNAME(&setstate_ioc, MD_MIRROR, 572 MD_MIN2SET(d->msg_stch_mnum)); 573 setstate_ioc.mnum = d->msg_stch_mnum; 574 setstate_ioc.sm = d->msg_stch_sm; 575 setstate_ioc.comp = d->msg_stch_comp; 576 setstate_ioc.state = d->msg_stch_new_state; 577 setstate_ioc.hs_id = d->msg_stch_hs_id; 578 ret = 
metaioctl(MD_MN_SET_STATE, &setstate_ioc, 579 &(setstate_ioc.mde), NULL); 580 resp->mmr_exitval = ret; 581 } 582 583 /* 584 * submessage generator for MD_MN_MSG_STATE_UPDATE and MD_MN_MSG_STATE_UPDATE2 585 * This generates 2 messages, the first is SUSPEND_WRITES and 586 * depending on the type of the original message the second one is 587 * either STATE_UPDATE_RESWR or STATE_UPDATE_RESWR2 which actually does 588 * the same, but runs on a higher class. 589 */ 590 int 591 mdmn_smgen_state_upd(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 592 { 593 md_mn_msg_t *nmsg; 594 md_mn_msg_stch_t *d; 595 md_mn_msg_stch_t *stch_data; 596 md_mn_msg_suspwr_t *suspwr_data; 597 598 d = (md_mn_msg_stch_t *)(void *)msg->msg_event_data; 599 600 nmsg = Zalloc(sizeof (md_mn_msg_t)); 601 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 602 603 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 604 nmsg->msg_setno = msg->msg_setno; 605 nmsg->msg_type = MD_MN_MSG_SUSPEND_WRITES; 606 nmsg->msg_event_size = sizeof (md_mn_msg_suspwr_t); 607 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_suspwr_t)); 608 suspwr_data = (md_mn_msg_suspwr_t *)(void *)nmsg->msg_event_data; 609 suspwr_data->msg_suspwr_mnum = d->msg_stch_mnum; 610 msglist[0] = nmsg; 611 612 nmsg = Zalloc(sizeof (md_mn_msg_t)); 613 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 614 615 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 616 nmsg->msg_setno = msg->msg_setno; 617 if (msg->msg_type == MD_MN_MSG_STATE_UPDATE2) { 618 nmsg->msg_type = MD_MN_MSG_STATE_UPDATE_RESWR2; 619 } else { 620 nmsg->msg_type = MD_MN_MSG_STATE_UPDATE_RESWR; 621 } 622 nmsg->msg_event_size = sizeof (md_mn_msg_stch_t); 623 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_stch_t)); 624 stch_data = (md_mn_msg_stch_t *)(void *)nmsg->msg_event_data; 625 stch_data->msg_stch_mnum = d->msg_stch_mnum; 626 stch_data->msg_stch_sm = d->msg_stch_sm; 627 stch_data->msg_stch_comp = d->msg_stch_comp; 628 stch_data->msg_stch_new_state = 
d->msg_stch_new_state; 629 stch_data->msg_stch_hs_id = d->msg_stch_hs_id; 630 msglist[1] = nmsg; 631 return (2); /* Return the number of submessages generated */ 632 } 633 634 /* 635 * handler for MD_MN_MSG_ALLOCATE_HOTSPARE and MD_MN_MSG_ALLOCATE_HOTSPARE2 636 * This sends a message to all nodes requesting them to allocate a hotspare 637 * for the specified component. The component is specified by the mnum of 638 * the mirror, the submirror index and the component index. 639 */ 640 /*ARGSUSED*/ 641 void 642 mdmn_do_allocate_hotspare(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 643 { 644 /* Allocate a hotspare for a mirror component */ 645 md_alloc_hotsp_params_t allochsp_ioc; 646 md_mn_msg_allochsp_t *d; 647 int ret; 648 649 resp->mmr_out_size = 0; 650 resp->mmr_err_size = 0; 651 resp->mmr_out = NULL; 652 resp->mmr_err = NULL; 653 resp->mmr_comm_state = MDMNE_ACK; 654 d = (md_mn_msg_allochsp_t *)((void *)(msg->msg_event_data)); 655 656 (void) memset(&allochsp_ioc, 0, 657 sizeof (md_alloc_hotsp_params_t)); 658 MD_SETDRIVERNAME(&allochsp_ioc, MD_MIRROR, 659 MD_MIN2SET(d->msg_allochsp_mnum)); 660 allochsp_ioc.mnum = d->msg_allochsp_mnum; 661 allochsp_ioc.sm = d->msg_allochsp_sm; 662 allochsp_ioc.comp = d->msg_allochsp_comp; 663 allochsp_ioc.hs_id = d->msg_allochsp_hs_id; 664 ret = metaioctl(MD_MN_ALLOCATE_HOTSPARE, &allochsp_ioc, 665 &(allochsp_ioc.mde), NULL); 666 resp->mmr_exitval = ret; 667 } 668 669 /* 670 * handler for MD_MN_MSG_RESYNC_STARTING,MD_MN_MSG_RESYNC_FIRST, 671 * MD_MN_MSG_RESYNC_NEXT, MD_MN_MSG_RESYNC_FINISH, MD_MN_MSG_RESYNC_PHASE_DONE 672 */ 673 /*ARGSUSED*/ 674 void 675 mdmn_do_resync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 676 { 677 md_mn_msg_resync_t *d; 678 md_mn_rs_params_t respar; 679 int ret; 680 int smi; 681 682 resp->mmr_out_size = 0; 683 resp->mmr_err_size = 0; 684 resp->mmr_out = NULL; 685 resp->mmr_err = NULL; 686 resp->mmr_comm_state = MDMNE_ACK; 687 d = (md_mn_msg_resync_t *)((void *)(msg->msg_event_data)); 688 
689 (void) memset(&respar, 0, sizeof (respar)); 690 MD_SETDRIVERNAME(&respar, MD_MIRROR, 691 MD_MIN2SET(d->msg_resync_mnum)) 692 respar.msg_type = (int)msg->msg_type; 693 respar.mnum = d->msg_resync_mnum; 694 respar.rs_type = d->msg_resync_type; 695 respar.rs_start = d->msg_resync_start; 696 respar.rs_size = d->msg_resync_rsize; 697 respar.rs_done = d->msg_resync_done; 698 respar.rs_2_do = d->msg_resync_2_do; 699 respar.rs_originator = d->msg_originator; 700 respar.rs_flags = d->msg_resync_flags; 701 702 for (smi = 0; smi < NMIRROR; smi++) { 703 respar.rs_sm_state[smi] = d->msg_sm_state[smi]; 704 respar.rs_sm_flags[smi] = d->msg_sm_flags[smi]; 705 } 706 707 ret = metaioctl(MD_MN_RESYNC, &respar, &respar.mde, NULL); 708 709 resp->mmr_exitval = ret; 710 } 711 712 /* 713 * handler for MD_MN_MSG_SETSYNC 714 */ 715 /*ARGSUSED*/ 716 void 717 mdmn_do_setsync(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 718 { 719 md_mn_msg_setsync_t *d; 720 md_resync_ioctl_t ri; 721 int ret; 722 723 resp->mmr_out_size = 0; 724 resp->mmr_err_size = 0; 725 resp->mmr_out = NULL; 726 resp->mmr_err = NULL; 727 resp->mmr_comm_state = MDMNE_ACK; 728 d = (md_mn_msg_setsync_t *)((void *)(msg->msg_event_data)); 729 730 (void) memset(&ri, 0, sizeof (ri)); 731 MD_SETDRIVERNAME(&ri, MD_MIRROR, MD_MIN2SET(d->setsync_mnum)) 732 ri.ri_mnum = d->setsync_mnum; 733 ri.ri_copysize = d->setsync_copysize; 734 ri.ri_flags = d->setsync_flags; 735 736 ret = metaioctl(MD_MN_SETSYNC, &ri, &ri.mde, NULL); 737 738 resp->mmr_exitval = ret; 739 } 740 741 /* 742 * handler for MD_MN_MSG_SET_CAP. As this handler can deal with both mirrors 743 * and soft partitions, the driver name that is required for the ioctl call 744 * is included in the message. 
745 */ 746 /*ARGSUSED*/ 747 void 748 mdmn_do_set_cap(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 749 { 750 md_mn_msg_setcap_t *d; 751 md_mn_setcap_params_t setcap_ioc; 752 minor_t mnum; 753 int ret; 754 755 resp->mmr_out_size = 0; 756 resp->mmr_err_size = 0; 757 resp->mmr_out = NULL; 758 resp->mmr_err = NULL; 759 resp->mmr_comm_state = MDMNE_ACK; 760 d = (md_mn_msg_setcap_t *)((void *)(msg->msg_event_data)); 761 mnum = d->msg_setcap_mnum; 762 763 (void) memset(&setcap_ioc, 0, sizeof (setcap_ioc)); 764 765 MD_SETDRIVERNAME(&setcap_ioc, d->msg_setcap_driver, MD_MIN2SET(mnum)); 766 setcap_ioc.mnum = mnum; 767 setcap_ioc.sc_set = d->msg_setcap_set; 768 769 ret = metaioctl(MD_MN_SET_CAP, &setcap_ioc, &setcap_ioc.mde, NULL); 770 771 resp->mmr_exitval = ret; 772 } 773 774 /* 775 * Dummy handler for various CLASS0 messages like 776 * MD_MN_MSG_VERBOSITY / MD_MN_MSG_RESUME / MD_MN_MSG_SUSPEND ... 777 */ 778 /*ARGSUSED*/ 779 void 780 mdmn_do_dummy(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 781 { 782 resp->mmr_out_size = 0; 783 resp->mmr_err_size = 0; 784 resp->mmr_out = NULL; 785 resp->mmr_err = NULL; 786 resp->mmr_exitval = 0; 787 resp->mmr_comm_state = MDMNE_ACK; 788 } 789 790 /* 791 * Overall description of mdcommd support that keeps all nodes in-sync 792 * with the ondisk diskset mddbs. 793 * 794 * All configuration changes to the mddb - addition/deletion of metadevices 795 * or replicas must use a CLASS1 message to block out these changes. 796 * Changes to the state of existing replicas do not need to block CLASS1 797 * since there is no conflict when just updating the state of a replica. 798 * 799 * Error encountered when master writes to mddbs: 800 * As the master updates parts of the mddbs, flags are updated describing 801 * what has been written. When all locks are dropped (either in 802 * mddb_setexit or mdioctl), a PARSE message will be generated to all 803 * nodes with an index list of known good mddbs and the parse flags. 
 *	The master node ignores the parse message since it sent it.
 *	The slave nodes re-read in the changed part of the mddb using the list
 *	of known good replicas that was passed.
 *	PARSE message does not block CLASS1.
 *	The PARSE message must be the highest class message. Since this
 *	message could be sent on any ioctl, this PARSE message class must
 *	be higher than any other class message that could issue an ioctl.
 *
 *	Master		Slave1		Slave2
 *	Handles_error
 *	PARSE		PARSE		PARSE
 *
 *
 * Add/Delete mddbs can occur from the following commands:
 *	metadb -s set_name -a/-d
 *	metaset -s set_name -a/-d disk
 *	metaset -s set_name -b
 *
 *	The metadb/metaset command is run on the node executing the command
 *	and sends an ATTACH/DETACH message to the master node blocking CLASS1
 *	messages on all nodes until this message is finished. The master
 *	node generates 3 submessages of BLOCK, SM_ATTACH/SM_DETACH, UNBLOCK.
 *	The BLOCK message is only run on the master node and will BLOCK
 *	the PARSE messages from being sent to the nodes.
 *	The SM_ATTACH/SM_DETACH message is run on all nodes and actually adds or
 *	removes the replica(s) from the given disk slice.
 *	The UNBLOCK message is only run on the master node and allows the
 *	sending of PARSE messages.
 *
 *	Master		Slave1		Slave2
 *			Add mddb cmd
 *			ATTACH msg to master
 *	BLOCK
 *	ATTACH		ATTACH		ATTACH
 *	UNBLOCK
 *	PARSE		PARSE		PARSE
 *			ATTACH msg finished
 *
 * Add/Delete host side information from the following commands:
 *	metaset -s set_name -a/-d -h
 *
 *	The metaset command is run on the node executing the command and
 *	sends a DB_NEWSIDE/DB_DELSIDE message and a MD_NEWSIDE/MD_DELSIDE
 *	message whenever a host is added to or deleted from the diskset.
 *
 *	The side information contains the major name and minor number
 *	associated with a disk slice from a certain node's perspective
 *	in a (failed) effort to support clustered systems that don't have the
 *	same device name for a physical device. (The original designers of
 *	SVM eventually took the shortcut of assuming that all device names
 *	are the same on all systems, but left the side information in the
 *	mddb and namespace.) The side information is used for disk slices
 *	that contain mddbs and/or are components for metadevices.
 *
 *	The DB_NEWSIDE/DELSIDE command adds or deletes the side information
 *	for each mddb for the host being added or deleted.
 *	The MD_ADDSIDE/MD_DELSIDE command adds or deletes the side information
 *	for all disk slice components that are in the namespace records for
 *	the host being added or deleted.
 *
 *	The DB_NEWSIDE/DB_DELSIDE message does not change any mddb records
 *	and only needs to be executed on the master node since the slave
 *	nodes will be brought up to date by the PARSE message that is
 *	generated as a result of a change to the mddb.
 *	The MD_ADDSIDE/MD_DELSIDE message does modify the records in the mddb
 *	and needs to be run on all nodes. The message must block class1
 *	messages so that record changing commands don't interfere.
 *
 *	Master		Slave1		Slave2
 *			Add host
 *			DB_NEWSIDE msg to master
 *	DB_NEWSIDE
 *	PARSE		PARSE		PARSE
 *			DB_NEWSIDE msg finished
 *			MD_NEWSIDE msg to master
 *	MD_NEWSIDE	MD_NEWSIDE	MD_NEWSIDE
 *			MD_NEWSIDE msg finished
 *
 *
 * Optimized resync record failure:
 *	When any node sees a failure to write an optimized resync record
 *	that node notifies the master node of the replica that failed.
 *	The master node handles the error and updates the rest of the
 *	nodes using a PARSE message.
The PARSE message also calls 888 * fixoptrecord on each slave node causing each node to fix up 889 * the optimized resync records that are owned by that node (the mirror 890 * owner code also sets the optimized resync record owner). The master 891 * node will fix up all optimized resync records that have no owner or 892 * are owned by the master node. 893 * 894 * Master Slave1 Slave2 895 * Optimized Record Failure 896 * OPTRECERR msg to master 897 * Master handles opt rec failure 898 * PARSE PARSE PARSE 899 * OPTRECERR msg finished 900 * Slave rewrites optimized record 901 * 902 */ 903 904 /* 905 * Handler for MD_MN_MSG_MDDB_PARSE which send parse messages to the 906 * slave nodes in order to keep the incore view of the mddbs the 907 * same on all nodes. 908 * 909 * Since master node generated the mddb parse message, do nothing 910 * if this is the master node. 911 * 912 * If this is a slave node, send the parse message down to the kernel 913 * where this node will re-read in parts of the mddbs. 914 * 915 */ 916 void 917 mdmn_do_mddb_parse(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 918 { 919 md_mn_msg_mddb_parse_t *d; 920 mddb_parse_parm_t mpp; 921 int ret = 0; 922 int i; 923 924 resp->mmr_out_size = 0; 925 resp->mmr_err_size = 0; 926 resp->mmr_out = NULL; 927 resp->mmr_err = NULL; 928 resp->mmr_comm_state = MDMNE_ACK; 929 d = (md_mn_msg_mddb_parse_t *)((void *)(msg->msg_event_data)); 930 931 if (flags & MD_MSGF_ON_MASTER) 932 return; 933 934 (void) memset(&mpp, 0, sizeof (mpp)); 935 mpp.c_setno = msg->msg_setno; 936 mpp.c_parse_flags = d->msg_parse_flags; 937 for (i = 0; i < MDDB_NLB; i++) { 938 mpp.c_lb_flags[i] = d->msg_lb_flags[i]; 939 } 940 ret = metaioctl(MD_MN_MDDB_PARSE, &mpp, &mpp.c_mde, NULL); 941 if (ret) 942 (void) mdstealerror(&(resp->mmr_ep), &mpp.c_mde); 943 944 resp->mmr_exitval = ret; 945 } 946 947 /* 948 * Handler for MD_MN_MSG_MDDB_BLOCK which blocks the generation 949 * of parse messages from this node. 
950 * 951 * This is needed when attaching/detaching mddbs on the master and the 952 * slave node is unable to handle a parse message until the slave node 953 * has done the attach/detach of the mddbs. So, master node will block 954 * the parse messages, execute the attach/detach on all nodes and 955 * then unblock the parse messages which causes the parse message to 956 * be sent to all nodes. 957 */ 958 /*ARGSUSED*/ 959 void 960 mdmn_do_mddb_block(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 961 { 962 md_mn_msg_mddb_block_t *d; 963 mddb_block_parm_t mbp; 964 int ret; 965 966 resp->mmr_out_size = 0; 967 resp->mmr_err_size = 0; 968 resp->mmr_out = NULL; 969 resp->mmr_err = NULL; 970 resp->mmr_comm_state = MDMNE_ACK; 971 d = (md_mn_msg_mddb_block_t *)((void *)(msg->msg_event_data)); 972 973 (void) memset(&mbp, 0, sizeof (mbp)); 974 mbp.c_setno = msg->msg_setno; 975 mbp.c_blk_flags = d->msg_block_flags; 976 ret = metaioctl(MD_MN_MDDB_BLOCK, &mbp, &mbp.c_mde, NULL); 977 if (ret) 978 (void) mdstealerror(&(resp->mmr_ep), &mbp.c_mde); 979 980 resp->mmr_exitval = ret; 981 } 982 983 /* 984 * Submessage generator for MD_MN_MSG_META_DB_ATTACH which generates 985 * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_ATTACH 986 * message on all nodes and then an UNBLOCK message on the master only. 
987 */ 988 int 989 mdmn_smgen_mddb_attach(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 990 { 991 md_mn_msg_t *nmsg; 992 md_mn_msg_meta_db_attach_t *d; 993 md_mn_msg_meta_db_attach_t *attach_d; 994 md_mn_msg_mddb_block_t *block_d; 995 996 d = (md_mn_msg_meta_db_attach_t *)(void *)msg->msg_event_data; 997 998 nmsg = Zalloc(sizeof (md_mn_msg_t)); 999 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1000 1001 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1002 nmsg->msg_setno = msg->msg_setno; 1003 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1004 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1005 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1006 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1007 block_d->msg_block_flags = MDDB_BLOCK_PARSE; 1008 msglist[0] = nmsg; 1009 1010 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1011 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1012 1013 /* Don't log submessages and panic on inconsistent results */ 1014 nmsg->msg_flags = MD_MSGF_NO_LOG | 1015 MD_MSGF_PANIC_WHEN_INCONSISTENT; 1016 nmsg->msg_setno = msg->msg_setno; 1017 nmsg->msg_type = MD_MN_MSG_SM_MDDB_ATTACH; 1018 nmsg->msg_event_size = sizeof (md_mn_msg_meta_db_attach_t); 1019 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_attach_t)); 1020 attach_d = (md_mn_msg_meta_db_attach_t *) 1021 (void *)nmsg->msg_event_data; 1022 attach_d->msg_l_dev = d->msg_l_dev; 1023 attach_d->msg_cnt = d->msg_cnt; 1024 attach_d->msg_dbsize = d->msg_dbsize; 1025 (void) strncpy(attach_d->msg_dname, d->msg_dname, 16); 1026 attach_d->msg_splitname = d->msg_splitname; 1027 attach_d->msg_options = d->msg_options; 1028 msglist[1] = nmsg; 1029 1030 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1031 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1032 1033 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1034 nmsg->msg_setno = msg->msg_setno; 1035 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1036 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1037 
nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1038 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1039 block_d->msg_block_flags = MDDB_UNBLOCK_PARSE; 1040 msglist[2] = nmsg; 1041 1042 return (3); /* Return the number of submessages generated */ 1043 } 1044 1045 /* 1046 * Submessage generator for MD_MN_MSG_META_DB_DETACH which generates 1047 * a BLOCK message on the master node only, a MD_MN_MSG_SM_MDDB_DETACH 1048 * message on all nodes and then an UNBLOCK message on the master only. 1049 */ 1050 int 1051 mdmn_smgen_mddb_detach(md_mn_msg_t *msg, md_mn_msg_t *msglist[]) 1052 { 1053 md_mn_msg_t *nmsg; 1054 md_mn_msg_meta_db_detach_t *d; 1055 md_mn_msg_meta_db_detach_t *detach_d; 1056 md_mn_msg_mddb_block_t *block_d; 1057 1058 d = (md_mn_msg_meta_db_detach_t *)(void *)msg->msg_event_data; 1059 1060 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1061 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1062 1063 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1064 nmsg->msg_setno = msg->msg_setno; 1065 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1066 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1067 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1068 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1069 block_d->msg_block_flags = MDDB_BLOCK_PARSE; 1070 msglist[0] = nmsg; 1071 1072 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1073 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1074 1075 /* Don't log submessages and panic on inconsistent results */ 1076 nmsg->msg_flags = MD_MSGF_NO_LOG | 1077 MD_MSGF_PANIC_WHEN_INCONSISTENT; 1078 nmsg->msg_setno = msg->msg_setno; 1079 nmsg->msg_type = MD_MN_MSG_SM_MDDB_DETACH; 1080 nmsg->msg_event_size = sizeof (md_mn_msg_meta_db_detach_t); 1081 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_meta_db_detach_t)); 1082 detach_d = (md_mn_msg_meta_db_detach_t *) 1083 (void *)nmsg->msg_event_data; 1084 detach_d->msg_splitname = d->msg_splitname; 1085 msglist[1] = nmsg; 1086 
1087 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1088 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1089 1090 nmsg->msg_flags = (MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST); 1091 nmsg->msg_setno = msg->msg_setno; 1092 nmsg->msg_type = MD_MN_MSG_MDDB_BLOCK; 1093 nmsg->msg_event_size = sizeof (md_mn_msg_mddb_block_t); 1094 nmsg->msg_event_data = Zalloc(sizeof (md_mn_msg_mddb_block_t)); 1095 block_d = (md_mn_msg_mddb_block_t *)(void *)nmsg->msg_event_data; 1096 block_d->msg_block_flags = MDDB_UNBLOCK_PARSE; 1097 msglist[2] = nmsg; 1098 1099 return (3); /* Return the number of submessages generated */ 1100 } 1101 1102 /* 1103 * Handler for MD_MN_MSG_SM_MDDB_ATTACH which is used to attach mddbs. 1104 * 1105 * Used when running: 1106 * metadb -s set_name -a 1107 * metaset -s set_name -a/-d disk 1108 * metaset -s set_name -b 1109 */ 1110 /*ARGSUSED*/ 1111 void 1112 mdmn_do_sm_mddb_attach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1113 { 1114 md_mn_msg_meta_db_attach_t *d; 1115 struct mddb_config c; 1116 int i; 1117 int ret = 0; 1118 md_error_t ep = mdnullerror; 1119 char *name, *add_name; 1120 mdname_t *np; 1121 mdsetname_t *sp; 1122 1123 resp->mmr_out_size = 0; 1124 resp->mmr_err_size = 0; 1125 resp->mmr_out = NULL; 1126 resp->mmr_err = NULL; 1127 resp->mmr_comm_state = MDMNE_ACK; 1128 d = (md_mn_msg_meta_db_attach_t *)((void *)(msg->msg_event_data)); 1129 1130 (void) memset(&c, 0, sizeof (c)); 1131 c.c_setno = msg->msg_setno; 1132 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1133 (void) strncpy(c.c_locator.l_driver, d->msg_dname, 1134 sizeof (c.c_locator.l_driver)); 1135 c.c_devname = d->msg_splitname; 1136 c.c_locator.l_mnum = meta_getminor(d->msg_l_dev); 1137 c.c_multi_node = 1; 1138 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1139 (void) mdstealerror(&(resp->mmr_ep), &ep); 1140 resp->mmr_exitval = -1; 1141 return; 1142 } 1143 (void) strcpy(c.c_setname, sp->setname); 1144 c.c_sideno = getmyside(sp, &ep); 1145 if (c.c_sideno == MD_SIDEWILD) { 1146 (void) 
mdstealerror(&(resp->mmr_ep), &ep); 1147 resp->mmr_exitval = -1; 1148 return; 1149 } 1150 1151 name = splicename(&d->msg_splitname); 1152 if ((np = metaname(&sp, name, &ep)) == NULL) { 1153 Free(name); 1154 (void) mdstealerror(&(resp->mmr_ep), &ep); 1155 resp->mmr_exitval = -1; 1156 return; 1157 } 1158 /* 1159 * All nodes in MN diskset must do meta_check_replica 1160 * since this causes the shared namespace to be 1161 * populated by the md driver names while checking 1162 * to see if this device is already in use as a 1163 * metadevice. 1164 */ 1165 if (meta_check_replica(sp, np, d->msg_options, 0, 1166 (d->msg_cnt * d->msg_dbsize), &ep)) { 1167 (void) mdstealerror(&(resp->mmr_ep), &ep); 1168 resp->mmr_exitval = -1; 1169 return; 1170 } 1171 1172 for (i = 0; i < d->msg_cnt; i++) { 1173 c.c_locator.l_blkno = i * d->msg_dbsize + 16; 1174 if (setup_med_cfg(sp, &c, 1175 (d->msg_options & MDCHK_SET_FORCE), &ep)) { 1176 ret = -1; 1177 (void) mdstealerror(&(resp->mmr_ep), &ep); 1178 break; 1179 } 1180 ret = metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL); 1181 /* If newdev was successful, continue with attach */ 1182 if (ret == 0) { 1183 if (meta_db_addsidenms(sp, np, c.c_locator.l_blkno, 1184 DB_ADDSIDENMS_NO_BCAST, &ep)) { 1185 ret = -1; 1186 (void) mdstealerror(&(resp->mmr_ep), &ep); 1187 break; 1188 } 1189 } else { 1190 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1191 break; 1192 } 1193 } 1194 add_name = splicename(&d->msg_splitname); 1195 if ((np = metaname(&sp, add_name, &ep)) != NULL) { 1196 meta_invalidate_name(np); 1197 } else { 1198 ret = -1; 1199 (void) mdstealerror(&(resp->mmr_ep), &ep); 1200 } 1201 Free(add_name); 1202 1203 resp->mmr_exitval = ret; 1204 } 1205 1206 /* 1207 * Handler for MD_MN_MSG_SM_MDDB_DETACH which is used to detach mddbs. 
1208 * 1209 * Used when running: 1210 * metadb -s set_name -d 1211 * metaset -s set_name -a/-d disk 1212 * metaset -s set_name -b 1213 */ 1214 /*ARGSUSED*/ 1215 void 1216 mdmn_do_sm_mddb_detach(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1217 { 1218 md_mn_msg_meta_db_detach_t *d; 1219 struct mddb_config c; 1220 int i; 1221 int ret = 0; 1222 md_error_t ep = mdnullerror; 1223 char *name, *del_name; 1224 mdname_t *np; 1225 mdsetname_t *sp; 1226 1227 resp->mmr_out_size = 0; 1228 resp->mmr_err_size = 0; 1229 resp->mmr_out = NULL; 1230 resp->mmr_err = NULL; 1231 resp->mmr_comm_state = MDMNE_ACK; 1232 d = (md_mn_msg_meta_db_detach_t *)((void *)(msg->msg_event_data)); 1233 1234 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1235 (void) mdstealerror(&(resp->mmr_ep), &ep); 1236 resp->mmr_exitval = -1; 1237 return; 1238 } 1239 1240 (void) memset(&c, 0, sizeof (c)); 1241 c.c_setno = msg->msg_setno; 1242 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1243 resp->mmr_exitval = -1; 1244 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1245 return; 1246 } 1247 i = 0; 1248 del_name = splicename(&d->msg_splitname); 1249 while (i < c.c_dbcnt) { 1250 c.c_id = i; 1251 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1252 ret = -1; 1253 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1254 break; 1255 } 1256 name = splicename(&c.c_devname); 1257 if (strcmp(name, del_name) != 0) { 1258 Free(name); 1259 i++; 1260 continue; 1261 } 1262 Free(name); 1263 /* Found a match - delete mddb */ 1264 if (metaioctl(MD_DB_DELDEV, &c, &c.c_mde, NULL) != 0) { 1265 ret = -1; 1266 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1267 break; 1268 } 1269 /* Not incrementing "i" intentionally (dbcnt is changed) */ 1270 } 1271 if ((np = metaname(&sp, del_name, &ep)) != NULL) { 1272 meta_invalidate_name(np); 1273 } else { 1274 ret = -1; 1275 (void) mdstealerror(&(resp->mmr_ep), &ep); 1276 } 1277 Free(del_name); 1278 1279 resp->mmr_exitval = ret; 1280 } 1281 1282 /* 1283 * 
Handler for MD_MN_MSG_META_DB_NEWSIDE which is used to update the 1284 * side information for each diskset mddb when a new host has been 1285 * added to the diskset. The side information is the /dev/dsk/ctds name 1286 * that the new node would use to access each mddb. 1287 * 1288 * Since this routine makes no changes to the records in the diskset mddb, 1289 * this routine only needs to be run on the master node. The master node's 1290 * kernel code will detect that portions of the mddb have changed and 1291 * will send a parse message to all nodes to re-parse parts of the mddb. 1292 * 1293 * Used when running: 1294 * metaset -s set_name -a -h new_hostname 1295 */ 1296 /*ARGSUSED*/ 1297 void 1298 mdmn_do_meta_db_newside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1299 { 1300 md_mn_msg_meta_db_newside_t *d; 1301 struct mddb_config c; 1302 int ret = 0; 1303 mdsetname_t *sp; 1304 md_error_t ep = mdnullerror; 1305 1306 resp->mmr_out_size = 0; 1307 resp->mmr_err_size = 0; 1308 resp->mmr_out = NULL; 1309 resp->mmr_err = NULL; 1310 resp->mmr_comm_state = MDMNE_ACK; 1311 d = (md_mn_msg_meta_db_newside_t *)((void *)(msg->msg_event_data)); 1312 1313 (void) memset(&c, 0, sizeof (c)); 1314 c.c_setno = msg->msg_setno; 1315 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1316 c.c_locator.l_blkno = d->msg_blkno; 1317 (void) strncpy(c.c_locator.l_driver, d->msg_dname, 1318 sizeof (c.c_locator.l_driver)); 1319 c.c_devname = d->msg_splitname; 1320 c.c_locator.l_mnum = d->msg_mnum; 1321 c.c_multi_node = 1; 1322 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1323 (void) mdstealerror(&(resp->mmr_ep), &ep); 1324 resp->mmr_exitval = -1; 1325 return; 1326 } 1327 (void) strcpy(c.c_setname, sp->setname); 1328 c.c_sideno = d->msg_sideno; 1329 1330 if ((ret = metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL)) != 0) { 1331 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1332 } 1333 resp->mmr_exitval = ret; 1334 } 1335 1336 /* 1337 * Handler for MD_MN_MSG_META_DB_DELSIDE which is 
used to remove the 1338 * side information for each diskset mddb when a host has been 1339 * deleted from the diskset. The side information is the /dev/dsk/ctds name 1340 * that the node would use to access each mddb. 1341 * 1342 * Since this routine makes no changes to the records in the diskset mddb, 1343 * this routine only needs to be run on the master node. The master node's 1344 * kernel code will detect that portions of the mddb have changed and 1345 * will send a parse message to all nodes to re-parse parts of the mddb. 1346 * 1347 * Used when running: 1348 * metaset -s set_name -d -h hostname 1349 */ 1350 /*ARGSUSED*/ 1351 void 1352 mdmn_do_meta_db_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1353 { 1354 md_mn_msg_meta_db_delside_t *d; 1355 mddb_config_t c; 1356 int ret = 0; 1357 mdsetname_t *sp; 1358 md_error_t ep = mdnullerror; 1359 1360 resp->mmr_out_size = 0; 1361 resp->mmr_err_size = 0; 1362 resp->mmr_out = NULL; 1363 resp->mmr_err = NULL; 1364 resp->mmr_comm_state = MDMNE_ACK; 1365 d = (md_mn_msg_meta_db_delside_t *)((void *)(msg->msg_event_data)); 1366 1367 (void) memset(&c, 0, sizeof (c)); 1368 c.c_setno = msg->msg_setno; 1369 c.c_locator.l_dev = meta_cmpldev(d->msg_l_dev); 1370 c.c_locator.l_blkno = d->msg_blkno; 1371 c.c_multi_node = 1; 1372 if ((sp = metasetnosetname(c.c_setno, &ep)) == NULL) { 1373 (void) mdstealerror(&(resp->mmr_ep), &ep); 1374 resp->mmr_exitval = -1; 1375 return; 1376 } 1377 (void) strcpy(c.c_setname, sp->setname); 1378 c.c_sideno = d->msg_sideno; 1379 1380 if ((ret = metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL)) != 0) { 1381 (void) mdstealerror(&(resp->mmr_ep), &c.c_mde); 1382 } 1383 resp->mmr_exitval = ret; 1384 } 1385 1386 /* 1387 * Handler for MD_MN_MSG_META_MD_ADDSIDE which is used to add the 1388 * side information for each diskset metadevice component (if that 1389 * component is a disk) when a host has been added to the diskset. 
1390 * The side information is the /dev/dsk/ctds name that the node would 1391 * use to access the metadevice component. 1392 * 1393 * This routine makes changes to the mddb records and must be run 1394 * on all nodes. 1395 * 1396 * Used when running: 1397 * metaset -s set_name -a -h new_hostname 1398 */ 1399 /*ARGSUSED*/ 1400 void 1401 mdmn_do_meta_md_addside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1402 { 1403 md_mn_msg_meta_md_addside_t *d; 1404 mdnm_params_t nm; 1405 mdsetname_t *sp; 1406 char *cname, *dname; 1407 minor_t mnum; 1408 int done, i; 1409 md_error_t ep = mdnullerror; 1410 1411 resp->mmr_out_size = 0; 1412 resp->mmr_err_size = 0; 1413 resp->mmr_out = NULL; 1414 resp->mmr_err = NULL; 1415 resp->mmr_comm_state = MDMNE_ACK; 1416 d = (md_mn_msg_meta_md_addside_t *)((void *)(msg->msg_event_data)); 1417 1418 (void) memset(&nm, 0, sizeof (nm)); 1419 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1420 (void) mdstealerror(&(resp->mmr_ep), &ep); 1421 resp->mmr_exitval = -1; 1422 return; 1423 } 1424 /* While loop continues until IOCNXTKEY_NM gives nm.key of KEYWILD */ 1425 /*CONSTCOND*/ 1426 while (1) { 1427 nm.mde = mdnullerror; 1428 nm.setno = msg->msg_setno; 1429 nm.side = d->msg_otherside; 1430 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) { 1431 (void) mdstealerror(&(resp->mmr_ep), &nm.mde); 1432 resp->mmr_exitval = -1; 1433 return; 1434 } 1435 1436 /* Normal exit path is to eventually get a KEYWILD */ 1437 if (nm.key == MD_KEYWILD) { 1438 resp->mmr_exitval = 0; 1439 return; 1440 } 1441 1442 nm.devname = (uintptr_t)meta_getnmbykey(msg->msg_setno, 1443 d->msg_otherside, nm.key, &ep); 1444 if (nm.devname == NULL) { 1445 (void) mdstealerror(&(resp->mmr_ep), &ep); 1446 resp->mmr_exitval = -1; 1447 return; 1448 } 1449 nm.side = d->msg_sideno; 1450 if ((done = meta_getside_devinfo(sp, 1451 (char *)(uintptr_t)nm.devname, 1452 d->msg_sideno, &cname, &dname, &mnum, &ep)) == -1) { 1453 (void) mdstealerror(&(resp->mmr_ep), &ep); 
1454 Free((void *)(uintptr_t)nm.devname); 1455 resp->mmr_exitval = -1; 1456 return; 1457 } 1458 Free((void *)(uintptr_t)nm.devname); 1459 if (done != 1) { 1460 Free(cname); 1461 Free(dname); 1462 resp->mmr_exitval = -1; 1463 return; 1464 } 1465 1466 /* 1467 * The device reference count can be greater than 1 if 1468 * more than one softpart is configured on top of the 1469 * same device. If this is the case then we want to 1470 * increment the count to sync up with the other sides. 1471 */ 1472 for (i = 0; i < nm.ref_count; i++) { 1473 if (add_name(sp, d->msg_sideno, nm.key, dname, mnum, 1474 cname, &ep) == -1) { 1475 (void) mdstealerror(&(resp->mmr_ep), &ep); 1476 Free(cname); 1477 Free(dname); 1478 resp->mmr_exitval = -1; 1479 return; 1480 } 1481 } 1482 Free(cname); 1483 Free(dname); 1484 } 1485 1486 /*NOTREACHED*/ 1487 } 1488 /* 1489 * Handler for MD_MN_MSG_META_MD_DELSIDE which is used to delete the 1490 * side information for each diskset metadevice component (if that 1491 * component is a disk) when a host has been removed from the diskset. 1492 * The side information is the /dev/dsk/ctds name that the node would 1493 * use to access the metadevice component. 1494 * 1495 * This routine makes changes to the mddb records and must be run 1496 * on all nodes. 
1497 * 1498 * Used when running: 1499 * metaset -s set_name -d -h hostname 1500 */ 1501 /*ARGSUSED*/ 1502 void 1503 mdmn_do_meta_md_delside(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1504 { 1505 md_mn_msg_meta_md_delside_t *d; 1506 mdnm_params_t nm; 1507 mdsetname_t *sp; 1508 md_error_t ep = mdnullerror; 1509 int i; 1510 1511 resp->mmr_out_size = 0; 1512 resp->mmr_err_size = 0; 1513 resp->mmr_out = NULL; 1514 resp->mmr_err = NULL; 1515 resp->mmr_comm_state = MDMNE_ACK; 1516 d = (md_mn_msg_meta_md_delside_t *)((void *)(msg->msg_event_data)); 1517 1518 if ((sp = metasetnosetname(msg->msg_setno, &ep)) == NULL) { 1519 (void) mdstealerror(&(resp->mmr_ep), &ep); 1520 resp->mmr_exitval = -1; 1521 return; 1522 } 1523 1524 (void) memset(&nm, 0, sizeof (nm)); 1525 nm.key = MD_KEYWILD; 1526 /*CONSTCOND*/ 1527 while (1) { 1528 nm.mde = mdnullerror; 1529 nm.setno = msg->msg_setno; 1530 nm.side = MD_SIDEWILD; 1531 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) { 1532 (void) mdstealerror(&(resp->mmr_ep), &nm.mde); 1533 resp->mmr_exitval = -1; 1534 return; 1535 } 1536 1537 /* Normal exit path is to eventually get a KEYWILD */ 1538 if (nm.key == MD_KEYWILD) { 1539 resp->mmr_exitval = 0; 1540 return; 1541 } 1542 1543 /* 1544 * The device reference count can be greater than 1 if 1545 * more than one softpart is configured on top of the 1546 * same device. If this is the case then we want to 1547 * decrement the count to zero so the entry can be 1548 * actually removed. 1549 */ 1550 for (i = 0; i < nm.ref_count; i++) { 1551 if (del_name(sp, d->msg_sideno, nm.key, &ep) == -1) { 1552 (void) mdstealerror(&(resp->mmr_ep), &ep); 1553 resp->mmr_exitval = -1; 1554 return; 1555 } 1556 } 1557 } 1558 1559 /*NOTREACHED*/ 1560 } 1561 1562 /* 1563 * Handler for MD_MN_MSG_MDDB_OPTRECERR which is used to notify 1564 * the master node that a node has seen an error when attempting to 1565 * write to the optimized resync records that reside on 2 of the diskset 1566 * mddbs. 
Master node will mark the failed replica in error and this 1567 * will send a parse message to all nodes to re-read parts of the mddb 1568 * and to fix their optimized resync records based on this information. 1569 */ 1570 /*ARGSUSED*/ 1571 void 1572 mdmn_do_mddb_optrecerr(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1573 { 1574 md_mn_msg_mddb_optrecerr_t *d; 1575 mddb_optrec_parm_t mop; 1576 int ret; 1577 int i; 1578 1579 resp->mmr_out_size = 0; 1580 resp->mmr_err_size = 0; 1581 resp->mmr_out = NULL; 1582 resp->mmr_err = NULL; 1583 resp->mmr_comm_state = MDMNE_ACK; 1584 d = (md_mn_msg_mddb_optrecerr_t *)((void *)(msg->msg_event_data)); 1585 1586 (void) memset(&mop, 0, sizeof (mop)); 1587 mop.c_setno = msg->msg_setno; 1588 for (i = 0; i < 2; i++) { 1589 mop.c_recerr[i] = d->msg_recerr[i]; 1590 } 1591 ret = metaioctl(MD_MN_MDDB_OPTRECFIX, &mop, &mop.c_mde, NULL); 1592 if (ret) 1593 (void) mdstealerror(&(resp->mmr_ep), &mop.c_mde); 1594 1595 resp->mmr_exitval = ret; 1596 } 1597 1598 int 1599 mdmn_smgen_test6(md_mn_msg_t *msg, md_mn_msg_t **msglist) 1600 { 1601 md_mn_msg_t *nmsg; 1602 1603 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1604 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1605 1606 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1607 nmsg->msg_setno = msg->msg_setno; 1608 nmsg->msg_type = MD_MN_MSG_TEST2; 1609 nmsg->msg_event_size = sizeof ("test2"); 1610 nmsg->msg_event_data = Strdup("test2"); 1611 msglist[0] = nmsg; 1612 1613 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1614 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1615 1616 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1617 nmsg->msg_setno = msg->msg_setno; 1618 nmsg->msg_type = MD_MN_MSG_TEST2; 1619 nmsg->msg_event_size = sizeof ("test2"); 1620 nmsg->msg_event_data = Strdup("test2"); 1621 msglist[1] = nmsg; 1622 1623 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1624 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1625 1626 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log 
submessages */ 1627 nmsg->msg_setno = msg->msg_setno; 1628 nmsg->msg_type = MD_MN_MSG_TEST3; 1629 nmsg->msg_event_size = sizeof ("test3"); 1630 nmsg->msg_event_data = Strdup("test3"); 1631 msglist[2] = nmsg; 1632 1633 nmsg = Zalloc(sizeof (md_mn_msg_t)); 1634 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid)); 1635 1636 nmsg->msg_flags = MD_MSGF_NO_LOG; /* Don't log submessages */ 1637 nmsg->msg_setno = msg->msg_setno; 1638 nmsg->msg_type = MD_MN_MSG_TEST4; 1639 nmsg->msg_event_size = sizeof ("test4"); 1640 nmsg->msg_event_data = Strdup("test4"); 1641 msglist[3] = nmsg; 1642 1643 return (4); /* Return the number of submessages generated */ 1644 } 1645 1646 /* 1647 * This is to send an MD_IOCSET ioctl to all nodes to create a soft 1648 * partition. 1649 */ 1650 /*ARGSUSED*/ 1651 void 1652 mdmn_do_iocset(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1653 { 1654 md_mn_msg_iocset_t *d; 1655 int ret; 1656 set_t setno; 1657 mdsetname_t *sp; 1658 mdname_t *np; 1659 md_error_t mde = mdnullerror; 1660 1661 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1662 resp->mmr_out_size = 0; 1663 resp->mmr_err_size = 0; 1664 resp->mmr_out = NULL; 1665 resp->mmr_err = NULL; 1666 d = (md_mn_msg_iocset_t *)(void *)msg->msg_event_data; 1667 1668 setno = MD_MIN2SET(d->iocset_params.mnum); 1669 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1670 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1671 "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno); 1672 resp->mmr_exitval = 1; 1673 return; 1674 } 1675 1676 if ((np = metamnumname(&sp, d->iocset_params.mnum, 1, &mde)) == NULL) { 1677 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1678 "MD_MN_MSG_IOCSET: Invalid mnum %d\n"), 1679 d->iocset_params.mnum); 1680 resp->mmr_exitval = 1; 1681 return; 1682 } 1683 1684 if (meta_init_make_device(&sp, np->cname, &mde) == -1) { 1685 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1686 "MD_MN_MSG_IOCSET: Invalid metadevice name %s\n"), 1687 np->cname); 1688 resp->mmr_exitval = 1; 1689 return; 1690 } 1691 1692 
d->iocset_params.mdp = (uintptr_t)&d->unit; /* set pointer to unit */ 1693 ret = metaioctl(MD_IOCSET, &(d->iocset_params), &mde, np->cname); 1694 resp->mmr_exitval = ret; 1695 } 1696 1697 /* 1698 * This is to update the status of a softpart 1699 */ 1700 /*ARGSUSED*/ 1701 void 1702 mdmn_do_sp_setstat(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1703 { 1704 md_mn_msg_sp_setstat_t *d; 1705 int ret; 1706 set_t setno; 1707 mdsetname_t *sp; 1708 minor_t mnum; 1709 md_error_t mde = mdnullerror; 1710 1711 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1712 resp->mmr_out_size = 0; 1713 resp->mmr_err_size = 0; 1714 resp->mmr_out = NULL; 1715 resp->mmr_err = NULL; 1716 d = (md_mn_msg_sp_setstat_t *)(void *)msg->msg_event_data; 1717 1718 mnum = d->sp_setstat_mnum; 1719 setno = MD_MIN2SET(mnum); 1720 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1721 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1722 "MD_MN_MSG_IOCSET: Invalid setno %d\n"), setno); 1723 resp->mmr_exitval = 1; 1724 return; 1725 } 1726 1727 ret = meta_sp_setstatus(sp, &mnum, 1, d->sp_setstat_status, &mde); 1728 resp->mmr_exitval = ret; 1729 } 1730 1731 /* 1732 * This is to add a key to the namespace 1733 */ 1734 /*ARGSUSED*/ 1735 void 1736 mdmn_do_addkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1737 { 1738 md_mn_msg_addkeyname_t *d; 1739 int ret; 1740 set_t setno; 1741 mdsetname_t *sp; 1742 md_error_t mde = mdnullerror; 1743 mdname_t *compnp; 1744 1745 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1746 resp->mmr_out_size = 0; 1747 resp->mmr_err_size = 0; 1748 resp->mmr_out = NULL; 1749 resp->mmr_err = NULL; 1750 d = (md_mn_msg_addkeyname_t *)(void *)msg->msg_event_data; 1751 1752 setno = d->addkeyname_setno; 1753 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1754 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1755 "MD_MN_ADDKEYNAME: Invalid setno %d\n"), setno); 1756 resp->mmr_exitval = -1; 1757 return; 1758 } 1759 1760 compnp = metaname(&sp, d->addkeyname_name, &mde); 1761 if (compnp 
!= NULL) { 1762 ret = add_key_name(sp, compnp, NULL, &mde); 1763 if (ret < 0) 1764 resp->mmr_exitval = -1; 1765 else 1766 resp->mmr_exitval = compnp->key; 1767 } else { 1768 resp->mmr_exitval = -1; 1769 } 1770 } 1771 1772 /* 1773 * This is to delete a key from the namespace 1774 */ 1775 /*ARGSUSED*/ 1776 void 1777 mdmn_do_delkeyname(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1778 { 1779 md_mn_msg_delkeyname_t *d; 1780 int ret; 1781 set_t setno; 1782 mdsetname_t *sp; 1783 md_error_t mde = mdnullerror; 1784 mdname_t *compnp; 1785 1786 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1787 resp->mmr_out_size = 0; 1788 resp->mmr_err_size = 0; 1789 resp->mmr_out = NULL; 1790 resp->mmr_err = NULL; 1791 d = (md_mn_msg_delkeyname_t *)(void *)msg->msg_event_data; 1792 1793 setno = d->delkeyname_setno; 1794 if ((sp = metasetnosetname(setno, &mde)) == NULL) { 1795 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1796 "MD_MN_DELKEYNAME: Invalid setno %d\n"), setno); 1797 resp->mmr_exitval = -1; 1798 return; 1799 } 1800 1801 compnp = metadevname(&sp, d->delkeyname_dev, &mde); 1802 if (compnp != NULL) { 1803 /* 1804 * Reset the key value for the name. This is required because 1805 * any previous call of del_key_name for the same component 1806 * will have resulted in the key value being reset to MD_KEYBAD 1807 * even though there may still be references to this component. 1808 */ 1809 compnp->key = d->delkeyname_key; 1810 ret = del_key_name(sp, compnp, &mde); 1811 resp->mmr_exitval = ret; 1812 } else { 1813 resp->mmr_exitval = -1; 1814 } 1815 } 1816 1817 /* 1818 * This is to get the value of tstate from the master node. We use this 1819 * to get the ABR state of a metadevice from the master. 
1820 */ 1821 /*ARGSUSED*/ 1822 void 1823 mdmn_do_get_tstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1824 { 1825 md_mn_msg_gettstate_t *d; 1826 int ret; 1827 uint_t tstate; 1828 md_error_t mde = mdnullerror; 1829 1830 resp->mmr_comm_state = MDMNE_ACK; /* Ok state */; 1831 resp->mmr_out_size = 0; 1832 resp->mmr_err_size = 0; 1833 resp->mmr_out = NULL; 1834 resp->mmr_err = NULL; 1835 d = (md_mn_msg_gettstate_t *)(void *)msg->msg_event_data; 1836 1837 ret = meta_get_tstate(d->gettstate_dev, &tstate, &mde); 1838 if (ret != 0) { 1839 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1840 "MD_MN_GET_TSTATE: Invalid dev %llx\n"), d->gettstate_dev); 1841 tstate = 0; 1842 } 1843 resp->mmr_exitval = tstate; 1844 } 1845 1846 /* 1847 * This is to get the mirror ABR state and the state of its submirrors from 1848 * the master node. We need this to ensure consistent output from metastat 1849 * when a new node joins the cluster during a resync. Without this the 1850 * submirror status will be incorrect until the whole resync is complete which 1851 * may take days for very large metadevices. 
1852 */ 1853 /*ARGSUSED*/ 1854 void 1855 mdmn_do_get_mirstate(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1856 { 1857 md_mn_msg_mir_state_t *d; 1858 md_mn_msg_mir_state_res_t *res; /* Results */ 1859 set_t setno; 1860 mdsetname_t *sp; /* Set name */ 1861 mdname_t *mirnp; /* Mirror name */ 1862 md_error_t mde = mdnullerror; 1863 mm_unit_t *mm; /* Mirror */ 1864 int smi; 1865 uint_t tstate; 1866 1867 resp->mmr_comm_state = MDMNE_ACK; 1868 resp->mmr_out_size = sizeof (md_mn_msg_mir_state_res_t); 1869 resp->mmr_err_size = 0; 1870 resp->mmr_out = Malloc(resp->mmr_out_size); 1871 resp->mmr_err = NULL; 1872 d = (md_mn_msg_mir_state_t *)(void *)msg->msg_event_data; 1873 res = (md_mn_msg_mir_state_res_t *)(void *)resp->mmr_out; 1874 1875 /* Validate set information from minor number */ 1876 setno = MD_MIN2SET(d->mir_state_mnum); 1877 sp = metasetnosetname(setno, &mde); 1878 if (sp == NULL) { 1879 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1880 "MD_MN_GET_MIRROR_STATE: Invalid set %d\n"), setno); 1881 resp->mmr_exitval = 1; /* Failure */ 1882 Free(resp->mmr_out); 1883 resp->mmr_out_size = 0; 1884 return; 1885 } 1886 1887 /* Construct mirror name from minor number */ 1888 mirnp = metamnumname(&sp, d->mir_state_mnum, 0, &mde); 1889 if (mirnp == NULL) { 1890 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1891 "MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"), 1892 d->mir_state_mnum); 1893 resp->mmr_exitval = 2; /* Failure */ 1894 Free(resp->mmr_out); 1895 resp->mmr_out_size = 0; 1896 return; 1897 } 1898 1899 /* Get common mirror structure */ 1900 mm = (mm_unit_t *)meta_get_mdunit(sp, mirnp, &mde); 1901 if (mm == NULL) { 1902 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1903 "MD_MN_GET_MIRROR_STATE: Invalid mirror minor %x\n"), 1904 d->mir_state_mnum); 1905 resp->mmr_exitval = 3; /* Failure */ 1906 Free(resp->mmr_out); 1907 resp->mmr_out_size = 0; 1908 return; 1909 } 1910 1911 if (meta_get_tstate(d->mir_state_mnum, &tstate, &mde) != 0) { 1912 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 1913 
"MD_MN_GET_MIRROR_STATE: Invalid minor %lx\n"), 1914 d->mir_state_mnum); 1915 resp->mmr_exitval = 4; /* Failure */ 1916 Free(resp->mmr_out); 1917 resp->mmr_out_size = 0; 1918 return; 1919 } 1920 /* 1921 * Fill in the sm_state/sm_flags value in the results structure which 1922 * gets passed back to the message originator 1923 */ 1924 resp->mmr_exitval = 0; 1925 for (smi = 0; (smi < NMIRROR); smi++) { 1926 mm_submirror_t *mmsp = &mm->un_sm[smi]; 1927 res->sm_state[smi] = mmsp->sm_state; 1928 res->sm_flags[smi] = mmsp->sm_flags; 1929 } 1930 /* Returm value of tstate for mirror */ 1931 res->mir_tstate = tstate; 1932 } 1933 1934 /* 1935 * This is to issue an ioctl to call poke_hotspares 1936 */ 1937 /*ARGSUSED*/ 1938 void 1939 mdmn_do_poke_hotspares(md_mn_msg_t *msg, uint_t flags, md_mn_result_t *resp) 1940 { 1941 1942 md_mn_poke_hotspares_t pokehsp; 1943 md_mn_msg_pokehsp_t *d; 1944 1945 resp->mmr_out_size = 0; 1946 resp->mmr_err_size = 0; 1947 resp->mmr_out = NULL; 1948 resp->mmr_err = NULL; 1949 resp->mmr_comm_state = MDMNE_ACK; 1950 d = (md_mn_msg_pokehsp_t *)(void *)msg->msg_event_data; 1951 1952 (void) memset(&pokehsp, 0, sizeof (pokehsp)); 1953 MD_SETDRIVERNAME(&pokehsp, MD_MIRROR, d->pokehsp_setno); 1954 1955 resp->mmr_exitval = metaioctl(MD_MN_POKE_HOTSPARES, &pokehsp, 1956 &pokehsp.mde, NULL); 1957 } 1958