1 /* $OpenBSD: softraid_raid1.c,v 1.31 2012/01/22 11:13:32 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int); 51 int sr_raid1_alloc_resources(struct sr_discipline *); 52 int sr_raid1_free_resources(struct sr_discipline *); 53 int sr_raid1_rw(struct sr_workunit *); 54 void sr_raid1_intr(struct buf *); 55 void sr_raid1_recreate_wu(struct sr_workunit *); 56 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 57 void sr_raid1_set_vol_state(struct sr_discipline *); 58 59 /* Discipline initialisation. */ 60 void 61 sr_raid1_discipline_init(struct sr_discipline *sd) 62 { 63 64 /* Fill out discipline members. */ 65 sd->sd_type = SR_MD_RAID1; 66 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 67 SR_CAP_REBUILD; 68 sd->sd_max_wu = SR_RAID1_NOWU; 69 70 /* Setup discipline specific function pointers. */ 71 sd->sd_alloc_resources = sr_raid1_alloc_resources; 72 sd->sd_assemble = sr_raid1_assemble; 73 sd->sd_create = sr_raid1_create; 74 sd->sd_free_resources = sr_raid1_free_resources; 75 sd->sd_scsi_rw = sr_raid1_rw; 76 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 77 sd->sd_set_vol_state = sr_raid1_set_vol_state; 78 } 79 80 int 81 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 82 int no_chunk, int64_t coerced_size) 83 { 84 85 if (no_chunk < 2) { 86 sr_error(sd->sd_sc, "RAID 1 requires two or more chunks"); 87 return EINVAL; 88 } 89 90 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 91 sd->sd_meta->ssdi.ssd_size = coerced_size; 92 93 sd->sd_max_ccb_per_wu = no_chunk; 94 95 return 0; 96 } 97 98 int 99 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 100 int no_chunk) 101 { 102 103 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 104 105 return 0; 106 } 107 108 int 109 sr_raid1_alloc_resources(struct sr_discipline *sd) 110 { 111 int rv = EINVAL; 112 113 if (!sd) 114 return (rv); 115 116 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 117 DEVNAME(sd->sd_sc)); 118 119 if (sr_wu_alloc(sd)) 120 goto bad; 121 if (sr_ccb_alloc(sd)) 122 goto bad; 123 124 rv = 0; 125 bad: 126 return (rv); 127 } 128 129 int 130 sr_raid1_free_resources(struct sr_discipline *sd) 131 { 132 int rv = EINVAL; 133 134 if (!sd) 135 return (rv); 136 137 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 138 DEVNAME(sd->sd_sc)); 139 140 sr_wu_free(sd); 141 sr_ccb_free(sd); 142 143 rv = 0; 144 return (rv); 145 } 146 147 void 148 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 149 { 150 int old_state, s; 151 152 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 153 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 154 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 155 156 /* ok to go to splbio since this only happens in error path */ 157 s = splbio(); 158 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 159 160 /* multiple IOs to the same chunk that fail will come through here */ 161 if (old_state == new_state) 162 goto done; 163 164 switch (old_state) { 165 case BIOC_SDONLINE: 166 switch (new_state) { 167 case BIOC_SDOFFLINE: 168 case BIOC_SDSCRUB: 169 break; 170 default: 171 goto die; 172 } 173 break; 174 175 case BIOC_SDOFFLINE: 176 switch (new_state) { 177 case BIOC_SDREBUILD: 178 case BIOC_SDHOTSPARE: 179 break; 180 default: 181 goto die; 182 } 183 break; 184 185 case BIOC_SDSCRUB: 186 if (new_state == BIOC_SDONLINE) { 187 ; 188 } else 189 goto die; 190 break; 191 192 case BIOC_SDREBUILD: 193 switch (new_state) { 194 case BIOC_SDONLINE: 195 break; 196 case BIOC_SDOFFLINE: 197 /* Abort rebuild since the rebuild chunk disappeared. */ 198 sd->sd_reb_abort = 1; 199 break; 200 default: 201 goto die; 202 } 203 break; 204 205 case BIOC_SDHOTSPARE: 206 switch (new_state) { 207 case BIOC_SDOFFLINE: 208 case BIOC_SDREBUILD: 209 break; 210 default: 211 goto die; 212 } 213 break; 214 215 default: 216 die: 217 splx(s); /* XXX */ 218 panic("%s: %s: %s: invalid chunk state transition " 219 "%d -> %d\n", DEVNAME(sd->sd_sc), 220 sd->sd_meta->ssd_devname, 221 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 222 old_state, new_state); 223 /* NOTREACHED */ 224 } 225 226 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 227 sd->sd_set_vol_state(sd); 228 229 sd->sd_must_flush = 1; 230 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 231 done: 232 splx(s); 233 } 234 235 void 236 sr_raid1_set_vol_state(struct sr_discipline *sd) 237 { 238 int states[SR_MAX_STATES]; 239 int new_state, i, s, nd; 240 int old_state = sd->sd_vol_status; 241 242 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 243 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 244 245 nd = sd->sd_meta->ssdi.ssd_chunk_no; 246 247 #ifdef SR_DEBUG 248 for (i = 0; i < nd; i++) 249 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 250 DEVNAME(sd->sd_sc), i, 251 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 252 #endif 253 254 for (i = 0; i < SR_MAX_STATES; i++) 255 states[i] = 0; 256 257 for (i = 0; i < nd; i++) { 258 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 259 if (s >= SR_MAX_STATES) 260 panic("%s: %s: %s: invalid chunk state", 261 DEVNAME(sd->sd_sc), 262 sd->sd_meta->ssd_devname, 263 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 264 states[s]++; 265 } 266 267 if (states[BIOC_SDONLINE] == nd) 268 new_state = BIOC_SVONLINE; 269 else if (states[BIOC_SDONLINE] == 0) 270 new_state = BIOC_SVOFFLINE; 271 else if (states[BIOC_SDSCRUB] != 0) 272 new_state = BIOC_SVSCRUB; 273 else if (states[BIOC_SDREBUILD] != 0) 274 new_state = BIOC_SVREBUILD; 275 else if (states[BIOC_SDOFFLINE] != 0) 276 new_state = BIOC_SVDEGRADED; 277 else { 278 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 279 "was %d\n", DEVNAME(sd->sd_sc), old_state); 280 panic("invalid volume state"); 281 } 282 283 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 284 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 285 old_state, new_state); 286 287 switch (old_state) { 288 case BIOC_SVONLINE: 289 switch (new_state) { 290 case BIOC_SVONLINE: /* can go to same state */ 291 case BIOC_SVOFFLINE: 292 case BIOC_SVDEGRADED: 293 case BIOC_SVREBUILD: /* happens on boot */ 294 break; 295 default: 296 goto die; 297 } 298 break; 299 300 case BIOC_SVOFFLINE: 301 /* XXX this might be a little too much */ 302 goto die; 303 304 case BIOC_SVSCRUB: 305 switch (new_state) { 306 case BIOC_SVONLINE: 307 case BIOC_SVOFFLINE: 308 case BIOC_SVDEGRADED: 309 case BIOC_SVSCRUB: /* can go to same state */ 310 break; 311 default: 312 goto die; 313 } 314 break; 315 316 case BIOC_SVBUILDING: 317 switch (new_state) { 318 case BIOC_SVONLINE: 319 case BIOC_SVOFFLINE: 320 case BIOC_SVBUILDING: /* can go to the same state */ 321 break; 322 default: 323 goto die; 324 } 325 break; 326 327 case BIOC_SVREBUILD: 328 switch (new_state) { 329 case BIOC_SVONLINE: 330 case BIOC_SVOFFLINE: 331 case BIOC_SVDEGRADED: 332 case BIOC_SVREBUILD: /* can go to the same state */ 333 break; 334 default: 335 goto die; 336 } 337 break; 338 339 case BIOC_SVDEGRADED: 340 switch (new_state) { 341 case BIOC_SVOFFLINE: 342 case BIOC_SVREBUILD: 343 case BIOC_SVDEGRADED: /* can go to the same state */ 344 break; 345 default: 346 goto die; 347 } 348 break; 349 350 default: 351 die: 352 panic("%s: %s: invalid volume state transition " 353 "%d -> %d\n", DEVNAME(sd->sd_sc), 354 sd->sd_meta->ssd_devname, 355 old_state, new_state); 356 /* NOTREACHED */ 357 } 358 359 sd->sd_vol_status = new_state; 360 361 /* If we have just become degraded, look for a hotspare. */ 362 if (new_state == BIOC_SVDEGRADED) 363 workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL); 364 } 365 366 int 367 sr_raid1_rw(struct sr_workunit *wu) 368 { 369 struct sr_discipline *sd = wu->swu_dis; 370 struct scsi_xfer *xs = wu->swu_xs; 371 struct sr_ccb *ccb; 372 struct buf *b; 373 struct sr_chunk *scp; 374 int ios, x, i, s, rt; 375 daddr64_t blk; 376 377 /* blk and scsi error will be handled by sr_validate_io */ 378 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 379 goto bad; 380 381 /* calculate physical block */ 382 blk += sd->sd_meta->ssd_data_offset; 383 384 if (xs->flags & SCSI_DATA_IN) 385 ios = 1; 386 else 387 ios = sd->sd_meta->ssdi.ssd_chunk_no; 388 wu->swu_io_count = ios; 389 390 for (i = 0; i < ios; i++) { 391 ccb = sr_ccb_get(sd); 392 if (!ccb) { 393 /* should never happen but handle more gracefully */ 394 printf("%s: %s: too many ccbs queued\n", 395 DEVNAME(sd->sd_sc), 396 sd->sd_meta->ssd_devname); 397 goto bad; 398 } 399 b = &ccb->ccb_buf; 400 401 if (xs->flags & SCSI_POLL) { 402 b->b_flags = 0; 403 b->b_iodone = NULL; 404 } else { 405 b->b_flags = B_CALL; 406 b->b_iodone = sr_raid1_intr; 407 } 408 409 b->b_flags |= B_PHYS; 410 b->b_blkno = blk; 411 b->b_bcount = xs->datalen; 412 b->b_bufsize = xs->datalen; 413 b->b_resid = xs->datalen; 414 b->b_data = xs->data; 415 b->b_error = 0; 416 b->b_proc = curproc; 417 b->b_bq = NULL; 418 ccb->ccb_wu = wu; 419 420 if (xs->flags & SCSI_DATA_IN) { 421 rt = 0; 422 ragain: 423 /* interleave reads */ 424 x = sd->mds.mdd_raid1.sr1_counter++ % 425 sd->sd_meta->ssdi.ssd_chunk_no; 426 scp = sd->sd_vol.sv_chunks[x]; 427 switch (scp->src_meta.scm_status) { 428 case BIOC_SDONLINE: 429 case BIOC_SDSCRUB: 430 b->b_flags |= B_READ; 431 break; 432 433 case BIOC_SDOFFLINE: 434 case BIOC_SDREBUILD: 435 case BIOC_SDHOTSPARE: 436 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 437 goto ragain; 438 439 /* FALLTHROUGH */ 440 default: 441 /* volume offline */ 442 printf("%s: is offline, can't read\n", 443 DEVNAME(sd->sd_sc)); 444 sr_ccb_put(ccb); 445 goto bad; 446 } 447 } else { 448 /* writes go on all working disks */ 449 x = i; 450 scp = sd->sd_vol.sv_chunks[x]; 451 switch (scp->src_meta.scm_status) { 452 case BIOC_SDONLINE: 453 case BIOC_SDSCRUB: 454 case BIOC_SDREBUILD: 455 b->b_flags |= B_WRITE; 456 break; 457 458 case BIOC_SDHOTSPARE: /* should never happen */ 459 case BIOC_SDOFFLINE: 460 wu->swu_io_count--; 461 sr_ccb_put(ccb); 462 continue; 463 464 default: 465 goto bad; 466 } 467 468 } 469 ccb->ccb_target = x; 470 b->b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm; 471 b->b_vp = sd->sd_vol.sv_chunks[x]->src_vn; 472 if ((b->b_flags & B_READ) == 0) 473 b->b_vp->v_numoutput++; 474 475 LIST_INIT(&b->b_dep); 476 477 if (wu->swu_cb_active == 1) 478 panic("%s: sr_raid1_rw", DEVNAME(sd->sd_sc)); 479 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 480 481 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " 482 "b_blkno: %x b_flags 0x%0x b_data %p\n", 483 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 484 b->b_bcount, b->b_blkno, 485 b->b_flags, b->b_data); 486 } 487 488 s = splbio(); 489 490 /* rebuild io, let rebuild routine deal with it */ 491 if (wu->swu_flags & SR_WUF_REBUILD) 492 goto queued; 493 494 /* current io failed, restart */ 495 if (wu->swu_state == SR_WU_RESTART) 496 goto start; 497 498 /* deferred io failed, don't restart */ 499 if (wu->swu_state == SR_WU_REQUEUE) 500 goto queued; 501 502 if (sr_check_io_collision(wu)) 503 goto queued; 504 505 start: 506 sr_raid_startwu(wu); 507 queued: 508 splx(s); 509 return (0); 510 bad: 511 /* wu is unwound by sr_wu_put */ 512 return (1); 513 } 514 515 void 516 sr_raid1_intr(struct buf *bp) 517 { 518 struct sr_ccb *ccb = (struct sr_ccb *)bp; 519 struct sr_workunit *wu = ccb->ccb_wu, *wup; 520 struct sr_discipline *sd = wu->swu_dis; 521 struct scsi_xfer *xs = wu->swu_xs; 522 struct sr_softc *sc = sd->sd_sc; 523 struct buf *b; 524 int s, pend; 525 526 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 527 DEVNAME(sc), bp, xs); 528 529 b = &ccb->ccb_buf; 530 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 531 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 532 b->b_bcount, b->b_resid, b->b_flags, b->b_blkno, ccb->ccb_target); 533 534 s = splbio(); 535 536 if (b->b_flags & B_ERROR) { 537 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 538 DEVNAME(sc), b->b_blkno, ccb->ccb_target); 539 wu->swu_ios_failed++; 540 ccb->ccb_state = SR_CCB_FAILED; 541 if (ccb->ccb_target != -1) 542 sd->sd_set_chunk_state(sd, ccb->ccb_target, 543 BIOC_SDOFFLINE); 544 else 545 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 546 } else { 547 ccb->ccb_state = SR_CCB_OK; 548 wu->swu_ios_succeeded++; 549 } 550 wu->swu_ios_complete++; 551 552 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 553 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 554 wu->swu_ios_failed); 555 556 if (wu->swu_ios_complete >= wu->swu_io_count) { 557 /* if all ios failed, retry reads and give up on writes */ 558 if (wu->swu_ios_failed == wu->swu_ios_complete) { 559 if (xs->flags & SCSI_DATA_IN) { 560 printf("%s: retrying read on block %lld\n", 561 DEVNAME(sc), b->b_blkno); 562 sr_ccb_put(ccb); 563 if (wu->swu_cb_active == 1) 564 panic("%s: sr_raid1_intr_cb", 565 DEVNAME(sd->sd_sc)); 566 TAILQ_INIT(&wu->swu_ccb); 567 wu->swu_state = SR_WU_RESTART; 568 if (sd->sd_scsi_rw(wu)) 569 goto bad; 570 else 571 goto retry; 572 } else { 573 printf("%s: permanently fail write on block " 574 "%lld\n", DEVNAME(sc), b->b_blkno); 575 xs->error = XS_DRIVER_STUFFUP; 576 goto bad; 577 } 578 } 579 580 xs->error = XS_NOERROR; 581 xs->resid = 0; 582 583 pend = 0; 584 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 585 if (wu == wup) { 586 /* wu on pendq, remove */ 587 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 588 pend = 1; 589 590 if (wu->swu_collider) { 591 if (wu->swu_ios_failed) 592 /* toss all ccbs and recreate */ 593 sr_raid1_recreate_wu(wu->swu_collider); 594 595 /* restart deferred wu */ 596 wu->swu_collider->swu_state = 597 SR_WU_INPROGRESS; 598 TAILQ_REMOVE(&sd->sd_wu_defq, 599 wu->swu_collider, swu_link); 600 sr_raid_startwu(wu->swu_collider); 601 } 602 break; 603 } 604 } 605 606 if (!pend) 607 printf("%s: wu: %p not on pending queue\n", 608 DEVNAME(sc), wu); 609 610 if (wu->swu_flags & SR_WUF_REBUILD) { 611 if (wu->swu_xs->flags & SCSI_DATA_OUT) { 612 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 613 wakeup(wu); 614 } 615 } else { 616 scsi_done(xs); 617 } 618 619 if (sd->sd_sync && sd->sd_wu_pending == 0) 620 wakeup(sd); 621 } 622 623 retry: 624 splx(s); 625 return; 626 bad: 627 xs->error = XS_DRIVER_STUFFUP; 628 if (wu->swu_flags & SR_WUF_REBUILD) { 629 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 630 wakeup(wu); 631 } else { 632 scsi_done(xs); 633 } 634 635 splx(s); 636 } 637 638 void 639 sr_raid1_recreate_wu(struct sr_workunit *wu) 640 { 641 struct sr_discipline *sd = wu->swu_dis; 642 struct sr_workunit *wup = wu; 643 struct sr_ccb *ccb; 644 645 do { 646 DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); 647 648 /* toss all ccbs */ 649 if (wu->swu_cb_active == 1) 650 panic("%s: sr_raid1_recreate_wu", DEVNAME(sd->sd_sc)); 651 while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { 652 TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); 653 sr_ccb_put(ccb); 654 } 655 TAILQ_INIT(&wup->swu_ccb); 656 657 /* recreate ccbs */ 658 wup->swu_state = SR_WU_REQUEUE; 659 if (sd->sd_scsi_rw(wup)) 660 panic("could not requeue io"); 661 662 wup = wup->swu_collider; 663 } while (wup); 664 } 665