1 /* $OpenBSD: softraid_raid1.c,v 1.23 2010/03/26 11:20:34 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int); 51 int sr_raid1_alloc_resources(struct sr_discipline *); 52 int sr_raid1_free_resources(struct sr_discipline *); 53 int sr_raid1_rw(struct sr_workunit *); 54 void sr_raid1_intr(struct buf *); 55 void sr_raid1_recreate_wu(struct sr_workunit *); 56 57 /* Discipline initialisation. */ 58 void 59 sr_raid1_discipline_init(struct sr_discipline *sd) 60 { 61 62 /* Fill out discipline members. */ 63 sd->sd_type = SR_MD_RAID1; 64 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 65 SR_CAP_REBUILD; 66 sd->sd_max_wu = SR_RAID1_NOWU; 67 68 /* Setup discipline pointers. */ 69 sd->sd_create = sr_raid1_create; 70 sd->sd_assemble = sr_raid1_assemble; 71 sd->sd_alloc_resources = sr_raid1_alloc_resources; 72 sd->sd_free_resources = sr_raid1_free_resources; 73 sd->sd_start_discipline = NULL; 74 sd->sd_scsi_inquiry = sr_raid_inquiry; 75 sd->sd_scsi_read_cap = sr_raid_read_cap; 76 sd->sd_scsi_tur = sr_raid_tur; 77 sd->sd_scsi_req_sense = sr_raid_request_sense; 78 sd->sd_scsi_start_stop = sr_raid_start_stop; 79 sd->sd_scsi_sync = sr_raid_sync; 80 sd->sd_scsi_rw = sr_raid1_rw; 81 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 82 sd->sd_set_vol_state = sr_raid1_set_vol_state; 83 } 84 85 int 86 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 87 int no_chunk, int64_t coerced_size) 88 { 89 90 if (no_chunk < 2) 91 return EINVAL; 92 93 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 94 sd->sd_meta->ssdi.ssd_size = coerced_size; 95 96 sd->sd_max_ccb_per_wu = no_chunk; 97 98 return 0; 99 } 100 101 int 102 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 103 int no_chunk) 104 { 105 106 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 107 108 return 0; 109 } 110 111 int 112 sr_raid1_alloc_resources(struct sr_discipline *sd) 113 { 114 int rv = EINVAL; 115 116 if (!sd) 117 return (rv); 118 119 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 120 DEVNAME(sd->sd_sc)); 121 122 if (sr_wu_alloc(sd)) 123 goto bad; 124 if (sr_ccb_alloc(sd)) 125 goto bad; 126 127 rv = 0; 128 bad: 129 return (rv); 130 } 131 132 int 133 sr_raid1_free_resources(struct sr_discipline *sd) 134 { 135 int rv = EINVAL; 136 137 if (!sd) 138 return (rv); 139 140 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 141 DEVNAME(sd->sd_sc)); 142 143 sr_wu_free(sd); 144 sr_ccb_free(sd); 145 146 rv = 0; 147 return (rv); 148 } 149 150 void 151 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 152 { 153 int old_state, s; 154 155 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 156 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 157 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 158 159 /* ok to go to splbio since this only happens in error path */ 160 s = splbio(); 161 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 162 163 /* multiple IOs to the same chunk that fail will come through here */ 164 if (old_state == new_state) 165 goto done; 166 167 switch (old_state) { 168 case BIOC_SDONLINE: 169 switch (new_state) { 170 case BIOC_SDOFFLINE: 171 case BIOC_SDSCRUB: 172 break; 173 default: 174 goto die; 175 } 176 break; 177 178 case BIOC_SDOFFLINE: 179 switch (new_state) { 180 case BIOC_SDREBUILD: 181 case BIOC_SDHOTSPARE: 182 break; 183 default: 184 goto die; 185 } 186 break; 187 188 case BIOC_SDSCRUB: 189 if (new_state == BIOC_SDONLINE) { 190 ; 191 } else 192 goto die; 193 break; 194 195 case BIOC_SDREBUILD: 196 switch (new_state) { 197 case BIOC_SDONLINE: 198 break; 199 case BIOC_SDOFFLINE: 200 /* Abort rebuild since the rebuild chunk disappeared. */ 201 sd->sd_reb_abort = 1; 202 break; 203 default: 204 goto die; 205 } 206 break; 207 208 case BIOC_SDHOTSPARE: 209 switch (new_state) { 210 case BIOC_SDOFFLINE: 211 case BIOC_SDREBUILD: 212 break; 213 default: 214 goto die; 215 } 216 break; 217 218 default: 219 die: 220 splx(s); /* XXX */ 221 panic("%s: %s: %s: invalid chunk state transition " 222 "%d -> %d\n", DEVNAME(sd->sd_sc), 223 sd->sd_meta->ssd_devname, 224 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 225 old_state, new_state); 226 /* NOTREACHED */ 227 } 228 229 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 230 sd->sd_set_vol_state(sd); 231 232 sd->sd_must_flush = 1; 233 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 234 done: 235 splx(s); 236 } 237 238 void 239 sr_raid1_set_vol_state(struct sr_discipline *sd) 240 { 241 int states[SR_MAX_STATES]; 242 int new_state, i, s, nd; 243 int old_state = sd->sd_vol_status; 244 245 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 246 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 247 248 nd = sd->sd_meta->ssdi.ssd_chunk_no; 249 250 for (i = 0; i < SR_MAX_STATES; i++) 251 states[i] = 0; 252 253 for (i = 0; i < nd; i++) { 254 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 255 if (s >= SR_MAX_STATES) 256 panic("%s: %s: %s: invalid chunk state", 257 DEVNAME(sd->sd_sc), 258 sd->sd_meta->ssd_devname, 259 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 260 states[s]++; 261 } 262 263 if (states[BIOC_SDONLINE] == nd) 264 new_state = BIOC_SVONLINE; 265 else if (states[BIOC_SDONLINE] == 0) 266 new_state = BIOC_SVOFFLINE; 267 else if (states[BIOC_SDSCRUB] != 0) 268 new_state = BIOC_SVSCRUB; 269 else if (states[BIOC_SDREBUILD] != 0) 270 new_state = BIOC_SVREBUILD; 271 else if (states[BIOC_SDOFFLINE] != 0) 272 new_state = BIOC_SVDEGRADED; 273 else { 274 #ifdef SR_DEBUG 275 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 276 "was %d\n", DEVNAME(sd->sd_sc), old_state); 277 for (i = 0; i < nd; i++) 278 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %d\n", 279 DEVNAME(sd->sd_sc), i, 280 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 281 #endif 282 panic("invalid volume state"); 283 } 284 285 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 286 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 287 old_state, new_state); 288 289 switch (old_state) { 290 case BIOC_SVONLINE: 291 switch (new_state) { 292 case BIOC_SVONLINE: /* can go to same state */ 293 case BIOC_SVOFFLINE: 294 case BIOC_SVDEGRADED: 295 case BIOC_SVREBUILD: /* happens on boot */ 296 break; 297 default: 298 goto die; 299 } 300 break; 301 302 case BIOC_SVOFFLINE: 303 /* XXX this might be a little too much */ 304 goto die; 305 306 case BIOC_SVSCRUB: 307 switch (new_state) { 308 case BIOC_SVONLINE: 309 case BIOC_SVOFFLINE: 310 case BIOC_SVDEGRADED: 311 case BIOC_SVSCRUB: /* can go to same state */ 312 break; 313 default: 314 goto die; 315 } 316 break; 317 318 case BIOC_SVBUILDING: 319 switch (new_state) { 320 case BIOC_SVONLINE: 321 case BIOC_SVOFFLINE: 322 case BIOC_SVBUILDING: /* can go to the same state */ 323 break; 324 default: 325 goto die; 326 } 327 break; 328 329 case BIOC_SVREBUILD: 330 switch (new_state) { 331 case BIOC_SVONLINE: 332 case BIOC_SVOFFLINE: 333 case BIOC_SVDEGRADED: 334 case BIOC_SVREBUILD: /* can go to the same state */ 335 break; 336 default: 337 goto die; 338 } 339 break; 340 341 case BIOC_SVDEGRADED: 342 switch (new_state) { 343 case BIOC_SVOFFLINE: 344 case BIOC_SVREBUILD: 345 case BIOC_SVDEGRADED: /* can go to the same state */ 346 break; 347 default: 348 goto die; 349 } 350 break; 351 352 default: 353 die: 354 panic("%s: %s: invalid volume state transition " 355 "%d -> %d\n", DEVNAME(sd->sd_sc), 356 sd->sd_meta->ssd_devname, 357 old_state, new_state); 358 /* NOTREACHED */ 359 } 360 361 sd->sd_vol_status = new_state; 362 363 /* If we have just become degraded, look for a hotspare. */ 364 if (new_state == BIOC_SVDEGRADED) 365 workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL); 366 } 367 368 int 369 sr_raid1_rw(struct sr_workunit *wu) 370 { 371 struct sr_discipline *sd = wu->swu_dis; 372 struct scsi_xfer *xs = wu->swu_xs; 373 struct sr_ccb *ccb; 374 struct buf *b; 375 struct sr_chunk *scp; 376 int ios, x, i, s, rt; 377 daddr64_t blk; 378 379 /* blk and scsi error will be handled by sr_validate_io */ 380 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 381 goto bad; 382 383 /* calculate physical block */ 384 blk += SR_DATA_OFFSET; 385 386 if (xs->flags & SCSI_DATA_IN) 387 ios = 1; 388 else 389 ios = sd->sd_meta->ssdi.ssd_chunk_no; 390 wu->swu_io_count = ios; 391 392 for (i = 0; i < ios; i++) { 393 ccb = sr_ccb_get(sd); 394 if (!ccb) { 395 /* should never happen but handle more gracefully */ 396 printf("%s: %s: too many ccbs queued\n", 397 DEVNAME(sd->sd_sc), 398 sd->sd_meta->ssd_devname); 399 goto bad; 400 } 401 b = &ccb->ccb_buf; 402 403 if (xs->flags & SCSI_POLL) { 404 b->b_flags = 0; 405 b->b_iodone = NULL; 406 } else { 407 b->b_flags = B_CALL; 408 b->b_iodone = sr_raid1_intr; 409 } 410 411 b->b_flags |= B_PHYS; 412 b->b_blkno = blk; 413 b->b_bcount = xs->datalen; 414 b->b_bufsize = xs->datalen; 415 b->b_resid = xs->datalen; 416 b->b_data = xs->data; 417 b->b_error = 0; 418 b->b_proc = curproc; 419 ccb->ccb_wu = wu; 420 421 if (xs->flags & SCSI_DATA_IN) { 422 rt = 0; 423 ragain: 424 /* interleave reads */ 425 x = sd->mds.mdd_raid1.sr1_counter++ % 426 sd->sd_meta->ssdi.ssd_chunk_no; 427 scp = sd->sd_vol.sv_chunks[x]; 428 switch (scp->src_meta.scm_status) { 429 case BIOC_SDONLINE: 430 case BIOC_SDSCRUB: 431 b->b_flags |= B_READ; 432 break; 433 434 case BIOC_SDOFFLINE: 435 case BIOC_SDREBUILD: 436 case BIOC_SDHOTSPARE: 437 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 438 goto ragain; 439 440 /* FALLTHROUGH */ 441 default: 442 /* volume offline */ 443 printf("%s: is offline, can't read\n", 444 DEVNAME(sd->sd_sc)); 445 sr_ccb_put(ccb); 446 goto bad; 447 } 448 } else { 449 /* writes go on all working disks */ 450 x = i; 451 scp = sd->sd_vol.sv_chunks[x]; 452 switch (scp->src_meta.scm_status) { 453 case BIOC_SDONLINE: 454 case BIOC_SDSCRUB: 455 case BIOC_SDREBUILD: 456 b->b_flags |= B_WRITE; 457 break; 458 459 case BIOC_SDHOTSPARE: /* should never happen */ 460 case BIOC_SDOFFLINE: 461 wu->swu_io_count--; 462 sr_ccb_put(ccb); 463 continue; 464 465 default: 466 goto bad; 467 } 468 469 } 470 ccb->ccb_target = x; 471 b->b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm; 472 b->b_vp = sd->sd_vol.sv_chunks[x]->src_vn; 473 if ((b->b_flags & B_READ) == 0) 474 b->b_vp->v_numoutput++; 475 476 LIST_INIT(&b->b_dep); 477 478 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 479 480 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " 481 "b_blkno: %x b_flags 0x%0x b_data %p\n", 482 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 483 b->b_bcount, b->b_blkno, 484 b->b_flags, b->b_data); 485 } 486 487 s = splbio(); 488 489 /* rebuild io, let rebuild routine deal with it */ 490 if (wu->swu_flags & SR_WUF_REBUILD) 491 goto queued; 492 493 /* current io failed, restart */ 494 if (wu->swu_state == SR_WU_RESTART) 495 goto start; 496 497 /* deferred io failed, don't restart */ 498 if (wu->swu_state == SR_WU_REQUEUE) 499 goto queued; 500 501 if (sr_check_io_collision(wu)) 502 goto queued; 503 504 start: 505 sr_raid_startwu(wu); 506 queued: 507 splx(s); 508 return (0); 509 bad: 510 /* wu is unwound by sr_wu_put */ 511 return (1); 512 } 513 514 void 515 sr_raid1_intr(struct buf *bp) 516 { 517 struct sr_ccb *ccb = (struct sr_ccb *)bp; 518 struct sr_workunit *wu = ccb->ccb_wu, *wup; 519 struct sr_discipline *sd = wu->swu_dis; 520 struct scsi_xfer *xs = wu->swu_xs; 521 struct sr_softc *sc = sd->sd_sc; 522 struct buf *b; 523 int s, pend; 524 525 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 526 DEVNAME(sc), bp, xs); 527 528 b = &ccb->ccb_buf; 529 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 530 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 531 b->b_bcount, b->b_resid, b->b_flags, b->b_blkno, ccb->ccb_target); 532 533 s = splbio(); 534 535 if (b->b_flags & B_ERROR) { 536 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 537 DEVNAME(sc), b->b_blkno, ccb->ccb_target); 538 wu->swu_ios_failed++; 539 ccb->ccb_state = SR_CCB_FAILED; 540 if (ccb->ccb_target != -1) 541 sd->sd_set_chunk_state(sd, ccb->ccb_target, 542 BIOC_SDOFFLINE); 543 else 544 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 545 } else { 546 ccb->ccb_state = SR_CCB_OK; 547 wu->swu_ios_succeeded++; 548 } 549 wu->swu_ios_complete++; 550 551 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 552 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 553 wu->swu_ios_failed); 554 555 if (wu->swu_ios_complete >= wu->swu_io_count) { 556 /* if all ios failed, retry reads and give up on writes */ 557 if (wu->swu_ios_failed == wu->swu_ios_complete) { 558 if (xs->flags & SCSI_DATA_IN) { 559 printf("%s: retrying read on block %lld\n", 560 DEVNAME(sc), b->b_blkno); 561 sr_ccb_put(ccb); 562 TAILQ_INIT(&wu->swu_ccb); 563 wu->swu_state = SR_WU_RESTART; 564 if (sd->sd_scsi_rw(wu)) 565 goto bad; 566 else 567 goto retry; 568 } else { 569 printf("%s: permanently fail write on block " 570 "%lld\n", DEVNAME(sc), b->b_blkno); 571 xs->error = XS_DRIVER_STUFFUP; 572 goto bad; 573 } 574 } 575 576 xs->error = XS_NOERROR; 577 xs->resid = 0; 578 579 pend = 0; 580 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 581 if (wu == wup) { 582 /* wu on pendq, remove */ 583 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 584 pend = 1; 585 586 if (wu->swu_collider) { 587 if (wu->swu_ios_failed) 588 /* toss all ccbs and recreate */ 589 sr_raid1_recreate_wu(wu->swu_collider); 590 591 /* restart deferred wu */ 592 wu->swu_collider->swu_state = 593 SR_WU_INPROGRESS; 594 TAILQ_REMOVE(&sd->sd_wu_defq, 595 wu->swu_collider, swu_link); 596 sr_raid_startwu(wu->swu_collider); 597 } 598 break; 599 } 600 } 601 602 if (!pend) 603 printf("%s: wu: %p not on pending queue\n", 604 DEVNAME(sc), wu); 605 606 if (wu->swu_flags & SR_WUF_REBUILD) { 607 if (wu->swu_xs->flags & SCSI_DATA_OUT) { 608 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 609 wakeup(wu); 610 } 611 } else { 612 /* do not change the order of these 2 functions */ 613 sr_wu_put(wu); 614 scsi_done(xs); 615 } 616 617 if (sd->sd_sync && sd->sd_wu_pending == 0) 618 wakeup(sd); 619 } 620 621 retry: 622 splx(s); 623 return; 624 bad: 625 xs->error = XS_DRIVER_STUFFUP; 626 if (wu->swu_flags & SR_WUF_REBUILD) { 627 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 628 wakeup(wu); 629 } else { 630 /* do not change the order of these 2 functions */ 631 sr_wu_put(wu); 632 scsi_done(xs); 633 } 634 635 splx(s); 636 } 637 638 void 639 sr_raid1_recreate_wu(struct sr_workunit *wu) 640 { 641 struct sr_discipline *sd = wu->swu_dis; 642 struct sr_workunit *wup = wu; 643 struct sr_ccb *ccb; 644 645 do { 646 DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); 647 648 /* toss all ccbs */ 649 while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { 650 TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); 651 sr_ccb_put(ccb); 652 } 653 TAILQ_INIT(&wup->swu_ccb); 654 655 /* recreate ccbs */ 656 wup->swu_state = SR_WU_REQUEUE; 657 if (sd->sd_scsi_rw(wup)) 658 panic("could not requeue io"); 659 660 wup = wup->swu_collider; 661 } while (wup); 662 } 663