1 /* $OpenBSD: softraid_raid1.c,v 1.42 2013/03/25 16:01:49 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int, void *); 51 int sr_raid1_alloc_resources(struct sr_discipline *); 52 int sr_raid1_free_resources(struct sr_discipline *); 53 int sr_raid1_rw(struct sr_workunit *); 54 void sr_raid1_intr(struct buf *); 55 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 56 void sr_raid1_set_vol_state(struct sr_discipline *); 57 58 /* Discipline initialisation. */ 59 void 60 sr_raid1_discipline_init(struct sr_discipline *sd) 61 { 62 /* Fill out discipline members. */ 63 sd->sd_type = SR_MD_RAID1; 64 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 65 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 66 SR_CAP_REBUILD | SR_CAP_REDUNDANT; 67 sd->sd_max_wu = SR_RAID1_NOWU; 68 69 /* Setup discipline specific function pointers. */ 70 sd->sd_alloc_resources = sr_raid1_alloc_resources; 71 sd->sd_assemble = sr_raid1_assemble; 72 sd->sd_create = sr_raid1_create; 73 sd->sd_free_resources = sr_raid1_free_resources; 74 sd->sd_scsi_rw = sr_raid1_rw; 75 sd->sd_scsi_intr = sr_raid1_intr; 76 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 77 sd->sd_set_vol_state = sr_raid1_set_vol_state; 78 } 79 80 int 81 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 82 int no_chunk, int64_t coerced_size) 83 { 84 85 if (no_chunk < 2) { 86 sr_error(sd->sd_sc, "RAID 1 requires two or more chunks"); 87 return EINVAL; 88 } 89 90 sd->sd_meta->ssdi.ssd_size = coerced_size; 91 92 sd->sd_max_ccb_per_wu = no_chunk; 93 94 return 0; 95 } 96 97 int 98 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 99 int no_chunk, void *data) 100 { 101 102 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 103 104 return 0; 105 } 106 107 int 108 sr_raid1_alloc_resources(struct sr_discipline *sd) 109 { 110 int rv = EINVAL; 111 112 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 113 DEVNAME(sd->sd_sc)); 114 115 if (sr_wu_alloc(sd)) 116 goto bad; 117 if (sr_ccb_alloc(sd)) 118 goto bad; 119 120 rv = 0; 121 bad: 122 return (rv); 123 } 124 125 int 126 sr_raid1_free_resources(struct sr_discipline *sd) 127 { 128 int rv = EINVAL; 129 130 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 131 DEVNAME(sd->sd_sc)); 132 133 sr_wu_free(sd); 134 sr_ccb_free(sd); 135 136 rv = 0; 137 return (rv); 138 } 139 140 void 141 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 142 { 143 int old_state, s; 144 145 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 146 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 147 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 148 149 /* ok to go to splbio since this only happens in error path */ 150 s = splbio(); 151 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 152 153 /* multiple IOs to the same chunk that fail will come through here */ 154 if (old_state == new_state) 155 goto done; 156 157 switch (old_state) { 158 case BIOC_SDONLINE: 159 switch (new_state) { 160 case BIOC_SDOFFLINE: 161 case BIOC_SDSCRUB: 162 break; 163 default: 164 goto die; 165 } 166 break; 167 168 case BIOC_SDOFFLINE: 169 switch (new_state) { 170 case BIOC_SDREBUILD: 171 case BIOC_SDHOTSPARE: 172 break; 173 default: 174 goto die; 175 } 176 break; 177 178 case BIOC_SDSCRUB: 179 if (new_state == BIOC_SDONLINE) { 180 ; 181 } else 182 goto die; 183 break; 184 185 case BIOC_SDREBUILD: 186 switch (new_state) { 187 case BIOC_SDONLINE: 188 break; 189 case BIOC_SDOFFLINE: 190 /* Abort rebuild since the rebuild chunk disappeared. */ 191 sd->sd_reb_abort = 1; 192 break; 193 default: 194 goto die; 195 } 196 break; 197 198 case BIOC_SDHOTSPARE: 199 switch (new_state) { 200 case BIOC_SDOFFLINE: 201 case BIOC_SDREBUILD: 202 break; 203 default: 204 goto die; 205 } 206 break; 207 208 default: 209 die: 210 splx(s); /* XXX */ 211 panic("%s: %s: %s: invalid chunk state transition " 212 "%d -> %d\n", DEVNAME(sd->sd_sc), 213 sd->sd_meta->ssd_devname, 214 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 215 old_state, new_state); 216 /* NOTREACHED */ 217 } 218 219 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 220 sd->sd_set_vol_state(sd); 221 222 sd->sd_must_flush = 1; 223 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 224 done: 225 splx(s); 226 } 227 228 void 229 sr_raid1_set_vol_state(struct sr_discipline *sd) 230 { 231 int states[SR_MAX_STATES]; 232 int new_state, i, s, nd; 233 int old_state = sd->sd_vol_status; 234 235 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 236 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 237 238 nd = sd->sd_meta->ssdi.ssd_chunk_no; 239 240 #ifdef SR_DEBUG 241 for (i = 0; i < nd; i++) 242 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 243 DEVNAME(sd->sd_sc), i, 244 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 245 #endif 246 247 for (i = 0; i < SR_MAX_STATES; i++) 248 states[i] = 0; 249 250 for (i = 0; i < nd; i++) { 251 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 252 if (s >= SR_MAX_STATES) 253 panic("%s: %s: %s: invalid chunk state", 254 DEVNAME(sd->sd_sc), 255 sd->sd_meta->ssd_devname, 256 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 257 states[s]++; 258 } 259 260 if (states[BIOC_SDONLINE] == nd) 261 new_state = BIOC_SVONLINE; 262 else if (states[BIOC_SDONLINE] == 0) 263 new_state = BIOC_SVOFFLINE; 264 else if (states[BIOC_SDSCRUB] != 0) 265 new_state = BIOC_SVSCRUB; 266 else if (states[BIOC_SDREBUILD] != 0) 267 new_state = BIOC_SVREBUILD; 268 else if (states[BIOC_SDOFFLINE] != 0) 269 new_state = BIOC_SVDEGRADED; 270 else { 271 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 272 "was %d\n", DEVNAME(sd->sd_sc), old_state); 273 panic("invalid volume state"); 274 } 275 276 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 277 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 278 old_state, new_state); 279 280 switch (old_state) { 281 case BIOC_SVONLINE: 282 switch (new_state) { 283 case BIOC_SVONLINE: /* can go to same state */ 284 case BIOC_SVOFFLINE: 285 case BIOC_SVDEGRADED: 286 case BIOC_SVREBUILD: /* happens on boot */ 287 break; 288 default: 289 goto die; 290 } 291 break; 292 293 case BIOC_SVOFFLINE: 294 /* XXX this might be a little too much */ 295 goto die; 296 297 case BIOC_SVSCRUB: 298 switch (new_state) { 299 case BIOC_SVONLINE: 300 case BIOC_SVOFFLINE: 301 case BIOC_SVDEGRADED: 302 case BIOC_SVSCRUB: /* can go to same state */ 303 break; 304 default: 305 goto die; 306 } 307 break; 308 309 case BIOC_SVBUILDING: 310 switch (new_state) { 311 case BIOC_SVONLINE: 312 case BIOC_SVOFFLINE: 313 case BIOC_SVBUILDING: /* can go to the same state */ 314 break; 315 default: 316 goto die; 317 } 318 break; 319 320 case BIOC_SVREBUILD: 321 switch (new_state) { 322 case BIOC_SVONLINE: 323 case BIOC_SVOFFLINE: 324 case BIOC_SVDEGRADED: 325 case BIOC_SVREBUILD: /* can go to the same state */ 326 break; 327 default: 328 goto die; 329 } 330 break; 331 332 case BIOC_SVDEGRADED: 333 switch (new_state) { 334 case BIOC_SVOFFLINE: 335 case BIOC_SVREBUILD: 336 case BIOC_SVDEGRADED: /* can go to the same state */ 337 break; 338 default: 339 goto die; 340 } 341 break; 342 343 default: 344 die: 345 panic("%s: %s: invalid volume state transition " 346 "%d -> %d\n", DEVNAME(sd->sd_sc), 347 sd->sd_meta->ssd_devname, 348 old_state, new_state); 349 /* NOTREACHED */ 350 } 351 352 sd->sd_vol_status = new_state; 353 354 /* If we have just become degraded, look for a hotspare. */ 355 if (new_state == BIOC_SVDEGRADED) 356 workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL); 357 } 358 359 int 360 sr_raid1_rw(struct sr_workunit *wu) 361 { 362 struct sr_discipline *sd = wu->swu_dis; 363 struct scsi_xfer *xs = wu->swu_xs; 364 struct sr_ccb *ccb; 365 struct sr_chunk *scp; 366 int ios, chunk, i, s, rt; 367 daddr64_t blk; 368 369 /* blk and scsi error will be handled by sr_validate_io */ 370 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 371 goto bad; 372 373 /* calculate physical block */ 374 blk += sd->sd_meta->ssd_data_offset; 375 376 if (xs->flags & SCSI_DATA_IN) 377 ios = 1; 378 else 379 ios = sd->sd_meta->ssdi.ssd_chunk_no; 380 381 for (i = 0; i < ios; i++) { 382 if (xs->flags & SCSI_DATA_IN) { 383 rt = 0; 384 ragain: 385 /* interleave reads */ 386 chunk = sd->mds.mdd_raid1.sr1_counter++ % 387 sd->sd_meta->ssdi.ssd_chunk_no; 388 scp = sd->sd_vol.sv_chunks[chunk]; 389 switch (scp->src_meta.scm_status) { 390 case BIOC_SDONLINE: 391 case BIOC_SDSCRUB: 392 break; 393 394 case BIOC_SDOFFLINE: 395 case BIOC_SDREBUILD: 396 case BIOC_SDHOTSPARE: 397 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 398 goto ragain; 399 400 /* FALLTHROUGH */ 401 default: 402 /* volume offline */ 403 printf("%s: is offline, cannot read\n", 404 DEVNAME(sd->sd_sc)); 405 goto bad; 406 } 407 } else { 408 /* writes go on all working disks */ 409 chunk = i; 410 scp = sd->sd_vol.sv_chunks[chunk]; 411 switch (scp->src_meta.scm_status) { 412 case BIOC_SDONLINE: 413 case BIOC_SDSCRUB: 414 case BIOC_SDREBUILD: 415 break; 416 417 case BIOC_SDHOTSPARE: /* should never happen */ 418 case BIOC_SDOFFLINE: 419 continue; 420 421 default: 422 goto bad; 423 } 424 } 425 426 ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data, 427 xs->flags, 0); 428 if (!ccb) { 429 /* should never happen but handle more gracefully */ 430 printf("%s: %s: too many ccbs queued\n", 431 DEVNAME(sd->sd_sc), 432 sd->sd_meta->ssd_devname); 433 goto bad; 434 } 435 sr_wu_enqueue_ccb(wu, ccb); 436 } 437 438 s = splbio(); 439 440 /* rebuild io, let rebuild routine deal with it */ 441 if (wu->swu_flags & SR_WUF_REBUILD) 442 goto queued; 443 444 /* current io failed, restart */ 445 if (wu->swu_state == SR_WU_RESTART) 446 goto start; 447 448 /* deferred io failed, don't restart */ 449 if (wu->swu_state == SR_WU_REQUEUE) 450 goto queued; 451 452 if (sr_check_io_collision(wu)) 453 goto queued; 454 455 start: 456 sr_raid_startwu(wu); 457 queued: 458 splx(s); 459 return (0); 460 bad: 461 /* wu is unwound by sr_wu_put */ 462 return (1); 463 } 464 465 void 466 sr_raid1_intr(struct buf *bp) 467 { 468 struct sr_ccb *ccb = (struct sr_ccb *)bp; 469 struct sr_workunit *wu = ccb->ccb_wu, *wup; 470 struct sr_discipline *sd = wu->swu_dis; 471 struct scsi_xfer *xs = wu->swu_xs; 472 struct sr_softc *sc = sd->sd_sc; 473 int s, pend; 474 475 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 476 DEVNAME(sc), bp, xs); 477 478 s = splbio(); 479 480 sr_ccb_done(ccb); 481 482 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 483 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 484 wu->swu_ios_failed); 485 486 if (wu->swu_ios_complete >= wu->swu_io_count) { 487 /* if all ios failed, retry reads and give up on writes */ 488 if (wu->swu_ios_failed == wu->swu_ios_complete) { 489 if (xs->flags & SCSI_DATA_IN) { 490 printf("%s: retrying read on block %lld\n", 491 DEVNAME(sc), ccb->ccb_buf.b_blkno); 492 sr_ccb_put(ccb); 493 if (wu->swu_cb_active == 1) 494 panic("%s: sr_raid1_intr_cb", 495 DEVNAME(sd->sd_sc)); 496 TAILQ_INIT(&wu->swu_ccb); 497 wu->swu_state = SR_WU_RESTART; 498 if (sd->sd_scsi_rw(wu)) 499 goto bad; 500 else 501 goto retry; 502 } else { 503 printf("%s: permanently fail write on block " 504 "%lld\n", DEVNAME(sc), 505 ccb->ccb_buf.b_blkno); 506 xs->error = XS_DRIVER_STUFFUP; 507 goto bad; 508 } 509 } 510 511 xs->error = XS_NOERROR; 512 513 pend = 0; 514 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 515 if (wu == wup) { 516 /* wu on pendq, remove */ 517 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 518 pend = 1; 519 520 if (wu->swu_collider) { 521 if (wu->swu_ios_failed) 522 /* toss all ccbs and recreate */ 523 sr_raid_recreate_wu(wu->swu_collider); 524 525 /* restart deferred wu */ 526 wu->swu_collider->swu_state = 527 SR_WU_INPROGRESS; 528 TAILQ_REMOVE(&sd->sd_wu_defq, 529 wu->swu_collider, swu_link); 530 sr_raid_startwu(wu->swu_collider); 531 } 532 break; 533 } 534 } 535 536 if (!pend) 537 printf("%s: wu: %p not on pending queue\n", 538 DEVNAME(sc), wu); 539 540 if (wu->swu_flags & SR_WUF_REBUILD) { 541 if (wu->swu_xs->flags & SCSI_DATA_OUT) { 542 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 543 wakeup(wu); 544 } 545 } else { 546 sr_scsi_done(sd, xs); 547 } 548 549 if (sd->sd_sync && sd->sd_wu_pending == 0) 550 wakeup(sd); 551 } 552 553 retry: 554 splx(s); 555 return; 556 bad: 557 xs->error = XS_DRIVER_STUFFUP; 558 if (wu->swu_flags & SR_WUF_REBUILD) { 559 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 560 wakeup(wu); 561 } else { 562 sr_scsi_done(sd, xs); 563 } 564 565 splx(s); 566 } 567