1 /* $OpenBSD: softraid_raid1.c,v 1.5 2008/02/05 16:49:25 marco Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions */ 47 int 48 sr_raid1_alloc_resources(struct sr_discipline *sd) 49 { 50 int rv = EINVAL; 51 52 if (!sd) 53 return (rv); 54 55 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 56 DEVNAME(sd->sd_sc)); 57 58 if (sr_alloc_wu(sd)) 59 goto bad; 60 if (sr_alloc_ccb(sd)) 61 goto bad; 62 63 rv = 0; 64 bad: 65 return (rv); 66 } 67 68 int 69 sr_raid1_free_resources(struct sr_discipline *sd) 70 { 71 int rv = EINVAL; 72 73 if (!sd) 74 return (rv); 75 76 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 77 DEVNAME(sd->sd_sc)); 78 79 sr_free_wu(sd); 80 sr_free_ccb(sd); 81 82 if (sd->sd_meta) 83 free(sd->sd_meta, M_DEVBUF); 84 85 rv = 0; 86 return (rv); 87 } 88 89 void 90 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 91 { 92 int old_state, s; 93 94 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 95 DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, 96 sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state); 97 98 /* ok to go to splbio since this only happens in error path */ 99 s = splbio(); 100 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 101 102 /* multiple IOs to the same chunk that fail will come through here */ 103 if (old_state == new_state) 104 goto done; 105 106 switch (old_state) { 107 case BIOC_SDONLINE: 108 switch (new_state) { 109 case BIOC_SDOFFLINE: 110 break; 111 case BIOC_SDSCRUB: 112 break; 113 default: 114 goto die; 115 } 116 break; 117 118 case BIOC_SDOFFLINE: 119 if (new_state == BIOC_SDREBUILD) { 120 ; 121 } else 122 goto die; 123 break; 124 125 case BIOC_SDSCRUB: 126 if (new_state == BIOC_SDONLINE) { 127 ; 128 } else 129 goto die; 130 break; 131 132 case BIOC_SDREBUILD: 133 if (new_state == BIOC_SDONLINE) { 134 ; 135 } else 136 goto die; 137 break; 138 139 case BIOC_SDHOTSPARE: 140 if (new_state == BIOC_SDREBUILD) { 141 ; 142 } else 143 goto die; 144 break; 145 146 default: 147 die: 148 splx(s); /* XXX */ 149 panic("%s: %s: %s: invalid chunk state transition " 150 "%d -> %d\n", DEVNAME(sd->sd_sc), 151 sd->sd_vol.sv_meta.svm_devname, 152 sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, 153 old_state, new_state); 154 /* NOTREACHED */ 155 } 156 157 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 158 sd->sd_set_vol_state(sd); 159 160 sd->sd_must_flush = 1; 161 workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL); 162 done: 163 splx(s); 164 } 165 166 void 167 sr_raid1_set_vol_state(struct sr_discipline *sd) 168 { 169 int states[SR_MAX_STATES]; 170 int new_state, i, s, nd; 171 int old_state = sd->sd_vol.sv_meta.svm_status; 172 173 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 174 DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname); 175 176 nd = sd->sd_vol.sv_meta.svm_no_chunk; 177 178 for (i = 0; i < SR_MAX_STATES; i++) 179 states[i] = 0; 180 181 for (i = 0; i < nd; i++) { 182 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 183 if (s > SR_MAX_STATES) 184 panic("%s: %s: %s: invalid chunk state", 185 DEVNAME(sd->sd_sc), 186 sd->sd_vol.sv_meta.svm_devname, 187 sd->sd_vol.sv_chunks[i]->src_meta.scm_devname); 188 states[s]++; 189 } 190 191 if (states[BIOC_SDONLINE] == nd) 192 new_state = BIOC_SVONLINE; 193 else if (states[BIOC_SDONLINE] == 0) 194 new_state = BIOC_SVOFFLINE; 195 else if (states[BIOC_SDSCRUB] != 0) 196 new_state = BIOC_SVSCRUB; 197 else if (states[BIOC_SDREBUILD] != 0) 198 new_state = BIOC_SVREBUILD; 199 else if (states[BIOC_SDOFFLINE] != 0) 200 new_state = BIOC_SVDEGRADED; 201 else { 202 printf("old_state = %d, ", old_state); 203 for (i = 0; i < nd; i++) 204 printf("%d = %d, ", i, 205 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 206 panic("invalid new_state"); 207 } 208 209 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", 210 DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, 211 old_state, new_state); 212 213 switch (old_state) { 214 case BIOC_SVONLINE: 215 switch (new_state) { 216 case BIOC_SVOFFLINE: 217 case BIOC_SVDEGRADED: 218 break; 219 default: 220 goto die; 221 } 222 break; 223 224 case BIOC_SVOFFLINE: 225 /* XXX this might be a little too much */ 226 goto die; 227 228 case BIOC_SVSCRUB: 229 switch (new_state) { 230 case BIOC_SVONLINE: 231 case BIOC_SVOFFLINE: 232 case BIOC_SVDEGRADED: 233 case BIOC_SVSCRUB: /* can go to same state */ 234 break; 235 default: 236 goto die; 237 } 238 break; 239 240 case BIOC_SVBUILDING: 241 switch (new_state) { 242 case BIOC_SVONLINE: 243 case BIOC_SVOFFLINE: 244 case BIOC_SVBUILDING: /* can go to the same state */ 245 break; 246 default: 247 goto die; 248 } 249 break; 250 251 case BIOC_SVREBUILD: 252 switch (new_state) { 253 case BIOC_SVONLINE: 254 case BIOC_SVOFFLINE: 255 case BIOC_SVREBUILD: /* can go to the same state */ 256 break; 257 default: 258 goto die; 259 } 260 break; 261 262 case BIOC_SVDEGRADED: 263 switch (new_state) { 264 case BIOC_SVOFFLINE: 265 case BIOC_SVREBUILD: 266 case BIOC_SVDEGRADED: /* can go to the same state */ 267 break; 268 default: 269 goto die; 270 } 271 break; 272 273 default: 274 die: 275 panic("%s: %s: invalid volume state transition " 276 "%d -> %d\n", DEVNAME(sd->sd_sc), 277 sd->sd_vol.sv_meta.svm_devname, 278 old_state, new_state); 279 /* NOTREACHED */ 280 } 281 282 sd->sd_vol.sv_meta.svm_status = new_state; 283 } 284 285 int 286 sr_raid1_rw(struct sr_workunit *wu) 287 { 288 struct sr_discipline *sd = wu->swu_dis; 289 struct scsi_xfer *xs = wu->swu_xs; 290 struct sr_ccb *ccb; 291 struct sr_chunk *scp; 292 int ios, x, i, s, rt; 293 daddr64_t blk; 294 295 /* blk and scsi error will be handled by sr_validate_io */ 296 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 297 goto bad; 298 299 /* calculate physical block */ 300 blk += SR_META_SIZE + SR_META_OFFSET; 301 302 if (xs->flags & SCSI_DATA_IN) 303 ios = 1; 304 else 305 ios = sd->sd_vol.sv_meta.svm_no_chunk; 306 wu->swu_io_count = ios; 307 308 for (i = 0; i < ios; i++) { 309 ccb = sr_get_ccb(sd); 310 if (!ccb) { 311 /* should never happen but handle more gracefully */ 312 printf("%s: %s: too many ccbs queued\n", 313 DEVNAME(sd->sd_sc), 314 sd->sd_vol.sv_meta.svm_devname); 315 goto bad; 316 } 317 318 if (xs->flags & SCSI_POLL) { 319 ccb->ccb_buf.b_flags = 0; 320 ccb->ccb_buf.b_iodone = NULL; 321 } else { 322 ccb->ccb_buf.b_flags = B_CALL; 323 ccb->ccb_buf.b_iodone = sr_raid1_intr; 324 } 325 326 ccb->ccb_buf.b_blkno = blk; 327 ccb->ccb_buf.b_bcount = xs->datalen; 328 ccb->ccb_buf.b_bufsize = xs->datalen; 329 ccb->ccb_buf.b_resid = xs->datalen; 330 ccb->ccb_buf.b_data = xs->data; 331 ccb->ccb_buf.b_error = 0; 332 ccb->ccb_buf.b_proc = curproc; 333 ccb->ccb_wu = wu; 334 335 if (xs->flags & SCSI_DATA_IN) { 336 rt = 0; 337 ragain: 338 /* interleave reads */ 339 x = sd->mds.mdd_raid1.sr1_counter++ % 340 sd->sd_vol.sv_meta.svm_no_chunk; 341 scp = sd->sd_vol.sv_chunks[x]; 342 switch (scp->src_meta.scm_status) { 343 case BIOC_SDONLINE: 344 case BIOC_SDSCRUB: 345 ccb->ccb_buf.b_flags |= B_READ; 346 break; 347 348 case BIOC_SDOFFLINE: 349 case BIOC_SDREBUILD: 350 case BIOC_SDHOTSPARE: 351 if (rt++ < sd->sd_vol.sv_meta.svm_no_chunk) 352 goto ragain; 353 354 /* FALLTHROUGH */ 355 default: 356 /* volume offline */ 357 printf("%s: is offline, can't read\n", 358 DEVNAME(sd->sd_sc)); 359 sr_put_ccb(ccb); 360 goto bad; 361 } 362 } else { 363 /* writes go on all working disks */ 364 x = i; 365 scp = sd->sd_vol.sv_chunks[x]; 366 switch (scp->src_meta.scm_status) { 367 case BIOC_SDONLINE: 368 case BIOC_SDSCRUB: 369 case BIOC_SDREBUILD: 370 ccb->ccb_buf.b_flags |= B_WRITE; 371 break; 372 373 case BIOC_SDHOTSPARE: /* should never happen */ 374 case BIOC_SDOFFLINE: 375 wu->swu_io_count--; 376 sr_put_ccb(ccb); 377 continue; 378 379 default: 380 goto bad; 381 } 382 383 } 384 ccb->ccb_target = x; 385 ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm; 386 ccb->ccb_buf.b_vp = NULL; 387 388 LIST_INIT(&ccb->ccb_buf.b_dep); 389 390 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 391 392 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " 393 "b_blkno: %x b_flags 0x%0x b_data %p\n", 394 DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, 395 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, 396 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 397 } 398 399 s = splbio(); 400 401 /* current io failed, restart */ 402 if (wu->swu_state == SR_WU_RESTART) 403 goto start; 404 405 /* deferred io failed, don't restart */ 406 if (wu->swu_state == SR_WU_REQUEUE) 407 goto queued; 408 409 if (sr_check_io_collision(wu)) 410 goto queued; 411 412 start: 413 sr_raid_startwu(wu); 414 queued: 415 splx(s); 416 return (0); 417 bad: 418 /* wu is unwound by sr_put_wu */ 419 return (1); 420 } 421 422 void 423 sr_raid1_intr(struct buf *bp) 424 { 425 struct sr_ccb *ccb = (struct sr_ccb *)bp; 426 struct sr_workunit *wu = ccb->ccb_wu, *wup; 427 struct sr_discipline *sd = wu->swu_dis; 428 struct scsi_xfer *xs = wu->swu_xs; 429 struct sr_softc *sc = sd->sd_sc; 430 int s, pend; 431 432 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 433 DEVNAME(sc), bp, xs); 434 435 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 436 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 437 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 438 ccb->ccb_buf.b_blkno, ccb->ccb_target); 439 440 s = splbio(); 441 442 if (ccb->ccb_buf.b_flags & B_ERROR) { 443 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 444 DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target); 445 wu->swu_ios_failed++; 446 ccb->ccb_state = SR_CCB_FAILED; 447 if (ccb->ccb_target != -1) 448 sd->sd_set_chunk_state(sd, ccb->ccb_target, 449 BIOC_SDOFFLINE); 450 else 451 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 452 } else { 453 ccb->ccb_state = SR_CCB_OK; 454 wu->swu_ios_succeeded++; 455 } 456 wu->swu_ios_complete++; 457 458 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 459 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 460 wu->swu_ios_failed); 461 462 if (wu->swu_ios_complete >= wu->swu_io_count) { 463 /* if all ios failed, retry reads and give up on writes */ 464 if (wu->swu_ios_failed == wu->swu_ios_complete) { 465 if (xs->flags & SCSI_DATA_IN) { 466 printf("%s: retrying read on block %lld\n", 467 DEVNAME(sc), ccb->ccb_buf.b_blkno); 468 sr_put_ccb(ccb); 469 TAILQ_INIT(&wu->swu_ccb); 470 wu->swu_state = SR_WU_RESTART; 471 if (sd->sd_scsi_rw(wu)) 472 goto bad; 473 else 474 goto retry; 475 } else { 476 printf("%s: permanently fail write on block " 477 "%lld\n", DEVNAME(sc), 478 ccb->ccb_buf.b_blkno); 479 xs->error = XS_DRIVER_STUFFUP; 480 goto bad; 481 } 482 } 483 484 xs->error = XS_NOERROR; 485 xs->resid = 0; 486 xs->flags |= ITSDONE; 487 488 pend = 0; 489 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 490 if (wu == wup) { 491 /* wu on pendq, remove */ 492 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 493 pend = 1; 494 495 if (wu->swu_collider) { 496 if (wu->swu_ios_failed) 497 /* toss all ccbs and recreate */ 498 sr_raid1_recreate_wu(wu->swu_collider); 499 500 /* restart deferred wu */ 501 wu->swu_collider->swu_state = 502 SR_WU_INPROGRESS; 503 TAILQ_REMOVE(&sd->sd_wu_defq, 504 wu->swu_collider, swu_link); 505 sr_raid_startwu(wu->swu_collider); 506 } 507 break; 508 } 509 } 510 511 if (!pend) 512 printf("%s: wu: %p not on pending queue\n", 513 DEVNAME(sc), wu); 514 515 /* do not change the order of these 2 functions */ 516 sr_put_wu(wu); 517 scsi_done(xs); 518 519 if (sd->sd_sync && sd->sd_wu_pending == 0) 520 wakeup(sd); 521 } 522 523 retry: 524 splx(s); 525 return; 526 bad: 527 xs->error = XS_DRIVER_STUFFUP; 528 xs->flags |= ITSDONE; 529 sr_put_wu(wu); 530 scsi_done(xs); 531 splx(s); 532 } 533 534 void 535 sr_raid1_recreate_wu(struct sr_workunit *wu) 536 { 537 struct sr_discipline *sd = wu->swu_dis; 538 struct sr_workunit *wup = wu; 539 struct sr_ccb *ccb; 540 541 do { 542 DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); 543 544 /* toss all ccbs */ 545 while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { 546 TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); 547 sr_put_ccb(ccb); 548 } 549 TAILQ_INIT(&wup->swu_ccb); 550 551 /* recreate ccbs */ 552 wup->swu_state = SR_WU_REQUEUE; 553 if (sd->sd_scsi_rw(wup)) 554 panic("could not requeue io"); 555 556 wup = wup->swu_collider; 557 } while (wup); 558 } 559