1 /* $OpenBSD: softraid_raid1.c,v 1.7 2008/11/25 23:05:17 marco Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions */ 47 int 48 sr_raid1_alloc_resources(struct sr_discipline *sd) 49 { 50 int rv = EINVAL; 51 52 if (!sd) 53 return (rv); 54 55 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 56 DEVNAME(sd->sd_sc)); 57 58 if (sr_wu_alloc(sd)) 59 goto bad; 60 if (sr_ccb_alloc(sd)) 61 goto bad; 62 63 rv = 0; 64 bad: 65 return (rv); 66 } 67 68 int 69 sr_raid1_free_resources(struct sr_discipline *sd) 70 { 71 int rv = EINVAL; 72 73 if (!sd) 74 return (rv); 75 76 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 77 DEVNAME(sd->sd_sc)); 78 79 sr_wu_free(sd); 80 sr_ccb_free(sd); 81 82 rv = 0; 83 return (rv); 84 } 85 86 void 87 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 88 { 89 int old_state, s; 90 91 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 92 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 93 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 94 95 /* ok to go to splbio since this only happens in error path */ 96 s = splbio(); 97 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 98 99 /* multiple IOs to the same chunk that fail will come through here */ 100 if (old_state == new_state) 101 goto done; 102 103 switch (old_state) { 104 case BIOC_SDONLINE: 105 switch (new_state) { 106 case BIOC_SDOFFLINE: 107 break; 108 case BIOC_SDSCRUB: 109 break; 110 default: 111 goto die; 112 } 113 break; 114 115 case BIOC_SDOFFLINE: 116 if (new_state == BIOC_SDREBUILD) { 117 ; 118 } else 119 goto die; 120 break; 121 122 case BIOC_SDSCRUB: 123 if (new_state == BIOC_SDONLINE) { 124 ; 125 } else 126 goto die; 127 break; 128 129 case BIOC_SDREBUILD: 130 if (new_state == BIOC_SDONLINE) { 131 ; 132 } else 133 goto die; 134 break; 135 136 case BIOC_SDHOTSPARE: 137 if (new_state == BIOC_SDREBUILD) { 138 ; 139 } else 140 goto die; 141 break; 142 143 default: 144 die: 145 splx(s); /* XXX */ 146 panic("%s: %s: %s: invalid chunk state transition " 147 "%d -> %d\n", DEVNAME(sd->sd_sc), 148 sd->sd_meta->ssd_devname, 149 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 150 old_state, new_state); 151 /* NOTREACHED */ 152 } 153 154 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 155 sd->sd_set_vol_state(sd); 156 157 sd->sd_must_flush = 1; 158 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 159 done: 160 splx(s); 161 } 162 163 void 164 sr_raid1_set_vol_state(struct sr_discipline *sd) 165 { 166 int states[SR_MAX_STATES]; 167 int new_state, i, s, nd; 168 int old_state = sd->sd_vol_status; 169 170 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 171 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 172 173 nd = sd->sd_meta->ssdi.ssd_chunk_no; 174 175 for (i = 0; i < SR_MAX_STATES; i++) 176 states[i] = 0; 177 178 for (i = 0; i < nd; i++) { 179 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 180 if (s > SR_MAX_STATES) 181 panic("%s: %s: %s: invalid chunk state", 182 DEVNAME(sd->sd_sc), 183 sd->sd_meta->ssd_devname, 184 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 185 states[s]++; 186 } 187 188 if (states[BIOC_SDONLINE] == nd) 189 new_state = BIOC_SVONLINE; 190 else if (states[BIOC_SDONLINE] == 0) 191 new_state = BIOC_SVOFFLINE; 192 else if (states[BIOC_SDSCRUB] != 0) 193 new_state = BIOC_SVSCRUB; 194 else if (states[BIOC_SDREBUILD] != 0) 195 new_state = BIOC_SVREBUILD; 196 else if (states[BIOC_SDOFFLINE] != 0) 197 new_state = BIOC_SVDEGRADED; 198 else { 199 printf("old_state = %d, ", old_state); 200 for (i = 0; i < nd; i++) 201 printf("%d = %d, ", i, 202 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 203 panic("invalid new_state"); 204 } 205 206 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", 207 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 208 old_state, new_state); 209 210 switch (old_state) { 211 case BIOC_SVONLINE: 212 switch (new_state) { 213 case BIOC_SVOFFLINE: 214 case BIOC_SVDEGRADED: 215 break; 216 default: 217 goto die; 218 } 219 break; 220 221 case BIOC_SVOFFLINE: 222 /* XXX this might be a little too much */ 223 goto die; 224 225 case BIOC_SVSCRUB: 226 switch (new_state) { 227 case BIOC_SVONLINE: 228 case BIOC_SVOFFLINE: 229 case BIOC_SVDEGRADED: 230 case BIOC_SVSCRUB: /* can go to same state */ 231 break; 232 default: 233 goto die; 234 } 235 break; 236 237 case BIOC_SVBUILDING: 238 switch (new_state) { 239 case BIOC_SVONLINE: 240 case BIOC_SVOFFLINE: 241 case BIOC_SVBUILDING: /* can go to the same state */ 242 break; 243 default: 244 goto die; 245 } 246 break; 247 248 case BIOC_SVREBUILD: 249 switch (new_state) { 250 case BIOC_SVONLINE: 251 case BIOC_SVOFFLINE: 252 case BIOC_SVREBUILD: /* can go to the same state */ 253 break; 254 default: 255 goto die; 256 } 257 break; 258 259 case BIOC_SVDEGRADED: 260 switch (new_state) { 261 case BIOC_SVOFFLINE: 262 case BIOC_SVREBUILD: 263 case BIOC_SVDEGRADED: /* can go to the same state */ 264 break; 265 default: 266 goto die; 267 } 268 break; 269 270 default: 271 die: 272 panic("%s: %s: invalid volume state transition " 273 "%d -> %d\n", DEVNAME(sd->sd_sc), 274 sd->sd_meta->ssd_devname, 275 old_state, new_state); 276 /* NOTREACHED */ 277 } 278 279 sd->sd_vol_status = new_state; 280 } 281 282 int 283 sr_raid1_rw(struct sr_workunit *wu) 284 { 285 struct sr_discipline *sd = wu->swu_dis; 286 struct scsi_xfer *xs = wu->swu_xs; 287 struct sr_ccb *ccb; 288 struct sr_chunk *scp; 289 int ios, x, i, s, rt; 290 daddr64_t blk; 291 292 /* blk and scsi error will be handled by sr_validate_io */ 293 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 294 goto bad; 295 296 /* calculate physical block */ 297 blk += SR_META_SIZE + SR_META_OFFSET; 298 299 if (xs->flags & SCSI_DATA_IN) 300 ios = 1; 301 else 302 ios = sd->sd_meta->ssdi.ssd_chunk_no; 303 wu->swu_io_count = ios; 304 305 for (i = 0; i < ios; i++) { 306 ccb = sr_ccb_get(sd); 307 if (!ccb) { 308 /* should never happen but handle more gracefully */ 309 printf("%s: %s: too many ccbs queued\n", 310 DEVNAME(sd->sd_sc), 311 sd->sd_meta->ssd_devname); 312 goto bad; 313 } 314 315 if (xs->flags & SCSI_POLL) { 316 ccb->ccb_buf.b_flags = 0; 317 ccb->ccb_buf.b_iodone = NULL; 318 } else { 319 ccb->ccb_buf.b_flags = B_CALL; 320 ccb->ccb_buf.b_iodone = sr_raid1_intr; 321 } 322 323 ccb->ccb_buf.b_blkno = blk; 324 ccb->ccb_buf.b_bcount = xs->datalen; 325 ccb->ccb_buf.b_bufsize = xs->datalen; 326 ccb->ccb_buf.b_resid = xs->datalen; 327 ccb->ccb_buf.b_data = xs->data; 328 ccb->ccb_buf.b_error = 0; 329 ccb->ccb_buf.b_proc = curproc; 330 ccb->ccb_wu = wu; 331 332 if (xs->flags & SCSI_DATA_IN) { 333 rt = 0; 334 ragain: 335 /* interleave reads */ 336 x = sd->mds.mdd_raid1.sr1_counter++ % 337 sd->sd_meta->ssdi.ssd_chunk_no; 338 scp = sd->sd_vol.sv_chunks[x]; 339 switch (scp->src_meta.scm_status) { 340 case BIOC_SDONLINE: 341 case BIOC_SDSCRUB: 342 ccb->ccb_buf.b_flags |= B_READ; 343 break; 344 345 case BIOC_SDOFFLINE: 346 case BIOC_SDREBUILD: 347 case BIOC_SDHOTSPARE: 348 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 349 goto ragain; 350 351 /* FALLTHROUGH */ 352 default: 353 /* volume offline */ 354 printf("%s: is offline, can't read\n", 355 DEVNAME(sd->sd_sc)); 356 sr_ccb_put(ccb); 357 goto bad; 358 } 359 } else { 360 /* writes go on all working disks */ 361 x = i; 362 scp = sd->sd_vol.sv_chunks[x]; 363 switch (scp->src_meta.scm_status) { 364 case BIOC_SDONLINE: 365 case BIOC_SDSCRUB: 366 case BIOC_SDREBUILD: 367 ccb->ccb_buf.b_flags |= B_WRITE; 368 break; 369 370 case BIOC_SDHOTSPARE: /* should never happen */ 371 case BIOC_SDOFFLINE: 372 wu->swu_io_count--; 373 sr_ccb_put(ccb); 374 continue; 375 376 default: 377 goto bad; 378 } 379 380 } 381 ccb->ccb_target = x; 382 ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm; 383 ccb->ccb_buf.b_vp = NULL; 384 385 LIST_INIT(&ccb->ccb_buf.b_dep); 386 387 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 388 389 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " 390 "b_blkno: %x b_flags 0x%0x b_data %p\n", 391 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 392 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, 393 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 394 } 395 396 s = splbio(); 397 398 /* current io failed, restart */ 399 if (wu->swu_state == SR_WU_RESTART) 400 goto start; 401 402 /* deferred io failed, don't restart */ 403 if (wu->swu_state == SR_WU_REQUEUE) 404 goto queued; 405 406 if (sr_check_io_collision(wu)) 407 goto queued; 408 409 start: 410 sr_raid_startwu(wu); 411 queued: 412 splx(s); 413 return (0); 414 bad: 415 /* wu is unwound by sr_wu_put */ 416 return (1); 417 } 418 419 void 420 sr_raid1_intr(struct buf *bp) 421 { 422 struct sr_ccb *ccb = (struct sr_ccb *)bp; 423 struct sr_workunit *wu = ccb->ccb_wu, *wup; 424 struct sr_discipline *sd = wu->swu_dis; 425 struct scsi_xfer *xs = wu->swu_xs; 426 struct sr_softc *sc = sd->sd_sc; 427 int s, pend; 428 429 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 430 DEVNAME(sc), bp, xs); 431 432 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 433 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 434 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 435 ccb->ccb_buf.b_blkno, ccb->ccb_target); 436 437 s = splbio(); 438 439 if (ccb->ccb_buf.b_flags & B_ERROR) { 440 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 441 DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target); 442 wu->swu_ios_failed++; 443 ccb->ccb_state = SR_CCB_FAILED; 444 if (ccb->ccb_target != -1) 445 sd->sd_set_chunk_state(sd, ccb->ccb_target, 446 BIOC_SDOFFLINE); 447 else 448 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 449 } else { 450 ccb->ccb_state = SR_CCB_OK; 451 wu->swu_ios_succeeded++; 452 } 453 wu->swu_ios_complete++; 454 455 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 456 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 457 wu->swu_ios_failed); 458 459 if (wu->swu_ios_complete >= wu->swu_io_count) { 460 /* if all ios failed, retry reads and give up on writes */ 461 if (wu->swu_ios_failed == wu->swu_ios_complete) { 462 if (xs->flags & SCSI_DATA_IN) { 463 printf("%s: retrying read on block %lld\n", 464 DEVNAME(sc), ccb->ccb_buf.b_blkno); 465 sr_ccb_put(ccb); 466 TAILQ_INIT(&wu->swu_ccb); 467 wu->swu_state = SR_WU_RESTART; 468 if (sd->sd_scsi_rw(wu)) 469 goto bad; 470 else 471 goto retry; 472 } else { 473 printf("%s: permanently fail write on block " 474 "%lld\n", DEVNAME(sc), 475 ccb->ccb_buf.b_blkno); 476 xs->error = XS_DRIVER_STUFFUP; 477 goto bad; 478 } 479 } 480 481 xs->error = XS_NOERROR; 482 xs->resid = 0; 483 xs->flags |= ITSDONE; 484 485 pend = 0; 486 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 487 if (wu == wup) { 488 /* wu on pendq, remove */ 489 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 490 pend = 1; 491 492 if (wu->swu_collider) { 493 if (wu->swu_ios_failed) 494 /* toss all ccbs and recreate */ 495 sr_raid1_recreate_wu(wu->swu_collider); 496 497 /* restart deferred wu */ 498 wu->swu_collider->swu_state = 499 SR_WU_INPROGRESS; 500 TAILQ_REMOVE(&sd->sd_wu_defq, 501 wu->swu_collider, swu_link); 502 sr_raid_startwu(wu->swu_collider); 503 } 504 break; 505 } 506 } 507 508 if (!pend) 509 printf("%s: wu: %p not on pending queue\n", 510 DEVNAME(sc), wu); 511 512 /* do not change the order of these 2 functions */ 513 sr_wu_put(wu); 514 sr_scsi_done(sd, xs); 515 516 if (sd->sd_sync && sd->sd_wu_pending == 0) 517 wakeup(sd); 518 } 519 520 retry: 521 splx(s); 522 return; 523 bad: 524 xs->error = XS_DRIVER_STUFFUP; 525 xs->flags |= ITSDONE; 526 sr_wu_put(wu); 527 sr_scsi_done(sd, xs); 528 splx(s); 529 } 530 531 void 532 sr_raid1_recreate_wu(struct sr_workunit *wu) 533 { 534 struct sr_discipline *sd = wu->swu_dis; 535 struct sr_workunit *wup = wu; 536 struct sr_ccb *ccb; 537 538 do { 539 DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); 540 541 /* toss all ccbs */ 542 while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { 543 TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); 544 sr_ccb_put(ccb); 545 } 546 TAILQ_INIT(&wup->swu_ccb); 547 548 /* recreate ccbs */ 549 wup->swu_state = SR_WU_REQUEUE; 550 if (sd->sd_scsi_rw(wup)) 551 panic("could not requeue io"); 552 553 wup = wup->swu_collider; 554 } while (wup); 555 } 556