1 /* $OpenBSD: softraid_raid1.c,v 1.8 2009/04/28 02:54:53 marco Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions */ 47 int 48 sr_raid1_alloc_resources(struct sr_discipline *sd) 49 { 50 int rv = EINVAL; 51 52 if (!sd) 53 return (rv); 54 55 DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", 56 DEVNAME(sd->sd_sc)); 57 58 if (sr_wu_alloc(sd)) 59 goto bad; 60 if (sr_ccb_alloc(sd)) 61 goto bad; 62 63 rv = 0; 64 bad: 65 return (rv); 66 } 67 68 int 69 sr_raid1_free_resources(struct sr_discipline *sd) 70 { 71 int rv = EINVAL; 72 73 if (!sd) 74 return (rv); 75 76 DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n", 77 DEVNAME(sd->sd_sc)); 78 79 sr_wu_free(sd); 80 sr_ccb_free(sd); 81 82 rv = 0; 83 return (rv); 84 } 85 86 void 87 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 88 { 89 int old_state, s; 90 91 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 92 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 93 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 94 95 /* ok to go to splbio since this only happens in error path */ 96 s = splbio(); 97 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 98 99 /* multiple IOs to the same chunk that fail will come through here */ 100 if (old_state == new_state) 101 goto done; 102 103 switch (old_state) { 104 case BIOC_SDONLINE: 105 switch (new_state) { 106 case BIOC_SDOFFLINE: 107 break; 108 case BIOC_SDSCRUB: 109 break; 110 default: 111 goto die; 112 } 113 break; 114 115 case BIOC_SDOFFLINE: 116 if (new_state == BIOC_SDREBUILD) { 117 ; 118 } else 119 goto die; 120 break; 121 122 case BIOC_SDSCRUB: 123 if (new_state == BIOC_SDONLINE) { 124 ; 125 } else 126 goto die; 127 break; 128 129 case BIOC_SDREBUILD: 130 if (new_state == BIOC_SDONLINE) { 131 ; 132 } else 133 goto die; 134 break; 135 136 case BIOC_SDHOTSPARE: 137 if (new_state == BIOC_SDREBUILD) { 138 ; 139 } else 140 goto die; 141 break; 142 143 default: 144 die: 145 splx(s); /* XXX */ 146 panic("%s: %s: %s: invalid chunk state transition " 147 "%d -> %d\n", DEVNAME(sd->sd_sc), 148 sd->sd_meta->ssd_devname, 149 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 150 old_state, new_state); 151 /* NOTREACHED */ 152 } 153 154 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 155 sd->sd_set_vol_state(sd); 156 157 sd->sd_must_flush = 1; 158 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 159 done: 160 splx(s); 161 } 162 163 void 164 sr_raid1_set_vol_state(struct sr_discipline *sd) 165 { 166 int states[SR_MAX_STATES]; 167 int new_state, i, s, nd; 168 int old_state = sd->sd_vol_status; 169 170 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 171 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 172 173 nd = sd->sd_meta->ssdi.ssd_chunk_no; 174 175 for (i = 0; i < SR_MAX_STATES; i++) 176 states[i] = 0; 177 178 for (i = 0; i < nd; i++) { 179 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 180 if (s > SR_MAX_STATES) 181 panic("%s: %s: %s: invalid chunk state", 182 DEVNAME(sd->sd_sc), 183 sd->sd_meta->ssd_devname, 184 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 185 states[s]++; 186 } 187 188 if (states[BIOC_SDONLINE] == nd) 189 new_state = BIOC_SVONLINE; 190 else if (states[BIOC_SDONLINE] == 0) 191 new_state = BIOC_SVOFFLINE; 192 else if (states[BIOC_SDSCRUB] != 0) 193 new_state = BIOC_SVSCRUB; 194 else if (states[BIOC_SDREBUILD] != 0) 195 new_state = BIOC_SVREBUILD; 196 else if (states[BIOC_SDOFFLINE] != 0) 197 new_state = BIOC_SVDEGRADED; 198 else { 199 printf("old_state = %d, ", old_state); 200 for (i = 0; i < nd; i++) 201 printf("%d = %d, ", i, 202 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 203 panic("invalid new_state"); 204 } 205 206 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", 207 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 208 old_state, new_state); 209 210 switch (old_state) { 211 case BIOC_SVONLINE: 212 switch (new_state) { 213 case BIOC_SVONLINE: /* can go to same state */ 214 case BIOC_SVOFFLINE: 215 case BIOC_SVDEGRADED: 216 break; 217 default: 218 goto die; 219 } 220 break; 221 222 case BIOC_SVOFFLINE: 223 /* XXX this might be a little too much */ 224 goto die; 225 226 case BIOC_SVSCRUB: 227 switch (new_state) { 228 case BIOC_SVONLINE: 229 case BIOC_SVOFFLINE: 230 case BIOC_SVDEGRADED: 231 case BIOC_SVSCRUB: /* can go to same state */ 232 break; 233 default: 234 goto die; 235 } 236 break; 237 238 case BIOC_SVBUILDING: 239 switch (new_state) { 240 case BIOC_SVONLINE: 241 case BIOC_SVOFFLINE: 242 case BIOC_SVBUILDING: /* can go to the same state */ 243 break; 244 default: 245 goto die; 246 } 247 break; 248 249 case BIOC_SVREBUILD: 250 switch (new_state) { 251 case BIOC_SVONLINE: 252 case BIOC_SVOFFLINE: 253 case BIOC_SVREBUILD: /* can go to the same state */ 254 break; 255 default: 256 goto die; 257 } 258 break; 259 260 case BIOC_SVDEGRADED: 261 switch (new_state) { 262 case BIOC_SVOFFLINE: 263 case BIOC_SVREBUILD: 264 case BIOC_SVDEGRADED: /* can go to the same state */ 265 break; 266 default: 267 goto die; 268 } 269 break; 270 271 default: 272 die: 273 panic("%s: %s: invalid volume state transition " 274 "%d -> %d\n", DEVNAME(sd->sd_sc), 275 sd->sd_meta->ssd_devname, 276 old_state, new_state); 277 /* NOTREACHED */ 278 } 279 280 sd->sd_vol_status = new_state; 281 } 282 283 int 284 sr_raid1_rw(struct sr_workunit *wu) 285 { 286 struct sr_discipline *sd = wu->swu_dis; 287 struct scsi_xfer *xs = wu->swu_xs; 288 struct sr_ccb *ccb; 289 struct sr_chunk *scp; 290 int ios, x, i, s, rt; 291 daddr64_t blk; 292 293 /* blk and scsi error will be handled by sr_validate_io */ 294 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 295 goto bad; 296 297 /* calculate physical block */ 298 blk += SR_META_SIZE + SR_META_OFFSET; 299 300 if (xs->flags & SCSI_DATA_IN) 301 ios = 1; 302 else 303 ios = sd->sd_meta->ssdi.ssd_chunk_no; 304 wu->swu_io_count = ios; 305 306 for (i = 0; i < ios; i++) { 307 ccb = sr_ccb_get(sd); 308 if (!ccb) { 309 /* should never happen but handle more gracefully */ 310 printf("%s: %s: too many ccbs queued\n", 311 DEVNAME(sd->sd_sc), 312 sd->sd_meta->ssd_devname); 313 goto bad; 314 } 315 316 if (xs->flags & SCSI_POLL) { 317 ccb->ccb_buf.b_flags = 0; 318 ccb->ccb_buf.b_iodone = NULL; 319 } else { 320 ccb->ccb_buf.b_flags = B_CALL; 321 ccb->ccb_buf.b_iodone = sr_raid1_intr; 322 } 323 324 ccb->ccb_buf.b_blkno = blk; 325 ccb->ccb_buf.b_bcount = xs->datalen; 326 ccb->ccb_buf.b_bufsize = xs->datalen; 327 ccb->ccb_buf.b_resid = xs->datalen; 328 ccb->ccb_buf.b_data = xs->data; 329 ccb->ccb_buf.b_error = 0; 330 ccb->ccb_buf.b_proc = curproc; 331 ccb->ccb_wu = wu; 332 333 if (xs->flags & SCSI_DATA_IN) { 334 rt = 0; 335 ragain: 336 /* interleave reads */ 337 x = sd->mds.mdd_raid1.sr1_counter++ % 338 sd->sd_meta->ssdi.ssd_chunk_no; 339 scp = sd->sd_vol.sv_chunks[x]; 340 switch (scp->src_meta.scm_status) { 341 case BIOC_SDONLINE: 342 case BIOC_SDSCRUB: 343 ccb->ccb_buf.b_flags |= B_READ; 344 break; 345 346 case BIOC_SDOFFLINE: 347 case BIOC_SDREBUILD: 348 case BIOC_SDHOTSPARE: 349 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 350 goto ragain; 351 352 /* FALLTHROUGH */ 353 default: 354 /* volume offline */ 355 printf("%s: is offline, can't read\n", 356 DEVNAME(sd->sd_sc)); 357 sr_ccb_put(ccb); 358 goto bad; 359 } 360 } else { 361 /* writes go on all working disks */ 362 x = i; 363 scp = sd->sd_vol.sv_chunks[x]; 364 switch (scp->src_meta.scm_status) { 365 case BIOC_SDONLINE: 366 case BIOC_SDSCRUB: 367 case BIOC_SDREBUILD: 368 ccb->ccb_buf.b_flags |= B_WRITE; 369 break; 370 371 case BIOC_SDHOTSPARE: /* should never happen */ 372 case BIOC_SDOFFLINE: 373 wu->swu_io_count--; 374 sr_ccb_put(ccb); 375 continue; 376 377 default: 378 goto bad; 379 } 380 381 } 382 ccb->ccb_target = x; 383 ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[x]->src_dev_mm; 384 ccb->ccb_buf.b_vp = NULL; 385 386 LIST_INIT(&ccb->ccb_buf.b_dep); 387 388 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 389 390 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " 391 "b_blkno: %x b_flags 0x%0x b_data %p\n", 392 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 393 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, 394 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 395 } 396 397 s = splbio(); 398 399 /* current io failed, restart */ 400 if (wu->swu_state == SR_WU_RESTART) 401 goto start; 402 403 /* deferred io failed, don't restart */ 404 if (wu->swu_state == SR_WU_REQUEUE) 405 goto queued; 406 407 if (sr_check_io_collision(wu)) 408 goto queued; 409 410 start: 411 sr_raid_startwu(wu); 412 queued: 413 splx(s); 414 return (0); 415 bad: 416 /* wu is unwound by sr_wu_put */ 417 return (1); 418 } 419 420 void 421 sr_raid1_intr(struct buf *bp) 422 { 423 struct sr_ccb *ccb = (struct sr_ccb *)bp; 424 struct sr_workunit *wu = ccb->ccb_wu, *wup; 425 struct sr_discipline *sd = wu->swu_dis; 426 struct scsi_xfer *xs = wu->swu_xs; 427 struct sr_softc *sc = sd->sd_sc; 428 int s, pend; 429 430 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 431 DEVNAME(sc), bp, xs); 432 433 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 434 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 435 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 436 ccb->ccb_buf.b_blkno, ccb->ccb_target); 437 438 s = splbio(); 439 440 if (ccb->ccb_buf.b_flags & B_ERROR) { 441 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 442 DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target); 443 wu->swu_ios_failed++; 444 ccb->ccb_state = SR_CCB_FAILED; 445 if (ccb->ccb_target != -1) 446 sd->sd_set_chunk_state(sd, ccb->ccb_target, 447 BIOC_SDOFFLINE); 448 else 449 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 450 } else { 451 ccb->ccb_state = SR_CCB_OK; 452 wu->swu_ios_succeeded++; 453 } 454 wu->swu_ios_complete++; 455 456 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 457 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 458 wu->swu_ios_failed); 459 460 if (wu->swu_ios_complete >= wu->swu_io_count) { 461 /* if all ios failed, retry reads and give up on writes */ 462 if (wu->swu_ios_failed == wu->swu_ios_complete) { 463 if (xs->flags & SCSI_DATA_IN) { 464 printf("%s: retrying read on block %lld\n", 465 DEVNAME(sc), ccb->ccb_buf.b_blkno); 466 sr_ccb_put(ccb); 467 TAILQ_INIT(&wu->swu_ccb); 468 wu->swu_state = SR_WU_RESTART; 469 if (sd->sd_scsi_rw(wu)) 470 goto bad; 471 else 472 goto retry; 473 } else { 474 printf("%s: permanently fail write on block " 475 "%lld\n", DEVNAME(sc), 476 ccb->ccb_buf.b_blkno); 477 xs->error = XS_DRIVER_STUFFUP; 478 goto bad; 479 } 480 } 481 482 xs->error = XS_NOERROR; 483 xs->resid = 0; 484 xs->flags |= ITSDONE; 485 486 pend = 0; 487 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 488 if (wu == wup) { 489 /* wu on pendq, remove */ 490 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 491 pend = 1; 492 493 if (wu->swu_collider) { 494 if (wu->swu_ios_failed) 495 /* toss all ccbs and recreate */ 496 sr_raid1_recreate_wu(wu->swu_collider); 497 498 /* restart deferred wu */ 499 wu->swu_collider->swu_state = 500 SR_WU_INPROGRESS; 501 TAILQ_REMOVE(&sd->sd_wu_defq, 502 wu->swu_collider, swu_link); 503 sr_raid_startwu(wu->swu_collider); 504 } 505 break; 506 } 507 } 508 509 if (!pend) 510 printf("%s: wu: %p not on pending queue\n", 511 DEVNAME(sc), wu); 512 513 /* do not change the order of these 2 functions */ 514 sr_wu_put(wu); 515 sr_scsi_done(sd, xs); 516 517 if (sd->sd_sync && sd->sd_wu_pending == 0) 518 wakeup(sd); 519 } 520 521 retry: 522 splx(s); 523 return; 524 bad: 525 xs->error = XS_DRIVER_STUFFUP; 526 xs->flags |= ITSDONE; 527 sr_wu_put(wu); 528 sr_scsi_done(sd, xs); 529 splx(s); 530 } 531 532 void 533 sr_raid1_recreate_wu(struct sr_workunit *wu) 534 { 535 struct sr_discipline *sd = wu->swu_dis; 536 struct sr_workunit *wup = wu; 537 struct sr_ccb *ccb; 538 539 do { 540 DNPRINTF(SR_D_INTR, "%s: sr_raid1_recreate_wu: %p\n", wup); 541 542 /* toss all ccbs */ 543 while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { 544 TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); 545 sr_ccb_put(ccb); 546 } 547 TAILQ_INIT(&wup->swu_ccb); 548 549 /* recreate ccbs */ 550 wup->swu_state = SR_WU_REQUEUE; 551 if (sd->sd_scsi_rw(wup)) 552 panic("could not requeue io"); 553 554 wup = wup->swu_collider; 555 } while (wup); 556 } 557