1 /* $OpenBSD: softraid_raid1.c,v 1.47 2013/03/31 11:12:06 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int, void *); 51 int sr_raid1_rw(struct sr_workunit *); 52 void sr_raid1_intr(struct buf *); 53 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 54 void sr_raid1_set_vol_state(struct sr_discipline *); 55 56 /* Discipline initialisation. */ 57 void 58 sr_raid1_discipline_init(struct sr_discipline *sd) 59 { 60 /* Fill out discipline members. */ 61 sd->sd_type = SR_MD_RAID1; 62 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 63 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 64 SR_CAP_REBUILD | SR_CAP_REDUNDANT; 65 sd->sd_max_wu = SR_RAID1_NOWU; 66 67 /* Setup discipline specific function pointers. */ 68 sd->sd_assemble = sr_raid1_assemble; 69 sd->sd_create = sr_raid1_create; 70 sd->sd_scsi_rw = sr_raid1_rw; 71 sd->sd_scsi_intr = sr_raid1_intr; 72 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 73 sd->sd_set_vol_state = sr_raid1_set_vol_state; 74 } 75 76 int 77 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 78 int no_chunk, int64_t coerced_size) 79 { 80 81 if (no_chunk < 2) { 82 sr_error(sd->sd_sc, "RAID 1 requires two or more chunks"); 83 return EINVAL; 84 } 85 86 sd->sd_meta->ssdi.ssd_size = coerced_size; 87 88 sd->sd_max_ccb_per_wu = no_chunk; 89 90 return 0; 91 } 92 93 int 94 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 95 int no_chunk, void *data) 96 { 97 98 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 99 100 return 0; 101 } 102 103 void 104 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 105 { 106 int old_state, s; 107 108 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 109 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 110 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 111 112 /* ok to go to splbio since this only happens in error path */ 113 s = splbio(); 114 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 115 116 /* multiple IOs to the same chunk that fail will come through here */ 117 if (old_state == new_state) 118 goto done; 119 120 switch (old_state) { 121 case BIOC_SDONLINE: 122 switch (new_state) { 123 case BIOC_SDOFFLINE: 124 case BIOC_SDSCRUB: 125 break; 126 default: 127 goto die; 128 } 129 break; 130 131 case BIOC_SDOFFLINE: 132 switch (new_state) { 133 case BIOC_SDREBUILD: 134 case BIOC_SDHOTSPARE: 135 break; 136 default: 137 goto die; 138 } 139 break; 140 141 case BIOC_SDSCRUB: 142 if (new_state == BIOC_SDONLINE) { 143 ; 144 } else 145 goto die; 146 break; 147 148 case BIOC_SDREBUILD: 149 switch (new_state) { 150 case BIOC_SDONLINE: 151 break; 152 case BIOC_SDOFFLINE: 153 /* Abort rebuild since the rebuild chunk disappeared. */ 154 sd->sd_reb_abort = 1; 155 break; 156 default: 157 goto die; 158 } 159 break; 160 161 case BIOC_SDHOTSPARE: 162 switch (new_state) { 163 case BIOC_SDOFFLINE: 164 case BIOC_SDREBUILD: 165 break; 166 default: 167 goto die; 168 } 169 break; 170 171 default: 172 die: 173 splx(s); /* XXX */ 174 panic("%s: %s: %s: invalid chunk state transition " 175 "%d -> %d\n", DEVNAME(sd->sd_sc), 176 sd->sd_meta->ssd_devname, 177 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 178 old_state, new_state); 179 /* NOTREACHED */ 180 } 181 182 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 183 sd->sd_set_vol_state(sd); 184 185 sd->sd_must_flush = 1; 186 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 187 done: 188 splx(s); 189 } 190 191 void 192 sr_raid1_set_vol_state(struct sr_discipline *sd) 193 { 194 int states[SR_MAX_STATES]; 195 int new_state, i, s, nd; 196 int old_state = sd->sd_vol_status; 197 198 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 199 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 200 201 nd = sd->sd_meta->ssdi.ssd_chunk_no; 202 203 #ifdef SR_DEBUG 204 for (i = 0; i < nd; i++) 205 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 206 DEVNAME(sd->sd_sc), i, 207 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 208 #endif 209 210 for (i = 0; i < SR_MAX_STATES; i++) 211 states[i] = 0; 212 213 for (i = 0; i < nd; i++) { 214 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 215 if (s >= SR_MAX_STATES) 216 panic("%s: %s: %s: invalid chunk state", 217 DEVNAME(sd->sd_sc), 218 sd->sd_meta->ssd_devname, 219 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 220 states[s]++; 221 } 222 223 if (states[BIOC_SDONLINE] == nd) 224 new_state = BIOC_SVONLINE; 225 else if (states[BIOC_SDONLINE] == 0) 226 new_state = BIOC_SVOFFLINE; 227 else if (states[BIOC_SDSCRUB] != 0) 228 new_state = BIOC_SVSCRUB; 229 else if (states[BIOC_SDREBUILD] != 0) 230 new_state = BIOC_SVREBUILD; 231 else if (states[BIOC_SDOFFLINE] != 0) 232 new_state = BIOC_SVDEGRADED; 233 else { 234 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 235 "was %d\n", DEVNAME(sd->sd_sc), old_state); 236 panic("invalid volume state"); 237 } 238 239 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 240 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 241 old_state, new_state); 242 243 switch (old_state) { 244 case BIOC_SVONLINE: 245 switch (new_state) { 246 case BIOC_SVONLINE: /* can go to same state */ 247 case BIOC_SVOFFLINE: 248 case BIOC_SVDEGRADED: 249 case BIOC_SVREBUILD: /* happens on boot */ 250 break; 251 default: 252 goto die; 253 } 254 break; 255 256 case BIOC_SVOFFLINE: 257 /* XXX this might be a little too much */ 258 goto die; 259 260 case BIOC_SVSCRUB: 261 switch (new_state) { 262 case BIOC_SVONLINE: 263 case BIOC_SVOFFLINE: 264 case BIOC_SVDEGRADED: 265 case BIOC_SVSCRUB: /* can go to same state */ 266 break; 267 default: 268 goto die; 269 } 270 break; 271 272 case BIOC_SVBUILDING: 273 switch (new_state) { 274 case BIOC_SVONLINE: 275 case BIOC_SVOFFLINE: 276 case BIOC_SVBUILDING: /* can go to the same state */ 277 break; 278 default: 279 goto die; 280 } 281 break; 282 283 case BIOC_SVREBUILD: 284 switch (new_state) { 285 case BIOC_SVONLINE: 286 case BIOC_SVOFFLINE: 287 case BIOC_SVDEGRADED: 288 case BIOC_SVREBUILD: /* can go to the same state */ 289 break; 290 default: 291 goto die; 292 } 293 break; 294 295 case BIOC_SVDEGRADED: 296 switch (new_state) { 297 case BIOC_SVOFFLINE: 298 case BIOC_SVREBUILD: 299 case BIOC_SVDEGRADED: /* can go to the same state */ 300 break; 301 default: 302 goto die; 303 } 304 break; 305 306 default: 307 die: 308 panic("%s: %s: invalid volume state transition " 309 "%d -> %d\n", DEVNAME(sd->sd_sc), 310 sd->sd_meta->ssd_devname, 311 old_state, new_state); 312 /* NOTREACHED */ 313 } 314 315 sd->sd_vol_status = new_state; 316 317 /* If we have just become degraded, look for a hotspare. */ 318 if (new_state == BIOC_SVDEGRADED) 319 workq_add_task(NULL, 0, sr_hotspare_rebuild_callback, sd, NULL); 320 } 321 322 int 323 sr_raid1_rw(struct sr_workunit *wu) 324 { 325 struct sr_discipline *sd = wu->swu_dis; 326 struct scsi_xfer *xs = wu->swu_xs; 327 struct sr_ccb *ccb; 328 struct sr_chunk *scp; 329 int ios, chunk, i, s, rt; 330 daddr64_t blk; 331 332 /* blk and scsi error will be handled by sr_validate_io */ 333 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 334 goto bad; 335 336 /* calculate physical block */ 337 blk += sd->sd_meta->ssd_data_offset; 338 339 if (xs->flags & SCSI_DATA_IN) 340 ios = 1; 341 else 342 ios = sd->sd_meta->ssdi.ssd_chunk_no; 343 344 for (i = 0; i < ios; i++) { 345 if (xs->flags & SCSI_DATA_IN) { 346 rt = 0; 347 ragain: 348 /* interleave reads */ 349 chunk = sd->mds.mdd_raid1.sr1_counter++ % 350 sd->sd_meta->ssdi.ssd_chunk_no; 351 scp = sd->sd_vol.sv_chunks[chunk]; 352 switch (scp->src_meta.scm_status) { 353 case BIOC_SDONLINE: 354 case BIOC_SDSCRUB: 355 break; 356 357 case BIOC_SDOFFLINE: 358 case BIOC_SDREBUILD: 359 case BIOC_SDHOTSPARE: 360 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 361 goto ragain; 362 363 /* FALLTHROUGH */ 364 default: 365 /* volume offline */ 366 printf("%s: is offline, cannot read\n", 367 DEVNAME(sd->sd_sc)); 368 goto bad; 369 } 370 } else { 371 /* writes go on all working disks */ 372 chunk = i; 373 scp = sd->sd_vol.sv_chunks[chunk]; 374 switch (scp->src_meta.scm_status) { 375 case BIOC_SDONLINE: 376 case BIOC_SDSCRUB: 377 case BIOC_SDREBUILD: 378 break; 379 380 case BIOC_SDHOTSPARE: /* should never happen */ 381 case BIOC_SDOFFLINE: 382 continue; 383 384 default: 385 goto bad; 386 } 387 } 388 389 ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data, 390 xs->flags, 0); 391 if (!ccb) { 392 /* should never happen but handle more gracefully */ 393 printf("%s: %s: too many ccbs queued\n", 394 DEVNAME(sd->sd_sc), 395 sd->sd_meta->ssd_devname); 396 goto bad; 397 } 398 sr_wu_enqueue_ccb(wu, ccb); 399 } 400 401 s = splbio(); 402 403 /* rebuild io, let rebuild routine deal with it */ 404 if (wu->swu_flags & SR_WUF_REBUILD) 405 goto queued; 406 407 /* current io failed, restart */ 408 if (wu->swu_state == SR_WU_RESTART) 409 goto start; 410 411 /* deferred io failed, don't restart */ 412 if (wu->swu_state == SR_WU_REQUEUE) 413 goto queued; 414 415 if (sr_check_io_collision(wu)) 416 goto queued; 417 418 start: 419 sr_raid_startwu(wu); 420 queued: 421 splx(s); 422 return (0); 423 bad: 424 /* wu is unwound by sr_wu_put */ 425 return (1); 426 } 427 428 void 429 sr_raid1_intr(struct buf *bp) 430 { 431 struct sr_ccb *ccb = (struct sr_ccb *)bp; 432 struct sr_workunit *wu = ccb->ccb_wu, *wup; 433 struct sr_discipline *sd = wu->swu_dis; 434 struct scsi_xfer *xs = wu->swu_xs; 435 struct sr_softc *sc = sd->sd_sc; 436 int s; 437 438 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 439 DEVNAME(sc), bp, xs); 440 441 s = splbio(); 442 443 sr_ccb_done(ccb); 444 445 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 446 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 447 wu->swu_ios_failed); 448 449 if (wu->swu_ios_complete < wu->swu_io_count) 450 goto done; 451 452 xs->error = XS_NOERROR; 453 454 /* if all ios failed, retry reads and give up on writes */ 455 if (wu->swu_ios_failed == wu->swu_ios_complete) { 456 if (xs->flags & SCSI_DATA_IN) { 457 printf("%s: retrying read on block %lld\n", 458 DEVNAME(sc), ccb->ccb_buf.b_blkno); 459 if (wu->swu_cb_active == 1) 460 panic("%s: sr_raid1_intr_cb", 461 DEVNAME(sd->sd_sc)); 462 sr_wu_release_ccbs(wu); 463 wu->swu_state = SR_WU_RESTART; 464 if (sd->sd_scsi_rw(wu) == 0) 465 goto done; 466 xs->error = XS_DRIVER_STUFFUP; 467 } else { 468 printf("%s: permanently failing write on block %lld\n", 469 DEVNAME(sc), ccb->ccb_buf.b_blkno); 470 xs->error = XS_DRIVER_STUFFUP; 471 } 472 } 473 474 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) 475 if (wu == wup) 476 break; 477 478 if (wup == NULL) 479 panic("%s: wu %p not on pending queue", 480 DEVNAME(sd->sd_sc), wu); 481 482 /* wu on pendq, remove */ 483 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 484 485 if (wu->swu_collider) { 486 if (wu->swu_ios_failed) 487 sr_raid_recreate_wu(wu->swu_collider); 488 489 /* XXX Should the collider be failed if this xs failed? */ 490 /* restart deferred wu */ 491 wu->swu_collider->swu_state = SR_WU_INPROGRESS; 492 TAILQ_REMOVE(&sd->sd_wu_defq, wu->swu_collider, swu_link); 493 sr_raid_startwu(wu->swu_collider); 494 } 495 496 if (wu->swu_flags & SR_WUF_REBUILD) 497 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 498 if (wu->swu_flags & SR_WUF_WAKEUP) 499 wakeup(wu); 500 if (!(wu->swu_flags & SR_WUF_REBUILD)) 501 sr_scsi_done(sd, xs); 502 503 done: 504 splx(s); 505 } 506