/* $OpenBSD: softraid_raid6.c,v 1.62 2014/07/12 18:48:51 tedu Exp $ */
/*
 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/task.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

uint8_t *gf_map[256];
uint8_t	gf_pow[768];
int	gf_log[256];

/* RAID 6 functions. */
int	sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid6_init(struct sr_discipline *);
int	sr_raid6_rw(struct sr_workunit *);
int	sr_raid6_openings(struct sr_discipline *);
void	sr_raid6_intr(struct buf *);
int	sr_raid6_wu_done(struct sr_workunit *);
void	sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid6_set_vol_state(struct sr_discipline *);

void	sr_raid6_xorp(void *, void *, int);
void	sr_raid6_xorq(void *, void *, int, int);
int	sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, daddr_t,
	    void *, int, int, void *, void *, int);
void	sr_raid6_scrub(struct sr_discipline *);
int	sr_failio(struct sr_workunit *);

void	gf_init(void);
uint8_t gf_inv(uint8_t);
int	gf_premul(uint8_t);
uint8_t gf_mul(uint8_t, uint8_t);

#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)
#define SR_FAILY	(1L << 1)
#define SR_FAILP	(1L << 2)
#define SR_FAILQ	(1L << 3)

struct sr_raid6_opaque {
	int	gn;
	void	*pbuf;
	void	*qbuf;
};

/* discipline initialisation. */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}
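
/*
 * Capacity note: every RAID 6 row spends two chunks on parity (P and Q),
 * so sr_raid6_create() below rounds the coerced per-chunk size down to a
 * multiple of the strip size and multiplies it by (no_chunk - 2).  As a
 * rough example, six equally sized chunks yield the capacity of four.
 */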

int
sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 4) {
		sr_error(sd->sd_sc, "%s requires four or more chunks",
		    sd->sd_name);
		return EINVAL;
	}

	/*
	 * XXX add variable strip size later even though MAXPHYS is really
	 * the clever value, users like to tinker with that type of stuff.
	 */
	sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
	sd->sd_meta->ssdi.ssd_size = (coerced_size &
	    ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
	    DEV_BSHIFT) - 1)) * (no_chunk - 2);

	return sr_raid6_init(sd);
}

int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}

int
sr_raid6_init(struct sr_discipline *sd)
{
	/* Initialise runtime values. */
	sd->mds.mdd_raid6.sr6_strip_bits =
	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
	if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
		sr_error(sd->sd_sc, "invalid strip size");
		return EINVAL;
	}

	/* only if stripsize <= MAXPHYS */
	sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);

	return 0;
}

int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
}

void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}
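
/*
 * Volume state is derived from the chunk states.  Because each row keeps
 * two parity chunks, the volume stays usable (at worst degraded, scrubbing
 * or rebuilding) while no more than two chunks are out of service; losing
 * a third chunk takes it offline, which is what the
 * states[BIOC_SDONLINE] < nd - 2 test below encodes.
 */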

void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}

/* modes:
 *   readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, qbuf, gf_pow[i]);
 */
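
/*
 * Address math used by sr_raid6_rw(): the volume LBA is split into a strip
 * number and an offset within that strip.  Each row holds no_chunk data
 * strips plus one P and one Q strip, and the parity positions rotate from
 * row to row: qchunk = (no_chunk + 1) - (row % (no_chunk + 2)), with pchunk
 * directly before it (wrapping to the last chunk when qchunk is 0).  Data
 * strips then fill the remaining chunks in order.  For a five chunk volume
 * (no_chunk = 3) the first rows look roughly like this:
 *
 *	row 0:	D D D P Q
 *	row 1:	D D P Q D
 *	row 2:	D P Q D D
 *	row 3:	P Q D D D
 *	row 4:	Q D D D P
 */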

int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit	*wu_r = NULL;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_chunk		*scp;
	int			s, fail, i, gxinv, pxinv;
	daddr_t			blk, lba;
	int64_t			chunk_offs, lbaoffs, phys_offs, strip_offs;
	int64_t			strip_no, strip_size, strip_bits;
	int64_t			fchunk, no_chunk, chunk, qchunk, pchunk;
	int64_t			length, datalen, row_size;
	void			*pbuf, *data, *qbuf;

	/* blk and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blk, "sr_raid6_rw"))
		goto bad;

	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blk << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL) {
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		phys_offs = chunk_offs + strip_offs +
		    (sd->sd_meta->ssd_data_offset << DEV_BSHIFT);

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = phys_offs >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size +
		    (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i = 0; i < no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}
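
		/*
		 * The per-row invariants maintained by this discipline are
		 *   P = xor of all data strips
		 *   Q = xor of gf_pow[chunk] * data strip (GF(256) multiply)
		 * The read path below either fetches the data strip directly
		 * or, when chunks are missing, solves these equations for the
		 * lost strip; the write path updates P and Q by xoring out
		 * the old data and parity and xoring in the new.
		 */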
		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 *   Q:  sr_raid6_xorp(qbuf, --, length);
				 *   P:  sr_raid6_xorp(pbuf, --, length);
				 *   Dz: sr_raid6_xorp(pbuf, --, length);
				 *	 sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 *   Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 *   P:  sr_raid6_xorp(data, ---, length);
				 *   Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}

		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);
bad:
	/* XXX - can leak pbuf/qbuf on error. */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}

/* Handle failure I/O completion */
int
sr_failio(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;

	if (!(wu->swu_flags & SR_WUF_FAIL))
		return (0);

	/* Wu is a 'fake'.. don't do real I/O just intr */
	TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
		sr_raid6_intr(&ccb->ccb_buf);
	return (1);
}
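
/*
 * I/O completion: every ccb that carries an sr_raid6_opaque has its data
 * folded into the work unit's target buffers here, an xor for the P side
 * and a GF(256) multiply-accumulate (by pq->gn) for the Q side.  This is
 * how the scattered reads queued by sr_raid6_rw() add up to a reconstructed
 * data strip, or to the new parity of a read-modify-write whose deferred
 * write work unit is started by the framework once its paired read work
 * unit completes.
 */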

void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu;
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_raid6_opaque	*pq = ccb->ccb_opaque;
	int			s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}

int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}

int
sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
    daddr_t len, void *data, int xsflags, int ccbflags, void *pbuf,
    void *qbuf, int gn)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;
	struct sr_raid6_opaque	*pqbuf;

	DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%llx %llx %p:%p\n",
	    (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
	    (long long)blkno, (long long)len, pbuf, qbuf);

	/* Allocate temporary buffer. */
	if (data == NULL) {
		data = sr_block_get(sd, len);
		if (data == NULL)
			return (-1);
		ccbflags |= SR_CCBF_FREEBUF;
	}

	ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
	if (ccb == NULL) {
		if (ccbflags & SR_CCBF_FREEBUF)
			sr_block_put(sd, data, len);
		return (-1);
	}
	if (pbuf || qbuf) {
		/* XXX - can leak data and ccb on failure. */
		if (qbuf && gf_premul(gn))
			return (-1);

		/* XXX - should be preallocated? */
		pqbuf = malloc(sizeof(struct sr_raid6_opaque),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (pqbuf == NULL) {
			sr_ccb_put(ccb);
			return (-1);
		}
		pqbuf->pbuf = pbuf;
		pqbuf->qbuf = qbuf;
		pqbuf->gn = gn;
		ccb->ccb_opaque = pqbuf;
	}
	sr_wu_enqueue_ccb(wu, ccb);

	return (0);
}

/* Perform RAID6 parity calculation.
 * P=xor parity, Q=GF256 parity, D=data, gn=disk# */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t	*pbuf = p, *data = d;

	len >>= 2;
	while (len--)
		*pbuf++ ^= *data++;
}

void
sr_raid6_xorq(void *q, void *d, int len, int gn)
{
	uint32_t	*qbuf = q, *data = d, x;
	uint8_t		*gn_map = gf_map[gn];

	len >>= 2;
	while (len--) {
		x = *data++;
		*qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
		    ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
		    ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
		    ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
	}
}
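
/*
 * GF(256) helpers.  Multiplication goes through log/pow tables over the
 * polynomial 0x11D: gf_mul(a, b) = gf_pow[gf_log[a] + gf_log[b]].  The pow
 * table is long enough that the summed logs never need a modulo reduction,
 * and gf_log[0] = 512 points any product with a zero factor into the
 * zero-filled tail of gf_pow, so gf_mul(0, x) == 0 falls out without a
 * branch.  For example gf_mul(0x02, 0x08) == 0x10, while gf_mul(0x80, 0x02)
 * overflows the byte and reduces to 0x1D.
 */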

/* Create GF256 log/pow tables: polynomial = 0x11D */
void
gf_init(void)
{
	int	i;
	uint8_t p = 1;

	/* use 2N pow table to avoid using % in multiply */
	for (i = 0; i < 256; i++) {
		gf_log[p] = i;
		gf_pow[i] = gf_pow[i+255] = p;
		p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
	}
	gf_log[0] = 512;
}

uint8_t
gf_inv(uint8_t a)
{
	return gf_pow[255 - gf_log[a]];
}

uint8_t
gf_mul(uint8_t a, uint8_t b)
{
	return gf_pow[gf_log[a] + gf_log[b]];
}

/* Precalculate multiplication tables for drive gn */
int
gf_premul(uint8_t gn)
{
	int	i;

	if (gf_map[gn] != NULL)
		return (0);

	if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
		return (-1);

	for (i = 0; i < 256; i++)
		gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];
	return (0);
}
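
/*
 * For reference, a whole Q strip could be rebuilt from the data strips
 * with the helpers above along these lines (hypothetical sketch, with
 * dbuf[c] holding the data strip of each non-parity chunk c):
 *
 *	memset(qbuf, 0, len);
 *	for each data chunk c {
 *		if (gf_premul(gf_pow[c]))
 *			bail out;
 *		sr_raid6_xorq(qbuf, dbuf[c], len, gf_pow[c]);
 *	}
 *
 * which is the same accumulation that sr_raid6_rw() and sr_raid6_intr()
 * perform incrementally, one strip at a time.
 */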