1 /* $OpenBSD: softraid_raid6.c,v 1.73 2024/05/13 01:15:50 jsg Exp $ */ 2 /* 3 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bio.h" 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/buf.h> 24 #include <sys/device.h> 25 #include <sys/ioctl.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/mount.h> 33 #include <sys/sensors.h> 34 #include <sys/stat.h> 35 #include <sys/task.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 45 uint8_t *gf_map[256]; 46 uint8_t gf_pow[768]; 47 int gf_log[256]; 48 49 /* RAID 6 functions. */ 50 int sr_raid6_create(struct sr_discipline *, struct bioc_createraid *, 51 int, int64_t); 52 int sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *, 53 int, void *); 54 int sr_raid6_init(struct sr_discipline *); 55 int sr_raid6_rw(struct sr_workunit *); 56 int sr_raid6_openings(struct sr_discipline *); 57 void sr_raid6_intr(struct buf *); 58 int sr_raid6_wu_done(struct sr_workunit *); 59 void sr_raid6_set_chunk_state(struct sr_discipline *, int, int); 60 void sr_raid6_set_vol_state(struct sr_discipline *); 61 62 void sr_raid6_xorp(void *, void *, int); 63 void sr_raid6_xorq(void *, void *, int, int); 64 int sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long, 65 void *, int, int, void *, void *, int); 66 int sr_failio(struct sr_workunit *); 67 68 void gf_init(void); 69 uint8_t gf_inv(uint8_t); 70 int gf_premul(uint8_t); 71 uint8_t gf_mul(uint8_t, uint8_t); 72 73 #define SR_NOFAIL 0x00 74 #define SR_FAILX (1L << 0) 75 #define SR_FAILY (1L << 1) 76 #define SR_FAILP (1L << 2) 77 #define SR_FAILQ (1L << 3) 78 79 struct sr_raid6_opaque { 80 int gn; 81 void *pbuf; 82 void *qbuf; 83 }; 84 85 /* discipline initialisation. */ 86 void 87 sr_raid6_discipline_init(struct sr_discipline *sd) 88 { 89 /* Initialize GF256 tables. */ 90 gf_init(); 91 92 /* Fill out discipline members. */ 93 sd->sd_type = SR_MD_RAID6; 94 strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name)); 95 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 96 SR_CAP_REDUNDANT; 97 sd->sd_max_wu = SR_RAID6_NOWU; 98 99 /* Setup discipline specific function pointers. */ 100 sd->sd_assemble = sr_raid6_assemble; 101 sd->sd_create = sr_raid6_create; 102 sd->sd_openings = sr_raid6_openings; 103 sd->sd_scsi_rw = sr_raid6_rw; 104 sd->sd_scsi_intr = sr_raid6_intr; 105 sd->sd_scsi_wu_done = sr_raid6_wu_done; 106 sd->sd_set_chunk_state = sr_raid6_set_chunk_state; 107 sd->sd_set_vol_state = sr_raid6_set_vol_state; 108 } 109 110 int 111 sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc, 112 int no_chunk, int64_t coerced_size) 113 { 114 if (no_chunk < 4) { 115 sr_error(sd->sd_sc, "%s requires four or more chunks", 116 sd->sd_name); 117 return EINVAL; 118 } 119 120 /* 121 * XXX add variable strip size later even though MAXPHYS is really 122 * the clever value, users like * to tinker with that type of stuff. 123 */ 124 sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS; 125 sd->sd_meta->ssdi.ssd_size = (coerced_size & 126 ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >> 127 DEV_BSHIFT) - 1)) * (no_chunk - 2); 128 129 return sr_raid6_init(sd); 130 } 131 132 int 133 sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 134 int no_chunk, void *data) 135 { 136 return sr_raid6_init(sd); 137 } 138 139 int 140 sr_raid6_init(struct sr_discipline *sd) 141 { 142 /* Initialise runtime values. */ 143 sd->mds.mdd_raid6.sr6_strip_bits = 144 sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size); 145 if (sd->mds.mdd_raid6.sr6_strip_bits == -1) { 146 sr_error(sd->sd_sc, "invalid strip size"); 147 return EINVAL; 148 } 149 150 /* only if stripsize <= MAXPHYS */ 151 sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no); 152 153 return 0; 154 } 155 156 int 157 sr_raid6_openings(struct sr_discipline *sd) 158 { 159 return (sd->sd_max_wu >> 1); /* 2 wu's per IO */ 160 } 161 162 void 163 sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 164 { 165 int old_state, s; 166 167 /* XXX this is for RAID 0 */ 168 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 169 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 170 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 171 172 /* ok to go to splbio since this only happens in error path */ 173 s = splbio(); 174 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 175 176 /* multiple IOs to the same chunk that fail will come through here */ 177 if (old_state == new_state) 178 goto done; 179 180 switch (old_state) { 181 case BIOC_SDONLINE: 182 switch (new_state) { 183 case BIOC_SDOFFLINE: 184 case BIOC_SDSCRUB: 185 break; 186 default: 187 goto die; 188 } 189 break; 190 191 case BIOC_SDOFFLINE: 192 if (new_state == BIOC_SDREBUILD) { 193 ; 194 } else 195 goto die; 196 break; 197 198 case BIOC_SDSCRUB: 199 switch (new_state) { 200 case BIOC_SDONLINE: 201 case BIOC_SDOFFLINE: 202 break; 203 default: 204 goto die; 205 } 206 break; 207 208 case BIOC_SDREBUILD: 209 switch (new_state) { 210 case BIOC_SDONLINE: 211 case BIOC_SDOFFLINE: 212 break; 213 default: 214 goto die; 215 } 216 break; 217 218 default: 219 die: 220 splx(s); /* XXX */ 221 panic("%s: %s: %s: invalid chunk state transition %d -> %d", 222 DEVNAME(sd->sd_sc), 223 sd->sd_meta->ssd_devname, 224 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 225 old_state, new_state); 226 /* NOTREACHED */ 227 } 228 229 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 230 sd->sd_set_vol_state(sd); 231 232 sd->sd_must_flush = 1; 233 task_add(systq, &sd->sd_meta_save_task); 234 done: 235 splx(s); 236 } 237 238 void 239 sr_raid6_set_vol_state(struct sr_discipline *sd) 240 { 241 int states[SR_MAX_STATES]; 242 int new_state, i, s, nd; 243 int old_state = sd->sd_vol_status; 244 245 /* XXX this is for RAID 0 */ 246 247 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 248 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 249 250 nd = sd->sd_meta->ssdi.ssd_chunk_no; 251 252 for (i = 0; i < SR_MAX_STATES; i++) 253 states[i] = 0; 254 255 for (i = 0; i < nd; i++) { 256 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 257 if (s >= SR_MAX_STATES) 258 panic("%s: %s: %s: invalid chunk state", 259 DEVNAME(sd->sd_sc), 260 sd->sd_meta->ssd_devname, 261 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 262 states[s]++; 263 } 264 265 if (states[BIOC_SDONLINE] == nd) 266 new_state = BIOC_SVONLINE; 267 else if (states[BIOC_SDONLINE] < nd - 2) 268 new_state = BIOC_SVOFFLINE; 269 else if (states[BIOC_SDSCRUB] != 0) 270 new_state = BIOC_SVSCRUB; 271 else if (states[BIOC_SDREBUILD] != 0) 272 new_state = BIOC_SVREBUILD; 273 else if (states[BIOC_SDONLINE] < nd) 274 new_state = BIOC_SVDEGRADED; 275 else { 276 printf("old_state = %d, ", old_state); 277 for (i = 0; i < nd; i++) 278 printf("%d = %d, ", i, 279 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 280 panic("invalid new_state"); 281 } 282 283 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", 284 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 285 old_state, new_state); 286 287 switch (old_state) { 288 case BIOC_SVONLINE: 289 switch (new_state) { 290 case BIOC_SVONLINE: /* can go to same state */ 291 case BIOC_SVOFFLINE: 292 case BIOC_SVDEGRADED: 293 case BIOC_SVREBUILD: /* happens on boot */ 294 break; 295 default: 296 goto die; 297 } 298 break; 299 300 case BIOC_SVOFFLINE: 301 /* XXX this might be a little too much */ 302 goto die; 303 304 case BIOC_SVDEGRADED: 305 switch (new_state) { 306 case BIOC_SVOFFLINE: 307 case BIOC_SVREBUILD: 308 case BIOC_SVDEGRADED: /* can go to the same state */ 309 break; 310 default: 311 goto die; 312 } 313 break; 314 315 case BIOC_SVBUILDING: 316 switch (new_state) { 317 case BIOC_SVONLINE: 318 case BIOC_SVOFFLINE: 319 case BIOC_SVBUILDING: /* can go to the same state */ 320 break; 321 default: 322 goto die; 323 } 324 break; 325 326 case BIOC_SVSCRUB: 327 switch (new_state) { 328 case BIOC_SVONLINE: 329 case BIOC_SVOFFLINE: 330 case BIOC_SVDEGRADED: 331 case BIOC_SVSCRUB: /* can go to same state */ 332 break; 333 default: 334 goto die; 335 } 336 break; 337 338 case BIOC_SVREBUILD: 339 switch (new_state) { 340 case BIOC_SVONLINE: 341 case BIOC_SVOFFLINE: 342 case BIOC_SVDEGRADED: 343 case BIOC_SVREBUILD: /* can go to the same state */ 344 break; 345 default: 346 goto die; 347 } 348 break; 349 350 default: 351 die: 352 panic("%s: %s: invalid volume state transition %d -> %d", 353 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 354 old_state, new_state); 355 /* NOTREACHED */ 356 } 357 358 sd->sd_vol_status = new_state; 359 } 360 361 /* modes: 362 * readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, 363 * 0, qbuf, NULL, 0); 364 * readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, 365 * 0, pbuf, NULL, 0); 366 * readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN, 367 * 0, pbuf, qbuf, gf_pow[i]); 368 */ 369 370 int 371 sr_raid6_rw(struct sr_workunit *wu) 372 { 373 struct sr_workunit *wu_r = NULL; 374 struct sr_discipline *sd = wu->swu_dis; 375 struct scsi_xfer *xs = wu->swu_xs; 376 struct sr_chunk *scp; 377 int s, fail, i, gxinv, pxinv; 378 daddr_t blkno, lba; 379 int64_t chunk_offs, lbaoffs, offset, strip_offs; 380 int64_t strip_no, strip_size, strip_bits, row_size; 381 int64_t fchunk, no_chunk, chunk, qchunk, pchunk; 382 long length, datalen; 383 void *pbuf, *data, *qbuf; 384 385 /* blkno and scsi error will be handled by sr_validate_io */ 386 if (sr_validate_io(wu, &blkno, "sr_raid6_rw")) 387 goto bad; 388 389 strip_size = sd->sd_meta->ssdi.ssd_strip_size; 390 strip_bits = sd->mds.mdd_raid6.sr6_strip_bits; 391 no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2; 392 row_size = (no_chunk << strip_bits) >> DEV_BSHIFT; 393 394 data = xs->data; 395 datalen = xs->datalen; 396 lbaoffs = blkno << DEV_BSHIFT; 397 398 if (xs->flags & SCSI_DATA_OUT) { 399 if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){ 400 printf("%s: can't get wu_r", DEVNAME(sd->sd_sc)); 401 goto bad; 402 } 403 wu_r->swu_state = SR_WU_INPROGRESS; 404 wu_r->swu_flags |= SR_WUF_DISCIPLINE; 405 } 406 407 wu->swu_blk_start = 0; 408 while (datalen != 0) { 409 strip_no = lbaoffs >> strip_bits; 410 strip_offs = lbaoffs & (strip_size - 1); 411 chunk_offs = (strip_no / no_chunk) << strip_bits; 412 offset = chunk_offs + strip_offs; 413 414 /* get size remaining in this stripe */ 415 length = MIN(strip_size - strip_offs, datalen); 416 417 /* map disk offset to parity/data drive */ 418 chunk = strip_no % no_chunk; 419 420 qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2)); 421 if (qchunk == 0) 422 pchunk = no_chunk + 1; 423 else 424 pchunk = qchunk - 1; 425 if (chunk >= pchunk) 426 chunk++; 427 if (chunk >= qchunk) 428 chunk++; 429 430 lba = offset >> DEV_BSHIFT; 431 432 /* XXX big hammer.. exclude I/O from entire stripe */ 433 if (wu->swu_blk_start == 0) 434 wu->swu_blk_start = (strip_no / no_chunk) * row_size; 435 wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1); 436 437 fail = 0; 438 fchunk = -1; 439 440 /* Get disk-fail flags */ 441 for (i=0; i< no_chunk+2; i++) { 442 scp = sd->sd_vol.sv_chunks[i]; 443 switch (scp->src_meta.scm_status) { 444 case BIOC_SDOFFLINE: 445 case BIOC_SDREBUILD: 446 case BIOC_SDHOTSPARE: 447 if (i == qchunk) 448 fail |= SR_FAILQ; 449 else if (i == pchunk) 450 fail |= SR_FAILP; 451 else if (i == chunk) 452 fail |= SR_FAILX; 453 else { 454 /* dual data-disk failure */ 455 fail |= SR_FAILY; 456 fchunk = i; 457 } 458 break; 459 } 460 } 461 if (xs->flags & SCSI_DATA_IN) { 462 if (!(fail & SR_FAILX)) { 463 /* drive is good. issue single read request */ 464 if (sr_raid6_addio(wu, chunk, lba, length, 465 data, xs->flags, 0, NULL, NULL, 0)) 466 goto bad; 467 } else if (fail & SR_FAILP) { 468 /* Dx, P failed */ 469 printf("Disk %llx offline, " 470 "regenerating Dx+P\n", chunk); 471 472 gxinv = gf_inv(gf_pow[chunk]); 473 474 /* Calculate: Dx = (Q^Dz*gz)*inv(gx) */ 475 memset(data, 0, length); 476 if (sr_raid6_addio(wu, qchunk, lba, length, 477 NULL, SCSI_DATA_IN, 0, NULL, data, gxinv)) 478 goto bad; 479 480 /* Read Dz * gz * inv(gx) */ 481 for (i = 0; i < no_chunk+2; i++) { 482 if (i == qchunk || i == pchunk || i == chunk) 483 continue; 484 485 if (sr_raid6_addio(wu, i, lba, length, 486 NULL, SCSI_DATA_IN, 0, NULL, data, 487 gf_mul(gf_pow[i], gxinv))) 488 goto bad; 489 } 490 491 /* data will contain correct value on completion */ 492 } else if (fail & SR_FAILY) { 493 /* Dx, Dy failed */ 494 printf("Disk %llx & %llx offline, " 495 "regenerating Dx+Dy\n", chunk, fchunk); 496 497 gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]); 498 pxinv = gf_mul(gf_pow[fchunk], gxinv); 499 500 /* read Q * inv(gx + gy) */ 501 memset(data, 0, length); 502 if (sr_raid6_addio(wu, qchunk, lba, length, 503 NULL, SCSI_DATA_IN, 0, NULL, data, gxinv)) 504 goto bad; 505 506 /* read P * gy * inv(gx + gy) */ 507 if (sr_raid6_addio(wu, pchunk, lba, length, 508 NULL, SCSI_DATA_IN, 0, NULL, data, pxinv)) 509 goto bad; 510 511 /* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz 512 * Q: sr_raid6_xorp(qbuf, --, length); 513 * P: sr_raid6_xorp(pbuf, --, length); 514 * Dz: sr_raid6_xorp(pbuf, --, length); 515 * sr_raid6_xorq(qbuf, --, length, gf_pow[i]); 516 */ 517 for (i = 0; i < no_chunk+2; i++) { 518 if (i == qchunk || i == pchunk || 519 i == chunk || i == fchunk) 520 continue; 521 522 /* read Dz * (gz + gy) * inv(gx + gy) */ 523 if (sr_raid6_addio(wu, i, lba, length, 524 NULL, SCSI_DATA_IN, 0, NULL, data, 525 pxinv ^ gf_mul(gf_pow[i], gxinv))) 526 goto bad; 527 } 528 } else { 529 /* Two cases: single disk (Dx) or (Dx+Q) 530 * Dx = Dz ^ P (same as RAID5) 531 */ 532 printf("Disk %llx offline, " 533 "regenerating Dx%s\n", chunk, 534 fail & SR_FAILQ ? "+Q" : " single"); 535 536 /* Calculate: Dx = P^Dz 537 * P: sr_raid6_xorp(data, ---, length); 538 * Dz: sr_raid6_xorp(data, ---, length); 539 */ 540 memset(data, 0, length); 541 for (i = 0; i < no_chunk+2; i++) { 542 if (i != chunk && i != qchunk) { 543 /* Read Dz */ 544 if (sr_raid6_addio(wu, i, lba, 545 length, NULL, SCSI_DATA_IN, 546 0, data, NULL, 0)) 547 goto bad; 548 } 549 } 550 551 /* data will contain correct value on completion */ 552 } 553 } else { 554 /* XXX handle writes to failed/offline disk? */ 555 if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP)) 556 goto bad; 557 558 /* 559 * initialize pbuf with contents of new data to be 560 * written. This will be XORed with old data and old 561 * parity in the intr routine. The result in pbuf 562 * is the new parity data. 563 */ 564 qbuf = sr_block_get(sd, length); 565 if (qbuf == NULL) 566 goto bad; 567 568 pbuf = sr_block_get(sd, length); 569 if (pbuf == NULL) 570 goto bad; 571 572 /* Calculate P = Dn; Q = gn * Dn */ 573 if (gf_premul(gf_pow[chunk])) 574 goto bad; 575 sr_raid6_xorp(pbuf, data, length); 576 sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]); 577 578 /* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */ 579 if (sr_raid6_addio(wu_r, chunk, lba, length, NULL, 580 SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk])) 581 goto bad; 582 583 /* Read old xor-parity: P ^= P' */ 584 if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL, 585 SCSI_DATA_IN, 0, pbuf, NULL, 0)) 586 goto bad; 587 588 /* Read old q-parity: Q ^= Q' */ 589 if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL, 590 SCSI_DATA_IN, 0, qbuf, NULL, 0)) 591 goto bad; 592 593 /* write new data */ 594 if (sr_raid6_addio(wu, chunk, lba, length, data, 595 xs->flags, 0, NULL, NULL, 0)) 596 goto bad; 597 598 /* write new xor-parity */ 599 if (sr_raid6_addio(wu, pchunk, lba, length, pbuf, 600 xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0)) 601 goto bad; 602 603 /* write new q-parity */ 604 if (sr_raid6_addio(wu, qchunk, lba, length, qbuf, 605 xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0)) 606 goto bad; 607 } 608 609 /* advance to next block */ 610 lbaoffs += length; 611 datalen -= length; 612 data += length; 613 } 614 615 s = splbio(); 616 if (wu_r) { 617 /* collide write request with reads */ 618 wu_r->swu_blk_start = wu->swu_blk_start; 619 wu_r->swu_blk_end = wu->swu_blk_end; 620 621 wu->swu_state = SR_WU_DEFERRED; 622 wu_r->swu_collider = wu; 623 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 624 625 wu = wu_r; 626 } 627 splx(s); 628 629 sr_schedule_wu(wu); 630 631 return (0); 632 bad: 633 /* XXX - can leak pbuf/qbuf on error. */ 634 /* wu is unwound by sr_wu_put */ 635 if (wu_r) 636 sr_scsi_wu_put(sd, wu_r); 637 return (1); 638 } 639 640 /* Handle failure I/O completion */ 641 int 642 sr_failio(struct sr_workunit *wu) 643 { 644 struct sr_discipline *sd = wu->swu_dis; 645 struct sr_ccb *ccb; 646 647 if (!(wu->swu_flags & SR_WUF_FAIL)) 648 return (0); 649 650 /* Wu is a 'fake'.. don't do real I/O just intr */ 651 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 652 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) 653 sr_raid6_intr(&ccb->ccb_buf); 654 return (1); 655 } 656 657 void 658 sr_raid6_intr(struct buf *bp) 659 { 660 struct sr_ccb *ccb = (struct sr_ccb *)bp; 661 struct sr_workunit *wu = ccb->ccb_wu; 662 struct sr_discipline *sd = wu->swu_dis; 663 struct sr_raid6_opaque *pq = ccb->ccb_opaque; 664 int s; 665 666 DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n", 667 DEVNAME(sd->sd_sc), bp, wu->swu_xs); 668 669 s = splbio(); 670 sr_ccb_done(ccb); 671 672 /* XOR data to result. */ 673 if (ccb->ccb_state == SR_CCB_OK && pq) { 674 if (pq->pbuf) 675 /* Calculate xor-parity */ 676 sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data, 677 ccb->ccb_buf.b_bcount); 678 if (pq->qbuf) 679 /* Calculate q-parity */ 680 sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data, 681 ccb->ccb_buf.b_bcount, pq->gn); 682 free(pq, M_DEVBUF, 0); 683 ccb->ccb_opaque = NULL; 684 } 685 686 /* Free allocated data buffer. */ 687 if (ccb->ccb_flags & SR_CCBF_FREEBUF) { 688 sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount); 689 ccb->ccb_buf.b_data = NULL; 690 } 691 692 sr_wu_done(wu); 693 splx(s); 694 } 695 696 int 697 sr_raid6_wu_done(struct sr_workunit *wu) 698 { 699 struct sr_discipline *sd = wu->swu_dis; 700 struct scsi_xfer *xs = wu->swu_xs; 701 702 /* XXX - we have no way of propagating errors... */ 703 if (wu->swu_flags & SR_WUF_DISCIPLINE) 704 return SR_WU_OK; 705 706 /* XXX - This is insufficient for RAID 6. */ 707 if (wu->swu_ios_succeeded > 0) { 708 xs->error = XS_NOERROR; 709 return SR_WU_OK; 710 } 711 712 if (xs->flags & SCSI_DATA_IN) { 713 printf("%s: retrying read on block %lld\n", 714 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 715 sr_wu_release_ccbs(wu); 716 wu->swu_state = SR_WU_RESTART; 717 if (sd->sd_scsi_rw(wu) == 0) 718 return SR_WU_RESTART; 719 } else { 720 printf("%s: permanently fail write on block %lld\n", 721 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 722 } 723 724 wu->swu_state = SR_WU_FAILED; 725 xs->error = XS_DRIVER_STUFFUP; 726 727 return SR_WU_FAILED; 728 } 729 730 int 731 sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno, 732 long len, void *data, int xsflags, int ccbflags, void *pbuf, 733 void *qbuf, int gn) 734 { 735 struct sr_discipline *sd = wu->swu_dis; 736 struct sr_ccb *ccb; 737 struct sr_raid6_opaque *pqbuf; 738 739 DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n", 740 (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk, 741 (long long)blkno, len, pbuf, qbuf); 742 743 /* Allocate temporary buffer. */ 744 if (data == NULL) { 745 data = sr_block_get(sd, len); 746 if (data == NULL) 747 return (-1); 748 ccbflags |= SR_CCBF_FREEBUF; 749 } 750 751 ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags); 752 if (ccb == NULL) { 753 if (ccbflags & SR_CCBF_FREEBUF) 754 sr_block_put(sd, data, len); 755 return (-1); 756 } 757 if (pbuf || qbuf) { 758 /* XXX - can leak data and ccb on failure. */ 759 if (qbuf && gf_premul(gn)) 760 return (-1); 761 762 /* XXX - should be preallocated? */ 763 pqbuf = malloc(sizeof(struct sr_raid6_opaque), 764 M_DEVBUF, M_ZERO | M_NOWAIT); 765 if (pqbuf == NULL) { 766 sr_ccb_put(ccb); 767 return (-1); 768 } 769 pqbuf->pbuf = pbuf; 770 pqbuf->qbuf = qbuf; 771 pqbuf->gn = gn; 772 ccb->ccb_opaque = pqbuf; 773 } 774 sr_wu_enqueue_ccb(wu, ccb); 775 776 return (0); 777 } 778 779 /* Perform RAID6 parity calculation. 780 * P=xor parity, Q=GF256 parity, D=data, gn=disk# */ 781 void 782 sr_raid6_xorp(void *p, void *d, int len) 783 { 784 uint32_t *pbuf = p, *data = d; 785 786 len >>= 2; 787 while (len--) 788 *pbuf++ ^= *data++; 789 } 790 791 void 792 sr_raid6_xorq(void *q, void *d, int len, int gn) 793 { 794 uint32_t *qbuf = q, *data = d, x; 795 uint8_t *gn_map = gf_map[gn]; 796 797 len >>= 2; 798 while (len--) { 799 x = *data++; 800 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) | 801 ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) | 802 ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) | 803 ((uint32_t)gn_map[(x >> 24) & 0xff] << 24)); 804 } 805 } 806 807 /* Create GF256 log/pow tables: polynomial = 0x11D */ 808 void 809 gf_init(void) 810 { 811 int i; 812 uint8_t p = 1; 813 814 /* use 2N pow table to avoid using % in multiply */ 815 for (i=0; i<256; i++) { 816 gf_log[p] = i; 817 gf_pow[i] = gf_pow[i+255] = p; 818 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00)); 819 } 820 gf_log[0] = 512; 821 } 822 823 uint8_t 824 gf_inv(uint8_t a) 825 { 826 return gf_pow[255 - gf_log[a]]; 827 } 828 829 uint8_t 830 gf_mul(uint8_t a, uint8_t b) 831 { 832 return gf_pow[gf_log[a] + gf_log[b]]; 833 } 834 835 /* Precalculate multiplication tables for drive gn */ 836 int 837 gf_premul(uint8_t gn) 838 { 839 int i; 840 841 if (gf_map[gn] != NULL) 842 return (0); 843 844 if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL) 845 return (-1); 846 847 for (i=0; i<256; i++) 848 gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]]; 849 return (0); 850 } 851