1 /* $NetBSD: rf_pq.c,v 1.10 2001/10/04 15:58:55 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * Code for RAID level 6 (P + Q) disk array architecture. 
31 */ 32 33 #include "rf_archs.h" 34 35 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) 36 37 #include <dev/raidframe/raidframevar.h> 38 39 #include "rf_raid.h" 40 #include "rf_dag.h" 41 #include "rf_dagffrd.h" 42 #include "rf_dagffwr.h" 43 #include "rf_dagdegrd.h" 44 #include "rf_dagdegwr.h" 45 #include "rf_dagutils.h" 46 #include "rf_dagfuncs.h" 47 #include "rf_etimer.h" 48 #include "rf_pqdeg.h" 49 #include "rf_general.h" 50 #include "rf_map.h" 51 #include "rf_pq.h" 52 53 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; 54 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; 55 56 int 57 rf_RegularONPFunc(node) 58 RF_DagNode_t *node; 59 { 60 return (rf_RegularXorFunc(node)); 61 } 62 /* 63 same as simpleONQ func, but the coefficient is always 1 64 */ 65 66 int 67 rf_SimpleONPFunc(node) 68 RF_DagNode_t *node; 69 { 70 return (rf_SimpleXorFunc(node)); 71 } 72 73 int 74 rf_RecoveryPFunc(node) 75 RF_DagNode_t *node; 76 { 77 return (rf_RecoveryXorFunc(node)); 78 } 79 80 int 81 rf_RegularPFunc(node) 82 RF_DagNode_t *node; 83 { 84 return (rf_RegularXorFunc(node)); 85 } 86 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ 87 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 88 89 static void 90 QDelta(char *dest, char *obuf, char *nbuf, unsigned length, 91 unsigned char coeff); 92 static void 93 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, 94 unsigned length, unsigned coeff); 95 96 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; 97 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; 98 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; 99 100 void 101 rf_PQDagSelect( 102 RF_Raid_t * raidPtr, 103 
RF_IoType_t type, 104 RF_AccessStripeMap_t * asmap, 105 RF_VoidFuncPtr * createFunc) 106 { 107 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 108 unsigned ndfail = asmap->numDataFailed; 109 unsigned npfail = asmap->numParityFailed; 110 unsigned ntfail = npfail + ndfail; 111 112 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 113 if (ntfail > 2) { 114 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); 115 /* *infoFunc = */ *createFunc = NULL; 116 return; 117 } 118 /* ok, we can do this I/O */ 119 if (type == RF_IO_TYPE_READ) { 120 switch (ndfail) { 121 case 0: 122 /* fault free read */ 123 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ 124 break; 125 case 1: 126 /* lost a single data unit */ 127 /* two cases: (1) parity is not lost. do a normal raid 128 * 5 reconstruct read. (2) parity is lost. do a 129 * reconstruct read using "q". */ 130 if (ntfail == 2) { /* also lost redundancy */ 131 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) 132 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG; 133 else 134 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG; 135 } else { 136 /* P and Q are ok. But is there a failure in 137 * some unaccessed data unit? 
*/ 138 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) 139 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; 140 else 141 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG; 142 } 143 break; 144 case 2: 145 /* lost two data units */ 146 /* *infoFunc = PQOneTwo; */ 147 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; 148 break; 149 } 150 return; 151 } 152 /* a write */ 153 switch (ntfail) { 154 case 0: /* fault free */ 155 if (rf_suppressLocksAndLargeWrites || 156 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || 157 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { 158 159 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG; 160 } else { 161 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG; 162 } 163 break; 164 165 case 1: /* single disk fault */ 166 if (npfail == 1) { 167 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); 168 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like 169 * normal mode raid5 170 * write. */ 171 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) 172 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) 173 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG; 174 else 175 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG; 176 } else {/* parity died, small write only updating Q */ 177 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) 178 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) 179 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG; 180 else 181 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG; 182 } 183 } else { /* data missing. Do a P reconstruct write if 184 * only a single data unit is lost in the 185 * stripe, otherwise a PQ reconstruct write. 
*/ 186 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) 187 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; 188 else 189 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG; 190 } 191 break; 192 193 case 2: /* two disk faults */ 194 switch (npfail) { 195 case 2: /* both p and q dead */ 196 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG; 197 break; 198 case 1: /* either p or q and dead data */ 199 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); 200 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); 201 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) 202 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG; 203 else 204 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG; 205 break; 206 case 0: /* double data loss */ 207 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; 208 break; 209 } 210 break; 211 212 default: /* more than 2 disk faults */ 213 *createFunc = NULL; 214 RF_PANIC(); 215 } 216 return; 217 } 218 /* 219 Used as a stop gap info function 220 */ 221 #if 0 222 static void 223 PQOne(raidPtr, nSucc, nAnte, asmap) 224 RF_Raid_t *raidPtr; 225 int *nSucc; 226 int *nAnte; 227 RF_AccessStripeMap_t *asmap; 228 { 229 *nSucc = *nAnte = 1; 230 } 231 232 static void 233 PQOneTwo(raidPtr, nSucc, nAnte, asmap) 234 RF_Raid_t *raidPtr; 235 int *nSucc; 236 int *nAnte; 237 RF_AccessStripeMap_t *asmap; 238 { 239 *nSucc = 1; 240 *nAnte = 2; 241 } 242 #endif 243 244 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) 245 { 246 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, 247 rf_RegularPQFunc, RF_FALSE); 248 } 249 250 int 251 rf_RegularONQFunc(node) 252 RF_DagNode_t *node; 253 { 254 int np = node->numParams; 255 int d; 256 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 257 int i; 258 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 259 RF_Etimer_t timer; 260 char *qbuf, *qpbuf; 261 char *obuf, *nbuf; 262 
RF_PhysDiskAddr_t *old, *new; 263 unsigned long coeff; 264 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 265 266 RF_ETIMER_START(timer); 267 268 d = (np - 3) / 4; 269 RF_ASSERT(4 * d + 3 == np); 270 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ 271 for (i = 0; i < d; i++) { 272 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 273 obuf = (char *) node->params[2 * i + 1].p; 274 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; 275 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; 276 RF_ASSERT(new->numSector == old->numSector); 277 RF_ASSERT(new->raidAddress == old->raidAddress); 278 /* the stripe unit within the stripe tells us the coefficient 279 * to use for the multiply. */ 280 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); 281 /* compute the data unit offset within the column, then add 282 * one */ 283 coeff = (coeff % raidPtr->Layout.numDataCol); 284 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); 285 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 286 } 287 288 RF_ETIMER_STOP(timer); 289 RF_ETIMER_EVAL(timer); 290 tracerec->q_us += RF_ETIMER_VAL_US(timer); 291 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 292 * I/O in this node */ 293 return (0); 294 } 295 /* 296 See the SimpleXORFunc for the difference between a simple and regular func. 297 These Q functions should be used for 298 299 new q = Q(data,old data,old q) 300 301 style updates and not for 302 303 q = ( new data, new data, .... ) 304 305 computations. 306 307 The simple q takes 2(2d+1)+1 params, where d is the number 308 of stripes written. The order of params is 309 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d 310 [2d] old q pda_0, old q buffer 311 [2d_2] new data pda_0, new data buffer_0, ... 
new data pda_d, new data buffer_d 312 raidPtr 313 */ 314 315 int 316 rf_SimpleONQFunc(node) 317 RF_DagNode_t *node; 318 { 319 int np = node->numParams; 320 int d; 321 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 322 int i; 323 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 324 RF_Etimer_t timer; 325 char *qbuf; 326 char *obuf, *nbuf; 327 RF_PhysDiskAddr_t *old, *new; 328 unsigned long coeff; 329 330 RF_ETIMER_START(timer); 331 332 d = (np - 3) / 4; 333 RF_ASSERT(4 * d + 3 == np); 334 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ 335 for (i = 0; i < d; i++) { 336 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 337 obuf = (char *) node->params[2 * i + 1].p; 338 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; 339 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; 340 RF_ASSERT(new->numSector == old->numSector); 341 RF_ASSERT(new->raidAddress == old->raidAddress); 342 /* the stripe unit within the stripe tells us the coefficient 343 * to use for the multiply. 
*/ 344 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); 345 /* compute the data unit offset within the column, then add 346 * one */ 347 coeff = (coeff % raidPtr->Layout.numDataCol); 348 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 349 } 350 351 RF_ETIMER_STOP(timer); 352 RF_ETIMER_EVAL(timer); 353 tracerec->q_us += RF_ETIMER_VAL_US(timer); 354 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 355 * I/O in this node */ 356 return (0); 357 } 358 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) 359 { 360 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); 361 } 362 363 static void RegularQSubr(RF_DagNode_t *node, char *qbuf); 364 365 static void 366 RegularQSubr(node, qbuf) 367 RF_DagNode_t *node; 368 char *qbuf; 369 { 370 int np = node->numParams; 371 int d; 372 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 373 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 374 int i; 375 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 376 RF_Etimer_t timer; 377 char *obuf, *qpbuf; 378 RF_PhysDiskAddr_t *old; 379 unsigned long coeff; 380 381 RF_ETIMER_START(timer); 382 383 d = (np - 1) / 2; 384 RF_ASSERT(2 * d + 1 == np); 385 for (i = 0; i < d; i++) { 386 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 387 obuf = (char *) node->params[2 * i + 1].p; 388 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 389 /* compute the data unit offset within the column, then add 390 * one */ 391 coeff = (coeff % raidPtr->Layout.numDataCol); 392 /* the input buffers may not all be aligned with the start of 393 * the stripe. 
so shift by their sector offset within the 394 * stripe unit */ 395 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); 396 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 397 } 398 399 RF_ETIMER_STOP(timer); 400 RF_ETIMER_EVAL(timer); 401 tracerec->q_us += RF_ETIMER_VAL_US(timer); 402 } 403 /* 404 used in degraded writes. 405 */ 406 407 static void DegrQSubr(RF_DagNode_t *node); 408 409 static void 410 DegrQSubr(node) 411 RF_DagNode_t *node; 412 { 413 int np = node->numParams; 414 int d; 415 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 416 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 417 int i; 418 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 419 RF_Etimer_t timer; 420 char *qbuf = node->results[1]; 421 char *obuf, *qpbuf; 422 RF_PhysDiskAddr_t *old; 423 unsigned long coeff; 424 unsigned fail_start; 425 int j; 426 427 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 428 fail_start = old->startSector % secPerSU; 429 430 RF_ETIMER_START(timer); 431 432 d = (np - 2) / 2; 433 RF_ASSERT(2 * d + 2 == np); 434 for (i = 0; i < d; i++) { 435 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 436 obuf = (char *) node->params[2 * i + 1].p; 437 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 438 /* compute the data unit offset within the column, then add 439 * one */ 440 coeff = (coeff % raidPtr->Layout.numDataCol); 441 /* the input buffers may not all be aligned with the start of 442 * the stripe. 
so shift by their sector offset within the 443 * stripe unit */ 444 j = old->startSector % secPerSU; 445 RF_ASSERT(j >= fail_start); 446 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 447 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 448 } 449 450 RF_ETIMER_STOP(timer); 451 RF_ETIMER_EVAL(timer); 452 tracerec->q_us += RF_ETIMER_VAL_US(timer); 453 } 454 /* 455 Called by large write code to compute the new parity and the new q. 456 457 structure of the params: 458 459 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol 460 raidPtr 461 462 for a total of 2d+1 arguments. 463 The result buffers results[0], results[1] are the buffers for the p and q, 464 respectively. 465 466 We compute Q first, then compute P. The P calculation may try to reuse 467 one of the input buffers for its output, so if we computed P first, we would 468 corrupt the input for the q calculation. 469 */ 470 471 int 472 rf_RegularPQFunc(node) 473 RF_DagNode_t *node; 474 { 475 RegularQSubr(node, node->results[1]); 476 return (rf_RegularXorFunc(node)); /* does the wakeup */ 477 } 478 479 int 480 rf_RegularQFunc(node) 481 RF_DagNode_t *node; 482 { 483 /* Almost ... adjust Qsubr args */ 484 RegularQSubr(node, node->results[0]); 485 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 486 * I/O in this node */ 487 return (0); 488 } 489 /* 490 Called by singly degraded write code to compute the new parity and the new q. 491 492 structure of the params: 493 494 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d 495 failedPDA raidPtr 496 497 for a total of 2d+2 arguments. 498 The result buffers results[0], results[1] are the buffers for the parity and q, 499 respectively. 500 501 We compute Q first, then compute parity. 
The parity calculation may try to reuse 502 one of the input buffers for its output, so if we computed parity first, we would 503 corrupt the input for the q calculation. 504 505 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. 506 */ 507 508 void 509 rf_Degraded_100_PQFunc(node) 510 RF_DagNode_t *node; 511 { 512 int np = node->numParams; 513 514 RF_ASSERT(np >= 2); 515 DegrQSubr(node); 516 rf_RecoveryXorFunc(node); 517 } 518 519 520 /* 521 The two below are used when reading a stripe with a single lost data unit. 522 The parameters are 523 524 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr 525 526 and results[0] contains the data buffer. Which is originally zero-filled. 527 528 */ 529 530 /* this Q func is used by the degraded-mode dag functions to recover lost data. 531 * the second-to-last parameter is the PDA for the failed portion of the access. 532 * the code here looks at this PDA and assumes that the xor target buffer is 533 * equal in size to the number of sectors in the failed PDA. It then uses 534 * the other PDAs in the parameter list to determine where within the target 535 * buffer the corresponding data should be xored. 536 * 537 * Recall the basic equation is 538 * 539 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 540 * 541 * so to recover data_j we need 542 * 543 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 544 * 545 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by 546 * copying Q into it. 
Then we need to do a table lookup to convert to solve 547 * data_j /= J 548 * 549 * 550 */ 551 int 552 rf_RecoveryQFunc(node) 553 RF_DagNode_t *node; 554 { 555 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 556 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 557 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 558 int i; 559 RF_PhysDiskAddr_t *pda; 560 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 561 char *srcbuf, *destbuf; 562 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 563 RF_Etimer_t timer; 564 unsigned long coeff; 565 566 RF_ETIMER_START(timer); 567 /* start by copying Q into the buffer */ 568 bcopy(node->params[node->numParams - 3].p, node->results[0], 569 rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); 570 for (i = 0; i < node->numParams - 4; i += 2) { 571 RF_ASSERT(node->params[i + 1].p != node->results[0]); 572 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 573 srcbuf = (char *) node->params[i + 1].p; 574 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 575 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); 576 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); 577 /* compute the data unit offset within the column */ 578 coeff = (coeff % raidPtr->Layout.numDataCol); 579 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 580 } 581 /* Do the nasty inversion now */ 582 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); 583 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 584 RF_ETIMER_STOP(timer); 585 RF_ETIMER_EVAL(timer); 586 tracerec->q_us += RF_ETIMER_VAL_US(timer); 587 rf_GenericWakeupFunc(node, 0); 588 return (0); 589 } 590 591 int 592 
rf_RecoveryPQFunc(node) 593 RF_DagNode_t *node; 594 { 595 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 596 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); 597 return (1); 598 } 599 /* 600 Degraded write Q subroutine. 601 Used when P is dead. 602 Large-write style Q computation. 603 Parameters 604 605 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. 606 607 We ignore failedPDA. 608 609 This is a "simple style" recovery func. 610 */ 611 612 void 613 rf_PQ_DegradedWriteQFunc(node) 614 RF_DagNode_t *node; 615 { 616 int np = node->numParams; 617 int d; 618 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 619 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 620 int i; 621 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 622 RF_Etimer_t timer; 623 char *qbuf = node->results[0]; 624 char *obuf, *qpbuf; 625 RF_PhysDiskAddr_t *old; 626 unsigned long coeff; 627 int fail_start, j; 628 629 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 630 fail_start = old->startSector % secPerSU; 631 632 RF_ETIMER_START(timer); 633 634 d = (np - 2) / 2; 635 RF_ASSERT(2 * d + 2 == np); 636 637 for (i = 0; i < d; i++) { 638 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 639 obuf = (char *) node->params[2 * i + 1].p; 640 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 641 /* compute the data unit offset within the column, then add 642 * one */ 643 coeff = (coeff % raidPtr->Layout.numDataCol); 644 j = old->startSector % secPerSU; 645 RF_ASSERT(j >= fail_start); 646 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 647 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 648 } 649 650 RF_ETIMER_STOP(timer); 651 RF_ETIMER_EVAL(timer); 652 tracerec->q_us += RF_ETIMER_VAL_US(timer); 653 rf_GenericWakeupFunc(node, 0); 654 } 655 656 657 658 659 /* Q computations */ 660 661 /* 662 coeff - colummn; 663 664 
compute dest ^= qfor[28-coeff][rn[coeff+1] a] 665 666 on 5-bit basis; 667 length in bytes; 668 */ 669 670 void 671 rf_IncQ(dest, buf, length, coeff) 672 unsigned long *dest; 673 unsigned long *buf; 674 unsigned length; 675 unsigned coeff; 676 { 677 unsigned long a, d, new; 678 unsigned long a1, a2; 679 unsigned int *q = &(rf_qfor[28 - coeff][0]); 680 unsigned r = rf_rn[coeff + 1]; 681 682 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) 683 #define INSERT(a,i) (a << (5L*i)) 684 685 length /= 8; 686 /* 13 5 bit quants in a 64 bit word */ 687 while (length) { 688 a = *buf++; 689 d = *dest; 690 a1 = EXTRACT(a, 0) ^ r; 691 a2 = EXTRACT(a, 1) ^ r; 692 new = INSERT(a2, 1) | a1; 693 a1 = EXTRACT(a, 2) ^ r; 694 a2 = EXTRACT(a, 3) ^ r; 695 a1 = q[a1]; 696 a2 = q[a2]; 697 new = new | INSERT(a1, 2) | INSERT(a2, 3); 698 a1 = EXTRACT(a, 4) ^ r; 699 a2 = EXTRACT(a, 5) ^ r; 700 a1 = q[a1]; 701 a2 = q[a2]; 702 new = new | INSERT(a1, 4) | INSERT(a2, 5); 703 a1 = EXTRACT(a, 5) ^ r; 704 a2 = EXTRACT(a, 6) ^ r; 705 a1 = q[a1]; 706 a2 = q[a2]; 707 new = new | INSERT(a1, 5) | INSERT(a2, 6); 708 #if RF_LONGSHIFT > 2 709 a1 = EXTRACT(a, 7) ^ r; 710 a2 = EXTRACT(a, 8) ^ r; 711 a1 = q[a1]; 712 a2 = q[a2]; 713 new = new | INSERT(a1, 7) | INSERT(a2, 8); 714 a1 = EXTRACT(a, 9) ^ r; 715 a2 = EXTRACT(a, 10) ^ r; 716 a1 = q[a1]; 717 a2 = q[a2]; 718 new = new | INSERT(a1, 9) | INSERT(a2, 10); 719 a1 = EXTRACT(a, 11) ^ r; 720 a2 = EXTRACT(a, 12) ^ r; 721 a1 = q[a1]; 722 a2 = q[a2]; 723 new = new | INSERT(a1, 11) | INSERT(a2, 12); 724 #endif /* RF_LONGSHIFT > 2 */ 725 d ^= new; 726 *dest++ = d; 727 length--; 728 } 729 } 730 /* 731 compute 732 733 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] 734 735 on a five bit basis. 736 optimization: compute old ^ new on 64 bit basis. 737 738 length in bytes. 
*/

/*
 * QDelta -- stubbed old/new Q delta routine.
 *
 *   dest    q buffer to update
 *   obuf    old data
 *   nbuf    new data
 *   length  byte count
 *   coeff   column coefficient
 *
 * With _KERNEL defined the routine only zero-fills dest for length bytes.
 *
 * NOTE(review): in the non-kernel branch q has been forced to NULL a few
 * lines earlier and is then indexed, and obuf/nbuf/dest are char pointers,
 * so each step moves one byte while the loop counts length/8 iterations.
 * The original author already flags this branch as probably not working.
 */
static void
QDelta(
    char *dest,
    char *obuf,
    char *nbuf,
    unsigned length,
    unsigned char coeff)
{
	unsigned long a, d, new;
	unsigned long a1, a2;
	unsigned int *q = &(rf_qfor[28 - coeff][0]);
	unsigned int r = rf_rn[coeff + 1];

	/* the table pointer and r computed above are discarded again here */
	r = a1 = a2 = new = d = a = 0;	/* XXX for now... */
	q = NULL;		/* XXX for now */

#ifdef _KERNEL
	/* PQ in kernel currently not supported because the encoding/decoding
	 * table is not present */
	memset(dest, 0, length);
#else				/* KERNEL */
	/* this code probably doesn't work and should be rewritten  -wvcii */
	/* 13 5 bit quants in a 64 bit word */
	length /= 8;
	while (length) {
		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
		a ^= *nbuf++;
		d = *dest;
		a1 = EXTRACT(a, 0) ^ r;
		a2 = EXTRACT(a, 1) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = INSERT(a2, 1) | a1;
		a1 = EXTRACT(a, 2) ^ r;
		a2 = EXTRACT(a, 3) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 2) | INSERT(a2, 3);
		a1 = EXTRACT(a, 4) ^ r;
		a2 = EXTRACT(a, 5) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 4) | INSERT(a2, 5);
		/* quant 5 is handled a second time here, paired with 6 */
		a1 = EXTRACT(a, 5) ^ r;
		a2 = EXTRACT(a, 6) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 5) | INSERT(a2, 6);
#if RF_LONGSHIFT > 2
		a1 = EXTRACT(a, 7) ^ r;
		a2 = EXTRACT(a, 8) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 7) | INSERT(a2, 8);
		a1 = EXTRACT(a, 9) ^ r;
		a2 = EXTRACT(a, 10) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 9) | INSERT(a2, 10);
		a1 = EXTRACT(a, 11) ^ r;
		a2 = EXTRACT(a, 12) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 11) | INSERT(a2, 12);
#endif				/* RF_LONGSHIFT > 2 */
		d ^= new;
		*dest++ = d;
		length--;
	}
#endif				/* _KERNEL */
}
/*
   recover columns a and b from the given p and q into
   bufs abuf and bbuf. All bufs are word aligned.
815 Length is in bytes. 816 */ 817 818 819 /* 820 * XXX 821 * 822 * Everything about this seems wrong. 823 */ 824 void 825 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) 826 unsigned long *pbuf; 827 unsigned long *qbuf; 828 unsigned long *abuf; 829 unsigned long *bbuf; 830 unsigned length; 831 unsigned coeff_a; 832 unsigned coeff_b; 833 { 834 unsigned long p, q, a, a0, a1; 835 int col = (29 * coeff_a) + coeff_b; 836 unsigned char *q0 = &(rf_qinv[col][0]); 837 838 length /= 8; 839 while (length) { 840 p = *pbuf++; 841 q = *qbuf++; 842 a0 = EXTRACT(p, 0); 843 a1 = EXTRACT(q, 0); 844 a = q0[a0 << 5 | a1]; 845 #define MF(i) \ 846 a0 = EXTRACT(p,i); \ 847 a1 = EXTRACT(q,i); \ 848 a = a | INSERT(q0[a0<<5 | a1],i) 849 850 MF(1); 851 MF(2); 852 MF(3); 853 MF(4); 854 MF(5); 855 MF(6); 856 #if 0 857 MF(7); 858 MF(8); 859 MF(9); 860 MF(10); 861 MF(11); 862 MF(12); 863 #endif /* 0 */ 864 *abuf++ = a; 865 *bbuf++ = a ^ p; 866 length--; 867 } 868 } 869 /* 870 Lost parity and a data column. Recover that data column. 871 Assume col coeff is lost. Let q the contents of Q after 872 all surviving data columns have been q-xored out of it. 873 Then we have the equation 874 875 q[28-coeff][a_i ^ r_i+1] = q 876 877 but q is cyclic with period 31. 878 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = 879 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . 880 881 so a_i = r_{coeff+1} ^ q[3+coeff][q] 882 883 The routine is passed q buffer and the buffer 884 the data is to be recoverd into. They can be the same. 
885 */ 886 887 888 889 static void 890 rf_InvertQ( 891 unsigned long *qbuf, 892 unsigned long *abuf, 893 unsigned length, 894 unsigned coeff) 895 { 896 unsigned long a, new; 897 unsigned long a1, a2; 898 unsigned int *q = &(rf_qfor[3 + coeff][0]); 899 unsigned r = rf_rn[coeff + 1]; 900 901 /* 13 5 bit quants in a 64 bit word */ 902 length /= 8; 903 while (length) { 904 a = *qbuf++; 905 a1 = EXTRACT(a, 0); 906 a2 = EXTRACT(a, 1); 907 a1 = r ^ q[a1]; 908 a2 = r ^ q[a2]; 909 new = INSERT(a2, 1) | a1; 910 #define M(i,j) \ 911 a1 = EXTRACT(a,i); \ 912 a2 = EXTRACT(a,j); \ 913 a1 = r ^ q[a1]; \ 914 a2 = r ^ q[a2]; \ 915 new = new | INSERT(a1,i) | INSERT(a2,j) 916 917 M(2, 3); 918 M(4, 5); 919 M(5, 6); 920 #if RF_LONGSHIFT > 2 921 M(7, 8); 922 M(9, 10); 923 M(11, 12); 924 #endif /* RF_LONGSHIFT > 2 */ 925 *abuf++ = new; 926 length--; 927 } 928 } 929 #endif /* (RF_INCLUDE_DECL_PQ > 0) || 930 * (RF_INCLUDE_RAID6 > 0) */ 931