1 /* $NetBSD: rf_pq.c,v 1.11 2001/11/13 07:11:16 lukem Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * Code for RAID level 6 (P + Q) disk array architecture. 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.11 2001/11/13 07:11:16 lukem Exp $"); 35 36 #include "rf_archs.h" 37 38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) 39 40 #include <dev/raidframe/raidframevar.h> 41 42 #include "rf_raid.h" 43 #include "rf_dag.h" 44 #include "rf_dagffrd.h" 45 #include "rf_dagffwr.h" 46 #include "rf_dagdegrd.h" 47 #include "rf_dagdegwr.h" 48 #include "rf_dagutils.h" 49 #include "rf_dagfuncs.h" 50 #include "rf_etimer.h" 51 #include "rf_pqdeg.h" 52 #include "rf_general.h" 53 #include "rf_map.h" 54 #include "rf_pq.h" 55 56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"}; 57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"}; 58 59 int 60 rf_RegularONPFunc(node) 61 RF_DagNode_t *node; 62 { 63 return (rf_RegularXorFunc(node)); 64 } 65 /* 66 same as simpleONQ func, but the coefficient is always 1 67 */ 68 69 int 70 rf_SimpleONPFunc(node) 71 RF_DagNode_t *node; 72 { 73 return (rf_SimpleXorFunc(node)); 74 } 75 76 int 77 rf_RecoveryPFunc(node) 78 RF_DagNode_t *node; 79 { 80 return (rf_RecoveryXorFunc(node)); 81 } 82 83 int 84 rf_RegularPFunc(node) 85 RF_DagNode_t *node; 86 { 87 return (rf_RegularXorFunc(node)); 88 } 89 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */ 90 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) 91 92 static void 93 QDelta(char *dest, char *obuf, char *nbuf, unsigned length, 94 unsigned char coeff); 95 static void 96 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, 97 unsigned length, unsigned coeff); 98 99 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"}; 100 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"}; 101 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"}; 102 103 void 104 rf_PQDagSelect( 105 RF_Raid_t * raidPtr, 106 RF_IoType_t type, 107 RF_AccessStripeMap_t * asmap, 108 RF_VoidFuncPtr * createFunc) 109 { 110 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 111 unsigned ndfail = asmap->numDataFailed; 112 unsigned npfail = asmap->numParityFailed; 113 unsigned ntfail = npfail + ndfail; 114 115 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 116 if (ntfail > 2) { 117 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n"); 118 /* *infoFunc = */ *createFunc = NULL; 119 return; 120 } 121 /* ok, we can do this I/O */ 122 if (type == RF_IO_TYPE_READ) { 123 switch (ndfail) { 124 case 0: 125 /* fault free read */ 126 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */ 127 break; 128 case 1: 129 /* lost a single data unit */ 130 /* two cases: (1) parity is not lost. do a normal raid 131 * 5 reconstruct read. (2) parity is lost. do a 132 * reconstruct read using "q". */ 133 if (ntfail == 2) { /* also lost redundancy */ 134 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) 135 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG; 136 else 137 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG; 138 } else { 139 /* P and Q are ok. But is there a failure in 140 * some unaccessed data unit? */ 141 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) 142 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; 143 else 144 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG; 145 } 146 break; 147 case 2: 148 /* lost two data units */ 149 /* *infoFunc = PQOneTwo; */ 150 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG; 151 break; 152 } 153 return; 154 } 155 /* a write */ 156 switch (ntfail) { 157 case 0: /* fault free */ 158 if (rf_suppressLocksAndLargeWrites || 159 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) || 160 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) { 161 162 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG; 163 } else { 164 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG; 165 } 166 break; 167 168 case 1: /* single disk fault */ 169 if (npfail == 1) { 170 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)); 171 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like 172 * normal mode raid5 173 * write. */ 174 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) 175 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) 176 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG; 177 else 178 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG; 179 } else {/* parity died, small write only updating Q */ 180 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1)) 181 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap)) 182 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG; 183 else 184 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG; 185 } 186 } else { /* data missing. Do a P reconstruct write if 187 * only a single data unit is lost in the 188 * stripe, otherwise a PQ reconstruct write. */ 189 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2) 190 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; 191 else 192 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG; 193 } 194 break; 195 196 case 2: /* two disk faults */ 197 switch (npfail) { 198 case 2: /* both p and q dead */ 199 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG; 200 break; 201 case 1: /* either p or q and dead data */ 202 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA); 203 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)); 204 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q) 205 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG; 206 else 207 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG; 208 break; 209 case 0: /* double data loss */ 210 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG; 211 break; 212 } 213 break; 214 215 default: /* more than 2 disk faults */ 216 *createFunc = NULL; 217 RF_PANIC(); 218 } 219 return; 220 } 221 /* 222 Used as a stop gap info function 223 */ 224 #if 0 225 static void 226 PQOne(raidPtr, nSucc, nAnte, asmap) 227 RF_Raid_t *raidPtr; 228 int *nSucc; 229 int *nAnte; 230 RF_AccessStripeMap_t *asmap; 231 { 232 *nSucc = *nAnte = 1; 233 } 234 235 static void 236 PQOneTwo(raidPtr, nSucc, nAnte, asmap) 237 RF_Raid_t *raidPtr; 238 int *nSucc; 239 int *nAnte; 240 RF_AccessStripeMap_t *asmap; 241 { 242 *nSucc = 1; 243 *nAnte = 2; 244 } 245 #endif 246 247 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG) 248 { 249 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2, 250 rf_RegularPQFunc, RF_FALSE); 251 } 252 253 int 254 rf_RegularONQFunc(node) 255 RF_DagNode_t *node; 256 { 257 int np = node->numParams; 258 int d; 259 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 260 int i; 261 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 262 RF_Etimer_t timer; 263 char *qbuf, *qpbuf; 264 char *obuf, *nbuf; 265 RF_PhysDiskAddr_t *old, *new; 266 unsigned long coeff; 267 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 268 269 RF_ETIMER_START(timer); 270 271 d = (np - 3) / 4; 272 RF_ASSERT(4 * d + 3 == np); 273 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ 274 for (i = 0; i < d; i++) { 275 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 276 obuf = (char *) node->params[2 * i + 1].p; 277 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; 278 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; 279 RF_ASSERT(new->numSector == old->numSector); 280 RF_ASSERT(new->raidAddress == old->raidAddress); 281 /* the stripe unit within the stripe tells us the coefficient 282 * to use for the multiply. */ 283 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); 284 /* compute the data unit offset within the column, then add 285 * one */ 286 coeff = (coeff % raidPtr->Layout.numDataCol); 287 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); 288 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 289 } 290 291 RF_ETIMER_STOP(timer); 292 RF_ETIMER_EVAL(timer); 293 tracerec->q_us += RF_ETIMER_VAL_US(timer); 294 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 295 * I/O in this node */ 296 return (0); 297 } 298 /* 299 See the SimpleXORFunc for the difference between a simple and regular func. 300 These Q functions should be used for 301 302 new q = Q(data,old data,old q) 303 304 style updates and not for 305 306 q = ( new data, new data, .... ) 307 308 computations. 309 310 The simple q takes 2(2d+1)+1 params, where d is the number 311 of stripes written. The order of params is 312 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d 313 [2d] old q pda_0, old q buffer 314 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d 315 raidPtr 316 */ 317 318 int 319 rf_SimpleONQFunc(node) 320 RF_DagNode_t *node; 321 { 322 int np = node->numParams; 323 int d; 324 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 325 int i; 326 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 327 RF_Etimer_t timer; 328 char *qbuf; 329 char *obuf, *nbuf; 330 RF_PhysDiskAddr_t *old, *new; 331 unsigned long coeff; 332 333 RF_ETIMER_START(timer); 334 335 d = (np - 3) / 4; 336 RF_ASSERT(4 * d + 3 == np); 337 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ 338 for (i = 0; i < d; i++) { 339 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 340 obuf = (char *) node->params[2 * i + 1].p; 341 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; 342 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; 343 RF_ASSERT(new->numSector == old->numSector); 344 RF_ASSERT(new->raidAddress == old->raidAddress); 345 /* the stripe unit within the stripe tells us the coefficient 346 * to use for the multiply. */ 347 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); 348 /* compute the data unit offset within the column, then add 349 * one */ 350 coeff = (coeff % raidPtr->Layout.numDataCol); 351 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 352 } 353 354 RF_ETIMER_STOP(timer); 355 RF_ETIMER_EVAL(timer); 356 tracerec->q_us += RF_ETIMER_VAL_US(timer); 357 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 358 * I/O in this node */ 359 return (0); 360 } 361 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) 362 { 363 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); 364 } 365 366 static void RegularQSubr(RF_DagNode_t *node, char *qbuf); 367 368 static void 369 RegularQSubr(node, qbuf) 370 RF_DagNode_t *node; 371 char *qbuf; 372 { 373 int np = node->numParams; 374 int d; 375 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 376 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 377 int i; 378 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 379 RF_Etimer_t timer; 380 char *obuf, *qpbuf; 381 RF_PhysDiskAddr_t *old; 382 unsigned long coeff; 383 384 RF_ETIMER_START(timer); 385 386 d = (np - 1) / 2; 387 RF_ASSERT(2 * d + 1 == np); 388 for (i = 0; i < d; i++) { 389 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 390 obuf = (char *) node->params[2 * i + 1].p; 391 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 392 /* compute the data unit offset within the column, then add 393 * one */ 394 coeff = (coeff % raidPtr->Layout.numDataCol); 395 /* the input buffers may not all be aligned with the start of 396 * the stripe. so shift by their sector offset within the 397 * stripe unit */ 398 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); 399 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 400 } 401 402 RF_ETIMER_STOP(timer); 403 RF_ETIMER_EVAL(timer); 404 tracerec->q_us += RF_ETIMER_VAL_US(timer); 405 } 406 /* 407 used in degraded writes. 408 */ 409 410 static void DegrQSubr(RF_DagNode_t *node); 411 412 static void 413 DegrQSubr(node) 414 RF_DagNode_t *node; 415 { 416 int np = node->numParams; 417 int d; 418 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 419 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 420 int i; 421 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 422 RF_Etimer_t timer; 423 char *qbuf = node->results[1]; 424 char *obuf, *qpbuf; 425 RF_PhysDiskAddr_t *old; 426 unsigned long coeff; 427 unsigned fail_start; 428 int j; 429 430 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 431 fail_start = old->startSector % secPerSU; 432 433 RF_ETIMER_START(timer); 434 435 d = (np - 2) / 2; 436 RF_ASSERT(2 * d + 2 == np); 437 for (i = 0; i < d; i++) { 438 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 439 obuf = (char *) node->params[2 * i + 1].p; 440 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 441 /* compute the data unit offset within the column, then add 442 * one */ 443 coeff = (coeff % raidPtr->Layout.numDataCol); 444 /* the input buffers may not all be aligned with the start of 445 * the stripe. so shift by their sector offset within the 446 * stripe unit */ 447 j = old->startSector % secPerSU; 448 RF_ASSERT(j >= fail_start); 449 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 450 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 451 } 452 453 RF_ETIMER_STOP(timer); 454 RF_ETIMER_EVAL(timer); 455 tracerec->q_us += RF_ETIMER_VAL_US(timer); 456 } 457 /* 458 Called by large write code to compute the new parity and the new q. 459 460 structure of the params: 461 462 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol 463 raidPtr 464 465 for a total of 2d+1 arguments. 466 The result buffers results[0], results[1] are the buffers for the p and q, 467 respectively. 468 469 We compute Q first, then compute P. The P calculation may try to reuse 470 one of the input buffers for its output, so if we computed P first, we would 471 corrupt the input for the q calculation. 472 */ 473 474 int 475 rf_RegularPQFunc(node) 476 RF_DagNode_t *node; 477 { 478 RegularQSubr(node, node->results[1]); 479 return (rf_RegularXorFunc(node)); /* does the wakeup */ 480 } 481 482 int 483 rf_RegularQFunc(node) 484 RF_DagNode_t *node; 485 { 486 /* Almost ... adjust Qsubr args */ 487 RegularQSubr(node, node->results[0]); 488 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 489 * I/O in this node */ 490 return (0); 491 } 492 /* 493 Called by singly degraded write code to compute the new parity and the new q. 494 495 structure of the params: 496 497 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d 498 failedPDA raidPtr 499 500 for a total of 2d+2 arguments. 501 The result buffers results[0], results[1] are the buffers for the parity and q, 502 respectively. 503 504 We compute Q first, then compute parity. The parity calculation may try to reuse 505 one of the input buffers for its output, so if we computed parity first, we would 506 corrupt the input for the q calculation. 507 508 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. 509 */ 510 511 void 512 rf_Degraded_100_PQFunc(node) 513 RF_DagNode_t *node; 514 { 515 int np = node->numParams; 516 517 RF_ASSERT(np >= 2); 518 DegrQSubr(node); 519 rf_RecoveryXorFunc(node); 520 } 521 522 523 /* 524 The two below are used when reading a stripe with a single lost data unit. 525 The parameters are 526 527 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr 528 529 and results[0] contains the data buffer. Which is originally zero-filled. 530 531 */ 532 533 /* this Q func is used by the degraded-mode dag functions to recover lost data. 534 * the second-to-last parameter is the PDA for the failed portion of the access. 535 * the code here looks at this PDA and assumes that the xor target buffer is 536 * equal in size to the number of sectors in the failed PDA. It then uses 537 * the other PDAs in the parameter list to determine where within the target 538 * buffer the corresponding data should be xored. 539 * 540 * Recall the basic equation is 541 * 542 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 543 * 544 * so to recover data_j we need 545 * 546 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 547 * 548 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by 549 * copying Q into it. Then we need to do a table lookup to convert to solve 550 * data_j /= J 551 * 552 * 553 */ 554 int 555 rf_RecoveryQFunc(node) 556 RF_DagNode_t *node; 557 { 558 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 559 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 560 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 561 int i; 562 RF_PhysDiskAddr_t *pda; 563 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 564 char *srcbuf, *destbuf; 565 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 566 RF_Etimer_t timer; 567 unsigned long coeff; 568 569 RF_ETIMER_START(timer); 570 /* start by copying Q into the buffer */ 571 bcopy(node->params[node->numParams - 3].p, node->results[0], 572 rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); 573 for (i = 0; i < node->numParams - 4; i += 2) { 574 RF_ASSERT(node->params[i + 1].p != node->results[0]); 575 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 576 srcbuf = (char *) node->params[i + 1].p; 577 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 578 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); 579 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); 580 /* compute the data unit offset within the column */ 581 coeff = (coeff % raidPtr->Layout.numDataCol); 582 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 583 } 584 /* Do the nasty inversion now */ 585 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); 586 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 587 RF_ETIMER_STOP(timer); 588 RF_ETIMER_EVAL(timer); 589 tracerec->q_us += RF_ETIMER_VAL_US(timer); 590 rf_GenericWakeupFunc(node, 0); 591 return (0); 592 } 593 594 int 595 rf_RecoveryPQFunc(node) 596 RF_DagNode_t *node; 597 { 598 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 599 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); 600 return (1); 601 } 602 /* 603 Degraded write Q subroutine. 604 Used when P is dead. 605 Large-write style Q computation. 606 Parameters 607 608 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. 609 610 We ignore failedPDA. 611 612 This is a "simple style" recovery func. 613 */ 614 615 void 616 rf_PQ_DegradedWriteQFunc(node) 617 RF_DagNode_t *node; 618 { 619 int np = node->numParams; 620 int d; 621 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 622 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 623 int i; 624 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 625 RF_Etimer_t timer; 626 char *qbuf = node->results[0]; 627 char *obuf, *qpbuf; 628 RF_PhysDiskAddr_t *old; 629 unsigned long coeff; 630 int fail_start, j; 631 632 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 633 fail_start = old->startSector % secPerSU; 634 635 RF_ETIMER_START(timer); 636 637 d = (np - 2) / 2; 638 RF_ASSERT(2 * d + 2 == np); 639 640 for (i = 0; i < d; i++) { 641 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 642 obuf = (char *) node->params[2 * i + 1].p; 643 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 644 /* compute the data unit offset within the column, then add 645 * one */ 646 coeff = (coeff % raidPtr->Layout.numDataCol); 647 j = old->startSector % secPerSU; 648 RF_ASSERT(j >= fail_start); 649 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 650 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 651 } 652 653 RF_ETIMER_STOP(timer); 654 RF_ETIMER_EVAL(timer); 655 tracerec->q_us += RF_ETIMER_VAL_US(timer); 656 rf_GenericWakeupFunc(node, 0); 657 } 658 659 660 661 662 /* Q computations */ 663 664 /* 665 coeff - colummn; 666 667 compute dest ^= qfor[28-coeff][rn[coeff+1] a] 668 669 on 5-bit basis; 670 length in bytes; 671 */ 672 673 void 674 rf_IncQ(dest, buf, length, coeff) 675 unsigned long *dest; 676 unsigned long *buf; 677 unsigned length; 678 unsigned coeff; 679 { 680 unsigned long a, d, new; 681 unsigned long a1, a2; 682 unsigned int *q = &(rf_qfor[28 - coeff][0]); 683 unsigned r = rf_rn[coeff + 1]; 684 685 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) 686 #define INSERT(a,i) (a << (5L*i)) 687 688 length /= 8; 689 /* 13 5 bit quants in a 64 bit word */ 690 while (length) { 691 a = *buf++; 692 d = *dest; 693 a1 = EXTRACT(a, 0) ^ r; 694 a2 = EXTRACT(a, 1) ^ r; 695 new = INSERT(a2, 1) | a1; 696 a1 = EXTRACT(a, 2) ^ r; 697 a2 = EXTRACT(a, 3) ^ r; 698 a1 = q[a1]; 699 a2 = q[a2]; 700 new = new | INSERT(a1, 2) | INSERT(a2, 3); 701 a1 = EXTRACT(a, 4) ^ r; 702 a2 = EXTRACT(a, 5) ^ r; 703 a1 = q[a1]; 704 a2 = q[a2]; 705 new = new | INSERT(a1, 4) | INSERT(a2, 5); 706 a1 = EXTRACT(a, 5) ^ r; 707 a2 = EXTRACT(a, 6) ^ r; 708 a1 = q[a1]; 709 a2 = q[a2]; 710 new = new | INSERT(a1, 5) | INSERT(a2, 6); 711 #if RF_LONGSHIFT > 2 712 a1 = EXTRACT(a, 7) ^ r; 713 a2 = EXTRACT(a, 8) ^ r; 714 a1 = q[a1]; 715 a2 = q[a2]; 716 new = new | INSERT(a1, 7) | INSERT(a2, 8); 717 a1 = EXTRACT(a, 9) ^ r; 718 a2 = EXTRACT(a, 10) ^ r; 719 a1 = q[a1]; 720 a2 = q[a2]; 721 new = new | INSERT(a1, 9) | INSERT(a2, 10); 722 a1 = EXTRACT(a, 11) ^ r; 723 a2 = EXTRACT(a, 12) ^ r; 724 a1 = q[a1]; 725 a2 = q[a2]; 726 new = new | INSERT(a1, 11) | INSERT(a2, 12); 727 #endif /* RF_LONGSHIFT > 2 */ 728 d ^= new; 729 *dest++ = d; 730 length--; 731 } 732 } 733 /* 734 compute 735 736 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] 737 738 on a five bit basis. 739 optimization: compute old ^ new on 64 bit basis. 740 741 length in bytes. 742 */ 743 744 static void 745 QDelta( 746 char *dest, 747 char *obuf, 748 char *nbuf, 749 unsigned length, 750 unsigned char coeff) 751 { 752 unsigned long a, d, new; 753 unsigned long a1, a2; 754 unsigned int *q = &(rf_qfor[28 - coeff][0]); 755 unsigned int r = rf_rn[coeff + 1]; 756 757 r = a1 = a2 = new = d = a = 0; /* XXX for now... */ 758 q = NULL; /* XXX for now */ 759 760 #ifdef _KERNEL 761 /* PQ in kernel currently not supported because the encoding/decoding 762 * table is not present */ 763 memset(dest, 0, length); 764 #else /* KERNEL */ 765 /* this code probably doesn't work and should be rewritten -wvcii */ 766 /* 13 5 bit quants in a 64 bit word */ 767 length /= 8; 768 while (length) { 769 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */ 770 a ^= *nbuf++; 771 d = *dest; 772 a1 = EXTRACT(a, 0) ^ r; 773 a2 = EXTRACT(a, 1) ^ r; 774 a1 = q[a1]; 775 a2 = q[a2]; 776 new = INSERT(a2, 1) | a1; 777 a1 = EXTRACT(a, 2) ^ r; 778 a2 = EXTRACT(a, 3) ^ r; 779 a1 = q[a1]; 780 a2 = q[a2]; 781 new = new | INSERT(a1, 2) | INSERT(a2, 3); 782 a1 = EXTRACT(a, 4) ^ r; 783 a2 = EXTRACT(a, 5) ^ r; 784 a1 = q[a1]; 785 a2 = q[a2]; 786 new = new | INSERT(a1, 4) | INSERT(a2, 5); 787 a1 = EXTRACT(a, 5) ^ r; 788 a2 = EXTRACT(a, 6) ^ r; 789 a1 = q[a1]; 790 a2 = q[a2]; 791 new = new | INSERT(a1, 5) | INSERT(a2, 6); 792 #if RF_LONGSHIFT > 2 793 a1 = EXTRACT(a, 7) ^ r; 794 a2 = EXTRACT(a, 8) ^ r; 795 a1 = q[a1]; 796 a2 = q[a2]; 797 new = new | INSERT(a1, 7) | INSERT(a2, 8); 798 a1 = EXTRACT(a, 9) ^ r; 799 a2 = EXTRACT(a, 10) ^ r; 800 a1 = q[a1]; 801 a2 = q[a2]; 802 new = new | INSERT(a1, 9) | INSERT(a2, 10); 803 a1 = EXTRACT(a, 11) ^ r; 804 a2 = EXTRACT(a, 12) ^ r; 805 a1 = q[a1]; 806 a2 = q[a2]; 807 new = new | INSERT(a1, 11) | INSERT(a2, 12); 808 #endif /* RF_LONGSHIFT > 2 */ 809 d ^= new; 810 *dest++ = d; 811 length--; 812 } 813 #endif /* _KERNEL */ 814 } 815 /* 816 recover columns a and b from the given p and q into 817 bufs abuf and bbuf. All bufs are word aligned. 818 Length is in bytes. 819 */ 820 821 822 /* 823 * XXX 824 * 825 * Everything about this seems wrong. 826 */ 827 void 828 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b) 829 unsigned long *pbuf; 830 unsigned long *qbuf; 831 unsigned long *abuf; 832 unsigned long *bbuf; 833 unsigned length; 834 unsigned coeff_a; 835 unsigned coeff_b; 836 { 837 unsigned long p, q, a, a0, a1; 838 int col = (29 * coeff_a) + coeff_b; 839 unsigned char *q0 = &(rf_qinv[col][0]); 840 841 length /= 8; 842 while (length) { 843 p = *pbuf++; 844 q = *qbuf++; 845 a0 = EXTRACT(p, 0); 846 a1 = EXTRACT(q, 0); 847 a = q0[a0 << 5 | a1]; 848 #define MF(i) \ 849 a0 = EXTRACT(p,i); \ 850 a1 = EXTRACT(q,i); \ 851 a = a | INSERT(q0[a0<<5 | a1],i) 852 853 MF(1); 854 MF(2); 855 MF(3); 856 MF(4); 857 MF(5); 858 MF(6); 859 #if 0 860 MF(7); 861 MF(8); 862 MF(9); 863 MF(10); 864 MF(11); 865 MF(12); 866 #endif /* 0 */ 867 *abuf++ = a; 868 *bbuf++ = a ^ p; 869 length--; 870 } 871 } 872 /* 873 Lost parity and a data column. Recover that data column. 874 Assume col coeff is lost. Let q the contents of Q after 875 all surviving data columns have been q-xored out of it. 876 Then we have the equation 877 878 q[28-coeff][a_i ^ r_i+1] = q 879 880 but q is cyclic with period 31. 881 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = 882 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . 883 884 so a_i = r_{coeff+1} ^ q[3+coeff][q] 885 886 The routine is passed q buffer and the buffer 887 the data is to be recoverd into. They can be the same. 888 */ 889 890 891 892 static void 893 rf_InvertQ( 894 unsigned long *qbuf, 895 unsigned long *abuf, 896 unsigned length, 897 unsigned coeff) 898 { 899 unsigned long a, new; 900 unsigned long a1, a2; 901 unsigned int *q = &(rf_qfor[3 + coeff][0]); 902 unsigned r = rf_rn[coeff + 1]; 903 904 /* 13 5 bit quants in a 64 bit word */ 905 length /= 8; 906 while (length) { 907 a = *qbuf++; 908 a1 = EXTRACT(a, 0); 909 a2 = EXTRACT(a, 1); 910 a1 = r ^ q[a1]; 911 a2 = r ^ q[a2]; 912 new = INSERT(a2, 1) | a1; 913 #define M(i,j) \ 914 a1 = EXTRACT(a,i); \ 915 a2 = EXTRACT(a,j); \ 916 a1 = r ^ q[a1]; \ 917 a2 = r ^ q[a2]; \ 918 new = new | INSERT(a1,i) | INSERT(a2,j) 919 920 M(2, 3); 921 M(4, 5); 922 M(5, 6); 923 #if RF_LONGSHIFT > 2 924 M(7, 8); 925 M(9, 10); 926 M(11, 12); 927 #endif /* RF_LONGSHIFT > 2 */ 928 *abuf++ = new; 929 length--; 930 } 931 } 932 #endif /* (RF_INCLUDE_DECL_PQ > 0) || 933 * (RF_INCLUDE_RAID6 > 0) */ 934