1 /* $NetBSD: rf_pq.c,v 1.16 2009/03/14 15:36:20 dsl Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* 30 * Code for RAID level 6 (P + Q) disk array architecture. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.16 2009/03/14 15:36:20 dsl Exp $");

#include "rf_archs.h"

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)

#include <dev/raidframe/raidframevar.h>

#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagffrd.h"
#include "rf_dagffwr.h"
#include "rf_dagdegrd.h"
#include "rf_dagdegwr.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_etimer.h"
#include "rf_pqdeg.h"
#include "rf_general.h"
#include "rf_map.h"
#include "rf_pq.h"

/*
 * Function tables for the P (parity) unit.  Every entry is one of the thin
 * XOR wrappers defined immediately below, paired with a descriptive name.
 */
RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};

/* "Regular" old-new P update: forwards the node to rf_RegularXorFunc. */
int
rf_RegularONPFunc(RF_DagNode_t *node)
{
	return (rf_RegularXorFunc(node));
}
/*
    same as simpleONQ func, but the coefficient is always 1
*/

/* "Simple" old-new P update: forwards the node to rf_SimpleXorFunc. */
int
rf_SimpleONPFunc(RF_DagNode_t *node)
{
	return (rf_SimpleXorFunc(node));
}

/* P recovery: forwards the node to rf_RecoveryXorFunc. */
int
rf_RecoveryPFunc(RF_DagNode_t *node)
{
	return (rf_RecoveryXorFunc(node));
}

/* Regular P computation: forwards the node to rf_RegularXorFunc. */
int
rf_RegularPFunc(RF_DagNode_t *node)
{
	return (rf_RegularXorFunc(node));
}
#endif	/* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)

/* File-local helpers, defined near the end of this file. */
static void
QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    unsigned char coeff);
static void
rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
    unsigned length, unsigned coeff);

/* Function tables for the Q unit and for combined P+Q recovery. */
RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};

/*
 * Pick the DAG creation routine for one access and store it in *createFunc.
 *
 * The choice is driven by the I/O type and by the failure counts recorded in
 * the access stripe map (numDataFailed / numParityFailed).  When more than
 * two units have failed, *createFunc is set to NULL and an error message is
 * emitted.
 *
 * The routine names encode the failure pattern as three digits; from the
 * branches below they read as <failed data><failed P><failed Q>
 * (e.g. 110 = one data unit and P lost, 101 = one data unit and Q lost).
 */
void
rf_PQDagSelect(
    RF_Raid_t * raidPtr,
    RF_IoType_t type,
    RF_AccessStripeMap_t * asmap,
    RF_VoidFuncPtr * createFunc)
{
	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
	unsigned ndfail = asmap->numDataFailed;
	unsigned npfail = asmap->numParityFailed;
	unsigned ntfail = npfail + ndfail;

	RF_ASSERT(RF_IO_IS_R_OR_W(type));
	if (ntfail > 2) {
		RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
		*createFunc = NULL;
		return;
	}
	/* ok, we can do this I/O */
	if (type == RF_IO_TYPE_READ) {
		/* Reads only care about lost data units; ndfail is 0..2 here,
		 * so the switch has no default. */
		switch (ndfail) {
		case 0:
			/* fault free read */
			*createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;	/* same as raid 5 */
			break;
		case 1:
			/* lost a single data unit */
			/* two cases: (1) parity is not lost. do a normal raid
			 * 5 reconstruct read. (2) parity is lost. do a
			 * reconstruct read using "q". */
			if (ntfail == 2) {	/* also lost redundancy */
				if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
					*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
				else
					*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
			} else {
				/* P and Q are ok. But is there a failure in
				 * some unaccessed data unit? */
				if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
					*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
				else
					*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
			}
			break;
		case 2:
			/* lost two data units */
			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
			break;
		}
		return;
	}
	/* a write */
	switch (ntfail) {
	case 0:		/* fault free */
		/* Small write when large writes are suppressed, when at most
		 * half of the data columns are touched (and there is more
		 * than one column), when P or Q info is a multi-element list,
		 * or when rf_CheckStripeForFailures() reports a problem. */
		if (rf_suppressLocksAndLargeWrites ||
		    (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
			(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {

			*createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
		} else {
			*createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
		}
		break;

	case 1:		/* single disk fault */
		if (npfail == 1) {
			RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {	/* q died, treat like
										 * normal mode raid5
										 * write. */
				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
				else
					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
			} else {/* parity died, small write only updating Q */
				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
				else
					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
			}
		} else {	/* data missing. Do a P reconstruct write if
				 * only a single data unit is lost in the
				 * stripe, otherwise a PQ reconstruct write. */
			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
				*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
			else
				*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
		}
		break;

	case 2:		/* two disk faults */
		switch (npfail) {
		case 2:	/* both p and q dead */
			*createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
			break;
		case 1:	/* either p or q and dead data */
			/* The asserts document the expected ordering: the data
			 * PDA first, the redundancy PDA second. */
			RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
			RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
				*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
			else
				*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
			break;
		case 0:	/* double data loss */
			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
			break;
		}
		break;

	default:		/* more than 2 disk faults */
		/* Not reachable through the ntfail > 2 early return above;
		 * kept as a defensive trap. */
		*createFunc = NULL;
		RF_PANIC();
	}
	return;
}
/*
   Used as a stop gap info function
*/
#if 0
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = *nAnte = 1;
}

static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif

/*
 * Fault-free large write: delegates to rf_CommonCreateLargeWriteDAG with
 * the literal 2 and rf_RegularPQFunc as the redundancy function (the 2 is
 * presumably the number of redundancy units, P and Q - confirm against
 * rf_CommonCreateLargeWriteDAG's prototype).
 */
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
{
	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
	    rf_RegularPQFunc, RF_FALSE);
}

/*
 * "Regular" old-new Q update.
 *
 * The node carries 4d+3 parameters (asserted below): d (old pda, old buffer)
 * pairs, the old Q (pda, buffer) pair, d (new pda, new buffer) pairs and
 * finally raidPtr.  For every data pair the Q buffer is updated through
 * QDelta() at the byte offset of the pair's start sector within its stripe
 * unit, which is what distinguishes this from rf_SimpleONQFunc.
 *
 * NOTE: QDelta() in this file zero-fills its destination when built with
 * _KERNEL defined, so no real Q delta is produced in that configuration.
 *
 * Accumulates the elapsed time into tracerec->q_us, wakes the node and
 * returns 0.
 */
int
rf_RegularONQFunc(RF_DagNode_t *node)
{
	int np = node->numParams;
	int d;
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
	int i;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	char *qbuf, *qpbuf;
	char *obuf, *nbuf;
	RF_PhysDiskAddr_t *old, *new;
	unsigned long coeff;
	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;

	RF_ETIMER_START(timer);

	d = (np - 3) / 4;
	RF_ASSERT(4 * d + 3 == np);
	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
	for (i = 0; i < d; i++) {
		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
		obuf = (char *) node->params[2 * i + 1].p;
		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
		RF_ASSERT(new->numSector == old->numSector);
		RF_ASSERT(new->raidAddress == old->raidAddress);
		/* the stripe unit within the stripe tells us the coefficient
		 * to use for the multiply. */
		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
		/* reduce to the data column index within the stripe; note
		 * that no "+ 1" is applied here */
		coeff = (coeff % raidPtr->Layout.numDataCol);
		/* position within the Q buffer that matches this access */
		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
		QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
	}

	RF_ETIMER_STOP(timer);
	RF_ETIMER_EVAL(timer);
	tracerec->q_us += RF_ETIMER_VAL_US(timer);
	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
					 * I/O in this node */
	return (0);
}
/*
   See the SimpleXORFunc for the difference between a simple and regular func.
   These Q functions should be used for

         new q = Q(data,old data,old q)

   style updates and not for

         q = ( new data, new data, .... )

   computations.

   The simple q takes 2(2d+1)+1 params, where d is the number
   of stripes written. The order of params is
   old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
   [2d] old q pda_0, old q buffer
   [2d_2] new data pda_0, new data buffer_0, ...
new data pda_d, new data buffer_d 301 raidPtr 302 */ 303 304 int 305 rf_SimpleONQFunc(RF_DagNode_t *node) 306 { 307 int np = node->numParams; 308 int d; 309 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 310 int i; 311 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 312 RF_Etimer_t timer; 313 char *qbuf; 314 char *obuf, *nbuf; 315 RF_PhysDiskAddr_t *old, *new; 316 unsigned long coeff; 317 318 RF_ETIMER_START(timer); 319 320 d = (np - 3) / 4; 321 RF_ASSERT(4 * d + 3 == np); 322 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */ 323 for (i = 0; i < d; i++) { 324 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 325 obuf = (char *) node->params[2 * i + 1].p; 326 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p; 327 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p; 328 RF_ASSERT(new->numSector == old->numSector); 329 RF_ASSERT(new->raidAddress == old->raidAddress); 330 /* the stripe unit within the stripe tells us the coefficient 331 * to use for the multiply. 
*/ 332 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress); 333 /* compute the data unit offset within the column, then add 334 * one */ 335 coeff = (coeff % raidPtr->Layout.numDataCol); 336 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 337 } 338 339 RF_ETIMER_STOP(timer); 340 RF_ETIMER_EVAL(timer); 341 tracerec->q_us += RF_ETIMER_VAL_US(timer); 342 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 343 * I/O in this node */ 344 return (0); 345 } 346 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG) 347 { 348 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs); 349 } 350 351 static void RegularQSubr(RF_DagNode_t *node, char *qbuf); 352 353 static void 354 RegularQSubr(RF_DagNode_t *node, char *qbuf) 355 { 356 int np = node->numParams; 357 int d; 358 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 359 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 360 int i; 361 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 362 RF_Etimer_t timer; 363 char *obuf, *qpbuf; 364 RF_PhysDiskAddr_t *old; 365 unsigned long coeff; 366 367 RF_ETIMER_START(timer); 368 369 d = (np - 1) / 2; 370 RF_ASSERT(2 * d + 1 == np); 371 for (i = 0; i < d; i++) { 372 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 373 obuf = (char *) node->params[2 * i + 1].p; 374 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 375 /* compute the data unit offset within the column, then add 376 * one */ 377 coeff = (coeff % raidPtr->Layout.numDataCol); 378 /* the input buffers may not all be aligned with the start of 379 * the stripe. 
so shift by their sector offset within the 380 * stripe unit */ 381 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU); 382 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 383 } 384 385 RF_ETIMER_STOP(timer); 386 RF_ETIMER_EVAL(timer); 387 tracerec->q_us += RF_ETIMER_VAL_US(timer); 388 } 389 /* 390 used in degraded writes. 391 */ 392 393 static void DegrQSubr(RF_DagNode_t *node); 394 395 static void 396 DegrQSubr(RF_DagNode_t *node) 397 { 398 int np = node->numParams; 399 int d; 400 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 401 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 402 int i; 403 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 404 RF_Etimer_t timer; 405 char *qbuf = node->results[1]; 406 char *obuf, *qpbuf; 407 RF_PhysDiskAddr_t *old; 408 unsigned long coeff; 409 unsigned fail_start; 410 int j; 411 412 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 413 fail_start = old->startSector % secPerSU; 414 415 RF_ETIMER_START(timer); 416 417 d = (np - 2) / 2; 418 RF_ASSERT(2 * d + 2 == np); 419 for (i = 0; i < d; i++) { 420 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 421 obuf = (char *) node->params[2 * i + 1].p; 422 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 423 /* compute the data unit offset within the column, then add 424 * one */ 425 coeff = (coeff % raidPtr->Layout.numDataCol); 426 /* the input buffers may not all be aligned with the start of 427 * the stripe. 
so shift by their sector offset within the 428 * stripe unit */ 429 j = old->startSector % secPerSU; 430 RF_ASSERT(j >= fail_start); 431 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 432 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 433 } 434 435 RF_ETIMER_STOP(timer); 436 RF_ETIMER_EVAL(timer); 437 tracerec->q_us += RF_ETIMER_VAL_US(timer); 438 } 439 /* 440 Called by large write code to compute the new parity and the new q. 441 442 structure of the params: 443 444 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol 445 raidPtr 446 447 for a total of 2d+1 arguments. 448 The result buffers results[0], results[1] are the buffers for the p and q, 449 respectively. 450 451 We compute Q first, then compute P. The P calculation may try to reuse 452 one of the input buffers for its output, so if we computed P first, we would 453 corrupt the input for the q calculation. 454 */ 455 456 int 457 rf_RegularPQFunc(RF_DagNode_t *node) 458 { 459 RegularQSubr(node, node->results[1]); 460 return (rf_RegularXorFunc(node)); /* does the wakeup */ 461 } 462 463 int 464 rf_RegularQFunc(RF_DagNode_t *node) 465 { 466 /* Almost ... adjust Qsubr args */ 467 RegularQSubr(node, node->results[0]); 468 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no 469 * I/O in this node */ 470 return (0); 471 } 472 /* 473 Called by singly degraded write code to compute the new parity and the new q. 474 475 structure of the params: 476 477 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d 478 failedPDA raidPtr 479 480 for a total of 2d+2 arguments. 481 The result buffers results[0], results[1] are the buffers for the parity and q, 482 respectively. 483 484 We compute Q first, then compute parity. The parity calculation may try to reuse 485 one of the input buffers for its output, so if we computed parity first, we would 486 corrupt the input for the q calculation. 
487 488 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument. 489 */ 490 491 void 492 rf_Degraded_100_PQFunc(RF_DagNode_t *node) 493 { 494 int np = node->numParams; 495 496 RF_ASSERT(np >= 2); 497 DegrQSubr(node); 498 rf_RecoveryXorFunc(node); 499 } 500 501 502 /* 503 The two below are used when reading a stripe with a single lost data unit. 504 The parameters are 505 506 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr 507 508 and results[0] contains the data buffer. Which is originally zero-filled. 509 510 */ 511 512 /* this Q func is used by the degraded-mode dag functions to recover lost data. 513 * the second-to-last parameter is the PDA for the failed portion of the access. 514 * the code here looks at this PDA and assumes that the xor target buffer is 515 * equal in size to the number of sectors in the failed PDA. It then uses 516 * the other PDAs in the parameter list to determine where within the target 517 * buffer the corresponding data should be xored. 518 * 519 * Recall the basic equation is 520 * 521 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256 522 * 523 * so to recover data_j we need 524 * 525 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256 526 * 527 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by 528 * copying Q into it. 
Then we need to do a table lookup to convert to solve 529 * data_j /= J 530 * 531 * 532 */ 533 int 534 rf_RecoveryQFunc(RF_DagNode_t *node) 535 { 536 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 537 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 538 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 539 int i; 540 RF_PhysDiskAddr_t *pda; 541 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 542 char *srcbuf, *destbuf; 543 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 544 RF_Etimer_t timer; 545 unsigned long coeff; 546 547 RF_ETIMER_START(timer); 548 /* start by copying Q into the buffer */ 549 memcpy(node->results[0], node->params[node->numParams - 3].p, 550 rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); 551 for (i = 0; i < node->numParams - 4; i += 2) { 552 RF_ASSERT(node->params[i + 1].p != node->results[0]); 553 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 554 srcbuf = (char *) node->params[i + 1].p; 555 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 556 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); 557 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress); 558 /* compute the data unit offset within the column */ 559 coeff = (coeff % raidPtr->Layout.numDataCol); 560 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 561 } 562 /* Do the nasty inversion now */ 563 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol); 564 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff); 565 RF_ETIMER_STOP(timer); 566 RF_ETIMER_EVAL(timer); 567 tracerec->q_us += RF_ETIMER_VAL_US(timer); 568 rf_GenericWakeupFunc(node, 0); 569 return (0); 570 } 571 572 int 573 
rf_RecoveryPQFunc(RF_DagNode_t *node) 574 { 575 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 576 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid); 577 return (1); 578 } 579 /* 580 Degraded write Q subroutine. 581 Used when P is dead. 582 Large-write style Q computation. 583 Parameters 584 585 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr. 586 587 We ignore failedPDA. 588 589 This is a "simple style" recovery func. 590 */ 591 592 void 593 rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node) 594 { 595 int np = node->numParams; 596 int d; 597 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p; 598 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit; 599 int i; 600 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 601 RF_Etimer_t timer; 602 char *qbuf = node->results[0]; 603 char *obuf, *qpbuf; 604 RF_PhysDiskAddr_t *old; 605 unsigned long coeff; 606 int fail_start, j; 607 608 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p; 609 fail_start = old->startSector % secPerSU; 610 611 RF_ETIMER_START(timer); 612 613 d = (np - 2) / 2; 614 RF_ASSERT(2 * d + 2 == np); 615 616 for (i = 0; i < d; i++) { 617 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p; 618 obuf = (char *) node->params[2 * i + 1].p; 619 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress); 620 /* compute the data unit offset within the column, then add 621 * one */ 622 coeff = (coeff % raidPtr->Layout.numDataCol); 623 j = old->startSector % secPerSU; 624 RF_ASSERT(j >= fail_start); 625 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start); 626 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff); 627 } 628 629 RF_ETIMER_STOP(timer); 630 RF_ETIMER_EVAL(timer); 631 tracerec->q_us += RF_ETIMER_VAL_US(timer); 632 rf_GenericWakeupFunc(node, 0); 633 } 634 635 636 637 638 /* Q computations */ 639 640 /* 641 coeff - colummn; 642 643 compute dest ^= 
qfor[28-coeff][rn[coeff+1] a] 644 645 on 5-bit basis; 646 length in bytes; 647 */ 648 649 void 650 rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, unsigned coeff) 651 { 652 unsigned long a, d, new; 653 unsigned long a1, a2; 654 unsigned int *q = &(rf_qfor[28 - coeff][0]); 655 unsigned r = rf_rn[coeff + 1]; 656 657 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f) 658 #define INSERT(a,i) (a << (5L*i)) 659 660 length /= 8; 661 /* 13 5 bit quants in a 64 bit word */ 662 while (length) { 663 a = *buf++; 664 d = *dest; 665 a1 = EXTRACT(a, 0) ^ r; 666 a2 = EXTRACT(a, 1) ^ r; 667 new = INSERT(a2, 1) | a1; 668 a1 = EXTRACT(a, 2) ^ r; 669 a2 = EXTRACT(a, 3) ^ r; 670 a1 = q[a1]; 671 a2 = q[a2]; 672 new = new | INSERT(a1, 2) | INSERT(a2, 3); 673 a1 = EXTRACT(a, 4) ^ r; 674 a2 = EXTRACT(a, 5) ^ r; 675 a1 = q[a1]; 676 a2 = q[a2]; 677 new = new | INSERT(a1, 4) | INSERT(a2, 5); 678 a1 = EXTRACT(a, 5) ^ r; 679 a2 = EXTRACT(a, 6) ^ r; 680 a1 = q[a1]; 681 a2 = q[a2]; 682 new = new | INSERT(a1, 5) | INSERT(a2, 6); 683 #if RF_LONGSHIFT > 2 684 a1 = EXTRACT(a, 7) ^ r; 685 a2 = EXTRACT(a, 8) ^ r; 686 a1 = q[a1]; 687 a2 = q[a2]; 688 new = new | INSERT(a1, 7) | INSERT(a2, 8); 689 a1 = EXTRACT(a, 9) ^ r; 690 a2 = EXTRACT(a, 10) ^ r; 691 a1 = q[a1]; 692 a2 = q[a2]; 693 new = new | INSERT(a1, 9) | INSERT(a2, 10); 694 a1 = EXTRACT(a, 11) ^ r; 695 a2 = EXTRACT(a, 12) ^ r; 696 a1 = q[a1]; 697 a2 = q[a2]; 698 new = new | INSERT(a1, 11) | INSERT(a2, 12); 699 #endif /* RF_LONGSHIFT > 2 */ 700 d ^= new; 701 *dest++ = d; 702 length--; 703 } 704 } 705 /* 706 compute 707 708 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ] 709 710 on a five bit basis. 711 optimization: compute old ^ new on 64 bit basis. 712 713 length in bytes. 
*/

/*
 * Stubbed Q delta routine.
 *
 *   dest    Q buffer to update
 *   obuf    old data
 *   nbuf    new data
 *   length  buffer size in bytes
 *   coeff   data column (note: narrowed to unsigned char by the prototype)
 *
 * As written, the table pointer and mask are computed and then immediately
 * overwritten with NULL / 0.  With _KERNEL defined the routine only
 * zero-fills dest.  Otherwise it runs the 5-bit translation loop below.
 *
 * NOTE(review): in the non-kernel branch q is NULL when q[a1] is evaluated,
 * and obuf, nbuf and dest are char pointers, so each pass reads and writes
 * a single byte while the loop counts length / 8 passes.  The original
 * author's own remark below says this path probably does not work.
 */
static void
QDelta(
    char *dest,
    char *obuf,
    char *nbuf,
    unsigned length,
    unsigned char coeff)
{
	unsigned long a, d, new;
	unsigned long a1, a2;
	unsigned int *q = &(rf_qfor[28 - coeff][0]);
	unsigned int r = rf_rn[coeff + 1];

	/* discard the values computed above; this also marks every local
	 * as used when the loop below is compiled out */
	r = a1 = a2 = new = d = a = 0; /* XXX for now... */
	q = NULL; /* XXX for now */

#ifdef _KERNEL
	/* PQ in kernel currently not supported because the encoding/decoding
	 * table is not present */
	memset(dest, 0, length);
#else				/* KERNEL */
	/* this code probably doesn't work and should be rewritten  -wvcii */
	/* 13 5 bit quants in a 64 bit word */
	length /= 8;
	while (length) {
		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
		a ^= *nbuf++;
		d = *dest;
		a1 = EXTRACT(a, 0) ^ r;
		a2 = EXTRACT(a, 1) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = INSERT(a2, 1) | a1;
		a1 = EXTRACT(a, 2) ^ r;
		a2 = EXTRACT(a, 3) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 2) | INSERT(a2, 3);
		a1 = EXTRACT(a, 4) ^ r;
		a2 = EXTRACT(a, 5) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 4) | INSERT(a2, 5);
		/* quantity 5 is handled a second time here, paired with 6 */
		a1 = EXTRACT(a, 5) ^ r;
		a2 = EXTRACT(a, 6) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 5) | INSERT(a2, 6);
#if RF_LONGSHIFT > 2
		a1 = EXTRACT(a, 7) ^ r;
		a2 = EXTRACT(a, 8) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 7) | INSERT(a2, 8);
		a1 = EXTRACT(a, 9) ^ r;
		a2 = EXTRACT(a, 10) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 9) | INSERT(a2, 10);
		a1 = EXTRACT(a, 11) ^ r;
		a2 = EXTRACT(a, 12) ^ r;
		a1 = q[a1];
		a2 = q[a2];
		new = new | INSERT(a1, 11) | INSERT(a2, 12);
#endif				/* RF_LONGSHIFT > 2 */
		d ^= new;
		*dest++ = d;
		length--;
	}
#endif				/* _KERNEL */
}
/*
   recover columns a and b from the given p and q into
   bufs abuf and bbuf. All bufs are word aligned.
790 Length is in bytes. 791 */ 792 793 794 /* 795 * XXX 796 * 797 * Everything about this seems wrong. 798 */ 799 void 800 rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b) 801 { 802 unsigned long p, q, a, a0, a1; 803 int col = (29 * coeff_a) + coeff_b; 804 unsigned char *q0 = &(rf_qinv[col][0]); 805 806 length /= 8; 807 while (length) { 808 p = *pbuf++; 809 q = *qbuf++; 810 a0 = EXTRACT(p, 0); 811 a1 = EXTRACT(q, 0); 812 a = q0[a0 << 5 | a1]; 813 #define MF(i) \ 814 a0 = EXTRACT(p,i); \ 815 a1 = EXTRACT(q,i); \ 816 a = a | INSERT(q0[a0<<5 | a1],i) 817 818 MF(1); 819 MF(2); 820 MF(3); 821 MF(4); 822 MF(5); 823 MF(6); 824 #if 0 825 MF(7); 826 MF(8); 827 MF(9); 828 MF(10); 829 MF(11); 830 MF(12); 831 #endif /* 0 */ 832 *abuf++ = a; 833 *bbuf++ = a ^ p; 834 length--; 835 } 836 } 837 /* 838 Lost parity and a data column. Recover that data column. 839 Assume col coeff is lost. Let q the contents of Q after 840 all surviving data columns have been q-xored out of it. 841 Then we have the equation 842 843 q[28-coeff][a_i ^ r_i+1] = q 844 845 but q is cyclic with period 31. 846 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] = 847 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} . 848 849 so a_i = r_{coeff+1} ^ q[3+coeff][q] 850 851 The routine is passed q buffer and the buffer 852 the data is to be recoverd into. They can be the same. 
853 */ 854 855 856 857 static void 858 rf_InvertQ( 859 unsigned long *qbuf, 860 unsigned long *abuf, 861 unsigned length, 862 unsigned coeff) 863 { 864 unsigned long a, new; 865 unsigned long a1, a2; 866 unsigned int *q = &(rf_qfor[3 + coeff][0]); 867 unsigned r = rf_rn[coeff + 1]; 868 869 /* 13 5 bit quants in a 64 bit word */ 870 length /= 8; 871 while (length) { 872 a = *qbuf++; 873 a1 = EXTRACT(a, 0); 874 a2 = EXTRACT(a, 1); 875 a1 = r ^ q[a1]; 876 a2 = r ^ q[a2]; 877 new = INSERT(a2, 1) | a1; 878 #define M(i,j) \ 879 a1 = EXTRACT(a,i); \ 880 a2 = EXTRACT(a,j); \ 881 a1 = r ^ q[a1]; \ 882 a2 = r ^ q[a2]; \ 883 new = new | INSERT(a1,i) | INSERT(a2,j) 884 885 M(2, 3); 886 M(4, 5); 887 M(5, 6); 888 #if RF_LONGSHIFT > 2 889 M(7, 8); 890 M(9, 10); 891 M(11, 12); 892 #endif /* RF_LONGSHIFT > 2 */ 893 *abuf++ = new; 894 length--; 895 } 896 } 897 #endif /* (RF_INCLUDE_DECL_PQ > 0) || 898 * (RF_INCLUDE_RAID6 > 0) */ 899