1 /* $NetBSD: rf_parityscan.c,v 1.32 2006/11/16 01:33:23 christos Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /***************************************************************************** 30 * 31 * rf_parityscan.c -- misc utilities related to parity verification 32 * 33 ****************************************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_parityscan.c,v 1.32 2006/11/16 01:33:23 christos Exp $"); 37 38 #include <dev/raidframe/raidframevar.h> 39 40 #include "rf_raid.h" 41 #include "rf_dag.h" 42 #include "rf_dagfuncs.h" 43 #include "rf_dagutils.h" 44 #include "rf_mcpair.h" 45 #include "rf_general.h" 46 #include "rf_engine.h" 47 #include "rf_parityscan.h" 48 #include "rf_map.h" 49 50 /***************************************************************************** 51 * 52 * walk through the entire arry and write new parity. This works by 53 * creating two DAGs, one to read a stripe of data and one to write 54 * new parity. The first is executed, the data is xored together, and 55 * then the second is executed. To avoid constantly building and 56 * tearing down the DAGs, we create them a priori and fill them in 57 * with the mapping information as we go along. 58 * 59 * there should never be more than one thread running this. 60 * 61 ****************************************************************************/ 62 63 int 64 rf_RewriteParity(RF_Raid_t *raidPtr) 65 { 66 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 67 RF_AccessStripeMapHeader_t *asm_h; 68 int ret_val; 69 int rc; 70 RF_SectorNum_t i; 71 72 if (raidPtr->Layout.map->faultsTolerated == 0) { 73 /* There isn't any parity. Call it "okay." */ 74 return (RF_PARITY_OKAY); 75 } 76 if (raidPtr->status != rf_rs_optimal) { 77 /* 78 * We're in degraded mode. Don't try to verify parity now! 79 * XXX: this should be a "we don't want to", not a 80 * "we can't" error. 81 */ 82 return (RF_PARITY_COULD_NOT_VERIFY); 83 } 84 85 ret_val = 0; 86 87 rc = RF_PARITY_OKAY; 88 89 for (i = 0; i < raidPtr->totalSectors && 90 rc <= RF_PARITY_CORRECTED; 91 i += layoutPtr->dataSectorsPerStripe) { 92 if (raidPtr->waitShutdown) { 93 /* Someone is pulling the plug on this set... 94 abort the re-write */ 95 return (1); 96 } 97 asm_h = rf_MapAccess(raidPtr, i, 98 layoutPtr->dataSectorsPerStripe, 99 NULL, RF_DONT_REMAP); 100 raidPtr->parity_rewrite_stripes_done = 101 i / layoutPtr->dataSectorsPerStripe ; 102 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); 103 104 switch (rc) { 105 case RF_PARITY_OKAY: 106 case RF_PARITY_CORRECTED: 107 break; 108 case RF_PARITY_BAD: 109 printf("Parity bad during correction\n"); 110 ret_val = 1; 111 break; 112 case RF_PARITY_COULD_NOT_CORRECT: 113 printf("Could not correct bad parity\n"); 114 ret_val = 1; 115 break; 116 case RF_PARITY_COULD_NOT_VERIFY: 117 printf("Could not verify parity\n"); 118 ret_val = 1; 119 break; 120 default: 121 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); 122 ret_val = 1; 123 } 124 rf_FreeAccessStripeMap(asm_h); 125 } 126 return (ret_val); 127 } 128 /***************************************************************************** 129 * 130 * verify that the parity in a particular stripe is correct. we 131 * validate only the range of parity defined by parityPDA, since this 132 * is all we have locked. The way we do this is to create an asm that 133 * maps the whole stripe and then range-restrict it to the parity 134 * region defined by the parityPDA. 135 * 136 ****************************************************************************/ 137 int 138 rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *aasm, 139 int correct_it, RF_RaidAccessFlags_t flags) 140 { 141 RF_PhysDiskAddr_t *parityPDA; 142 RF_AccessStripeMap_t *doasm; 143 const RF_LayoutSW_t *lp; 144 int lrc, rc; 145 146 lp = raidPtr->Layout.map; 147 if (lp->faultsTolerated == 0) { 148 /* 149 * There isn't any parity. Call it "okay." 150 */ 151 return (RF_PARITY_OKAY); 152 } 153 rc = RF_PARITY_OKAY; 154 if (lp->VerifyParity) { 155 for (doasm = aasm; doasm; doasm = doasm->next) { 156 for (parityPDA = doasm->parityInfo; parityPDA; 157 parityPDA = parityPDA->next) { 158 lrc = lp->VerifyParity(raidPtr, 159 doasm->raidAddress, 160 parityPDA, 161 correct_it, flags); 162 if (lrc > rc) { 163 /* see rf_parityscan.h for why this 164 * works */ 165 rc = lrc; 166 } 167 } 168 } 169 } else { 170 rc = RF_PARITY_COULD_NOT_VERIFY; 171 } 172 return (rc); 173 } 174 175 int 176 rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, 177 RF_PhysDiskAddr_t *parityPDA, int correct_it, 178 RF_RaidAccessFlags_t flags) 179 { 180 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 181 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, 182 raidAddr); 183 RF_SectorCount_t numsector = parityPDA->numSector; 184 int numbytes = rf_RaidAddressToByte(raidPtr, numsector); 185 int bytesPerStripe = numbytes * layoutPtr->numDataCol; 186 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ 187 RF_DagNode_t *blockNode, *wrBlock; 188 RF_AccessStripeMapHeader_t *asm_h; 189 RF_AccessStripeMap_t *asmap; 190 RF_AllocListElem_t *alloclist; 191 RF_PhysDiskAddr_t *pda; 192 char *pbuf, *bf, *end_p, *p; 193 int i, retcode; 194 RF_ReconUnitNum_t which_ru; 195 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, 196 raidAddr, 197 &which_ru); 198 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 199 #if RF_ACC_TRACE > 0 200 RF_AccTraceEntry_t tracerec; 201 #endif 202 RF_MCPair_t *mcpair; 203 204 retcode = RF_PARITY_OKAY; 205 206 mcpair = rf_AllocMCPair(); 207 rf_MakeAllocList(alloclist); 208 RF_MallocAndAdd(bf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); 209 RF_MallocAndAdd(pbuf, numbytes, (char *), alloclist); 210 end_p = bf + bytesPerStripe; 211 212 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, bf, rf_DiskReadFunc, rf_DiskReadUndoFunc, 213 "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); 214 blockNode = rd_dag_h->succedents[0]; 215 216 /* map the stripe and fill in the PDAs in the dag */ 217 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, bf, RF_DONT_REMAP); 218 asmap = asm_h->stripeMap; 219 220 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 221 RF_ASSERT(pda); 222 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 223 RF_ASSERT(pda->numSector != 0); 224 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) 225 goto out; /* no way to verify parity if disk is 226 * dead. return w/ good status */ 227 blockNode->succedents[i]->params[0].p = pda; 228 blockNode->succedents[i]->params[2].v = psID; 229 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 230 } 231 232 RF_ASSERT(!asmap->parityInfo->next); 233 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); 234 RF_ASSERT(asmap->parityInfo->numSector != 0); 235 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) 236 goto out; 237 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; 238 239 /* fire off the DAG */ 240 #if RF_ACC_TRACE > 0 241 memset((char *) &tracerec, 0, sizeof(tracerec)); 242 rd_dag_h->tracerec = &tracerec; 243 #endif 244 #if 0 245 if (rf_verifyParityDebug) { 246 printf("Parity verify read dag:\n"); 247 rf_PrintDAGList(rd_dag_h); 248 } 249 #endif 250 RF_LOCK_MUTEX(mcpair->mutex); 251 mcpair->flag = 0; 252 RF_UNLOCK_MUTEX(mcpair->mutex); 253 254 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 255 (void *) mcpair); 256 257 RF_LOCK_MUTEX(mcpair->mutex); 258 while (!mcpair->flag) 259 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 260 RF_UNLOCK_MUTEX(mcpair->mutex); 261 if (rd_dag_h->status != rf_enable) { 262 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); 263 retcode = RF_PARITY_COULD_NOT_VERIFY; 264 goto out; 265 } 266 for (p = bf; p < end_p; p += numbytes) { 267 rf_bxor(p, pbuf, numbytes); 268 } 269 for (i = 0; i < numbytes; i++) { 270 if (pbuf[i] != bf[bytesPerStripe + i]) { 271 if (!correct_it) 272 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", 273 i, (u_char) bf[bytesPerStripe + i], (u_char) pbuf[i]); 274 retcode = RF_PARITY_BAD; 275 break; 276 } 277 } 278 279 if (retcode && correct_it) { 280 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 281 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); 282 wrBlock = wr_dag_h->succedents[0]; 283 wrBlock->succedents[0]->params[0].p = asmap->parityInfo; 284 wrBlock->succedents[0]->params[2].v = psID; 285 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 286 #if RF_ACC_TRACE > 0 287 memset((char *) &tracerec, 0, sizeof(tracerec)); 288 wr_dag_h->tracerec = &tracerec; 289 #endif 290 #if 0 291 if (rf_verifyParityDebug) { 292 printf("Parity verify write dag:\n"); 293 rf_PrintDAGList(wr_dag_h); 294 } 295 #endif 296 RF_LOCK_MUTEX(mcpair->mutex); 297 mcpair->flag = 0; 298 RF_UNLOCK_MUTEX(mcpair->mutex); 299 300 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 301 (void *) mcpair); 302 303 RF_LOCK_MUTEX(mcpair->mutex); 304 while (!mcpair->flag) 305 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 306 RF_UNLOCK_MUTEX(mcpair->mutex); 307 if (wr_dag_h->status != rf_enable) { 308 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); 309 retcode = RF_PARITY_COULD_NOT_CORRECT; 310 } 311 rf_FreeDAG(wr_dag_h); 312 if (retcode == RF_PARITY_BAD) 313 retcode = RF_PARITY_CORRECTED; 314 } 315 out: 316 rf_FreeAccessStripeMap(asm_h); 317 rf_FreeAllocList(alloclist); 318 rf_FreeDAG(rd_dag_h); 319 rf_FreeMCPair(mcpair); 320 return (retcode); 321 } 322 323 int 324 rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, 325 int parity) 326 { 327 if (raidPtr->Disks[pda->col].status == rf_ds_reconstructing) { 328 if (rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, pda->startSector)) { 329 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 330 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 331 #if RF_DEBUG_VERIFYPARITY 332 RF_RowCol_t oc = pda->col; 333 RF_SectorNum_t os = pda->startSector; 334 #endif 335 if (parity) { 336 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 337 #if RF_DEBUG_VERIFYPARITY 338 if (rf_verifyParityDebug) 339 printf("VerifyParity: Redir P c %d sect %ld -> c %d sect %ld\n", 340 oc, (long) os, pda->col, (long) pda->startSector); 341 #endif 342 } else { 343 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 344 #if RF_DEBUG_VERIFYPARITY 345 if (rf_verifyParityDebug) 346 printf("VerifyParity: Redir D c %d sect %ld -> c %d sect %ld\n", 347 oc, (long) os, pda->col, (long) pda->startSector); 348 #endif 349 } 350 } else { 351 #endif 352 RF_RowCol_t spCol = raidPtr->Disks[pda->col].spareCol; 353 pda->col = spCol; 354 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 355 } 356 #endif 357 } 358 } 359 if (RF_DEAD_DISK(raidPtr->Disks[pda->col].status)) 360 return (1); 361 return (0); 362 } 363 /***************************************************************************** 364 * 365 * currently a stub. 366 * 367 * takes as input an ASM describing a write operation and containing 368 * one failure, and verifies that the parity was correctly updated to 369 * reflect the write. 370 * 371 * if it's a data unit that's failed, we read the other data units in 372 * the stripe and the parity unit, XOR them together, and verify that 373 * we get the data intended for the failed disk. Since it's easy, we 374 * also validate that the right data got written to the surviving data 375 * disks. 376 * 377 * If it's the parity that failed, there's really no validation we can 378 * do except the above verification that the right data got written to 379 * all disks. This is because the new data intended for the failed 380 * disk is supplied in the ASM, but this is of course not the case for 381 * the new parity. 382 * 383 ****************************************************************************/ 384 #if 0 385 int 386 rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh) 387 { 388 return (0); 389 } 390 #endif 391 /* creates a simple DAG with a header, a block-recon node at level 1, 392 * nNodes nodes at level 2, an unblock-recon node at level 3, and a 393 * terminator node at level 4. The stripe address field in the block 394 * and unblock nodes are not touched, nor are the pda fields in the 395 * second-level nodes, so they must be filled in later. 396 * 397 * commit point is established at unblock node - this means that any 398 * failure during dag execution causes the dag to fail 399 * 400 * name - node names at the second level 401 */ 402 RF_DagHeader_t * 403 rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, int bytesPerSU, char *databuf, 404 int (*doFunc) (RF_DagNode_t * node), 405 int (*undoFunc) (RF_DagNode_t * node), 406 const char *name, RF_AllocListElem_t *alloclist, 407 RF_RaidAccessFlags_t flags, int priority) 408 { 409 RF_DagHeader_t *dag_h; 410 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode, *tmpNode; 411 int i; 412 413 /* grab a DAG header... */ 414 415 dag_h = rf_AllocDAGHeader(); 416 dag_h->raidPtr = (void *) raidPtr; 417 dag_h->allocList = NULL;/* we won't use this alloc list */ 418 dag_h->status = rf_enable; 419 dag_h->numSuccedents = 1; 420 dag_h->creator = "SimpleDAG"; 421 422 /* this dag can not commit until the unblock node is reached errors 423 * prior to the commit point imply the dag has failed */ 424 dag_h->numCommitNodes = 1; 425 dag_h->numCommits = 0; 426 427 /* create the nodes, the block & unblock nodes, and the terminator 428 * node */ 429 430 for (i = 0; i < nNodes; i++) { 431 tmpNode = rf_AllocDAGNode(); 432 tmpNode->list_next = dag_h->nodes; 433 dag_h->nodes = tmpNode; 434 } 435 nodes = dag_h->nodes; 436 437 blockNode = rf_AllocDAGNode(); 438 blockNode->list_next = dag_h->nodes; 439 dag_h->nodes = blockNode; 440 441 unblockNode = rf_AllocDAGNode(); 442 unblockNode->list_next = dag_h->nodes; 443 dag_h->nodes = unblockNode; 444 445 termNode = rf_AllocDAGNode(); 446 termNode->list_next = dag_h->nodes; 447 dag_h->nodes = termNode; 448 449 dag_h->succedents[0] = blockNode; 450 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); 451 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); 452 unblockNode->succedents[0] = termNode; 453 tmpNode = nodes; 454 for (i = 0; i < nNodes; i++) { 455 blockNode->succedents[i] = unblockNode->antecedents[i] = tmpNode; 456 unblockNode->antType[i] = rf_control; 457 rf_InitNode(tmpNode, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); 458 tmpNode->succedents[0] = unblockNode; 459 tmpNode->antecedents[0] = blockNode; 460 tmpNode->antType[0] = rf_control; 461 tmpNode->params[1].p = (databuf + (i * bytesPerSU)); 462 tmpNode = tmpNode->list_next; 463 } 464 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); 465 termNode->antecedents[0] = unblockNode; 466 termNode->antType[0] = rf_control; 467 return (dag_h); 468 } 469