/* $NetBSD: rf_parityscan.c,v 1.36 2019/10/10 03:43:59 christos Exp $ */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*****************************************************************************
 *
 * rf_parityscan.c -- misc utilities related to parity verification
 *
 ****************************************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_parityscan.c,v 1.36 2019/10/10 03:43:59 christos Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagfuncs.h"
#include "rf_dagutils.h"
#include "rf_mcpair.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_parityscan.h"
#include "rf_map.h"
#include "rf_paritymap.h"

/*****************************************************************************
 *
 * walk through the entire array and write new parity.
This works by 54 * creating two DAGs, one to read a stripe of data and one to write 55 * new parity. The first is executed, the data is xored together, and 56 * then the second is executed. To avoid constantly building and 57 * tearing down the DAGs, we create them a priori and fill them in 58 * with the mapping information as we go along. 59 * 60 * there should never be more than one thread running this. 61 * 62 ****************************************************************************/ 63 64 int 65 rf_RewriteParity(RF_Raid_t *raidPtr) 66 { 67 if (raidPtr->parity_map != NULL) 68 return rf_paritymap_rewrite(raidPtr->parity_map); 69 else 70 return rf_RewriteParityRange(raidPtr, 0, raidPtr->totalSectors); 71 } 72 73 int 74 rf_RewriteParityRange(RF_Raid_t *raidPtr, RF_SectorNum_t sec_begin, 75 RF_SectorNum_t sec_len) 76 { 77 /* 78 * Note: It is the caller's responsibility to ensure that 79 * sec_begin and sec_len are stripe-aligned. 80 */ 81 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 82 RF_AccessStripeMapHeader_t *asm_h; 83 int ret_val; 84 int rc; 85 RF_SectorNum_t i; 86 87 if (raidPtr->Layout.map->faultsTolerated == 0) { 88 /* There isn't any parity. Call it "okay." */ 89 return (RF_PARITY_OKAY); 90 } 91 if (raidPtr->status != rf_rs_optimal) { 92 /* 93 * We're in degraded mode. Don't try to verify parity now! 94 * XXX: this should be a "we don't want to", not a 95 * "we can't" error. 96 */ 97 return (RF_PARITY_COULD_NOT_VERIFY); 98 } 99 100 ret_val = 0; 101 102 rc = RF_PARITY_OKAY; 103 104 for (i = sec_begin; i < sec_begin + sec_len && 105 rc <= RF_PARITY_CORRECTED; 106 i += layoutPtr->dataSectorsPerStripe) { 107 if (raidPtr->waitShutdown) { 108 /* Someone is pulling the plug on this set... 
109 abort the re-write */ 110 return (1); 111 } 112 asm_h = rf_MapAccess(raidPtr, i, 113 layoutPtr->dataSectorsPerStripe, 114 NULL, RF_DONT_REMAP); 115 raidPtr->parity_rewrite_stripes_done = 116 i / layoutPtr->dataSectorsPerStripe ; 117 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); 118 119 switch (rc) { 120 case RF_PARITY_OKAY: 121 case RF_PARITY_CORRECTED: 122 break; 123 case RF_PARITY_BAD: 124 printf("Parity bad during correction\n"); 125 ret_val = 1; 126 break; 127 case RF_PARITY_COULD_NOT_CORRECT: 128 printf("Could not correct bad parity\n"); 129 ret_val = 1; 130 break; 131 case RF_PARITY_COULD_NOT_VERIFY: 132 printf("Could not verify parity\n"); 133 ret_val = 1; 134 break; 135 default: 136 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); 137 ret_val = 1; 138 } 139 rf_FreeAccessStripeMap(asm_h); 140 } 141 return (ret_val); 142 } 143 /***************************************************************************** 144 * 145 * verify that the parity in a particular stripe is correct. we 146 * validate only the range of parity defined by parityPDA, since this 147 * is all we have locked. The way we do this is to create an asm that 148 * maps the whole stripe and then range-restrict it to the parity 149 * region defined by the parityPDA. 150 * 151 ****************************************************************************/ 152 int 153 rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *aasm, 154 int correct_it, RF_RaidAccessFlags_t flags) 155 { 156 RF_PhysDiskAddr_t *parityPDA; 157 RF_AccessStripeMap_t *doasm; 158 const RF_LayoutSW_t *lp; 159 int lrc, rc; 160 161 lp = raidPtr->Layout.map; 162 if (lp->faultsTolerated == 0) { 163 /* 164 * There isn't any parity. Call it "okay." 
165 */ 166 return (RF_PARITY_OKAY); 167 } 168 rc = RF_PARITY_OKAY; 169 if (lp->VerifyParity) { 170 for (doasm = aasm; doasm; doasm = doasm->next) { 171 for (parityPDA = doasm->parityInfo; parityPDA; 172 parityPDA = parityPDA->next) { 173 lrc = lp->VerifyParity(raidPtr, 174 doasm->raidAddress, 175 parityPDA, 176 correct_it, flags); 177 if (lrc > rc) { 178 /* see rf_parityscan.h for why this 179 * works */ 180 rc = lrc; 181 } 182 } 183 } 184 } else { 185 rc = RF_PARITY_COULD_NOT_VERIFY; 186 } 187 return (rc); 188 } 189 190 int 191 rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, 192 RF_PhysDiskAddr_t *parityPDA, int correct_it, 193 RF_RaidAccessFlags_t flags) 194 { 195 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 196 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, 197 raidAddr); 198 RF_SectorCount_t numsector = parityPDA->numSector; 199 int numbytes = rf_RaidAddressToByte(raidPtr, numsector); 200 int bytesPerStripe = numbytes * layoutPtr->numDataCol; 201 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ 202 RF_DagNode_t *blockNode, *wrBlock; 203 RF_AccessStripeMapHeader_t *asm_h; 204 RF_AccessStripeMap_t *asmap; 205 RF_AllocListElem_t *alloclist; 206 RF_PhysDiskAddr_t *pda; 207 char *pbuf, *bf, *end_p, *p; 208 int i, retcode; 209 RF_ReconUnitNum_t which_ru; 210 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, 211 raidAddr, 212 &which_ru); 213 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 214 #if RF_ACC_TRACE > 0 215 RF_AccTraceEntry_t tracerec; 216 #endif 217 RF_MCPair_t *mcpair; 218 219 retcode = RF_PARITY_OKAY; 220 221 mcpair = rf_AllocMCPair(); 222 rf_MakeAllocList(alloclist); 223 bf = RF_MallocAndAdd(numbytes 224 * (layoutPtr->numDataCol + layoutPtr->numParityCol), alloclist); 225 pbuf = RF_MallocAndAdd(numbytes, alloclist); 226 end_p = bf + bytesPerStripe; 227 228 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, bf, rf_DiskReadFunc, rf_DiskReadUndoFunc, 229 "Rod", 
alloclist, flags, RF_IO_NORMAL_PRIORITY); 230 blockNode = rd_dag_h->succedents[0]; 231 232 /* map the stripe and fill in the PDAs in the dag */ 233 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, bf, RF_DONT_REMAP); 234 asmap = asm_h->stripeMap; 235 236 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 237 RF_ASSERT(pda); 238 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 239 RF_ASSERT(pda->numSector != 0); 240 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) 241 goto out; /* no way to verify parity if disk is 242 * dead. return w/ good status */ 243 blockNode->succedents[i]->params[0].p = pda; 244 blockNode->succedents[i]->params[2].v = psID; 245 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 246 } 247 248 RF_ASSERT(!asmap->parityInfo->next); 249 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); 250 RF_ASSERT(asmap->parityInfo->numSector != 0); 251 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) 252 goto out; 253 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; 254 255 /* fire off the DAG */ 256 #if RF_ACC_TRACE > 0 257 memset(&tracerec, 0, sizeof(tracerec)); 258 rd_dag_h->tracerec = &tracerec; 259 #endif 260 #if 0 261 if (rf_verifyParityDebug) { 262 printf("Parity verify read dag:\n"); 263 rf_PrintDAGList(rd_dag_h); 264 } 265 #endif 266 RF_LOCK_MCPAIR(mcpair); 267 mcpair->flag = 0; 268 RF_UNLOCK_MCPAIR(mcpair); 269 270 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 271 (void *) mcpair); 272 273 RF_LOCK_MCPAIR(mcpair); 274 while (!mcpair->flag) 275 RF_WAIT_MCPAIR(mcpair); 276 RF_UNLOCK_MCPAIR(mcpair); 277 if (rd_dag_h->status != rf_enable) { 278 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); 279 retcode = RF_PARITY_COULD_NOT_VERIFY; 280 goto out; 281 } 282 for (p = bf; p < end_p; p += numbytes) { 283 rf_bxor(p, pbuf, numbytes); 284 } 285 for (i = 0; i < numbytes; i++) { 
286 if (pbuf[i] != bf[bytesPerStripe + i]) { 287 if (!correct_it) 288 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", 289 i, (u_char) bf[bytesPerStripe + i], (u_char) pbuf[i]); 290 retcode = RF_PARITY_BAD; 291 break; 292 } 293 } 294 295 if (retcode && correct_it) { 296 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 297 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); 298 wrBlock = wr_dag_h->succedents[0]; 299 wrBlock->succedents[0]->params[0].p = asmap->parityInfo; 300 wrBlock->succedents[0]->params[2].v = psID; 301 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); 302 #if RF_ACC_TRACE > 0 303 memset(&tracerec, 0, sizeof(tracerec)); 304 wr_dag_h->tracerec = &tracerec; 305 #endif 306 #if 0 307 if (rf_verifyParityDebug) { 308 printf("Parity verify write dag:\n"); 309 rf_PrintDAGList(wr_dag_h); 310 } 311 #endif 312 RF_LOCK_MCPAIR(mcpair); 313 mcpair->flag = 0; 314 RF_UNLOCK_MCPAIR(mcpair); 315 316 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 317 (void *) mcpair); 318 319 RF_LOCK_MCPAIR(mcpair); 320 while (!mcpair->flag) 321 RF_WAIT_MCPAIR(mcpair); 322 RF_UNLOCK_MCPAIR(mcpair); 323 if (wr_dag_h->status != rf_enable) { 324 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); 325 retcode = RF_PARITY_COULD_NOT_CORRECT; 326 } 327 rf_FreeDAG(wr_dag_h); 328 if (retcode == RF_PARITY_BAD) 329 retcode = RF_PARITY_CORRECTED; 330 } 331 out: 332 rf_FreeAccessStripeMap(asm_h); 333 rf_FreeAllocList(alloclist); 334 rf_FreeDAG(rd_dag_h); 335 rf_FreeMCPair(mcpair); 336 return (retcode); 337 } 338 339 int 340 rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, 341 int parity) 342 { 343 if (raidPtr->Disks[pda->col].status == rf_ds_reconstructing) { 344 if (rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, pda->startSector)) { 345 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 346 if 
(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 347 #if RF_DEBUG_VERIFYPARITY 348 RF_RowCol_t oc = pda->col; 349 RF_SectorNum_t os = pda->startSector; 350 #endif 351 if (parity) { 352 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 353 #if RF_DEBUG_VERIFYPARITY 354 if (rf_verifyParityDebug) 355 printf("VerifyParity: Redir P c %d sect %ld -> c %d sect %ld\n", 356 oc, (long) os, pda->col, (long) pda->startSector); 357 #endif 358 } else { 359 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); 360 #if RF_DEBUG_VERIFYPARITY 361 if (rf_verifyParityDebug) 362 printf("VerifyParity: Redir D c %d sect %ld -> c %d sect %ld\n", 363 oc, (long) os, pda->col, (long) pda->startSector); 364 #endif 365 } 366 } else { 367 #endif 368 RF_RowCol_t spCol = raidPtr->Disks[pda->col].spareCol; 369 pda->col = spCol; 370 #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 371 } 372 #endif 373 } 374 } 375 if (RF_DEAD_DISK(raidPtr->Disks[pda->col].status)) 376 return (1); 377 return (0); 378 } 379 /***************************************************************************** 380 * 381 * currently a stub. 382 * 383 * takes as input an ASM describing a write operation and containing 384 * one failure, and verifies that the parity was correctly updated to 385 * reflect the write. 386 * 387 * if it's a data unit that's failed, we read the other data units in 388 * the stripe and the parity unit, XOR them together, and verify that 389 * we get the data intended for the failed disk. Since it's easy, we 390 * also validate that the right data got written to the surviving data 391 * disks. 392 * 393 * If it's the parity that failed, there's really no validation we can 394 * do except the above verification that the right data got written to 395 * all disks. This is because the new data intended for the failed 396 * disk is supplied in the ASM, but this is of course not the case for 397 * the new parity. 
398 * 399 ****************************************************************************/ 400 #if 0 401 int 402 rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh) 403 { 404 return (0); 405 } 406 #endif 407 /* creates a simple DAG with a header, a block-recon node at level 1, 408 * nNodes nodes at level 2, an unblock-recon node at level 3, and a 409 * terminator node at level 4. The stripe address field in the block 410 * and unblock nodes are not touched, nor are the pda fields in the 411 * second-level nodes, so they must be filled in later. 412 * 413 * commit point is established at unblock node - this means that any 414 * failure during dag execution causes the dag to fail 415 * 416 * name - node names at the second level 417 */ 418 RF_DagHeader_t * 419 rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, int bytesPerSU, char *databuf, 420 void (*doFunc) (RF_DagNode_t * node), 421 void (*undoFunc) (RF_DagNode_t * node), 422 const char *name, RF_AllocListElem_t *alloclist, 423 RF_RaidAccessFlags_t flags, int priority) 424 { 425 RF_DagHeader_t *dag_h; 426 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode, *tmpNode; 427 int i; 428 429 /* grab a DAG header... 
*/ 430 431 dag_h = rf_AllocDAGHeader(); 432 dag_h->raidPtr = (void *) raidPtr; 433 dag_h->allocList = NULL;/* we won't use this alloc list */ 434 dag_h->status = rf_enable; 435 dag_h->numSuccedents = 1; 436 dag_h->creator = "SimpleDAG"; 437 438 /* this dag can not commit until the unblock node is reached errors 439 * prior to the commit point imply the dag has failed */ 440 dag_h->numCommitNodes = 1; 441 dag_h->numCommits = 0; 442 443 /* create the nodes, the block & unblock nodes, and the terminator 444 * node */ 445 446 for (i = 0; i < nNodes; i++) { 447 tmpNode = rf_AllocDAGNode(); 448 tmpNode->list_next = dag_h->nodes; 449 dag_h->nodes = tmpNode; 450 } 451 nodes = dag_h->nodes; 452 453 blockNode = rf_AllocDAGNode(); 454 blockNode->list_next = dag_h->nodes; 455 dag_h->nodes = blockNode; 456 457 unblockNode = rf_AllocDAGNode(); 458 unblockNode->list_next = dag_h->nodes; 459 dag_h->nodes = unblockNode; 460 461 termNode = rf_AllocDAGNode(); 462 termNode->list_next = dag_h->nodes; 463 dag_h->nodes = termNode; 464 465 dag_h->succedents[0] = blockNode; 466 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); 467 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); 468 unblockNode->succedents[0] = termNode; 469 tmpNode = nodes; 470 for (i = 0; i < nNodes; i++) { 471 blockNode->succedents[i] = unblockNode->antecedents[i] = tmpNode; 472 unblockNode->antType[i] = rf_control; 473 rf_InitNode(tmpNode, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); 474 tmpNode->succedents[0] = unblockNode; 475 tmpNode->antecedents[0] = blockNode; 476 tmpNode->antType[0] = rf_control; 477 tmpNode->params[1].p = (databuf + (i * bytesPerSU)); 478 tmpNode = tmpNode->list_next; 479 } 480 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", 
alloclist); 481 termNode->antecedents[0] = unblockNode; 482 termNode->antType[0] = rf_control; 483 return (dag_h); 484 } 485