1 /* $NetBSD: rf_parityscan.c,v 1.8 2000/01/05 02:57:28 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /***************************************************************************** 30 * 31 * rf_parityscan.c -- misc utilities related to parity verification 32 * 33 *****************************************************************************/ 34 35 #include "rf_types.h" 36 #include "rf_raid.h" 37 #include "rf_dag.h" 38 #include "rf_dagfuncs.h" 39 #include "rf_dagutils.h" 40 #include "rf_mcpair.h" 41 #include "rf_general.h" 42 #include "rf_engine.h" 43 #include "rf_parityscan.h" 44 #include "rf_map.h" 45 46 /***************************************************************************************** 47 * 48 * walk through the entire arry and write new parity. 49 * This works by creating two DAGs, one to read a stripe of data and one to 50 * write new parity. The first is executed, the data is xored together, and 51 * then the second is executed. To avoid constantly building and tearing down 52 * the DAGs, we create them a priori and fill them in with the mapping 53 * information as we go along. 54 * 55 * there should never be more than one thread running this. 56 * 57 ****************************************************************************************/ 58 59 int 60 rf_RewriteParity(raidPtr) 61 RF_Raid_t *raidPtr; 62 { 63 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 64 RF_AccessStripeMapHeader_t *asm_h; 65 int ret_val; 66 int rc; 67 RF_PhysDiskAddr_t pda; 68 RF_SectorNum_t i; 69 70 if (raidPtr->Layout.map->faultsTolerated == 0) { 71 /* There isn't any parity. Call it "okay." */ 72 return (RF_PARITY_OKAY); 73 } 74 if (raidPtr->status[0] != rf_rs_optimal) { 75 /* 76 * We're in degraded mode. Don't try to verify parity now! 77 * XXX: this should be a "we don't want to", not a 78 * "we can't" error. 79 */ 80 return (RF_PARITY_COULD_NOT_VERIFY); 81 } 82 83 ret_val = 0; 84 85 pda.startSector = 0; 86 pda.numSector = raidPtr->Layout.sectorsPerStripeUnit; 87 rc = RF_PARITY_OKAY; 88 89 for (i = 0; i < raidPtr->totalSectors && 90 rc <= RF_PARITY_CORRECTED; 91 i += layoutPtr->dataSectorsPerStripe) { 92 asm_h = rf_MapAccess(raidPtr, i, 93 layoutPtr->dataSectorsPerStripe, 94 NULL, RF_DONT_REMAP); 95 raidPtr->parity_rewrite_stripes_done = 96 i / layoutPtr->dataSectorsPerStripe ; 97 rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); 98 99 switch (rc) { 100 case RF_PARITY_OKAY: 101 case RF_PARITY_CORRECTED: 102 break; 103 case RF_PARITY_BAD: 104 printf("Parity bad during correction\n"); 105 ret_val = 1; 106 break; 107 case RF_PARITY_COULD_NOT_CORRECT: 108 printf("Could not correct bad parity\n"); 109 ret_val = 1; 110 break; 111 case RF_PARITY_COULD_NOT_VERIFY: 112 printf("Could not verify parity\n"); 113 ret_val = 1; 114 break; 115 default: 116 printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc); 117 ret_val = 1; 118 } 119 rf_FreeAccessStripeMap(asm_h); 120 } 121 return (ret_val); 122 } 123 /***************************************************************************************** 124 * 125 * verify that the parity in a particular stripe is correct. 126 * we validate only the range of parity defined by parityPDA, since 127 * this is all we have locked. The way we do this is to create an asm 128 * that maps the whole stripe and then range-restrict it to the parity 129 * region defined by the parityPDA. 130 * 131 ****************************************************************************************/ 132 int 133 rf_VerifyParity(raidPtr, aasm, correct_it, flags) 134 RF_Raid_t *raidPtr; 135 RF_AccessStripeMap_t *aasm; 136 int correct_it; 137 RF_RaidAccessFlags_t flags; 138 { 139 RF_PhysDiskAddr_t *parityPDA; 140 RF_AccessStripeMap_t *doasm; 141 RF_LayoutSW_t *lp; 142 int lrc, rc; 143 144 lp = raidPtr->Layout.map; 145 if (lp->faultsTolerated == 0) { 146 /* 147 * There isn't any parity. Call it "okay." 148 */ 149 return (RF_PARITY_OKAY); 150 } 151 rc = RF_PARITY_OKAY; 152 if (lp->VerifyParity) { 153 for (doasm = aasm; doasm; doasm = doasm->next) { 154 for (parityPDA = doasm->parityInfo; parityPDA; 155 parityPDA = parityPDA->next) { 156 lrc = lp->VerifyParity(raidPtr, 157 doasm->raidAddress, 158 parityPDA, 159 correct_it, flags); 160 if (lrc > rc) { 161 /* see rf_parityscan.h for why this 162 * works */ 163 rc = lrc; 164 } 165 } 166 } 167 } else { 168 rc = RF_PARITY_COULD_NOT_VERIFY; 169 } 170 return (rc); 171 } 172 173 int 174 rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags) 175 RF_Raid_t *raidPtr; 176 RF_RaidAddr_t raidAddr; 177 RF_PhysDiskAddr_t *parityPDA; 178 int correct_it; 179 RF_RaidAccessFlags_t flags; 180 { 181 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 182 RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, 183 raidAddr); 184 RF_SectorCount_t numsector = parityPDA->numSector; 185 int numbytes = rf_RaidAddressToByte(raidPtr, numsector); 186 int bytesPerStripe = numbytes * layoutPtr->numDataCol; 187 RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ 188 RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock; 189 RF_AccessStripeMapHeader_t *asm_h; 190 RF_AccessStripeMap_t *asmap; 191 RF_AllocListElem_t *alloclist; 192 RF_PhysDiskAddr_t *pda; 193 char *pbuf, *buf, *end_p, *p; 194 int i, retcode; 195 RF_ReconUnitNum_t which_ru; 196 RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, 197 raidAddr, 198 &which_ru); 199 int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; 200 RF_AccTraceEntry_t tracerec; 201 RF_MCPair_t *mcpair; 202 203 retcode = RF_PARITY_OKAY; 204 205 mcpair = rf_AllocMCPair(); 206 rf_MakeAllocList(alloclist); 207 RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); 208 RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist); /* use calloc to make 209 * sure buffer is zeroed */ 210 end_p = buf + bytesPerStripe; 211 212 rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc, 213 "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY); 214 blockNode = rd_dag_h->succedents[0]; 215 unblockNode = blockNode->succedents[0]->succedents[0]; 216 217 /* map the stripe and fill in the PDAs in the dag */ 218 asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP); 219 asmap = asm_h->stripeMap; 220 221 for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { 222 RF_ASSERT(pda); 223 rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); 224 RF_ASSERT(pda->numSector != 0); 225 if (rf_TryToRedirectPDA(raidPtr, pda, 0)) 226 goto out; /* no way to verify parity if disk is 227 * dead. return w/ good status */ 228 blockNode->succedents[i]->params[0].p = pda; 229 blockNode->succedents[i]->params[2].v = psID; 230 blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 231 } 232 233 RF_ASSERT(!asmap->parityInfo->next); 234 rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); 235 RF_ASSERT(asmap->parityInfo->numSector != 0); 236 if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) 237 goto out; 238 blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; 239 240 /* fire off the DAG */ 241 bzero((char *) &tracerec, sizeof(tracerec)); 242 rd_dag_h->tracerec = &tracerec; 243 244 if (rf_verifyParityDebug) { 245 printf("Parity verify read dag:\n"); 246 rf_PrintDAGList(rd_dag_h); 247 } 248 RF_LOCK_MUTEX(mcpair->mutex); 249 mcpair->flag = 0; 250 rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 251 (void *) mcpair); 252 while (!mcpair->flag) 253 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 254 RF_UNLOCK_MUTEX(mcpair->mutex); 255 if (rd_dag_h->status != rf_enable) { 256 RF_ERRORMSG("Unable to verify parity: can't read the stripe\n"); 257 retcode = RF_PARITY_COULD_NOT_VERIFY; 258 goto out; 259 } 260 for (p = buf; p < end_p; p += numbytes) { 261 rf_bxor(p, pbuf, numbytes, NULL); 262 } 263 for (i = 0; i < numbytes; i++) { 264 #if 0 265 if (pbuf[i] != 0 || buf[bytesPerStripe + i] != 0) { 266 printf("Bytes: %d %d %d\n", i, pbuf[i], buf[bytesPerStripe + i]); 267 } 268 #endif 269 if (pbuf[i] != buf[bytesPerStripe + i]) { 270 if (!correct_it) 271 RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n", 272 i, (u_char) buf[bytesPerStripe + i], (u_char) pbuf[i]); 273 retcode = RF_PARITY_BAD; 274 break; 275 } 276 } 277 278 if (retcode && correct_it) { 279 wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 280 "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY); 281 wrBlock = wr_dag_h->succedents[0]; 282 wrUnblock = wrBlock->succedents[0]->succedents[0]; 283 wrBlock->succedents[0]->params[0].p = asmap->parityInfo; 284 wrBlock->succedents[0]->params[2].v = psID; 285 wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru); 286 bzero((char *) &tracerec, sizeof(tracerec)); 287 wr_dag_h->tracerec = &tracerec; 288 if (rf_verifyParityDebug) { 289 printf("Parity verify write dag:\n"); 290 rf_PrintDAGList(wr_dag_h); 291 } 292 RF_LOCK_MUTEX(mcpair->mutex); 293 mcpair->flag = 0; 294 rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 295 (void *) mcpair); 296 while (!mcpair->flag) 297 RF_WAIT_COND(mcpair->cond, mcpair->mutex); 298 RF_UNLOCK_MUTEX(mcpair->mutex); 299 if (wr_dag_h->status != rf_enable) { 300 RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n"); 301 retcode = RF_PARITY_COULD_NOT_CORRECT; 302 } 303 rf_FreeDAG(wr_dag_h); 304 if (retcode == RF_PARITY_BAD) 305 retcode = RF_PARITY_CORRECTED; 306 } 307 out: 308 rf_FreeAccessStripeMap(asm_h); 309 rf_FreeAllocList(alloclist); 310 rf_FreeDAG(rd_dag_h); 311 rf_FreeMCPair(mcpair); 312 return (retcode); 313 } 314 315 int 316 rf_TryToRedirectPDA(raidPtr, pda, parity) 317 RF_Raid_t *raidPtr; 318 RF_PhysDiskAddr_t *pda; 319 int parity; 320 { 321 if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) { 322 if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) { 323 if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { 324 RF_RowCol_t or = pda->row, oc = pda->col; 325 RF_SectorNum_t os = pda->startSector; 326 if (parity) { 327 (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); 328 if (rf_verifyParityDebug) 329 printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n", 330 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); 331 } else { 332 (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP); 333 if (rf_verifyParityDebug) 334 printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n", 335 or, oc, (long) os, pda->row, pda->col, (long) pda->startSector); 336 } 337 } else { 338 RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow; 339 RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol; 340 pda->row = spRow; 341 pda->col = spCol; 342 } 343 } 344 } 345 if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) 346 return (1); 347 return (0); 348 } 349 /***************************************************************************************** 350 * 351 * currently a stub. 352 * 353 * takes as input an ASM describing a write operation and containing one failure, and 354 * verifies that the parity was correctly updated to reflect the write. 355 * 356 * if it's a data unit that's failed, we read the other data units in the stripe and 357 * the parity unit, XOR them together, and verify that we get the data intended for 358 * the failed disk. Since it's easy, we also validate that the right data got written 359 * to the surviving data disks. 360 * 361 * If it's the parity that failed, there's really no validation we can do except the 362 * above verification that the right data got written to all disks. This is because 363 * the new data intended for the failed disk is supplied in the ASM, but this is of 364 * course not the case for the new parity. 365 * 366 ****************************************************************************************/ 367 int 368 rf_VerifyDegrModeWrite(raidPtr, asmh) 369 RF_Raid_t *raidPtr; 370 RF_AccessStripeMapHeader_t *asmh; 371 { 372 return (0); 373 } 374 /* creates a simple DAG with a header, a block-recon node at level 1, 375 * nNodes nodes at level 2, an unblock-recon node at level 3, and 376 * a terminator node at level 4. The stripe address field in 377 * the block and unblock nodes are not touched, nor are the pda 378 * fields in the second-level nodes, so they must be filled in later. 379 * 380 * commit point is established at unblock node - this means that any 381 * failure during dag execution causes the dag to fail 382 */ 383 RF_DagHeader_t * 384 rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority) 385 RF_Raid_t *raidPtr; 386 int nNodes; 387 int bytesPerSU; 388 char *databuf; 389 int (*doFunc) (RF_DagNode_t * node); 390 int (*undoFunc) (RF_DagNode_t * node); 391 char *name; /* node names at the second level */ 392 RF_AllocListElem_t *alloclist; 393 RF_RaidAccessFlags_t flags; 394 int priority; 395 { 396 RF_DagHeader_t *dag_h; 397 RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode; 398 int i; 399 400 /* create the nodes, the block & unblock nodes, and the terminator 401 * node */ 402 RF_CallocAndAdd(nodes, nNodes + 3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist); 403 blockNode = &nodes[nNodes]; 404 unblockNode = blockNode + 1; 405 termNode = unblockNode + 1; 406 407 dag_h = rf_AllocDAGHeader(); 408 dag_h->raidPtr = (void *) raidPtr; 409 dag_h->allocList = NULL;/* we won't use this alloc list */ 410 dag_h->status = rf_enable; 411 dag_h->numSuccedents = 1; 412 dag_h->creator = "SimpleDAG"; 413 414 /* this dag can not commit until the unblock node is reached errors 415 * prior to the commit point imply the dag has failed */ 416 dag_h->numCommitNodes = 1; 417 dag_h->numCommits = 0; 418 419 dag_h->succedents[0] = blockNode; 420 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist); 421 rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist); 422 unblockNode->succedents[0] = termNode; 423 for (i = 0; i < nNodes; i++) { 424 blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i]; 425 unblockNode->antType[i] = rf_control; 426 rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); 427 nodes[i].succedents[0] = unblockNode; 428 nodes[i].antecedents[0] = blockNode; 429 nodes[i].antType[0] = rf_control; 430 nodes[i].params[1].p = (databuf + (i * bytesPerSU)); 431 } 432 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist); 433 termNode->antecedents[0] = unblockNode; 434 termNode->antType[0] = rf_control; 435 return (dag_h); 436 } 437