1 /* $NetBSD: rf_reconutil.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /******************************************** 30 * rf_reconutil.c -- reconstruction utilities 31 ********************************************/ 32 33 /* : 34 * Log: rf_reconutil.c,v 35 * Revision 1.32 1996/07/29 14:05:12 jimz 36 * fix numPUs/numRUs confusion (everything is now numRUs) 37 * clean up some commenting, return values 38 * 39 * Revision 1.31 1996/07/15 05:40:41 jimz 40 * some recon datastructure cleanup 41 * better handling of multiple failures 42 * added undocumented double-recon test 43 * 44 * Revision 1.30 1996/07/13 00:00:59 jimz 45 * sanitized generalized reconstruction architecture 46 * cleaned up head sep, rbuf problems 47 * 48 * Revision 1.29 1996/06/19 17:53:48 jimz 49 * move GetNumSparePUs, InstallSpareTable ops into layout switch 50 * 51 * Revision 1.28 1996/06/07 21:33:04 jimz 52 * begin using consistent types for sector numbers, 53 * stripe numbers, row+col numbers, recon unit numbers 54 * 55 * Revision 1.27 1996/06/05 18:06:02 jimz 56 * Major code cleanup. The Great Renaming is now done. 57 * Better modularity. Better typing. Fixed a bunch of 58 * synchronization bugs. Made a lot of global stuff 59 * per-desc or per-array. Removed dead code. 60 * 61 * Revision 1.26 1996/06/03 23:28:26 jimz 62 * more bugfixes 63 * check in tree to sync for IPDS runs with current bugfixes 64 * there still may be a problem with threads in the script test 65 * getting I/Os stuck- not trivially reproducible (runs ~50 times 66 * in a row without getting stuck) 67 * 68 * Revision 1.25 1996/06/02 17:31:48 jimz 69 * Moved a lot of global stuff into array structure, where it belongs. 70 * Fixed up paritylogging, pss modules in this manner. Some general 71 * code cleanup. Removed lots of dead code, some dead files. 72 * 73 * Revision 1.24 1996/05/31 22:26:54 jimz 74 * fix a lot of mapping problems, memory allocation problems 75 * found some weird lock issues, fixed 'em 76 * more code cleanup 77 * 78 * Revision 1.23 1996/05/30 23:22:16 jimz 79 * bugfixes of serialization, timing problems 80 * more cleanup 81 * 82 * Revision 1.22 1996/05/30 11:29:41 jimz 83 * Numerous bug fixes. Stripe lock release code disagreed with the taking code 84 * about when stripes should be locked (I made it consistent: no parity, no lock) 85 * There was a lot of extra serialization of I/Os which I've removed- a lot of 86 * it was to calculate values for the cache code, which is no longer with us. 87 * More types, function, macro cleanup. Added code to properly quiesce the array 88 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general 89 * before. Fixed memory allocation, freeing bugs. 90 * 91 * Revision 1.21 1996/05/27 18:56:37 jimz 92 * more code cleanup 93 * better typing 94 * compiles in all 3 environments 95 * 96 * Revision 1.20 1996/05/23 00:33:23 jimz 97 * code cleanup: move all debug decls to rf_options.c, all extern 98 * debug decls to rf_options.h, all debug vars preceded by rf_ 99 * 100 * Revision 1.19 1996/05/20 16:14:55 jimz 101 * switch to rf_{mutex,cond}_{init,destroy} 102 * 103 * Revision 1.18 1996/05/18 19:51:34 jimz 104 * major code cleanup- fix syntax, make some types consistent, 105 * add prototypes, clean out dead code, et cetera 106 * 107 * Revision 1.17 1995/12/12 18:10:06 jimz 108 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT 109 * fix 80-column brain damage in comments 110 * 111 * Revision 1.16 1995/12/06 15:05:31 root 112 * added copyright info 113 * 114 */ 115 116 #include "rf_types.h" 117 #include "rf_raid.h" 118 #include "rf_desc.h" 119 #include "rf_reconutil.h" 120 #include "rf_reconbuffer.h" 121 #include "rf_general.h" 122 #include "rf_decluster.h" 123 #include "rf_raid5_rotatedspare.h" 124 #include "rf_interdecluster.h" 125 #include "rf_chaindecluster.h" 126 127 /******************************************************************* 128 * allocates/frees the reconstruction control information structures 129 *******************************************************************/ 130 RF_ReconCtrl_t *rf_MakeReconControl(reconDesc, frow, fcol, srow, scol) 131 RF_RaidReconDesc_t *reconDesc; 132 RF_RowCol_t frow; /* failed row and column */ 133 RF_RowCol_t fcol; 134 RF_RowCol_t srow; /* identifies which spare we're using */ 135 RF_RowCol_t scol; 136 { 137 RF_Raid_t *raidPtr = reconDesc->raidPtr; 138 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 139 RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU; 140 RF_ReconUnitCount_t numSpareRUs; 141 RF_ReconCtrl_t *reconCtrlPtr; 142 RF_ReconBuffer_t *rbuf; 143 RF_LayoutSW_t *lp; 144 int retcode, rc; 145 RF_RowCol_t i; 146 147 lp = raidPtr->Layout.map; 148 149 /* make and zero the global reconstruction structure and the per-disk structure */ 150 RF_Calloc(reconCtrlPtr, 1, sizeof(RF_ReconCtrl_t), (RF_ReconCtrl_t *)); 151 RF_Calloc(reconCtrlPtr->perDiskInfo, raidPtr->numCol, sizeof(RF_PerDiskReconCtrl_t), (RF_PerDiskReconCtrl_t *)); /* this zeros it */ 152 reconCtrlPtr->reconDesc = reconDesc; 153 reconCtrlPtr->fcol = fcol; 154 reconCtrlPtr->spareRow = srow; 155 reconCtrlPtr->spareCol = scol; 156 reconCtrlPtr->lastPSID = layoutPtr->numStripe/layoutPtr->SUsPerPU; 157 reconCtrlPtr->percentComplete = 0; 158 159 /* initialize each per-disk recon information structure */ 160 for (i=0; i<raidPtr->numCol; i++) { 161 reconCtrlPtr->perDiskInfo[i].reconCtrl = reconCtrlPtr; 162 reconCtrlPtr->perDiskInfo[i].row = frow; 163 reconCtrlPtr->perDiskInfo[i].col = i; 164 reconCtrlPtr->perDiskInfo[i].curPSID = -1; /* make it appear as if we just finished an RU */ 165 reconCtrlPtr->perDiskInfo[i].ru_count = RUsPerPU-1; 166 } 167 168 /* Get the number of spare units per disk and the sparemap in case spare is distributed */ 169 170 if (lp->GetNumSpareRUs) { 171 numSpareRUs = lp->GetNumSpareRUs(raidPtr); 172 } 173 else { 174 numSpareRUs = 0; 175 } 176 177 /* 178 * Not all distributed sparing archs need dynamic mappings 179 */ 180 if (lp->InstallSpareTable) { 181 retcode = rf_InstallSpareTable(raidPtr, frow, fcol); 182 if (retcode) { 183 RF_PANIC(); /* XXX fix this*/ 184 } 185 } 186 187 /* make the reconstruction map */ 188 reconCtrlPtr->reconMap = rf_MakeReconMap(raidPtr, (int) (layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit), 189 raidPtr->sectorsPerDisk, numSpareRUs); 190 191 /* make the per-disk reconstruction buffers */ 192 for (i=0; i<raidPtr->numCol; i++) { 193 reconCtrlPtr->perDiskInfo[i].rbuf = (i==fcol) ? NULL : rf_MakeReconBuffer(raidPtr, frow, i, RF_RBUF_TYPE_EXCLUSIVE); 194 } 195 196 /* initialize the event queue */ 197 rc = rf_mutex_init(&reconCtrlPtr->eq_mutex); 198 if (rc) { 199 /* XXX deallocate, cleanup */ 200 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 201 __LINE__, rc); 202 return(NULL); 203 } 204 rc = rf_cond_init(&reconCtrlPtr->eq_cond); 205 if (rc) { 206 /* XXX deallocate, cleanup */ 207 RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__, 208 __LINE__, rc); 209 return(NULL); 210 } 211 reconCtrlPtr->eventQueue = NULL; 212 reconCtrlPtr->eq_count = 0; 213 214 /* make the floating recon buffers and append them to the free list */ 215 rc = rf_mutex_init(&reconCtrlPtr->rb_mutex); 216 if (rc) { 217 /* XXX deallocate, cleanup */ 218 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 219 __LINE__, rc); 220 return(NULL); 221 } 222 reconCtrlPtr->fullBufferList= NULL; 223 reconCtrlPtr->priorityList = NULL; 224 reconCtrlPtr->floatingRbufs = NULL; 225 reconCtrlPtr->committedRbufs= NULL; 226 for (i=0; i<raidPtr->numFloatingReconBufs; i++) { 227 rbuf = rf_MakeReconBuffer(raidPtr, frow, fcol, RF_RBUF_TYPE_FLOATING); 228 rbuf->next = reconCtrlPtr->floatingRbufs; 229 reconCtrlPtr->floatingRbufs = rbuf; 230 } 231 232 /* create the parity stripe status table */ 233 reconCtrlPtr->pssTable = rf_MakeParityStripeStatusTable(raidPtr); 234 235 /* set the initial min head sep counter val */ 236 reconCtrlPtr->minHeadSepCounter = 0; 237 238 return(reconCtrlPtr); 239 } 240 241 void rf_FreeReconControl(raidPtr, row) 242 RF_Raid_t *raidPtr; 243 RF_RowCol_t row; 244 { 245 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row]; 246 RF_ReconBuffer_t *t; 247 RF_ReconUnitNum_t i; 248 249 RF_ASSERT(reconCtrlPtr); 250 for (i=0; i<raidPtr->numCol; i++) if (reconCtrlPtr->perDiskInfo[i].rbuf) rf_FreeReconBuffer(reconCtrlPtr->perDiskInfo[i].rbuf); 251 for (i=0; i<raidPtr->numFloatingReconBufs; i++) { 252 t = reconCtrlPtr->floatingRbufs; 253 RF_ASSERT(t); 254 reconCtrlPtr->floatingRbufs = t->next; 255 rf_FreeReconBuffer(t); 256 } 257 rf_mutex_destroy(&reconCtrlPtr->rb_mutex); 258 rf_mutex_destroy(&reconCtrlPtr->eq_mutex); 259 rf_cond_destroy(&reconCtrlPtr->eq_cond); 260 rf_FreeReconMap(reconCtrlPtr->reconMap); 261 rf_FreeParityStripeStatusTable(raidPtr, reconCtrlPtr->pssTable); 262 RF_Free(reconCtrlPtr->perDiskInfo, raidPtr->numCol * sizeof(RF_PerDiskReconCtrl_t)); 263 RF_Free(reconCtrlPtr, sizeof(*reconCtrlPtr)); 264 } 265 266 267 /****************************************************************************** 268 * computes the default head separation limit 269 *****************************************************************************/ 270 RF_HeadSepLimit_t rf_GetDefaultHeadSepLimit(raidPtr) 271 RF_Raid_t *raidPtr; 272 { 273 RF_HeadSepLimit_t hsl; 274 RF_LayoutSW_t *lp; 275 276 lp = raidPtr->Layout.map; 277 if (lp->GetDefaultHeadSepLimit == NULL) 278 return(-1); 279 hsl = lp->GetDefaultHeadSepLimit(raidPtr); 280 return(hsl); 281 } 282 283 284 /****************************************************************************** 285 * computes the default number of floating recon buffers 286 *****************************************************************************/ 287 int rf_GetDefaultNumFloatingReconBuffers(raidPtr) 288 RF_Raid_t *raidPtr; 289 { 290 RF_LayoutSW_t *lp; 291 int nrb; 292 293 lp = raidPtr->Layout.map; 294 if (lp->GetDefaultNumFloatingReconBuffers == NULL) 295 return(3 * raidPtr->numCol); 296 nrb = lp->GetDefaultNumFloatingReconBuffers(raidPtr); 297 return(nrb); 298 } 299 300 301 /****************************************************************************** 302 * creates and initializes a reconstruction buffer 303 *****************************************************************************/ 304 RF_ReconBuffer_t *rf_MakeReconBuffer( 305 RF_Raid_t *raidPtr, 306 RF_RowCol_t row, 307 RF_RowCol_t col, 308 RF_RbufType_t type) 309 { 310 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 311 RF_ReconBuffer_t *t; 312 u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, layoutPtr->SUsPerRU * layoutPtr->sectorsPerStripeUnit); 313 314 RF_Malloc(t, sizeof(RF_ReconBuffer_t), (RF_ReconBuffer_t *)); 315 RF_Malloc(t->buffer, recon_buffer_size, (caddr_t)); 316 RF_Malloc(t->arrived, raidPtr->numCol * sizeof(char), (char *)); 317 t->raidPtr = raidPtr; 318 t->row = row; t->col = col; 319 t->priority = RF_IO_RECON_PRIORITY; 320 t->type = type; 321 t->pssPtr = NULL; 322 t->next = NULL; 323 return(t); 324 } 325 326 /****************************************************************************** 327 * frees a reconstruction buffer 328 *****************************************************************************/ 329 void rf_FreeReconBuffer(rbuf) 330 RF_ReconBuffer_t *rbuf; 331 { 332 RF_Raid_t *raidPtr = rbuf->raidPtr; 333 u_int recon_buffer_size = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.SUsPerRU * raidPtr->Layout.sectorsPerStripeUnit); 334 335 RF_Free(rbuf->arrived, raidPtr->numCol * sizeof(char)); 336 RF_Free(rbuf->buffer, recon_buffer_size); 337 RF_Free(rbuf, sizeof(*rbuf)); 338 } 339 340 341 /****************************************************************************** 342 * debug only: sanity check the number of floating recon bufs in use 343 *****************************************************************************/ 344 void rf_CheckFloatingRbufCount(raidPtr, dolock) 345 RF_Raid_t *raidPtr; 346 int dolock; 347 { 348 RF_ReconParityStripeStatus_t *p; 349 RF_PSStatusHeader_t *pssTable; 350 RF_ReconBuffer_t *rbuf; 351 int i, j, sum = 0; 352 RF_RowCol_t frow=0; 353 354 for (i=0; i<raidPtr->numRow; i++) 355 if (raidPtr->reconControl[i]) { 356 frow = i; 357 break; 358 } 359 RF_ASSERT(frow >= 0); 360 361 if (dolock) 362 RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); 363 pssTable = raidPtr->reconControl[frow]->pssTable; 364 365 for (i=0; i<raidPtr->pssTableSize; i++) { 366 RF_LOCK_MUTEX(pssTable[i].mutex); 367 for (p = pssTable[i].chain; p; p=p->next) { 368 rbuf = (RF_ReconBuffer_t *) p->rbuf; 369 if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) 370 sum++; 371 372 rbuf = (RF_ReconBuffer_t *) p->writeRbuf; 373 if (rbuf && rbuf->type == RF_RBUF_TYPE_FLOATING) 374 sum++; 375 376 for (j=0; j<p->xorBufCount; j++) { 377 rbuf = (RF_ReconBuffer_t *) p->rbufsForXor[j]; 378 RF_ASSERT(rbuf); 379 if (rbuf->type == RF_RBUF_TYPE_FLOATING) 380 sum++; 381 } 382 } 383 RF_UNLOCK_MUTEX(pssTable[i].mutex); 384 } 385 386 for (rbuf = raidPtr->reconControl[frow]->floatingRbufs; rbuf; rbuf = rbuf->next) { 387 if (rbuf->type == RF_RBUF_TYPE_FLOATING) 388 sum++; 389 } 390 for (rbuf = raidPtr->reconControl[frow]->committedRbufs; rbuf; rbuf = rbuf->next) { 391 if (rbuf->type == RF_RBUF_TYPE_FLOATING) 392 sum++; 393 } 394 for (rbuf = raidPtr->reconControl[frow]->fullBufferList; rbuf; rbuf = rbuf->next) { 395 if (rbuf->type == RF_RBUF_TYPE_FLOATING) 396 sum++; 397 } 398 for (rbuf = raidPtr->reconControl[frow]->priorityList; rbuf; rbuf = rbuf->next) { 399 if (rbuf->type == RF_RBUF_TYPE_FLOATING) 400 sum++; 401 } 402 403 RF_ASSERT(sum == raidPtr->numFloatingReconBufs); 404 405 if (dolock) 406 RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex); 407 } 408