/* $NetBSD: rf_chaindecluster.c,v 1.1 1998/11/13 04:20:26 oster Exp $ */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Khalil Amiri
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/******************************************************************************
 *
 * rf_chaindecluster.c -- implements chained declustering
 *
 *****************************************************************************/

/* :
 * Log: rf_chaindecluster.c,v
 * Revision 1.33 1996/08/02 13:20:34 jimz
 * get rid of bogus (long) casts
 *
 * Revision 1.32 1996/07/31 16:56:18 jimz
 * dataBytesPerStripe, sectorsPerDisk init arch-indep.
 *
 * Revision 1.31 1996/07/29 14:05:12 jimz
 * fix numPUs/numRUs confusion (everything is now numRUs)
 * clean up some commenting, return values
 *
 * Revision 1.30 1996/07/22 19:52:16 jimz
 * switched node params to RF_DagParam_t, a union of
 * a 64-bit int and a void *, for better portability
 * attempted hpux port, but failed partway through for
 * lack of a single C compiler capable of compiling all
 * source files
 *
 * Revision 1.29 1996/07/18 22:57:14 jimz
 * port simulator to AIX
 *
 * Revision 1.28 1996/06/19 17:53:48 jimz
 * move GetNumSparePUs, InstallSpareTable ops into layout switch
 *
 * Revision 1.27 1996/06/11 15:19:57 wvcii
 * added include of rf_chaindecluster.h
 * fixed parameter list of rf_ConfigureChainDecluster
 *
 * Revision 1.26 1996/06/11 08:55:15 jimz
 * improved error-checking at configuration time
 *
 * Revision 1.25 1996/06/10 11:55:47 jimz
 * Straightened out some per-array/not-per-array distinctions, fixed
 * a couple bugs related to confusion. Added shutdown lists. Removed
 * layout shutdown function (now subsumed by shutdown lists).
 *
 * Revision 1.24 1996/06/07 22:26:27 jimz
 * type-ify which_ru (RF_ReconUnitNum_t)
 *
 * Revision 1.23 1996/06/07 21:33:04 jimz
 * begin using consistent types for sector numbers,
 * stripe numbers, row+col numbers, recon unit numbers
 *
 * Revision 1.22 1996/06/06 17:31:30 jimz
 * use CreateMirrorPartitionReadDAG for mirrored reads
 *
 * Revision 1.21 1996/06/03 23:28:26 jimz
 * more bugfixes
 * check in tree to sync for IPDS runs with current bugfixes
 * there still may be a problem with threads in the script test
 * getting I/Os stuck- not trivially reproducible (runs ~50 times
 * in a row without getting stuck)
 *
 * Revision 1.20 1996/06/02 17:31:48 jimz
 * Moved a lot of global stuff into array structure, where it belongs.
 * Fixed up paritylogging, pss modules in this manner.
Some general 92 * code cleanup. Removed lots of dead code, some dead files. 93 * 94 * Revision 1.19 1996/05/31 22:26:54 jimz 95 * fix a lot of mapping problems, memory allocation problems 96 * found some weird lock issues, fixed 'em 97 * more code cleanup 98 * 99 * Revision 1.18 1996/05/31 16:13:28 amiri 100 * removed/added some commnets. 101 * 102 * Revision 1.17 1996/05/31 05:01:52 amiri 103 * fixed a bug related to sparing layout. 104 * 105 * Revision 1.16 1996/05/30 23:22:16 jimz 106 * bugfixes of serialization, timing problems 107 * more cleanup 108 * 109 * Revision 1.15 1996/05/27 18:56:37 jimz 110 * more code cleanup 111 * better typing 112 * compiles in all 3 environments 113 * 114 * Revision 1.14 1996/05/24 22:17:04 jimz 115 * continue code + namespace cleanup 116 * typed a bunch of flags 117 * 118 * Revision 1.13 1996/05/23 21:46:35 jimz 119 * checkpoint in code cleanup (release prep) 120 * lots of types, function names have been fixed 121 * 122 * Revision 1.12 1996/05/23 00:33:23 jimz 123 * code cleanup: move all debug decls to rf_options.c, all extern 124 * debug decls to rf_options.h, all debug vars preceded by rf_ 125 * 126 * Revision 1.11 1996/05/18 19:51:34 jimz 127 * major code cleanup- fix syntax, make some types consistent, 128 * add prototypes, clean out dead code, et cetera 129 * 130 * Revision 1.10 1996/05/03 19:53:56 wvcii 131 * removed include of rf_redstripe.h 132 * moved dag creation routines to new dag library 133 * 134 */ 135 136 #include "rf_archs.h" 137 #include "rf_types.h" 138 #include "rf_raid.h" 139 #include "rf_chaindecluster.h" 140 #include "rf_dag.h" 141 #include "rf_dagutils.h" 142 #include "rf_dagffrd.h" 143 #include "rf_dagffwr.h" 144 #include "rf_dagdegrd.h" 145 #include "rf_dagfuncs.h" 146 #include "rf_threadid.h" 147 #include "rf_general.h" 148 #include "rf_utils.h" 149 150 typedef struct RF_ChaindeclusterConfigInfo_s { 151 RF_RowCol_t **stripeIdentifier; /* filled in at config time 152 * and used by IdentifyStripe */ 153 
RF_StripeCount_t numSparingRegions; 154 RF_StripeCount_t stripeUnitsPerSparingRegion; 155 RF_SectorNum_t mirrorStripeOffset; 156 } RF_ChaindeclusterConfigInfo_t; 157 158 int rf_ConfigureChainDecluster( 159 RF_ShutdownList_t **listp, 160 RF_Raid_t *raidPtr, 161 RF_Config_t *cfgPtr) 162 { 163 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 164 RF_StripeCount_t num_used_stripeUnitsPerDisk; 165 RF_ChaindeclusterConfigInfo_t *info; 166 RF_RowCol_t i; 167 168 /* create a Chained Declustering configuration structure */ 169 RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); 170 if (info == NULL) 171 return(ENOMEM); 172 layoutPtr->layoutSpecificInfo = (void *) info; 173 174 /* fill in the config structure. */ 175 info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2 , raidPtr->cleanupList); 176 if (info->stripeIdentifier == NULL) 177 return(ENOMEM); 178 for (i=0; i< raidPtr->numCol; i++) { 179 info->stripeIdentifier[i][0] = i % raidPtr->numCol; 180 info->stripeIdentifier[i][1] = (i+1) % raidPtr->numCol; 181 } 182 183 RF_ASSERT(raidPtr->numRow == 1); 184 185 /* fill in the remaining layout parameters */ 186 num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % 187 (2*raidPtr->numCol-2) ); 188 info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol-2); 189 info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); 190 info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol-1); 191 layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; 192 layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector; 193 layoutPtr->numDataCol = 1; 194 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 195 layoutPtr->numParityCol = 1; 196 197 layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; 198 199 
raidPtr->sectorsPerDisk = 200 num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; 201 202 raidPtr->totalSectors = 203 (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; 204 205 layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; 206 207 return(0); 208 } 209 210 RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(raidPtr) 211 RF_Raid_t *raidPtr; 212 { 213 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 214 215 /* 216 * The layout uses two stripe units per disk as spare within each 217 * sparing region. 218 */ 219 return (2*info->numSparingRegions); 220 } 221 222 223 /* Maps to the primary copy of the data, i.e. the first mirror pair */ 224 void rf_MapSectorChainDecluster( 225 RF_Raid_t *raidPtr, 226 RF_RaidAddr_t raidSector, 227 RF_RowCol_t *row, 228 RF_RowCol_t *col, 229 RF_SectorNum_t *diskSector, 230 int remap) 231 { 232 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 233 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 234 RF_SectorNum_t index_within_region, index_within_disk; 235 RF_StripeNum_t sparing_region_id; 236 int col_before_remap; 237 238 *row = 0; 239 sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 240 index_within_region = SUID % info->stripeUnitsPerSparingRegion; 241 index_within_disk = index_within_region / raidPtr->numCol; 242 col_before_remap = SUID % raidPtr->numCol; 243 244 if (!remap) { 245 *col = col_before_remap; 246 *diskSector = ( index_within_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) * 247 raidPtr->Layout.sectorsPerStripeUnit; 248 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 249 } 250 else { 251 /* remap sector to spare space...*/ 252 *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; 253 *diskSector += (raidPtr->numCol-1) * 
raidPtr->Layout.sectorsPerStripeUnit; 254 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 255 index_within_disk = index_within_region / raidPtr->numCol; 256 if (index_within_disk < col_before_remap ) 257 *col = index_within_disk; 258 else if (index_within_disk == raidPtr->numCol-2 ) { 259 *col = (col_before_remap+raidPtr->numCol-1) % raidPtr->numCol; 260 *diskSector += raidPtr->Layout.sectorsPerStripeUnit; 261 } 262 else 263 *col = (index_within_disk + 2) % raidPtr->numCol; 264 } 265 266 } 267 268 269 270 /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained 271 in the next disk (mod numCol) after the disk containing the primary copy. 272 The offset into the disk is one-half disk down */ 273 void rf_MapParityChainDecluster( 274 RF_Raid_t *raidPtr, 275 RF_RaidAddr_t raidSector, 276 RF_RowCol_t *row, 277 RF_RowCol_t *col, 278 RF_SectorNum_t *diskSector, 279 int remap) 280 { 281 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 282 RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 283 RF_SectorNum_t index_within_region, index_within_disk; 284 RF_StripeNum_t sparing_region_id; 285 int col_before_remap; 286 287 *row = 0; 288 if (!remap) { 289 *col = SUID % raidPtr->numCol; 290 *col = (*col + 1) % raidPtr->numCol; 291 *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; 292 *diskSector += ( SUID / raidPtr->numCol ) * raidPtr->Layout.sectorsPerStripeUnit; 293 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 294 } 295 else { 296 /* remap parity to spare space ... 
*/ 297 sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 298 index_within_region = SUID % info->stripeUnitsPerSparingRegion; 299 index_within_disk = index_within_region / raidPtr->numCol; 300 *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit; 301 *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 302 *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 303 col_before_remap = SUID % raidPtr->numCol; 304 if (index_within_disk < col_before_remap) 305 *col = index_within_disk; 306 else if (index_within_disk == raidPtr->numCol-2 ) { 307 *col = (col_before_remap+2) % raidPtr->numCol; 308 *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; 309 } 310 else 311 *col = (index_within_disk + 2) % raidPtr->numCol; 312 } 313 314 } 315 316 void rf_IdentifyStripeChainDecluster( 317 RF_Raid_t *raidPtr, 318 RF_RaidAddr_t addr, 319 RF_RowCol_t **diskids, 320 RF_RowCol_t *outRow) 321 { 322 RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 323 RF_StripeNum_t SUID; 324 RF_RowCol_t col; 325 326 SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; 327 col = SUID % raidPtr->numCol; 328 *outRow = 0; 329 *diskids = info->stripeIdentifier[ col ]; 330 } 331 332 void rf_MapSIDToPSIDChainDecluster( 333 RF_RaidLayout_t *layoutPtr, 334 RF_StripeNum_t stripeID, 335 RF_StripeNum_t *psID, 336 RF_ReconUnitNum_t *which_ru) 337 { 338 *which_ru = 0; 339 *psID = stripeID; 340 } 341 342 /****************************************************************************** 343 * select a graph to perform a single-stripe access 344 * 345 * Parameters: raidPtr - description of the physical array 346 * type - type of operation (read or write) requested 347 * asmap - logical & physical addresses for this access 348 * createFunc - function to use to create the graph (return value) 349 *****************************************************************************/ 350 351 
void rf_RAIDCDagSelect( 352 RF_Raid_t *raidPtr, 353 RF_IoType_t type, 354 RF_AccessStripeMap_t *asmap, 355 RF_VoidFuncPtr *createFunc) 356 #if 0 357 void (**createFunc)(RF_Raid_t *, RF_AccessStripeMap_t *, 358 RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, 359 RF_AllocListElem_t *)) 360 #endif 361 { 362 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 363 RF_ASSERT(raidPtr->numRow == 1); 364 365 if (asmap->numDataFailed + asmap->numParityFailed > 1) { 366 RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 367 *createFunc = NULL; 368 return; 369 } 370 371 *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG :(RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 372 373 if (type == RF_IO_TYPE_READ) { 374 if ( ( raidPtr->status[0] == rf_rs_degraded ) || ( raidPtr->status[0] == rf_rs_reconstructing) ) 375 *createFunc = (RF_VoidFuncPtr)rf_CreateRaidCDegradedReadDAG; /* array status is degraded, implement workload shifting */ 376 else 377 *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; /* array status not degraded, so use mirror partition dag */ 378 } 379 else 380 *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG; 381 } 382