xref: /netbsd-src/sys/dev/raidframe/rf_chaindecluster.c (revision dc306354b0b29af51801a7632f1e95265a68cd81)
1 /*	$NetBSD: rf_chaindecluster.c,v 1.1 1998/11/13 04:20:26 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Khalil Amiri
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /******************************************************************************
30  *
31  * rf_chaindecluster.c -- implements chained declustering
32  *
33  *****************************************************************************/
34 
35 /* :
36  * Log: rf_chaindecluster.c,v
37  * Revision 1.33  1996/08/02 13:20:34  jimz
38  * get rid of bogus (long) casts
39  *
40  * Revision 1.32  1996/07/31  16:56:18  jimz
41  * dataBytesPerStripe, sectorsPerDisk init arch-indep.
42  *
43  * Revision 1.31  1996/07/29  14:05:12  jimz
44  * fix numPUs/numRUs confusion (everything is now numRUs)
45  * clean up some commenting, return values
46  *
47  * Revision 1.30  1996/07/22  19:52:16  jimz
48  * switched node params to RF_DagParam_t, a union of
49  * a 64-bit int and a void *, for better portability
50  * attempted hpux port, but failed partway through for
51  * lack of a single C compiler capable of compiling all
52  * source files
53  *
54  * Revision 1.29  1996/07/18  22:57:14  jimz
55  * port simulator to AIX
56  *
57  * Revision 1.28  1996/06/19  17:53:48  jimz
58  * move GetNumSparePUs, InstallSpareTable ops into layout switch
59  *
60  * Revision 1.27  1996/06/11  15:19:57  wvcii
61  * added include of rf_chaindecluster.h
62  * fixed parameter list of rf_ConfigureChainDecluster
63  *
64  * Revision 1.26  1996/06/11  08:55:15  jimz
65  * improved error-checking at configuration time
66  *
67  * Revision 1.25  1996/06/10  11:55:47  jimz
68  * Straightened out some per-array/not-per-array distinctions, fixed
69  * a couple bugs related to confusion. Added shutdown lists. Removed
70  * layout shutdown function (now subsumed by shutdown lists).
71  *
72  * Revision 1.24  1996/06/07  22:26:27  jimz
73  * type-ify which_ru (RF_ReconUnitNum_t)
74  *
75  * Revision 1.23  1996/06/07  21:33:04  jimz
76  * begin using consistent types for sector numbers,
77  * stripe numbers, row+col numbers, recon unit numbers
78  *
79  * Revision 1.22  1996/06/06  17:31:30  jimz
80  * use CreateMirrorPartitionReadDAG for mirrored reads
81  *
82  * Revision 1.21  1996/06/03  23:28:26  jimz
83  * more bugfixes
84  * check in tree to sync for IPDS runs with current bugfixes
85  * there still may be a problem with threads in the script test
86  * getting I/Os stuck- not trivially reproducible (runs ~50 times
87  * in a row without getting stuck)
88  *
89  * Revision 1.20  1996/06/02  17:31:48  jimz
90  * Moved a lot of global stuff into array structure, where it belongs.
91  * Fixed up paritylogging, pss modules in this manner. Some general
92  * code cleanup. Removed lots of dead code, some dead files.
93  *
94  * Revision 1.19  1996/05/31  22:26:54  jimz
95  * fix a lot of mapping problems, memory allocation problems
96  * found some weird lock issues, fixed 'em
97  * more code cleanup
98  *
99  * Revision 1.18  1996/05/31  16:13:28  amiri
100  * removed/added some comments.
101  *
102  * Revision 1.17  1996/05/31  05:01:52  amiri
103  * fixed a bug related to sparing layout.
104  *
105  * Revision 1.16  1996/05/30  23:22:16  jimz
106  * bugfixes of serialization, timing problems
107  * more cleanup
108  *
109  * Revision 1.15  1996/05/27  18:56:37  jimz
110  * more code cleanup
111  * better typing
112  * compiles in all 3 environments
113  *
114  * Revision 1.14  1996/05/24  22:17:04  jimz
115  * continue code + namespace cleanup
116  * typed a bunch of flags
117  *
118  * Revision 1.13  1996/05/23  21:46:35  jimz
119  * checkpoint in code cleanup (release prep)
120  * lots of types, function names have been fixed
121  *
122  * Revision 1.12  1996/05/23  00:33:23  jimz
123  * code cleanup: move all debug decls to rf_options.c, all extern
124  * debug decls to rf_options.h, all debug vars preceded by rf_
125  *
126  * Revision 1.11  1996/05/18  19:51:34  jimz
127  * major code cleanup- fix syntax, make some types consistent,
128  * add prototypes, clean out dead code, et cetera
129  *
130  * Revision 1.10  1996/05/03  19:53:56  wvcii
131  * removed include of rf_redstripe.h
132  * moved dag creation routines to new dag library
133  *
134  */
135 
136 #include "rf_archs.h"
137 #include "rf_types.h"
138 #include "rf_raid.h"
139 #include "rf_chaindecluster.h"
140 #include "rf_dag.h"
141 #include "rf_dagutils.h"
142 #include "rf_dagffrd.h"
143 #include "rf_dagffwr.h"
144 #include "rf_dagdegrd.h"
145 #include "rf_dagfuncs.h"
146 #include "rf_threadid.h"
147 #include "rf_general.h"
148 #include "rf_utils.h"
149 
150 typedef struct RF_ChaindeclusterConfigInfo_s {
151   RF_RowCol_t       **stripeIdentifier;   /* filled in at config time
152                                            * and used by IdentifyStripe */
153   RF_StripeCount_t    numSparingRegions;
154   RF_StripeCount_t    stripeUnitsPerSparingRegion;
155   RF_SectorNum_t      mirrorStripeOffset;
156 } RF_ChaindeclusterConfigInfo_t;
157 
158 int rf_ConfigureChainDecluster(
159   RF_ShutdownList_t  **listp,
160   RF_Raid_t           *raidPtr,
161   RF_Config_t         *cfgPtr)
162 {
163   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
164   RF_StripeCount_t num_used_stripeUnitsPerDisk;
165   RF_ChaindeclusterConfigInfo_t *info;
166   RF_RowCol_t i;
167 
168   /* create a Chained Declustering configuration structure */
169   RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
170   if (info == NULL)
171     return(ENOMEM);
172   layoutPtr->layoutSpecificInfo = (void *) info;
173 
174   /*  fill in the config structure.  */
175   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2 , raidPtr->cleanupList);
176   if (info->stripeIdentifier == NULL)
177     return(ENOMEM);
178   for (i=0; i< raidPtr->numCol; i++) {
179       info->stripeIdentifier[i][0] = i % raidPtr->numCol;
180       info->stripeIdentifier[i][1] = (i+1) % raidPtr->numCol;
181     }
182 
183   RF_ASSERT(raidPtr->numRow == 1);
184 
185   /* fill in the remaining layout parameters */
186   num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
187         (2*raidPtr->numCol-2) );
188   info->numSparingRegions = num_used_stripeUnitsPerDisk / (2*raidPtr->numCol-2);
189   info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
190   info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol-1);
191   layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
192   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
193   layoutPtr->numDataCol = 1;
194   layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
195   layoutPtr->numParityCol = 1;
196 
197  layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
198 
199  raidPtr->sectorsPerDisk =
200      num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
201 
202   raidPtr->totalSectors =
203      (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
204 
205   layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
206 
207   return(0);
208 }
209 
210 RF_ReconUnitCount_t rf_GetNumSpareRUsChainDecluster(raidPtr)
211   RF_Raid_t  *raidPtr;
212 {
213   RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
214 
215   /*
216    * The layout uses two stripe units per disk as spare within each
217    * sparing region.
218    */
219   return (2*info->numSparingRegions);
220 }
221 
222 
223 /* Maps to the primary copy of the data, i.e. the first mirror pair */
224 void rf_MapSectorChainDecluster(
225   RF_Raid_t         *raidPtr,
226   RF_RaidAddr_t      raidSector,
227   RF_RowCol_t       *row,
228   RF_RowCol_t       *col,
229   RF_SectorNum_t    *diskSector,
230   int                remap)
231 {
232  RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
233  RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
234  RF_SectorNum_t index_within_region, index_within_disk;
235  RF_StripeNum_t sparing_region_id;
236  int col_before_remap;
237 
238  *row = 0;
239  sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
240  index_within_region = SUID % info->stripeUnitsPerSparingRegion;
241  index_within_disk = index_within_region / raidPtr->numCol;
242  col_before_remap = SUID % raidPtr->numCol;
243 
244  if (!remap) {
245         *col = col_before_remap;
246         *diskSector = ( index_within_disk + ( (raidPtr->numCol-1) * sparing_region_id) ) *
247                         raidPtr->Layout.sectorsPerStripeUnit;
248         *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
249       }
250  else {
251        /* remap sector to spare space...*/
252       *diskSector = sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
253       *diskSector += (raidPtr->numCol-1) * raidPtr->Layout.sectorsPerStripeUnit;
254       *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
255       index_within_disk = index_within_region / raidPtr->numCol;
256       if (index_within_disk < col_before_remap )
257         *col = index_within_disk;
258       else if (index_within_disk  == raidPtr->numCol-2 ) {
259         *col = (col_before_remap+raidPtr->numCol-1) % raidPtr->numCol;
260         *diskSector += raidPtr->Layout.sectorsPerStripeUnit;
261         }
262       else
263         *col = (index_within_disk + 2) % raidPtr->numCol;
264    }
265 
266 }
267 
268 
269 
270 /* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained
271    in the next disk (mod numCol) after the disk containing the primary copy.
272    The offset into the disk is one-half disk down */
273 void rf_MapParityChainDecluster(
274   RF_Raid_t       *raidPtr,
275   RF_RaidAddr_t    raidSector,
276   RF_RowCol_t     *row,
277   RF_RowCol_t     *col,
278   RF_SectorNum_t  *diskSector,
279   int              remap)
280 {
281   RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
282   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
283   RF_SectorNum_t index_within_region, index_within_disk;
284   RF_StripeNum_t sparing_region_id;
285   int col_before_remap;
286 
287   *row = 0;
288   if (!remap) {
289         *col = SUID % raidPtr->numCol;
290         *col = (*col + 1) % raidPtr->numCol;
291         *diskSector =  info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
292         *diskSector += ( SUID / raidPtr->numCol ) * raidPtr->Layout.sectorsPerStripeUnit;
293         *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
294        }
295   else {
296         /* remap parity to spare space ... */
297         sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
298         index_within_region = SUID % info->stripeUnitsPerSparingRegion;
299         index_within_disk = index_within_region / raidPtr->numCol;
300         *diskSector =  sparing_region_id * (raidPtr->numCol+1) * raidPtr->Layout.sectorsPerStripeUnit;
301         *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
302         *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
303         col_before_remap = SUID % raidPtr->numCol;
304         if (index_within_disk < col_before_remap)
305                 *col = index_within_disk;
306         else if (index_within_disk  == raidPtr->numCol-2 ) {
307                 *col = (col_before_remap+2) % raidPtr->numCol;
308                 *diskSector -= raidPtr->Layout.sectorsPerStripeUnit;
309                 }
310         else
311                 *col = (index_within_disk + 2) % raidPtr->numCol;
312   }
313 
314 }
315 
316 void rf_IdentifyStripeChainDecluster(
317   RF_Raid_t        *raidPtr,
318   RF_RaidAddr_t     addr,
319   RF_RowCol_t     **diskids,
320   RF_RowCol_t      *outRow)
321 {
322   RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
323   RF_StripeNum_t SUID;
324   RF_RowCol_t col;
325 
326   SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
327   col = SUID  % raidPtr->numCol;
328   *outRow = 0;
329   *diskids = info->stripeIdentifier[ col ];
330 }
331 
332 void rf_MapSIDToPSIDChainDecluster(
333   RF_RaidLayout_t    *layoutPtr,
334   RF_StripeNum_t      stripeID,
335   RF_StripeNum_t     *psID,
336   RF_ReconUnitNum_t  *which_ru)
337 {
338   *which_ru = 0;
339   *psID = stripeID;
340 }
341 
342 /******************************************************************************
343  * select a graph to perform a single-stripe access
344  *
345  * Parameters:  raidPtr    - description of the physical array
346  *              type       - type of operation (read or write) requested
347  *              asmap      - logical & physical addresses for this access
348  *              createFunc - function to use to create the graph (return value)
349  *****************************************************************************/
350 
351 void rf_RAIDCDagSelect(
352   RF_Raid_t             *raidPtr,
353   RF_IoType_t            type,
354   RF_AccessStripeMap_t  *asmap,
355   RF_VoidFuncPtr *createFunc)
356 #if 0
357   void (**createFunc)(RF_Raid_t *, RF_AccessStripeMap_t *,
358 		     RF_DagHeader_t *, void *, RF_RaidAccessFlags_t,
359 		     RF_AllocListElem_t *))
360 #endif
361 {
362   RF_ASSERT(RF_IO_IS_R_OR_W(type));
363   RF_ASSERT(raidPtr->numRow == 1);
364 
365   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
366     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
367     *createFunc = NULL;
368     return;
369   }
370 
371   *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG :(RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
372 
373   if (type == RF_IO_TYPE_READ) {
374     if ( ( raidPtr->status[0] == rf_rs_degraded ) || (  raidPtr->status[0] == rf_rs_reconstructing) )
375       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidCDegradedReadDAG;  /* array status is degraded, implement workload shifting */
376     else
377       *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorPartitionReadDAG; /* array status not degraded, so use mirror partition dag */
378   }
379   else
380     *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
381 }
382