xref: /netbsd-src/sys/dev/raidframe/rf_raid.h (revision 93f9db1b75d415b78f73ed629beeb86235153473)
1 /*	$NetBSD: rf_raid.h,v 1.1 1998/11/13 04:20:32 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /**********************************************
30  * rf_raid.h -- main header file for RAID driver
31  **********************************************/
32 
33 /*
34  * :
35  * Log: rf_raid.h,v
36  * Revision 1.48  1996/08/20 22:33:54  jimz
37  * make hist_diskreq a doubly-indexed array
38  *
39  * Revision 1.47  1996/07/15  05:40:41  jimz
40  * some recon datastructure cleanup
41  * better handling of multiple failures
42  * added undocumented double-recon test
43  *
44  * Revision 1.46  1996/07/10  22:28:51  jimz
45  * get rid of obsolete row statuses (dead,degraded2)
46  *
47  * Revision 1.45  1996/06/14  14:56:29  jimz
48  * make engine threading stuff ifndef SIMULATE
49  *
50  * Revision 1.44  1996/06/14  14:16:54  jimz
51  * move in engine node queue, atomicity control
52  *
53  * Revision 1.43  1996/06/12  04:41:26  jimz
54  * tweaks to make genplot work with user-level driver
55  * (mainly change stat collection)
56  *
57  * Revision 1.42  1996/06/11  10:57:17  jimz
58  * add recon_done_procs, recon_done_proc_mutex
59  *
60  * Revision 1.41  1996/06/11  01:26:48  jimz
61  * added mechanism for user-level to sync diskthread startup,
62  * shutdown
63  *
64  * Revision 1.40  1996/06/10  14:18:58  jimz
65  * move user, throughput stats into per-array structure
66  *
67  * Revision 1.39  1996/06/10  11:55:47  jimz
68  * Straightened out some per-array/not-per-array distinctions, fixed
69  * a couple bugs related to confusion. Added shutdown lists. Removed
70  * layout shutdown function (now subsumed by shutdown lists).
71  *
72  * Revision 1.38  1996/06/07  21:33:04  jimz
73  * begin using consistent types for sector numbers,
74  * stripe numbers, row+col numbers, recon unit numbers
75  *
76  * Revision 1.37  1996/06/05  19:38:32  jimz
77  * fixed up disk queueing types config
78  * added sstf disk queueing
79  * fixed exit bug on diskthreads (ref-ing bad mem)
80  *
81  * Revision 1.36  1996/06/05  18:06:02  jimz
82  * Major code cleanup. The Great Renaming is now done.
83  * Better modularity. Better typing. Fixed a bunch of
84  * synchronization bugs. Made a lot of global stuff
85  * per-desc or per-array. Removed dead code.
86  *
87  * Revision 1.35  1996/06/03  23:28:26  jimz
88  * more bugfixes
89  * check in tree to sync for IPDS runs with current bugfixes
90  * there still may be a problem with threads in the script test
91  * getting I/Os stuck- not trivially reproducible (runs ~50 times
92  * in a row without getting stuck)
93  *
94  * Revision 1.34  1996/06/02  17:31:48  jimz
95  * Moved a lot of global stuff into array structure, where it belongs.
96  * Fixed up paritylogging, pss modules in this manner. Some general
97  * code cleanup. Removed lots of dead code, some dead files.
98  *
99  * Revision 1.33  1996/05/30  23:22:16  jimz
100  * bugfixes of serialization, timing problems
101  * more cleanup
102  *
103  * Revision 1.32  1996/05/30  11:29:41  jimz
104  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
105  * about when stripes should be locked (I made it consistent: no parity, no lock)
106  * There was a lot of extra serialization of I/Os which I've removed- a lot of
107  * it was to calculate values for the cache code, which is no longer with us.
108  * More types, function, macro cleanup. Added code to properly quiesce the array
109  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
110  * before. Fixed memory allocation, freeing bugs.
111  *
112  * Revision 1.31  1996/05/27  18:56:37  jimz
113  * more code cleanup
114  * better typing
115  * compiles in all 3 environments
116  *
117  * Revision 1.30  1996/05/24  22:17:04  jimz
118  * continue code + namespace cleanup
119  * typed a bunch of flags
120  *
121  * Revision 1.29  1996/05/23  21:46:35  jimz
122  * checkpoint in code cleanup (release prep)
123  * lots of types, function names have been fixed
124  *
125  * Revision 1.28  1996/05/23  00:33:23  jimz
126  * code cleanup: move all debug decls to rf_options.c, all extern
127  * debug decls to rf_options.h, all debug vars preceded by rf_
128  *
129  * Revision 1.27  1996/05/18  19:51:34  jimz
130  * major code cleanup- fix syntax, make some types consistent,
131  * add prototypes, clean out dead code, et cetera
132  *
133  * Revision 1.26  1996/05/08  21:01:24  jimz
134  * fixed up enum type names that were conflicting with other
135  * enums and function names (ie, "panic")
136  * future naming trends will be towards RF_ and rf_ for
137  * everything raidframe-related
138  *
139  * Revision 1.25  1996/05/02  14:57:55  jimz
140  * add sectorMask
141  *
142  * Revision 1.24  1996/04/22  15:53:13  jimz
143  * MAX_RAIDS -> NRAIDFRAME
144  *
145  * Revision 1.23  1995/12/14  18:39:46  jimz
146  * convert to rf_types.h types
147  *
148  * Revision 1.22  1995/12/06  15:02:26  root
149  * added copyright info
150  *
151  * Revision 1.21  1995/10/09  17:39:24  jimz
152  * added info for tracking number of outstanding accesses
153  * at user-level
154  *
155  * Revision 1.20  1995/09/30  20:37:46  jimz
156  * added acc_totals to Raid for kernel
157  *
158  * Revision 1.19  1995/09/19  22:57:14  jimz
159  * add cache of raidid for kernel
160  *
161  * Revision 1.18  1995/09/18  16:50:04  jimz
162  * added RF_MAX_DISKS (for config ioctls)
163  *
164  * Revision 1.17  1995/09/07  19:02:31  jimz
165  * mods to get raidframe to compile and link
166  * in kernel environment
167  *
168  * Revision 1.16  1995/07/21  19:29:51  robby
169  * added some info for the idler to the Raid
170  *
171  * Revision 1.15  1995/07/16  03:19:14  cfb
172  * added cachePtr to *raidPtr
173  *
174  * Revision 1.14  1995/06/23  13:39:36  robby
 * updated to prototypes in rf_layout.h
176  *
177  */
178 
179 #ifndef _RF__RF_RAID_H_
180 #define _RF__RF_RAID_H_
181 
182 #ifdef _KERNEL
183 #define KERNEL
184 #endif
185 
186 #include "rf_archs.h"
187 #include "rf_types.h"
188 #include "rf_threadstuff.h"
189 
190 #if defined(__NetBSD__) && defined(_KERNEL)
191 #include "rf_netbsd.h"
192 #endif
193 
194 #ifdef KERNEL
195 /* XXX Needs to be added.  GO
196 #include <raidframe.h>
197 */
198 #include <sys/disklabel.h>
199 #else /* KERNEL */
200 #include <stdio.h>
201 #include <assert.h>
202 #endif /* KERNEL */
203 #include <sys/types.h>
204 
205 #include "rf_alloclist.h"
206 #include "rf_stripelocks.h"
207 #include "rf_layout.h"
208 #include "rf_disks.h"
209 #include "rf_debugMem.h"
210 #include "rf_diskqueue.h"
211 #include "rf_reconstruct.h"
212 #include "rf_acctrace.h"
213 
214 #if RF_INCLUDE_PARITYLOGGING > 0
215 #include "rf_paritylog.h"
216 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
217 
/* Upper bound on the number of disks in a single array. */
#define RF_MAX_DISKS 128

/*
 * RF_DEV2RAIDID(dev): translate a dev_t into the id of the RAID unit.
 * NetBSD builds go through DISKUNIT(); every other build takes the
 * minor number shifted right by six bits.
 */
#if defined(__NetBSD__)
#define RF_DEV2RAIDID(dev)  (DISKUNIT(dev))
#else /* !__NetBSD__ */
#define RF_DEV2RAIDID(dev)  (minor(dev) >> 6)
#endif /* __NetBSD__ */
224 
/*
 * Every row of the array forms a distinct parity group, so each row
 * carries its own status, taken from the values below.
 */
typedef enum RF_RowStatus_e {
    rf_rs_optimal        = 0,
    rf_rs_degraded       = 1,
    rf_rs_reconstructing = 2,
    rf_rs_reconfigured   = 3
} RF_RowStatus_t;
235 
/*
 * Cumulative user-level I/O statistics: the interval over which they
 * were gathered plus running totals.
 */
struct RF_CumulativeStats_s {
    /* time at which the stats were last started */
    struct timeval start;
    /* time at which the stats were last stopped */
    struct timeval stop;
    /* sum of all user response times, in microseconds */
    long sum_io_us;
    /* total number of I/Os serviced */
    long num_ios;
    /* total number of sectors read or written */
    long num_sect_moved;
};
243 
244 struct RF_ThroughputStats_s {
245   RF_DECLARE_MUTEX(mutex)/* a mutex used to lock the configuration stuff */
246   struct timeval start;  /* timer started when numOutstandingRequests moves from 0 to 1 */
247   struct timeval stop;   /* timer stopped when numOutstandingRequests moves from 1 to 0 */
248   RF_uint64 sum_io_us;   /* total time timer is enabled */
249   RF_uint64 num_ios;     /* total number of ios processed by RAIDframe */
250   long num_out_ios;      /* number of outstanding ios */
251 };
252 
#if defined(SIMULATE)
/*
 * Singly linked list node naming a (row, col) position; only built
 * for the simulator.
 */
typedef struct RF_PendingRecon_s RF_PendingRecon_t;
struct RF_PendingRecon_s {
    RF_RowCol_t row;
    RF_RowCol_t col;
    RF_PendingRecon_t *next;    /* following list entry */
};
#endif /* SIMULATE */
261 
262 struct RF_Raid_s {
263   /* This portion never changes, and can be accessed without locking */
264   /* an exception is Disks[][].status, which requires locking when it is changed */
265   u_int numRow;             /* number of rows of disks, typically == # of ranks */
266   u_int numCol;             /* number of columns of disks, typically == # of disks/rank */
267   u_int numSpare;           /* number of spare disks */
268   int   maxQueueDepth;      /* max disk queue depth */
269   RF_SectorCount_t  totalSectors;   /* total number of sectors in the array */
270   RF_SectorCount_t  sectorsPerDisk; /* number of sectors on each disk */
271   u_int logBytesPerSector;  /* base-2 log of the number of bytes in a sector */
272   u_int bytesPerSector;     /* bytes in a sector */
273   RF_int32  sectorMask;     /* mask of bytes-per-sector */
274 
275   RF_RaidLayout_t   Layout; /* all information related to layout */
276   RF_RaidDisk_t   **Disks;  /* all information related to physical disks */
277   RF_DiskQueue_t  **Queues; /* all information related to disk queues */
278      /* NOTE:  This is an anchor point via which the queues can be accessed,
279       * but the enqueue/dequeue routines in diskqueue.c use a local copy of
280       * this pointer for the actual accesses.
281       */
282   /* The remainder of the structure can change, and therefore requires locking on reads and updates */
283   RF_DECLARE_MUTEX(mutex)        /* mutex used to serialize access to the fields below */
284   RF_RowStatus_t  *status;       /* the status of each row in the array */
285   int              valid;        /* indicates successful configuration */
286   RF_LockTableEntry_t *lockTable;   /* stripe-lock table */
287   RF_LockTableEntry_t *quiesceLock; /* quiesnce table */
288   int                  numFailures; /* total number of failures in the array */
289 
290   /*
291    * Cleanup stuff
292    */
293   RF_ShutdownList_t  *shutdownList; /* shutdown activities */
294   RF_AllocListElem_t *cleanupList;  /* memory to be freed at shutdown time */
295 
296   /*
297    * Recon stuff
298    */
299   RF_HeadSepLimit_t headSepLimit;
300   int numFloatingReconBufs;
301   int reconInProgress;
302 #ifdef SIMULATE
303   RF_PendingRecon_t *pendingRecon;
304 #endif /* SIMULATE */
305   RF_DECLARE_COND(waitForReconCond)
306   RF_RaidReconDesc_t *reconDesc; /* reconstruction descriptor */
307   RF_ReconCtrl_t **reconControl; /* reconstruction control structure pointers for each row in the array */
308 
309 #if !defined(KERNEL) && !defined(SIMULATE)
310   /*
311    * Disk thread stuff
312    */
313   int diskthreads_created;
314   int diskthreads_running;
315   int diskthreads_shutdown;
316   RF_DECLARE_MUTEX(diskthread_count_mutex)
317   RF_DECLARE_COND(diskthread_count_cond)
318 #endif /* !KERNEL && !SIMULATE */
319 
320   /*
321    * Array-quiescence stuff
322    */
323   RF_DECLARE_MUTEX(access_suspend_mutex)
324   RF_DECLARE_COND(quiescent_cond)
325   RF_IoCount_t accesses_suspended;
326   RF_IoCount_t accs_in_flight;
327   int access_suspend_release;
328   int waiting_for_quiescence;
329   RF_CallbackDesc_t *quiesce_wait_list;
330 
331   /*
332    * Statistics
333    */
334 #if !defined(KERNEL) && !defined(SIMULATE)
335   RF_ThroughputStats_t throughputstats;
336 #endif /* !KERNEL && !SIMULATE */
337   RF_CumulativeStats_t userstats;
338 
339   /*
340    * Engine thread control
341    */
342   RF_DECLARE_MUTEX(node_queue_mutex)
343   RF_DECLARE_COND(node_queue_cond)
344   RF_DagNode_t *node_queue;
345 #ifndef SIMULATE
346   RF_Thread_t engine_thread;
347   RF_ThreadGroup_t engine_tg;
348 #endif /* !SIMULATE */
349   int shutdown_engine;
350   int dags_in_flight; /* debug */
351 
352   /*
353    * PSS (Parity Stripe Status) stuff
354    */
355   RF_FreeList_t *pss_freelist;
356   long pssTableSize;
357 
358   /*
359    * Reconstruction stuff
360    */
361   int procsInBufWait;
362   int numFullReconBuffers;
363   RF_AccTraceEntry_t *recon_tracerecs;
364   unsigned long accumXorTimeUs;
365   RF_ReconDoneProc_t *recon_done_procs;
366   RF_DECLARE_MUTEX(recon_done_proc_mutex)
367 
368 #if !defined(KERNEL) && !defined(SIMULATE)
369   RF_Thread_t **diskthreads, *sparediskthreads;  /* thread descriptors for disk threads in user-level version */
370 #endif /* !KERNEL && !SIMULATE */
371 
372   /*
373    * nAccOutstanding, waitShutdown protected by desc freelist lock
374    * (This may seem strange, since that's a central serialization point
375    * for a per-array piece of data, but otherwise, it'd be an extra
376    * per-array lock, and that'd only be less efficient...)
377    */
378   RF_DECLARE_COND(outstandingCond)
379   int waitShutdown;
380   int nAccOutstanding;
381 
382   RF_DiskId_t **diskids;
383   RF_DiskId_t  *sparediskids;
384 
385 #ifdef KERNEL
386 	int           raidid;
387 #endif /* KERNEL */
388 	RF_AccTotals_t  acc_totals;
389 	int           keep_acc_totals;
390 
391 #ifdef _KERNEL
392         struct raidcinfo **raid_cinfo; /* array of component info */
393         struct proc *proc; /* XXX shouldn't be needed here.. :-p */
394 #endif
395 
396   int terminate_disk_queues;
397 
398   /*
399    * XXX
400    *
401    * config-specific information should be moved
402    * somewhere else, or at least hung off this
403    * in some generic way
404    */
405 
406   /* used by rf_compute_workload_shift */
407   RF_RowCol_t hist_diskreq[RF_MAXROW][RF_MAXCOL];
408 
409   /* used by declustering */
410   int noRotate;
411 
412 #if RF_INCLUDE_PARITYLOGGING > 0
413   /* used by parity logging */
414   RF_SectorCount_t          regionLogCapacity;
415   RF_ParityLogQueue_t       parityLogPool;       /* pool of unused parity logs */
416   RF_RegionInfo_t          *regionInfo;          /* array of region state */
417   int                       numParityLogs;
418   int                       numSectorsPerLog;
419   int                       regionParityRange;
420   int                       logsInUse;           /* debugging */
421   RF_ParityLogDiskQueue_t   parityLogDiskQueue;  /* state of parity logging disk work */
422   RF_RegionBufferQueue_t    regionBufferPool;    /* buffers for holding region log */
423   RF_RegionBufferQueue_t    parityBufferPool;    /* buffers for holding parity */
424   caddr_t                   parityLogBufferHeap; /* pool of unused parity logs */
425 #ifndef SIMULATE
426   RF_Thread_t               pLogDiskThreadHandle;
427 #endif /* !SIMULATE */
428 
429 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
430 };
431 
432 #endif /* !_RF__RF_RAID_H_ */
433