xref: /netbsd-src/sys/dev/raidframe/rf_driver.c (revision d20841bb642898112fe68f0ad3f7b26dddf56f07)
1 /*	$NetBSD: rf_driver.c,v 1.84 2004/01/15 20:27:27 oster Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed by the NetBSD
20  *        Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1995 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
43  *         Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
44  *
45  * Permission to use, copy, modify and distribute this software and
46  * its documentation is hereby granted, provided that both the copyright
47  * notice and this permission notice appear in all copies of the
48  * software, derivative works or modified versions, and any portions
49  * thereof, and that both notices appear in supporting documentation.
50  *
51  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
52  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
53  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
54  *
55  * Carnegie Mellon requests users of this software to return to
56  *
57  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
58  *  School of Computer Science
59  *  Carnegie Mellon University
60  *  Pittsburgh PA 15213-3890
61  *
62  * any improvements or extensions that they make and grant Carnegie the
63  * rights to redistribute these changes.
64  */
65 
66 /******************************************************************************
67  *
68  * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
69  *
70  * all routines are prefixed with rf_ (raidframe), to avoid conflicts.
71  *
72  ******************************************************************************/
73 
74 
75 #include <sys/cdefs.h>
76 __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.84 2004/01/15 20:27:27 oster Exp $");
77 
78 #include "opt_raid_diagnostic.h"
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/ioctl.h>
83 #include <sys/fcntl.h>
84 #include <sys/vnode.h>
85 
86 
87 #include "rf_archs.h"
88 #include "rf_threadstuff.h"
89 
90 #include <sys/errno.h>
91 
92 #include "rf_raid.h"
93 #include "rf_dag.h"
94 #include "rf_aselect.h"
95 #include "rf_diskqueue.h"
96 #include "rf_parityscan.h"
97 #include "rf_alloclist.h"
98 #include "rf_dagutils.h"
99 #include "rf_utils.h"
100 #include "rf_etimer.h"
101 #include "rf_acctrace.h"
102 #include "rf_general.h"
103 #include "rf_desc.h"
104 #include "rf_states.h"
105 #include "rf_decluster.h"
106 #include "rf_map.h"
107 #include "rf_revent.h"
108 #include "rf_callback.h"
109 #include "rf_engine.h"
110 #include "rf_mcpair.h"
111 #include "rf_nwayxor.h"
112 #include "rf_copyback.h"
113 #include "rf_driver.h"
114 #include "rf_options.h"
115 #include "rf_shutdown.h"
116 #include "rf_kintf.h"
117 
118 #include <sys/buf.h>
119 
120 #ifndef RF_ACCESS_DEBUG
121 #define RF_ACCESS_DEBUG 0
122 #endif
123 
124 /* rad == RF_RaidAccessDesc_t */
125 RF_DECLARE_MUTEX(rf_rad_pool_lock)
126 static struct pool rf_rad_pool;
127 #define RF_MAX_FREE_RAD 128
128 #define RF_RAD_INC       16
129 #define RF_RAD_INITIAL   32
130 
131 /* debug variables */
132 char    rf_panicbuf[2048];	/* a buffer to hold an error msg when we panic */
133 
134 /* main configuration routines */
135 static int raidframe_booted = 0;
136 
137 static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
138 static void set_debug_option(char *name, long val);
139 static void rf_UnconfigureArray(void);
140 static void rf_ShutdownRDFreeList(void *);
141 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
142 
143 RF_DECLARE_MUTEX(rf_printf_mutex)	/* debug only:  avoids interleaved
144 					 * printfs by different stripes */
145 
146 #define SIGNAL_QUIESCENT_COND(_raid_)  wakeup(&((_raid_)->accesses_suspended))
147 #define WAIT_FOR_QUIESCENCE(_raid_) \
148 	ltsleep(&((_raid_)->accesses_suspended), PRIBIO, \
149 		"raidframe quiesce", 0, &((_raid_)->access_suspend_mutex))
150 
151 static int configureCount = 0;	/* number of active configurations */
152 static int isconfigged = 0;	/* is basic raidframe (non per-array)
153 				 * stuff configged */
154 RF_DECLARE_LKMGR_STATIC_MUTEX(configureMutex)	/* used to lock the configuration
155 					 * stuff */
156 static RF_ShutdownList_t *globalShutdown;	/* non array-specific
157 						 * stuff */
158 
159 static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp);
160 
161 /* called at system boot time */
162 int
163 rf_BootRaidframe()
164 {
165 
166 	if (raidframe_booted)
167 		return (EBUSY);
168 	raidframe_booted = 1;
169 	lockinit(&configureMutex, PRIBIO, "RAIDframe lock", 0, 0);
170  	configureCount = 0;
171 	isconfigged = 0;
172 	globalShutdown = NULL;
173 	return (0);
174 }
175 
176 /*
177  * Called whenever an array is shutdown
178  */
179 static void
180 rf_UnconfigureArray()
181 {
182 	int     rc;
183 
184 	RF_LOCK_LKMGR_MUTEX(configureMutex);
185 	if (--configureCount == 0) {	/* if no active configurations, shut
186 					 * everything down */
187 		isconfigged = 0;
188 
189 		rc = rf_ShutdownList(&globalShutdown);
190 		if (rc) {
191 			RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc);
192 		}
193 
194 		/*
195 	         * We must wait until now, because the AllocList module
196 	         * uses the DebugMem module.
197 	         */
198 #if RF_DEBUG_MEM
199 		if (rf_memDebug)
200 			rf_print_unfreed();
201 #endif
202 	}
203 	RF_UNLOCK_LKMGR_MUTEX(configureMutex);
204 }
205 
206 /*
207  * Called to shut down an array.
208  */
209 int
210 rf_Shutdown(RF_Raid_t *raidPtr)
211 {
212 
213 	if (!raidPtr->valid) {
214 		RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver.  Aborting shutdown\n");
215 		return (EINVAL);
216 	}
217 	/*
218          * wait for outstanding IOs to land
219          * As described in rf_raid.h, we use the rad_freelist lock
220          * to protect the per-array info about outstanding descs
221          * since we need to do freelist locking anyway, and this
222          * cuts down on the amount of serialization we've got going
223          * on.
224          */
225 	RF_LOCK_MUTEX(rf_rad_pool_lock);
226 	if (raidPtr->waitShutdown) {
227 		RF_UNLOCK_MUTEX(rf_rad_pool_lock);
228 		return (EBUSY);
229 	}
230 	raidPtr->waitShutdown = 1;
231 	while (raidPtr->nAccOutstanding) {
232 		RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_pool_lock);
233 	}
234 	RF_UNLOCK_MUTEX(rf_rad_pool_lock);
235 
236 	/* Wait for any parity re-writes to stop... */
237 	while (raidPtr->parity_rewrite_in_progress) {
238 		printf("Waiting for parity re-write to exit...\n");
239 		tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
240 		       "rfprwshutdown", 0);
241 	}
242 
243 	raidPtr->valid = 0;
244 
245 	rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
246 
247 	rf_UnconfigureVnodes(raidPtr);
248 
249 	rf_ShutdownList(&raidPtr->shutdownList);
250 
251 	rf_UnconfigureArray();
252 
253 	return (0);
254 }
255 
256 
/*
 * Helper macros for rf_Configure().  They rely on the caller's locals
 * (rc, raidPtr, cfgPtr) and, on failure, return from the calling function.
 * Each is wrapped in do { } while (0) so that it behaves as a single
 * statement, including when used as the body of an if/else.
 */

/*
 * Run one global configuration routine against globalShutdown.  On failure:
 * log it, run the global shutdown list, drop the configuration count,
 * release configureMutex and return the error from the caller.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		RF_UNLOCK_LKMGR_MUTEX(configureMutex); \
		return(rc); \
	} \
} while (0)

/* Undo a partially configured array. */
#define DO_RAID_FAIL() do { \
	rf_UnconfigureVnodes(raidPtr); \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
} while (0)

/*
 * Run one per-array configuration routine against the array's own
 * shutdown list.  On failure: log it, undo the array via DO_RAID_FAIL()
 * and return the error from the caller.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)

/* Initialise one mutex. */
#define DO_RAID_MUTEX(_m_) do { \
	rf_mutex_init((_m_)); \
} while (0)
286 
/*
 * Configure one RAID array.
 *
 * raidPtr  array descriptor to fill in
 * cfgPtr   configuration to apply
 * ac       auto-configuration data; when non-NULL the disks are set up by
 *          rf_AutoConfigureDisks() instead of rf_ConfigureDisks() and
 *          rf_ConfigureSpareDisks()
 *
 * The first array to be configured (isconfigged == 0) also runs the global
 * configuration routines, each of which is handed &globalShutdown.  This
 * is done with configureMutex held.
 *
 * Returns 0 on success.  A failing global routine makes DO_INIT_CONFIGURE
 * run the global shutdown list, drop configureCount, release the mutex and
 * return that routine's error.  A failing per-array step goes through
 * DO_RAID_FAIL() (vnodes, per-array shutdown list, rf_UnconfigureArray())
 * and returns the error; ENOMEM is returned when the cleanup list cannot
 * be created.
 */
int
rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
{
	RF_RowCol_t col;
	int     rc;

	RF_LOCK_LKMGR_MUTEX(configureMutex);
	configureCount++;
	if (isconfigged == 0) {
		rf_mutex_init(&rf_printf_mutex);

		/* initialize globals */

		DO_INIT_CONFIGURE(rf_ConfigureAllocList);

		/*
		 * Yes, this does make debugging general to the whole
		 * system instead of being array specific. Bummer, drag.
		 */
		rf_ConfigureDebug(cfgPtr);
		DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
		DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
		DO_INIT_CONFIGURE(rf_ConfigureMapModule);
		DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
		DO_INIT_CONFIGURE(rf_ConfigureCallback);
		DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
		DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
		DO_INIT_CONFIGURE(rf_ConfigureMCPair);
		DO_INIT_CONFIGURE(rf_ConfigureDAGs);
		DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
		DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
		DO_INIT_CONFIGURE(rf_ConfigureCopyback);
		DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
		isconfigged = 1;
	}
	RF_UNLOCK_LKMGR_MUTEX(configureMutex);

	/* Everything from here on is per-array and runs without configureMutex. */
	DO_RAID_MUTEX(&raidPtr->mutex);
	/* set up the cleanup list.  Do this after ConfigureDebug so that
	 * value of memDebug will be set */

	rf_MakeAllocList(raidPtr->cleanupList);
	if (raidPtr->cleanupList == NULL) {
		DO_RAID_FAIL();
		return (ENOMEM);
	}
	/* Have the cleanup list freed when the array's shutdown list is run. */
	rc = rf_ShutdownCreate(&raidPtr->shutdownList,
	    (void (*) (void *)) rf_FreeAllocList,
	    raidPtr->cleanupList);
	if (rc) {
		rf_print_unable_to_add_shutdown(__FILE__, __LINE__, rc);
		DO_RAID_FAIL();
		return (rc);
	}
	raidPtr->numCol = cfgPtr->numCol;
	raidPtr->numSpare = cfgPtr->numSpare;

	raidPtr->status = rf_rs_optimal;
	raidPtr->reconControl = NULL;

	TAILQ_INIT(&(raidPtr->iodone));
	simple_lock_init(&(raidPtr->iodone_lock));

	DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
	DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);

	raidPtr->outstandingCond = 0;

	/* Shutdown bookkeeping consulted by rf_Shutdown() and the desc allocator. */
	raidPtr->nAccOutstanding = 0;
	raidPtr->waitShutdown = 0;

	DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);

	raidPtr->waitForReconCond = 0;

	if (ac!=NULL) {
		/* We have an AutoConfig structure..  Don't do the
		   normal disk configuration... call the auto config
		   stuff */
		/* NOTE(review): any result from this call is not examined here. */
		rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
	} else {
		DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
		DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
	}
	/* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
	 * no. is set */
	DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);

	DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);

	DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);

#if RF_INCLUDE_CHAINDECLUSTER > 0
	for (col = 0; col < raidPtr->numCol; col++) {
		/*
		 * XXX better distribution
		 */
		raidPtr->hist_diskreq[col] = 0;
	}
#endif
	raidPtr->numNewFailures = 0;
	raidPtr->copyback_in_progress = 0;
	raidPtr->parity_rewrite_in_progress = 0;
	raidPtr->adding_hot_spare = 0;
	raidPtr->recon_in_progress = 0;
	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;

	/* autoconfigure and root_partition will actually get filled in
	   after the config is done */
	raidPtr->autoconfigure = 0;
	raidPtr->root_partition = 0;
	raidPtr->last_unit = raidPtr->raidid;
	raidPtr->config_order = 0;

	/* keep_acc_totals is only ever set here; it is not cleared otherwise. */
	if (rf_keepAccTotals) {
		raidPtr->keep_acc_totals = 1;
	}
	rf_StartUserStats(raidPtr);

	raidPtr->valid = 1;

	/* Report the layout name, the components and the array size. */
	printf("raid%d: %s\n", raidPtr->raidid,
	       raidPtr->Layout.map->configName);
	printf("raid%d: Components:", raidPtr->raidid);

	for (col = 0; col < raidPtr->numCol; col++) {
		printf(" %s", raidPtr->Disks[col].devname);
		if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
			printf("[**FAILED**]");
		}
	}
	printf("\n");
	printf("raid%d: Total Sectors: %lu (%lu MB)\n",
	       raidPtr->raidid,
	       (unsigned long) raidPtr->totalSectors,
	       (unsigned long) (raidPtr->totalSectors / 1024 *
				(1 << raidPtr->logBytesPerSector) / 1024));

	return (0);
}
428 
/*
 * Shutdown hook registered by rf_ConfigureRDFreeList(): destroys the
 * access-descriptor pool.  The argument is not used.
 */
static void
rf_ShutdownRDFreeList(void *ignored)
{
	pool_destroy(&rf_rad_pool);
}
434 
435 static int
436 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
437 {
438 	int     rc;
439 
440 	pool_init(&rf_rad_pool, sizeof(RF_RaidAccessDesc_t), 0, 0, 0,
441 		  "rf_rad_pl", NULL);
442 	pool_sethiwat(&rf_rad_pool, RF_MAX_FREE_RAD);
443 	pool_prime(&rf_rad_pool, RF_RAD_INITIAL);
444 	rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
445 	if (rc) {
446 		rf_print_unable_to_add_shutdown(__FILE__, __LINE__, rc);
447 		rf_ShutdownRDFreeList(NULL);
448 		return (rc);
449 	}
450 	simple_lock_init(&rf_rad_pool_lock);
451 	return (0);
452 }
453 
454 RF_RaidAccessDesc_t *
455 rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
456 		    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
457 		    caddr_t bufPtr, void *bp, RF_RaidAccessFlags_t flags,
458 		    RF_AccessState_t *states)
459 {
460 	RF_RaidAccessDesc_t *desc;
461 
462 	desc = pool_get(&rf_rad_pool, PR_WAITOK);
463 	simple_lock_init(&desc->mutex);
464 
465 	RF_LOCK_MUTEX(rf_rad_pool_lock);
466 	if (raidPtr->waitShutdown) {
467 		/*
468 	         * Actually, we're shutting the array down. Free the desc
469 	         * and return NULL.
470 	         */
471 
472 		RF_UNLOCK_MUTEX(rf_rad_pool_lock);
473 		pool_put(&rf_rad_pool, desc);
474 		return (NULL);
475 	}
476 	raidPtr->nAccOutstanding++;
477 
478 	RF_UNLOCK_MUTEX(rf_rad_pool_lock);
479 
480 	desc->raidPtr = (void *) raidPtr;
481 	desc->type = type;
482 	desc->raidAddress = raidAddress;
483 	desc->numBlocks = numBlocks;
484 	desc->bufPtr = bufPtr;
485 	desc->bp = bp;
486 	desc->paramDAG = NULL;
487 	desc->paramASM = NULL;
488 	desc->flags = flags;
489 	desc->states = states;
490 	desc->state = 0;
491 
492 	desc->status = 0;
493 	memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t));
494 	desc->callbackFunc = NULL;
495 	desc->callbackArg = NULL;
496 	desc->next = NULL;
497 	desc->cleanupList = NULL;
498 	rf_MakeAllocList(desc->cleanupList);
499 	return (desc);
500 }
501 
502 void
503 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
504 {
505 	RF_Raid_t *raidPtr = desc->raidPtr;
506 
507 	RF_ASSERT(desc);
508 
509 	rf_FreeAllocList(desc->cleanupList);
510 	pool_put(&rf_rad_pool, desc);
511 	RF_LOCK_MUTEX(rf_rad_pool_lock);
512 	raidPtr->nAccOutstanding--;
513 	if (raidPtr->waitShutdown) {
514 		RF_SIGNAL_COND(raidPtr->outstandingCond);
515 	}
516 	RF_UNLOCK_MUTEX(rf_rad_pool_lock);
517 }
518 /*********************************************************************
519  * Main routine for performing an access.
520  * Accesses are retried until a DAG can not be selected.  This occurs
521  * when either the DAG library is incomplete or there are too many
522  * failures in a parity group.
523  *
524  * type should be read or write async_flag should be RF_TRUE or
525  * RF_FALSE bp_in is a buf pointer.  void * to facilitate ignoring it
526  * outside the kernel
527  ********************************************************************/
528 int
529 rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag,
530 	    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
531 	    caddr_t bufPtr, void *bp_in, RF_RaidAccessFlags_t flags)
532 {
533 	RF_RaidAccessDesc_t *desc;
534 	caddr_t lbufPtr = bufPtr;
535 	struct buf *bp = (struct buf *) bp_in;
536 
537 	raidAddress += rf_raidSectorOffset;
538 
539 #if RF_ACCESS_DEBUG
540 	if (rf_accessDebug) {
541 
542 		printf("logBytes is: %d %d %d\n", raidPtr->raidid,
543 		    raidPtr->logBytesPerSector,
544 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks));
545 		printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
546 		    (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
547 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
548 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
549 		    (int) numBlocks,
550 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks),
551 		    (long) bufPtr);
552 	}
553 #endif
554 	if (raidAddress + numBlocks > raidPtr->totalSectors) {
555 
556 		printf("DoAccess: raid addr %lu too large to access %lu sectors.  Max legal addr is %lu\n",
557 		    (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
558 
559 
560 		bp->b_flags |= B_ERROR;
561 		bp->b_resid = bp->b_bcount;
562 		bp->b_error = ENOSPC;
563 		biodone(bp);
564 		return (ENOSPC);
565 	}
566 	desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
567 	    numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states);
568 
569 	if (desc == NULL) {
570 		return (ENOMEM);
571 	}
572 	RF_ETIMER_START(desc->tracerec.tot_timer);
573 
574 	desc->async_flag = async_flag;
575 
576 	rf_ContinueRaidAccess(desc);
577 
578 	return (0);
579 }
#if 0
/*
 * Disabled code, never compiled.
 *
 * force the array into reconfigured mode without doing reconstruction.
 * Panics unless the layout has RF_DISTRIBUTE_SPARE set; otherwise marks
 * column `col' as rf_ds_dist_spared, sets the array status to
 * rf_rs_reconfigured and updates the component labels, all under
 * raidPtr->mutex.
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{
	if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
	/* install spare table only if declustering + distributed sparing
	 * architecture. */
	if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
		rf_InstallSpareTable(raidPtr, col);
	RF_UNLOCK_MUTEX(raidPtr->mutex);
	return (0);
}
#endif
602 
603 int
604 rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
605 {
606 	RF_LOCK_MUTEX(raidPtr->mutex);
607 	if (raidPtr->Disks[fcol].status != rf_ds_failed) {
608 		/* must be failing something that is valid, or else it's
609 		   already marked as failed (in which case we don't
610 		   want to mark it failed again!) */
611 		raidPtr->numFailures++;
612 		raidPtr->Disks[fcol].status = rf_ds_failed;
613 		raidPtr->status = rf_rs_degraded;
614 	}
615 	RF_UNLOCK_MUTEX(raidPtr->mutex);
616 
617 	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
618 
619 	/* Close the component, so that it's not "locked" if someone
620 	   else want's to use it! */
621 
622 	rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
623 			   raidPtr->Disks[fcol].auto_configured);
624 
625 	RF_LOCK_MUTEX(raidPtr->mutex);
626 	raidPtr->raid_cinfo[fcol].ci_vp = NULL;
627 
628 	/* Need to mark the component as not being auto_configured
629 	   (in case it was previously). */
630 
631 	raidPtr->Disks[fcol].auto_configured = 0;
632 	RF_UNLOCK_MUTEX(raidPtr->mutex);
633 
634 	if (initRecon)
635 		rf_ReconstructFailedDisk(raidPtr, fcol);
636 	return (0);
637 }
638 /* releases a thread that is waiting for the array to become quiesced.
639  * access_suspend_mutex should be locked upon calling this
640  */
641 void
642 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr)
643 {
644 #if RF_DEBUG_QUIESCE
645 	if (rf_quiesceDebug) {
646 		printf("raid%d: Signalling quiescence lock\n",
647 		       raidPtr->raidid);
648 	}
649 #endif
650 	raidPtr->access_suspend_release = 1;
651 
652 	if (raidPtr->waiting_for_quiescence) {
653 		SIGNAL_QUIESCENT_COND(raidPtr);
654 	}
655 }
656 /* suspends all new requests to the array.  No effect on accesses that are in flight.  */
657 int
658 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
659 {
660 #if RF_DEBUG_QUIESCE
661 	if (rf_quiesceDebug)
662 		printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
663 #endif
664 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
665 	raidPtr->accesses_suspended++;
666 	raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
667 
668 	if (raidPtr->waiting_for_quiescence) {
669 		raidPtr->access_suspend_release = 0;
670 		while (!raidPtr->access_suspend_release) {
671 			printf("raid%d: Suspending: Waiting for Quiescence\n",
672 			       raidPtr->raidid);
673 			WAIT_FOR_QUIESCENCE(raidPtr);
674 			raidPtr->waiting_for_quiescence = 0;
675 		}
676 	}
677 	printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
678 
679 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
680 	return (raidPtr->waiting_for_quiescence);
681 }
682 /* wake up everyone waiting for quiescence to be released */
683 void
684 rf_ResumeNewRequests(RF_Raid_t *raidPtr)
685 {
686 	RF_CallbackDesc_t *t, *cb;
687 
688 #if RF_DEBUG_QUIESCE
689 	if (rf_quiesceDebug)
690 		printf("Resuming new reqs\n");
691 #endif
692 
693 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
694 	raidPtr->accesses_suspended--;
695 	if (raidPtr->accesses_suspended == 0)
696 		cb = raidPtr->quiesce_wait_list;
697 	else
698 		cb = NULL;
699 	raidPtr->quiesce_wait_list = NULL;
700 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
701 
702 	while (cb) {
703 		t = cb;
704 		cb = cb->next;
705 		(t->callbackFunc) (t->callbackArg);
706 		rf_FreeCallbackDesc(t);
707 	}
708 }
709 /*****************************************************************************************
710  *
711  * debug routines
712  *
713  ****************************************************************************************/
714 
715 static void
716 set_debug_option(char *name, long val)
717 {
718 	RF_DebugName_t *p;
719 
720 	for (p = rf_debugNames; p->name; p++) {
721 		if (!strcmp(p->name, name)) {
722 			*(p->ptr) = val;
723 			printf("[Set debug variable %s to %ld]\n", name, val);
724 			return;
725 		}
726 	}
727 	RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
728 }
729 
730 
731 /* would like to use sscanf here, but apparently not available in kernel */
732 /*ARGSUSED*/
733 static void
734 rf_ConfigureDebug(RF_Config_t *cfgPtr)
735 {
736 	char   *val_p, *name_p, *white_p;
737 	long    val;
738 	int     i;
739 
740 	rf_ResetDebugOptions();
741 	for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
742 		name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
743 		white_p = rf_find_white(name_p);	/* skip to start of 2nd
744 							 * word */
745 		val_p = rf_find_non_white(white_p);
746 		if (*val_p == '0' && *(val_p + 1) == 'x')
747 			val = rf_htoi(val_p + 2);
748 		else
749 			val = rf_atoi(val_p);
750 		*white_p = '\0';
751 		set_debug_option(name_p, val);
752 	}
753 }
754 /* performance monitoring stuff */
755 
/*
 * Microseconds held in `t', an object with tv_sec and tv_usec members,
 * as a long.  The argument is parenthesized so that any expression
 * (e.g. *ptr) can be passed.
 */
#define TIMEVAL_TO_US(t) (((long) (t).tv_sec) * 1000000L + (long) (t).tv_usec)
757 
758 #if !defined(_KERNEL) && !defined(SIMULATE)
759 
760 /*
761  * Throughput stats currently only used in user-level RAIDframe
762  */
763 
764 static int
765 rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
766 		       RF_Config_t *cfgPtr)
767 {
768 	int     rc;
769 
770 	/* these used by user-level raidframe only */
771 	rf_mutex_init(&raidPtr->throughputstats.mutex);
772 	raidPtr->throughputstats.sum_io_us = 0;
773 	raidPtr->throughputstats.num_ios = 0;
774 	raidPtr->throughputstats.num_out_ios = 0;
775 	return (0);
776 }
777 
778 void
779 rf_StartThroughputStats(RF_Raid_t *raidPtr)
780 {
781 	RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
782 	raidPtr->throughputstats.num_ios++;
783 	raidPtr->throughputstats.num_out_ios++;
784 	if (raidPtr->throughputstats.num_out_ios == 1)
785 		RF_GETTIME(raidPtr->throughputstats.start);
786 	RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
787 }
788 
789 static void
790 rf_StopThroughputStats(RF_Raid_t *raidPtr)
791 {
792 	struct timeval diff;
793 
794 	RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
795 	raidPtr->throughputstats.num_out_ios--;
796 	if (raidPtr->throughputstats.num_out_ios == 0) {
797 		RF_GETTIME(raidPtr->throughputstats.stop);
798 		RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff);
799 		raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff);
800 	}
801 	RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
802 }
803 
804 static void
805 rf_PrintThroughputStats(RF_Raid_t *raidPtr)
806 {
807 	RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
808 	if (raidPtr->throughputstats.sum_io_us != 0) {
809 		printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios
810 		    / (raidPtr->throughputstats.sum_io_us / 1000000.0));
811 	}
812 }
813 #endif				/* !KERNEL && !SIMULATE */
814 
815 void
816 rf_StartUserStats(RF_Raid_t *raidPtr)
817 {
818 	RF_GETTIME(raidPtr->userstats.start);
819 	raidPtr->userstats.sum_io_us = 0;
820 	raidPtr->userstats.num_ios = 0;
821 	raidPtr->userstats.num_sect_moved = 0;
822 }
823 
/* End a user-statistics interval by recording the stop time. */
void
rf_StopUserStats(RF_Raid_t *raidPtr)
{
	RF_GETTIME(raidPtr->userstats.stop);
}
829 
830 /* rt: resp time in us
831    numsect: number of sectors for this access */
832 void
833 rf_UpdateUserStats(RF_Raid_t *raidPtr, int rt, int numsect)
834 {
835 	raidPtr->userstats.sum_io_us += rt;
836 	raidPtr->userstats.num_ios++;
837 	raidPtr->userstats.num_sect_moved += numsect;
838 }
839 
840 void
841 rf_PrintUserStats(RF_Raid_t *raidPtr)
842 {
843 	long    elapsed_us, mbs, mbs_frac;
844 	struct timeval diff;
845 
846 	RF_TIMEVAL_DIFF(&raidPtr->userstats.start,
847 			&raidPtr->userstats.stop, &diff);
848 	elapsed_us = TIMEVAL_TO_US(diff);
849 
850 	/* 2000 sectors per megabyte, 10000000 microseconds per second */
851 	if (elapsed_us)
852 		mbs = (raidPtr->userstats.num_sect_moved / 2000) /
853 			(elapsed_us / 1000000);
854 	else
855 		mbs = 0;
856 
857 	/* this computes only the first digit of the fractional mb/s moved */
858 	if (elapsed_us) {
859 		mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) /
860 			    (elapsed_us / 1000000)) - (mbs * 10);
861 	} else {
862 		mbs_frac = 0;
863 	}
864 
865 	printf("raid%d: Number of I/Os:             %ld\n",
866 	       raidPtr->raidid, raidPtr->userstats.num_ios);
867 	printf("raid%d: Elapsed time (us):          %ld\n",
868 	       raidPtr->raidid, elapsed_us);
869 	printf("raid%d: User I/Os per second:       %ld\n",
870 	       raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.num_ios,
871 					     (elapsed_us / 1000000)));
872 	printf("raid%d: Average user response time: %ld us\n",
873 	       raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.sum_io_us,
874 					     raidPtr->userstats.num_ios));
875 	printf("raid%d: Total sectors moved:        %ld\n",
876 	       raidPtr->raidid, raidPtr->userstats.num_sect_moved);
877 	printf("raid%d: Average access size (sect): %ld\n",
878 	       raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.num_sect_moved,
879 					     raidPtr->userstats.num_ios));
880 	printf("raid%d: Achieved data rate:         %ld.%ld MB/sec\n",
881 	       raidPtr->raidid, mbs, mbs_frac);
882 }
883 
884 
885 void
886 rf_print_panic_message(int line, char *file)
887 {
888 	sprintf(rf_panicbuf,"raidframe error at line %d file %s",
889 		line, file);
890 }
891 
892 #ifdef RAID_DIAGNOSTIC
893 void
894 rf_print_assert_panic_message(int line,	char *file, char *condition)
895 {
896 	sprintf(rf_panicbuf,
897 		"raidframe error at line %d file %s (failed asserting %s)\n",
898 		line, file, condition);
899 }
900 #endif
901 
/*
 * Report a failed mutex initialisation.  file and line identify the
 * call site and rc is the error code that was returned.
 */
void
rf_print_unable_to_init_mutex(char *file, int line, int rc)
{
	RF_ERRORMSG3(
	    "Unable to init mutex file %s line %d rc=%d\n",
	    file,
	    line,
	    rc);
}
908 
/*
 * Report a failure to add an entry to a shutdown list.  file and line
 * identify the call site and rc is the error code that was returned.
 */
void
rf_print_unable_to_add_shutdown(char *file, int line, int rc)
{
	RF_ERRORMSG3(
	    "Unable to add to shutdown list file %s line %d rc=%d\n",
	    file,
	    line,
	    rc);
}
915