xref: /netbsd-src/sys/dev/raidframe/rf_driver.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: rf_driver.c,v 1.115 2007/12/05 08:39:46 ad Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed by the NetBSD
20  *        Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1995 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
43  *         Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
44  *
45  * Permission to use, copy, modify and distribute this software and
46  * its documentation is hereby granted, provided that both the copyright
47  * notice and this permission notice appear in all copies of the
48  * software, derivative works or modified versions, and any portions
49  * thereof, and that both notices appear in supporting documentation.
50  *
51  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
52  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
53  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
54  *
55  * Carnegie Mellon requests users of this software to return to
56  *
57  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
58  *  School of Computer Science
59  *  Carnegie Mellon University
60  *  Pittsburgh PA 15213-3890
61  *
62  * any improvements or extensions that they make and grant Carnegie the
63  * rights to redistribute these changes.
64  */
65 
66 /******************************************************************************
67  *
68  * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
69  *
70  * all routines are prefixed with rf_ (raidframe), to avoid conflicts.
71  *
72  ******************************************************************************/
73 
74 
75 #include <sys/cdefs.h>
76 __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.115 2007/12/05 08:39:46 ad Exp $");
77 
78 #include "opt_raid_diagnostic.h"
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/ioctl.h>
83 #include <sys/fcntl.h>
84 #include <sys/vnode.h>
85 
86 
87 #include "rf_archs.h"
88 #include "rf_threadstuff.h"
89 
90 #include <sys/errno.h>
91 
92 #include "rf_raid.h"
93 #include "rf_dag.h"
94 #include "rf_aselect.h"
95 #include "rf_diskqueue.h"
96 #include "rf_parityscan.h"
97 #include "rf_alloclist.h"
98 #include "rf_dagutils.h"
99 #include "rf_utils.h"
100 #include "rf_etimer.h"
101 #include "rf_acctrace.h"
102 #include "rf_general.h"
103 #include "rf_desc.h"
104 #include "rf_states.h"
105 #include "rf_decluster.h"
106 #include "rf_map.h"
107 #include "rf_revent.h"
108 #include "rf_callback.h"
109 #include "rf_engine.h"
110 #include "rf_mcpair.h"
111 #include "rf_nwayxor.h"
112 #include "rf_copyback.h"
113 #include "rf_driver.h"
114 #include "rf_options.h"
115 #include "rf_shutdown.h"
116 #include "rf_kintf.h"
117 
118 #include <sys/buf.h>
119 
120 #ifndef RF_ACCESS_DEBUG
121 #define RF_ACCESS_DEBUG 0
122 #endif
123 
124 /* rad == RF_RaidAccessDesc_t */
125 RF_DECLARE_MUTEX(rf_rad_lock)
126 #define RF_MAX_FREE_RAD 128
127 #define RF_MIN_FREE_RAD  32
128 
129 /* debug variables */
130 char    rf_panicbuf[2048];	/* a buffer to hold an error msg when we panic */
131 
132 /* main configuration routines */
133 static int raidframe_booted = 0;
134 
135 static void rf_ConfigureDebug(RF_Config_t * cfgPtr);
136 static void set_debug_option(char *name, long val);
137 static void rf_UnconfigureArray(void);
138 static void rf_ShutdownRDFreeList(void *);
139 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
140 
141 RF_DECLARE_MUTEX(rf_printf_mutex)	/* debug only:  avoids interleaved
142 					 * printfs by different stripes */
143 
144 #define SIGNAL_QUIESCENT_COND(_raid_)  wakeup(&((_raid_)->accesses_suspended))
145 #define WAIT_FOR_QUIESCENCE(_raid_) \
146 	ltsleep(&((_raid_)->accesses_suspended), PRIBIO, \
147 		"raidframe quiesce", 0, &((_raid_)->access_suspend_mutex))
148 
149 static int configureCount = 0;	/* number of active configurations */
150 static int isconfigged = 0;	/* is basic raidframe (non per-array)
151 				 * stuff configged */
152 RF_DECLARE_LKMGR_STATIC_MUTEX(configureMutex)	/* used to lock the configuration
153 					 * stuff */
154 static RF_ShutdownList_t *globalShutdown;	/* non array-specific
155 						 * stuff */
156 
157 static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp);
158 static int rf_AllocEmergBuffers(RF_Raid_t *);
159 static void rf_FreeEmergBuffers(RF_Raid_t *);
160 
161 /* called at system boot time */
162 int
163 rf_BootRaidframe()
164 {
165 
166 	if (raidframe_booted)
167 		return (EBUSY);
168 	raidframe_booted = 1;
169 	mutex_init(&configureMutex, MUTEX_DEFAULT, IPL_NONE);
170  	configureCount = 0;
171 	isconfigged = 0;
172 	globalShutdown = NULL;
173 	return (0);
174 }
175 
176 /*
177  * Called whenever an array is shutdown
178  */
179 static void
180 rf_UnconfigureArray()
181 {
182 
183 	RF_LOCK_LKMGR_MUTEX(configureMutex);
184 	if (--configureCount == 0) {	/* if no active configurations, shut
185 					 * everything down */
186 		isconfigged = 0;
187 		rf_ShutdownList(&globalShutdown);
188 
189 		/*
190 	         * We must wait until now, because the AllocList module
191 	         * uses the DebugMem module.
192 	         */
193 #if RF_DEBUG_MEM
194 		if (rf_memDebug)
195 			rf_print_unfreed();
196 #endif
197 	}
198 	RF_UNLOCK_LKMGR_MUTEX(configureMutex);
199 }
200 
201 /*
202  * Called to shut down an array.
203  */
204 int
205 rf_Shutdown(RF_Raid_t *raidPtr)
206 {
207 
208 	if (!raidPtr->valid) {
209 		RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver.  Aborting shutdown\n");
210 		return (EINVAL);
211 	}
212 	/*
213          * wait for outstanding IOs to land
214          * As described in rf_raid.h, we use the rad_freelist lock
215          * to protect the per-array info about outstanding descs
216          * since we need to do freelist locking anyway, and this
217          * cuts down on the amount of serialization we've got going
218          * on.
219          */
220 	RF_LOCK_MUTEX(rf_rad_lock);
221 	if (raidPtr->waitShutdown) {
222 		RF_UNLOCK_MUTEX(rf_rad_lock);
223 		return (EBUSY);
224 	}
225 	raidPtr->waitShutdown = 1;
226 	while (raidPtr->nAccOutstanding) {
227 		RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_lock);
228 	}
229 	RF_UNLOCK_MUTEX(rf_rad_lock);
230 
231 	/* Wait for any parity re-writes to stop... */
232 	while (raidPtr->parity_rewrite_in_progress) {
233 		printf("Waiting for parity re-write to exit...\n");
234 		tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
235 		       "rfprwshutdown", 0);
236 	}
237 
238 	raidPtr->valid = 0;
239 
240 	rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
241 
242 	rf_UnconfigureVnodes(raidPtr);
243 
244 	rf_FreeEmergBuffers(raidPtr);
245 
246 	rf_ShutdownList(&raidPtr->shutdownList);
247 
248 	rf_UnconfigureArray();
249 
250 	return (0);
251 }
252 
253 
/*
 * Helper macros for rf_Configure().  They rely on the locals `rc',
 * `raidPtr' and `cfgPtr' of the enclosing function and may return from
 * it.  Each one is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside an unbraced if/else) and must be
 * followed by a semicolon.
 */

/*
 * Run one global configuration step `f'.  On failure, undo the global
 * setup done so far, drop the configuration reference taken by the
 * caller, release configureMutex and return the error.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		RF_UNLOCK_LKMGR_MUTEX(configureMutex); \
		return(rc); \
	} \
} while (/*CONSTCOND*/ 0)

/* Undo a partially completed per-array configuration. */
#define DO_RAID_FAIL() do { \
	rf_UnconfigureVnodes(raidPtr); \
	rf_FreeEmergBuffers(raidPtr); \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
} while (/*CONSTCOND*/ 0)

/*
 * Run one per-array configuration step `f'.  On failure, undo the
 * per-array setup and return the error.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (/*CONSTCOND*/ 0)

/* Initialize one of the per-array mutexes. */
#define DO_RAID_MUTEX(_m_) do { \
	rf_mutex_init((_m_)); \
} while (/*CONSTCOND*/ 0)
284 
285 int
286 rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
287 {
288 	RF_RowCol_t col;
289 	int rc;
290 
291 	RF_LOCK_LKMGR_MUTEX(configureMutex);
292 	configureCount++;
293 	if (isconfigged == 0) {
294 		rf_mutex_init(&rf_printf_mutex);
295 
296 		/* initialize globals */
297 
298 		DO_INIT_CONFIGURE(rf_ConfigureAllocList);
299 
300 		/*
301 	         * Yes, this does make debugging general to the whole
302 	         * system instead of being array specific. Bummer, drag.
303 		 */
304 		rf_ConfigureDebug(cfgPtr);
305 		DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
306 #if RF_ACC_TRACE > 0
307 		DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
308 #endif
309 		DO_INIT_CONFIGURE(rf_ConfigureMapModule);
310 		DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
311 		DO_INIT_CONFIGURE(rf_ConfigureCallback);
312 		DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
313 		DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
314 		DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
315 		DO_INIT_CONFIGURE(rf_ConfigureMCPair);
316 		DO_INIT_CONFIGURE(rf_ConfigureDAGs);
317 		DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
318 		DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
319 		DO_INIT_CONFIGURE(rf_ConfigureCopyback);
320 		DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
321 		DO_INIT_CONFIGURE(rf_ConfigurePSStatus);
322 		isconfigged = 1;
323 	}
324 	RF_UNLOCK_LKMGR_MUTEX(configureMutex);
325 
326 	DO_RAID_MUTEX(&raidPtr->mutex);
327 	/* set up the cleanup list.  Do this after ConfigureDebug so that
328 	 * value of memDebug will be set */
329 
330 	rf_MakeAllocList(raidPtr->cleanupList);
331 	if (raidPtr->cleanupList == NULL) {
332 		DO_RAID_FAIL();
333 		return (ENOMEM);
334 	}
335 	rf_ShutdownCreate(&raidPtr->shutdownList,
336 			  (void (*) (void *)) rf_FreeAllocList,
337 			  raidPtr->cleanupList);
338 
339 	raidPtr->numCol = cfgPtr->numCol;
340 	raidPtr->numSpare = cfgPtr->numSpare;
341 
342 	raidPtr->status = rf_rs_optimal;
343 	raidPtr->reconControl = NULL;
344 
345 	TAILQ_INIT(&(raidPtr->iodone));
346 	simple_lock_init(&(raidPtr->iodone_lock));
347 
348 	DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
349 	DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
350 
351 	raidPtr->outstandingCond = 0;
352 
353 	raidPtr->nAccOutstanding = 0;
354 	raidPtr->waitShutdown = 0;
355 
356 	DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
357 
358 	raidPtr->waitForReconCond = 0;
359 
360 	if (ac!=NULL) {
361 		/* We have an AutoConfig structure..  Don't do the
362 		   normal disk configuration... call the auto config
363 		   stuff */
364 		rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
365 	} else {
366 		DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
367 		DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
368 	}
369 	/* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev
370 	 * no. is set */
371 	DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
372 
373 	DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
374 
375 	/* Initialize per-RAID PSS bits */
376 	rf_InitPSStatus(raidPtr);
377 
378 #if RF_INCLUDE_CHAINDECLUSTER > 0
379 	for (col = 0; col < raidPtr->numCol; col++) {
380 		/*
381 		 * XXX better distribution
382 		 */
383 		raidPtr->hist_diskreq[col] = 0;
384 	}
385 #endif
386 	raidPtr->numNewFailures = 0;
387 	raidPtr->copyback_in_progress = 0;
388 	raidPtr->parity_rewrite_in_progress = 0;
389 	raidPtr->adding_hot_spare = 0;
390 	raidPtr->recon_in_progress = 0;
391 	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
392 
393 	/* autoconfigure and root_partition will actually get filled in
394 	   after the config is done */
395 	raidPtr->autoconfigure = 0;
396 	raidPtr->root_partition = 0;
397 	raidPtr->last_unit = raidPtr->raidid;
398 	raidPtr->config_order = 0;
399 
400 	if (rf_keepAccTotals) {
401 		raidPtr->keep_acc_totals = 1;
402 	}
403 
404 	/* Allocate a bunch of buffers to be used in low-memory conditions */
405 	raidPtr->iobuf = NULL;
406 
407 	rc = rf_AllocEmergBuffers(raidPtr);
408 	if (rc) {
409 		printf("raid%d: Unable to allocate emergency buffers.\n",
410 		       raidPtr->raidid);
411 		DO_RAID_FAIL();
412 		return(rc);
413 	}
414 
415 	raidPtr->valid = 1;
416 
417 	printf("raid%d: %s\n", raidPtr->raidid,
418 	       raidPtr->Layout.map->configName);
419 	printf("raid%d: Components:", raidPtr->raidid);
420 
421 	for (col = 0; col < raidPtr->numCol; col++) {
422 		printf(" %s", raidPtr->Disks[col].devname);
423 		if (RF_DEAD_DISK(raidPtr->Disks[col].status)) {
424 			printf("[**FAILED**]");
425 		}
426 	}
427 	printf("\n");
428 	printf("raid%d: Total Sectors: %lu (%lu MB)\n",
429 	       raidPtr->raidid,
430 	       (unsigned long) raidPtr->totalSectors,
431 	       (unsigned long) (raidPtr->totalSectors / 1024 *
432 				(1 << raidPtr->logBytesPerSector) / 1024));
433 
434 	return (0);
435 }
436 
437 
438 /*
439 
440   Routines to allocate and free the "emergency buffers" for a given
441   RAID set.  These emergency buffers will be used when the kernel runs
442   out of kernel memory.
443 
444  */
445 
446 static int
447 rf_AllocEmergBuffers(RF_Raid_t *raidPtr)
448 {
449 	void *tmpbuf;
450 	RF_VoidPointerListElem_t *vple;
451 	int i;
452 
453 	/* XXX next line needs tuning... */
454 	raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol;
455 #if DEBUG
456 	printf("raid%d: allocating %d buffers of %d bytes.\n",
457 	       raidPtr->raidid,
458 	       raidPtr->numEmergencyBuffers,
459 	       (int)(raidPtr->Layout.sectorsPerStripeUnit <<
460 	       raidPtr->logBytesPerSector));
461 #endif
462 	for (i = 0; i < raidPtr->numEmergencyBuffers; i++) {
463 		tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit <<
464 				 raidPtr->logBytesPerSector,
465 				 M_RAIDFRAME, M_WAITOK);
466 		if (tmpbuf) {
467 			vple = rf_AllocVPListElem();
468 			vple->p= tmpbuf;
469 			vple->next = raidPtr->iobuf;
470 			raidPtr->iobuf = vple;
471 			raidPtr->iobuf_count++;
472 		} else {
473 			printf("raid%d: failed to allocate emergency buffer!\n",
474 			       raidPtr->raidid);
475 			return 1;
476 		}
477 	}
478 
479 	/* XXX next line needs tuning too... */
480 	raidPtr->numEmergencyStripeBuffers = 10;
481         for (i = 0; i < raidPtr->numEmergencyStripeBuffers; i++) {
482                 tmpbuf = malloc( raidPtr->numCol * (raidPtr->Layout.sectorsPerStripeUnit <<
483                                  raidPtr->logBytesPerSector),
484                                  M_RAIDFRAME, M_WAITOK);
485                 if (tmpbuf) {
486                         vple = rf_AllocVPListElem();
487                         vple->p= tmpbuf;
488                         vple->next = raidPtr->stripebuf;
489                         raidPtr->stripebuf = vple;
490                         raidPtr->stripebuf_count++;
491                 } else {
492                         printf("raid%d: failed to allocate emergency stripe buffer!\n",
493                                raidPtr->raidid);
494 			return 1;
495                 }
496         }
497 
498 	return (0);
499 }
500 
501 static void
502 rf_FreeEmergBuffers(RF_Raid_t *raidPtr)
503 {
504 	RF_VoidPointerListElem_t *tmp;
505 
506 	/* Free the emergency IO buffers */
507 	while (raidPtr->iobuf != NULL) {
508 		tmp = raidPtr->iobuf;
509 		raidPtr->iobuf = raidPtr->iobuf->next;
510 		free(tmp->p, M_RAIDFRAME);
511 		rf_FreeVPListElem(tmp);
512 	}
513 
514 	/* Free the emergency stripe buffers */
515 	while (raidPtr->stripebuf != NULL) {
516 		tmp = raidPtr->stripebuf;
517 		raidPtr->stripebuf = raidPtr->stripebuf->next;
518 		free(tmp->p, M_RAIDFRAME);
519 		rf_FreeVPListElem(tmp);
520 	}
521 }
522 
523 
524 static void
525 rf_ShutdownRDFreeList(void *ignored)
526 {
527 	pool_destroy(&rf_pools.rad);
528 }
529 
530 static int
531 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
532 {
533 
534 	rf_pool_init(&rf_pools.rad, sizeof(RF_RaidAccessDesc_t),
535 		     "rf_rad_pl", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD);
536 	rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
537 	simple_lock_init(&rf_rad_lock);
538 	return (0);
539 }
540 
541 RF_RaidAccessDesc_t *
542 rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type,
543 		    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
544 		    void *bufPtr, void *bp, RF_RaidAccessFlags_t flags,
545 		    const RF_AccessState_t *states)
546 {
547 	RF_RaidAccessDesc_t *desc;
548 
549 	desc = pool_get(&rf_pools.rad, PR_WAITOK);
550 
551 	RF_LOCK_MUTEX(rf_rad_lock);
552 	if (raidPtr->waitShutdown) {
553 		/*
554 	         * Actually, we're shutting the array down. Free the desc
555 	         * and return NULL.
556 	         */
557 
558 		RF_UNLOCK_MUTEX(rf_rad_lock);
559 		pool_put(&rf_pools.rad, desc);
560 		return (NULL);
561 	}
562 	raidPtr->nAccOutstanding++;
563 
564 	RF_UNLOCK_MUTEX(rf_rad_lock);
565 
566 	desc->raidPtr = (void *) raidPtr;
567 	desc->type = type;
568 	desc->raidAddress = raidAddress;
569 	desc->numBlocks = numBlocks;
570 	desc->bufPtr = bufPtr;
571 	desc->bp = bp;
572 	desc->flags = flags;
573 	desc->states = states;
574 	desc->state = 0;
575 	desc->dagList = NULL;
576 
577 	desc->status = 0;
578 	desc->numRetries = 0;
579 #if RF_ACC_TRACE > 0
580 	memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t));
581 #endif
582 	desc->callbackFunc = NULL;
583 	desc->callbackArg = NULL;
584 	desc->next = NULL;
585 	desc->iobufs = NULL;
586 	desc->stripebufs = NULL;
587 
588 	return (desc);
589 }
590 
591 void
592 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
593 {
594 	RF_Raid_t *raidPtr = desc->raidPtr;
595 	RF_DagList_t *dagList, *temp;
596 	RF_VoidPointerListElem_t *tmp;
597 
598 	RF_ASSERT(desc);
599 
600 	/* Cleanup the dagList(s) */
601 	dagList = desc->dagList;
602 	while(dagList != NULL) {
603 		temp = dagList;
604 		dagList = dagList->next;
605 		rf_FreeDAGList(temp);
606 	}
607 
608 	while (desc->iobufs) {
609 		tmp = desc->iobufs;
610 		desc->iobufs = desc->iobufs->next;
611 		rf_FreeIOBuffer(raidPtr, tmp);
612 	}
613 
614 	while (desc->stripebufs) {
615 		tmp = desc->stripebufs;
616 		desc->stripebufs = desc->stripebufs->next;
617 		rf_FreeStripeBuffer(raidPtr, tmp);
618 	}
619 
620 	pool_put(&rf_pools.rad, desc);
621 	RF_LOCK_MUTEX(rf_rad_lock);
622 	raidPtr->nAccOutstanding--;
623 	if (raidPtr->waitShutdown) {
624 		RF_SIGNAL_COND(raidPtr->outstandingCond);
625 	}
626 	RF_UNLOCK_MUTEX(rf_rad_lock);
627 }
628 /*********************************************************************
629  * Main routine for performing an access.
630  * Accesses are retried until a DAG can not be selected.  This occurs
631  * when either the DAG library is incomplete or there are too many
632  * failures in a parity group.
633  *
634  * type should be read or write async_flag should be RF_TRUE or
635  * RF_FALSE bp_in is a buf pointer.  void *to facilitate ignoring it
636  * outside the kernel
637  ********************************************************************/
638 int
639 rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag,
640 	    RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks,
641 	    void *bufPtr, struct buf *bp, RF_RaidAccessFlags_t flags)
642 {
643 	RF_RaidAccessDesc_t *desc;
644 	void *lbufPtr = bufPtr;
645 
646 	raidAddress += rf_raidSectorOffset;
647 
648 #if RF_ACCESS_DEBUG
649 	if (rf_accessDebug) {
650 
651 		printf("logBytes is: %d %d %d\n", raidPtr->raidid,
652 		    raidPtr->logBytesPerSector,
653 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks));
654 		printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid,
655 		    (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
656 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
657 		    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
658 		    (int) numBlocks,
659 		    (int) rf_RaidAddressToByte(raidPtr, numBlocks),
660 		    (long) bufPtr);
661 	}
662 #endif
663 
664 	desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
665 	    numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states);
666 
667 	if (desc == NULL) {
668 		return (ENOMEM);
669 	}
670 #if RF_ACC_TRACE > 0
671 	RF_ETIMER_START(desc->tracerec.tot_timer);
672 #endif
673 	desc->async_flag = async_flag;
674 
675 	rf_ContinueRaidAccess(desc);
676 
677 	return (0);
678 }
#if 0
/*
 * NOTE: compiled out by the surrounding "#if 0"; kept for reference only.
 *
 * force the array into reconfigured mode without doing reconstruction
 *
 * Panics if the layout does not have RF_DISTRIBUTE_SPARE set.  Otherwise,
 * under raidPtr->mutex, counts a failure, marks column `col' as
 * rf_ds_dist_spared and the array as rf_rs_reconfigured, and updates the
 * component labels.  Always returns 0.
 */
int
rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col)
{
	if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
		printf("Can't set reconfigured mode in dedicated-spare array\n");
		RF_PANIC();
	}
	RF_LOCK_MUTEX(raidPtr->mutex);
	raidPtr->numFailures++;
	raidPtr->Disks[col].status = rf_ds_dist_spared;
	raidPtr->status = rf_rs_reconfigured;
	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
	/* install spare table only if declustering + distributed sparing
	 * architecture. */
	if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
		rf_InstallSpareTable(raidPtr, col);
	RF_UNLOCK_MUTEX(raidPtr->mutex);
	return (0);
}
#endif
701 
702 int
703 rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon)
704 {
705 
706 	/* need to suspend IO's here -- if there are DAGs in flight
707 	   and we pull the rug out from under ci_vp, Bad Things
708 	   can happen.  */
709 
710 	rf_SuspendNewRequestsAndWait(raidPtr);
711 
712 	RF_LOCK_MUTEX(raidPtr->mutex);
713 	if (raidPtr->Disks[fcol].status != rf_ds_failed) {
714 		/* must be failing something that is valid, or else it's
715 		   already marked as failed (in which case we don't
716 		   want to mark it failed again!) */
717 		raidPtr->numFailures++;
718 		raidPtr->Disks[fcol].status = rf_ds_failed;
719 		raidPtr->status = rf_rs_degraded;
720 	}
721 	RF_UNLOCK_MUTEX(raidPtr->mutex);
722 
723 	rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
724 
725 	/* Close the component, so that it's not "locked" if someone
726 	   else want's to use it! */
727 
728 	rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp,
729 			   raidPtr->Disks[fcol].auto_configured);
730 
731 	RF_LOCK_MUTEX(raidPtr->mutex);
732 	raidPtr->raid_cinfo[fcol].ci_vp = NULL;
733 
734 	/* Need to mark the component as not being auto_configured
735 	   (in case it was previously). */
736 
737 	raidPtr->Disks[fcol].auto_configured = 0;
738 	RF_UNLOCK_MUTEX(raidPtr->mutex);
739 	/* now we can allow IO to continue -- we'll be suspending it
740 	   again in rf_ReconstructFailedDisk() if we have to.. */
741 
742 	rf_ResumeNewRequests(raidPtr);
743 
744 	if (initRecon)
745 		rf_ReconstructFailedDisk(raidPtr, fcol);
746 	return (0);
747 }
748 /* releases a thread that is waiting for the array to become quiesced.
749  * access_suspend_mutex should be locked upon calling this
750  */
751 void
752 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr)
753 {
754 #if RF_DEBUG_QUIESCE
755 	if (rf_quiesceDebug) {
756 		printf("raid%d: Signalling quiescence lock\n",
757 		       raidPtr->raidid);
758 	}
759 #endif
760 	raidPtr->access_suspend_release = 1;
761 
762 	if (raidPtr->waiting_for_quiescence) {
763 		SIGNAL_QUIESCENT_COND(raidPtr);
764 	}
765 }
766 /* suspends all new requests to the array.  No effect on accesses that are in flight.  */
767 int
768 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
769 {
770 #if RF_DEBUG_QUIESCE
771 	if (rf_quiesceDebug)
772 		printf("raid%d: Suspending new reqs\n", raidPtr->raidid);
773 #endif
774 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
775 	raidPtr->accesses_suspended++;
776 	raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
777 
778 	if (raidPtr->waiting_for_quiescence) {
779 		raidPtr->access_suspend_release = 0;
780 		while (!raidPtr->access_suspend_release) {
781 #if RF_DEBUG_QUIESCE
782 			printf("raid%d: Suspending: Waiting for Quiescence\n",
783 			       raidPtr->raidid);
784 #endif
785 			WAIT_FOR_QUIESCENCE(raidPtr);
786 			raidPtr->waiting_for_quiescence = 0;
787 		}
788 	}
789 #if RF_DEBUG_QUIESCE
790 	printf("raid%d: Quiescence reached..\n", raidPtr->raidid);
791 #endif
792 
793 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
794 	return (raidPtr->waiting_for_quiescence);
795 }
796 /* wake up everyone waiting for quiescence to be released */
797 void
798 rf_ResumeNewRequests(RF_Raid_t *raidPtr)
799 {
800 	RF_CallbackDesc_t *t, *cb;
801 
802 #if RF_DEBUG_QUIESCE
803 	if (rf_quiesceDebug)
804 		printf("Resuming new reqs\n");
805 #endif
806 
807 	RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
808 	raidPtr->accesses_suspended--;
809 	if (raidPtr->accesses_suspended == 0)
810 		cb = raidPtr->quiesce_wait_list;
811 	else
812 		cb = NULL;
813 	raidPtr->quiesce_wait_list = NULL;
814 	RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
815 
816 	while (cb) {
817 		t = cb;
818 		cb = cb->next;
819 		(t->callbackFunc) (t->callbackArg);
820 		rf_FreeCallbackDesc(t);
821 	}
822 }
823 /*****************************************************************************************
824  *
825  * debug routines
826  *
827  ****************************************************************************************/
828 
829 static void
830 set_debug_option(char *name, long val)
831 {
832 	RF_DebugName_t *p;
833 
834 	for (p = rf_debugNames; p->name; p++) {
835 		if (!strcmp(p->name, name)) {
836 			*(p->ptr) = val;
837 			printf("[Set debug variable %s to %ld]\n", name, val);
838 			return;
839 		}
840 	}
841 	RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
842 }
843 
844 
845 /* would like to use sscanf here, but apparently not available in kernel */
846 /*ARGSUSED*/
847 static void
848 rf_ConfigureDebug(RF_Config_t *cfgPtr)
849 {
850 	char   *val_p, *name_p, *white_p;
851 	long    val;
852 	int     i;
853 
854 	rf_ResetDebugOptions();
855 	for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
856 		name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
857 		white_p = rf_find_white(name_p);	/* skip to start of 2nd
858 							 * word */
859 		val_p = rf_find_non_white(white_p);
860 		if (*val_p == '0' && *(val_p + 1) == 'x')
861 			val = rf_htoi(val_p + 2);
862 		else
863 			val = rf_atoi(val_p);
864 		*white_p = '\0';
865 		set_debug_option(name_p, val);
866 	}
867 }
868 
869 void
870 rf_print_panic_message(int line, const char *file)
871 {
872 	snprintf(rf_panicbuf, sizeof(rf_panicbuf),
873 	    "raidframe error at line %d file %s", line, file);
874 }
875 
#ifdef RAID_DIAGNOSTIC
/*
 * Format the location of a failed assertion (line, file and the text of
 * the condition) into rf_panicbuf.  Only built with RAID_DIAGNOSTIC.
 */
void
rf_print_assert_panic_message(int line,	const char *file, const char *condition)
{

	snprintf(rf_panicbuf, sizeof(rf_panicbuf),
		 "raidframe error at line %d file %s (failed asserting %s)\n",
		 line, file, condition);
}
#endif
885 
/*
 * Report, through RF_ERRORMSG3, that a mutex could not be initialized
 * at the given file/line, together with the error code.
 */
void
rf_print_unable_to_init_mutex(const char *file, int line, int rc)
{

	RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n",
	    file, line, rc);
}
892 
/*
 * Report, through RF_ERRORMSG3, that an entry could not be added to a
 * shutdown list at the given file/line, together with the error code.
 */
void
rf_print_unable_to_add_shutdown(const char *file, int line, int rc)
{

	RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
	    file, line, rc);
}
899