xref: /netbsd-src/sys/dev/raidframe/rf_netbsdkintf.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $	*/
2 
3 /*-
4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Greg Oster; Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988 University of Utah.
34  * Copyright (c) 1990, 1993
35  *      The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
66  *
67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
68  */
69 
70 /*
71  * Copyright (c) 1995 Carnegie-Mellon University.
72  * All rights reserved.
73  *
74  * Authors: Mark Holland, Jim Zelenka
75  *
76  * Permission to use, copy, modify and distribute this software and
77  * its documentation is hereby granted, provided that both the copyright
78  * notice and this permission notice appear in all copies of the
79  * software, derivative works or modified versions, and any portions
80  * thereof, and that both notices appear in supporting documentation.
81  *
82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85  *
86  * Carnegie Mellon requests users of this software to return to
87  *
88  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
89  *  School of Computer Science
90  *  Carnegie Mellon University
91  *  Pittsburgh PA 15213-3890
92  *
93  * any improvements or extensions that they make and grant Carnegie the
94  * rights to redistribute these changes.
95  */
96 
97 /***********************************************************
98  *
99  * rf_kintf.c -- the kernel interface routines for RAIDframe
100  *
101  ***********************************************************/
102 
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $");
105 
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_compat_netbsd32.h"
109 #include "opt_raid_autoconfig.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 #include <sys/module.h>
131 
132 #include <prop/proplib.h>
133 
134 #include <dev/raidframe/raidframevar.h>
135 #include <dev/raidframe/raidframeio.h>
136 #include <dev/raidframe/rf_paritymap.h>
137 
138 #include "rf_raid.h"
139 #include "rf_copyback.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_desc.h"
143 #include "rf_diskqueue.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_kintf.h"
147 #include "rf_options.h"
148 #include "rf_driver.h"
149 #include "rf_parityscan.h"
150 #include "rf_threadstuff.h"
151 
152 #ifdef COMPAT_50
153 #include "rf_compat50.h"
154 #endif
155 
156 #ifdef COMPAT_80
157 #include "rf_compat80.h"
158 #endif
159 
160 #ifdef COMPAT_NETBSD32
161 #include "rf_compat32.h"
162 #endif
163 
164 #include "ioconf.h"
165 
166 #ifdef DEBUG
167 int     rf_kdebug_level = 0;
168 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
169 #else				/* DEBUG */
170 #define db1_printf(a) { }
171 #endif				/* DEBUG */
172 
173 #ifdef DEBUG_ROOT
174 #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
175 #else
176 #define DPRINTF(a, ...)
177 #endif
178 
179 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
180 static rf_declare_mutex2(rf_sparet_wait_mutex);
181 static rf_declare_cond2(rf_sparet_wait_cv);
182 static rf_declare_cond2(rf_sparet_resp_cv);
183 
184 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
185 						 * spare table */
186 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
187 						 * installation process */
188 #endif
189 
190 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
191 
192 /* prototypes */
193 static void KernelWakeupFunc(struct buf *);
194 static void InitBP(struct buf *, struct vnode *, unsigned,
195     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
196     void *, int, struct proc *);
197 struct raid_softc;
198 static void raidinit(struct raid_softc *);
199 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
200 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
201 
202 static int raid_match(device_t, cfdata_t, void *);
203 static void raid_attach(device_t, device_t, void *);
204 static int raid_detach(device_t, int);
205 
206 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
207     daddr_t, daddr_t);
208 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
209     daddr_t, daddr_t, int);
210 
211 static int raidwrite_component_label(unsigned,
212     dev_t, struct vnode *, RF_ComponentLabel_t *);
213 static int raidread_component_label(unsigned,
214     dev_t, struct vnode *, RF_ComponentLabel_t *);
215 
216 static int raid_diskstart(device_t, struct buf *bp);
217 static int raid_dumpblocks(device_t, void *, daddr_t, int);
218 static int raid_lastclose(device_t);
219 
220 static dev_type_open(raidopen);
221 static dev_type_close(raidclose);
222 static dev_type_read(raidread);
223 static dev_type_write(raidwrite);
224 static dev_type_ioctl(raidioctl);
225 static dev_type_strategy(raidstrategy);
226 static dev_type_dump(raiddump);
227 static dev_type_size(raidsize);
228 
/* Block-device switch: entry points for the raid block device nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
239 
/* Character-device switch: raw (rraidN) device entry points. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
254 
/* Hooks handed to the generic dk(9) disk layer for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
264 
/* Per-unit software state for one RAID set. */
struct raid_softc {
	struct dk_softc sc_dksc;	/* generic dk(9) disk state; must be first */
	int	sc_unit;	/* raid unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	kmutex_t sc_mutex;	/* interlock mutex */
	kcondvar_t sc_cv;	/* and the condvar */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	RF_Raid_t sc_r;		/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global `raids' list */
};
277 /* sc_flags */
278 #define RAIDF_INITED		0x01	/* unit has been initialized */
279 #define RAIDF_SHUTDOWN		0x02	/* unit is being shutdown */
280 #define RAIDF_DETACH  		0x04	/* detach after final close */
281 #define RAIDF_WANTED		0x08	/* someone waiting to obtain a lock */
282 #define RAIDF_LOCKED		0x10	/* unit is locked */
283 #define RAIDF_UNIT_CHANGED	0x20	/* unit is being changed */
284 
285 #define	raidunit(x)	DISKUNIT(x)
286 #define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)
287 
288 extern struct cfdriver raid_cd;
289 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
290     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
291     DVF_DETACH_SHUTDOWN);
292 
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column to fail/reconstruct */
	RF_ReconReqFlags_t flags;	/* e.g. RF_FDFLAGS_RECON */
	void   *raidPtr;		/* the RF_Raid_t this request targets */
};
299 
300 /*
301  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
302  * Be aware that large numbers can allow the driver to consume a lot of
303  * kernel memory, especially on writes, and in degraded mode reads.
304  *
305  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
306  * a single 64K write will typically require 64K for the old data,
307  * 64K for the old parity, and 64K for the new parity, for a total
308  * of 192K (if the parity buffer is not re-used immediately).
309  * Even if it is used immediately, that's still 128K, which when multiplied
310  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
311  *
312  * Now in degraded mode, for example, a 64K read on the above setup may
313  * require data reconstruction, which will require *all* of the 4 remaining
314  * disks to participate -- 4 * 32K/disk == 128K again.
315  */
316 
317 #ifndef RAIDOUTSTANDING
318 #define RAIDOUTSTANDING   6
319 #endif
320 
321 #define RAIDLABELDEV(dev)	\
322 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
323 
324 /* declared here, and made public, for the benefit of KVM stuff.. */
325 
326 static int raidlock(struct raid_softc *);
327 static void raidunlock(struct raid_softc *);
328 
329 static int raid_detach_unlocked(struct raid_softc *);
330 
331 static void rf_markalldirty(RF_Raid_t *);
332 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
333 
334 void rf_ReconThread(struct rf_recon_req_internal *);
335 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
336 void rf_CopybackThread(RF_Raid_t *raidPtr);
337 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
338 int rf_autoconfig(device_t);
339 void rf_buildroothack(RF_ConfigSet_t *);
340 
341 RF_AutoConfig_t *rf_find_raid_components(void);
342 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
343 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
344 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
345 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
346 int rf_set_autoconfig(RF_Raid_t *, int);
347 int rf_set_rootpartition(RF_Raid_t *, int);
348 void rf_release_all_vps(RF_ConfigSet_t *);
349 void rf_cleanup_config_set(RF_ConfigSet_t *);
350 int rf_have_enough_components(RF_ConfigSet_t *);
351 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
352 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
353 
354 /*
355  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
356  * Note that this is overridden by having RAID_AUTOCONFIG as an option
357  * in the kernel config file.
358  */
359 #ifdef RAID_AUTOCONFIG
360 int raidautoconfig = 1;
361 #else
362 int raidautoconfig = 0;
363 #endif
364 static bool raidautoconfigdone = false;
365 
366 struct RF_Pools_s rf_pools;
367 
368 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
369 static kmutex_t raid_lock;
370 
371 static struct raid_softc *
372 raidcreate(int unit) {
373 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
374 	sc->sc_unit = unit;
375 	cv_init(&sc->sc_cv, "raidunit");
376 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
377 	return sc;
378 }
379 
380 static void
381 raiddestroy(struct raid_softc *sc) {
382 	cv_destroy(&sc->sc_cv);
383 	mutex_destroy(&sc->sc_mutex);
384 	kmem_free(sc, sizeof(*sc));
385 }
386 
387 static struct raid_softc *
388 raidget(int unit, bool create) {
389 	struct raid_softc *sc;
390 	if (unit < 0) {
391 #ifdef DIAGNOSTIC
392 		panic("%s: unit %d!", __func__, unit);
393 #endif
394 		return NULL;
395 	}
396 	mutex_enter(&raid_lock);
397 	LIST_FOREACH(sc, &raids, sc_link) {
398 		if (sc->sc_unit == unit) {
399 			mutex_exit(&raid_lock);
400 			return sc;
401 		}
402 	}
403 	mutex_exit(&raid_lock);
404 	if (!create)
405 		return NULL;
406 	if ((sc = raidcreate(unit)) == NULL)
407 		return NULL;
408 	mutex_enter(&raid_lock);
409 	LIST_INSERT_HEAD(&raids, sc, sc_link);
410 	mutex_exit(&raid_lock);
411 	return sc;
412 }
413 
/*
 * Unlink a softc from the global list and destroy it.
 * Caller must ensure the unit is no longer in use.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
421 
/*
 * Legacy pseudo-device attach hook; intentionally a no-op.
 * `num' (the requested number of units) is ignored.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
431 
432 int
433 rf_autoconfig(device_t self)
434 {
435 	RF_AutoConfig_t *ac_list;
436 	RF_ConfigSet_t *config_sets;
437 
438 	if (!raidautoconfig || raidautoconfigdone == true)
439 		return (0);
440 
441 	/* XXX This code can only be run once. */
442 	raidautoconfigdone = true;
443 
444 #ifdef __HAVE_CPU_BOOTCONF
445 	/*
446 	 * 0. find the boot device if needed first so we can use it later
447 	 * this needs to be done before we autoconfigure any raid sets,
448 	 * because if we use wedges we are not going to be able to open
449 	 * the boot device later
450 	 */
451 	if (booted_device == NULL)
452 		cpu_bootconf();
453 #endif
454 	/* 1. locate all RAID components on the system */
455 	aprint_debug("Searching for RAID components...\n");
456 	ac_list = rf_find_raid_components();
457 
458 	/* 2. Sort them into their respective sets. */
459 	config_sets = rf_create_auto_sets(ac_list);
460 
461 	/*
462 	 * 3. Evaluate each set and configure the valid ones.
463 	 * This gets done in rf_buildroothack().
464 	 */
465 	rf_buildroothack(config_sets);
466 
467 	return 1;
468 }
469 
470 static int
471 rf_containsboot(RF_Raid_t *r, device_t bdv) {
472 	const char *bootname = device_xname(bdv);
473 	size_t len = strlen(bootname);
474 
475 	for (int col = 0; col < r->numCol; col++) {
476 		const char *devname = r->Disks[col].devname;
477 		devname += sizeof("/dev/") - 1;
478 		if (strncmp(devname, "dk", 2) == 0) {
479 			const char *parent =
480 			    dkwedge_get_parent_name(r->Disks[col].dev);
481 			if (parent != NULL)
482 				devname = parent;
483 		}
484 		if (strncmp(devname, bootname, len) == 0) {
485 			struct raid_softc *sc = r->softc;
486 			aprint_debug("raid%d includes boot device %s\n",
487 			    sc->sc_unit, devname);
488 			return 1;
489 		}
490 	}
491 	return 0;
492 }
493 
494 void
495 rf_buildroothack(RF_ConfigSet_t *config_sets)
496 {
497 	RF_ConfigSet_t *cset;
498 	RF_ConfigSet_t *next_cset;
499 	int num_root;
500 	struct raid_softc *sc, *rsc;
501 	struct dk_softc *dksc;
502 
503 	sc = rsc = NULL;
504 	num_root = 0;
505 	cset = config_sets;
506 	while (cset != NULL) {
507 		next_cset = cset->next;
508 		if (rf_have_enough_components(cset) &&
509 		    cset->ac->clabel->autoconfigure == 1) {
510 			sc = rf_auto_config_set(cset);
511 			if (sc != NULL) {
512 				aprint_debug("raid%d: configured ok\n",
513 				    sc->sc_unit);
514 				if (cset->rootable) {
515 					rsc = sc;
516 					num_root++;
517 				}
518 			} else {
519 				/* The autoconfig didn't work :( */
520 				aprint_debug("Autoconfig failed\n");
521 				rf_release_all_vps(cset);
522 			}
523 		} else {
524 			/* we're not autoconfiguring this set...
525 			   release the associated resources */
526 			rf_release_all_vps(cset);
527 		}
528 		/* cleanup */
529 		rf_cleanup_config_set(cset);
530 		cset = next_cset;
531 	}
532 	dksc = &rsc->sc_dksc;
533 
534 	/* if the user has specified what the root device should be
535 	   then we don't touch booted_device or boothowto... */
536 
537 	if (rootspec != NULL)
538 		return;
539 
540 	/* we found something bootable... */
541 
542 	/*
543 	 * XXX: The following code assumes that the root raid
544 	 * is the first ('a') partition. This is about the best
545 	 * we can do with a BSD disklabel, but we might be able
546 	 * to do better with a GPT label, by setting a specified
547 	 * attribute to indicate the root partition. We can then
548 	 * stash the partition number in the r->root_partition
549 	 * high bits (the bottom 2 bits are already used). For
550 	 * now we just set booted_partition to 0 when we override
551 	 * root.
552 	 */
553 	if (num_root == 1) {
554 		device_t candidate_root;
555 		if (dksc->sc_dkdev.dk_nwedges != 0) {
556 			char cname[sizeof(cset->ac->devname)];
557 			/* XXX: assume partition 'a' first */
558 			snprintf(cname, sizeof(cname), "%s%c",
559 			    device_xname(dksc->sc_dev), 'a');
560 			candidate_root = dkwedge_find_by_wname(cname);
561 			DPRINTF("%s: candidate wedge root=%s\n", __func__,
562 			    cname);
563 			if (candidate_root == NULL) {
564 				/*
565 				 * If that is not found, because we don't use
566 				 * disklabel, return the first dk child
567 				 * XXX: we can skip the 'a' check above
568 				 * and always do this...
569 				 */
570 				size_t i = 0;
571 				candidate_root = dkwedge_find_by_parent(
572 				    device_xname(dksc->sc_dev), &i);
573 			}
574 			DPRINTF("%s: candidate wedge root=%p\n", __func__,
575 			    candidate_root);
576 		} else
577 			candidate_root = dksc->sc_dev;
578 		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
579 		DPRINTF("%s: booted_device=%p root_partition=%d "
580 		   "contains_boot=%d\n", __func__, booted_device,
581 		   rsc->sc_r.root_partition,
582 		   rf_containsboot(&rsc->sc_r, booted_device));
583 		if (booted_device == NULL ||
584 		    rsc->sc_r.root_partition == 1 ||
585 		    rf_containsboot(&rsc->sc_r, booted_device)) {
586 			booted_device = candidate_root;
587 			booted_method = "raidframe/single";
588 			booted_partition = 0;	/* XXX assume 'a' */
589 		}
590 	} else if (num_root > 1) {
591 		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
592 		    booted_device);
593 
594 		/*
595 		 * Maybe the MD code can help. If it cannot, then
596 		 * setroot() will discover that we have no
597 		 * booted_device and will ask the user if nothing was
598 		 * hardwired in the kernel config file
599 		 */
600 		if (booted_device == NULL)
601 			return;
602 
603 		num_root = 0;
604 		mutex_enter(&raid_lock);
605 		LIST_FOREACH(sc, &raids, sc_link) {
606 			RF_Raid_t *r = &sc->sc_r;
607 			if (r->valid == 0)
608 				continue;
609 
610 			if (r->root_partition == 0)
611 				continue;
612 
613 			if (rf_containsboot(r, booted_device)) {
614 				num_root++;
615 				rsc = sc;
616 				dksc = &rsc->sc_dksc;
617 			}
618 		}
619 		mutex_exit(&raid_lock);
620 
621 		if (num_root == 1) {
622 			booted_device = dksc->sc_dev;
623 			booted_method = "raidframe/multi";
624 			booted_partition = 0;	/* XXX assume 'a' */
625 		} else {
626 			/* we can't guess.. require the user to answer... */
627 			boothowto |= RB_ASKNAME;
628 		}
629 	}
630 }
631 
632 static int
633 raidsize(dev_t dev)
634 {
635 	struct raid_softc *rs;
636 	struct dk_softc *dksc;
637 	unsigned int unit;
638 
639 	unit = raidunit(dev);
640 	if ((rs = raidget(unit, false)) == NULL)
641 		return -1;
642 	dksc = &rs->sc_dksc;
643 
644 	if ((rs->sc_flags & RAIDF_INITED) == 0)
645 		return -1;
646 
647 	return dk_size(dksc, dev);
648 }
649 
650 static int
651 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
652 {
653 	unsigned int unit;
654 	struct raid_softc *rs;
655 	struct dk_softc *dksc;
656 
657 	unit = raidunit(dev);
658 	if ((rs = raidget(unit, false)) == NULL)
659 		return ENXIO;
660 	dksc = &rs->sc_dksc;
661 
662 	if ((rs->sc_flags & RAIDF_INITED) == 0)
663 		return ENODEV;
664 
665         /*
666            Note that blkno is relative to this particular partition.
667            By adding adding RF_PROTECTED_SECTORS, we get a value that
668 	   is relative to the partition used for the underlying component.
669         */
670 	blkno += RF_PROTECTED_SECTORS;
671 
672 	return dk_dump(dksc, dev, blkno, va, size);
673 }
674 
/*
 * Write `nblk' blocks from `va' at block `blkno' to a live component
 * of a RAID 1 set, for the kernel crash-dump path (via rf_dkdriver).
 * Only RAID 1 sets are supported.  Returns 0 or an errno.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int     c, sparecol, j, scol, dumpto;
	int     error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* Dump straight through the component's block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
780 
781 /* ARGSUSED */
782 static int
783 raidopen(dev_t dev, int flags, int fmt,
784     struct lwp *l)
785 {
786 	int     unit = raidunit(dev);
787 	struct raid_softc *rs;
788 	struct dk_softc *dksc;
789 	int     error = 0;
790 	int     part, pmask;
791 
792 	if ((rs = raidget(unit, true)) == NULL)
793 		return ENXIO;
794 	if ((error = raidlock(rs)) != 0)
795 		return (error);
796 
797 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
798 		error = EBUSY;
799 		goto bad;
800 	}
801 
802 	dksc = &rs->sc_dksc;
803 
804 	part = DISKPART(dev);
805 	pmask = (1 << part);
806 
807 	if (!DK_BUSY(dksc, pmask) &&
808 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
809 		/* First one... mark things as dirty... Note that we *MUST*
810 		 have done a configure before this.  I DO NOT WANT TO BE
811 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
812 		 THAT THEY BELONG TOGETHER!!!!! */
813 		/* XXX should check to see if we're only open for reading
814 		   here... If so, we needn't do this, but then need some
815 		   other way of keeping track of what's happened.. */
816 
817 		rf_markalldirty(&rs->sc_r);
818 	}
819 
820 	if ((rs->sc_flags & RAIDF_INITED) != 0)
821 		error = dk_open(dksc, dev, flags, fmt, l);
822 
823 bad:
824 	raidunlock(rs);
825 
826 	return (error);
827 
828 
829 }
830 
831 static int
832 raid_lastclose(device_t self)
833 {
834 	struct raid_softc *rs = raidsoftc(self);
835 
836 	/* Last one... device is not unconfigured yet.
837 	   Device shutdown has taken care of setting the
838 	   clean bits if RAIDF_INITED is not set
839 	   mark things as clean... */
840 
841 	rf_update_component_labels(&rs->sc_r,
842 	    RF_FINAL_COMPONENT_UPDATE);
843 
844 	/* pass to unlocked code */
845 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
846 		rs->sc_flags |= RAIDF_DETACH;
847 
848 	return 0;
849 }
850 
851 /* ARGSUSED */
852 static int
853 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
854 {
855 	int     unit = raidunit(dev);
856 	struct raid_softc *rs;
857 	struct dk_softc *dksc;
858 	cfdata_t cf;
859 	int     error = 0, do_detach = 0, do_put = 0;
860 
861 	if ((rs = raidget(unit, false)) == NULL)
862 		return ENXIO;
863 	dksc = &rs->sc_dksc;
864 
865 	if ((error = raidlock(rs)) != 0)
866 		return (error);
867 
868 	if ((rs->sc_flags & RAIDF_INITED) != 0) {
869 		error = dk_close(dksc, dev, flags, fmt, l);
870 		if ((rs->sc_flags & RAIDF_DETACH) != 0)
871 			do_detach = 1;
872 	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
873 		do_put = 1;
874 
875 	raidunlock(rs);
876 
877 	if (do_detach) {
878 		/* free the pseudo device attach bits */
879 		cf = device_cfdata(dksc->sc_dev);
880 		error = config_detach(dksc->sc_dev, 0);
881 		if (error == 0)
882 			free(cf, M_RAIDFRAME);
883 	} else if (do_put) {
884 		raidput(rs);
885 	}
886 
887 	return (error);
888 
889 }
890 
/*
 * Poke the RAID I/O thread: signal iodone_cv under iodone_lock so
 * queued work gets picked up.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
898 
899 static void
900 raidstrategy(struct buf *bp)
901 {
902 	unsigned int unit;
903 	struct raid_softc *rs;
904 	struct dk_softc *dksc;
905 	RF_Raid_t *raidPtr;
906 
907 	unit = raidunit(bp->b_dev);
908 	if ((rs = raidget(unit, false)) == NULL) {
909 		bp->b_error = ENXIO;
910 		goto fail;
911 	}
912 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
913 		bp->b_error = ENXIO;
914 		goto fail;
915 	}
916 	dksc = &rs->sc_dksc;
917 	raidPtr = &rs->sc_r;
918 
919 	/* Queue IO only */
920 	if (dk_strategy_defer(dksc, bp))
921 		goto done;
922 
923 	/* schedule the IO to happen at the next convenient time */
924 	raid_wakeup(raidPtr);
925 
926 done:
927 	return;
928 
929 fail:
930 	bp->b_resid = bp->b_bcount;
931 	biodone(bp);
932 }
933 
934 static int
935 raid_diskstart(device_t dev, struct buf *bp)
936 {
937 	struct raid_softc *rs = raidsoftc(dev);
938 	RF_Raid_t *raidPtr;
939 
940 	raidPtr = &rs->sc_r;
941 	if (!raidPtr->valid) {
942 		db1_printf(("raid is not valid..\n"));
943 		return ENODEV;
944 	}
945 
946 	/* XXX */
947 	bp->b_resid = 0;
948 
949 	return raiddoaccess(raidPtr, bp);
950 }
951 
952 void
953 raiddone(RF_Raid_t *raidPtr, struct buf *bp)
954 {
955 	struct raid_softc *rs;
956 	struct dk_softc *dksc;
957 
958 	rs = raidPtr->softc;
959 	dksc = &rs->sc_dksc;
960 
961 	dk_done(dksc, bp);
962 
963 	rf_lock_mutex2(raidPtr->mutex);
964 	raidPtr->openings++;
965 	rf_unlock_mutex2(raidPtr->mutex);
966 
967 	/* schedule more IO */
968 	raid_wakeup(raidPtr);
969 }
970 
971 /* ARGSUSED */
972 static int
973 raidread(dev_t dev, struct uio *uio, int flags)
974 {
975 	int     unit = raidunit(dev);
976 	struct raid_softc *rs;
977 
978 	if ((rs = raidget(unit, false)) == NULL)
979 		return ENXIO;
980 
981 	if ((rs->sc_flags & RAIDF_INITED) == 0)
982 		return (ENXIO);
983 
984 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
985 
986 }
987 
988 /* ARGSUSED */
989 static int
990 raidwrite(dev_t dev, struct uio *uio, int flags)
991 {
992 	int     unit = raidunit(dev);
993 	struct raid_softc *rs;
994 
995 	if ((rs = raidget(unit, false)) == NULL)
996 		return ENXIO;
997 
998 	if ((rs->sc_flags & RAIDF_INITED) == 0)
999 		return (ENXIO);
1000 
1001 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
1002 
1003 }
1004 
/*
 * Tear down a configured RAID set; the caller holds the unit lock
 * (raidlock).  Fails with EBUSY while the device is open or while
 * reconstruction, parity rewrite or copyback is in progress.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while open or while background operations run. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Nothing to do if the set was never configured. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1042 
1043 static int
1044 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1045 {
1046 	int     unit = raidunit(dev);
1047 	int     error = 0;
1048 	int     part, pmask;
1049 	struct raid_softc *rs;
1050 	struct dk_softc *dksc;
1051 	RF_Config_t *k_cfg, *u_cfg;
1052 	RF_Raid_t *raidPtr;
1053 	RF_RaidDisk_t *diskPtr;
1054 	RF_AccTotals_t *totals;
1055 	RF_DeviceConfig_t *d_cfg, *ucfgp;
1056 	u_char *specific_buf;
1057 	int retcode = 0;
1058 	int column;
1059 /*	int raidid; */
1060 	struct rf_recon_req *rr;
1061 	struct rf_recon_req_internal *rrint;
1062 	RF_ComponentLabel_t *clabel;
1063 	RF_ComponentLabel_t *ci_label;
1064 	RF_SingleComponent_t *sparePtr,*componentPtr;
1065 	RF_SingleComponent_t component;
1066 	int d;
1067 
1068 	if ((rs = raidget(unit, false)) == NULL)
1069 		return ENXIO;
1070 	dksc = &rs->sc_dksc;
1071 	raidPtr = &rs->sc_r;
1072 
1073 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1074 		(int) DISKPART(dev), (int) unit, cmd));
1075 
1076 	/* Must be initialized for these... */
1077 	switch (cmd) {
1078 	case RAIDFRAME_REWRITEPARITY:
1079 	case RAIDFRAME_GET_INFO:
1080 	case RAIDFRAME_RESET_ACCTOTALS:
1081 	case RAIDFRAME_GET_ACCTOTALS:
1082 	case RAIDFRAME_KEEP_ACCTOTALS:
1083 	case RAIDFRAME_GET_SIZE:
1084 	case RAIDFRAME_FAIL_DISK:
1085 	case RAIDFRAME_COPYBACK:
1086 	case RAIDFRAME_CHECK_RECON_STATUS:
1087 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1088 	case RAIDFRAME_GET_COMPONENT_LABEL:
1089 	case RAIDFRAME_SET_COMPONENT_LABEL:
1090 	case RAIDFRAME_ADD_HOT_SPARE:
1091 	case RAIDFRAME_REMOVE_HOT_SPARE:
1092 	case RAIDFRAME_INIT_LABELS:
1093 	case RAIDFRAME_REBUILD_IN_PLACE:
1094 	case RAIDFRAME_CHECK_PARITY:
1095 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1096 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1097 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1098 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1099 	case RAIDFRAME_SET_AUTOCONFIG:
1100 	case RAIDFRAME_SET_ROOT:
1101 	case RAIDFRAME_DELETE_COMPONENT:
1102 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1103 	case RAIDFRAME_PARITYMAP_STATUS:
1104 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1105 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1106 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1107 #ifdef COMPAT_50
1108 	case RAIDFRAME_GET_INFO50:
1109 #endif
1110 #ifdef COMPAT_80
1111 	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
1112 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
1113 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
1114 	case RAIDFRAME_GET_INFO80:
1115 	case RAIDFRAME_GET_COMPONENT_LABEL80:
1116 #endif
1117 #ifdef COMPAT_NETBSD32
1118 #ifdef _LP64
1119 	case RAIDFRAME_GET_INFO32:
1120 #endif
1121 #endif
1122 		if ((rs->sc_flags & RAIDF_INITED) == 0)
1123 			return (ENXIO);
1124 	}
1125 
1126 	switch (cmd) {
1127 #ifdef COMPAT_50
1128 	case RAIDFRAME_GET_INFO50:
1129 		return rf_get_info50(raidPtr, data);
1130 
1131 	case RAIDFRAME_CONFIGURE50:
1132 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1133 			return retcode;
1134 		goto config;
1135 #endif
1136 
1137 #ifdef COMPAT_80
1138 	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
1139 		return rf_check_recon_status_ext80(raidPtr, data);
1140 
1141 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
1142 		return rf_check_parityrewrite_status_ext80(raidPtr, data);
1143 
1144 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
1145 		return rf_check_copyback_status_ext80(raidPtr, data);
1146 
1147 	case RAIDFRAME_GET_INFO80:
1148 		return rf_get_info80(raidPtr, data);
1149 
1150 	case RAIDFRAME_GET_COMPONENT_LABEL80:
1151 		return rf_get_component_label80(raidPtr, data);
1152 
1153 	case RAIDFRAME_CONFIGURE80:
1154 		if ((retcode = rf_config80(raidPtr, unit, data, &k_cfg)) != 0)
1155 			return retcode;
1156 		goto config;
1157 #endif
1158 
1159 		/* configure the system */
1160 	case RAIDFRAME_CONFIGURE:
1161 #ifdef COMPAT_NETBSD32
1162 #ifdef _LP64
1163 	case RAIDFRAME_CONFIGURE32:
1164 #endif
1165 #endif
1166 
1167 		if (raidPtr->valid) {
1168 			/* There is a valid RAID set running on this unit! */
1169 			printf("raid%d: Device already configured!\n",unit);
1170 			return(EINVAL);
1171 		}
1172 
1173 		/* copy-in the configuration information */
1174 		/* data points to a pointer to the configuration structure */
1175 
1176 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1177 		if (k_cfg == NULL) {
1178 			return (ENOMEM);
1179 		}
1180 #ifdef COMPAT_NETBSD32
1181 #ifdef _LP64
1182 		if (cmd == RAIDFRAME_CONFIGURE32 &&
1183 		    (l->l_proc->p_flag & PK_32) != 0)
1184 			retcode = rf_config_netbsd32(data, k_cfg);
1185 		else
1186 #endif
1187 #endif
1188 		{
1189 			u_cfg = *((RF_Config_t **) data);
1190 			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1191 		}
1192 		if (retcode) {
1193 			RF_Free(k_cfg, sizeof(RF_Config_t));
1194 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1195 				retcode));
1196 			goto no_config;
1197 		}
1198 		goto config;
1199 	config:
1200 		rs->sc_flags &= ~RAIDF_SHUTDOWN;
1201 
1202 		/* allocate a buffer for the layout-specific data, and copy it
1203 		 * in */
1204 		if (k_cfg->layoutSpecificSize) {
1205 			if (k_cfg->layoutSpecificSize > 10000) {
1206 				/* sanity check */
1207 				RF_Free(k_cfg, sizeof(RF_Config_t));
1208 				retcode = EINVAL;
1209 				goto no_config;
1210 			}
1211 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1212 			    (u_char *));
1213 			if (specific_buf == NULL) {
1214 				RF_Free(k_cfg, sizeof(RF_Config_t));
1215 				retcode = ENOMEM;
1216 				goto no_config;
1217 			}
1218 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1219 			    k_cfg->layoutSpecificSize);
1220 			if (retcode) {
1221 				RF_Free(k_cfg, sizeof(RF_Config_t));
1222 				RF_Free(specific_buf,
1223 					k_cfg->layoutSpecificSize);
1224 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1225 					retcode));
1226 				goto no_config;
1227 			}
1228 		} else
1229 			specific_buf = NULL;
1230 		k_cfg->layoutSpecific = specific_buf;
1231 
1232 		/* should do some kind of sanity check on the configuration.
1233 		 * Store the sum of all the bytes in the last byte? */
1234 
1235 		/* configure the system */
1236 
1237 		/*
1238 		 * Clear the entire RAID descriptor, just to make sure
1239 		 *  there is no stale data left in the case of a
1240 		 *  reconfiguration
1241 		 */
1242 		memset(raidPtr, 0, sizeof(*raidPtr));
1243 		raidPtr->softc = rs;
1244 		raidPtr->raidid = unit;
1245 
1246 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
1247 
1248 		if (retcode == 0) {
1249 
1250 			/* allow this many simultaneous IO's to
1251 			   this RAID device */
1252 			raidPtr->openings = RAIDOUTSTANDING;
1253 
1254 			raidinit(rs);
1255 			raid_wakeup(raidPtr);
1256 			rf_markalldirty(raidPtr);
1257 		}
1258 		/* free the buffers.  No return code here. */
1259 		if (k_cfg->layoutSpecificSize) {
1260 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1261 		}
1262 		RF_Free(k_cfg, sizeof(RF_Config_t));
1263 
1264 	no_config:
1265 		/*
1266 		 * If configuration failed, set sc_flags so that we
1267 		 * will detach the device when we close it.
1268 		 */
1269 		if (retcode != 0)
1270 			rs->sc_flags |= RAIDF_SHUTDOWN;
1271 		return (retcode);
1272 
1273 		/* shutdown the system */
1274 	case RAIDFRAME_SHUTDOWN:
1275 
1276 		part = DISKPART(dev);
1277 		pmask = (1 << part);
1278 
1279 		if ((error = raidlock(rs)) != 0)
1280 			return (error);
1281 
1282 		if (DK_BUSY(dksc, pmask) ||
1283 		    raidPtr->recon_in_progress != 0 ||
1284 		    raidPtr->parity_rewrite_in_progress != 0 ||
1285 		    raidPtr->copyback_in_progress != 0)
1286 			retcode = EBUSY;
1287 		else {
1288 			/* detach and free on close */
1289 			rs->sc_flags |= RAIDF_SHUTDOWN;
1290 			retcode = 0;
1291 		}
1292 
1293 		raidunlock(rs);
1294 
1295 		return (retcode);
1296 	case RAIDFRAME_GET_COMPONENT_LABEL:
1297 		return rf_get_component_label(raidPtr, data);
1298 
1299 #if 0
1300 	case RAIDFRAME_SET_COMPONENT_LABEL:
1301 		clabel = (RF_ComponentLabel_t *) data;
1302 
1303 		/* XXX check the label for valid stuff... */
1304 		/* Note that some things *should not* get modified --
1305 		   the user should be re-initing the labels instead of
1306 		   trying to patch things.
1307 		   */
1308 
1309 		raidid = raidPtr->raidid;
1310 #ifdef DEBUG
1311 		printf("raid%d: Got component label:\n", raidid);
1312 		printf("raid%d: Version: %d\n", raidid, clabel->version);
1313 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1314 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1315 		printf("raid%d: Column: %d\n", raidid, clabel->column);
1316 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1317 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1318 		printf("raid%d: Status: %d\n", raidid, clabel->status);
1319 #endif
1320 		clabel->row = 0;
1321 		column = clabel->column;
1322 
1323 		if ((column < 0) || (column >= raidPtr->numCol)) {
1324 			return(EINVAL);
1325 		}
1326 
1327 		/* XXX this isn't allowed to do anything for now :-) */
1328 
1329 		/* XXX and before it is, we need to fill in the rest
1330 		   of the fields!?!?!?! */
1331 		memcpy(raidget_component_label(raidPtr, column),
1332 		    clabel, sizeof(*clabel));
1333 		raidflush_component_label(raidPtr, column);
1334 		return (0);
1335 #endif
1336 
1337 	case RAIDFRAME_INIT_LABELS:
1338 		clabel = (RF_ComponentLabel_t *) data;
1339 		/*
1340 		   we only want the serial number from
1341 		   the above.  We get all the rest of the information
1342 		   from the config that was used to create this RAID
1343 		   set.
1344 		   */
1345 
1346 		raidPtr->serial_number = clabel->serial_number;
1347 
1348 		for(column=0;column<raidPtr->numCol;column++) {
1349 			diskPtr = &raidPtr->Disks[column];
1350 			if (!RF_DEAD_DISK(diskPtr->status)) {
1351 				ci_label = raidget_component_label(raidPtr,
1352 				    column);
1353 				/* Zeroing this is important. */
1354 				memset(ci_label, 0, sizeof(*ci_label));
1355 				raid_init_component_label(raidPtr, ci_label);
1356 				ci_label->serial_number =
1357 				    raidPtr->serial_number;
1358 				ci_label->row = 0; /* we dont' pretend to support more */
1359 				rf_component_label_set_partitionsize(ci_label,
1360 				    diskPtr->partitionSize);
1361 				ci_label->column = column;
1362 				raidflush_component_label(raidPtr, column);
1363 			}
1364 			/* XXXjld what about the spares? */
1365 		}
1366 
1367 		return (retcode);
1368 	case RAIDFRAME_SET_AUTOCONFIG:
1369 		d = rf_set_autoconfig(raidPtr, *(int *) data);
1370 		printf("raid%d: New autoconfig value is: %d\n",
1371 		       raidPtr->raidid, d);
1372 		*(int *) data = d;
1373 		return (retcode);
1374 
1375 	case RAIDFRAME_SET_ROOT:
1376 		d = rf_set_rootpartition(raidPtr, *(int *) data);
1377 		printf("raid%d: New rootpartition value is: %d\n",
1378 		       raidPtr->raidid, d);
1379 		*(int *) data = d;
1380 		return (retcode);
1381 
1382 		/* initialize all parity */
1383 	case RAIDFRAME_REWRITEPARITY:
1384 
1385 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1386 			/* Parity for RAID 0 is trivially correct */
1387 			raidPtr->parity_good = RF_RAID_CLEAN;
1388 			return(0);
1389 		}
1390 
1391 		if (raidPtr->parity_rewrite_in_progress == 1) {
1392 			/* Re-write is already in progress! */
1393 			return(EINVAL);
1394 		}
1395 
1396 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1397 					   rf_RewriteParityThread,
1398 					   raidPtr,"raid_parity");
1399 		return (retcode);
1400 
1401 
1402 	case RAIDFRAME_ADD_HOT_SPARE:
1403 		sparePtr = (RF_SingleComponent_t *) data;
1404 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1405 		retcode = rf_add_hot_spare(raidPtr, &component);
1406 		return(retcode);
1407 
1408 	case RAIDFRAME_REMOVE_HOT_SPARE:
1409 		return(retcode);
1410 
1411 	case RAIDFRAME_DELETE_COMPONENT:
1412 		componentPtr = (RF_SingleComponent_t *)data;
1413 		memcpy( &component, componentPtr,
1414 			sizeof(RF_SingleComponent_t));
1415 		retcode = rf_delete_component(raidPtr, &component);
1416 		return(retcode);
1417 
1418 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1419 		componentPtr = (RF_SingleComponent_t *)data;
1420 		memcpy( &component, componentPtr,
1421 			sizeof(RF_SingleComponent_t));
1422 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
1423 		return(retcode);
1424 
1425 	case RAIDFRAME_REBUILD_IN_PLACE:
1426 
1427 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1428 			/* Can't do this on a RAID 0!! */
1429 			return(EINVAL);
1430 		}
1431 
1432 		if (raidPtr->recon_in_progress == 1) {
1433 			/* a reconstruct is already in progress! */
1434 			return(EINVAL);
1435 		}
1436 
1437 		componentPtr = (RF_SingleComponent_t *) data;
1438 		memcpy( &component, componentPtr,
1439 			sizeof(RF_SingleComponent_t));
1440 		component.row = 0; /* we don't support any more */
1441 		column = component.column;
1442 
1443 		if ((column < 0) || (column >= raidPtr->numCol)) {
1444 			return(EINVAL);
1445 		}
1446 
1447 		rf_lock_mutex2(raidPtr->mutex);
1448 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1449 		    (raidPtr->numFailures > 0)) {
1450 			/* XXX 0 above shouldn't be constant!!! */
1451 			/* some component other than this has failed.
1452 			   Let's not make things worse than they already
1453 			   are... */
1454 			printf("raid%d: Unable to reconstruct to disk at:\n",
1455 			       raidPtr->raidid);
1456 			printf("raid%d:     Col: %d   Too many failures.\n",
1457 			       raidPtr->raidid, column);
1458 			rf_unlock_mutex2(raidPtr->mutex);
1459 			return (EINVAL);
1460 		}
1461 		if (raidPtr->Disks[column].status ==
1462 		    rf_ds_reconstructing) {
1463 			printf("raid%d: Unable to reconstruct to disk at:\n",
1464 			       raidPtr->raidid);
1465 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
1466 
1467 			rf_unlock_mutex2(raidPtr->mutex);
1468 			return (EINVAL);
1469 		}
1470 		if (raidPtr->Disks[column].status == rf_ds_spared) {
1471 			rf_unlock_mutex2(raidPtr->mutex);
1472 			return (EINVAL);
1473 		}
1474 		rf_unlock_mutex2(raidPtr->mutex);
1475 
1476 		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1477 		if (rrint == NULL)
1478 			return(ENOMEM);
1479 
1480 		rrint->col = column;
1481 		rrint->raidPtr = raidPtr;
1482 
1483 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1484 					   rf_ReconstructInPlaceThread,
1485 					   rrint, "raid_reconip");
1486 		return(retcode);
1487 
1488 	case RAIDFRAME_GET_INFO:
1489 #ifdef COMPAT_NETBSD32
1490 #ifdef _LP64
1491 	case RAIDFRAME_GET_INFO32:
1492 #endif
1493 #endif
1494 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1495 			  (RF_DeviceConfig_t *));
1496 		if (d_cfg == NULL)
1497 			return (ENOMEM);
1498 		retcode = rf_get_info(raidPtr, d_cfg);
1499 		if (retcode == 0) {
1500 #ifdef COMPAT_NETBSD32
1501 #ifdef _LP64
1502 			if (cmd == RAIDFRAME_GET_INFO32)
1503 				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
1504 			else
1505 #endif
1506 #endif
1507 				ucfgp = *(RF_DeviceConfig_t **)data;
1508 			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
1509 		}
1510 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1511 
1512 		return (retcode);
1513 
1514 	case RAIDFRAME_CHECK_PARITY:
1515 		*(int *) data = raidPtr->parity_good;
1516 		return (0);
1517 
1518 	case RAIDFRAME_PARITYMAP_STATUS:
1519 		if (rf_paritymap_ineligible(raidPtr))
1520 			return EINVAL;
1521 		rf_paritymap_status(raidPtr->parity_map,
1522 		    (struct rf_pmstat *)data);
1523 		return 0;
1524 
1525 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1526 		if (rf_paritymap_ineligible(raidPtr))
1527 			return EINVAL;
1528 		if (raidPtr->parity_map == NULL)
1529 			return ENOENT; /* ??? */
1530 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1531 			(struct rf_pmparams *)data, 1))
1532 			return EINVAL;
1533 		return 0;
1534 
1535 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1536 		if (rf_paritymap_ineligible(raidPtr))
1537 			return EINVAL;
1538 		*(int *) data = rf_paritymap_get_disable(raidPtr);
1539 		return 0;
1540 
1541 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1542 		if (rf_paritymap_ineligible(raidPtr))
1543 			return EINVAL;
1544 		rf_paritymap_set_disable(raidPtr, *(int *)data);
1545 		/* XXX should errors be passed up? */
1546 		return 0;
1547 
1548 	case RAIDFRAME_RESET_ACCTOTALS:
1549 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1550 		return (0);
1551 
1552 	case RAIDFRAME_GET_ACCTOTALS:
1553 		totals = (RF_AccTotals_t *) data;
1554 		*totals = raidPtr->acc_totals;
1555 		return (0);
1556 
1557 	case RAIDFRAME_KEEP_ACCTOTALS:
1558 		raidPtr->keep_acc_totals = *(int *)data;
1559 		return (0);
1560 
1561 	case RAIDFRAME_GET_SIZE:
1562 		*(int *) data = raidPtr->totalSectors;
1563 		return (0);
1564 
1565 		/* fail a disk & optionally start reconstruction */
1566 	case RAIDFRAME_FAIL_DISK:
1567 #ifdef COMPAT_80
1568 	case RAIDFRAME_FAIL_DISK80:
1569 #endif
1570 
1571 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1572 			/* Can't do this on a RAID 0!! */
1573 			return(EINVAL);
1574 		}
1575 
1576 		rr = (struct rf_recon_req *) data;
1577 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
1578 			return (EINVAL);
1579 
1580 		rf_lock_mutex2(raidPtr->mutex);
1581 		if (raidPtr->status == rf_rs_reconstructing) {
1582 			/* you can't fail a disk while we're reconstructing! */
1583 			/* XXX wrong for RAID6 */
1584 			rf_unlock_mutex2(raidPtr->mutex);
1585 			return (EINVAL);
1586 		}
1587 		if ((raidPtr->Disks[rr->col].status ==
1588 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1589 			/* some other component has failed.  Let's not make
1590 			   things worse. XXX wrong for RAID6 */
1591 			rf_unlock_mutex2(raidPtr->mutex);
1592 			return (EINVAL);
1593 		}
1594 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1595 			/* Can't fail a spared disk! */
1596 			rf_unlock_mutex2(raidPtr->mutex);
1597 			return (EINVAL);
1598 		}
1599 		rf_unlock_mutex2(raidPtr->mutex);
1600 
1601 		/* make a copy of the recon request so that we don't rely on
1602 		 * the user's buffer */
1603 		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
1604 		if (rrint == NULL)
1605 			return(ENOMEM);
1606 		rrint->col = rr->col;
1607 		rrint->flags = rr->flags;
1608 		rrint->raidPtr = raidPtr;
1609 
1610 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1611 					   rf_ReconThread,
1612 					   rrint, "raid_recon");
1613 		return (0);
1614 
1615 		/* invoke a copyback operation after recon on whatever disk
1616 		 * needs it, if any */
1617 	case RAIDFRAME_COPYBACK:
1618 
1619 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1620 			/* This makes no sense on a RAID 0!! */
1621 			return(EINVAL);
1622 		}
1623 
1624 		if (raidPtr->copyback_in_progress == 1) {
1625 			/* Copyback is already in progress! */
1626 			return(EINVAL);
1627 		}
1628 
1629 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1630 					   rf_CopybackThread,
1631 					   raidPtr,"raid_copyback");
1632 		return (retcode);
1633 
1634 		/* return the percentage completion of reconstruction */
1635 	case RAIDFRAME_CHECK_RECON_STATUS:
1636 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1637 			/* This makes no sense on a RAID 0, so tell the
1638 			   user it's done. */
1639 			*(int *) data = 100;
1640 			return(0);
1641 		}
1642 		if (raidPtr->status != rf_rs_reconstructing)
1643 			*(int *) data = 100;
1644 		else {
1645 			if (raidPtr->reconControl->numRUsTotal > 0) {
1646 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1647 			} else {
1648 				*(int *) data = 0;
1649 			}
1650 		}
1651 		return (0);
1652 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1653 		rf_check_recon_status_ext(raidPtr, data);
1654 		return (0);
1655 
1656 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1657 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1658 			/* This makes no sense on a RAID 0, so tell the
1659 			   user it's done. */
1660 			*(int *) data = 100;
1661 			return(0);
1662 		}
1663 		if (raidPtr->parity_rewrite_in_progress == 1) {
1664 			*(int *) data = 100 *
1665 				raidPtr->parity_rewrite_stripes_done /
1666 				raidPtr->Layout.numStripe;
1667 		} else {
1668 			*(int *) data = 100;
1669 		}
1670 		return (0);
1671 
1672 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1673 		rf_check_parityrewrite_status_ext(raidPtr, data);
1674 		return (0);
1675 
1676 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1677 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1678 			/* This makes no sense on a RAID 0 */
1679 			*(int *) data = 100;
1680 			return(0);
1681 		}
1682 		if (raidPtr->copyback_in_progress == 1) {
1683 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
1684 				raidPtr->Layout.numStripe;
1685 		} else {
1686 			*(int *) data = 100;
1687 		}
1688 		return (0);
1689 
1690 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1691 		rf_check_copyback_status_ext(raidPtr, data);
1692 		return 0;
1693 
1694 	case RAIDFRAME_SET_LAST_UNIT:
1695 		for (column = 0; column < raidPtr->numCol; column++)
1696 			if (raidPtr->Disks[column].status != rf_ds_optimal)
1697 				return EBUSY;
1698 
1699 		for (column = 0; column < raidPtr->numCol; column++) {
1700 			clabel = raidget_component_label(raidPtr, column);
1701 			clabel->last_unit = *(int *)data;
1702 			raidflush_component_label(raidPtr, column);
1703 		}
1704 		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1705 		return 0;
1706 
1707 		/* the sparetable daemon calls this to wait for the kernel to
1708 		 * need a spare table. this ioctl does not return until a
1709 		 * spare table is needed. XXX -- calling mpsleep here in the
1710 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1711 		 * -- I should either compute the spare table in the kernel,
1712 		 * or have a different -- XXX XXX -- interface (a different
1713 		 * character device) for delivering the table     -- XXX */
1714 #if 0
1715 	case RAIDFRAME_SPARET_WAIT:
1716 		rf_lock_mutex2(rf_sparet_wait_mutex);
1717 		while (!rf_sparet_wait_queue)
1718 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1719 		waitreq = rf_sparet_wait_queue;
1720 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1721 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1722 
1723 		/* structure assignment */
1724 		*((RF_SparetWait_t *) data) = *waitreq;
1725 
1726 		RF_Free(waitreq, sizeof(*waitreq));
1727 		return (0);
1728 
1729 		/* wakes up a process waiting on SPARET_WAIT and puts an error
1730 		 * code in it that will cause the dameon to exit */
1731 	case RAIDFRAME_ABORT_SPARET_WAIT:
1732 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1733 		waitreq->fcol = -1;
1734 		rf_lock_mutex2(rf_sparet_wait_mutex);
1735 		waitreq->next = rf_sparet_wait_queue;
1736 		rf_sparet_wait_queue = waitreq;
1737 		rf_broadcast_conf2(rf_sparet_wait_cv);
1738 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1739 		return (0);
1740 
1741 		/* used by the spare table daemon to deliver a spare table
1742 		 * into the kernel */
1743 	case RAIDFRAME_SEND_SPARET:
1744 
1745 		/* install the spare table */
1746 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1747 
1748 		/* respond to the requestor.  the return status of the spare
1749 		 * table installation is passed in the "fcol" field */
1750 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1751 		waitreq->fcol = retcode;
1752 		rf_lock_mutex2(rf_sparet_wait_mutex);
1753 		waitreq->next = rf_sparet_resp_queue;
1754 		rf_sparet_resp_queue = waitreq;
1755 		rf_broadcast_cond2(rf_sparet_resp_cv);
1756 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1757 
1758 		return (retcode);
1759 #endif
1760 
1761 	default:
1762 		break; /* fall through to the os-specific code below */
1763 
1764 	}
1765 
1766 	if (!raidPtr->valid)
1767 		return (EINVAL);
1768 
1769 	/*
1770 	 * Add support for "regular" device ioctls here.
1771 	 */
1772 
1773 	switch (cmd) {
1774 	case DIOCGCACHE:
1775 		retcode = rf_get_component_caches(raidPtr, (int *)data);
1776 		break;
1777 
1778 	case DIOCCACHESYNC:
1779 		retcode = rf_sync_component_caches(raidPtr);
1780 		break;
1781 
1782 	default:
1783 		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1784 		break;
1785 	}
1786 
1787 	return (retcode);
1788 
1789 }
1790 
1791 
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: attach the pseudo-device, set up the dk(9) and
   disk(9) layers, allocate the buffer queue, mark the unit usable,
   and kick off wedge discovery.  */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. (snprintf bounds the write, but a
	 * truncated name is not detected here.) */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		/* NOTE: failure is only logged; the unit is left without
		 * RAIDF_INITED set and the caller gets no error code. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	/* FCFS queue: RAIDframe does its own request ordering below us. */
	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Probe for wedges (dk(4) partitions) on the new device. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1851 
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Post the request on the wait queue and wake the daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* Block until the daemon posts a response; rf_wait_cond2 releases
	 * the mutex while sleeping and reacquires it before returning.
	 * (The old "mpsleep unlocks the mutex" comment referred to a
	 * previous implementation.) */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* Pop the daemon's response off the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon encodes its status in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif
1886 
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update -- presumably
		 * because it performs component I/O and may sleep (TODO
		 * confirm) -- then retake it to adjust the counter. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* Unit was never configured (or has been torn down): drop out. */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* Let the dk(9) layer pull queued buffers through raiddoaccess(). */
	dk_start(dksc, NULL);
}
1921 
/*
 * Translate a struct buf into an asynchronous rf_DoAccess() call.
 *
 * Returns EAGAIN when no openings are available (the dk layer will
 * retry later), ENOSPC when the request falls outside the array or is
 * not sector-aligned, otherwise the result of rf_DoAccess().
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int     do_async;
	int rc;

	/* Throttle: refuse new work when all openings are in use. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* num_blocks: whole sectors; pb: 1 if there is a partial trailing
	 * sector (rejected below). */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" forces this branch unconditionally;
	 * db1_printf appears to be a debug macro, likely compiled out --
	 * confirm before removing. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* Bounds check; the "sum < x" comparisons also catch unsigned
	 * wraparound of raid_addr + num_blocks + pb. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Consume an opening; raiddone() (via dk_done) gives it back. */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1994 
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * Dispatch a RAIDframe disk-queue request to the underlying component
 * device via bdev_strategy(), or synthesize a completion for a NOP.
 * Called with the queue mutex held; the mutex is dropped around the
 * bdev_strategy() call (see comment below) and retaken before return.
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Fake an immediate completion so the queue advances. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Fill in bp for the component I/O; KernelWakeupFunc is the
		 * biodone callback, with req as its private data. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2070 /* this is the callback function associated with a I/O invoked from
2071    kernel code.
2072  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* The originating request was stashed in b_private when the buf
	 * was set up (see InitBP() / rf_DispatchKernelIO()). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* Charge the elapsed disk time to this access's trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2139 
2140 
2141 /*
2142  * initialize a buf structure for doing an I/O in the kernel.
2143  */
2144 static void
2145 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2146        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2147        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2148        struct proc *b_proc)
2149 {
2150 	/* bp->b_flags       = B_PHYS | rw_flag; */
2151 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
2152 	bp->b_oflags = 0;
2153 	bp->b_cflags = 0;
2154 	bp->b_bcount = numSect << logBytesPerSector;
2155 	bp->b_bufsize = bp->b_bcount;
2156 	bp->b_error = 0;
2157 	bp->b_dev = dev;
2158 	bp->b_data = bf;
2159 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2160 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
2161 	if (bp->b_bcount == 0) {
2162 		panic("bp->b_bcount is zero in InitBP!!");
2163 	}
2164 	bp->b_proc = b_proc;
2165 	bp->b_iodone = cbFunc;
2166 	bp->b_private = cbArg;
2167 }
2168 
2169 /*
2170  * Wait interruptibly for an exclusive lock.
2171  *
2172  * XXX
2173  * Several drivers do this; it should be abstracted and made MP-safe.
2174  * (Hmm... where have we seen this warning before :->  GO )
2175  */
2176 static int
2177 raidlock(struct raid_softc *rs)
2178 {
2179 	int     error;
2180 
2181 	error = 0;
2182 	mutex_enter(&rs->sc_mutex);
2183 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2184 		rs->sc_flags |= RAIDF_WANTED;
2185 		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2186 		if (error != 0)
2187 			goto done;
2188 	}
2189 	rs->sc_flags |= RAIDF_LOCKED;
2190 done:
2191 	mutex_exit(&rs->sc_mutex);
2192 	return (error);
2193 }
2194 /*
2195  * Unlock and wake up any waiters.
2196  */
2197 static void
2198 raidunlock(struct raid_softc *rs)
2199 {
2200 
2201 	mutex_enter(&rs->sc_mutex);
2202 	rs->sc_flags &= ~RAIDF_LOCKED;
2203 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2204 		rs->sc_flags &= ~RAIDF_WANTED;
2205 		cv_broadcast(&rs->sc_cv);
2206 	}
2207 	mutex_exit(&rs->sc_mutex);
2208 }
2209 
2210 
2211 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
2212 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2213 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2214 
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset of the component-label area on each component.
	 * Kept as a function (rather than using the constant directly)
	 * so the location could one day vary. */
	return RF_COMPONENT_INFO_OFFSET;
}
2221 
2222 static daddr_t
2223 rf_component_info_size(unsigned secsize)
2224 {
2225 	daddr_t info_size;
2226 
2227 	KASSERT(secsize);
2228 	if (secsize > RF_COMPONENT_INFO_SIZE)
2229 		info_size = secsize;
2230 	else
2231 		info_size = RF_COMPONENT_INFO_SIZE;
2232 
2233 	return info_size;
2234 }
2235 
2236 static daddr_t
2237 rf_parity_map_offset(RF_Raid_t *raidPtr)
2238 {
2239 	daddr_t map_offset;
2240 
2241 	KASSERT(raidPtr->bytesPerSector);
2242 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2243 		map_offset = raidPtr->bytesPerSector;
2244 	else
2245 		map_offset = RF_COMPONENT_INFO_SIZE;
2246 	map_offset += rf_component_info_offset();
2247 
2248 	return map_offset;
2249 }
2250 
2251 static daddr_t
2252 rf_parity_map_size(RF_Raid_t *raidPtr)
2253 {
2254 	daddr_t map_size;
2255 
2256 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2257 		map_size = raidPtr->bytesPerSector;
2258 	else
2259 		map_size = RF_PARITY_MAP_SIZE;
2260 
2261 	return map_size;
2262 }
2263 
2264 int
2265 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2266 {
2267 	RF_ComponentLabel_t *clabel;
2268 
2269 	clabel = raidget_component_label(raidPtr, col);
2270 	clabel->clean = RF_RAID_CLEAN;
2271 	raidflush_component_label(raidPtr, col);
2272 	return(0);
2273 }
2274 
2275 
2276 int
2277 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2278 {
2279 	RF_ComponentLabel_t *clabel;
2280 
2281 	clabel = raidget_component_label(raidPtr, col);
2282 	clabel->clean = RF_RAID_DIRTY;
2283 	raidflush_component_label(raidPtr, col);
2284 	return(0);
2285 }
2286 
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Read the on-disk component label for column `col' into the
	 * in-core copy (raid_cinfo[col].ci_label).  Returns the
	 * raidread_component_label() result (0 or an errno). */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2296 
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return the in-core component label for `col'; no disk I/O. */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2302 
2303 int
2304 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2305 {
2306 	RF_ComponentLabel_t *label;
2307 
2308 	label = &raidPtr->raid_cinfo[col].ci_label;
2309 	label->mod_counter = raidPtr->mod_counter;
2310 #ifndef RF_NO_PARITY_MAP
2311 	label->parity_map_modcount = label->mod_counter;
2312 #endif
2313 	return raidwrite_component_label(raidPtr->bytesPerSector,
2314 	    raidPtr->Disks[col].dev,
2315 	    raidPtr->raid_cinfo[col].ci_vp, label);
2316 }
2317 
2318 
2319 static int
2320 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2321     RF_ComponentLabel_t *clabel)
2322 {
2323 	return raidread_component_area(dev, b_vp, clabel,
2324 	    sizeof(RF_ComponentLabel_t),
2325 	    rf_component_info_offset(),
2326 	    rf_component_info_size(secsize));
2327 }
2328 
2329 /* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	/* Read `dsize' bytes of the component area (label or parity
	 * map) at byte `offset' on `dev', copying the first `msize'
	 * bytes into `data'.  Returns 0 or an errno. */
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* Synchronous read: issue and wait for completion. */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
2366 
2367 
2368 static int
2369 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2370     RF_ComponentLabel_t *clabel)
2371 {
2372 	return raidwrite_component_area(dev, b_vp, clabel,
2373 	    sizeof(RF_ComponentLabel_t),
2374 	    rf_component_info_offset(),
2375 	    rf_component_info_size(secsize), 0);
2376 }
2377 
2378 /* ARGSUSED */
2379 static int
2380 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2381     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2382 {
2383 	struct buf *bp;
2384 	int error;
2385 
2386 	/* get a block of the appropriate size... */
2387 	bp = geteblk((int)dsize);
2388 	bp->b_dev = dev;
2389 
2390 	/* get our ducks in a row for the write */
2391 	bp->b_blkno = offset / DEV_BSIZE;
2392 	bp->b_bcount = dsize;
2393 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2394  	bp->b_resid = dsize;
2395 
2396 	memset(bp->b_data, 0, dsize);
2397 	memcpy(bp->b_data, data, msize);
2398 
2399 	bdev_strategy(bp);
2400 	if (asyncp)
2401 		return 0;
2402 	error = biowait(bp);
2403 	brelse(bp, 0);
2404 	if (error) {
2405 #if 1
2406 		printf("Failed to write RAID component info!\n");
2407 #endif
2408 	}
2409 
2410 	return(error);
2411 }
2412 
2413 void
2414 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2415 {
2416 	int c;
2417 
2418 	for (c = 0; c < raidPtr->numCol; c++) {
2419 		/* Skip dead disks. */
2420 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2421 			continue;
2422 		/* XXXjld: what if an error occurs here? */
2423 		raidwrite_component_area(raidPtr->Disks[c].dev,
2424 		    raidPtr->raid_cinfo[c].ci_vp, map,
2425 		    RF_PARITYMAP_NBYTE,
2426 		    rf_parity_map_offset(raidPtr),
2427 		    rf_parity_map_size(raidPtr), 0);
2428 	}
2429 }
2430 
2431 void
2432 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2433 {
2434 	struct rf_paritymap_ondisk tmp;
2435 	int c,first;
2436 
2437 	first=1;
2438 	for (c = 0; c < raidPtr->numCol; c++) {
2439 		/* Skip dead disks. */
2440 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2441 			continue;
2442 		raidread_component_area(raidPtr->Disks[c].dev,
2443 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
2444 		    RF_PARITYMAP_NBYTE,
2445 		    rf_parity_map_offset(raidPtr),
2446 		    rf_parity_map_size(raidPtr));
2447 		if (first) {
2448 			memcpy(map, &tmp, sizeof(*map));
2449 			first = 0;
2450 		} else {
2451 			rf_paritymap_merge(map, &tmp);
2452 		}
2453 	}
2454 }
2455 
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	/* Mark every live component (and every in-use spare) dirty,
	 * bumping the set's modification counter first so the labels
	 * written here supersede older on-disk copies. */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol keeps its previous value (initially -1) and
			   is written to clabel->column below -- confirm
			   callers guarantee a match. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2515 
2516 
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	/* Refresh the component labels of all optimal components and
	 * in-use spares with the current configuration state.  When
	 * `final' is RF_FINAL_COMPONENT_UPDATE and parity is known
	 * good, also set the clean bit on each label. */
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* Bump the mod counter so these labels supersede older copies. */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the data column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			   scol keeps its previous value (initially -1) when
			   stored below -- confirm a match is guaranteed. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2594 
2595 void
2596 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2597 {
2598 
2599 	if (vp != NULL) {
2600 		if (auto_configured == 1) {
2601 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2602 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2603 			vput(vp);
2604 
2605 		} else {
2606 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2607 		}
2608 	}
2609 }
2610 
2611 
2612 void
2613 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2614 {
2615 	int r,c;
2616 	struct vnode *vp;
2617 	int acd;
2618 
2619 
2620 	/* We take this opportunity to close the vnodes like we should.. */
2621 
2622 	for (c = 0; c < raidPtr->numCol; c++) {
2623 		vp = raidPtr->raid_cinfo[c].ci_vp;
2624 		acd = raidPtr->Disks[c].auto_configured;
2625 		rf_close_component(raidPtr, vp, acd);
2626 		raidPtr->raid_cinfo[c].ci_vp = NULL;
2627 		raidPtr->Disks[c].auto_configured = 0;
2628 	}
2629 
2630 	for (r = 0; r < raidPtr->numSpare; r++) {
2631 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2632 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2633 		rf_close_component(raidPtr, vp, acd);
2634 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2635 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2636 	}
2637 }
2638 
2639 
2640 void
2641 rf_ReconThread(struct rf_recon_req_internal *req)
2642 {
2643 	int     s;
2644 	RF_Raid_t *raidPtr;
2645 
2646 	s = splbio();
2647 	raidPtr = (RF_Raid_t *) req->raidPtr;
2648 	raidPtr->recon_in_progress = 1;
2649 
2650 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2651 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2652 
2653 	RF_Free(req, sizeof(*req));
2654 
2655 	raidPtr->recon_in_progress = 0;
2656 	splx(s);
2657 
2658 	/* That's all... */
2659 	kthread_exit(0);	/* does not return */
2660 }
2661 
2662 void
2663 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2664 {
2665 	int retcode;
2666 	int s;
2667 
2668 	raidPtr->parity_rewrite_stripes_done = 0;
2669 	raidPtr->parity_rewrite_in_progress = 1;
2670 	s = splbio();
2671 	retcode = rf_RewriteParity(raidPtr);
2672 	splx(s);
2673 	if (retcode) {
2674 		printf("raid%d: Error re-writing parity (%d)!\n",
2675 		    raidPtr->raidid, retcode);
2676 	} else {
2677 		/* set the clean bit!  If we shutdown correctly,
2678 		   the clean bit on each component label will get
2679 		   set */
2680 		raidPtr->parity_good = RF_RAID_CLEAN;
2681 	}
2682 	raidPtr->parity_rewrite_in_progress = 0;
2683 
2684 	/* Anyone waiting for us to stop?  If so, inform them... */
2685 	if (raidPtr->waitShutdown) {
2686 		wakeup(&raidPtr->parity_rewrite_in_progress);
2687 	}
2688 
2689 	/* That's all... */
2690 	kthread_exit(0);	/* does not return */
2691 }
2692 
2693 
2694 void
2695 rf_CopybackThread(RF_Raid_t *raidPtr)
2696 {
2697 	int s;
2698 
2699 	raidPtr->copyback_in_progress = 1;
2700 	s = splbio();
2701 	rf_CopybackReconstructedData(raidPtr);
2702 	splx(s);
2703 	raidPtr->copyback_in_progress = 0;
2704 
2705 	/* That's all... */
2706 	kthread_exit(0);	/* does not return */
2707 }
2708 
2709 
2710 void
2711 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
2712 {
2713 	int s;
2714 	RF_Raid_t *raidPtr;
2715 
2716 	s = splbio();
2717 	raidPtr = req->raidPtr;
2718 	raidPtr->recon_in_progress = 1;
2719 	rf_ReconstructInPlace(raidPtr, req->col);
2720 	RF_Free(req, sizeof(*req));
2721 	raidPtr->recon_in_progress = 0;
2722 	splx(s);
2723 
2724 	/* That's all... */
2725 	kthread_exit(0);	/* does not return */
2726 }
2727 
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	/* Try to read a RAIDframe component label from (dev, vp).  If a
	 * reasonable label is found, prepend a new RF_AutoConfig_t to
	 * ac_list and keep the vnode open; otherwise close and release
	 * the vnode.  Returns the (possibly updated) list head, or NULL
	 * if allocation fails -- in which case the entire list is freed. */
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: tear down everything collected so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label, so drop it and release the
		 * component's vnode. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2785 
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	/* Scan every disk-class device in the system for RAIDframe
	 * components and return a list of RF_AutoConfig_t entries for
	 * all components found (possibly empty, i.e. NULL). */
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		     dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				/* NOTE(review): opened FREAD (or FREAD|FSILENT)
				   but closed FREAD|FWRITE throughout this
				   function -- confirm the asymmetry is
				   intentional. */
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: accept the wedge only if its
				   partition type is RAIDframe. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
2989 
2990 
2991 int
2992 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
2993 {
2994 
2995 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2996 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2997 	    ((clabel->clean == RF_RAID_CLEAN) ||
2998 	     (clabel->clean == RF_RAID_DIRTY)) &&
2999 	    clabel->row >=0 &&
3000 	    clabel->column >= 0 &&
3001 	    clabel->num_rows > 0 &&
3002 	    clabel->num_columns > 0 &&
3003 	    clabel->row < clabel->num_rows &&
3004 	    clabel->column < clabel->num_columns &&
3005 	    clabel->blockSize > 0 &&
3006 	    /*
3007 	     * numBlocksHi may contain garbage, but it is ok since
3008 	     * the type is unsigned.  If it is really garbage,
3009 	     * rf_fix_old_label_size() will fix it.
3010 	     */
3011 	    rf_component_label_numblocks(clabel) > 0) {
3012 		/*
3013 		 * label looks reasonable enough...
3014 		 * let's make sure it has no old garbage.
3015 		 */
3016 		if (numsecs)
3017 			rf_fix_old_label_size(clabel, numsecs);
3018 		return(1);
3019 	}
3020 	return(0);
3021 }
3022 
3023 
3024 /*
3025  * For reasons yet unknown, some old component labels have garbage in
3026  * the newer numBlocksHi region, and this causes lossage.  Since those
3027  * disks will also have numsecs set to less than 32 bits of sectors,
3028  * we can determine when this corruption has occurred, and fix it.
3029  *
3030  * The exact same problem, with the same unknown reason, happens to
3031  * the partitionSizeHi member as well.
3032  */
3033 static void
3034 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3035 {
3036 
3037 	if (numsecs < ((uint64_t)1 << 32)) {
3038 		if (clabel->numBlocksHi) {
3039 			printf("WARNING: total sectors < 32 bits, yet "
3040 			       "numBlocksHi set\n"
3041 			       "WARNING: resetting numBlocksHi to zero.\n");
3042 			clabel->numBlocksHi = 0;
3043 		}
3044 
3045 		if (clabel->partitionSizeHi) {
3046 			printf("WARNING: total sectors < 32 bits, yet "
3047 			       "partitionSizeHi set\n"
3048 			       "WARNING: resetting partitionSizeHi to zero.\n");
3049 			clabel->partitionSizeHi = 0;
3050 		}
3051 	}
3052 }
3053 
3054 
3055 #ifdef DEBUG
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Debug helper: dump the interesting fields of a component
	 * label to the console (compiled only under DEBUG). */
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* root_partition is masked to the valid range of the rp[] table. */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
3087 #endif
3088 
3089 RF_ConfigSet_t *
3090 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3091 {
3092 	RF_AutoConfig_t *ac;
3093 	RF_ConfigSet_t *config_sets;
3094 	RF_ConfigSet_t *cset;
3095 	RF_AutoConfig_t *ac_next;
3096 
3097 
3098 	config_sets = NULL;
3099 
3100 	/* Go through the AutoConfig list, and figure out which components
3101 	   belong to what sets.  */
3102 	ac = ac_list;
3103 	while(ac!=NULL) {
3104 		/* we're going to putz with ac->next, so save it here
3105 		   for use at the end of the loop */
3106 		ac_next = ac->next;
3107 
3108 		if (config_sets == NULL) {
3109 			/* will need at least this one... */
3110 			config_sets = (RF_ConfigSet_t *)
3111 				malloc(sizeof(RF_ConfigSet_t),
3112 				       M_RAIDFRAME, M_NOWAIT);
3113 			if (config_sets == NULL) {
3114 				panic("rf_create_auto_sets: No memory!");
3115 			}
3116 			/* this one is easy :) */
3117 			config_sets->ac = ac;
3118 			config_sets->next = NULL;
3119 			config_sets->rootable = 0;
3120 			ac->next = NULL;
3121 		} else {
3122 			/* which set does this component fit into? */
3123 			cset = config_sets;
3124 			while(cset!=NULL) {
3125 				if (rf_does_it_fit(cset, ac)) {
3126 					/* looks like it matches... */
3127 					ac->next = cset->ac;
3128 					cset->ac = ac;
3129 					break;
3130 				}
3131 				cset = cset->next;
3132 			}
3133 			if (cset==NULL) {
3134 				/* didn't find a match above... new set..*/
3135 				cset = (RF_ConfigSet_t *)
3136 					malloc(sizeof(RF_ConfigSet_t),
3137 					       M_RAIDFRAME, M_NOWAIT);
3138 				if (cset == NULL) {
3139 					panic("rf_create_auto_sets: No memory!");
3140 				}
3141 				cset->ac = ac;
3142 				ac->next = NULL;
3143 				cset->next = config_sets;
3144 				cset->rootable = 0;
3145 				config_sets = cset;
3146 			}
3147 		}
3148 		ac = ac_next;
3149 	}
3150 
3151 
3152 	return(config_sets);
3153 }
3154 
3155 static int
3156 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3157 {
3158 	RF_ComponentLabel_t *clabel1, *clabel2;
3159 
3160 	/* If this one matches the *first* one in the set, that's good
3161 	   enough, since the other members of the set would have been
3162 	   through here too... */
3163 	/* note that we are not checking partitionSize here..
3164 
3165 	   Note that we are also not checking the mod_counters here.
3166 	   If everything else matches except the mod_counter, that's
3167 	   good enough for this test.  We will deal with the mod_counters
3168 	   a little later in the autoconfiguration process.
3169 
3170 	    (clabel1->mod_counter == clabel2->mod_counter) &&
3171 
3172 	   The reason we don't check for this is that failed disks
3173 	   will have lower modification counts.  If those disks are
3174 	   not added to the set they used to belong to, then they will
3175 	   form their own set, which may result in 2 different sets,
3176 	   for example, competing to be configured at raid0, and
3177 	   perhaps competing to be the root filesystem set.  If the
3178 	   wrong ones get configured, or both attempt to become /,
3179 	   weird behaviour and or serious lossage will occur.  Thus we
3180 	   need to bring them into the fold here, and kick them out at
3181 	   a later point.
3182 
3183 	*/
3184 
3185 	clabel1 = cset->ac->clabel;
3186 	clabel2 = ac->clabel;
3187 	if ((clabel1->version == clabel2->version) &&
3188 	    (clabel1->serial_number == clabel2->serial_number) &&
3189 	    (clabel1->num_rows == clabel2->num_rows) &&
3190 	    (clabel1->num_columns == clabel2->num_columns) &&
3191 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3192 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3193 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3194 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3195 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3196 	    (clabel1->blockSize == clabel2->blockSize) &&
3197 	    rf_component_label_numblocks(clabel1) ==
3198 	    rf_component_label_numblocks(clabel2) &&
3199 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3200 	    (clabel1->root_partition == clabel2->root_partition) &&
3201 	    (clabel1->last_unit == clabel2->last_unit) &&
3202 	    (clabel1->config_order == clabel2->config_order)) {
3203 		/* if it get's here, it almost *has* to be a match */
3204 	} else {
3205 		/* it's not consistent with somebody in the set..
3206 		   punt */
3207 		return(0);
3208 	}
3209 	/* all was fine.. it must fit... */
3210 	return(1);
3211 }
3212 
/*
 * Decide whether config set 'cset' has enough live, current components
 * to be configured.  Returns 1 if the set is usable, 0 otherwise.
 * "Current" means the component's mod_counter matches the highest
 * mod_counter seen in the set; stale components don't count.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the highest value present wins; components with a lower value
	   are stale (e.g. failed before the last config change). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current component for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd (second) member
				   of a RAID 1 mirror pair without bailing,
				   so the pair has at least one live
				   component.. reset the even_pair_failed
				   flag and go on to the next pair.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* per-level tolerance: RAID 0 tolerates no missing components,
	   RAID 4/5 tolerate one.  (RAID 1 was handled pairwise above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3315 
3316 void
3317 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3318 			RF_Raid_t *raidPtr)
3319 {
3320 	RF_ComponentLabel_t *clabel;
3321 	int i;
3322 
3323 	clabel = ac->clabel;
3324 
3325 	/* 1. Fill in the common stuff */
3326 	config->numCol = clabel->num_columns;
3327 	config->numSpare = 0; /* XXX should this be set here? */
3328 	config->sectPerSU = clabel->sectPerSU;
3329 	config->SUsPerPU = clabel->SUsPerPU;
3330 	config->SUsPerRU = clabel->SUsPerRU;
3331 	config->parityConfig = clabel->parityConfig;
3332 	/* XXX... */
3333 	strcpy(config->diskQueueType,"fifo");
3334 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3335 	config->layoutSpecificSize = 0; /* XXX ?? */
3336 
3337 	while(ac!=NULL) {
3338 		/* row/col values will be in range due to the checks
3339 		   in reasonable_label() */
3340 		strcpy(config->devnames[0][ac->clabel->column],
3341 		       ac->devname);
3342 		ac = ac->next;
3343 	}
3344 
3345 	for(i=0;i<RF_MAXDBGV;i++) {
3346 		config->debugVars[i][0] = 0;
3347 	}
3348 }
3349 
3350 int
3351 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3352 {
3353 	RF_ComponentLabel_t *clabel;
3354 	int column;
3355 	int sparecol;
3356 
3357 	raidPtr->autoconfigure = new_value;
3358 
3359 	for(column=0; column<raidPtr->numCol; column++) {
3360 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3361 			clabel = raidget_component_label(raidPtr, column);
3362 			clabel->autoconfigure = new_value;
3363 			raidflush_component_label(raidPtr, column);
3364 		}
3365 	}
3366 	for(column = 0; column < raidPtr->numSpare ; column++) {
3367 		sparecol = raidPtr->numCol + column;
3368 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3369 			clabel = raidget_component_label(raidPtr, sparecol);
3370 			clabel->autoconfigure = new_value;
3371 			raidflush_component_label(raidPtr, sparecol);
3372 		}
3373 	}
3374 	return(new_value);
3375 }
3376 
3377 int
3378 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3379 {
3380 	RF_ComponentLabel_t *clabel;
3381 	int column;
3382 	int sparecol;
3383 
3384 	raidPtr->root_partition = new_value;
3385 	for(column=0; column<raidPtr->numCol; column++) {
3386 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3387 			clabel = raidget_component_label(raidPtr, column);
3388 			clabel->root_partition = new_value;
3389 			raidflush_component_label(raidPtr, column);
3390 		}
3391 	}
3392 	for(column = 0; column < raidPtr->numSpare ; column++) {
3393 		sparecol = raidPtr->numCol + column;
3394 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3395 			clabel = raidget_component_label(raidPtr, sparecol);
3396 			clabel->root_partition = new_value;
3397 			raidflush_component_label(raidPtr, sparecol);
3398 		}
3399 	}
3400 	return(new_value);
3401 }
3402 
3403 void
3404 rf_release_all_vps(RF_ConfigSet_t *cset)
3405 {
3406 	RF_AutoConfig_t *ac;
3407 
3408 	ac = cset->ac;
3409 	while(ac!=NULL) {
3410 		/* Close the vp, and give it back */
3411 		if (ac->vp) {
3412 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3413 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
3414 			vput(ac->vp);
3415 			ac->vp = NULL;
3416 		}
3417 		ac = ac->next;
3418 	}
3419 }
3420 
3421 
3422 void
3423 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3424 {
3425 	RF_AutoConfig_t *ac;
3426 	RF_AutoConfig_t *next_ac;
3427 
3428 	ac = cset->ac;
3429 	while(ac!=NULL) {
3430 		next_ac = ac->next;
3431 		/* nuke the label */
3432 		free(ac->clabel, M_RAIDFRAME);
3433 		/* cleanup the config structure */
3434 		free(ac, M_RAIDFRAME);
3435 		/* "next.." */
3436 		ac = next_ac;
3437 	}
3438 	/* and, finally, nuke the config set */
3439 	free(cset, M_RAIDFRAME);
3440 }
3441 
3442 
/*
 * Fill in the set-wide fields of a component label from the current
 * state of the RAID set.  Per-component fields (row, column, etc.)
 * are not touched here.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	/* let the parity map record its own state in the label */
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3475 
/*
 * Configure one autoconfigured RAID set: build an RF_Config_t from the
 * component labels, find a free raid unit (preferring the unit the set
 * was last configured at), and configure it.  Returns the softc on
 * success, NULL on failure (allocation or configuration error).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* walk forward from last_unit until an unconfigured (or
	   nonexistent) unit is found */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no existing softc at raidID: create one now */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
				/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: release the softc we grabbed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3559 
3560 void
3561 rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3562 	     size_t xmin, size_t xmax)
3563 {
3564 	int error;
3565 
3566 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3567 	pool_sethiwat(p, xmax);
3568 	if ((error = pool_prime(p, xmin)) != 0)
3569 		panic("%s: failed to prime pool: %d", __func__, error);
3570 	pool_setlowat(p, xmin);
3571 }
3572 
3573 /*
3574  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3575  * to see if there is IO pending and if that IO could possibly be done
3576  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
3577  * otherwise.
3578  *
3579  */
3580 int
3581 rf_buf_queue_check(RF_Raid_t *raidPtr)
3582 {
3583 	struct raid_softc *rs;
3584 	struct dk_softc *dksc;
3585 
3586 	rs = raidPtr->softc;
3587 	dksc = &rs->sc_dksc;
3588 
3589 	if ((rs->sc_flags & RAIDF_INITED) == 0)
3590 		return 1;
3591 
3592 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
3593 		/* there is work to do */
3594 		return 0;
3595 	}
3596 	/* default is nothing to do */
3597 	return 1;
3598 }
3599 
3600 int
3601 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3602 {
3603 	uint64_t numsecs;
3604 	unsigned secsize;
3605 	int error;
3606 
3607 	error = getdisksize(vp, &numsecs, &secsize);
3608 	if (error == 0) {
3609 		diskPtr->blockSize = secsize;
3610 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3611 		diskPtr->partitionSize = numsecs;
3612 		return 0;
3613 	}
3614 	return error;
3615 }
3616 
/* Pseudo-device: a raid instance always matches. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3622 
/* Nothing to do at autoconf attach time; the device is set up later
   (via ioctl configuration or RAID autoconfiguration). */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3627 
3628 
3629 static int
3630 raid_detach(device_t self, int flags)
3631 {
3632 	int error;
3633 	struct raid_softc *rs = raidsoftc(self);
3634 
3635 	if (rs == NULL)
3636 		return ENXIO;
3637 
3638 	if ((error = raidlock(rs)) != 0)
3639 		return (error);
3640 
3641 	error = raid_detach_unlocked(rs);
3642 
3643 	raidunlock(rs);
3644 
3645 	/* XXX raid can be referenced here */
3646 
3647 	if (error)
3648 		return error;
3649 
3650 	/* Free the softc */
3651 	raidput(rs);
3652 
3653 	return 0;
3654 }
3655 
/*
 * Publish a synthetic disk geometry for the RAID set: real capacity
 * and sector size, with "sectors per track" set to the data portion
 * of a stripe and a fabricated track count of 4 per column.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;	/* fabricated */

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
3671 
3672 /*
3673  * Get cache info for all the components (including spares).
3674  * Returns intersection of all the cache flags of all disks, or first
3675  * error if any encountered.
3676  * XXXfua feature flags can change as spares are added - lock down somehow
3677  */
3678 static int
3679 rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3680 {
3681 	int c;
3682 	int error;
3683 	int dkwhole = 0, dkpart;
3684 
3685 	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3686 		/*
3687 		 * Check any non-dead disk, even when currently being
3688 		 * reconstructed.
3689 		 */
3690 		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3691 		    || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3692 			error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3693 			    DIOCGCACHE, &dkpart, FREAD, NOCRED);
3694 			if (error) {
3695 				if (error != ENODEV) {
3696 					printf("raid%d: get cache for component %s failed\n",
3697 					    raidPtr->raidid,
3698 					    raidPtr->Disks[c].devname);
3699 				}
3700 
3701 				return error;
3702 			}
3703 
3704 			if (c == 0)
3705 				dkwhole = dkpart;
3706 			else
3707 				dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3708 		}
3709 	}
3710 
3711 	*data = dkwhole;
3712 
3713 	return 0;
3714 }
3715 
3716 /*
3717  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3718  * We end up returning whatever error was returned by the first cache flush
3719  * that fails.
3720  */
3721 
3722 int
3723 rf_sync_component_caches(RF_Raid_t *raidPtr)
3724 {
3725 	int c, sparecol;
3726 	int e,error;
3727 	int force = 1;
3728 
3729 	error = 0;
3730 	for (c = 0; c < raidPtr->numCol; c++) {
3731 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
3732 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3733 					  &force, FWRITE, NOCRED);
3734 			if (e) {
3735 				if (e != ENODEV)
3736 					printf("raid%d: cache flush to component %s failed.\n",
3737 					       raidPtr->raidid, raidPtr->Disks[c].devname);
3738 				if (error == 0) {
3739 					error = e;
3740 				}
3741 			}
3742 		}
3743 	}
3744 
3745 	for( c = 0; c < raidPtr->numSpare ; c++) {
3746 		sparecol = raidPtr->numCol + c;
3747 		/* Need to ensure that the reconstruct actually completed! */
3748 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3749 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3750 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
3751 			if (e) {
3752 				if (e != ENODEV)
3753 					printf("raid%d: cache flush to component %s failed.\n",
3754 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3755 				if (error == 0) {
3756 					error = e;
3757 				}
3758 			}
3759 		}
3760 	}
3761 	return error;
3762 }
3763 
3764 /* Fill in info with the current status */
3765 void
3766 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3767 {
3768 
3769 	if (raidPtr->status != rf_rs_reconstructing) {
3770 		info->total = 100;
3771 		info->completed = 100;
3772 	} else {
3773 		info->total = raidPtr->reconControl->numRUsTotal;
3774 		info->completed = raidPtr->reconControl->numRUsComplete;
3775 	}
3776 	info->remaining = info->total - info->completed;
3777 }
3778 
3779 /* Fill in info with the current status */
3780 void
3781 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3782 {
3783 
3784 	if (raidPtr->parity_rewrite_in_progress == 1) {
3785 		info->total = raidPtr->Layout.numStripe;
3786 		info->completed = raidPtr->parity_rewrite_stripes_done;
3787 	} else {
3788 		info->completed = 100;
3789 		info->total = 100;
3790 	}
3791 	info->remaining = info->total - info->completed;
3792 }
3793 
3794 /* Fill in info with the current status */
3795 void
3796 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3797 {
3798 
3799 	if (raidPtr->copyback_in_progress == 1) {
3800 		info->total = raidPtr->Layout.numStripe;
3801 		info->completed = raidPtr->copyback_stripes_done;
3802 		info->remaining = info->total - info->completed;
3803 	} else {
3804 		info->remaining = 0;
3805 		info->completed = 100;
3806 		info->total = 100;
3807 	}
3808 }
3809 
3810 /* Fill in config with the current info */
3811 int
3812 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3813 {
3814 	int	d, i, j;
3815 
3816 	if (!raidPtr->valid)
3817 		return (ENODEV);
3818 	config->cols = raidPtr->numCol;
3819 	config->ndevs = raidPtr->numCol;
3820 	if (config->ndevs >= RF_MAX_DISKS)
3821 		return (ENOMEM);
3822 	config->nspares = raidPtr->numSpare;
3823 	if (config->nspares >= RF_MAX_DISKS)
3824 		return (ENOMEM);
3825 	config->maxqdepth = raidPtr->maxQueueDepth;
3826 	d = 0;
3827 	for (j = 0; j < config->cols; j++) {
3828 		config->devs[d] = raidPtr->Disks[j];
3829 		d++;
3830 	}
3831 	for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3832 		config->spares[i] = raidPtr->Disks[j];
3833 		if (config->spares[i].status == rf_ds_rebuilding_spare) {
3834 			/* XXX: raidctl(8) expects to see this as a used spare */
3835 			config->spares[i].status = rf_ds_used_spare;
3836 		}
3837 	}
3838 	return 0;
3839 }
3840 
3841 int
3842 rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3843 {
3844 	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3845 	RF_ComponentLabel_t *raid_clabel;
3846 	int column = clabel->column;
3847 
3848 	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3849 		return EINVAL;
3850 	raid_clabel = raidget_component_label(raidPtr, column);
3851 	memcpy(clabel, raid_clabel, sizeof *clabel);
3852 
3853 	return 0;
3854 }
3855 
/*
 * Module interface
 */

/* The raid module requires the disk(9) helper code and the FCFS
   buffer queue discipline. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* cfdriver is declared here only for modular kernels; built-in
   kernels get it from the generated autoconf tables. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3869 
3870 static int
3871 raid_modcmd(modcmd_t cmd, void *data)
3872 {
3873 	int error;
3874 
3875 	error = 0;
3876 	switch (cmd) {
3877 	case MODULE_CMD_INIT:
3878 		error = raid_modcmd_init();
3879 		break;
3880 	case MODULE_CMD_FINI:
3881 		error = raid_modcmd_fini();
3882 		break;
3883 	default:
3884 		error = ENOTTY;
3885 		break;
3886 	}
3887 	return error;
3888 }
3889 
/*
 * Module initialization: register the block/character devsw and the
 * autoconf driver/attach glue, boot the RAIDframe core, and register
 * a finalizer that autoconfigures RAID sets once all real hardware
 * has been found.  Everything registered so far is rolled back on a
 * partial failure.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach(9) pick the majors dynamically;
	   EEXIST is tolerated (devsw already registered). */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* roll back the devsw registration */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* roll back in reverse order of registration */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 here (set by the successful cfattach above) */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* non-fatal: the module works, but sets won't
		   autoconfigure */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3960 
/*
 * Module teardown: refuse to unload while any raid device exists,
 * then undo raid_modcmd_init() in reverse order.  If a detach step
 * fails partway, the steps already undone are re-registered so the
 * module is left in a consistent, loaded state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist.  */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* restore the cfattach detached above */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* restore everything detached above */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
4010