xref: /netbsd-src/sys/dev/raidframe/rf_netbsdkintf.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: rf_netbsdkintf.c,v 1.304 2013/05/29 00:47:49 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Greg Oster; Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988 University of Utah.
34  * Copyright (c) 1990, 1993
35  *      The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
66  *
67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
68  */
69 
70 /*
71  * Copyright (c) 1995 Carnegie-Mellon University.
72  * All rights reserved.
73  *
74  * Authors: Mark Holland, Jim Zelenka
75  *
76  * Permission to use, copy, modify and distribute this software and
77  * its documentation is hereby granted, provided that both the copyright
78  * notice and this permission notice appear in all copies of the
79  * software, derivative works or modified versions, and any portions
80  * thereof, and that both notices appear in supporting documentation.
81  *
82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85  *
86  * Carnegie Mellon requests users of this software to return to
87  *
88  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
89  *  School of Computer Science
90  *  Carnegie Mellon University
91  *  Pittsburgh PA 15213-3890
92  *
93  * any improvements or extensions that they make and grant Carnegie the
94  * rights to redistribute these changes.
95  */
96 
97 /***********************************************************
98  *
99  * rf_kintf.c -- the kernel interface routines for RAIDframe
100  *
101  ***********************************************************/
102 
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.304 2013/05/29 00:47:49 christos Exp $");
105 
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #endif
110 
111 #include <sys/param.h>
112 #include <sys/errno.h>
113 #include <sys/pool.h>
114 #include <sys/proc.h>
115 #include <sys/queue.h>
116 #include <sys/disk.h>
117 #include <sys/device.h>
118 #include <sys/stat.h>
119 #include <sys/ioctl.h>
120 #include <sys/fcntl.h>
121 #include <sys/systm.h>
122 #include <sys/vnode.h>
123 #include <sys/disklabel.h>
124 #include <sys/conf.h>
125 #include <sys/buf.h>
126 #include <sys/bufq.h>
127 #include <sys/reboot.h>
128 #include <sys/kauth.h>
129 
130 #include <prop/proplib.h>
131 
132 #include <dev/raidframe/raidframevar.h>
133 #include <dev/raidframe/raidframeio.h>
134 #include <dev/raidframe/rf_paritymap.h>
135 
136 #include "rf_raid.h"
137 #include "rf_copyback.h"
138 #include "rf_dag.h"
139 #include "rf_dagflags.h"
140 #include "rf_desc.h"
141 #include "rf_diskqueue.h"
142 #include "rf_etimer.h"
143 #include "rf_general.h"
144 #include "rf_kintf.h"
145 #include "rf_options.h"
146 #include "rf_driver.h"
147 #include "rf_parityscan.h"
148 #include "rf_threadstuff.h"
149 
150 #ifdef COMPAT_50
151 #include "rf_compat50.h"
152 #endif
153 
154 #ifdef DEBUG
155 int     rf_kdebug_level = 0;
156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
157 #else				/* DEBUG */
158 #define db1_printf(a) { }
159 #endif				/* DEBUG */
160 
161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
162 static rf_declare_mutex2(rf_sparet_wait_mutex);
163 static rf_declare_cond2(rf_sparet_wait_cv);
164 static rf_declare_cond2(rf_sparet_resp_cv);
165 
166 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
167 						 * spare table */
168 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
169 						 * installation process */
170 #endif
171 
172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
173 
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf *);
176 static void InitBP(struct buf *, struct vnode *, unsigned,
177     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
178     void *, int, struct proc *);
179 struct raid_softc;
180 static void raidinit(struct raid_softc *);
181 
182 void raidattach(int);
183 static int raid_match(device_t, cfdata_t, void *);
184 static void raid_attach(device_t, device_t, void *);
185 static int raid_detach(device_t, int);
186 
187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188     daddr_t, daddr_t);
189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190     daddr_t, daddr_t, int);
191 
192 static int raidwrite_component_label(unsigned,
193     dev_t, struct vnode *, RF_ComponentLabel_t *);
194 static int raidread_component_label(unsigned,
195     dev_t, struct vnode *, RF_ComponentLabel_t *);
196 
197 
198 dev_type_open(raidopen);
199 dev_type_close(raidclose);
200 dev_type_read(raidread);
201 dev_type_write(raidwrite);
202 dev_type_ioctl(raidioctl);
203 dev_type_strategy(raidstrategy);
204 dev_type_dump(raiddump);
205 dev_type_size(raidsize);
206 
/* Block-device switch: the raid pseudo-device behaves as a disk. */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* Character-device (raw) switch; no stop/tty/poll/mmap/kqueue support. */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

/* Glue handed to the generic disk(9) layer: strategy + transfer clamp. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
218 
/*
 * Per-unit software state for a RAID pseudo-device.  One of these is
 * created lazily by raidget() and linked on the global `raids' list.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int	sc_unit;	/* unit number (raid<N>) */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* the RAIDframe descriptor proper */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the global list */
};
231 /* sc_flags */
232 #define RAIDF_INITED	0x01	/* unit has been initialized */
233 #define RAIDF_WLABEL	0x02	/* label area is writable */
234 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
235 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
236 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
237 #define RAIDF_LOCKED	0x80	/* unit is locked */
238 
239 #define	raidunit(x)	DISKUNIT(x)
240 
241 extern struct cfdriver raid_cd;
242 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
243     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
244     DVF_DETACH_SHUTDOWN);
245 
246 /*
247  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
248  * Be aware that large numbers can allow the driver to consume a lot of
249  * kernel memory, especially on writes, and in degraded mode reads.
250  *
251  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
252  * a single 64K write will typically require 64K for the old data,
253  * 64K for the old parity, and 64K for the new parity, for a total
254  * of 192K (if the parity buffer is not re-used immediately).
255  * Even it if is used immediately, that's still 128K, which when multiplied
256  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
257  *
258  * Now in degraded mode, for example, a 64K read on the above setup may
259  * require data reconstruction, which will require *all* of the 4 remaining
260  * disks to participate -- 4 * 32K/disk == 128K again.
261  */
262 
263 #ifndef RAIDOUTSTANDING
264 #define RAIDOUTSTANDING   6
265 #endif
266 
267 #define RAIDLABELDEV(dev)	\
268 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
269 
270 /* declared here, and made public, for the benefit of KVM stuff.. */
271 
272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
273 				     struct disklabel *);
274 static void raidgetdisklabel(dev_t);
275 static void raidmakedisklabel(struct raid_softc *);
276 
277 static int raidlock(struct raid_softc *);
278 static void raidunlock(struct raid_softc *);
279 
280 static int raid_detach_unlocked(struct raid_softc *);
281 
282 static void rf_markalldirty(RF_Raid_t *);
283 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
284 
285 void rf_ReconThread(struct rf_recon_req *);
286 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
287 void rf_CopybackThread(RF_Raid_t *raidPtr);
288 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
289 int rf_autoconfig(device_t);
290 void rf_buildroothack(RF_ConfigSet_t *);
291 
292 RF_AutoConfig_t *rf_find_raid_components(void);
293 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
294 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
295 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
296 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
297 int rf_set_autoconfig(RF_Raid_t *, int);
298 int rf_set_rootpartition(RF_Raid_t *, int);
299 void rf_release_all_vps(RF_ConfigSet_t *);
300 void rf_cleanup_config_set(RF_ConfigSet_t *);
301 int rf_have_enough_components(RF_ConfigSet_t *);
302 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
303 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
304 
305 /*
306  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
307  * Note that this is overridden by having RAID_AUTOCONFIG as an option
308  * in the kernel config file.
309  */
310 #ifdef RAID_AUTOCONFIG
311 int raidautoconfig = 1;
312 #else
313 int raidautoconfig = 0;
314 #endif
315 static bool raidautoconfigdone = false;
316 
317 struct RF_Pools_s rf_pools;
318 
319 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
320 static kmutex_t raid_lock;
321 
322 static struct raid_softc *
323 raidcreate(int unit) {
324 	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
325 	if (sc == NULL) {
326 #ifdef DIAGNOSTIC
327 		printf("%s: out of memory\n", __func__);
328 #endif
329 		return NULL;
330 	}
331 	sc->sc_unit = unit;
332 	bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
333 	return sc;
334 }
335 
336 static void
337 raiddestroy(struct raid_softc *sc) {
338 	bufq_free(sc->buf_queue);
339 	kmem_free(sc, sizeof(*sc));
340 }
341 
342 static struct raid_softc *
343 raidget(int unit) {
344 	struct raid_softc *sc;
345 	if (unit < 0) {
346 #ifdef DIAGNOSTIC
347 		panic("%s: unit %d!", __func__, unit);
348 #endif
349 		return NULL;
350 	}
351 	mutex_enter(&raid_lock);
352 	LIST_FOREACH(sc, &raids, sc_link) {
353 		if (sc->sc_unit == unit) {
354 			mutex_exit(&raid_lock);
355 			return sc;
356 		}
357 	}
358 	mutex_exit(&raid_lock);
359 	if ((sc = raidcreate(unit)) == NULL)
360 		return NULL;
361 	mutex_enter(&raid_lock);
362 	LIST_INSERT_HEAD(&raids, sc, sc_link);
363 	mutex_exit(&raid_lock);
364 	return sc;
365 }
366 
367 static void
368 raidput(struct raid_softc *sc) {
369 	mutex_enter(&raid_lock);
370 	LIST_REMOVE(sc, sc_link);
371 	mutex_exit(&raid_lock);
372 	raiddestroy(sc);
373 }
374 
/*
 * Pseudo-device attach routine, called once at boot.  The `num'
 * argument (requested unit count) is unused here: units are created
 * lazily by raidget().  Initializes the global lock and (optionally)
 * the spare-table handshake state, boots the RAIDframe core, hooks up
 * the autoconf attachment, and registers a finalizer so RAID set
 * auto-configuration runs after all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* State for the user-land spare-table installation handshake. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
407 
408 int
409 rf_autoconfig(device_t self)
410 {
411 	RF_AutoConfig_t *ac_list;
412 	RF_ConfigSet_t *config_sets;
413 
414 	if (!raidautoconfig || raidautoconfigdone == true)
415 		return (0);
416 
417 	/* XXX This code can only be run once. */
418 	raidautoconfigdone = true;
419 
420 	/* 1. locate all RAID components on the system */
421 	aprint_debug("Searching for RAID components...\n");
422 	ac_list = rf_find_raid_components();
423 
424 	/* 2. Sort them into their respective sets. */
425 	config_sets = rf_create_auto_sets(ac_list);
426 
427 	/*
428 	 * 3. Evaluate each set and configure the valid ones.
429 	 * This gets done in rf_buildroothack().
430 	 */
431 	rf_buildroothack(config_sets);
432 
433 	return 1;
434 }
435 
/*
 * Configure each eligible auto-config set, then try to work out which
 * (if any) configured set should become the root device.  Consumes the
 * config_sets list: every set is released/cleaned up regardless of
 * whether it was configured.  May set booted_device, or set RB_ASKNAME
 * in boothowto when more than one candidate root exists and the boot
 * device cannot disambiguate.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int col;
	int num_root;
	char *devname;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; grab next first. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition? */
			/*
			 * Note: sizeof does not evaluate its operand, so
			 * using cset here (NULL after the loop) is safe.
			 */
			char cname[sizeof(cset->ac->devname)];
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			booted_device = dkwedge_find_by_wname(cname);
		} else
			booted_device = rsc->sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/*
		 * Recount: keep only sets that contain the component the
		 * machine actually booted from.
		 */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			for (col = 0; col < r->numCol; col++) {
				/* Skip the "/dev/" prefix of the name. */
				devname = r->Disks[col].devname;
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				       sc->sc_unit, devname);
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
539 
540 
541 int
542 raidsize(dev_t dev)
543 {
544 	struct raid_softc *rs;
545 	struct disklabel *lp;
546 	int     part, unit, omask, size;
547 
548 	unit = raidunit(dev);
549 	if ((rs = raidget(unit)) == NULL)
550 		return -1;
551 	if ((rs->sc_flags & RAIDF_INITED) == 0)
552 		return (-1);
553 
554 	part = DISKPART(dev);
555 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
556 	lp = rs->sc_dkdev.dk_label;
557 
558 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
559 		return (-1);
560 
561 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
562 		size = -1;
563 	else
564 		size = lp->d_partitions[part].p_size *
565 		    (lp->d_secsize / DEV_BSIZE);
566 
567 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
568 		return (-1);
569 
570 	return (size);
571 
572 }
573 
574 int
575 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
576 {
577 	int     unit = raidunit(dev);
578 	struct raid_softc *rs;
579 	const struct bdevsw *bdev;
580 	struct disklabel *lp;
581 	RF_Raid_t *raidPtr;
582 	daddr_t offset;
583 	int     part, c, sparecol, j, scol, dumpto;
584 	int     error = 0;
585 
586 	if ((rs = raidget(unit)) == NULL)
587 		return ENXIO;
588 
589 	raidPtr = &rs->sc_r;
590 
591 	if ((rs->sc_flags & RAIDF_INITED) == 0)
592 		return ENXIO;
593 
594 	/* we only support dumping to RAID 1 sets */
595 	if (raidPtr->Layout.numDataCol != 1 ||
596 	    raidPtr->Layout.numParityCol != 1)
597 		return EINVAL;
598 
599 
600 	if ((error = raidlock(rs)) != 0)
601 		return error;
602 
603 	if (size % DEV_BSIZE != 0) {
604 		error = EINVAL;
605 		goto out;
606 	}
607 
608 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
609 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
610 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
611 		    size / DEV_BSIZE, rs->sc_size);
612 		error = EINVAL;
613 		goto out;
614 	}
615 
616 	part = DISKPART(dev);
617 	lp = rs->sc_dkdev.dk_label;
618 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
619 
620 	/* figure out what device is alive.. */
621 
622 	/*
623 	   Look for a component to dump to.  The preference for the
624 	   component to dump to is as follows:
625 	   1) the master
626 	   2) a used_spare of the master
627 	   3) the slave
628 	   4) a used_spare of the slave
629 	*/
630 
631 	dumpto = -1;
632 	for (c = 0; c < raidPtr->numCol; c++) {
633 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
634 			/* this might be the one */
635 			dumpto = c;
636 			break;
637 		}
638 	}
639 
640 	/*
641 	   At this point we have possibly selected a live master or a
642 	   live slave.  We now check to see if there is a spared
643 	   master (or a spared slave), if we didn't find a live master
644 	   or a live slave.
645 	*/
646 
647 	for (c = 0; c < raidPtr->numSpare; c++) {
648 		sparecol = raidPtr->numCol + c;
649 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
650 			/* How about this one? */
651 			scol = -1;
652 			for(j=0;j<raidPtr->numCol;j++) {
653 				if (raidPtr->Disks[j].spareCol == sparecol) {
654 					scol = j;
655 					break;
656 				}
657 			}
658 			if (scol == 0) {
659 				/*
660 				   We must have found a spared master!
661 				   We'll take that over anything else
662 				   found so far.  (We couldn't have
663 				   found a real master before, since
664 				   this is a used spare, and it's
665 				   saying that it's replacing the
666 				   master.)  On reboot (with
667 				   autoconfiguration turned on)
668 				   sparecol will become the 1st
669 				   component (component0) of this set.
670 				*/
671 				dumpto = sparecol;
672 				break;
673 			} else if (scol != -1) {
674 				/*
675 				   Must be a spared slave.  We'll dump
676 				   to that if we havn't found anything
677 				   else so far.
678 				*/
679 				if (dumpto == -1)
680 					dumpto = sparecol;
681 			}
682 		}
683 	}
684 
685 	if (dumpto == -1) {
686 		/* we couldn't find any live components to dump to!?!?
687 		 */
688 		error = EINVAL;
689 		goto out;
690 	}
691 
692 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
693 
694 	/*
695 	   Note that blkno is relative to this particular partition.
696 	   By adding the offset of this partition in the RAID
697 	   set, and also adding RF_PROTECTED_SECTORS, we get a
698 	   value that is relative to the partition used for the
699 	   underlying component.
700 	*/
701 
702 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
703 				blkno + offset, va, size);
704 
705 out:
706 	raidunlock(rs);
707 
708 	return error;
709 }
710 /* ARGSUSED */
/*
 * Open a partition of the RAID device.  Fails with EBUSY if the unit
 * is shutting down or if wedges exist and a non-raw partition is
 * requested.  The first open of a configured unit triggers a disklabel
 * read and marks all components dirty (see comment in the body).
 * The open is recorded in the appropriate openmask to pin the unit
 * against unconfiguration while open.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	/* Recompute the combined open mask from both access types. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
794 /* ARGSUSED */
/*
 * Close a partition of the RAID device.  Clears the partition's bit in
 * the relevant openmask; on the last close of a configured unit the
 * component labels are given their final "clean" update.  Always
 * returns 0 (raidlock() failure aside).
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
						 RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}
843 
/*
 * Strategy routine: validate and bounds-check the buf, then queue it
 * for the RAIDframe I/O thread and wake that thread via iodone_cv.
 * On any validation failure the buf is completed immediately with
 * b_resid == b_bcount and (usually) an error.
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int     wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	/* Zero-length transfer: complete successfully, nothing to do. */
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors (native sector size) to DEV_BSIZE. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	/* Hand off to the RAIDframe I/O thread under its lock. */
	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
914 /* ARGSUSED */
915 int
916 raidread(dev_t dev, struct uio *uio, int flags)
917 {
918 	int     unit = raidunit(dev);
919 	struct raid_softc *rs;
920 
921 	if ((rs = raidget(unit)) == NULL)
922 		return ENXIO;
923 
924 	if ((rs->sc_flags & RAIDF_INITED) == 0)
925 		return (ENXIO);
926 
927 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
928 
929 }
930 /* ARGSUSED */
931 int
932 raidwrite(dev_t dev, struct uio *uio, int flags)
933 {
934 	int     unit = raidunit(dev);
935 	struct raid_softc *rs;
936 
937 	if ((rs = raidget(unit)) == NULL)
938 		return ENXIO;
939 
940 	if ((rs->sc_flags & RAIDF_INITED) == 0)
941 		return (ENXIO);
942 
943 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
944 
945 }
946 
947 static int
948 raid_detach_unlocked(struct raid_softc *rs)
949 {
950 	int error;
951 	RF_Raid_t *raidPtr;
952 
953 	raidPtr = &rs->sc_r;
954 
955 	/*
956 	 * If somebody has a partition mounted, we shouldn't
957 	 * shutdown.
958 	 */
959 	if (rs->sc_dkdev.dk_openmask != 0)
960 		return EBUSY;
961 
962 	if ((rs->sc_flags & RAIDF_INITED) == 0)
963 		;	/* not initialized: nothing to do */
964 	else if ((error = rf_Shutdown(raidPtr)) != 0)
965 		return error;
966 	else
967 		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
968 
969 	/* Detach the disk. */
970 	dkwedge_delall(&rs->sc_dkdev);
971 	disk_detach(&rs->sc_dkdev);
972 	disk_destroy(&rs->sc_dkdev);
973 
974 	aprint_normal_dev(rs->sc_dev, "detached\n");
975 
976 	return 0;
977 }
978 
979 int
980 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
981 {
982 	int     unit = raidunit(dev);
983 	int     error = 0;
984 	int     part, pmask, s;
985 	cfdata_t cf;
986 	struct raid_softc *rs;
987 	RF_Config_t *k_cfg, *u_cfg;
988 	RF_Raid_t *raidPtr;
989 	RF_RaidDisk_t *diskPtr;
990 	RF_AccTotals_t *totals;
991 	RF_DeviceConfig_t *d_cfg, **ucfgp;
992 	u_char *specific_buf;
993 	int retcode = 0;
994 	int column;
995 /*	int raidid; */
996 	struct rf_recon_req *rrcopy, *rr;
997 	RF_ComponentLabel_t *clabel;
998 	RF_ComponentLabel_t *ci_label;
999 	RF_ComponentLabel_t **clabel_ptr;
1000 	RF_SingleComponent_t *sparePtr,*componentPtr;
1001 	RF_SingleComponent_t component;
1002 	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1003 	int i, j, d;
1004 #ifdef __HAVE_OLD_DISKLABEL
1005 	struct disklabel newlabel;
1006 #endif
1007 	struct dkwedge_info *dkw;
1008 
1009 	if ((rs = raidget(unit)) == NULL)
1010 		return ENXIO;
1011 	raidPtr = &rs->sc_r;
1012 
1013 	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1014 		(int) DISKPART(dev), (int) unit, cmd));
1015 
1016 	/* Must be open for writes for these commands... */
1017 	switch (cmd) {
1018 #ifdef DIOCGSECTORSIZE
1019 	case DIOCGSECTORSIZE:
1020 		*(u_int *)data = raidPtr->bytesPerSector;
1021 		return 0;
1022 	case DIOCGMEDIASIZE:
1023 		*(off_t *)data =
1024 		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1025 		return 0;
1026 #endif
1027 	case DIOCSDINFO:
1028 	case DIOCWDINFO:
1029 #ifdef __HAVE_OLD_DISKLABEL
1030 	case ODIOCWDINFO:
1031 	case ODIOCSDINFO:
1032 #endif
1033 	case DIOCWLABEL:
1034 	case DIOCAWEDGE:
1035 	case DIOCDWEDGE:
1036 	case DIOCSSTRATEGY:
1037 		if ((flag & FWRITE) == 0)
1038 			return (EBADF);
1039 	}
1040 
1041 	/* Must be initialized for these... */
1042 	switch (cmd) {
1043 	case DIOCGDINFO:
1044 	case DIOCSDINFO:
1045 	case DIOCWDINFO:
1046 #ifdef __HAVE_OLD_DISKLABEL
1047 	case ODIOCGDINFO:
1048 	case ODIOCWDINFO:
1049 	case ODIOCSDINFO:
1050 	case ODIOCGDEFLABEL:
1051 #endif
1052 	case DIOCGPART:
1053 	case DIOCWLABEL:
1054 	case DIOCGDEFLABEL:
1055 	case DIOCAWEDGE:
1056 	case DIOCDWEDGE:
1057 	case DIOCLWEDGES:
1058 	case DIOCCACHESYNC:
1059 	case RAIDFRAME_SHUTDOWN:
1060 	case RAIDFRAME_REWRITEPARITY:
1061 	case RAIDFRAME_GET_INFO:
1062 	case RAIDFRAME_RESET_ACCTOTALS:
1063 	case RAIDFRAME_GET_ACCTOTALS:
1064 	case RAIDFRAME_KEEP_ACCTOTALS:
1065 	case RAIDFRAME_GET_SIZE:
1066 	case RAIDFRAME_FAIL_DISK:
1067 	case RAIDFRAME_COPYBACK:
1068 	case RAIDFRAME_CHECK_RECON_STATUS:
1069 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1070 	case RAIDFRAME_GET_COMPONENT_LABEL:
1071 	case RAIDFRAME_SET_COMPONENT_LABEL:
1072 	case RAIDFRAME_ADD_HOT_SPARE:
1073 	case RAIDFRAME_REMOVE_HOT_SPARE:
1074 	case RAIDFRAME_INIT_LABELS:
1075 	case RAIDFRAME_REBUILD_IN_PLACE:
1076 	case RAIDFRAME_CHECK_PARITY:
1077 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1078 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1079 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1080 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1081 	case RAIDFRAME_SET_AUTOCONFIG:
1082 	case RAIDFRAME_SET_ROOT:
1083 	case RAIDFRAME_DELETE_COMPONENT:
1084 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1085 	case RAIDFRAME_PARITYMAP_STATUS:
1086 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1087 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1088 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1089 	case DIOCGSTRATEGY:
1090 	case DIOCSSTRATEGY:
1091 		if ((rs->sc_flags & RAIDF_INITED) == 0)
1092 			return (ENXIO);
1093 	}
1094 
1095 	switch (cmd) {
1096 #ifdef COMPAT_50
1097 	case RAIDFRAME_GET_INFO50:
1098 		return rf_get_info50(raidPtr, data);
1099 
1100 	case RAIDFRAME_CONFIGURE50:
1101 		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1102 			return retcode;
1103 		goto config;
1104 #endif
1105 		/* configure the system */
1106 	case RAIDFRAME_CONFIGURE:
1107 
1108 		if (raidPtr->valid) {
1109 			/* There is a valid RAID set running on this unit! */
1110 			printf("raid%d: Device already configured!\n",unit);
1111 			return(EINVAL);
1112 		}
1113 
1114 		/* copy-in the configuration information */
1115 		/* data points to a pointer to the configuration structure */
1116 
1117 		u_cfg = *((RF_Config_t **) data);
1118 		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1119 		if (k_cfg == NULL) {
1120 			return (ENOMEM);
1121 		}
1122 		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1123 		if (retcode) {
1124 			RF_Free(k_cfg, sizeof(RF_Config_t));
1125 			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1126 				retcode));
1127 			return (retcode);
1128 		}
1129 		goto config;
1130 	config:
1131 		/* allocate a buffer for the layout-specific data, and copy it
1132 		 * in */
1133 		if (k_cfg->layoutSpecificSize) {
1134 			if (k_cfg->layoutSpecificSize > 10000) {
1135 				/* sanity check */
1136 				RF_Free(k_cfg, sizeof(RF_Config_t));
1137 				return (EINVAL);
1138 			}
1139 			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1140 			    (u_char *));
1141 			if (specific_buf == NULL) {
1142 				RF_Free(k_cfg, sizeof(RF_Config_t));
1143 				return (ENOMEM);
1144 			}
1145 			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1146 			    k_cfg->layoutSpecificSize);
1147 			if (retcode) {
1148 				RF_Free(k_cfg, sizeof(RF_Config_t));
1149 				RF_Free(specific_buf,
1150 					k_cfg->layoutSpecificSize);
1151 				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1152 					retcode));
1153 				return (retcode);
1154 			}
1155 		} else
1156 			specific_buf = NULL;
1157 		k_cfg->layoutSpecific = specific_buf;
1158 
1159 		/* should do some kind of sanity check on the configuration.
1160 		 * Store the sum of all the bytes in the last byte? */
1161 
1162 		/* configure the system */
1163 
1164 		/*
1165 		 * Clear the entire RAID descriptor, just to make sure
1166 		 *  there is no stale data left in the case of a
1167 		 *  reconfiguration
1168 		 */
1169 		memset(raidPtr, 0, sizeof(*raidPtr));
1170 		raidPtr->softc = rs;
1171 		raidPtr->raidid = unit;
1172 
1173 		retcode = rf_Configure(raidPtr, k_cfg, NULL);
1174 
1175 		if (retcode == 0) {
1176 
1177 			/* allow this many simultaneous IO's to
1178 			   this RAID device */
1179 			raidPtr->openings = RAIDOUTSTANDING;
1180 
1181 			raidinit(rs);
1182 			rf_markalldirty(raidPtr);
1183 		}
1184 		/* free the buffers.  No return code here. */
1185 		if (k_cfg->layoutSpecificSize) {
1186 			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1187 		}
1188 		RF_Free(k_cfg, sizeof(RF_Config_t));
1189 
1190 		return (retcode);
1191 
1192 		/* shutdown the system */
1193 	case RAIDFRAME_SHUTDOWN:
1194 
1195 		part = DISKPART(dev);
1196 		pmask = (1 << part);
1197 
1198 		if ((error = raidlock(rs)) != 0)
1199 			return (error);
1200 
1201 		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1202 		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1203 			(rs->sc_dkdev.dk_copenmask & pmask)))
1204 			retcode = EBUSY;
1205 		else {
1206 			rs->sc_flags |= RAIDF_SHUTDOWN;
1207 			rs->sc_dkdev.dk_copenmask &= ~pmask;
1208 			rs->sc_dkdev.dk_bopenmask &= ~pmask;
1209 			rs->sc_dkdev.dk_openmask &= ~pmask;
1210 			retcode = 0;
1211 		}
1212 
1213 		raidunlock(rs);
1214 
1215 		if (retcode != 0)
1216 			return retcode;
1217 
1218 		/* free the pseudo device attach bits */
1219 
1220 		cf = device_cfdata(rs->sc_dev);
1221 		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1222 			free(cf, M_RAIDFRAME);
1223 
1224 		return (retcode);
1225 	case RAIDFRAME_GET_COMPONENT_LABEL:
1226 		clabel_ptr = (RF_ComponentLabel_t **) data;
1227 		/* need to read the component label for the disk indicated
1228 		   by row,column in clabel */
1229 
1230 		/*
1231 		 * Perhaps there should be an option to skip the in-core
1232 		 * copy and hit the disk, as with disklabel(8).
1233 		 */
1234 		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1235 
1236 		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1237 
1238 		if (retcode) {
1239 			RF_Free(clabel, sizeof(*clabel));
1240 			return retcode;
1241 		}
1242 
1243 		clabel->row = 0; /* Don't allow looking at anything else.*/
1244 
1245 		column = clabel->column;
1246 
1247 		if ((column < 0) || (column >= raidPtr->numCol +
1248 		    raidPtr->numSpare)) {
1249 			RF_Free(clabel, sizeof(*clabel));
1250 			return EINVAL;
1251 		}
1252 
1253 		RF_Free(clabel, sizeof(*clabel));
1254 
1255 		clabel = raidget_component_label(raidPtr, column);
1256 
1257 		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1258 
1259 #if 0
1260 	case RAIDFRAME_SET_COMPONENT_LABEL:
1261 		clabel = (RF_ComponentLabel_t *) data;
1262 
1263 		/* XXX check the label for valid stuff... */
1264 		/* Note that some things *should not* get modified --
1265 		   the user should be re-initing the labels instead of
1266 		   trying to patch things.
1267 		   */
1268 
1269 		raidid = raidPtr->raidid;
1270 #ifdef DEBUG
1271 		printf("raid%d: Got component label:\n", raidid);
1272 		printf("raid%d: Version: %d\n", raidid, clabel->version);
1273 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1274 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1275 		printf("raid%d: Column: %d\n", raidid, clabel->column);
1276 		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1277 		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1278 		printf("raid%d: Status: %d\n", raidid, clabel->status);
1279 #endif
1280 		clabel->row = 0;
1281 		column = clabel->column;
1282 
1283 		if ((column < 0) || (column >= raidPtr->numCol)) {
1284 			return(EINVAL);
1285 		}
1286 
1287 		/* XXX this isn't allowed to do anything for now :-) */
1288 
1289 		/* XXX and before it is, we need to fill in the rest
1290 		   of the fields!?!?!?! */
1291 		memcpy(raidget_component_label(raidPtr, column),
1292 		    clabel, sizeof(*clabel));
1293 		raidflush_component_label(raidPtr, column);
1294 		return (0);
1295 #endif
1296 
1297 	case RAIDFRAME_INIT_LABELS:
1298 		clabel = (RF_ComponentLabel_t *) data;
1299 		/*
1300 		   we only want the serial number from
1301 		   the above.  We get all the rest of the information
1302 		   from the config that was used to create this RAID
1303 		   set.
1304 		   */
1305 
1306 		raidPtr->serial_number = clabel->serial_number;
1307 
1308 		for(column=0;column<raidPtr->numCol;column++) {
1309 			diskPtr = &raidPtr->Disks[column];
1310 			if (!RF_DEAD_DISK(diskPtr->status)) {
1311 				ci_label = raidget_component_label(raidPtr,
1312 				    column);
1313 				/* Zeroing this is important. */
1314 				memset(ci_label, 0, sizeof(*ci_label));
1315 				raid_init_component_label(raidPtr, ci_label);
1316 				ci_label->serial_number =
1317 				    raidPtr->serial_number;
1318 				ci_label->row = 0; /* we dont' pretend to support more */
1319 				rf_component_label_set_partitionsize(ci_label,
1320 				    diskPtr->partitionSize);
1321 				ci_label->column = column;
1322 				raidflush_component_label(raidPtr, column);
1323 			}
1324 			/* XXXjld what about the spares? */
1325 		}
1326 
1327 		return (retcode);
1328 	case RAIDFRAME_SET_AUTOCONFIG:
1329 		d = rf_set_autoconfig(raidPtr, *(int *) data);
1330 		printf("raid%d: New autoconfig value is: %d\n",
1331 		       raidPtr->raidid, d);
1332 		*(int *) data = d;
1333 		return (retcode);
1334 
1335 	case RAIDFRAME_SET_ROOT:
1336 		d = rf_set_rootpartition(raidPtr, *(int *) data);
1337 		printf("raid%d: New rootpartition value is: %d\n",
1338 		       raidPtr->raidid, d);
1339 		*(int *) data = d;
1340 		return (retcode);
1341 
1342 		/* initialize all parity */
1343 	case RAIDFRAME_REWRITEPARITY:
1344 
1345 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1346 			/* Parity for RAID 0 is trivially correct */
1347 			raidPtr->parity_good = RF_RAID_CLEAN;
1348 			return(0);
1349 		}
1350 
1351 		if (raidPtr->parity_rewrite_in_progress == 1) {
1352 			/* Re-write is already in progress! */
1353 			return(EINVAL);
1354 		}
1355 
1356 		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1357 					   rf_RewriteParityThread,
1358 					   raidPtr,"raid_parity");
1359 		return (retcode);
1360 
1361 
1362 	case RAIDFRAME_ADD_HOT_SPARE:
1363 		sparePtr = (RF_SingleComponent_t *) data;
1364 		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1365 		retcode = rf_add_hot_spare(raidPtr, &component);
1366 		return(retcode);
1367 
1368 	case RAIDFRAME_REMOVE_HOT_SPARE:
1369 		return(retcode);
1370 
1371 	case RAIDFRAME_DELETE_COMPONENT:
1372 		componentPtr = (RF_SingleComponent_t *)data;
1373 		memcpy( &component, componentPtr,
1374 			sizeof(RF_SingleComponent_t));
1375 		retcode = rf_delete_component(raidPtr, &component);
1376 		return(retcode);
1377 
1378 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
1379 		componentPtr = (RF_SingleComponent_t *)data;
1380 		memcpy( &component, componentPtr,
1381 			sizeof(RF_SingleComponent_t));
1382 		retcode = rf_incorporate_hot_spare(raidPtr, &component);
1383 		return(retcode);
1384 
1385 	case RAIDFRAME_REBUILD_IN_PLACE:
1386 
1387 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1388 			/* Can't do this on a RAID 0!! */
1389 			return(EINVAL);
1390 		}
1391 
1392 		if (raidPtr->recon_in_progress == 1) {
1393 			/* a reconstruct is already in progress! */
1394 			return(EINVAL);
1395 		}
1396 
1397 		componentPtr = (RF_SingleComponent_t *) data;
1398 		memcpy( &component, componentPtr,
1399 			sizeof(RF_SingleComponent_t));
1400 		component.row = 0; /* we don't support any more */
1401 		column = component.column;
1402 
1403 		if ((column < 0) || (column >= raidPtr->numCol)) {
1404 			return(EINVAL);
1405 		}
1406 
1407 		rf_lock_mutex2(raidPtr->mutex);
1408 		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1409 		    (raidPtr->numFailures > 0)) {
1410 			/* XXX 0 above shouldn't be constant!!! */
1411 			/* some component other than this has failed.
1412 			   Let's not make things worse than they already
1413 			   are... */
1414 			printf("raid%d: Unable to reconstruct to disk at:\n",
1415 			       raidPtr->raidid);
1416 			printf("raid%d:     Col: %d   Too many failures.\n",
1417 			       raidPtr->raidid, column);
1418 			rf_unlock_mutex2(raidPtr->mutex);
1419 			return (EINVAL);
1420 		}
1421 		if (raidPtr->Disks[column].status ==
1422 		    rf_ds_reconstructing) {
1423 			printf("raid%d: Unable to reconstruct to disk at:\n",
1424 			       raidPtr->raidid);
1425 			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
1426 
1427 			rf_unlock_mutex2(raidPtr->mutex);
1428 			return (EINVAL);
1429 		}
1430 		if (raidPtr->Disks[column].status == rf_ds_spared) {
1431 			rf_unlock_mutex2(raidPtr->mutex);
1432 			return (EINVAL);
1433 		}
1434 		rf_unlock_mutex2(raidPtr->mutex);
1435 
1436 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1437 		if (rrcopy == NULL)
1438 			return(ENOMEM);
1439 
1440 		rrcopy->raidPtr = (void *) raidPtr;
1441 		rrcopy->col = column;
1442 
1443 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1444 					   rf_ReconstructInPlaceThread,
1445 					   rrcopy,"raid_reconip");
1446 		return(retcode);
1447 
1448 	case RAIDFRAME_GET_INFO:
1449 		if (!raidPtr->valid)
1450 			return (ENODEV);
1451 		ucfgp = (RF_DeviceConfig_t **) data;
1452 		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1453 			  (RF_DeviceConfig_t *));
1454 		if (d_cfg == NULL)
1455 			return (ENOMEM);
1456 		d_cfg->rows = 1; /* there is only 1 row now */
1457 		d_cfg->cols = raidPtr->numCol;
1458 		d_cfg->ndevs = raidPtr->numCol;
1459 		if (d_cfg->ndevs >= RF_MAX_DISKS) {
1460 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1461 			return (ENOMEM);
1462 		}
1463 		d_cfg->nspares = raidPtr->numSpare;
1464 		if (d_cfg->nspares >= RF_MAX_DISKS) {
1465 			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1466 			return (ENOMEM);
1467 		}
1468 		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1469 		d = 0;
1470 		for (j = 0; j < d_cfg->cols; j++) {
1471 			d_cfg->devs[d] = raidPtr->Disks[j];
1472 			d++;
1473 		}
1474 		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1475 			d_cfg->spares[i] = raidPtr->Disks[j];
1476 		}
1477 		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1478 		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1479 
1480 		return (retcode);
1481 
1482 	case RAIDFRAME_CHECK_PARITY:
1483 		*(int *) data = raidPtr->parity_good;
1484 		return (0);
1485 
1486 	case RAIDFRAME_PARITYMAP_STATUS:
1487 		if (rf_paritymap_ineligible(raidPtr))
1488 			return EINVAL;
1489 		rf_paritymap_status(raidPtr->parity_map,
1490 		    (struct rf_pmstat *)data);
1491 		return 0;
1492 
1493 	case RAIDFRAME_PARITYMAP_SET_PARAMS:
1494 		if (rf_paritymap_ineligible(raidPtr))
1495 			return EINVAL;
1496 		if (raidPtr->parity_map == NULL)
1497 			return ENOENT; /* ??? */
1498 		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1499 			(struct rf_pmparams *)data, 1))
1500 			return EINVAL;
1501 		return 0;
1502 
1503 	case RAIDFRAME_PARITYMAP_GET_DISABLE:
1504 		if (rf_paritymap_ineligible(raidPtr))
1505 			return EINVAL;
1506 		*(int *) data = rf_paritymap_get_disable(raidPtr);
1507 		return 0;
1508 
1509 	case RAIDFRAME_PARITYMAP_SET_DISABLE:
1510 		if (rf_paritymap_ineligible(raidPtr))
1511 			return EINVAL;
1512 		rf_paritymap_set_disable(raidPtr, *(int *)data);
1513 		/* XXX should errors be passed up? */
1514 		return 0;
1515 
1516 	case RAIDFRAME_RESET_ACCTOTALS:
1517 		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1518 		return (0);
1519 
1520 	case RAIDFRAME_GET_ACCTOTALS:
1521 		totals = (RF_AccTotals_t *) data;
1522 		*totals = raidPtr->acc_totals;
1523 		return (0);
1524 
1525 	case RAIDFRAME_KEEP_ACCTOTALS:
1526 		raidPtr->keep_acc_totals = *(int *)data;
1527 		return (0);
1528 
1529 	case RAIDFRAME_GET_SIZE:
1530 		*(int *) data = raidPtr->totalSectors;
1531 		return (0);
1532 
1533 		/* fail a disk & optionally start reconstruction */
1534 	case RAIDFRAME_FAIL_DISK:
1535 
1536 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1537 			/* Can't do this on a RAID 0!! */
1538 			return(EINVAL);
1539 		}
1540 
1541 		rr = (struct rf_recon_req *) data;
1542 		rr->row = 0;
1543 		if (rr->col < 0 || rr->col >= raidPtr->numCol)
1544 			return (EINVAL);
1545 
1546 
1547 		rf_lock_mutex2(raidPtr->mutex);
1548 		if (raidPtr->status == rf_rs_reconstructing) {
1549 			/* you can't fail a disk while we're reconstructing! */
1550 			/* XXX wrong for RAID6 */
1551 			rf_unlock_mutex2(raidPtr->mutex);
1552 			return (EINVAL);
1553 		}
1554 		if ((raidPtr->Disks[rr->col].status ==
1555 		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1556 			/* some other component has failed.  Let's not make
1557 			   things worse. XXX wrong for RAID6 */
1558 			rf_unlock_mutex2(raidPtr->mutex);
1559 			return (EINVAL);
1560 		}
1561 		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1562 			/* Can't fail a spared disk! */
1563 			rf_unlock_mutex2(raidPtr->mutex);
1564 			return (EINVAL);
1565 		}
1566 		rf_unlock_mutex2(raidPtr->mutex);
1567 
1568 		/* make a copy of the recon request so that we don't rely on
1569 		 * the user's buffer */
1570 		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1571 		if (rrcopy == NULL)
1572 			return(ENOMEM);
1573 		memcpy(rrcopy, rr, sizeof(*rr));
1574 		rrcopy->raidPtr = (void *) raidPtr;
1575 
1576 		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1577 					   rf_ReconThread,
1578 					   rrcopy,"raid_recon");
1579 		return (0);
1580 
1581 		/* invoke a copyback operation after recon on whatever disk
1582 		 * needs it, if any */
1583 	case RAIDFRAME_COPYBACK:
1584 
1585 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1586 			/* This makes no sense on a RAID 0!! */
1587 			return(EINVAL);
1588 		}
1589 
1590 		if (raidPtr->copyback_in_progress == 1) {
1591 			/* Copyback is already in progress! */
1592 			return(EINVAL);
1593 		}
1594 
1595 		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1596 					   rf_CopybackThread,
1597 					   raidPtr,"raid_copyback");
1598 		return (retcode);
1599 
1600 		/* return the percentage completion of reconstruction */
1601 	case RAIDFRAME_CHECK_RECON_STATUS:
1602 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1603 			/* This makes no sense on a RAID 0, so tell the
1604 			   user it's done. */
1605 			*(int *) data = 100;
1606 			return(0);
1607 		}
1608 		if (raidPtr->status != rf_rs_reconstructing)
1609 			*(int *) data = 100;
1610 		else {
1611 			if (raidPtr->reconControl->numRUsTotal > 0) {
1612 				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1613 			} else {
1614 				*(int *) data = 0;
1615 			}
1616 		}
1617 		return (0);
1618 	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1619 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1620 		if (raidPtr->status != rf_rs_reconstructing) {
1621 			progressInfo.remaining = 0;
1622 			progressInfo.completed = 100;
1623 			progressInfo.total = 100;
1624 		} else {
1625 			progressInfo.total =
1626 				raidPtr->reconControl->numRUsTotal;
1627 			progressInfo.completed =
1628 				raidPtr->reconControl->numRUsComplete;
1629 			progressInfo.remaining = progressInfo.total -
1630 				progressInfo.completed;
1631 		}
1632 		retcode = copyout(&progressInfo, *progressInfoPtr,
1633 				  sizeof(RF_ProgressInfo_t));
1634 		return (retcode);
1635 
1636 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1637 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1638 			/* This makes no sense on a RAID 0, so tell the
1639 			   user it's done. */
1640 			*(int *) data = 100;
1641 			return(0);
1642 		}
1643 		if (raidPtr->parity_rewrite_in_progress == 1) {
1644 			*(int *) data = 100 *
1645 				raidPtr->parity_rewrite_stripes_done /
1646 				raidPtr->Layout.numStripe;
1647 		} else {
1648 			*(int *) data = 100;
1649 		}
1650 		return (0);
1651 
1652 	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1653 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1654 		if (raidPtr->parity_rewrite_in_progress == 1) {
1655 			progressInfo.total = raidPtr->Layout.numStripe;
1656 			progressInfo.completed =
1657 				raidPtr->parity_rewrite_stripes_done;
1658 			progressInfo.remaining = progressInfo.total -
1659 				progressInfo.completed;
1660 		} else {
1661 			progressInfo.remaining = 0;
1662 			progressInfo.completed = 100;
1663 			progressInfo.total = 100;
1664 		}
1665 		retcode = copyout(&progressInfo, *progressInfoPtr,
1666 				  sizeof(RF_ProgressInfo_t));
1667 		return (retcode);
1668 
1669 	case RAIDFRAME_CHECK_COPYBACK_STATUS:
1670 		if (raidPtr->Layout.map->faultsTolerated == 0) {
1671 			/* This makes no sense on a RAID 0 */
1672 			*(int *) data = 100;
1673 			return(0);
1674 		}
1675 		if (raidPtr->copyback_in_progress == 1) {
1676 			*(int *) data = 100 * raidPtr->copyback_stripes_done /
1677 				raidPtr->Layout.numStripe;
1678 		} else {
1679 			*(int *) data = 100;
1680 		}
1681 		return (0);
1682 
1683 	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1684 		progressInfoPtr = (RF_ProgressInfo_t **) data;
1685 		if (raidPtr->copyback_in_progress == 1) {
1686 			progressInfo.total = raidPtr->Layout.numStripe;
1687 			progressInfo.completed =
1688 				raidPtr->copyback_stripes_done;
1689 			progressInfo.remaining = progressInfo.total -
1690 				progressInfo.completed;
1691 		} else {
1692 			progressInfo.remaining = 0;
1693 			progressInfo.completed = 100;
1694 			progressInfo.total = 100;
1695 		}
1696 		retcode = copyout(&progressInfo, *progressInfoPtr,
1697 				  sizeof(RF_ProgressInfo_t));
1698 		return (retcode);
1699 
1700 		/* the sparetable daemon calls this to wait for the kernel to
1701 		 * need a spare table. this ioctl does not return until a
1702 		 * spare table is needed. XXX -- calling mpsleep here in the
1703 		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1704 		 * -- I should either compute the spare table in the kernel,
1705 		 * or have a different -- XXX XXX -- interface (a different
1706 		 * character device) for delivering the table     -- XXX */
1707 #if 0
1708 	case RAIDFRAME_SPARET_WAIT:
1709 		rf_lock_mutex2(rf_sparet_wait_mutex);
1710 		while (!rf_sparet_wait_queue)
1711 			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1712 		waitreq = rf_sparet_wait_queue;
1713 		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1714 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1715 
1716 		/* structure assignment */
1717 		*((RF_SparetWait_t *) data) = *waitreq;
1718 
1719 		RF_Free(waitreq, sizeof(*waitreq));
1720 		return (0);
1721 
1722 		/* wakes up a process waiting on SPARET_WAIT and puts an error
1723 		 * code in it that will cause the dameon to exit */
1724 	case RAIDFRAME_ABORT_SPARET_WAIT:
1725 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1726 		waitreq->fcol = -1;
1727 		rf_lock_mutex2(rf_sparet_wait_mutex);
1728 		waitreq->next = rf_sparet_wait_queue;
1729 		rf_sparet_wait_queue = waitreq;
1730 		rf_broadcast_conf2(rf_sparet_wait_cv);
1731 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1732 		return (0);
1733 
1734 		/* used by the spare table daemon to deliver a spare table
1735 		 * into the kernel */
1736 	case RAIDFRAME_SEND_SPARET:
1737 
1738 		/* install the spare table */
1739 		retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1740 
1741 		/* respond to the requestor.  the return status of the spare
1742 		 * table installation is passed in the "fcol" field */
1743 		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1744 		waitreq->fcol = retcode;
1745 		rf_lock_mutex2(rf_sparet_wait_mutex);
1746 		waitreq->next = rf_sparet_resp_queue;
1747 		rf_sparet_resp_queue = waitreq;
1748 		rf_broadcast_cond2(rf_sparet_resp_cv);
1749 		rf_unlock_mutex2(rf_sparet_wait_mutex);
1750 
1751 		return (retcode);
1752 #endif
1753 
1754 	default:
1755 		break; /* fall through to the os-specific code below */
1756 
1757 	}
1758 
1759 	if (!raidPtr->valid)
1760 		return (EINVAL);
1761 
1762 	/*
1763 	 * Add support for "regular" device ioctls here.
1764 	 */
1765 
1766 	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1767 	if (error != EPASSTHROUGH)
1768 		return (error);
1769 
1770 	switch (cmd) {
1771 	case DIOCGDINFO:
1772 		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1773 		break;
1774 #ifdef __HAVE_OLD_DISKLABEL
1775 	case ODIOCGDINFO:
1776 		newlabel = *(rs->sc_dkdev.dk_label);
1777 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1778 			return ENOTTY;
1779 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
1780 		break;
1781 #endif
1782 
1783 	case DIOCGPART:
1784 		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1785 		((struct partinfo *) data)->part =
1786 		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1787 		break;
1788 
1789 	case DIOCWDINFO:
1790 	case DIOCSDINFO:
1791 #ifdef __HAVE_OLD_DISKLABEL
1792 	case ODIOCWDINFO:
1793 	case ODIOCSDINFO:
1794 #endif
1795 	{
1796 		struct disklabel *lp;
1797 #ifdef __HAVE_OLD_DISKLABEL
1798 		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1799 			memset(&newlabel, 0, sizeof newlabel);
1800 			memcpy(&newlabel, data, sizeof (struct olddisklabel));
1801 			lp = &newlabel;
1802 		} else
1803 #endif
1804 		lp = (struct disklabel *)data;
1805 
1806 		if ((error = raidlock(rs)) != 0)
1807 			return (error);
1808 
1809 		rs->sc_flags |= RAIDF_LABELLING;
1810 
1811 		error = setdisklabel(rs->sc_dkdev.dk_label,
1812 		    lp, 0, rs->sc_dkdev.dk_cpulabel);
1813 		if (error == 0) {
1814 			if (cmd == DIOCWDINFO
1815 #ifdef __HAVE_OLD_DISKLABEL
1816 			    || cmd == ODIOCWDINFO
1817 #endif
1818 			   )
1819 				error = writedisklabel(RAIDLABELDEV(dev),
1820 				    raidstrategy, rs->sc_dkdev.dk_label,
1821 				    rs->sc_dkdev.dk_cpulabel);
1822 		}
1823 		rs->sc_flags &= ~RAIDF_LABELLING;
1824 
1825 		raidunlock(rs);
1826 
1827 		if (error)
1828 			return (error);
1829 		break;
1830 	}
1831 
1832 	case DIOCWLABEL:
1833 		if (*(int *) data != 0)
1834 			rs->sc_flags |= RAIDF_WLABEL;
1835 		else
1836 			rs->sc_flags &= ~RAIDF_WLABEL;
1837 		break;
1838 
1839 	case DIOCGDEFLABEL:
1840 		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1841 		break;
1842 
1843 #ifdef __HAVE_OLD_DISKLABEL
1844 	case ODIOCGDEFLABEL:
1845 		raidgetdefaultlabel(raidPtr, rs, &newlabel);
1846 		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1847 			return ENOTTY;
1848 		memcpy(data, &newlabel, sizeof (struct olddisklabel));
1849 		break;
1850 #endif
1851 
1852 	case DIOCAWEDGE:
1853 	case DIOCDWEDGE:
1854 	    	dkw = (void *)data;
1855 
1856 		/* If the ioctl happens here, the parent is us. */
1857 		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
1858 		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1859 
1860 	case DIOCLWEDGES:
1861 		return dkwedge_list(&rs->sc_dkdev,
1862 		    (struct dkwedge_list *)data, l);
1863 	case DIOCCACHESYNC:
1864 		return rf_sync_component_caches(raidPtr);
1865 
1866 	case DIOCGSTRATEGY:
1867 	    {
1868 		struct disk_strategy *dks = (void *)data;
1869 
1870 		s = splbio();
1871 		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1872 		    sizeof(dks->dks_name));
1873 		splx(s);
1874 		dks->dks_paramlen = 0;
1875 
1876 		return 0;
1877 	    }
1878 
1879 	case DIOCSSTRATEGY:
1880 	    {
1881 		struct disk_strategy *dks = (void *)data;
1882 		struct bufq_state *new;
1883 		struct bufq_state *old;
1884 
1885 		if (dks->dks_param != NULL) {
1886 			return EINVAL;
1887 		}
1888 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1889 		error = bufq_alloc(&new, dks->dks_name,
1890 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1891 		if (error) {
1892 			return error;
1893 		}
1894 		s = splbio();
1895 		old = rs->buf_queue;
1896 		bufq_move(new, old);
1897 		rs->buf_queue = new;
1898 		splx(s);
1899 		bufq_free(old);
1900 
1901 		return 0;
1902 	    }
1903 
1904 	default:
1905 		retcode = ENOTTY;
1906 	}
1907 	return (retcode);
1908 
1909 }
1910 
1911 
1912 /* raidinit -- complete the rest of the initialization for the
1913    RAIDframe device.  */
1914 
1915 
1916 static void
1917 raidinit(struct raid_softc *rs)
1918 {
1919 	cfdata_t cf;
1920 	int     unit;
1921 	RF_Raid_t *raidPtr = &rs->sc_r;
1922 
1923 	unit = raidPtr->raidid;
1924 
1925 
1926 	/* XXX should check return code first... */
1927 	rs->sc_flags |= RAIDF_INITED;
1928 
1929 	/* XXX doesn't check bounds. */
1930 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1931 
1932 	/* attach the pseudo device */
1933 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1934 	cf->cf_name = raid_cd.cd_name;
1935 	cf->cf_atname = raid_cd.cd_name;
1936 	cf->cf_unit = unit;
1937 	cf->cf_fstate = FSTATE_STAR;
1938 
1939 	rs->sc_dev = config_attach_pseudo(cf);
1940 
1941 	if (rs->sc_dev == NULL) {
1942 		printf("raid%d: config_attach_pseudo failed\n",
1943 		    raidPtr->raidid);
1944 		rs->sc_flags &= ~RAIDF_INITED;
1945 		free(cf, M_RAIDFRAME);
1946 		return;
1947 	}
1948 
1949 	/* disk_attach actually creates space for the CPU disklabel, among
1950 	 * other things, so it's critical to call this *BEFORE* we try putzing
1951 	 * with disklabels. */
1952 
1953 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1954 	disk_attach(&rs->sc_dkdev);
1955 	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1956 
1957 	/* XXX There may be a weird interaction here between this, and
1958 	 * protectedSectors, as used in RAIDframe.  */
1959 
1960 	rs->sc_size = raidPtr->totalSectors;
1961 
1962 	dkwedge_discover(&rs->sc_dkdev);
1963 
1964 	rf_set_geometry(rs, raidPtr);
1965 
1966 }
1967 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1968 /* wake up the daemon & tell it to get us a spare table
1969  * XXX
1970  * the entries in the queues should be tagged with the raidPtr
1971  * so that in the extremely rare case that two recons happen at once,
1972  * we know for which device were requesting a spare table
1973  * XXX
1974  *
1975  * XXX This code is not currently used. GO
1976  */
1977 int
1978 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1979 {
1980 	int     retcode;
1981 
1982 	rf_lock_mutex2(rf_sparet_wait_mutex);
1983 	req->next = rf_sparet_wait_queue;
1984 	rf_sparet_wait_queue = req;
1985 	rf_broadcast_cond2(rf_sparet_wait_cv);
1986 
1987 	/* mpsleep unlocks the mutex */
1988 	while (!rf_sparet_resp_queue) {
1989 		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1990 	}
1991 	req = rf_sparet_resp_queue;
1992 	rf_sparet_resp_queue = req->next;
1993 	rf_unlock_mutex2(rf_sparet_wait_mutex);
1994 
1995 	retcode = req->fcol;
1996 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
1997 					 * alloc'd */
1998 	return (retcode);
1999 }
2000 #endif
2001 
2002 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2003  * bp & passes it down.
2004  * any calls originating in the kernel must use non-blocking I/O
2005  * do some extra sanity checking to return "appropriate" error values for
2006  * certain conditions (to make some standard utilities work)
2007  *
2008  * Formerly known as: rf_DoAccessKernel
2009  */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* must drop the mutex around the label update, which
		 * takes locks of its own */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held when the condition is
	 * evaluated; it is released while each request is processed and
	 * re-acquired before looping. */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this branch on; looks like a
		 * debugging leftover (db1_printf is normally a no-op). */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* bounds check: the request must fit inside the array, and
		 * the "sum < addend" comparisons catch arithmetic wrap */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* NOTE(review): openings was decremented above and
			 * is not restored on this path — confirm it is
			 * recovered by the completion machinery. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}
2127 
2128 
2129 
2130 
2131 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
2132 
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	/* remember which queue this request belongs to; the completion
	 * handler needs it */
	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* fake an immediate completion for the NOP */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* start timing the physical I/O */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf; KernelWakeupFunc fires at biodone time */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2206 /* this is the callback function associated with a I/O invoked from
2207    kernel code.
2208  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	/* iodone_lock protects both the iodone list and the failure
	 * bookkeeping below */
	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* charge the elapsed time to this request's trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2274 
2275 
2276 /*
2277  * initialize a buf structure for doing an I/O in the kernel.
2278  */
/*
 * Fill in bp for a numSect-sector transfer starting at startSect on dev.
 * cbFunc/cbArg become the biodone callback and its argument (stored in
 * b_iodone and b_private); rw_flag supplies B_READ/B_WRITE.
 * NOTE(review): the b_vp parameter is currently unused in this body.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert the sector address into DEV_BSIZE-sized block units */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2303 
2304 static void
2305 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2306 		    struct disklabel *lp)
2307 {
2308 	memset(lp, 0, sizeof(*lp));
2309 
2310 	/* fabricate a label... */
2311 	lp->d_secperunit = raidPtr->totalSectors;
2312 	lp->d_secsize = raidPtr->bytesPerSector;
2313 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2314 	lp->d_ntracks = 4 * raidPtr->numCol;
2315 	lp->d_ncylinders = raidPtr->totalSectors /
2316 		(lp->d_nsectors * lp->d_ntracks);
2317 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2318 
2319 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2320 	lp->d_type = DTYPE_RAID;
2321 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2322 	lp->d_rpm = 3600;
2323 	lp->d_interleave = 1;
2324 	lp->d_flags = 0;
2325 
2326 	lp->d_partitions[RAW_PART].p_offset = 0;
2327 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2328 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2329 	lp->d_npartitions = RAW_PART + 1;
2330 
2331 	lp->d_magic = DISKMAGIC;
2332 	lp->d_magic2 = DISKMAGIC;
2333 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2334 
2335 }
2336 /*
2337  * Read the disklabel from the raid device.  If one is not present, fake one
2338  * up.
2339  */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const char   *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* start from a fabricated default in case nothing is on disk */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable on-disk label: install the fake one */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* warn (but don't reject) about partitions that run past
		 * the end of the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2398 /*
2399  * Take care of things one might want to take care of in the event
2400  * that a disklabel isn't present.
2401  */
2402 static void
2403 raidmakedisklabel(struct raid_softc *rs)
2404 {
2405 	struct disklabel *lp = rs->sc_dkdev.dk_label;
2406 	db1_printf(("Making a label..\n"));
2407 
2408 	/*
2409 	 * For historical reasons, if there's no disklabel present
2410 	 * the raw partition must be marked FS_BSDFFS.
2411 	 */
2412 
2413 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2414 
2415 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2416 
2417 	lp->d_checksum = dkcksum(lp);
2418 }
2419 /*
2420  * Wait interruptibly for an exclusive lock.
2421  *
2422  * XXX
2423  * Several drivers do this; it should be abstracted and made MP-safe.
2424  * (Hmm... where have we seen this warning before :->  GO )
2425  */
2426 static int
2427 raidlock(struct raid_softc *rs)
2428 {
2429 	int     error;
2430 
2431 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2432 		rs->sc_flags |= RAIDF_WANTED;
2433 		if ((error =
2434 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2435 			return (error);
2436 	}
2437 	rs->sc_flags |= RAIDF_LOCKED;
2438 	return (0);
2439 }
2440 /*
2441  * Unlock and wake up any waiters.
2442  */
2443 static void
2444 raidunlock(struct raid_softc *rs)
2445 {
2446 
2447 	rs->sc_flags &= ~RAIDF_LOCKED;
2448 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2449 		rs->sc_flags &= ~RAIDF_WANTED;
2450 		wakeup(rs);
2451 	}
2452 }
2453 
2454 
2455 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
2456 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2457 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2458 
2459 static daddr_t
2460 rf_component_info_offset(void)
2461 {
2462 
2463 	return RF_COMPONENT_INFO_OFFSET;
2464 }
2465 
2466 static daddr_t
2467 rf_component_info_size(unsigned secsize)
2468 {
2469 	daddr_t info_size;
2470 
2471 	KASSERT(secsize);
2472 	if (secsize > RF_COMPONENT_INFO_SIZE)
2473 		info_size = secsize;
2474 	else
2475 		info_size = RF_COMPONENT_INFO_SIZE;
2476 
2477 	return info_size;
2478 }
2479 
2480 static daddr_t
2481 rf_parity_map_offset(RF_Raid_t *raidPtr)
2482 {
2483 	daddr_t map_offset;
2484 
2485 	KASSERT(raidPtr->bytesPerSector);
2486 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2487 		map_offset = raidPtr->bytesPerSector;
2488 	else
2489 		map_offset = RF_COMPONENT_INFO_SIZE;
2490 	map_offset += rf_component_info_offset();
2491 
2492 	return map_offset;
2493 }
2494 
2495 static daddr_t
2496 rf_parity_map_size(RF_Raid_t *raidPtr)
2497 {
2498 	daddr_t map_size;
2499 
2500 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2501 		map_size = raidPtr->bytesPerSector;
2502 	else
2503 		map_size = RF_PARITY_MAP_SIZE;
2504 
2505 	return map_size;
2506 }
2507 
2508 int
2509 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2510 {
2511 	RF_ComponentLabel_t *clabel;
2512 
2513 	clabel = raidget_component_label(raidPtr, col);
2514 	clabel->clean = RF_RAID_CLEAN;
2515 	raidflush_component_label(raidPtr, col);
2516 	return(0);
2517 }
2518 
2519 
2520 int
2521 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2522 {
2523 	RF_ComponentLabel_t *clabel;
2524 
2525 	clabel = raidget_component_label(raidPtr, col);
2526 	clabel->clean = RF_RAID_DIRTY;
2527 	raidflush_component_label(raidPtr, col);
2528 	return(0);
2529 }
2530 
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Refresh the in-core copy of column col's label from disk. */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2540 
2541 RF_ComponentLabel_t *
2542 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2543 {
2544 	return &raidPtr->raid_cinfo[col].ci_label;
2545 }
2546 
2547 int
2548 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2549 {
2550 	RF_ComponentLabel_t *label;
2551 
2552 	label = &raidPtr->raid_cinfo[col].ci_label;
2553 	label->mod_counter = raidPtr->mod_counter;
2554 #ifndef RF_NO_PARITY_MAP
2555 	label->parity_map_modcount = label->mod_counter;
2556 #endif
2557 	return raidwrite_component_label(raidPtr->bytesPerSector,
2558 	    raidPtr->Disks[col].dev,
2559 	    raidPtr->raid_cinfo[col].ci_vp, label);
2560 }
2561 
2562 
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read a component label from the fixed component-info area on
	 * dev into *clabel. */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2572 
2573 /* ARGSUSED */
2574 static int
2575 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2576     size_t msize, daddr_t offset, daddr_t dsize)
2577 {
2578 	struct buf *bp;
2579 	const struct bdevsw *bdev;
2580 	int error;
2581 
2582 	/* XXX should probably ensure that we don't try to do this if
2583 	   someone has changed rf_protected_sectors. */
2584 
2585 	if (b_vp == NULL) {
2586 		/* For whatever reason, this component is not valid.
2587 		   Don't try to read a component label from it. */
2588 		return(EINVAL);
2589 	}
2590 
2591 	/* get a block of the appropriate size... */
2592 	bp = geteblk((int)dsize);
2593 	bp->b_dev = dev;
2594 
2595 	/* get our ducks in a row for the read */
2596 	bp->b_blkno = offset / DEV_BSIZE;
2597 	bp->b_bcount = dsize;
2598 	bp->b_flags |= B_READ;
2599  	bp->b_resid = dsize;
2600 
2601 	bdev = bdevsw_lookup(bp->b_dev);
2602 	if (bdev == NULL)
2603 		return (ENXIO);
2604 	(*bdev->d_strategy)(bp);
2605 
2606 	error = biowait(bp);
2607 
2608 	if (!error) {
2609 		memcpy(data, bp->b_data, msize);
2610 	}
2611 
2612 	brelse(bp, 0);
2613 	return(error);
2614 }
2615 
2616 
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Write *clabel synchronously (asyncp == 0) into the fixed
	 * component-info area on dev. */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2626 
2627 /* ARGSUSED */
2628 static int
2629 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2630     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2631 {
2632 	struct buf *bp;
2633 	const struct bdevsw *bdev;
2634 	int error;
2635 
2636 	/* get a block of the appropriate size... */
2637 	bp = geteblk((int)dsize);
2638 	bp->b_dev = dev;
2639 
2640 	/* get our ducks in a row for the write */
2641 	bp->b_blkno = offset / DEV_BSIZE;
2642 	bp->b_bcount = dsize;
2643 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2644  	bp->b_resid = dsize;
2645 
2646 	memset(bp->b_data, 0, dsize);
2647 	memcpy(bp->b_data, data, msize);
2648 
2649 	bdev = bdevsw_lookup(bp->b_dev);
2650 	if (bdev == NULL)
2651 		return (ENXIO);
2652 	(*bdev->d_strategy)(bp);
2653 	if (asyncp)
2654 		return 0;
2655 	error = biowait(bp);
2656 	brelse(bp, 0);
2657 	if (error) {
2658 #if 1
2659 		printf("Failed to write RAID component info!\n");
2660 #endif
2661 	}
2662 
2663 	return(error);
2664 }
2665 
2666 void
2667 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2668 {
2669 	int c;
2670 
2671 	for (c = 0; c < raidPtr->numCol; c++) {
2672 		/* Skip dead disks. */
2673 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2674 			continue;
2675 		/* XXXjld: what if an error occurs here? */
2676 		raidwrite_component_area(raidPtr->Disks[c].dev,
2677 		    raidPtr->raid_cinfo[c].ci_vp, map,
2678 		    RF_PARITYMAP_NBYTE,
2679 		    rf_parity_map_offset(raidPtr),
2680 		    rf_parity_map_size(raidPtr), 0);
2681 	}
2682 }
2683 
2684 void
2685 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2686 {
2687 	struct rf_paritymap_ondisk tmp;
2688 	int c,first;
2689 
2690 	first=1;
2691 	for (c = 0; c < raidPtr->numCol; c++) {
2692 		/* Skip dead disks. */
2693 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2694 			continue;
2695 		raidread_component_area(raidPtr->Disks[c].dev,
2696 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
2697 		    RF_PARITYMAP_NBYTE,
2698 		    rf_parity_map_offset(raidPtr),
2699 		    rf_parity_map_size(raidPtr));
2700 		if (first) {
2701 			memcpy(map, &tmp, sizeof(*map));
2702 			first = 0;
2703 		} else {
2704 			rf_paritymap_merge(map, &tmp);
2705 		}
2706 	}
2707 }
2708 
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* Mark every live component (and in-use spare) dirty on disk,
	 * bumping the mod counter first so the new labels supersede
	 * older copies. */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2768 
2769 
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* new labels must supersede any older copies on disk */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			/* on the final update, also set the clean bit if
			 * parity is known good */
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2844 
2845 void
2846 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2847 {
2848 
2849 	if (vp != NULL) {
2850 		if (auto_configured == 1) {
2851 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2852 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2853 			vput(vp);
2854 
2855 		} else {
2856 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2857 		}
2858 	}
2859 }
2860 
2861 
2862 void
2863 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2864 {
2865 	int r,c;
2866 	struct vnode *vp;
2867 	int acd;
2868 
2869 
2870 	/* We take this opportunity to close the vnodes like we should.. */
2871 
2872 	for (c = 0; c < raidPtr->numCol; c++) {
2873 		vp = raidPtr->raid_cinfo[c].ci_vp;
2874 		acd = raidPtr->Disks[c].auto_configured;
2875 		rf_close_component(raidPtr, vp, acd);
2876 		raidPtr->raid_cinfo[c].ci_vp = NULL;
2877 		raidPtr->Disks[c].auto_configured = 0;
2878 	}
2879 
2880 	for (r = 0; r < raidPtr->numSpare; r++) {
2881 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2882 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2883 		rf_close_component(raidPtr, vp, acd);
2884 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2885 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2886 	}
2887 }
2888 
2889 
void
rf_ReconThread(struct rf_recon_req *req)
{
	int     s;
	RF_Raid_t *raidPtr;

	/* Kernel thread body: fail the requested column (optionally
	 * reconstructing to a spare), then exit.  Runs at splbio(). */
	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* we own the request structure; free it before exiting */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2911 
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	/* Kernel thread body: rewrite all parity, then exit. */
	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2942 
2943 
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	/* Kernel thread body: run the copyback of reconstructed data,
	 * then exit. */
	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2958 
2959 
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	/* Kernel thread body: rebuild column req->col in place, free the
	 * request, and exit.  Runs at splbio(). */
	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	/* we own the request structure; free it before exiting */
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2977 
/*
 * Try to read a RAIDframe component label from (dev, vp).  If it looks
 * sane, prepend a new RF_AutoConfig_t to ac_list and return the new
 * list head; otherwise close/release the vnode and return ac_list
 * unchanged.  On allocation failure the entire list is torn down and
 * NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: free everything collected so far */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				/* drop the label, then tear down the list */
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;	/* ac now owns the label */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no usable label here, so release the label
		 * buffer and the vnode reference */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3035 
3036 RF_AutoConfig_t *
3037 rf_find_raid_components(void)
3038 {
3039 	struct vnode *vp;
3040 	struct disklabel label;
3041 	device_t dv;
3042 	deviter_t di;
3043 	dev_t dev;
3044 	int bmajor, bminor, wedge, rf_part_found;
3045 	int error;
3046 	int i;
3047 	RF_AutoConfig_t *ac_list;
3048 	uint64_t numsecs;
3049 	unsigned secsize;
3050 
3051 	/* initialize the AutoConfig list */
3052 	ac_list = NULL;
3053 
3054 	/* we begin by trolling through *all* the devices on the system */
3055 
3056 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3057 	     dv = deviter_next(&di)) {
3058 
3059 		/* we are only interested in disks... */
3060 		if (device_class(dv) != DV_DISK)
3061 			continue;
3062 
3063 		/* we don't care about floppies... */
3064 		if (device_is_a(dv, "fd")) {
3065 			continue;
3066 		}
3067 
3068 		/* we don't care about CD's... */
3069 		if (device_is_a(dv, "cd")) {
3070 			continue;
3071 		}
3072 
3073 		/* we don't care about md's... */
3074 		if (device_is_a(dv, "md")) {
3075 			continue;
3076 		}
3077 
3078 		/* hdfd is the Atari/Hades floppy driver */
3079 		if (device_is_a(dv, "hdfd")) {
3080 			continue;
3081 		}
3082 
3083 		/* fdisa is the Atari/Milan floppy driver */
3084 		if (device_is_a(dv, "fdisa")) {
3085 			continue;
3086 		}
3087 
3088 		/* need to find the device_name_to_block_device_major stuff */
3089 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3090 
3091 		rf_part_found = 0; /*No raid partition as yet*/
3092 
3093 		/* get a vnode for the raw partition of this disk */
3094 
3095 		wedge = device_is_a(dv, "dk");
3096 		bminor = minor(device_unit(dv));
3097 		dev = wedge ? makedev(bmajor, bminor) :
3098 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
3099 		if (bdevvp(dev, &vp))
3100 			panic("RAID can't alloc vnode");
3101 
3102 		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
3103 
3104 		if (error) {
3105 			/* "Who cares."  Continue looking
3106 			   for something that exists*/
3107 			vput(vp);
3108 			continue;
3109 		}
3110 
3111 		error = getdisksize(vp, &numsecs, &secsize);
3112 		if (error) {
3113 			vput(vp);
3114 			continue;
3115 		}
3116 		if (wedge) {
3117 			struct dkwedge_info dkw;
3118 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3119 			    NOCRED);
3120 			if (error) {
3121 				printf("RAIDframe: can't get wedge info for "
3122 				    "dev %s (%d)\n", device_xname(dv), error);
3123 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3124 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3125 				vput(vp);
3126 				continue;
3127 			}
3128 
3129 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3130 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3131 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3132 				vput(vp);
3133 				continue;
3134 			}
3135 
3136 			ac_list = rf_get_component(ac_list, dev, vp,
3137 			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
3138 			rf_part_found = 1; /*There is a raid component on this disk*/
3139 			continue;
3140 		}
3141 
3142 		/* Ok, the disk exists.  Go get the disklabel. */
3143 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3144 		if (error) {
3145 			/*
3146 			 * XXX can't happen - open() would
3147 			 * have errored out (or faked up one)
3148 			 */
3149 			if (error != ENOTTY)
3150 				printf("RAIDframe: can't get label for dev "
3151 				    "%s (%d)\n", device_xname(dv), error);
3152 		}
3153 
3154 		/* don't need this any more.  We'll allocate it again
3155 		   a little later if we really do... */
3156 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3157 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3158 		vput(vp);
3159 
3160 		if (error)
3161 			continue;
3162 
3163 		rf_part_found = 0; /*No raid partitions yet*/
3164 		for (i = 0; i < label.d_npartitions; i++) {
3165 			char cname[sizeof(ac_list->devname)];
3166 
3167 			/* We only support partitions marked as RAID */
3168 			if (label.d_partitions[i].p_fstype != FS_RAID)
3169 				continue;
3170 
3171 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3172 			if (bdevvp(dev, &vp))
3173 				panic("RAID can't alloc vnode");
3174 
3175 			error = VOP_OPEN(vp, FREAD, NOCRED);
3176 			if (error) {
3177 				/* Whatever... */
3178 				vput(vp);
3179 				continue;
3180 			}
3181 			snprintf(cname, sizeof(cname), "%s%c",
3182 			    device_xname(dv), 'a' + i);
3183 			ac_list = rf_get_component(ac_list, dev, vp, cname,
3184 				label.d_partitions[i].p_size, numsecs, secsize);
3185 				rf_part_found = 1; /*There is at least one raid partition on this disk*/
3186 		}
3187 
3188 		/*
3189 		 *If there is no raid component on this disk, either in a
3190 		 *disklabel or inside a wedge, check the raw partition as well,
3191 		 *as it is possible to configure raid components on raw disk
3192 		 *devices.
3193 		 */
3194 
3195 		if (!rf_part_found) {
3196 			char cname[sizeof(ac_list->devname)];
3197 
3198 			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3199 			if (bdevvp(dev, &vp))
3200 				panic("RAID can't alloc vnode");
3201 
3202 			error = VOP_OPEN(vp, FREAD, NOCRED);
3203 			if (error) {
3204 				/* Whatever... */
3205 				vput(vp);
3206 				continue;
3207 			}
3208 			snprintf(cname, sizeof(cname), "%s%c",
3209 			    device_xname(dv), 'a' + RAW_PART);
3210 			ac_list = rf_get_component(ac_list, dev, vp, cname,
3211 				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3212 		}
3213 	}
3214 	deviter_release(&di);
3215 	return ac_list;
3216 }
3217 
3218 
3219 int
3220 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3221 {
3222 
3223 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3224 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3225 	    ((clabel->clean == RF_RAID_CLEAN) ||
3226 	     (clabel->clean == RF_RAID_DIRTY)) &&
3227 	    clabel->row >=0 &&
3228 	    clabel->column >= 0 &&
3229 	    clabel->num_rows > 0 &&
3230 	    clabel->num_columns > 0 &&
3231 	    clabel->row < clabel->num_rows &&
3232 	    clabel->column < clabel->num_columns &&
3233 	    clabel->blockSize > 0 &&
3234 	    /*
3235 	     * numBlocksHi may contain garbage, but it is ok since
3236 	     * the type is unsigned.  If it is really garbage,
3237 	     * rf_fix_old_label_size() will fix it.
3238 	     */
3239 	    rf_component_label_numblocks(clabel) > 0) {
3240 		/*
3241 		 * label looks reasonable enough...
3242 		 * let's make sure it has no old garbage.
3243 		 */
3244 		if (numsecs)
3245 			rf_fix_old_label_size(clabel, numsecs);
3246 		return(1);
3247 	}
3248 	return(0);
3249 }
3250 
3251 
3252 /*
3253  * For reasons yet unknown, some old component labels have garbage in
3254  * the newer numBlocksHi region, and this causes lossage.  Since those
3255  * disks will also have numsecs set to less than 32 bits of sectors,
3256  * we can determine when this corruption has occurred, and fix it.
3257  *
3258  * The exact same problem, with the same unknown reason, happens to
3259  * the partitionSizeHi member as well.
3260  */
3261 static void
3262 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3263 {
3264 
3265 	if (numsecs < ((uint64_t)1 << 32)) {
3266 		if (clabel->numBlocksHi) {
3267 			printf("WARNING: total sectors < 32 bits, yet "
3268 			       "numBlocksHi set\n"
3269 			       "WARNING: resetting numBlocksHi to zero.\n");
3270 			clabel->numBlocksHi = 0;
3271 		}
3272 
3273 		if (clabel->partitionSizeHi) {
3274 			printf("WARNING: total sectors < 32 bits, yet "
3275 			       "partitionSizeHi set\n"
3276 			       "WARNING: resetting partitionSizeHi to zero.\n");
3277 			clabel->partitionSizeHi = 0;
3278 		}
3279 	}
3280 }
3281 
3282 
#ifdef DEBUG
/*
 * Dump a component label to the console in human-readable form.
 * Debug-only diagnostic; compiled in only under DEBUG.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	/* 64-bit block count for this component. */
	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3313 
3314 RF_ConfigSet_t *
3315 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3316 {
3317 	RF_AutoConfig_t *ac;
3318 	RF_ConfigSet_t *config_sets;
3319 	RF_ConfigSet_t *cset;
3320 	RF_AutoConfig_t *ac_next;
3321 
3322 
3323 	config_sets = NULL;
3324 
3325 	/* Go through the AutoConfig list, and figure out which components
3326 	   belong to what sets.  */
3327 	ac = ac_list;
3328 	while(ac!=NULL) {
3329 		/* we're going to putz with ac->next, so save it here
3330 		   for use at the end of the loop */
3331 		ac_next = ac->next;
3332 
3333 		if (config_sets == NULL) {
3334 			/* will need at least this one... */
3335 			config_sets = (RF_ConfigSet_t *)
3336 				malloc(sizeof(RF_ConfigSet_t),
3337 				       M_RAIDFRAME, M_NOWAIT);
3338 			if (config_sets == NULL) {
3339 				panic("rf_create_auto_sets: No memory!");
3340 			}
3341 			/* this one is easy :) */
3342 			config_sets->ac = ac;
3343 			config_sets->next = NULL;
3344 			config_sets->rootable = 0;
3345 			ac->next = NULL;
3346 		} else {
3347 			/* which set does this component fit into? */
3348 			cset = config_sets;
3349 			while(cset!=NULL) {
3350 				if (rf_does_it_fit(cset, ac)) {
3351 					/* looks like it matches... */
3352 					ac->next = cset->ac;
3353 					cset->ac = ac;
3354 					break;
3355 				}
3356 				cset = cset->next;
3357 			}
3358 			if (cset==NULL) {
3359 				/* didn't find a match above... new set..*/
3360 				cset = (RF_ConfigSet_t *)
3361 					malloc(sizeof(RF_ConfigSet_t),
3362 					       M_RAIDFRAME, M_NOWAIT);
3363 				if (cset == NULL) {
3364 					panic("rf_create_auto_sets: No memory!");
3365 				}
3366 				cset->ac = ac;
3367 				ac->next = NULL;
3368 				cset->next = config_sets;
3369 				cset->rootable = 0;
3370 				config_sets = cset;
3371 			}
3372 		}
3373 		ac = ac_next;
3374 	}
3375 
3376 
3377 	return(config_sets);
3378 }
3379 
3380 static int
3381 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3382 {
3383 	RF_ComponentLabel_t *clabel1, *clabel2;
3384 
3385 	/* If this one matches the *first* one in the set, that's good
3386 	   enough, since the other members of the set would have been
3387 	   through here too... */
3388 	/* note that we are not checking partitionSize here..
3389 
3390 	   Note that we are also not checking the mod_counters here.
3391 	   If everything else matches except the mod_counter, that's
3392 	   good enough for this test.  We will deal with the mod_counters
3393 	   a little later in the autoconfiguration process.
3394 
3395 	    (clabel1->mod_counter == clabel2->mod_counter) &&
3396 
3397 	   The reason we don't check for this is that failed disks
3398 	   will have lower modification counts.  If those disks are
3399 	   not added to the set they used to belong to, then they will
3400 	   form their own set, which may result in 2 different sets,
3401 	   for example, competing to be configured at raid0, and
3402 	   perhaps competing to be the root filesystem set.  If the
3403 	   wrong ones get configured, or both attempt to become /,
3404 	   weird behaviour and or serious lossage will occur.  Thus we
3405 	   need to bring them into the fold here, and kick them out at
3406 	   a later point.
3407 
3408 	*/
3409 
3410 	clabel1 = cset->ac->clabel;
3411 	clabel2 = ac->clabel;
3412 	if ((clabel1->version == clabel2->version) &&
3413 	    (clabel1->serial_number == clabel2->serial_number) &&
3414 	    (clabel1->num_rows == clabel2->num_rows) &&
3415 	    (clabel1->num_columns == clabel2->num_columns) &&
3416 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3417 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3418 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3419 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3420 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3421 	    (clabel1->blockSize == clabel2->blockSize) &&
3422 	    rf_component_label_numblocks(clabel1) ==
3423 	    rf_component_label_numblocks(clabel2) &&
3424 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3425 	    (clabel1->root_partition == clabel2->root_partition) &&
3426 	    (clabel1->last_unit == clabel2->last_unit) &&
3427 	    (clabel1->config_order == clabel2->config_order)) {
3428 		/* if it get's here, it almost *has* to be a match */
3429 	} else {
3430 		/* it's not consistent with somebody in the set..
3431 		   punt */
3432 		return(0);
3433 	}
3434 	/* all was fine.. it must fit... */
3435 	return(1);
3436 }
3437 
/*
 * Decide whether this configuration set has enough live components to
 * be worth autoconfiguring.  Returns 1 if so, 0 if too many members
 * are missing or stale for the set's parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/* The highest mod_counter seen wins; components with a lower one
	   are considered stale (e.g. previously failed) and don't count. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/* Walk every column and see whether a current (non-stale)
	   component was found for it. */
	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3540 
3541 void
3542 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3543 			RF_Raid_t *raidPtr)
3544 {
3545 	RF_ComponentLabel_t *clabel;
3546 	int i;
3547 
3548 	clabel = ac->clabel;
3549 
3550 	/* 1. Fill in the common stuff */
3551 	config->numRow = clabel->num_rows = 1;
3552 	config->numCol = clabel->num_columns;
3553 	config->numSpare = 0; /* XXX should this be set here? */
3554 	config->sectPerSU = clabel->sectPerSU;
3555 	config->SUsPerPU = clabel->SUsPerPU;
3556 	config->SUsPerRU = clabel->SUsPerRU;
3557 	config->parityConfig = clabel->parityConfig;
3558 	/* XXX... */
3559 	strcpy(config->diskQueueType,"fifo");
3560 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3561 	config->layoutSpecificSize = 0; /* XXX ?? */
3562 
3563 	while(ac!=NULL) {
3564 		/* row/col values will be in range due to the checks
3565 		   in reasonable_label() */
3566 		strcpy(config->devnames[0][ac->clabel->column],
3567 		       ac->devname);
3568 		ac = ac->next;
3569 	}
3570 
3571 	for(i=0;i<RF_MAXDBGV;i++) {
3572 		config->debugVars[i][0] = 0;
3573 	}
3574 }
3575 
3576 int
3577 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3578 {
3579 	RF_ComponentLabel_t *clabel;
3580 	int column;
3581 	int sparecol;
3582 
3583 	raidPtr->autoconfigure = new_value;
3584 
3585 	for(column=0; column<raidPtr->numCol; column++) {
3586 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3587 			clabel = raidget_component_label(raidPtr, column);
3588 			clabel->autoconfigure = new_value;
3589 			raidflush_component_label(raidPtr, column);
3590 		}
3591 	}
3592 	for(column = 0; column < raidPtr->numSpare ; column++) {
3593 		sparecol = raidPtr->numCol + column;
3594 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3595 			clabel = raidget_component_label(raidPtr, sparecol);
3596 			clabel->autoconfigure = new_value;
3597 			raidflush_component_label(raidPtr, sparecol);
3598 		}
3599 	}
3600 	return(new_value);
3601 }
3602 
3603 int
3604 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3605 {
3606 	RF_ComponentLabel_t *clabel;
3607 	int column;
3608 	int sparecol;
3609 
3610 	raidPtr->root_partition = new_value;
3611 	for(column=0; column<raidPtr->numCol; column++) {
3612 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3613 			clabel = raidget_component_label(raidPtr, column);
3614 			clabel->root_partition = new_value;
3615 			raidflush_component_label(raidPtr, column);
3616 		}
3617 	}
3618 	for(column = 0; column < raidPtr->numSpare ; column++) {
3619 		sparecol = raidPtr->numCol + column;
3620 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3621 			clabel = raidget_component_label(raidPtr, sparecol);
3622 			clabel->root_partition = new_value;
3623 			raidflush_component_label(raidPtr, sparecol);
3624 		}
3625 	}
3626 	return(new_value);
3627 }
3628 
3629 void
3630 rf_release_all_vps(RF_ConfigSet_t *cset)
3631 {
3632 	RF_AutoConfig_t *ac;
3633 
3634 	ac = cset->ac;
3635 	while(ac!=NULL) {
3636 		/* Close the vp, and give it back */
3637 		if (ac->vp) {
3638 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3639 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
3640 			vput(ac->vp);
3641 			ac->vp = NULL;
3642 		}
3643 		ac = ac->next;
3644 	}
3645 }
3646 
3647 
3648 void
3649 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3650 {
3651 	RF_AutoConfig_t *ac;
3652 	RF_AutoConfig_t *next_ac;
3653 
3654 	ac = cset->ac;
3655 	while(ac!=NULL) {
3656 		next_ac = ac->next;
3657 		/* nuke the label */
3658 		free(ac->clabel, M_RAIDFRAME);
3659 		/* cleanup the config structure */
3660 		free(ac, M_RAIDFRAME);
3661 		/* "next.." */
3662 		ac = next_ac;
3663 	}
3664 	/* and, finally, nuke the config set */
3665 	free(cset, M_RAIDFRAME);
3666 }
3667 
3668 
/*
 * Populate a component label from the current in-core state of the
 * RAID set.  The caller supplies per-component fields (row/column)
 * separately; everything written here is set-wide.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* geometry of the set */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout parameters */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3701 
/*
 * Autoconfigure a single configuration set: build an RF_Config_t from
 * its component labels, pick a raid unit number, and bring the set up.
 * Returns the configured softc, or NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Start at the unit recorded in the label; walk upward past any
	   units that are already configured.  NOTE(review): assumes
	   raidget() always returns a softc (never NULL) -- confirm
	   against its definition elsewhere in this file. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtr->root_partition = 1;
		}
	} else {
		/* configuration failed; give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3767 
3768 void
3769 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3770 {
3771 	struct buf *bp;
3772 	struct raid_softc *rs;
3773 
3774 	bp = (struct buf *)desc->bp;
3775 	rs = desc->raidPtr->softc;
3776 	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3777 	    (bp->b_flags & B_READ));
3778 }
3779 
/*
 * Initialize a pool(9) at IPL_BIO for RAIDframe's use, pre-primed with
 * xmin items and with a high-water mark of xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);		/* cap cached idle items at xmax */
	pool_prime(p, xmin);		/* pre-allocate xmin items now */
	pool_setlowat(p, xmin);		/* try to keep xmin items around */
}
3789 
3790 /*
3791  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
3792  * if there is IO pending and if that IO could possibly be done for a
3793  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
3794  * otherwise.
3795  *
3796  */
3797 
3798 int
3799 rf_buf_queue_check(RF_Raid_t *raidPtr)
3800 {
3801 	struct raid_softc *rs = raidPtr->softc;
3802 	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
3803 		/* there is work to do */
3804 		return 0;
3805 	}
3806 	/* default is nothing to do */
3807 	return 1;
3808 }
3809 
3810 int
3811 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3812 {
3813 	uint64_t numsecs;
3814 	unsigned secsize;
3815 	int error;
3816 
3817 	error = getdisksize(vp, &numsecs, &secsize);
3818 	if (error == 0) {
3819 		diskPtr->blockSize = secsize;
3820 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3821 		diskPtr->partitionSize = numsecs;
3822 		return 0;
3823 	}
3824 	return error;
3825 }
3826 
/*
 * autoconf(9) match: the raid pseudo-device always matches.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3832 
/*
 * autoconf(9) attach: nothing to do here; per-unit setup happens
 * elsewhere (when a set is configured).
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3838 
3839 
3840 static int
3841 raid_detach(device_t self, int flags)
3842 {
3843 	int error;
3844 	struct raid_softc *rs = raidget(device_unit(self));
3845 
3846 	if (rs == NULL)
3847 		return ENXIO;
3848 
3849 	if ((error = raidlock(rs)) != 0)
3850 		return (error);
3851 
3852 	error = raid_detach_unlocked(rs);
3853 
3854 	raidunlock(rs);
3855 
3856 	/* XXXkd: raidput(rs) ??? */
3857 
3858 	return error;
3859 }
3860 
/*
 * Publish a synthetic disk geometry for the RAID set to the disk(9)
 * layer.  The track count is fabricated (4 per column); only the
 * sector size and total sector count are real.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;	/* made-up value */

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}
3875 
3876 /*
3877  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3878  * We end up returning whatever error was returned by the first cache flush
3879  * that fails.
3880  */
3881 
3882 int
3883 rf_sync_component_caches(RF_Raid_t *raidPtr)
3884 {
3885 	int c, sparecol;
3886 	int e,error;
3887 	int force = 1;
3888 
3889 	error = 0;
3890 	for (c = 0; c < raidPtr->numCol; c++) {
3891 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
3892 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3893 					  &force, FWRITE, NOCRED);
3894 			if (e) {
3895 				if (e != ENODEV)
3896 					printf("raid%d: cache flush to component %s failed.\n",
3897 					       raidPtr->raidid, raidPtr->Disks[c].devname);
3898 				if (error == 0) {
3899 					error = e;
3900 				}
3901 			}
3902 		}
3903 	}
3904 
3905 	for( c = 0; c < raidPtr->numSpare ; c++) {
3906 		sparecol = raidPtr->numCol + c;
3907 		/* Need to ensure that the reconstruct actually completed! */
3908 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3909 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3910 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
3911 			if (e) {
3912 				if (e != ENODEV)
3913 					printf("raid%d: cache flush to component %s failed.\n",
3914 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3915 				if (error == 0) {
3916 					error = e;
3917 				}
3918 			}
3919 		}
3920 	}
3921 	return error;
3922 }
3923